|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v1 4/6] vpci: add SR-IOV support for PVH Dom0
On 7/25/25 20:39, Teddy Astie wrote: > Le 25/07/2025 à 16:26, Mykyta Poturai a écrit : >> From: Stewart Hildebrand <stewart.hildebrand@xxxxxxx> >> >> This code is expected to only be used by privileged domains, >> unprivileged domains should not get access to the SR-IOV capability. >> >> Implement RW handlers for PCI_SRIOV_CTRL register to dynamically >> map/unmap VF BARS. Recalculate BAR sizes before mapping VFs to account >> for possible changes in the system page size register. >> >> Relies on dom0 to enable SR-IOV and PHYSDEVOP to inform Xen about >> addition/removal of VFs. >> >> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@xxxxxxx> >> Signed-off-by: Mykyta Poturai <mykyta_poturai@xxxxxxxx> >> --- >> CHANGELOG.md | 3 +- >> SUPPORT.md | 2 - >> xen/drivers/vpci/Makefile | 2 +- >> xen/drivers/vpci/header.c | 3 + >> xen/drivers/vpci/sriov.c | 235 ++++++++++++++++++++++++++++++++++++++ >> xen/drivers/vpci/vpci.c | 1 + >> xen/include/xen/vpci.h | 7 +- >> 7 files changed, 247 insertions(+), 6 deletions(-) >> create mode 100644 xen/drivers/vpci/sriov.c >> >> diff --git a/CHANGELOG.md b/CHANGELOG.md >> index 5f31ca08fe..7b0e8beb76 100644 >> --- a/CHANGELOG.md >> +++ b/CHANGELOG.md >> @@ -23,8 +23,7 @@ The format is based on [Keep a >> Changelog](https://keepachangelog.com/en/1.0.0/ >> - On x86: >> - Option to attempt to fixup p2m page-faults on PVH dom0. >> - Resizable BARs is supported for PVH dom0. >> - - Support PCI passthrough for HVM domUs when dom0 is PVH (note SR-IOV >> - capability usage is not yet supported on PVH dom0). >> + - Support PCI passthrough for HVM domUs when dom0 is PVH. >> - Smoke tests for the FreeBSD Xen builds in Cirrus CI. >> >> - On Arm: >> diff --git a/SUPPORT.md b/SUPPORT.md >> index 6a82a92189..830b598714 100644 >> --- a/SUPPORT.md >> +++ b/SUPPORT.md >> @@ -170,8 +170,6 @@ unexpected behavior or issues on some hardware. >> >> At least the following features are missing on a PVH dom0: >> >> - * PCI SR-IOV. >> - >> * Native NMI forwarding (nmi=dom0 command line option). >> >> * MCE handling. > > I would prefer changelog/support changes to be a separate patch or > alternatively at the last patch as I don't think SR-IOV is fully working > without the 2 remaining ones. > >> diff --git a/xen/drivers/vpci/Makefile b/xen/drivers/vpci/Makefile >> index a7c8a30a89..fe1e57b64d 100644 >> --- a/xen/drivers/vpci/Makefile >> +++ b/xen/drivers/vpci/Makefile >> @@ -1,2 +1,2 @@ >> -obj-y += vpci.o header.o rebar.o >> +obj-y += vpci.o header.o rebar.o sriov.o >> obj-$(CONFIG_HAS_PCI_MSI) += msi.o msix.o >> diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c >> index f947f652cd..0a840c6dcc 100644 >> --- a/xen/drivers/vpci/header.c >> +++ b/xen/drivers/vpci/header.c >> @@ -839,6 +839,9 @@ static int cf_check init_header(struct pci_dev *pdev) >> >> ASSERT(rw_is_write_locked(&pdev->domain->pci_lock)); >> >> + if ( pdev->info.is_virtfn ) >> + return 0; >> + >> switch ( pci_conf_read8(pdev->sbdf, PCI_HEADER_TYPE) & 0x7f ) >> { >> case PCI_HEADER_TYPE_NORMAL: >> diff --git a/xen/drivers/vpci/sriov.c b/xen/drivers/vpci/sriov.c >> new file mode 100644 >> index 0000000000..640430e3e9 >> --- /dev/null >> +++ b/xen/drivers/vpci/sriov.c >> @@ -0,0 +1,235 @@ >> +/* SPDX-License-Identifier: GPL-2.0-only */ >> +/* >> + * Handlers for accesses to the SR-IOV capability structure. >> + * >> + * Copyright (C) 2018 Citrix Systems R&D >> + */ >> + >> +#include <xen/sched.h> >> +#include <xen/vpci.h> >> + >> +static int vf_init_bars(const struct pci_dev *vf_pdev) >> +{ >> + unsigned int i, sriov_pos; >> + int vf_idx, rc; >> + const struct pci_dev *pf_pdev = vf_pdev->pf_pdev; >> + uint16_t offset, stride; >> + struct vpci_bar *bars = vf_pdev->vpci->header.bars; >> + struct vpci_bar *physfn_vf_bars = pf_pdev->vpci->sriov->vf_bars; >> + >> + sriov_pos = pci_find_ext_capability(pf_pdev->sbdf, >> PCI_EXT_CAP_ID_SRIOV); >> + offset = pci_conf_read16(pf_pdev->sbdf, sriov_pos + >> PCI_SRIOV_VF_OFFSET); >> + stride = pci_conf_read16(pf_pdev->sbdf, sriov_pos + >> PCI_SRIOV_VF_STRIDE); >> + >> + vf_idx = vf_pdev->sbdf.sbdf; >> + vf_idx -= pf_pdev->sbdf.sbdf + offset; > > We can probably rewrite it as > > vf_idx = vf_pdev->sbdf.sbdf - (pf_pdev->sbdf.sbdf + offset); > > especially with sbdf being potentially not representable as a int (even > though very unlikely). > >> + if ( vf_idx < 0 ) >> + return -EINVAL; >> + if ( stride ) >> + { >> + if ( vf_idx % stride ) >> + return -EINVAL; >> + vf_idx /= stride; >> + } >> + >> + /* >> + * Set up BARs for this VF out of PF's VF BARs taking into account >> + * the index of the VF. >> + */ >> + for ( i = 0; i < PCI_SRIOV_NUM_BARS; i++ ) >> + { >> + bars[i].addr = physfn_vf_bars[i].addr + vf_idx * >> physfn_vf_bars[i].size; >> + bars[i].guest_addr = bars[i].addr; >> + bars[i].size = physfn_vf_bars[i].size; >> + bars[i].type = physfn_vf_bars[i].type; >> + bars[i].prefetchable = physfn_vf_bars[i].prefetchable; >> + rc = vpci_bar_add_rangeset(vf_pdev, &bars[i], i); >> + if ( rc ) >> + return rc; >> + } >> + >> + return 0; >> +} >> + >> +static int map_vf(const struct pci_dev *vf_pdev, uint16_t cmd) >> +{ >> + int rc; >> + >> + ASSERT(rw_is_write_locked(&vf_pdev->domain->pci_lock)); >> + >> + rc = vf_init_bars(vf_pdev); >> + if ( rc ) >> + return rc; >> + >> + return vpci_modify_bars(vf_pdev, cmd, false); >> +} >> + >> +static int size_vf_bars(struct pci_dev *pf_pdev, unsigned int sriov_pos) >> +{ >> + /* >> + * NB: a non-const pci_dev of the PF is needed in order to update >> + * vf_rlen. >> + */ >> + struct vpci_bar *bars; >> + unsigned int i; >> + int rc = 0; >> + >> + ASSERT(rw_is_write_locked(&pf_pdev->domain->pci_lock)); >> + ASSERT(!pf_pdev->info.is_virtfn); >> + >> + if ( !pf_pdev->vpci->sriov ) >> + return -EINVAL; >> + >> + /* Read BARs for VFs out of PF's SR-IOV extended capability. */ >> + bars = pf_pdev->vpci->sriov->vf_bars; >> + /* Set the BARs addresses and size. */ >> + for ( i = 0; i < PCI_SRIOV_NUM_BARS; i += rc ) >> + { >> + unsigned int idx = sriov_pos + PCI_SRIOV_BAR + i * 4; >> + uint32_t bar; >> + uint64_t addr, size; >> + >> + bar = pci_conf_read32(pf_pdev->sbdf, idx); >> + >> + rc = pci_size_mem_bar(pf_pdev->sbdf, idx, &addr, &size, >> + PCI_BAR_VF | >> + ((i == PCI_SRIOV_NUM_BARS - 1) ? PCI_BAR_LAST >> + : 0)); >> + >> + /* >> + * Update vf_rlen on the PF. According to the spec the size of >> + * the BARs can change if the system page size register is >> + * modified, so always update rlen when enabling VFs. >> + */ >> + pf_pdev->physfn.vf_rlen[i] = size; >> + >> + if ( !size ) >> + { >> + bars[i].type = VPCI_BAR_EMPTY; >> + continue; >> + } >> + >> + bars[i].addr = addr; >> + bars[i].guest_addr = addr; >> + bars[i].size = size; >> + bars[i].prefetchable = bar & PCI_BASE_ADDRESS_MEM_PREFETCH; >> + >> + switch ( rc ) >> + { >> + case 1: >> + bars[i].type = VPCI_BAR_MEM32; >> + break; >> + >> + case 2: >> + bars[i].type = VPCI_BAR_MEM64_LO; >> + bars[i + 1].type = VPCI_BAR_MEM64_HI; >> + break; >> + >> + default: >> + ASSERT_UNREACHABLE(); >> + } >> + } >> + >> + rc = rc > 0 ? 0 : rc; >> + >> + return rc; >> +} >> + >> +static void cf_check control_write(const struct pci_dev *pdev, unsigned int >> reg, >> + uint32_t val, void *data) >> +{ >> + unsigned int sriov_pos = reg - PCI_SRIOV_CTRL; >> + uint16_t control = pci_conf_read16(pdev->sbdf, reg); >> + bool mem_enabled = control & PCI_SRIOV_CTRL_MSE; >> + bool new_mem_enabled = val & PCI_SRIOV_CTRL_MSE; >> + >> + ASSERT(!pdev->info.is_virtfn); >> + >> + if ( new_mem_enabled != mem_enabled ) >> + { >> + struct pci_dev *vf_pdev; >> + if ( new_mem_enabled ) >> + { >> + /* FIXME casting away const-ness to modify vf_rlen */ >> + size_vf_bars((struct pci_dev *)pdev, sriov_pos); >> + >> + list_for_each_entry(vf_pdev, &pdev->vf_list, vf_list) >> + map_vf(vf_pdev, PCI_COMMAND_MEMORY); >> + } >> + else >> + { >> + list_for_each_entry(vf_pdev, &pdev->vf_list, vf_list) >> + map_vf(vf_pdev, 0); >> + } >> + } >> + >> + pci_conf_write16(pdev->sbdf, reg, val); >> +} >> + >> +static int vf_init_header(struct pci_dev *vf_pdev) >> +{ >> + const struct pci_dev *pf_pdev; >> + unsigned int sriov_pos; >> + int rc = 0; >> + uint16_t ctrl; >> + >> + ASSERT(rw_is_write_locked(&vf_pdev->domain->pci_lock)); >> + >> + if ( !vf_pdev->info.is_virtfn ) >> + return 0; >> + >> + pf_pdev = vf_pdev->pf_pdev; >> + ASSERT(pf_pdev); >> + >> + sriov_pos = pci_find_ext_capability(pf_pdev->sbdf, >> PCI_EXT_CAP_ID_SRIOV); >> + ctrl = pci_conf_read16(pf_pdev->sbdf, sriov_pos + PCI_SRIOV_CTRL); >> + >> + if ( (pf_pdev->domain == vf_pdev->domain) && (ctrl & >> PCI_SRIOV_CTRL_MSE) ) >> + { >> + rc = map_vf(vf_pdev, PCI_COMMAND_MEMORY); >> + if ( rc ) >> + return rc; >> + } >> + >> + return rc; >> +} >> + >> +static int init_sriov(struct pci_dev *pdev) >> +{ >> + unsigned int pos; >> + >> + if ( pdev->info.is_virtfn ) >> + return vf_init_header(pdev); >> + >> + pos = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_SRIOV); >> + >> + if ( !pos ) >> + return 0; >> + >> + if ( !is_hardware_domain(pdev->domain) ) >> + { >> + printk(XENLOG_ERR >> + "%pp: SR-IOV configuration unsupported for unpriv %pd\n", >> + &pdev->sbdf, pdev->domain); >> + return 0; >> + } > > Should it instead rely on xsm to know if it the operation is allowed or > not for this domain ? > Hi, Do you have a suggestion on which hook should be used here? Semantically both resource_plug_pci and resource_setup_pci make sense here but I am not sure which one to choose as I am not very familiar with XSM. >> + >> + pdev->vpci->sriov = xzalloc(struct vpci_sriov); >> + if ( !pdev->vpci->sriov ) >> + return -ENOMEM; >> + >> + return vpci_add_register(pdev->vpci, vpci_hw_read16, control_write, >> + pos + PCI_SRIOV_CTRL, 2, NULL); >> +} >> + >> +REGISTER_VPCI_INIT(init_sriov, VPCI_PRIORITY_LOW); >> + >> +/* >> + * Local variables: >> + * mode: C >> + * c-file-style: "BSD" >> + * c-basic-offset: 4 >> + * tab-width: 4 >> + * indent-tabs-mode: nil >> + * End: >> + */ >> diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c >> index 09988f04c2..7af6651831 100644 >> --- a/xen/drivers/vpci/vpci.c >> +++ b/xen/drivers/vpci/vpci.c >> @@ -120,6 +120,7 @@ void vpci_deassign_device(struct pci_dev *pdev) >> for ( i = 0; i < ARRAY_SIZE(pdev->vpci->header.bars); i++ ) >> rangeset_destroy(pdev->vpci->header.bars[i].mem); >> >> + xfree(pdev->vpci->sriov); >> xfree(pdev->vpci->msix); >> xfree(pdev->vpci->msi); >> xfree(pdev->vpci); >> diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h >> index 06f7039f20..9e8dcab17e 100644 >> --- a/xen/include/xen/vpci.h >> +++ b/xen/include/xen/vpci.h >> @@ -138,7 +138,6 @@ struct vpci { >> * upon to know whether BARs are mapped into the guest p2m. >> */ >> bool bars_mapped : 1; >> - /* FIXME: currently there's no support for SR-IOV. */ >> } header; >> >> /* MSI data. */ >> @@ -192,6 +191,12 @@ struct vpci { >> struct vpci_arch_msix_entry arch; >> } entries[]; >> } *msix; >> + >> + struct vpci_sriov { >> + /* PF only */ >> + struct vpci_bar vf_bars[PCI_SRIOV_NUM_BARS]; >> + } *sriov; >> + >> #ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT >> /* Guest SBDF of the device. */ >> #define INVALID_GUEST_SBDF ((pci_sbdf_t){ .sbdf = ~0U }) > > > > Teddy Astie | Vates XCP-ng Developer > > XCP-ng & Xen Orchestra - Vates solutions > > web: https://vates.tech/ > -- Mykyta
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |