[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH v3] x86/msi: fix locking for SR-IOV devices
In commit 4f78438b45e2 ("vpci: use per-domain PCI lock to protect vpci structure") a lock moved from allocate_and_map_msi_pirq() to the caller and changed from pcidevs_lock() to read_lock(&d->pci_lock). However, one call path wasn't updated to reflect the change, leading to a failed assertion observed under the following conditions: * PV dom0 * Debug build (debug=y) of Xen * There is an SR-IOV device in the system with one or more VFs enabled * Dom0 has loaded the driver for the VF and enabled MSI-X (XEN) Assertion 'd || pcidevs_locked()' failed at drivers/passthrough/pci.c:535 (XEN) ----[ Xen-4.20-unstable x86_64 debug=y Not tainted ]---- ... (XEN) Xen call trace: (XEN) [<ffff82d040284da8>] R pci_get_pdev+0x4c/0xab (XEN) [<ffff82d040344f5c>] F arch/x86/msi.c#read_pci_mem_bar+0x58/0x272 (XEN) [<ffff82d04034530e>] F arch/x86/msi.c#msix_capability_init+0x198/0x755 (XEN) [<ffff82d040345dad>] F arch/x86/msi.c#__pci_enable_msix+0x82/0xe8 (XEN) [<ffff82d0403463e5>] F pci_enable_msi+0x3f/0x78 (XEN) [<ffff82d04034be2b>] F map_domain_pirq+0x2a4/0x6dc (XEN) [<ffff82d04034d4d5>] F allocate_and_map_msi_pirq+0x103/0x262 (XEN) [<ffff82d04035da5d>] F physdev_map_pirq+0x210/0x259 (XEN) [<ffff82d04035e798>] F do_physdev_op+0x9c3/0x1454 (XEN) [<ffff82d040329475>] F pv_hypercall+0x5ac/0x6af (XEN) [<ffff82d0402012d3>] F lstar_enter+0x143/0x150 In read_pci_mem_bar(), the VF obtains the struct pci_dev pointer for its associated PF to access the vf_rlen array. This array is initialized in pci_add_device() and is only populated in the associated PF's struct pci_dev. Add a link from the VF's struct pci_dev to the associated PF struct pci_dev, ensuring the PF's struct doesn't get deallocated until all its VFs have gone away. Access the vf_rlen array via the new link to the PF, and remove the troublesome call to pci_get_pdev(). Add a call to pci_get_pdev() inside the pcidevs_lock()-locked section of pci_add_device() to set up the link from VF to PF. In case the new pci_add_device() invocation fails to find the associated PF (returning NULL), we are no worse off than before: read_pci_mem_bar() will still return 0 in that case. Note that currently the only way for Xen to know if a device is a VF is if the toolstack tells Xen about it. Using PHYSDEVOP_manage_pci_add for a VF is not a case that Xen handles. Fixes: 4f78438b45e2 ("vpci: use per-domain PCI lock to protect vpci structure") Reported-by: Teddy Astie <teddy.astie@xxxxxxxxxx> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@xxxxxxx> --- Candidate for backport to 4.19 v2->v3: * link from VF to PF's struct pci_dev * v1->v2: * remove call to pci_get_pdev() --- xen/arch/x86/msi.c | 34 ++++++++++++++------------- xen/drivers/passthrough/pci.c | 44 +++++++++++++++++++++++++++++++---- xen/include/xen/pci.h | 12 +++++++++- 3 files changed, 69 insertions(+), 21 deletions(-) diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c index 0c97fbb3fc03..1e3a0c450224 100644 --- a/xen/arch/x86/msi.c +++ b/xen/arch/x86/msi.c @@ -662,34 +662,32 @@ static int msi_capability_init(struct pci_dev *dev, return 0; } -static u64 read_pci_mem_bar(u16 seg, u8 bus, u8 slot, u8 func, u8 bir, int vf) +static u64 read_pci_mem_bar(struct pf_info *pf_info, u16 seg, u8 bus, u8 slot, + u8 func, u8 bir, int vf) { + pci_sbdf_t sbdf = PCI_SBDF(seg, bus, slot, func); u8 limit; u32 addr, base = PCI_BASE_ADDRESS_0; u64 disp = 0; if ( vf >= 0 ) { - struct pci_dev *pdev = pci_get_pdev(NULL, - PCI_SBDF(seg, bus, slot, func)); unsigned int pos; uint16_t ctrl, num_vf, offset, stride; - if ( !pdev ) - return 0; - - pos = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_SRIOV); - ctrl = pci_conf_read16(pdev->sbdf, pos + PCI_SRIOV_CTRL); - num_vf = pci_conf_read16(pdev->sbdf, pos + PCI_SRIOV_NUM_VF); - offset = pci_conf_read16(pdev->sbdf, pos + PCI_SRIOV_VF_OFFSET); - stride = pci_conf_read16(pdev->sbdf, pos + PCI_SRIOV_VF_STRIDE); + pos = pci_find_ext_capability(sbdf, PCI_EXT_CAP_ID_SRIOV); + ctrl = pci_conf_read16(sbdf, pos + PCI_SRIOV_CTRL); + num_vf = pci_conf_read16(sbdf, pos + PCI_SRIOV_NUM_VF); + offset = pci_conf_read16(sbdf, pos + PCI_SRIOV_VF_OFFSET); + stride = pci_conf_read16(sbdf, pos + PCI_SRIOV_VF_STRIDE); if ( !pos || !(ctrl & PCI_SRIOV_CTRL_VFE) || !(ctrl & PCI_SRIOV_CTRL_MSE) || !num_vf || !offset || (num_vf > 1 && !stride) || bir >= PCI_SRIOV_NUM_BARS || - !pdev->vf_rlen[bir] ) + !pf_info || + !pf_info->vf_rlen[bir] ) return 0; base = pos + PCI_SRIOV_BAR; vf -= PCI_BDF(bus, slot, func) + offset; @@ -703,8 +701,8 @@ static u64 read_pci_mem_bar(u16 seg, u8 bus, u8 slot, u8 func, u8 bir, int vf) } if ( vf >= num_vf ) return 0; - BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS); - disp = vf * pdev->vf_rlen[bir]; + BUILD_BUG_ON(ARRAY_SIZE(pf_info->vf_rlen) != PCI_SRIOV_NUM_BARS); + disp = vf * pf_info->vf_rlen[bir]; limit = PCI_SRIOV_NUM_BARS; } else switch ( pci_conf_read8(PCI_SBDF(seg, bus, slot, func), @@ -813,6 +811,7 @@ static int msix_capability_init(struct pci_dev *dev, int vf; paddr_t pba_paddr; unsigned int pba_offset; + struct pf_info *pf_info = NULL; if ( !dev->info.is_virtfn ) { @@ -827,9 +826,12 @@ static int msix_capability_init(struct pci_dev *dev, pslot = PCI_SLOT(dev->info.physfn.devfn); pfunc = PCI_FUNC(dev->info.physfn.devfn); vf = dev->sbdf.bdf; + if ( dev->virtfn.pf_pdev ) + pf_info = &dev->virtfn.pf_pdev->physfn; } - table_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf); + table_paddr = read_pci_mem_bar(pf_info, seg, pbus, pslot, pfunc, bir, + vf); WARN_ON(msi && msi->table_base != table_paddr); if ( !table_paddr ) { @@ -852,7 +854,7 @@ static int msix_capability_init(struct pci_dev *dev, pba_offset = pci_conf_read32(dev->sbdf, msix_pba_offset_reg(pos)); bir = (u8)(pba_offset & PCI_MSIX_BIRMASK); - pba_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf); + pba_paddr = read_pci_mem_bar(pf_info, seg, pbus, pslot, pfunc, bir, vf); WARN_ON(!pba_paddr); pba_paddr += pba_offset & ~PCI_MSIX_BIRMASK; diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c index 5a446d3dcee0..1294836bca44 100644 --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -341,6 +341,8 @@ static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn) list_add(&pdev->alldevs_list, &pseg->alldevs_list); + INIT_LIST_HEAD(&pdev->physfn.vf_list); + /* update bus2bridge */ switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) ) { @@ -446,7 +448,27 @@ static void free_pdev(struct pci_seg *pseg, struct pci_dev *pdev) list_del(&pdev->alldevs_list); pdev_msi_deinit(pdev); - xfree(pdev); + + if ( pdev->info.is_virtfn ) + { + struct pci_dev *pf_pdev = pdev->virtfn.pf_pdev; + + if ( pf_pdev ) + { + list_del(&pdev->virtfn.next); + if ( pf_pdev->physfn.dealloc_pending && + list_empty(&pf_pdev->physfn.vf_list) ) + xfree(pf_pdev); + } + xfree(pdev); + } + else + { + if ( list_empty(&pdev->physfn.vf_list) ) + xfree(pdev); + else + pdev->physfn.dealloc_pending = true; + } } static void __init _pci_hide_device(struct pci_dev *pdev) @@ -700,10 +722,22 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn, * extended function. */ if ( pdev->info.is_virtfn ) + { + struct pci_dev *pf_pdev; + pdev->info.is_extfn = pf_is_extfn; + pf_pdev = pci_get_pdev(NULL, + PCI_SBDF(seg, info->physfn.bus, + info->physfn.devfn)); + if ( pf_pdev ) + { + pdev->virtfn.pf_pdev = pf_pdev; + list_add(&pdev->virtfn.next, &pf_pdev->physfn.vf_list); + } + } } - if ( !pdev->info.is_virtfn && !pdev->vf_rlen[0] ) + if ( !pdev->info.is_virtfn && !pdev->physfn.vf_rlen[0] ) { unsigned int pos = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_SRIOV); @@ -715,7 +749,9 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn, { unsigned int i; - BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS); + BUILD_BUG_ON(ARRAY_SIZE(pdev->physfn.vf_rlen) != + PCI_SRIOV_NUM_BARS); + for ( i = 0; i < PCI_SRIOV_NUM_BARS; ) { unsigned int idx = pos + PCI_SRIOV_BAR + i * 4; @@ -730,7 +766,7 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn, continue; } ret = pci_size_mem_bar(pdev->sbdf, idx, NULL, - &pdev->vf_rlen[i], + &pdev->physfn.vf_rlen[i], PCI_BAR_VF | ((i == PCI_SRIOV_NUM_BARS - 1) ? PCI_BAR_LAST : 0)); diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h index 63e49f0117e9..5bb6fef3934b 100644 --- a/xen/include/xen/pci.h +++ b/xen/include/xen/pci.h @@ -150,7 +150,17 @@ struct pci_dev { unsigned int count; #define PT_FAULT_THRESHOLD 10 } fault; - u64 vf_rlen[6]; + struct pf_info { + /* Only populated for PFs (info.is_virtfn == false) */ + uint64_t vf_rlen[PCI_SRIOV_NUM_BARS]; + struct list_head vf_list; /* List head */ + bool dealloc_pending; + } physfn; + struct { + /* Only populated for VFs (info.is_virtfn == true) */ + struct pci_dev *pf_pdev; /* Link from VF to PF */ + struct list_head next; /* Entry in vf_list */ + } virtfn; /* Data for vPCI. */ struct vpci *vpci; base-commit: d61248b0fa2cdfc0be1d806c43d1b211a72b5d49 -- 2.46.0
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |