[RFC PATCH] pci: introduce per-domain PCI rwlock
Add a per-domain d->pci_lock that protects access to d->pdev_list. The
purpose of this lock is to give the vPCI code a guarantee that the
underlying pdev will not disappear from under its feet. Later it will
also protect the pdev->vpci structure and pdev accesses in
modify_bars().

Suggested-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
Suggested-by: Jan Beulich <jbeulich@xxxxxxxx>
Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@xxxxxxxx>
---
This patch should be part of the vPCI series, but I am posting it as a
single-patch RFC to discuss the changes to the x86 MM and IOMMU code.

I opted to factor out part of the changes from the "vpci: introduce
per-domain lock to protect vpci structure" commit to ease the review
process.
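To make the intended usage explicit, here is a minimal sketch of the
locking pattern the new lock is meant to enforce. It is illustrative
only and not part of the patch: reader() and writer() are hypothetical
helpers, while read_lock()/write_lock(), for_each_pdev() and
list_add() are the existing primitives used by the patch below.

/* Hypothetical reader: only traverses d->pdev_list. */
static void reader(struct domain *d)
{
    const struct pci_dev *pdev;

    read_lock(&d->pci_lock);
    for_each_pdev ( d, pdev )
    {
        /* pdev cannot be removed from the list while the lock is held. */
    }
    read_unlock(&d->pci_lock);
}

/* Hypothetical writer: mutates d->pdev_list, needs exclusive access. */
static void writer(struct domain *d, struct pci_dev *pdev)
{
    write_lock(&d->pci_lock);
    list_add(&pdev->domain_list, &d->pdev_list);
    write_unlock(&d->pci_lock);
}

Multiple readers (e.g. the vPCI handlers) may thus traverse
d->pdev_list concurrently, while paths that add or remove a pdev must
take the lock in write mode.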
---
 xen/arch/x86/hvm/hvm.c                      |  2 +
 xen/arch/x86/hvm/vmx/vmcs.c                 |  2 +
 xen/arch/x86/mm.c                           |  6 ++
 xen/arch/x86/mm/p2m-pod.c                   |  7 +++
 xen/arch/x86/mm/paging.c                    |  6 ++
 xen/common/domain.c                         |  1 +
 xen/drivers/passthrough/amd/iommu_cmd.c     |  4 +-
 xen/drivers/passthrough/amd/pci_amd_iommu.c | 17 ++++-
 xen/drivers/passthrough/pci.c               | 70 +++++++++++++++++----
 xen/drivers/passthrough/vtd/iommu.c         | 19 +++++-
 xen/include/xen/sched.h                     |  1 +
 11 files changed, 119 insertions(+), 16 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index a67ef79dc0..089fbe38a7 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2381,12 +2381,14 @@ int hvm_set_cr0(unsigned long value, bool may_defer)
         }
     }
 
+    read_lock(&d->pci_lock);
     if ( ((value ^ old_value) & X86_CR0_CD) &&
          is_iommu_enabled(d) && hvm_funcs.handle_cd &&
          (!rangeset_is_empty(d->iomem_caps) ||
           !rangeset_is_empty(d->arch.ioport_caps) ||
           has_arch_pdevs(d)) )
         alternative_vcall(hvm_funcs.handle_cd, v, value);
+    read_unlock(&d->pci_lock);
 
     hvm_update_cr(v, 0, value);
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index b209563625..88bbcbbd99 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1889,6 +1889,7 @@ void cf_check vmx_do_resume(void)
          * 2: execute wbinvd on all dirty pCPUs when guest wbinvd exits.
          * If VT-d engine can force snooping, we don't need to do these.
          */
+        read_lock(&v->domain->pci_lock);
         if ( has_arch_pdevs(v->domain) && !iommu_snoop
              && !cpu_has_wbinvd_exiting )
         {
@@ -1896,6 +1897,7 @@ void cf_check vmx_do_resume(void)
             if ( cpu != -1 )
                 flush_mask(cpumask_of(cpu), FLUSH_CACHE);
         }
+        read_unlock(&v->domain->pci_lock);
 
         vmx_clear_vmcs(v);
         vmx_load_vmcs(v);
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index be2b10a391..f1e882a980 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -858,12 +858,15 @@ get_page_from_l1e(
         return 0;
     }
 
+    read_lock(&l1e_owner->pci_lock);
     if ( unlikely(l1f & l1_disallow_mask(l1e_owner)) )
     {
         gdprintk(XENLOG_WARNING, "Bad L1 flags %x\n",
                  l1f & l1_disallow_mask(l1e_owner));
+        read_unlock(&l1e_owner->pci_lock);
         return -EINVAL;
     }
+    read_unlock(&l1e_owner->pci_lock);
 
     valid = mfn_valid(_mfn(mfn));
 
@@ -2142,12 +2145,15 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
     {
         struct page_info *page = NULL;
 
+        read_lock(&pt_dom->pci_lock);
         if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(pt_dom)) )
         {
             gdprintk(XENLOG_WARNING, "Bad L1 flags %x\n",
                      l1e_get_flags(nl1e) & l1_disallow_mask(pt_dom));
+            read_unlock(&pt_dom->pci_lock);
             return -EINVAL;
         }
+        read_unlock(&pt_dom->pci_lock);
 
         /* Translate foreign guest address. */
         if ( cmd != MMU_PT_UPDATE_NO_TRANSLATE &&
diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
index 9969eb45fa..07e0bedad7 100644
--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -349,10 +349,12 @@ p2m_pod_set_mem_target(struct domain *d, unsigned long target)
 
     ASSERT( pod_target >= p2m->pod.count );
 
+    read_lock(&d->pci_lock);
     if ( has_arch_pdevs(d) || cache_flush_permitted(d) )
         ret = -ENOTEMPTY;
     else
         ret = p2m_pod_set_cache_target(p2m, pod_target, 1/*preemptible*/);
+    read_unlock(&d->pci_lock);
 
  out:
     pod_unlock(p2m);
@@ -1401,8 +1403,13 @@ guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
     if ( !paging_mode_translate(d) )
         return -EINVAL;
 
+    read_lock(&d->pci_lock);
     if ( has_arch_pdevs(d) || cache_flush_permitted(d) )
+    {
+        read_unlock(&d->pci_lock);
         return -ENOTEMPTY;
+    }
+    read_unlock(&d->pci_lock);
 
     do {
         rc = mark_populate_on_demand(d, gfn, chunk_order);
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index 34d833251b..fb8f7ff7cf 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -205,21 +205,27 @@ static int paging_log_dirty_enable(struct domain *d)
 {
     int ret;
 
+    read_lock(&d->pci_lock);
     if ( has_arch_pdevs(d) )
     {
         /*
          * Refuse to turn on global log-dirty mode
          * if the domain is sharing the P2M with the IOMMU.
          */
+        read_unlock(&d->pci_lock);
         return -EINVAL;
     }
 
     if ( paging_mode_log_dirty(d) )
+    {
+        read_unlock(&d->pci_lock);
         return -EINVAL;
+    }
 
     domain_pause(d);
     ret = d->arch.paging.log_dirty.ops->enable(d);
     domain_unpause(d);
+    read_unlock(&d->pci_lock);
 
     return ret;
 }
diff --git a/xen/common/domain.c b/xen/common/domain.c
index caaa402637..5d8a8836da 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -645,6 +645,7 @@ struct domain *domain_create(domid_t domid,
 
 #ifdef CONFIG_HAS_PCI
     INIT_LIST_HEAD(&d->pdev_list);
+    rwlock_init(&d->pci_lock);
 #endif
 
     /* All error paths can depend on the above setup. */
diff --git a/xen/drivers/passthrough/amd/iommu_cmd.c b/xen/drivers/passthrough/amd/iommu_cmd.c
index 40ddf366bb..b67aee31f6 100644
--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -308,11 +308,12 @@ void amd_iommu_flush_iotlb(u8 devfn, const struct pci_dev *pdev,
     flush_command_buffer(iommu, iommu_dev_iotlb_timeout);
 }
 
-static void amd_iommu_flush_all_iotlbs(const struct domain *d, daddr_t daddr,
+static void amd_iommu_flush_all_iotlbs(struct domain *d, daddr_t daddr,
                                        unsigned int order)
 {
     struct pci_dev *pdev;
 
+    read_lock(&d->pci_lock);
     for_each_pdev( d, pdev )
     {
         u8 devfn = pdev->devfn;
@@ -323,6 +324,7 @@ static void amd_iommu_flush_all_iotlbs(const struct domain *d, daddr_t daddr,
         } while ( devfn != pdev->devfn &&
                   PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );
     }
+    read_unlock(&d->pci_lock);
 }
 
 /* Flush iommu cache after p2m changes. */
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
index 94e3775506..8541b66a93 100644
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -102,6 +102,8 @@ static bool any_pdev_behind_iommu(const struct domain *d,
 {
     const struct pci_dev *pdev;
 
+    ASSERT(rw_is_locked(&d->pci_lock));
+
     for_each_pdev ( d, pdev )
     {
         if ( pdev == exclude )
@@ -467,17 +469,26 @@ static int cf_check reassign_device(
 
     if ( !QUARANTINE_SKIP(target, pdev) )
     {
+        read_lock(&target->pci_lock);
         rc = amd_iommu_setup_domain_device(target, iommu, devfn, pdev);
+        read_unlock(&target->pci_lock);
         if ( rc )
             return rc;
     }
     else
         amd_iommu_disable_domain_device(source, iommu, devfn, pdev);
 
     if ( devfn == pdev->devfn && pdev->domain != target )
     {
-        list_move(&pdev->domain_list, &target->pdev_list);
-        pdev->domain = target;
+        write_lock(&pdev->domain->pci_lock);
+        list_del(&pdev->domain_list);
+        write_unlock(&pdev->domain->pci_lock);
+
+        write_lock(&target->pci_lock);
+        list_add(&pdev->domain_list, &target->pdev_list);
+        write_unlock(&target->pci_lock);
+
+        pdev->domain = target;
     }
 
     /*
@@ -628,12 +639,14 @@ static int cf_check amd_iommu_add_device(u8 devfn, struct pci_dev *pdev)
         fresh_domid = true;
     }
 
+    read_lock(&pdev->domain->pci_lock);
     ret = amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev);
     if ( ret && fresh_domid )
     {
         iommu_free_domid(pdev->arch.pseudo_domid, iommu->domid_map);
         pdev->arch.pseudo_domid = DOMID_INVALID;
     }
+    read_unlock(&pdev->domain->pci_lock);
 
     return ret;
 }
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 07d1986d33..1831e1b0c0 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -454,7 +454,9 @@ static void __init _pci_hide_device(struct pci_dev *pdev)
     if ( pdev->domain )
         return;
     pdev->domain = dom_xen;
+    write_lock(&dom_xen->pci_lock);
     list_add(&pdev->domain_list, &dom_xen->pdev_list);
+    write_unlock(&dom_xen->pci_lock);
 }
 
 int __init pci_hide_device(unsigned int seg, unsigned int bus,
@@ -530,7 +532,7 @@ struct pci_dev *pci_get_pdev(const struct domain *d, pci_sbdf_t sbdf)
 {
     struct pci_dev *pdev;
 
-    ASSERT(d || pcidevs_locked());
+    ASSERT((d && rw_is_locked(&d->pci_lock)) || pcidevs_locked());
 
     /*
      * The hardware domain owns the majority of the devices in the system.
@@ -748,7 +750,9 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
         if ( !pdev->domain )
         {
             pdev->domain = hardware_domain;
+            write_lock(&hardware_domain->pci_lock);
             list_add(&pdev->domain_list, &hardware_domain->pdev_list);
+            write_unlock(&hardware_domain->pci_lock);
 
             /*
              * For devices not discovered by Xen during boot, add vPCI handlers
@@ -887,26 +891,62 @@ static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus,
 
 int pci_release_devices(struct domain *d)
 {
-    struct pci_dev *pdev, *tmp;
-    u8 bus, devfn;
-    int ret;
+    int combined_ret;
+    LIST_HEAD(failed_pdevs);
 
     pcidevs_lock();
-    ret = arch_pci_clean_pirqs(d);
-    if ( ret )
+    write_lock(&d->pci_lock);
+    combined_ret = arch_pci_clean_pirqs(d);
+    if ( combined_ret )
     {
         pcidevs_unlock();
-        return ret;
+        write_unlock(&d->pci_lock);
+        return combined_ret;
     }
-    list_for_each_entry_safe ( pdev, tmp, &d->pdev_list, domain_list )
+
+    while ( !list_empty(&d->pdev_list) )
     {
-        bus = pdev->bus;
-        devfn = pdev->devfn;
-        ret = deassign_device(d, pdev->seg, bus, devfn) ?: ret;
+        struct pci_dev *pdev = list_first_entry(&d->pdev_list,
+                                                struct pci_dev,
+                                                domain_list);
+        uint16_t seg = pdev->seg;
+        uint8_t bus = pdev->bus;
+        uint8_t devfn = pdev->devfn;
+        int ret;
+
+        write_unlock(&d->pci_lock);
+        ret = deassign_device(d, seg, bus, devfn);
+        write_lock(&d->pci_lock);
+        if ( ret )
+        {
+            bool still_present = false;
+            const struct pci_dev *tmp;
+
+            /*
+             * We need to check whether deassign_device() left our pdev in
+             * the domain's list. As we dropped the lock, we can't be sure
+             * that the list wasn't permuted in some random way, so we
+             * need to traverse the whole list.
+             */
+            for_each_pdev ( d, tmp )
+            {
+                if ( tmp == pdev )
+                {
+                    still_present = true;
+                    break;
+                }
+            }
+            if ( still_present )
+                list_move(&pdev->domain_list, &failed_pdevs);
+            combined_ret = ret;
+        }
     }
+
+    list_splice(&failed_pdevs, &d->pdev_list);
+    write_unlock(&d->pci_lock);
     pcidevs_unlock();
 
-    return ret;
+    return combined_ret;
 }
 
 #define PCI_CLASS_BRIDGE_HOST 0x0600
@@ -1125,7 +1165,9 @@ static int __hwdom_init cf_check _setup_hwdom_pci_devices(
         if ( !pdev->domain )
         {
             pdev->domain = ctxt->d;
+            write_lock(&ctxt->d->pci_lock);
             list_add(&pdev->domain_list, &ctxt->d->pdev_list);
+            write_unlock(&ctxt->d->pci_lock);
             setup_one_hwdom_device(ctxt, pdev);
         }
         else if ( pdev->domain == dom_xen )
@@ -1487,6 +1529,7 @@ static int iommu_get_device_group(
         return group_id;
 
     pcidevs_lock();
+    read_lock(&d->pci_lock);
     for_each_pdev( d, pdev )
     {
         unsigned int b = pdev->bus;
@@ -1501,6 +1544,7 @@ static int iommu_get_device_group(
         sdev_id = iommu_call(ops, get_device_group_id, seg, b, df);
         if ( sdev_id < 0 )
         {
+            read_unlock(&d->pci_lock);
             pcidevs_unlock();
             return sdev_id;
         }
@@ -1511,6 +1555,7 @@ static int iommu_get_device_group(
 
             if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
             {
+                read_unlock(&d->pci_lock);
                 pcidevs_unlock();
                 return -EFAULT;
             }
@@ -1518,6 +1563,7 @@ static int iommu_get_device_group(
         }
     }
 
+    read_unlock(&d->pci_lock);
     pcidevs_unlock();
 
     return i;
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index 0e3062c820..6a36cc18fe 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -186,6 +186,8 @@ static bool any_pdev_behind_iommu(const struct domain *d,
 {
     const struct pci_dev *pdev;
 
+    ASSERT(rw_is_locked(&d->pci_lock));
+
     for_each_pdev ( d, pdev )
     {
         const struct acpi_drhd_unit *drhd;
@@ -2765,6 +2767,7 @@ static int cf_check reassign_device_ownership(
 
     if ( !QUARANTINE_SKIP(target, pdev->arch.vtd.pgd_maddr) )
     {
+        read_lock(&target->pci_lock);
         if ( !has_arch_pdevs(target) )
             vmx_pi_hooks_assign(target);
 
@@ -2780,21 +2783,26 @@ static int cf_check reassign_device_ownership(
 #endif
         ret = domain_context_mapping(target, devfn, pdev);
+        read_unlock(&target->pci_lock);
 
         if ( !ret && pdev->devfn == devfn &&
              !QUARANTINE_SKIP(source, pdev->arch.vtd.pgd_maddr) )
         {
             const struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev);
 
+            read_lock(&source->pci_lock);
             if ( drhd )
                 check_cleanup_domid_map(source, pdev, drhd->iommu);
+            read_unlock(&source->pci_lock);
         }
     }
     else
     {
         const struct acpi_drhd_unit *drhd;
 
+        read_lock(&source->pci_lock);
         drhd = domain_context_unmap(source, devfn, pdev);
+        read_unlock(&source->pci_lock);
         ret = IS_ERR(drhd) ? PTR_ERR(drhd) : 0;
     }
 
     if ( ret )
@@ -2806,12 +2814,21 @@ static int cf_check reassign_device_ownership(
 
     if ( devfn == pdev->devfn && pdev->domain != target )
     {
-        list_move(&pdev->domain_list, &target->pdev_list);
+        write_lock(&pdev->domain->pci_lock);
+        list_del(&pdev->domain_list);
+        write_unlock(&pdev->domain->pci_lock);
+
+        write_lock(&target->pci_lock);
+        list_add(&pdev->domain_list, &target->pdev_list);
+        write_unlock(&target->pci_lock);
+
         pdev->domain = target;
     }
 
+    read_lock(&source->pci_lock);
     if ( !has_arch_pdevs(source) )
         vmx_pi_hooks_deassign(source);
+    read_unlock(&source->pci_lock);
 
     /*
      * If the device belongs to the hardware domain, and it has RMRR, don't
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 85242a73d3..80dd150bbf 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -460,6 +460,7 @@ struct domain
 
 #ifdef CONFIG_HAS_PCI
     struct list_head pdev_list;
+    rwlock_t pci_lock;
 #endif
 
 #ifdef CONFIG_HAS_PASSTHROUGH
-- 
2.41.0