[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v8 01/13] pci: introduce per-domain PCI rwlock


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Volodymyr Babchuk <Volodymyr_Babchuk@xxxxxxxx>
  • Date: Thu, 20 Jul 2023 00:32:31 +0000
  • Accept-language: en-US
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=epam.com; dmarc=pass action=none header.from=epam.com; dkim=pass header.d=epam.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=iKqHtyDUDA8wFxvQepqDFxbxA7jnJx0SzIFiYOphyxM=; b=eCqwV3mcykF2KIDkepRT2dKaSU+GWKBjiRkSDG8eX/QZbputTmrhKjDG1L3zeOUeMku97chKWeZSuSF7yCoKuMsGFT5RiGLqI2GUrvQfqNaNsjjCiwJXp5/lZ9sboB468v8Nu/NqyOkggViNMFpLKncuZ1JMKI3kHuSLLcuXBNBQT2V3mATXVLFFHTCO0lTWtXsMIC4kfBd2L7DKddfQUYe9yBk1kz8cJPCZ4lDA4Z68OtB6mxBj5VDkKn5DaztGx0xL6MZj6ocPh8jzcNCxoJjo1rlkO2+q7LY4uLnaX0thVUNBHGmvxjjJ/GXoze1EGI9QMTFK4EmTnIvmP00Bew==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=jwMuXJV4hDcgX76yzhHxDh+sOKtQ+0l0jRTb4FOfd4dDit1OA239Nnfnp/Xzf+uqHI/eQMq6UghXSpj6lDxhfqPuPPxF7VvODdudWWTq76HxmjGrOvhKQezbZpI8UEzcgQ3oF5lKoxTE9Tjb6AUvmJiZc2vrOgh5RDVGjeLofur903FeLWDD0y7dzbx8Cfeqv16H6eHqdap127mDHH8d6cA9c/4zMlLLD3UW2Vv1DCNWRPv1tLDVD4yMyhPEudv8+LhAYFSSttwHbEcq0P9evv7h4Bm8tZdQzu09cKkPMCrwbRc/UvYUDP+QwBUUI660cAm1A9PNdHi+4qbynshD/Q==
  • Cc: Volodymyr Babchuk <Volodymyr_Babchuk@xxxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>, Jan Beulich <jbeulich@xxxxxxxx>
  • Delivery-date: Thu, 20 Jul 2023 00:32:50 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>
  • Thread-index: AQHZuqGr3g/gocvkCU2kdSqq613AoQ==
  • Thread-topic: [PATCH v8 01/13] pci: introduce per-domain PCI rwlock

Add a per-domain d->pci_lock that protects access to
d->pdev_list. The purpose of this lock is to guarantee to vPCI code
that the underlying pdev will not disappear from under its feet. This
is an rw-lock, but this patch adds only write_lock()s. read_lock()
users will be added in subsequent patches.

This lock should be taken in write mode every time d->pdev_list is
altered. This covers both accesses to d->pdev_list and accesses to
pdev->domain_list fields. All write accesses should also be protected
by pcidevs_lock(). The idea is that any user that wants read
access to the list or to the devices stored in the list should use
either this new d->pci_lock or the old pcidevs_lock(). Holding either
of these two locks ensures only that the pdev of interest will not
disappear from under the user's feet and that the pdev will still be
assigned to the same domain. Of course, any new users should use
pcidevs_lock() when it is appropriate (e.g. when accessing any other
state that is protected by the said lock).

Any write access to pdev->domain_list should be protected by both
pcidevs_lock() and d->pci_lock in write mode.

Suggested-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
Suggested-by: Jan Beulich <jbeulich@xxxxxxxx>
Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@xxxxxxxx>

---

Changes in v8:
 - New patch

Changes in v8 vs RFC:
 - Removed all read_locks after discussion with Roger in #xendevel
 - pci_release_devices() now returns the first error code
 - extended commit message
 - added missing lock in pci_remove_device()
 - extended locked region in pci_add_device() to protect list_del() calls
---
 xen/common/domain.c                         |  1 +
 xen/drivers/passthrough/amd/pci_amd_iommu.c |  9 ++-
 xen/drivers/passthrough/pci.c               | 68 +++++++++++++++++----
 xen/drivers/passthrough/vtd/iommu.c         |  9 ++-
 xen/include/xen/sched.h                     |  1 +
 5 files changed, 74 insertions(+), 14 deletions(-)

diff --git a/xen/common/domain.c b/xen/common/domain.c
index caaa402637..5d8a8836da 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -645,6 +645,7 @@ struct domain *domain_create(domid_t domid,
 
 #ifdef CONFIG_HAS_PCI
     INIT_LIST_HEAD(&d->pdev_list);
+    rwlock_init(&d->pci_lock);
 #endif
 
     /* All error paths can depend on the above setup. */
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
index 94e3775506..e2f2e2e950 100644
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -476,8 +476,13 @@ static int cf_check reassign_device(
 
     if ( devfn == pdev->devfn && pdev->domain != target )
     {
-        list_move(&pdev->domain_list, &target->pdev_list);
-        pdev->domain = target;
+        write_lock(&pdev->domain->pci_lock);
+        list_del(&pdev->domain_list);
+        write_unlock(&pdev->domain->pci_lock);
+
+        write_lock(&target->pci_lock);
+        list_add(&pdev->domain_list, &target->pdev_list);
+        write_unlock(&target->pci_lock);
     }
 
     /*
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 95846e84f2..5b4632ead2 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -454,7 +454,9 @@ static void __init _pci_hide_device(struct pci_dev *pdev)
     if ( pdev->domain )
         return;
     pdev->domain = dom_xen;
+    write_lock(&dom_xen->pci_lock);
     list_add(&pdev->domain_list, &dom_xen->pdev_list);
+    write_unlock(&dom_xen->pci_lock);
 }
 
 int __init pci_hide_device(unsigned int seg, unsigned int bus,
@@ -747,6 +749,7 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
     ret = 0;
     if ( !pdev->domain )
     {
+        write_lock(&hardware_domain->pci_lock);
         pdev->domain = hardware_domain;
         list_add(&pdev->domain_list, &hardware_domain->pdev_list);
 
@@ -760,6 +763,7 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
             printk(XENLOG_ERR "Setup of vPCI failed: %d\n", ret);
             list_del(&pdev->domain_list);
             pdev->domain = NULL;
+            write_unlock(&hardware_domain->pci_lock);
             goto out;
         }
         ret = iommu_add_device(pdev);
@@ -768,8 +772,10 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
             vpci_remove_device(pdev);
             list_del(&pdev->domain_list);
             pdev->domain = NULL;
+            write_unlock(&hardware_domain->pci_lock);
             goto out;
         }
+        write_unlock(&hardware_domain->pci_lock);
     }
     else
         iommu_enable_device(pdev);
@@ -812,11 +818,13 @@ int pci_remove_device(u16 seg, u8 bus, u8 devfn)
     list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
         if ( pdev->bus == bus && pdev->devfn == devfn )
         {
+            write_lock(&pdev->domain->pci_lock);
             vpci_remove_device(pdev);
             pci_cleanup_msi(pdev);
             ret = iommu_remove_device(pdev);
             if ( pdev->domain )
                 list_del(&pdev->domain_list);
+            write_unlock(&pdev->domain->pci_lock);
             printk(XENLOG_DEBUG "PCI remove device %pp\n", &pdev->sbdf);
             free_pdev(pseg, pdev);
             break;
@@ -887,26 +895,62 @@ static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus,
 
 int pci_release_devices(struct domain *d)
 {
-    struct pci_dev *pdev, *tmp;
-    u8 bus, devfn;
-    int ret;
+    int combined_ret;
+    LIST_HEAD(failed_pdevs);
 
     pcidevs_lock();
-    ret = arch_pci_clean_pirqs(d);
-    if ( ret )
+    write_lock(&d->pci_lock);
+    combined_ret = arch_pci_clean_pirqs(d);
+    if ( combined_ret )
     {
         pcidevs_unlock();
-        return ret;
+        write_unlock(&d->pci_lock);
+        return combined_ret;
     }
-    list_for_each_entry_safe ( pdev, tmp, &d->pdev_list, domain_list )
+
+    while ( !list_empty(&d->pdev_list) )
     {
-        bus = pdev->bus;
-        devfn = pdev->devfn;
-        ret = deassign_device(d, pdev->seg, bus, devfn) ?: ret;
+        struct pci_dev *pdev = list_first_entry(&d->pdev_list,
+                                                struct pci_dev,
+                                                domain_list);
+        uint16_t seg = pdev->seg;
+        uint8_t bus = pdev->bus;
+        uint8_t devfn = pdev->devfn;
+        int ret;
+
+        write_unlock(&d->pci_lock);
+        ret = deassign_device(d, seg, bus, devfn);
+        write_lock(&d->pci_lock);
+        if ( ret )
+        {
+            bool still_present = false;
+            const struct pci_dev *tmp;
+
+            /*
+             * We need to check if deassign_device() left our pdev in
+             * domain's list. As we dropped the lock, we can't be sure
+             * that list wasn't permutated in some random way, so we
+             * need to traverse the whole list.
+             */
+            for_each_pdev ( d, tmp )
+            {
+                if ( tmp == pdev )
+                {
+                    still_present = true;
+                    break;
+                }
+            }
+            if ( still_present )
+                list_move(&pdev->domain_list, &failed_pdevs);
+            combined_ret = combined_ret?:ret;
+        }
     }
+
+    list_splice(&failed_pdevs, &d->pdev_list);
+    write_unlock(&d->pci_lock);
     pcidevs_unlock();
 
-    return ret;
+    return combined_ret;
 }
 
 #define PCI_CLASS_BRIDGE_HOST    0x0600
@@ -1125,7 +1169,9 @@ static int __hwdom_init cf_check _setup_hwdom_pci_devices(
             if ( !pdev->domain )
             {
                 pdev->domain = ctxt->d;
+                write_lock(&ctxt->d->pci_lock);
                 list_add(&pdev->domain_list, &ctxt->d->pdev_list);
+                write_unlock(&ctxt->d->pci_lock);
                 setup_one_hwdom_device(ctxt, pdev);
             }
             else if ( pdev->domain == dom_xen )
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index 0e3062c820..55ee3f110d 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2806,7 +2806,14 @@ static int cf_check reassign_device_ownership(
 
     if ( devfn == pdev->devfn && pdev->domain != target )
     {
-        list_move(&pdev->domain_list, &target->pdev_list);
+        write_lock(&pdev->domain->pci_lock);
+        list_del(&pdev->domain_list);
+        write_unlock(&pdev->domain->pci_lock);
+
+        write_lock(&target->pci_lock);
+        list_add(&pdev->domain_list, &target->pdev_list);
+        write_unlock(&target->pci_lock);
+
         pdev->domain = target;
     }
 
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 85242a73d3..80dd150bbf 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -460,6 +460,7 @@ struct domain
 
 #ifdef CONFIG_HAS_PCI
     struct list_head pdev_list;
+    rwlock_t pci_lock;
 #endif
 
 #ifdef CONFIG_HAS_PASSTHROUGH
-- 
2.41.0

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.