[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen master] pci: introduce per-domain PCI rwlock



commit 8862c7b8b659c9736347dd133c9a15fdefbe689e
Author:     Volodymyr Babchuk <volodymyr_babchuk@xxxxxxxx>
AuthorDate: Thu Dec 21 11:46:32 2023 +0100
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Thu Dec 21 11:46:32 2023 +0100

    pci: introduce per-domain PCI rwlock
    
    Add a per-domain d->pci_lock that protects access to
    d->pdev_list. The purpose of this lock is to guarantee to vPCI code
    that the underlying pdev will not disappear from under its feet. This
    is an rw-lock, but this patch adds only write_lock()s. read_lock()
    users will follow in subsequent patches.
    
    This lock should be taken in write mode every time d->pdev_list is
    altered. All write accesses should also be protected by
    pcidevs_lock(). The idea is that any user that wants read access to
    the list or to the devices stored in the list should use either this
    new d->pci_lock or the old pcidevs_lock(). Holding either of these two
    locks ensures only that the pdev of interest will not disappear from
    under the user's feet and that the pdev will still be assigned to the
    same domain. Of course, any new users should use pcidevs_lock() when
    it is appropriate (e.g. when accessing any other state that is
    protected by the said lock). If both the newly introduced per-domain
    rwlock and the pcidevs lock are taken, the latter must be acquired
    first.
    
    Suggested-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
    Suggested-by: Jan Beulich <jbeulich@xxxxxxxx>
    Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@xxxxxxxx>
    Signed-off-by: Stewart Hildebrand <stewart.hildebrand@xxxxxxx>
    Reviewed-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
    Acked-by: Stefano Stabellini <sstabellini@xxxxxxxxxx>
    Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
---
 xen/common/domain.c                         |  1 +
 xen/drivers/passthrough/amd/pci_amd_iommu.c |  9 +++-
 xen/drivers/passthrough/pci.c               | 71 ++++++++++++++++++++++++-----
 xen/drivers/passthrough/vtd/iommu.c         |  9 +++-
 xen/include/xen/sched.h                     | 22 +++++++++
 5 files changed, 99 insertions(+), 13 deletions(-)

diff --git a/xen/common/domain.c b/xen/common/domain.c
index c5954cdb1a..f6f5574996 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -651,6 +651,7 @@ struct domain *domain_create(domid_t domid,
 
 #ifdef CONFIG_HAS_PCI
     INIT_LIST_HEAD(&d->pdev_list);
+    rwlock_init(&d->pci_lock);
 #endif
 
     /* All error paths can depend on the above setup. */
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
index 4f556e8a72..f6efd88e36 100644
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -477,8 +477,15 @@ static int cf_check reassign_device(
 
     if ( devfn == pdev->devfn && pdev->domain != target )
     {
-        list_move(&pdev->domain_list, &target->pdev_list);
+        write_lock(&source->pci_lock);
+        list_del(&pdev->domain_list);
+        write_unlock(&source->pci_lock);
+
         pdev->domain = target;
+
+        write_lock(&target->pci_lock);
+        list_add(&pdev->domain_list, &target->pdev_list);
+        write_unlock(&target->pci_lock);
     }
 
     /*
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 28ed8ea817..1439d1ef2b 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -453,7 +453,9 @@ static void __init _pci_hide_device(struct pci_dev *pdev)
     if ( pdev->domain )
         return;
     pdev->domain = dom_xen;
+    write_lock(&dom_xen->pci_lock);
     list_add(&pdev->domain_list, &dom_xen->pdev_list);
+    write_unlock(&dom_xen->pci_lock);
 }
 
 int __init pci_hide_device(unsigned int seg, unsigned int bus,
@@ -746,7 +748,9 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
     if ( !pdev->domain )
     {
         pdev->domain = hardware_domain;
+        write_lock(&hardware_domain->pci_lock);
         list_add(&pdev->domain_list, &hardware_domain->pdev_list);
+        write_unlock(&hardware_domain->pci_lock);
 
         /*
          * For devices not discovered by Xen during boot, add vPCI handlers
@@ -756,7 +760,9 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
         if ( ret )
         {
             printk(XENLOG_ERR "Setup of vPCI failed: %d\n", ret);
+            write_lock(&hardware_domain->pci_lock);
             list_del(&pdev->domain_list);
+            write_unlock(&hardware_domain->pci_lock);
             pdev->domain = NULL;
             goto out;
         }
@@ -764,7 +770,9 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
         if ( ret )
         {
             vpci_remove_device(pdev);
+            write_lock(&hardware_domain->pci_lock);
             list_del(&pdev->domain_list);
+            write_unlock(&hardware_domain->pci_lock);
             pdev->domain = NULL;
             goto out;
         }
@@ -814,7 +822,11 @@ int pci_remove_device(u16 seg, u8 bus, u8 devfn)
             pci_cleanup_msi(pdev);
             ret = iommu_remove_device(pdev);
             if ( pdev->domain )
+            {
+                write_lock(&pdev->domain->pci_lock);
                 list_del(&pdev->domain_list);
+                write_unlock(&pdev->domain->pci_lock);
+            }
             printk(XENLOG_DEBUG "PCI remove device %pp\n", &pdev->sbdf);
             free_pdev(pseg, pdev);
             break;
@@ -885,26 +897,61 @@ static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus,
 
 int pci_release_devices(struct domain *d)
 {
-    struct pci_dev *pdev, *tmp;
-    u8 bus, devfn;
-    int ret;
+    int combined_ret;
+    LIST_HEAD(failed_pdevs);
 
     pcidevs_lock();
-    ret = arch_pci_clean_pirqs(d);
-    if ( ret )
+
+    combined_ret = arch_pci_clean_pirqs(d);
+    if ( combined_ret )
     {
         pcidevs_unlock();
-        return ret;
+        return combined_ret;
     }
-    list_for_each_entry_safe ( pdev, tmp, &d->pdev_list, domain_list )
+
+    write_lock(&d->pci_lock);
+
+    while ( !list_empty(&d->pdev_list) )
     {
-        bus = pdev->bus;
-        devfn = pdev->devfn;
-        ret = deassign_device(d, pdev->seg, bus, devfn) ?: ret;
+        struct pci_dev *pdev = list_first_entry(&d->pdev_list,
+                                                struct pci_dev,
+                                                domain_list);
+        uint16_t seg = pdev->seg;
+        uint8_t bus = pdev->bus;
+        uint8_t devfn = pdev->devfn;
+        int ret;
+
+        write_unlock(&d->pci_lock);
+        ret = deassign_device(d, seg, bus, devfn);
+        write_lock(&d->pci_lock);
+        if ( ret )
+        {
+            const struct pci_dev *tmp;
+
+            /*
+             * We need to check if deassign_device() left our pdev in
+             * domain's list. As we dropped the lock, we can't be sure
+             * that list wasn't permutated in some random way, so we
+             * need to traverse the whole list.
+             */
+            for_each_pdev ( d, tmp )
+            {
+                if ( tmp == pdev )
+                {
+                    list_move_tail(&pdev->domain_list, &failed_pdevs);
+                    break;
+                }
+            }
+
+            combined_ret = combined_ret ?: ret;
+        }
     }
+
+    list_splice(&failed_pdevs, &d->pdev_list);
+    write_unlock(&d->pci_lock);
     pcidevs_unlock();
 
-    return ret;
+    return combined_ret;
 }
 
 #define PCI_CLASS_BRIDGE_HOST    0x0600
@@ -1124,7 +1171,9 @@ static int __hwdom_init cf_check _setup_hwdom_pci_devices(
             if ( !pdev->domain )
             {
                 pdev->domain = ctxt->d;
+                write_lock(&ctxt->d->pci_lock);
                 list_add(&pdev->domain_list, &ctxt->d->pdev_list);
+                write_unlock(&ctxt->d->pci_lock);
                 setup_one_hwdom_device(ctxt, pdev);
             }
             else if ( pdev->domain == dom_xen )
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index bc6181c9f9..99b642f12e 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2816,8 +2816,15 @@ static int cf_check reassign_device_ownership(
 
     if ( devfn == pdev->devfn && pdev->domain != target )
     {
-        list_move(&pdev->domain_list, &target->pdev_list);
+        write_lock(&source->pci_lock);
+        list_del(&pdev->domain_list);
+        write_unlock(&source->pci_lock);
+
         pdev->domain = target;
+
+        write_lock(&target->pci_lock);
+        list_add(&pdev->domain_list, &target->pdev_list);
+        write_unlock(&target->pci_lock);
     }
 
     if ( !has_arch_pdevs(source) )
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 3609ef88c4..9da91e0e62 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -461,6 +461,28 @@ struct domain
 
 #ifdef CONFIG_HAS_PCI
     struct list_head pdev_list;
+    /*
+     * pci_lock protects access to pdev_list.
+     *
+     * Any user *reading* from pdev_list, or from devices stored in pdev_list,
+     * should hold either pcidevs_lock() or pci_lock in read mode. Optionally,
+     * both locks may be held for reads as long as the locking order is
+     * observed.
+     *
+     * Any user *writing* to pdev_list, or to devices stored in pdev_list,
+     * should hold both pcidevs_lock() and pci_lock in write mode, and observe
+     * the locking order.
+     *
+     * The locking order is:
+     * 1. pcidevs_lock()
+     * 2. d->pci_lock
+     *
+     * Additionally, users of both pci_lock and vpci->lock should observe the
+     * following locking order:
+     * 1. d->pci_lock
+     * 2. pdev->vpci->lock
+     */
+    rwlock_t pci_lock;
 #endif
 
 #ifdef CONFIG_HAS_PASSTHROUGH
--
generated by git-patchbot for /home/xen/git/xen.git#master



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.