
[PATCH v5 04/14] vpci: cancel pending map/unmap on vpci removal



From: Oleksandr Andrushchenko <oleksandr_andrushchenko@xxxxxxxx>

When the vPCI state of a PCI device is removed, delayed work for
map/unmap operations on that device may already have been scheduled.
The following scenario illustrates the problem:

pci_physdev_op
   pci_add_device
       init_bars -> modify_bars -> defer_map -> raise_softirq(SCHEDULE_SOFTIRQ)
   iommu_add_device <- FAILS
   vpci_remove_device -> xfree(pdev->vpci)

leave_hypervisor_to_guest
   vpci_process_pending: v->vpci.mem != NULL; v->vpci.pdev->vpci == NULL
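
To close this window, vpci_process_pending() now re-checks the device
state under pdev->vpci_lock before touching the rangeset. A condensed
sketch of the resulting shape of the function (the full version is in
the header.c hunk below):

    bool vpci_process_pending(struct vcpu *v)
    {
        struct pci_dev *pdev = v->vpci.pdev;

        if ( !pdev )                    /* no deferred work on this vCPU */
            return false;

        spin_lock(&pdev->vpci_lock);
        if ( !pdev->vpci_cancel_pending && v->vpci.mem )
        {
            /* ... consume the rangeset and update memory decoding ... */
        }
        spin_unlock(&pdev->vpci_lock);

        return false;
    }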

For the hardware domain we continue execution, as the worst that can
happen is that MMIO mappings are left in place when the device has
been deassigned.

Unprivileged domains that hit a failure in the middle of a vPCI
{un}map operation need to be destroyed, as we don't know what state
the p2m is in. This can only happen in vpci_process_pending for DomUs,
as they are not allowed to call pci_add_device.
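
A condensed view of the resulting error handling (taken from the
header.c hunk below, slightly simplified):

    if ( rc )
    {
        if ( is_hardware_domain(v->domain) )
            /* Worst case: stale MMIO mappings remain after deassignment. */
            vpci_remove_device_locked(pdev);
        else
            /* The p2m state is unknown, so the guest cannot safely continue. */
            domain_crash(v->domain);
    }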

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@xxxxxxxx>

---
Cc: Roger Pau Monné <roger.pau@xxxxxxxxxx>
---
Since v4:
 - crash the guest domain if a map/unmap operation didn't succeed
 - re-work vPCI cancellation to cancel pending work on all vCPUs (see the
   sketch below)
 - use the new locking scheme with pdev->vpci_lock
New in v4
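
The "cancel work on all vCPUs" item above boils down to the new helper
(condensed from the header.c hunk below):

    void vpci_cancel_pending_locked(struct pci_dev *pdev)
    {
        struct vcpu *v;

        ASSERT(spin_is_locked(&pdev->vpci_lock));

        /* Drop deferred map/unmap work targeting this device on every vCPU. */
        for_each_vcpu ( pdev->domain, v )
            if ( v->vpci.mem && v->vpci.pdev == pdev )
            {
                rangeset_destroy(v->vpci.mem);
                v->vpci.mem = NULL;
            }
    }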

Fixes: 86dbcf6e30cb ("vpci: cancel pending map/unmap on vpci removal")

---
 xen/drivers/vpci/header.c | 49 ++++++++++++++++++++++++++++++---------
 xen/drivers/vpci/vpci.c   |  2 ++
 xen/include/xen/pci.h     |  5 ++++
 xen/include/xen/vpci.h    |  6 +++++
 4 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
index bd23c0274d48..ba333fb2f9b0 100644
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -131,7 +131,13 @@ static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd,
 
 bool vpci_process_pending(struct vcpu *v)
 {
-    if ( v->vpci.mem )
+    struct pci_dev *pdev = v->vpci.pdev;
+
+    if ( !pdev )
+        return false;
+
+    spin_lock(&pdev->vpci_lock);
+    if ( !pdev->vpci_cancel_pending && v->vpci.mem )
     {
         struct map_data data = {
             .d = v->domain,
@@ -140,32 +146,53 @@ bool vpci_process_pending(struct vcpu *v)
         int rc = rangeset_consume_ranges(v->vpci.mem, map_range, &data);
 
         if ( rc == -ERESTART )
+        {
+            spin_unlock(&pdev->vpci_lock);
             return true;
+        }
 
-        spin_lock(&v->vpci.pdev->vpci_lock);
-        if ( v->vpci.pdev->vpci )
+        if ( pdev->vpci )
             /* Disable memory decoding unconditionally on failure. */
-            modify_decoding(v->vpci.pdev,
+            modify_decoding(pdev,
                             rc ? v->vpci.cmd & ~PCI_COMMAND_MEMORY : v->vpci.cmd,
                             !rc && v->vpci.rom_only);
-        spin_unlock(&v->vpci.pdev->vpci_lock);
 
-        rangeset_destroy(v->vpci.mem);
-        v->vpci.mem = NULL;
         if ( rc )
+        {
             /*
              * FIXME: in case of failure remove the device from the domain.
              * Note that there might still be leftover mappings. While this is
-             * safe for Dom0, for DomUs the domain will likely need to be
-             * killed in order to avoid leaking stale p2m mappings on
-             * failure.
+             * safe for Dom0, for DomUs the domain needs to be killed in order
+             * to avoid leaking stale p2m mappings on failure.
              */
-            vpci_remove_device(v->vpci.pdev);
+            if ( is_hardware_domain(v->domain) )
+                vpci_remove_device_locked(pdev);
+            else
+                domain_crash(v->domain);
+        }
     }
+    spin_unlock(&pdev->vpci_lock);
 
     return false;
 }
 
+void vpci_cancel_pending_locked(struct pci_dev *pdev)
+{
+    struct vcpu *v;
+
+    ASSERT(spin_is_locked(&pdev->vpci_lock));
+
+    /* Cancel any pending work now on all vCPUs. */
+    for_each_vcpu( pdev->domain, v )
+    {
+        if ( v->vpci.mem && (v->vpci.pdev == pdev) )
+        {
+            rangeset_destroy(v->vpci.mem);
+            v->vpci.mem = NULL;
+        }
+    }
+}
+
 static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
                             struct rangeset *mem, uint16_t cmd)
 {
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index ceaac4516ff8..37103e207635 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -54,7 +54,9 @@ void vpci_remove_device_locked(struct pci_dev *pdev)
 {
     ASSERT(spin_is_locked(&pdev->vpci_lock));
 
+    pdev->vpci_cancel_pending = true;
     vpci_remove_device_handlers_locked(pdev);
+    vpci_cancel_pending_locked(pdev);
     xfree(pdev->vpci->msix);
     xfree(pdev->vpci->msi);
     xfree(pdev->vpci);
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index 3f60d6c6c6dd..52d302ac5f35 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -135,6 +135,11 @@ struct pci_dev {
 
     /* Data for vPCI. */
     spinlock_t vpci_lock;
+    /*
+     * Set if PCI device is being removed now and we need to cancel any
+     * pending map/unmap operations.
+     */
+    bool vpci_cancel_pending;
     struct vpci *vpci;
 };
 
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index 8b22bdef11d0..cfff87e5801e 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -57,6 +57,7 @@ uint32_t vpci_hw_read32(const struct pci_dev *pdev, unsigned int reg,
  * should not run.
  */
 bool __must_check vpci_process_pending(struct vcpu *v);
+void vpci_cancel_pending_locked(struct pci_dev *pdev);
 
 struct vpci {
     /* List of vPCI handlers for a device. */
@@ -253,6 +254,11 @@ static inline bool __must_check vpci_process_pending(struct vcpu *v)
     ASSERT_UNREACHABLE();
     return false;
 }
+
+static inline void vpci_cancel_pending_locked(struct pci_dev *pdev)
+{
+    ASSERT_UNREACHABLE();
+}
 #endif
 
 #endif
-- 
2.25.1