[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 2/3] x86/msi: remove return value from msi_set_mask_bit()





On 11/11/2022 09:44, Jan Beulich wrote:

The idea of the WARN() / BUG_ON() is to
- not leave failed unmasking unrecorded,
- not continue after failure to mask an entry.

Then lets make msi_set_mask_bit() unable to fail with something like this (untested) patch. Would this be acceptable?

David

From 837649a70d44455f4fd98e2eaa46dcf35a56d00a Mon Sep 17 00:00:00 2001
From: David Vrabel <dvrabel@xxxxxxxxxxxx>
Date: Fri, 11 Nov 2022 14:30:16 +0000
Subject: [PATCH] x86: Always enable memory space decodes when using MSI-X

Instead of the numerous (racy) checks for memory space accesses being
enabled before writing the the MSI-X table, force Memory Space Enable
to be set in the Command register if MSI-X is used.

This allows the memory_decoded() function and the associated error
paths to be removed (since it will always return true). In particular,
msi_set_mask_bit() can no longer fail and its return value is removed.

Note that if the PCI device is a virtual function, the relevant
command register is in the corresponding physical function.

Signed-off-by: David Vrabel <dvrabel@xxxxxxxxxxxx>
---
 xen/arch/x86/include/asm/pci.h |   3 +
 xen/arch/x86/msi.c             | 116 +++++++++------------------------
 xen/arch/x86/pci.c             |  39 ++++++++++-
 3 files changed, 71 insertions(+), 87 deletions(-)

diff --git a/xen/arch/x86/include/asm/pci.h b/xen/arch/x86/include/asm/pci.h
index f4a58c8acf..4f59b70959 100644
--- a/xen/arch/x86/include/asm/pci.h
+++ b/xen/arch/x86/include/asm/pci.h
@@ -32,8 +32,11 @@ struct arch_pci_dev {
     domid_t pseudo_domid;
     mfn_t leaf_mfn;
     struct page_list_head pgtables_list;
+    uint16_t host_command;
+    uint16_t guest_command;
 };

+void pci_command_override(struct pci_dev *pdev, uint16_t val);
 int pci_conf_write_intercept(unsigned int seg, unsigned int bdf,
                              unsigned int reg, unsigned int size,
                              uint32_t *data);
diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index d0bf63df1d..2f8667aa7b 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -124,27 +124,11 @@ static void msix_put_fixmap(struct arch_msix *msix, int idx)
     spin_unlock(&msix->table_lock);
 }

-static bool memory_decoded(const struct pci_dev *dev)
-{
-    pci_sbdf_t sbdf = dev->sbdf;
-
-    if ( dev->info.is_virtfn )
-    {
-        sbdf.bus = dev->info.physfn.bus;
-        sbdf.devfn = dev->info.physfn.devfn;
-    }
-
-    return pci_conf_read16(sbdf, PCI_COMMAND) & PCI_COMMAND_MEMORY;
-}
-
static bool msix_memory_decoded(const struct pci_dev *dev, unsigned int pos)
 {
     uint16_t control = pci_conf_read16(dev->sbdf, msix_control_reg(pos));

-    if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
-        return false;
-
-    return memory_decoded(dev);
+    return control & PCI_MSIX_FLAGS_ENABLE;
 }

 /*
@@ -314,7 +298,7 @@ int msi_maskable_irq(const struct msi_desc *entry)
            || entry->msi_attrib.maskbit;
 }

-static bool msi_set_mask_bit(struct irq_desc *desc, bool host, bool guest)
+static void msi_set_mask_bit(struct irq_desc *desc, bool host, bool guest)
 {
     struct msi_desc *entry = desc->msi_desc;
     struct pci_dev *pdev;
@@ -354,45 +338,26 @@ static bool msi_set_mask_bit(struct irq_desc *desc, bool host, bool guest)
                              control | (PCI_MSIX_FLAGS_ENABLE |
                                         PCI_MSIX_FLAGS_MASKALL));
         }
-        if ( likely(memory_decoded(pdev)) )
-        {
- writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
-            readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);

-            if ( likely(control & PCI_MSIX_FLAGS_ENABLE) )
-                break;
-
-            entry->msi_attrib.host_masked = host;
-            entry->msi_attrib.guest_masked = guest;
+        writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+        readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);

-            flag = true;
-        }
-        else if ( flag && !(control & PCI_MSIX_FLAGS_MASKALL) )
+        if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
         {
-            domid_t domid = pdev->domain->domain_id;
-
-            maskall = true;
-            if ( pdev->msix->warned != domid )
-            {
-                pdev->msix->warned = domid;
-                printk(XENLOG_G_WARNING
- "cannot mask IRQ %d: masking MSI-X on Dom%d's %pp\n",
-                       desc->irq, domid, &pdev->sbdf);
-            }
+            pdev->msix->host_maskall = maskall;
+            if ( maskall || pdev->msix->guest_maskall )
+                control |= PCI_MSIX_FLAGS_MASKALL;
+            pci_conf_write16(pdev->sbdf,
+ msix_control_reg(entry->msi_attrib.pos), control);
         }
-        pdev->msix->host_maskall = maskall;
-        if ( maskall || pdev->msix->guest_maskall )
-            control |= PCI_MSIX_FLAGS_MASKALL;
-        pci_conf_write16(pdev->sbdf,
-                         msix_control_reg(entry->msi_attrib.pos), control);
-        return flag;
+        break;
     default:
-        return 0;
+        ASSERT_UNREACHABLE();
+        break;
     }
+
     entry->msi_attrib.host_masked = host;
     entry->msi_attrib.guest_masked = guest;
-
-    return 1;
 }

 static int msi_get_mask_bit(const struct msi_desc *entry)
@@ -418,16 +383,12 @@ static int msi_get_mask_bit(const struct msi_desc *entry)

 void cf_check mask_msi_irq(struct irq_desc *desc)
 {
-    if ( unlikely(!msi_set_mask_bit(desc, 1,
- desc->msi_desc->msi_attrib.guest_masked)) )
-        BUG_ON(!(desc->status & IRQ_DISABLED));
+    msi_set_mask_bit(desc, 1, desc->msi_desc->msi_attrib.guest_masked);
 }

 void cf_check unmask_msi_irq(struct irq_desc *desc)
 {
-    if ( unlikely(!msi_set_mask_bit(desc, 0,
- desc->msi_desc->msi_attrib.guest_masked)) )
-        WARN();
+    msi_set_mask_bit(desc, 0, desc->msi_desc->msi_attrib.guest_masked);
 }

 void guest_mask_msi_irq(struct irq_desc *desc, bool mask)
@@ -437,15 +398,13 @@ void guest_mask_msi_irq(struct irq_desc *desc, bool mask)

 static unsigned int cf_check startup_msi_irq(struct irq_desc *desc)
 {
- if ( unlikely(!msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST))) )
-        WARN();
+    msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST));
     return 0;
 }

 static void cf_check shutdown_msi_irq(struct irq_desc *desc)
 {
-    if ( unlikely(!msi_set_mask_bit(desc, 1, 1)) )
-        BUG_ON(!(desc->status & IRQ_DISABLED));
+    msi_set_mask_bit(desc, 1, 1);
 }

 void cf_check ack_nonmaskable_msi_irq(struct irq_desc *desc)
@@ -785,6 +744,12 @@ static int msix_capability_init(struct pci_dev *dev,

     ASSERT(pcidevs_locked());

+    /*
+     * Force enable access to the MSI-X tables, so access to the
+     * per-vector mask bits always works.
+     */
+    pci_command_override(dev, PCI_COMMAND_MEMORY);
+
     control = pci_conf_read16(dev->sbdf, msix_control_reg(pos));
     /*
* Ensure MSI-X interrupts are masked during setup. Some devices require
@@ -797,13 +762,6 @@ static int msix_capability_init(struct pci_dev *dev,
                      control | (PCI_MSIX_FLAGS_ENABLE |
                                 PCI_MSIX_FLAGS_MASKALL));

-    if ( unlikely(!memory_decoded(dev)) )
-    {
-        pci_conf_write16(dev->sbdf, msix_control_reg(pos),
-                         control & ~PCI_MSIX_FLAGS_ENABLE);
-        return -ENXIO;
-    }
-
     if ( desc )
     {
         entry = alloc_msi_entry(1);
@@ -1122,19 +1080,15 @@ static void __pci_disable_msix(struct msi_desc *entry)

     BUG_ON(list_empty(&dev->msi_list));

-    if ( likely(memory_decoded(dev)) )
-        writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
-    else if ( !(control & PCI_MSIX_FLAGS_MASKALL) )
-    {
- printk(XENLOG_WARNING "cannot disable IRQ %d: masking MSI-X on %pp\n",
-               entry->irq, &dev->sbdf);
-        maskall = true;
-    }
+    writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+
     dev->msix->host_maskall = maskall;
     if ( maskall || dev->msix->guest_maskall )
         control |= PCI_MSIX_FLAGS_MASKALL;
     pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);

+    pci_command_override(dev, 0);
+
     _pci_cleanup_msix(dev->msix);
 }

@@ -1353,13 +1307,6 @@ int pci_restore_msi_state(struct pci_dev *pdev)
             pci_conf_write16(pdev->sbdf, msix_control_reg(pos),
                              control | (PCI_MSIX_FLAGS_ENABLE |
                                         PCI_MSIX_FLAGS_MASKALL));
-            if ( unlikely(!memory_decoded(pdev)) )
-            {
-                spin_unlock_irqrestore(&desc->lock, flags);
-                pci_conf_write16(pdev->sbdf, msix_control_reg(pos),
-                                 control & ~PCI_MSIX_FLAGS_ENABLE);
-                return -ENXIO;
-            }
         }
         type = entry->msi_attrib.type;

@@ -1368,10 +1315,9 @@ int pci_restore_msi_state(struct pci_dev *pdev)

         for ( i = 0; ; )
         {
-            if ( unlikely(!msi_set_mask_bit(desc,
- entry[i].msi_attrib.host_masked, - entry[i].msi_attrib.guest_masked)) )
-                BUG();
+            msi_set_mask_bit(desc,
+                             entry[i].msi_attrib.host_masked,
+                             entry[i].msi_attrib.guest_masked);

             if ( !--nr )
                 break;
diff --git a/xen/arch/x86/pci.c b/xen/arch/x86/pci.c
index 97b792e578..0c4b49f042 100644
--- a/xen/arch/x86/pci.c
+++ b/xen/arch/x86/pci.c
@@ -69,6 +69,24 @@ void pci_conf_write(uint32_t cf8, uint8_t offset, uint8_t bytes, uint32_t data)
     spin_unlock_irqrestore(&pci_config_lock, flags);
 }

+void pci_command_override(struct pci_dev *pdev, uint16_t val)
+{
+    pci_sbdf_t sbdf = pdev->sbdf;
+
+    ASSERT(pcidevs_locked());
+
+    if ( pdev->info.is_virtfn )
+    {
+        sbdf.bus = pdev->info.physfn.bus;
+        sbdf.devfn = pdev->info.physfn.devfn;
+
+        pdev = pci_get_pdev(NULL, sbdf);
+    }
+
+    pdev->arch.host_command = val;
+ pci_conf_write16(sbdf, PCI_COMMAND, pdev->arch.host_command | pdev->arch.guest_command);
+}
+
 int pci_conf_write_intercept(unsigned int seg, unsigned int bdf,
                              unsigned int reg, unsigned int size,
                              uint32_t *data)
@@ -85,14 +103,31 @@ int pci_conf_write_intercept(unsigned int seg, unsigned int bdf,
      * Avoid expensive operations when no hook is going to do anything
      * for the access anyway.
      */
-    if ( reg < 64 || reg >= 256 )
+    if ( reg != PCI_COMMAND && (reg < 64 || reg >= 256) )
         return 0;

     pcidevs_lock();

     pdev = pci_get_pdev(NULL, PCI_SBDF(seg, bdf));
     if ( pdev )
-        rc = pci_msi_conf_write_intercept(pdev, reg, size, data);
+    {
+        switch ( reg )
+        {
+        case PCI_COMMAND:
+            if ( size == 2 )
+            {
+                pdev->arch.guest_command = *data;
+                *data |= pdev->arch.host_command;
+            }
+            else
+                rc = -EACCESS;
+            break;
+
+        default:
+            rc = pci_msi_conf_write_intercept(pdev, reg, size, data);
+            break;
+        }
+    }

     pcidevs_unlock();

--
2.37.1




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.