[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] vpci/msix: handle accesses adjacent to the MSI-X table


  • To: xen-devel@xxxxxxxxxxxxxxxxxxxx
  • From: Roger Pau Monne <roger.pau@xxxxxxxxxx>
  • Date: Tue, 14 Mar 2023 11:13:31 +0100
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=citrix.com; dmarc=pass action=none header.from=citrix.com; dkim=pass header.d=citrix.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=DlbQb2nbl8GJ3brMpKES1M8hi+FxR0QVkO6xDYCDg4I=; b=Q4SjvpHf96w43fp4kP9aCq9LhofvQgWVgI9/O5zSWvXWIJTvKXNt6qSr9Lh3K6xIekKZfcYXbDZTHml7FYALj4UkqSu3BOecW5aZl7MCtZhh7BO7fL1aOBI+4YEzwei+aS/k9BplKVTIk7Z2pxqgVLFTKjYbLoYYBzfxE8sGd6jq7pJjCdrDN7AyYOKrtUKrpQUe9kLTkecxGO74Rwiu6cG2Ih0Ze/hPFftdEaz0xZSD6vuGVSYgXK9Pd853zoan4iGmsSAkNAnPPQ3JeH5Gzfyema8XIe3LoVptNM+lKoNmfRcTWHPOxBbyPvygM2wyYm2GM+rqbYx3NZzPgEwbPQ==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=SIwQX7ur8uwauvWXQ6DqywbrXMJod8KEOeeaxlWz4InRFGSh+uZlSX0MFlVa1J9nFa+MmhKLeKWvQ4xonOR4+FxW2PFDE0tOc782dT0P1Ratt9pQ2kaerYpqfk5+5nQ3wHf8ELi05itgl9PoxzgfOIKE6fExEZLcZ07C8iIxwF2FDvXkcIhzw9ckbODJk6sddJ1Hn+XReF5KbQLed9x7r32xBXfAhyg7JEc/6M6voCS3oV6QgULofcLnraz3MAbGIYuMkyvQhTfBLk0gsX8r3jkhvm+3tkkSSRD++lY8q+us8DhoXAx0sPeqh1hS18ZjR0tfzo+hvhgbNQsiAVOLeQ==
  • Authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=citrix.com;
  • Cc: jbeulich@xxxxxxxx, Roger Pau Monne <roger.pau@xxxxxxxxxx>
  • Delivery-date: Tue, 14 Mar 2023 10:14:07 +0000
  • Ironport-data: A9a23:63QfRKN+DiUdhrDvrR23lsFynXyQoLVcMsEvi/4bfWQNrUoj1GQBm zMaWGuAafrYMTHyfNlxOom08U1VuMLcyYdrHQto+SlhQUwRpJueD7x1DKtS0wC6dZSfER09v 63yTvGacajYm1eF/k/F3oDJ9CU6jufQAOKnUoYoAwgpLSd8UiAtlBl/rOAwh49skLCRDhiE/ Nj/uKUzAnf8s9JPGj9SuvLrRC9H5qyo42tD5wBmPJingXeF/5UrJMNHTU2OByOQrrl8RoaSW +vFxbelyWLVlz9F5gSNy+uTnuUiG9Y+DCDW4pZkc/HKbitq/0Te5p0TJvsEAXq7vh3S9zxHJ HehgrTrIeshFvWkdO3wyHC0GQkmVUFN0OevzXRSLaV/ZqAJGpfh66wGMa04AWEX0rtJMXFVx 9sjEwEyYECstseZ/KywScA506zPLOGzVG8ekldJ6GiBSNoDH9XESaiM4sJE1jAtgMwIBezZe 8cSdTtoalLHfgFLPVAUTpk5mY9EhFGmK2Ee9A3T+PpxujCKpOBy+OGF3N79YNuFSN8Thk+Fj mnH4374ElcRM9n3JT+tqyr037WVwXmjMG4UPJij27ltnQfI/UkWJl4JcXK3ouKjrnfrDrqzL GRRoELCt5Ma5EGtC9XwQRC8iHqFpQIHHcpdFfUg7wOAwbaS5ByWbkAGRDNcbN0ttOctWCcnk FSOmrvU6SdHtbSUTTeR8+mSpDbrYSwNdzZaPGkDUBcP5MTlrMcrlBXTQ91/EamzyNroBTX3x DPMpy8771kOsfM2O2yA1Qivq1qRSlLhF2bZOi2/srqZ0z5E
  • Ironport-hdrordr: A9a23:gsgl7q5tqDgvrpe7ZwPXwfSCI+orL9Y04lQ7vn2ZFiY5TiXIra qTdaogviMc6QxhEE3I/OrtV5VoLkmsl6KdjbNhdItKPzOWwFdAUrsSibcKqgeIc0Oeh41gPM FbAt1D4bXLfC5HZOnBkW6F+r0bsby6Gc6T9JXj5kYoZxprZshbnnNE40ugYwxLbTgDIaB8OI uX58JBqTblUXMLbv6jDn1Ae+TYvdXEmL/vfBZDXnccmUqzpALtzIS/PwmT3x8YXT8K6bA+8V Ldmwi8wqm4qfm0xjLVymeWxZVLn9nKzMdFGaW3+4goAwSprjztSJVqWrWEsjxwiOaz6GwymN 2JmBskN9Qb0QKcQongyyGM5yDQlBIVr1Pyw16RhnXu5ebjQighNsZHjYVFNjPE9ksJprhHof t29lPck6ASIQLLnSz76dSNfQptjFCIrX0rlvNWp2BDULEZdKRaoeUkjRto+a87bXnHAb0cYa 5T5YDnlbJrmGqhHjXkV7xUsZqRtzoIb027q3M5y4GoOght7TpEJnQjtYcidw87heMAorl/lq v52/dT5f9zp4ktHOBA7NNte7rFNoT7LCi8QF66EBDbE6ELUki93KLf8fE84e2wZZwEpaFC46 jpQRdRsGIoZljjEtKDx4Ba6xylehTFYR39jsla64Nlp73gQbaDC1z4dHk+18Okr+4DHMHRQf C6IfttcoHeBHqrEYNExBDhV5JPMnx2arxohv8rH0iOqtnKLYOvvOrdf/bcYKb3FF8fKx7CK2 pGRjn6P8VG4ASxVnv/nRSUR3PhE3aPgq5YAezB/uAJyI8CcpNXqAMck1ik/cTjE0wljpAL
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

The handling of MSI-X table accesses by Xen requires that any pages
that are part of the MSI-X table are not mapped into the domain physmap.
As a result, any device registers in the same pages as the start or
the end of the MSIX table are not currently accessible, as the accesses
are just dropped.

Note the spec forbids such placing of registers, as the MSIX and PBA
tables must be 4K isolated from any other registers:

"If a Base Address register that maps address space for the MSI-X
Table or MSI-X PBA also maps other usable address space that is not
associated with MSI-X structures, locations (e.g., for CSRs) used in
the other address space must not share any naturally aligned 4-KB
address range with one where either MSI-X structure resides."

Yet the 'Intel Wi-Fi 6 AX201' device on one of my boxes has registers
in the same page as the MSIX table, and thus won't work on a PVH dom0
without this fix.

In order to cope with this behavior, pass through to the underlying
hardware any accesses that fall on the same page as the MSIX table
(but not within the table itself).  Such forwarding also takes care of
the PBA accesses in case the PBA is sharing a page with the MSIX table,
which allows removing the code doing this handling in msix_{read,write}.

Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
---
 xen/drivers/vpci/msix.c | 259 +++++++++++++++++++++++++---------------
 xen/drivers/vpci/vpci.c |   7 +-
 xen/include/xen/vpci.h  |   6 +-
 3 files changed, 175 insertions(+), 97 deletions(-)

diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
index bea0cc7aed..1b59c7fc14 100644
--- a/xen/drivers/vpci/msix.c
+++ b/xen/drivers/vpci/msix.c
@@ -27,6 +27,13 @@
     ((addr) >= vmsix_table_addr(vpci, nr) &&                              \
      (addr) < vmsix_table_addr(vpci, nr) + vmsix_table_size(vpci, nr))
 
+#define VMSIX_ADDR_ADJACENT(addr, vpci, nr)                               \
+    ((PFN_DOWN(addr) == PFN_DOWN(vmsix_table_addr(vpci, nr)) &&           \
+      (addr) < vmsix_table_addr(vpci, nr)) ||                             \
+     (PFN_DOWN(addr) == PFN_DOWN(vmsix_table_addr(vpci, nr) +             \
+                                 vmsix_table_size(vpci, nr) - 1) &&       \
+      (addr) >= vmsix_table_addr(vpci, nr) + vmsix_table_size(vpci, nr)))
+
 static uint32_t cf_check control_read(
     const struct pci_dev *pdev, unsigned int reg, void *data)
 {
@@ -145,11 +152,9 @@ static struct vpci_msix *msix_find(const struct domain *d, 
unsigned long addr)
     list_for_each_entry ( msix, &d->arch.hvm.msix_tables, next )
     {
         const struct vpci_bar *bars = msix->pdev->vpci->header.bars;
-        unsigned int i;
 
-        for ( i = 0; i < ARRAY_SIZE(msix->tables); i++ )
-            if ( bars[msix->tables[i] & PCI_MSIX_BIRMASK].enabled &&
-                 VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, i) )
+        if ( bars[msix->tables[VPCI_MSIX_TABLE] & PCI_MSIX_BIRMASK].enabled &&
+             VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_TABLE) )
                 return msix;
     }
 
@@ -182,36 +187,38 @@ static struct vpci_msix_entry *get_entry(struct vpci_msix 
*msix,
     return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
 }
 
-static void __iomem *get_pba(struct vpci *vpci)
+static void __iomem *get_table(struct vpci *vpci, unsigned int slot)
 {
     struct vpci_msix *msix = vpci->msix;
     /*
-     * PBA will only be unmapped when the device is deassigned, so access it
+     * MSIX will only be unmapped when the device is deassigned, so access it
      * without holding the vpci lock.
      */
-    void __iomem *pba = read_atomic(&msix->pba);
+    void __iomem *table = read_atomic(&msix->table[slot]);
 
-    if ( likely(pba) )
-        return pba;
+    if ( likely(table) )
+        return table;
 
-    pba = ioremap(vmsix_table_addr(vpci, VPCI_MSIX_PBA),
-                  vmsix_table_size(vpci, VPCI_MSIX_PBA));
-    if ( !pba )
-        return read_atomic(&msix->pba);
+    table = ioremap(round_pgdown(vmsix_table_addr(vpci, VPCI_MSIX_TABLE) +
+                                 (slot == VPCI_MSIX_TBL_HEAD ?
+                                  0 : vmsix_table_size(vpci, 
VPCI_MSIX_TABLE))),
+                    PAGE_SIZE);
+    if ( !table )
+        return read_atomic(&msix->table[slot]);
 
     spin_lock(&vpci->lock);
-    if ( !msix->pba )
+    if ( !msix->table[slot] )
     {
-        write_atomic(&msix->pba, pba);
+        write_atomic(&msix->table[slot], table);
         spin_unlock(&vpci->lock);
     }
     else
     {
         spin_unlock(&vpci->lock);
-        iounmap(pba);
+        iounmap(table);
     }
 
-    return read_atomic(&msix->pba);
+    return read_atomic(&msix->table[slot]);
 }
 
 static int cf_check msix_read(
@@ -230,45 +237,6 @@ static int cf_check msix_read(
     if ( !access_allowed(msix->pdev, addr, len) )
         return X86EMUL_OKAY;
 
-    if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
-    {
-        struct vpci *vpci = msix->pdev->vpci;
-        unsigned int idx = addr - vmsix_table_addr(vpci, VPCI_MSIX_PBA);
-        const void __iomem *pba = get_pba(vpci);
-
-        /*
-         * Access to PBA.
-         *
-         * TODO: note that this relies on having the PBA identity mapped to the
-         * guest address space. If this changes the address will need to be
-         * translated.
-         */
-        if ( !pba )
-        {
-            gprintk(XENLOG_WARNING,
-                    "%pp: unable to map MSI-X PBA, report all pending\n",
-                    &msix->pdev->sbdf);
-            return X86EMUL_OKAY;
-        }
-
-        switch ( len )
-        {
-        case 4:
-            *data = readl(pba + idx);
-            break;
-
-        case 8:
-            *data = readq(pba + idx);
-            break;
-
-        default:
-            ASSERT_UNREACHABLE();
-            break;
-        }
-
-        return X86EMUL_OKAY;
-    }
-
     spin_lock(&msix->pdev->vpci->lock);
     entry = get_entry(msix, addr);
     offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
@@ -317,43 +285,6 @@ static int cf_check msix_write(
     if ( !access_allowed(msix->pdev, addr, len) )
         return X86EMUL_OKAY;
 
-    if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
-    {
-        struct vpci *vpci = msix->pdev->vpci;
-        unsigned int idx = addr - vmsix_table_addr(vpci, VPCI_MSIX_PBA);
-        const void __iomem *pba = get_pba(vpci);
-
-        if ( !is_hardware_domain(d) )
-            /* Ignore writes to PBA for DomUs, it's behavior is undefined. */
-            return X86EMUL_OKAY;
-
-        if ( !pba )
-        {
-            /* Unable to map the PBA, ignore write. */
-            gprintk(XENLOG_WARNING,
-                    "%pp: unable to map MSI-X PBA, write ignored\n",
-                    &msix->pdev->sbdf);
-            return X86EMUL_OKAY;
-        }
-
-        switch ( len )
-        {
-        case 4:
-            writel(data, pba + idx);
-            break;
-
-        case 8:
-            writeq(data, pba + idx);
-            break;
-
-        default:
-            ASSERT_UNREACHABLE();
-            break;
-        }
-
-        return X86EMUL_OKAY;
-    }
-
     spin_lock(&msix->pdev->vpci->lock);
     entry = get_entry(msix, addr);
     offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
@@ -438,6 +369,145 @@ static const struct hvm_mmio_ops vpci_msix_table_ops = {
     .write = msix_write,
 };
 
+const static struct vpci_msix *adjacent_find(const struct domain *d,
+                                             unsigned long addr)
+{
+    const struct vpci_msix *msix;
+
+    list_for_each_entry ( msix, &d->arch.hvm.msix_tables, next )
+        /*
+         * So far vPCI only traps accesses to the MSIX table, but not the PBA
+         * explicitly, and hence we only need to check for the hole created by
+         * the MSIX table.
+         *
+         * If the PBA table is also trapped, the check here should be expanded
+         * to take it into account.
+         */
+        if ( VMSIX_ADDR_ADJACENT(addr, msix->pdev->vpci, VPCI_MSIX_TABLE) )
+            return msix;
+
+    return NULL;
+}
+
+static int cf_check adjacent_accept(struct vcpu *v, unsigned long addr)
+{
+    return !!adjacent_find(v->domain, addr);
+}
+
+static int cf_check adjacent_read(
+    struct vcpu *v, unsigned long addr, unsigned int len, unsigned long *data)
+{
+    const struct domain *d = v->domain;
+    const struct vpci_msix *msix = adjacent_find(d, addr);
+    const void __iomem *mem;
+    paddr_t msix_tbl;
+    struct vpci *vpci;
+
+    *data = ~0ul;
+
+    if ( !msix )
+        return X86EMUL_RETRY;
+
+    vpci = msix->pdev->vpci;
+    msix_tbl = vmsix_table_addr(vpci, VPCI_MSIX_TABLE);
+
+    if ( addr + len > round_pgup(msix_tbl +
+                                 vmsix_table_size(vpci, VPCI_MSIX_TABLE)) )
+        return X86EMUL_OKAY;
+
+    mem = get_table(vpci,
+                    PFN_DOWN(addr) == PFN_DOWN(msix_tbl) ? VPCI_MSIX_TBL_HEAD
+                                                         : VPCI_MSIX_TBL_TAIL);
+    if ( !mem )
+        return X86EMUL_OKAY;
+
+    switch ( len )
+    {
+    case 1:
+        *data = readb(mem + PAGE_OFFSET(addr));
+        break;
+
+    case 2:
+        *data = readw(mem + PAGE_OFFSET(addr));
+        break;
+
+    case 4:
+        *data = readl(mem + PAGE_OFFSET(addr));
+        break;
+
+    case 8:
+        *data = readq(mem + PAGE_OFFSET(addr));
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+    }
+
+    return X86EMUL_OKAY;
+}
+
+static int cf_check adjacent_write(
+    struct vcpu *v, unsigned long addr, unsigned int len, unsigned long data)
+{
+    const struct domain *d = v->domain;
+    const struct vpci_msix *msix = adjacent_find(d, addr);
+    void __iomem *mem;
+    paddr_t msix_tbl;
+    struct vpci *vpci;
+
+    if ( !msix )
+        return X86EMUL_RETRY;
+
+    vpci = msix->pdev->vpci;
+    msix_tbl = vmsix_table_addr(vpci, VPCI_MSIX_TABLE);
+
+    if ( addr + len > round_pgup(msix_tbl +
+                                 vmsix_table_size(vpci, VPCI_MSIX_TABLE)) )
+        return X86EMUL_OKAY;
+
+    if ( (VMSIX_ADDR_IN_RANGE(addr, vpci, VPCI_MSIX_PBA) ||
+          VMSIX_ADDR_IN_RANGE(addr + len - 1, vpci, VPCI_MSIX_PBA)) &&
+         !is_hardware_domain(d) )
+        /* Ignore writes to PBA for DomUs, it's undefined behavior. */
+        return X86EMUL_OKAY;
+
+    mem = get_table(vpci,
+                    PFN_DOWN(addr) == PFN_DOWN(msix_tbl) ? VPCI_MSIX_TBL_HEAD
+                                                         : VPCI_MSIX_TBL_TAIL);
+    if ( !mem )
+        return X86EMUL_OKAY;
+
+    switch ( len )
+    {
+    case 1:
+        writeb(data, mem + PAGE_OFFSET(addr));
+        break;
+
+    case 2:
+        writew(data, mem + PAGE_OFFSET(addr));
+        break;
+
+    case 4:
+        writel(data, mem + PAGE_OFFSET(addr));
+        break;
+
+    case 8:
+        writeq(data, mem + PAGE_OFFSET(addr));
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+    }
+
+    return X86EMUL_OKAY;
+}
+
+static const struct hvm_mmio_ops vpci_msix_adj_ops = {
+    .check = adjacent_accept,
+    .read = adjacent_read,
+    .write = adjacent_write,
+};
+
 int vpci_make_msix_hole(const struct pci_dev *pdev)
 {
     struct domain *d = pdev->domain;
@@ -530,7 +600,10 @@ static int cf_check init_msix(struct pci_dev *pdev)
     }
 
     if ( list_empty(&d->arch.hvm.msix_tables) )
+    {
         register_mmio_handler(d, &vpci_msix_table_ops);
+        register_mmio_handler(d, &vpci_msix_adj_ops);
+    }
 
     pdev->vpci->msix = msix;
     list_add(&msix->next, &d->arch.hvm.msix_tables);
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index 6d48d496bb..652807a4a4 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -54,9 +54,12 @@ void vpci_remove_device(struct pci_dev *pdev)
     spin_unlock(&pdev->vpci->lock);
     if ( pdev->vpci->msix )
     {
+        unsigned int i;
+
         list_del(&pdev->vpci->msix->next);
-        if ( pdev->vpci->msix->pba )
-            iounmap(pdev->vpci->msix->pba);
+        for ( i = 0; i < ARRAY_SIZE(pdev->vpci->msix->table); i++ )
+            if ( pdev->vpci->msix->table[i] )
+                iounmap(pdev->vpci->msix->table[i]);
     }
     xfree(pdev->vpci->msix);
     xfree(pdev->vpci->msi);
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index d8acfeba8a..b1ea312778 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -133,8 +133,10 @@ struct vpci {
         bool enabled         : 1;
         /* Masked? */
         bool masked          : 1;
-        /* PBA map */
-        void __iomem *pba;
+        /* Partial table map. */
+#define VPCI_MSIX_TBL_HEAD 0
+#define VPCI_MSIX_TBL_TAIL 1
+        void __iomem *table[2];
         /* Entries. */
         struct vpci_msix_entry {
             uint64_t addr;
-- 
2.39.0




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.