[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen staging] x86/hvm: Allow access to registers on the same page as MSI-X table



commit b2cd07a0447bfa25e96ae13e190225b61a3670cb
Author:     Marek Marczykowski-Górecki <marmarek@xxxxxxxxxxxxxxxxxxxxxx>
AuthorDate: Fri May 10 05:53:22 2024 +0200
Commit:     Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CommitDate: Sat May 11 00:13:43 2024 +0100

    x86/hvm: Allow access to registers on the same page as MSI-X table
    
    Some devices (notably the Intel Wifi 6 AX210 card) keep auxiliary
    registers on the same page as the MSI-X table. A device model (especially
    one in a stubdomain) cannot really handle those, as direct writes to that
    page are refused (the page is on the mmio_ro_ranges list). Instead, extend
    msixtbl_mmio_ops to handle such accesses too.
    
    Doing this requires correlating the read/write location with the guest
    MSI-X table address. Since QEMU doesn't map the MSI-X table to the guest,
    it requires msixtbl_entry->gtable, which is HVM-only. A similar feature
    for PV would need to be done separately.
    
    This will also be used to read the Pending Bit Array (PBA), if it lives
    on the same page, so that QEMU does not need /dev/mem access at all
    (especially helpful with lockdown enabled in dom0). If the PBA lives on
    another page, QEMU will map it to the guest directly.
    If the PBA lives on the same page, discard writes and log a message.
    Technically, writes outside of the PBA could be allowed, but at this
    moment the precise location of the PBA isn't saved, and also no known
    device abuses the spec in this way (at least yet).
    
    To access those registers, msixtbl_mmio_ops needs the relevant page
    mapped. MSI handling already has infrastructure for that, using fixmap,
    so try to map the first/last page of the MSI-X table (if necessary) and
    save their fixmap indexes. Note that msix_get_fixmap() does reference
    counting and reuses an existing mapping, so just call it directly, even if
    the page was mapped before. Also, it uses a specific range of fixmap
    indexes which doesn't include 0, so use 0 as the default ("not mapped")
    value - which simplifies the code a bit.
    
    Based on the assumption that all MSI-X page accesses are handled by Xen,
    do not forward adjacent accesses to other hypothetical ioreq servers, even
    if the access wasn't handled for some reason (failure to map pages etc).
    The relevant places already log a message about that.
    
    Signed-off-by: Marek Marczykowski-Górecki <marmarek@xxxxxxxxxxxxxxxxxxxxxx>
    Reviewed-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
---
 xen/arch/x86/hvm/vmsi.c        | 206 +++++++++++++++++++++++++++++++++++++++--
 xen/arch/x86/include/asm/msi.h |   5 +
 xen/arch/x86/msi.c             |  42 +++++++++
 3 files changed, 243 insertions(+), 10 deletions(-)

diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
index 9999179837..fd83abb929 100644
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -180,6 +180,10 @@ static bool msixtbl_initialised(const struct domain *d)
     return d->arch.hvm.msixtbl_list.next;
 }
 
+/*
+ * Lookup an msixtbl_entry on the same page as given addr. It's up to the
+ * caller to check if address is strictly part of the table - if relevant.
+ */
 static struct msixtbl_entry *msixtbl_find_entry(
     struct vcpu *v, unsigned long addr)
 {
@@ -187,8 +191,8 @@ static struct msixtbl_entry *msixtbl_find_entry(
     struct domain *d = v->domain;
 
     list_for_each_entry( entry, &d->arch.hvm.msixtbl_list, list )
-        if ( addr >= entry->gtable &&
-             addr < entry->gtable + entry->table_len )
+        if ( PFN_DOWN(addr) >= PFN_DOWN(entry->gtable) &&
+             PFN_DOWN(addr) <= PFN_DOWN(entry->gtable + entry->table_len - 1) )
             return entry;
 
     return NULL;
@@ -203,6 +207,10 @@ static struct msi_desc *msixtbl_addr_to_desc(
     if ( !entry || !entry->pdev )
         return NULL;
 
+    if ( addr <  entry->gtable ||
+         addr >= entry->gtable + entry->table_len )
+        return NULL;
+
     nr_entry = (addr - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
 
     list_for_each_entry( desc, &entry->pdev->msi_list, list )
@@ -213,6 +221,153 @@ static struct msi_desc *msixtbl_addr_to_desc(
     return NULL;
 }
 
+/*
+ * Returns:
+ *  - 0 (FIX_RESERVED) if no handling should be done
+ *  - a fixmap idx to use for handling
+ */
+static unsigned int get_adjacent_idx(
+    const struct msixtbl_entry *entry, unsigned long addr, bool write)
+{
+    unsigned int adj_type;
+    struct arch_msix *msix;
+
+    if ( !entry || !entry->pdev )
+    {
+        ASSERT_UNREACHABLE();
+        return 0;
+    }
+
+    if ( PFN_DOWN(addr) == PFN_DOWN(entry->gtable) && addr < entry->gtable )
+        adj_type = ADJ_IDX_FIRST;
+    else if ( PFN_DOWN(addr) == PFN_DOWN(entry->gtable + entry->table_len - 1) 
&&
+              addr >= entry->gtable + entry->table_len )
+        adj_type = ADJ_IDX_LAST;
+    else
+    {
+        /* All callers should already do equivalent range checking. */
+        ASSERT_UNREACHABLE();
+        return 0;
+    }
+
+    msix = entry->pdev->msix;
+    if ( !msix )
+    {
+        ASSERT_UNREACHABLE();
+        return 0;
+    }
+
+    if ( !msix->adj_access_idx[adj_type] )
+    {
+        if ( MSIX_CHECK_WARN(msix, entry->pdev->domain->domain_id,
+                             adjacent_not_initialized) )
+            gprintk(XENLOG_WARNING,
+                    "%pp: Page for adjacent(%d) MSI-X table access not 
initialized (addr %#lx, gtable %#lx)\n",
+                    &entry->pdev->sbdf, adj_type, addr, entry->gtable);
+        return 0;
+    }
+
+    /* If PBA lives on the same page too, discard writes. */
+    if ( write &&
+         ((adj_type == ADJ_IDX_LAST &&
+           msix->table.last == msix->pba.first) ||
+          (adj_type == ADJ_IDX_FIRST &&
+           msix->table.first == msix->pba.last)) )
+    {
+        if ( MSIX_CHECK_WARN(msix, entry->pdev->domain->domain_id,
+                             adjacent_pba) )
+            gprintk(XENLOG_WARNING,
+                    "%pp: MSI-X table and PBA share a page, "
+                    "discard write to adjacent memory (%#lx)\n",
+                    &entry->pdev->sbdf, addr);
+        return 0;
+    }
+
+    return msix->adj_access_idx[adj_type];
+}
+
+static void adjacent_read(
+    const struct msixtbl_entry *entry,
+    paddr_t address, unsigned int len, uint64_t *pval)
+{
+    const void __iomem *hwaddr;
+    unsigned int fixmap_idx;
+
+    ASSERT(IS_ALIGNED(address, len));
+
+    *pval = ~0UL;
+
+    fixmap_idx = get_adjacent_idx(entry, address, false);
+
+    if ( !fixmap_idx )
+        return;
+
+    hwaddr = fix_to_virt(fixmap_idx) + PAGE_OFFSET(address);
+
+    switch ( len )
+    {
+    case 1:
+        *pval = readb(hwaddr);
+        break;
+
+    case 2:
+        *pval = readw(hwaddr);
+        break;
+
+    case 4:
+        *pval = readl(hwaddr);
+        break;
+
+    case 8:
+        *pval = readq(hwaddr);
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+        break;
+    }
+}
+
+static void adjacent_write(
+    const struct msixtbl_entry *entry,
+    paddr_t address, unsigned int len, uint64_t val)
+{
+    void __iomem *hwaddr;
+    unsigned int fixmap_idx;
+
+    ASSERT(IS_ALIGNED(address, len));
+
+    fixmap_idx = get_adjacent_idx(entry, address, true);
+
+    if ( !fixmap_idx )
+        return;
+
+    hwaddr = fix_to_virt(fixmap_idx) + PAGE_OFFSET(address);
+
+    switch ( len )
+    {
+    case 1:
+        writeb(val, hwaddr);
+        break;
+
+    case 2:
+        writew(val, hwaddr);
+        break;
+
+    case 4:
+        writel(val, hwaddr);
+        break;
+
+    case 8:
+        writeq(val, hwaddr);
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+        break;
+    }
+}
+
 static int cf_check msixtbl_read(
     const struct hvm_io_handler *handler, uint64_t address, uint32_t len,
     uint64_t *pval)
@@ -222,7 +377,7 @@ static int cf_check msixtbl_read(
     unsigned int nr_entry, index;
     int r = X86EMUL_UNHANDLEABLE;
 
-    if ( (len != 4 && len != 8) || (address & (len - 1)) )
+    if ( !IS_ALIGNED(address, len) )
         return r;
 
     rcu_read_lock(&msixtbl_rcu_lock);
@@ -230,6 +385,18 @@ static int cf_check msixtbl_read(
     entry = msixtbl_find_entry(current, address);
     if ( !entry )
         goto out;
+
+    if ( address <  entry->gtable ||
+         address >= entry->gtable + entry->table_len )
+    {
+        adjacent_read(entry, address, len, pval);
+        r = X86EMUL_OKAY;
+        goto out;
+    }
+
+    if ( len != 4 && len != 8 )
+        goto out;
+
     offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
 
     if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
@@ -291,6 +458,18 @@ static int msixtbl_write(struct vcpu *v, unsigned long 
address,
     entry = msixtbl_find_entry(v, address);
     if ( !entry )
         goto out;
+
+    if ( address <  entry->gtable ||
+         address >= entry->gtable + entry->table_len )
+    {
+        adjacent_write(entry, address, len, val);
+        r = X86EMUL_OKAY;
+        goto out;
+    }
+
+    if ( len != 4 && len != 8 )
+        goto out;
+
     nr_entry = array_index_nospec(((address - entry->gtable) /
                                    PCI_MSIX_ENTRY_SIZE),
                                   MAX_MSIX_TABLE_ENTRIES);
@@ -356,8 +535,8 @@ static int cf_check _msixtbl_write(
     const struct hvm_io_handler *handler, uint64_t address, uint32_t len,
     uint64_t val)
 {
-    /* Ignore invalid length or unaligned writes. */
-    if ( (len != 4 && len != 8) || !IS_ALIGNED(address, len) )
+    /* Ignore unaligned writes. */
+    if ( !IS_ALIGNED(address, len) )
         return X86EMUL_OKAY;
 
     /*
@@ -374,16 +553,23 @@ static bool cf_check msixtbl_range(
 {
     struct vcpu *curr = current;
     unsigned long addr = r->addr;
-    const struct msi_desc *desc;
+    const struct msixtbl_entry *entry;
+    bool ret = false;
 
     ASSERT(r->type == IOREQ_TYPE_COPY);
 
     rcu_read_lock(&msixtbl_rcu_lock);
-    desc = msixtbl_addr_to_desc(msixtbl_find_entry(curr, addr), addr);
+    entry = msixtbl_find_entry(curr, addr);
+    if ( entry &&
+          /* Adjacent access. */
+         (addr < entry->gtable || addr >= entry->gtable + entry->table_len ||
+          /* Otherwise check if there is a matching msi_desc. */
+          msixtbl_addr_to_desc(entry, addr)) )
+        ret = true;
     rcu_read_unlock(&msixtbl_rcu_lock);
 
-    if ( desc )
-        return 1;
+    if ( ret )
+        return ret;
 
     if ( r->state == STATE_IOREQ_READY && r->dir == IOREQ_WRITE )
     {
@@ -429,7 +615,7 @@ static bool cf_check msixtbl_range(
         }
     }
 
-    return 0;
+    return false;
 }
 
 static const struct hvm_io_ops msixtbl_mmio_ops = {
diff --git a/xen/arch/x86/include/asm/msi.h b/xen/arch/x86/include/asm/msi.h
index b64cb93c0c..748bc3cd6d 100644
--- a/xen/arch/x86/include/asm/msi.h
+++ b/xen/arch/x86/include/asm/msi.h
@@ -225,6 +225,9 @@ struct arch_msix {
     } table, pba;
     int table_refcnt[MAX_MSIX_TABLE_PAGES];
     int table_idx[MAX_MSIX_TABLE_PAGES];
+#define ADJ_IDX_FIRST 0
+#define ADJ_IDX_LAST  1
+    unsigned int adj_access_idx[2];
     spinlock_t table_lock;
     bool host_maskall, guest_maskall;
     domid_t warned_domid;
@@ -232,6 +235,8 @@ struct arch_msix {
         uint8_t all;
         struct {
             bool maskall                   : 1;
+            bool adjacent_not_initialized  : 1;
+            bool adjacent_pba              : 1;
         };
     } warned_kind;
 };
diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index fb0fab60f1..19830528b6 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -913,6 +913,37 @@ static int msix_capability_init(struct pci_dev *dev,
         list_add_tail(&entry->list, &dev->msi_list);
         *desc = entry;
     }
+    else
+    {
+        /*
+         * If the MSI-X table doesn't start at the page boundary, map the 
first page for
+         * passthrough accesses.
+         */
+        if ( PAGE_OFFSET(table_paddr) )
+        {
+            int idx = msix_get_fixmap(msix, table_paddr, table_paddr);
+
+            if ( idx > 0 )
+                msix->adj_access_idx[ADJ_IDX_FIRST] = idx;
+            else
+                gprintk(XENLOG_ERR, "%pp: Failed to map first MSI-X table 
page: %d\n", &dev->sbdf, idx);
+        }
+        /*
+         * If the MSI-X table doesn't end on the page boundary, map the last 
page
+         * for passthrough accesses.
+         */
+        if ( PAGE_OFFSET(table_paddr + msix->nr_entries * PCI_MSIX_ENTRY_SIZE) 
)
+        {
+            uint64_t entry_paddr = table_paddr +
+                (msix->nr_entries - 1) * PCI_MSIX_ENTRY_SIZE;
+            int idx = msix_get_fixmap(msix, table_paddr, entry_paddr);
+
+            if ( idx > 0 )
+                msix->adj_access_idx[ADJ_IDX_LAST] = idx;
+            else
+                gprintk(XENLOG_ERR, "%pp: Failed to map last MSI-X table page: 
%d\n", &dev->sbdf, idx);
+        }
+    }
 
     if ( !msix->used_entries )
     {
@@ -1079,6 +1110,17 @@ static void _pci_cleanup_msix(struct arch_msix *msix)
         msix->table.first = 0;
         msix->table.last = 0;
 
+        if ( msix->adj_access_idx[ADJ_IDX_FIRST] )
+        {
+            msix_put_fixmap(msix, msix->adj_access_idx[ADJ_IDX_FIRST]);
+            msix->adj_access_idx[ADJ_IDX_FIRST] = 0;
+        }
+        if ( msix->adj_access_idx[ADJ_IDX_LAST] )
+        {
+            msix_put_fixmap(msix, msix->adj_access_idx[ADJ_IDX_LAST]);
+            msix->adj_access_idx[ADJ_IDX_LAST] = 0;
+        }
+
         if ( rangeset_remove_range(mmio_ro_ranges, msix->pba.first,
                                    msix->pba.last) )
             WARN();
--
generated by git-patchbot for /home/xen/git/xen.git#staging



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.