Re: [Xen-devel] [PATCH v4 9/9] vpci/msix: add MSI-X handlers
>>> Roger Pau Monne <roger.pau@xxxxxxxxxx> 06/30/17 5:01 PM >>>
>Note that accesses to the Table Offset, Table BIR, PBA Offset and PBA
>BIR are not trapped by Xen at the moment.
They're mandated r/o by the spec anyway.
>@@ -113,6 +148,35 @@ static int vpci_modify_bar(struct domain *d, const struct vpci_bar *bar,
>if ( IS_ERR(mem) )
>return -PTR_ERR(mem);
>
>+ /*
>+ * Make sure the MSI-X regions of the BAR are not mapped into the domain
>+ * p2m, or else the MSI-X handlers are useless. Only do this when mapping,
>+ * since that's when the memory decoding on the device is enabled.
>+ */
>+ for ( i = 0; map && i < ARRAY_SIZE(bar->msix); i++ )
>+ {
>+ struct vpci_msix_mem *msix = bar->msix[i];
>+
>+ if ( !msix || msix->addr == INVALID_PADDR )
>+ continue;
>+
>+ rc = vpci_unmap_msix(d, msix);
Why do you need this, instead of being able to simply rely on the rangeset
based (un)mapping?
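What I have in mind is something along these lines (untested sketch, assuming
the "mem" rangeset built earlier in vpci_modify_bar() is what gets handed to
the mapping code, and that the MSI-X regions' sizes are available here):

    for ( i = 0; map && i < ARRAY_SIZE(bar->msix); i++ )
    {
        const struct vpci_msix_mem *msix = bar->msix[i];

        if ( !msix || msix->addr == INVALID_PADDR )
            continue;

        /* Drop the MSI-X frames from the set about to be mapped. */
        rc = rangeset_remove_range(mem, PFN_DOWN(msix->addr),
                                   PFN_DOWN(msix->addr + msix->size - 1));
        if ( rc )
        {
            rangeset_destroy(mem);
            return rc;
        }
    }

That way the generic mapping logic never establishes the MSI-X p2m entries in
the first place, and no separate unmap is needed.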
>@@ -405,7 +475,20 @@ static int vpci_init_bars(struct pci_dev *pdev)
>continue;
>}
>
>- bars[i].addr = (cmd & PCI_COMMAND_MEMORY) ? addr : INVALID_PADDR;
>+ if ( cmd & PCI_COMMAND_MEMORY )
>+ {
>+ unsigned int j;
>+
>+ bars[i].addr = addr;
>+
>+ for ( j = 0; j < ARRAY_SIZE(bars[i].msix); j++ )
>+ if ( bars[i].msix[j] )
>+ bars[i].msix[j]->addr = bars[i].addr +
>+ bars[i].msix[j]->offset;
>+ }
>+ else
>+ bars[i].addr = INVALID_PADDR;
As (I think) mentioned elsewhere already, this init-time special case looks
dangerous (and unnecessary) to me (or else I'd expect you to also zap
the field when the memory decode bit is being cleared).
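Or, if the conditional is to stay, I'd expect the command register write
handler (whatever it ends up being called; "header" and "cmd" below are just
placeholders for its local state) to do the inverse when decoding gets turned
off, e.g.:

    if ( !(cmd & PCI_COMMAND_MEMORY) )
        for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
        {
            unsigned int j;

            header->bars[i].addr = INVALID_PADDR;
            for ( j = 0; j < ARRAY_SIZE(header->bars[i].msix); j++ )
                if ( header->bars[i].msix[j] )
                    header->bars[i].msix[j]->addr = INVALID_PADDR;
        }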
>--- /dev/null
>+++ b/xen/drivers/vpci/msix.c
>@@ -0,0 +1,503 @@
>+/*
>+ * Handlers for accesses to the MSI-X capability structure and the memory
>+ * region.
>+ *
>+ * Copyright (C) 2017 Citrix Systems R&D
>+ *
>+ * This program is free software; you can redistribute it and/or
>+ * modify it under the terms and conditions of the GNU General Public
>+ * License, version 2, as published by the Free Software Foundation.
>+ *
>+ * This program is distributed in the hope that it will be useful,
>+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
>+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>+ * General Public License for more details.
>+ *
>+ * You should have received a copy of the GNU General Public
>+ * License along with this program; If not, see
><http://www.gnu.org/licenses/>.
>+ */
>+
>+#include <xen/sched.h>
>+#include <xen/vpci.h>
>+#include <asm/msi.h>
>+#include <xen/p2m-common.h>
>+#include <xen/keyhandler.h>
>+
>+#define MSIX_SIZE(num) (offsetof(struct vpci_msix, entries[num]))
The outermost parens are pointless here.
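I.e. simply:

#define MSIX_SIZE(num) offsetof(struct vpci_msix, entries[num])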
>+static void vpci_msix_control_write(struct pci_dev *pdev, unsigned int reg,
>+ union vpci_val val, void *data)
>+{
>+ uint8_t seg = pdev->seg, bus = pdev->bus;
>+ uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
>+ struct vpci_msix *msix = data;
>+ paddr_t table_base = pdev->vpci->header.bars[msix->table.bir].addr;
>+ bool new_masked, new_enabled;
>+ unsigned int i;
>+ int rc;
>+
>+ new_masked = val.u16 & PCI_MSIX_FLAGS_MASKALL;
>+ new_enabled = val.u16 & PCI_MSIX_FLAGS_ENABLE;
>+
>+ if ( !msix->enabled && new_enabled )
>+ {
>+ /* MSI-X enabled. */
Insert "being"?
>+ for ( i = 0; i < msix->max_entries; i++ )
>+ {
>+ if ( msix->entries[i].masked )
>+ continue;
Why is the mask bit relevant here, but not the mask-all one?
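If the per-entry bit is to be honored here, I'd expect the mask-all one to be
as well, i.e. presumably something like:

            if ( new_masked || msix->entries[i].masked )
                continue;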
>+ rc = vpci_msix_arch_enable(&msix->entries[i].arch, pdev,
>+ msix->entries[i].addr,
>+ msix->entries[i].data,
>+ msix->entries[i].nr, table_base);
>+ if ( rc )
>+ {
>+ gdprintk(XENLOG_ERR,
>+ "%04x:%02x:%02x.%u: unable to update entry %u: %d\n",
>+ seg, bus, slot, func, i, rc);
>+ return;
>+ }
>+
>+ vpci_msix_arch_mask(&msix->entries[i].arch, pdev, false);
Same question here.
>+ }
>+ }
>+ else if ( msix->enabled && !new_enabled )
>+ {
>+ /* MSI-X disabled. */
>+ for ( i = 0; i < msix->max_entries; i++ )
>+ {
>+ rc = vpci_msix_arch_disable(&msix->entries[i].arch, pdev);
>+ if ( rc )
>+ {
>+ gdprintk(XENLOG_ERR,
>+ "%04x:%02x:%02x.%u: unable to disable entry %u: %d\n",
>+ seg, bus, slot, func, i, rc);
>+ return;
>+ }
>+ }
>+ }
>+
>+ if ( (new_enabled != msix->enabled || new_masked != msix->masked) &&
>+ pci_msi_conf_write_intercept(pdev, reg, 2, &val.u32) >= 0 )
>+ pci_conf_write16(seg, bus, slot, func, reg, val.u32);
DYM val.u16 here?
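I.e. presumably:

        pci_conf_write16(seg, bus, slot, func, reg, val.u16);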
>+static struct vpci_msix *vpci_msix_find(struct domain *d, unsigned long addr)
>+{
>+ struct vpci_msix *msix;
>+
>+ ASSERT(vpci_locked(d));
>+ list_for_each_entry ( msix, &d->arch.hvm_domain.msix_tables, next )
>+ {
>+ uint8_t seg = msix->pdev->seg, bus = msix->pdev->bus;
>+ uint8_t slot = PCI_SLOT(msix->pdev->devfn);
>+ uint8_t func = PCI_FUNC(msix->pdev->devfn);
>+ uint16_t cmd = pci_conf_read16(seg, bus, slot, func, PCI_COMMAND);
Perhaps better to keep a cached copy of the command register value?
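E.g. (assuming a - so far hypothetical - "cmd" field gets added to struct
vpci_header, kept up to date by the command register write handler):

        if ( !(msix->pdev->vpci->header.cmd & PCI_COMMAND_MEMORY) )
            continue;

avoiding a config space read on every lookup.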
>+static int vpci_msix_read(struct vcpu *v, unsigned long addr,
>+ unsigned int len, unsigned long *data)
>+{
>+ struct domain *d = v->domain;
>+ struct vpci_msix *msix;
>+ const struct vpci_msix_entry *entry;
>+ unsigned int offset;
>+
>+ vpci_lock(d);
>+
>+ msix = vpci_msix_find(d, addr);
>+ if ( !msix )
>+ {
>+ vpci_unlock(d);
>+ *data = ~0ul;
>+ return X86EMUL_OKAY;
>+ }
>+
>+ if ( vpci_msix_access_check(msix->pdev, addr, len) )
>+ {
>+ vpci_unlock(d);
>+ *data = ~0ul;
>+ return X86EMUL_OKAY;
>+ }
>+
>+ if ( MSIX_ADDR_IN_RANGE(addr, &msix->pba) )
>+ {
>+ /* Access to PBA. */
>+ switch ( len )
>+ {
>+ case 4:
>+ *data = readl(addr);
>+ break;
>+ case 8:
>+ *data = readq(addr);
>+ break;
>+ default:
>+ ASSERT_UNREACHABLE();
*data = ~0ul;
>+static int vpci_msix_write(struct vcpu *v, unsigned long addr,
>+ unsigned int len, unsigned long data)
>+{
>+ struct domain *d = v->domain;
>+ struct vpci_msix *msix;
>+ struct vpci_msix_entry *entry;
>+ unsigned int offset;
>+
>+ vpci_lock(d);
>+ msix = vpci_msix_find(d, addr);
>+ if ( !msix )
>+ {
>+ vpci_unlock(d);
>+ return X86EMUL_OKAY;
>+ }
>+
>+ if ( MSIX_ADDR_IN_RANGE(addr, &msix->pba) )
>+ {
>+ /* Ignore writes to PBA, it's behavior is undefined. */
>+ vpci_unlock(d);
>+ return X86EMUL_OKAY;
>+ }
>+
>+ if ( vpci_msix_access_check(msix->pdev, addr, len) )
>+ {
>+ vpci_unlock(d);
>+ return X86EMUL_OKAY;
>+ }
>+
>+ /* Get the table entry and offset. */
>+ entry = vpci_msix_get_entry(msix, addr);
>+ offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
>+
>+ switch ( offset )
>+ {
>+ case PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET:
>+ if ( len == 8 )
>+ {
>+ entry->addr = data;
>+ break;
>+ }
>+ entry->addr &= ~0xffffffff;
With this, why not ...
>+ entry->addr |= data;
>+ break;
>+ case PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET:
>+ entry->addr &= ~((uint64_t)0xffffffff << 32);
... simply 0xffffffff here?
>+ entry->addr |= data << 32;
Iirc we've already talked about this being undefined for 32-bit arches (e.g.
ARM32), and the resulting need to make "data" uint64_t.
>+ break;
>+ case PCI_MSIX_ENTRY_DATA_OFFSET:
>+ /*
>+ * 8 byte writes to the msg data and vector control fields are
>+ * only allowed if the entry is masked.
>+ */
>+ if ( len == 8 && !entry->masked && !msix->masked && msix->enabled )
>+ {
>+ vpci_unlock(d);
>+ return X86EMUL_OKAY;
>+ }
I don't think this is correct - iirc such writes simply don't take effect
immediately (but I then seem to recall this to apply to the address field
and 32-bit writes to the data field as well). They'd instead take effect
the next time the entry is being unmasked (or some such). A while ago I
did fix the qemu code to behave in this way.
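Just to illustrate the direction (rough sketch only; the "updated" flag is
made up here and would need adding to struct vpci_msix_entry):

    case PCI_MSIX_ENTRY_DATA_OFFSET:
        /* Always latch the value; don't touch the arch state here. */
        entry->data = data;
        entry->updated = true;
        break;

with the vector control / unmask path then re-doing the arch setup whenever
it finds "updated" set.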
>+ entry->data = data;
>+
>+ if ( len == 4 )
>+ break;
>+
>+ data >>= 32;
>+ /* fallthrough */
>+ case PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET:
>+ {
>+ bool new_masked = data & PCI_MSIX_VECTOR_BITMASK;
>+ struct pci_dev *pdev = msix->pdev;
>+ paddr_t table_base = pdev->vpci->header.bars[msix->table.bir].addr;
>+ int rc;
>+
>+ if ( !msix->enabled )
>+ {
>+ entry->masked = new_masked;
>+ break;
>+ }
>+
>+ if ( new_masked != entry->masked && !new_masked )
if ( !new_masked && entry->masked )
(or the other way around)
>+static int vpci_init_msix(struct pci_dev *pdev)
>+{
>+ struct domain *d = pdev->domain;
>+ uint8_t seg = pdev->seg, bus = pdev->bus;
>+ uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
>+ struct vpci_msix *msix;
>+ unsigned int msix_offset, i, max_entries;
>+ struct vpci_bar *table_bar, *pba_bar;
>+ uint16_t control;
>+ int rc;
>+
>+ msix_offset = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
>+ if ( !msix_offset )
>+ return 0;
>+
>+ control = pci_conf_read16(seg, bus, slot, func,
>+ msix_control_reg(msix_offset));
>+
>+ /* Get the maximum number of vectors the device supports. */
>+ max_entries = msix_table_size(control);
>+
>+ msix = xzalloc_bytes(MSIX_SIZE(max_entries));
>+ if ( !msix )
>+ return -ENOMEM;
>+
>+ msix->max_entries = max_entries;
>+ msix->pdev = pdev;
>+
>+ /* Find the MSI-X table address. */
>+ msix->table.offset = pci_conf_read32(seg, bus, slot, func,
>+ msix_table_offset_reg(msix_offset));
>+ msix->table.bir = msix->table.offset & PCI_MSIX_BIRMASK;
>+ msix->table.offset &= ~PCI_MSIX_BIRMASK;
>+ msix->table.size = msix->max_entries * PCI_MSIX_ENTRY_SIZE;
>+ msix->table.addr = INVALID_PADDR;
>+
>+ /* Find the MSI-X pba address. */
>+ msix->pba.offset = pci_conf_read32(seg, bus, slot, func,
>+ msix_pba_offset_reg(msix_offset));
>+ msix->pba.bir = msix->pba.offset & PCI_MSIX_BIRMASK;
>+ msix->pba.offset &= ~PCI_MSIX_BIRMASK;
>+ msix->pba.size = DIV_ROUND_UP(msix->max_entries, 8);
I think you want to round up to at least the next 32-bit boundary; the
spec talking about bits 63..00 even suggests a 64-bit boundary. The
table addresses being required to be qword aligned also supports this.
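I.e. something like (assuming ROUNDUP() can be used here):

    msix->pba.size = ROUNDUP(DIV_ROUND_UP(msix->max_entries, 8), 8);

to round the PBA size up to the next 64-bit boundary.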
>+void vpci_dump_msix(void)
>+{
>+ struct domain *d;
>+ struct pci_dev *pdev;
const for all pointers in dump handlers, as far as possible.
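I.e.:

    const struct domain *d;
    const struct pci_dev *pdev;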
>+ for_each_domain ( d )
>+ {
>+ if ( !has_vpci(d) )
>+ continue;
>+
>+ printk("vPCI MSI-X information for guest %u\n", d->domain_id);
Wouldn't it be better (more useful) to dump the MSI and MSI-X data for a
domain next to each other?
Apart from the comments here, the ones given for the MSI patch apply
respectively.
Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel