[PATCH 3/3] x86/pci: Fix racy accesses to MSI-X Control register
Concurrent accesses to the MSI-X Control register are not serialized
with a suitable lock. For example, accesses in msix_capability_init()
are covered by pcidevs_lock() but some calls to msi_set_mask_bit() use
the interrupt descriptor lock. This can lead to MSI-X being incorrectly
disabled and subsequent failures due to msix_memory_decoded() calls
that check for MSI-X being enabled.

This was seen with some non-compliant hardware that gated MSI-X
messages on the per-vector mask bit only (i.e., the MSI-X Enable and
Function Mask bits in the MSI-X Control register were ignored). An
interrupt (with a pending move) for vector 0 would occur while vector 1
was being initialized in msix_capability_init(). Updates to the Control
register would race and the vector 1 initialization would
intermittently fail with -ENXIO.

Typically a race between initializing a vector and another vector being
moved doesn't occur because:

1. Racy Control accesses only occur when MSI-X is (guest) disabled.

2. Hardware should only raise interrupts when MSI-X is enabled and
   unmasked.

3. Xen always sets Function Mask when temporarily enabling MSI-X.

But there may be other race conditions depending on hardware and guest
driver behaviour (e.g., disabling MSI-X when an IRQ has a pending move
on another PCPU).

Fix this by:

1. Tracking the host and guest enable state in a similar way to the
   host and guest maskall state. Note that since multiple CPUs can be
   updating different vectors concurrently, a counter is needed for
   the host enable state.

2. Adding a new lock to serialize the Control register
   read/modify/write sequences.

3. Wrapping the above in two helper functions (msix_update_lock() and
   msix_update_unlock()) which bracket any MSI-X register updates that
   require MSI-X to be (temporarily) enabled.

Signed-off-by: David Vrabel <dvrabel@xxxxxxxxxxxx>
SIM: https://t.corp.amazon.com/P63914633
CR: https://code.amazon.com/reviews/CR-79020945
---
 xen/arch/x86/include/asm/msi.h |   3 +
 xen/arch/x86/msi.c             | 215 +++++++++++++++++----------------
 xen/drivers/passthrough/msi.c  |   1 +
 3 files changed, 114 insertions(+), 105 deletions(-)

diff --git a/xen/arch/x86/include/asm/msi.h b/xen/arch/x86/include/asm/msi.h
index fe670895ee..aa36e44f4e 100644
--- a/xen/arch/x86/include/asm/msi.h
+++ b/xen/arch/x86/include/asm/msi.h
@@ -237,7 +237,10 @@ struct arch_msix {
     int table_refcnt[MAX_MSIX_TABLE_PAGES];
     int table_idx[MAX_MSIX_TABLE_PAGES];
     spinlock_t table_lock;
+    spinlock_t control_lock;
     bool host_maskall, guest_maskall;
+    uint16_t host_enable;
+    bool guest_enable;
     domid_t warned;
 };
 
diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index 6c675d11d1..8e394da07a 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -147,6 +147,57 @@ static bool msix_memory_decoded(const struct pci_dev *dev, unsigned int pos)
     return memory_decoded(dev);
 }
 
+
+/*
+ * Ensure MSI-X interrupts are masked during setup. Some devices require
+ * MSI-X to be enabled before we can touch the MSI-X registers. We need
+ * to mask all the vectors to prevent interrupts coming in before they're
+ * fully set up.
+ */
+static uint16_t msix_update_lock(struct pci_dev *dev, unsigned int pos)
+{
+    uint16_t control, new_control;
+    unsigned long flags;
+
+    spin_lock_irqsave(&dev->msix->control_lock, flags);
+
+    dev->msix->host_enable++;
+
+    control = pci_conf_read16(dev->sbdf, msix_control_reg(pos));
+    if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
+    {
+        new_control = control | PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL;
+        pci_conf_write16(dev->sbdf, msix_control_reg(pos), new_control);
+    }
+    else
+        dev->msix->guest_enable = true;
+
+    spin_unlock_irqrestore(&dev->msix->control_lock, flags);
+
+    return control;
+}
+
+static void msix_update_unlock(struct pci_dev *dev, unsigned int pos, uint16_t control)
+{
+    uint16_t new_control;
+    unsigned long flags;
+
+    spin_lock_irqsave(&dev->msix->control_lock, flags);
+
+    dev->msix->host_enable--;
+
+    new_control = control & ~(PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);
+
+    if ( dev->msix->host_enable || dev->msix->guest_enable )
+        new_control |= PCI_MSIX_FLAGS_ENABLE;
+    if ( dev->msix->host_maskall || dev->msix->guest_maskall || dev->msix->host_enable )
+        new_control |= PCI_MSIX_FLAGS_MASKALL;
+    if ( new_control != control )
+        pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);
+
+    spin_unlock_irqrestore(&dev->msix->control_lock, flags);
+}
+
 /*
  * MSI message composition
  */
@@ -288,7 +339,7 @@ static void msi_set_enable(struct pci_dev *dev, int enable)
         __msi_set_enable(seg, bus, slot, func, pos, enable);
 }
 
-static void msix_set_enable(struct pci_dev *dev, int enable)
+static void msix_force_disable(struct pci_dev *dev)
 {
     int pos;
     u16 control, seg = dev->seg;
@@ -299,11 +350,16 @@ static void msix_set_enable(struct pci_dev *dev, int enable)
     pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
     if ( pos )
     {
+        spin_lock_irq(&dev->msix->control_lock);
+
+        dev->msix->host_enable = false;
+        dev->msix->guest_enable = false;
+
         control = pci_conf_read16(dev->sbdf, msix_control_reg(pos));
         control &= ~PCI_MSIX_FLAGS_ENABLE;
-        if ( enable )
-            control |= PCI_MSIX_FLAGS_ENABLE;
         pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);
+
+        spin_unlock_irq(&dev->msix->control_lock);
     }
 }
 
@@ -318,9 +374,10 @@ static void msi_set_mask_bit(struct irq_desc *desc, bool host, bool guest)
 {
     struct msi_desc *entry = desc->msi_desc;
     struct pci_dev *pdev;
-    u16 seg, control;
+    u16 seg;
     u8 bus, slot, func;
-    bool flag = host || guest, maskall;
+    bool flag = host || guest;
+    uint16_t control;
 
     ASSERT(spin_is_locked(&desc->lock));
     BUG_ON(!entry || !entry->dev);
@@ -343,30 +400,18 @@ static void msi_set_mask_bit(struct irq_desc *desc, bool host, bool guest)
         }
         break;
     case PCI_CAP_ID_MSIX:
-        maskall = pdev->msix->host_maskall;
-        control = pci_conf_read16(pdev->sbdf,
-                                  msix_control_reg(entry->msi_attrib.pos));
-        if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
-        {
-            pdev->msix->host_maskall = 1;
-            pci_conf_write16(pdev->sbdf,
-                             msix_control_reg(entry->msi_attrib.pos),
-                             control | (PCI_MSIX_FLAGS_ENABLE |
-                                        PCI_MSIX_FLAGS_MASKALL));
-        }
+        control = msix_update_lock(pdev, entry->msi_attrib.pos);
+
         if ( likely(memory_decoded(pdev)) )
         {
             writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
             readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
-
-            if ( likely(control & PCI_MSIX_FLAGS_ENABLE) )
-                break;
         }
-        else if ( flag && !(control & PCI_MSIX_FLAGS_MASKALL) )
+        else if ( !pdev->msix->host_maskall && !pdev->msix->guest_maskall )
         {
             domid_t domid = pdev->domain->domain_id;
 
-            maskall = true;
+            pdev->msix->host_maskall = true;
             if ( pdev->msix->warned != domid )
             {
                 pdev->msix->warned = domid;
@@ -375,11 +420,8 @@ static void msi_set_mask_bit(struct irq_desc *desc, bool host, bool guest)
                        desc->irq, domid, &pdev->sbdf);
             }
         }
-        pdev->msix->host_maskall = maskall;
-        if ( maskall || pdev->msix->guest_maskall )
-            control |= PCI_MSIX_FLAGS_MASKALL;
-        pci_conf_write16(pdev->sbdf,
-                         msix_control_reg(entry->msi_attrib.pos), control);
+
+        msix_update_unlock(pdev, entry->msi_attrib.pos, control);
         break;
     }
     entry->msi_attrib.host_masked = host;
@@ -494,26 +536,19 @@ static struct msi_desc *alloc_msi_entry(unsigned int nr)
 
 int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc)
 {
-    const struct pci_dev *pdev = msidesc->dev;
-    unsigned int cpos = msix_control_reg(msidesc->msi_attrib.pos);
-    u16 control = ~0;
+    struct pci_dev *pdev = msidesc->dev;
+    uint16_t control = 0;
     int rc;
 
     if ( msidesc->msi_attrib.type == PCI_CAP_ID_MSIX )
-    {
-        control = pci_conf_read16(pdev->sbdf, cpos);
-        if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
-            pci_conf_write16(pdev->sbdf, cpos,
-                             control | (PCI_MSIX_FLAGS_ENABLE |
-                                        PCI_MSIX_FLAGS_MASKALL));
-    }
+        control = msix_update_lock(pdev, msidesc->msi_attrib.pos);
 
     rc = __setup_msi_irq(desc, msidesc,
                          msi_maskable_irq(msidesc) ? &pci_msi_maskable
                                                    : &pci_msi_nonmaskable);
 
-    if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
-        pci_conf_write16(pdev->sbdf, cpos, control);
+    if ( control )
+        msix_update_unlock(pdev, msidesc->msi_attrib.pos, control);
 
     return rc;
 }
@@ -754,14 +789,14 @@ static int msix_capability_init(struct pci_dev *dev,
 {
     struct arch_msix *msix = dev->msix;
     struct msi_desc *entry = NULL;
-    u16 control;
     u64 table_paddr;
     u32 table_offset;
     u16 seg = dev->seg;
     u8 bus = dev->bus;
     u8 slot = PCI_SLOT(dev->devfn);
     u8 func = PCI_FUNC(dev->devfn);
-    bool maskall = msix->host_maskall;
+    uint16_t control;
+    int ret = 0;
     unsigned int pos = pci_find_cap_offset(seg, bus, slot, func,
                                            PCI_CAP_ID_MSIX);
 
@@ -770,37 +805,22 @@
 
     ASSERT(pcidevs_locked());
 
-    control = pci_conf_read16(dev->sbdf, msix_control_reg(pos));
-    /*
-     * Ensure MSI-X interrupts are masked during setup. Some devices require
-     * MSI-X to be enabled before we can touch the MSI-X registers. We need
-     * to mask all the vectors to prevent interrupts coming in before they're
-     * fully set up.
-     */
-    msix->host_maskall = 1;
-    pci_conf_write16(dev->sbdf, msix_control_reg(pos),
-                     control | (PCI_MSIX_FLAGS_ENABLE |
-                                PCI_MSIX_FLAGS_MASKALL));
-
-    if ( unlikely(!memory_decoded(dev)) )
-    {
-        pci_conf_write16(dev->sbdf, msix_control_reg(pos),
-                         control & ~PCI_MSIX_FLAGS_ENABLE);
-        return -ENXIO;
-    }
-
     if ( desc )
     {
         entry = alloc_msi_entry(1);
         if ( !entry )
-        {
-            pci_conf_write16(dev->sbdf, msix_control_reg(pos),
-                             control & ~PCI_MSIX_FLAGS_ENABLE);
             return -ENOMEM;
-        }
         ASSERT(msi);
     }
 
+    control = msix_update_lock(dev, pos);
+
+    if ( unlikely(!memory_decoded(dev)) )
+    {
+        ret = -ENXIO;
+        goto out;
+    }
+
     /* Locate MSI-X table region */
     table_offset = pci_conf_read32(dev->sbdf, msix_table_offset_reg(pos));
     if ( !msix->used_entries &&
@@ -834,10 +854,8 @@ static int msix_capability_init(struct pci_dev *dev,
     {
         if ( !msi || !msi->table_base )
         {
-            pci_conf_write16(dev->sbdf, msix_control_reg(pos),
-                             control & ~PCI_MSIX_FLAGS_ENABLE);
-            xfree(entry);
-            return -ENXIO;
+            ret = -ENXIO;
+            goto out;
         }
         table_paddr = msi->table_base;
     }
@@ -863,9 +881,8 @@ static int msix_capability_init(struct pci_dev *dev,
     }
     else if ( !msix->table.first )
     {
-        pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);
-        xfree(entry);
-        return -ENODATA;
+        ret = -ENODATA;
+        goto out;
     }
     else
         table_paddr = (msix->table.first << PAGE_SHIFT) +
@@ -880,9 +897,8 @@ static int msix_capability_init(struct pci_dev *dev,
 
     if ( idx < 0 )
     {
-        pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);
-        xfree(entry);
-        return idx;
+        ret = idx;
+        goto out;
     }
 
     base = fix_to_virt(idx) + (entry_paddr & (PAGE_SIZE - 1));
@@ -906,12 +922,6 @@ static int msix_capability_init(struct pci_dev *dev,
 
     if ( !msix->used_entries )
     {
-        maskall = false;
-        if ( !msix->guest_maskall )
-            control &= ~PCI_MSIX_FLAGS_MASKALL;
-        else
-            control |= PCI_MSIX_FLAGS_MASKALL;
-
         if ( rangeset_add_range(mmio_ro_ranges, msix->table.first,
                                 msix->table.last) )
             WARN();
@@ -940,23 +950,13 @@ static int msix_capability_init(struct pci_dev *dev,
         WARN_ON(msix->table.first != (table_paddr >> PAGE_SHIFT));
     ++msix->used_entries;
 
-    /* Restore MSI-X enabled bits */
-    if ( !hardware_domain )
-    {
-        /*
-         * ..., except for internal requests (before Dom0 starts), in which
-         * case we rather need to behave "normally", i.e. not follow the split
-         * brain model where Dom0 actually enables MSI (and disables INTx).
-         */
-        pci_intx(dev, false);
-        control |= PCI_MSIX_FLAGS_ENABLE;
-        control &= ~PCI_MSIX_FLAGS_MASKALL;
-        maskall = 0;
-    }
-    msix->host_maskall = maskall;
-    pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);
+ out:
+    if ( ret < 0 )
+        xfree(entry);
 
-    return 0;
+    msix_update_unlock(dev, pos, control);
+
+    return ret;
 }
 
 /**
@@ -1180,7 +1180,7 @@ void pci_cleanup_msi(struct pci_dev *pdev)
 {
     /* Disable MSI and/or MSI-X */
     msi_set_enable(pdev, 0);
-    msix_set_enable(pdev, 0);
+    msix_force_disable(pdev);
 
     msi_free_irqs(pdev);
 }
@@ -1229,11 +1229,20 @@ int pci_msi_conf_write_intercept(struct pci_dev *pdev, unsigned int reg,
 
         if ( reg != msix_control_reg(pos) || size != 2 )
             return -EACCES;
 
+        spin_lock_irq(&pdev->msix->control_lock);
+
+        pdev->msix->guest_enable = !!(*data & PCI_MSIX_FLAGS_ENABLE);
+        if ( pdev->msix->host_enable )
+            *data |= PCI_MSIX_FLAGS_ENABLE;
         pdev->msix->guest_maskall = !!(*data & PCI_MSIX_FLAGS_MASKALL);
         if ( pdev->msix->host_maskall )
            *data |= PCI_MSIX_FLAGS_MASKALL;
 
-        return 1;
+        pci_conf_write16(pdev->sbdf, reg, *data);
+
+        spin_unlock_irq(&pdev->msix->control_lock);
+
+        return -EPERM; /* Already done the write. */
     }
 }
@@ -1324,15 +1333,12 @@ int pci_restore_msi_state(struct pci_dev *pdev)
         }
         else if ( !type && entry->msi_attrib.type == PCI_CAP_ID_MSIX )
         {
-            control = pci_conf_read16(pdev->sbdf, msix_control_reg(pos));
-            pci_conf_write16(pdev->sbdf, msix_control_reg(pos),
-                             control | (PCI_MSIX_FLAGS_ENABLE |
-                                        PCI_MSIX_FLAGS_MASKALL));
+            control = msix_update_lock(pdev, pos);
+
             if ( unlikely(!memory_decoded(pdev)) )
             {
+                msix_update_unlock(pdev, pos, control);
                 spin_unlock_irqrestore(&desc->lock, flags);
-                pci_conf_write16(pdev->sbdf, msix_control_reg(pos),
-                                 control & ~PCI_MSIX_FLAGS_ENABLE);
                 return -ENXIO;
             }
         }
@@ -1372,8 +1378,7 @@ int pci_restore_msi_state(struct pci_dev *pdev)
     }
 
     if ( type == PCI_CAP_ID_MSIX )
-        pci_conf_write16(pdev->sbdf, msix_control_reg(pos),
-                         control | PCI_MSIX_FLAGS_ENABLE);
+        msix_update_unlock(pdev, pos, control);
 
     return 0;
 }
diff --git a/xen/drivers/passthrough/msi.c b/xen/drivers/passthrough/msi.c
index ce1a450f6f..436c78b7aa 100644
--- a/xen/drivers/passthrough/msi.c
+++ b/xen/drivers/passthrough/msi.c
@@ -44,6 +44,7 @@ int pdev_msi_init(struct pci_dev *pdev)
             return -ENOMEM;
 
         spin_lock_init(&msix->table_lock);
+        spin_lock_init(&msix->control_lock);
 
         ctrl = pci_conf_read16(pdev->sbdf, msix_control_reg(pos));
         msix->nr_entries = msix_table_size(ctrl);
-- 
2.30.2
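
The intended calling pattern for the new helpers is roughly the
following (an illustrative sketch only, not code from this patch: the
example function and its arguments are made up, while
msix_update_lock(), msix_update_unlock() and
PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET are the names used in the diff
above):

    /*
     * Illustrative only: mask or unmask one MSI-X vector while making
     * sure MSI-X is (temporarily) enabled so the table write reaches
     * the device.
     */
    static void example_set_vector_mask(struct pci_dev *dev, unsigned int pos,
                                        void __iomem *entry, bool mask)
    {
        uint16_t control;

        /*
         * Takes msix->control_lock, increments msix->host_enable and, if
         * MSI-X was disabled, enables it with Function Mask set.  Returns
         * the original Control register value.
         */
        control = msix_update_lock(dev, pos);

        writel(mask, entry + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
        readl(entry + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); /* flush */

        /*
         * Decrements msix->host_enable and recomputes the Control register
         * from the tracked host/guest enable and maskall state.
         */
        msix_update_unlock(dev, pos, control);
    }

Every path that calls msix_update_lock() must reach the matching
msix_update_unlock(): the host_enable counter is what lets several CPUs
update different vectors concurrently without clobbering each other's
Control register writes.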