x86/MSI-X: access MSI-X table only after having enabled MSI-X As done in Linux by f598282f51 ("PCI: Fix the NIU MSI-X problem in a better way") and its broken predecessor, make sure we don't access the MSI-X table without having enabled MSI-X first, using the mask-all flag instead to prevent interrupts from occurring. Signed-off-by: Jan Beulich --- v3: temporarily enable MSI-X in setup_msi_irq() if not already enabled --- a/xen/arch/x86/msi.c +++ b/xen/arch/x86/msi.c @@ -142,6 +142,17 @@ static bool_t memory_decoded(const struc PCI_COMMAND_MEMORY); } +static bool_t msix_memory_decoded(const struct pci_dev *dev, unsigned int pos) +{ + u16 control = pci_conf_read16(dev->seg, dev->bus, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), msix_control_reg(pos)); + + if ( !(control & PCI_MSIX_FLAGS_ENABLE) ) + return 0; + + return memory_decoded(dev); +} + /* * MSI message composition */ @@ -219,7 +230,8 @@ static bool_t read_msi_msg(struct msi_de void __iomem *base; base = entry->mask_base; - if ( unlikely(!memory_decoded(entry->dev)) ) + if ( unlikely(!msix_memory_decoded(entry->dev, + entry->msi_attrib.pos)) ) return 0; msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); @@ -285,7 +297,8 @@ static int write_msi_msg(struct msi_desc void __iomem *base; base = entry->mask_base; - if ( unlikely(!memory_decoded(entry->dev)) ) + if ( unlikely(!msix_memory_decoded(entry->dev, + entry->msi_attrib.pos)) ) return -ENXIO; writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); @@ -379,7 +392,7 @@ static bool_t msi_set_mask_bit(struct ir { struct msi_desc *entry = desc->msi_desc; struct pci_dev *pdev; - u16 seg; + u16 seg, control; u8 bus, slot, func; ASSERT(spin_is_locked(&desc->lock)); @@ -401,35 +414,38 @@ static bool_t msi_set_mask_bit(struct ir } break; case PCI_CAP_ID_MSIX: + control = pci_conf_read16(seg, bus, slot, func, + msix_control_reg(entry->msi_attrib.pos)); + if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) ) + pci_conf_write16(seg, bus, slot, func, + msix_control_reg(entry->msi_attrib.pos), + control | (PCI_MSIX_FLAGS_ENABLE | + PCI_MSIX_FLAGS_MASKALL)); if ( likely(memory_decoded(pdev)) ) { writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); - break; + if ( likely(control & PCI_MSIX_FLAGS_ENABLE) ) + break; + flag = 1; } - if ( flag ) + else if ( flag && !(control & PCI_MSIX_FLAGS_MASKALL) ) { - u16 control; domid_t domid = pdev->domain->domain_id; - control = pci_conf_read16(seg, bus, slot, func, - msix_control_reg(entry->msi_attrib.pos)); - if ( control & PCI_MSIX_FLAGS_MASKALL ) - break; - pci_conf_write16(seg, bus, slot, func, - msix_control_reg(entry->msi_attrib.pos), - control | PCI_MSIX_FLAGS_MASKALL); + control |= PCI_MSIX_FLAGS_MASKALL; if ( pdev->msix->warned != domid ) { pdev->msix->warned = domid; printk(XENLOG_G_WARNING - "cannot mask IRQ %d: masked MSI-X on Dom%d's %04x:%02x:%02x.%u\n", + "cannot mask IRQ %d: masking MSI-X on Dom%d's %04x:%02x:%02x.%u\n", desc->irq, domid, pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); } - break; } - /* fall through */ + pci_conf_write16(seg, bus, slot, func, + msix_control_reg(entry->msi_attrib.pos), control); + return flag; default: return 0; } @@ -454,7 +470,8 @@ static int msi_get_mask_bit(const struct entry->msi.mpos) >> entry->msi_attrib.entry_nr) & 1; case PCI_CAP_ID_MSIX: - if ( unlikely(!memory_decoded(entry->dev)) ) + if ( unlikely(!msix_memory_decoded(entry->dev, + entry->msi_attrib.pos)) ) break; return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1; } @@ -543,9 +560,31 @@ static struct msi_desc *alloc_msi_entry( int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc) { - return __setup_msi_irq(desc, msidesc, - msi_maskable_irq(msidesc) ? &pci_msi_maskable - : &pci_msi_nonmaskable); + const struct pci_dev *pdev = msidesc->dev; + unsigned int cpos = msix_control_reg(msidesc->msi_attrib.pos); + u16 control = ~0; + int rc; + + if ( msidesc->msi_attrib.type == PCI_CAP_ID_MSIX ) + { + control = pci_conf_read16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn), cpos); + if ( !(control & PCI_MSIX_FLAGS_ENABLE) ) + pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn), cpos, + control | (PCI_MSIX_FLAGS_ENABLE | + PCI_MSIX_FLAGS_MASKALL)); + } + + rc = __setup_msi_irq(desc, msidesc, + msi_maskable_irq(msidesc) ? &pci_msi_maskable + : &pci_msi_nonmaskable); + + if ( !(control & PCI_MSIX_FLAGS_ENABLE) ) + pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn), cpos, control); + + return rc; } int __setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc, @@ -785,16 +824,32 @@ static int msix_capability_init(struct p pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX); control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos)); - msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */ + /* + * Ensure MSI-X interrupts are masked during setup. Some devices require + * MSI-X to be enabled before we can touch the MSI-X registers. We need + * to mask all the vectors to prevent interrupts coming in before they're + * fully set up. + */ + pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), + control | (PCI_MSIX_FLAGS_ENABLE | + PCI_MSIX_FLAGS_MASKALL)); if ( unlikely(!memory_decoded(dev)) ) + { + pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), + control & ~PCI_MSIX_FLAGS_ENABLE); return -ENXIO; + } if ( desc ) { entry = alloc_msi_entry(1); if ( !entry ) + { + pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), + control & ~PCI_MSIX_FLAGS_ENABLE); return -ENOMEM; + } ASSERT(msi); } @@ -825,6 +880,8 @@ static int msix_capability_init(struct p { if ( !msi || !msi->table_base ) { + pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), + control & ~PCI_MSIX_FLAGS_ENABLE); xfree(entry); return -ENXIO; } @@ -867,6 +924,8 @@ static int msix_capability_init(struct p if ( idx < 0 ) { + pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), + control & ~PCI_MSIX_FLAGS_ENABLE); xfree(entry); return idx; } @@ -922,8 +981,7 @@ static int msix_capability_init(struct p ++msix->used_entries; /* Restore MSI-X enabled bits */ - pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), - control & ~PCI_MSIX_FLAGS_MASKALL); + pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control); return 0; } @@ -1072,7 +1130,10 @@ static void __pci_disable_msix(struct ms pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX); control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos)); - msix_set_enable(dev, 0); + if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) ) + pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), + control | (PCI_MSIX_FLAGS_ENABLE | + PCI_MSIX_FLAGS_MASKALL)); BUG_ON(list_empty(&dev->msi_list)); @@ -1198,6 +1259,8 @@ int pci_restore_msi_state(struct pci_dev list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list ) { unsigned int i = 0, nr = 1; + u16 control = 0; + u8 slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn); irq = entry->irq; desc = &irq_desc[irq]; @@ -1224,10 +1287,18 @@ int pci_restore_msi_state(struct pci_dev } else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX ) { - msix_set_enable(pdev, 0); + control = pci_conf_read16(pdev->seg, pdev->bus, slot, func, + msix_control_reg(entry->msi_attrib.pos)); + pci_conf_write16(pdev->seg, pdev->bus, slot, func, + msix_control_reg(entry->msi_attrib.pos), + control | (PCI_MSIX_FLAGS_ENABLE | + PCI_MSIX_FLAGS_MASKALL)); if ( unlikely(!memory_decoded(pdev)) ) { spin_unlock_irqrestore(&desc->lock, flags); + pci_conf_write16(pdev->seg, pdev->bus, slot, func, + msix_control_reg(entry->msi_attrib.pos), + control & ~PCI_MSIX_FLAGS_ENABLE); return -ENXIO; } } @@ -1256,11 +1327,9 @@ int pci_restore_msi_state(struct pci_dev if ( entry->msi_attrib.type == PCI_CAP_ID_MSI ) { unsigned int cpos = msi_control_reg(entry->msi_attrib.pos); - u16 control = pci_conf_read16(pdev->seg, pdev->bus, - PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn), cpos); - control &= ~PCI_MSI_FLAGS_QSIZE; + control = pci_conf_read16(pdev->seg, pdev->bus, slot, func, cpos) & + ~PCI_MSI_FLAGS_QSIZE; multi_msi_enable(control, entry->msi.nvec); pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), cpos, control); @@ -1268,7 +1337,9 @@ int pci_restore_msi_state(struct pci_dev msi_set_enable(pdev, 1); } else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX ) - msix_set_enable(pdev, 1); + pci_conf_write16(pdev->seg, pdev->bus, slot, func, + msix_control_reg(entry->msi_attrib.pos), + control | PCI_MSIX_FLAGS_ENABLE); } return 0;