On 12.09.12 at 14:47, Wei Wang <wei.wang2@xxxxxxx> wrote:
I found this patch triggered a xen BUG
Indeed - my default builds don't use a high enough CPU count to
see this. I'm sorry for that. Below is a drop-in replacement for the
patch (it would be unlikely to apply cleanly to the tip of staging/unstable
due to Keir's cleanup after the removal of x86-32 support).
Jan
--- 2012-09-11.orig/xen/arch/x86/msi.c 2012-09-13 08:40:47.000000000 +0200
+++ 2012-09-11/xen/arch/x86/msi.c 2012-09-13 08:43:21.000000000 +0200
@@ -13,6 +13,7 @@
#include <xen/delay.h>
#include <xen/sched.h>
#include <xen/acpi.h>
+#include <xen/cpu.h>
#include <xen/errno.h>
#include <xen/pci.h>
#include <xen/pci_regs.h>
@@ -32,8 +33,9 @@
#include <xsm/xsm.h>
/* bitmap indicate which fixed map is free */
-DEFINE_SPINLOCK(msix_fixmap_lock);
-DECLARE_BITMAP(msix_fixmap_pages, FIX_MSIX_MAX_PAGES);
+static DEFINE_SPINLOCK(msix_fixmap_lock);
+static DECLARE_BITMAP(msix_fixmap_pages, FIX_MSIX_MAX_PAGES);
+static DEFINE_PER_CPU(cpumask_var_t, scratch_mask);
static int msix_fixmap_alloc(void)
{
@@ -126,13 +128,17 @@ void msi_compose_msg(struct irq_desc *de
unsigned dest;
int vector = desc->arch.vector;
- if ( cpumask_empty(desc->arch.cpu_mask) ) {
+ memset(msg, 0, sizeof(*msg));
+ if ( !cpumask_intersects(desc->arch.cpu_mask, &cpu_online_map) ) {
dprintk(XENLOG_ERR, "%s, compose msi message error!!\n", __func__);
return;
}
if ( vector ) {
- dest = cpu_mask_to_apicid(desc->arch.cpu_mask);
+ cpumask_t *mask = this_cpu(scratch_mask);
+
+ cpumask_and(mask, desc->arch.cpu_mask, &cpu_online_map);
+ dest = cpu_mask_to_apicid(mask);
msg->address_hi = MSI_ADDR_BASE_HI;
msg->address_lo =
@@ -281,23 +287,27 @@ static void set_msi_affinity(struct irq_
write_msi_msg(msi_desc, &msg);
}
+void __msi_set_enable(u16 seg, u8 bus, u8 slot, u8 func, int pos, int enable)
+{
+ u16 control = pci_conf_read16(seg, bus, slot, func, pos + PCI_MSI_FLAGS);
+
+ control &= ~PCI_MSI_FLAGS_ENABLE;
+ if ( enable )
+ control |= PCI_MSI_FLAGS_ENABLE;
+ pci_conf_write16(seg, bus, slot, func, pos + PCI_MSI_FLAGS, control);
+}
+
static void msi_set_enable(struct pci_dev *dev, int enable)
{
int pos;
- u16 control, seg = dev->seg;
+ u16 seg = dev->seg;
u8 bus = dev->bus;
u8 slot = PCI_SLOT(dev->devfn);
u8 func = PCI_FUNC(dev->devfn);
pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
if ( pos )
- {
- control = pci_conf_read16(seg, bus, slot, func, pos + PCI_MSI_FLAGS);
- control &= ~PCI_MSI_FLAGS_ENABLE;
- if ( enable )
- control |= PCI_MSI_FLAGS_ENABLE;
- pci_conf_write16(seg, bus, slot, func, pos + PCI_MSI_FLAGS, control);
- }
+ __msi_set_enable(seg, bus, slot, func, pos, enable);
}
static void msix_set_enable(struct pci_dev *dev, int enable)
@@ -379,12 +389,12 @@ static int msi_get_mask_bit(const struct
return -1;
}
-static void mask_msi_irq(struct irq_desc *desc)
+void mask_msi_irq(struct irq_desc *desc)
{
msi_set_mask_bit(desc, 1);
}
-static void unmask_msi_irq(struct irq_desc *desc)
+void unmask_msi_irq(struct irq_desc *desc)
{
msi_set_mask_bit(desc, 0);
}
@@ -395,7 +405,7 @@ static unsigned int startup_msi_irq(stru
return 0;
}
-static void ack_nonmaskable_msi_irq(struct irq_desc *desc)
+void ack_nonmaskable_msi_irq(struct irq_desc *desc)
{
irq_complete_move(desc);
move_native_irq(desc);
@@ -407,7 +417,7 @@ static void ack_maskable_msi_irq(struct
ack_APIC_irq(); /* ACKTYPE_NONE */
}
-static void end_nonmaskable_msi_irq(struct irq_desc *desc, u8 vector)
+void end_nonmaskable_msi_irq(struct irq_desc *desc, u8 vector)
{
ack_APIC_irq(); /* ACKTYPE_EOI */
}
@@ -1071,6 +1081,39 @@ unsigned int pci_msix_get_table_len(stru
return len;
}
+static int msi_cpu_callback(
+ struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+ int rc = 0;
+
+ switch ( action )
+ {
+ case CPU_UP_PREPARE:
+ if ( !alloc_cpumask_var(&per_cpu(scratch_mask, cpu)) )
+ rc = -ENOMEM;
+ break;
+ case CPU_UP_CANCELED:
+ case CPU_DEAD:
+ free_cpumask_var(per_cpu(scratch_mask, cpu));
+ break;
+ default:
+ break;
+ }
+
+ return notifier_from_errno(rc);
+}
+
+static struct notifier_block msi_cpu_nfb = {
+ .notifier_call = msi_cpu_callback
+};
+
+void __init early_msi_init(void)
+{
+ register_cpu_notifier(&msi_cpu_nfb);
+ msi_cpu_callback(&msi_cpu_nfb, CPU_UP_PREPARE, NULL);
+}
+
static void dump_msi(unsigned char key)
{
unsigned int irq;
--- 2012-09-11.orig/xen/arch/x86/setup.c 2012-09-03 08:25:31.000000000 +0200
+++ 2012-09-11/xen/arch/x86/setup.c 2012-09-13 08:44:49.000000000 +0200
@@ -35,6 +35,7 @@
#include<asm/processor.h>
#include<asm/mpspec.h>
#include<asm/apic.h>
+#include<asm/msi.h>
#include<asm/desc.h>
#include<asm/paging.h>
#include<asm/e820.h>
@@ -1274,6 +1275,8 @@ void __init __start_xen(unsigned long mb
acpi_mmcfg_init();
#endif
+ early_msi_init();
+
iommu_setup(); /* setup iommu if available */
smp_prepare_cpus(max_cpus);
--- 2012-09-11.orig/xen/drivers/passthrough/amd/iommu_detect.c 2012-09-13 08:40:47.000000000 +0200
+++ 2012-09-11/xen/drivers/passthrough/amd/iommu_detect.c 2012-06-20 17:33:15.000000000 +0200
@@ -31,7 +31,6 @@ static int __init get_iommu_msi_capabili
u16 seg, u8 bus, u8 dev, u8 func, struct amd_iommu *iommu)
{
int pos;
- u16 control;
pos = pci_find_cap_offset(seg, bus, dev, func, PCI_CAP_ID_MSI);
@@ -41,9 +40,6 @@ static int __init get_iommu_msi_capabili
AMD_IOMMU_DEBUG("Found MSI capability block at %#x\n", pos);
iommu->msi_cap = pos;
- control = pci_conf_read16(seg, bus, dev, func,
- iommu->msi_cap + PCI_MSI_FLAGS);
- iommu->maskbit = control & PCI_MSI_FLAGS_MASKBIT;
return 0;
}
--- 2012-09-11.orig/xen/drivers/passthrough/amd/iommu_init.c 2012-06-13 08:39:38.000000000 +0200
+++ 2012-09-11/xen/drivers/passthrough/amd/iommu_init.c 2012-09-11 11:26:28.000000000 +0200
@@ -467,20 +467,9 @@ static void iommu_msi_set_affinity(struc
return;
}
- memset(&msg, 0, sizeof(msg));
- msg.data = MSI_DATA_VECTOR(desc->arch.vector) & 0xff;
- msg.data |= 1 << 14;
- msg.data |= (INT_DELIVERY_MODE != dest_LowestPrio) ?
- MSI_DATA_DELIVERY_FIXED:
- MSI_DATA_DELIVERY_LOWPRI;
-
- msg.address_hi = 0;
- msg.address_lo = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8));
- msg.address_lo |= INT_DEST_MODE ? MSI_ADDR_DESTMODE_LOGIC:
- MSI_ADDR_DESTMODE_PHYS;
- msg.address_lo |= (INT_DELIVERY_MODE != dest_LowestPrio) ?
- MSI_ADDR_REDIRECTION_CPU:
- MSI_ADDR_REDIRECTION_LOWPRI;
+ msi_compose_msg(desc, &msg);
+ /* Is this override really needed? */
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID(dest & 0xff);
pci_conf_write32(seg, bus, dev, func,
@@ -494,18 +483,8 @@ static void iommu_msi_set_affinity(struc
static void amd_iommu_msi_enable(struct amd_iommu *iommu, int flag)
{
- u16 control;
- int bus = PCI_BUS(iommu->bdf);
- int dev = PCI_SLOT(iommu->bdf);
- int func = PCI_FUNC(iommu->bdf);
-
- control = pci_conf_read16(iommu->seg, bus, dev, func,
- iommu->msi_cap + PCI_MSI_FLAGS);
- control &= ~PCI_MSI_FLAGS_ENABLE;
- if ( flag )
- control |= flag;
- pci_conf_write16(iommu->seg, bus, dev, func,
- iommu->msi_cap + PCI_MSI_FLAGS, control);
+ __msi_set_enable(iommu->seg, PCI_BUS(iommu->bdf), PCI_SLOT(iommu->bdf),
+ PCI_FUNC(iommu->bdf), iommu->msi_cap, flag);
}
static void iommu_msi_unmask(struct irq_desc *desc)
@@ -513,10 +492,6 @@ static void iommu_msi_unmask(struct irq_
unsigned long flags;
struct amd_iommu *iommu = desc->action->dev_id;
- /* FIXME: do not support mask bits at the moment */
- if ( iommu->maskbit )
- return;
-
spin_lock_irqsave(&iommu->lock, flags);
amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
spin_unlock_irqrestore(&iommu->lock, flags);
@@ -529,10 +504,6 @@ static void iommu_msi_mask(struct irq_de
irq_complete_move(desc);
- /* FIXME: do not support mask bits at the moment */
- if ( iommu->maskbit )
- return;
-
spin_lock_irqsave(&iommu->lock, flags);
amd_iommu_msi_enable(iommu, IOMMU_CONTROL_DISABLED);
spin_unlock_irqrestore(&iommu->lock, flags);
@@ -562,6 +533,37 @@ static hw_irq_controller iommu_msi_type
.set_affinity = iommu_msi_set_affinity,
};
+static unsigned int iommu_maskable_msi_startup(struct irq_desc *desc)
+{
+ iommu_msi_unmask(desc);
+ unmask_msi_irq(desc);
+ return 0;
+}
+
+static void iommu_maskable_msi_shutdown(struct irq_desc *desc)
+{
+ mask_msi_irq(desc);
+ iommu_msi_mask(desc);
+}
+
+/*
+ * While the names may appear mismatched, we indeed want to use the non-
+ * maskable flavors here, as we want the ACK to be issued in ->end().
+ */
+#define iommu_maskable_msi_ack ack_nonmaskable_msi_irq
+#define iommu_maskable_msi_end end_nonmaskable_msi_irq
+
+static hw_irq_controller iommu_maskable_msi_type = {
+ .typename = "IOMMU-M-MSI",
+ .startup = iommu_maskable_msi_startup,
+ .shutdown = iommu_maskable_msi_shutdown,
+ .enable = unmask_msi_irq,
+ .disable = mask_msi_irq,
+ .ack = iommu_maskable_msi_ack,
+ .end = iommu_maskable_msi_end,
+ .set_affinity = iommu_msi_set_affinity,
+};
+
static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
{
u16 domain_id, device_id, bdf, cword, flags;
@@ -784,6 +786,7 @@ static void iommu_interrupt_handler(int
static int __init set_iommu_interrupt_handler(struct amd_iommu *iommu)
{
int irq, ret;
+ u16 control;
irq = create_irq(NUMA_NO_NODE);
if ( irq <= 0 )
@@ -792,7 +795,11 @@ static int __init set_iommu_interrupt_ha
return 0;
}
- irq_desc[irq].handler = &iommu_msi_type;
+ control = pci_conf_read16(iommu->seg, PCI_BUS(iommu->bdf),
+ PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf),
+ iommu->msi_cap + PCI_MSI_FLAGS);
+ irq_desc[irq].handler = control & PCI_MSI_FLAGS_MASKBIT ?
+ &iommu_maskable_msi_type : &iommu_msi_type;
ret = request_irq(irq, iommu_interrupt_handler, 0, "amd_iommu", iommu);
if ( ret )
{
--- 2012-09-11.orig/xen/include/asm-x86/amd-iommu.h 2012-09-13 08:40:47.000000000 +0200
+++ 2012-09-11/xen/include/asm-x86/amd-iommu.h 2012-06-13 10:23:45.000000000 +0200
@@ -83,6 +83,7 @@ struct amd_iommu {
u16 seg;
u16 bdf;
u16 cap_offset;
+ u8 msi_cap;
iommu_cap_t cap;
u8 ht_flags;
@@ -101,9 +102,6 @@ struct amd_iommu {
uint64_t exclusion_base;
uint64_t exclusion_limit;
- int msi_cap;
- int maskbit;
-
int enabled;
int irq;
};
--- 2012-09-11.orig/xen/include/asm-x86/msi.h 2012-09-13 08:40:47.000000000 +0200
+++ 2012-09-11/xen/include/asm-x86/msi.h 2012-09-13 08:45:14.000000000 +0200
@@ -153,18 +153,6 @@ int msi_free_irq(struct msi_desc *entry)
/*
* MSI Defined Data Structures
*/
-#define MSI_ADDRESS_HEADER 0xfee
-#define MSI_ADDRESS_HEADER_SHIFT 12
-#define MSI_ADDRESS_HEADER_MASK 0xfff000
-#define MSI_ADDRESS_DEST_ID_MASK 0xfff0000f
-#define MSI_TARGET_CPU_MASK 0xff
-#define MSI_TARGET_CPU_SHIFT 12
-#define MSI_DELIVERY_MODE 0
-#define MSI_LEVEL_MODE 1 /* Edge always assert */
-#define MSI_TRIGGER_MODE 0 /* MSI is edge sensitive */
-#define MSI_PHYSICAL_MODE 0
-#define MSI_LOGICAL_MODE 1
-#define MSI_REDIRECTION_HINT_MODE 0
struct msg_data {
#if defined(__LITTLE_ENDIAN_BITFIELD)
@@ -212,6 +200,12 @@ struct msg_address {
__u32 hi_address;
} __attribute__ ((packed));
+void early_msi_init(void);
void msi_compose_msg(struct irq_desc *, struct msi_msg *);
+void __msi_set_enable(u16 seg, u8 bus, u8 slot, u8 func, int pos, int enable);
+void mask_msi_irq(struct irq_desc *);
+void unmask_msi_irq(struct irq_desc *);
+void ack_nonmaskable_msi_irq(struct irq_desc *);
+void end_nonmaskable_msi_irq(struct irq_desc *, u8 vector);
#endif /* __ASM_MSI_H */