AMD IOMMU: only disable interrupt remapping when certain IVRS consistency checks fail After some more thought on the XSA-36 and specifically the comments we got regarding disabling the IOMMU in this situation altogether making things worse instead of better, I came to the conclusion that we can actually restrict the action in affected cases to just disabling interrupt remapping. That doesn't make the situation worse than prior to the XSA-36 fixes (where interrupt remapping didn't really protect domains from one another), but allows at least DMA isolation to still be utilized. In the course of this, the calls to the interrupt remapping related IOMMU implementation specific operations needed to be re-qualified from depending on iommu_enabled to iommu_intremap (as was already the case for x86's HPET code). That in turn required to make sure iommu_intremap gets properly cleared when the respective initialization fails (or isn't being done at all). Along with making sure interrupt remapping doesn't get inconsistently enabled on some IOMMUs and not on others in the VT-d code, this in turn allowed quite a bit of cleanup on the VT-d side (if desired, that cleanup could of course be broken out into a separate patch). Signed-off-by: Jan Beulich --- a/xen/arch/x86/msi.c +++ b/xen/arch/x86/msi.c @@ -207,7 +207,7 @@ static void read_msi_msg(struct msi_desc BUG(); } - if ( iommu_enabled ) + if ( iommu_intremap ) iommu_read_msi_from_ire(entry, msg); } @@ -215,7 +215,7 @@ static void write_msi_msg(struct msi_des { entry->msg = *msg; - if ( iommu_enabled ) + if ( iommu_intremap ) { ASSERT(msg != &entry->msg); iommu_update_ire_from_msi(entry, msg); @@ -489,7 +489,7 @@ int msi_free_irq(struct msi_desc *entry) } /* Free the unused IRTE if intr remap enabled */ - if ( iommu_enabled ) + if ( iommu_intremap ) iommu_update_ire_from_msi(entry, NULL); list_del(&entry->list); --- a/xen/drivers/passthrough/amd/iommu_acpi.c +++ b/xen/drivers/passthrough/amd/iommu_acpi.c @@ -682,10 +682,10 @@ static u16 __init parse_ivhd_device_spec printk(XENLOG_ERR "IVHD Error: Conflicting IO-APIC %#x entries\n", special->handle); if ( amd_iommu_perdev_intremap ) - return 0; + iommu_intremap = 0; } } - else + else if ( iommu_intremap ) { /* set device id of ioapic */ ioapic_sbdf[special->handle].bdf = bdf; @@ -697,7 +697,7 @@ static u16 __init parse_ivhd_device_spec !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup ) { printk(XENLOG_ERR "IVHD Error: Out of memory\n"); - return 0; + iommu_intremap = 0; } } break; @@ -706,7 +706,7 @@ static u16 __init parse_ivhd_device_spec { printk(XENLOG_ERR "IVHD Error: Invalid IO-APIC %#x\n", special->handle); - return 0; + iommu_intremap = 0; } break; case ACPI_IVHD_HPET: @@ -932,7 +932,7 @@ static int __init parse_ivrs_table(struc printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n", IO_APIC_ID(apic)); if ( amd_iommu_perdev_intremap ) - error = -ENXIO; + iommu_intremap = 0; else { ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array( @@ -940,7 +940,7 @@ static int __init parse_ivrs_table(struc if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup ) { printk(XENLOG_ERR "IVHD Error: Out of memory\n"); - error = -ENOMEM; + iommu_intremap = 0; } } } --- a/xen/drivers/passthrough/amd/iommu_init.c +++ b/xen/drivers/passthrough/amd/iommu_init.c @@ -1157,7 +1157,7 @@ int __init amd_iommu_init(void) BUG_ON( !iommu_found() ); if ( amd_iommu_perdev_intremap && amd_sp5100_erratum28() ) - goto error_out; + iommu_intremap = 0; ivrs_bdf_entries = amd_iommu_get_ivrs_dev_entries(); @@ -1173,7 +1173,7 @@ int __init amd_iommu_init(void) goto error_out; /* initialize io-apic interrupt remapping entries */ - if ( amd_iommu_setup_ioapic_remapping() != 0 ) + if ( iommu_intremap && amd_iommu_setup_ioapic_remapping() != 0 ) goto error_out; /* allocate and initialize a global device table shared by all iommus */ --- a/xen/drivers/passthrough/iommu.c +++ b/xen/drivers/passthrough/iommu.c @@ -470,6 +470,8 @@ int __init iommu_setup(void) rc = iommu_hardware_setup(); iommu_enabled = (rc == 0); } + if ( !iommu_enabled ) + iommu_intremap = 0; if ( (force_iommu && !iommu_enabled) || (force_intremap && !iommu_intremap) ) @@ -487,6 +489,7 @@ int __init iommu_setup(void) printk(" - Dom0 mode: %s\n", iommu_passthrough ? "Passthrough" : iommu_dom0_strict ? "Strict" : "Relaxed"); + printk("Interrupt remapping %sabled\n", iommu_intremap ? "en" : "dis"); return rc; } --- a/xen/drivers/passthrough/vtd/intremap.c +++ b/xen/drivers/passthrough/vtd/intremap.c @@ -373,7 +373,7 @@ unsigned int io_apic_read_remap_rte( struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic)); struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu); - if ( !ir_ctrl || !ir_ctrl->iremap_maddr || !ir_ctrl->iremap_num || + if ( !ir_ctrl->iremap_num || ( (index = apic_pin_2_ir_idx[apic][ioapic_pin]) < 0 ) ) return __io_apic_read(apic, reg); @@ -396,15 +396,8 @@ void io_apic_write_remap_rte( struct IO_APIC_route_remap_entry *remap_rte; unsigned int rte_upper = (reg & 1) ? 1 : 0; struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic)); - struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu); int saved_mask; - if ( !ir_ctrl || !ir_ctrl->iremap_maddr ) - { - __io_apic_write(apic, reg, value); - return; - } - old_rte = __ioapic_read_entry(apic, ioapic_pin, 1); remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte; @@ -653,20 +646,11 @@ void msi_msg_read_remap_rte( { struct pci_dev *pdev = msi_desc->dev; struct acpi_drhd_unit *drhd = NULL; - struct iommu *iommu = NULL; - struct ir_ctrl *ir_ctrl; drhd = pdev ? acpi_find_matched_drhd_unit(pdev) : hpet_to_drhd(msi_desc->hpet_id); - if ( !drhd ) - return; - iommu = drhd->iommu; - - ir_ctrl = iommu_ir_ctrl(iommu); - if ( !ir_ctrl || !ir_ctrl->iremap_maddr ) - return; - - remap_entry_to_msi_msg(iommu, msg); + if ( drhd ) + remap_entry_to_msi_msg(drhd->iommu, msg); } void msi_msg_write_remap_rte( @@ -674,20 +658,11 @@ void msi_msg_write_remap_rte( { struct pci_dev *pdev = msi_desc->dev; struct acpi_drhd_unit *drhd = NULL; - struct iommu *iommu = NULL; - struct ir_ctrl *ir_ctrl; drhd = pdev ? acpi_find_matched_drhd_unit(pdev) : hpet_to_drhd(msi_desc->hpet_id); - if ( !drhd ) - return; - iommu = drhd->iommu; - - ir_ctrl = iommu_ir_ctrl(iommu); - if ( !ir_ctrl || !ir_ctrl->iremap_maddr ) - return; - - msi_msg_to_remap_entry(iommu, pdev, msi_desc, msg); + if ( drhd ) + msi_msg_to_remap_entry(drhd->iommu, pdev, msi_desc, msg); } int __init intel_setup_hpet_msi(struct msi_desc *msi_desc) --- a/xen/drivers/passthrough/vtd/iommu.c +++ b/xen/drivers/passthrough/vtd/iommu.c @@ -2065,6 +2065,9 @@ static int init_vtd_hw(void) break; } } + if ( !iommu_intremap ) + for_each_drhd_unit ( drhd ) + disable_intremap(drhd->iommu); } /* --- a/xen/include/asm-x86/io_apic.h +++ b/xen/include/asm-x86/io_apic.h @@ -129,7 +129,7 @@ struct IO_APIC_route_entry { extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; /* Only need to remap ioapic RTE (reg: 10~3Fh) */ -#define ioapic_reg_remapped(reg) (iommu_enabled && ((reg) >= 0x10)) +#define ioapic_reg_remapped(reg) (iommu_intremap && ((reg) >= 0x10)) static inline unsigned int __io_apic_read(unsigned int apic, unsigned int reg) {