VT-d: fix Device-TLB flush timeout issue

From: Quan Xu

If a Device-TLB flush times out, we hide the target ATS device
immediately. By hiding the device, we make sure it can't be assigned
to any domain any longer (see device_assigned()).

Signed-off-by: Quan Xu
Signed-off-by: Jan Beulich
---
v15: Re-base on heavily changed patch 1. Call disable_ats_device() and
     domain_crash() from iommu_dev_iotlb_flush_timeout() and move the
     function to passthrough/pci.c to fix the ARM build. As a result of
     calling disable_ats_device() also use list_for_each_entry_safe()
     in dev_invalidate_iotlb().

--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -32,6 +32,7 @@
 #include <xen/tasklet.h>
 #include <xsm/xsm.h>
 #include <asm/msi.h>
+#include "ats.h"

 struct pci_seg {
     struct list_head alldevs_list;
@@ -1504,6 +1505,34 @@ static int iommu_get_device_group(
     return i;
 }

+void iommu_dev_iotlb_flush_timeout(struct domain *d, struct pci_dev *pdev)
+{
+    pcidevs_lock();
+
+    disable_ats_device(pdev);
+
+    ASSERT(pdev->domain);
+    if ( d != pdev->domain )
+    {
+        pcidevs_unlock();
+        return;
+    }
+
+    list_del(&pdev->domain_list);
+    pdev->domain = NULL;
+    _pci_hide_device(pdev);
+
+    if ( !d->is_shutting_down && printk_ratelimit() )
+        printk(XENLOG_ERR
+               "dom%d: ATS device %04x:%02x:%02x.%u flush failed\n",
+               d->domain_id, pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+               PCI_FUNC(pdev->devfn));
+    if ( !is_hardware_domain(d) )
+        domain_crash(d);
+
+    pcidevs_unlock();
+}
+
 int iommu_do_pci_domctl(
     struct xen_domctl *domctl, struct domain *d,
     XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -25,6 +25,7 @@

 #define VTDPREFIX "[VT-D]"

+struct pci_ats_dev;
 extern bool_t rwbf_quirk;

 void print_iommu_regs(struct acpi_drhd_unit *drhd);
@@ -60,8 +61,8 @@ int dev_invalidate_iotlb(struct iommu *i
                          u64 addr, unsigned int size_order, u64 type);

 int __must_check qinval_device_iotlb_sync(struct iommu *iommu,
-                                          u32 max_invs_pend,
-                                          u16 sid, u16 size, u64 addr);
+                                          struct pci_dev *pdev,
+                                          u16 did, u16 size, u64 addr);

 unsigned int get_cache_line_size(void);
 void cacheline_flush(char *);
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -27,11 +27,11 @@
 #include "dmar.h"
 #include "vtd.h"
 #include "extern.h"
+#include "../ats.h"

 #define VTD_QI_TIMEOUT 1

-static int __must_check invalidate_sync(struct iommu *iommu,
-                                        bool_t flush_dev_iotlb);
+static int __must_check invalidate_sync(struct iommu *iommu);

 static void print_qi_regs(struct iommu *iommu)
 {
@@ -103,7 +103,7 @@ static int __must_check queue_invalidate

     unmap_vtd_domain_page(qinval_entries);

-    return invalidate_sync(iommu, 0);
+    return invalidate_sync(iommu);
 }

 static int __must_check queue_invalidate_iotlb_sync(struct iommu *iommu,
@@ -140,7 +140,7 @@ static int __must_check queue_invalidate
     qinval_update_qtail(iommu, index);
     spin_unlock_irqrestore(&iommu->register_lock, flags);

-    return invalidate_sync(iommu, 0);
+    return invalidate_sync(iommu);
 }

 static int __must_check queue_invalidate_wait(struct iommu *iommu,
@@ -199,25 +199,53 @@ static int __must_check queue_invalidate
     return -EOPNOTSUPP;
 }

-static int __must_check invalidate_sync(struct iommu *iommu,
-                                        bool_t flush_dev_iotlb)
+static int __must_check invalidate_sync(struct iommu *iommu)
 {
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);

     ASSERT(qi_ctrl->qinval_maddr);

-    return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb);
+    return queue_invalidate_wait(iommu, 0, 1, 1, 0);
 }

-int qinval_device_iotlb_sync(struct iommu *iommu,
-                             u32 max_invs_pend,
-                             u16 sid, u16 size, u64 addr)
+static int __must_check dev_invalidate_sync(struct iommu *iommu,
+                                            struct pci_dev *pdev, u16 did)
+{
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+    int rc;
+
+    ASSERT(qi_ctrl->qinval_maddr);
+    rc = queue_invalidate_wait(iommu, 0, 1, 1, 1);
+    if ( rc == -ETIMEDOUT )
+    {
+        struct domain *d = NULL;
+
+        if ( test_bit(did, iommu->domid_bitmap) )
+            d = rcu_lock_domain_by_id(iommu->domid_map[did]);
+
+        /*
+         * In case the domain has been freed or the IOMMU domid bitmap is
+         * not valid, the device no longer belongs to this domain.
+         */
+        if ( d == NULL )
+            return rc;
+
+        iommu_dev_iotlb_flush_timeout(d, pdev);
+        rcu_unlock_domain(d);
+    }
+
+    return rc;
+}
+
+int qinval_device_iotlb_sync(struct iommu *iommu, struct pci_dev *pdev,
+                             u16 did, u16 size, u64 addr)
 {
     unsigned long flags;
     unsigned int index;
     u64 entry_base;
     struct qinval_entry *qinval_entry, *qinval_entries;

+    ASSERT(pdev);
     spin_lock_irqsave(&iommu->register_lock, flags);
     index = qinval_next_index(iommu);
     entry_base = iommu_qi_ctrl(iommu)->qinval_maddr +
@@ -227,9 +255,9 @@ int qinval_device_iotlb_sync(struct iomm

     qinval_entry->q.dev_iotlb_inv_dsc.lo.type = TYPE_INVAL_DEVICE_IOTLB;
     qinval_entry->q.dev_iotlb_inv_dsc.lo.res_1 = 0;
-    qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = max_invs_pend;
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = pdev->ats.queue_depth;
     qinval_entry->q.dev_iotlb_inv_dsc.lo.res_2 = 0;
-    qinval_entry->q.dev_iotlb_inv_dsc.lo.sid = sid;
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.sid = PCI_BDF2(pdev->bus, pdev->devfn);
     qinval_entry->q.dev_iotlb_inv_dsc.lo.res_3 = 0;

     qinval_entry->q.dev_iotlb_inv_dsc.hi.size = size;
@@ -240,7 +268,7 @@ int qinval_device_iotlb_sync(struct iomm
     qinval_update_qtail(iommu, index);
     spin_unlock_irqrestore(&iommu->register_lock, flags);

-    return invalidate_sync(iommu, 1);
+    return dev_invalidate_sync(iommu, pdev, did);
 }

 static int __must_check queue_invalidate_iec_sync(struct iommu *iommu,
@@ -271,7 +299,7 @@ static int __must_check queue_invalidate
     qinval_update_qtail(iommu, index);
     spin_unlock_irqrestore(&iommu->register_lock, flags);

-    ret = invalidate_sync(iommu, 0);
+    ret = invalidate_sync(iommu);

     /*
      * reading vt-d architecture register will ensure
--- a/xen/drivers/passthrough/vtd/x86/ats.c
+++ b/xen/drivers/passthrough/vtd/x86/ats.c
@@ -109,15 +109,14 @@ out:
 int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
                          u64 addr, unsigned int size_order, u64 type)
 {
-    const struct pci_dev *pdev;
+    struct pci_dev *pdev, *temp;
     int ret = 0;

     if ( !ecap_dev_iotlb(iommu->ecap) )
         return ret;

-    list_for_each_entry( pdev, &iommu->ats_devices, ats.list )
+    list_for_each_entry_safe( pdev, temp, &iommu->ats_devices, ats.list )
     {
-        u16 sid = PCI_BDF2(pdev->bus, pdev->devfn);
         bool_t sbit;
         int rc = 0;

@@ -131,8 +130,7 @@ int dev_invalidate_iotlb(struct iommu *i
             /* invalidate all translations: sbit=1,bit_63=0,bit[62:12]=1 */
             sbit = 1;
             addr = (~0UL << PAGE_SHIFT_4K) & 0x7FFFFFFFFFFFFFFF;
-            rc = qinval_device_iotlb_sync(iommu, pdev->ats.queue_depth,
-                                          sid, sbit, addr);
+            rc = qinval_device_iotlb_sync(iommu, pdev, did, sbit, addr);
             break;
         case DMA_TLB_PSI_FLUSH:
             if ( !device_in_domain(iommu, pdev, did) )
@@ -151,8 +149,7 @@ int dev_invalidate_iotlb(struct iommu *i
                 addr |= (((u64)1 << (size_order - 1)) - 1) << PAGE_SHIFT_4K;
             }

-            rc = qinval_device_iotlb_sync(iommu, pdev->ats.queue_depth,
-                                          sid, sbit, addr);
+            rc = qinval_device_iotlb_sync(iommu, pdev, did, sbit, addr);
             break;
         default:
             dprintk(XENLOG_WARNING VTDPREFIX, "invalid vt-d flush type\n");
type\n"); --- a/xen/include/xen/iommu.h +++ b/xen/include/xen/iommu.h @@ -207,6 +207,8 @@ int __must_check iommu_iotlb_flush(struc unsigned int page_count); int __must_check iommu_iotlb_flush_all(struct domain *d); +void iommu_dev_iotlb_flush_timeout(struct domain *d, struct pci_dev *pdev); + /* * The purpose of the iommu_dont_flush_iotlb optional cpu flag is to * avoid unecessary iotlb_flush in the low level IOMMU code.