[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [xen master] IOMMU/x86: maintain a per-device pseudo domain ID
commit 97af062b89d52c0ecf7af254b53345c97d438e33 Author: Jan Beulich <jbeulich@xxxxxxxx> AuthorDate: Tue Apr 5 14:19:10 2022 +0200 Commit: Jan Beulich <jbeulich@xxxxxxxx> CommitDate: Tue Apr 5 14:19:10 2022 +0200 IOMMU/x86: maintain a per-device pseudo domain ID In order to subsequently enable per-device quarantine page tables, we'll need domain-ID-like identifiers to be inserted in the respective device (AMD) or context (Intel) table entries alongside the per-device page table root addresses. Make use of "real" domain IDs occupying only half of the value range coverable by domid_t. Note that in VT-d's iommu_alloc() I didn't want to introduce new memory leaks in case of error, but existing ones don't get plugged - that'll be the subject of a later change. The VT-d changes are slightly asymmetric, but this way we can avoid assigning pseudo domain IDs to devices which would never be mapped while still avoiding adding a new parameter to domain_context_unmap(). Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> Reviewed-by: Paul Durrant <paul@xxxxxxx> Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx> Reviewed-by: Roger Pau Monné <roger.pau@xxxxxxxxxx> --- xen/arch/x86/include/asm/iommu.h | 4 ++ xen/arch/x86/include/asm/pci.h | 8 ++- xen/drivers/passthrough/amd/iommu.h | 1 + xen/drivers/passthrough/amd/iommu_detect.c | 8 +++ xen/drivers/passthrough/amd/pci_amd_iommu.c | 22 ++++++- xen/drivers/passthrough/pci.c | 11 +++- xen/drivers/passthrough/vtd/iommu.c | 90 ++++++++++++++++++++++++----- xen/drivers/passthrough/vtd/iommu.h | 1 + xen/drivers/passthrough/x86/iommu.c | 60 +++++++++++++++++++ 9 files changed, 184 insertions(+), 21 deletions(-) diff --git a/xen/arch/x86/include/asm/iommu.h b/xen/arch/x86/include/asm/iommu.h index 5060f97124..9ccf4f8bdd 100644 --- a/xen/arch/x86/include/asm/iommu.h +++ b/xen/arch/x86/include/asm/iommu.h @@ -140,6 +140,10 @@ static inline void iommu_sync_cache(const void *addr, unsigned int size) cache_writeback(addr, size); } +unsigned long 
*iommu_init_domid(domid_t reserve); +domid_t iommu_alloc_domid(unsigned long *map); +void iommu_free_domid(domid_t domid, unsigned long *map); + int __must_check iommu_free_pgtables(struct domain *d); struct domain_iommu; struct page_info *__must_check iommu_alloc_pgtable(struct domain_iommu *hd); diff --git a/xen/arch/x86/include/asm/pci.h b/xen/arch/x86/include/asm/pci.h index 443f25347d..f944017128 100644 --- a/xen/arch/x86/include/asm/pci.h +++ b/xen/arch/x86/include/asm/pci.h @@ -13,6 +13,12 @@ struct arch_pci_dev { vmask_t used_vectors; + /* + * These fields are (de)initialized under pcidevs-lock. Other uses of + * them don't race (de)initialization and hence don't strictly need any + * locking. + */ + domid_t pseudo_domid; }; int pci_conf_write_intercept(unsigned int seg, unsigned int bdf, @@ -36,6 +42,6 @@ static always_inline bool is_pci_passthrough_enabled(void) return true; } -static inline void arch_pci_init_pdev(struct pci_dev *pdev) {} +void arch_pci_init_pdev(struct pci_dev *pdev); #endif /* __X86_PCI_H__ */ diff --git a/xen/drivers/passthrough/amd/iommu.h b/xen/drivers/passthrough/amd/iommu.h index 722b92fd78..ec7e030273 100644 --- a/xen/drivers/passthrough/amd/iommu.h +++ b/xen/drivers/passthrough/amd/iommu.h @@ -96,6 +96,7 @@ struct amd_iommu { struct ring_buffer cmd_buffer; struct ring_buffer event_log; struct ring_buffer ppr_log; + unsigned long *domid_map; int exclusion_enable; int exclusion_allow_all; diff --git a/xen/drivers/passthrough/amd/iommu_detect.c b/xen/drivers/passthrough/amd/iommu_detect.c index f52e7b90e1..fd89475a8d 100644 --- a/xen/drivers/passthrough/amd/iommu_detect.c +++ b/xen/drivers/passthrough/amd/iommu_detect.c @@ -223,6 +223,11 @@ int __init amd_iommu_detect_one_acpi( if ( rt ) goto out; + iommu->domid_map = iommu_init_domid(DOMID_INVALID); + rt = -ENOMEM; + if ( !iommu->domid_map ) + goto out; + rt = pci_ro_device(iommu->seg, bus, PCI_DEVFN(dev, func)); if ( rt ) printk(XENLOG_ERR "Could not mark config space of %pp 
read-only (%d)\n", @@ -233,7 +238,10 @@ int __init amd_iommu_detect_one_acpi( out: if ( rt ) + { + xfree(iommu->domid_map); xfree(iommu); + } return rt; } diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c index 38d2e5e15e..c9a1cdddaa 100644 --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c @@ -539,6 +539,8 @@ static int cf_check amd_iommu_add_device(u8 devfn, struct pci_dev *pdev) struct amd_iommu *iommu; u16 bdf; struct ivrs_mappings *ivrs_mappings; + bool fresh_domid = false; + int ret; if ( !pdev->domain ) return -EINVAL; @@ -606,7 +608,22 @@ static int cf_check amd_iommu_add_device(u8 devfn, struct pci_dev *pdev) AMD_IOMMU_WARN("%pd: unity mapping failed for %pp\n", pdev->domain, &pdev->sbdf); - return amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev); + if ( iommu_quarantine && pdev->arch.pseudo_domid == DOMID_INVALID ) + { + pdev->arch.pseudo_domid = iommu_alloc_domid(iommu->domid_map); + if ( pdev->arch.pseudo_domid == DOMID_INVALID ) + return -ENOSPC; + fresh_domid = true; + } + + ret = amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev); + if ( ret && fresh_domid ) + { + iommu_free_domid(pdev->arch.pseudo_domid, iommu->domid_map); + pdev->arch.pseudo_domid = DOMID_INVALID; + } + + return ret; } static int cf_check amd_iommu_remove_device(u8 devfn, struct pci_dev *pdev) @@ -638,6 +655,9 @@ static int cf_check amd_iommu_remove_device(u8 devfn, struct pci_dev *pdev) AMD_IOMMU_WARN("%pd: unity unmapping failed for %pp\n", pdev->domain, &pdev->sbdf); + iommu_free_domid(pdev->arch.pseudo_domid, iommu->domid_map); + pdev->arch.pseudo_domid = DOMID_INVALID; + if ( amd_iommu_perdev_intremap && ivrs_mappings[bdf].dte_requestor_id == bdf && ivrs_mappings[bdf].intremap_table ) diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c index c6d99af5d4..b59c1b61b7 100644 --- a/xen/drivers/passthrough/pci.c +++ 
b/xen/drivers/passthrough/pci.c @@ -1342,9 +1342,14 @@ static int cf_check _dump_pci_devices(struct pci_seg *pseg, void *arg) list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list ) { - printk("%pp - %pd - node %-3d", - &pdev->sbdf, pdev->domain, - (pdev->node != NUMA_NO_NODE) ? pdev->node : -1); + printk("%pp - ", &pdev->sbdf); +#ifdef CONFIG_X86 + if ( pdev->domain == dom_io ) + printk("DomIO:%x", pdev->arch.pseudo_domid); + else +#endif + printk("%pd", pdev->domain); + printk(" - node %-3d", (pdev->node != NUMA_NO_NODE) ? pdev->node : -1); pdev_dump_msi(pdev); printk("\n"); } diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c index 2fd079a901..870bf465bc 100644 --- a/xen/drivers/passthrough/vtd/iommu.c +++ b/xen/drivers/passthrough/vtd/iommu.c @@ -22,6 +22,7 @@ #include <xen/sched.h> #include <xen/xmalloc.h> #include <xen/domain_page.h> +#include <xen/err.h> #include <xen/iocap.h> #include <xen/iommu.h> #include <xen/numa.h> @@ -1199,6 +1200,8 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd) { struct vtd_iommu *iommu; unsigned int sagaw, agaw = 0, nr_dom; + domid_t reserved_domid = DOMID_INVALID; + int rc; iommu = xzalloc(struct vtd_iommu); if ( iommu == NULL ) @@ -1269,7 +1272,7 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd) nr_dom = cap_ndoms(iommu->cap); - if ( nr_dom <= DOMID_MASK + cap_caching_mode(iommu->cap) ) + if ( nr_dom <= DOMID_MASK * 2 + cap_caching_mode(iommu->cap) ) { /* Allocate domain id (bit) maps. */ iommu->domid_bitmap = xzalloc_array(unsigned long, @@ -1293,9 +1296,24 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd) /* Don't leave dangling NULL pointers. */ iommu->domid_bitmap = ZERO_BLOCK_PTR; iommu->domid_map = ZERO_BLOCK_PTR; + + /* + * If Caching mode is set, then invalid translations are tagged + * with domain id 0. Hence reserve the ID taking up bit/slot 0. 
+ */ + reserved_domid = convert_domid(iommu, 0) ?: DOMID_INVALID; } + iommu->pseudo_domid_map = iommu_init_domid(reserved_domid); + rc = -ENOMEM; + if ( !iommu->pseudo_domid_map ) + goto free; + return 0; + + free: + iommu_free(drhd); + return rc; } void __init iommu_free(struct acpi_drhd_unit *drhd) @@ -1318,6 +1336,7 @@ void __init iommu_free(struct acpi_drhd_unit *drhd) xfree(iommu->domid_bitmap); xfree(iommu->domid_map); + xfree(iommu->pseudo_domid_map); if ( iommu->msi.irq >= 0 ) destroy_irq(iommu->msi.irq); @@ -1588,8 +1607,8 @@ int domain_context_mapping_one( return rc ?: pdev && prev_dom; } -static int domain_context_unmap(struct domain *d, uint8_t devfn, - struct pci_dev *pdev); +static const struct acpi_drhd_unit *domain_context_unmap( + struct domain *d, uint8_t devfn, struct pci_dev *pdev); static int domain_context_mapping(struct domain *domain, u8 devfn, struct pci_dev *pdev) @@ -1597,6 +1616,7 @@ static int domain_context_mapping(struct domain *domain, u8 devfn, const struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev); const struct acpi_rmrr_unit *rmrr; paddr_t pgd_maddr = dom_iommu(domain)->arch.vtd.pgd_maddr; + domid_t orig_domid = pdev->arch.pseudo_domid; int ret = 0; unsigned int i, mode = 0; uint16_t seg = pdev->seg, bdf; @@ -1655,6 +1675,14 @@ static int domain_context_mapping(struct domain *domain, u8 devfn, if ( !drhd ) return -ENODEV; + if ( iommu_quarantine && orig_domid == DOMID_INVALID ) + { + pdev->arch.pseudo_domid = + iommu_alloc_domid(drhd->iommu->pseudo_domid_map); + if ( pdev->arch.pseudo_domid == DOMID_INVALID ) + return -ENOSPC; + } + if ( iommu_debug ) printk(VTDPREFIX "%pd:PCIe: map %pp\n", domain, &PCI_SBDF3(seg, bus, devfn)); @@ -1672,6 +1700,14 @@ static int domain_context_mapping(struct domain *domain, u8 devfn, if ( !drhd ) return -ENODEV; + if ( iommu_quarantine && orig_domid == DOMID_INVALID ) + { + pdev->arch.pseudo_domid = + iommu_alloc_domid(drhd->iommu->pseudo_domid_map); + if ( pdev->arch.pseudo_domid == 
DOMID_INVALID ) + return -ENOSPC; + } + if ( iommu_debug ) printk(VTDPREFIX "%pd:PCI: map %pp\n", domain, &PCI_SBDF3(seg, bus, devfn)); @@ -1745,6 +1781,13 @@ static int domain_context_mapping(struct domain *domain, u8 devfn, if ( !ret && devfn == pdev->devfn ) pci_vtd_quirk(pdev); + if ( ret && drhd && orig_domid == DOMID_INVALID ) + { + iommu_free_domid(pdev->arch.pseudo_domid, + drhd->iommu->pseudo_domid_map); + pdev->arch.pseudo_domid = DOMID_INVALID; + } + return ret; } @@ -1830,8 +1873,10 @@ int domain_context_unmap_one( return rc; } -static int domain_context_unmap(struct domain *domain, u8 devfn, - struct pci_dev *pdev) +static const struct acpi_drhd_unit *domain_context_unmap( + struct domain *domain, + uint8_t devfn, + struct pci_dev *pdev) { const struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev); struct vtd_iommu *iommu = drhd ? drhd->iommu : NULL; @@ -1845,16 +1890,16 @@ static int domain_context_unmap(struct domain *domain, u8 devfn, if ( iommu_debug ) printk(VTDPREFIX "%pd:Hostbridge: skip %pp unmap\n", domain, &PCI_SBDF3(seg, bus, devfn)); - return is_hardware_domain(domain) ? 0 : -EPERM; + return ERR_PTR(is_hardware_domain(domain) ? 
0 : -EPERM); case DEV_TYPE_PCIe_BRIDGE: case DEV_TYPE_PCIe2PCI_BRIDGE: case DEV_TYPE_LEGACY_PCI_BRIDGE: - return 0; + return ERR_PTR(0); case DEV_TYPE_PCIe_ENDPOINT: if ( !iommu ) - return -ENODEV; + return ERR_PTR(-ENODEV); if ( iommu_debug ) printk(VTDPREFIX "%pd:PCIe: unmap %pp\n", @@ -1868,7 +1913,7 @@ static int domain_context_unmap(struct domain *domain, u8 devfn, case DEV_TYPE_PCI: if ( !iommu ) - return -ENODEV; + return ERR_PTR(-ENODEV); if ( iommu_debug ) printk(VTDPREFIX "%pd:PCI: unmap %pp\n", @@ -1915,14 +1960,14 @@ static int domain_context_unmap(struct domain *domain, u8 devfn, default: dprintk(XENLOG_ERR VTDPREFIX, "%pd:unknown(%u): %pp\n", domain, pdev->type, &PCI_SBDF3(seg, bus, devfn)); - return -EINVAL; + return ERR_PTR(-EINVAL); } if ( !ret && pdev->devfn == devfn && !QUARANTINE_SKIP(domain, dom_iommu(domain)->arch.vtd.pgd_maddr) ) check_cleanup_domid_map(domain, pdev, iommu); - return ret; + return drhd; } static void cf_check iommu_clear_root_pgtable(struct domain *d) @@ -2149,16 +2194,17 @@ static int cf_check intel_iommu_enable_device(struct pci_dev *pdev) static int cf_check intel_iommu_remove_device(u8 devfn, struct pci_dev *pdev) { + const struct acpi_drhd_unit *drhd; struct acpi_rmrr_unit *rmrr; u16 bdf; - int ret, i; + unsigned int i; if ( !pdev->domain ) return -EINVAL; - ret = domain_context_unmap(pdev->domain, devfn, pdev); - if ( ret ) - return ret; + drhd = domain_context_unmap(pdev->domain, devfn, pdev); + if ( IS_ERR(drhd) ) + return PTR_ERR(drhd); for_each_rmrr_device ( rmrr, bdf, i ) { @@ -2175,6 +2221,13 @@ static int cf_check intel_iommu_remove_device(u8 devfn, struct pci_dev *pdev) rmrr->end_address, 0); } + if ( drhd ) + { + iommu_free_domid(pdev->arch.pseudo_domid, + drhd->iommu->pseudo_domid_map); + pdev->arch.pseudo_domid = DOMID_INVALID; + } + return 0; } @@ -2547,7 +2600,12 @@ static int cf_check reassign_device_ownership( } } else - ret = domain_context_unmap(source, devfn, pdev); + { + const struct acpi_drhd_unit 
*drhd; + + drhd = domain_context_unmap(source, devfn, pdev); + ret = IS_ERR(drhd) ? PTR_ERR(drhd) : 0; + } if ( ret ) { if ( !has_arch_pdevs(target) ) diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h index 67e34dd875..3c76218f76 100644 --- a/xen/drivers/passthrough/vtd/iommu.h +++ b/xen/drivers/passthrough/vtd/iommu.h @@ -503,6 +503,7 @@ struct vtd_iommu { } flush; struct list_head ats_devices; + unsigned long *pseudo_domid_map; /* "pseudo" domain id bitmap */ unsigned long *domid_bitmap; /* domain id bitmap */ domid_t *domid_map; /* domain id mapping array */ uint32_t version; diff --git a/xen/drivers/passthrough/x86/iommu.c b/xen/drivers/passthrough/x86/iommu.c index 9c5fb6fa46..b942f2119e 100644 --- a/xen/drivers/passthrough/x86/iommu.c +++ b/xen/drivers/passthrough/x86/iommu.c @@ -391,6 +391,66 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d) return; } +void arch_pci_init_pdev(struct pci_dev *pdev) +{ + pdev->arch.pseudo_domid = DOMID_INVALID; +} + +unsigned long *__init iommu_init_domid(domid_t reserve) +{ + unsigned long *map; + + if ( !iommu_quarantine ) + return ZERO_BLOCK_PTR; + + BUILD_BUG_ON(DOMID_MASK * 2U >= UINT16_MAX); + + map = xzalloc_array(unsigned long, BITS_TO_LONGS(UINT16_MAX - DOMID_MASK)); + if ( map && reserve != DOMID_INVALID ) + { + ASSERT(reserve > DOMID_MASK); + __set_bit(reserve & DOMID_MASK, map); + } + + return map; +} + +domid_t iommu_alloc_domid(unsigned long *map) +{ + /* + * This is used uniformly across all IOMMUs, such that on typical + * systems we wouldn't re-use the same ID very quickly (perhaps never). 
+ */ + static unsigned int start; + unsigned int idx = find_next_zero_bit(map, UINT16_MAX - DOMID_MASK, start); + + ASSERT(pcidevs_locked()); + + if ( idx >= UINT16_MAX - DOMID_MASK ) + idx = find_first_zero_bit(map, UINT16_MAX - DOMID_MASK); + if ( idx >= UINT16_MAX - DOMID_MASK ) + return DOMID_INVALID; + + __set_bit(idx, map); + + start = idx + 1; + + return idx | (DOMID_MASK + 1); +} + +void iommu_free_domid(domid_t domid, unsigned long *map) +{ + ASSERT(pcidevs_locked()); + + if ( domid == DOMID_INVALID ) + return; + + ASSERT(domid > DOMID_MASK); + + if ( !__test_and_clear_bit(domid & DOMID_MASK, map) ) + BUG(); +} + int iommu_free_pgtables(struct domain *d) { struct domain_iommu *hd = dom_iommu(d); -- generated by git-patchbot for /home/xen/git/xen.git#master
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |