|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [xen staging-4.16] IOMMU/x86: maintain a per-device pseudo domain ID
commit 3e65372436fb6bbfdc59e4175d4a41b398000e0a
Author: Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Tue Apr 5 14:48:29 2022 +0200
Commit: Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Tue Apr 5 14:48:29 2022 +0200
IOMMU/x86: maintain a per-device pseudo domain ID
In order to subsequently enable per-device quarantine page tables, we'll
need domain-ID-like identifiers to be inserted in the respective device
(AMD) or context (Intel) table entries alongside the per-device page
table root addresses.
Make use of "real" domain IDs occupying only half of the value range
coverable by domid_t.
Note that in VT-d's iommu_alloc() I didn't want to introduce new memory
leaks in case of error, but existing ones don't get plugged - that'll be
the subject of a later change.
The VT-d changes are slightly asymmetric, but this way we can avoid
assigning pseudo domain IDs to devices which would never be mapped, while
also not having to add a new parameter to domain_context_unmap().
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Reviewed-by: Paul Durrant <paul@xxxxxxx>
Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx>
Reviewed-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
master commit: 97af062b89d52c0ecf7af254b53345c97d438e33
master date: 2022-04-05 14:19:10 +0200
---
xen/drivers/passthrough/amd/iommu.h | 1 +
xen/drivers/passthrough/amd/iommu_detect.c | 8 +++
xen/drivers/passthrough/amd/pci_amd_iommu.c | 22 +++++++-
xen/drivers/passthrough/pci.c | 11 ++--
xen/drivers/passthrough/vtd/iommu.c | 82 +++++++++++++++++++++++------
xen/drivers/passthrough/vtd/iommu.h | 1 +
xen/drivers/passthrough/x86/iommu.c | 52 ++++++++++++++++++
xen/include/asm-x86/iommu.h | 4 ++
xen/include/asm-x86/pci.h | 8 ++-
9 files changed, 168 insertions(+), 21 deletions(-)
diff --git a/xen/drivers/passthrough/amd/iommu.h b/xen/drivers/passthrough/amd/iommu.h
index 2e1bd85e7c..5ae86cdec8 100644
--- a/xen/drivers/passthrough/amd/iommu.h
+++ b/xen/drivers/passthrough/amd/iommu.h
@@ -96,6 +96,7 @@ struct amd_iommu {
struct ring_buffer cmd_buffer;
struct ring_buffer event_log;
struct ring_buffer ppr_log;
+ unsigned long *domid_map;
int exclusion_enable;
int exclusion_allow_all;
diff --git a/xen/drivers/passthrough/amd/iommu_detect.c b/xen/drivers/passthrough/amd/iommu_detect.c
index f52e7b90e1..58bc6d01b3 100644
--- a/xen/drivers/passthrough/amd/iommu_detect.c
+++ b/xen/drivers/passthrough/amd/iommu_detect.c
@@ -223,6 +223,11 @@ int __init amd_iommu_detect_one_acpi(
if ( rt )
goto out;
+ iommu->domid_map = iommu_init_domid();
+ rt = -ENOMEM;
+ if ( !iommu->domid_map )
+ goto out;
+
rt = pci_ro_device(iommu->seg, bus, PCI_DEVFN(dev, func));
if ( rt )
printk(XENLOG_ERR "Could not mark config space of %pp read-only (%d)\n",
@@ -233,7 +238,10 @@ int __init amd_iommu_detect_one_acpi(
out:
if ( rt )
+ {
+ xfree(iommu->domid_map);
xfree(iommu);
+ }
return rt;
}
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
index ac6a0ed199..f170a69a45 100644
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -539,6 +539,8 @@ static int amd_iommu_add_device(u8 devfn, struct pci_dev
*pdev)
struct amd_iommu *iommu;
u16 bdf;
struct ivrs_mappings *ivrs_mappings;
+ bool fresh_domid = false;
+ int ret;
if ( !pdev->domain )
return -EINVAL;
@@ -606,7 +608,22 @@ static int amd_iommu_add_device(u8 devfn, struct pci_dev
*pdev)
AMD_IOMMU_WARN("%pd: unity mapping failed for %pp\n",
pdev->domain, &pdev->sbdf);
- return amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev);
+ if ( iommu_quarantine && pdev->arch.pseudo_domid == DOMID_INVALID )
+ {
+ pdev->arch.pseudo_domid = iommu_alloc_domid(iommu->domid_map);
+ if ( pdev->arch.pseudo_domid == DOMID_INVALID )
+ return -ENOSPC;
+ fresh_domid = true;
+ }
+
+ ret = amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev);
+ if ( ret && fresh_domid )
+ {
+ iommu_free_domid(pdev->arch.pseudo_domid, iommu->domid_map);
+ pdev->arch.pseudo_domid = DOMID_INVALID;
+ }
+
+ return ret;
}
static int amd_iommu_remove_device(u8 devfn, struct pci_dev *pdev)
@@ -638,6 +655,9 @@ static int amd_iommu_remove_device(u8 devfn, struct pci_dev
*pdev)
AMD_IOMMU_WARN("%pd: unity unmapping failed for %pp\n",
pdev->domain, &pdev->sbdf);
+ iommu_free_domid(pdev->arch.pseudo_domid, iommu->domid_map);
+ pdev->arch.pseudo_domid = DOMID_INVALID;
+
if ( amd_iommu_perdev_intremap &&
ivrs_mappings[bdf].dte_requestor_id == bdf &&
ivrs_mappings[bdf].intremap_table )
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 0d8ab2e716..dd1c0ea329 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -1271,9 +1271,14 @@ static int _dump_pci_devices(struct pci_seg *pseg, void
*arg)
list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
{
- printk("%pp - %pd - node %-3d",
- &pdev->sbdf, pdev->domain,
- (pdev->node != NUMA_NO_NODE) ? pdev->node : -1);
+ printk("%pp - ", &pdev->sbdf);
+#ifdef CONFIG_X86
+ if ( pdev->domain == dom_io )
+ printk("DomIO:%x", pdev->arch.pseudo_domid);
+ else
+#endif
+ printk("%pd", pdev->domain);
+ printk(" - node %-3d", (pdev->node != NUMA_NO_NODE) ? pdev->node : -1);
pdev_dump_msi(pdev);
printk("\n");
}
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index 988465523d..a38fc1a64f 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -22,6 +22,7 @@
#include <xen/sched.h>
#include <xen/xmalloc.h>
#include <xen/domain_page.h>
+#include <xen/err.h>
#include <xen/iocap.h>
#include <xen/iommu.h>
#include <xen/numa.h>
@@ -1215,7 +1216,7 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd)
{
struct vtd_iommu *iommu;
unsigned long sagaw, nr_dom;
- int agaw;
+ int agaw, rc;
iommu = xzalloc(struct vtd_iommu);
if ( iommu == NULL )
@@ -1301,7 +1302,16 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd)
if ( !iommu->domid_map )
return -ENOMEM;
+ iommu->pseudo_domid_map = iommu_init_domid();
+ rc = -ENOMEM;
+ if ( !iommu->pseudo_domid_map )
+ goto free;
+
return 0;
+
+ free:
+ iommu_free(drhd);
+ return rc;
}
void __init iommu_free(struct acpi_drhd_unit *drhd)
@@ -1324,6 +1334,7 @@ void __init iommu_free(struct acpi_drhd_unit *drhd)
xfree(iommu->domid_bitmap);
xfree(iommu->domid_map);
+ xfree(iommu->pseudo_domid_map);
if ( iommu->msi.irq >= 0 )
destroy_irq(iommu->msi.irq);
@@ -1593,8 +1604,8 @@ int domain_context_mapping_one(
return rc ?: pdev && prev_dom;
}
-static int domain_context_unmap(struct domain *d, uint8_t devfn,
- struct pci_dev *pdev);
+static const struct acpi_drhd_unit *domain_context_unmap(
+ struct domain *d, uint8_t devfn, struct pci_dev *pdev);
static int domain_context_mapping(struct domain *domain, u8 devfn,
struct pci_dev *pdev)
@@ -1602,6 +1613,7 @@ static int domain_context_mapping(struct domain *domain,
u8 devfn,
const struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev);
const struct acpi_rmrr_unit *rmrr;
paddr_t pgd_maddr = dom_iommu(domain)->arch.vtd.pgd_maddr;
+ domid_t orig_domid = pdev->arch.pseudo_domid;
int ret = 0;
unsigned int i, mode = 0;
uint16_t seg = pdev->seg, bdf;
@@ -1660,6 +1672,14 @@ static int domain_context_mapping(struct domain *domain,
u8 devfn,
if ( !drhd )
return -ENODEV;
+ if ( iommu_quarantine && orig_domid == DOMID_INVALID )
+ {
+ pdev->arch.pseudo_domid =
+ iommu_alloc_domid(drhd->iommu->pseudo_domid_map);
+ if ( pdev->arch.pseudo_domid == DOMID_INVALID )
+ return -ENOSPC;
+ }
+
if ( iommu_debug )
printk(VTDPREFIX "%pd:PCIe: map %pp\n",
domain, &PCI_SBDF3(seg, bus, devfn));
@@ -1677,6 +1697,14 @@ static int domain_context_mapping(struct domain *domain,
u8 devfn,
if ( !drhd )
return -ENODEV;
+ if ( iommu_quarantine && orig_domid == DOMID_INVALID )
+ {
+ pdev->arch.pseudo_domid =
+ iommu_alloc_domid(drhd->iommu->pseudo_domid_map);
+ if ( pdev->arch.pseudo_domid == DOMID_INVALID )
+ return -ENOSPC;
+ }
+
if ( iommu_debug )
printk(VTDPREFIX "%pd:PCI: map %pp\n",
domain, &PCI_SBDF3(seg, bus, devfn));
@@ -1750,6 +1778,13 @@ static int domain_context_mapping(struct domain *domain,
u8 devfn,
if ( !ret && devfn == pdev->devfn )
pci_vtd_quirk(pdev);
+ if ( ret && drhd && orig_domid == DOMID_INVALID )
+ {
+ iommu_free_domid(pdev->arch.pseudo_domid,
+ drhd->iommu->pseudo_domid_map);
+ pdev->arch.pseudo_domid = DOMID_INVALID;
+ }
+
return ret;
}
@@ -1835,8 +1870,10 @@ int domain_context_unmap_one(
return rc;
}
-static int domain_context_unmap(struct domain *domain, u8 devfn,
- struct pci_dev *pdev)
+static const struct acpi_drhd_unit *domain_context_unmap(
+ struct domain *domain,
+ uint8_t devfn,
+ struct pci_dev *pdev)
{
const struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev);
struct vtd_iommu *iommu = drhd ? drhd->iommu : NULL;
@@ -1850,16 +1887,16 @@ static int domain_context_unmap(struct domain *domain,
u8 devfn,
if ( iommu_debug )
printk(VTDPREFIX "%pd:Hostbridge: skip %pp unmap\n",
domain, &PCI_SBDF3(seg, bus, devfn));
- return is_hardware_domain(domain) ? 0 : -EPERM;
+ return ERR_PTR(is_hardware_domain(domain) ? 0 : -EPERM);
case DEV_TYPE_PCIe_BRIDGE:
case DEV_TYPE_PCIe2PCI_BRIDGE:
case DEV_TYPE_LEGACY_PCI_BRIDGE:
- return 0;
+ return ERR_PTR(0);
case DEV_TYPE_PCIe_ENDPOINT:
if ( !iommu )
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
if ( iommu_debug )
printk(VTDPREFIX "%pd:PCIe: unmap %pp\n",
@@ -1873,7 +1910,7 @@ static int domain_context_unmap(struct domain *domain, u8
devfn,
case DEV_TYPE_PCI:
if ( !iommu )
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
if ( iommu_debug )
printk(VTDPREFIX "%pd:PCI: unmap %pp\n",
@@ -1920,14 +1957,14 @@ static int domain_context_unmap(struct domain *domain,
u8 devfn,
default:
dprintk(XENLOG_ERR VTDPREFIX, "%pd:unknown(%u): %pp\n",
domain, pdev->type, &PCI_SBDF3(seg, bus, devfn));
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
if ( !ret && pdev->devfn == devfn &&
!QUARANTINE_SKIP(domain, dom_iommu(domain)->arch.vtd.pgd_maddr) )
check_cleanup_domid_map(domain, pdev, iommu);
- return ret;
+ return drhd;
}
static void iommu_clear_root_pgtable(struct domain *d)
@@ -2154,16 +2191,17 @@ static int intel_iommu_enable_device(struct pci_dev
*pdev)
static int intel_iommu_remove_device(u8 devfn, struct pci_dev *pdev)
{
+ const struct acpi_drhd_unit *drhd;
struct acpi_rmrr_unit *rmrr;
u16 bdf;
- int ret, i;
+ unsigned int i;
if ( !pdev->domain )
return -EINVAL;
- ret = domain_context_unmap(pdev->domain, devfn, pdev);
- if ( ret )
- return ret;
+ drhd = domain_context_unmap(pdev->domain, devfn, pdev);
+ if ( IS_ERR(drhd) )
+ return PTR_ERR(drhd);
for_each_rmrr_device ( rmrr, bdf, i )
{
@@ -2180,6 +2218,13 @@ static int intel_iommu_remove_device(u8 devfn, struct
pci_dev *pdev)
rmrr->end_address, 0);
}
+ if ( drhd )
+ {
+ iommu_free_domid(pdev->arch.pseudo_domid,
+ drhd->iommu->pseudo_domid_map);
+ pdev->arch.pseudo_domid = DOMID_INVALID;
+ }
+
return 0;
}
@@ -2556,7 +2601,12 @@ static int reassign_device_ownership(
}
}
else
- ret = domain_context_unmap(source, devfn, pdev);
+ {
+ const struct acpi_drhd_unit *drhd;
+
+ drhd = domain_context_unmap(source, devfn, pdev);
+ ret = IS_ERR(drhd) ? PTR_ERR(drhd) : 0;
+ }
if ( ret )
{
if ( !has_arch_pdevs(target) )
diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
index 2e4d39cc61..025895c7d0 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -508,6 +508,7 @@ struct vtd_iommu {
} flush;
struct list_head ats_devices;
+ unsigned long *pseudo_domid_map; /* "pseudo" domain id bitmap */
unsigned long *domid_bitmap; /* domain id bitmap */
u16 *domid_map; /* domain id mapping array */
uint32_t version;
diff --git a/xen/drivers/passthrough/x86/iommu.c b/xen/drivers/passthrough/x86/iommu.c
index 295d853003..a36a6bd4b2 100644
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -387,6 +387,58 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
return;
}
+void arch_pci_init_pdev(struct pci_dev *pdev)
+{
+ pdev->arch.pseudo_domid = DOMID_INVALID;
+}
+
+unsigned long *__init iommu_init_domid(void)
+{
+ if ( !iommu_quarantine )
+ return ZERO_BLOCK_PTR;
+
+ BUILD_BUG_ON(DOMID_MASK * 2U >= UINT16_MAX);
+
+ return xzalloc_array(unsigned long,
+ BITS_TO_LONGS(UINT16_MAX - DOMID_MASK));
+}
+
+domid_t iommu_alloc_domid(unsigned long *map)
+{
+ /*
+ * This is used uniformly across all IOMMUs, such that on typical
+ * systems we wouldn't re-use the same ID very quickly (perhaps never).
+ */
+ static unsigned int start;
+ unsigned int idx = find_next_zero_bit(map, UINT16_MAX - DOMID_MASK, start);
+
+ ASSERT(pcidevs_locked());
+
+ if ( idx >= UINT16_MAX - DOMID_MASK )
+ idx = find_first_zero_bit(map, UINT16_MAX - DOMID_MASK);
+ if ( idx >= UINT16_MAX - DOMID_MASK )
+ return DOMID_INVALID;
+
+ __set_bit(idx, map);
+
+ start = idx + 1;
+
+ return idx | (DOMID_MASK + 1);
+}
+
+void iommu_free_domid(domid_t domid, unsigned long *map)
+{
+ ASSERT(pcidevs_locked());
+
+ if ( domid == DOMID_INVALID )
+ return;
+
+ ASSERT(domid > DOMID_MASK);
+
+ if ( !__test_and_clear_bit(domid & DOMID_MASK, map) )
+ BUG();
+}
+
int iommu_free_pgtables(struct domain *d)
{
struct domain_iommu *hd = dom_iommu(d);
diff --git a/xen/include/asm-x86/iommu.h b/xen/include/asm-x86/iommu.h
index cb794fe1ef..7d6dc5186a 100644
--- a/xen/include/asm-x86/iommu.h
+++ b/xen/include/asm-x86/iommu.h
@@ -141,6 +141,10 @@ int pi_update_irte(const struct pi_desc *pi_desc, const
struct pirq *pirq,
iommu_vcall(ops, sync_cache, addr, size); \
})
+unsigned long *iommu_init_domid(void);
+domid_t iommu_alloc_domid(unsigned long *map);
+void iommu_free_domid(domid_t domid, unsigned long *map);
+
int __must_check iommu_free_pgtables(struct domain *d);
struct domain_iommu;
struct page_info *__must_check iommu_alloc_pgtable(struct domain_iommu *hd);
diff --git a/xen/include/asm-x86/pci.h b/xen/include/asm-x86/pci.h
index 443f25347d..f944017128 100644
--- a/xen/include/asm-x86/pci.h
+++ b/xen/include/asm-x86/pci.h
@@ -13,6 +13,12 @@
struct arch_pci_dev {
vmask_t used_vectors;
+ /*
+ * These fields are (de)initialized under pcidevs-lock. Other uses of
+ * them don't race (de)initialization and hence don't strictly need any
+ * locking.
+ */
+ domid_t pseudo_domid;
};
int pci_conf_write_intercept(unsigned int seg, unsigned int bdf,
@@ -36,6 +42,6 @@ static always_inline bool is_pci_passthrough_enabled(void)
return true;
}
-static inline void arch_pci_init_pdev(struct pci_dev *pdev) {}
+void arch_pci_init_pdev(struct pci_dev *pdev);
#endif /* __X86_PCI_H__ */
--
generated by git-patchbot for /home/xen/git/xen.git#staging-4.16
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |