
[Xen-changelog] [xen-unstable] VT-d: per-iommu domain-id



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1260258690 0
# Node ID 66ff18dd385841b534efd24fe3a3e33abdbd98ad
# Parent  ab0d71f7f596048194c4389bd4fc8a25221ac380
VT-d: per-iommu domain-id

Currently, Xen uses a single shared iommu domain-id space across all the
VT-d units in the platform. The number of iommu domain-ids (NR_DID,
e.g. 256) supported by each VT-d unit is reported in its Capability
register. The limitation of the current implementation is that it can
support at most NR_DID domains with VT-d in the entire platform, even
though the hardware could support N * NR_DID (where N is the number of
VT-d units): with N = 4 units and NR_DID = 256, for instance, the units
can collectively distinguish 1024 domains, but the shared scheme caps
the platform at 256. Imagine a platform with several SR-IOV NICs, each
supporting 128 VFs: the demand can easily exceed NR_DID.

This patch implements iommu domain-id management per iommu (VT-d
unit), which removes the above limitation. It drops the global
domain-id bitmap and instead keeps a domain-id bitmap in struct iommu,
together with an array that maps each iommu domain-id to the owning
guest's domain-id; this array is used to look up the iommu domain-id
when flushing the context cache or the IOTLB. When a device is
assigned to a guest, an available iommu domain-id is chosen from the
device's iommu and the guest's domain-id is recorded in the mapping
array. When a device is deassigned from a guest, the domain-id bit in
the bitmap and the corresponding entry in the mapping array are
cleared, provided no other device under the same iommu is owned by
that guest.
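
To make the scheme concrete, here is a minimal, self-contained sketch of
the per-unit lookup/allocate/release logic. The domid_bitmap and
domid_map fields mirror the patch; everything else (NR_DOM, the bit
helpers, the function names) is simplified illustration, not the actual
Xen code, and locking is omitted:

#include <stdint.h>

#define NR_DOM        256                    /* what cap_ndoms() reports */
#define BITS_PER_LONG (8 * sizeof(unsigned long))

/* Simplified per-VT-d-unit state. */
struct vtd_unit {
    unsigned long domid_bitmap[NR_DOM / BITS_PER_LONG]; /* ids in use */
    uint16_t domid_map[NR_DOM];              /* iommu domid -> guest domid */
};

static int bit_test(const unsigned long *bm, unsigned int i)
{
    return (bm[i / BITS_PER_LONG] >> (i % BITS_PER_LONG)) & 1;
}

static void bit_set(unsigned long *bm, unsigned int i)
{
    bm[i / BITS_PER_LONG] |= 1UL << (i % BITS_PER_LONG);
}

static void bit_clear(unsigned long *bm, unsigned int i)
{
    bm[i / BITS_PER_LONG] &= ~(1UL << (i % BITS_PER_LONG));
}

/* Find this guest's domid on the unit, allocating a free one if needed.
 * Returns -1 when the unit's domid space is exhausted. */
static int unit_get_domid(struct vtd_unit *u, uint16_t guest_domid)
{
    int i, free_slot = -1;

    for ( i = 0; i < NR_DOM; i++ )
    {
        if ( bit_test(u->domid_bitmap, i) )
        {
            if ( u->domid_map[i] == guest_domid )
                return i;                    /* already allocated here */
        }
        else if ( free_slot < 0 )
            free_slot = i;                   /* remember first free slot */
    }

    if ( free_slot < 0 )
        return -1;

    bit_set(u->domid_bitmap, free_slot);
    u->domid_map[free_slot] = guest_domid;
    return free_slot;
}

/* Release the unit's domid once the guest's last device on it is gone. */
static void unit_put_domid(struct vtd_unit *u, int iommu_domid)
{
    bit_clear(u->domid_bitmap, iommu_domid);
    u->domid_map[iommu_domid] = 0;
}

The real patch performs these steps under iommu->lock, and on units with
caching mode set it additionally reserves domid 0, which the hardware
uses to tag invalid translations.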

Signed-off-by: Weidong Han <weidong.han@xxxxxxxxx>
---
 xen/drivers/passthrough/vtd/iommu.c |  213 +++++++++++++++++++++++-------------
 xen/include/xen/hvm/iommu.h         |    1 
 xen/include/xen/iommu.h             |    2 
 3 files changed, 143 insertions(+), 73 deletions(-)

diff -r ab0d71f7f596 -r 66ff18dd3858 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Tue Dec 08 07:49:54 2009 +0000
+++ b/xen/drivers/passthrough/vtd/iommu.c       Tue Dec 08 07:51:30 2009 +0000
@@ -38,46 +38,70 @@
 #include "extern.h"
 #include "vtd.h"
 
-#define domain_iommu_domid(d) ((d)->arch.hvm_domain.hvm_iommu.iommu_domid)
-
 int nr_iommus;
-static spinlock_t domid_bitmap_lock;    /* protect domain id bitmap */
-static int domid_bitmap_size;           /* domain id bitmap size in bits */
-static unsigned long *domid_bitmap;     /* iommu domain id bitmap */
 static bool_t rwbf_quirk;
 
 static void setup_dom0_devices(struct domain *d);
 static void setup_dom0_rmrr(struct domain *d);
 
+static int domain_iommu_domid(struct domain *d,
+                              struct iommu *iommu)
+{
+    unsigned long nr_dom, i;
+
+    nr_dom = cap_ndoms(iommu->cap);
+    i = find_first_bit(iommu->domid_bitmap, nr_dom);
+    while ( i < nr_dom )
+    {
+        if ( iommu->domid_map[i] == d->domain_id )
+            return i;
+
+        i = find_next_bit(iommu->domid_bitmap, nr_dom, i+1);
+    }
+
+    gdprintk(XENLOG_ERR VTDPREFIX,
+             "Cannot get valid iommu domid: domid=%d iommu->index=%d\n",
+             d->domain_id, iommu->index);
+    return -1;
+}
+
 #define DID_FIELD_WIDTH 16
 #define DID_HIGH_OFFSET 8
-static void context_set_domain_id(struct context_entry *context,
-                                  struct domain *d)
-{
-    domid_t iommu_domid = domain_iommu_domid(d);
-
-    if ( iommu_domid == 0 )
-    {
-        spin_lock(&domid_bitmap_lock);
-        iommu_domid = find_first_zero_bit(domid_bitmap, domid_bitmap_size);
-        set_bit(iommu_domid, domid_bitmap);
-        spin_unlock(&domid_bitmap_lock);
-        d->arch.hvm_domain.hvm_iommu.iommu_domid = iommu_domid;
-    }
-
-    context->hi &= (1 << DID_HIGH_OFFSET) - 1;
-    context->hi |= iommu_domid << DID_HIGH_OFFSET;
-}
-
-static void iommu_domid_release(struct domain *d)
-{
-    domid_t iommu_domid = domain_iommu_domid(d);
-
-    if ( iommu_domid != 0 )
-    {
-        d->arch.hvm_domain.hvm_iommu.iommu_domid = 0;
-        clear_bit(iommu_domid, domid_bitmap);
-    }
+static int context_set_domain_id(struct context_entry *context,
+                                 struct domain *d,
+                                 struct iommu *iommu)
+{
+    unsigned long nr_dom, i;
+    int found = 0;
+
+    ASSERT(spin_is_locked(&iommu->lock));
+
+    nr_dom = cap_ndoms(iommu->cap);
+    i = find_first_bit(iommu->domid_bitmap, nr_dom);
+    while ( i < nr_dom )
+    {
+        if ( iommu->domid_map[i] == d->domain_id )
+        {
+            found = 1;
+            break;
+        }
+        i = find_next_bit(iommu->domid_bitmap, nr_dom, i+1);
+    }
+
+    if ( found == 0 )
+    {
+        i = find_first_zero_bit(iommu->domid_bitmap, nr_dom);
+        if ( i >= nr_dom )
+        {
+            gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no free domain ids\n");
+            return -EFAULT;
+        }
+        iommu->domid_map[i] = d->domain_id;
+    }
+
+    set_bit(i, iommu->domid_bitmap);
+    context->hi |= (i & ((1 << DID_FIELD_WIDTH) - 1)) << DID_HIGH_OFFSET;
+    return 0;
 }
 
 static struct intel_iommu *alloc_intel_iommu(void)
@@ -526,6 +550,7 @@ static void dma_pte_clear_one(struct dom
     struct dma_pte *page = NULL, *pte = NULL;
     u64 pg_maddr;
     int flush_dev_iotlb;
+    int iommu_domid;
 
     spin_lock(&hd->mapping_lock);
     /* get last level pte */
@@ -557,7 +582,10 @@ static void dma_pte_clear_one(struct dom
         if ( test_bit(iommu->index, &hd->iommu_bitmap) )
         {
             flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
-            if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
+            iommu_domid = domain_iommu_domid(domain, iommu);
+            if ( iommu_domid == -1 )
+                continue;
+            if ( iommu_flush_iotlb_psi(iommu, iommu_domid,
                                        addr, 1, 0, flush_dev_iotlb) )
                 iommu_flush_write_buffer(iommu);
         }
@@ -982,7 +1010,7 @@ static int iommu_alloc(struct acpi_drhd_
 static int iommu_alloc(struct acpi_drhd_unit *drhd)
 {
     struct iommu *iommu;
-    unsigned long sagaw;
+    unsigned long sagaw, nr_dom;
     int agaw;
 
     if ( nr_iommus > MAX_IOMMUS )
@@ -1033,6 +1061,25 @@ static int iommu_alloc(struct acpi_drhd_
     if ( !ecap_coherent(iommu->ecap) )
         iommus_incoherent = 1;
 
+    /* allocate domain id bitmap */
+    nr_dom = cap_ndoms(iommu->cap);
+    iommu->domid_bitmap = xmalloc_array(unsigned long, BITS_TO_LONGS(nr_dom));
+    if ( !iommu->domid_bitmap )
+        return -ENOMEM;
+    memset(iommu->domid_bitmap, 0, nr_dom / 8);
+
+    /*
+     * If caching mode is set, invalid translations are tagged with
+     * domain id 0; hence reserve bit 0 for it.
+     */
+    if ( cap_caching_mode(iommu->cap) )
+        set_bit(0, iommu->domid_bitmap);
+
+    iommu->domid_map = xmalloc_array(u16, nr_dom);
+    if ( !iommu->domid_map )
+        return -ENOMEM;
+    memset(iommu->domid_map, 0, nr_dom * sizeof(*iommu->domid_map));
+
     spin_lock_init(&iommu->lock);
     spin_lock_init(&iommu->register_lock);
 
@@ -1055,6 +1102,9 @@ static void iommu_free(struct acpi_drhd_
 
     if ( iommu->reg )
         iounmap(iommu->reg);
+
+    xfree(iommu->domid_bitmap);
+    xfree(iommu->domid_map);
 
     free_intel_iommu(iommu->intel);
     destroy_irq(iommu->irq);
@@ -1174,7 +1224,12 @@ static int domain_context_mapping_one(
         spin_unlock(&hd->mapping_lock);
     }
 
-    context_set_domain_id(context, domain);
+    if ( context_set_domain_id(context, domain, iommu) )
+    {
+        spin_unlock(&iommu->lock);
+        return -EFAULT;
+    }
+
     context_set_address_width(*context, agaw);
     context_set_fault_enable(*context);
     context_set_present(*context);
@@ -1292,6 +1347,10 @@ static int domain_context_unmap_one(
 {
     struct context_entry *context, *context_entries;
     u64 maddr;
+    int iommu_domid;
+    struct pci_dev *pdev;
+    struct acpi_drhd_unit *drhd;
+    int found = 0;
 
     ASSERT(spin_is_locked(&pcidevs_lock));
     spin_lock(&iommu->lock);
@@ -1311,14 +1370,50 @@ static int domain_context_unmap_one(
     context_clear_entry(*context);
     iommu_flush_cache_entry(context, sizeof(struct context_entry));
 
-    if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
+    iommu_domid = domain_iommu_domid(domain, iommu);
+    if ( iommu_domid == -1 )
+    {
+        spin_unlock(&iommu->lock);
+        unmap_vtd_domain_page(context_entries);
+        return -EINVAL;
+    }
+
+    if ( iommu_flush_context_device(iommu, iommu_domid,
                                     (((u16)bus) << 8) | devfn,
                                     DMA_CCMD_MASK_NOBIT, 0) )
         iommu_flush_write_buffer(iommu);
     else
     {
         int flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
-        iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0, flush_dev_iotlb);
+        iommu_flush_iotlb_dsi(iommu, iommu_domid, 0, flush_dev_iotlb);
+    }
+
+
+    /*
+     * If no other device under the same iommu is owned by this domain,
+     * clear this iommu in iommu_bitmap and the domain id in domid_bitmap.
+     */
+    for_each_pdev ( domain, pdev )
+    {
+        if ( pdev->bus == bus && pdev->devfn == devfn )
+            continue;
+
+        drhd = acpi_find_matched_drhd_unit(pdev);
+        if ( drhd && drhd->iommu == iommu )
+        {
+            found = 1;
+            break;
+        }
+    }
+
+    if ( found == 0 )
+    {
+        struct hvm_iommu *hd = domain_hvm_iommu(domain);
+
+        clear_bit(iommu->index, &hd->iommu_bitmap);
+
+        clear_bit(iommu_domid, iommu->domid_bitmap);
+        iommu->domid_map[iommu_domid] = 0;
     }
 
     spin_unlock(&iommu->lock);
@@ -1397,11 +1492,8 @@ static int reassign_device_ownership(
     struct domain *target,
     u8 bus, u8 devfn)
 {
-    struct hvm_iommu *source_hd = domain_hvm_iommu(source);
     struct pci_dev *pdev;
-    struct acpi_drhd_unit *drhd;
-    struct iommu *pdev_iommu;
-    int ret, found = 0;
+    int ret;
 
     ASSERT(spin_is_locked(&pcidevs_lock));
     pdev = pci_get_pdev_by_domain(source, bus, devfn);
@@ -1409,10 +1501,9 @@ static int reassign_device_ownership(
     if (!pdev)
         return -ENODEV;
 
-    if ( (drhd = acpi_find_matched_drhd_unit(pdev)) == NULL )
-        return -ENODEV;
-    pdev_iommu = drhd->iommu;
-    domain_context_unmap(source, bus, devfn);
+    ret = domain_context_unmap(source, bus, devfn);
+    if ( ret )
+        return ret;
 
     ret = domain_context_mapping(target, bus, devfn);
     if ( ret )
@@ -1420,19 +1511,6 @@ static int reassign_device_ownership(
 
     list_move(&pdev->domain_list, &target->arch.pdev_list);
     pdev->domain = target;
-
-    for_each_pdev ( source, pdev )
-    {
-        drhd = acpi_find_matched_drhd_unit(pdev);
-        if ( drhd && drhd->iommu == pdev_iommu )
-        {
-            found = 1;
-            break;
-        }
-    }
-
-    if ( !found )
-        clear_bit(pdev_iommu->index, &source_hd->iommu_bitmap);
 
     return ret;
 }
@@ -1448,8 +1526,6 @@ void iommu_domain_teardown(struct domain
     iommu_free_pagetable(hd->pgd_maddr, agaw_to_level(hd->agaw));
     hd->pgd_maddr = 0;
     spin_unlock(&hd->mapping_lock);
-
-    iommu_domid_release(d);
 }
 
 static int intel_iommu_map_page(
@@ -1462,6 +1538,7 @@ static int intel_iommu_map_page(
     u64 pg_maddr;
     int pte_present;
     int flush_dev_iotlb;
+    int iommu_domid;
 
     /* do nothing if dom0 and iommu supports pass thru */
     if ( iommu_passthrough && (d->domain_id == 0) )
@@ -1501,7 +1578,10 @@ static int intel_iommu_map_page(
             continue;
 
         flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
-        if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
+        iommu_domid = domain_iommu_domid(d, iommu);
+        if ( iommu_domid == -1 )
+            continue;
+        if ( iommu_flush_iotlb_psi(iommu, iommu_domid,
                                    (paddr_t)gfn << PAGE_SHIFT_4K, 1,
                                    !pte_present, flush_dev_iotlb) )
             iommu_flush_write_buffer(iommu);
@@ -1780,7 +1860,6 @@ int intel_vtd_setup(void)
 
     platform_quirks();
 
-    spin_lock_init(&domid_bitmap_lock);
     clflush_size = get_cache_line_size();
 
     irq_to_iommu = xmalloc_array(struct iommu*, nr_irqs);
@@ -1827,16 +1906,6 @@ int intel_vtd_setup(void)
     P(iommu_qinval, "Queued Invalidation");
     P(iommu_intremap, "Interrupt Remapping");
 #undef P
-
-    /* Allocate domain id bitmap, and set bit 0 as reserved. */
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-    domid_bitmap_size = cap_ndoms(drhd->iommu->cap);
-    domid_bitmap = xmalloc_array(unsigned long,
-                                 BITS_TO_LONGS(domid_bitmap_size));
-    if ( domid_bitmap == NULL )
-        goto error;
-    memset(domid_bitmap, 0, domid_bitmap_size / 8);
-    __set_bit(0, domid_bitmap);
 
     scan_pci_devices();
 
diff -r ab0d71f7f596 -r 66ff18dd3858 xen/include/xen/hvm/iommu.h
--- a/xen/include/xen/hvm/iommu.h       Tue Dec 08 07:49:54 2009 +0000
+++ b/xen/include/xen/hvm/iommu.h       Tue Dec 08 07:51:30 2009 +0000
@@ -34,7 +34,6 @@ struct hvm_iommu {
     spinlock_t mapping_lock;       /* io page table lock */
     int agaw;     /* adjusted guest address width, 0 is level 2 30-bit */
     struct list_head g2m_ioport_list;  /* guest to machine ioport mapping */
-    domid_t iommu_domid;           /* domain id stored in iommu */
     u64 iommu_bitmap;              /* bitmap of iommu(s) that the domain uses */
 
     /* amd iommu support */
diff -r ab0d71f7f596 -r 66ff18dd3858 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h   Tue Dec 08 07:49:54 2009 +0000
+++ b/xen/include/xen/iommu.h   Tue Dec 08 07:51:30 2009 +0000
@@ -55,6 +55,8 @@ struct iommu {
     u64 root_maddr; /* root entry machine address */
     int irq;
     struct intel_iommu *intel;
+    unsigned long *domid_bitmap;  /* domain id bitmap */
+    u16 *domid_map;               /* domain id mapping array */
 };
 
 int iommu_setup(void);
