
[Xen-changelog] [xen-unstable] iommu: Handle sibling device assignment correctly



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1211982083 -3600
# Node ID b60cf40fae135a38688892efee522d470abdf95b
# Parent  c2fab221b3ece1ecfc0cb2777655e181925fe340
iommu: Handle sibling device assignment correctly

The domctl interface is extended to allow libxc to retrieve device
group information from the hypervisor. The vendor-specific iommu_ops
is also extended with a new operation, "get_device_group_id()"; it is
implemented for the AMD IOMMU here, while the VT-d hook is left as a
null pointer that could be implemented later.
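
As a minimal caller sketch (not part of this changeset;
print_device_group() and its error handling are invented for
illustration), a C tool linked against libxc might drive the new call
as follows, packing the BDF the same way the Python binding below
does:

    /* Sketch only: assumes an xc_handle from xc_interface_open(). */
    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>
    #include <xenctrl.h>

    static int print_device_group(int xc_handle, uint32_t domid,
                                  int bus, int dev, int func)
    {
        uint32_t max_sdevs = 1024, num_sdevs = 0, i;
        /* bus:dev.func packed as in pyxc_get_device_group() below. */
        uint32_t bdf = ((bus & 0xff) << 16) | ((dev & 0x1f) << 11) |
                       ((func & 0x7) << 8);
        uint32_t *sdevs = calloc(max_sdevs, sizeof(*sdevs));
        int rc;

        if ( sdevs == NULL )
            return -1;

        rc = xc_get_device_group(xc_handle, domid, bdf,
                                 max_sdevs, &num_sdevs, sdevs);
        if ( rc == 0 )
            for ( i = 0; i < num_sdevs; i++ )
                printf("sibling: %02x:%02x.%x\n",
                       (sdevs[i] >> 16) & 0xff,
                       (sdevs[i] >> 11) & 0x1f,
                       (sdevs[i] >> 8) & 0x7);

        free(sdevs);
        return rc;
    }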

An error is raised on the tools side when a user tries to assign a
PCI device while one of its sibling devices is still driven by dom0.
The user keeps being prompted until the entire device group (or at
least the sibling devices driven by dom0) has been hidden via the
dom0 kernel command-line parameter. Hopefully this framework is
flexible enough to support both the AMD IOMMU and VT-d.
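
For example, if the group check reports that 01:02.0 and 01:02.1
share a group (BDFs made up for illustration), dom0 would be booted
with something like:

    pciback.hide=(01:02.0)(01:02.1)

so that pciback, rather than a dom0 driver, owns both functions
before either one is assigned.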

The following two cases are not covered by this patch, but should be
easy to handle:
* Checking hot-plugged devices (maybe we can delay calling
ImageHandler.signalDeviceModel() until all checks are done?)
* Checking for device groups split between different passthrough
domains
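
For reference, the group string that pyxc_get_device_group() below
hands back to Python is a comma-terminated list of sibling BDFs in
"%02x:%02x.%x," form, e.g. (values invented):

    02:00.0,02:00.1,

which CheckSiblingDevices() in pciif.py splits on ',' and checks
against the pciback-bound devices.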

Signed-off-by: Wei Wang <wei.wang2@xxxxxxx>
---
 tools/libxc/xc_domain.c                     |   31 +++++++++++
 tools/libxc/xenctrl.h                       |    7 ++
 tools/python/xen/lowlevel/xc/xc.c           |   73 ++++++++++++++++++++++++++++
 tools/python/xen/xend/server/pciif.py       |   35 +++++++++++++
 xen/arch/x86/domctl.c                       |   39 ++++++++++++++
 xen/drivers/passthrough/amd/pci_amd_iommu.c |   11 ++++
 xen/drivers/passthrough/iommu.c             |   37 +++++++++++++-
 xen/drivers/passthrough/vtd/iommu.c         |    1 +
 xen/include/public/domctl.h                 |   11 ++++
 xen/include/xen/iommu.h                     |    3 +
 10 files changed, 247 insertions(+), 1 deletion(-)

diff -r c2fab221b3ec -r b60cf40fae13 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Wed May 28 12:22:36 2008 +0100
+++ b/tools/libxc/xc_domain.c   Wed May 28 14:41:23 2008 +0100
@@ -767,6 +767,37 @@ int xc_assign_device(
     return do_domctl(xc_handle, &domctl);
 }
 
+int xc_get_device_group(
+    int xc_handle,
+    uint32_t domid,
+    uint32_t machine_bdf,
+    uint32_t max_sdevs,
+    uint32_t *num_sdevs,
+    uint32_t *sdev_array)
+{
+    int rc;
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_get_device_group;
+    domctl.domain = (domid_t)domid;
+
+    domctl.u.get_device_group.machine_bdf = machine_bdf;
+    domctl.u.get_device_group.max_sdevs = max_sdevs;
+
+    set_xen_guest_handle(domctl.u.get_device_group.sdev_array, sdev_array);
+
+    if ( lock_pages(sdev_array, max_sdevs * sizeof(*sdev_array)) != 0 )
+    {
+        PERROR("Could not lock memory for xc_get_device_group\n");
+        return -ENOMEM;
+    }
+    rc = do_domctl(xc_handle, &domctl);
+    unlock_pages(sdev_array, max_sdevs * sizeof(*sdev_array));
+
+    *num_sdevs = domctl.u.get_device_group.num_sdevs;
+    return rc;
+}
+
 int xc_test_assign_device(
     int xc_handle,
     uint32_t domid,
diff -r c2fab221b3ec -r b60cf40fae13 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Wed May 28 12:22:36 2008 +0100
+++ b/tools/libxc/xenctrl.h     Wed May 28 14:41:23 2008 +0100
@@ -955,6 +955,13 @@ int xc_assign_device(int xc_handle,
                      uint32_t domid,
                      uint32_t machine_bdf);
 
+int xc_get_device_group(int xc_handle,
+                     uint32_t domid,
+                     uint32_t machine_bdf,
+                     uint32_t max_sdevs,
+                     uint32_t *num_sdevs,
+                     uint32_t *sdev_array);
+
 int xc_test_assign_device(int xc_handle,
                           uint32_t domid,
                           uint32_t machine_bdf);
diff -r c2fab221b3ec -r b60cf40fae13 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Wed May 28 12:22:36 2008 +0100
+++ b/tools/python/xen/lowlevel/xc/xc.c Wed May 28 14:41:23 2008 +0100
@@ -646,6 +646,68 @@ static PyObject *pyxc_deassign_device(Xc
     return Py_BuildValue("i", bdf);
 }
 
+static PyObject *pyxc_get_device_group(XcObject *self,
+                                         PyObject *args)
+{
+    domid_t domid;
+    uint32_t bdf = 0;
+    uint32_t max_sdevs, num_sdevs;
+    int seg, bus, dev, func, rc, i;
+    PyObject *Pystr;
+    char *group_str;
+    char dev_str[9];
+    uint32_t *sdev_array;
+
+    if ( !PyArg_ParseTuple(args, "iiiii", &domid, &seg, &bus, &dev, &func) )
+        return NULL;
+
+    /* Maximum allowed siblings device number per group */
+    max_sdevs = 1024;
+
+    if ( (sdev_array = malloc(max_sdevs * sizeof(*sdev_array))) == NULL )
+        return PyErr_NoMemory();
+    memset(sdev_array, 0, max_sdevs * sizeof(*sdev_array));
+
+    bdf |= (bus & 0xff) << 16;
+    bdf |= (dev & 0x1f) << 11;
+    bdf |= (func & 0x7) << 8;
+
+    rc = xc_get_device_group(self->xc_handle,
+        domid, bdf, max_sdevs, &num_sdevs, sdev_array);
+
+    if ( rc < 0 )
+    {
+      free(sdev_array); 
+      return pyxc_error_to_exception();
+    }
+
+    if ( !num_sdevs )
+    {
+       free(sdev_array);
+       return Py_BuildValue("s", "");
+    }
+
+    if ( (group_str = malloc(num_sdevs * sizeof(dev_str))) == NULL )
+        return PyErr_NoMemory();
+    memset(group_str, '\0', num_sdevs * sizeof(dev_str));
+
+    for ( i = 0; i < num_sdevs; i++ )
+    {
+        bus = (sdev_array[i] >> 16) & 0xff;
+        dev = (sdev_array[i] >> 11) & 0x1f;
+        func = (sdev_array[i] >> 8) & 0x7;
+        sprintf(dev_str, "%02x:%02x.%x,", bus, dev, func);
+        strcat(group_str, dev_str);
+    }
+
+    Pystr = Py_BuildValue("s", group_str);
+
+    free(sdev_array);
+    free(group_str);
+
+    return Pystr;
+}
+
 #ifdef __ia64__
 static PyObject *pyxc_nvram_init(XcObject *self,
                                  PyObject *args)
@@ -1583,6 +1645,17 @@ static PyMethodDef pyxc_methods[] = {
       " param   [int]:      No. of HVM param.\n"
       " value   [long]:     Value of param.\n"
       "Returns: [int] 0 on success.\n" },
+
+    { "get_device_group",
+      (PyCFunction)pyxc_get_device_group,
+      METH_VARARGS, "\n"
+      "get sibling devices infomation.\n"
+      " dom     [int]:      Domain to assign device to.\n"
+      " seg     [int]:      PCI segment.\n"
+      " bus     [int]:      PCI bus.\n"
+      " dev     [int]:      PCI dev.\n"
+      " func    [int]:      PCI func.\n"
+      "Returns: [string]:   Sibling devices \n" },
 
      { "test_assign_device",
        (PyCFunction)pyxc_test_assign_device,
diff -r c2fab221b3ec -r b60cf40fae13 tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py     Wed May 28 12:22:36 2008 +0100
+++ b/tools/python/xen/xend/server/pciif.py     Wed May 28 14:41:23 2008 +0100
@@ -226,6 +226,39 @@ class PciController(DevController):
 
         return sxpr    
 
+    def CheckSiblingDevices(self, domid, dev):
+        """ Check if all sibling devices of dev are owned by pciback
+        """
+        if not self.vm.info.is_hvm():
+            return
+
+        group_str = xc.get_device_group(domid, dev.domain, dev.bus, dev.slot, dev.func)
+        if group_str == "":
+            return
+
+        #group string format xx:xx.x,xx:xx.x,
+        devstr_len = group_str.find(',')
+        for i in range(0, len(group_str), devstr_len + 1):
+            (bus, slotfunc) = group_str[i:i + devstr_len].split(':')
+            (slot, func) = slotfunc.split('.')
+            b = parse_hex(bus)
+            d = parse_hex(slot)
+            f = parse_hex(func)
+            try:
+                sdev = PciDevice(dev.domain, b, d, f)
+            except Exception, e:
+                #no dom0 drivers bound to sdev
+                continue
+
+            if sdev.driver!='pciback':
+                raise VmError(("pci: PCI Backend does not own\n "+ \
+                    "sibling device %s of device %s\n"+ \
+                    "See the pciback.hide kernel "+ \
+                    "command-line parameter or\n"+ \
+                    "bind your slot/device to the PCI backend using sysfs" \
+                    )%(sdev.name, dev.name))
+        return
+
     def setupOneDevice(self, domain, bus, slot, func):
         """ Attach I/O resources for device to frontend domain
         """
@@ -244,6 +277,8 @@ class PciController(DevController):
                     "command-line parameter or\n"+ \
                     "bind your slot/device to the PCI backend using sysfs" \
                     )%(dev.name))
+
+        self.CheckSiblingDevices(fe_domid, dev)
 
         PCIQuirk(dev.vendor, dev.device, dev.subvendor, dev.subdevice, domain, 
                 bus, slot, func)
diff -r c2fab221b3ec -r b60cf40fae13 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Wed May 28 12:22:36 2008 +0100
+++ b/xen/arch/x86/domctl.c     Wed May 28 14:41:23 2008 +0100
@@ -526,6 +526,45 @@ long arch_do_domctl(
     }
     break;
 
+    case XEN_DOMCTL_get_device_group:
+    {
+        struct domain *d;
+        u32 max_sdevs;
+        u8 bus, devfn;
+        XEN_GUEST_HANDLE_64(uint32) sdevs;
+        int num_sdevs;
+
+        ret = -ENOSYS;
+        if ( !iommu_enabled )
+            break;
+
+        ret = -EINVAL;
+        if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
+            break;
+
+        bus = (domctl->u.get_device_group.machine_bdf >> 16) & 0xff;
+        devfn = (domctl->u.get_device_group.machine_bdf >> 8) & 0xff;
+        max_sdevs = domctl->u.get_device_group.max_sdevs;
+        sdevs = domctl->u.get_device_group.sdev_array;
+
+        num_sdevs = iommu_get_device_group(d, bus, devfn, sdevs, max_sdevs);
+        if ( num_sdevs < 0 )
+        {
+            dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n");
+            ret = -EFAULT;
+            domctl->u.get_device_group.num_sdevs = 0;
+        }
+        else
+        {
+            ret = 0;
+            domctl->u.get_device_group.num_sdevs = num_sdevs;
+        }
+        if ( copy_to_guest(u_domctl, domctl, 1) )
+            ret = -EFAULT;
+        rcu_unlock_domain(d);
+    }
+    break;
+
     case XEN_DOMCTL_test_assign_device:
     {
         u8 bus, devfn;
diff -r c2fab221b3ec -r b60cf40fae13 xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c       Wed May 28 12:22:36 2008 +0100
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c       Wed May 28 14:41:23 2008 +0100
@@ -635,6 +635,16 @@ static void amd_iommu_return_device(
     reassign_device(s, t, bus, devfn);
 }
 
+static int amd_iommu_group_id(u8 bus, u8 devfn)
+{
+    int rt;
+    int bdf = (bus << 8) | devfn;
+    rt = ( bdf < ivrs_bdf_entries ) ?
+        ivrs_mappings[bdf].dte_requestor_id :
+        bdf;
+    return rt;
+}
+
 struct iommu_ops amd_iommu_ops = {
     .init = amd_iommu_domain_init,
     .assign_device  = amd_iommu_assign_device,
@@ -642,4 +652,5 @@ struct iommu_ops amd_iommu_ops = {
     .map_page = amd_iommu_map_page,
     .unmap_page = amd_iommu_unmap_page,
     .reassign_device = amd_iommu_return_device,
+    .get_device_group_id = amd_iommu_group_id,
 };
diff -r c2fab221b3ec -r b60cf40fae13 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c   Wed May 28 12:22:36 2008 +0100
+++ b/xen/drivers/passthrough/iommu.c   Wed May 28 14:41:23 2008 +0100
@@ -16,6 +16,7 @@
 #include <xen/sched.h>
 #include <xen/iommu.h>
 #include <xen/paging.h>
+#include <xen/guest_access.h>
 
 extern struct iommu_ops intel_iommu_ops;
 extern struct iommu_ops amd_iommu_ops;
@@ -216,7 +217,41 @@ static int iommu_setup(void)
 }
 __initcall(iommu_setup);
 
-
+int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, 
+    XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs)
+{
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+    struct pci_dev *pdev;
+    int group_id, sdev_id;
+    u32 bdf;
+    int i = 0;
+    struct iommu_ops *ops = hd->platform_ops;
+
+    if ( !iommu_enabled || !ops || !ops->get_device_group_id )
+        return 0;
+
+    group_id = ops->get_device_group_id(bus, devfn);
+
+    list_for_each_entry(pdev,
+        &(dom0->arch.hvm_domain.hvm_iommu.pdev_list), list)
+    {
+        if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
+            continue;
+
+        sdev_id = ops->get_device_group_id(pdev->bus, pdev->devfn);
+        if ( (sdev_id == group_id) && (i < max_sdevs) )
+        {
+            bdf = 0;
+            bdf |= (pdev->bus & 0xff) << 16;
+            bdf |= (pdev->devfn & 0xff) << 8;
+            if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
+                return -1;
+            i++;
+        }
+    }
+
+    return i;
+}
 /*
  * Local variables:
  * mode: C
diff -r c2fab221b3ec -r b60cf40fae13 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Wed May 28 12:22:36 2008 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c       Wed May 28 14:41:23 2008 +0100
@@ -1955,6 +1955,7 @@ struct iommu_ops intel_iommu_ops = {
     .map_page = intel_iommu_map_page,
     .unmap_page = intel_iommu_unmap_page,
     .reassign_device = reassign_device_ownership,
+    .get_device_group_id = NULL,
 };
 
 /*
diff -r c2fab221b3ec -r b60cf40fae13 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h       Wed May 28 12:22:36 2008 +0100
+++ b/xen/include/public/domctl.h       Wed May 28 14:41:23 2008 +0100
@@ -448,6 +448,16 @@ typedef struct xen_domctl_assign_device 
 typedef struct xen_domctl_assign_device xen_domctl_assign_device_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t);
 
+/* Retrieve sibling device information for machine_bdf */
+#define XEN_DOMCTL_get_device_group 50
+struct xen_domctl_get_device_group {
+    uint32_t  machine_bdf;      /* IN */
+    uint32_t  max_sdevs;        /* IN */
+    uint32_t  num_sdevs;        /* OUT */
+    XEN_GUEST_HANDLE_64(uint32)  sdev_array;   /* OUT */
+};
+typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t);
 
 /* Pass-through interrupts: bind real irq -> hvm devfn. */
 #define XEN_DOMCTL_bind_pt_irq       38
@@ -619,6 +629,7 @@ struct xen_domctl {
         struct xen_domctl_hvmcontext        hvmcontext;
         struct xen_domctl_address_size      address_size;
         struct xen_domctl_sendtrigger       sendtrigger;
+        struct xen_domctl_get_device_group  get_device_group;
         struct xen_domctl_assign_device     assign_device;
         struct xen_domctl_bind_pt_irq       bind_pt_irq;
         struct xen_domctl_memory_mapping    memory_mapping;
diff -r c2fab221b3ec -r b60cf40fae13 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h   Wed May 28 12:22:36 2008 +0100
+++ b/xen/include/xen/iommu.h   Wed May 28 14:41:23 2008 +0100
@@ -61,6 +61,8 @@ int device_assigned(u8 bus, u8 devfn);
 int device_assigned(u8 bus, u8 devfn);
 int assign_device(struct domain *d, u8 bus, u8 devfn);
 void deassign_device(struct domain *d, u8 bus, u8 devfn);
+int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, 
+    XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs);
 void reassign_device_ownership(struct domain *source,
                                struct domain *target,
                                u8 bus, u8 devfn);
@@ -98,6 +100,7 @@ struct iommu_ops {
     int (*unmap_page)(struct domain *d, unsigned long gfn);
     void (*reassign_device)(struct domain *s, struct domain *t,
                             u8 bus, u8 devfn);
+    int (*get_device_group_id)(u8 bus, u8 devfn);
 };
 
 #endif /* _IOMMU_H_ */
