[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [v8][PATCH 02/17] introduce XEN_DOMCTL_set_rdm



This should be based on a new parameter globally, 'pci_rdmforce'.

pci_rdmforce = 1 => Of course this should be 0 by default.

'1' means we should force check to reserve all ranges. If failed
VM wouldn't be created successfully. This also can give user a
chance to work well with later hotplug, even if not a device
assignment while creating VM.

But we can override that by one specific pci device:

pci = ['AA:BB.CC,rdmforce=0/1]

But this 'rdmforce' should be 1 by default since obviously any
passthrough device always need to do this. Actually no one really
want to set as '0' so it may be unnecessary but I'd like to leave
this as a potential approach.

So this domctl provides an approach to control how to populate
reserved device memory by tools.

Note we always post a message to user about this once we owns
RMRR.

Signed-off-by: Tiejun Chen <tiejun.chen@xxxxxxxxx>
---
 docs/man/xl.cfg.pod.5              |  6 +++++
 docs/misc/vtd.txt                  | 15 ++++++++++++
 tools/libxc/include/xenctrl.h      |  6 +++++
 tools/libxc/xc_domain.c            | 28 +++++++++++++++++++++++
 tools/libxl/libxl_create.c         |  3 +++
 tools/libxl/libxl_dm.c             | 47 ++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_internal.h       |  4 ++++
 tools/libxl/libxl_types.idl        |  2 ++
 tools/libxl/libxlu_pci.c           |  2 ++
 tools/libxl/xl_cmdimpl.c           | 10 ++++++++
 xen/drivers/passthrough/pci.c      | 39 +++++++++++++++++++++++++++++++
 xen/drivers/passthrough/vtd/dmar.c |  8 +++++++
 xen/include/asm-x86/hvm/domain.h   |  4 ++++
 xen/include/public/domctl.h        | 21 +++++++++++++++++
 xen/xsm/flask/hooks.c              |  1 +
 15 files changed, 196 insertions(+)

diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
index 622ea53..9adc41e 100644
--- a/docs/man/xl.cfg.pod.5
+++ b/docs/man/xl.cfg.pod.5
@@ -645,6 +645,12 @@ dom0 without confirmation.  Please use with care.
 D0-D3hot power management states for the PCI device. False (0) by
 default.
 
+=item B<rdmforce=BOOLEAN>
+
+(HVM/x86 only) Specifies that the VM would force to check and try to
+reserve all reserved device memory, like RMRR, associated to the PCI
+device. False (0) by default.
+
 =back
 
 =back
diff --git a/docs/misc/vtd.txt b/docs/misc/vtd.txt
index 9af0e99..23544d5 100644
--- a/docs/misc/vtd.txt
+++ b/docs/misc/vtd.txt
@@ -111,6 +111,21 @@ in the config file:
 To override for a specific device:
        pci = [ '01:00.0,msitranslate=0', '03:00.0' ]
 
+RDM, 'reserved device memory', for PCI Device Passthrough
+---------------------------------------------------------
+
+The BIOS controls some devices in terms of some reginos of memory used for
+these devices. This kind of region should be reserved before creating a VM
+to make sure they are not occupied by RAM/MMIO to conflict, and also we can
+create necessary IOMMU table successfully.
+
+To enable this globally, add "pci_rdmforce" in the config file:
+
+       pci_rdmforce = 1         (default is 0)
+
+Or just enable for a specific device:
+       pci = [ '01:00.0,rdmforce=1', '03:00.0' ]
+
 
 Caveat on Conventional PCI Device Passthrough
 ---------------------------------------------
diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 0ad8b8d..84012fe 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2038,6 +2038,12 @@ int xc_assign_device(xc_interface *xch,
                      uint32_t domid,
                      uint32_t machine_bdf);
 
+int xc_domain_device_setrdm(xc_interface *xch,
+                            uint32_t domid,
+                            uint32_t num_pcidevs,
+                            uint32_t pci_rdmforce,
+                            struct xen_guest_pcidev_info *pcidevs);
+
 int xc_get_device_group(xc_interface *xch,
                      uint32_t domid,
                      uint32_t machine_bdf,
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index b864872..7fd43e9 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -1633,6 +1633,34 @@ int xc_assign_device(
     return do_domctl(xch, &domctl);
 }
 
+int xc_domain_device_setrdm(xc_interface *xch,
+                            uint32_t domid,
+                            uint32_t num_pcidevs,
+                            uint32_t pci_rdmforce,
+                            struct xen_guest_pcidev_info *pcidevs)
+{
+    int ret;
+    DECLARE_DOMCTL;
+    DECLARE_HYPERCALL_BOUNCE(pcidevs,
+                             num_pcidevs*sizeof(xen_guest_pcidev_info_t),
+                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
+
+    if ( xc_hypercall_bounce_pre(xch, pcidevs) )
+        return -1;
+
+    domctl.cmd = XEN_DOMCTL_set_rdm;
+    domctl.domain = (domid_t)domid;
+    domctl.u.set_rdm.flags = pci_rdmforce;
+    domctl.u.set_rdm.num_pcidevs = num_pcidevs;
+    set_xen_guest_handle(domctl.u.set_rdm.pcidevs, pcidevs);
+
+    ret = do_domctl(xch, &domctl);
+
+    xc_hypercall_bounce_post(xch, pcidevs);
+
+    return ret;
+}
+
 int xc_get_device_group(
     xc_interface *xch,
     uint32_t domid,
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 1198225..c615686 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -862,6 +862,9 @@ static void initiate_domain_create(libxl__egc *egc,
     ret = libxl__domain_build_info_setdefault(gc, &d_config->b_info);
     if (ret) goto error_out;
 
+    ret = libxl__domain_device_setrdm(gc, d_config, domid);
+    if (ret) goto error_out;
+
     if (!sched_params_valid(gc, domid, &d_config->b_info.sched_params)) {
         LOG(ERROR, "Invalid scheduling parameters\n");
         ret = ERROR_INVAL;
diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
index 3e191c3..e50587d 100644
--- a/tools/libxl/libxl_dm.c
+++ b/tools/libxl/libxl_dm.c
@@ -90,6 +90,53 @@ const char *libxl__domain_device_model(libxl__gc *gc,
     return dm;
 }
 
+int libxl__domain_device_setrdm(libxl__gc *gc,
+                                libxl_domain_config *d_config,
+                                uint32_t dm_domid)
+{
+    int i, ret;
+    libxl_ctx *ctx = libxl__gc_owner(gc);
+    struct xen_guest_pcidev_info *pcidevs = NULL;
+    uint32_t rdmforce = 0;
+
+    if ( d_config->num_pcidevs )
+    {
+        pcidevs = 
malloc(d_config->num_pcidevs*sizeof(xen_guest_pcidev_info_t));
+        if ( pcidevs )
+        {
+            for (i = 0; i < d_config->num_pcidevs; i++)
+            {
+                pcidevs[i].devfn = PCI_DEVFN(d_config->pcidevs[i].dev,
+                                             d_config->pcidevs[i].func);
+                pcidevs[i].bus = d_config->pcidevs[i].bus;
+                pcidevs[i].seg = d_config->pcidevs[i].domain;
+                pcidevs[i].flags = d_config->pcidevs[i].rdmforce &
+                                   PCI_DEV_RDM_CHECK;
+            }
+        }
+        else
+        {
+            LIBXL__LOG(CTX, LIBXL__LOG_ERROR,
+                               "Can't allocate for pcidevs.");
+            return -1;
+        }
+    }
+    rdmforce = libxl_defbool_val(d_config->b_info.rdmforce) ? 1 : 0;
+
+    /* Nothing to do. */
+    if ( !rdmforce && !d_config->num_pcidevs )
+        return 0;
+
+    ret = xc_domain_device_setrdm(ctx->xch, dm_domid,
+                                  (uint32_t)d_config->num_pcidevs,
+                                  rdmforce,
+                                  pcidevs);
+    if ( d_config->num_pcidevs )
+        free(pcidevs);
+
+    return ret;
+}
+
 const libxl_vnc_info *libxl__dm_vnc(const libxl_domain_config *guest_config)
 {
     const libxl_vnc_info *vnc = NULL;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index a38f695..be397a6 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1477,6 +1477,10 @@ _hidden int libxl__need_xenpv_qemu(libxl__gc *gc,
         int nr_disks, libxl_device_disk *disks,
         int nr_channels, libxl_device_channel *channels);
 
+_hidden int libxl__domain_device_setrdm(libxl__gc *gc,
+                                        libxl_domain_config *info,
+                                        uint32_t domid);
+
 /*
  * This function will cause the whole libxl process to hang
  * if the device model does not respond.  It is deprecated.
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index f7fc695..0076a32 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -398,6 +398,7 @@ libxl_domain_build_info = Struct("domain_build_info",[
     ("kernel",           string),
     ("cmdline",          string),
     ("ramdisk",          string),
+    ("rdmforce",         libxl_defbool),
     ("u", KeyedUnion(None, libxl_domain_type, "type",
                 [("hvm", Struct(None, [("firmware",         string),
                                        ("bios",             libxl_bios_type),
@@ -518,6 +519,7 @@ libxl_device_pci = Struct("device_pci", [
     ("power_mgmt", bool),
     ("permissive", bool),
     ("seize", bool),
+    ("rdmforce", bool),
     ])
 
 libxl_device_vtpm = Struct("device_vtpm", [
diff --git a/tools/libxl/libxlu_pci.c b/tools/libxl/libxlu_pci.c
index 26fb143..989eac8 100644
--- a/tools/libxl/libxlu_pci.c
+++ b/tools/libxl/libxlu_pci.c
@@ -143,6 +143,8 @@ int xlu_pci_parse_bdf(XLU_Config *cfg, libxl_device_pci 
*pcidev, const char *str
                     pcidev->permissive = atoi(tok);
                 }else if ( !strcmp(optkey, "seize") ) {
                     pcidev->seize = atoi(tok);
+                }else if ( !strcmp(optkey, "rdmforce") ) {
+                    pcidev->rdmforce = atoi(tok);
                 }else{
                     XLU__PCI_ERR(cfg, "Unknown PCI BDF option: %s", optkey);
                 }
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index 0e754e7..9c23733 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -919,6 +919,7 @@ static void parse_config_data(const char *config_source,
     int pci_msitranslate = 0;
     int pci_permissive = 0;
     int pci_seize = 0;
+    int pci_rdmforce = 0;
     int i, e;
 
     libxl_domain_create_info *c_info = &d_config->c_info;
@@ -1699,6 +1700,9 @@ skip_vfb:
     if (!xlu_cfg_get_long (config, "pci_seize", &l, 0))
         pci_seize = l;
 
+    if (!xlu_cfg_get_long (config, "pci_rdmforce", &l, 0))
+        pci_rdmforce = l;
+
     /* To be reworked (automatically enabled) once the auto ballooning
      * after guest starts is done (with PCI devices passed in). */
     if (c_info->type == LIBXL_DOMAIN_TYPE_PV) {
@@ -1719,6 +1723,7 @@ skip_vfb:
             pcidev->power_mgmt = pci_power_mgmt;
             pcidev->permissive = pci_permissive;
             pcidev->seize = pci_seize;
+            pcidev->rdmforce = pci_rdmforce;
             if (!xlu_pci_parse_bdf(config, pcidev, buf))
                 d_config->num_pcidevs++;
         }
@@ -1726,6 +1731,11 @@ skip_vfb:
             libxl_defbool_set(&b_info->u.pv.e820_host, true);
     }
 
+    if ((c_info->type == LIBXL_DOMAIN_TYPE_HVM) && pci_rdmforce)
+        libxl_defbool_set(&b_info->rdmforce, true);
+    else
+        libxl_defbool_set(&b_info->rdmforce, false);
+
     switch (xlu_cfg_get_list(config, "cpuid", &cpuids, 0, 1)) {
     case 0:
         {
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 78c6977..ae924ad 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -34,6 +34,7 @@
 #include <xen/tasklet.h>
 #include <xsm/xsm.h>
 #include <asm/msi.h>
+#include <xen/stdbool.h>
 
 struct pci_seg {
     struct list_head alldevs_list;
@@ -1553,6 +1554,44 @@ int iommu_do_pci_domctl(
         }
         break;
 
+    case XEN_DOMCTL_set_rdm:
+    {
+        struct xen_domctl_set_rdm *xdsr = &domctl->u.set_rdm;
+        struct xen_guest_pcidev_info *pcidevs = NULL;
+        struct domain *d = rcu_lock_domain_by_any_id(domctl->domain);
+
+        if ( d == NULL )
+            return -ESRCH;
+
+        d->arch.hvm_domain.pci_force =
+                            xdsr->flags & PCI_DEV_RDM_CHECK ? true : false;
+        d->arch.hvm_domain.num_pcidevs = xdsr->num_pcidevs;
+        d->arch.hvm_domain.pcidevs = NULL;
+
+        if ( xdsr->num_pcidevs )
+        {
+            pcidevs = xmalloc_array(xen_guest_pcidev_info_t,
+                                    xdsr->num_pcidevs);
+            if ( pcidevs == NULL )
+            {
+                rcu_unlock_domain(d);
+                return -ENOMEM;
+            }
+
+            if ( copy_from_guest(pcidevs, xdsr->pcidevs,
+                                 xdsr->num_pcidevs*sizeof(*pcidevs)) )
+            {
+                xfree(pcidevs);
+                rcu_unlock_domain(d);
+                return -EFAULT;
+            }
+        }
+
+        d->arch.hvm_domain.pcidevs = pcidevs;
+        rcu_unlock_domain(d);
+    }
+        break;
+
     case XEN_DOMCTL_assign_device:
         if ( unlikely(d->is_dying) )
         {
diff --git a/xen/drivers/passthrough/vtd/dmar.c 
b/xen/drivers/passthrough/vtd/dmar.c
index 1152c3a..5e41e7a 100644
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -674,6 +674,14 @@ acpi_parse_one_rmrr(struct acpi_dmar_header *header)
                         "  RMRR region: base_addr %"PRIx64
                         " end_address %"PRIx64"\n",
                         rmrru->base_address, rmrru->end_address);
+            /*
+             * TODO: we may provide a precise paramter just to reserve
+             * RMRR range specific to one device.
+             */
+            dprintk(XENLOG_WARNING VTDPREFIX,
+                    "So please set pci_rdmforce to reserve these ranges"
+                    " if you need such a device in hotplug case.\n");
+
             acpi_register_rmrr_unit(rmrru);
         }
     }
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 2757c7f..38530e5 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -90,6 +90,10 @@ struct hvm_domain {
     /* Cached CF8 for guest PCI config cycles */
     uint32_t                pci_cf8;
 
+    bool_t                  pci_force;
+    uint32_t                num_pcidevs;
+    struct xen_guest_pcidev_info      *pcidevs;
+
     struct pl_time         pl_time;
 
     struct hvm_io_handler *io_handler;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 57e2ed7..ba8970d 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -508,6 +508,25 @@ struct xen_domctl_get_device_group {
 typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t);
 
+/* Currently just one bit to indicate force to check Reserved Device Memory. */
+#define PCI_DEV_RDM_CHECK   0x1
+struct xen_guest_pcidev_info {
+    uint16_t    seg;
+    uint8_t     bus;
+    uint8_t     devfn;
+    uint32_t    flags;
+};
+typedef struct xen_guest_pcidev_info xen_guest_pcidev_info_t;
+DEFINE_XEN_GUEST_HANDLE(xen_guest_pcidev_info_t);
+/* Control whether/how we check and reserve device memory. */
+struct xen_domctl_set_rdm {
+    uint32_t    flags;
+    uint32_t    num_pcidevs;
+    XEN_GUEST_HANDLE_64(xen_guest_pcidev_info_t) pcidevs;
+};
+typedef struct xen_domctl_set_rdm xen_domctl_set_rdm_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_rdm_t);
+
 /* Pass-through interrupts: bind real irq -> hvm devfn. */
 /* XEN_DOMCTL_bind_pt_irq */
 /* XEN_DOMCTL_unbind_pt_irq */
@@ -1070,6 +1089,7 @@ struct xen_domctl {
 #define XEN_DOMCTL_setvnumainfo                  74
 #define XEN_DOMCTL_psr_cmt_op                    75
 #define XEN_DOMCTL_arm_configure_domain          76
+#define XEN_DOMCTL_set_rdm                       77
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -1135,6 +1155,7 @@ struct xen_domctl {
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
         struct xen_domctl_vnuma             vnuma;
         struct xen_domctl_psr_cmt_op        psr_cmt_op;
+        struct xen_domctl_set_rdm           set_rdm;
         uint8_t                             pad[128];
     } u;
 };
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index d48463f..5a760e2 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -592,6 +592,7 @@ static int flask_domctl(struct domain *d, int cmd)
     case XEN_DOMCTL_test_assign_device:
     case XEN_DOMCTL_assign_device:
     case XEN_DOMCTL_deassign_device:
+    case XEN_DOMCTL_set_rdm:
 #endif
         return 0;
 
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.