[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [v7][RFC][PATCH 06/13] hvmloader/ram: check if guest memory is out of reserved device memory maps



On 2014/11/6 18:06, Jan Beulich wrote:
On 06.11.14 at 10:28, <tiejun.chen@xxxxxxxxx> wrote:
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -484,6 +484,14 @@ struct xen_domctl_assign_device {
   typedef struct xen_domctl_assign_device xen_domctl_assign_device_t;
   DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t);

+/* Control whether/how we check and reserve device memory. */
+struct xen_domctl_set_rdm {
+    uint32_t  pci_rdmforce;
+    uint32_t  pci_dev_bitmap;

So this allows for 32 devices; considering that you index the bitmap

Sorry its my fault when I just focus on figuring out a doable way. We need to cover 256 x 32 x 8 = 65536.

by BDF, all this covers are 0000:00:00.0 through 0000:00:03.7.
Hardly enough to cover even a single pass through device (iirc the
range above is fully used by emulated devices). And of course a

Are you saying Xen restrict some BDFs specific to emulate some devices? But I don't see these associated codes.

much larger bitmap isn't a good solution either.

So I guess I need to reconstruct something new, please see the new draft codes.


Nor am I really clear what you need the 32 bits of "pci_rdmforce"
for, nor why this field isn't just being named "force". Without a

This variable can tell Xen to force check and reserve all RMRR ranges in that case the user is sure he's going to hotplug a device later but indeed, he really don't assign any device while creating a VM.

Please see the new draft codes as well.


comment alongside the fields, it's remaining guesswork anyway
when and how this domctl is to be used. Looking at your change
to intel_iommu_get_reserved_device_memory() the field appears
to be simply redundant.

--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -158,14 +158,14 @@ struct iommu_ops {
       void (*crash_shutdown)(void);
       void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int 
page_count);
       void (*iotlb_flush_all)(struct domain *d);
-    int (*get_reserved_device_memory)(iommu_grdm_t *, void *);
+    int (*get_reserved_device_memory)(iommu_grdm_t *, struct domain *, void *);
       void (*dump_p2m_table)(struct domain *d);
   };

   void iommu_suspend(void);
   void iommu_resume(void);
   void iommu_crash_shutdown(void);
-int iommu_get_reserved_device_memory(iommu_grdm_t *, void *);
+int iommu_get_reserved_device_memory(iommu_grdm_t *, struct domain *, void *);

I don't see why these generic interfaces would need to change;
the only thing that would seem to need changing is the callback
function (and of course the context passed to it).


I'm not 100% sure if we can call current->domain in all scenarios. If you can help me confirm this I'd really like to remove this change :) Now I assume this should be true as follows:

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 30764b4..5957d2e 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2036,6 +2036,12 @@ int xc_assign_device(xc_interface *xch,
                      uint32_t domid,
                      uint32_t machine_bdf);

+int xc_domain_device_setrdm(xc_interface *xch,
+                            uint32_t domid,
+                            uint32_t num_pcidevs,
+                            uint32_t pci_rdmforce,
+                            struct xen_guest_pcidev_info *pcidevs);
+
 int xc_get_device_group(xc_interface *xch,
                      uint32_t domid,
                      uint32_t machine_bdf,
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index 009e351..f38b400 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -1642,6 +1642,34 @@ int xc_assign_device(
     return do_domctl(xch, &domctl);
 }

+int xc_domain_device_setrdm(xc_interface *xch,
+                            uint32_t domid,
+                            uint32_t num_pcidevs,
+                            uint32_t pci_rdmforce,
+                            struct xen_guest_pcidev_info *pcidevs)
+{
+    int ret;
+    DECLARE_DOMCTL;
+    DECLARE_HYPERCALL_BOUNCE(pcidevs,
+                             num_pcidevs*sizeof(xen_guest_pcidev_info_t),
+                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
+
+    if ( xc_hypercall_bounce_pre(xch, pcidevs) )
+        return -1;
+
+    domctl.cmd = XEN_DOMCTL_set_rdm;
+    domctl.domain = domid;
+    domctl.u.set_rdm.pci_rdmforce = pci_rdmforce;
+    domctl.u.set_rdm.num_pcidevs = num_pcidevs;
+    set_xen_guest_handle(domctl.u.set_rdm.pcidevs, pcidevs);
+
+    ret = do_domctl(xch, &domctl);
+
+    xc_hypercall_bounce_post(xch, pcidevs);
+
+    return ret;
+}
+
 int xc_get_device_group(
     xc_interface *xch,
     uint32_t domid,
diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index d8bd9b3..ac48a82 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -2165,8 +2165,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,

         if ( !ext_vcpucontext )
             goto vcpu_ext_state_restore;
-        memcpy(&domctl.u.ext_vcpucontext, vcpup, 128);
-        vcpup += 128;
+        memcpy(&domctl.u.ext_vcpucontext, vcpup, 112);
+        vcpup += 112;
         domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext;
         domctl.domain = dom;
         frc = xc_domctl(xch, &domctl);
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index b1ff5ae..2429416 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -860,6 +860,9 @@ static void initiate_domain_create(libxl__egc *egc,
     ret = libxl__domain_build_info_setdefault(gc, &d_config->b_info);
     if (ret) goto error_out;

+    ret = libxl__domain_device_setrdm(gc, d_config, domid);
+    if (ret) goto error_out;
+
     if (!sched_params_valid(gc, domid, &d_config->b_info.sched_params)) {
         LOG(ERROR, "Invalid scheduling parameters\n");
         ret = ERROR_INVAL;
diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
index 3e191c3..9e402d1 100644
--- a/tools/libxl/libxl_dm.c
+++ b/tools/libxl/libxl_dm.c
@@ -90,6 +90,42 @@ const char *libxl__domain_device_model(libxl__gc *gc,
     return dm;
 }

+int libxl__domain_device_setrdm(libxl__gc *gc,
+                                libxl_domain_config *d_config,
+                                uint32_t dm_domid)
+{
+    int i, ret;
+    libxl_ctx *ctx = libxl__gc_owner(gc);
+    struct xen_guest_pcidev_info *pcidevs = NULL;
+
+    if ( d_config->num_pcidevs )
+    {
+ pcidevs = malloc(d_config->num_pcidevs*sizeof(xen_guest_pcidev_info_t));
+        if ( pcidevs )
+        {
+            for (i = 0; i < d_config->num_pcidevs; i++)
+            {
+                pcidevs[i].func = d_config->pcidevs[i].func;
+                pcidevs[i].dev = d_config->pcidevs[i].dev;
+                pcidevs[i].bus = d_config->pcidevs[i].bus;
+                pcidevs[i].domain = d_config->pcidevs[i].domain;
+                pcidevs[i].rdmforce = d_config->pcidevs[i].rdmforce;
+            }
+        }
+        else
+            LIBXL__LOG(CTX, LIBXL__LOG_ERROR,
+                               "Can't allocate for pcidevs.");
+    }
+
+    ret = xc_domain_device_setrdm(ctx->xch, dm_domid,
+                                  (uint32_t)d_config->num_pcidevs,
+                                  d_config->b_info.pci_rdmforce,
+                                  pcidevs);
+    free(pcidevs);
+
+    return ret;
+}
+
const libxl_vnc_info *libxl__dm_vnc(const libxl_domain_config *guest_config)
 {
     const libxl_vnc_info *vnc = NULL;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 4361421..c48a0e6 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1471,6 +1471,11 @@ _hidden int libxl__domain_build(libxl__gc *gc,
 /* for device model creation */
 _hidden const char *libxl__domain_device_model(libxl__gc *gc,
const libxl_domain_build_info *info);
+
+_hidden int libxl__domain_device_setrdm(libxl__gc *gc,
+                                        libxl_domain_config *d_config,
+                                        uint32_t domid);
+
 _hidden int libxl__need_xenpv_qemu(libxl__gc *gc,
         int nr_consoles, libxl__device_console *consoles,
         int nr_vfbs, libxl_device_vfb *vfbs,
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index ca3f724..b700263 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -398,6 +398,7 @@ libxl_domain_build_info = Struct("domain_build_info",[
     ("kernel",           string),
     ("cmdline",          string),
     ("ramdisk",          string),
+    ("pci_rdmforce",   uint32),
     ("u", KeyedUnion(None, libxl_domain_type, "type",
                 [("hvm", Struct(None, [("firmware",         string),
("bios", libxl_bios_type),
@@ -518,6 +519,7 @@ libxl_device_pci = Struct("device_pci", [
     ("power_mgmt", bool),
     ("permissive", bool),
     ("seize", bool),
+    ("rdmforce", bool),
     ])

 libxl_device_vtpm = Struct("device_vtpm", [
diff --git a/tools/libxl/libxlu_pci.c b/tools/libxl/libxlu_pci.c
index 26fb143..989eac8 100644
--- a/tools/libxl/libxlu_pci.c
+++ b/tools/libxl/libxlu_pci.c
@@ -143,6 +143,8 @@ int xlu_pci_parse_bdf(XLU_Config *cfg, libxl_device_pci *pcidev, const char *str
                     pcidev->permissive = atoi(tok);
                 }else if ( !strcmp(optkey, "seize") ) {
                     pcidev->seize = atoi(tok);
+                }else if ( !strcmp(optkey, "rdmforce") ) {
+                    pcidev->rdmforce = atoi(tok);
                 }else{
XLU__PCI_ERR(cfg, "Unknown PCI BDF option: %s", optkey);
                 }
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index 3c9f146..436b4f3 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -904,6 +904,7 @@ static void replace_string(char **str, const char *val)
     *str = xstrdup(val);
 }

+#define PCI_BDF(b,d,f) ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
 static void parse_config_data(const char *config_source,
                               const char *config_data,
                               int config_len,
@@ -919,6 +920,7 @@ static void parse_config_data(const char *config_source,
     int pci_msitranslate = 0;
     int pci_permissive = 0;
     int pci_seize = 0;
+    int pci_rdmforce = 0;
     int i, e;

     libxl_domain_create_info *c_info = &d_config->c_info;
@@ -1699,6 +1701,9 @@ skip_vfb:
     if (!xlu_cfg_get_long (config, "pci_seize", &l, 0))
         pci_seize = l;

+    if (!xlu_cfg_get_long (config, "pci_rdmforce", &l, 0))
+        pci_rdmforce = l;
+
     /* To be reworked (automatically enabled) once the auto ballooning
      * after guest starts is done (with PCI devices passed in). */
     if (c_info->type == LIBXL_DOMAIN_TYPE_PV) {
@@ -1719,9 +1724,11 @@ skip_vfb:
             pcidev->power_mgmt = pci_power_mgmt;
             pcidev->permissive = pci_permissive;
             pcidev->seize = pci_seize;
+            pcidev->rdmforce = pci_rdmforce;
             if (!xlu_pci_parse_bdf(config, pcidev, buf))
                 d_config->num_pcidevs++;
         }
+        d_config->b_info.pci_rdmforce = pci_rdmforce;
         if (d_config->num_pcidevs && c_info->type == LIBXL_DOMAIN_TYPE_PV)
             libxl_defbool_set(&b_info->u.pv.e820_host, true);
     }
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 1eba833..daf259e 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -1540,6 +1540,34 @@ int iommu_do_pci_domctl(
         }
         break;

+    case XEN_DOMCTL_set_rdm:
+    {
+        struct xen_domctl_set_rdm *xdsr = &domctl->u.set_rdm;
+        struct xen_guest_pcidev_info *pcidevs;
+        int i;
+
+        pcidevs = xmalloc_array(xen_guest_pcidev_info_t,
+                                domctl->u.set_rdm.num_pcidevs);
+        if ( pcidevs == NULL )
+        {
+            return -ENOMEM;
+        }
+
+        for ( i = 0; i < xdsr->num_pcidevs; ++i )
+        {
+            if ( __copy_from_guest_offset(pcidevs, xdsr->pcidevs, i, 1) )
+            {
+                xfree(pcidevs);
+                return -EFAULT;
+            }
+        }
+
+        d->arch.hvm_domain.pci_rdmforce = domctl->u.set_rdm.pci_rdmforce;
+        d->arch.hvm_domain.num_pcidevs = domctl->u.set_rdm.num_pcidevs;
+        d->arch.hvm_domain.pcidevs = pcidevs;
+    }
+        break;
+
     case XEN_DOMCTL_assign_device:
         if ( unlikely(d->is_dying) )
         {
diff --git a/xen/drivers/passthrough/vtd/dmar.c b/xen/drivers/passthrough/vtd/dmar.c
index 546eca9..4b35c04 100644
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -28,6 +28,7 @@
 #include <xen/xmalloc.h>
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
+#include <xen/sched.h>
 #include <asm/string.h>
 #include "dmar.h"
 #include "iommu.h"
@@ -925,14 +926,37 @@ int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt)
 {
     struct acpi_rmrr_unit *rmrr;
     int rc = 0;
+    int i, j;
+    u16 bdf, pt_bdf;
+    struct domain *d = current->domain;

-    list_for_each_entry(rmrr, &acpi_rmrr_units, list)
+    for_each_rmrr_device ( rmrr, bdf, i )
     {
-        rc = func(PFN_DOWN(rmrr->base_address),
-                  PFN_UP(rmrr->end_address) - PFN_DOWN(rmrr->base_address),
-                  ctxt);
-        if ( rc )
-            break;
+        if ( d->arch.hvm_domain.pci_rdmforce )
+        {
+            rc = func(PFN_DOWN(rmrr->base_address),
+                      PFN_UP(rmrr->end_address) -
+                      PFN_DOWN(rmrr->base_address),
+                      ctxt);
+            if ( rc )
+                break;
+        }
+        else
+        {
+            for ( j = 0; j < d->arch.hvm_domain.num_pcidevs; j++ )
+            {
+                pt_bdf = PCI_BDF(d->arch.hvm_domain.pcidevs[j].bus,
+                                 d->arch.hvm_domain.pcidevs[j].dev,
+                                 d->arch.hvm_domain.pcidevs[j].func);
+                if ( pt_bdf == bdf )
+                    rc = func(PFN_DOWN(rmrr->base_address),
+                              PFN_UP(rmrr->end_address) -
+                              PFN_DOWN(rmrr->base_address),
+                              ctxt);
+                if ( rc )
+                    break;
+            }
+        }
     }

     return rc;
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 2757c7f..b18ce40 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -90,6 +90,10 @@ struct hvm_domain {
     /* Cached CF8 for guest PCI config cycles */
     uint32_t                pci_cf8;

+    uint32_t                pci_rdmforce;
+    uint32_t                num_pcidevs;
+    struct xen_guest_pcidev_info      *pcidevs;
+
     struct pl_time         pl_time;

     struct hvm_io_handler *io_handler;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 58b19e7..4e47fac 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -484,6 +484,24 @@ struct xen_domctl_assign_device {
 typedef struct xen_domctl_assign_device xen_domctl_assign_device_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t);

+struct xen_guest_pcidev_info {
+    uint8_t func;
+    uint8_t dev;
+    uint8_t bus;
+    int domain;
+    int rdmforce;
+};
+typedef struct xen_guest_pcidev_info xen_guest_pcidev_info_t;
+DEFINE_XEN_GUEST_HANDLE(xen_guest_pcidev_info_t);
+/* Control whether/how we check and reserve device memory. */
+struct xen_domctl_set_rdm {
+    uint32_t  pci_rdmforce;
+    uint32_t  num_pcidevs;
+    XEN_GUEST_HANDLE(xen_guest_pcidev_info_t) pcidevs;
+};
+typedef struct xen_domctl_set_rdm xen_domctl_set_rdm_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_rdm_t);
+
 /* Retrieve sibling devices infomation of machine_sbdf */
 /* XEN_DOMCTL_get_device_group */
 struct xen_domctl_get_device_group {
@@ -1056,6 +1074,7 @@ struct xen_domctl {
 #define XEN_DOMCTL_set_vcpu_msrs                 73
 #define XEN_DOMCTL_setvnumainfo                  74
 #define XEN_DOMCTL_psr_cmt_op                    75
+#define XEN_DOMCTL_set_rdm                       76
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -1118,7 +1137,8 @@ struct xen_domctl {
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
         struct xen_domctl_vnuma             vnuma;
         struct xen_domctl_psr_cmt_op        psr_cmt_op;
-        uint8_t                             pad[128];
+        struct xen_domctl_set_rdm           set_rdm;
+        uint8_t                             pad[112];
     } u;
 };
 typedef struct xen_domctl xen_domctl_t;


Thanks
Tiejun

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.