[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [v10][PATCH 11/16] tools/libxl: detect and avoid conflicts with RDM



On 2015/7/21 20:33, Ian Jackson wrote:
Chen, Tiejun writes ("Re: [v10][PATCH 11/16] tools/libxl: detect and avoid conflicts 
with RDM"):
[Ian Jackson:]
The most obvious answer to me would be that if an rdms array is
specified, the strategy should be ignored.  That is how the automatic
numa placement API works.

I'm not familiar with this.

Do you mean this?

      if (d_config->num_rdms)
          strategy = LIBXL_RDM_RESERVE_STRATEGY_IGNORE;

But except this global strategy, we also have per-device setting so
maybe something like this?

      if (d_config->num_rdms)
          return 0;

This latter was the kind of approach I had in mind.  I think that will
do for 4.6.


Thanks for your guideline on this patch, and also I realize lots of things need to be improved. So I'm going to make a TODO list publically after this feature freeze.

Now just for this patch, please take a final review (I just hope so.)

diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 2f8e590..a4bd2a1 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -407,7 +407,7 @@ int libxl__domain_build(libxl__gc *gc,

     switch (info->type) {
     case LIBXL_DOMAIN_TYPE_HVM:
-        ret = libxl__build_hvm(gc, domid, info, state);
+        ret = libxl__build_hvm(gc, domid, d_config, state);
         if (ret)
             goto out;

diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
index 634b8d2..3afa735 100644
--- a/tools/libxl/libxl_dm.c
+++ b/tools/libxl/libxl_dm.c
@@ -92,6 +92,280 @@ const char *libxl__domain_device_model(libxl__gc *gc,
     return dm;
 }

+static int
+libxl__xc_device_get_rdm(libxl__gc *gc,
+                         uint32_t flag,
+                         uint16_t seg,
+                         uint8_t bus,
+                         uint8_t devfn,
+                         unsigned int *nr_entries,
+                         struct xen_reserved_device_memory **xrdm)
+{
+    int rc = 0, r;
+
+    /*
+     * We really can't presume how many entries we can get in advance.
+     */
+    *nr_entries = 0;
+    r = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn,
+                                      NULL, nr_entries);
+    assert(r <= 0);
+    /* "0" means we have no any rdm entry. */
+    if (!r) goto out;
+
+    if (errno != ENOBUFS) {
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    GCNEW_ARRAY(*xrdm, *nr_entries);
+    r = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn,
+                                      *xrdm, nr_entries);
+    if (r)
+        rc = ERROR_FAIL;
+
+ out:
+    if (rc) {
+        *nr_entries = 0;
+        *xrdm = NULL;
+        LOG(ERROR, "Could not get reserved device memory maps.\n");
+    }
+    return rc;
+}
+
+/*
+ * Check whether there exists rdm hole in the specified memory range.
+ * Returns true if exists, else returns false.
+ */
+static bool overlaps_rdm(uint64_t start, uint64_t memsize,
+                         uint64_t rdm_start, uint64_t rdm_size)
+{
+    return (start + memsize > rdm_start) && (start < rdm_start + rdm_size);
+}
+
+static void
+add_rdm_entry(libxl__gc *gc, libxl_domain_config *d_config,
+              uint64_t rdm_start, uint64_t rdm_size, int rdm_policy)
+{
+    d_config->num_rdms++;
+    d_config->rdms = libxl__realloc(NOGC, d_config->rdms,
+                            d_config->num_rdms * sizeof(libxl_device_rdm));
+
+    d_config->rdms[d_config->num_rdms - 1].start = rdm_start;
+    d_config->rdms[d_config->num_rdms - 1].size = rdm_size;
+    d_config->rdms[d_config->num_rdms - 1].policy = rdm_policy;
+}
+
+/*
+ * Check reported RDM regions and handle potential gfn conflicts according
+ * to user preferred policy.
+ *
+ * RDM can reside in address space beyond 4G theoretically, but we never
+ * see this in real world. So in order to avoid breaking highmem layout
+ * we don't solve highmem conflict. Note this means highmem rmrr could
+ * still be supported if no conflict.
+ *
+ * But in the case of lowmem, RDM probably scatter the whole RAM space.
+ * Especially multiple RDM entries would worsen this to lead a complicated
+ * memory layout. And then its hard to extend hvm_info_table{} to work
+ * hvmloader out. So here we're trying to figure out a simple solution to
+ * avoid breaking existing layout. So when a conflict occurs,
+ *
+ * #1. Above a predefined boundary (default 2G)
+ * - Move lowmem_end below reserved region to solve conflict;
+ *
+ * #2. Below a predefined boundary (default 2G)
+ * - Check strict/relaxed policy.
+ * "strict" policy leads to fail libxl.
+ * "relaxed" policy issue a warning message and also mask this entry
+ * INVALID to indicate we shouldn't expose this entry to hvmloader.
+ * Note when both policies are specified on a given region, the per-device
+ * policy should override the global policy.
+ */
+int libxl__domain_device_construct_rdm(libxl__gc *gc,
+                                       libxl_domain_config *d_config,
+                                       uint64_t rdm_mem_boundary,
+                                       struct xc_hvm_build_args *args)
+{
+    int i, j, conflict, rc;
+    struct xen_reserved_device_memory *xrdm = NULL;
+    uint32_t strategy = d_config->b_info.u.hvm.rdm.strategy;
+    uint16_t seg;
+    uint8_t bus, devfn;
+    uint64_t rdm_start, rdm_size;
+ uint64_t highmem_end = args->highmem_end ? args->highmem_end : (1ull<<32);
+
+    /*
+     * We just want to construct RDM once since RDM is specific to the
+     * given platform, so this shouldn't change again.
+     */
+    if (d_config->num_rdms)
+        return 0;
+
+    /* Might not expose rdm. */
+    if (strategy == LIBXL_RDM_RESERVE_STRATEGY_IGNORE &&
+        !d_config->num_pcidevs)
+        return 0;
+
+    /* Query all RDM entries in this platform */
+    if (strategy == LIBXL_RDM_RESERVE_STRATEGY_HOST) {
+        unsigned int nr_entries;
+
+        /* Collect all rdm info if exist. */
+        rc = libxl__xc_device_get_rdm(gc, PCI_DEV_RDM_ALL,
+                                      0, 0, 0, &nr_entries, &xrdm);
+        if (rc)
+            goto out;
+        if (!nr_entries)
+            return 0;
+
+        assert(xrdm);
+
+        for (i = 0; i < nr_entries; i++)
+        {
+            add_rdm_entry(gc, d_config,
+                          pfn_to_paddr(xrdm[i].start_pfn),
+                          pfn_to_paddr(xrdm[i].nr_pages),
+                          d_config->b_info.u.hvm.rdm.policy);
+        }
+    }
+
+    /* Query RDM entries per-device */
+    for (i = 0; i < d_config->num_pcidevs; i++) {
+        unsigned int nr_entries;
+        bool new = true;
+
+        seg = d_config->pcidevs[i].domain;
+        bus = d_config->pcidevs[i].bus;
+        devfn = PCI_DEVFN(d_config->pcidevs[i].dev,
+                          d_config->pcidevs[i].func);
+        nr_entries = 0;
+        rc = libxl__xc_device_get_rdm(gc, ~PCI_DEV_RDM_ALL,
+                                      seg, bus, devfn, &nr_entries, &xrdm);
+        if (rc)
+            goto out;
+        /* No RDM to associated with this device. */
+        if (!nr_entries)
+            continue;
+
+        assert(xrdm);
+
+        /*
+         * Need to check whether this entry is already saved in the array.
+         * This could come from two cases:
+         *
+         *   - user may configure to get all RDMs in this platform, which
+         *   is already queried before this point
+         *   - or two assigned devices may share one RDM entry
+         *
+         * Different policies may be configured on the same RDM due to
+         * above two cases. But we don't allow to assign such a group
+         * devies right now so it doesn't come true in our case.
+         */
+        for (j = 0; j < d_config->num_rdms; j++) {
+            if (d_config->rdms[j].start == pfn_to_paddr(xrdm[0].start_pfn))
+            {
+                /*
+                 * So the per-device policy always override the global
+                 * policy in this case.
+                 */
+                d_config->rdms[j].policy = d_config->pcidevs[i].rdm_policy;
+                new = false;
+                break;
+            }
+        }
+
+        if (new) {
+            add_rdm_entry(gc, d_config,
+                          pfn_to_paddr(xrdm[0].start_pfn),
+                          pfn_to_paddr(xrdm[0].nr_pages),
+                          d_config->pcidevs[i].rdm_policy);
+        }
+    }
+
+    /*
+     * Next step is to check and avoid potential conflict between RDM
+     * entries and guest RAM. To avoid intrusive impact to existing
+     * memory layout {lowmem, mmio, highmem} which is passed around
+     * various function blocks, below conflicts are not handled which
+     * are rare and handling them would lead to a more scattered
+     * layout:
+     *  - RDM  in highmem area (>4G)
+     *  - RDM lower than a defined memory boundary (e.g. 2G)
+     * Otherwise for conflicts between boundary and 4G, we'll simply
+     * move lowmem end below reserved region to solve conflict.
+     *
+     * If a conflict is detected on a given RDM entry, an error will
+     * be returned if 'strict' policy is specified. Instead, if
+     * 'relaxed' policy specified, this conflict is treated just as a
+     * warning, but we mark this RDM entry as INVALID to indicate that
+     * this entry shouldn't be exposed to hvmloader.
+     *
+     * Firstly we should check the case of rdm < 4G because we may
+     * need to expand highmem_end.
+     */
+    for (i = 0; i < d_config->num_rdms; i++) {
+        rdm_start = d_config->rdms[i].start;
+        rdm_size = d_config->rdms[i].size;
+        conflict = overlaps_rdm(0, args->lowmem_end, rdm_start, rdm_size);
+
+        if (!conflict)
+            continue;
+
+        /* Just check if RDM > our memory boundary. */
+        if (rdm_start > rdm_mem_boundary) {
+            /*
+             * We will move downwards lowmem_end so we have to expand
+             * highmem_end.
+             */
+            highmem_end += (args->lowmem_end - rdm_start);
+            /* Now move downwards lowmem_end. */
+            args->lowmem_end = rdm_start;
+        }
+    }
+
+    /* Sync highmem_end. */
+    args->highmem_end = highmem_end;
+
+    /*
+     * Finally we can take same policy to check lowmem(< 2G) and
+     * highmem adjusted above.
+     */
+    for (i = 0; i < d_config->num_rdms; i++) {
+        rdm_start = d_config->rdms[i].start;
+        rdm_size = d_config->rdms[i].size;
+        /* Does this entry conflict with lowmem? */
+        conflict = overlaps_rdm(0, args->lowmem_end,
+                                rdm_start, rdm_size);
+        /* Does this entry conflict with highmem? */
+        conflict |= overlaps_rdm((1ULL<<32),
+                                 args->highmem_end - (1ULL<<32),
+                                 rdm_start, rdm_size);
+
+        if (!conflict)
+            continue;
+
+        if (d_config->rdms[i].policy == LIBXL_RDM_RESERVE_POLICY_STRICT) {
+ LOG(ERROR, "RDM conflict at 0x%lx.\n", d_config->rdms[i].start);
+            goto out;
+        } else {
+            LOG(WARN, "Ignoring RDM conflict at 0x%lx.\n",
+                      d_config->rdms[i].start);
+
+            /*
+             * Then mask this INVALID to indicate we shouldn't expose this
+             * to hvmloader.
+             */
+            d_config->rdms[i].policy = LIBXL_RDM_RESERVE_POLICY_INVALID;
+        }
+    }
+
+    return 0;
+
+ out:
+    return ERROR_FAIL;
+}
+
const libxl_vnc_info *libxl__dm_vnc(const libxl_domain_config *guest_config)
 {
     const libxl_vnc_info *vnc = NULL;
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 4cb247a..7e39047 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -921,13 +921,20 @@ out:
 }

 int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
-              libxl_domain_build_info *info,
+              libxl_domain_config *d_config,
               libxl__domain_build_state *state)
 {
     libxl_ctx *ctx = libxl__gc_owner(gc);
     struct xc_hvm_build_args args = {};
     int ret, rc = ERROR_FAIL;
     uint64_t mmio_start, lowmem_end, highmem_end;
+    libxl_domain_build_info *const info = &d_config->b_info;
+    /*
+     * Currently we fix this as 2G to guarantee how to handle
+     * our rdm policy. But we'll provide a parameter to set
+     * this dynamically.
+     */
+    uint64_t rdm_mem_boundary = 0x80000000;

     memset(&args, 0, sizeof(struct xc_hvm_build_args));
     /* The params from the configuration file are in Mb, which are then
@@ -965,6 +972,14 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
     args.highmem_end = highmem_end;
     args.mmio_start = mmio_start;

+    rc = libxl__domain_device_construct_rdm(gc, d_config,
+                                            rdm_mem_boundary,
+                                            &args);
+    if (rc) {
+        LOG(ERROR, "checking reserved device memory failed");
+        goto out;
+    }
+
     if (info->num_vnuma_nodes != 0) {
         int i;

diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 69fdad8..5e4e771 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -144,6 +144,9 @@
 #endif
   /* all of these macros preserve errno (saving and restoring) */

+/* Convert pfn to physical address space. */
+#define pfn_to_paddr(x) ((uint64_t)(x) << XC_PAGE_SHIFT)
+
 /* logging */
_hidden void libxl__logv(libxl_ctx *ctx, xentoollog_level msglevel, int errnoval, const char *file /* may be 0 */, int line /* ignored if !file */, @@ -1076,7 +1079,7 @@ _hidden int libxl__build_post(libxl__gc *gc, uint32_t domid,
 _hidden int libxl__build_pv(libxl__gc *gc, uint32_t domid,
libxl_domain_build_info *info, libxl__domain_build_state *state);
 _hidden int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
-              libxl_domain_build_info *info,
+              libxl_domain_config *d_config,
               libxl__domain_build_state *state);

 _hidden int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid,
@@ -1584,6 +1587,15 @@ _hidden int libxl__need_xenpv_qemu(libxl__gc *gc,
         int nr_channels, libxl_device_channel *channels);

 /*
+ * This function will fix reserved device memory conflict
+ * according to user's configuration.
+ */
+_hidden int libxl__domain_device_construct_rdm(libxl__gc *gc,
+                                   libxl_domain_config *d_config,
+                                   uint64_t rdm_mem_guard,
+                                   struct xc_hvm_build_args *args);
+
+/*
  * This function will cause the whole libxl process to hang
  * if the device model does not respond.  It is deprecated.
  *
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index db9f75a..157fa59 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -586,6 +586,12 @@ libxl_device_pci = Struct("device_pci", [
     ("rdm_policy",      libxl_rdm_reserve_policy),
     ])

+libxl_device_rdm = Struct("device_rdm", [
+    ("start", uint64),
+    ("size", uint64),
+    ("policy", libxl_rdm_reserve_policy),
+    ])
+
 libxl_device_dtdev = Struct("device_dtdev", [
     ("path", string),
     ])
@@ -616,6 +622,7 @@ libxl_domain_config = Struct("domain_config", [
     ("disks", Array(libxl_device_disk, "num_disks")),
     ("nics", Array(libxl_device_nic, "num_nics")),
     ("pcidevs", Array(libxl_device_pci, "num_pcidevs")),
+    ("rdms", Array(libxl_device_rdm, "num_rdms")),
     ("dtdevs", Array(libxl_device_dtdev, "num_dtdevs")),
     ("vfbs", Array(libxl_device_vfb, "num_vfbs")),
     ("vkbs", Array(libxl_device_vkb, "num_vkbs")),
--
1.9.1

Thanks
Tiejun

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.