[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen staging] tools: libxl/xl: run NUMA placement even when an hard-affinity is set



commit 2347a9b639b0ee3988f07e2227a6ca6e2e945418
Author:     Dario Faggioli <dfaggioli@xxxxxxxx>
AuthorDate: Fri Oct 19 17:54:41 2018 +0200
Commit:     Wei Liu <wei.liu2@xxxxxxxxxx>
CommitDate: Tue Nov 6 10:19:32 2018 +0000

    tools: libxl/xl: run NUMA placement even when an hard-affinity is set
    
    Right now, if either a hard or a soft affinity is explicitly specified
    in a domain's config file, automatic NUMA placement is skipped. However,
    automatic NUMA placement affects only the soft affinity of the domain
    which is being created.
    
    Therefore, it is OK to let it run if a hard affinity is specified. The
    semantics are that the best placement candidate is found while
    respecting the specified hard affinity, i.e., using only the nodes that
    contain the pcpus in the hard-affinity mask.
    
    This is particularly helpful if global xl pinning masks are defined, as
    made possible by commit aa67b97ed34279c43 ("xl.conf: Add global affinity
    masks"). In fact, without this commit, defining a global affinity mask
    would also mean disabling automatic placement, but that does not
    necessarily have to be the case (especially in large systems).
    
    Signed-off-by: Dario Faggioli <dfaggioli@xxxxxxxx>
    Acked-by: Wei Liu <wei.liu2@xxxxxxxxxx>
---
 tools/libxl/libxl_dom.c | 43 +++++++++++++++++++++++++++++++++++++------
 tools/xl/xl_parse.c     |  6 ++++--
 2 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index c66f3893d7..598af71562 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -27,6 +27,8 @@
 
 #include "_paths.h"
 
+//#define DEBUG 1
+
 libxl_domain_type libxl__domain_type(libxl__gc *gc, uint32_t domid)
 {
     libxl_ctx *ctx = libxl__gc_owner(gc);
@@ -142,12 +144,13 @@ static int numa_place_domain(libxl__gc *gc, uint32_t domid,
 {
     int found;
     libxl__numa_candidate candidate;
-    libxl_bitmap cpupool_nodemap;
+    libxl_bitmap cpumap, cpupool_nodemap, *map;
     libxl_cpupoolinfo cpupool_info;
     int i, cpupool, rc = 0;
     uint64_t memkb;
 
     libxl__numa_candidate_init(&candidate);
+    libxl_bitmap_init(&cpumap);
     libxl_bitmap_init(&cpupool_nodemap);
     libxl_cpupoolinfo_init(&cpupool_info);
 
@@ -162,6 +165,35 @@ static int numa_place_domain(libxl__gc *gc, uint32_t domid,
     rc = libxl_cpupool_info(CTX, &cpupool_info, cpupool);
     if (rc)
         goto out;
+    map = &cpupool_info.cpumap;
+
+    /*
+     * If there's a well defined hard affinity mask (i.e., the same one for all
+     * the vcpus), we can try to run the placement considering only the pcpus
+     * within such mask.
+     */
+    if (info->num_vcpu_hard_affinity)
+    {
+#ifdef DEBUG
+        int j;
+
+        for (j = 0; j < info->num_vcpu_hard_affinity; j++)
+            assert(libxl_bitmap_equal(&info->vcpu_hard_affinity[0],
+                                      &info->vcpu_hard_affinity[j], 0));
+#endif /* DEBUG */
+
+        rc = libxl_bitmap_and(CTX, &cpumap, &info->vcpu_hard_affinity[0],
+                              &cpupool_info.cpumap);
+        if (rc)
+            goto out;
+
+        /* Hard affinity must contain at least one cpu of our cpupool */
+        if (libxl_bitmap_is_empty(&cpumap)) {
+            LOG(ERROR, "Hard affinity completely outside of domain's cpupool!");
+            rc = ERROR_INVAL;
+            goto out;
+        }
+    }
 
     rc = libxl_domain_need_memory(CTX, info, &memkb);
     if (rc)
@@ -174,8 +206,7 @@ static int numa_place_domain(libxl__gc *gc, uint32_t domid,
     /* Find the best candidate with enough free memory and at least
      * as much pcpus as the domain has vcpus.  */
     rc = libxl__get_numa_candidate(gc, memkb, info->max_vcpus,
-                                   0, 0, &cpupool_info.cpumap,
-                                   numa_cmpf, &candidate, &found);
+                                   0, 0, map, numa_cmpf, &candidate, &found);
     if (rc)
         goto out;
 
@@ -206,6 +237,7 @@ static int numa_place_domain(libxl__gc *gc, uint32_t domid,
  out:
     libxl__numa_candidate_dispose(&candidate);
     libxl_bitmap_dispose(&cpupool_nodemap);
+    libxl_bitmap_dispose(&cpumap);
     libxl_cpupoolinfo_dispose(&cpupool_info);
     return rc;
 }
@@ -373,9 +405,8 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
      * reflect the placement result if that is the case
      */
     if (libxl_defbool_val(info->numa_placement)) {
-        if (info->cpumap.size || info->num_vcpu_hard_affinity ||
-            info->num_vcpu_soft_affinity)
-            LOG(WARN, "Can't run NUMA placement, as an (hard or soft) "
+        if (info->cpumap.size || info->num_vcpu_soft_affinity)
+            LOG(WARN, "Can't run NUMA placement, as a soft "
                       "affinity has been specified explicitly");
         else if (info->nodemap.size)
             LOG(WARN, "Can't run NUMA placement, as the domain has "
diff --git a/tools/xl/xl_parse.c b/tools/xl/xl_parse.c
index 0bda28152b..352cd214dd 100644
--- a/tools/xl/xl_parse.c
+++ b/tools/xl/xl_parse.c
@@ -356,7 +356,7 @@ static void parse_vcpu_affinity(libxl_domain_build_info *b_info,
             j++;
         }
 
-        /* We have a list of cpumaps, disable automatic placement */
+        /* When we have a list of cpumaps, always disable automatic placement */
         libxl_defbool_set(&b_info->numa_placement, false);
     } else {
         int i;
@@ -380,7 +380,9 @@ static void parse_vcpu_affinity(libxl_domain_build_info *b_info,
                               &vcpu_affinity_array[0]);
         }
 
-        libxl_defbool_set(&b_info->numa_placement, false);
+        /* We have soft affinity already, disable automatic placement */
+        if (!is_hard)
+            libxl_defbool_set(&b_info->numa_placement, false);
     }
 }
 
--
generated by git-patchbot for /home/xen/git/xen.git#staging

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.