[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 08 of 10 v2] libxl: automatic placement deals with node-affinity



On 19/12/12 19:07, Dario Faggioli wrote:
This basically means the following two things:
  1) during domain creation, it is the node-affinity of
     the domain --rather than the vcpu-affinities of its
     VCPUs-- that is affected by automatic placement;
  2) during automatic placement, when counting how many
     VCPUs are already "bound" to a placement candidate
     (as part of the process of choosing the best
     candidate), both vcpu-affinity and node-affinity
     are considered.

Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
Acked-by: Juergen Gross <juergen.gross@xxxxxxxxxxxxxx>

Re-confirming Ack.
 -George


diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -133,13 +133,13 @@ static int numa_place_domain(libxl__gc *
  {
      int found;
      libxl__numa_candidate candidate;
-    libxl_bitmap candidate_nodemap;
+    libxl_bitmap cpupool_nodemap;
      libxl_cpupoolinfo cpupool_info;
      int i, cpupool, rc = 0;
      uint32_t memkb;
libxl__numa_candidate_init(&candidate);
-    libxl_bitmap_init(&candidate_nodemap);
+    libxl_bitmap_init(&cpupool_nodemap);
/*
       * Extract the cpumap from the cpupool the domain belong to. In fact,
@@ -156,7 +156,7 @@ static int numa_place_domain(libxl__gc *
      rc = libxl_domain_need_memory(CTX, info, &memkb);
      if (rc)
          goto out;
-    if (libxl_node_bitmap_alloc(CTX, &candidate_nodemap, 0)) {
+    if (libxl_node_bitmap_alloc(CTX, &cpupool_nodemap, 0)) {
          rc = ERROR_FAIL;
          goto out;
      }
@@ -174,17 +174,19 @@ static int numa_place_domain(libxl__gc *
      if (found == 0)
          goto out;
- /* Map the candidate's node map to the domain's info->cpumap */
-    libxl__numa_candidate_get_nodemap(gc, &candidate, &candidate_nodemap);
-    rc = libxl_nodemap_to_cpumap(CTX, &candidate_nodemap, &info->cpumap);
+    /* Map the candidate's node map to the domain's info->nodemap */
+    libxl__numa_candidate_get_nodemap(gc, &candidate, &info->nodemap);
+
+    /* Avoid trying to set the affinity to nodes that might be in the
+     * candidate's nodemap but out of our cpupool. */
+    rc = libxl_cpumap_to_nodemap(CTX, &cpupool_info.cpumap,
+                                 &cpupool_nodemap);
      if (rc)
          goto out;
- /* Avoid trying to set the affinity to cpus that might be in the
-     * nodemap but not in our cpupool. */
-    libxl_for_each_set_bit(i, info->cpumap) {
-        if (!libxl_bitmap_test(&cpupool_info.cpumap, i))
-            libxl_bitmap_reset(&info->cpumap, i);
+    libxl_for_each_set_bit(i, info->nodemap) {
+        if (!libxl_bitmap_test(&cpupool_nodemap, i))
+            libxl_bitmap_reset(&info->nodemap, i);
      }
LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and "
@@ -193,7 +195,7 @@ static int numa_place_domain(libxl__gc *
out:
      libxl__numa_candidate_dispose(&candidate);
-    libxl_bitmap_dispose(&candidate_nodemap);
+    libxl_bitmap_dispose(&cpupool_nodemap);
      libxl_cpupoolinfo_dispose(&cpupool_info);
      return rc;
  }
@@ -211,10 +213,10 @@ int libxl__build_pre(libxl__gc *gc, uint
      /*
       * Check if the domain has any CPU affinity. If not, try to build
       * up one. In case numa_place_domain() find at least a suitable
-     * candidate, it will affect info->cpumap accordingly; if it
+     * candidate, it will affect info->nodemap accordingly; if it
       * does not, it just leaves it as it is. This means (unless
       * some weird error manifests) the subsequent call to
-     * libxl_set_vcpuaffinity_all() will do the actual placement,
+     * libxl_domain_set_nodeaffinity() will do the actual placement,
       * whatever that turns out to be.
       */
      if (libxl_defbool_val(info->numa_placement)) {
diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c
--- a/tools/libxl/libxl_numa.c
+++ b/tools/libxl/libxl_numa.c
@@ -184,7 +184,7 @@ static int nr_vcpus_on_nodes(libxl__gc *
                               int vcpus_on_node[])
  {
      libxl_dominfo *dinfo = NULL;
-    libxl_bitmap vcpu_nodemap;
+    libxl_bitmap dom_nodemap, vcpu_nodemap;
      int nr_doms, nr_cpus;
      int i, j, k;
@@ -197,6 +197,12 @@ static int nr_vcpus_on_nodes(libxl__gc *
          return ERROR_FAIL;
      }
+ if (libxl_node_bitmap_alloc(CTX, &dom_nodemap, 0) < 0) {
+        libxl_bitmap_dispose(&vcpu_nodemap);
+        libxl_dominfo_list_free(dinfo, nr_doms);
+        return ERROR_FAIL;
+    }
+
      for (i = 0; i < nr_doms; i++) {
          libxl_vcpuinfo *vinfo;
          int nr_dom_vcpus;
@@ -205,14 +211,21 @@ static int nr_vcpus_on_nodes(libxl__gc *
          if (vinfo == NULL)
              continue;
+ /* Retrieve the domain's node-affinity map */
+        libxl_domain_get_nodeaffinity(CTX, dinfo[i].domid, &dom_nodemap);
+
          for (j = 0; j < nr_dom_vcpus; j++) {
-            /* For each vcpu of each domain, increment the elements of
-             * the array corresponding to the nodes where the vcpu runs */
+            /*
+             * For each vcpu of each domain, it must have both vcpu-affinity
+             * and node-affinity to (a pcpu belonging to) a certain node to
+             * cause an increment in the corresponding element of the array.
+             */
              libxl_bitmap_set_none(&vcpu_nodemap);
              libxl_for_each_set_bit(k, vinfo[j].cpumap) {
                  int node = tinfo[k].node;
if (libxl_bitmap_test(suitable_cpumap, k) &&
+                    libxl_bitmap_test(&dom_nodemap, node) &&
                      !libxl_bitmap_test(&vcpu_nodemap, node)) {
                      libxl_bitmap_set(&vcpu_nodemap, node);
                      vcpus_on_node[node]++;
@@ -223,6 +236,7 @@ static int nr_vcpus_on_nodes(libxl__gc *
          libxl_vcpuinfo_list_free(vinfo, nr_dom_vcpus);
      }
+ libxl_bitmap_dispose(&dom_nodemap);
      libxl_bitmap_dispose(&vcpu_nodemap);
      libxl_dominfo_list_free(dinfo, nr_doms);
      return 0;


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.