|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 09 of 10 v2] libxl: have NUMA placement deal with cpupools
On Fri, 2012-06-15 at 18:04 +0100, Dario Faggioli wrote:
> In such a way that only the cpus belonging to the cpupool of the
> domain being placed are considered for the placement itself.
>
> This happens by filtering out of the placement candidates all the nodes
> in which the cpupool has no cpus. After that -- as cpu pooling does not
> necessarily happen at NUMA node boundaries -- we also make sure that
> only the actual cpus that are part of the pool are considered when
> counting how many processors a placement candidate is able to provide.
>
> Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
>
> diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
> --- a/tools/libxl/libxl_dom.c
> +++ b/tools/libxl/libxl_dom.c
> @@ -198,15 +198,27 @@ static void comb_get_nodemap(comb_iter_t
> libxl_bitmap_set(nodemap, it[i]);
> }
>
> +/* Retrieve how many nodes a nodemap spans. */
> +static int nodemap_to_nr_nodes(const libxl_bitmap *nodemap)
> +{
> + int i, nr_nodes = 0;
> +
> + libxl_for_each_set_bit(i, *nodemap)
> + nr_nodes++;
> + return nr_nodes;
> +}
> +
> /* Retrieve the number of cpus that the nodes that are part of the nodemap
> - * span. */
> + * span and that are also set in suitable_cpumap. */
> static int nodemap_to_nodes_cpus(libxl_cputopology *tinfo, int nr_cpus,
> + const libxl_bitmap *suitable_cpumap,
> const libxl_bitmap *nodemap)
> {
> int i, nodes_cpus = 0;
>
> for (i = 0; i < nr_cpus; i++) {
> - if (libxl_bitmap_test(nodemap, tinfo[i].node))
> + if (libxl_bitmap_test(suitable_cpumap, i) &&
> + libxl_bitmap_test(nodemap, tinfo[i].node))
> nodes_cpus++;
> }
> return nodes_cpus;
> @@ -311,12 +323,13 @@ static int cpus_per_node_count(libxl_cpu
> int libxl__get_numa_candidates(libxl__gc *gc,
> uint32_t min_free_memkb, int min_cpus,
> int min_nodes, int max_nodes,
> + const libxl_bitmap *suitable_cpumap,
> libxl__numa_candidate *cndts[], int *nr_cndts)
> {
> libxl__numa_candidate *new_cndts = NULL;
> libxl_cputopology *tinfo = NULL;
> libxl_numainfo *ninfo = NULL;
> - libxl_bitmap nodemap;
> + libxl_bitmap suitable_nodemap, nodemap;
> int nr_nodes, nr_cpus;
> int array_size, rc;
>
> @@ -340,6 +353,15 @@ int libxl__get_numa_candidates(libxl__gc
> if (rc)
> goto out;
>
> + /* Allocate and prepare the map of the node that can be utilized for
> + * placement, basing on the map of suitable cpus. */
> + rc = libxl_node_bitmap_alloc(CTX, &suitable_nodemap);
> + if (rc)
> + goto out;
> + rc = libxl_cpumap_to_nodemap(CTX, suitable_cpumap, &suitable_nodemap);
> + if (rc)
> + goto out;
> +
> /*
> * Round up and down some of the constraints. For instance, the minimum
> * number of cpus a candidate should have must at least be non-negative.
> @@ -391,9 +413,14 @@ int libxl__get_numa_candidates(libxl__gc
> for (comb_ok = comb_init(gc, &comb_iter, nr_nodes, min_nodes);
> comb_ok;
> comb_ok = comb_next(comb_iter, nr_nodes, min_nodes)) {
> uint32_t nodes_free_memkb;
> - int nodes_cpus;
> + int i, nodes_cpus;
>
> + /* Get the nodemap for the combination and filter unwnted nodes
> */
Typo: "unwnted" should be "unwanted".
> comb_get_nodemap(comb_iter, &nodemap, min_nodes);
> + libxl_for_each_set_bit(i, nodemap) {
> + if (!libxl_bitmap_test(&suitable_nodemap, i))
> + libxl_bitmap_reset(&nodemap, i);
> + }
>
> /* If there is not enough memoy in this combination, skip it
> * and go generating the next one... */
> @@ -402,7 +429,8 @@ int libxl__get_numa_candidates(libxl__gc
> continue;
>
> /* And the same applies if this combination is short in cpus */
> - nodes_cpus = nodemap_to_nodes_cpus(tinfo, nr_cpus, &nodemap);
> + nodes_cpus = nodemap_to_nodes_cpus(tinfo, nr_cpus,
> suitable_cpumap,
> + &nodemap);
> if (min_cpus > 0 && nodes_cpus < min_cpus)
> continue;
>
> @@ -427,12 +455,13 @@ int libxl__get_numa_candidates(libxl__gc
> new_cndts[*nr_cndts].nr_domains =
> nodemap_to_nr_domains(gc, tinfo,
> &nodemap);
> new_cndts[*nr_cndts].free_memkb = nodes_free_memkb;
> - new_cndts[*nr_cndts].nr_nodes = min_nodes;
> + new_cndts[*nr_cndts].nr_nodes = nodemap_to_nr_nodes(&nodemap);
> new_cndts[*nr_cndts].nr_cpus = nodes_cpus;
>
> LOG(DEBUG, "NUMA placement candidate #%d found: nr_nodes=%d, "
> "nr_cpus=%d, free_memkb=%"PRIu32"", *nr_cndts,
> - min_nodes, new_cndts[*nr_cndts].nr_cpus,
> + new_cndts[*nr_cndts].nr_nodes,
> + new_cndts[*nr_cndts].nr_cpus,
> new_cndts[*nr_cndts].free_memkb / 1024);
>
> (*nr_cndts)++;
> @@ -442,6 +471,7 @@ int libxl__get_numa_candidates(libxl__gc
>
> *cndts = new_cndts;
> out:
> + libxl_bitmap_dispose(&suitable_nodemap);
> libxl_bitmap_dispose(&nodemap);
> libxl_cputopology_list_free(tinfo, nr_cpus);
> libxl_numainfo_list_free(ninfo, nr_nodes);
> @@ -485,23 +515,27 @@ static int numa_cmpf(const void *v1, con
> }
>
> /* The actual automatic NUMA placement routine */
> -static int numa_place_domain(libxl__gc *gc, libxl_domain_build_info *info)
> +static int numa_place_domain(libxl__gc *gc, uint32_t domid,
> + libxl_domain_build_info *info)
> {
> int nr_candidates = 0;
> libxl__numa_candidate *candidates = NULL;
> libxl_bitmap candidate_nodemap;
> - libxl_cpupoolinfo *pinfo;
> - int nr_pools, rc = 0;
> + libxl_cpupoolinfo cpupool_info;
> + int i, cpupool, rc = 0;
> uint32_t memkb;
>
> - /* First of all, if cpupools are in use, better not to mess with them */
> - pinfo = libxl_list_cpupool(CTX, &nr_pools);
> - if (!pinfo)
> - return ERROR_FAIL;
> - if (nr_pools > 1) {
> - LOG(NOTICE, "skipping NUMA placement as cpupools are in use");
> - goto out;
> - }
> + /*
> + * Extract the cpumap from the cpupool the domain belong to. In fact,
> + * it only makes sense to consider the cpus/nodes that are in there
> + * for placement.
> + */
> + rc = cpupool = libxl__domain_cpupool(gc, domid);
> + if (rc < 0)
> + return rc;
> + rc = libxl_cpupool_info(CTX, &cpupool_info, cpupool);
> + if (rc)
> + return rc;
>
> rc = libxl_domain_need_memory(CTX, info, &memkb);
> if (rc)
> @@ -513,7 +547,8 @@ static int numa_place_domain(libxl__gc *
>
> /* Find all the candidates with enough free memory and at least
> * as much pcpus as the domain has vcpus. */
> - rc = libxl__get_numa_candidates(gc, memkb, info->max_vcpus, 0, 0,
> + rc = libxl__get_numa_candidates(gc, memkb, info->max_vcpus,
> + 0, 0, &cpupool_info.cpumap,
> &candidates, &nr_candidates);
> if (rc)
> goto out;
> @@ -538,13 +573,20 @@ static int numa_place_domain(libxl__gc *
> if (rc)
> goto out;
>
> + /* Avoid trying to set the affinity to cpus that might be in the
> + * nodemap but not in our cpupool. */
> + libxl_for_each_set_bit(i, info->cpumap) {
> + if (!libxl_bitmap_test(&cpupool_info.cpumap, i))
> + libxl_bitmap_reset(&info->cpumap, i);
> + }
> +
> LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and "
> "%"PRIu32" KB free selected", candidates[0].nr_nodes,
> candidates[0].nr_cpus, candidates[0].free_memkb / 1024);
>
> out:
> libxl_bitmap_dispose(&candidate_nodemap);
> - libxl_cpupoolinfo_list_free(pinfo, nr_pools);
> + libxl_cpupoolinfo_dispose(&cpupool_info);
> return rc;
> }
>
> @@ -567,7 +609,7 @@ int libxl__build_pre(libxl__gc *gc, uint
> * whatever that turns out to be.
> */
> if (libxl_bitmap_is_full(&info->cpumap)) {
> - int rc = numa_place_domain(gc, info);
> + int rc = numa_place_domain(gc, domid, info);
> if (rc)
> return rc;
> }
> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> --- a/tools/libxl/libxl_internal.h
> +++ b/tools/libxl/libxl_internal.h
> @@ -2094,14 +2094,17 @@ typedef struct {
> * least that amount of free memory and that number of cpus, respectively. If
> * min_free_memkb and/or min_cpus are 0, the candidates' free memory and
> number
> * of cpus won't be checked at all, which means a candidate will always be
> - * considered suitable wrt the specific constraint. cndts is where the list
> of
> - * exactly nr_cndts candidates is returned. Note that, in case no candidates
> - * are found at all, the function returns successfully, but with nr_cndts
> equal
> - * to zero.
> + * considered suitable wrt the specific constraint. suitable_cpumap is useful
> + * for specifyin we want only the cpus in that mask to be considered while
Typo: "specifyin" should be "specifying".
Apart from those two spelling errors:
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
> + * generating placement candidates (for example because of cpupools). cndts
> is
> + * where the list of exactly nr_cndts candidates is returned. Note that, in
> + * case no candidates are found at all, the function returns successfully,
> but
> + * with nr_cndts equal to zero.
> */
> _hidden int libxl__get_numa_candidates(libxl__gc *gc,
> uint32_t min_free_memkb, int min_cpus,
> int min_nodes, int max_nodes,
> + const libxl_bitmap *suitable_cpumap,
> libxl__numa_candidate *cndts[], int
> *nr_cndts);
>
> /* allocation and deallocation for placement candidates */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |