[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] xl: add memory allocation logic for numa platform
On Tue, 2011-08-09 at 05:55 +0100, Zhang, Yang Z wrote: > For numa platform, we need to allocate memory for guest on which guest cpu > reside. This patch add > this feature for xl. Just use the simple algorithm to select the best node. > > diff -r 0f36c2eec2e1 tools/libxl/libxl.c > --- a/tools/libxl/libxl.c Thu Jul 28 15:40:54 2011 +0100 > +++ b/tools/libxl/libxl.c Tue Aug 09 12:46:18 2011 +0800 > @@ -2236,6 +2236,109 @@ > return ERROR_FAIL; > } > > +static inline void set_bit(int nr, volatile void *addr) > +{ > + asm volatile ( > + "btsl %1,%0" > + : "=m" (ADDR) > + : "Ir" (nr), "m" (ADDR) : "memory"); > +} Please use C code for this. There is no way this code is performance critical need to hand code asm for it. I'm any case if you need something like this you should define a proper API for dealing with the libxl_nodeinfo data type and not a void * based thing. Take a look at the libxl_cpumap_* methods for example. Actually, looking closer, the "cpumap" member of your nodeinfo should actually _be_ a cpumap and use the correct interfaces, having it as a string makes no sense at all AFAICT. > + > +int libxl_get_numainfo(libxl_ctx *ctx, libxl_numainfo_t *numainfo) > +{ > + xc_numainfo_t ninfo = { 0 }; > + libxl_physinfo physinfo = { 0 }; > + libxl_topologyinfo topoinfo; > + int i, max_nodes, max_cpus, node; > + libxl_nodeinfo_t *nodeinfo; > + DECLARE_HYPERCALL_BUFFER(xc_node_to_memsize_t, node_memsize); > + DECLARE_HYPERCALL_BUFFER(xc_node_to_memfree_t, node_memfree); > + > + if (libxl_get_physinfo(ctx, &physinfo)) > + goto out; > + > + max_cpus = physinfo.max_cpu_id + 1; > + max_nodes = NUMA_NO_NODE + 1; > + numainfo->max_cpus = max_cpus; > + > + numainfo->cpu_to_node = calloc(max_cpus, sizeof (unsigned long)); > + if (numainfo->cpu_to_node == NULL) > + goto out; > + > + numainfo->nodeinfo = (char *)calloc(max_nodes, sizeof(libxl_nodeinfo_t)); > + if (numainfo->nodeinfo == NULL) > + goto out; > + > + nodeinfo = (libxl_nodeinfo_t *)numainfo->nodeinfo; Why declare it as a string if you want it to be a libxl_nodeinfo? Please just use the correct types in the IDL. > + node_memsize = xc_hypercall_buffer_alloc(ctx->xch, node_memsize, > sizeof(*node_memsize) * max_nodes); > + if ( node_memsize == NULL ) > + goto out; > + node_memfree = xc_hypercall_buffer_alloc(ctx->xch, node_memfree, > sizeof(*node_memfree) * max_nodes); > + if ( node_memfree == NULL ) > + goto out; > + > + set_xen_guest_handle(ninfo.node_to_memsize, node_memsize); > + set_xen_guest_handle(ninfo.node_to_memfree, node_memfree); > + ninfo.max_node_index = max_nodes - 1; > + > + if ( xc_numainfo(ctx->xch, &ninfo) != 0 ) > + goto out; > + > + max_nodes = ninfo.max_node_index + 1; > + numainfo->max_nodes = max_nodes; > + > + if (libxl_get_topologyinfo(ctx, &topoinfo)) > + goto out; > + > + for ( i = 0; i <= max_nodes; i++ ) { > + if (node_memsize[i] != INVALID_MEM_NODE) { > + nodeinfo[i].online = 1; > + nodeinfo[i].cpumap = malloc(BITS_TO_LONGS(max_cpus) * sizeof > (unsigned long)); > + bzero(nodeinfo[i].cpumap, BITS_TO_LONGS(max_cpus) * sizeof > (unsigned long)); > + > + /* Total Memory */ > + nodeinfo[i].total_memkb = node_memsize[i] >> 10; /* KB */ > + > + /* Free Memory */ > + nodeinfo[i].free_memkb = node_memfree[i] >> 10; /* KB */ > + } else > + nodeinfo[i].online = 0; > + } > + > + for (i = 0; i < max_cpus; i++) > + if (topoinfo.coremap.array[i] != LIBXL_CPUARRAY_INVALID_ENTRY) { > + node = topoinfo.nodemap.array[i]; > + set_bit(i, nodeinfo[node].cpumap); > + numainfo->cpu_to_node[i] = node; > + } > + libxl_topologyinfo_destroy(&topoinfo); > + > + xc_hypercall_buffer_free(ctx->xch, node_memsize); > + xc_hypercall_buffer_free(ctx->xch, node_memfree); > + return 0; > + > +out: > + if (numainfo->cpu_to_node) > + free(numainfo->cpu_to_node); > + if (numainfo->nodeinfo); > + free(numainfo->nodeinfo); > + xc_hypercall_buffer_free(ctx->xch, node_memsize); > + xc_hypercall_buffer_free(ctx->xch, node_memfree); > + return ERROR_FAIL; > +} > + > +void libxl_free_numainfo(libxl_numainfo_t *numainfo) > +{ > + int i; > + libxl_nodeinfo_t *nodeinfo = (libxl_nodeinfo_t *)numainfo->nodeinfo; > + > + for(i = 0; i < numainfo->max_nodes; i++) > + if(nodeinfo[i].cpumap) > + free(nodeinfo[i].cpumap); > + free(numainfo->cpu_to_node); > + free(numainfo->nodeinfo); The IDL will define you a libxl_numainfo_destroy which does pretty much exactly this, if you use the right types in the IDL of course. > +} > + > const libxl_version_info* libxl_get_version_info(libxl_ctx *ctx) > { > union { > diff -r 0f36c2eec2e1 tools/libxl/libxl.h > --- a/tools/libxl/libxl.h Thu Jul 28 15:40:54 2011 +0100 > +++ b/tools/libxl/libxl.h Tue Aug 09 12:46:18 2011 +0800 > @@ -459,6 +459,16 @@ > > int libxl_get_physinfo(libxl_ctx *ctx, libxl_physinfo *physinfo); > int libxl_get_topologyinfo(libxl_ctx *ctx, libxl_topologyinfo *info); > + > +#define NUMA_NO_NODE 0xFF > +#define INVALID_MEM_NODE 0ul > +#define BITS_PER_LONG (sizeof(unsigned long) * 8) > +#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) > +#define ADDR (*(volatile long *) addr) These are polluting the user visible interface with non-namespaced headers. Since you should be using the libxl_cpumap type for your cpumaps I expect a bunch of them should just go away. Others should be moved to the relevant internal header and IMHO that ADDR macro has no business existing in the first place. > + > +int libxl_get_numainfo(libxl_ctx *ctx, libxl_numainfo_t *numainfo); > +void libxl_free_numainfo(libxl_numainfo_t *numainfo); > + > libxl_vcpuinfo *libxl_list_vcpu(libxl_ctx *ctx, uint32_t domid, > int *nb_vcpu, int *nrcpus); > int libxl_set_vcpuaffinity(libxl_ctx *ctx, uint32_t domid, uint32_t vcpuid, > diff -r 0f36c2eec2e1 tools/libxl/libxl.idl > --- a/tools/libxl/libxl.idl Thu Jul 28 15:40:54 2011 +0100 > +++ b/tools/libxl/libxl.idl Tue Aug 09 12:46:18 2011 +0800 > @@ -366,6 +366,19 @@ > ("socketmap", libxl_cpuarray, False, "cpu to socket map"), > ("nodemap", libxl_cpuarray, False, "cpu to node map"), > ]) > +libxl_nodeinfo = Struct("nodeinfo_t", [ > + ("free_memkb", uint32), > + ("total_memkb", uint32), > + ("candidate", uint32), > + ("online", uint32), > + ("cpumap", string), > + ]) > +libxl_numainfo = Struct("numainfo_t", [ libxl does not use the _t suffic on data types. How do these two new datastructures relate to the existing topology info? I think you also need to add comments to many of these fields (you can write them as the third entry in the tuple). What is a candidate for example? It seems to relate to some internal detail of the algorithm used by libxl internally rather than being a field which is useful to the external user of libxl. It would be preferable to separate out such internal and external data and only expose the external stuff via the API. > + ("nodeinfo", string), > + ("max_nodes", uint32), > + ("cpu_to_node", string), > + ("max_cpus", uint32), > + ]) > > libxl_sched_credit = Struct("sched_credit", [ > ("weight", integer), > diff -r 0f36c2eec2e1 tools/libxl/libxl_create.c > --- a/tools/libxl/libxl_create.c Thu Jul 28 15:40:54 2011 +0100 > +++ b/tools/libxl/libxl_create.c Tue Aug 09 12:46:18 2011 +0800 > @@ -159,6 +159,86 @@ > return 0; > } > > +static int find_best_node(libxl_ctx *ctx, libxl_numainfo_t *numainfo) > +{ > + int nr_doms, i, j, nr_vcpu, nrcpus, best_node, pcpu, node_id; > + unsigned long max_nodes = numainfo->max_nodes; > + unsigned long *nodeload; > + libxl_dominfo *dominfo; > + libxl_vcpuinfo *vcpuinfo; > + libxl_nodeinfo_t *nodeinfo = (libxl_nodeinfo_t *)numainfo->nodeinfo; > + > + nodeload = malloc(max_nodes * sizeof(*nodeload)); > + bzero(nodeload, max_nodes * sizeof(*nodeload)); > + > + if (!(dominfo = libxl_list_domain(ctx, &nr_doms))) > + goto out; > + > + for (i = 0; i < nr_doms; i++) { > + vcpuinfo = libxl_list_vcpu(ctx, dominfo[i].domid, &nr_vcpu, &nrcpus); > + if (!vcpuinfo) > + goto out; > + for (j = 0; j < nr_vcpu; j++) { > + if (!vcpuinfo[j].online) > + continue; > + pcpu = vcpuinfo[j].cpu; > + node_id = numainfo->cpu_to_node[pcpu]; > + if (nodeinfo[node_id].candidate) > + nodeload[node_id]++; > + else > + nodeload[node_id] += 8; > + } > + free(vcpuinfo); > + } > + best_node = 0; > + for (i = 1; i < max_nodes; i++) > + if(nodeinfo[i].candidate && nodeinfo[i].online > + && nodeload[i] < nodeload[best_node]) > + best_node = i; > + > + return best_node; > +out: > + if (dominfo) > + free(dominfo); > + return -1; > +} > + > +static int libxl_node_select(libxl_ctx *ctx, libxl_domain_build_info > *b_info, uint32_t domid) > +{ > + unsigned long i, best_node; > + unsigned long needmem = b_info->max_memkb; > + libxl_numainfo_t numainfo ={ 0 }; > + libxl_nodeinfo_t *nodeinfo; > + > + if (libxl_get_numainfo(ctx, &numainfo)) { > + fprintf(stderr, "libxl_get_topologyinfo failed.\n"); > + return -1; > + } > + > + if (numainfo.max_nodes < 2) { > + printf("max_nodes = %d\n", numainfo.max_nodes); > + return 0; > + } > + > + nodeinfo = (libxl_nodeinfo_t *)numainfo.nodeinfo; > + > + for (i = 0; i < numainfo.max_nodes; i++) > + if (nodeinfo[i].free_memkb > needmem) > + nodeinfo[i].candidate = 1; > + > + best_node = find_best_node(ctx, &numainfo); > + if (best_node == -1) { > + libxl_numainfo_t_destroy(&numainfo); > + return -1; > + } > + > + for (i = 0; i < b_info->max_vcpus; i++) > + xc_vcpu_setaffinity(ctx->xch, domid, i, (uint8_t > *)(nodeinfo[best_node].cpumap)); > + > + libxl_numainfo_t_destroy(&numainfo); > + return 0; > +} > + > int libxl__domain_build(libxl__gc *gc, > libxl_domain_build_info *info, > libxl_device_model_info *dm_info, > @@ -168,11 +248,15 @@ > char **vments = NULL, **localents = NULL; > struct timeval start_time; > int i, ret; > + libxl_ctx *ctx = libxl__gc_owner(gc); > > ret = libxl__build_pre(gc, domid, info, state); > if (ret) > goto out; > > + if (libxl_node_select(ctx, info, domid)) > + printf("Cannot find best node, using defaul algorithm\n"); Something is up with your whitespacing here (and a few other places). Please read tools/libxl/CODING_STYLE Also please use the correct LIBXL__LOG functions and do not print to stdout. This message should probably be a debug level if inedeed it needs to exist at all. The same goes for all the other printf calls which you have added. Is libxl_node_select intended to be called by external users of the library? If not then it should be libxl__node_select and take a gc not a ctx. > gettimeofday(&start_time, NULL); > > switch (info->type) { > > _______________________________________________ > Xen-devel mailing list > Xen-devel@xxxxxxxxxxxxxxxxxxx > http://lists.xensource.com/xen-devel _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |