Re: [Xen-devel] [PATCH v10 8/9] libxl: vnuma nodes placement bits



On Wed, Sep 03, 2014 at 12:24:17AM -0400, Elena Ufimtseva wrote:
> Automatic numa placement cancels manual vnode placement
> mechanism. If numa placement explicitly specified, try
> to fit vnodes to the physical nodes.
> 
> Changes since v8:
>     - Addded number of ranges vmemranges for future support of
>       multi-range nodes; For PV guest it is set to same value
>       as number of vNUMA nodes.
> 

Three comments below. The errno values should be the standard Exxx ones
(EINVAL, EAGAIN, etc.), while the return value should be one of ERROR_INVAL,
ERROR_BADFAIL, ERROR_FAIL, etc. (see libxl_types.idl).
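
For instance, the error paths would follow a pattern along these lines
(an illustrative sketch only, untested):

    if (b_info->vnodes == 0) {
        errno = EINVAL;        /* plain errno value */
        return ERROR_INVAL;    /* libxl return code from libxl_types.idl */
    }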


> Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
> ---
>  tools/libxl/libxl_create.c |    1 +
>  tools/libxl/libxl_dom.c    |  204 ++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 205 insertions(+)
> 
> diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
> index fc332ef..a4977d2 100644
> --- a/tools/libxl/libxl_create.c
> +++ b/tools/libxl/libxl_create.c
> @@ -203,6 +203,7 @@ int libxl__domain_build_info_setdefault(libxl__gc *gc,
>      }
>  
>      libxl_defbool_setdefault(&b_info->numa_placement, true);
> +    libxl_defbool_setdefault(&b_info->vnuma_autoplacement, true);
>  
>      if (b_info->max_memkb == LIBXL_MEMKB_DEFAULT)
>          b_info->max_memkb = 32 * 1024;
> diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
> index c944804..b1376c4 100644
> --- a/tools/libxl/libxl_dom.c
> +++ b/tools/libxl/libxl_dom.c
> @@ -23,6 +23,7 @@
>  #include <xc_dom.h>
>  #include <xen/hvm/hvm_info_table.h>
>  #include <xen/hvm/hvm_xs_strings.h>
> +#include <libxl_vnuma.h>
>  
>  libxl_domain_type libxl__domain_type(libxl__gc *gc, uint32_t domid)
>  {
> @@ -227,12 +228,114 @@ static void hvm_set_conf_params(xc_interface *handle, uint32_t domid,
>                      libxl_defbool_val(info->u.hvm.nested_hvm));
>  }
>  
> +/* sets vnode_to_pnode map. */
> +static int libxl__init_vnode_to_pnode(libxl__gc *gc, uint32_t domid,
> +                        libxl_domain_build_info *info)
> +{
> +    unsigned int i, n;
> +    int nr_nodes = 0;
> +    uint64_t *vnodes_mem;
> +    unsigned long long *nodes_claim = NULL;
> +    libxl_numainfo *ninfo = NULL;
> +
> +    if (info->vnuma_vnodemap == NULL) {
> +        info->vnuma_vnodemap = libxl__calloc(gc, info->vnodes,
> +                                      sizeof(*info->vnuma_vnodemap));
> +    }
> +
> +    /* default setting. */
> +    for (i = 0; i < info->vnodes; i++)
> +        info->vnuma_vnodemap[i] = LIBXC_VNUMA_NO_NODE;
> +
> +    /* Get NUMA info. */
> +    ninfo = libxl_get_numainfo(CTX, &nr_nodes);
> +    if (ninfo == NULL)
> +        return ERROR_FAIL;
> +    /* Nothing to see if only one NUMA node. */
> +    if (nr_nodes <= 1)
> +        return 0;
> +
> +    vnodes_mem = info->vnuma_mem;
> +    /*
> +     * TODO: change algorithm. The current just fits the nodes
> +     * by its memory sizes. If no p-node found, will be used default
> +     * value of LIBXC_VNUMA_NO_NODE.
> +     */
> +    nodes_claim = libxl__calloc(gc, info->vnodes, sizeof(*nodes_claim));
> +    if ( !nodes_claim )

A bit odd: the spaces inside the parentheses don't match the style used in
the rest of the patch.
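
Something like this would match the surrounding code:

    if (!nodes_claim)
        return ERROR_FAIL;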

> +        return ERROR_FAIL;
> +
> +    libxl_for_each_set_bit(n, info->nodemap)
> +    {
> +        for (i = 0; i < info->vnodes; i++)
> +        {
> +            unsigned long mem_sz = vnodes_mem[i] << 20;
> +            if ((nodes_claim[n] + mem_sz <= ninfo[n].free) &&
> +                 /* vnode was not set yet. */
> +                 (info->vnuma_vnodemap[i] == LIBXC_VNUMA_NO_NODE ) )
> +            {
> +                info->vnuma_vnodemap[i] = n;
> +                nodes_claim[n] += mem_sz;
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +/*
> + * Builds vnode memory regions from configuration info
> + * for vnuma nodes with multiple regions.
> + */
> +static int libxl__build_vnuma_ranges(libxl__gc *gc,
> +                              uint32_t domid,
> +                              /* IN: mem sizes in megabytes */
> +                              libxl_domain_build_info *b_info,
> +                              /* OUT: linux NUMA blocks addresses */
> +                              vmemrange_t **memrange)
> +{
> +    /*
> +     * For non-PV domains, contruction of the regions will
> +     * need to have its own implementation.
> +     */
> +    if (b_info->type != LIBXL_DOMAIN_TYPE_PV) {
> +        LOG(DETAIL, "vNUMA is only supported for PV guests now.\n");
> +        errno = EINVAL;
> +        return -1;
> +    }
> +
> +    if (b_info->vnodes == 0) {
> +        errno = EINVAL;
> +        return -1;

How come you return -1 here..
> +    }
> +
> +    b_info->vmemranges = b_info->vnodes;
> +
> +    *memrange = libxl__calloc(gc, b_info->vnodes,
> +                              sizeof(vmemrange_t));
> +
> +    /*
> +     * For PV domain along with alignment, regions nid will
> +     * be set to corresponding vnuma node number and ignored
> +     * later during allocation.
> +     */
> +
> +    if (libxl__vnuma_align_mem(gc, domid, b_info, *memrange) < 0) {
> +        LOG(DETAIL, "Failed to align memory map.\n");
> +        errno = ERROR_INVAL;

errno = EINVAL?

> +        return ERROR_FAIL;

.. but here you return ERROR_FAIL?
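
To be concrete, the two error paths above could become something like this
(just a sketch of the convention, untested):

    if (b_info->vnodes == 0) {
        errno = EINVAL;
        return ERROR_INVAL;
    }

and

    if (libxl__vnuma_align_mem(gc, domid, b_info, *memrange) < 0) {
        LOG(DETAIL, "Failed to align memory map.\n");
        errno = EINVAL;
        return ERROR_FAIL;
    }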
> +    }
> +
> +    return 0;
> +}
> +
>  int libxl__build_pre(libxl__gc *gc, uint32_t domid,
>                libxl_domain_config *d_config, libxl__domain_build_state *state)
>  {
>      libxl_domain_build_info *const info = &d_config->b_info;
>      libxl_ctx *ctx = libxl__gc_owner(gc);
>      char *xs_domid, *con_domid;
> +    struct vmemrange *memrange;
>      int rc;
>  
>      if (xc_domain_max_vcpus(ctx->xch, domid, info->max_vcpus) != 0) {
> @@ -240,6 +343,20 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
>          return ERROR_FAIL;
>      }
>  
> +    if (libxl__build_vnuma_ranges(gc, domid, info, &memrange) != 0) {
> +        LOG(DETAIL, "Failed to build vnuma nodes memory ranges.\n");
> +        return ERROR_FAIL;
> +
> +    }
> +
> +    /*
> +     * NUMA placement and vNUMA autoplacement handling:
> +     * If numa_placement is set to default, do not use vnode to pnode
> +     * mapping as automatic placement algorithm will find best numa nodes.
> +     * If numa_placement is not used, we can try and use domain vnode
> +     * to pnode mask.
> +     */
> +
>      /*
>       * Check if the domain has any CPU or node affinity already. If not, try
>       * to build up the latter via automatic NUMA placement. In fact, in case
> @@ -298,7 +415,33 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
>                                     NULL, &cpumap_soft);
>  
>          libxl_bitmap_dispose(&cpumap_soft);
> +
> +        /*
> +         * If vnode_to_pnode mask was defined, dont use it if we 
> automatically
> +         * place domain on NUMA nodes, just give warning.
> +         */
> +        if (!libxl_defbool_val(info->vnuma_autoplacement)) {
> +            LOG(INFO, "Automatic NUMA placement for domain is turned on. \
> +                vnode to physical nodes mapping will not be used.");
> +        }
> +        if (libxl__init_vnode_to_pnode(gc, domid, info) < 0) {
> +            LOG(ERROR, "Failed to build vnode to pnode map\n");
> +            return ERROR_FAIL;
> +        }
> +    } else {
> +        if (!libxl_defbool_val(info->vnuma_autoplacement)) {
> +                if (!libxl__vnodemap_is_usable(gc, info)) {
> +                    LOG(ERROR, "Defined vnode to pnode domain map cannot be used.\n");
> +                    return ERROR_FAIL;
> +                }
> +        } else {
> +            if (libxl__init_vnode_to_pnode(gc, domid, info) < 0) {
> +                LOG(ERROR, "Failed to build vnode to pnode map.\n");
> +                return ERROR_FAIL;
> +            }
> +        }
>      }
> +

Spurious blank line.
>      if (info->nodemap.size)
>          libxl_domain_set_nodeaffinity(ctx, domid, &info->nodemap);
>      /* As mentioned in libxl.h, vcpu_hard_array takes precedence */
> @@ -339,6 +482,22 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
>          return ERROR_FAIL;
>      }
>  
> +    /*
> +     * XEN_DOMCTL_setvnuma subop hypercall needs to know max mem
> +     * for domain set by xc_domain_setmaxmem. So set vNUMA after
> +     * maxmem is being set.
> +     * memrange should contain regions if multi-region nodes are
> +     * suppoted. For PV domain regions are ignored.
> +     */
> +    if (xc_domain_setvnuma(ctx->xch, domid, info->vnodes,
> +        info->vmemranges,
> +        info->max_vcpus, memrange,
> +        info->vdistance, info->vnuma_vcpumap,
> +        info->vnuma_vnodemap) < 0) {
> +        LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't set vnuma topology");
> +        return ERROR_FAIL;
> +    }
> +
>      xs_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenstored/domid", NULL);
>      state->store_domid = xs_domid ? atoi(xs_domid) : 0;
>      free(xs_domid);
> @@ -434,6 +593,46 @@ retry_transaction:
>      return 0;
>  }
>  
> +/*
> + * Function fills the xc_dom_image with memory sizes for later
> + * use in domain memory allocator. No regions here are being
> + * filled and used in allocator as the regions do belong to one node.
> + */
> +static int libxl__dom_vnuma_init(struct libxl_domain_build_info *info,
> +                                 struct xc_dom_image *dom)
> +{
> +    errno = ERROR_INVAL;


Shouldn't that be EINVAL?

> +
> +    if (info->vnodes == 0)
> +        return -1;
> +
> +    info->vmemranges = info->vnodes;
> +
> +    dom->vnode_to_pnode = (unsigned int *)malloc(
> +                            info->vnodes * sizeof(*info->vnuma_vnodemap));
> +    dom->numa_memszs = (uint64_t *)malloc(
> +                          info->vnodes * sizeof(*info->vnuma_mem));
> +
> +    errno = ERROR_FAIL;
> +    if ( dom->numa_memszs == NULL || dom->vnode_to_pnode == NULL ) {
> +        info->vnodes = 0;
> +        if (dom->vnode_to_pnode)
> +            free(dom->vnode_to_pnode);
> +        if (dom->numa_memszs)
> +            free(dom->numa_memszs);
> +        return -1;

And should this return ERROR_FAIL instead of -1?
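
Perhaps something along these lines (sketch only, untested):

    if (info->vnodes == 0) {
        errno = EINVAL;
        return ERROR_INVAL;
    }

and for the allocation failure:

    if (dom->numa_memszs == NULL || dom->vnode_to_pnode == NULL) {
        info->vnodes = 0;
        free(dom->vnode_to_pnode);   /* free(NULL) is a no-op */
        free(dom->numa_memszs);
        return ERROR_FAIL;           /* errno is already set by malloc */
    }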

> +    }
> +
> +    memcpy(dom->numa_memszs, info->vnuma_mem,
> +            sizeof(*info->vnuma_mem) * info->vnodes);
> +    memcpy(dom->vnode_to_pnode, info->vnuma_vnodemap,
> +            sizeof(*info->vnuma_vnodemap) * info->vnodes);
> +
> +    dom->vnodes = info->vnodes;
> +
> +    return 0;
> +}
> +
>  int libxl__build_pv(libxl__gc *gc, uint32_t domid,
>               libxl_domain_build_info *info, libxl__domain_build_state *state)
>  {
> @@ -491,6 +690,11 @@ int libxl__build_pv(libxl__gc *gc, uint32_t domid,
>      dom->xenstore_domid = state->store_domid;
>      dom->claim_enabled = libxl_defbool_val(info->claim_mode);
>  
> +    if ( (ret = libxl__dom_vnuma_init(info, dom)) != 0 ) {
> +        LOGE(ERROR, "Failed to set doman vnuma");
> +        goto out;
> +    }
> +
>      if ( (ret = xc_dom_boot_xen_init(dom, ctx->xch, domid)) != 0 ) {
>          LOGE(ERROR, "xc_dom_boot_xen_init failed");
>          goto out;
> -- 
> 1.7.10.4
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 

