[Xen-devel] [PATCH v5 4/8] vnuma topology parsing routines
Parses the vNUMA topology: number of nodes and memory ranges. If not
defined in the config, initializes vNUMA with only one node and a default
topology.

Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
---
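For illustration, a config fragment exercising the options parsed below
could look like this (values are made up; they assume a guest with
memory = 2048 and vcpus = 4):

    vnodes = 2
    vnumamem = ["1024", "1024"]
    vdistance = ["10", "20"]
    numa_cpumask = ["0", "0", "1", "1"]
    vnuma_vnodemap = ["1", "0"]
    vnuma_autoplacement = 1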
 tools/libxl/libxl_vnuma.h |   11 ++
 tools/libxl/xl_cmdimpl.c  |  406 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 417 insertions(+)
 create mode 100644 tools/libxl/libxl_vnuma.h

diff --git a/tools/libxl/libxl_vnuma.h b/tools/libxl/libxl_vnuma.h
new file mode 100644
index 0000000..f1568ae
--- /dev/null
+++ b/tools/libxl/libxl_vnuma.h
@@ -0,0 +1,11 @@
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#define VNUMA_NO_NODE ~((unsigned int)0)
+
+/*
+ * The minimum vNUMA node size is taken as 64MB, even though current
+ * Linux allows 32MB, thus leaving some slack. Will be modified to
+ * match Linux.
+ */
+#define MIN_VNODE_SIZE 64U
+
+#define MAX_VNUMA_NODES ((unsigned int)1 << 10)
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index 5195914..59855ed 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -40,6 +40,7 @@
 #include "libxl_json.h"
 #include "libxlutil.h"
 #include "xl.h"
+#include "libxl_vnuma.h"
 
 /* For calls which return an errno on failure */
 #define CHK_ERRNOVAL( call ) ({ \
@@ -725,6 +726,403 @@ static void parse_top_level_sdl_options(XLU_Config *config,
     xlu_cfg_replace_string (config, "xauthority", &sdl->xauthority, 0);
 }
 
+/*
+ * Parse list item i as an unsigned integer. Returns (unsigned int)-EINVAL
+ * on a missing item, a malformed number or an out-of-range value.
+ */
+static unsigned int get_list_item_uint(XLU_ConfigList *list, unsigned int i)
+{
+    const char *buf;
+    char *ep;
+    unsigned long ul;
+    int rc = -EINVAL;
+
+    buf = xlu_cfg_get_listitem(list, i);
+    if (!buf)
+        return rc;
+    ul = strtoul(buf, &ep, 10);
+    if (ep == buf)
+        return rc;
+    if (ul >= UINT16_MAX)
+        return rc;
+    return (unsigned int)ul;
+}
+
+/* Fill the NxN distance table: samenode on the diagonal, othernode elsewhere */
+static void vdistance_set(unsigned int *vdistance,
+                          unsigned int nr_vnodes,
+                          unsigned int samenode,
+                          unsigned int othernode)
+{
+    unsigned int idx, slot;
+
+    for (idx = 0; idx < nr_vnodes; idx++)
+        for (slot = 0; slot < nr_vnodes; slot++)
+            *(vdistance + slot * nr_vnodes + idx) =
+                idx == slot ? samenode : othernode;
+}
+
+/* Distribute vcpus round-robin over the vnodes */
+static void vcputovnode_default(unsigned int *cpu_to_node,
+                                unsigned int nr_vnodes,
+                                unsigned int max_vcpus)
+{
+    unsigned int cpu;
+
+    for (cpu = 0; cpu < max_vcpus; cpu++)
+        cpu_to_node[cpu] = cpu % nr_vnodes;
+}
+
+/* Split domain memory between vNUMA nodes equally */
+static int split_vnumamem(libxl_domain_build_info *b_info)
+{
+    unsigned long long vnodemem = 0;
+    unsigned long n;
+    unsigned int i;
+
+    /* In MBytes */
+    if (b_info->nr_nodes == 0)
+        return -1;
+    vnodemem = (b_info->max_memkb >> 10) / b_info->nr_nodes;
+    if (vnodemem < MIN_VNODE_SIZE)
+        return -1;
+    /* remainder in MBytes */
+    n = (b_info->max_memkb >> 10) % b_info->nr_nodes;
+    /* get final sizes in MBytes */
+    for (i = 0; i < (b_info->nr_nodes - 1); i++)
+        b_info->numa_memszs[i] = vnodemem;
+    /* add the remainder to the last node */
+    b_info->numa_memszs[i] = vnodemem + n;
+    return 0;
+}
+
+/* Leave every vnode unpinned; VNUMA_NO_NODE requests no exact pnode */
+static void vnode_to_pnode_default(unsigned int *vnode_to_pnode,
+                                   unsigned int nr_vnodes)
+{
+    unsigned int i;
+
+    for (i = 0; i < nr_vnodes; i++)
+        vnode_to_pnode[i] = VNUMA_NO_NODE;
+}
+
+/*
+ * Init vNUMA to "zero config" with one node and all other
+ * topology parameters set to default.
+ */
+static int vnuma_zero_config(libxl_domain_build_info *b_info)
+{
+    b_info->nr_nodes = 1;
+
+    /* all memory goes to this one vnode */
+    if (!(b_info->numa_memszs = calloc(b_info->nr_nodes,
+                                       sizeof(*b_info->numa_memszs))))
+        goto bad_vnumazerocfg;
+
+    if (!(b_info->cpu_to_node = calloc(b_info->max_vcpus,
+                                       sizeof(*b_info->cpu_to_node))))
+        goto bad_vnumazerocfg;
+
+    if (!(b_info->distance = calloc(b_info->nr_nodes * b_info->nr_nodes,
+                                    sizeof(*b_info->distance))))
+        goto bad_vnumazerocfg;
+
+    if (!(b_info->vnode_to_pnode = calloc(b_info->nr_nodes,
+                                          sizeof(*b_info->vnode_to_pnode))))
+        goto bad_vnumazerocfg;
+
+    b_info->numa_memszs[0] = b_info->max_memkb >> 10;
+
+    /* all vcpus assigned to this vnode */
+    vcputovnode_default(b_info->cpu_to_node, b_info->nr_nodes,
+                        b_info->max_vcpus);
+
+    /* default vdistance is 10 */
+    vdistance_set(b_info->distance, b_info->nr_nodes, 10, 10);
+
+    /* VNUMA_NO_NODE for vnode_to_pnode */
+    vnode_to_pnode_default(b_info->vnode_to_pnode, b_info->nr_nodes);
+
+    /*
+     * The domain will be placed on physical nodes chosen by automatic
+     * NUMA placement; VNUMA_NO_NODE does not request an exact node.
+     */
+    libxl_defbool_set(&b_info->vnuma_autoplacement, true);
+    return 0;
+
+ bad_vnumazerocfg:
+    return -1;
+}
+
+/* Caller must exit */
+static void free_vnuma_info(libxl_domain_build_info *b_info)
+{
+    free(b_info->numa_memszs);
+    free(b_info->distance);
+    free(b_info->cpu_to_node);
+    free(b_info->vnode_to_pnode);
+    b_info->nr_nodes = 0;
+}
+
+/*
+ * Full distance table parsing, disabled for now (see the TODO on
+ * extended distance tables below):
+static int vdistance_parse(char *vdistcfg, unsigned int *vdistance,
+                           unsigned int nr_vnodes)
+{
+    char *endptr, *toka, *tokb, *saveptra = NULL, *saveptrb = NULL;
+    unsigned int *vdist_tmp = NULL;
+    int rc = 0;
+    unsigned int i, j, parsed = 0;
+    unsigned long dist;
+
+    rc = -EINVAL;
+    if (vdistance == NULL)
+        return rc;
+    vdist_tmp = malloc(nr_vnodes * nr_vnodes * sizeof(*vdistance));
+    if (vdist_tmp == NULL)
+        return rc;
+
+    i = j = 0;
+    for (toka = strtok_r(vdistcfg, ",", &saveptra); toka;
+         toka = strtok_r(NULL, ",", &saveptra)) {
+        if (i >= nr_vnodes)
+            goto vdist_parse_err;
+        for (tokb = strtok_r(toka, " ", &saveptrb); tokb;
+             tokb = strtok_r(NULL, " ", &saveptrb)) {
+            if (j >= nr_vnodes)
+                goto vdist_parse_err;
+            dist = strtol(tokb, &endptr, 10);
+            if (dist > UINT16_MAX || dist < 0)
+                goto vdist_parse_err;
+            if (tokb == endptr)
+                goto vdist_parse_err;
+            *(vdist_tmp + j * nr_vnodes + i) = dist;
+            parsed++;
+            j++;
+        }
+        i++;
+        j = 0;
+    }
+    rc = parsed;
+    memcpy(vdistance, vdist_tmp, nr_vnodes * nr_vnodes * sizeof(*vdistance));
+
+ vdist_parse_err:
+    free(vdist_tmp);
+    return rc;
+}
+*/
+
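+/*
+ * Config options understood by parse_vnuma_config():
+ *   vnodes              - number of vNUMA nodes
+ *   vnuma_autoplacement - let automatic NUMA placement choose pnodes
+ *   vnumamem            - list of per-vnode memory sizes, in MBytes
+ *   vdistance           - two values: same-node and other-node distance
+ *   numa_cpumask        - list giving the vnode of each vcpu
+ *   vnuma_vnodemap      - list giving the pnode of each vnode
+ */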
+static void parse_vnuma_config(XLU_Config *config,
+                               libxl_domain_build_info *b_info)
+{
+    XLU_ConfigList *vnumamemcfg;
+    XLU_ConfigList *vdistancecfg, *vnodemap, *vcpumap;
+    int nr_vnuma_regions;
+    int nr_vdist, nr_vnodemap, nr_vcpumap, i;
+    unsigned long long vnuma_memparsed = 0;
+    long l;
+    unsigned long ul;
+    const char *buf;
+
+    if (!xlu_cfg_get_long (config, "vnodes", &l, 0)) {
+        if (l > MAX_VNUMA_NODES) {
+            fprintf(stderr, "Too many vnuma nodes, max %u is allowed.\n",
+                    MAX_VNUMA_NODES);
+            goto bad_vnuma_config;
+        }
+        b_info->nr_nodes = l;
+
+        xlu_cfg_get_defbool(config, "vnuma_autoplacement",
+                            &b_info->vnuma_autoplacement, 0);
+
+        /* Only construct nodes with at least one vcpu for now */
+        if (b_info->nr_nodes != 0 && b_info->max_vcpus >= b_info->nr_nodes) {
+            if (!xlu_cfg_get_list(config, "vnumamem",
+                                  &vnumamemcfg, &nr_vnuma_regions, 0)) {
+
+                if (nr_vnuma_regions != b_info->nr_nodes) {
+                    fprintf(stderr, "Number of numa regions (vnumamem = %d) "
+                            "is incorrect (should be %d).\n",
+                            nr_vnuma_regions, b_info->nr_nodes);
+                    goto bad_vnuma_config;
+                }
+
+                b_info->numa_memszs = calloc(b_info->nr_nodes,
+                                             sizeof(*b_info->numa_memszs));
+                if (b_info->numa_memszs == NULL) {
+                    fprintf(stderr,
+                            "Unable to allocate memory for vnuma ranges.\n");
+                    goto bad_vnuma_config;
+                }
+
+                char *ep;
+                /*
+                 * Will parse only nr_vnodes times, even if we have
+                 * more/less regions. Take care of it later if less,
+                 * or discard if too many regions.
+                 */
+                for (i = 0; i < b_info->nr_nodes; i++) {
+                    buf = xlu_cfg_get_listitem(vnumamemcfg, i);
+                    if (!buf) {
+                        fprintf(stderr, "xl: Unable to get element %d in "
+                                "vnuma memory list.\n", i);
+                        break;
+                    }
+                    ul = strtoul(buf, &ep, 10);
+                    if (ep == buf) {
+                        fprintf(stderr, "xl: Invalid argument parsing "
+                                "vnumamem: %s.\n", buf);
+                        break;
+                    }
+
+                    /* MIN_VNODE_SIZE (64MB) is the minimum node size */
+                    if (ul >= UINT32_MAX || ul < MIN_VNODE_SIZE) {
+                        fprintf(stderr, "xl: vnuma memory %lu is not within "
+                                "%u - %u range.\n",
+                                ul, MIN_VNODE_SIZE, UINT32_MAX);
+                        break;
+                    }
+
+                    /* memory in MBytes */
+                    b_info->numa_memszs[i] = ul;
+                }
+
+                /* Total memory for vNUMA parsed to verify */
+                for (i = 0; i < nr_vnuma_regions; i++)
+                    vnuma_memparsed += b_info->numa_memszs[i];
+
+                /* Amount of memory for vnodes same as total? */
+                if ((vnuma_memparsed << 10) != (b_info->max_memkb)) {
+                    fprintf(stderr, "xl: vnuma memory is not the same as "
+                            "domain memory size.\n");
+                    goto bad_vnuma_config;
+                }
+            } else {
+                b_info->numa_memszs = calloc(b_info->nr_nodes,
+                                             sizeof(*b_info->numa_memszs));
+                if (b_info->numa_memszs == NULL) {
+                    fprintf(stderr,
+                            "Unable to allocate memory for vnuma ranges.\n");
+                    goto bad_vnuma_config;
+                }
+
+                fprintf(stderr,
+                        "WARNING: vNUMA memory ranges were not specified.\n");
+                fprintf(stderr, "Using default equal vnode memory size %lu "
+                        "Kbytes to cover %lu Kbytes.\n",
+                        b_info->max_memkb / b_info->nr_nodes,
+                        b_info->max_memkb);
+
+                if (split_vnumamem(b_info) < 0) {
+                    fprintf(stderr, "Could not split vnuma memory into "
+                            "equal chunks.\n");
+                    goto bad_vnuma_config;
+                }
+            }
+
+            b_info->distance = calloc(b_info->nr_nodes * b_info->nr_nodes,
+                                      sizeof(*b_info->distance));
+            if (b_info->distance == NULL)
+                goto bad_vnuma_config;
+
+            if (!xlu_cfg_get_list(config, "vdistance", &vdistancecfg,
+                                  &nr_vdist, 0)) {
+                int d1, d2;
+                /*
+                 * The first value is the same-node distance, the second
+                 * is used for all other distances. This is required right
+                 * now to avoid a non-symmetrical distance table, as that
+                 * may break the latest kernel.
+                 * TODO: a better way to analyze an extended distance table,
+                 * possibly OS specific.
+                 */
+                d1 = get_list_item_uint(vdistancecfg, 0);
+                d2 = get_list_item_uint(vdistancecfg, 1);
+
+                if (d1 >= 0 && d2 >= 0 && d1 < d2) {
+                    vdistance_set(b_info->distance, b_info->nr_nodes, d1, d2);
+                } else {
+                    fprintf(stderr,
+                            "WARNING: vnuma distance values are incorrect.\n");
+                    goto bad_vnuma_config;
+                }
+            } else {
+                fprintf(stderr,
+                        "Could not parse vnuma distances, using defaults.\n");
+                vdistance_set(b_info->distance, b_info->nr_nodes, 10, 20);
+            }
+
+            b_info->cpu_to_node = calloc(b_info->max_vcpus,
+                                         sizeof(*b_info->cpu_to_node));
+            if (b_info->cpu_to_node == NULL)
+                goto bad_vnuma_config;
+
+            if (!xlu_cfg_get_list(config, "numa_cpumask",
+                                  &vcpumap, &nr_vcpumap, 0)) {
+                if (nr_vcpumap == b_info->max_vcpus) {
+                    unsigned int vcpumask = 0, vmask;
+                    int vnode;
+
+                    vmask = ~(~0 << nr_vcpumap);
+                    for (i = 0; i < nr_vcpumap; i++) {
+                        vnode = get_list_item_uint(vcpumap, i);
+                        if (vnode >= 0 && vnode < b_info->nr_nodes) {
+                            vcpumask |= (1 << i);
+                            b_info->cpu_to_node[i] = vnode;
+                        }
+                    }
+
+                    /* Was every vcpu assigned a valid vnode? */
+                    if (((vmask & vcpumask) + 1) != (1 << nr_vcpumap)) {
+                        fprintf(stderr, "WARNING: Not all vcpus were covered "
+                                "in numa_cpumask.\n");
+                        goto bad_vnuma_config;
+                    }
+                } else {
+                    fprintf(stderr, "WARNING: Bad numa_cpumask.\n");
+                    goto bad_vnuma_config;
+                }
+            }
+            else
+                vcputovnode_default(b_info->cpu_to_node,
+                                    b_info->nr_nodes,
+                                    b_info->max_vcpus);
+
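+            /*
+             * vnuma_vnodemap pins each vnode to a physical NUMA node.
+             * List coverage is verified with the same bitmask idiom as
+             * above: bit i is set for every valid entry, so
+             * (mask + 1) == (1 << n) holds only if all n entries parsed.
+             */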
+            b_info->vnode_to_pnode = calloc(b_info->nr_nodes,
+                                            sizeof(*b_info->vnode_to_pnode));
+            if (b_info->vnode_to_pnode == NULL)
+                goto bad_vnuma_config;
+            if (!xlu_cfg_get_list(config, "vnuma_vnodemap", &vnodemap,
+                                  &nr_vnodemap, 0)) {
+                /*
+                 * If not specified or incorrect, the map will be defined
+                 * later, based on the machine architecture, configuration
+                 * and memory available when creating the domain.
+                 */
+                if (nr_vnodemap == b_info->nr_nodes) {
+                    unsigned int vnodemask = 0, smask;
+                    int pnode;
+
+                    smask = ~(~0 << b_info->nr_nodes);
+                    for (i = 0; i < b_info->nr_nodes; i++) {
+                        pnode = get_list_item_uint(vnodemap, i);
+                        if (pnode >= 0) {
+                            vnodemask |= (1 << i);
+                            b_info->vnode_to_pnode[i] = pnode;
+                        }
+                    }
+
+                    /* Were all vnodes covered in the mask? */
+                    if (((vnodemask & smask) + 1) != (1 << nr_vnodemap)) {
+                        fprintf(stderr, "WARNING: Not all vnodes were covered "
+                                "in vnuma_vnodemap.\n");
+
+                        if (libxl_defbool_val(b_info->vnuma_autoplacement)) {
+                            fprintf(stderr, "Automatic placement will be "
+                                    "used for vnodes.\n");
+                            vnode_to_pnode_default(b_info->vnode_to_pnode,
+                                                   b_info->nr_nodes);
+                        } else
+                            goto bad_vnuma_config;
+                    }
+                } else {
+                    fprintf(stderr, "WARNING: Incorrect vnuma_vnodemap.\n");
+
+                    if (libxl_defbool_val(b_info->vnuma_autoplacement)) {
+                        fprintf(stderr, "Automatic placement will be used "
+                                "for vnodes.\n");
+                        vnode_to_pnode_default(b_info->vnode_to_pnode,
+                                               b_info->nr_nodes);
+                    } else
+                        goto bad_vnuma_config;
+                }
+            } else {
+                fprintf(stderr, "WARNING: Missing vnuma_vnodemap.\n");
+
+                if (libxl_defbool_val(b_info->vnuma_autoplacement)) {
+                    fprintf(stderr, "Automatic placement will be used "
+                            "for vnodes.\n");
+                    vnode_to_pnode_default(b_info->vnode_to_pnode,
+                                           b_info->nr_nodes);
+                } else
+                    goto bad_vnuma_config;
+            }
+        }
+        else if (vnuma_zero_config(b_info))
+            goto bad_vnuma_config;
+    }
+    /* If no vnuma topology is defined for the domain, init one node */
+    else if (vnuma_zero_config(b_info))
+        goto bad_vnuma_config;
+    return;
+
+ bad_vnuma_config:
+    free_vnuma_info(b_info);
+    exit(1);
+}
+
 static void parse_config_data(const char *config_source,
                               const char *config_data,
                               int config_len,
@@ -1081,6 +1479,14 @@ static void parse_config_data(const char *config_source,
         exit(1);
     }
 
+    libxl_defbool_set(&b_info->vnuma_autoplacement, false);
+
+    /*
+     * If there is no vnuma topology in the config, a "zero" vnuma
+     * config will be initialized, with one node and other defaults.
+     */
+    parse_vnuma_config(config, b_info);
+
     xlu_cfg_replace_string (config, "bootloader", &b_info->u.pv.bootloader, 0);
     switch (xlu_cfg_get_list_as_string_list(config, "bootloader_args",
                                             &b_info->u.pv.bootloader_args, 1))
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel