[Xen-devel] [PATCH v8 21/21] xl: vNUMA support
This patch includes the configuration option parser and documentation.
Please see the hunk to xl.cfg.pod.5 for more information.

Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
Cc: Ian Campbell <ian.campbell@xxxxxxxxxx>
Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
---
Changes in v8:
1. State all options are mandatory in manpage.
2. Rework xl config parser.

Changes in v7:
1. Fill in max_memkb with vNUMA memory settings.
2. Check vcpus specified in vnuma matches maxvcpus=.
3. Update manpage.
4. Drop Dario's reviewed-by due to above changes.

Changes in v6:
1. Disable NUMA auto-placement.
---
 docs/man/xl.cfg.pod.5    |  59 +++++++++++++-
 tools/libxl/xl_cmdimpl.c | 204 ++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 250 insertions(+), 13 deletions(-)

diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
index 408653f..93cd7d2 100644
--- a/docs/man/xl.cfg.pod.5
+++ b/docs/man/xl.cfg.pod.5
@@ -41,8 +41,8 @@ value).
 
 =item B<[ VALUE, VALUE, ... ]>
 
-A list of C<VALUES> of the above types. Lists are homogeneous and are
-not nested.
+A list of C<VALUES> of the above types. Lists can be heterogeneous and
+nested.
 
 =back
 
@@ -266,6 +266,61 @@ it will crash.
 
 =back
 
+=head3 Guest Virtual NUMA Configuration
+
+=over 4
+
+=item B<vnuma=[ VNODE_SPEC, VNODE_SPEC, ... ]>
+
+Specify virtual NUMA configuration with positional arguments. The
+nth B<VNODE_SPEC> in the list specifies the configuration of the nth
+virtual node.
+
+Each B<VNODE_SPEC> is a list, which has a form of
+"[VNODE_CONFIG_OPTION, VNODE_CONFIG_OPTION, ... ]" (without quotes).
+
+For example, vnuma = [ ["pnode=0","size=512","vcpus=0-4","vdistances=10,20"] ]
+means vnode 0 is mapped to pnode 0, has 512MB ram, has vcpus 0 to 4, the
+distance to itself is 10 and the distance to vnode 1 is 20.
+
+Each B<VNODE_CONFIG_OPTION> is a quoted key=value pair. Supported
+B<VNODE_CONFIG_OPTION>s are (they are all mandatory at the moment):
+
+=over 4
+
+=item B<pnode=NUMBER>
+
+Specify which physical node this virtual node maps to.
+
+=item B<size=MBYTES>
+
+Specify the size of this virtual node. The sum of the memory sizes of
+all vnodes will become B<maxmem=>. If B<maxmem=> is specified separately,
+a check is performed to make sure the sum of all vnode memory matches
+B<maxmem=>.
+
+=item B<vcpus=CPU-STRING>
+
+Specify which vcpus belong to this node. B<CPU-STRING> is a
+comma-separated list of single vcpus and vcpu ranges. For example,
+"vcpus=0-5,8" means this node has vcpu 0 to vcpu 5, plus vcpu
+8.
+
+=item B<vdistances=NUMBER, NUMBER, ... >
+
+Specify virtual distance from this node to all nodes (including
+itself) with positional arguments. For example, "vdistances=10,20"
+for vnode 0 means the distance from vnode 0 to vnode 0 is 10, from
+vnode 0 to vnode 1 is 20. The number of arguments supplied must match
+the total number of vnodes.
+
+Normally you can use the values from "xl info -n" or "numactl
+--hardware" to fill in the vdistances list.
+
+=back
+
+=back
+
 =head3 Event Actions
 
 =over 4
 
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index 5b45213..4bd84a2 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -987,13 +987,183 @@ static int parse_nic_config(libxl_device_nic *nic, XLU_Config **config, char *to
     return 0;
 }
 
+static unsigned long parse_ulong(const char *str)
+{
+    char *endptr;
+    unsigned long val;
+
+    val = strtoul(str, &endptr, 10);
+    if (endptr == str || val == ULONG_MAX) {
+        fprintf(stderr, "xl: failed to convert \"%s\" to number\n", str);
+        exit(1);
+    }
+    return val;
+}
+
+static void parse_vnuma_config(const XLU_Config *config,
+                               libxl_domain_build_info *b_info)
+{
+    libxl_physinfo physinfo;
+    uint32_t nr_nodes;
+    XLU_ConfigList *vnuma;
+    int i, j, len, num_vnuma;
+    unsigned long max_vcpus = 0, max_memkb = 0;
+    /* Temporary storage for parsed vcpus information to avoid
+     * parsing config twice. This array has num_vnuma elements.
+     */
+    struct vcpu_range_parsed {
+        unsigned long start, end;
+    } *vcpu_range_parsed;
+
+    libxl_physinfo_init(&physinfo);
+    if (libxl_get_physinfo(ctx, &physinfo) != 0) {
+        libxl_physinfo_dispose(&physinfo);
+        fprintf(stderr, "libxl_get_physinfo failed\n");
+        exit(1);
+    }
+
+    nr_nodes = physinfo.nr_nodes;
+    libxl_physinfo_dispose(&physinfo);
+
+    if (xlu_cfg_get_list(config, "vnuma", &vnuma, &num_vnuma, 1))
+        return;
+
+    b_info->num_vnuma_nodes = num_vnuma;
+    b_info->vnuma_nodes = xcalloc(num_vnuma, sizeof(libxl_vnode_info));
+    vcpu_range_parsed = xcalloc(num_vnuma, sizeof(*vcpu_range_parsed));
+
+    for (i = 0; i < b_info->num_vnuma_nodes; i++) {
+        libxl_vnode_info *p = &b_info->vnuma_nodes[i];
+
+        libxl_vnode_info_init(p);
+        p->distances = xcalloc(b_info->num_vnuma_nodes,
+                               sizeof(*p->distances));
+        p->num_distances = b_info->num_vnuma_nodes;
+    }
+
+    for (i = 0; i < num_vnuma; i++) {
+        XLU_ConfigValue *vnode_spec, *conf_option;
+        XLU_ConfigList *vnode_config_list;
+        int conf_count;
+        libxl_vnode_info *p = &b_info->vnuma_nodes[i];
+
+        vnode_spec = xlu_cfg_get_listitem2(vnuma, i);
+        assert(vnode_spec);
+
+        xlu_cfg_value_get_list(config, vnode_spec, &vnode_config_list, 0);
+        if (!vnode_config_list) {
+            fprintf(stderr, "xl: cannot get vnode config option list\n");
+            exit(1);
+        }
+
+        for (conf_count = 0;
+             (conf_option =
+              xlu_cfg_get_listitem2(vnode_config_list, conf_count));
+             conf_count++) {
+
+            if (xlu_cfg_value_type(conf_option) == XLU_STRING) {
+                char *buf, *option_untrimmed, *value_untrimmed;
+                char *option, *value;
+                unsigned long val;
+
+                xlu_cfg_value_get_string(config, conf_option, &buf, 0);
+
+                if (!buf) continue;
+
+                if (split_string_into_pair(buf, "=",
+                                           &option_untrimmed,
+                                           &value_untrimmed)) {
+                    fprintf(stderr, "xl: failed to split \"%s\" into pair\n",
+                            buf);
+                    exit(1);
+                }
+                trim(isspace, option_untrimmed, &option);
+                trim(isspace, value_untrimmed, &value);
+
+                if (!strcmp("pnode", option)) {
+                    val = parse_ulong(value);
+                    if (val >= nr_nodes) {
+                        fprintf(stderr,
+                                "xl: invalid pnode number: %lu\n", val);
+                        exit(1);
+                    }
+                    p->pnode = val;
+                    libxl_defbool_set(&b_info->numa_placement, false);
+                } else if (!strcmp("size", option)) {
+                    val = parse_ulong(value);
+                    p->memkb = val << 10;
+                    max_memkb += p->memkb;
+                } else if (!strcmp("vcpus", option)) {
+                    libxl_string_list cpu_spec_list;
+                    unsigned long s, e;
+
+                    split_string_into_string_list(value, ",", &cpu_spec_list);
+                    len = libxl_string_list_length(&cpu_spec_list);
+
+                    for (j = 0; j < len; j++)
+                        parse_range(cpu_spec_list[j], &s, &e);
+
+                    vcpu_range_parsed[i].start = s;
+                    vcpu_range_parsed[i].end = e;
+                    max_vcpus += (e - s + 1);
+                    libxl_string_list_dispose(&cpu_spec_list);
+                } else if (!strcmp("vdistances", option)) {
+                    libxl_string_list vdist;
+
+                    split_string_into_string_list(value, ",", &vdist);
+                    len = libxl_string_list_length(&vdist);
+
+                    for (j = 0; j < len; j++) {
+                        val = parse_ulong(vdist[j]);
+                        p->distances[j] = val;
+                    }
+                    libxl_string_list_dispose(&vdist);
+                }
+                free(option);
+                free(value);
+                free(option_untrimmed);
+                free(value_untrimmed);
+            }
+        }
+    }
+
+    /* User has specified maxvcpus= */
+    if (b_info->max_vcpus != 0 && b_info->max_vcpus != max_vcpus) {
+        fprintf(stderr, "xl: vnuma vcpus and maxvcpus= mismatch\n");
+        exit(1);
+    } else
+        b_info->max_vcpus = max_vcpus;
+
+    /* User has specified maxmem= */
+    if (b_info->max_memkb != LIBXL_MEMKB_DEFAULT &&
+        b_info->max_memkb != max_memkb) {
+        fprintf(stderr, "xl: maxmem and vnuma memory size mismatch\n");
+        exit(1);
+    } else
+        b_info->max_memkb = max_memkb;
+
+    for (i = 0; i < b_info->num_vnuma_nodes; i++) {
+        libxl_vnode_info *p = &b_info->vnuma_nodes[i];
+        int cpu;
+
+        libxl_cpu_bitmap_alloc(ctx, &p->vcpus, b_info->max_vcpus);
+        libxl_bitmap_set_none(&p->vcpus);
+        for (cpu = vcpu_range_parsed[i].start;
+             cpu <= vcpu_range_parsed[i].end;
+             cpu++)
+            libxl_bitmap_set(&p->vcpus, cpu);
+    }
+
+    free(vcpu_range_parsed);
+}
+
 static void parse_config_data(const char *config_source,
                               const char *config_data,
                               int config_len,
                               libxl_domain_config *d_config)
 {
     const char *buf;
-    long l;
+    long l, vcpus = 0;
     XLU_Config *config;
     XLU_ConfigList *cpus, *vbds, *nics, *pcis, *cvfbs, *cpuids, *vtpms;
     XLU_ConfigList *channels, *ioports, *irqs, *iomem, *viridian;
@@ -1080,9 +1250,14 @@ static void parse_config_data(const char *config_source,
     if (!xlu_cfg_get_long (config, "extratime", &l, 0))
         b_info->sched_params.extratime = l;
 
-    if (!xlu_cfg_get_long (config, "vcpus", &l, 0)) {
-        b_info->max_vcpus = l;
+    if (!xlu_cfg_get_long (config, "memory", &l, 0))
+        b_info->target_memkb = l * 1024;
+
+    if (!xlu_cfg_get_long (config, "maxmem", &l, 0))
+        b_info->max_memkb = l * 1024;
 
+    if (!xlu_cfg_get_long (config, "vcpus", &l, 0)) {
+        vcpus = l;
         if (libxl_cpu_bitmap_alloc(ctx, &b_info->avail_vcpus, l)) {
             fprintf(stderr, "Unable to allocate cpumap\n");
             exit(1);
@@ -1095,6 +1270,21 @@ static void parse_config_data(const char *config_source,
     if (!xlu_cfg_get_long (config, "maxvcpus", &l, 0))
         b_info->max_vcpus = l;
 
+    parse_vnuma_config(config, b_info);
+
+    /* Set max_memkb to target_memkb and max_vcpus to avail_vcpus if
+     * they are not set by user specified config option or vnuma.
+     */
+    if (b_info->max_memkb == LIBXL_MEMKB_DEFAULT)
+        b_info->max_memkb = b_info->target_memkb;
+    if (b_info->max_vcpus == 0)
+        b_info->max_vcpus = vcpus;
+
+    if (b_info->max_vcpus < vcpus) {
+        fprintf(stderr, "xl: maxvcpus < vcpus\n");
+        exit(1);
+    }
+
     buf = NULL;
     if (!xlu_cfg_get_list (config, "cpus", &cpus, &num_cpus, 1) ||
         !xlu_cfg_get_string (config, "cpus", &buf, 0))
@@ -1105,14 +1295,6 @@ static void parse_config_data(const char *config_source,
         !xlu_cfg_get_string (config, "cpus_soft", &buf, 0))
         parse_vcpu_affinity(b_info, cpus, buf, num_cpus, false);
 
-    if (!xlu_cfg_get_long (config, "memory", &l, 0)) {
-        b_info->max_memkb = l * 1024;
-        b_info->target_memkb = b_info->max_memkb;
-    }
-
-    if (!xlu_cfg_get_long (config, "maxmem", &l, 0))
-        b_info->max_memkb = l * 1024;
-
     libxl_defbool_set(&b_info->claim_mode, claim_mode);
 
     if (xlu_cfg_get_string (config, "on_poweroff", &buf, 0))
-- 
1.9.1

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
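[Editor's note: for readers who want to try the series, here is a hypothetical
guest configuration (not part of the patch; the guest name, sizes and
distances are made up) that exercises the checks described above: the vnode
sizes add up to maxmem= and the vcpus listed across the vnodes add up to
maxvcpus=, so parse_vnuma_config() accepts it without raising the mismatch
errors.]

    # Two virtual NUMA nodes, each backed by a different physical node.
    name     = "guest-vnuma"
    memory   = 1024
    maxmem   = 1024        # must equal the sum of the vnode sizes (512 + 512)
    vcpus    = 4
    maxvcpus = 4           # must equal the number of vcpus listed in vnuma
    vnuma = [ [ "pnode=0", "size=512", "vcpus=0-1", "vdistances=10,20" ],
              [ "pnode=1", "size=512", "vcpus=2-3", "vdistances=20,10" ] ]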