[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v7 6/9] libxl: build numa nodes memory blocks
Create the vmemrange structure based on the PV guest's E820 map. Values are in megabytes. Also export the E820 filter code, e820_sanitize, so that it is available internally. Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx> --- tools/libxl/libxl_internal.h | 9 ++ tools/libxl/libxl_numa.c | 193 ++++++++++++++++++++++++++++++++++++++++++ tools/libxl/libxl_x86.c | 3 +- 3 files changed, 204 insertions(+), 1 deletion(-) diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index beb052e..63ccb5e 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -3088,6 +3088,15 @@ void libxl__numa_candidate_put_nodemap(libxl__gc *gc, libxl_bitmap_copy(CTX, &cndt->nodemap, nodemap); } +bool libxl__vnodemap_is_usable(libxl__gc *gc, libxl_domain_build_info *info); + +int e820_sanitize(libxl_ctx *ctx, struct e820entry src[], uint32_t *nr_entries, + unsigned long map_limitkb, unsigned long balloon_kb); + +int libxl__vnuma_align_mem(libxl__gc *gc, uint32_t domid, + struct libxl_domain_build_info *b_info, + vmemrange_t *memblks); + _hidden int libxl__ms_vm_genid_set(libxl__gc *gc, uint32_t domid, const libxl_ms_vm_genid *id); diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c index 94ca4fe..4ae547e 100644 --- a/tools/libxl/libxl_numa.c +++ b/tools/libxl/libxl_numa.c @@ -19,6 +19,8 @@ #include "libxl_internal.h" +#include "libxl_vnuma.h" + /* * What follows are helpers for generating all the k-combinations * without repetitions of a set S with n elements in it. Formally @@ -508,6 +510,197 @@ int libxl__get_numa_candidate(libxl__gc *gc, } /* + * Check if we can fit vnuma nodes to numa pnodes + * from vnode_to_pnode array. + */ +bool libxl__vnodemap_is_usable(libxl__gc *gc, + libxl_domain_build_info *info) +{ + unsigned int i; + libxl_numainfo *ninfo = NULL; + unsigned long long *claim; + unsigned int node; + uint64_t *sz_array; + int nr_nodes = 0; + + /* Cannot use specified mapping if not NUMA machine. 
*/ + ninfo = libxl_get_numainfo(CTX, &nr_nodes); + if (ninfo == NULL) + return false; + + sz_array = info->vnuma_mem; + claim = libxl__calloc(gc, info->vnodes, sizeof(*claim)); + /* Get total memory required on each physical node. */ + for (i = 0; i < info->vnodes; i++) + { + node = info->vnuma_vnodemap[i]; + + if (node < nr_nodes) + claim[node] += (sz_array[i] << 20); + else + goto vnodemapout; + } + for (i = 0; i < nr_nodes; i++) { + if (claim[i] > ninfo[i].free) + /* Cannot complete user request, falling to default. */ + goto vnodemapout; + } + + vnodemapout: + return true; +} + +/* + * Returns number of absent pages within e820 map + * between start and end addresses passed. Needed + * to correctly set numa memory ranges for domain. + */ +static unsigned long e820_memory_hole_size(unsigned long start, + unsigned long end, + struct e820entry e820[], + unsigned int nr) +{ + unsigned int i; + unsigned long absent, start_blk, end_blk; + + /* init absent number of pages with all memmap size. */ + absent = end - start; + for (i = 0; i < nr; i++) { + /* if not E820_RAM region, skip it. */ + if (e820[i].type == E820_RAM) { + start_blk = e820[i].addr; + end_blk = e820[i].addr + e820[i].size; + /* beginning address is within this region? */ + if (start >= start_blk && start <= end_blk) { + if (end > end_blk) + absent -= end_blk - start; + else + /* fit the region? then no absent pages. */ + absent -= end - start; + continue; + } + /* found the end of range in this region? */ + if (end <= end_blk && end >= start_blk) { + absent -= end - start_blk; + /* no need to look for more ranges. */ + break; + } + } + } + return absent; +} + +/* + * For each node, build memory block start and end addresses. + * Substract any memory hole from the range found in e820 map. + * vnode memory size are passed here in megabytes, the result is + * in memory block addresses. + * Linux kernel will adjust numa memory block sizes on its own. 
+ * But we want to provide to the kernel numa block addresses that + * will be the same in kernel and hypervisor. + */ +#define max(a,b) ((a > b) ? a : b) +int libxl__vnuma_align_mem(libxl__gc *gc, + uint32_t domid, + /* IN: mem sizes in megabytes */ + libxl_domain_build_info *b_info, + /* OUT: linux NUMA blocks addresses */ + vmemrange_t *memblks) +{ + unsigned int i; + int j, rc; + uint64_t next_start_blk, end_max = 0, size; + uint32_t nr; + struct e820entry map[E820MAX]; + + errno = ERROR_INVAL; + if (b_info->vnodes == 0) + return -EINVAL; + + if (!memblks || !b_info->vnuma_mem) + return -EINVAL; + + libxl_ctx *ctx = libxl__gc_owner(gc); + + /* Retrieve e820 map for this host. */ + rc = xc_get_machine_memory_map(ctx->xch, map, E820MAX); + + if (rc < 0) { + errno = rc; + return -EINVAL; + } + nr = rc; + rc = e820_sanitize(ctx, map, &nr, b_info->target_memkb, + (b_info->max_memkb - b_info->target_memkb) + + b_info->u.pv.slack_memkb); + if (rc) + { + errno = rc; + return -EINVAL; + } + + /* find max memory address for this host. */ + for (j = 0; j < nr; j++) + if (map[j].type == E820_RAM) { + end_max = max(end_max, map[j].addr + map[j].size); + } + + memset(memblks, 0, sizeof(*memblks) * b_info->vnodes); + next_start_blk = 0; + + memblks[0].start = map[0].addr; + + for (i = 0; i < b_info->vnodes; i++) { + + memblks[i].start += next_start_blk; + memblks[i].end = memblks[i].start + (b_info->vnuma_mem[i] << 20); + + if (memblks[i].end > end_max) { + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, + "Shrunk vNUMA memory block %d address to max e820 address: \ + %#010lx -> %#010lx\n", i, memblks[i].end, end_max); + memblks[i].end = end_max; + break; + } + + size = memblks[i].end - memblks[i].start; + /* + * For pv host with e820_host option turned on we need + * to take into account memory holes. For pv host with + * e820_host disabled or unset, the map is a contiguous + * RAM region. 
+ */ + if (libxl_defbool_val(b_info->u.pv.e820_host)) { + while((memblks[i].end - memblks[i].start - + e820_memory_hole_size(memblks[i].start, + memblks[i].end, map, nr)) < size ) + { + memblks[i].end += MIN_VNODE_SIZE << 10; + if (memblks[i].end > end_max) { + memblks[i].end = end_max; + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, + "Shrunk vNUMA memory block %d address to max e820 \ + address: %#010lx -> %#010lx\n", i, memblks[i].end, + end_max); + break; + } + } + } + next_start_blk = memblks[i].end; + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,"i %d, start = %#010lx, \ + end = %#010lx\n", i, memblks[i].start, memblks[i].end); + } + + /* Did not form memory addresses for every node? */ + if (i != b_info->vnodes) { + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Not all nodes were populated with \ + block addresses, only %d out of %d", i, b_info->vnodes); + return -EINVAL; + } + return 0; +} + +/* * Local variables: * mode: C * c-basic-offset: 4 diff --git a/tools/libxl/libxl_x86.c b/tools/libxl/libxl_x86.c index 7589060..46e84e4 100644 --- a/tools/libxl/libxl_x86.c +++ b/tools/libxl/libxl_x86.c @@ -1,5 +1,6 @@ #include "libxl_internal.h" #include "libxl_arch.h" +#include "libxl_vnuma.h" static const char *e820_names(int type) { @@ -14,7 +15,7 @@ static const char *e820_names(int type) return "Unknown"; } -static int e820_sanitize(libxl_ctx *ctx, struct e820entry src[], +int e820_sanitize(libxl_ctx *ctx, struct e820entry src[], uint32_t *nr_entries, unsigned long map_limitkb, unsigned long balloon_kb) -- 1.7.10.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |