|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH RESEND v7 6/9] libxl: build numa nodes memory blocks
Create the vmemrange structure based on the
PV guest's E820 map. Values are in megabytes.
Also export the E820 filter code e820_sanitize
so that it is available internally.
Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
---
tools/libxl/libxl_internal.h | 9 ++
tools/libxl/libxl_numa.c | 193 ++++++++++++++++++++++++++++++++++++++++++
tools/libxl/libxl_x86.c | 3 +-
3 files changed, 204 insertions(+), 1 deletion(-)
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index beb052e..63ccb5e 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3088,6 +3088,15 @@ void libxl__numa_candidate_put_nodemap(libxl__gc *gc,
libxl_bitmap_copy(CTX, &cndt->nodemap, nodemap);
}
+bool libxl__vnodemap_is_usable(libxl__gc *gc, libxl_domain_build_info *info);
+
+int e820_sanitize(libxl_ctx *ctx, struct e820entry src[], uint32_t *nr_entries,
+ unsigned long map_limitkb, unsigned long balloon_kb);
+
+int libxl__vnuma_align_mem(libxl__gc *gc, uint32_t domid,
+ struct libxl_domain_build_info *b_info,
+ vmemrange_t *memblks);
+
_hidden int libxl__ms_vm_genid_set(libxl__gc *gc, uint32_t domid,
const libxl_ms_vm_genid *id);
diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c
index 94ca4fe..4ae547e 100644
--- a/tools/libxl/libxl_numa.c
+++ b/tools/libxl/libxl_numa.c
@@ -19,6 +19,8 @@
#include "libxl_internal.h"
+#include "libxl_vnuma.h"
+
/*
* What follows are helpers for generating all the k-combinations
* without repetitions of a set S with n elements in it. Formally
@@ -508,6 +510,197 @@ int libxl__get_numa_candidate(libxl__gc *gc,
}
/*
+ * Check if we can fit vnuma nodes to numa pnodes
+ * from vnode_to_pnode array.
+ */
+bool libxl__vnodemap_is_usable(libxl__gc *gc,
+ libxl_domain_build_info *info)
+{
+ unsigned int i;
+ libxl_numainfo *ninfo = NULL;
+ unsigned long long *claim;
+ unsigned int node;
+ uint64_t *sz_array;
+ int nr_nodes = 0;
+
+ /* Cannot use specified mapping if not NUMA machine. */
+ ninfo = libxl_get_numainfo(CTX, &nr_nodes);
+ if (ninfo == NULL)
+ return false;
+
+ sz_array = info->vnuma_mem;
+ claim = libxl__calloc(gc, info->vnodes, sizeof(*claim));
+ /* Get total memory required on each physical node. */
+ for (i = 0; i < info->vnodes; i++)
+ {
+ node = info->vnuma_vnodemap[i];
+
+ if (node < nr_nodes)
+ claim[node] += (sz_array[i] << 20);
+ else
+ goto vnodemapout;
+ }
+ for (i = 0; i < nr_nodes; i++) {
+ if (claim[i] > ninfo[i].free)
+ /* Cannot complete user request, falling to default. */
+ goto vnodemapout;
+ }
+
+ vnodemapout:
+ return true;
+}
+
+/*
+ * Returns number of absent pages within e820 map
+ * between start and end addresses passed. Needed
+ * to correctly set numa memory ranges for domain.
+ */
+static unsigned long e820_memory_hole_size(unsigned long start,
+ unsigned long end,
+ struct e820entry e820[],
+ unsigned int nr)
+{
+ unsigned int i;
+ unsigned long absent, start_blk, end_blk;
+
+ /* init absent number of pages with all memmap size. */
+ absent = end - start;
+ for (i = 0; i < nr; i++) {
+ /* if not E820_RAM region, skip it. */
+ if (e820[i].type == E820_RAM) {
+ start_blk = e820[i].addr;
+ end_blk = e820[i].addr + e820[i].size;
+ /* beginning address is within this region? */
+ if (start >= start_blk && start <= end_blk) {
+ if (end > end_blk)
+ absent -= end_blk - start;
+ else
+ /* fit the region? then no absent pages. */
+ absent -= end - start;
+ continue;
+ }
+ /* found the end of range in this region? */
+ if (end <= end_blk && end >= start_blk) {
+ absent -= end - start_blk;
+ /* no need to look for more ranges. */
+ break;
+ }
+ }
+ }
+ return absent;
+}
+
+/*
+ * For each node, build memory block start and end addresses.
+ * Substract any memory hole from the range found in e820 map.
+ * vnode memory size are passed here in megabytes, the result is
+ * in memory block addresses.
+ * Linux kernel will adjust numa memory block sizes on its own.
+ * But we want to provide to the kernel numa block addresses that
+ * will be the same in kernel and hypervisor.
+ */
+#define max(a,b) ((a > b) ? a : b)
+int libxl__vnuma_align_mem(libxl__gc *gc,
+ uint32_t domid,
+ /* IN: mem sizes in megabytes */
+ libxl_domain_build_info *b_info,
+ /* OUT: linux NUMA blocks addresses */
+ vmemrange_t *memblks)
+{
+ unsigned int i;
+ int j, rc;
+ uint64_t next_start_blk, end_max = 0, size;
+ uint32_t nr;
+ struct e820entry map[E820MAX];
+
+ errno = ERROR_INVAL;
+ if (b_info->vnodes == 0)
+ return -EINVAL;
+
+ if (!memblks || !b_info->vnuma_mem)
+ return -EINVAL;
+
+ libxl_ctx *ctx = libxl__gc_owner(gc);
+
+ /* Retrieve e820 map for this host. */
+ rc = xc_get_machine_memory_map(ctx->xch, map, E820MAX);
+
+ if (rc < 0) {
+ errno = rc;
+ return -EINVAL;
+ }
+ nr = rc;
+ rc = e820_sanitize(ctx, map, &nr, b_info->target_memkb,
+ (b_info->max_memkb - b_info->target_memkb) +
+ b_info->u.pv.slack_memkb);
+ if (rc)
+ {
+ errno = rc;
+ return -EINVAL;
+ }
+
+ /* find max memory address for this host. */
+ for (j = 0; j < nr; j++)
+ if (map[j].type == E820_RAM) {
+ end_max = max(end_max, map[j].addr + map[j].size);
+ }
+
+ memset(memblks, 0, sizeof(*memblks) * b_info->vnodes);
+ next_start_blk = 0;
+
+ memblks[0].start = map[0].addr;
+
+ for (i = 0; i < b_info->vnodes; i++) {
+
+ memblks[i].start += next_start_blk;
+ memblks[i].end = memblks[i].start + (b_info->vnuma_mem[i] << 20);
+
+ if (memblks[i].end > end_max) {
+ LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
+ "Shrunk vNUMA memory block %d address to max e820 address:
\
+ %#010lx -> %#010lx\n", i, memblks[i].end, end_max);
+ memblks[i].end = end_max;
+ break;
+ }
+
+ size = memblks[i].end - memblks[i].start;
+ /*
+ * For pv host with e820_host option turned on we need
+ * to take into account memory holes. For pv host with
+ * e820_host disabled or unset, the map is a contiguous
+ * RAM region.
+ */
+ if (libxl_defbool_val(b_info->u.pv.e820_host)) {
+ while((memblks[i].end - memblks[i].start -
+ e820_memory_hole_size(memblks[i].start,
+ memblks[i].end, map, nr)) < size )
+ {
+ memblks[i].end += MIN_VNODE_SIZE << 10;
+ if (memblks[i].end > end_max) {
+ memblks[i].end = end_max;
+ LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
+ "Shrunk vNUMA memory block %d address to max e820 \
+ address: %#010lx -> %#010lx\n", i, memblks[i].end,
+ end_max);
+ break;
+ }
+ }
+ }
+ next_start_blk = memblks[i].end;
+ LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,"i %d, start = %#010lx, \
+ end = %#010lx\n", i, memblks[i].start, memblks[i].end);
+ }
+
+ /* Did not form memory addresses for every node? */
+ if (i != b_info->vnodes) {
+ LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Not all nodes were populated with \
+ block addresses, only %d out of %d", i, b_info->vnodes);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
* Local variables:
* mode: C
* c-basic-offset: 4
diff --git a/tools/libxl/libxl_x86.c b/tools/libxl/libxl_x86.c
index 7589060..46e84e4 100644
--- a/tools/libxl/libxl_x86.c
+++ b/tools/libxl/libxl_x86.c
@@ -1,5 +1,6 @@
#include "libxl_internal.h"
#include "libxl_arch.h"
+#include "libxl_vnuma.h"
static const char *e820_names(int type)
{
@@ -14,7 +15,7 @@ static const char *e820_names(int type)
return "Unknown";
}
-static int e820_sanitize(libxl_ctx *ctx, struct e820entry src[],
+int e820_sanitize(libxl_ctx *ctx, struct e820entry src[],
uint32_t *nr_entries,
unsigned long map_limitkb,
unsigned long balloon_kb)
--
1.7.10.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |