[Xen-devel] [PATCH] x86: Get rid of p2m_host array allocation for HVM guests
When allocating guest memory for an HVM domain, libxc keeps the P2M
mapping for the entire guest memory around for the duration of the
launch as xc_dom_image->p2m_host. For guests with a large amount of
memory (e.g. 3904 GiB), this p2m_host allocation takes more than
7.5 GiB of space and leaves xl susceptible to being OOM-killed during
guest creation.

Convert the p2m_host table lookups into an arch-specific function that
computes the mapping on the fly for x86 HVM guests, avoiding the
allocation altogether and bringing xl's memory usage down from
> 8 GiB to < 70 MiB for such launches.

Signed-off-by: Varad Gautam <vrd@xxxxxxxxx>
---
Applies to stable-4.11+.

 tools/libxc/include/xc_dom.h |  11 +++-
 tools/libxc/xc_dom_arm.c     |   2 +
 tools/libxc/xc_dom_core.c    |   4 +-
 tools/libxc/xc_dom_x86.c     | 126 ++++++++++++++++++++++++++++---------------
 4 files changed, 99 insertions(+), 44 deletions(-)

diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h
index 8a66889..43abc0d 100644
--- a/tools/libxc/include/xc_dom.h
+++ b/tools/libxc/include/xc_dom.h
@@ -131,6 +131,9 @@ struct xc_dom_image {
      * a hybrid guest this means that it maps GPFNs to GPFNS.
      *
      * Note that the input is offset by rambase.
+     *
+     * This is not populated for guests that provide an arch-specific
+     * lookup hook in arch_hooks.
      */
     xen_pfn_t *p2m_host;
     void *p2m_guest;
@@ -274,6 +277,10 @@ struct xc_dom_arch {
     int arch_private_size;
 
     struct xc_dom_arch *next;
+
+    /* arch-specific p2m table lookup to get rid of the p2m_host array stored in
+     * xc_dom_image. */
+    xen_pfn_t (*p2m_host) (struct xc_dom_image *dom, unsigned long idx);
 };
 void xc_dom_register_arch_hooks(struct xc_dom_arch *hooks);
 
@@ -437,7 +444,9 @@ static inline xen_pfn_t xc_dom_p2m(struct xc_dom_image *dom, xen_pfn_t pfn)
         return pfn;
     if (pfn < dom->rambase_pfn || pfn >= dom->rambase_pfn + dom->total_pages)
         return INVALID_MFN;
-    return dom->p2m_host[pfn - dom->rambase_pfn];
+    return dom->arch_hooks->p2m_host ?
+               dom->arch_hooks->p2m_host(dom, pfn - dom->rambase_pfn)
+               : dom->p2m_host[pfn - dom->rambase_pfn];
 }
 
 #endif /* _XC_DOM_H */
diff --git a/tools/libxc/xc_dom_arm.c b/tools/libxc/xc_dom_arm.c
index 5b9eca6..b15c6d2 100644
--- a/tools/libxc/xc_dom_arm.c
+++ b/tools/libxc/xc_dom_arm.c
@@ -547,6 +547,7 @@ static struct xc_dom_arch xc_dom_32 = {
     .meminit = meminit,
     .bootearly = bootearly,
     .bootlate = bootlate,
+    .p2m_host = NULL,
 };
 
 static struct xc_dom_arch xc_dom_64 = {
@@ -563,6 +564,7 @@ static struct xc_dom_arch xc_dom_64 = {
     .meminit = meminit,
     .bootearly = bootearly,
     .bootlate = bootlate,
+    .p2m_host = NULL,
 };
 
 static void __init register_arch_hooks(void)
diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c
index 9bd04cb..f3eaae3 100644
--- a/tools/libxc/xc_dom_core.c
+++ b/tools/libxc/xc_dom_core.c
@@ -985,7 +985,9 @@ int xc_dom_update_guest_p2m(struct xc_dom_image *dom)
                   __FUNCTION__, dom->p2m_size);
         p2m_32 = dom->p2m_guest;
         for ( i = 0; i < dom->p2m_size; i++ )
-            if ( dom->p2m_host[i] != INVALID_PFN )
+            if ( dom->arch_hooks->p2m_host )
+                p2m_32[i] = dom->arch_hooks->p2m_host(dom, i);
+            else if ( dom->p2m_host[i] != INVALID_PFN )
                 p2m_32[i] = dom->p2m_host[i];
             else
                 p2m_32[i] = (uint32_t) - 1;
diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
index 3ab918c..58f9894 100644
--- a/tools/libxc/xc_dom_x86.c
+++ b/tools/libxc/xc_dom_x86.c
@@ -101,6 +101,10 @@ struct xc_dom_image_x86 {
 #define MAPPING_MAX 2
     struct xc_dom_x86_mapping maps[MAPPING_MAX];
     struct xc_dom_params *params;
+
+    /* Used to fake vmemrange information in case vNUMA information was not provided. */
+    xen_vmemrange_t dummy_vmemrange[2];
+    unsigned int nr_dummy_vmemranges;
 };
 
 /* get guest IO ABI protocol */
@@ -1252,13 +1256,13 @@ static int meminit_hvm(struct xc_dom_image *dom)
     unsigned int memflags = 0;
     int claim_enabled = dom->claim_enabled;
     uint64_t total_pages;
-    xen_vmemrange_t dummy_vmemrange[2];
     unsigned int dummy_vnode_to_pnode[1];
     xen_vmemrange_t *vmemranges;
     unsigned int *vnode_to_pnode;
     unsigned int nr_vmemranges, nr_vnodes;
     xc_interface *xch = dom->xch;
     uint32_t domid = dom->guest_domid;
+    struct xc_dom_image_x86 *domx86 = dom->arch_private;
 
     if ( nr_pages > target_pages )
         memflags |= XENMEMF_populate_on_demand;
@@ -1274,25 +1278,26 @@ static int meminit_hvm(struct xc_dom_image *dom)
          * has no effect on the actual result.
          */
-        dummy_vmemrange[0].start = 0;
-        dummy_vmemrange[0].end = dom->lowmem_end;
-        dummy_vmemrange[0].flags = 0;
-        dummy_vmemrange[0].nid = 0;
-        nr_vmemranges = 1;
+        domx86->dummy_vmemrange[0].start = 0;
+        domx86->dummy_vmemrange[0].end = dom->lowmem_end;
+        domx86->dummy_vmemrange[0].flags = 0;
+        domx86->dummy_vmemrange[0].nid = 0;
+        domx86->nr_dummy_vmemranges = 1;
 
         if ( dom->highmem_end > (1ULL << 32) )
         {
-            dummy_vmemrange[1].start = 1ULL << 32;
-            dummy_vmemrange[1].end = dom->highmem_end;
-            dummy_vmemrange[1].flags = 0;
-            dummy_vmemrange[1].nid = 0;
+            domx86->dummy_vmemrange[1].start = 1ULL << 32;
+            domx86->dummy_vmemrange[1].end = dom->highmem_end;
+            domx86->dummy_vmemrange[1].flags = 0;
+            domx86->dummy_vmemrange[1].nid = 0;
 
-            nr_vmemranges++;
+            domx86->nr_dummy_vmemranges++;
         }
 
         dummy_vnode_to_pnode[0] = XC_NUMA_NO_NODE;
         nr_vnodes = 1;
-        vmemranges = dummy_vmemrange;
+        vmemranges = domx86->dummy_vmemrange;
+        nr_vmemranges = domx86->nr_dummy_vmemranges;
         vnode_to_pnode = dummy_vnode_to_pnode;
     }
     else
@@ -1329,25 +1334,6 @@ static int meminit_hvm(struct xc_dom_image *dom)
     }
 
     dom->p2m_size = p2m_size;
-    dom->p2m_host = xc_dom_malloc(dom, sizeof(xen_pfn_t) *
-                                  dom->p2m_size);
-    if ( dom->p2m_host == NULL )
-    {
-        DOMPRINTF("Could not allocate p2m");
-        goto error_out;
-    }
-
-    for ( i = 0; i < p2m_size; i++ )
-        dom->p2m_host[i] = ((xen_pfn_t)-1);
-    for ( vmemid = 0; vmemid < nr_vmemranges; vmemid++ )
-    {
-        uint64_t pfn;
-
-        for ( pfn = vmemranges[vmemid].start >> PAGE_SHIFT;
-              pfn < vmemranges[vmemid].end >> PAGE_SHIFT;
-              pfn++ )
-            dom->p2m_host[pfn] = pfn;
-    }
 
     /*
      * Try to claim pages for early warning of insufficient memory available.
@@ -1395,8 +1381,12 @@ static int meminit_hvm(struct xc_dom_image *dom)
      */
     if ( dom->device_model )
     {
+        xen_pfn_t pfn_batch[0xa0];
+        for ( i = 0; i < 0xa0; i++ )
+            pfn_batch[i] = dom->arch_hooks->p2m_host(dom, i);
+
         rc = xc_domain_populate_physmap_exact(
-            xch, domid, 0xa0, 0, memflags, &dom->p2m_host[0x00]);
+            xch, domid, 0xa0, 0, memflags, &pfn_batch[0x00]);
         if ( rc != 0 )
         {
             DOMPRINTF("Could not populate low memory (< 0xA0).\n");
@@ -1439,7 +1429,7 @@ static int meminit_hvm(struct xc_dom_image *dom)
         if ( count > max_pages )
             count = max_pages;
 
-        cur_pfn = dom->p2m_host[cur_pages];
+        cur_pfn = dom->arch_hooks->p2m_host(dom, cur_pages);
 
         /* Take care the corner cases of super page tails */
         if ( ((cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1)) != 0) &&
@@ -1465,8 +1455,7 @@ static int meminit_hvm(struct xc_dom_image *dom)
                 xen_pfn_t sp_extents[nr_extents];
 
                 for ( i = 0; i < nr_extents; i++ )
-                    sp_extents[i] =
-                        dom->p2m_host[cur_pages+(i<<SUPERPAGE_1GB_SHIFT)];
+                    sp_extents[i] = dom->arch_hooks->p2m_host(dom, cur_pages+(i<<SUPERPAGE_1GB_SHIFT));
 
                 done = xc_domain_populate_physmap(xch, domid, nr_extents,
                                                   SUPERPAGE_1GB_SHIFT,
@@ -1505,8 +1494,7 @@ static int meminit_hvm(struct xc_dom_image *dom)
                     xen_pfn_t sp_extents[nr_extents];
 
                     for ( i = 0; i < nr_extents; i++ )
-                        sp_extents[i] =
-                            dom->p2m_host[cur_pages+(i<<SUPERPAGE_2MB_SHIFT)];
+                        sp_extents[i] = dom->arch_hooks->p2m_host(dom, cur_pages+(i<<SUPERPAGE_2MB_SHIFT));
 
                     done = xc_domain_populate_physmap(xch, domid, nr_extents,
                                                       SUPERPAGE_2MB_SHIFT,
@@ -1521,14 +1509,39 @@ static int meminit_hvm(struct xc_dom_image *dom)
                 }
             }
         }
-
         /* Fall back to 4kB extents. */
         if ( count != 0 )
         {
-            rc = xc_domain_populate_physmap_exact(
-                xch, domid, count, 0, new_memflags, &dom->p2m_host[cur_pages]);
-            cur_pages += count;
-            stat_normal_pages += count;
+            unsigned long nr_extents;
+            xen_pfn_t *pfn_batch;
+
+            pfn_batch = calloc(SUPERPAGE_1GB_NR_PFNS, sizeof(*pfn_batch));
+            if ( !pfn_batch ) {
+                DOMPRINTF("Could not allocate memory to construct physmap batch.");
+                rc = -1;
+                goto error_out;
+            }
+
+            while ( count > 0 ) {
+                for ( i = 0; i < count && i < SUPERPAGE_1GB_NR_PFNS; i++)
+                    pfn_batch[i] = dom->arch_hooks->p2m_host(dom, cur_pages+i);
+
+                nr_extents = count > SUPERPAGE_1GB_NR_PFNS ? SUPERPAGE_1GB_NR_PFNS : count;
+                rc = xc_domain_populate_physmap_exact(xch, domid, nr_extents,
+                                                      0, new_memflags, &pfn_batch[0]);
+                if ( rc != 0 ) {
+                    DOMPRINTF("Could not populate physmap batch.");
+                    free(pfn_batch);
+                    rc = -1;
+                    goto error_out;
+                }
+
+                stat_normal_pages += nr_extents;
+                cur_pages += nr_extents;
+                count -= nr_extents;
+            }
+
+            free(pfn_batch);
         }
     }
 
@@ -1780,6 +1793,31 @@ static int bootlate_hvm(struct xc_dom_image *dom)
     return 0;
 }
 
+static xen_pfn_t p2m_host_hvm(struct xc_dom_image *dom, unsigned long idx)
+{
+    struct xc_dom_image_x86 *domx86 = dom->arch_private;
+    xen_vmemrange_t *vmemranges;
+    unsigned int nr_vmemranges;
+    int vmemid;
+
+    if ( dom->nr_vmemranges ) {
+        vmemranges = dom->vmemranges;
+        nr_vmemranges = dom->nr_vmemranges;
+    } else {
+        vmemranges = domx86->dummy_vmemrange;
+        nr_vmemranges = domx86->nr_dummy_vmemranges;
+    }
+
+    for ( vmemid = 0; vmemid < nr_vmemranges ; vmemid++ ) {
+        if ( idx >= (vmemranges[vmemid].start >> XC_DOM_PAGE_SHIFT(dom))
+             && idx < (vmemranges[vmemid].end >> XC_DOM_PAGE_SHIFT(dom)) ) {
+            return idx;
+        }
+    }
+
+    return ((xen_pfn_t)-1);
+}
+
 bool xc_dom_translated(const struct xc_dom_image *dom)
 {
     /* HVM guests are translated.  PV guests are not. */
@@ -1805,6 +1843,7 @@ static struct xc_dom_arch xc_dom_32_pae = {
     .meminit = meminit_pv,
     .bootearly = bootearly,
     .bootlate = bootlate_pv,
+    .p2m_host = NULL,
 };
 
 static struct xc_dom_arch xc_dom_64 = {
@@ -1824,6 +1863,7 @@ static struct xc_dom_arch xc_dom_64 = {
     .meminit = meminit_pv,
     .bootearly = bootearly,
     .bootlate = bootlate_pv,
+    .p2m_host = NULL,
 };
 
 static struct xc_dom_arch xc_hvm_32 = {
@@ -1831,6 +1871,7 @@ static struct xc_dom_arch xc_hvm_32 = {
     .native_protocol = XEN_IO_PROTO_ABI_X86_32,
     .page_shift = PAGE_SHIFT_X86,
     .sizeof_pfn = 4,
+    .arch_private_size = sizeof(struct xc_dom_image_x86),
     .alloc_magic_pages = alloc_magic_pages_hvm,
     .alloc_pgtables = alloc_pgtables_hvm,
     .setup_pgtables = NULL,
@@ -1840,6 +1881,7 @@ static struct xc_dom_arch xc_hvm_32 = {
     .meminit = meminit_hvm,
     .bootearly = bootearly,
     .bootlate = bootlate_hvm,
+    .p2m_host = p2m_host_hvm,
 };
 
 static void __init register_arch_hooks(void)
-- 
2.7.4

Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Ralf Herbrich
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879
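For reference, the "> 7.5 GiB" figure in the commit message follows directly
from the old array layout: one xen_pfn_t per 4 KiB guest page. A minimal
standalone check of that arithmetic (this sketch assumes 4 KiB pages and an
8-byte xen_pfn_t, as on x86_64; it is not part of the patch):

/* Back-of-the-envelope size of the p2m_host array for a 3904 GiB guest.
 * Assumes 4 KiB pages and sizeof(xen_pfn_t) == 8, as on x86_64. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t guest_bytes = 3904ULL << 30;               /* 3904 GiB of guest RAM */
    uint64_t nr_pages    = guest_bytes >> 12;           /* 4 KiB pages */
    uint64_t p2m_bytes   = nr_pages * sizeof(uint64_t); /* one entry per page */

    printf("%llu pages -> p2m_host of %.3f GiB\n",
           (unsigned long long)nr_pages,
           p2m_bytes / (double)(1ULL << 30));           /* about 7.6 GiB */
    return 0;
}

That per-page array is what the patch drops in favour of computing each entry
on demand.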
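The on-the-fly lookup is cheap because, for an HVM guest, the mapping inside
each populated vmemrange is simply the identity (the removed loop stored
dom->p2m_host[pfn] = pfn). A simplified standalone model of that idea follows;
the types and names are illustrative only, not the libxc API:

/* Simplified model: resolve a page index to a frame number from a small set
 * of ranges instead of a per-page table. Identity inside a range, invalid in
 * the holes (e.g. the MMIO gap below 4 GiB). */
#include <stdio.h>
#include <stdint.h>

#define MODEL_PAGE_SHIFT  12
#define MODEL_INVALID_PFN ((uint64_t)-1)

struct model_range { uint64_t start, end; };   /* byte addresses, end exclusive */

static uint64_t model_lookup(const struct model_range *r, unsigned int nr,
                             uint64_t idx)
{
    for ( unsigned int i = 0; i < nr; i++ )
        if ( idx >= (r[i].start >> MODEL_PAGE_SHIFT) &&
             idx <  (r[i].end   >> MODEL_PAGE_SHIFT) )
            return idx;                        /* identity within the range */
    return MODEL_INVALID_PFN;                  /* hole */
}

int main(void)
{
    /* Low memory below 3 GiB, high memory remapped above 4 GiB. */
    struct model_range ranges[] = {
        { 0,          3ULL << 30 },
        { 4ULL << 30, 8ULL << 30 },
    };

    printf("0x%llx 0x%llx\n",
           (unsigned long long)model_lookup(ranges, 2, 0x1000),    /* in range */
           (unsigned long long)model_lookup(ranges, 2, 0xd0000));  /* in the gap */
    return 0;
}

The trade-off is an O(nr_vmemranges) scan per lookup instead of an O(1) array
read, which is negligible given that a guest has only a handful of vmemranges,
while the per-page array and its multi-GiB footprint disappear entirely.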