|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] Make restore work properly with PV superpage flag
George, Keir, any thoughts or opinions on this patch?
On Fri, 2012-10-12 at 18:32 +0100, Dave McCracken wrote:
> For PV guests, the superpage flag means to unconditionally allocate all
> pages as superpages, which is required for Linux hugepages to work. Support
> for this on restore was not supported. This patch adds proper support for
> the superpage flag on restore.
>
> For HVM guests, the superpage flag has been used to mean attempt to allocate
> superpages if possible on restore. This functionality has been preserved.
>
> Signed-off-by: Dave McCracken <dave.mccracken@xxxxxxxxxx>
>
> --------
>
> xc_domain_restore.c | 130
> ++++++++++++++++++++++++++++++++++++++++------------
> 1 file changed, 101 insertions(+), 29 deletions(-)
>
>
> --- xen-unstable/tools/libxc/xc_domain_restore.c 2012-08-18
> 10:04:53.000000000 -0500
> +++ xen-unstable-restore//tools/libxc/xc_domain_restore.c 2012-08-20
> 07:24:22.000000000 -0500
> @@ -23,6 +23,19 @@
> *
> */
>
> +/*
> + * The superpages flag in restore has two different meanings depending on
> + * the type of domain.
> + *
> + * For an HVM domain, the flag means to look for properly aligned contiguous
> + * pages and try to allocate a superpage to satisfy it. If that fails,
> + * fall back to small pages.
> + *
> + * For a PV domain, the flag means allocate all memory as superpages. If
> that
> + * fails, the restore fails. This behavior is required for PV guests who
> + * want to use superpages.
> + */
> +
> #include <stdlib.h>
> #include <unistd.h>
>
> @@ -41,6 +54,9 @@ struct restore_ctx {
> xen_pfn_t *live_p2m; /* Live mapping of the table mapping each PFN to
> its current MFN. */
> xen_pfn_t *p2m; /* A table mapping each PFN to its new MFN. */
> xen_pfn_t *p2m_batch; /* A table of P2M mappings in the current region.
> */
> + xen_pfn_t *p2m_saved_batch; /* Copy of p2m_batch array for pv superpage
> alloc */
> + int superpages; /* Superpage allocation has been requested */
> + int hvm; /* This is an hvm domain */
> int completed; /* Set when a consistent image is available */
> int last_checkpoint; /* Set when we should commit to the current
> checkpoint when it completes. */
> int compressing; /* Set when sender signals that pages would be sent
> compressed (for Remus) */
> @@ -49,11 +65,6 @@ struct restore_ctx {
>
> #define HEARTBEAT_MS 1000
>
> -#define SUPERPAGE_PFN_SHIFT 9
> -#define SUPERPAGE_NR_PFNS (1UL << SUPERPAGE_PFN_SHIFT)
> -
> -#define SUPER_PAGE_START(pfn) (((pfn) & (SUPERPAGE_NR_PFNS-1)) == 0 )
> -
> #ifndef __MINIOS__
> static ssize_t rdexact(xc_interface *xch, struct restore_ctx *ctx,
> int fd, void* buf, size_t size)
> @@ -103,6 +114,49 @@ static ssize_t rdexact(xc_interface *xch
> #else
> #define RDEXACT read_exact
> #endif
> +
> +#define SUPERPAGE_PFN_SHIFT 9
> +#define SUPERPAGE_NR_PFNS (1UL << SUPERPAGE_PFN_SHIFT)
> +#define SUPERPAGE(_pfn) ((_pfn) & (~(SUPERPAGE_NR_PFNS-1)))
> +#define SUPER_PAGE_START(pfn) (((pfn) & (SUPERPAGE_NR_PFNS-1)) == 0 )
Why this code motion?
> +
> +/*
> +** When we're restoring into a pv superpage-allocated guest, we take
> +** a copy of the p2m_batch array to preserve the pfn, then allocate the
> +** corresponding superpages. We then fill in the p2m array using the saved
> +** pfns.
So the pfns are not contiguous, but we back them with a superpage? IOW
they are superpages in machine address space but not in physical address
space?
If they are contiguous in pseudo-physical space, you only need to save the first one?
> +*/
> +static int alloc_superpage_mfns(
> + xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, int nr_mfns)
> +{
> + int i, j, max = 0;
> + unsigned long pfn, base_pfn, mfn;
> +
> + for (i = 0; i < nr_mfns; i++)
> + {
> + pfn = ctx->p2m_batch[i];
> + base_pfn = SUPERPAGE(pfn);
> + if (ctx->p2m[base_pfn] != (INVALID_P2M_ENTRY-2))
> + {
> + ctx->p2m_saved_batch[max] = base_pfn;
Is p2m_saved_batch used anywhere other than in this function? Could it
be a local variable?
> + ctx->p2m_batch[max] = base_pfn;
> + max++;
> + ctx->p2m[base_pfn] = INVALID_P2M_ENTRY-2;
What is this magic -2? Can we get a symbolic constant (probably for the
whole expression)?
> + }
> + }
> + if (xc_domain_populate_physmap_exact(xch, dom, max, SUPERPAGE_PFN_SHIFT,
> + 0, ctx->p2m_batch) != 0)
> + return 1;
> +
> + for (i = 0; i < max; i++)
> + {
> + mfn = ctx->p2m_batch[i];
> + pfn = ctx->p2m_saved_batch[i];
If the original was != (INVALID_P2M_ENTRY-2) then you didn't initialise
p2m_saved_batch for this index?
> + for (j = 0; j < SUPERPAGE_NR_PFNS; j++)
> + ctx->p2m[pfn++] = mfn++;
> + }
> + return 0;
> +}
> /*
> ** In the state file (or during transfer), all page-table pages are
> ** converted into a 'canonical' form where references to actual mfns
> @@ -113,7 +167,7 @@ static ssize_t rdexact(xc_interface *xch
> static int uncanonicalize_pagetable(
> xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, void *page)
> {
> - int i, pte_last, nr_mfns = 0;
> + int i, rc, pte_last, nr_mfns = 0;
> unsigned long pfn;
> uint64_t pte;
> struct domain_info_context *dinfo = &ctx->dinfo;
> @@ -152,13 +206,20 @@ static int uncanonicalize_pagetable(
> }
>
> /* Allocate the requisite number of mfns. */
> - if ( nr_mfns &&
> - (xc_domain_populate_physmap_exact(xch, dom, nr_mfns, 0, 0,
> - ctx->p2m_batch) != 0) )
> - {
> - ERROR("Failed to allocate memory for batch.!\n");
> - errno = ENOMEM;
> - return 0;
> + if (nr_mfns)
> + {
> + if (!ctx->hvm && ctx->superpages)
> + rc = alloc_superpage_mfns(xch, dom, ctx, nr_mfns);
> + else
> + rc = xc_domain_populate_physmap_exact(xch, dom, nr_mfns, 0, 0,
> + ctx->p2m_batch);
Would it be clearer to rename alloc_superpage_mfns to (e.g.)
alloc_guest_mfns and have it start:
if ( ctx->hvm || !ctx->superpages )
return xc_domain...
Keeps the allocation logic in one place, since it is repeated again
below.
> +
> + if (rc)
> + {
> + ERROR("Failed to allocate memory for batch.!\n");
> + errno = ENOMEM;
> + return 0;
> + }
> }
>
> /* Second pass: uncanonicalize each present PTE */
> @@ -1018,8 +1079,8 @@ static int pagebuf_get(xc_interface *xch
>
> static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx
> *ctx,
> xen_pfn_t* region_mfn, unsigned long* pfn_type, int
> pae_extended_cr3,
> - unsigned int hvm, struct xc_mmu* mmu,
> - pagebuf_t* pagebuf, int curbatch, int superpages)
> + struct xc_mmu* mmu,
> + pagebuf_t* pagebuf, int curbatch)
> {
> int i, j, curpage, nr_mfns;
> int k, scount;
> @@ -1061,7 +1122,7 @@ static int apply_batch(xc_interface *xch
> /* Is this the next expected continuation? */
> if ( pfn == superpage_start + scount )
> {
> - if ( !superpages )
> + if ( !ctx->superpages )
> {
> ERROR("Unexpexted codepath with no superpages");
> return -1;
> @@ -1114,16 +1175,17 @@ static int apply_batch(xc_interface *xch
> }
>
> /* Are we ready to start a new superpage candidate? */
> - if ( superpages && SUPER_PAGE_START(pfn) )
> + if ( ctx->hvm && ctx->superpages && SUPER_PAGE_START(pfn) )
Can we push the movement of these things into the ctx struct into a
separate preparatory patch so it's easier to see the actual code
changes.
> {
> superpage_start=pfn;
> scount++;
> - continue;
> }
> -
> - /* Add the current pfn to pfn_batch */
> - ctx->p2m_batch[nr_mfns++] = pfn;
> - ctx->p2m[pfn]--;
> + else
> + {
> + /* Add the current pfn to pfn_batch */
> + ctx->p2m_batch[nr_mfns++] = pfn;
> + ctx->p2m[pfn]--;
> + }
> }
> }
>
> @@ -1144,9 +1206,14 @@ static int apply_batch(xc_interface *xch
> {
> DPRINTF("Mapping order 0, %d; first pfn %lx\n", nr_mfns,
> ctx->p2m_batch[0]);
>
> - if(xc_domain_populate_physmap_exact(xch, dom, nr_mfns, 0,
> - 0, ctx->p2m_batch) != 0)
> - {
> + if (!ctx->hvm && ctx->superpages)
> + rc = alloc_superpage_mfns(xch, dom, ctx, nr_mfns);
> + else
> + rc = xc_domain_populate_physmap_exact(xch, dom, nr_mfns, 0, 0,
> + ctx->p2m_batch);
> +
> + if (rc)
> + {
> ERROR("Failed to allocate memory for batch.!\n");
> errno = ENOMEM;
> return -1;
> @@ -1175,7 +1242,7 @@ static int apply_batch(xc_interface *xch
> || pagetype == XEN_DOMCTL_PFINFO_XALLOC )
> region_mfn[i] = ~0UL; /* map will fail but we don't care */
> else
> - region_mfn[i] = hvm ? pfn : ctx->p2m[pfn];
> + region_mfn[i] = ctx->hvm ? pfn : ctx->p2m[pfn];
> }
>
> /* Map relevant mfns */
> @@ -1298,7 +1365,7 @@ static int apply_batch(xc_interface *xch
> }
> }
>
> - if ( !hvm &&
> + if ( !ctx->hvm &&
> xc_add_mmu_update(xch, mmu,
> (((unsigned long long)mfn) << PAGE_SHIFT)
> | MMU_MACHPHYS_UPDATE, pfn) )
> @@ -1389,6 +1456,9 @@ int xc_domain_restore(xc_interface *xch,
>
> memset(ctx, 0, sizeof(*ctx));
>
> + ctx->superpages = superpages;
> + ctx->hvm = hvm;
> +
> ctxt = xc_hypercall_buffer_alloc(xch, ctxt, sizeof(*ctxt));
>
> if ( ctxt == NULL )
> @@ -1452,6 +1522,9 @@ int xc_domain_restore(xc_interface *xch,
>
> region_mfn = malloc(ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t),
> PAGE_SHIFT));
> ctx->p2m_batch = malloc(ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t),
> PAGE_SHIFT));
> + if (!ctx->hvm && ctx->superpages)
> + ctx->p2m_saved_batch =
> + malloc(ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT));
>
> if ( (ctx->p2m == NULL) || (pfn_type == NULL) ||
> (region_mfn == NULL) || (ctx->p2m_batch == NULL) )
> @@ -1575,8 +1648,7 @@ int xc_domain_restore(xc_interface *xch,
> int brc;
>
> brc = apply_batch(xch, dom, ctx, region_mfn, pfn_type,
> - pae_extended_cr3, hvm, mmu, &pagebuf, curbatch,
> - superpages);
> + pae_extended_cr3, mmu, &pagebuf, curbatch);
> if ( brc < 0 )
> goto out;
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |