|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] Make restore work properly with PV superpage flag
George, Keir, any thoughts or opinions on this patch?
On Fri, 2012-10-12 at 18:32 +0100, Dave McCracken wrote:
> For PV guests, the superpage flag means to unconditionally allocate all
> pages as superpages, which is required for Linux hugepages to work. Support
> for this on restore was not supported. This patch adds proper support for
> the superpage flag on restore.
>
> For HVM guests, the superpage flag has been used to mean attempt to allocate
> superpages if possible on restore. This functionality has been preserved.
>
> Signed-off-by: Dave McCracken <dave.mccracken@xxxxxxxxxx>
>
> --------
>
> xc_domain_restore.c | 130
> ++++++++++++++++++++++++++++++++++++++++------------
> 1 file changed, 101 insertions(+), 29 deletions(-)
>
>
> --- xen-unstable/tools/libxc/xc_domain_restore.c 2012-08-18
> 10:04:53.000000000 -0500
> +++ xen-unstable-restore//tools/libxc/xc_domain_restore.c 2012-08-20
> 07:24:22.000000000 -0500
> @@ -23,6 +23,19 @@
> *
> */
>
> +/*
> + * The superpages flag in restore has two different meanings depending on
> + * the type of domain.
> + *
> + * For an HVM domain, the flag means to look for properly aligned contiguous
> + * pages and try to allocate a superpage to satisfy it. If that fails,
> + * fall back to small pages.
> + *
> + * For a PV domain, the flag means allocate all memory as superpages. If
> that
> + * fails, the restore fails. This behavior is required for PV guests who
> + * want to use superpages.
> + */
> +
> #include <stdlib.h>
> #include <unistd.h>
>
> @@ -41,6 +54,9 @@ struct restore_ctx {
> xen_pfn_t *live_p2m; /* Live mapping of the table mapping each PFN to
> its current MFN. */
> xen_pfn_t *p2m; /* A table mapping each PFN to its new MFN. */
> xen_pfn_t *p2m_batch; /* A table of P2M mappings in the current region.
> */
> + xen_pfn_t *p2m_saved_batch; /* Copy of p2m_batch array for pv superpage
> alloc */
> + int superpages; /* Superpage allocation has been requested */
> + int hvm; /* This is an hvm domain */
> int completed; /* Set when a consistent image is available */
> int last_checkpoint; /* Set when we should commit to the current
> checkpoint when it completes. */
> int compressing; /* Set when sender signals that pages would be sent
> compressed (for Remus) */
> @@ -49,11 +65,6 @@ struct restore_ctx {
>
> #define HEARTBEAT_MS 1000
>
> -#define SUPERPAGE_PFN_SHIFT 9
> -#define SUPERPAGE_NR_PFNS (1UL << SUPERPAGE_PFN_SHIFT)
> -
> -#define SUPER_PAGE_START(pfn) (((pfn) & (SUPERPAGE_NR_PFNS-1)) == 0 )
> -
> #ifndef __MINIOS__
> static ssize_t rdexact(xc_interface *xch, struct restore_ctx *ctx,
> int fd, void* buf, size_t size)
> @@ -103,6 +114,49 @@ static ssize_t rdexact(xc_interface *xch
> #else
> #define RDEXACT read_exact
> #endif
> +
> +#define SUPERPAGE_PFN_SHIFT 9
> +#define SUPERPAGE_NR_PFNS (1UL << SUPERPAGE_PFN_SHIFT)
> +#define SUPERPAGE(_pfn) ((_pfn) & (~(SUPERPAGE_NR_PFNS-1)))
> +#define SUPER_PAGE_START(pfn) (((pfn) & (SUPERPAGE_NR_PFNS-1)) == 0 )
Why this code motion?
> +
> +/*
> +** When we're restoring into a pv superpage-allocated guest, we take
> +** a copy of the p2m_batch array to preserve the pfn, then allocate the
> +** corresponding superpages. We then fill in the p2m array using the saved
> +** pfns.
So the pfns are not contiguous, but we back them with a superpage? IOW
they are superpages in machine address space but not in physical address
space?
If they are contiguous in pseudo-physical space, you only need to save the first one?
> +*/
> +static int alloc_superpage_mfns(
> + xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, int nr_mfns)
> +{
> + int i, j, max = 0;
> + unsigned long pfn, base_pfn, mfn;
> +
> + for (i = 0; i < nr_mfns; i++)
> + {
> + pfn = ctx->p2m_batch[i];
> + base_pfn = SUPERPAGE(pfn);
> + if (ctx->p2m[base_pfn] != (INVALID_P2M_ENTRY-2))
> + {
> + ctx->p2m_saved_batch[max] = base_pfn;
Is p2m_saved_batch used anywhere other than in this function? Could it
be a local variable?
> + ctx->p2m_batch[max] = base_pfn;
> + max++;
> + ctx->p2m[base_pfn] = INVALID_P2M_ENTRY-2;
What is this magic -2? Can we get a symbolic constant (probably for the
whole expression)?
> + }
> + }
> + if (xc_domain_populate_physmap_exact(xch, dom, max, SUPERPAGE_PFN_SHIFT,
> + 0, ctx->p2m_batch) != 0)
> + return 1;
> +
> + for (i = 0; i < max; i++)
> + {
> + mfn = ctx->p2m_batch[i];
> + pfn = ctx->p2m_saved_batch[i];
If the original was != (INVALID_P2M_ENTRY-2) then you didn't initialise
p2m_saved_batch for this index?
> + for (j = 0; j < SUPERPAGE_NR_PFNS; j++)
> + ctx->p2m[pfn++] = mfn++;
> + }
> + return 0;
> +}
> /*
> ** In the state file (or during transfer), all page-table pages are
> ** converted into a 'canonical' form where references to actual mfns
> @@ -113,7 +167,7 @@ static ssize_t rdexact(xc_interface *xch
> static int uncanonicalize_pagetable(
> xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, void *page)
> {
> - int i, pte_last, nr_mfns = 0;
> + int i, rc, pte_last, nr_mfns = 0;
> unsigned long pfn;
> uint64_t pte;
> struct domain_info_context *dinfo = &ctx->dinfo;
> @@ -152,13 +206,20 @@ static int uncanonicalize_pagetable(
> }
>
> /* Allocate the requisite number of mfns. */
> - if ( nr_mfns &&
> - (xc_domain_populate_physmap_exact(xch, dom, nr_mfns, 0, 0,
> - ctx->p2m_batch) != 0) )
> - {
> - ERROR("Failed to allocate memory for batch.!\n");
> - errno = ENOMEM;
> - return 0;
> + if (nr_mfns)
> + {
> + if (!ctx->hvm && ctx->superpages)
> + rc = alloc_superpage_mfns(xch, dom, ctx, nr_mfns);
> + else
> + rc = xc_domain_populate_physmap_exact(xch, dom, nr_mfns, 0, 0,
> + ctx->p2m_batch);
Would it be clearer to rename alloc_superpage_mfns to (e.g.)
alloc_guest_mfns and have it start:
if ( ctx->hvm || !ctx->superpages )
return xc_domain...
Keeps the allocation logic in one place, since it is repeated again
below.
> +
> + if (rc)
> + {
> + ERROR("Failed to allocate memory for batch.!\n");
> + errno = ENOMEM;
> + return 0;
> + }
> }
>
> /* Second pass: uncanonicalize each present PTE */
> @@ -1018,8 +1079,8 @@ static int pagebuf_get(xc_interface *xch
>
> static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx
> *ctx,
> xen_pfn_t* region_mfn, unsigned long* pfn_type, int
> pae_extended_cr3,
> - unsigned int hvm, struct xc_mmu* mmu,
> - pagebuf_t* pagebuf, int curbatch, int superpages)
> + struct xc_mmu* mmu,
> + pagebuf_t* pagebuf, int curbatch)
> {
> int i, j, curpage, nr_mfns;
> int k, scount;
> @@ -1061,7 +1122,7 @@ static int apply_batch(xc_interface *xch
> /* Is this the next expected continuation? */
> if ( pfn == superpage_start + scount )
> {
> - if ( !superpages )
> + if ( !ctx->superpages )
> {
> ERROR("Unexpexted codepath with no superpages");
> return -1;
> @@ -1114,16 +1175,17 @@ static int apply_batch(xc_interface *xch
> }
>
> /* Are we ready to start a new superpage candidate? */
> - if ( superpages && SUPER_PAGE_START(pfn) )
> + if ( ctx->hvm && ctx->superpages && SUPER_PAGE_START(pfn) )
Can we push the movement of these things into the ctx struct into a
separate preparatory patch so it's easier to see the actual code
changes.
> {
> superpage_start=pfn;
> scount++;
> - continue;
> }
> -
> - /* Add the current pfn to pfn_batch */
> - ctx->p2m_batch[nr_mfns++] = pfn;
> - ctx->p2m[pfn]--;
> + else
> + {
> + /* Add the current pfn to pfn_batch */
> + ctx->p2m_batch[nr_mfns++] = pfn;
> + ctx->p2m[pfn]--;
> + }
> }
> }
>
> @@ -1144,9 +1206,14 @@ static int apply_batch(xc_interface *xch
> {
> DPRINTF("Mapping order 0, %d; first pfn %lx\n", nr_mfns,
> ctx->p2m_batch[0]);
>
> - if(xc_domain_populate_physmap_exact(xch, dom, nr_mfns, 0,
> - 0, ctx->p2m_batch) != 0)
> - {
> + if (!ctx->hvm && ctx->superpages)
> + rc = alloc_superpage_mfns(xch, dom, ctx, nr_mfns);
> + else
> + rc = xc_domain_populate_physmap_exact(xch, dom, nr_mfns, 0, 0,
> + ctx->p2m_batch);
> +
> + if (rc)
> + {
> ERROR("Failed to allocate memory for batch.!\n");
> errno = ENOMEM;
> return -1;
> @@ -1175,7 +1242,7 @@ static int apply_batch(xc_interface *xch
> || pagetype == XEN_DOMCTL_PFINFO_XALLOC )
> region_mfn[i] = ~0UL; /* map will fail but we don't care */
> else
> - region_mfn[i] = hvm ? pfn : ctx->p2m[pfn];
> + region_mfn[i] = ctx->hvm ? pfn : ctx->p2m[pfn];
> }
>
> /* Map relevant mfns */
> @@ -1298,7 +1365,7 @@ static int apply_batch(xc_interface *xch
> }
> }
>
> - if ( !hvm &&
> + if ( !ctx->hvm &&
> xc_add_mmu_update(xch, mmu,
> (((unsigned long long)mfn) << PAGE_SHIFT)
> | MMU_MACHPHYS_UPDATE, pfn) )
> @@ -1389,6 +1456,9 @@ int xc_domain_restore(xc_interface *xch,
>
> memset(ctx, 0, sizeof(*ctx));
>
> + ctx->superpages = superpages;
> + ctx->hvm = hvm;
> +
> ctxt = xc_hypercall_buffer_alloc(xch, ctxt, sizeof(*ctxt));
>
> if ( ctxt == NULL )
> @@ -1452,6 +1522,9 @@ int xc_domain_restore(xc_interface *xch,
>
> region_mfn = malloc(ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t),
> PAGE_SHIFT));
> ctx->p2m_batch = malloc(ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t),
> PAGE_SHIFT));
> + if (!ctx->hvm && ctx->superpages)
> + ctx->p2m_saved_batch =
> + malloc(ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT));
>
> if ( (ctx->p2m == NULL) || (pfn_type == NULL) ||
> (region_mfn == NULL) || (ctx->p2m_batch == NULL) )
> @@ -1575,8 +1648,7 @@ int xc_domain_restore(xc_interface *xch,
> int brc;
>
> brc = apply_batch(xch, dom, ctx, region_mfn, pfn_type,
> - pae_extended_cr3, hvm, mmu, &pagebuf, curbatch,
> - superpages);
> + pae_extended_cr3, mmu, &pagebuf, curbatch);
> if ( brc < 0 )
> goto out;
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |