[Xen-devel] [PATCH v10 3/3] tools/libxc: use superpages during restore of HVM guest



During creation of an HVM domU, meminit_hvm() tries to map superpages.
After save/restore or migration this mapping is lost and everything is
allocated in single pages. This causes a performance degradation after
migration.

Add the necessary code to preallocate a superpage for each chunk of pfns
that is received. In case a pfn was not populated on the sending side, it
must be freed on the receiving side to avoid over-allocation.
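
In outline, each previously unseen pfn triggers at most one allocation
attempt per superpage size, falling back to single pages. Roughly (the
try_populate() helper is illustrative, not a function from this patch):

    if ( !test_bit(pfn, allocated_pfns) )
    {
        /* each 1G/2M index is attempted only once, tracked in bitmaps */
        if ( !try_populate(pfn, SUPERPAGE_1GB_SHIFT) &&
             !try_populate(pfn, SUPERPAGE_2MB_SHIFT) )
            try_populate(pfn, 0);
    }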

The existing code for x86_pv is moved unmodified into its own file.

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
 tools/libxc/xc_sr_common.h          |  30 +-
 tools/libxc/xc_sr_restore.c         |  75 +----
 tools/libxc/xc_sr_restore_x86_hvm.c | 536 ++++++++++++++++++++++++++++++++++++
 tools/libxc/xc_sr_restore_x86_pv.c  |  72 ++++-
 4 files changed, 635 insertions(+), 78 deletions(-)

diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
index a728c93e53..0477c20617 100644
--- a/tools/libxc/xc_sr_common.h
+++ b/tools/libxc/xc_sr_common.h
@@ -139,6 +139,15 @@ struct xc_sr_restore_ops
      */
     int (*setup)(struct xc_sr_context *ctx);
 
+    /**
+     * Populate PFNs
+     *
+     * Given a set of pfns, obtain memory from Xen to fill the physmap for the
+     * unpopulated subset.
+     */
+    int (*populate_pfns)(struct xc_sr_context *ctx, unsigned count,
+                         const xen_pfn_t *original_pfns, const uint32_t *types);
+
     /**
      * Process an individual record from the stream.  The caller shall take
      * care of processing common records (e.g. END, PAGE_DATA).
@@ -224,6 +233,8 @@ struct xc_sr_context
 
             int send_back_fd;
             unsigned long p2m_size;
+            unsigned long max_pages;
+            unsigned long tot_pages;
             xc_hypercall_buffer_t dirty_bitmap_hbuf;
 
             /* From Image Header. */
@@ -336,6 +347,17 @@ struct xc_sr_context
                     /* HVM context blob. */
                     void *context;
                     size_t contextsz;
+
+                    /* Bitmap of currently allocated PFNs during restore. */
+                    struct xc_sr_bitmap attempted_1g;
+                    struct xc_sr_bitmap attempted_2m;
+                    struct xc_sr_bitmap allocated_pfns;
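+                    /* Superpage indices of the last pfn processed. */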
+                    xen_pfn_t idx1G_prev, idx2M_prev;
+
+                    /* List of PFNs for decrease_reservation */
+                    xen_pfn_t *extents;
+                    unsigned long max_extents;
+                    unsigned long nr_extents;
                 } restore;
             };
         } x86_hvm;
@@ -460,14 +482,6 @@ static inline int write_record(struct xc_sr_context *ctx,
  */
 int read_record(struct xc_sr_context *ctx, int fd, struct xc_sr_record *rec);
 
-/*
- * This would ideally be private in restore.c, but is needed by
- * x86_pv_localise_page() if we receive pagetables frames ahead of the
- * contents of the frames they point at.
- */
-int populate_pfns(struct xc_sr_context *ctx, unsigned count,
-                  const xen_pfn_t *original_pfns, const uint32_t *types);
-
 #endif
 /*
  * Local variables:
diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c
index d53948e1a6..8cd9289d1a 100644
--- a/tools/libxc/xc_sr_restore.c
+++ b/tools/libxc/xc_sr_restore.c
@@ -68,74 +68,6 @@ static int read_headers(struct xc_sr_context *ctx)
     return 0;
 }
 
-/*
- * Given a set of pfns, obtain memory from Xen to fill the physmap for the
- * unpopulated subset.  If types is NULL, no page type checking is performed
- * and all unpopulated pfns are populated.
- */
-int populate_pfns(struct xc_sr_context *ctx, unsigned count,
-                  const xen_pfn_t *original_pfns, const uint32_t *types)
-{
-    xc_interface *xch = ctx->xch;
-    xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
-        *pfns = malloc(count * sizeof(*pfns));
-    unsigned i, nr_pfns = 0;
-    int rc = -1;
-
-    if ( !mfns || !pfns )
-    {
-        ERROR("Failed to allocate %zu bytes for populating the physmap",
-              2 * count * sizeof(*mfns));
-        goto err;
-    }
-
-    for ( i = 0; i < count; ++i )
-    {
-        if ( (!types || (types &&
-                         (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
-                          types[i] != XEN_DOMCTL_PFINFO_BROKEN))) &&
-             !pfn_is_populated(ctx, original_pfns[i]) )
-        {
-            rc = pfn_set_populated(ctx, original_pfns[i]);
-            if ( rc )
-                goto err;
-            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
-            ++nr_pfns;
-        }
-    }
-
-    if ( nr_pfns )
-    {
-        rc = xc_domain_populate_physmap_exact(
-            xch, ctx->domid, nr_pfns, 0, 0, mfns);
-        if ( rc )
-        {
-            PERROR("Failed to populate physmap");
-            goto err;
-        }
-
-        for ( i = 0; i < nr_pfns; ++i )
-        {
-            if ( mfns[i] == INVALID_MFN )
-            {
-                ERROR("Populate physmap failed for pfn %u", i);
-                rc = -1;
-                goto err;
-            }
-
-            ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]);
-        }
-    }
-
-    rc = 0;
-
- err:
-    free(pfns);
-    free(mfns);
-
-    return rc;
-}
-
 /*
  * Given a list of pfns, their types, and a block of page data from the
  * stream, populate and record their types, map the relevant subset and copy
@@ -161,7 +93,7 @@ static int process_page_data(struct xc_sr_context *ctx, unsigned count,
         goto err;
     }
 
-    rc = populate_pfns(ctx, count, pfns, types);
+    rc = ctx->restore.ops.populate_pfns(ctx, count, pfns, types);
     if ( rc )
     {
         ERROR("Failed to populate pfns for batch of %u pages", count);
@@ -826,7 +758,12 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
         return -1;
     }
 
+    /* See xc_domain_getinfo */
+    ctx.restore.max_pages = ctx.dominfo.max_memkb >> (PAGE_SHIFT-10);
+    ctx.restore.tot_pages = ctx.dominfo.nr_pages;
     ctx.restore.p2m_size = nr_pfns;
+    DPRINTF("dom %u p2m_size %lx max_pages %lx",
+            ctx.domid, ctx.restore.p2m_size, ctx.restore.max_pages);
 
     if ( ctx.dominfo.hvm )
     {
diff --git a/tools/libxc/xc_sr_restore_x86_hvm.c b/tools/libxc/xc_sr_restore_x86_hvm.c
index 1dca85354a..451205b1a0 100644
--- a/tools/libxc/xc_sr_restore_x86_hvm.c
+++ b/tools/libxc/xc_sr_restore_x86_hvm.c
@@ -135,6 +135,9 @@ static int x86_hvm_localise_page(struct xc_sr_context *ctx,
 static int x86_hvm_setup(struct xc_sr_context *ctx)
 {
     xc_interface *xch = ctx->xch;
+    struct xc_sr_bitmap *bm;
+    void *p;
+    unsigned long bits;
 
     if ( ctx->restore.guest_type != DHDR_TYPE_X86_HVM )
     {
@@ -149,7 +152,37 @@ static int x86_hvm_setup(struct xc_sr_context *ctx)
         return -1;
     }
 
+    bm = &ctx->x86_hvm.restore.attempted_1g;
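+    /* One bit per 1GB superpage; the +1 below covers a trailing partial superpage. */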
+    bits = (ctx->restore.p2m_size >> SUPERPAGE_1GB_SHIFT) + 1;
+    if ( xc_sr_bitmap_resize(bm, bits) == false )
+        goto out;
+
+    bm = &ctx->x86_hvm.restore.attempted_2m;
+    bits = (ctx->restore.p2m_size >> SUPERPAGE_2MB_SHIFT) + 1;
+    if ( xc_sr_bitmap_resize(bm, bits) == false )
+        goto out;
+
+    bm = &ctx->x86_hvm.restore.allocated_pfns;
+    bits = ctx->restore.p2m_size + 1;
+    if ( xc_sr_bitmap_resize(bm, bits) == false )
+        goto out;
+
+    /*
+     * Preallocate array for holes.
+     * Any size will do, the sender is free to send batches of arbitrary length.
+     */
+    bits = 16;
+    p = calloc(bits, sizeof(*ctx->x86_hvm.restore.extents));
+    if ( !p )
+        goto out;
+    ctx->x86_hvm.restore.extents = p;
+    ctx->x86_hvm.restore.max_extents = bits;
+
     return 0;
+
+ out:
+    PERROR("Unable to allocate memory for pfn bitmaps");
+    return -1;
 }
 
 /*
@@ -224,10 +257,512 @@ static int x86_hvm_stream_complete(struct xc_sr_context *ctx)
 static int x86_hvm_cleanup(struct xc_sr_context *ctx)
 {
     free(ctx->x86_hvm.restore.context);
+    free(ctx->x86_hvm.restore.extents);
+    xc_sr_bitmap_free(&ctx->x86_hvm.restore.attempted_1g);
+    xc_sr_bitmap_free(&ctx->x86_hvm.restore.attempted_2m);
+    xc_sr_bitmap_free(&ctx->x86_hvm.restore.allocated_pfns);
 
     return 0;
 }
 
+/*
+ * Set a pfn as allocated, expanding the tracking structures if needed.
+ */
+static int pfn_set_allocated(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    xc_interface *xch = ctx->xch;
+
+    if ( !xc_sr_set_bit(pfn, &ctx->x86_hvm.restore.allocated_pfns) )
+    {
+        ERROR("Failed to realloc allocated_pfns bitmap");
+        errno = ENOMEM;
+        return -1;
+    }
+    return 0;
+}
+
+/* track allocation of a superpage */
+struct x86_hvm_sp {
+    xen_pfn_t pfn;
+    xen_pfn_t base_pfn;
+    unsigned long index;
+    unsigned long count;
+};
+
+/*
+ * Try to allocate a 1GB page for this pfn, but avoid over-allocation.
+ * If this succeeds, mark the range of 2MB pages as busy.
+ */
+static bool x86_hvm_alloc_1g(struct xc_sr_context *ctx, struct x86_hvm_sp *sp)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_bitmap *bm;
+    unsigned int order, shift;
+    int i, done;
+    xen_pfn_t extent;
+
+    bm = &ctx->x86_hvm.restore.attempted_1g;
+
+    /* Only one attempt to avoid overlapping allocation */
+    if ( xc_sr_test_and_set_bit(sp->index, bm) )
+        return false;
+
+    order = SUPERPAGE_1GB_SHIFT;
+    sp->count = 1ULL << order;
+
+    /* Allocate only if there is room for another superpage */
+    if ( ctx->restore.tot_pages + sp->count > ctx->restore.max_pages )
+        return false;
+
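+    /* Align down to the first pfn of the 1GB superpage containing this pfn. */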
+    extent = sp->base_pfn = (sp->pfn >> order) << order;
+    done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, &extent);
+    if ( done < 0 )
+    {
+        PERROR("populate_physmap failed.");
+        return false;
+    }
+    if ( done == 0 )
+        return false;
+
+    DPRINTF("1G base_pfn %" PRI_xen_pfn "\n", sp->base_pfn);
+
+    /* Mark all 2MB pages as done to avoid overlapping allocation */
+    bm = &ctx->x86_hvm.restore.attempted_2m;
+    shift = SUPERPAGE_1GB_SHIFT - SUPERPAGE_2MB_SHIFT;
+    for ( i = 0; i < (sp->count >> shift); i++ )
+        xc_sr_set_bit((sp->base_pfn >> SUPERPAGE_2MB_SHIFT) + i, bm);
+
+    return true;
+}
+
+/* Allocate a 2MB page if x86_hvm_alloc_1g failed, avoid over-allocation. */
+static bool x86_hvm_alloc_2m(struct xc_sr_context *ctx, struct x86_hvm_sp *sp)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_bitmap *bm;
+    unsigned int order;
+    int done;
+    xen_pfn_t extent;
+
+    bm = &ctx->x86_hvm.restore.attempted_2m;
+
+    /* Only one attempt to avoid overlapping allocation */
+    if ( xc_sr_test_and_set_bit(sp->index, bm) )
+        return false;
+
+    order = SUPERPAGE_2MB_SHIFT;
+    sp->count = 1ULL << order;
+
+    /* Allocate only if there is room for another superpage */
+    if ( ctx->restore.tot_pages + sp->count > ctx->restore.max_pages )
+        return false;
+
+    extent = sp->base_pfn = (sp->pfn >> order) << order;
+    done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, &extent);
+    if ( done < 0 )
+    {
+        PERROR("populate_physmap failed.");
+        return false;
+    }
+    if ( done == 0 )
+        return false;
+
+    DPRINTF("2M base_pfn %" PRI_xen_pfn "\n", sp->base_pfn);
+    return true;
+}
+
+/* Allocate a single page if x86_hvm_alloc_2m failed. */
+static bool x86_hvm_alloc_4k(struct xc_sr_context *ctx, struct x86_hvm_sp *sp)
+{
+    xc_interface *xch = ctx->xch;
+    unsigned int order;
+    int done;
+    xen_pfn_t extent;
+
+    order = 0;
+    sp->count = 1ULL << order;
+
+    /* Allocate only if there is room for another page */
+    if ( ctx->restore.tot_pages + sp->count > ctx->restore.max_pages )
+        return false;
+
+    extent = sp->base_pfn = (sp->pfn >> order) << order;
+    done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, &extent);
+    if ( done < 0 )
+    {
+        PERROR("populate_physmap failed.");
+        return false;
+    }
+    if ( done == 0 )
+        return false;
+
+    DPRINTF("4K base_pfn %" PRI_xen_pfn "\n", sp->base_pfn);
+    return true;
+}
+
+/*
+ * Attempt to allocate a superpage where the pfn resides.
+ */
+static int x86_hvm_allocate_pfn(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    xc_interface *xch = ctx->xch;
+    bool success;
+    int rc = -1;
+    unsigned long idx_1g, idx_2m;
+    struct x86_hvm_sp sp = {
+        .pfn = pfn
+    };
+
+    if ( xc_sr_test_bit(pfn, &ctx->x86_hvm.restore.allocated_pfns) )
+        return 0;
+
+    idx_1g = pfn >> SUPERPAGE_1GB_SHIFT;
+    idx_2m = pfn >> SUPERPAGE_2MB_SHIFT;
+    if ( !xc_sr_bitmap_resize(&ctx->x86_hvm.restore.attempted_1g, idx_1g) )
+    {
+        PERROR("Failed to realloc attempted_1g for pfn %" PRI_xen_pfn, pfn );
+        return -1;
+    }
+    if ( !xc_sr_bitmap_resize(&ctx->x86_hvm.restore.attempted_2m, idx_2m) )
+    {
+        PERROR("Failed to realloc attempted_2m for pfn %" PRI_xen_pfn, pfn );
+        return -1;
+    }
+
+    sp.index = idx_1g;
+    success = x86_hvm_alloc_1g(ctx, &sp);
+
+    if ( success == false )
+    {
+        sp.index = idx_2m;
+        success = x86_hvm_alloc_2m(ctx, &sp);
+    }
+
+    if ( success == false )
+    {
+        sp.index = 0;
+        success = x86_hvm_alloc_4k(ctx, &sp);
+    }
+
+    if ( success == true )
+    {
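+        /* Mark and account for every pfn covered by the new extent. */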
+        do {
+            sp.count--;
+            ctx->restore.tot_pages++;
+            rc = pfn_set_allocated(ctx, sp.base_pfn + sp.count);
+            if ( rc )
+                break;
+        } while ( sp.count );
+    }
+    return rc;
+}
+
+/* Keep track of pfns that need to be released. */
+static bool x86_hvm_stash_pfn(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    xc_interface *xch = ctx->xch;
+    unsigned long idx = ctx->x86_hvm.restore.nr_extents;
+
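+    /* Grow the extents array on demand; it is freed in x86_hvm_cleanup(). */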
+    if ( idx >= ctx->x86_hvm.restore.max_extents )
+    {
+        unsigned long max_extents = ctx->x86_hvm.restore.max_extents * 2;
+        size_t size = sizeof(*ctx->x86_hvm.restore.extents) * max_extents;
+        void *p = realloc(ctx->x86_hvm.restore.extents, size);
+
+        if ( !p )
+        {
+            PERROR("Failed to realloc extents array %lu", max_extents);
+            return false;
+        }
+        ctx->x86_hvm.restore.extents = p;
+        ctx->x86_hvm.restore.max_extents = max_extents;
+    }
+
+    ctx->x86_hvm.restore.extents[idx] = pfn;
+    ctx->x86_hvm.restore.nr_extents++;
+    return true;
+}
+
+/*
+ * Check if a range of pfns represents a contiguous superpage
+ * Returns the number of contiguous pages
+ */
+static unsigned long x86_hvm_scan_2m(xen_pfn_t *pfns, unsigned long idx,
+                                     unsigned long max)
+{
+    xen_pfn_t val = pfns[idx];
+    unsigned long i = 0;
+
+    /* First pfn was already checked */
+    do {
+        val++;
+        i++;
+    } while ( i < SUPERPAGE_2MB_NR_PFNS && idx + i < max &&
+              pfns[idx + i] == val );
+
+    return i;
+}
+
+static bool x86_hvm_release_2m_sp(struct xc_sr_context *ctx, unsigned long idx)
+{
+    xc_interface *xch = ctx->xch;
+    int rc;
+    uint32_t domid = ctx->domid;
+    unsigned int order = SUPERPAGE_2MB_SHIFT;
+    xen_pfn_t base_pfn = ctx->x86_hvm.restore.extents[idx];
+
+    DPRINTF("releasing 2mb at %" PRI_xen_pfn, base_pfn);
+    rc = xc_domain_decrease_reservation_exact(xch, domid, 1, order, &base_pfn);
+    if ( rc )
+    {
+        PERROR("Failed to release 2mb at %lx", idx);
+        return false;
+    }
+    return true;
+}
+
+static bool x86_hvm_release_pages(struct xc_sr_context *ctx,
+                                  unsigned long start,
+                                  unsigned long idx)
+{
+    xc_interface *xch = ctx->xch;
+    int rc;
+    uint32_t domid = ctx->domid;
+    unsigned int order = 0;
+    unsigned long num = idx - start;
+    xen_pfn_t *pfns = &ctx->x86_hvm.restore.extents[start];
+
+    DPRINTF("releasing %lu 4k pages", num);
+    rc = xc_domain_decrease_reservation_exact(xch, domid, num, order, pfns);
+    if ( rc )
+    {
+        PERROR("Failed to release %lu pfns", num);
+        return false;
+    }
+    return true;
+}
+
+/* Release pfns which are not populated. */
+static bool x86_hvm_free_pfns(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t *pfns = ctx->x86_hvm.restore.extents;
+    xen_pfn_t mask;
+    unsigned long idx, start, num, max;
+
+    max = ctx->x86_hvm.restore.nr_extents;
+    if ( !max )
+        return true;
+
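+    /* Mask of the pfn bits that lie within a 2MB superpage. */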
+    mask = (1UL << SUPERPAGE_2MB_SHIFT) - 1;
+    idx = 0;
+    start = 0;
+    while ( idx < max )
+    {
+        /* This is the start of a 2M range, release as a single superpage */
+        if ( (pfns[idx] & mask) == 0 &&
+             idx + SUPERPAGE_2MB_NR_PFNS <= max )
+        {
+            num = x86_hvm_scan_2m(pfns, idx, max);
+            DPRINTF("found %lu pfns at %" PRI_xen_pfn, num, pfns[idx]);
+            if ( num == SUPERPAGE_2MB_NR_PFNS )
+            {
+                /* Release range before this superpage */
+                if ( (idx - start) > 0 &&
+                     x86_hvm_release_pages(ctx, start, idx) == false )
+                    return false;
+                if ( x86_hvm_release_2m_sp(ctx, idx) == false )
+                    return false;
+                start = idx + num;
+            }
+            idx += num;
+        }
+        else
+        {
+            idx++;
+        }
+    }
+
+    /* Release remaining pages, or everything if no superpage was found */
+    if ( (idx - start) > 0 && x86_hvm_release_pages(ctx, start, idx) == false )
+        return false;
+
+    ctx->x86_hvm.restore.nr_extents = 0;
+    return true;
+}
+
+static bool x86_hvm_punch_hole(struct xc_sr_context *ctx, xen_pfn_t max_pfn)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_bitmap *bm = &ctx->x86_hvm.restore.allocated_pfns;
+    xen_pfn_t pfn, start_pfn;
+    unsigned int freed = 0, order;
+
+    /* Expand the bitmap to allow clearing bits up to max_pfn */
+    if ( !xc_sr_bitmap_resize(bm, max_pfn) )
+    {
+        PERROR("Failed to realloc allocated_pfns %" PRI_xen_pfn, max_pfn);
+        return false;
+    }
+    /*
+     * Scan the entire superpage because several batches will fit into
+     * a superpage, and it is unknown which pfn triggered the allocation.
+     */
+    order = SUPERPAGE_1GB_SHIFT;
+    pfn = start_pfn = (max_pfn >> order) << order;
+
+    while ( pfn <= max_pfn )
+    {
+        if ( !pfn_is_populated(ctx, pfn) &&
+             xc_sr_test_and_clear_bit(pfn, bm) )
+        {
+            if ( x86_hvm_stash_pfn(ctx, pfn) == false )
+                return false;
+            ctx->restore.tot_pages--;
+            freed++;
+        }
+        pfn++;
+    }
+
+    if ( freed )
+    {
+        DPRINTF("%u pages to be freed between %" PRI_xen_pfn " %" PRI_xen_pfn,
+                freed, start_pfn, max_pfn);
+        if ( x86_hvm_free_pfns(ctx) == false )
+            return false;
+    }
+
+    return true;
+}
+
+/* Avoid allocating a superpage if a hole exists */
+static bool x86_hvm_mark_hole_in_sp(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_bitmap *bm;
+    unsigned long idx_1g, idx_2m;
+
+    idx_1g = pfn >> SUPERPAGE_1GB_SHIFT;
+    idx_2m = pfn >> SUPERPAGE_2MB_SHIFT;
+
+    bm = &ctx->x86_hvm.restore.attempted_1g;
+    if ( xc_sr_set_bit(idx_1g, bm) == false )
+    {
+        PERROR("Failed to realloc attempted_1g for pfn %" PRI_xen_pfn, pfn );
+        return false;
+    }
+
+    bm = &ctx->x86_hvm.restore.attempted_2m;
+    if ( xc_sr_set_bit(idx_2m, bm) == false )
+    {
+        PERROR("Failed to realloc attempted_2m for pfn %" PRI_xen_pfn, pfn );
+        return false;
+    }
+    return true;
+}
+
+/*
+ * Try to allocate superpages.
+ * This works without memory map only if the pfns arrive in incremental order.
+ */
+static int x86_hvm_populate_pfns(struct xc_sr_context *ctx, unsigned int count,
+                                 const xen_pfn_t *pfns, const uint32_t *types)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t pfn, min_pfn = pfns[0], max_pfn = pfns[0];
+    xen_pfn_t idx1G, idx2M;
+    unsigned int i, order;
+    int rc = -1;
+
+    /*
+     * Analyze the array:
+     * - to show statistics
+     * - to indicate holes to the superpage allocator
+     *   this would be more efficient with batches for 1G instead of 4M
+     *   with 4M batches a 1G superpage might be allocated before a hole is seen
+     */
+    for ( i = 0; i < count; ++i )
+    {
+        if ( pfns[i] < min_pfn )
+            min_pfn = pfns[i];
+        if ( pfns[i] > max_pfn )
+            max_pfn = pfns[i];
+
+        switch ( types[i] )
+        {
+        case XEN_DOMCTL_PFINFO_XTAB:
+        case XEN_DOMCTL_PFINFO_BROKEN:
+            if ( x86_hvm_mark_hole_in_sp(ctx, pfns[i]) == false )
+                goto err;
+            break;
+        default:
+            break;
+        }
+    }
+    DPRINTF("batch of %u pfns between %" PRI_xen_pfn " %" PRI_xen_pfn "\n",
+            count, min_pfn, max_pfn);
+
+    for ( i = 0; i < count; ++i )
+    {
+        pfn = pfns[i];
+        idx1G = pfn >> SUPERPAGE_1GB_SHIFT;
+        idx2M = pfn >> SUPERPAGE_2MB_SHIFT;
+
+        /*
+         * Handle batches smaller than 1GB.
+         * If this pfn is in another 2MB superpage it is required to punch holes
+         * to release memory, starting from the 1GB boundary up to the highest
+         * pfn within the previous 2MB superpage.
+         */
+        if ( ctx->x86_hvm.restore.idx1G_prev == idx1G &&
+             ctx->x86_hvm.restore.idx2M_prev == idx2M )
+        {
+            /* Same 2MB superpage, nothing to do */
+        }
+        else
+        {
+            /*
+             * If this next pfn is within another 1GB or 2MB superpage it is
+             * required to scan the entire previous superpage because there
+             * might be holes between the last pfn and the end of the superpage
+             * containing that pfn.
+             */
+            if ( ctx->x86_hvm.restore.idx1G_prev != idx1G )
+            {
+                order = SUPERPAGE_1GB_SHIFT;
+                max_pfn = ((ctx->x86_hvm.restore.idx1G_prev + 1) << order) - 1;
+            }
+            else
+            {
+                order = SUPERPAGE_2MB_SHIFT;
+                max_pfn = ((ctx->x86_hvm.restore.idx2M_prev + 1) << order) - 1;
+            }
+
+            if ( x86_hvm_punch_hole(ctx, max_pfn) == false )
+                goto err;
+        }
+
+        if ( (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
+              types[i] != XEN_DOMCTL_PFINFO_BROKEN) &&
+             !pfn_is_populated(ctx, pfn) )
+        {
+            rc = x86_hvm_allocate_pfn(ctx, pfn);
+            if ( rc )
+                goto err;
+            rc = pfn_set_populated(ctx, pfn);
+            if ( rc )
+                goto err;
+        }
+        ctx->x86_hvm.restore.idx1G_prev = idx1G;
+        ctx->x86_hvm.restore.idx2M_prev = idx2M;
+    }
+
+    rc = 0;
+
+ err:
+    return rc;
+}
+
 struct xc_sr_restore_ops restore_ops_x86_hvm =
 {
     .pfn_is_valid    = x86_hvm_pfn_is_valid,
@@ -236,6 +771,7 @@ struct xc_sr_restore_ops restore_ops_x86_hvm =
     .set_page_type   = x86_hvm_set_page_type,
     .localise_page   = x86_hvm_localise_page,
     .setup           = x86_hvm_setup,
+    .populate_pfns   = x86_hvm_populate_pfns,
     .process_record  = x86_hvm_process_record,
     .stream_complete = x86_hvm_stream_complete,
     .cleanup         = x86_hvm_cleanup,
diff --git a/tools/libxc/xc_sr_restore_x86_pv.c b/tools/libxc/xc_sr_restore_x86_pv.c
index 50e25c162c..87957559bc 100644
--- a/tools/libxc/xc_sr_restore_x86_pv.c
+++ b/tools/libxc/xc_sr_restore_x86_pv.c
@@ -936,6 +936,75 @@ static void x86_pv_set_gfn(struct xc_sr_context *ctx, xen_pfn_t pfn,
         ((uint32_t *)ctx->x86_pv.p2m)[pfn] = mfn;
 }
 
+/*
+ * Given a set of pfns, obtain memory from Xen to fill the physmap for the
+ * unpopulated subset.  If types is NULL, no page type checking is performed
+ * and all unpopulated pfns are populated.
+ */
+static int x86_pv_populate_pfns(struct xc_sr_context *ctx, unsigned count,
+                                const xen_pfn_t *original_pfns,
+                                const uint32_t *types)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
+        *pfns = malloc(count * sizeof(*pfns));
+    unsigned i, nr_pfns = 0;
+    int rc = -1;
+
+    if ( !mfns || !pfns )
+    {
+        ERROR("Failed to allocate %zu bytes for populating the physmap",
+              2 * count * sizeof(*mfns));
+        goto err;
+    }
+
+    for ( i = 0; i < count; ++i )
+    {
+        if ( (!types || (types &&
+                         (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
+                          types[i] != XEN_DOMCTL_PFINFO_BROKEN))) &&
+             !pfn_is_populated(ctx, original_pfns[i]) )
+        {
+            rc = pfn_set_populated(ctx, original_pfns[i]);
+            if ( rc )
+                goto err;
+            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
+            ++nr_pfns;
+        }
+    }
+
+    if ( nr_pfns )
+    {
+        rc = xc_domain_populate_physmap_exact(
+            xch, ctx->domid, nr_pfns, 0, 0, mfns);
+        if ( rc )
+        {
+            PERROR("Failed to populate physmap");
+            goto err;
+        }
+
+        for ( i = 0; i < nr_pfns; ++i )
+        {
+            if ( mfns[i] == INVALID_MFN )
+            {
+                ERROR("Populate physmap failed for pfn %u", i);
+                rc = -1;
+                goto err;
+            }
+
+            ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]);
+        }
+    }
+
+    rc = 0;
+
+ err:
+    free(pfns);
+    free(mfns);
+
+    return rc;
+}
+
 /*
  * restore_ops function.  Convert pfns back to mfns in pagetables.  Possibly
  * needs to populate new frames if a PTE is found referring to a frame which
@@ -980,7 +1049,7 @@ static int x86_pv_localise_page(struct xc_sr_context *ctx,
         }
     }
 
-    if ( to_populate && populate_pfns(ctx, to_populate, pfns, NULL) )
+    if ( to_populate && x86_pv_populate_pfns(ctx, to_populate, pfns, NULL) )
         return -1;
 
     for ( i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
@@ -1160,6 +1229,7 @@ struct xc_sr_restore_ops restore_ops_x86_pv =
     .set_gfn         = x86_pv_set_gfn,
     .localise_page   = x86_pv_localise_page,
     .setup           = x86_pv_setup,
+    .populate_pfns   = x86_pv_populate_pfns,
     .process_record  = x86_pv_process_record,
     .stream_complete = x86_pv_stream_complete,
     .cleanup         = x86_pv_cleanup,

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel