[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v10 13/15] tools/libxc: common restore code



Restore a domain from the new format.  This reads and validates the domain and
image header and loads the guest memory from the PAGE_DATA records, populating
the p2m as it does so.

This provides the xc_domain_restore2() function as an alternative to the
existing xc_domain_restore().

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Acked-by: Ian Campbell <Ian.Campbell@xxxxxxxxxx>
CC: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
CC: Wei Liu <wei.liu2@xxxxxxxxxx>
---
 tools/libxc/xc_sr_restore.c |  507 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 503 insertions(+), 4 deletions(-)

diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c
index 045f486..0bf4bae 100644
--- a/tools/libxc/xc_sr_restore.c
+++ b/tools/libxc/xc_sr_restore.c
@@ -1,6 +1,133 @@
+#include <arpa/inet.h>
+
 #include "xc_sr_common.h"
 
 /*
+ * Read and validate the Image and Domain headers.
+ */
+static int read_headers(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_ihdr ihdr;
+    struct xc_sr_dhdr dhdr;
+
+    if ( read_exact(ctx->fd, &ihdr, sizeof(ihdr)) )
+    {
+        PERROR("Failed to read Image Header from stream");
+        return -1;
+    }
+
+    ihdr.id      = ntohl(ihdr.id);
+    ihdr.version = ntohl(ihdr.version);
+    ihdr.options = ntohs(ihdr.options);
+
+    if ( ihdr.marker != IHDR_MARKER )
+    {
+        ERROR("Invalid marker: Got 0x%016"PRIx64, ihdr.marker);
+        return -1;
+    }
+    else if ( ihdr.id != IHDR_ID )
+    {
+        ERROR("Invalid ID: Expected 0x%08x, Got 0x%08x", IHDR_ID, ihdr.id);
+        return -1;
+    }
+    else if ( ihdr.version != IHDR_VERSION )
+    {
+        ERROR("Invalid Version: Expected %d, Got %d",
+              ihdr.version, IHDR_VERSION);
+        return -1;
+    }
+    else if ( ihdr.options & IHDR_OPT_BIG_ENDIAN )
+    {
+        ERROR("Unable to handle big endian streams");
+        return -1;
+    }
+
+    ctx->restore.format_version = ihdr.version;
+
+    if ( read_exact(ctx->fd, &dhdr, sizeof(dhdr)) )
+    {
+        PERROR("Failed to read Domain Header from stream");
+        return -1;
+    }
+
+    ctx->restore.guest_type = dhdr.type;
+    ctx->restore.guest_page_size = (1U << dhdr.page_shift);
+
+    if ( dhdr.xen_major == 0 )
+    {
+        IPRINTF("Found %s domain, converted from legacy stream format",
+                dhdr_type_to_str(dhdr.type));
+        DPRINTF("  Legacy conversion script version %u", dhdr.xen_minor);
+    }
+    else
+        IPRINTF("Found %s domain from Xen %u.%u",
+                dhdr_type_to_str(dhdr.type), dhdr.xen_major, dhdr.xen_minor);
+    return 0;
+}
+
+/*
+ * Reads a record from the stream, and fills in the record structure.
+ *
+ * Returns 0 on success and non-0 on failure.
+ *
+ * On success, the records type and size shall be valid.
+ * - If size is 0, data shall be NULL.
+ * - If size is non-0, data shall be a buffer allocated by malloc() which must
+ *   be passed to free() by the caller.
+ *
+ * On failure, the contents of the record structure are undefined.
+ */
+static int read_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_rhdr rhdr;
+    size_t datasz;
+
+    if ( read_exact(ctx->fd, &rhdr, sizeof(rhdr)) )
+    {
+        PERROR("Failed to read Record Header from stream");
+        return -1;
+    }
+    else if ( rhdr.length > REC_LENGTH_MAX )
+    {
+        ERROR("Record (0x%08x, %s) length %#x exceeds max (%#x)", rhdr.type,
+              rec_type_to_str(rhdr.type), rhdr.length, REC_LENGTH_MAX);
+        return -1;
+    }
+
+    datasz = ROUNDUP(rhdr.length, REC_ALIGN_ORDER);
+
+    if ( datasz )
+    {
+        rec->data = malloc(datasz);
+
+        if ( !rec->data )
+        {
+            ERROR("Unable to allocate %zu bytes for record data (0x%08x, %s)",
+                  datasz, rhdr.type, rec_type_to_str(rhdr.type));
+            return -1;
+        }
+
+        if ( read_exact(ctx->fd, rec->data, datasz) )
+        {
+            free(rec->data);
+            rec->data = NULL;
+            PERROR("Failed to read %zu bytes of data for record (0x%08x, %s)",
+                   datasz, rhdr.type, rec_type_to_str(rhdr.type));
+            return -1;
+        }
+    }
+    else
+        rec->data = NULL;
+
+    rec->type   = rhdr.type;
+    rec->length = rhdr.length;
+
+    return 0;
+};
+
+/*
  * Is a pfn populated?
  */
 static bool pfn_is_populated(const struct xc_sr_context *ctx, xen_pfn_t pfn)
@@ -12,7 +139,7 @@ static bool pfn_is_populated(const struct xc_sr_context 
*ctx, xen_pfn_t pfn)
 
 /*
  * Set a pfn as populated, expanding the tracking structures if needed. To
- * avoid realloc()ing too excessivly, the size increased to the nearest power
+ * avoid realloc()ing too excessively, the size increased to the nearest power
  * of two large enough to contain the required pfn.
  */
 static int pfn_set_populated(struct xc_sr_context *ctx, xen_pfn_t pfn)
@@ -59,7 +186,7 @@ static int pfn_set_populated(struct xc_sr_context *ctx, 
xen_pfn_t pfn)
 
 /*
  * Given a set of pfns, obtain memory from Xen to fill the physmap for the
- * unpopulated subset.  If types is NULL, no page typechecking is performed
+ * unpopulated subset.  If types is NULL, no page type checking is performed
  * and all unpopulated pfns are populated.
  */
 int populate_pfns(struct xc_sr_context *ctx, unsigned count,
@@ -125,16 +252,388 @@ int populate_pfns(struct xc_sr_context *ctx, unsigned 
count,
     return rc;
 }
 
+/*
+ * Given a list of pfns, their types, and a block of page data from the
+ * stream, populate and record their types, map the relevant subset and copy
+ * the data into the guest.
+ */
+static int process_page_data(struct xc_sr_context *ctx, unsigned count,
+                             xen_pfn_t *pfns, uint32_t *types, void *page_data)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t *mfns = malloc(count * sizeof(*mfns));
+    int *map_errs = malloc(count * sizeof(*map_errs));
+    int rc;
+    void *mapping = NULL, *guest_page = NULL;
+    unsigned i,    /* i indexes the pfns from the record. */
+        j,         /* j indexes the subset of pfns we decide to map. */
+        nr_pages = 0;
+
+    if ( !mfns || !map_errs )
+    {
+        rc = -1;
+        ERROR("Failed to allocate %zu bytes to process page data",
+              count * (sizeof(*mfns) + sizeof(*map_errs)));
+        goto err;
+    }
+
+    rc = populate_pfns(ctx, count, pfns, types);
+    if ( rc )
+    {
+        ERROR("Failed to populate pfns for batch of %u pages", count);
+        goto err;
+    }
+
+    for ( i = 0; i < count; ++i )
+    {
+        ctx->restore.ops.set_page_type(ctx, pfns[i], types[i]);
+
+        switch ( types[i] )
+        {
+        case XEN_DOMCTL_PFINFO_NOTAB:
+
+        case XEN_DOMCTL_PFINFO_L1TAB:
+        case XEN_DOMCTL_PFINFO_L1TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+        case XEN_DOMCTL_PFINFO_L2TAB:
+        case XEN_DOMCTL_PFINFO_L2TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+        case XEN_DOMCTL_PFINFO_L3TAB:
+        case XEN_DOMCTL_PFINFO_L3TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+        case XEN_DOMCTL_PFINFO_L4TAB:
+        case XEN_DOMCTL_PFINFO_L4TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+            mfns[nr_pages++] = ctx->restore.ops.pfn_to_gfn(ctx, pfns[i]);
+            break;
+        }
+    }
+
+    /* Nothing to do? */
+    if ( nr_pages == 0 )
+        goto done;
+
+    mapping = guest_page = xc_map_foreign_bulk(
+        xch, ctx->domid, PROT_READ | PROT_WRITE,
+        mfns, map_errs, nr_pages);
+    if ( !mapping )
+    {
+        rc = -1;
+        PERROR("Unable to map %u mfns for %u pages of data",
+               nr_pages, count);
+        goto err;
+    }
+
+    for ( i = 0, j = 0; i < count; ++i )
+    {
+        switch ( types[i] )
+        {
+        case XEN_DOMCTL_PFINFO_XTAB:
+        case XEN_DOMCTL_PFINFO_BROKEN:
+        case XEN_DOMCTL_PFINFO_XALLOC:
+            /* No page data to deal with. */
+            continue;
+        }
+
+        if ( map_errs[j] )
+        {
+            rc = -1;
+            ERROR("Mapping pfn %lx (mfn %lx, type %#x)failed with %d",
+                  pfns[i], mfns[j], types[i], map_errs[j]);
+            goto err;
+        }
+
+        /* Undo page normalisation done by the saver. */
+        rc = ctx->restore.ops.localise_page(ctx, types[i], page_data);
+        if ( rc )
+        {
+            ERROR("Failed to localise pfn %lx (type %#x)",
+                  pfns[i], types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
+            goto err;
+        }
+
+        if ( ctx->restore.verify )
+        {
+            /* Verify mode - compare incoming data to what we already have. */
+            if ( memcmp(guest_page, page_data, PAGE_SIZE) )
+                ERROR("verify pfn %lx failed (type %#x)",
+                      pfns[i], types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
+        }
+        else
+        {
+            /* Regular mode - copy incoming data into place. */
+            memcpy(guest_page, page_data, PAGE_SIZE);
+        }
+
+        ++j;
+        guest_page += PAGE_SIZE;
+        page_data += PAGE_SIZE;
+    }
+
+ done:
+    rc = 0;
+
+ err:
+    if ( mapping )
+        munmap(mapping, nr_pages * PAGE_SIZE);
+
+    free(map_errs);
+    free(mfns);
+
+    return rc;
+}
+
+/*
+ * Validate a PAGE_DATA record from the stream, and pass the results to
+ * process_page_data() to actually perform the legwork.
+ */
+static int handle_page_data(struct xc_sr_context *ctx, struct xc_sr_record 
*rec)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_rec_page_data_header *pages = rec->data;
+    unsigned i, pages_of_data = 0;
+    int rc = -1;
+
+    xen_pfn_t *pfns = NULL, pfn;
+    uint32_t *types = NULL, type;
+
+    if ( rec->length < sizeof(*pages) )
+    {
+        ERROR("PAGE_DATA record truncated: length %u, min %zu",
+              rec->length, sizeof(*pages));
+        goto err;
+    }
+    else if ( pages->count < 1 )
+    {
+        ERROR("Expected at least 1 pfn in PAGE_DATA record");
+        goto err;
+    }
+    else if ( rec->length < sizeof(*pages) + (pages->count * sizeof(uint64_t)) 
)
+    {
+        ERROR("PAGE_DATA record (length %u) too short to contain %u"
+              " pfns worth of information", rec->length, pages->count);
+        goto err;
+    }
+
+    pfns = malloc(pages->count * sizeof(*pfns));
+    types = malloc(pages->count * sizeof(*types));
+    if ( !pfns || !types )
+    {
+        ERROR("Unable to allocate enough memory for %u pfns",
+              pages->count);
+        goto err;
+    }
+
+    for ( i = 0; i < pages->count; ++i )
+    {
+        pfn = pages->pfn[i] & PAGE_DATA_PFN_MASK;
+        if ( !ctx->restore.ops.pfn_is_valid(ctx, pfn) )
+        {
+            ERROR("pfn %#lx (index %u) outside domain maximum", pfn, i);
+            goto err;
+        }
+
+        type = (pages->pfn[i] & PAGE_DATA_TYPE_MASK) >> 32;
+        if ( ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) >= 5) &&
+             ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) <= 8) )
+        {
+            ERROR("Invalid type %#x for pfn %#lx (index %u)", type, pfn, i);
+            goto err;
+        }
+        else if ( type < XEN_DOMCTL_PFINFO_BROKEN )
+            /* NOTAB and all L1 through L4 tables (including pinned) should
+             * have a page worth of data in the record. */
+            pages_of_data++;
+
+        pfns[i] = pfn;
+        types[i] = type;
+    }
+
+    if ( rec->length != (sizeof(*pages) +
+                         (sizeof(uint64_t) * pages->count) +
+                         (PAGE_SIZE * pages_of_data)) )
+    {
+        ERROR("PAGE_DATA record wrong size: length %u, expected "
+              "%zu + %zu + %lu", rec->length, sizeof(*pages),
+              (sizeof(uint64_t) * pages->count), (PAGE_SIZE * pages_of_data));
+        goto err;
+    }
+
+    rc = process_page_data(ctx, pages->count, pfns, types,
+                           &pages->pfn[pages->count]);
+ err:
+    free(types);
+    free(pfns);
+
+    return rc;
+}
+
+/*
+ * Restore a domain.
+ */
+static int restore(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_record rec;
+    int rc, saved_rc = 0, saved_errno = 0;
+
+    IPRINTF("Restoring domain");
+
+    rc = ctx->restore.ops.setup(ctx);
+    if ( rc )
+        goto err;
+
+    ctx->restore.max_populated_pfn = (32 * 1024 / 4) - 1;
+    ctx->restore.populated_pfns = bitmap_alloc(
+        ctx->restore.max_populated_pfn + 1);
+    if ( !ctx->restore.populated_pfns )
+    {
+        ERROR("Unable to allocate memory for populated_pfns bitmap");
+        goto err;
+    }
+
+    do
+    {
+        rc = read_record(ctx, &rec);
+        if ( rc )
+            goto err;
+
+        switch ( rec.type )
+        {
+        case REC_TYPE_END:
+            break;
+
+        case REC_TYPE_PAGE_DATA:
+            rc = handle_page_data(ctx, &rec);
+            break;
+
+        case REC_TYPE_VERIFY:
+            DPRINTF("Verify mode enabled");
+            ctx->restore.verify = true;
+            break;
+
+        default:
+            rc = ctx->restore.ops.process_record(ctx, &rec);
+            break;
+        }
+
+        free(rec.data);
+
+        if ( rc == RECORD_NOT_PROCESSED )
+        {
+            if ( rec.type & REC_TYPE_OPTIONAL )
+                DPRINTF("Ignoring optional record %#x (%s)",
+                        rec.type, rec_type_to_str(rec.type));
+            else
+            {
+                ERROR("Mandatory record %#x (%s) not handled",
+                      rec.type, rec_type_to_str(rec.type));
+                rc = -1;
+            }
+        }
+
+        if ( rc )
+            goto err;
+
+    } while ( rec.type != REC_TYPE_END );
+
+    rc = ctx->restore.ops.stream_complete(ctx);
+    if ( rc )
+        goto err;
+
+    IPRINTF("Restore successful");
+    goto done;
+
+ err:
+    saved_errno = errno;
+    saved_rc = rc;
+    PERROR("Restore failed");
+
+ done:
+    free(ctx->restore.populated_pfns);
+    rc = ctx->restore.ops.cleanup(ctx);
+    if ( rc )
+        PERROR("Failed to clean up");
+
+    if ( saved_rc )
+    {
+        rc = saved_rc;
+        errno = saved_errno;
+    }
+
+    return rc;
+}
+
 int xc_domain_restore2(xc_interface *xch, int io_fd, uint32_t dom,
                        unsigned int store_evtchn, unsigned long *store_mfn,
                        domid_t store_domid, unsigned int console_evtchn,
-                       unsigned long *console_mfn, domid_t console_domid,
+                       unsigned long *console_gfn, domid_t console_domid,
                        unsigned int hvm, unsigned int pae, int superpages,
                        int checkpointed_stream,
                        struct restore_callbacks *callbacks)
 {
+    struct xc_sr_context ctx =
+        {
+            .xch = xch,
+            .fd = io_fd,
+        };
+
+    /* GCC 4.4 (of CentOS 6.x vintage) can' t initialise anonymous unions. */
+    ctx.restore.console_evtchn = console_evtchn;
+    ctx.restore.console_domid = console_domid;
+    ctx.restore.xenstore_evtchn = store_evtchn;
+    ctx.restore.xenstore_domid = store_domid;
+    ctx.restore.callbacks = callbacks;
+
     IPRINTF("In experimental %s", __func__);
-    return -1;
+    DPRINTF("fd %d, dom %u, hvm %u, pae %u, superpages %d"
+            ", checkpointed_stream %d", io_fd, dom, hvm, pae,
+            superpages, checkpointed_stream);
+
+    if ( xc_domain_getinfo(xch, dom, 1, &ctx.dominfo) != 1 )
+    {
+        PERROR("Failed to get domain info");
+        return -1;
+    }
+
+    if ( ctx.dominfo.domid != dom )
+    {
+        ERROR("Domain %u does not exist", dom);
+        return -1;
+    }
+
+    ctx.domid = dom;
+
+    if ( read_headers(&ctx) )
+        return -1;
+
+    if ( ctx.dominfo.hvm )
+    {
+        ctx.restore.ops = restore_ops_x86_hvm;
+        if ( restore(&ctx) )
+            return -1;
+    }
+    else
+    {
+        ctx.restore.ops = restore_ops_x86_pv;
+        if ( restore(&ctx) )
+            return -1;
+    }
+
+    IPRINTF("XenStore: mfn %#lx, dom %d, evt %u",
+            ctx.restore.xenstore_gfn,
+            ctx.restore.xenstore_domid,
+            ctx.restore.xenstore_evtchn);
+
+    IPRINTF("Console: mfn %#lx, dom %d, evt %u",
+            ctx.restore.console_gfn,
+            ctx.restore.console_domid,
+            ctx.restore.console_evtchn);
+
+    *console_gfn = ctx.restore.console_gfn;
+    *store_mfn = ctx.restore.xenstore_gfn;
+
+    return 0;
 }
 
 /*
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.