|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v5 RFC 14/14] tools/libxc: noarch restore code
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Signed-off-by: Frediano Ziglio <frediano.ziglio@xxxxxxxxxx>
Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
---
tools/libxc/saverestore/common.h | 6 +
tools/libxc/saverestore/restore.c | 556 ++++++++++++++++++++++++++++++++++++-
2 files changed, 561 insertions(+), 1 deletion(-)
diff --git a/tools/libxc/saverestore/common.h b/tools/libxc/saverestore/common.h
index e16e0de..2d44961 100644
--- a/tools/libxc/saverestore/common.h
+++ b/tools/libxc/saverestore/common.h
@@ -292,6 +292,12 @@ static inline int write_record(struct context *ctx, struct record *rec)
return write_split_record(ctx, rec, NULL, 0);
}
+/* TODO - find a better way of hiding this. It should be private to
+ * restore.c, but is needed by x86_pv_localise_page()
+ *
+ * Obtains memory from Xen for the not-yet-populated pfns among the 'count'
+ * entries of original_pfns[], using the parallel types[] array to skip
+ * entries typed XTAB or BROKEN.  Returns 0 on success and non-zero on
+ * failure.
+ */
+int populate_pfns(struct context *ctx, unsigned count,
+ const xen_pfn_t *original_pfns, const uint32_t *types);
+
#endif
/*
* Local variables:
diff --git a/tools/libxc/saverestore/restore.c b/tools/libxc/saverestore/restore.c
index 6624baa..c00742d 100644
--- a/tools/libxc/saverestore/restore.c
+++ b/tools/libxc/saverestore/restore.c
@@ -1,5 +1,499 @@
+#include <arpa/inet.h>
+
#include "common.h"
+/*
+ * Read the Image and Domain headers from the stream and validate them.
+ *
+ * The Image Header's id, version and options fields are converted from
+ * network byte order before being checked.  On success the stream format
+ * version, the guest type and the guest page size are recorded in
+ * ctx->restore.
+ *
+ * Returns 0 on success, or -1 if a header cannot be read or the Image Header
+ * fails validation.
+ */
+static int read_headers(struct context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    struct ihdr ihdr;
+    struct dhdr dhdr;
+
+    if ( read_exact(ctx->fd, &ihdr, sizeof(ihdr)) )
+    {
+        PERROR("Failed to read Image Header from stream");
+        return -1;
+    }
+
+    ihdr.id = ntohl(ihdr.id);
+    ihdr.version = ntohl(ihdr.version);
+    ihdr.options = ntohs(ihdr.options);
+
+    if ( ihdr.marker != IHDR_MARKER )
+    {
+        ERROR("Invalid marker: Got 0x%016"PRIx64, ihdr.marker);
+        return -1;
+    }
+    else if ( ihdr.id != IHDR_ID )
+    {
+        ERROR("Invalid ID: Expected 0x%08"PRIx32", Got 0x%08"PRIx32,
+              IHDR_ID, ihdr.id);
+        return -1;
+    }
+    else if ( ihdr.version != IHDR_VERSION )
+    {
+        /* Expected value first, then the value found in the stream. */
+        ERROR("Invalid Version: Expected %d, Got %d",
+              IHDR_VERSION, ihdr.version);
+        return -1;
+    }
+    else if ( ihdr.options & IHDR_OPT_BIG_ENDIAN )
+    {
+        ERROR("Unable to handle big endian streams");
+        return -1;
+    }
+
+    ctx->restore.format_version = ihdr.version;
+
+    if ( read_exact(ctx->fd, &dhdr, sizeof(dhdr)) )
+    {
+        PERROR("Failed to read Domain Header from stream");
+        return -1;
+    }
+
+    ctx->restore.guest_type = dhdr.type;
+    ctx->restore.guest_page_size = (1U << dhdr.page_shift);
+
+    IPRINTF("Found %s domain from Xen %d.%d",
+            dhdr_type_to_str(dhdr.type), dhdr.xen_major, dhdr.xen_minor);
+    return 0;
+}
+
+/**
+ * Reads a record from the stream, and fills in the record structure.
+ *
+ * Returns 0 on success and non-0 on failure.
+ *
+ * On success, the records type and size shall be valid.
+ * - If size is 0, data shall be NULL.
+ * - If size is non-0, data shall be a buffer allocated by malloc() which must
+ *   be passed to free() by the caller.  The buffer holds the record length
+ *   rounded up according to REC_ALIGN_ORDER, i.e. any padding is read too.
+ *
+ * On failure, the contents of the record structure are undefined.
+ */
+static int read_record(struct context *ctx, struct record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    struct rhdr rhdr;
+    size_t datasz;
+
+    if ( read_exact(ctx->fd, &rhdr, sizeof(rhdr)) )
+    {
+        PERROR("Failed to read Record Header from stream");
+        return -1;
+    }
+    else if ( rhdr.length > REC_LENGTH_MAX )
+    {
+        ERROR("Record (0x%08"PRIx32", %s) length 0x%"PRIx32
+              " exceeds max (0x%"PRIx32")",
+              rhdr.type, rec_type_to_str(rhdr.type),
+              rhdr.length, REC_LENGTH_MAX);
+        return -1;
+    }
+
+    datasz = ROUNDUP(rhdr.length, REC_ALIGN_ORDER);
+
+    if ( datasz )
+    {
+        rec->data = malloc(datasz);
+
+        if ( !rec->data )
+        {
+            ERROR("Unable to allocate %zu bytes for record data (0x%08"PRIx32
+                  ", %s)", datasz, rhdr.type, rec_type_to_str(rhdr.type));
+            return -1;
+        }
+
+        if ( read_exact(ctx->fd, rec->data, datasz) )
+        {
+            /* Report before freeing, so free() cannot disturb errno. */
+            PERROR("Failed to read %zu bytes of data for record (0x%08"PRIx32
+                   ", %s)", datasz, rhdr.type, rec_type_to_str(rhdr.type));
+            free(rec->data);
+            rec->data = NULL;
+            return -1;
+        }
+    }
+    else
+        rec->data = NULL;
+
+    rec->type = rhdr.type;
+    rec->length = rhdr.length;
+
+    return 0;
+}
+
+/*
+ * Report whether a pfn has already been marked as populated.
+ *
+ * A pfn beyond the range currently tracked, or any pfn while no bitmap has
+ * been allocated, is reported as not populated.
+ */
+static bool pfn_is_populated(const struct context *ctx, xen_pfn_t pfn)
+{
+    const bool tracked = ctx->restore.populated_pfns &&
+                         pfn <= ctx->restore.max_populated_pfn;
+
+    return tracked ? test_bit(pfn, ctx->restore.populated_pfns) : false;
+}
+
+/*
+ * Set a pfn as populated, expanding the tracking structures if needed.
+ *
+ * The bitmap is grown so that its highest tracked pfn is one less than a
+ * multiple of 1024, and the newly added part is zeroed before the bit for
+ * pfn is set.
+ *
+ * Returns 0 on success, or -1 if the bitmap could not be reallocated (the
+ * existing bitmap is left untouched in that case).
+ */
+static int pfn_set_populated(struct context *ctx, xen_pfn_t pfn)
+{
+    xc_interface *xch = ctx->xch;
+
+    if ( !ctx->restore.populated_pfns ||
+         pfn > ctx->restore.max_populated_pfn )
+    {
+        unsigned long new_max_pfn = ((pfn + 1024) & ~1023) - 1;
+        size_t old_sz, new_sz;
+        unsigned long *p;
+
+        /*
+         * With no bitmap allocated yet there is no previously initialised
+         * content, regardless of what max_populated_pfn holds, so the whole
+         * of the new allocation must be zeroed.
+         */
+        old_sz = ctx->restore.populated_pfns
+            ? bitmap_size(ctx->restore.max_populated_pfn + 1) : 0;
+        new_sz = bitmap_size(new_max_pfn + 1);
+
+        p = realloc(ctx->restore.populated_pfns, new_sz);
+        if ( !p )
+        {
+            PERROR("Failed to realloc populated bitmap");
+            return -1;
+        }
+
+        /* realloc() leaves the extension uninitialised; clear it. */
+        memset((uint8_t *)p + old_sz, 0x00, new_sz - old_sz);
+
+        ctx->restore.populated_pfns = p;
+        ctx->restore.max_populated_pfn = new_max_pfn;
+    }
+
+    set_bit(pfn, ctx->restore.populated_pfns);
+
+    return 0;
+}
+
+/*
+ * Obtain memory from Xen to back the subset of the given pfns which is not
+ * populated yet.
+ *
+ * Entries typed XTAB or BROKEN, and pfns already marked as populated, are
+ * skipped.  The remaining pfns are passed to
+ * xc_domain_populate_physmap_exact(); each is then marked as populated and
+ * reported to ctx->ops.set_gfn() together with the matching entry of the
+ * array handed to that call.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int populate_pfns(struct context *ctx, unsigned count,
+                  const xen_pfn_t *original_pfns, const uint32_t *types)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t *mfns = malloc(count * sizeof(*mfns));
+    xen_pfn_t *pfns = malloc(count * sizeof(*pfns));
+    unsigned idx, nr_todo = 0;
+    int rc = -1;
+
+    if ( !mfns || !pfns )
+    {
+        ERROR("Failed to allocate %zu bytes for populating the physmap",
+              2 * count * sizeof(*mfns));
+        goto err;
+    }
+
+    /* Collect the pfns which still need backing memory. */
+    for ( idx = 0; idx < count; ++idx )
+    {
+        if ( types[idx] == XEN_DOMCTL_PFINFO_XTAB ||
+             types[idx] == XEN_DOMCTL_PFINFO_BROKEN ||
+             pfn_is_populated(ctx, original_pfns[idx]) )
+            continue;
+
+        mfns[nr_todo] = original_pfns[idx];
+        pfns[nr_todo] = original_pfns[idx];
+        ++nr_todo;
+    }
+
+    /* Nothing left to populate: report success via the common exit. */
+    if ( nr_todo == 0 )
+    {
+        rc = 0;
+        goto err;
+    }
+
+    rc = xc_domain_populate_physmap_exact(xch, ctx->domid, nr_todo, 0, 0,
+                                          mfns);
+    if ( rc )
+    {
+        PERROR("Failed to populate physmap");
+        goto err;
+    }
+
+    for ( idx = 0; idx < nr_todo; ++idx )
+    {
+        rc = pfn_set_populated(ctx, pfns[idx]);
+        if ( rc )
+            goto err;
+
+        ctx->ops.set_gfn(ctx, pfns[idx], mfns[idx]);
+    }
+
+    rc = 0;
+
+ err:
+    free(pfns);
+    free(mfns);
+
+    return rc;
+}
+
+/*
+ * Given a list of pfns, their types, and a block of page data from the
+ * stream, populate and record their types, map the relevant subset and copy
+ * the data into the guest.
+ *
+ * count     - number of entries in pfns[] and types[].
+ * pfns      - pfns named by the record.
+ * types     - type of each pfn, parallel to pfns[].
+ * page_data - PAGE_SIZE bytes of data for each pfn whose type is not XTAB,
+ *             BROKEN or XALLOC, in the same order as pfns[].
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+static int process_page_data(struct context *ctx, unsigned count,
+                             xen_pfn_t *pfns, uint32_t *types, void *page_data)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t *mfns = malloc(count * sizeof(*mfns));
+    int *map_errs = malloc(count * sizeof(*map_errs));
+    int rc = -1;
+    void *mapping = NULL, *guest_page = NULL;
+    unsigned i,       /* i indexes the pfns from the record. */
+        j,            /* j indexes the subset of pfns we decide to map. */
+        nr_pages = 0; /* Number of frames collected for mapping. */
+
+    if ( !mfns || !map_errs )
+    {
+        ERROR("Failed to allocate %zu bytes to process page data",
+              count * (sizeof(*mfns) + sizeof(*map_errs)));
+        goto err;
+    }
+
+    rc = populate_pfns(ctx, count, pfns, types);
+    if ( rc )
+    {
+        ERROR("Failed to populate pfns for batch of %u pages", count);
+        goto err;
+    }
+    rc = -1;
+
+    /* Record every pfn's type, and collect the frames which carry data. */
+    for ( i = 0; i < count; ++i )
+    {
+        ctx->ops.set_page_type(ctx, pfns[i], types[i]);
+
+        switch ( types[i] )
+        {
+        case XEN_DOMCTL_PFINFO_NOTAB:
+
+        case XEN_DOMCTL_PFINFO_L1TAB:
+        case XEN_DOMCTL_PFINFO_L1TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+        case XEN_DOMCTL_PFINFO_L2TAB:
+        case XEN_DOMCTL_PFINFO_L2TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+        case XEN_DOMCTL_PFINFO_L3TAB:
+        case XEN_DOMCTL_PFINFO_L3TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+        case XEN_DOMCTL_PFINFO_L4TAB:
+        case XEN_DOMCTL_PFINFO_L4TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+            mfns[nr_pages++] = ctx->ops.pfn_to_gfn(ctx, pfns[i]);
+            break;
+        }
+    }
+
+    if ( nr_pages > 0 )
+    {
+        mapping = guest_page = xc_map_foreign_bulk(
+            xch, ctx->domid, PROT_READ | PROT_WRITE,
+            mfns, map_errs, nr_pages);
+        if ( !mapping )
+        {
+            PERROR("Unable to map %u mfns for %u pages of data",
+                   nr_pages, count);
+            goto err;
+        }
+    }
+
+    for ( i = 0, j = 0; i < count; ++i )
+    {
+        switch ( types[i] )
+        {
+        case XEN_DOMCTL_PFINFO_XTAB:
+        case XEN_DOMCTL_PFINFO_BROKEN:
+        case XEN_DOMCTL_PFINFO_XALLOC:
+            /* No page data to deal with. */
+            continue;
+        }
+
+        if ( map_errs[j] )
+        {
+            ERROR("Mapping pfn %lx (mfn %lx, type %#"PRIx32") failed with %d",
+                  pfns[i], mfns[j], types[i], map_errs[j]);
+            goto err;
+        }
+
+        memcpy(guest_page, page_data, PAGE_SIZE);
+
+        /* Undo page normalisation done by the saver. */
+        rc = ctx->restore.ops.localise_page(ctx, types[i], guest_page);
+        if ( rc )
+        {
+            DPRINTF("Failed to localise");
+            goto err;
+        }
+
+        ++j;
+
+        /*
+         * Step both cursors on by one page.  Arithmetic on void pointers is
+         * a GNU extension, so go via a byte pointer.
+         */
+        guest_page = (uint8_t *)guest_page + PAGE_SIZE;
+        page_data = (uint8_t *)page_data + PAGE_SIZE;
+    }
+
+    rc = 0;
+
+ err:
+    if ( mapping )
+        munmap(mapping, nr_pages * PAGE_SIZE);
+
+    free(map_errs);
+    free(mfns);
+
+    return rc;
+}
+
+/*
+ * Check a PAGE_DATA record from the stream for consistency, split its
+ * entries into separate pfn and type arrays, and hand those to
+ * process_page_data() together with the page contents that follow the pfn
+ * list in the record.
+ *
+ * The record is rejected if it is too short for its header or pfn list,
+ * names no pfns, contains a pfn the domain does not consider valid, contains
+ * an invalid page type, or if its length does not match the pfn list plus
+ * one page of data for every entry expected to carry data.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+static int handle_page_data(struct context *ctx, struct record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    struct rec_page_data_header *hdr = rec->data;
+    unsigned idx, nr_data_pages = 0;
+    xen_pfn_t *pfns = NULL;
+    uint32_t *types = NULL;
+    int rc = -1;
+
+    if ( rec->length < sizeof(*hdr) )
+    {
+        ERROR("PAGE_DATA record truncated: length %"PRIu32", min %zu",
+              rec->length, sizeof(*hdr));
+        goto err;
+    }
+
+    if ( hdr->count < 1 )
+    {
+        ERROR("Expected at least 1 pfn in PAGE_DATA record");
+        goto err;
+    }
+
+    if ( rec->length < sizeof(*hdr) + (hdr->count * sizeof(uint64_t)) )
+    {
+        ERROR("PAGE_DATA record (length %"PRIu32") too short to contain %"
+              PRIu32" pfns worth of information", rec->length, hdr->count);
+        goto err;
+    }
+
+    pfns = malloc(hdr->count * sizeof(*pfns));
+    types = malloc(hdr->count * sizeof(*types));
+    if ( !pfns || !types )
+    {
+        ERROR("Unable to allocate enough memory for %"PRIu32" pfns",
+              hdr->count);
+        goto err;
+    }
+
+    for ( idx = 0; idx < hdr->count; ++idx )
+    {
+        /* Each entry combines a pfn with a type held in its upper half. */
+        xen_pfn_t pfn = hdr->pfn[idx] & PAGE_DATA_PFN_MASK;
+        uint32_t type, ltab;
+
+        if ( !ctx->ops.pfn_is_valid(ctx, pfn) )
+        {
+            ERROR("pfn %#lx (index %u) outside domain maximum", pfn, idx);
+            goto err;
+        }
+
+        type = (hdr->pfn[idx] & PAGE_DATA_TYPE_MASK) >> 32;
+        ltab = type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT;
+
+        if ( ltab >= 5 && ltab <= 8 )
+        {
+            ERROR("Invalid type %#"PRIx32" for pfn %#lx (index %u)", type,
+                  pfn, idx);
+            goto err;
+        }
+
+        /*
+         * NOTAB and all L1 thru L4 tables (including pinned) should have a
+         * page worth of data in the record.
+         */
+        if ( type < XEN_DOMCTL_PFINFO_BROKEN )
+            nr_data_pages++;
+
+        pfns[idx] = pfn;
+        types[idx] = type;
+    }
+
+    if ( rec->length != (sizeof(*hdr) +
+                         (sizeof(uint64_t) * hdr->count) +
+                         (PAGE_SIZE * nr_data_pages)) )
+    {
+        ERROR("PAGE_DATA record wrong size: length %"PRIu32", expected "
+              "%zu + %zu + %zu", rec->length, sizeof(*hdr),
+              (sizeof(uint64_t) * hdr->count), (PAGE_SIZE * nr_data_pages));
+        goto err;
+    }
+
+    /* The page contents start immediately after the last pfn entry. */
+    rc = process_page_data(ctx, hdr->count, pfns, types,
+                           &hdr->pfn[hdr->count]);
+
+ err:
+    free(types);
+    free(pfns);
+
+    return rc;
+}
+
+/*
+ * Restore a domain.
+ *
+ * Runs the setup hook, then reads records from the stream until an END
+ * record is seen: PAGE_DATA records are handled here, anything else goes to
+ * the process_record hook.  Afterwards the stream_complete hook is called.
+ *
+ * The populated-pfn bitmap is freed and the cleanup hook runs on every path.
+ * If restoring failed, the first failure's return value and errno are what
+ * the caller sees; otherwise the result of the cleanup hook is returned.
+ */
+static int restore(struct context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    struct record rec;
+    int rc;
+    int saved_rc = 0;
+    int saved_errno = 0;
+
+    IPRINTF("Restoring domain");
+
+    rc = ctx->restore.ops.setup(ctx);
+    if ( rc )
+        goto err;
+
+    for ( ;; )
+    {
+        rc = read_record(ctx, &rec);
+        if ( rc )
+            goto err;
+
+        if ( rec.type == REC_TYPE_END )
+        {
+            DPRINTF("End record");
+        }
+        else if ( rec.type == REC_TYPE_PAGE_DATA )
+        {
+            rc = handle_page_data(ctx, &rec);
+        }
+        else
+        {
+            rc = ctx->restore.ops.process_record(ctx, &rec);
+        }
+
+        /* The record's payload is finished with, whatever the outcome. */
+        free(rec.data);
+        if ( rc )
+            goto err;
+
+        if ( rec.type == REC_TYPE_END )
+            break;
+    }
+
+    rc = ctx->restore.ops.stream_complete(ctx);
+    if ( rc )
+        goto err;
+
+    IPRINTF("Restore successful");
+    goto done;
+
+ err:
+    /* Remember the original failure before cleanup can overwrite it. */
+    saved_errno = errno;
+    saved_rc = rc;
+    PERROR("Restore failed");
+
+ done:
+    free(ctx->restore.populated_pfns);
+
+    rc = ctx->restore.ops.cleanup(ctx);
+    if ( rc )
+        PERROR("Failed to clean up");
+
+    if ( saved_rc )
+    {
+        rc = saved_rc;
+        errno = saved_errno;
+    }
+
+    return rc;
+}
+
int xc_domain_restore2(xc_interface *xch, int io_fd, uint32_t dom,
unsigned int store_evtchn, unsigned long *store_mfn,
domid_t store_domid, unsigned int console_evtchn,
@@ -8,8 +502,68 @@ int xc_domain_restore2(xc_interface *xch, int io_fd, uint32_t dom,
int checkpointed_stream,
struct restore_callbacks *callbacks)
{
+ struct context ctx =
+ {
+ .xch = xch,
+ .fd = io_fd,
+ };
+
+ /* GCC 4.4 (of CentOS 6.x vintage) can't initialise anonymous unions :( */
+ ctx.restore.console_evtchn = console_evtchn;
+ ctx.restore.console_domid = console_domid;
+ ctx.restore.xenstore_evtchn = store_evtchn;
+ ctx.restore.xenstore_domid = store_domid;
+ ctx.restore.callbacks = callbacks;
+
IPRINTF("In experimental %s", __func__);
- return -1;
+
+ if ( xc_domain_getinfo(xch, dom, 1, &ctx.dominfo) != 1 )
+ {
+ PERROR("Failed to get domain info");
+ return -1;
+ }
+
+ if ( ctx.dominfo.domid != dom )
+ {
+ ERROR("Domain %d does not exist", dom);
+ return -1;
+ }
+
+ ctx.domid = dom;
+ IPRINTF("Restoring domain %d", dom);
+
+ if ( read_headers(&ctx) )
+ return -1;
+
+ if ( ctx.dominfo.hvm )
+ {
+ ctx.ops = common_ops_x86_hvm;
+ ctx.restore.ops = restore_ops_x86_hvm;
+ if ( restore(&ctx) )
+ return -1;
+ }
+ else
+ {
+ ctx.ops = common_ops_x86_pv;
+ ctx.restore.ops = restore_ops_x86_pv;
+ if ( restore(&ctx) )
+ return -1;
+ }
+
+ DPRINTF("XenStore: mfn %#lx, dom %d, evt %u",
+ ctx.restore.xenstore_mfn,
+ ctx.restore.xenstore_domid,
+ ctx.restore.xenstore_evtchn);
+
+ DPRINTF("Console: mfn %#lx, dom %d, evt %u",
+ ctx.restore.console_mfn,
+ ctx.restore.console_domid,
+ ctx.restore.console_evtchn);
+
+ *console_mfn = ctx.restore.console_mfn;
+ *store_mfn = ctx.restore.xenstore_mfn;
+
+ return 0;
}
/*
--
1.7.10.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |