[Xen-devel] [PATCH 6/6] tools/libxc: x86 pv restore implementation
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Signed-off-by: Frediano Ziglio <frediano.ziglio@xxxxxxxxxx>
---
tools/libxc/saverestore/common.c | 51 ++
tools/libxc/saverestore/common.h | 35 ++
tools/libxc/saverestore/restore.c | 112 +++-
tools/libxc/saverestore/restore_x86_pv.c | 977 ++++++++++++++++++++++++++++++
4 files changed, 1174 insertions(+), 1 deletion(-)
create mode 100644 tools/libxc/saverestore/restore_x86_pv.c
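
Notes (below the --- line, not for commit): a minimal sketch of how the new
read_record() helper is expected to be driven by a restore loop, assuming a
context that has already passed read_headers(). Record type names match the
handlers introduced below; error handling is elided for brevity.

    struct record rec;
    int rc;

    do
    {
        rc = read_record(ctx, &rec);   /* allocates rec.data when length != 0 */
        if ( rc )
            break;

        switch ( rec.type )
        {
        case REC_TYPE_page_data:
            /* dispatch to the relevant handler, e.g. handle_page_data() */
            break;
        /* ... other REC_TYPE_* handlers ... */
        }

        free(rec.data);                /* caller owns the buffer */
    } while ( rec.type != REC_TYPE_end );
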
diff --git a/tools/libxc/saverestore/common.c b/tools/libxc/saverestore/common.c
index df18447..dbfae21 100644
--- a/tools/libxc/saverestore/common.c
+++ b/tools/libxc/saverestore/common.c
@@ -84,6 +84,57 @@ int write_split_record(struct context *ctx, struct record *rec,
return 0;
}
+int read_record(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ struct rhdr rhdr;
+ size_t datasz;
+
+ if ( read_exact(ctx->fd, &rhdr, sizeof rhdr) )
+ {
+ PERROR("Failed to read Record Header from stream");
+ return -1;
+ }
+ else if ( rhdr.length > REC_LENGTH_MAX )
+ {
+ ERROR("Record (0x%08"PRIx32", %s) length 0x%"PRIx32
+ " exceeds max (0x%"PRIx32")",
+ rhdr.type, rec_type_to_str(rhdr.type),
+ rhdr.length, REC_LENGTH_MAX);
+ return -1;
+ }
+
+ datasz = (rhdr.length + 7) & ~7U;
+
+ if ( datasz )
+ {
+ rec->data = malloc(datasz);
+
+ if ( !rec->data )
+ {
+ ERROR("Unable to allocate %zu bytes for record data
(0x%08"PRIx32", %s)",
+ datasz, rhdr.type, rec_type_to_str(rhdr.type));
+ return -1;
+ }
+
+ if ( read_exact(ctx->fd, rec->data, datasz) )
+ {
+ free(rec->data);
+ rec->data = NULL;
+ PERROR("Failed to read %zu bytes of data for record
(0x%08"PRIx32", %s)",
+ datasz, rhdr.type, rec_type_to_str(rhdr.type));
+ return -1;
+ }
+ }
+ else
+ rec->data = NULL;
+
+ rec->type = rhdr.type;
+ rec->length = rhdr.length;
+
+ return 0;
+}
+
/*
* Local variables:
* mode: C
diff --git a/tools/libxc/saverestore/common.h b/tools/libxc/saverestore/common.h
index a2c8cee..249e18f 100644
--- a/tools/libxc/saverestore/common.h
+++ b/tools/libxc/saverestore/common.h
@@ -7,9 +7,12 @@
#include "../xg_private.h"
#include "../xg_save_restore.h"
+#include "../xc_dom.h"
#undef GET_FIELD
#undef SET_FIELD
+#undef MEMCPY_FIELD
+#undef MEMSET_ARRAY_FIELD
#undef mfn_to_pfn
#undef pfn_to_mfn
@@ -95,6 +98,8 @@ struct context
/* Saves an x86 PV domain. */
int save_x86_pv(struct context *ctx);
+/* Restores an x86 PV domain. */
+int restore_x86_pv(struct context *ctx);
struct record
{
@@ -118,6 +123,22 @@ struct record
(_p)->x32._f = (_v); \
})
+/* memcpy field _f from _s to _d, of an *_any union */
+#define MEMCPY_FIELD(_c, _d, _s, _f) \
+ ({ if ( (_c)->x86_pv.width == 8 ) \
+ memcpy(&(_d)->x64._f, &(_s)->x64._f, sizeof((_d)->x64._f)); \
+ else \
+ memcpy(&(_d)->x32._f, &(_s)->x32._f, sizeof((_d)->x32._f)); \
+ })
+
+/* memset array field _f with value _v, from an *_any union */
+#define MEMSET_ARRAY_FIELD(_c, _d, _f, _v) \
+ ({ if ( (_c)->x86_pv.width == 8 ) \
+ memset(&(_d)->x64._f[0], (_v), sizeof((_d)->x64._f)); \
+ else \
+ memset(&(_d)->x32._f[0], (_v), sizeof((_d)->x32._f)); \
+ })
+
/*
* Writes a split record to the stream, applying correct padding where
* appropriate. It is common when sending records containing blobs from Xen
@@ -143,6 +164,20 @@ static inline int write_record(struct context *ctx, struct record *rec)
return write_split_record(ctx, rec, NULL, 0);
}
+/*
+ * Reads a record from the stream, and fills in the record structure.
+ *
+ * Returns 0 on success and non-0 on failure.
+ *
+ * On success, the record's type and size shall be valid.
+ * - If size is 0, data shall be NULL.
+ * - If size is non-0, data shall be a buffer allocated by malloc() which must
+ * be passed to free() by the caller.
+ *
+ * On failure, the contents of the record structure are undefined.
+ */
+int read_record(struct context *ctx, struct record *rec);
+
#endif
/*
* Local variables:
diff --git a/tools/libxc/saverestore/restore.c b/tools/libxc/saverestore/restore.c
index 6624baa..6937aec 100644
--- a/tools/libxc/saverestore/restore.c
+++ b/tools/libxc/saverestore/restore.c
@@ -1,5 +1,62 @@
+#include <arpa/inet.h>
+
#include "common.h"
+static int read_headers(struct context *ctx)
+{
+ xc_interface *xch = ctx->xch;
+ struct ihdr ihdr;
+ struct dhdr dhdr;
+
+ if ( read_exact(ctx->fd, &ihdr, sizeof ihdr) )
+ {
+ PERROR("Failed to read Image Header from stream");
+ return -1;
+ }
+
+ ihdr.id = ntohl(ihdr.id);
+ ihdr.version = ntohl(ihdr.version);
+ ihdr.options = ntohs(ihdr.options);
+
+ if ( ihdr.marker != IHDR_MARKER )
+ {
+ ERROR("Invalid marker: Got 0x%016"PRIx64, ihdr.marker);
+ return -1;
+ }
+ else if ( ihdr.id != IHDR_ID )
+ {
+ ERROR("Invalid ID: Expected 0x%08"PRIx32", Got 0x%08"PRIx32,
+ IHDR_ID, ihdr.id);
+ return -1;
+ }
+ else if ( ihdr.version != IHDR_VERSION )
+ {
+ ERROR("Invalid Version: Expected %d, Got %d", ihdr.version,
IHDR_VERSION);
+ return -1;
+ }
+ else if ( ihdr.options & IHDR_OPT_BIG_ENDIAN )
+ {
+ ERROR("Unable to handle big endian streams");
+ return -1;
+ }
+
+ ctx->restore.format_version = ihdr.version;
+
+ if ( read_exact(ctx->fd, &dhdr, sizeof dhdr) )
+ {
+ PERROR("Failed to read Domain Header from stream");
+ return -1;
+ }
+
+ ctx->restore.guest_type = dhdr.type;
+ ctx->restore.guest_page_size = (1U << dhdr.page_shift);
+
+ IPRINTF("Found %s domain from Xen %d.%d",
+ dhdr_type_to_str(dhdr.type), dhdr.xen_major, dhdr.xen_minor);
+ return 0;
+}
+
+
int xc_domain_restore2(xc_interface *xch, int io_fd, uint32_t dom,
unsigned int store_evtchn, unsigned long *store_mfn,
domid_t store_domid, unsigned int console_evtchn,
@@ -8,8 +65,61 @@ int xc_domain_restore2(xc_interface *xch, int io_fd, uint32_t dom,
int checkpointed_stream,
struct restore_callbacks *callbacks)
{
+ struct context ctx =
+ {
+ .xch = xch,
+ .fd = io_fd,
+ };
+
+ ctx.restore.console_evtchn = console_evtchn;
+ ctx.restore.console_domid = console_domid;
+ ctx.restore.xenstore_evtchn = store_evtchn;
+ ctx.restore.xenstore_domid = store_domid;
+
IPRINTF("In experimental %s", __func__);
- return -1;
+
+ if ( xc_domain_getinfo(xch, dom, 1, &ctx.dominfo) != 1 )
+ {
+ PERROR("Failed to get domain info");
+ return -1;
+ }
+
+ if ( ctx.dominfo.domid != dom )
+ {
+ ERROR("Domain %d does not exist", dom);
+ return -1;
+ }
+
+ ctx.domid = dom;
+ IPRINTF("Restoring domain %d", dom);
+
+ if ( read_headers(&ctx) )
+ return -1;
+
+ if ( ctx.dominfo.hvm )
+ {
+ ERROR("HVM Restore not supported yet");
+ return -1;
+ }
+ else
+ {
+ if ( restore_x86_pv(&ctx) )
+ return -1;
+
+ DPRINTF("XenStore: mfn %#lx, dom %d, evt %u",
+ ctx.restore.xenstore_mfn,
+ ctx.restore.xenstore_domid,
+ ctx.restore.xenstore_evtchn);
+
+ DPRINTF("Console: mfn %#lx, dom %d, evt %u",
+ ctx.restore.console_mfn,
+ ctx.restore.console_domid,
+ ctx.restore.console_evtchn);
+
+ *console_mfn = ctx.restore.console_mfn;
+ *store_mfn = ctx.restore.xenstore_mfn;
+ return 0;
+ }
}
/*
diff --git a/tools/libxc/saverestore/restore_x86_pv.c b/tools/libxc/saverestore/restore_x86_pv.c
new file mode 100644
index 0000000..0659244
--- /dev/null
+++ b/tools/libxc/saverestore/restore_x86_pv.c
@@ -0,0 +1,977 @@
+#include <assert.h>
+#include <arpa/inet.h>
+
+#include "common_x86_pv.h"
+
+static int expand_p2m(struct context *ctx, unsigned long max_pfn)
+{
+ xc_interface *xch = ctx->xch;
+ unsigned long old_max = ctx->x86_pv.max_pfn, i;
+ unsigned long end_frame = (max_pfn + ctx->x86_pv.fpp) / ctx->x86_pv.fpp;
+ unsigned long old_end_frame = (old_max + ctx->x86_pv.fpp) / ctx->x86_pv.fpp;
+ xen_pfn_t *p2m = NULL, *p2m_pfns = NULL;
+ uint32_t *pfn_types = NULL;
+ size_t p2msz, p2m_pfnsz, pfn_typesz;
+
+ /* We expect expand_p2m to be called exactly once, expanding from 0 to the
+ * domain's max, but assert some sanity */
+ assert(max_pfn > old_max);
+
+ p2msz = (max_pfn + 1) * ctx->x86_pv.width;
+ p2m = realloc(ctx->x86_pv.p2m, p2msz);
+ if ( !p2m )
+ {
+ ERROR("Failed to (re)alloc %zu bytes for p2m", p2msz);
+ return -1;
+ }
+ ctx->x86_pv.p2m = p2m;
+
+ pfn_typesz = (max_pfn + 1) * sizeof *pfn_types;
+ pfn_types = realloc(ctx->x86_pv.pfn_types, pfn_typesz);
+ if ( !pfn_types )
+ {
+ ERROR("Failed to (re)alloc %zu bytes for pfn_types", pfn_typesz);
+ return -1;
+ }
+ ctx->x86_pv.pfn_types = pfn_types;
+
+ p2m_pfnsz = (end_frame + 1) * sizeof *p2m_pfns;
+ p2m_pfns = realloc(ctx->x86_pv.p2m_pfns, p2m_pfnsz);
+ if ( !p2m_pfns )
+ {
+ ERROR("Failed to (re)alloc %zu bytes for p2m frame list", p2m_pfnsz);
+ return -1;
+ }
+ ctx->x86_pv.p2m_frames = end_frame;
+ ctx->x86_pv.p2m_pfns = p2m_pfns;
+
+ ctx->x86_pv.max_pfn = max_pfn;
+ for ( i = (old_max ? old_max + 1 : 0); i <= max_pfn; ++i )
+ {
+ set_p2m(ctx, i, INVALID_MFN);
+ ctx->x86_pv.pfn_types[i] = 0;
+ }
+
+ for ( i = (old_end_frame ? old_end_frame + 1 : 0); i <= end_frame; ++i )
+ ctx->x86_pv.p2m_pfns[i] = INVALID_MFN;
+
+ DPRINTF("Expanded p2m from %#lx to %#lx", old_max, max_pfn);
+ return 0;
+}
+
+static int pin_pagetables(struct context *ctx)
+{
+ xc_interface *xch = ctx->xch;
+ unsigned long i;
+ struct mmuext_op pin;
+
+ DPRINTF("Pinning pagetables");
+
+ for ( i = 0; i <= ctx->x86_pv.max_pfn; ++i )
+ {
+ if ( (ctx->x86_pv.pfn_types[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
+ continue;
+
+ switch ( ctx->x86_pv.pfn_types[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+ {
+ case XEN_DOMCTL_PFINFO_L1TAB:
+ pin.cmd = MMUEXT_PIN_L1_TABLE;
+ break;
+ case XEN_DOMCTL_PFINFO_L2TAB:
+ pin.cmd = MMUEXT_PIN_L2_TABLE;
+ break;
+ case XEN_DOMCTL_PFINFO_L3TAB:
+ pin.cmd = MMUEXT_PIN_L3_TABLE;
+ break;
+ case XEN_DOMCTL_PFINFO_L4TAB:
+ pin.cmd = MMUEXT_PIN_L4_TABLE;
+ break;
+ default:
+ continue;
+ }
+
+ pin.arg1.mfn = pfn_to_mfn(ctx, i);
+
+ if ( xc_mmuext_op(xch, &pin, 1, ctx->domid) != 0 )
+ {
+ PERROR("Failed to pin page table for pfn %#lx", i);
+ return -1;
+ }
+
+ }
+
+ return 0;
+}
+
+static int process_start_info(struct context *ctx, vcpu_guest_context_any_t *vcpu)
+{
+ xc_interface *xch = ctx->xch;
+ xen_pfn_t pfn, mfn;
+ start_info_any_t *guest_start_info = NULL;
+ int rc = -1;
+
+ pfn = GET_FIELD(ctx, vcpu, user_regs.edx);
+
+ if ( pfn > ctx->x86_pv.max_pfn )
+ {
+ ERROR("Start Info pfn %#lx out of range", pfn);
+ goto err;
+ }
+ else if ( ctx->x86_pv.pfn_types[pfn] != XEN_DOMCTL_PFINFO_NOTAB )
+ {
+ ERROR("Start Info pfn %#lx has bad type %lu", pfn,
+ ctx->x86_pv.pfn_types[pfn] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
+ goto err;
+ }
+
+ mfn = pfn_to_mfn(ctx, pfn);
+ if ( !mfn_in_pseudophysmap(ctx, mfn) )
+ {
+ ERROR("Start Info has bad MFN");
+ pseudophysmap_walk(ctx, mfn);
+ goto err;
+ }
+
+ guest_start_info = xc_map_foreign_range(
+ xch, ctx->domid, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
+ if ( !guest_start_info )
+ {
+ PERROR("Failed to map Start Info at mfn %#lx", mfn);
+ goto err;
+ }
+
+ /* Deal with xenstore stuff */
+ pfn = GET_FIELD(ctx, guest_start_info, store_mfn);
+ if ( pfn > ctx->x86_pv.max_pfn )
+ {
+ ERROR("XenStore pfn %#lx out of range", pfn);
+ goto err;
+ }
+
+ mfn = pfn_to_mfn(ctx, pfn);
+ if ( !mfn_in_pseudophysmap(ctx, mfn) )
+ {
+ ERROR("XenStore pfn has bad MFN");
+ pseudophysmap_walk(ctx, mfn);
+ goto err;
+ }
+
+ ctx->restore.xenstore_mfn = mfn;
+ SET_FIELD(ctx, guest_start_info, store_mfn, mfn);
+ SET_FIELD(ctx, guest_start_info, store_evtchn, ctx->restore.xenstore_evtchn);
+
+
+ /* Deal with console stuff */
+ pfn = GET_FIELD(ctx, guest_start_info, console.domU.mfn);
+ if ( pfn > ctx->x86_pv.max_pfn )
+ {
+ ERROR("Console pfn %#lx out of range", pfn);
+ goto err;
+ }
+
+ mfn = pfn_to_mfn(ctx, pfn);
+ if ( !mfn_in_pseudophysmap(ctx, mfn) )
+ {
+ ERROR("Console pfn has bad MFN");
+ pseudophysmap_walk(ctx, mfn);
+ goto err;
+ }
+
+ ctx->restore.console_mfn = mfn;
+ SET_FIELD(ctx, guest_start_info, console.domU.mfn, mfn);
+ SET_FIELD(ctx, guest_start_info, console.domU.evtchn, ctx->restore.console_evtchn);
+
+ /* Set other information */
+ SET_FIELD(ctx, guest_start_info, nr_pages, ctx->x86_pv.max_pfn + 1);
+ SET_FIELD(ctx, guest_start_info, shared_info,
+ ctx->dominfo.shared_info_frame << PAGE_SHIFT);
+ SET_FIELD(ctx, guest_start_info, flags, 0);
+
+ SET_FIELD(ctx, vcpu, user_regs.edx, mfn);
+ rc = 0;
+
+err:
+ if ( guest_start_info )
+ munmap(guest_start_info, PAGE_SIZE);
+
+ return rc;
+}
+
+static int update_guest_p2m(struct context *ctx)
+{
+ xc_interface *xch = ctx->xch;
+ xen_pfn_t mfn, pfn, *guest_p2m = NULL;
+ unsigned i;
+ int rc = -1;
+
+ for ( i = 0; i < ctx->x86_pv.p2m_frames; ++i )
+ {
+ pfn = ctx->x86_pv.p2m_pfns[i];
+
+ if ( pfn > ctx->x86_pv.max_pfn )
+ {
+ ERROR("pfn (%#lx) for p2m_frame_list[%u] out of range",
+ pfn, i);
+ goto err;
+ }
+ else if ( ctx->x86_pv.pfn_types[pfn] != XEN_DOMCTL_PFINFO_NOTAB )
+ {
+ ERROR("pfn (%#lx) for p2m_frame_list[%u] has bad type %lu", pfn, i,
+ ctx->x86_pv.pfn_types[pfn] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
+ goto err;
+ }
+
+ mfn = pfn_to_mfn(ctx, pfn);
+ if ( !mfn_in_pseudophysmap(ctx, mfn) )
+ {
+ ERROR("p2m_frame_list[%u] has bad MFN", i);
+ pseudophysmap_walk(ctx, mfn);
+ goto err;
+ }
+
+ ctx->x86_pv.p2m_pfns[i] = mfn;
+ }
+
+ guest_p2m = xc_map_foreign_pages(xch, ctx->domid, PROT_WRITE,
+ ctx->x86_pv.p2m_pfns,
+ ctx->x86_pv.p2m_frames );
+ if ( !guest_p2m )
+ {
+ PERROR("Failed to map p2m frames");
+ goto err;
+ }
+
+ memcpy(guest_p2m, ctx->x86_pv.p2m,
+ (ctx->x86_pv.max_pfn + 1) * ctx->x86_pv.width);
+ rc = 0;
+ err:
+ if ( guest_p2m )
+ munmap(guest_p2m, ctx->x86_pv.p2m_frames * PAGE_SIZE);
+
+ return rc;
+}
+
+static int populate_pfn(struct context *ctx, xen_pfn_t pfn)
+{
+ xc_interface *xch = ctx->xch;
+ xen_pfn_t mfn = pfn;
+ int rc;
+
+ if ( pfn_to_mfn(ctx, pfn) != INVALID_MFN )
+ return 0;
+
+ rc = xc_domain_populate_physmap_exact(xch, ctx->domid, 1, 0, 0, &mfn);
+ if ( rc )
+ {
+ ERROR("Failed to populate physmap");
+ return rc;
+ }
+
+ set_p2m(ctx, pfn, mfn);
+
+ /* This *really* should be true by now, or something has gone very wrong */
+ assert(mfn_in_pseudophysmap(ctx, mfn));
+
+ return 0;
+}
+
+static int localise_pagetable(struct context *ctx, uint64_t *table, xen_pfn_t type)
+{
+ xc_interface *xch = ctx->xch;
+ uint64_t pte;
+ unsigned i;
+
+ type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
+
+ for ( i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
+ {
+ pte = table[i];
+
+ if ( pte & _PAGE_PRESENT )
+ {
+ xen_pfn_t mfn, pfn;
+
+ pfn = pte_to_frame(ctx, pte);
+ mfn = pfn_to_mfn(ctx, pfn);
+
+ if ( mfn == INVALID_MFN )
+ {
+ if ( populate_pfn(ctx, pfn) )
+ return -1;
+
+ mfn = pfn_to_mfn(ctx, pfn);
+ }
+
+ if ( !mfn_in_pseudophysmap(ctx, mfn) )
+ {
+ ERROR("Bad MFN for L%lu[%u]",
+ type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i);
+ pseudophysmap_walk(ctx, mfn);
+ errno = ERANGE;
+ return -1;
+ }
+
+ update_pte(ctx, &pte, mfn);
+
+ table[i] = pte;
+ }
+ }
+
+ return 0;
+}
+
+static int handle_end(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+
+ DPRINTF("End record");
+ return 0;
+}
+
+static int handle_page_data(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ struct rec_page_data_header *page = rec->data;
+ xen_pfn_t mfn, pfn, type;
+ void *guest_page = NULL;
+ int rc = -1, err;
+
+ if ( rec->length < sizeof *page )
+ {
+ ERROR("PAGE_DATA record trucated: length %"PRIu32", min %zu",
+ rec->length, sizeof *page);
+ goto cleanup;
+ }
+ else if ( page->count != 1 )
+ {
+ // TODO
+ ERROR("Unable to handle batched pages (yet)");
+ goto cleanup;
+ }
+
+ pfn = page->pfn[0] & PAGE_DATA_PFN_MASK;
+ if ( pfn > ctx->x86_pv.max_pfn )
+ {
+ ERROR("pfn %#lx outside domain maximum (%#lx)", pfn,
ctx->x86_pv.max_pfn);
+ goto cleanup;
+ }
+
+ type = (page->pfn[0] & PAGE_DATA_TYPE_MASK) >> 32;
+ if ( ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) >= 5) &&
+ ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) <= 8) )
+ {
+ ERROR("Invalid type %#lx for pfn %#lx", type, pfn);
+ goto cleanup;
+ }
+
+ ctx->x86_pv.pfn_types[pfn] = type;
+
+ switch ( type )
+ {
+ case XEN_DOMCTL_PFINFO_XTAB:
+ case XEN_DOMCTL_PFINFO_BROKEN:
+ /* No page data - leave alone */
+ rc = 0;
+ goto cleanup;
+ }
+
+ /* All other page types, need to allocate */
+ rc = populate_pfn(ctx, pfn);
+ if ( rc )
+ goto cleanup;
+
+ mfn = pfn_to_mfn(ctx, pfn);
+
+ guest_page = xc_map_foreign_bulk(
+ xch, ctx->domid, PROT_READ | PROT_WRITE, &mfn, &err, 1);
+ if ( !guest_page || err )
+ {
+ PERROR("Unable to map mfn %#lx (err %d)", mfn, err);
+ rc = -1;
+ goto cleanup;
+ }
+
+ /* XALLOC also has no page data */
+ if ( type != XEN_DOMCTL_PFINFO_XALLOC )
+ memcpy(guest_page, &page->pfn[1], PAGE_SIZE);
+
+ /* Pagetables need to be localised */
+ if ( ((type & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) >= XEN_DOMCTL_PFINFO_L1TAB
&&
+ (type & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) <= XEN_DOMCTL_PFINFO_L4TAB)
)
+ {
+ rc = localise_pagetable(ctx, guest_page, type);
+ if ( rc )
+ goto cleanup;
+ }
+
+ rc = 0;
+
+ cleanup:
+ if ( guest_page )
+ munmap(guest_page, PAGE_SIZE);
+
+ return rc;
+}
+
+static int handle_x86_pv_info(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ struct rec_x86_pv_info *info = rec->data;
+
+ if ( rec->length < sizeof *info )
+ {
+ ERROR("X86_PV_INFO record trucated: length %"PRIu32", expected %zu",
+ rec->length, sizeof *info);
+ return -1;
+ }
+ else if ( info->guest_width != 4 &&
+ info->guest_width != 8 )
+ {
+ ERROR("Unexpected guest width %"PRIu32", Expected 4 or 8",
+ info->guest_width);
+ return -1;
+ }
+ else if ( info->guest_width != ctx->x86_pv.width )
+ {
+ int rc;
+ struct xen_domctl domctl;
+
+ /* try to set address size, domain is always created 64 bit */
+ memset(&domctl, 0, sizeof(domctl));
+ domctl.domain = ctx->domid;
+ domctl.cmd = XEN_DOMCTL_set_address_size;
+ domctl.u.address_size.size = info->guest_width * 8;
+ rc = do_domctl(xch, &domctl);
+ if ( rc != 0 )
+ {
+ ERROR("Width of guest in stream (%"PRIu32
+ " bits) differs with existing domain (%"PRIu32" bits)",
+ info->guest_width * 8, ctx->x86_pv.width * 8);
+ return -1;
+ }
+
+ /* domain information changed, better to refresh */
+ rc = x86_pv_domain_info(ctx);
+ if ( rc != 0 )
+ {
+ ERROR("Unable to refresh guest informations");
+ return -1;
+ }
+ }
+ else if ( info->pt_levels != 3 &&
+ info->pt_levels != 4 )
+ {
+ ERROR("Unexpected guest levels %"PRIu32", Expected 3 or 4",
+ info->pt_levels);
+ return -1;
+ }
+ else if ( info->pt_levels != ctx->x86_pv.levels )
+ {
+ ERROR("Levels of guest in stream (%"PRIu32
+ ") differs with existing domain (%"PRIu32")",
+ info->pt_levels, ctx->x86_pv.levels);
+ return -1;
+ }
+
+ DPRINTF("X86_PV_INFO record: %d bits, %d levels",
+ ctx->x86_pv.width * 8, ctx->x86_pv.levels);
+ return 0;
+}
+
+static int handle_x86_pv_p2m_frames(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ struct rec_x86_pv_p2m_frames *data = rec->data;
+ unsigned start, end, x;
+ int rc;
+
+ if ( rec->length < sizeof *data )
+ {
+ ERROR("X86_PV_P2M_FRAMES record trucated: length %"PRIu32", min %zu",
+ rec->length, sizeof *data + sizeof(uint64_t));
+ return -1;
+ }
+ else if ( data->start_pfn > data->end_pfn )
+ {
+ ERROR("End pfn in stream (%#"PRIx32") exceeds Start (%#"PRIx32")",
+ data->end_pfn, data->start_pfn);
+ return -1;
+ }
+
+ start = data->start_pfn / ctx->x86_pv.fpp;
+ end = data->end_pfn / ctx->x86_pv.fpp + 1;
+
+ if ( rec->length != sizeof *data + ((end - start) * sizeof (uint64_t)) )
+ {
+ ERROR("X86_PV_P2M_FRAMES record wrong size: start_pfn %#"PRIx32
+ ", end_pfn %#"PRIx32", length %"PRIu32
+ ", expected %zu + (%u - %u) * %zu",
+ data->start_pfn, data->end_pfn, rec->length,
+ sizeof *data, end, start, sizeof(uint64_t));
+ return -1;
+ }
+
+ if ( data->end_pfn > ctx->x86_pv.max_pfn )
+ {
+ rc = expand_p2m(ctx, data->end_pfn);
+ if ( rc )
+ return rc;
+ }
+
+ for ( x = 0; x <= (end - start); ++x )
+ ctx->x86_pv.p2m_pfns[start + x] = data->p2m_pfns[x];
+
+ DPRINTF("X86_PV_P2M_FRAMES record: GFNs %#"PRIx32"->%#"PRIx32,
+ data->start_pfn, data->end_pfn);
+ return 0;
+}
+
+static int handle_x86_pv_vcpu_basic(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ struct rec_x86_pv_vcpu *vhdr = rec->data;
+ vcpu_guest_context_any_t vcpu;
+ size_t vcpusz = ctx->x86_pv.width == 8 ? sizeof vcpu.x64 : sizeof vcpu.x32;
+ xen_pfn_t pfn, mfn;
+ unsigned long tmp;
+ unsigned i;
+ int rc = -1;
+
+ if ( rec->length <= sizeof *vhdr )
+ {
+ ERROR("X86_PV_VCPU_BASIC record trucated: length %"PRIu32", min %zu",
+ rec->length, sizeof *vhdr + 1);
+ goto err;
+ }
+ else if ( rec->length != sizeof *vhdr + vcpusz )
+ {
+ ERROR("X86_PV_VCPU_EXTENDED record wrong size: length %"PRIu32
+ ", expected %zu", rec->length, sizeof *vhdr + vcpusz);
+ goto err;
+ }
+ else if ( vhdr->vcpu_id > ctx->dominfo.max_vcpu_id )
+ {
+ ERROR("X86_PV_VCPU_BASIC record vcpu_id (%"PRIu32
+ ") exceeds domain max (%u)",
+ vhdr->vcpu_id, ctx->dominfo.max_vcpu_id);
+ goto err;
+ }
+
+ memcpy(&vcpu, &vhdr->context, vcpusz);
+
+ SET_FIELD(ctx, &vcpu, flags, GET_FIELD(ctx, &vcpu, flags) | VGCF_online);
+
+ /* Vcpu 0 is special: Convert the suspend record to an MFN */
+ if ( vhdr->vcpu_id == 0 )
+ {
+ rc = process_start_info(ctx, &vcpu);
+ if ( rc )
+ return rc;
+ rc = -1;
+ }
+
+ tmp = GET_FIELD(ctx, &vcpu, gdt_ents);
+ if ( tmp > 8192 )
+ {
+ ERROR("GDT entry count (%lu) out of range", tmp);
+ errno = ERANGE;
+ goto err;
+ }
+
+ /* Convert GDT frames to MFNs */
+ for ( i = 0; (i * 512) < tmp; ++i )
+ {
+ pfn = GET_FIELD(ctx, &vcpu, gdt_frames[i]);
+ if ( pfn >= ctx->x86_pv.max_pfn )
+ {
+ ERROR("GDT frame %u (pfn %#lx) out of range", i, pfn);
+ goto err;
+ }
+ else if ( ctx->x86_pv.pfn_types[pfn] != XEN_DOMCTL_PFINFO_NOTAB )
+ {
+ ERROR("GDT frame %u (pfn %#lx) has bad type %lu", i, pfn,
+ ctx->x86_pv.pfn_types[pfn] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
+ goto err;
+ }
+
+ mfn = pfn_to_mfn(ctx, pfn);
+ if ( !mfn_in_pseudophysmap(ctx, mfn) )
+ {
+ ERROR("GDT frame %u has bad MFN", i);
+ pseudophysmap_walk(ctx, mfn);
+ goto err;
+ }
+
+ SET_FIELD(ctx, &vcpu, gdt_frames[i], mfn);
+ }
+
+ /* Convert CR3 to an MFN */
+ pfn = cr3_to_mfn(ctx, GET_FIELD(ctx, &vcpu, ctrlreg[3]));
+ if ( pfn >= ctx->x86_pv.max_pfn )
+ {
+ ERROR("cr3 (pfn %#lx) out of range", pfn);
+ goto err;
+ }
+ else if ( (ctx->x86_pv.pfn_types[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
!=
+ (((xen_pfn_t)ctx->x86_pv.levels) <<
XEN_DOMCTL_PFINFO_LTAB_SHIFT) )
+ {
+ ERROR("cr3 (pfn %#lx) has bad type %lu, expected %lu", pfn,
+ ctx->x86_pv.pfn_types[pfn] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT,
+ ctx->x86_pv.levels);
+ goto err;
+ }
+
+ mfn = pfn_to_mfn(ctx, pfn);
+ if ( !mfn_in_pseudophysmap(ctx, mfn) )
+ {
+ ERROR("cr3 has bad MFN");
+ pseudophysmap_walk(ctx, mfn);
+ goto err;
+ }
+
+ SET_FIELD(ctx, &vcpu, ctrlreg[3], mfn_to_cr3(ctx, mfn));
+
+ /* 64bit guests: Convert CR1 (guest pagetables) to MFN */
+ if ( ctx->x86_pv.levels == 4 && (vcpu.x64.ctrlreg[1] & 1) )
+ {
+ pfn = vcpu.x64.ctrlreg[1] >> PAGE_SHIFT;
+
+ if ( pfn >= ctx->x86_pv.max_pfn )
+ {
+ ERROR("cr1 (pfn %#lx) out of range", pfn);
+ goto err;
+ }
+ else if ( (ctx->x86_pv.pfn_types[pfn] &
XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
+ (((xen_pfn_t)ctx->x86_pv.levels) <<
XEN_DOMCTL_PFINFO_LTAB_SHIFT) )
+ {
+ ERROR("cr1 (pfn %#lx) has bad type %lu, expected %lu", pfn,
+ ctx->x86_pv.pfn_types[pfn] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT,
+ ctx->x86_pv.levels);
+ goto err;
+ }
+
+ mfn = pfn_to_mfn(ctx, pfn);
+ if ( !mfn_in_pseudophysmap(ctx, mfn) )
+ {
+ ERROR("cr1 has bad MFN");
+ pseudophysmap_walk(ctx, mfn);
+ goto err;
+ }
+
+ vcpu.x64.ctrlreg[1] = (uint64_t)mfn << PAGE_SHIFT;
+ }
+
+ if ( xc_vcpu_setcontext(xch, ctx->domid, vhdr->vcpu_id, &vcpu) )
+ {
+ PERROR("Failed to set vcpu%"PRIu32"'s basic info", vhdr->vcpu_id);
+ goto err;
+ }
+
+ rc = 0;
+ DPRINTF("vcpu%d X86_PV_VCPU_BASIC record", vhdr->vcpu_id);
+ err:
+ return rc;
+}
+
+static int handle_x86_pv_vcpu_extended(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ struct rec_x86_pv_vcpu *vcpu = rec->data;
+ DECLARE_DOMCTL;
+
+ if ( rec->length <= sizeof *vcpu )
+ {
+ ERROR("X86_PV_VCPU_EXTENDED record trucated: length %"PRIu32", min
%zu",
+ rec->length, sizeof *vcpu + 1);
+ return -1;
+ }
+ else if ( rec->length > sizeof *vcpu + 128 )
+ {
+ ERROR("X86_PV_VCPU_EXTENDED record too long: length %"PRIu32", max
%zu",
+ rec->length, sizeof *vcpu + 128);
+ return -1;
+ }
+ else if ( vcpu->vcpu_id > ctx->dominfo.max_vcpu_id )
+ {
+ ERROR("X86_PV_VCPU_EXTENDED record vcpu_id (%"PRIu32
+ ") exceeds domain max (%u)",
+ vcpu->vcpu_id, ctx->dominfo.max_vcpu_id);
+ return -1;
+ }
+
+ domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext;
+ domctl.domain = ctx->domid;
+ memcpy(&domctl.u.ext_vcpucontext, &vcpu->context, rec->length - sizeof
*vcpu);
+
+ if ( xc_domctl(xch, &domctl) != 0 )
+ {
+ PERROR("Failed to set vcpu%"PRIu32"'s extended info", vcpu->vcpu_id);
+ return -1;
+ }
+
+ DPRINTF("vcpu%d X86_PV_VCPU_EXTENDED record", vcpu->vcpu_id);
+ return 0;
+}
+
+static int handle_x86_pv_vcpu_xsave(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ struct rec_x86_pv_vcpu_xsave *vcpu = rec->data;
+ int rc;
+ DECLARE_DOMCTL;
+ DECLARE_HYPERCALL_BUFFER(void, buffer);
+ size_t buffersz;
+
+ if ( rec->length <= sizeof *vcpu )
+ {
+ ERROR("X86_PV_VCPU_XSAVE record trucated: length %"PRIu32", min %zu",
+ rec->length, sizeof *vcpu + 1);
+ return -1;
+ }
+ else if ( vcpu->vcpu_id > ctx->dominfo.max_vcpu_id )
+ {
+ ERROR("X86_PV_VCPU_EXTENDED record vcpu_id (%"PRIu32
+ ") exceeds domain max (%u)",
+ vcpu->vcpu_id, ctx->dominfo.max_vcpu_id);
+ return -1;
+ }
+
+ buffersz = rec->length - sizeof *vcpu;
+ buffer = xc_hypercall_buffer_alloc(xch, buffer, buffersz);
+ if ( !buffer )
+ {
+ ERROR("Unable to allocate %"PRIu64" bytes for xsave hypercall buffer",
+ buffersz);
+ return -1;
+ }
+
+ domctl.cmd = XEN_DOMCTL_setvcpuextstate;
+ domctl.domain = ctx->domid;
+ domctl.u.vcpuextstate.vcpu = vcpu->vcpu_id;
+ domctl.u.vcpuextstate.xfeature_mask = vcpu->xfeature_mask;
+ domctl.u.vcpuextstate.size = buffersz;
+ set_xen_guest_handle(domctl.u.vcpuextstate.buffer, buffer);
+
+ rc = xc_domctl(xch, &domctl);
+
+ xc_hypercall_buffer_free(xch, buffer);
+
+ if ( rc )
+ {
+ PERROR("Failed to set vcpu%"PRIu32"'s xsave info", vcpu->vcpu_id);
+ return rc;
+ }
+ else
+ {
+ DPRINTF("vcpu%d X86_PV_VCPU_XSAVE record", vcpu->vcpu_id);
+ return 0;
+ }
+}
+
+static int handle_x86_pv_shared_info(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ unsigned i;
+ int rc = -1;
+ shared_info_any_t *guest_shared_info = NULL;
+ shared_info_any_t *stream_shared_info = rec->data;
+
+ if ( rec->length != PAGE_SIZE )
+ {
+ ERROR("X86_PV_SHARED_INFO record wrong size: length %"PRIu32
+ ", expected %u", rec->length, PAGE_SIZE);
+ goto err;
+ }
+
+ guest_shared_info = xc_map_foreign_range(
+ xch, ctx->domid, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ ctx->dominfo.shared_info_frame);
+ if ( !guest_shared_info )
+ {
+ PERROR("Failed to map Shared Info at mfn %#lx",
+ ctx->dominfo.shared_info_frame);
+ goto err;
+ }
+
+ MEMCPY_FIELD(ctx, guest_shared_info, stream_shared_info, vcpu_info);
+ MEMCPY_FIELD(ctx, guest_shared_info, stream_shared_info, arch);
+
+ SET_FIELD(ctx, guest_shared_info, arch.pfn_to_mfn_frame_list_list, 0);
+
+ MEMSET_ARRAY_FIELD(ctx, guest_shared_info, evtchn_pending, 0);
+ for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ )
+ SET_FIELD(ctx, guest_shared_info, vcpu_info[i].evtchn_pending_sel, 0);
+
+ MEMSET_ARRAY_FIELD(ctx, guest_shared_info, evtchn_mask, 0xff);
+
+ rc = 0;
+ err:
+
+ if ( guest_shared_info )
+ munmap(guest_shared_info, PAGE_SIZE);
+
+ return rc;
+}
+static int handle_tsc_info(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ struct rec_tsc_info *tsc = rec->data;
+
+ if ( rec->length != sizeof *tsc )
+ {
+ ERROR("TSC_INFO record wrong size: length %"PRIu32", expected %zu",
+ rec->length, sizeof *tsc);
+ return -1;
+ }
+
+ if ( xc_domain_set_tsc_info(xch, ctx->domid, tsc->mode,
+ tsc->nsec, tsc->khz, tsc->incarnation) )
+ {
+ PERROR("Unable to set TSC information");
+ return -1;
+ }
+
+ return 0;
+}
+
+int restore_x86_pv(struct context *ctx)
+{
+ xc_interface *xch = ctx->xch;
+ struct record rec;
+ int rc;
+
+ IPRINTF("In experimental %s", __func__);
+
+ if ( ctx->restore.guest_type != DHDR_TYPE_x86_pv )
+ {
+ ERROR("Unable to restore %s domain into an x86_pv domain",
+ dhdr_type_to_str(ctx->restore.guest_type));
+ return -1;
+ }
+ else if ( ctx->restore.guest_page_size != 4096 )
+ {
+ ERROR("Invalid page size %d for x86_pv domains",
+ ctx->restore.guest_page_size);
+ return -1;
+ }
+
+ rc = x86_pv_domain_info(ctx);
+ if ( rc )
+ goto err;
+
+ rc = x86_pv_map_m2p(ctx);
+ if ( rc )
+ goto err;
+
+ do
+ {
+ rc = read_record(ctx, &rec);
+ if ( rc )
+ goto err;
+
+ switch ( rec.type )
+ {
+ case REC_TYPE_end:
+ rc = handle_end(ctx, &rec);
+ break;
+
+ case REC_TYPE_page_data:
+ rc = handle_page_data(ctx, &rec);
+ break;
+
+ case REC_TYPE_x86_pv_info:
+ rc = handle_x86_pv_info(ctx, &rec);
+ break;
+
+ case REC_TYPE_x86_pv_p2m_frames:
+ rc = handle_x86_pv_p2m_frames(ctx, &rec);
+ break;
+
+ case REC_TYPE_x86_pv_vcpu_basic:
+ rc = handle_x86_pv_vcpu_basic(ctx, &rec);
+ break;
+
+ case REC_TYPE_x86_pv_vcpu_extended:
+ rc = handle_x86_pv_vcpu_extended(ctx, &rec);
+ break;
+
+ case REC_TYPE_x86_pv_vcpu_xsave:
+ rc = handle_x86_pv_vcpu_xsave(ctx, &rec);
+ break;
+
+ case REC_TYPE_x86_pv_shared_info:
+ rc = handle_x86_pv_shared_info(ctx, &rec);
+ break;
+
+ case REC_TYPE_tsc_info:
+ rc = handle_tsc_info(ctx, &rec);
+ break;
+
+ default:
+ if ( rec.type & REC_TYPE_optional )
+ {
+ IPRINTF("Ignoring optional record (0x%"PRIx32", %s)",
+ rec.type, rec_type_to_str(rec.type));
+ rc = 0;
+ break;
+ }
+
+ ERROR("Invalid record type (0x%"PRIx32", %s) for x86_pv domains",
+ rec.type, rec_type_to_str(rec.type));
+ rc = -1;
+ break;
+ }
+
+ free(rec.data);
+ if ( rc )
+ goto err;
+
+ } while ( rec.type != REC_TYPE_end );
+
+ IPRINTF("Finished reading records");
+
+ rc = pin_pagetables(ctx);
+ if ( rc )
+ goto err;
+
+ rc = update_guest_p2m(ctx);
+ if ( rc )
+ goto err;
+
+ rc = xc_dom_gnttab_seed(xch, ctx->domid,
+ ctx->restore.console_mfn,
+ ctx->restore.xenstore_mfn,
+ ctx->restore.console_domid,
+ ctx->restore.xenstore_domid);
+ if ( rc )
+ {
+ PERROR("Failed to seed grant table");
+ goto err;
+ }
+
+ /* all done */
+ IPRINTF("All Done");
+ assert(!rc);
+ goto cleanup;
+
+ err:
+ assert(rc);
+ cleanup:
+
+ free(ctx->x86_pv.p2m_pfns);
+
+ if ( ctx->x86_pv.m2p )
+ munmap(ctx->x86_pv.m2p, ctx->x86_pv.nr_m2p_frames * PAGE_SIZE);
+
+ return rc;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--
1.7.10.4