|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v5 RFC 09/14] tools/libxc: x86 PV restore code
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Signed-off-by: Frediano Ziglio <frediano.ziglio@xxxxxxxxxx>
Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
---
tools/libxc/saverestore/common.h | 2 +
tools/libxc/saverestore/restore_x86_pv.c | 965 ++++++++++++++++++++++++++++++
2 files changed, 967 insertions(+)
create mode 100644 tools/libxc/saverestore/restore_x86_pv.c
diff --git a/tools/libxc/saverestore/common.h b/tools/libxc/saverestore/common.h
index 5c8a370..bb21e01 100644
--- a/tools/libxc/saverestore/common.h
+++ b/tools/libxc/saverestore/common.h
@@ -248,6 +248,8 @@ extern struct common_ops common_ops_x86_pv;
extern struct save_ops save_ops_x86_pv;
+extern struct restore_ops restore_ops_x86_pv;
+
struct record
{
uint32_t type;
diff --git a/tools/libxc/saverestore/restore_x86_pv.c
b/tools/libxc/saverestore/restore_x86_pv.c
new file mode 100644
index 0000000..3174d4c
--- /dev/null
+++ b/tools/libxc/saverestore/restore_x86_pv.c
@@ -0,0 +1,965 @@
+#include <assert.h>
+
+#include "common_x86_pv.h"
+
+/*
+ * Expand our local tracking information for the p2m table and domains maximum
+ * size. Normally this will be called once to expand from 0 to max_pfn, but
+ * is liable to expand multiple times if the domain grows on the sending side
+ * after migration has started.
+ */
+static int expand_p2m(struct context *ctx, unsigned long max_pfn)
+{
+ xc_interface *xch = ctx->xch;
+ unsigned long old_max = ctx->x86_pv.max_pfn, i;
+ unsigned int fpp = PAGE_SIZE / ctx->x86_pv.width;
+ unsigned long end_frame = (max_pfn + fpp) / fpp;
+ unsigned long old_end_frame = (old_max + fpp) / fpp;
+ xen_pfn_t *p2m = NULL, *p2m_pfns = NULL;
+ uint32_t *pfn_types = NULL;
+ size_t p2msz, p2m_pfnsz, pfn_typesz;
+
+ assert(max_pfn > old_max);
+
+ p2msz = (max_pfn + 1) * ctx->x86_pv.width;
+ p2m = realloc(ctx->x86_pv.p2m, p2msz);
+ if ( !p2m )
+ {
+ ERROR("Failed to (re)alloc %zu bytes for p2m", p2msz);
+ return -1;
+ }
+ ctx->x86_pv.p2m = p2m;
+
+ pfn_typesz = (max_pfn + 1) * sizeof(*pfn_types);
+ pfn_types = realloc(ctx->x86_pv.pfn_types, pfn_typesz);
+ if ( !pfn_types )
+ {
+ ERROR("Failed to (re)alloc %zu bytes for pfn_types", pfn_typesz);
+ return -1;
+ }
+ ctx->x86_pv.pfn_types = pfn_types;
+
+ p2m_pfnsz = (end_frame + 1) * sizeof(*p2m_pfns);
+ p2m_pfns = realloc(ctx->x86_pv.p2m_pfns, p2m_pfnsz);
+ if ( !p2m_pfns )
+ {
+ ERROR("Failed to (re)alloc %zu bytes for p2m frame list", p2m_pfnsz);
+ return -1;
+ }
+ ctx->x86_pv.p2m_frames = end_frame;
+ ctx->x86_pv.p2m_pfns = p2m_pfns;
+
+ ctx->x86_pv.max_pfn = max_pfn;
+ for ( i = (old_max ? old_max + 1 : 0); i <= max_pfn; ++i )
+ {
+ ctx->ops.set_gfn(ctx, i, INVALID_MFN);
+ ctx->ops.set_page_type(ctx, i, 0);
+ }
+
+ for ( i = (old_end_frame ? old_end_frame + 1 : 0); i <= end_frame; ++i )
+ ctx->x86_pv.p2m_pfns[i] = INVALID_MFN;
+
+ DPRINTF("Expanded p2m from %#lx to %#lx", old_max, max_pfn);
+ return 0;
+}
+
+/*
+ * Pin all of the pagetables. TODO - batch the hypercalls.
+ */
+static int pin_pagetables(struct context *ctx)
+{
+ xc_interface *xch = ctx->xch;
+ unsigned long i;
+ struct mmuext_op pin;
+
+ DPRINTF("Pinning pagetables");
+
+ for ( i = 0; i <= ctx->x86_pv.max_pfn; ++i )
+ {
+ if ( (ctx->x86_pv.pfn_types[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
+ continue;
+
+ switch ( ctx->x86_pv.pfn_types[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+ {
+ case XEN_DOMCTL_PFINFO_L1TAB:
+ pin.cmd = MMUEXT_PIN_L1_TABLE;
+ break;
+ case XEN_DOMCTL_PFINFO_L2TAB:
+ pin.cmd = MMUEXT_PIN_L2_TABLE;
+ break;
+ case XEN_DOMCTL_PFINFO_L3TAB:
+ pin.cmd = MMUEXT_PIN_L3_TABLE;
+ break;
+ case XEN_DOMCTL_PFINFO_L4TAB:
+ pin.cmd = MMUEXT_PIN_L4_TABLE;
+ break;
+ default:
+ continue;
+ }
+
+ pin.arg1.mfn = ctx->ops.pfn_to_gfn(ctx, i);
+
+ if ( xc_mmuext_op(xch, &pin, 1, ctx->domid) != 0 )
+ {
+ PERROR("Failed to pin page table for pfn %#lx", i);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Update details in a guests start_info strucutre.
+ */
+static int process_start_info(struct context *ctx, vcpu_guest_context_any_t
*vcpu)
+{
+ xc_interface *xch = ctx->xch;
+ xen_pfn_t pfn, mfn;
+ start_info_any_t *guest_start_info = NULL;
+ int rc = -1;
+
+ pfn = GET_FIELD(ctx, vcpu, user_regs.edx);
+
+ if ( pfn > ctx->x86_pv.max_pfn )
+ {
+ ERROR("Start Info pfn %#lx out of range", pfn);
+ goto err;
+ }
+ else if ( ctx->x86_pv.pfn_types[pfn] != XEN_DOMCTL_PFINFO_NOTAB )
+ {
+ ERROR("Start Info pfn %#lx has bad type %"PRIu32, pfn,
+ ctx->x86_pv.pfn_types[pfn] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
+ goto err;
+ }
+
+ mfn = ctx->ops.pfn_to_gfn(ctx, pfn);
+ if ( !mfn_in_pseudophysmap(ctx, mfn) )
+ {
+ ERROR("Start Info has bad mfn");
+ dump_bad_pseudophysmap_entry(ctx, mfn);
+ goto err;
+ }
+
+ guest_start_info = xc_map_foreign_range(
+ xch, ctx->domid, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
+ if ( !guest_start_info )
+ {
+ PERROR("Failed to map Start Info at mfn %#lx", mfn);
+ goto err;
+ }
+
+ /* Deal with xenstore stuff */
+ pfn = GET_FIELD(ctx, guest_start_info, store_mfn);
+ if ( pfn > ctx->x86_pv.max_pfn )
+ {
+ ERROR("XenStore pfn %#lx out of range", pfn);
+ goto err;
+ }
+
+ mfn = ctx->ops.pfn_to_gfn(ctx, pfn);
+ if ( !mfn_in_pseudophysmap(ctx, mfn) )
+ {
+ ERROR("XenStore pfn has bad mfn");
+ dump_bad_pseudophysmap_entry(ctx, mfn);
+ goto err;
+ }
+
+ ctx->restore.xenstore_mfn = mfn;
+ SET_FIELD(ctx, guest_start_info, store_mfn, mfn);
+ SET_FIELD(ctx, guest_start_info, store_evtchn,
ctx->restore.xenstore_evtchn);
+
+ /* Deal with console stuff */
+ pfn = GET_FIELD(ctx, guest_start_info, console.domU.mfn);
+ if ( pfn > ctx->x86_pv.max_pfn )
+ {
+ ERROR("Console pfn %#lx out of range", pfn);
+ goto err;
+ }
+
+ mfn = ctx->ops.pfn_to_gfn(ctx, pfn);
+ if ( !mfn_in_pseudophysmap(ctx, mfn) )
+ {
+ ERROR("Console pfn has bad mfn");
+ dump_bad_pseudophysmap_entry(ctx, mfn);
+ goto err;
+ }
+
+ ctx->restore.console_mfn = mfn;
+ SET_FIELD(ctx, guest_start_info, console.domU.mfn, mfn);
+ SET_FIELD(ctx, guest_start_info, console.domU.evtchn,
ctx->restore.console_evtchn);
+
+ /* Set other information */
+ SET_FIELD(ctx, guest_start_info, nr_pages, ctx->x86_pv.max_pfn + 1);
+ SET_FIELD(ctx, guest_start_info, shared_info,
+ ctx->dominfo.shared_info_frame << PAGE_SHIFT);
+ SET_FIELD(ctx, guest_start_info, flags, 0);
+
+ SET_FIELD(ctx, vcpu, user_regs.edx, mfn);
+ rc = 0;
+
+err:
+ if ( guest_start_info )
+ munmap(guest_start_info, PAGE_SIZE);
+
+ return rc;
+}
+
/*
 * Copy the p2m which has been constructed locally as memory has been
 * allocated, over the p2m in guest, so the guest can find its memory again on
 * resume.
 */
static int update_guest_p2m(struct context *ctx)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t mfn, pfn, *guest_p2m = NULL;
    unsigned i;
    int rc = -1;

    /* Translate each frame of the p2m frame list from pfn to mfn,
     * validating every entry against the pseudophysmap on the way. */
    for ( i = 0; i < ctx->x86_pv.p2m_frames; ++i )
    {
        pfn = ctx->x86_pv.p2m_pfns[i];

        if ( pfn > ctx->x86_pv.max_pfn )
        {
            ERROR("pfn (%#lx) for p2m_frame_list[%u] out of range",
                  pfn, i);
            goto err;
        }
        else if ( ctx->x86_pv.pfn_types[pfn] != XEN_DOMCTL_PFINFO_NOTAB )
        {
            /* p2m frames must be plain data pages, never pagetables. */
            ERROR("pfn (%#lx) for p2m_frame_list[%u] has bad type %"PRIu32, pfn, i,
                  ctx->x86_pv.pfn_types[pfn] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
            goto err;
        }

        mfn = ctx->ops.pfn_to_gfn(ctx, pfn);
        if ( !mfn_in_pseudophysmap(ctx, mfn) )
        {
            ERROR("p2m_frame_list[%u] has bad mfn", i);
            dump_bad_pseudophysmap_entry(ctx, mfn);
            goto err;
        }

        /* NOTE(review): p2m_pfns[] is converted in place from pfns to mfns
         * here; later users of the array see mfns from this point on. */
        ctx->x86_pv.p2m_pfns[i] = mfn;
    }

    guest_p2m = xc_map_foreign_pages(xch, ctx->domid, PROT_WRITE,
                                     ctx->x86_pv.p2m_pfns,
                                     ctx->x86_pv.p2m_frames );
    if ( !guest_p2m )
    {
        PERROR("Failed to map p2m frames");
        goto err;
    }

    /* Overwrite the guest's view of its p2m with the locally built copy. */
    memcpy(guest_p2m, ctx->x86_pv.p2m,
           (ctx->x86_pv.max_pfn + 1) * ctx->x86_pv.width);
    rc = 0;
 err:
    if ( guest_p2m )
        munmap(guest_p2m, ctx->x86_pv.p2m_frames * PAGE_SIZE);

    return rc;
}
+
+/*
+ * Process a toolstack record. TODO - remove from spec and code once libxl
+ * framing is sorted.
+ */
+static int handle_toolstack(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ int rc;
+
+ if ( !ctx->restore.callbacks || !ctx->restore.callbacks->toolstack_restore
)
+ return 0;
+
+ rc = ctx->restore.callbacks->toolstack_restore(ctx->domid, rec->data,
rec->length,
+
ctx->restore.callbacks->data);
+ if ( rc < 0 )
+ PERROR("restoring toolstack");
+ return rc;
+}
+
+/*
+ * Process an X86_PV_INFO record.
+ */
+static int handle_x86_pv_info(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ struct rec_x86_pv_info *info = rec->data;
+
+ if ( rec->length < sizeof(*info) )
+ {
+ ERROR("X86_PV_INFO record truncated: length %"PRIu32", expected %zu",
+ rec->length, sizeof(*info));
+ return -1;
+ }
+ else if ( info->guest_width != 4 &&
+ info->guest_width != 8 )
+ {
+ ERROR("Unexpected guest width %"PRIu32", Expected 4 or 8",
+ info->guest_width);
+ return -1;
+ }
+ else if ( info->guest_width != ctx->x86_pv.width )
+ {
+ int rc;
+ struct xen_domctl domctl;
+
+ /* Try to set address size, domain is always created 64 bit. */
+ memset(&domctl, 0, sizeof(domctl));
+ domctl.domain = ctx->domid;
+ domctl.cmd = XEN_DOMCTL_set_address_size;
+ domctl.u.address_size.size = info->guest_width * 8;
+ rc = do_domctl(xch, &domctl);
+ if ( rc != 0 )
+ {
+ ERROR("Width of guest in stream (%"PRIu32
+ " bits) differs with existing domain (%"PRIu32" bits)",
+ info->guest_width * 8, ctx->x86_pv.width * 8);
+ return -1;
+ }
+
+ /* Domain informations changed, better to refresh. */
+ rc = x86_pv_domain_info(ctx);
+ if ( rc != 0 )
+ {
+ ERROR("Unable to refresh guest informations");
+ return -1;
+ }
+ }
+ else if ( info->pt_levels != 3 &&
+ info->pt_levels != 4 )
+ {
+ ERROR("Unexpected guest levels %"PRIu32", Expected 3 or 4",
+ info->pt_levels);
+ return -1;
+ }
+ else if ( info->pt_levels != ctx->x86_pv.levels )
+ {
+ ERROR("Levels of guest in stream (%"PRIu32
+ ") differs with existing domain (%"PRIu32")",
+ info->pt_levels, ctx->x86_pv.levels);
+ return -1;
+ }
+
+ DPRINTF("X86_PV_INFO record: %d bits, %d levels",
+ ctx->x86_pv.width * 8, ctx->x86_pv.levels);
+ return 0;
+}
+
+/*
+ * Process an X86_PV_P2M_FRAMES record. Takes care of expanding the local p2m
+ * state if needed.
+ */
+static int handle_x86_pv_p2m_frames(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ struct rec_x86_pv_p2m_frames *data = rec->data;
+ unsigned start, end, x, fpp = PAGE_SIZE / ctx->x86_pv.width;
+ int rc;
+
+ if ( rec->length < sizeof(*data) )
+ {
+ ERROR("X86_PV_P2M_FRAMES record truncated: length %"PRIu32", min %zu",
+ rec->length, sizeof(*data) + sizeof(uint64_t));
+ return -1;
+ }
+ else if ( data->start_pfn > data->end_pfn )
+ {
+ ERROR("End pfn in stream (%#"PRIx32") exceeds Start (%#"PRIx32")",
+ data->end_pfn, data->start_pfn);
+ return -1;
+ }
+
+ start = data->start_pfn / fpp;
+ end = data->end_pfn / fpp + 1;
+
+ if ( rec->length != sizeof(*data) + ((end - start) * sizeof(uint64_t)) )
+ {
+ ERROR("X86_PV_P2M_FRAMES record wrong size: start_pfn %#"PRIx32
+ ", end_pfn %#"PRIx32", length %"PRIu32
+ ", expected %zu + (%u - %u) * %zu",
+ data->start_pfn, data->end_pfn, rec->length,
+ sizeof(*data), end, start, sizeof(uint64_t));
+ return -1;
+ }
+
+ if ( data->end_pfn > ctx->x86_pv.max_pfn )
+ {
+ rc = expand_p2m(ctx, data->end_pfn);
+ if ( rc )
+ return rc;
+ }
+
+ for ( x = 0; x < (end - start); ++x )
+ ctx->x86_pv.p2m_pfns[start + x] = data->p2m_pfns[x];
+
+ DPRINTF("X86_PV_P2M_FRAMES record: GFNs %#"PRIx32"->%#"PRIx32,
+ data->start_pfn, data->end_pfn);
+ return 0;
+}
+
+/*
+ * Process an X86_PV_VCPU_BASIC record from the stream.
+ */
+static int handle_x86_pv_vcpu_basic(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ struct rec_x86_pv_vcpu_hdr *vhdr = rec->data;
+ vcpu_guest_context_any_t vcpu;
+ size_t vcpusz = ctx->x86_pv.width == 8 ? sizeof(vcpu.x64) :
sizeof(vcpu.x32);
+ xen_pfn_t pfn, mfn;
+ unsigned long tmp;
+ unsigned i;
+ int rc = -1;
+
+ if ( rec->length <= sizeof(*vhdr) )
+ {
+ ERROR("X86_PV_VCPU_BASIC record truncated: length %"PRIu32", min %zu",
+ rec->length, sizeof(*vhdr) + 1);
+ goto err;
+ }
+ else if ( rec->length != sizeof(*vhdr) + vcpusz )
+ {
+ ERROR("X86_PV_VCPU_BASIC record wrong size: length %"PRIu32
+ ", expected %zu", rec->length, sizeof(*vhdr) + vcpusz);
+ goto err;
+ }
+ else if ( vhdr->vcpu_id > ctx->dominfo.max_vcpu_id )
+ {
+ ERROR("X86_PV_VCPU_BASIC record vcpu_id (%"PRIu32
+ ") exceeds domain max (%u)",
+ vhdr->vcpu_id, ctx->dominfo.max_vcpu_id);
+ goto err;
+ }
+
+ memcpy(&vcpu, &vhdr->context, vcpusz);
+
+ SET_FIELD(ctx, &vcpu, flags, GET_FIELD(ctx, &vcpu, flags) | VGCF_online);
+
+ /* Vcpu 0 is special: Convert the suspend record to an mfn. */
+ if ( vhdr->vcpu_id == 0 )
+ {
+ rc = process_start_info(ctx, &vcpu);
+ if ( rc )
+ return rc;
+ rc = -1;
+ }
+
+ tmp = GET_FIELD(ctx, &vcpu, gdt_ents);
+ if ( tmp > 8192 )
+ {
+ ERROR("GDT entry count (%lu) out of range", tmp);
+ errno = ERANGE;
+ goto err;
+ }
+
+ /* Convert GDT frames to mfns. */
+ for ( i = 0; (i * 512) < tmp; ++i )
+ {
+ pfn = GET_FIELD(ctx, &vcpu, gdt_frames[i]);
+ if ( pfn >= ctx->x86_pv.max_pfn )
+ {
+ ERROR("GDT frame %u (pfn %#lx) out of range", i, pfn);
+ goto err;
+ }
+ else if ( ctx->x86_pv.pfn_types[pfn] != XEN_DOMCTL_PFINFO_NOTAB )
+ {
+ ERROR("GDT frame %u (pfn %#lx) has bad type %"PRIu32, i, pfn,
+ ctx->x86_pv.pfn_types[pfn] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
+ goto err;
+ }
+
+ mfn = ctx->ops.pfn_to_gfn(ctx, pfn);
+ if ( !mfn_in_pseudophysmap(ctx, mfn) )
+ {
+ ERROR("GDT frame %u has bad mfn", i);
+ dump_bad_pseudophysmap_entry(ctx, mfn);
+ goto err;
+ }
+
+ SET_FIELD(ctx, &vcpu, gdt_frames[i], mfn);
+ }
+
+ /* Convert CR3 to an mfn. */
+ pfn = cr3_to_mfn(ctx, GET_FIELD(ctx, &vcpu, ctrlreg[3]));
+ if ( pfn >= ctx->x86_pv.max_pfn )
+ {
+ ERROR("cr3 (pfn %#lx) out of range", pfn);
+ goto err;
+ }
+ else if ( (ctx->x86_pv.pfn_types[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
!=
+ (((xen_pfn_t)ctx->x86_pv.levels) <<
XEN_DOMCTL_PFINFO_LTAB_SHIFT) )
+ {
+ ERROR("cr3 (pfn %#lx) has bad type %"PRIu32", expected %"PRIu32, pfn,
+ ctx->x86_pv.pfn_types[pfn] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT,
+ ctx->x86_pv.levels);
+ goto err;
+ }
+
+ mfn = ctx->ops.pfn_to_gfn(ctx, pfn);
+ if ( !mfn_in_pseudophysmap(ctx, mfn) )
+ {
+ ERROR("cr3 has bad mfn");
+ dump_bad_pseudophysmap_entry(ctx, mfn);
+ goto err;
+ }
+
+ SET_FIELD(ctx, &vcpu, ctrlreg[3], mfn_to_cr3(ctx, mfn));
+
+ /* 64bit guests: Convert CR1 (guest pagetables) to mfn. */
+ if ( ctx->x86_pv.levels == 4 && (vcpu.x64.ctrlreg[1] & 1) )
+ {
+ pfn = vcpu.x64.ctrlreg[1] >> PAGE_SHIFT;
+
+ if ( pfn >= ctx->x86_pv.max_pfn )
+ {
+ ERROR("cr1 (pfn %#lx) out of range", pfn);
+ goto err;
+ }
+ else if ( (ctx->x86_pv.pfn_types[pfn] &
XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
+ (((xen_pfn_t)ctx->x86_pv.levels) <<
XEN_DOMCTL_PFINFO_LTAB_SHIFT) )
+ {
+ ERROR("cr1 (pfn %#lx) has bad type %"PRIu32", expected %"PRIu32,
pfn,
+ ctx->x86_pv.pfn_types[pfn] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT,
+ ctx->x86_pv.levels);
+ goto err;
+ }
+
+ mfn = ctx->ops.pfn_to_gfn(ctx, pfn);
+ if ( !mfn_in_pseudophysmap(ctx, mfn) )
+ {
+ ERROR("cr1 has bad mfn");
+ dump_bad_pseudophysmap_entry(ctx, mfn);
+ goto err;
+ }
+
+ vcpu.x64.ctrlreg[1] = (uint64_t)mfn << PAGE_SHIFT;
+ }
+
+ if ( xc_vcpu_setcontext(xch, ctx->domid, vhdr->vcpu_id, &vcpu) )
+ {
+ PERROR("Failed to set vcpu%"PRIu32"'s basic info", vhdr->vcpu_id);
+ goto err;
+ }
+
+ rc = 0;
+ DPRINTF("vcpu%"PRId32" X86_PV_VCPU_BASIC record", vhdr->vcpu_id);
+ err:
+ return rc;
+}
+
+/*
+ * Process an X86_PV_VCPU_EXTENDED record from the stream.
+ */
+static int handle_x86_pv_vcpu_extended(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ struct rec_x86_pv_vcpu_hdr *vcpu = rec->data;
+ DECLARE_DOMCTL;
+
+ if ( rec->length <= sizeof(*vcpu) )
+ {
+ ERROR("X86_PV_VCPU_EXTENDED record truncated: length %"PRIu32", min
%zu",
+ rec->length, sizeof(*vcpu) + 1);
+ return -1;
+ }
+ else if ( rec->length > sizeof(*vcpu) + 128 )
+ {
+ ERROR("X86_PV_VCPU_EXTENDED record too long: length %"PRIu32", max
%zu",
+ rec->length, sizeof(*vcpu) + 128);
+ return -1;
+ }
+ else if ( vcpu->vcpu_id > ctx->dominfo.max_vcpu_id )
+ {
+ ERROR("X86_PV_VCPU_EXTENDED record vcpu_id (%"PRIu32
+ ") exceeds domain max (%u)",
+ vcpu->vcpu_id, ctx->dominfo.max_vcpu_id);
+ return -1;
+ }
+
+ domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext;
+ domctl.domain = ctx->domid;
+ memcpy(&domctl.u.ext_vcpucontext, &vcpu->context,
+ rec->length - sizeof(*vcpu));
+
+ if ( xc_domctl(xch, &domctl) != 0 )
+ {
+ PERROR("Failed to set vcpu%"PRIu32"'s extended info", vcpu->vcpu_id);
+ return -1;
+ }
+
+ DPRINTF("vcpu%"PRId32" X86_PV_VCPU_EXTENDED record", vcpu->vcpu_id);
+ return 0;
+}
+
+/*
+ * Process an X86_PV_VCPU_XSAVE record from the stream.
+ */
+static int handle_x86_pv_vcpu_xsave(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ struct rec_x86_pv_vcpu_hdr *vhdr = rec->data;
+ int rc;
+ DECLARE_DOMCTL;
+ DECLARE_HYPERCALL_BUFFER(void, buffer);
+ size_t buffersz;
+
+ if ( rec->length <= sizeof(*vhdr) )
+ {
+ ERROR("X86_PV_VCPU_XSAVE record truncated: length %"PRIu32", min %zu",
+ rec->length, sizeof(*vhdr) + 1);
+ return -1;
+ }
+ else if ( vhdr->vcpu_id > ctx->dominfo.max_vcpu_id )
+ {
+ ERROR("X86_PV_VCPU_XSAVE record vcpu_id (%"PRIu32
+ ") exceeds domain max (%u)",
+ vhdr->vcpu_id, ctx->dominfo.max_vcpu_id);
+ return -1;
+ }
+
+ buffersz = rec->length - sizeof(*vhdr);
+ buffer = xc_hypercall_buffer_alloc(xch, buffer, buffersz);
+ if ( !buffer )
+ {
+ ERROR("Unable to allocate %"PRIu64" bytes for xsave hypercall buffer",
+ buffersz);
+ return -1;
+ }
+
+ domctl.cmd = XEN_DOMCTL_setvcpuextstate;
+ domctl.domain = ctx->domid;
+ domctl.u.vcpuextstate.vcpu = vhdr->vcpu_id;
+ domctl.u.vcpuextstate.size = buffersz;
+ set_xen_guest_handle(domctl.u.vcpuextstate.buffer, buffer);
+
+ memcpy(buffer, vhdr->context, buffersz);
+
+ rc = xc_domctl(xch, &domctl);
+
+ xc_hypercall_buffer_free(xch, buffer);
+
+ if ( rc )
+ PERROR("Failed to set vcpu%"PRIu32"'s xsave info", vhdr->vcpu_id);
+ else
+ DPRINTF("vcpu%"PRId32" X86_PV_VCPU_XSAVE record", vhdr->vcpu_id);
+
+ return rc;
+}
+
+/*
+ * Process an X86_PV_VCPU_MSRS record from the stream.
+ */
+static int handle_x86_pv_vcpu_msrs(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ struct rec_x86_pv_vcpu_hdr *vhdr = rec->data;
+ int rc;
+ DECLARE_DOMCTL;
+ DECLARE_HYPERCALL_BUFFER(void, buffer);
+ size_t buffersz = rec->length - sizeof(*vhdr);
+
+ if ( rec->length <= sizeof(*vhdr) )
+ {
+ ERROR("X86_PV_VCPU_MSRS record truncated: length %"PRIu32", min %zu",
+ rec->length, sizeof(*vhdr) + 1);
+ return -1;
+ }
+ else if ( vhdr->vcpu_id > ctx->dominfo.max_vcpu_id )
+ {
+ ERROR("X86_PV_VCPU_MSRS record vcpu_id (%"PRIu32
+ ") exceeds domain max (%u)",
+ vhdr->vcpu_id, ctx->dominfo.max_vcpu_id);
+ return -1;
+ }
+ else if ( buffersz % sizeof(xen_domctl_vcpu_msr_t) != 0 )
+ {
+ ERROR("X86_PV_VCPU_MSRS payload size %zu"
+ " expected to be a multiple of %zu",
+ buffersz, sizeof(xen_domctl_vcpu_msr_t));
+ return -1;
+ }
+
+ buffer = xc_hypercall_buffer_alloc(xch, buffer, buffersz);
+ if ( !buffer )
+ {
+ ERROR("Unable to allocate %zu bytes for msr hypercall buffer",
+ buffersz);
+ return -1;
+ }
+
+ domctl.cmd = XEN_DOMCTL_set_vcpu_msrs;
+ domctl.domain = ctx->domid;
+ domctl.u.vcpu_msrs.vcpu = vhdr->vcpu_id;
+ domctl.u.vcpu_msrs.msr_count = buffersz % sizeof(xen_domctl_vcpu_msr_t);
+ set_xen_guest_handle(domctl.u.vcpuextstate.buffer, buffer);
+
+ memcpy(buffer, vhdr->context, buffersz);
+
+ rc = xc_domctl(xch, &domctl);
+
+ xc_hypercall_buffer_free(xch, buffer);
+
+ if ( rc )
+ PERROR("Failed to set vcpu%"PRIu32"'s msrs", vhdr->vcpu_id);
+ else
+ DPRINTF("vcpu%"PRId32" X86_PV_VCPU_MSRS record", vhdr->vcpu_id);
+
+ return rc;
+}
+
/*
 * Process a SHARED_INFO record from the stream.
 */
static int handle_shared_info(struct context *ctx, struct record *rec)
{
    xc_interface *xch = ctx->xch;
    unsigned i;
    int rc = -1;
    shared_info_any_t *guest_shared_info = NULL;
    shared_info_any_t *stream_shared_info = rec->data;

    /* The record payload is exactly one page - the shared info frame. */
    if ( rec->length != PAGE_SIZE )
    {
        ERROR("X86_PV_SHARED_INFO record wrong size: length %"PRIu32
              ", expected %lu", rec->length, PAGE_SIZE);
        goto err;
    }

    guest_shared_info = xc_map_foreign_range(
        xch, ctx->domid, PAGE_SIZE, PROT_READ | PROT_WRITE,
        ctx->dominfo.shared_info_frame);
    if ( !guest_shared_info )
    {
        PERROR("Failed to map Shared Info at mfn %#lx",
               ctx->dominfo.shared_info_frame);
        goto err;
    }

    /* Copy only the vcpu_info and arch portions from the stream copy. */
    MEMCPY_FIELD(ctx, guest_shared_info, stream_shared_info, vcpu_info);
    MEMCPY_FIELD(ctx, guest_shared_info, stream_shared_info, arch);

    /* The sender's p2m frame list pointer is meaningless here - clear it;
     * update_guest_p2m() provides the guest with its new p2m. */
    SET_FIELD(ctx, guest_shared_info, arch.pfn_to_mfn_frame_list_list, 0);

    /* Clear all pending events and selectors, then mask every event
     * channel: the guest re-binds channels itself on resume. */
    MEMSET_ARRAY_FIELD(ctx, guest_shared_info, evtchn_pending, 0);
    for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ )
        SET_FIELD(ctx, guest_shared_info, vcpu_info[i].evtchn_pending_sel, 0);

    MEMSET_ARRAY_FIELD(ctx, guest_shared_info, evtchn_mask, 0xff);

    rc = 0;
 err:

    if ( guest_shared_info )
        munmap(guest_shared_info, PAGE_SIZE);

    return rc;
}
+
+/*
+ * restore_ops function. Convert pfns back to mfns in pagetables. Possibly
+ * needs to populate new frames if a PTE is found referring to a frame which
+ * hasn't yet been seen from PAGE_DATA records.
+ */
+static int x86_pv_localise_page(struct context *ctx, uint32_t type, void *page)
+{
+ xc_interface *xch = ctx->xch;
+ uint64_t *table = page;
+ uint64_t pte;
+ unsigned i;
+
+ type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
+
+ /* Only page tables need localisation. */
+ if ( type < XEN_DOMCTL_PFINFO_L1TAB || type > XEN_DOMCTL_PFINFO_L4TAB )
+ return 0;
+
+ for ( i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
+ {
+ pte = table[i];
+
+ if ( pte & _PAGE_PRESENT )
+ {
+ xen_pfn_t mfn, pfn;
+
+ pfn = pte_to_frame(ctx, pte);
+ mfn = ctx->ops.pfn_to_gfn(ctx, pfn);
+
+ if ( mfn == INVALID_MFN )
+ {
+ if ( populate_pfns(ctx, 1, &pfn, &type) )
+ return -1;
+
+ mfn = ctx->ops.pfn_to_gfn(ctx, pfn);
+ }
+
+ if ( !mfn_in_pseudophysmap(ctx, mfn) )
+ {
+ ERROR("Bad mfn for L%"PRIu32"[%u]",
+ type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i);
+ dump_bad_pseudophysmap_entry(ctx, mfn);
+ errno = ERANGE;
+ return -1;
+ }
+
+ update_pte(ctx, &pte, mfn);
+
+ table[i] = pte;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * restore_ops function. Confirm that the incoming stream matches the type of
+ * domain we are attempting to restore into.
+ */
+static int x86_pv_setup(struct context *ctx)
+{
+ xc_interface *xch = ctx->xch;
+ int rc;
+
+ if ( ctx->restore.guest_type != DHDR_TYPE_X86_PV )
+ {
+ ERROR("Unable to restore %s domain into an x86_pv domain",
+ dhdr_type_to_str(ctx->restore.guest_type));
+ return -1;
+ }
+ else if ( ctx->restore.guest_page_size != PAGE_SIZE )
+ {
+ ERROR("Invalid page size %d for x86_pv domains",
+ ctx->restore.guest_page_size);
+ return -1;
+ }
+
+ rc = x86_pv_domain_info(ctx);
+ if ( rc )
+ return rc;
+
+ rc = x86_pv_map_m2p(ctx);
+ if ( rc )
+ return rc;
+
+ return rc;
+}
+
/*
 * restore_ops function.  Dispatch a single stream record to its handler.
 * Unknown mandatory record types are an error; unknown optional record
 * types are logged and ignored.
 */
static int x86_pv_process_record(struct context *ctx, struct record *rec)
{
    xc_interface *xch = ctx->xch;

    switch ( rec->type )
    {
    case REC_TYPE_X86_PV_INFO:
        return handle_x86_pv_info(ctx, rec);

    case REC_TYPE_X86_PV_P2M_FRAMES:
        return handle_x86_pv_p2m_frames(ctx, rec);

    case REC_TYPE_X86_PV_VCPU_BASIC:
        return handle_x86_pv_vcpu_basic(ctx, rec);

    case REC_TYPE_X86_PV_VCPU_EXTENDED:
        return handle_x86_pv_vcpu_extended(ctx, rec);

    case REC_TYPE_X86_PV_VCPU_XSAVE:
        return handle_x86_pv_vcpu_xsave(ctx, rec);

    case REC_TYPE_SHARED_INFO:
        return handle_shared_info(ctx, rec);

    case REC_TYPE_TOOLSTACK:
        return handle_toolstack(ctx, rec);

    case REC_TYPE_TSC_INFO:
        return handle_tsc_info(ctx, rec);

    case REC_TYPE_X86_PV_VCPU_MSRS:
        return handle_x86_pv_vcpu_msrs(ctx, rec);

    default:
        /* Optional records may be skipped safely; anything else is fatal. */
        if ( rec->type & REC_TYPE_OPTIONAL )
        {
            IPRINTF("Ignoring optional record (0x%"PRIx32", %s)",
                    rec->type, rec_type_to_str(rec->type));
            return 0;
        }

        ERROR("Invalid record type (0x%"PRIx32", %s) for x86_pv domains",
              rec->type, rec_type_to_str(rec->type));
        return -1;
    }
}
+
+/*
+ * restore_ops function. Pin the pagetables, rewrite the p2m and seed the
+ * grant table.
+ */
+static int x86_pv_stream_complete(struct context *ctx)
+{
+ xc_interface *xch = ctx->xch;
+ int rc;
+
+ rc = pin_pagetables(ctx);
+ if ( rc )
+ return rc;
+
+ rc = update_guest_p2m(ctx);
+ if ( rc )
+ return rc;
+
+ rc = xc_dom_gnttab_seed(xch, ctx->domid,
+ ctx->restore.console_mfn,
+ ctx->restore.xenstore_mfn,
+ ctx->restore.console_domid,
+ ctx->restore.xenstore_domid);
+ if ( rc )
+ {
+ PERROR("Failed to seed grant table");
+ return rc;
+ }
+
+ return rc;
+}
+
+/*
+ * restore_ops function.
+ */
+static int x86_pv_cleanup(struct context *ctx)
+{
+ free(ctx->x86_pv.p2m);
+ free(ctx->x86_pv.p2m_pfns);
+ free(ctx->x86_pv.pfn_types);
+
+ if ( ctx->x86_pv.m2p )
+ munmap(ctx->x86_pv.m2p, ctx->x86_pv.nr_m2p_frames * PAGE_SIZE);
+
+ return 0;
+}
+
/* restore_ops dispatch table for x86 PV domains (see common.h). */
struct restore_ops restore_ops_x86_pv =
{
    .localise_page = x86_pv_localise_page,
    .setup = x86_pv_setup,
    .process_record = x86_pv_process_record,
    .stream_complete = x86_pv_stream_complete,
    .cleanup = x86_pv_cleanup,
};
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--
1.7.10.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |