[Xen-changelog] [xen-unstable] [TOOLS] Make xc_domain_{save, restore} understand compat guests
# HG changeset patch # User Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx> # Date 1190220263 -3600 # Node ID 4c8394e3b011a30b94ad61454be4d9e38642e033 # Parent 177ebf350b4c37a5ed83ac1475d2ebd1f482f926 [TOOLS] Make xc_domain_{save, restore} understand compat guests Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx> --- tools/libxc/xc_core.c | 3 tools/libxc/xc_core_x86.c | 3 tools/libxc/xc_domain_restore.c | 244 ++++++++++++++++++++++------------------ tools/libxc/xc_domain_save.c | 231 +++++++++++++++++++++++-------------- tools/libxc/xc_resume.c | 13 -- tools/libxc/xg_private.h | 26 ++-- tools/libxc/xg_save_restore.h | 83 +++++++++++-- 7 files changed, 377 insertions(+), 226 deletions(-) diff -r 177ebf350b4c -r 4c8394e3b011 tools/libxc/xc_core.c --- a/tools/libxc/xc_core.c Wed Sep 19 15:42:56 2007 +0100 +++ b/tools/libxc/xc_core.c Wed Sep 19 17:44:23 2007 +0100 @@ -57,6 +57,9 @@ /* number of pages to write at a time */ #define DUMP_INCREMENT (4 * 1024) + +/* Don't yet support cross-address-size core dump */ +#define guest_width (sizeof (unsigned long)) /* string table */ struct xc_core_strtab { diff -r 177ebf350b4c -r 4c8394e3b011 tools/libxc/xc_core_x86.c --- a/tools/libxc/xc_core_x86.c Wed Sep 19 15:42:56 2007 +0100 +++ b/tools/libxc/xc_core_x86.c Wed Sep 19 17:44:23 2007 +0100 @@ -20,6 +20,9 @@ #include "xg_private.h" #include "xc_core.h" + +/* Don't yet support cross-address-size core dump */ +#define guest_width (sizeof (unsigned long)) static int nr_gpfns(int xc_handle, domid_t domid) { diff -r 177ebf350b4c -r 4c8394e3b011 tools/libxc/xc_domain_restore.c --- a/tools/libxc/xc_domain_restore.c Wed Sep 19 15:42:56 2007 +0100 +++ b/tools/libxc/xc_domain_restore.c Wed Sep 19 17:44:23 2007 +0100 @@ -56,6 +56,10 @@ static xen_pfn_t *p2m = NULL; /* A table of P2M mappings in the current region */ static xen_pfn_t *p2m_batch = NULL; +/* Address size of the guest, in bytes */ +unsigned int guest_width; + + static ssize_t read_exact(int fd, void *buf, size_t count) { @@ -168,22 +172,17 @@ static xen_pfn_t *load_p2m_frame_list(in static xen_pfn_t *load_p2m_frame_list(int io_fd, int *pae_extended_cr3) { xen_pfn_t *p2m_frame_list; - vcpu_guest_context_t ctxt; - - if ( (p2m_frame_list = malloc(P2M_FL_SIZE)) == NULL ) - { - ERROR("Couldn't allocate p2m_frame_list array"); - return NULL; - } - + vcpu_guest_context_either_t ctxt; + xen_pfn_t p2m_fl_zero; + /* Read first entry of P2M list, or extended-info signature (~0UL). */ - if ( !read_exact(io_fd, p2m_frame_list, sizeof(long)) ) + if ( !read_exact(io_fd, &p2m_fl_zero, sizeof(long)) ) { ERROR("read extended-info signature failed"); return NULL; } - if ( p2m_frame_list[0] == ~0UL ) + if ( p2m_fl_zero == ~0UL ) { uint32_t tot_bytes; @@ -211,25 +210,42 @@ static xen_pfn_t *load_p2m_frame_list(in /* VCPU context structure? 
*/ if ( !strncmp(chunk_sig, "vcpu", 4) ) { - if ( !read_exact(io_fd, &ctxt, sizeof(ctxt)) ) + /* Pick a guest word-size and PT depth from the ctxt size */ + if ( chunk_bytes == sizeof (ctxt.x32) ) + { + guest_width = 4; + if ( pt_levels > 2 ) + pt_levels = 3; + } + else if ( chunk_bytes == sizeof (ctxt.x64) ) + { + guest_width = 8; + pt_levels = 4; + } + else + { + ERROR("bad extended-info context size %d", chunk_bytes); + return NULL; + } + + if ( !read_exact(io_fd, &ctxt, chunk_bytes) ) { ERROR("read extended-info vcpu context failed"); return NULL; } - tot_bytes -= sizeof(struct vcpu_guest_context); - chunk_bytes -= sizeof(struct vcpu_guest_context); - - if ( ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3) ) + tot_bytes -= chunk_bytes; + chunk_bytes = 0; + + if ( GET_FIELD(&ctxt, vm_assist) + & (1UL << VMASST_TYPE_pae_extended_cr3) ) *pae_extended_cr3 = 1; } /* Any remaining bytes of this chunk: read and discard. */ while ( chunk_bytes ) { - unsigned long sz = chunk_bytes; - if ( sz > P2M_FL_SIZE ) - sz = P2M_FL_SIZE; - if ( !read_exact(io_fd, p2m_frame_list, sz) ) + unsigned long sz = MIN(chunk_bytes, sizeof(xen_pfn_t)); + if ( !read_exact(io_fd, &p2m_fl_zero, sz) ) { ERROR("read-and-discard extended-info chunk bytes failed"); return NULL; @@ -240,15 +256,25 @@ static xen_pfn_t *load_p2m_frame_list(in } /* Now read the real first entry of P2M list. */ - if ( !read_exact(io_fd, p2m_frame_list, sizeof(long)) ) + if ( !read_exact(io_fd, &p2m_fl_zero, sizeof(xen_pfn_t)) ) { ERROR("read first entry of p2m_frame_list failed"); return NULL; } } - /* First entry is already read into the p2m array. */ - if ( !read_exact(io_fd, &p2m_frame_list[1], P2M_FL_SIZE - sizeof(long)) ) + /* Now that we know the guest's word-size, can safely allocate + * the p2m frame list */ + if ( (p2m_frame_list = malloc(P2M_FL_SIZE)) == NULL ) + { + ERROR("Couldn't allocate p2m_frame_list array"); + return NULL; + } + + /* First entry has already been read. */ + p2m_frame_list[0] = p2m_fl_zero; + if ( !read_exact(io_fd, &p2m_frame_list[1], + (P2M_FL_ENTRIES - 1) * sizeof(xen_pfn_t)) ) { ERROR("read p2m_frame_list failed"); return NULL; @@ -272,11 +298,11 @@ int xc_domain_restore(int xc_handle, int /* The new domain's shared-info frame number. */ unsigned long shared_info_frame; unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */ - shared_info_t *old_shared_info = (shared_info_t *)shared_info_page; - shared_info_t *new_shared_info; + shared_info_either_t *old_shared_info = (shared_info_either_t *)shared_info_page; + shared_info_either_t *new_shared_info; /* A copy of the CPU context of the guest. */ - vcpu_guest_context_t ctxt; + vcpu_guest_context_either_t ctxt; /* A table containing the type of each PFN (/not/ MFN!). */ unsigned long *pfn_type = NULL; @@ -291,7 +317,7 @@ int xc_domain_restore(int xc_handle, int xen_pfn_t *p2m_frame_list = NULL; /* A temporary mapping of the guest's start_info page. */ - start_info_t *start_info; + start_info_either_t *start_info; /* Our mapping of the current region (batch) */ char *region_base; @@ -324,16 +350,38 @@ int xc_domain_restore(int xc_handle, int } DPRINTF("xc_domain_restore start: p2m_size = %lx\n", p2m_size); - if ( !hvm ) - { - /* - * XXX For now, 32bit dom0's can only save/restore 32bit domUs - * on 64bit hypervisors. 
- */ + if ( !get_platform_info(xc_handle, dom, + &max_mfn, &hvirt_start, &pt_levels, &guest_width) ) + { + ERROR("Unable to get platform info."); + return 1; + } + + /* The *current* word size of the guest isn't very interesting; for now + * assume the guest will be the same as we are. We'll fix that later + * if we discover otherwise. */ + guest_width = sizeof(unsigned long); + pt_levels = (guest_width == 8) ? 4 : (pt_levels == 2) ? 2 : 3; + + if ( lock_pages(&ctxt, sizeof(ctxt)) ) + { + /* needed for build domctl, but might as well do early */ + ERROR("Unable to lock ctxt"); + return 1; + } + + if ( !hvm ) + { + /* Load the p2m frame list, plus potential extended info chunk */ + p2m_frame_list = load_p2m_frame_list(io_fd, &pae_extended_cr3); + if ( !p2m_frame_list ) + goto out; + + /* Now that we know the word size, tell Xen about it */ memset(&domctl, 0, sizeof(domctl)); domctl.domain = dom; domctl.cmd = XEN_DOMCTL_set_address_size; - domctl.u.address_size.size = sizeof(unsigned long) * 8; + domctl.u.address_size.size = guest_width * 8; rc = do_domctl(xc_handle, &domctl); if ( rc != 0 ) { @@ -343,30 +391,8 @@ int xc_domain_restore(int xc_handle, int rc = 1; } - if ( !get_platform_info(xc_handle, dom, - &max_mfn, &hvirt_start, &pt_levels) ) - { - ERROR("Unable to get platform info."); - return 1; - } - - if ( lock_pages(&ctxt, sizeof(ctxt)) ) - { - /* needed for build domctl, but might as well do early */ - ERROR("Unable to lock ctxt"); - return 1; - } - - /* Load the p2m frame list, plus potential extended info chunk */ - if ( !hvm ) - { - p2m_frame_list = load_p2m_frame_list(io_fd, &pae_extended_cr3); - if ( !p2m_frame_list ) - goto out; - } - /* We want zeroed memory so use calloc rather than malloc. */ - p2m = calloc(p2m_size, sizeof(xen_pfn_t)); + p2m = calloc(p2m_size, MAX(guest_width, sizeof (xen_pfn_t))); pfn_type = calloc(p2m_size, sizeof(unsigned long)); region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t)); p2m_batch = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t)); @@ -963,14 +989,16 @@ int xc_domain_restore(int xc_handle, int if ( !(vcpumap & (1ULL << i)) ) continue; - if ( !read_exact(io_fd, &ctxt, sizeof(ctxt)) ) + if ( !read_exact(io_fd, &ctxt, ((guest_width == 8) + ? sizeof(ctxt.x64) + : sizeof(ctxt.x32))) ) { ERROR("Error when reading ctxt %d", i); goto out; } if ( !new_ctxt_format ) - ctxt.flags |= VGCF_online; + SET_FIELD(&ctxt, flags, GET_FIELD(&ctxt, flags) | VGCF_online); if ( i == 0 ) { @@ -978,48 +1006,49 @@ int xc_domain_restore(int xc_handle, int * Uncanonicalise the suspend-record frame number and poke * resume record. 
*/ - pfn = ctxt.user_regs.edx; + pfn = GET_FIELD(&ctxt, user_regs.edx); if ( (pfn >= p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) ) { ERROR("Suspend record frame number is bad"); goto out; } - ctxt.user_regs.edx = mfn = p2m[pfn]; + mfn = p2m[pfn]; + SET_FIELD(&ctxt, user_regs.edx, mfn); start_info = xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn); - start_info->nr_pages = p2m_size; - start_info->shared_info = shared_info_frame << PAGE_SHIFT; - start_info->flags = 0; - *store_mfn = start_info->store_mfn = p2m[start_info->store_mfn]; - start_info->store_evtchn = store_evtchn; - start_info->console.domU.mfn = p2m[start_info->console.domU.mfn]; - start_info->console.domU.evtchn = console_evtchn; - *console_mfn = start_info->console.domU.mfn; + SET_FIELD(start_info, nr_pages, p2m_size); + SET_FIELD(start_info, shared_info, shared_info_frame<<PAGE_SHIFT); + SET_FIELD(start_info, flags, 0); + *store_mfn = p2m[GET_FIELD(start_info, store_mfn)]; + SET_FIELD(start_info, store_mfn, *store_mfn); + SET_FIELD(start_info, store_evtchn, store_evtchn); + *console_mfn = p2m[GET_FIELD(start_info, console.domU.mfn)]; + SET_FIELD(start_info, console.domU.mfn, *console_mfn); + SET_FIELD(start_info, console.domU.evtchn, console_evtchn); munmap(start_info, PAGE_SIZE); } - /* Uncanonicalise each GDT frame number. */ - if ( ctxt.gdt_ents > 8192 ) + if ( GET_FIELD(&ctxt, gdt_ents) > 8192 ) { ERROR("GDT entry count out of range"); goto out; } - for ( j = 0; (512*j) < ctxt.gdt_ents; j++ ) - { - pfn = ctxt.gdt_frames[j]; + for ( j = 0; (512*j) < GET_FIELD(&ctxt, gdt_ents); j++ ) + { + pfn = GET_FIELD(&ctxt, gdt_frames[j]); if ( (pfn >= p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) ) { - ERROR("GDT frame number is bad"); + ERROR("GDT frame number %i (0x%lx) is bad", + j, (unsigned long)pfn); goto out; } - ctxt.gdt_frames[j] = p2m[pfn]; - } - + SET_FIELD(&ctxt, gdt_frames[j], p2m[pfn]); + } /* Uncanonicalise the page table base pointer. */ - pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]); + pfn = xen_cr3_to_pfn(GET_FIELD(&ctxt, ctrlreg[3])); if ( pfn >= p2m_size ) { @@ -1036,21 +1065,18 @@ int xc_domain_restore(int xc_handle, int (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT); goto out; } - - ctxt.ctrlreg[3] = xen_pfn_to_cr3(p2m[pfn]); + SET_FIELD(&ctxt, ctrlreg[3], xen_pfn_to_cr3(p2m[pfn])); /* Guest pagetable (x86/64) stored in otherwise-unused CR1. 
*/ - if ( (pt_levels == 4) && ctxt.ctrlreg[1] ) - { - pfn = xen_cr3_to_pfn(ctxt.ctrlreg[1]); - + if ( (pt_levels == 4) && (ctxt.x64.ctrlreg[1] & 1) ) + { + pfn = xen_cr3_to_pfn(ctxt.x64.ctrlreg[1] & ~1); if ( pfn >= p2m_size ) { - ERROR("User PT base is bad: pfn=%lu p2m_size=%lu type=%08lx", - pfn, p2m_size, pfn_type[pfn]); + ERROR("User PT base is bad: pfn=%lu p2m_size=%lu", + pfn, p2m_size); goto out; } - if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) != ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) { @@ -1059,14 +1085,12 @@ int xc_domain_restore(int xc_handle, int (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT); goto out; } - - ctxt.ctrlreg[1] = xen_pfn_to_cr3(p2m[pfn]); - } - + ctxt.x64.ctrlreg[1] = xen_pfn_to_cr3(p2m[pfn]); + } domctl.cmd = XEN_DOMCTL_setvcpucontext; domctl.domain = (domid_t)dom; domctl.u.vcpucontext.vcpu = i; - set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt); + set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt.c); rc = xc_domctl(xc_handle, &domctl); if ( rc != 0 ) { @@ -1087,22 +1111,16 @@ int xc_domain_restore(int xc_handle, int xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame); /* restore saved vcpu_info and arch specific info */ - memcpy(&new_shared_info->vcpu_info, - &old_shared_info->vcpu_info, - sizeof(new_shared_info->vcpu_info)); - memcpy(&new_shared_info->arch, - &old_shared_info->arch, - sizeof(new_shared_info->arch)); + MEMCPY_FIELD(new_shared_info, old_shared_info, vcpu_info); + MEMCPY_FIELD(new_shared_info, old_shared_info, arch); /* clear any pending events and the selector */ - memset(&(new_shared_info->evtchn_pending[0]), 0, - sizeof (new_shared_info->evtchn_pending)); + MEMSET_ARRAY_FIELD(new_shared_info, evtchn_pending, 0); for ( i = 0; i < MAX_VIRT_CPUS; i++ ) - new_shared_info->vcpu_info[i].evtchn_pending_sel = 0; + SET_FIELD(new_shared_info, vcpu_info[i].evtchn_pending_sel, 0); /* mask event channels */ - memset(&(new_shared_info->evtchn_mask[0]), 0xff, - sizeof (new_shared_info->evtchn_mask)); + MEMSET_ARRAY_FIELD(new_shared_info, evtchn_mask, 0xff); /* leave wallclock time. 
set by hypervisor */ munmap(new_shared_info, PAGE_SIZE); @@ -1113,10 +1131,9 @@ int xc_domain_restore(int xc_handle, int pfn = p2m_frame_list[i]; if ( (pfn >= p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) ) { - ERROR("PFN-to-MFN frame number is bad"); - goto out; - } - + ERROR("PFN-to-MFN frame number %i (%#lx) is bad", i, pfn); + goto out; + } p2m_frame_list[i] = p2m[pfn]; } @@ -1128,8 +1145,17 @@ int xc_domain_restore(int xc_handle, int goto out; } - memcpy(live_p2m, p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT)); - munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT)); + /* If the domain we're restoring has a different word size to ours, + * we need to repack the p2m appropriately */ + if ( guest_width > sizeof (xen_pfn_t) ) + for ( i = p2m_size - 1; i >= 0; i-- ) + ((uint64_t *)p2m)[i] = p2m[i]; + else if ( guest_width > sizeof (xen_pfn_t) ) + for ( i = 0; i < p2m_size; i++ ) + ((uint32_t *)p2m)[i] = p2m[i]; + + memcpy(live_p2m, p2m, ROUNDUP(p2m_size * guest_width, PAGE_SHIFT)); + munmap(live_p2m, ROUNDUP(p2m_size * guest_width, PAGE_SHIFT)); DPRINTF("Domain ready to be built.\n"); rc = 0; diff -r 177ebf350b4c -r 4c8394e3b011 tools/libxc/xc_domain_save.c --- a/tools/libxc/xc_domain_save.c Wed Sep 19 15:42:56 2007 +0100 +++ b/tools/libxc/xc_domain_save.c Wed Sep 19 17:44:23 2007 +0100 @@ -54,9 +54,17 @@ static xen_pfn_t *live_m2p = NULL; static xen_pfn_t *live_m2p = NULL; static unsigned long m2p_mfn0; +/* Address size of the guest */ +unsigned int guest_width; + /* grep fodder: machine_to_phys */ -#define mfn_to_pfn(_mfn) live_m2p[(_mfn)] +#define mfn_to_pfn(_mfn) (live_m2p[(_mfn)]) + +#define pfn_to_mfn(_pfn) \ + ((xen_pfn_t) ((guest_width==8) \ + ? (((uint64_t *)live_p2m)[(_pfn)]) \ + : (((uint32_t *)live_p2m)[(_pfn)]))) /* * Returns TRUE if the given machine frame number has a unique mapping @@ -65,19 +73,7 @@ static unsigned long m2p_mfn0; #define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \ (((_mfn) < (max_mfn)) && \ ((mfn_to_pfn(_mfn) < (p2m_size)) && \ - (live_p2m[mfn_to_pfn(_mfn)] == (_mfn)))) - -/* Returns TRUE if MFN is successfully converted to a PFN. */ -#define translate_mfn_to_pfn(_pmfn) \ -({ \ - unsigned long mfn = *(_pmfn); \ - int _res = 1; \ - if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) \ - _res = 0; \ - else \ - *(_pmfn) = mfn_to_pfn(mfn); \ - _res; \ -}) + (pfn_to_mfn(mfn_to_pfn(_mfn)) == (_mfn)))) /* ** During (live) save/migrate, we maintain a number of bitmaps to track @@ -451,22 +447,25 @@ static int suspend_and_state(int (*suspe ** it to update the MFN to a reasonable value. 
*/ static void *map_frame_list_list(int xc_handle, uint32_t dom, - shared_info_t *shinfo) + shared_info_either_t *shinfo) { int count = 100; void *p; - - while ( count-- && (shinfo->arch.pfn_to_mfn_frame_list_list == 0) ) + uint64_t fll = GET_FIELD(shinfo, arch.pfn_to_mfn_frame_list_list); + + while ( count-- && (fll == 0) ) + { usleep(10000); - - if ( shinfo->arch.pfn_to_mfn_frame_list_list == 0 ) + fll = GET_FIELD(shinfo, arch.pfn_to_mfn_frame_list_list); + } + + if ( fll == 0 ) { ERROR("Timed out waiting for frame list updated."); return NULL; } - p = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, - shinfo->arch.pfn_to_mfn_frame_list_list); + p = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, fll); if ( p == NULL ) ERROR("Couldn't map p2m_frame_list_list (errno %d)", errno); @@ -659,15 +658,16 @@ static xen_pfn_t *map_and_save_p2m_table int io_fd, uint32_t dom, unsigned long p2m_size, - shared_info_t *live_shinfo) -{ - vcpu_guest_context_t ctxt; + shared_info_either_t *live_shinfo) +{ + vcpu_guest_context_either_t ctxt; /* Double and single indirect references to the live P2M table */ - xen_pfn_t *live_p2m_frame_list_list = NULL; - xen_pfn_t *live_p2m_frame_list = NULL; - - /* A copy of the pfn-to-mfn table frame list. */ + void *live_p2m_frame_list_list = NULL; + void *live_p2m_frame_list = NULL; + + /* Copies of the above. */ + xen_pfn_t *p2m_frame_list_list = NULL; xen_pfn_t *p2m_frame_list = NULL; /* The mapping of the live p2m table itself */ @@ -680,15 +680,50 @@ static xen_pfn_t *map_and_save_p2m_table if ( !live_p2m_frame_list_list ) goto out; + /* Get a local copy of the live_P2M_frame_list_list */ + if ( !(p2m_frame_list_list = malloc(PAGE_SIZE)) ) + { + ERROR("Couldn't allocate p2m_frame_list_list array"); + goto out; + } + memcpy(p2m_frame_list_list, live_p2m_frame_list_list, PAGE_SIZE); + + /* Canonicalize guest's unsigned long vs ours */ + if ( guest_width > sizeof(unsigned long) ) + for ( i = 0; i < PAGE_SIZE/sizeof(unsigned long); i++ ) + if ( i < PAGE_SIZE/guest_width ) + p2m_frame_list_list[i] = ((uint64_t *)p2m_frame_list_list)[i]; + else + p2m_frame_list_list[i] = 0; + else if ( guest_width < sizeof(unsigned long) ) + for ( i = PAGE_SIZE/sizeof(unsigned long) - 1; i >= 0; i++ ) + p2m_frame_list_list[i] = ((uint32_t *)p2m_frame_list_list)[i]; + live_p2m_frame_list = xc_map_foreign_batch(xc_handle, dom, PROT_READ, - live_p2m_frame_list_list, + p2m_frame_list_list, P2M_FLL_ENTRIES); if ( !live_p2m_frame_list ) { ERROR("Couldn't map p2m_frame_list"); goto out; } + + /* Get a local copy of the live_P2M_frame_list */ + if ( !(p2m_frame_list = malloc(P2M_FL_SIZE)) ) + { + ERROR("Couldn't allocate p2m_frame_list array"); + goto out; + } + memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE); + + /* Canonicalize guest's unsigned long vs ours */ + if ( guest_width > sizeof(unsigned long) ) + for ( i = 0; i < P2M_FL_ENTRIES; i++ ) + p2m_frame_list[i] = ((uint64_t *)p2m_frame_list)[i]; + else if ( guest_width < sizeof(unsigned long) ) + for ( i = P2M_FL_ENTRIES - 1; i >= 0; i++ ) + p2m_frame_list[i] = ((uint32_t *)p2m_frame_list)[i]; /* Map all the frames of the pfn->mfn table. For migrate to succeed, @@ -697,7 +732,7 @@ static xen_pfn_t *map_and_save_p2m_table from a safety POV anyhow. 
*/ p2m = xc_map_foreign_batch(xc_handle, dom, PROT_READ, - live_p2m_frame_list, + p2m_frame_list, P2M_FL_ENTRIES); if ( !p2m ) { @@ -706,27 +741,30 @@ static xen_pfn_t *map_and_save_p2m_table } live_p2m = p2m; /* So that translation macros will work */ - /* Get a local copy of the live_P2M_frame_list */ - if ( !(p2m_frame_list = malloc(P2M_FL_SIZE)) ) - { - ERROR("Couldn't allocate p2m_frame_list array"); - goto out; - } - memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE); - /* Canonicalise the pfn-to-mfn table frame-number list. */ - for ( i = 0; i < p2m_size; i += fpp ) - { - if ( !translate_mfn_to_pfn(&p2m_frame_list[i/fpp]) ) + for ( i = 0; i < p2m_size; i += FPP ) + { + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(p2m_frame_list[i/FPP]) ) { ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys"); - ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp, - (uint64_t)p2m_frame_list[i/fpp]); - goto out; - } - } - - if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) ) + ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64", max 0x%lx", + i, i/FPP, (uint64_t)p2m_frame_list[i/FPP], max_mfn); + if ( p2m_frame_list[i/FPP] < max_mfn ) + { + ERROR("m2p[0x%"PRIx64"] = 0x%"PRIx64, + (uint64_t)p2m_frame_list[i/FPP], + (uint64_t)live_m2p[p2m_frame_list[i/FPP]]); + ERROR("p2m[0x%"PRIx64"] = 0x%"PRIx64, + (uint64_t)live_m2p[p2m_frame_list[i/FPP]], + (uint64_t)p2m[live_m2p[p2m_frame_list[i/FPP]]]); + + } + goto out; + } + p2m_frame_list[i/FPP] = mfn_to_pfn(p2m_frame_list[i/FPP]); + } + + if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt.c) ) { ERROR("Could not get vcpu context"); goto out; @@ -737,25 +775,26 @@ static xen_pfn_t *map_and_save_p2m_table * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off * slow paths in the restore code. */ - if ( (pt_levels == 3) && - (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3)) ) { unsigned long signature = ~0UL; - uint32_t tot_sz = sizeof(struct vcpu_guest_context) + 8; - uint32_t chunk_sz = sizeof(struct vcpu_guest_context); + uint32_t chunk_sz = ((guest_width==8) + ? sizeof(ctxt.x64) + : sizeof(ctxt.x32)); + uint32_t tot_sz = chunk_sz + 8; char chunk_sig[] = "vcpu"; if ( !write_exact(io_fd, &signature, sizeof(signature)) || !write_exact(io_fd, &tot_sz, sizeof(tot_sz)) || !write_exact(io_fd, &chunk_sig, 4) || !write_exact(io_fd, &chunk_sz, sizeof(chunk_sz)) || - !write_exact(io_fd, &ctxt, sizeof(ctxt)) ) + !write_exact(io_fd, &ctxt, chunk_sz) ) { ERROR("write: extended info"); goto out; } } - if ( !write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE) ) + if ( !write_exact(io_fd, p2m_frame_list, + P2M_FL_ENTRIES * sizeof(xen_pfn_t)) ) { ERROR("write: p2m_frame_list"); goto out; @@ -774,6 +813,9 @@ static xen_pfn_t *map_and_save_p2m_table if ( live_p2m_frame_list ) munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE); + if ( p2m_frame_list_list ) + free(p2m_frame_list_list); + if ( p2m_frame_list ) free(p2m_frame_list); @@ -798,7 +840,7 @@ int xc_domain_save(int xc_handle, int io unsigned long shared_info_frame; /* A copy of the CPU context of the guest. */ - vcpu_guest_context_t ctxt; + vcpu_guest_context_either_t ctxt; /* A table containing the type of each PFN (/not/ MFN!). 
*/ unsigned long *pfn_type = NULL; @@ -808,7 +850,7 @@ int xc_domain_save(int xc_handle, int io char page[PAGE_SIZE]; /* Live mapping of shared info structure */ - shared_info_t *live_shinfo = NULL; + shared_info_either_t *live_shinfo = NULL; /* base of the region in which domain memory is mapped */ unsigned char *region_base = NULL; @@ -836,6 +878,8 @@ int xc_domain_save(int xc_handle, int io /* HVM: magic frames for ioreqs and xenstore comms. */ uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */ + unsigned long mfn; + /* If no explicit control parameters given, use defaults */ max_iters = max_iters ? : DEF_MAX_ITERS; max_factor = max_factor ? : DEF_MAX_FACTOR; @@ -843,7 +887,7 @@ int xc_domain_save(int xc_handle, int io initialize_mbit_rate(); if ( !get_platform_info(xc_handle, dom, - &max_mfn, &hvirt_start, &pt_levels) ) + &max_mfn, &hvirt_start, &pt_levels, &guest_width) ) { ERROR("Unable to get platform info."); return 1; @@ -1006,7 +1050,6 @@ int xc_domain_save(int xc_handle, int io if ( !hvm ) { int err = 0; - unsigned long mfn; /* Map the P2M table, and write the list of P2M frames */ live_p2m = map_and_save_p2m_table(xc_handle, io_fd, dom, @@ -1023,7 +1066,7 @@ int xc_domain_save(int xc_handle, int io for ( i = 0; i < p2m_size; i++ ) { - mfn = live_p2m[i]; + mfn = pfn_to_mfn(i); if( (mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i) ) { DPRINTF("i=0x%x mfn=%lx live_m2p=%lx\n", i, @@ -1083,11 +1126,16 @@ int xc_domain_save(int xc_handle, int io int n = permute(N, p2m_size, order_nr); if ( debug ) - DPRINTF("%d pfn= %08lx mfn= %08lx %d [mfn]= %08lx\n", - iter, (unsigned long)n, hvm ? 0 : live_p2m[n], - test_bit(n, to_send), - hvm ? 0 : mfn_to_pfn(live_p2m[n]&0xFFFFF)); - + { + DPRINTF("%d pfn= %08lx mfn= %08lx %d", + iter, (unsigned long)n, + hvm ? 0 : pfn_to_mfn(n), + test_bit(n, to_send)); + if ( !hvm && is_mapped(pfn_to_mfn(n)) ) + DPRINTF(" [mfn]= %08lx", + mfn_to_pfn(pfn_to_mfn(n)&0xFFFFF)); + DPRINTF("\n"); + } if ( !last_iter && test_bit(n, to_send) && test_bit(n, to_skip) ) @@ -1118,7 +1166,7 @@ int xc_domain_save(int xc_handle, int io if ( hvm ) pfn_type[batch] = n; else - pfn_type[batch] = live_p2m[n]; + pfn_type[batch] = pfn_to_mfn(n); if ( !is_mapped(pfn_type[batch]) ) { @@ -1451,7 +1499,7 @@ int xc_domain_save(int xc_handle, int io for ( i = 0, j = 0; i < p2m_size; i++ ) { - if ( !is_mapped(live_p2m[i]) ) + if ( !is_mapped(pfn_to_mfn(i)) ) j++; } @@ -1463,7 +1511,7 @@ int xc_domain_save(int xc_handle, int io for ( i = 0, j = 0; i < p2m_size; ) { - if ( !is_mapped(live_p2m[i]) ) + if ( !is_mapped(pfn_to_mfn(i)) ) pfntab[j++] = i; i++; @@ -1480,63 +1528,75 @@ int xc_domain_save(int xc_handle, int io } } - if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) ) + if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt.c) ) { ERROR("Could not get vcpu context"); goto out; } /* Canonicalise the suspend-record frame number. */ - if ( !translate_mfn_to_pfn(&ctxt.user_regs.edx) ) + mfn = GET_FIELD(&ctxt, user_regs.edx); + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) { ERROR("Suspend record is not in range of pseudophys map"); goto out; } + SET_FIELD(&ctxt, user_regs.edx, mfn_to_pfn(mfn)); for ( i = 0; i <= info.max_vcpu_id; i++ ) { if ( !(vcpumap & (1ULL << i)) ) continue; - if ( (i != 0) && xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) ) + if ( (i != 0) && xc_vcpu_getcontext(xc_handle, dom, i, &ctxt.c) ) { ERROR("No context for VCPU%d", i); goto out; } /* Canonicalise each GDT frame number. 
*/ - for ( j = 0; (512*j) < ctxt.gdt_ents; j++ ) - { - if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[j]) ) + for ( j = 0; (512*j) < GET_FIELD(&ctxt, gdt_ents); j++ ) + { + mfn = GET_FIELD(&ctxt, gdt_frames[j]); + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) { ERROR("GDT frame is not in range of pseudophys map"); goto out; } + SET_FIELD(&ctxt, gdt_frames[j], mfn_to_pfn(mfn)); } /* Canonicalise the page table base pointer. */ - if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[3])) ) + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn( + GET_FIELD(&ctxt, ctrlreg[3]))) ) { ERROR("PT base is not in range of pseudophys map"); goto out; } - ctxt.ctrlreg[3] = - xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3]))); + SET_FIELD(&ctxt, ctrlreg[3], + xen_pfn_to_cr3( + mfn_to_pfn( + xen_cr3_to_pfn( + GET_FIELD(&ctxt, ctrlreg[3]))))); /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */ - if ( (pt_levels == 4) && ctxt.ctrlreg[1] ) - { - if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[1])) ) + if ( (pt_levels == 4) && ctxt.x64.ctrlreg[1] ) + { + if ( !MFN_IS_IN_PSEUDOPHYS_MAP( + xen_cr3_to_pfn(ctxt.x64.ctrlreg[1])) ) { ERROR("PT base is not in range of pseudophys map"); goto out; } /* Least-significant bit means 'valid PFN'. */ - ctxt.ctrlreg[1] = 1 | - xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[1]))); - } - - if ( !write_exact(io_fd, &ctxt, sizeof(ctxt)) ) + ctxt.x64.ctrlreg[1] = 1 | + xen_pfn_to_cr3( + mfn_to_pfn(xen_cr3_to_pfn(ctxt.x64.ctrlreg[1]))); + } + + if ( !write_exact(io_fd, &ctxt, ((guest_width==8) + ? sizeof(ctxt.x64) + : sizeof(ctxt.x32))) ) { ERROR("Error when writing to state file (1) (errno %d)", errno); goto out; @@ -1547,7 +1607,8 @@ int xc_domain_save(int xc_handle, int io * Reset the MFN to be a known-invalid value. See map_frame_list_list(). 
*/ memcpy(page, live_shinfo, PAGE_SIZE); - ((shared_info_t *)page)->arch.pfn_to_mfn_frame_list_list = 0; + SET_FIELD(((shared_info_either_t *)page), + arch.pfn_to_mfn_frame_list_list, 0); if ( !write_exact(io_fd, page, PAGE_SIZE) ) { ERROR("Error when writing to state file (1) (errno %d)", errno); diff -r 177ebf350b4c -r 4c8394e3b011 tools/libxc/xc_resume.c --- a/tools/libxc/xc_resume.c Wed Sep 19 15:42:56 2007 +0100 +++ b/tools/libxc/xc_resume.c Wed Sep 19 17:44:23 2007 +0100 @@ -8,13 +8,8 @@ #include <xen/foreign/x86_64.h> #include <xen/hvm/params.h> -/* Need to provide the right flavour of vcpu context for Xen */ -typedef union -{ - vcpu_guest_context_x86_64_t c64; - vcpu_guest_context_x86_32_t c32; - vcpu_guest_context_t c; -} vcpu_guest_context_either_t; +/* Don't yet support cross-address-size uncooperative resume */ +#define guest_width (sizeof (unsigned long)) static int modify_returncode(int xc_handle, uint32_t domid) { @@ -50,9 +45,9 @@ static int modify_returncode(int xc_hand if ( !info.hvm ) ctxt.c.user_regs.eax = 1; else if ( strstr(caps, "x86_64") ) - ctxt.c64.user_regs.eax = 1; + ctxt.x64.user_regs.eax = 1; else - ctxt.c32.user_regs.eax = 1; + ctxt.x32.user_regs.eax = 1; if ( (rc = xc_vcpu_setcontext(xc_handle, domid, 0, &ctxt.c)) != 0 ) return rc; diff -r 177ebf350b4c -r 4c8394e3b011 tools/libxc/xg_private.h --- a/tools/libxc/xg_private.h Wed Sep 19 15:42:56 2007 +0100 +++ b/tools/libxc/xg_private.h Wed Sep 19 17:44:23 2007 +0100 @@ -133,13 +133,6 @@ typedef l4_pgentry_64_t l4_pgentry_t; #define PAGE_SHIFT_X86 12 #define PAGE_SIZE_X86 (1UL << PAGE_SHIFT_X86) #define PAGE_MASK_X86 (~(PAGE_SIZE_X86-1)) -#if defined(__i386__) -#define MADDR_BITS_X86 44 -#elif defined(__x86_64__) -#define MADDR_BITS_X86 52 -#endif -#define MFN_MASK_X86 ((1ULL << (MADDR_BITS_X86 - PAGE_SHIFT_X86)) - 1) -#define MADDR_MASK_X86 (MFN_MASK_X86 << PAGE_SHIFT_X86) #define PAGE_SHIFT_IA64 14 #define PAGE_SIZE_IA64 (1UL << PAGE_SHIFT_IA64) @@ -147,19 +140,28 @@ typedef l4_pgentry_64_t l4_pgentry_t; #define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1)) + +/* XXX SMH: following skanky macros rely on variable p2m_size being set */ +/* XXX TJD: also, "guest_width" should be the guest's sizeof(unsigned long) */ + /* Number of xen_pfn_t in a page */ -#define fpp (PAGE_SIZE/sizeof(xen_pfn_t)) -/* XXX SMH: following 3 skanky macros rely on variable p2m_size being set */ +#define FPP (PAGE_SIZE/(guest_width)) /* Number of entries in the pfn_to_mfn_frame_list_list */ -#define P2M_FLL_ENTRIES (((p2m_size)+(fpp*fpp)-1)/(fpp*fpp)) +#define P2M_FLL_ENTRIES (((p2m_size)+(FPP*FPP)-1)/(FPP*FPP)) /* Number of entries in the pfn_to_mfn_frame_list */ -#define P2M_FL_ENTRIES (((p2m_size)+fpp-1)/fpp) +#define P2M_FL_ENTRIES (((p2m_size)+FPP-1)/FPP) /* Size in bytes of the pfn_to_mfn_frame_list */ -#define P2M_FL_SIZE ((P2M_FL_ENTRIES)*sizeof(unsigned long)) +#define P2M_FL_SIZE ((P2M_FL_ENTRIES)*(guest_width)) + +/* Masks for PTE<->PFN conversions */ +#define MADDR_BITS_X86 ((guest_width == 8) ? 
52 : 44) +#define MFN_MASK_X86 ((1ULL << (MADDR_BITS_X86 - PAGE_SHIFT_X86)) - 1) +#define MADDR_MASK_X86 (MFN_MASK_X86 << PAGE_SHIFT_X86) + #define PAEKERN_no 0 #define PAEKERN_yes 1 diff -r 177ebf350b4c -r 4c8394e3b011 tools/libxc/xg_save_restore.h --- a/tools/libxc/xg_save_restore.h Wed Sep 19 15:42:56 2007 +0100 +++ b/tools/libxc/xg_save_restore.h Wed Sep 19 17:44:23 2007 +0100 @@ -5,6 +5,9 @@ */ #include "xc_private.h" + +#include <xen/foreign/x86_32.h> +#include <xen/foreign/x86_64.h> /* ** We process save/restore/migrate in batches of pages; the below @@ -32,15 +35,19 @@ ** be a property of the domain, but for the moment we just read it ** from the hypervisor. ** +** - The width of a guest word (unsigned long), in bytes. +** ** Returns 1 on success, 0 on failure. */ static inline int get_platform_info(int xc_handle, uint32_t dom, /* OUT */ unsigned long *max_mfn, /* OUT */ unsigned long *hvirt_start, - /* OUT */ unsigned int *pt_levels) + /* OUT */ unsigned int *pt_levels, + /* OUT */ unsigned int *guest_width) { xen_capabilities_info_t xen_caps = ""; xen_platform_parameters_t xen_params; + DECLARE_DOMCTL; if (xc_version(xc_handle, XENVER_platform_parameters, &xen_params) != 0) return 0; @@ -52,17 +59,18 @@ static inline int get_platform_info(int *hvirt_start = xen_params.virt_start; - /* - * XXX For now, 32bit dom0's can only save/restore 32bit domUs - * on 64bit hypervisors, so no need to check which type of domain - * we're dealing with. - */ + memset(&domctl, 0, sizeof(domctl)); + domctl.domain = dom; + domctl.cmd = XEN_DOMCTL_get_address_size; + + if ( do_domctl(xc_handle, &domctl) != 0 ) + return 0; + + *guest_width = domctl.u.address_size.size / 8; + if (strstr(xen_caps, "xen-3.0-x86_64")) -#if defined(__i386__) - *pt_levels = 3; -#else - *pt_levels = 4; -#endif + /* Depends on whether it's a compat 32-on-64 guest */ + *pt_levels = ( (*guest_width == 8) ? 4 : 3 ); else if (strstr(xen_caps, "xen-3.0-x86_32p")) *pt_levels = 3; else if (strstr(xen_caps, "xen-3.0-x86_32")) @@ -95,3 +103,56 @@ static inline int get_platform_info(int /* Returns TRUE if the PFN is currently mapped */ #define is_mapped(pfn_type) (!((pfn_type) & 0x80000000UL)) + + +/* 32-on-64 support: saving 32bit guests from 64bit tools and vice versa */ +typedef union +{ + vcpu_guest_context_x86_64_t x64; + vcpu_guest_context_x86_32_t x32; + vcpu_guest_context_t c; +} vcpu_guest_context_either_t; + +typedef union +{ + shared_info_x86_64_t x64; + shared_info_x86_32_t x32; + shared_info_t s; +} shared_info_either_t; + +typedef union +{ + start_info_x86_64_t x64; + start_info_x86_32_t x32; + start_info_t s; +} start_info_either_t; + +#define GET_FIELD(_p, _f) ((guest_width==8) ? ((_p)->x64._f) : ((_p)->x32._f)) + +#define SET_FIELD(_p, _f, _v) do { \ + if (guest_width == 8) \ + (_p)->x64._f = (_v); \ + else \ + (_p)->x32._f = (_v); \ +} while (0) + +#define MEMCPY_FIELD(_d, _s, _f) do { \ + if (guest_width == 8) \ + memcpy(&(_d)->x64._f, &(_s)->x64._f,sizeof((_d)->x64._f)); \ + else \ + memcpy(&(_d)->x32._f, &(_s)->x32._f,sizeof((_d)->x32._f)); \ +} while (0) + +#define MEMSET_ARRAY_FIELD(_p, _f, _v) do { \ + if (guest_width == 8) \ + memset(&(_p)->x64._f[0], (_v), sizeof((_p)->x64._f)); \ + else \ + memset(&(_p)->x32._f[0], (_v), sizeof((_p)->x32._f)); \ +} while (0) + +#ifndef MAX +#define MAX(_a, _b) ((_a) >= (_b) ? (_a) : (_b)) +#endif +#ifndef MIN +#define MIN(_a, _b) ((_a) <= (_b) ? 
(_a) : (_b)) +#endif _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog