[Xen-changelog] [xen-unstable] [HVM] Save/restore: merge xc_linux_restore and xc_hvm_restore
# HG changeset patch
# User Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
# Date 1175782282 -3600
# Node ID e518f2fbdd724ca7b21789d2d075c7ee8665ddaa
# Parent  602d061ff51f50d7b46bd5ca78c4b70fbe809d20
[HVM] Save/restore: merge xc_linux_restore and xc_hvm_restore
into one function (and one file) since they share a lot of code

Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
---
 tools/libxc/xc_hvm_restore.c    |  351 ------
 tools/libxc/xc_linux_restore.c  |  955 -----------------------------------
 tools/libxc/Makefile            |    4 
 tools/libxc/xc_domain_restore.c | 1086 ++++++++++++++++++++++++++++++++++++++++
 tools/libxc/xc_hvm_save.c       |   57 +-
 tools/libxc/xenguest.h          |   22 
 tools/libxc/xg_private.c        |   10 
 tools/xcutils/xc_restore.c      |   10 
 8 files changed, 1137 insertions(+), 1358 deletions(-)

diff -r 602d061ff51f -r e518f2fbdd72 tools/libxc/Makefile
--- a/tools/libxc/Makefile      Thu Apr 05 10:43:50 2007 +0100
+++ b/tools/libxc/Makefile      Thu Apr 05 15:11:22 2007 +0100
@@ -26,8 +26,8 @@ CTRL_SRCS-$(CONFIG_X86_Linux) += xc_ptra
 GUEST_SRCS-y :=
 GUEST_SRCS-y += xg_private.c
-GUEST_SRCS-$(CONFIG_MIGRATE) += xc_linux_restore.c xc_linux_save.c
-GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_restore.c xc_hvm_save.c
+GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_linux_save.c
+GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_save.c
 
 # symlink libelf from xen/common/libelf/
 LIBELF_SRCS := libelf-tools.c libelf-loader.c
diff -r 602d061ff51f -r e518f2fbdd72 tools/libxc/xc_domain_restore.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_domain_restore.c   Thu Apr 05 15:11:22 2007 +0100
@@ -0,0 +1,1086 @@
+/******************************************************************************
+ * xc_domain_restore.c
+ *
+ * Restore the state of a guest session.
+ *
+ * Copyright (c) 2003, K A Fraser.
+ * Copyright (c) 2006, Intel Corporation
+ * Copyright (c) 2007, XenSource Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "xg_private.h"
+#include "xg_save_restore.h"
+#include "xc_dom.h"
+
+#include <xen/hvm/ioreq.h>
+#include <xen/hvm/params.h>
+
+/* max mfn of the current host machine */
+static unsigned long max_mfn;
+
+/* virtual starting address of the hypervisor */
+static unsigned long hvirt_start;
+
+/* #levels of page tables used by the current guest */
+static unsigned int pt_levels;
+
+/* number of pfns this guest has (i.e. number of entries in the P2M) */
+static unsigned long p2m_size;
+
+/* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */
+static unsigned long nr_pfns;
+
+/* Live mapping of the table mapping each PFN to its current MFN. */
+static xen_pfn_t *live_p2m = NULL;
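/* ------------------------------------------------------------------------
 * Illustrative sketch, NOT part of this changeset: with the merge, a caller
 * such as tools/xcutils/xc_restore.c can drive both PV and HVM restores
 * through the single xc_domain_restore() entry point.  Only the
 * xc_domain_restore() signature comes from this patch; the ex_ wrapper and
 * its argument plumbing are assumptions of the sketch.
 * ------------------------------------------------------------------------ */
#include <stdint.h>
#include "xenguest.h"

static int ex_restore_any(int xc_handle, int io_fd, uint32_t dom,
                          unsigned int store_evtchn,
                          unsigned int console_evtchn,
                          unsigned int is_hvm, unsigned int pae)
{
    unsigned long store_mfn = 0, console_mfn = 0;

    /* One call now covers both guest flavours; the hvm/pae flags select
     * the HVM-specific paths (magic pages, HVM context) inside. */
    return xc_domain_restore(xc_handle, io_fd, dom,
                             store_evtchn, &store_mfn,
                             console_evtchn, &console_mfn,
                             is_hvm, pae);
}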
+
+/* A table mapping each PFN to its new MFN. */
+static xen_pfn_t *p2m = NULL;
+
+/* A table of P2M mappings in the current region */
+static xen_pfn_t *p2m_batch = NULL;
+
+static ssize_t
+read_exact(int fd, void *buf, size_t count)
+{
+    int r = 0, s;
+    unsigned char *b = buf;
+
+    while (r < count) {
+        s = read(fd, &b[r], count - r);
+        if ((s == -1) && (errno == EINTR))
+            continue;
+        if (s <= 0) {
+            break;
+        }
+        r += s;
+    }
+
+    return (r == count) ? 1 : 0;
+}
+
+/*
+** In the state file (or during transfer), all page-table pages are
+** converted into a 'canonical' form where references to actual mfns
+** are replaced with references to the corresponding pfns.
+** This function inverts that operation, replacing the pfn values with
+** the (now known) appropriate mfn values.
+*/
+static int uncanonicalize_pagetable(int xc_handle, uint32_t dom,
+                                    unsigned long type, void *page)
+{
+    int i, pte_last;
+    unsigned long pfn;
+    uint64_t pte;
+    int nr_mfns = 0;
+
+    pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8);
+
+    /* First pass: work out how many (if any) MFNs we need to alloc */
+    for(i = 0; i < pte_last; i++) {
+
+        if(pt_levels == 2)
+            pte = ((uint32_t *)page)[i];
+        else
+            pte = ((uint64_t *)page)[i];
+
+        /* XXX SMH: below needs fixing for PROT_NONE etc */
+        if(!(pte & _PAGE_PRESENT))
+            continue;
+
+        pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
+
+        if(pfn >= p2m_size) {
+            /* This "page table page" is probably not one; bail. */
+            ERROR("Frame number in type %lu page table is out of range: "
+                  "i=%d pfn=0x%lx p2m_size=%lu",
+                  type >> 28, i, pfn, p2m_size);
+            return 0;
+        }
+
+        if(p2m[pfn] == INVALID_P2M_ENTRY) {
+            /* Have a 'valid' PFN without a matching MFN - need to alloc */
+            p2m_batch[nr_mfns++] = pfn;
+        }
+    }
+
+
+    /* Allocate the requisite number of mfns */
+    if (nr_mfns && xc_domain_memory_populate_physmap(
+            xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) {
+        ERROR("Failed to allocate memory for batch.!\n");
+        errno = ENOMEM;
+        return 0;
+    }
+
+    /* Second pass: uncanonicalize each present PTE */
+    nr_mfns = 0;
+    for(i = 0; i < pte_last; i++) {
+
+        if(pt_levels == 2)
+            pte = ((uint32_t *)page)[i];
+        else
+            pte = ((uint64_t *)page)[i];
+
+        /* XXX SMH: below needs fixing for PROT_NONE etc */
+        if(!(pte & _PAGE_PRESENT))
+            continue;
+
+        pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
+
+        if(p2m[pfn] == INVALID_P2M_ENTRY)
+            p2m[pfn] = p2m_batch[nr_mfns++];
+
+        pte &= ~MADDR_MASK_X86;
+        pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT;
+
+        if(pt_levels == 2)
+            ((uint32_t *)page)[i] = (uint32_t)pte;
+        else
+            ((uint64_t *)page)[i] = (uint64_t)pte;
+    }
+
+    return 1;
+}
+
+
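/* ------------------------------------------------------------------------
 * Illustrative sketch, NOT part of this changeset: the core transformation
 * uncanonicalize_pagetable() applies to each present PTE.  In the canonical
 * (saved) form the address bits of a PTE hold a PFN; restore swaps that for
 * the MFN the new domain actually owns.  The ex_ names and the 64-bit-only
 * mask are assumptions of the sketch; the real code also handles 2-level
 * (32-bit) tables and allocates any missing MFNs in a first pass.
 * ------------------------------------------------------------------------ */
#include <stdint.h>

#define EX_MADDR_MASK 0x000ffffffffff000ULL  /* PTE address bits 12-51 */

static uint64_t ex_uncanonicalize_pte(uint64_t pte, const uint64_t *p2m_table)
{
    /* Caller has already checked _PAGE_PRESENT, as the loops above do. */
    uint64_t pfn = (pte & EX_MADDR_MASK) >> 12;    /* canonical form: a PFN */
    pte &= ~EX_MADDR_MASK;                         /* strip it ...          */
    pte |= (p2m_table[pfn] << 12) & EX_MADDR_MASK; /* ... splice in the MFN */
    return pte;
}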
+/* Load the p2m frame list, plus potential extended info chunk */
+static xen_pfn_t * load_p2m_frame_list(int io_fd, int *pae_extended_cr3)
+{
+    xen_pfn_t *p2m_frame_list;
+    vcpu_guest_context_t ctxt;
+
+    if (!(p2m_frame_list = malloc(P2M_FL_SIZE))) {
+        ERROR("Couldn't allocate p2m_frame_list array");
+        return NULL;
+    }
+
+    /* Read first entry of P2M list, or extended-info signature (~0UL). */
+    if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
+        ERROR("read extended-info signature failed");
+        return NULL;
+    }
+
+    if (p2m_frame_list[0] == ~0UL) {
+        uint32_t tot_bytes;
+
+        /* Next 4 bytes: total size of following extended info. */
+        if (!read_exact(io_fd, &tot_bytes, sizeof(tot_bytes))) {
+            ERROR("read extended-info size failed");
+            return NULL;
+        }
+
+        while (tot_bytes) {
+            uint32_t chunk_bytes;
+            char chunk_sig[4];
+
+            /* 4-character chunk signature + 4-byte remaining chunk size. */
+            if (!read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) ||
+                !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes))) {
+                ERROR("read extended-info chunk signature failed");
+                return NULL;
+            }
+            tot_bytes -= 8;
+
+            /* VCPU context structure? */
+            if (!strncmp(chunk_sig, "vcpu", 4)) {
+                if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
+                    ERROR("read extended-info vcpu context failed");
+                    return NULL;
+                }
+                tot_bytes   -= sizeof(struct vcpu_guest_context);
+                chunk_bytes -= sizeof(struct vcpu_guest_context);
+
+                if (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3))
+                    *pae_extended_cr3 = 1;
+            }
+
+            /* Any remaining bytes of this chunk: read and discard. */
+            while (chunk_bytes) {
+                unsigned long sz = chunk_bytes;
+                if ( sz > P2M_FL_SIZE )
+                    sz = P2M_FL_SIZE;
+                if (!read_exact(io_fd, p2m_frame_list, sz)) {
+                    ERROR("read-and-discard extended-info chunk bytes failed");
+                    return NULL;
+                }
+                chunk_bytes -= sz;
+                tot_bytes   -= sz;
+            }
+        }
+
+        /* Now read the real first entry of P2M list. */
+        if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) {
+            ERROR("read first entry of p2m_frame_list failed");
+            return NULL;
+        }
+    }
+
+    /* First entry is already read into the p2m array. */
+    if (!read_exact(io_fd, &p2m_frame_list[1], P2M_FL_SIZE - sizeof(long))) {
+        ERROR("read p2m_frame_list failed");
+        return NULL;
+    }
+
+    return p2m_frame_list;
+}
+
+
+
+int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
+                      unsigned int store_evtchn, unsigned long *store_mfn,
+                      unsigned int console_evtchn, unsigned long *console_mfn,
+                      unsigned int hvm, unsigned int pae)
+{
+    DECLARE_DOMCTL;
+    int rc = 1, i, j, n, m, pae_extended_cr3 = 0;
+    unsigned long mfn, pfn;
+    unsigned int prev_pc, this_pc;
+    int verify = 0;
+    int nraces = 0;
+
+    /* The new domain's shared-info frame number. */
+    unsigned long shared_info_frame;
+    unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
+    shared_info_t *shared_info = (shared_info_t *)shared_info_page;
+
+    /* A copy of the CPU context of the guest. */
+    vcpu_guest_context_t ctxt;
+
+    /* A table containing the type of each PFN (/not/ MFN!). */
+    unsigned long *pfn_type = NULL;
+
+    /* A table of MFNs to map in the current region */
+    xen_pfn_t *region_mfn = NULL;
+
+    /* Types of the pfns in the current region */
+    unsigned long region_pfn_type[MAX_BATCH_SIZE];
+
+    /* A temporary mapping, and a copy, of one frame of guest memory. */
+    unsigned long *page = NULL;
+
+    /* A copy of the pfn-to-mfn table frame list. */
+    xen_pfn_t *p2m_frame_list = NULL;
+
+    /* A temporary mapping of the guest's start_info page. */
+    start_info_t *start_info;
+
+    /* Our mapping of the current region (batch) */
+    char *region_base;
+
+    xc_mmu_t *mmu = NULL;
+
+    /* used by debug verify code */
+    unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
+
+    struct mmuext_op pin[MAX_PIN_BATCH];
+    unsigned int nr_pins;
+
+    uint64_t vcpumap = 1ULL;
+    unsigned int max_vcpu_id = 0;
+    int new_ctxt_format = 0;
+
+    /* Magic frames in HVM guests: ioreqs and xenstore comms. */
+    uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */
+
+    /* Buffer for holding HVM context */
+    uint8_t *hvm_buf = NULL;
+
+    /* For info only */
+    nr_pfns = 0;
+
+    if ( !read_exact(io_fd, &p2m_size, sizeof(unsigned long)) )
+    {
+        ERROR("read: p2m_size");
+        goto out;
+    }
+    DPRINTF("xc_domain_restore start: p2m_size = %lx\n", p2m_size);
+
+    if ( !hvm )
+    {
+        /*
+         * XXX For now, 32bit dom0's can only save/restore 32bit domUs
+         * on 64bit hypervisors.
+ */ + memset(&domctl, 0, sizeof(domctl)); + domctl.domain = dom; + domctl.cmd = XEN_DOMCTL_set_address_size; + domctl.u.address_size.size = sizeof(unsigned long) * 8; + rc = do_domctl(xc_handle, &domctl); + if ( rc != 0 ) { + ERROR("Unable to set guest address size."); + goto out; + } + rc = 1; + } + + if(!get_platform_info(xc_handle, dom, + &max_mfn, &hvirt_start, &pt_levels)) { + ERROR("Unable to get platform info."); + return 1; + } + + if (lock_pages(&ctxt, sizeof(ctxt))) { + /* needed for build domctl, but might as well do early */ + ERROR("Unable to lock ctxt"); + return 1; + } + + /* Load the p2m frame list, plus potential extended info chunk */ + if ( !hvm ) + { + p2m_frame_list = load_p2m_frame_list(io_fd, &pae_extended_cr3); + if ( !p2m_frame_list ) + goto out; + } + + /* We want zeroed memory so use calloc rather than malloc. */ + p2m = calloc(p2m_size, sizeof(xen_pfn_t)); + pfn_type = calloc(p2m_size, sizeof(unsigned long)); + region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t)); + p2m_batch = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t)); + + if ((p2m == NULL) || (pfn_type == NULL) || + (region_mfn == NULL) || (p2m_batch == NULL)) { + ERROR("memory alloc failed"); + errno = ENOMEM; + goto out; + } + + if (lock_pages(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) { + ERROR("Could not lock region_mfn"); + goto out; + } + + if (lock_pages(p2m_batch, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) { + ERROR("Could not lock p2m_batch"); + goto out; + } + + /* Get the domain's shared-info frame. */ + domctl.cmd = XEN_DOMCTL_getdomaininfo; + domctl.domain = (domid_t)dom; + if (xc_domctl(xc_handle, &domctl) < 0) { + ERROR("Could not get information on new domain"); + goto out; + } + shared_info_frame = domctl.u.getdomaininfo.shared_info_frame; + + /* Mark all PFNs as invalid; we allocate on demand */ + for ( pfn = 0; pfn < p2m_size; pfn++ ) + p2m[pfn] = INVALID_P2M_ENTRY; + + if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) { + ERROR("Could not initialise for MMU updates"); + goto out; + } + + DPRINTF("Reloading memory pages: 0%%\n"); + + /* + * Now simply read each saved frame into its new machine frame. + * We uncanonicalise page tables as we go. + */ + prev_pc = 0; + + n = m = 0; + while (1) { + + int j, nr_mfns = 0; + + this_pc = (n * 100) / p2m_size; + if ( (this_pc - prev_pc) >= 5 ) + { + PPRINTF("\b\b\b\b%3d%%", this_pc); + prev_pc = this_pc; + } + + if (!read_exact(io_fd, &j, sizeof(int))) { + ERROR("Error when reading batch size"); + goto out; + } + + PPRINTF("batch %d\n",j); + + if (j == -1) { + verify = 1; + DPRINTF("Entering page verify mode\n"); + continue; + } + + if (j == -2) { + new_ctxt_format = 1; + if (!read_exact(io_fd, &max_vcpu_id, sizeof(int)) || + (max_vcpu_id >= 64) || + !read_exact(io_fd, &vcpumap, sizeof(uint64_t))) { + ERROR("Error when reading max_vcpu_id"); + goto out; + } + continue; + } + + if (j == 0) + break; /* our work here is done */ + + if (j > MAX_BATCH_SIZE) { + ERROR("Max batch size exceeded. 
Giving up."); + goto out; + } + + if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) { + ERROR("Error when reading region pfn types"); + goto out; + } + + /* First pass for this batch: work out how much memory to alloc */ + nr_mfns = 0; + for ( i = 0; i < j; i++ ) + { + unsigned long pfn, pagetype; + pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; + pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; + + if ( (pagetype != XEN_DOMCTL_PFINFO_XTAB) && + (p2m[pfn] == INVALID_P2M_ENTRY) ) + { + /* Have a live PFN which hasn't had an MFN allocated */ + p2m_batch[nr_mfns++] = pfn; + } + } + + + /* Now allocate a bunch of mfns for this batch */ + if (nr_mfns && xc_domain_memory_populate_physmap( + xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) { + ERROR("Failed to allocate memory for batch.!\n"); + errno = ENOMEM; + goto out; + } + + /* Second pass for this batch: update p2m[] and region_mfn[] */ + nr_mfns = 0; + for ( i = 0; i < j; i++ ) + { + unsigned long pfn, pagetype; + pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; + pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; + + if ( pagetype == XEN_DOMCTL_PFINFO_XTAB) + region_mfn[i] = ~0UL; /* map will fail but we don't care */ + else + { + if (p2m[pfn] == INVALID_P2M_ENTRY) { + /* We just allocated a new mfn above; update p2m */ + p2m[pfn] = p2m_batch[nr_mfns++]; + nr_pfns++; + } + + /* setup region_mfn[] for batch map. + * For HVM guests, this interface takes PFNs, not MFNs */ + region_mfn[i] = hvm ? pfn : p2m[pfn]; + } + } + + /* Map relevant mfns */ + region_base = xc_map_foreign_batch( + xc_handle, dom, PROT_WRITE, region_mfn, j); + + if ( region_base == NULL ) + { + ERROR("map batch failed"); + goto out; + } + + for ( i = 0; i < j; i++ ) + { + void *page; + unsigned long pagetype; + + pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; + pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; + + if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ) + /* a bogus/unmapped page: skip it */ + continue; + + if ( pfn > p2m_size ) + { + ERROR("pfn out of range"); + goto out; + } + + pfn_type[pfn] = pagetype; + + mfn = p2m[pfn]; + + /* In verify mode, we use a copy; otherwise we work in place */ + page = verify ? (void *)buf : (region_base + i*PAGE_SIZE); + + if (!read_exact(io_fd, page, PAGE_SIZE)) { + ERROR("Error when reading page (type was %lx)", pagetype); + goto out; + } + + pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK; + + if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) && + (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) ) + { + /* + ** A page table page - need to 'uncanonicalize' it, i.e. + ** replace all the references to pfns with the corresponding + ** mfns for the new domain. + ** + ** On PAE we need to ensure that PGDs are in MFNs < 4G, and + ** so we may need to update the p2m after the main loop. + ** Hence we defer canonicalization of L1s until then. + */ + if ((pt_levels != 3) || + pae_extended_cr3 || + (pagetype != XEN_DOMCTL_PFINFO_L1TAB)) { + + if (!uncanonicalize_pagetable(xc_handle, dom, + pagetype, page)) { + /* + ** Failing to uncanonicalize a page table can be ok + ** under live migration since the pages type may have + ** changed by now (and we'll get an update later). 
+ */ + DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n", + pagetype >> 28, pfn, mfn); + nraces++; + continue; + } + } + } + else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB ) + { + ERROR("Bogus page type %lx page table is out of range: " + "i=%d p2m_size=%lu", pagetype, i, p2m_size); + goto out; + + } + + + if (verify) { + + int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE); + + if (res) { + + int v; + + DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx " + "actualcs=%08lx\n", pfn, pfn_type[pfn], + csum_page(region_base + i*PAGE_SIZE), + csum_page(buf)); + + for (v = 0; v < 4; v++) { + + unsigned long *p = (unsigned long *) + (region_base + i*PAGE_SIZE); + if (buf[v] != p[v]) + DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]); + } + } + } + + if (!hvm + && xc_add_mmu_update(xc_handle, mmu, + (((unsigned long long)mfn) << PAGE_SHIFT) + | MMU_MACHPHYS_UPDATE, pfn)) { + ERROR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn); + goto out; + } + } /* end of 'batch' for loop */ + + munmap(region_base, j*PAGE_SIZE); + n+= j; /* crude stats */ + + /* + * Discard cache for portion of file read so far up to last + * page boundary every 16MB or so. + */ + m += j; + if ( m > MAX_PAGECACHE_USAGE ) + { + discard_file_cache(io_fd, 0 /* no flush */); + m = 0; + } + } + + /* + * Ensure we flush all machphys updates before potential PAE-specific + * reallocations below. + */ + if (!hvm && xc_finish_mmu_updates(xc_handle, mmu)) { + ERROR("Error doing finish_mmu_updates()"); + goto out; + } + + DPRINTF("Received all pages (%d races)\n", nraces); + + if ( hvm ) + { + uint32_t rec_len; + + /* Set HVM-specific parameters */ + if ( !read_exact(io_fd, magic_pfns, sizeof(magic_pfns)) ) + { + ERROR("error reading magic page addresses"); + goto out; + } + + /* These comms pages need to be zeroed at the start of day */ + if ( xc_clear_domain_page(xc_handle, dom, magic_pfns[0]) || + xc_clear_domain_page(xc_handle, dom, magic_pfns[1]) || + xc_clear_domain_page(xc_handle, dom, magic_pfns[2]) ) + { + ERROR("error zeroing magic pages"); + goto out; + } + + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, magic_pfns[0]); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1]); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, magic_pfns[2]); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn); + *store_mfn = magic_pfns[2]; + + /* Read vcpu contexts */ + for (i = 0; i <= max_vcpu_id; i++) + { + if (!(vcpumap & (1ULL << i))) + continue; + + if ( !read_exact(io_fd, &(ctxt), sizeof(ctxt)) ) + { + ERROR("error read vcpu context.\n"); + goto out; + } + + if ( (rc = xc_vcpu_setcontext(xc_handle, dom, i, &ctxt)) ) + { + ERROR("Could not set vcpu context, rc=%d", rc); + goto out; + } + rc = 1; + } + + /* Read HVM context */ + if ( !read_exact(io_fd, &rec_len, sizeof(uint32_t)) ) + { + ERROR("error read hvm context size!\n"); + goto out; + } + + hvm_buf = malloc(rec_len); + if ( hvm_buf == NULL ) + { + ERROR("memory alloc for hvm context buffer failed"); + errno = ENOMEM; + goto out; + } + + if ( !read_exact(io_fd, hvm_buf, rec_len) ) + { + ERROR("error loading the HVM context"); + goto out; + } + + rc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_len); + if ( rc ) + ERROR("error setting the HVM context"); + + goto out; + } + + /* Non-HVM guests only from here on */ + + if ((pt_levels == 3) && !pae_extended_cr3) { + + /* + ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. 
This + ** is a little awkward and involves (a) finding all such PGDs and + ** replacing them with 'lowmem' versions; (b) upating the p2m[] + ** with the new info; and (c) canonicalizing all the L1s using the + ** (potentially updated) p2m[]. + ** + ** This is relatively slow (and currently involves two passes through + ** the pfn_type[] array), but at least seems to be correct. May wish + ** to consider more complex approaches to optimize this later. + */ + + int j, k; + + /* First pass: find all L3TABs current in > 4G mfns and get new mfns */ + for ( i = 0; i < p2m_size; i++ ) + { + if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) == + XEN_DOMCTL_PFINFO_L3TAB) && + (p2m[i] > 0xfffffUL) ) + { + unsigned long new_mfn; + uint64_t l3ptes[4]; + uint64_t *l3tab; + + l3tab = (uint64_t *) + xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ, p2m[i]); + + for(j = 0; j < 4; j++) + l3ptes[j] = l3tab[j]; + + munmap(l3tab, PAGE_SIZE); + + if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) { + ERROR("Couldn't get a page below 4GB :-("); + goto out; + } + + p2m[i] = new_mfn; + if (xc_add_mmu_update(xc_handle, mmu, + (((unsigned long long)new_mfn) + << PAGE_SHIFT) | + MMU_MACHPHYS_UPDATE, i)) { + ERROR("Couldn't m2p on PAE root pgdir"); + goto out; + } + + l3tab = (uint64_t *) + xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ | PROT_WRITE, p2m[i]); + + for(j = 0; j < 4; j++) + l3tab[j] = l3ptes[j]; + + munmap(l3tab, PAGE_SIZE); + + } + } + + /* Second pass: find all L1TABs and uncanonicalize them */ + j = 0; + + for ( i = 0; i < p2m_size; i++ ) + { + if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) == + XEN_DOMCTL_PFINFO_L1TAB) ) + { + region_mfn[j] = p2m[i]; + j++; + } + + if(i == (p2m_size-1) || j == MAX_BATCH_SIZE) { + + if (!(region_base = xc_map_foreign_batch( + xc_handle, dom, PROT_READ | PROT_WRITE, + region_mfn, j))) { + ERROR("map batch failed"); + goto out; + } + + for(k = 0; k < j; k++) { + if(!uncanonicalize_pagetable(xc_handle, dom, + XEN_DOMCTL_PFINFO_L1TAB, + region_base + k*PAGE_SIZE)) { + ERROR("failed uncanonicalize pt!"); + goto out; + } + } + + munmap(region_base, j*PAGE_SIZE); + j = 0; + } + } + + if (xc_finish_mmu_updates(xc_handle, mmu)) { + ERROR("Error doing finish_mmu_updates()"); + goto out; + } + } + + /* + * Pin page tables. Do this after writing to them as otherwise Xen + * will barf when doing the type-checking. + */ + nr_pins = 0; + for ( i = 0; i < p2m_size; i++ ) + { + if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 ) + continue; + + switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK ) + { + case XEN_DOMCTL_PFINFO_L1TAB: + pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE; + break; + + case XEN_DOMCTL_PFINFO_L2TAB: + pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE; + break; + + case XEN_DOMCTL_PFINFO_L3TAB: + pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE; + break; + + case XEN_DOMCTL_PFINFO_L4TAB: + pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE; + break; + + default: + continue; + } + + pin[nr_pins].arg1.mfn = p2m[i]; + nr_pins++; + + /* Batch full? Then flush. */ + if (nr_pins == MAX_PIN_BATCH) { + if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) { + ERROR("Failed to pin batch of %d page tables", nr_pins); + goto out; + } + nr_pins = 0; + } + } + + /* Flush final partial batch. 
*/ + if ((nr_pins != 0) && (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0)) { + ERROR("Failed to pin batch of %d page tables", nr_pins); + goto out; + } + + DPRINTF("\b\b\b\b100%%\n"); + DPRINTF("Memory reloaded (%ld pages)\n", nr_pfns); + + /* Get the list of PFNs that are not in the psuedo-phys map */ + { + unsigned int count; + unsigned long *pfntab; + int nr_frees, rc; + + if (!read_exact(io_fd, &count, sizeof(count))) { + ERROR("Error when reading pfn count"); + goto out; + } + + if(!(pfntab = malloc(sizeof(unsigned long) * count))) { + ERROR("Out of memory"); + goto out; + } + + if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) { + ERROR("Error when reading pfntab"); + goto out; + } + + nr_frees = 0; + for (i = 0; i < count; i++) { + + unsigned long pfn = pfntab[i]; + + if(p2m[pfn] != INVALID_P2M_ENTRY) { + /* pfn is not in physmap now, but was at some point during + the save/migration process - need to free it */ + pfntab[nr_frees++] = p2m[pfn]; + p2m[pfn] = INVALID_P2M_ENTRY; // not in pseudo-physical map + } + } + + if (nr_frees > 0) { + + struct xen_memory_reservation reservation = { + .nr_extents = nr_frees, + .extent_order = 0, + .domid = dom + }; + set_xen_guest_handle(reservation.extent_start, pfntab); + + if ((rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation, + &reservation)) != nr_frees) { + ERROR("Could not decrease reservation : %d", rc); + goto out; + } else + DPRINTF("Decreased reservation by %d pages\n", count); + } + } + + for (i = 0; i <= max_vcpu_id; i++) { + if (!(vcpumap & (1ULL << i))) + continue; + + if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) { + ERROR("Error when reading ctxt %d", i); + goto out; + } + + if ( !new_ctxt_format ) + ctxt.flags |= VGCF_online; + + if (i == 0) { + /* + * Uncanonicalise the suspend-record frame number and poke + * resume record. + */ + pfn = ctxt.user_regs.edx; + if ((pfn >= p2m_size) || + (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) { + ERROR("Suspend record frame number is bad"); + goto out; + } + ctxt.user_regs.edx = mfn = p2m[pfn]; + start_info = xc_map_foreign_range( + xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn); + start_info->nr_pages = p2m_size; + start_info->shared_info = shared_info_frame << PAGE_SHIFT; + start_info->flags = 0; + *store_mfn = start_info->store_mfn = p2m[start_info->store_mfn]; + start_info->store_evtchn = store_evtchn; + start_info->console.domU.mfn = p2m[start_info->console.domU.mfn]; + start_info->console.domU.evtchn = console_evtchn; + *console_mfn = start_info->console.domU.mfn; + munmap(start_info, PAGE_SIZE); + } + + /* Uncanonicalise each GDT frame number. */ + if (ctxt.gdt_ents > 8192) { + ERROR("GDT entry count out of range"); + goto out; + } + + for (j = 0; (512*j) < ctxt.gdt_ents; j++) { + pfn = ctxt.gdt_frames[j]; + if ((pfn >= p2m_size) || + (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) { + ERROR("GDT frame number is bad"); + goto out; + } + ctxt.gdt_frames[j] = p2m[pfn]; + } + + /* Uncanonicalise the page table base pointer. */ + pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]); + + if (pfn >= p2m_size) { + ERROR("PT base is bad: pfn=%lu p2m_size=%lu type=%08lx", + pfn, p2m_size, pfn_type[pfn]); + goto out; + } + + if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) != + ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) { + ERROR("PT base is bad. 
pfn=%lu nr=%lu type=%08lx %08lx", + pfn, p2m_size, pfn_type[pfn], + (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT); + goto out; + } + + ctxt.ctrlreg[3] = xen_pfn_to_cr3(p2m[pfn]); + + /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */ + if ( (pt_levels == 4) && ctxt.ctrlreg[1] ) + { + pfn = xen_cr3_to_pfn(ctxt.ctrlreg[1]); + + if (pfn >= p2m_size) { + ERROR("User PT base is bad: pfn=%lu p2m_size=%lu type=%08lx", + pfn, p2m_size, pfn_type[pfn]); + goto out; + } + + if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) != + ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) { + ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx", + pfn, p2m_size, pfn_type[pfn], + (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT); + goto out; + } + + ctxt.ctrlreg[1] = xen_pfn_to_cr3(p2m[pfn]); + } + + domctl.cmd = XEN_DOMCTL_setvcpucontext; + domctl.domain = (domid_t)dom; + domctl.u.vcpucontext.vcpu = i; + set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt); + rc = xc_domctl(xc_handle, &domctl); + if (rc != 0) { + ERROR("Couldn't build vcpu%d", i); + goto out; + } + rc = 1; + } + + if (!read_exact(io_fd, shared_info_page, PAGE_SIZE)) { + ERROR("Error when reading shared info page"); + goto out; + } + + /* clear any pending events and the selector */ + memset(&(shared_info->evtchn_pending[0]), 0, + sizeof (shared_info->evtchn_pending)); + for ( i = 0; i < MAX_VIRT_CPUS; i++ ) + shared_info->vcpu_info[i].evtchn_pending_sel = 0; + + /* Copy saved contents of shared-info page. No checking needed. */ + page = xc_map_foreign_range( + xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame); + memcpy(page, shared_info, PAGE_SIZE); + munmap(page, PAGE_SIZE); + + /* Uncanonicalise the pfn-to-mfn table frame-number list. */ + for (i = 0; i < P2M_FL_ENTRIES; i++) { + pfn = p2m_frame_list[i]; + if ((pfn >= p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) { + ERROR("PFN-to-MFN frame number is bad"); + goto out; + } + + p2m_frame_list[i] = p2m[pfn]; + } + + /* Copy the P2M we've constructed to the 'live' P2M */ + if (!(live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_WRITE, + p2m_frame_list, P2M_FL_ENTRIES))) { + ERROR("Couldn't map p2m table"); + goto out; + } + + memcpy(live_p2m, p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT)); + munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT)); + + DPRINTF("Domain ready to be built.\n"); + rc = 0; + + out: + if ( (rc != 0) && (dom != 0) ) + xc_domain_destroy(xc_handle, dom); + free(mmu); + free(p2m); + free(pfn_type); + free(hvm_buf); + + /* discard cache for save file */ + discard_file_cache(io_fd, 1 /*flush*/); + + DPRINTF("Restore exit with rc=%d\n", rc); + + return rc; +} diff -r 602d061ff51f -r e518f2fbdd72 tools/libxc/xc_hvm_restore.c --- a/tools/libxc/xc_hvm_restore.c Thu Apr 05 10:43:50 2007 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,351 +0,0 @@ -/****************************************************************************** - * xc_hvm_restore.c - * - * Restore the state of a HVM guest. - * - * Copyright (c) 2003, K A Fraser. - * Copyright (c) 2006 Intel Corperation - * rewriten for hvm guest by Zhai Edwin <edwin.zhai@xxxxxxxxx> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -#include <stdlib.h> -#include <unistd.h> - -#include "xg_private.h" -#include "xg_save_restore.h" - -#include <xen/hvm/ioreq.h> -#include <xen/hvm/params.h> -#include <xen/hvm/e820.h> - -static ssize_t -read_exact(int fd, void *buf, size_t count) -{ - int r = 0, s; - unsigned char *b = buf; - - while ( r < count ) - { - s = read(fd, &b[r], count - r); - if ( (s == -1) && (errno == EINTR) ) - continue; - if ( s <= 0 ) - break; - r += s; - } - - return (r == count) ? 1 : 0; -} - -#define BPL (sizeof(long)*8) -#define test_bit(bit, map) !!((map)[(bit)/BPL] & (1UL << ((bit) % BPL))) -#define set_bit(bit, map) ((map)[(bit)/BPL] |= (1UL << ((bit) % BPL))) -static int test_and_set_bit(unsigned long nr, unsigned long *map) -{ - int rc = test_bit(nr, map); - if ( !rc ) - set_bit(nr, map); - return rc; -} - -int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom, - unsigned int store_evtchn, unsigned long *store_mfn, - unsigned int pae, unsigned int apic) -{ - DECLARE_DOMCTL; - - /* A copy of the CPU context of the guest. */ - vcpu_guest_context_t ctxt; - - char *region_base; - - unsigned long buf[PAGE_SIZE/sizeof(unsigned long)]; - - xc_dominfo_t info; - unsigned int rc = 1, n, i; - uint32_t rec_len, nr_vcpus; - uint8_t *hvm_buf = NULL; - - /* Magic frames: ioreqs and xenstore comms. */ - uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */ - - unsigned long pfn; - int verify = 0; - - /* Types of the pfns in the current region */ - unsigned long region_pfn_type[MAX_BATCH_SIZE]; - xen_pfn_t pfn_alloc_batch[MAX_BATCH_SIZE]; - unsigned int pfn_alloc_batch_size; - - /* The size of an array big enough to contain all guest pfns */ - unsigned long max_pfn = 0xfffffUL; /* initial memory map guess: 4GB */ - unsigned long *pfn_bitmap = NULL, *new_pfn_bitmap; - - DPRINTF("xc_hvm_restore:dom=%d, store_evtchn=%d, " - "pae=%u, apic=%u.\n", dom, store_evtchn, pae, apic); - - DPRINTF("xc_hvm_restore start: max_pfn = %lx\n", max_pfn); - - if ( mlock(&ctxt, sizeof(ctxt)) ) - { - /* needed for build dom0 op, but might as well do early */ - ERROR("Unable to mlock ctxt"); - return 1; - } - - if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 ) - { - ERROR("Could not get domain info"); - return 1; - } - - domctl.cmd = XEN_DOMCTL_getdomaininfo; - domctl.domain = (domid_t)dom; - if ( xc_domctl(xc_handle, &domctl) < 0 ) - { - ERROR("Could not get information on new domain"); - goto out; - } - - pfn_bitmap = calloc((max_pfn+1)/8, 1); - if ( pfn_bitmap == NULL ) - { - ERROR("Could not allocate pfn bitmap"); - goto out; - } - - n = 0; - for ( ; ; ) - { - int j; - - if ( !read_exact(io_fd, &j, sizeof(int)) ) - { - ERROR("HVM restore Error when reading batch size"); - goto out; - } - - PPRINTF("batch %d\n",j); - - if ( j == -1 ) - { - verify = 1; - DPRINTF("Entering page verify mode\n"); - continue; - } - - if ( j == 0 ) - break; /* our work here is done */ - - if ( j > MAX_BATCH_SIZE ) - { - ERROR("Max batch size exceeded. 
Giving up."); - goto out; - } - - if ( !read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long)) ) - { - ERROR("Error when reading region pfn types"); - goto out; - } - - pfn_alloc_batch_size = 0; - for ( i = 0; i < j; i++ ) - { - pfn = region_pfn_type[i]; - if ( pfn & XEN_DOMCTL_PFINFO_LTAB_MASK ) - continue; - - while ( pfn > max_pfn ) - { - if ( max_pfn >= 0xfffffff ) - { - ERROR("Maximum PFN beyond reason (1TB) %lx\n", pfn); - goto out; - } - max_pfn = 2*max_pfn + 1; - new_pfn_bitmap = realloc(pfn_bitmap, (max_pfn+1)/8); - if ( new_pfn_bitmap == NULL ) - { - ERROR("Could not realloc pfn bitmap for max_pfn=%lx\n", - max_pfn); - goto out; - } - pfn_bitmap = new_pfn_bitmap; - memset(&pfn_bitmap[(max_pfn+1)/(2*BPL)], 0, (max_pfn+1)/(2*8)); - } - - if ( !test_and_set_bit(pfn, pfn_bitmap) ) - pfn_alloc_batch[pfn_alloc_batch_size++] = pfn; - } - - if ( pfn_alloc_batch_size != 0 ) - { - rc = xc_domain_memory_populate_physmap( - xc_handle, dom, pfn_alloc_batch_size, 0, 0, pfn_alloc_batch); - if ( rc != 0 ) - { - PERROR("Could not allocate %u pages for HVM guest.\n", - pfn_alloc_batch_size); - goto out; - } - } - - region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_WRITE, region_pfn_type, j); - - for ( i = 0; i < j; i++ ) - { - void *page; - - pfn = region_pfn_type[i]; - if ( pfn & XEN_DOMCTL_PFINFO_LTAB_MASK ) - continue; - - /* In verify mode, we use a copy; otherwise we work in place */ - page = verify ? (void *)buf : (region_base + i*PAGE_SIZE); - - if ( !read_exact(io_fd, page, PAGE_SIZE) ) - { - ERROR("Error when reading page (%x)", i); - goto out; - } - - if ( verify ) - { - int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE); - if ( res ) - { - int v; - - DPRINTF("************** pfn=%lx gotcs=%08lx " - "actualcs=%08lx\n", pfn, - csum_page(region_base + i*PAGE_SIZE), - csum_page(buf)); - - for ( v = 0; v < 4; v++ ) - { - unsigned long *p = (unsigned long *) - (region_base + i*PAGE_SIZE); - if (buf[v] != p[v]) - DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]); - } - } - } - - } /* end of 'batch' for loop */ - - munmap(region_base, j*PAGE_SIZE); - n += j; /* crude stats */ - } - - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae); - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn); - - if ( !read_exact(io_fd, magic_pfns, sizeof(magic_pfns)) ) - { - ERROR("error reading magic page addresses\n"); - goto out; - } - - if ( xc_clear_domain_page(xc_handle, dom, magic_pfns[0]) || - xc_clear_domain_page(xc_handle, dom, magic_pfns[1]) || - xc_clear_domain_page(xc_handle, dom, magic_pfns[2]) ) - { - rc = -1; - goto out; - } - - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, magic_pfns[0]); - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1]); - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, magic_pfns[2]); - *store_mfn = magic_pfns[2]; - DPRINTF("hvm restore: calculate new store_mfn=0x%lx.\n", *store_mfn); - - if ( !read_exact(io_fd, &nr_vcpus, sizeof(uint32_t)) ) - { - ERROR("error read nr vcpu !\n"); - goto out; - } - DPRINTF("hvm restore:get nr_vcpus=%d.\n", nr_vcpus); - - for ( i = 0; i < nr_vcpus; i++ ) - { - if ( !read_exact(io_fd, &rec_len, sizeof(uint32_t)) ) - { - ERROR("error read vcpu context size!\n"); - goto out; - } - if ( rec_len != sizeof(ctxt) ) - { - ERROR("vcpu context size dismatch!\n"); - goto out; - } - - if ( !read_exact(io_fd, &(ctxt), sizeof(ctxt)) ) - { - ERROR("error read vcpu context.\n"); - goto out; - } - - if ( (rc = xc_vcpu_setcontext(xc_handle, dom, i, &ctxt)) ) - { - ERROR("Could 
not set vcpu context, rc=%d", rc); - goto out; - } - } - - /* restore hvm context including pic/pit/shpage */ - if ( !read_exact(io_fd, &rec_len, sizeof(uint32_t)) ) - { - ERROR("error read hvm context size!\n"); - goto out; - } - - hvm_buf = malloc(rec_len); - if ( hvm_buf == NULL ) - { - ERROR("memory alloc for hvm context buffer failed"); - errno = ENOMEM; - goto out; - } - - if ( !read_exact(io_fd, hvm_buf, rec_len) ) - { - ERROR("error read hvm buffer!\n"); - goto out; - } - - if ( (rc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_len)) ) - { - ERROR("error set hvm buffer!\n"); - goto out; - } - - rc = 0; - goto out; - - out: - if ( (rc != 0) && (dom != 0) ) - xc_domain_destroy(xc_handle, dom); - free(hvm_buf); - free(pfn_bitmap); - - DPRINTF("Restore exit with rc=%d\n", rc); - - return rc; -} diff -r 602d061ff51f -r e518f2fbdd72 tools/libxc/xc_hvm_save.c --- a/tools/libxc/xc_hvm_save.c Thu Apr 05 10:43:50 2007 +0100 +++ b/tools/libxc/xc_hvm_save.c Thu Apr 05 15:11:22 2007 +0100 @@ -305,6 +305,8 @@ int xc_hvm_save(int xc_handle, int io_fd unsigned long total_sent = 0; + uint64_t vcpumap = 1ULL; + DPRINTF("xc_hvm_save: dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, " "live=%d, debug=%d.\n", dom, max_iters, max_factor, flags, live, debug); @@ -371,6 +373,12 @@ int xc_hvm_save(int xc_handle, int io_fd /* Size of any array that covers 0 ... max_pfn */ pfn_array_size = max_pfn + 1; + if ( !write_exact(io_fd, &pfn_array_size, sizeof(unsigned long)) ) + { + ERROR("Error when writing to state file (1)"); + goto out; + } + /* pretend we sent all the pages last iteration */ sent_last_iter = pfn_array_size; @@ -644,6 +652,32 @@ int xc_hvm_save(int xc_handle, int io_fd DPRINTF("All HVM memory is saved\n"); + { + struct { + int minustwo; + int max_vcpu_id; + uint64_t vcpumap; + } chunk = { -2, info.max_vcpu_id }; + + if (info.max_vcpu_id >= 64) { + ERROR("Too many VCPUS in guest!"); + goto out; + } + + for (i = 1; i <= info.max_vcpu_id; i++) { + xc_vcpuinfo_t vinfo; + if ((xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) && + vinfo.online) + vcpumap |= 1ULL << i; + } + + chunk.vcpumap = vcpumap; + if(!write_exact(io_fd, &chunk, sizeof(chunk))) { + ERROR("Error when writing to state file (errno %d)", errno); + goto out; + } + } + /* Zero terminate */ i = 0; if ( !write_exact(io_fd, &i, sizeof(int)) ) @@ -666,33 +700,22 @@ int xc_hvm_save(int xc_handle, int io_fd goto out; } - /* save vcpu/vmcs context */ - if ( !write_exact(io_fd, &nr_vcpus, sizeof(uint32_t)) ) - { - ERROR("error write nr vcpus"); - goto out; - } - - /*XXX: need a online map to exclude down cpu */ + /* save vcpu/vmcs contexts */ for ( i = 0; i < nr_vcpus; i++ ) { + if (!(vcpumap & (1ULL << i))) + continue; + if ( xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) ) { ERROR("HVM:Could not get vcpu context"); goto out; } - rec_size = sizeof(ctxt); - DPRINTF("write %d vcpucontext of total %d.\n", i, nr_vcpus); - if ( !write_exact(io_fd, &rec_size, sizeof(uint32_t)) ) - { - ERROR("error write vcpu ctxt size"); - goto out; - } - + DPRINTF("write vcpu %d context.\n", i); if ( !write_exact(io_fd, &(ctxt), sizeof(ctxt)) ) { - ERROR("write vmcs failed!\n"); + ERROR("write vcpu context failed!\n"); goto out; } } diff -r 602d061ff51f -r e518f2fbdd72 tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Thu Apr 05 10:43:50 2007 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,955 +0,0 @@ -/****************************************************************************** - * xc_linux_restore.c - * - * Restore the state 
of a Linux session. - * - * Copyright (c) 2003, K A Fraser. - */ - -#include <stdlib.h> -#include <unistd.h> - -#include "xg_private.h" -#include "xg_save_restore.h" -#include "xc_dom.h" - -/* max mfn of the current host machine */ -static unsigned long max_mfn; - -/* virtual starting address of the hypervisor */ -static unsigned long hvirt_start; - -/* #levels of page tables used by the current guest */ -static unsigned int pt_levels; - -/* number of pfns this guest has (i.e. number of entries in the P2M) */ -static unsigned long p2m_size; - -/* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */ -static unsigned long nr_pfns; - -/* Live mapping of the table mapping each PFN to its current MFN. */ -static xen_pfn_t *live_p2m = NULL; - -/* A table mapping each PFN to its new MFN. */ -static xen_pfn_t *p2m = NULL; - -/* A table of P2M mappings in the current region */ -static xen_pfn_t *p2m_batch = NULL; - -static ssize_t -read_exact(int fd, void *buf, size_t count) -{ - int r = 0, s; - unsigned char *b = buf; - - while (r < count) { - s = read(fd, &b[r], count - r); - if ((s == -1) && (errno == EINTR)) - continue; - if (s <= 0) { - break; - } - r += s; - } - - return (r == count) ? 1 : 0; -} - -/* -** In the state file (or during transfer), all page-table pages are -** converted into a 'canonical' form where references to actual mfns -** are replaced with references to the corresponding pfns. -** This function inverts that operation, replacing the pfn values with -** the (now known) appropriate mfn values. -*/ -static int uncanonicalize_pagetable(int xc_handle, uint32_t dom, - unsigned long type, void *page) -{ - int i, pte_last; - unsigned long pfn; - uint64_t pte; - int nr_mfns = 0; - - pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8); - - /* First pass: work out how many (if any) MFNs we need to alloc */ - for(i = 0; i < pte_last; i++) { - - if(pt_levels == 2) - pte = ((uint32_t *)page)[i]; - else - pte = ((uint64_t *)page)[i]; - - /* XXX SMH: below needs fixing for PROT_NONE etc */ - if(!(pte & _PAGE_PRESENT)) - continue; - - pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86; - - if(pfn >= p2m_size) { - /* This "page table page" is probably not one; bail. 
*/ - ERROR("Frame number in type %lu page table is out of range: " - "i=%d pfn=0x%lx p2m_size=%lu", - type >> 28, i, pfn, p2m_size); - return 0; - } - - if(p2m[pfn] == INVALID_P2M_ENTRY) { - /* Have a 'valid' PFN without a matching MFN - need to alloc */ - p2m_batch[nr_mfns++] = pfn; - } - } - - - /* Alllocate the requistite number of mfns */ - if (nr_mfns && xc_domain_memory_populate_physmap( - xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) { - ERROR("Failed to allocate memory for batch.!\n"); - errno = ENOMEM; - return 0; - } - - /* Second pass: uncanonicalize each present PTE */ - nr_mfns = 0; - for(i = 0; i < pte_last; i++) { - - if(pt_levels == 2) - pte = ((uint32_t *)page)[i]; - else - pte = ((uint64_t *)page)[i]; - - /* XXX SMH: below needs fixing for PROT_NONE etc */ - if(!(pte & _PAGE_PRESENT)) - continue; - - pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86; - - if(p2m[pfn] == INVALID_P2M_ENTRY) - p2m[pfn] = p2m_batch[nr_mfns++]; - - pte &= ~MADDR_MASK_X86; - pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT; - - if(pt_levels == 2) - ((uint32_t *)page)[i] = (uint32_t)pte; - else - ((uint64_t *)page)[i] = (uint64_t)pte; - } - - return 1; -} - - -int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, - unsigned int store_evtchn, unsigned long *store_mfn, - unsigned int console_evtchn, unsigned long *console_mfn) -{ - DECLARE_DOMCTL; - int rc = 1, i, j, n, m, pae_extended_cr3 = 0; - unsigned long mfn, pfn; - unsigned int prev_pc, this_pc; - int verify = 0; - int nraces = 0; - - /* The new domain's shared-info frame number. */ - unsigned long shared_info_frame; - unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */ - shared_info_t *shared_info = (shared_info_t *)shared_info_page; - - /* A copy of the CPU context of the guest. */ - vcpu_guest_context_t ctxt; - - /* A table containing the type of each PFN (/not/ MFN!). */ - unsigned long *pfn_type = NULL; - - /* A table of MFNs to map in the current region */ - xen_pfn_t *region_mfn = NULL; - - /* Types of the pfns in the current region */ - unsigned long region_pfn_type[MAX_BATCH_SIZE]; - - /* A temporary mapping, and a copy, of one frame of guest memory. */ - unsigned long *page = NULL; - - /* A copy of the pfn-to-mfn table frame list. */ - xen_pfn_t *p2m_frame_list = NULL; - - /* A temporary mapping of the guest's start_info page. */ - start_info_t *start_info; - - /* Our mapping of the current region (batch) */ - char *region_base; - - xc_mmu_t *mmu = NULL; - - /* used by debug verify code */ - unsigned long buf[PAGE_SIZE/sizeof(unsigned long)]; - - struct mmuext_op pin[MAX_PIN_BATCH]; - unsigned int nr_pins; - - uint64_t vcpumap = 1ULL; - unsigned int max_vcpu_id = 0; - int new_ctxt_format = 0; - - /* For info only */ - nr_pfns = 0; - - if ( !read_exact(io_fd, &p2m_size, sizeof(unsigned long)) ) - { - ERROR("read: p2m_size"); - goto out; - } - DPRINTF("xc_linux_restore start: p2m_size = %lx\n", p2m_size); - - /* - * XXX For now, 32bit dom0's can only save/restore 32bit domUs - * on 64bit hypervisors. 
- */ - memset(&domctl, 0, sizeof(domctl)); - domctl.domain = dom; - domctl.cmd = XEN_DOMCTL_set_address_size; - domctl.u.address_size.size = sizeof(unsigned long) * 8; - rc = do_domctl(xc_handle, &domctl); - if ( rc != 0 ) { - ERROR("Unable to set guest address size."); - goto out; - } - - if(!get_platform_info(xc_handle, dom, - &max_mfn, &hvirt_start, &pt_levels)) { - ERROR("Unable to get platform info."); - return 1; - } - - if (lock_pages(&ctxt, sizeof(ctxt))) { - /* needed for build domctl, but might as well do early */ - ERROR("Unable to lock ctxt"); - return 1; - } - - if (!(p2m_frame_list = malloc(P2M_FL_SIZE))) { - ERROR("Couldn't allocate p2m_frame_list array"); - goto out; - } - - /* Read first entry of P2M list, or extended-info signature (~0UL). */ - if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) { - ERROR("read extended-info signature failed"); - goto out; - } - - if (p2m_frame_list[0] == ~0UL) { - uint32_t tot_bytes; - - /* Next 4 bytes: total size of following extended info. */ - if (!read_exact(io_fd, &tot_bytes, sizeof(tot_bytes))) { - ERROR("read extended-info size failed"); - goto out; - } - - while (tot_bytes) { - uint32_t chunk_bytes; - char chunk_sig[4]; - - /* 4-character chunk signature + 4-byte remaining chunk size. */ - if (!read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) || - !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes))) { - ERROR("read extended-info chunk signature failed"); - goto out; - } - tot_bytes -= 8; - - /* VCPU context structure? */ - if (!strncmp(chunk_sig, "vcpu", 4)) { - if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) { - ERROR("read extended-info vcpu context failed"); - goto out; - } - tot_bytes -= sizeof(struct vcpu_guest_context); - chunk_bytes -= sizeof(struct vcpu_guest_context); - - if (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3)) - pae_extended_cr3 = 1; - } - - /* Any remaining bytes of this chunk: read and discard. */ - while (chunk_bytes) { - unsigned long sz = chunk_bytes; - if ( sz > P2M_FL_SIZE ) - sz = P2M_FL_SIZE; - if (!read_exact(io_fd, p2m_frame_list, sz)) { - ERROR("read-and-discard extended-info chunk bytes failed"); - goto out; - } - chunk_bytes -= sz; - tot_bytes -= sz; - } - } - - /* Now read the real first entry of P2M list. */ - if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) { - ERROR("read first entry of p2m_frame_list failed"); - goto out; - } - } - - /* First entry is already read into the p2m array. */ - if (!read_exact(io_fd, &p2m_frame_list[1], P2M_FL_SIZE - sizeof(long))) { - ERROR("read p2m_frame_list failed"); - goto out; - } - - /* We want zeroed memory so use calloc rather than malloc. */ - p2m = calloc(p2m_size, sizeof(xen_pfn_t)); - pfn_type = calloc(p2m_size, sizeof(unsigned long)); - region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t)); - p2m_batch = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t)); - - if ((p2m == NULL) || (pfn_type == NULL) || - (region_mfn == NULL) || (p2m_batch == NULL)) { - ERROR("memory alloc failed"); - errno = ENOMEM; - goto out; - } - - if (lock_pages(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) { - ERROR("Could not lock region_mfn"); - goto out; - } - - if (lock_pages(p2m_batch, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) { - ERROR("Could not lock p2m_batch"); - goto out; - } - - /* Get the domain's shared-info frame. 
*/ - domctl.cmd = XEN_DOMCTL_getdomaininfo; - domctl.domain = (domid_t)dom; - if (xc_domctl(xc_handle, &domctl) < 0) { - ERROR("Could not get information on new domain"); - goto out; - } - shared_info_frame = domctl.u.getdomaininfo.shared_info_frame; - - /* Mark all PFNs as invalid; we allocate on demand */ - for ( pfn = 0; pfn < p2m_size; pfn++ ) - p2m[pfn] = INVALID_P2M_ENTRY; - - if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) { - ERROR("Could not initialise for MMU updates"); - goto out; - } - - DPRINTF("Reloading memory pages: 0%%\n"); - - /* - * Now simply read each saved frame into its new machine frame. - * We uncanonicalise page tables as we go. - */ - prev_pc = 0; - - n = m = 0; - while (1) { - - int j, nr_mfns = 0; - - this_pc = (n * 100) / p2m_size; - if ( (this_pc - prev_pc) >= 5 ) - { - PPRINTF("\b\b\b\b%3d%%", this_pc); - prev_pc = this_pc; - } - - if (!read_exact(io_fd, &j, sizeof(int))) { - ERROR("Error when reading batch size"); - goto out; - } - - PPRINTF("batch %d\n",j); - - if (j == -1) { - verify = 1; - DPRINTF("Entering page verify mode\n"); - continue; - } - - if (j == -2) { - new_ctxt_format = 1; - if (!read_exact(io_fd, &max_vcpu_id, sizeof(int)) || - (max_vcpu_id >= 64) || - !read_exact(io_fd, &vcpumap, sizeof(uint64_t))) { - ERROR("Error when reading max_vcpu_id"); - goto out; - } - continue; - } - - if (j == 0) - break; /* our work here is done */ - - if (j > MAX_BATCH_SIZE) { - ERROR("Max batch size exceeded. Giving up."); - goto out; - } - - if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) { - ERROR("Error when reading region pfn types"); - goto out; - } - - /* First pass for this batch: work out how much memory to alloc */ - nr_mfns = 0; - for ( i = 0; i < j; i++ ) - { - unsigned long pfn, pagetype; - pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; - pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; - - if ( (pagetype != XEN_DOMCTL_PFINFO_XTAB) && - (p2m[pfn] == INVALID_P2M_ENTRY) ) - { - /* Have a live PFN which hasn't had an MFN allocated */ - p2m_batch[nr_mfns++] = pfn; - } - } - - - /* Now allocate a bunch of mfns for this batch */ - if (nr_mfns && xc_domain_memory_populate_physmap( - xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) { - ERROR("Failed to allocate memory for batch.!\n"); - errno = ENOMEM; - goto out; - } - - /* Second pass for this batch: update p2m[] and region_mfn[] */ - nr_mfns = 0; - for ( i = 0; i < j; i++ ) - { - unsigned long pfn, pagetype; - pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; - pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; - - if ( pagetype == XEN_DOMCTL_PFINFO_XTAB) - region_mfn[i] = ~0UL; /* map will fail but we don't care */ - else - { - if (p2m[pfn] == INVALID_P2M_ENTRY) { - /* We just allocated a new mfn above; update p2m */ - p2m[pfn] = p2m_batch[nr_mfns++]; - nr_pfns++; - } - - /* setup region_mfn[] for batch map */ - region_mfn[i] = p2m[pfn]; - } - } - - /* Map relevant mfns */ - region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_WRITE, region_mfn, j); - - if ( region_base == NULL ) - { - ERROR("map batch failed"); - goto out; - } - - for ( i = 0; i < j; i++ ) - { - void *page; - unsigned long pagetype; - - pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; - pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; - - if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ) - /* a bogus/unmapped page: skip it */ - continue; - - if ( pfn > p2m_size ) - { - ERROR("pfn out of range"); - goto out; - } - - pfn_type[pfn] = pagetype; - - mfn = p2m[pfn]; 
- - /* In verify mode, we use a copy; otherwise we work in place */ - page = verify ? (void *)buf : (region_base + i*PAGE_SIZE); - - if (!read_exact(io_fd, page, PAGE_SIZE)) { - ERROR("Error when reading page (type was %lx)", pagetype); - goto out; - } - - pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK; - - if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) && - (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) ) - { - /* - ** A page table page - need to 'uncanonicalize' it, i.e. - ** replace all the references to pfns with the corresponding - ** mfns for the new domain. - ** - ** On PAE we need to ensure that PGDs are in MFNs < 4G, and - ** so we may need to update the p2m after the main loop. - ** Hence we defer canonicalization of L1s until then. - */ - if ((pt_levels != 3) || - pae_extended_cr3 || - (pagetype != XEN_DOMCTL_PFINFO_L1TAB)) { - - if (!uncanonicalize_pagetable(xc_handle, dom, - pagetype, page)) { - /* - ** Failing to uncanonicalize a page table can be ok - ** under live migration since the pages type may have - ** changed by now (and we'll get an update later). - */ - DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n", - pagetype >> 28, pfn, mfn); - nraces++; - continue; - } - } - } - else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB ) - { - ERROR("Bogus page type %lx page table is out of range: " - "i=%d p2m_size=%lu", pagetype, i, p2m_size); - goto out; - - } - - - if (verify) { - - int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE); - - if (res) { - - int v; - - DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx " - "actualcs=%08lx\n", pfn, pfn_type[pfn], - csum_page(region_base + i*PAGE_SIZE), - csum_page(buf)); - - for (v = 0; v < 4; v++) { - - unsigned long *p = (unsigned long *) - (region_base + i*PAGE_SIZE); - if (buf[v] != p[v]) - DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]); - } - } - } - - if (xc_add_mmu_update(xc_handle, mmu, - (((unsigned long long)mfn) << PAGE_SHIFT) - | MMU_MACHPHYS_UPDATE, pfn)) { - ERROR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn); - goto out; - } - } /* end of 'batch' for loop */ - - munmap(region_base, j*PAGE_SIZE); - n+= j; /* crude stats */ - - /* - * Discard cache for portion of file read so far up to last - * page boundary every 16MB or so. - */ - m += j; - if ( m > MAX_PAGECACHE_USAGE ) - { - discard_file_cache(io_fd, 0 /* no flush */); - m = 0; - } - } - - /* - * Ensure we flush all machphys updates before potential PAE-specific - * reallocations below. - */ - if (xc_finish_mmu_updates(xc_handle, mmu)) { - ERROR("Error doing finish_mmu_updates()"); - goto out; - } - - DPRINTF("Received all pages (%d races)\n", nraces); - - if ((pt_levels == 3) && !pae_extended_cr3) { - - /* - ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This - ** is a little awkward and involves (a) finding all such PGDs and - ** replacing them with 'lowmem' versions; (b) upating the p2m[] - ** with the new info; and (c) canonicalizing all the L1s using the - ** (potentially updated) p2m[]. - ** - ** This is relatively slow (and currently involves two passes through - ** the pfn_type[] array), but at least seems to be correct. May wish - ** to consider more complex approaches to optimize this later. 
- */ - - int j, k; - - /* First pass: find all L3TABs current in > 4G mfns and get new mfns */ - for ( i = 0; i < p2m_size; i++ ) - { - if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) == - XEN_DOMCTL_PFINFO_L3TAB) && - (p2m[i] > 0xfffffUL) ) - { - unsigned long new_mfn; - uint64_t l3ptes[4]; - uint64_t *l3tab; - - l3tab = (uint64_t *) - xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, - PROT_READ, p2m[i]); - - for(j = 0; j < 4; j++) - l3ptes[j] = l3tab[j]; - - munmap(l3tab, PAGE_SIZE); - - if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) { - ERROR("Couldn't get a page below 4GB :-("); - goto out; - } - - p2m[i] = new_mfn; - if (xc_add_mmu_update(xc_handle, mmu, - (((unsigned long long)new_mfn) - << PAGE_SHIFT) | - MMU_MACHPHYS_UPDATE, i)) { - ERROR("Couldn't m2p on PAE root pgdir"); - goto out; - } - - l3tab = (uint64_t *) - xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, - PROT_READ | PROT_WRITE, p2m[i]); - - for(j = 0; j < 4; j++) - l3tab[j] = l3ptes[j]; - - munmap(l3tab, PAGE_SIZE); - - } - } - - /* Second pass: find all L1TABs and uncanonicalize them */ - j = 0; - - for ( i = 0; i < p2m_size; i++ ) - { - if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) == - XEN_DOMCTL_PFINFO_L1TAB) ) - { - region_mfn[j] = p2m[i]; - j++; - } - - if(i == (p2m_size-1) || j == MAX_BATCH_SIZE) { - - if (!(region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_READ | PROT_WRITE, - region_mfn, j))) { - ERROR("map batch failed"); - goto out; - } - - for(k = 0; k < j; k++) { - if(!uncanonicalize_pagetable(xc_handle, dom, - XEN_DOMCTL_PFINFO_L1TAB, - region_base + k*PAGE_SIZE)) { - ERROR("failed uncanonicalize pt!"); - goto out; - } - } - - munmap(region_base, j*PAGE_SIZE); - j = 0; - } - } - - if (xc_finish_mmu_updates(xc_handle, mmu)) { - ERROR("Error doing finish_mmu_updates()"); - goto out; - } - } - - /* - * Pin page tables. Do this after writing to them as otherwise Xen - * will barf when doing the type-checking. - */ - nr_pins = 0; - for ( i = 0; i < p2m_size; i++ ) - { - if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 ) - continue; - - switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK ) - { - case XEN_DOMCTL_PFINFO_L1TAB: - pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE; - break; - - case XEN_DOMCTL_PFINFO_L2TAB: - pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE; - break; - - case XEN_DOMCTL_PFINFO_L3TAB: - pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE; - break; - - case XEN_DOMCTL_PFINFO_L4TAB: - pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE; - break; - - default: - continue; - } - - pin[nr_pins].arg1.mfn = p2m[i]; - nr_pins++; - - /* Batch full? Then flush. */ - if (nr_pins == MAX_PIN_BATCH) { - if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) { - ERROR("Failed to pin batch of %d page tables", nr_pins); - goto out; - } - nr_pins = 0; - } - } - - /* Flush final partial batch. 
-    if ((nr_pins != 0) && (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0)) {
-        ERROR("Failed to pin batch of %d page tables", nr_pins);
-        goto out;
-    }
-
-    DPRINTF("\b\b\b\b100%%\n");
-    DPRINTF("Memory reloaded (%ld pages)\n", nr_pfns);
-
-    /* Get the list of PFNs that are not in the pseudo-phys map */
-    {
-        unsigned int count;
-        unsigned long *pfntab;
-        int nr_frees, rc;
-
-        if (!read_exact(io_fd, &count, sizeof(count))) {
-            ERROR("Error when reading pfn count");
-            goto out;
-        }
-
-        if (!(pfntab = malloc(sizeof(unsigned long) * count))) {
-            ERROR("Out of memory");
-            goto out;
-        }
-
-        if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) {
-            ERROR("Error when reading pfntab");
-            goto out;
-        }
-
-        nr_frees = 0;
-        for (i = 0; i < count; i++) {
-
-            unsigned long pfn = pfntab[i];
-
-            if (p2m[pfn] != INVALID_P2M_ENTRY) {
-                /* pfn is not in physmap now, but was at some point during
-                   the save/migration process - need to free it */
-                pfntab[nr_frees++] = p2m[pfn];
-                p2m[pfn] = INVALID_P2M_ENTRY; // not in pseudo-physical map
-            }
-        }
-
-        if (nr_frees > 0) {
-
-            struct xen_memory_reservation reservation = {
-                .nr_extents   = nr_frees,
-                .extent_order = 0,
-                .domid        = dom
-            };
-            set_xen_guest_handle(reservation.extent_start, pfntab);
-
-            if ((rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation,
-                                   &reservation)) != nr_frees) {
-                ERROR("Could not decrease reservation: %d", rc);
-                goto out;
-            } else
-                DPRINTF("Decreased reservation by %d pages\n", nr_frees);
-        }
-    }
-
-    for (i = 0; i <= max_vcpu_id; i++) {
-        if (!(vcpumap & (1ULL << i)))
-            continue;
-
-        if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
-            ERROR("Error when reading ctxt %d", i);
-            goto out;
-        }
-
-        if ( !new_ctxt_format )
-            ctxt.flags |= VGCF_online;
-
-        if (i == 0) {
-            /*
-             * Uncanonicalise the suspend-record frame number and poke
-             * resume record.
-             */
-            pfn = ctxt.user_regs.edx;
-            if ((pfn >= p2m_size) ||
-                (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
-                ERROR("Suspend record frame number is bad");
-                goto out;
-            }
-            ctxt.user_regs.edx = mfn = p2m[pfn];
-            start_info = xc_map_foreign_range(
-                xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
-            start_info->nr_pages = p2m_size;
-            start_info->shared_info = shared_info_frame << PAGE_SHIFT;
-            start_info->flags = 0;
-            *store_mfn = start_info->store_mfn = p2m[start_info->store_mfn];
-            start_info->store_evtchn = store_evtchn;
-            start_info->console.domU.mfn = p2m[start_info->console.domU.mfn];
-            start_info->console.domU.evtchn = console_evtchn;
-            *console_mfn = start_info->console.domU.mfn;
-            munmap(start_info, PAGE_SIZE);
-        }
-
-        /* Uncanonicalise each GDT frame number. */
-        if (ctxt.gdt_ents > 8192) {
-            ERROR("GDT entry count out of range");
-            goto out;
-        }
-
-        for (j = 0; (512*j) < ctxt.gdt_ents; j++) {
-            pfn = ctxt.gdt_frames[j];
-            if ((pfn >= p2m_size) ||
-                (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
-                ERROR("GDT frame number is bad");
-                goto out;
-            }
-            ctxt.gdt_frames[j] = p2m[pfn];
-        }
-
-        /* Uncanonicalise the page table base pointer. */
-        pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]);
-
-        if (pfn >= p2m_size) {
-            ERROR("PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
-                  pfn, p2m_size, pfn_type[pfn]);
-            goto out;
-        }
-
-        if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
-             ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
-            ERROR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
-                  pfn, p2m_size, pfn_type[pfn],
-                  (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
-            goto out;
-        }
-
-        ctxt.ctrlreg[3] = xen_pfn_to_cr3(p2m[pfn]);
-
-        /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
-        if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
-        {
-            pfn = xen_cr3_to_pfn(ctxt.ctrlreg[1]);
-
-            if (pfn >= p2m_size) {
-                ERROR("User PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
-                      pfn, p2m_size, pfn_type[pfn]);
-                goto out;
-            }
-
-            if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
-                 ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
-                ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
-                      pfn, p2m_size, pfn_type[pfn],
-                      (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
-                goto out;
-            }
-
-            ctxt.ctrlreg[1] = xen_pfn_to_cr3(p2m[pfn]);
-        }
-
-        domctl.cmd = XEN_DOMCTL_setvcpucontext;
-        domctl.domain = (domid_t)dom;
-        domctl.u.vcpucontext.vcpu = i;
-        set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt);
-        rc = xc_domctl(xc_handle, &domctl);
-        if (rc != 0) {
-            ERROR("Couldn't build vcpu%d", i);
-            goto out;
-        }
-    }
-
-    if (!read_exact(io_fd, shared_info_page, PAGE_SIZE)) {
-        ERROR("Error when reading shared info page");
-        goto out;
-    }
-
-    /* clear any pending events and the selector */
-    memset(&(shared_info->evtchn_pending[0]), 0,
-           sizeof (shared_info->evtchn_pending));
-    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
-        shared_info->vcpu_info[i].evtchn_pending_sel = 0;
-
-    /* Copy saved contents of shared-info page. No checking needed. */
-    page = xc_map_foreign_range(
-        xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame);
-    memcpy(page, shared_info, PAGE_SIZE);
-    munmap(page, PAGE_SIZE);
-
-    /* Uncanonicalise the pfn-to-mfn table frame-number list. */
-    for (i = 0; i < P2M_FL_ENTRIES; i++) {
-        pfn = p2m_frame_list[i];
-        if ((pfn >= p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
-            ERROR("PFN-to-MFN frame number is bad");
-            goto out;
-        }
-
-        p2m_frame_list[i] = p2m[pfn];
-    }
-
-    /* Copy the P2M we've constructed to the 'live' P2M */
-    if (!(live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_WRITE,
-                                          p2m_frame_list, P2M_FL_ENTRIES))) {
-        ERROR("Couldn't map p2m table");
-        goto out;
-    }
-
-    memcpy(live_p2m, p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
-    munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
-
-    DPRINTF("Domain ready to be built.\n");
-
- out:
-    if ( (rc != 0) && (dom != 0) )
-        xc_domain_destroy(xc_handle, dom);
-    free(mmu);
-    free(p2m);
-    free(pfn_type);
-
-    /* discard cache for save file */
-    discard_file_cache(io_fd, 1 /* flush */);
-
-    DPRINTF("Restore exit with rc=%d\n", rc);
-
-    return rc;
-}
diff -r 602d061ff51f -r e518f2fbdd72 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h	Thu Apr 05 10:43:50 2007 +0100
+++ b/tools/libxc/xenguest.h	Thu Apr 05 15:11:22 2007 +0100
@@ -38,29 +38,21 @@ int xc_hvm_save(int xc_handle, int io_fd
                 void (*qemu_flip_buffer)(int, int));
 
 /**
- * This function will restore a saved domain running Linux.
+ * This function will restore a saved domain.
  *
  * @parm xc_handle a handle to an open hypervisor interface
  * @parm fd the file descriptor to restore a domain from
  * @parm dom the id of the domain
  * @parm store_evtchn the store event channel for this domain to use
  * @parm store_mfn returned with the mfn of the store page
+ * @parm hvm non-zero if this is an HVM restore
+ * @parm pae non-zero if this HVM domain has PAE support enabled
  * @return 0 on success, -1 on failure
  */
-int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
-                     unsigned int store_evtchn, unsigned long *store_mfn,
-                     unsigned int console_evtchn, unsigned long *console_mfn);
-
-/**
- * This function will restore a saved hvm domain running an unmodified guest.
- *
- * @parm store_mfn pass mem size & returned with the mfn of the store page
- * @return 0 on success, -1 on failure
- */
-int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom,
-                   unsigned int store_evtchn,
-                   unsigned long *store_mfn,
-                   unsigned int pae, unsigned int apic);
+int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
+                      unsigned int store_evtchn, unsigned long *store_mfn,
+                      unsigned int console_evtchn, unsigned long *console_mfn,
+                      unsigned int hvm, unsigned int pae);
 
 /**
  * This function will create a domain for a paravirtualized Linux
diff -r 602d061ff51f -r e518f2fbdd72 tools/libxc/xg_private.c
--- a/tools/libxc/xg_private.c	Thu Apr 05 10:43:50 2007 +0100
+++ b/tools/libxc/xg_private.c	Thu Apr 05 15:11:22 2007 +0100
@@ -204,16 +204,6 @@ __attribute__((weak))
     int (*suspend)(int domid),
     void *(*init_qemu_maps)(int, unsigned),
     void (*qemu_flip_buffer)(int, int))
-{
-    errno = ENOSYS;
-    return -1;
-}
-
-__attribute__((weak))
-    int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom,
-                       unsigned int store_evtchn,
-                       unsigned long *store_mfn,
-                       unsigned int pae, unsigned int apic)
 {
     errno = ENOSYS;
     return -1;
diff -r 602d061ff51f -r e518f2fbdd72 tools/xcutils/xc_restore.c
--- a/tools/xcutils/xc_restore.c	Thu Apr 05 10:43:50 2007 +0100
+++ b/tools/xcutils/xc_restore.c	Thu Apr 05 15:11:22 2007 +0100
@@ -39,14 +39,8 @@ main(int argc, char **argv)
     pae  = atoi(argv[6]);
     apic = atoi(argv[7]);
 
-    if ( hvm )
-        ret = xc_hvm_restore(xc_fd, io_fd, domid,
-                             store_evtchn, &store_mfn,
-                             pae, apic);
-    else
-        ret = xc_linux_restore(xc_fd, io_fd, domid,
-                               store_evtchn, &store_mfn,
-                               console_evtchn, &console_mfn);
+    ret = xc_domain_restore(xc_fd, io_fd, domid, store_evtchn, &store_mfn,
+                            console_evtchn, &console_mfn, hvm, pae);
 
     if ( ret == 0 )
     {

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
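
For tools that call into libxc directly, the migration to the merged entry
point is mechanical: one function with hvm/pae flags instead of two separate
functions. A minimal caller sketch, mirroring the xc_restore change above;
restore_domain() is illustrative, and only the xc_domain_restore() prototype
from this patch plus the usual xc_interface_open()/xc_interface_close() pair
are assumed:

#include <stdio.h>
#include <stdint.h>
#include <xenctrl.h>
#include <xenguest.h>

/* Illustrative wrapper: restore either a PV or an HVM guest from a
 * save image already open on io_fd. The hvm/pae flags now select the
 * image format in place of the old two-function split. */
static int restore_domain(int io_fd, uint32_t domid,
                          unsigned int store_evtchn,
                          unsigned int console_evtchn,
                          unsigned int hvm, unsigned int pae)
{
    unsigned long store_mfn = 0, console_mfn = 0;
    int xc_fd, ret;

    xc_fd = xc_interface_open();    /* hypervisor control handle */
    if ( xc_fd < 0 )
        return -1;

    ret = xc_domain_restore(xc_fd, io_fd, domid, store_evtchn, &store_mfn,
                            console_evtchn, &console_mfn, hvm, pae);
    if ( ret == 0 )
        printf("store_mfn=%lx console_mfn=%lx\n", store_mfn, console_mfn);

    xc_interface_close(xc_fd);
    return ret;
}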
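
The xc_add_mmu_update(..., MMU_MACHPHYS_UPDATE, pfn) calls in the page reload
loop are what populate Xen's machine-to-phys table for the restored domain.
Pulled out as a hypothetical helper (assuming the xc_mmu_t batch handle that
libxc's xc_init_mmu_updates() returns, and PAGE_SHIFT as used throughout the
patch), the operation is just:

#include <xenctrl.h>

/* Sketch: queue one mfn -> pfn entry for Xen's M2P table. The update
 * is batched; nothing reaches Xen until xc_finish_mmu_updates(). */
static int queue_m2p_update(int xc_handle, xc_mmu_t *mmu,
                            unsigned long mfn, unsigned long pfn)
{
    unsigned long long ptr =
        ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;

    return xc_add_mmu_update(xc_handle, mmu, ptr, pfn);
}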
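
The pinning pass uses the same fill-and-flush batching idea for mmuext ops:
queue MMUEXT_PIN_*_TABLE commands and flush whenever the batch fills, with the
caller flushing any final partial batch exactly as the code above does. A
sketch of that pattern as a hypothetical helper; the 1024 batch size is an
assumption standing in for the restore code's MAX_PIN_BATCH:

#include <xenctrl.h>

#define MAX_PIN_BATCH 1024   /* assumed value of the restore code's constant */

/* Sketch: queue one pin op, flushing the batch to Xen when it fills.
 * Returns -1 if Xen rejects a flushed batch. */
static int queue_pin(int xc_handle, uint32_t dom,
                     struct mmuext_op *pin, unsigned int *nr_pins,
                     unsigned int cmd, xen_pfn_t mfn)
{
    pin[*nr_pins].cmd = cmd;
    pin[*nr_pins].arg1.mfn = mfn;

    if ( ++(*nr_pins) == MAX_PIN_BATCH )
    {
        if ( xc_mmuext_op(xc_handle, pin, *nr_pins, dom) < 0 )
            return -1;
        *nr_pins = 0;        /* batch flushed, start refilling */
    }
    return 0;
}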
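
Finally, the pfntab handling shows the standard way to hand frames back to
Xen: translate each stale pfn through p2m[], then issue a single
XENMEM_decrease_reservation call for the whole list. A self-contained sketch
under the same assumptions (free_stale_frames() is illustrative; mfns[] holds
nr_frees machine frames belonging to dom):

#include <xenctrl.h>
#include <xen/memory.h>

/* Sketch: release nr_frees single-page extents back to Xen. A short
 * return from the memory op means some frames were not freed, which
 * the restore path treats as fatal. */
static int free_stale_frames(int xc_handle, uint32_t dom,
                             xen_pfn_t *mfns, unsigned long nr_frees)
{
    struct xen_memory_reservation reservation = {
        .nr_extents   = nr_frees,
        .extent_order = 0,           /* order-0: one page per extent */
        .domid        = dom
    };
    int rc;

    set_xen_guest_handle(reservation.extent_start, mfns);

    rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation, &reservation);
    return (rc == (int)nr_frees) ? 0 : -1;
}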