[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen master] tools/libxc: Remove legacy migration implementation



commit b15bc4345e772df92e5ffdbc4c1e9ae2a6206617
Author:     Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
AuthorDate: Mon Jul 20 11:37:54 2015 +0100
Commit:     Ian Campbell <ian.campbell@xxxxxxxxxx>
CommitDate: Tue Jul 28 11:11:27 2015 +0100

    tools/libxc: Remove legacy migration implementation
    
    It is no longer used.
    
    One complication is that xc_map_m2p() has users in xc_offline_page.c,
    xen-mfndump and xen-mceinj.  Move its implementation into
    xc_offline_page (for want of a better location) beside its current
    user.
    
    Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
    CC: Ian Campbell <Ian.Campbell@xxxxxxxxxx>
    CC: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
    CC: Wei Liu <wei.liu2@xxxxxxxxxx>
    Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
    [ ijc -- drop mentions of removed files from MAINTAINERS ]
---
 MAINTAINERS                     |    2 -
 tools/libxc/Makefile            |    1 -
 tools/libxc/xc_domain_restore.c | 2411 ---------------------------------------
 tools/libxc/xc_domain_save.c    | 2198 -----------------------------------
 tools/libxc/xc_offline_page.c   |   59 +
 tools/libxc/xg_save_restore.h   |  247 ----
 6 files changed, 59 insertions(+), 4859 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index a4e64ea..73a96c9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -272,8 +272,6 @@ M:  Shriram Rajagopalan <rshriram@xxxxxxxxx>
 M:     Yang Hongyang <yanghy@xxxxxxxxxxxxxx>
 S:     Maintained
 F:     docs/README.remus
-F:     tools/libxc/xc_domain_save.c
-F:     tools/libxc/xc_domain_restore.c
 F:     tools/blktap2/drivers/block-remus.c
 F:     tools/blktap2/drivers/hashtable*
 F:     tools/libxl/libxl_remus_*
diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
index b0a3e05..8ae0ea0 100644
--- a/tools/libxc/Makefile
+++ b/tools/libxc/Makefile
@@ -54,7 +54,6 @@ CTRL_SRCS-$(CONFIG_MiniOS) += xc_minios.c
 GUEST_SRCS-y :=
 GUEST_SRCS-y += xg_private.c xc_suspend.c
 ifeq ($(CONFIG_MIGRATE),y)
-GUEST_SRCS-y += xc_domain_restore.c xc_domain_save.c
 GUEST_SRCS-y += xc_sr_common.c
 GUEST_SRCS-$(CONFIG_X86) += xc_sr_common_x86.c
 GUEST_SRCS-$(CONFIG_X86) += xc_sr_common_x86_pv.c
diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
deleted file mode 100644
index 8435f6b..0000000
--- a/tools/libxc/xc_domain_restore.c
+++ /dev/null
@@ -1,2411 +0,0 @@
-/******************************************************************************
- * xc_domain_restore.c
- *
- * Restore the state of a guest session.
- *
- * Copyright (c) 2003, K A Fraser.
- * Copyright (c) 2006, Intel Corporation
- * Copyright (c) 2007, XenSource Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
- *
- */
-
-/*
- * The superpages flag in restore has two different meanings depending on
- * the type of domain.
- *
- * For an HVM domain, the flag means to look for properly aligned contiguous
- * pages and try to allocate a superpage to satisfy it.  If that fails,
- * fall back to small pages.
- *
- * For a PV domain, the flag means allocate all memory as superpages.  If that
- * fails, the restore fails.  This behavior is required for PV guests who
- * want to use superpages.
- */
-
-#include <stdlib.h>
-#include <unistd.h>
-#include <inttypes.h>
-
-#include "xg_private.h"
-#include "xg_save_restore.h"
-#include "xc_dom.h"
-
-#include <xen/hvm/ioreq.h>
-#include <xen/hvm/params.h>
-
-struct restore_ctx {
-    unsigned long max_mfn; /* max mfn of the current host machine */
-    unsigned long hvirt_start; /* virtual starting address of the hypervisor */
-    unsigned int pt_levels; /* #levels of page tables used by the current guest */
-    unsigned long nr_pfns; /* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */
-    xen_pfn_t *live_p2m; /* Live mapping of the table mapping each PFN to its current MFN. */
-    xen_pfn_t *p2m; /* A table mapping each PFN to its new MFN. */
-    xen_pfn_t *p2m_batch; /* A table of P2M mappings in the current region.  */
-    xen_pfn_t *p2m_saved_batch; /* Copy of p2m_batch array for pv superpage alloc */
-    int superpages; /* Superpage allocation has been requested */
-    int hvm;    /* This is an hvm domain */
-    int completed; /* Set when a consistent image is available */
-    int last_checkpoint; /* Set when we should commit to the current checkpoint when it completes. */
-    int compressing; /* Set when sender signals that pages would be sent compressed (for Remus) */
-    struct domain_info_context dinfo;
-};
-
-#define HEARTBEAT_MS 1000
-
-#ifndef __MINIOS__
-static ssize_t rdexact(xc_interface *xch, struct restore_ctx *ctx,
-                       int fd, void* buf, size_t size)
-{
-    size_t offset = 0;
-    ssize_t len;
-    struct timeval tv;
-    fd_set rfds;
-
-    while ( offset < size )
-    {
-        if ( ctx->completed ) {
-            /* expect a heartbeat every HEARBEAT_MS ms maximum */
-            tv.tv_sec = HEARTBEAT_MS / 1000;
-            tv.tv_usec = (HEARTBEAT_MS % 1000) * 1000;
-
-            FD_ZERO(&rfds);
-            FD_SET(fd, &rfds);
-            len = select(fd + 1, &rfds, NULL, NULL, &tv);
-            if ( len == -1 && errno == EINTR )
-                continue;
-            if ( !FD_ISSET(fd, &rfds) ) {
-                ERROR("%s failed (select returned %zd)", __func__, len);
-                errno = ETIMEDOUT;
-                return -1;
-            }
-        }
-
-        len = read(fd, buf + offset, size - offset);
-        if ( (len == -1) && ((errno == EINTR) || (errno == EAGAIN)) )
-            continue;
-        if ( len == 0 ) {
-            ERROR("0-length read");
-            errno = 0;
-        }
-        if ( len <= 0 ) {
-            ERROR("%s failed (read rc: %zd, errno: %d)", __func__, len, errno);
-            return -1;
-        }
-        offset += len;
-    }
-
-    return 0;
-}
-
-#define RDEXACT(fd,buf,size) rdexact(xch, ctx, fd, buf, size)
-#else
-#define RDEXACT read_exact
-#endif
-
-#define SUPERPAGE_PFN_SHIFT  9
-#define SUPERPAGE_NR_PFNS    (1UL << SUPERPAGE_PFN_SHIFT)
-#define SUPERPAGE(_pfn) ((_pfn) & (~(SUPERPAGE_NR_PFNS-1)))
-#define SUPER_PAGE_START(pfn)    (((pfn) & (SUPERPAGE_NR_PFNS-1)) == 0 )
-
-/*
-** When we're restoring into a pv superpage-allocated guest, we take
-** a copy of the p2m_batch array to preserve the pfn, then allocate the
-** corresponding superpages.  We then fill in the p2m array using the saved
-** pfns.
-*/
-static int alloc_superpage_mfns(
-    xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, int nr_mfns)
-{
-    int i, j, max = 0;
-    unsigned long pfn, base_pfn, mfn;
-
-    for (i = 0; i < nr_mfns; i++)
-    {
-        pfn = ctx->p2m_batch[i];
-        base_pfn = SUPERPAGE(pfn);
-        if (ctx->p2m[base_pfn] != (INVALID_P2M_ENTRY-2))
-        {
-            ctx->p2m_saved_batch[max] = base_pfn;
-            ctx->p2m_batch[max] = base_pfn;
-            max++;
-            ctx->p2m[base_pfn] = INVALID_P2M_ENTRY-2;
-        }
-    }
-    if (xc_domain_populate_physmap_exact(xch, dom, max, SUPERPAGE_PFN_SHIFT,
-                                         0, ctx->p2m_batch) != 0)
-        return 1;
-
-    for (i = 0; i < max; i++)
-    {
-        mfn = ctx->p2m_batch[i];
-        pfn = ctx->p2m_saved_batch[i];
-        for (j = 0; j < SUPERPAGE_NR_PFNS; j++)
-            ctx->p2m[pfn++] = mfn++;
-    }
-    return 0;
-}
-/*
-** In the state file (or during transfer), all page-table pages are
-** converted into a 'canonical' form where references to actual mfns
-** are replaced with references to the corresponding pfns.
-** This function inverts that operation, replacing the pfn values with
-** the (now known) appropriate mfn values.
-*/
-static int uncanonicalize_pagetable(
-    xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, void *page)
-{
-    int i, rc, pte_last, nr_mfns = 0;
-    unsigned long pfn;
-    uint64_t pte;
-    struct domain_info_context *dinfo = &ctx->dinfo;
-
-    pte_last = PAGE_SIZE / 8;
-
-    /* First pass: work out how many (if any) MFNs we need to alloc */
-    for ( i = 0; i < pte_last; i++ )
-    {
-        pte = ((uint64_t *)page)[i];
-
-        /* XXX SMH: below needs fixing for PROT_NONE etc */
-        if ( !(pte & _PAGE_PRESENT) )
-            continue;
-        
-        pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
-        
-        if ( pfn >= dinfo->p2m_size )
-        {
-            /* This "page table page" is probably not one; bail. */
-            ERROR("Frame number in page table is out of range: "
-                  "i=%d pfn=0x%lx p2m_size=%lu",
-                  i, pfn, dinfo->p2m_size);
-            return 0;
-        }
-        
-        if ( ctx->p2m[pfn] == INVALID_P2M_ENTRY )
-        {
-            /* Have a 'valid' PFN without a matching MFN - need to alloc */
-            ctx->p2m_batch[nr_mfns++] = pfn; 
-            ctx->p2m[pfn]--;
-        }
-    }
-
-    /* Allocate the requisite number of mfns. */
-    if (nr_mfns)
-    {
-        if (!ctx->hvm && ctx->superpages)
-            rc = alloc_superpage_mfns(xch, dom, ctx, nr_mfns);
-        else
-            rc = xc_domain_populate_physmap_exact(xch, dom, nr_mfns, 0, 0,
-                                                  ctx->p2m_batch);
-
-        if (rc)
-        {
-            ERROR("Failed to allocate memory for batch.!\n");
-            errno = ENOMEM;
-            return 0;
-        }
-    }
-    
-    /* Second pass: uncanonicalize each present PTE */
-    nr_mfns = 0;
-    for ( i = 0; i < pte_last; i++ )
-    {
-        pte = ((uint64_t *)page)[i];
-        
-        /* XXX SMH: below needs fixing for PROT_NONE etc */
-        if ( !(pte & _PAGE_PRESENT) )
-            continue;
-        
-        pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
-
-        if ( ctx->p2m[pfn] == (INVALID_P2M_ENTRY-1) )
-            ctx->p2m[pfn] = ctx->p2m_batch[nr_mfns++];
-
-        pte &= ~MADDR_MASK_X86;
-        pte |= (uint64_t)ctx->p2m[pfn] << PAGE_SHIFT;
-
-        ((uint64_t *)page)[i] = (uint64_t)pte;
-    }
-
-    return 1;
-}
-
-
-/* Load the p2m frame list, plus potential extended info chunk */
-static xen_pfn_t *load_p2m_frame_list(
-    xc_interface *xch, struct restore_ctx *ctx,
-    int io_fd, int *pae_extended_cr3, int *ext_vcpucontext,
-    uint32_t *vcpuextstate_size)
-{
-    xen_pfn_t *p2m_frame_list;
-    vcpu_guest_context_any_t ctxt;
-    xen_pfn_t p2m_fl_zero;
-    struct domain_info_context *dinfo = &ctx->dinfo;
-
-    /* Read first entry of P2M list, or extended-info signature (~0UL). */
-    if ( RDEXACT(io_fd, &p2m_fl_zero, sizeof(long)) )
-    {
-        PERROR("read extended-info signature failed");
-        return NULL;
-    }
-    
-    if ( p2m_fl_zero == ~0UL )
-    {
-        uint32_t tot_bytes;
-        
-        /* Next 4 bytes: total size of following extended info. */
-        if ( RDEXACT(io_fd, &tot_bytes, sizeof(tot_bytes)) )
-        {
-            PERROR("read extended-info size failed");
-            return NULL;
-        }
-        
-        while ( tot_bytes )
-        {
-            uint32_t chunk_bytes;
-            char     chunk_sig[4];
-            
-            /* 4-character chunk signature + 4-byte remaining chunk size. */
-            if ( RDEXACT(io_fd, chunk_sig, sizeof(chunk_sig)) ||
-                 RDEXACT(io_fd, &chunk_bytes, sizeof(chunk_bytes)) ||
-                 (tot_bytes < (chunk_bytes + 8)) )
-            {
-                PERROR("read extended-info chunk signature failed");
-                return NULL;
-            }
-            tot_bytes -= 8;
-
-            /* VCPU context structure? */
-            if ( !strncmp(chunk_sig, "vcpu", 4) )
-            {
-                /* Pick a guest word-size and PT depth from the ctxt size */
-                if ( chunk_bytes == sizeof (ctxt.x32) )
-                {
-                    dinfo->guest_width = 4;
-                    ctx->pt_levels = 3;
-                }
-                else if ( chunk_bytes == sizeof (ctxt.x64) )
-                {
-                    dinfo->guest_width = 8;
-                    ctx->pt_levels = 4;
-                }
-                else 
-                {
-                    ERROR("bad extended-info context size %d", chunk_bytes);
-                    return NULL;
-                }
-
-                if ( RDEXACT(io_fd, &ctxt, chunk_bytes) )
-                {
-                    PERROR("read extended-info vcpu context failed");
-                    return NULL;
-                }
-                tot_bytes -= chunk_bytes;
-                chunk_bytes = 0;
-
-                if ( GET_FIELD(&ctxt, vm_assist, dinfo->guest_width)
-                     & (1UL << VMASST_TYPE_pae_extended_cr3) )
-                    *pae_extended_cr3 = 1;
-            }
-            else if ( !strncmp(chunk_sig, "extv", 4) )
-            {
-                *ext_vcpucontext = 1;
-            }
-            else if ( !strncmp(chunk_sig, "xcnt", 4) )
-            {
-                if ( RDEXACT(io_fd, vcpuextstate_size, sizeof(*vcpuextstate_size)) )
-                {
-                    PERROR("read extended vcpu state size failed");
-                    return NULL;
-                }
-                tot_bytes -= chunk_bytes;
-                chunk_bytes = 0;
-            }
-            
-            /* Any remaining bytes of this chunk: read and discard. */
-            while ( chunk_bytes )
-            {
-                unsigned long sz = min_t(unsigned long, chunk_bytes, sizeof(xen_pfn_t));
-                if ( RDEXACT(io_fd, &p2m_fl_zero, sz) )
-                {
-                    PERROR("read-and-discard extended-info chunk bytes failed");
-                    return NULL;
-                }
-                chunk_bytes -= sz;
-                tot_bytes   -= sz;
-            }
-        }
-
-        /* Now read the real first entry of P2M list. */
-        if ( RDEXACT(io_fd, &p2m_fl_zero, sizeof(xen_pfn_t)) )
-        {
-            PERROR("read first entry of p2m_frame_list failed");
-            return NULL;
-        }
-    }
-
-    /* Now that we know the guest's word-size, can safely allocate 
-     * the p2m frame list */
-    if ( (p2m_frame_list = malloc(P2M_TOOLS_FL_SIZE)) == NULL )
-    {
-        ERROR("Couldn't allocate p2m_frame_list array");
-        return NULL;
-    }
-
-    /* First entry has already been read. */
-    p2m_frame_list[0] = p2m_fl_zero;
-    if ( RDEXACT(io_fd, &p2m_frame_list[1], 
-                 (P2M_FL_ENTRIES - 1) * sizeof(xen_pfn_t)) )
-    {
-        PERROR("read p2m_frame_list failed");
-        free(p2m_frame_list);
-        return NULL;
-    }
-    
-    return p2m_frame_list;
-}
-
-typedef struct {
-    int ishvm;
-    union {
-        struct tailbuf_pv {
-            unsigned int pfncount;
-            unsigned long* pfntab;
-            unsigned int vcpucount;
-            unsigned char* vcpubuf;
-            unsigned char shared_info_page[PAGE_SIZE];
-        } pv;
-        struct tailbuf_hvm {
-            uint64_t magicpfns[3];
-            uint32_t hvmbufsize, reclen;
-            uint8_t* hvmbuf;
-            struct {
-                uint32_t magic;
-                uint32_t version;
-                uint64_t len;
-            } qemuhdr;
-            uint32_t qemubufsize;
-            uint8_t* qemubuf;
-        } hvm;
-    } u;
-} tailbuf_t;
-
-/* read stream until EOF, growing buffer as necssary */
-static int compat_buffer_qemu(xc_interface *xch, struct restore_ctx *ctx,
-                              int fd, struct tailbuf_hvm *buf)
-{
-    uint8_t *qbuf, *tmp;
-    int blen = 0, dlen = 0;
-    int rc;
-
-    /* currently save records tend to be about 7K */
-    blen = 8192;
-    if ( !(qbuf = malloc(blen)) ) {
-        ERROR("Error allocating QEMU buffer");
-        return -1;
-    }
-
-    while( (rc = read(fd, qbuf+dlen, blen-dlen)) > 0 ) {
-        DPRINTF("Read %d bytes of QEMU data\n", rc);
-        dlen += rc;
-
-        if (dlen == blen) {
-            DPRINTF("%d-byte QEMU buffer full, reallocating...\n", dlen);
-            blen += 4096;
-            tmp = realloc(qbuf, blen);
-            if ( !tmp ) {
-                ERROR("Error growing QEMU buffer to %d bytes", blen);
-                free(qbuf);
-                return -1;
-            }
-            qbuf = tmp;
-        }
-    }
-
-    if ( rc < 0 ) {
-        ERROR("Error reading QEMU data");
-        free(qbuf);
-        return -1;
-    }
-
-    if ( memcmp(qbuf, "QEVM", 4) ) {
-        ERROR("Invalid QEMU magic: 0x%08"PRIx32, *(uint32_t*)qbuf);
-        free(qbuf);
-        return -1;
-    }
-
-    buf->qemubuf = qbuf;
-    buf->qemubufsize = dlen;
-
-    return 0;
-}
-
-static int buffer_qemu(xc_interface *xch, struct restore_ctx *ctx,
-                       int fd, struct tailbuf_hvm *buf)
-{
-    uint32_t qlen;
-    uint8_t *tmp;
-
-    if ( RDEXACT(fd, &qlen, sizeof(qlen)) ) {
-        PERROR("Error reading QEMU header length");
-        return -1;
-    }
-
-    if ( qlen > buf->qemubufsize ) {
-        if ( buf->qemubuf) {
-            tmp = realloc(buf->qemubuf, qlen);
-            if ( tmp )
-                buf->qemubuf = tmp;
-            else {
-                ERROR("Error reallocating QEMU state buffer");
-                return -1;
-            }
-        } else {
-            buf->qemubuf = malloc(qlen);
-            if ( !buf->qemubuf ) {
-                ERROR("Error allocating QEMU state buffer");
-                return -1;
-            }
-        }
-    }
-    buf->qemubufsize = qlen;
-
-    if ( RDEXACT(fd, buf->qemubuf, buf->qemubufsize) ) {
-        PERROR("Error reading QEMU state");
-        return -1;
-    }
-
-    return 0;
-}
-
-static int dump_qemu(xc_interface *xch, uint32_t dom, struct tailbuf_hvm *buf)
-{
-    int saved_errno;
-    char path[256];
-    FILE *fp;
-
-    sprintf(path, XC_DEVICE_MODEL_RESTORE_FILE".%u", dom);
-    fp = fopen(path, "wb");
-    if ( !fp )
-        return -1;
-
-    DPRINTF("Writing %d bytes of QEMU data\n", buf->qemubufsize);
-    if ( fwrite(buf->qemubuf, 1, buf->qemubufsize, fp) != buf->qemubufsize) {
-        saved_errno = errno;
-        fclose(fp);
-        errno = saved_errno;
-        return -1;
-    }
-
-    fclose(fp);
-
-    return 0;
-}
-
-static int buffer_tail_hvm(xc_interface *xch, struct restore_ctx *ctx,
-                           struct tailbuf_hvm *buf, int fd,
-                           unsigned int max_vcpu_id, uint64_t *vcpumap,
-                           int ext_vcpucontext,
-                           uint32_t vcpuextstate_size)
-{
-    uint8_t *tmp;
-    unsigned char qemusig[21];
-
-    if ( RDEXACT(fd, buf->magicpfns, sizeof(buf->magicpfns)) ) {
-        PERROR("Error reading magic PFNs");
-        return -1;
-    }
-
-    if ( RDEXACT(fd, &buf->reclen, sizeof(buf->reclen)) ) {
-        PERROR("Error reading HVM params size");
-        return -1;
-    }
-
-    if ( buf->reclen > buf->hvmbufsize ) {
-        if ( buf->hvmbuf) {
-            tmp = realloc(buf->hvmbuf, buf->reclen);
-            if ( tmp ) {
-                buf->hvmbuf = tmp;
-                buf->hvmbufsize = buf->reclen;
-            } else {
-                ERROR("Error reallocating HVM param buffer");
-                return -1;
-            }
-        } else {
-            buf->hvmbuf = malloc(buf->reclen);
-            if ( !buf->hvmbuf ) {
-                ERROR("Error allocating HVM param buffer");
-                return -1;
-            }
-            buf->hvmbufsize = buf->reclen;
-        }
-    }
-
-    if ( RDEXACT(fd, buf->hvmbuf, buf->reclen) ) {
-        PERROR("Error reading HVM params");
-        return -1;
-    }
-
-    if ( RDEXACT(fd, qemusig, sizeof(qemusig)) ) {
-        PERROR("Error reading QEMU signature");
-        return -1;
-    }
-
-    /* The legacy live-migration QEMU record has no length information.
-     * Short of reimplementing the QEMU parser, we're forced to just read
-     * until EOF.
-     *
-     * Gets around this by sending a different signatures for the new
-     * live-migration QEMU record and Remus which includes a length
-     * prefix
-     */
-    if ( !memcmp(qemusig, "QemuDeviceModelRecord", sizeof(qemusig)) )
-        return compat_buffer_qemu(xch, ctx, fd, buf);
-    else if ( !memcmp(qemusig, "DeviceModelRecord0002", sizeof(qemusig)) ||
-              !memcmp(qemusig, "RemusDeviceModelState", sizeof(qemusig)) )
-        return buffer_qemu(xch, ctx, fd, buf);
-
-    qemusig[20] = '\0';
-    ERROR("Invalid QEMU signature: %s", qemusig);
-    return -1;
-}
-
-static int buffer_tail_pv(xc_interface *xch, struct restore_ctx *ctx,
-                          struct tailbuf_pv *buf, int fd,
-                          unsigned int max_vcpu_id, uint64_t *vcpumap,
-                          int ext_vcpucontext,
-                          uint32_t vcpuextstate_size)
-{
-    unsigned int i;
-    size_t pfnlen, vcpulen;
-    struct domain_info_context *dinfo = &ctx->dinfo;
-
-    /* TODO: handle changing pfntab and vcpu counts */
-    /* PFN tab */
-    if ( RDEXACT(fd, &buf->pfncount, sizeof(buf->pfncount)) ||
-         (buf->pfncount > (1U << 28)) ) /* up to 1TB of address space */
-    {
-        PERROR("Error when reading pfn count");
-        return -1;
-    }
-    pfnlen = sizeof(unsigned long) * buf->pfncount;
-    if ( !(buf->pfntab) ) {
-        if ( !(buf->pfntab = malloc(pfnlen)) ) {
-            ERROR("Error allocating PFN tail buffer");
-            return -1;
-        }
-    }
-    // DPRINTF("Reading PFN tab: %d bytes\n", pfnlen);
-    if ( RDEXACT(fd, buf->pfntab, pfnlen) ) {
-        PERROR("Error when reading pfntab");
-        goto free_pfntab;
-    }
-
-    /* VCPU contexts */
-    buf->vcpucount = 0;
-    for (i = 0; i <= max_vcpu_id; i++) {
-        // DPRINTF("vcpumap: %llx, cpu: %d, bit: %llu\n", vcpumap[i/64], i, (vcpumap[i/64] & (1ULL << (i%64))));
-        if ( (!(vcpumap[i/64] & (1ULL << (i%64)))) )
-            continue;
-        buf->vcpucount++;
-    }
-    // DPRINTF("VCPU count: %d\n", buf->vcpucount);
-    vcpulen = ((dinfo->guest_width == 8) ? sizeof(vcpu_guest_context_x86_64_t)
-               : sizeof(vcpu_guest_context_x86_32_t)) * buf->vcpucount;
-    if ( ext_vcpucontext )
-        vcpulen += 128 * buf->vcpucount;
-    vcpulen += vcpuextstate_size * buf->vcpucount;
-
-    if ( !(buf->vcpubuf) ) {
-        if ( !(buf->vcpubuf = malloc(vcpulen)) ) {
-            ERROR("Error allocating VCPU ctxt tail buffer");
-            goto free_pfntab;
-        }
-    }
-    // DPRINTF("Reading VCPUS: %d bytes\n", vcpulen);
-    if ( RDEXACT(fd, buf->vcpubuf, vcpulen) ) {
-        PERROR("Error when reading ctxt");
-        goto free_vcpus;
-    }
-
-    /* load shared_info_page */
-    // DPRINTF("Reading shared info: %lu bytes\n", PAGE_SIZE);
-    if ( RDEXACT(fd, buf->shared_info_page, PAGE_SIZE) ) {
-        PERROR("Error when reading shared info page");
-        goto free_vcpus;
-    }
-
-    return 0;
-
-  free_vcpus:
-    if (buf->vcpubuf) {
-        free (buf->vcpubuf);
-        buf->vcpubuf = NULL;
-    }
-  free_pfntab:
-    if (buf->pfntab) {
-        free (buf->pfntab);
-        buf->pfntab = NULL;
-    }
-
-    return -1;
-}
-
-static int buffer_tail(xc_interface *xch, struct restore_ctx *ctx,
-                       tailbuf_t *buf, int fd, unsigned int max_vcpu_id,
-                       uint64_t *vcpumap, int ext_vcpucontext,
-                       uint32_t vcpuextstate_size)
-{
-    if ( buf->ishvm )
-        return buffer_tail_hvm(xch, ctx, &buf->u.hvm, fd, max_vcpu_id, vcpumap,
-                               ext_vcpucontext, vcpuextstate_size);
-    else
-        return buffer_tail_pv(xch, ctx, &buf->u.pv, fd, max_vcpu_id, vcpumap,
-                              ext_vcpucontext, vcpuextstate_size);
-}
-
-static void tailbuf_free_hvm(struct tailbuf_hvm *buf)
-{
-    if ( buf->hvmbuf ) {
-        free(buf->hvmbuf);
-        buf->hvmbuf = NULL;
-    }
-    if ( buf->qemubuf ) {
-        free(buf->qemubuf);
-        buf->qemubuf = NULL;
-    }
-}
-
-static void tailbuf_free_pv(struct tailbuf_pv *buf)
-{
-    if ( buf->vcpubuf ) {
-        free(buf->vcpubuf);
-        buf->vcpubuf = NULL;
-    }
-    if ( buf->pfntab ) {
-        free(buf->pfntab);
-        buf->pfntab = NULL;
-    }
-}
-
-static void tailbuf_free(tailbuf_t *buf)
-{
-    if ( buf->ishvm )
-        tailbuf_free_hvm(&buf->u.hvm);
-    else
-        tailbuf_free_pv(&buf->u.pv);
-}
-
-struct toolstack_data_t {
-    uint8_t *data;
-    uint32_t len;
-};
-
-typedef struct {
-    void* pages;
-    /* pages is of length nr_physpages, pfn_types is of length nr_pages */
-    unsigned int nr_physpages, nr_pages;
-
-    /* checkpoint compression state */
-    int compressing;
-    unsigned long compbuf_pos, compbuf_size;
-
-    /* Types of the pfns in the current region */
-    unsigned long* pfn_types;
-
-    int verify;
-
-    int new_ctxt_format;
-    int max_vcpu_id;
-    uint64_t vcpumap[XC_SR_MAX_VCPUS/64];
-    uint64_t identpt;
-    uint64_t paging_ring_pfn;
-    uint64_t monitor_ring_pfn;
-    uint64_t sharing_ring_pfn;
-    uint64_t vm86_tss;
-    uint64_t console_pfn;
-    uint64_t acpi_ioport_location;
-    uint64_t viridian;
-    uint64_t vm_generationid_addr;
-    uint64_t ioreq_server_pfn;
-    uint64_t nr_ioreq_server_pages;
-
-    struct toolstack_data_t tdata;
-} pagebuf_t;
-
-static int pagebuf_init(pagebuf_t* buf)
-{
-    memset(buf, 0, sizeof(*buf));
-    return 0;
-}
-
-static void pagebuf_free(pagebuf_t* buf)
-{
-    if (buf->tdata.data != NULL) {
-        free(buf->tdata.data);
-        buf->tdata.data = NULL;
-    }
-    if (buf->pages) {
-        free(buf->pages);
-        buf->pages = NULL;
-    }
-    if(buf->pfn_types) {
-        free(buf->pfn_types);
-        buf->pfn_types = NULL;
-    }
-}
-
-static int pagebuf_get_one(xc_interface *xch, struct restore_ctx *ctx,
-                           pagebuf_t* buf, int fd, uint32_t dom)
-{
-    int count, countpages, oldcount, i;
-    void* ptmp;
-    unsigned long compbuf_size;
-
-    if ( RDEXACT(fd, &count, sizeof(count)) )
-    {
-        PERROR("Error when reading batch size");
-        return -1;
-    }
-
-    // DPRINTF("reading batch of %d pages\n", count);
-
-    switch ( count )
-    {
-    case 0:
-        // DPRINTF("Last batch read\n");
-        return 0;
-
-    case XC_SAVE_ID_ENABLE_VERIFY_MODE:
-        DPRINTF("Entering page verify mode\n");
-        buf->verify = 1;
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_VCPU_INFO:
-        buf->new_ctxt_format = 1;
-        if ( RDEXACT(fd, &buf->max_vcpu_id, sizeof(buf->max_vcpu_id)) ||
-             buf->max_vcpu_id >= XC_SR_MAX_VCPUS ||
-             RDEXACT(fd, buf->vcpumap, vcpumap_sz(buf->max_vcpu_id)) ) {
-            PERROR("Error when reading max_vcpu_id");
-            return -1;
-        }
-        // DPRINTF("Max VCPU ID: %d, vcpumap: %llx\n", buf->max_vcpu_id, buf->vcpumap[0]);
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_HVM_IDENT_PT:
-        /* Skip padding 4 bytes then read the EPT identity PT location. */
-        if ( RDEXACT(fd, &buf->identpt, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->identpt, sizeof(uint64_t)) )
-        {
-            PERROR("error read the address of the EPT identity map");
-            return -1;
-        }
-        // DPRINTF("EPT identity map address: %llx\n", buf->identpt);
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_HVM_PAGING_RING_PFN:
-        /* Skip padding 4 bytes then read the paging ring location. */
-        if ( RDEXACT(fd, &buf->paging_ring_pfn, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->paging_ring_pfn, sizeof(uint64_t)) )
-        {
-            PERROR("error read the paging ring pfn");
-            return -1;
-        }
-        // DPRINTF("paging ring pfn address: %llx\n", buf->paging_ring_pfn);
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_HVM_MONITOR_RING_PFN:
-        /* Skip padding 4 bytes then read the mem access ring location. */
-        if ( RDEXACT(fd, &buf->monitor_ring_pfn, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->monitor_ring_pfn, sizeof(uint64_t)) )
-        {
-            PERROR("error read the access ring pfn");
-            return -1;
-        }
-        // DPRINTF("monitor ring pfn address: %llx\n", buf->monitor_ring_pfn);
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_HVM_SHARING_RING_PFN:
-        /* Skip padding 4 bytes then read the sharing ring location. */
-        if ( RDEXACT(fd, &buf->sharing_ring_pfn, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->sharing_ring_pfn, sizeof(uint64_t)) )
-        {
-            PERROR("error read the sharing ring pfn");
-            return -1;
-        }
-        // DPRINTF("sharing ring pfn address: %llx\n", buf->sharing_ring_pfn);
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_HVM_VM86_TSS:
-        /* Skip padding 4 bytes then read the vm86 TSS location. */
-        if ( RDEXACT(fd, &buf->vm86_tss, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->vm86_tss, sizeof(uint64_t)) )
-        {
-            PERROR("error read the address of the vm86 TSS");
-            return -1;
-        }
-        // DPRINTF("VM86 TSS location: %llx\n", buf->vm86_tss);
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_TMEM:
-        DPRINTF("xc_domain_restore start tmem\n");
-        if ( xc_tmem_restore(xch, dom, fd) ) {
-            PERROR("error reading/restoring tmem");
-            return -1;
-        }
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_TMEM_EXTRA:
-        if ( xc_tmem_restore_extra(xch, dom, fd) ) {
-            PERROR("error reading/restoring tmem extra");
-            return -1;
-        }
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_TSC_INFO:
-    {
-        uint32_t tsc_mode, khz, incarn;
-        uint64_t nsec;
-        if ( RDEXACT(fd, &tsc_mode, sizeof(uint32_t)) ||
-             RDEXACT(fd, &nsec, sizeof(uint64_t)) ||
-             RDEXACT(fd, &khz, sizeof(uint32_t)) ||
-             RDEXACT(fd, &incarn, sizeof(uint32_t)) ||
-             xc_domain_set_tsc_info(xch, dom, tsc_mode, nsec, khz, incarn) ) {
-            PERROR("error reading/restoring tsc info");
-            return -1;
-        }
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-    }
-
-    case XC_SAVE_ID_HVM_CONSOLE_PFN :
-        /* Skip padding 4 bytes then read the console pfn location. */
-        if ( RDEXACT(fd, &buf->console_pfn, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->console_pfn, sizeof(uint64_t)) )
-        {
-            PERROR("error read the address of the console pfn");
-            return -1;
-        }
-        // DPRINTF("console pfn location: %llx\n", buf->console_pfn);
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_LAST_CHECKPOINT:
-        ctx->last_checkpoint = 1;
-        // DPRINTF("last checkpoint indication received");
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_HVM_ACPI_IOPORTS_LOCATION:
-        /* Skip padding 4 bytes then read the acpi ioport location. */
-        if ( RDEXACT(fd, &buf->acpi_ioport_location, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->acpi_ioport_location, sizeof(uint64_t)) )
-        {
-            PERROR("error read the acpi ioport location");
-            return -1;
-        }
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_HVM_VIRIDIAN:
-        /* Skip padding 4 bytes then read the acpi ioport location. */
-        if ( RDEXACT(fd, &buf->viridian, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->viridian, sizeof(uint64_t)) )
-        {
-            PERROR("error reading the viridian enlightenments");
-            return -1;
-        }
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_TOOLSTACK:
-        {
-            if ( RDEXACT(fd, &buf->tdata.len, sizeof(buf->tdata.len)) )
-            {
-                PERROR("error read toolstack id size");
-                return -1;
-            }
-            buf->tdata.data = (uint8_t*) realloc(buf->tdata.data, buf->tdata.len);
-            if ( buf->tdata.data == NULL )
-            {
-                PERROR("error memory allocation");
-                return -1;
-            }
-            if ( RDEXACT(fd, buf->tdata.data, buf->tdata.len) )
-            {
-                PERROR("error read toolstack id");
-                return -1;
-            }
-            return pagebuf_get_one(xch, ctx, buf, fd, dom);
-        }
-
-    case XC_SAVE_ID_ENABLE_COMPRESSION:
-        /* We cannot set compression flag directly in pagebuf structure,
-         * since this pagebuf still has uncompressed pages that are yet to
-         * be applied. We enable the compression field in pagebuf structure
-         * after receiving the first tailbuf.
-         */
-        ctx->compressing = 1;
-        // DPRINTF("compression flag received");
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_COMPRESSED_DATA:
-
-        /* read the length of compressed chunk coming in */
-        if ( RDEXACT(fd, &compbuf_size, sizeof(unsigned long)) )
-        {
-            PERROR("Error when reading compbuf_size");
-            return -1;
-        }
-        if (!compbuf_size) return 1;
-
-        buf->compbuf_size += compbuf_size;
-        if (!(ptmp = realloc(buf->pages, buf->compbuf_size))) {
-            ERROR("Could not (re)allocate compression buffer");
-            return -1;
-        }
-        buf->pages = ptmp;
-
-        if ( RDEXACT(fd, buf->pages + (buf->compbuf_size - compbuf_size),
-                     compbuf_size) ) {
-            PERROR("Error when reading compression buffer");
-            return -1;
-        }
-        return compbuf_size;
-
-    case XC_SAVE_ID_HVM_GENERATION_ID_ADDR:
-        /* Skip padding 4 bytes then read the generation id buffer location. */
-        if ( RDEXACT(fd, &buf->vm_generationid_addr, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->vm_generationid_addr, sizeof(uint64_t)) )
-        {
-            PERROR("error read the generation id buffer location");
-            return -1;
-        }
-        DPRINTF("read generation id buffer address");
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_HVM_IOREQ_SERVER_PFN:
-        /* Skip padding 4 bytes then read the ioreq server gmfn base. */
-        if ( RDEXACT(fd, &buf->ioreq_server_pfn, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->ioreq_server_pfn, sizeof(uint64_t)) )
-        {
-            PERROR("error read the ioreq server gmfn base");
-            return -1;
-        }
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_HVM_NR_IOREQ_SERVER_PAGES:
-        /* Skip padding 4 bytes then read the ioreq server gmfn count. */
-        if ( RDEXACT(fd, &buf->nr_ioreq_server_pages, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->nr_ioreq_server_pages, sizeof(uint64_t)) )
-        {
-            PERROR("error read the ioreq server gmfn count");
-            return -1;
-        }
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    default:
-        if ( (count > MAX_BATCH_SIZE) || (count < 0) ) {
-            ERROR("Max batch size exceeded (%d). Giving up.", count);
-            errno = EMSGSIZE;
-            return -1;
-        }
-        break;
-    }
-
-    oldcount = buf->nr_pages;
-    buf->nr_pages += count;
-    if (!buf->pfn_types) {
-        if (!(buf->pfn_types = malloc(buf->nr_pages * sizeof(*(buf->pfn_types))))) {
-            ERROR("Could not allocate PFN type buffer");
-            return -1;
-        }
-    } else {
-        if (!(ptmp = realloc(buf->pfn_types, buf->nr_pages * sizeof(*(buf->pfn_types))))) {
-            ERROR("Could not reallocate PFN type buffer");
-            return -1;
-        }
-        buf->pfn_types = ptmp;
-    }
-    if ( RDEXACT(fd, buf->pfn_types + oldcount, count * sizeof(*(buf->pfn_types)))) {
-        PERROR("Error when reading region pfn types");
-        return -1;
-    }
-
-    countpages = count;
-    for (i = oldcount; i < buf->nr_pages; ++i)
-    {
-        unsigned long pagetype;
-
-        pagetype = buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
-        if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ||
-             pagetype == XEN_DOMCTL_PFINFO_BROKEN ||
-             pagetype == XEN_DOMCTL_PFINFO_XALLOC )
-            --countpages;
-    }
-
-    if (!countpages)
-        return count;
-
-    /* If Remus Checkpoint Compression is turned on, we will only be
-     * receiving the pfn lists now. The compressed pages will come in later,
-     * following a <XC_SAVE_ID_COMPRESSED_DATA, compressedChunkSize> tuple.
-     */
-    if (buf->compressing)
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    oldcount = buf->nr_physpages;
-    buf->nr_physpages += countpages;
-    if (!buf->pages) {
-        if (!(buf->pages = malloc(buf->nr_physpages * PAGE_SIZE))) {
-            ERROR("Could not allocate page buffer");
-            return -1;
-        }
-    } else {
-        if (!(ptmp = realloc(buf->pages, buf->nr_physpages * PAGE_SIZE))) {
-            ERROR("Could not reallocate page buffer");
-            return -1;
-        }
-        buf->pages = ptmp;
-    }
-    if ( RDEXACT(fd, buf->pages + oldcount * PAGE_SIZE, countpages * PAGE_SIZE) ) {
-        PERROR("Error when reading pages");
-        return -1;
-    }
-
-    return count;
-}
-
-static int pagebuf_get(xc_interface *xch, struct restore_ctx *ctx,
-                       pagebuf_t* buf, int fd, uint32_t dom)
-{
-    int rc;
-
-    buf->nr_physpages = buf->nr_pages = 0;
-    buf->compbuf_pos = buf->compbuf_size = 0;
-
-    do {
-        rc = pagebuf_get_one(xch, ctx, buf, fd, dom);
-    } while (rc > 0);
-
-    if (rc < 0)
-        pagebuf_free(buf);
-
-    return rc;
-}
-
-static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx *ctx,
-                       xen_pfn_t* region_mfn, unsigned long* pfn_type, int pae_extended_cr3,
-                       struct xc_mmu* mmu,
-                       pagebuf_t* pagebuf, int curbatch, int *invalid_pages)
-{
-    int i, j, curpage, nr_mfns;
-    int k, scount;
-    unsigned long superpage_start=INVALID_P2M_ENTRY;
-    /* used by debug verify code */
-    unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
-    /* Our mapping of the current region (batch) */
-    char *region_base;
-    /* A temporary mapping, and a copy, of one frame of guest memory. */
-    unsigned long *page = NULL;
-    int nraces = 0;
-    struct domain_info_context *dinfo = &ctx->dinfo;
-    int* pfn_err = NULL;
-    int rc = -1;
-    int local_invalid_pages = 0;
-    /* We have handled curbatch pages before this batch, and there are
-     * *invalid_pages pages that are not in pagebuf->pages. So the first
-     * page for this page is (curbatch - *invalid_pages) page.
-     */
-    int first_page = curbatch - *invalid_pages;
-
-    unsigned long mfn, pfn, pagetype;
-
-    j = pagebuf->nr_pages - curbatch;
-    if (j > MAX_BATCH_SIZE)
-        j = MAX_BATCH_SIZE;
-
-    /* First pass for this batch: work out how much memory to alloc, and detect superpages */
-    nr_mfns = scount = 0;
-    for ( i = 0; i < j; i++ )
-    {
-        unsigned long pfn, pagetype;
-        pfn      = pagebuf->pfn_types[i + curbatch] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
-        pagetype = pagebuf->pfn_types[i + curbatch] &  XEN_DOMCTL_PFINFO_LTAB_MASK;
-
-        /* For allocation purposes, treat XEN_DOMCTL_PFINFO_XALLOC as a normal page */
-        if ( (pagetype != XEN_DOMCTL_PFINFO_XTAB) && 
-             (ctx->p2m[pfn] == INVALID_P2M_ENTRY) )
-        {
-            /* Have a live PFN which hasn't had an MFN allocated */
-
-            /* Logic if we're in the middle of detecting a candidate superpage */
-            if ( superpage_start != INVALID_P2M_ENTRY )
-            {
-                /* Is this the next expected continuation? */
-                if ( pfn == superpage_start + scount )
-                {
-                    if ( !ctx->superpages )
-                    {
-                        ERROR("Unexpexted codepath with no superpages");
-                        return -1;
-                    }
-
-                    scount++;
-
-                    /* If we've found a whole superpage, allocate it and update p2m */
-                    if ( scount  == SUPERPAGE_NR_PFNS )
-                    {
-                        unsigned long supermfn;
-
-
-                        supermfn=superpage_start;
-                        if ( xc_domain_populate_physmap_exact(xch, dom, 1,
-                                         SUPERPAGE_PFN_SHIFT, 0, &supermfn) != 0 )
-                        {
-                            DPRINTF("No 2M page available for pfn 0x%lx, fall back to 4K page.\n",
-                                    superpage_start);
-                            /* If we're falling back from a failed allocation, subtract one
-                             * from count, since the last page == pfn, which will behandled
-                             * anyway. */
-                            scount--;
-                            goto fallback;
-                        }
-
-                        DPRINTF("Mapping superpage (%d) pfn %lx, mfn %lx\n", scount, superpage_start, supermfn);
-                        for (k=0; k<scount; k++)
-                        {
-                            /* We just allocated a new mfn above; update p2m */
-                            ctx->p2m[superpage_start+k] = supermfn+k;
-                            ctx->nr_pfns++;
-                            /* region_map[] will be set below */
-                        }
-                        superpage_start=INVALID_P2M_ENTRY;
-                        scount=0;
-                    }
-                    continue;
-                }
-                
-            fallback:
-                DPRINTF("Falling back %d pages pfn %lx\n", scount, superpage_start);
-                for (k=0; k<scount; k++)
-                {
-                    ctx->p2m_batch[nr_mfns++] = superpage_start+k; 
-                    ctx->p2m[superpage_start+k]--;
-                }
-                superpage_start = INVALID_P2M_ENTRY;
-                scount=0;
-            }
-
-            /* Are we ready to start a new superpage candidate? */
-            if ( ctx->hvm && ctx->superpages && SUPER_PAGE_START(pfn) )
-            {
-                superpage_start=pfn;
-                scount++;
-            }
-            else
-            {
-                /* Add the current pfn to pfn_batch */
-                ctx->p2m_batch[nr_mfns++] = pfn;
-                ctx->p2m[pfn]--;
-            }
-        }
-    }
-
-    /* Clean up any partial superpage candidates */
-    if ( superpage_start != INVALID_P2M_ENTRY )
-    {
-        DPRINTF("Falling back %d pages pfn %lx\n", scount, superpage_start);
-        for (k=0; k<scount; k++)
-        {
-            ctx->p2m_batch[nr_mfns++] = superpage_start+k; 
-            ctx->p2m[superpage_start+k]--;
-        }
-        superpage_start = INVALID_P2M_ENTRY;
-    }
-
-    /* Now allocate a bunch of mfns for this batch */
-    if ( nr_mfns )
-    {
-        DPRINTF("Mapping order 0,  %d; first pfn %lx\n", nr_mfns, ctx->p2m_batch[0]);
-    
-        if (!ctx->hvm && ctx->superpages)
-            rc = alloc_superpage_mfns(xch, dom, ctx, nr_mfns);
-        else
-            rc = xc_domain_populate_physmap_exact(xch, dom, nr_mfns, 0, 0,
-                                                  ctx->p2m_batch);
-
-        if (rc)
-        {
-            ERROR("Failed to allocate memory for batch.!\n"); 
-            errno = ENOMEM;
-            return -1;
-        }
-    }
-
-    /* Second pass for this batch: update p2m[] and region_mfn[] */
-    nr_mfns = 0; 
-    for ( i = 0; i < j; i++ )
-    {
-        unsigned long pfn, pagetype;
-        pfn      = pagebuf->pfn_types[i + curbatch] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
-        pagetype = pagebuf->pfn_types[i + curbatch] &  XEN_DOMCTL_PFINFO_LTAB_MASK;
-
-        if ( pagetype != XEN_DOMCTL_PFINFO_XTAB
-             && ctx->p2m[pfn] == (INVALID_P2M_ENTRY-1) )
-        {
-            /* We just allocated a new mfn above; update p2m */
-            ctx->p2m[pfn] = ctx->p2m_batch[nr_mfns++]; 
-            ctx->nr_pfns++; 
-        }
-
-        /* setup region_mfn[] for batch map, if necessary.
-         * For HVM guests, this interface takes PFNs, not MFNs */
-        if ( pagetype == XEN_DOMCTL_PFINFO_XTAB
-             || pagetype == XEN_DOMCTL_PFINFO_XALLOC )
-            region_mfn[i] = ~0UL; /* map will fail but we don't care */
-        else
-            region_mfn[i] = ctx->hvm ? pfn : ctx->p2m[pfn];
-    }
-
-    /* Map relevant mfns */
-    pfn_err = calloc(j, sizeof(*pfn_err));
-    if ( pfn_err == NULL )
-    {
-        PERROR("allocation for pfn_err failed");
-        return -1;
-    }
-    region_base = xc_map_foreign_bulk(
-        xch, dom, PROT_WRITE, region_mfn, pfn_err, j);
-
-    if ( region_base == NULL )
-    {
-        PERROR("map batch failed");
-        free(pfn_err);
-        return -1;
-    }
-
-    for ( i = 0, curpage = -1; i < j; i++ )
-    {
-        pfn      = pagebuf->pfn_types[i + curbatch] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
-        pagetype = pagebuf->pfn_types[i + curbatch] &  XEN_DOMCTL_PFINFO_LTAB_MASK;
-
-        if ( pagetype == XEN_DOMCTL_PFINFO_XTAB
-             || pagetype == XEN_DOMCTL_PFINFO_XALLOC)
-        {
-            local_invalid_pages++;
-            /* a bogus/unmapped/allocate-only page: skip it */
-            continue;
-        }
-
-        if ( pagetype == XEN_DOMCTL_PFINFO_BROKEN )
-        {
-            if ( xc_set_broken_page_p2m(xch, dom, pfn) )
-            {
-                ERROR("Set p2m for broken page failed, "
-                      "dom=%d, pfn=%lx\n", dom, pfn);
-                goto err_mapped;
-            }
-
-            local_invalid_pages++;
-            continue;
-        }
-
-        if (pfn_err[i])
-        {
-            ERROR("unexpected PFN mapping failure pfn %lx map_mfn %lx p2m_mfn %lx",
-                  pfn, region_mfn[i], ctx->p2m[pfn]);
-            goto err_mapped;
-        }
-
-        ++curpage;
-
-        if ( pfn > dinfo->p2m_size )
-        {
-            ERROR("pfn out of range");
-            goto err_mapped;
-        }
-
-        pfn_type[pfn] = pagetype;
-
-        mfn = ctx->p2m[pfn];
-
-        /* In verify mode, we use a copy; otherwise we work in place */
-        page = pagebuf->verify ? (void *)buf : (region_base + i*PAGE_SIZE);
-
-        /* Remus - page decompression */
-        if (pagebuf->compressing)
-        {
-            if (xc_compression_uncompress_page(xch, pagebuf->pages,
-                                               pagebuf->compbuf_size,
-                                               &pagebuf->compbuf_pos,
-                                               (char *)page))
-            {
-                ERROR("Failed to uncompress page (pfn=%lx)\n", pfn);
-                goto err_mapped;
-            }
-        }
-        else
-            memcpy(page, pagebuf->pages + (first_page + curpage) * PAGE_SIZE,
-                   PAGE_SIZE);
-
-        pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
-
-        if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) &&
-             (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) )
-        {
-            /*
-            ** A page table page - need to 'uncanonicalize' it, i.e.
-            ** replace all the references to pfns with the corresponding
-            ** mfns for the new domain.
-            **
-            ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
-            ** so we may need to update the p2m after the main loop.
-            ** Hence we defer canonicalization of L1s until then.
-            */
-            if ((ctx->pt_levels != 3) ||
-                pae_extended_cr3 ||
-                (pagetype != XEN_DOMCTL_PFINFO_L1TAB)) {
-
-                if (!uncanonicalize_pagetable(xch, dom, ctx, page)) {
-                    /*
-                    ** Failing to uncanonicalize a page table can be ok
-                    ** under live migration since the pages type may have
-                    ** changed by now (and we'll get an update later).
-                    */
-                    DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
-                            pagetype >> 28, pfn, mfn);
-                    nraces++;
-                    continue;
-                }
-            }
-        }
-        else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB )
-        {
-            ERROR("Bogus page type %lx page table is out of range: "
-                  "i=%d p2m_size=%lu", pagetype, i, dinfo->p2m_size);
-            goto err_mapped;
-        }
-
-        if ( pagebuf->verify )
-        {
-            int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE);
-            if ( res )
-            {
-                int v;
-
-                DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx "
-                        "actualcs=%08lx\n", pfn, pfn_type[pfn],
-                        csum_page(region_base + i * PAGE_SIZE),
-                        csum_page(buf));
-
-                for ( v = 0; v < 4; v++ )
-                {
-                    unsigned long *p = (unsigned long *)
-                        (region_base + i*PAGE_SIZE);
-                    if ( buf[v] != p[v] )
-                        DPRINTF("    %d: %08lx %08lx\n", v, buf[v], p[v]);
-                }
-            }
-        }
-
-        if ( !ctx->hvm &&
-             xc_add_mmu_update(xch, mmu,
-                               (((unsigned long long)mfn) << PAGE_SHIFT)
-                               | MMU_MACHPHYS_UPDATE, pfn) )
-        {
-            PERROR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn);
-            goto err_mapped;
-        }
-    } /* end of 'batch' for loop */
-
-    rc = nraces;
-    *invalid_pages += local_invalid_pages;
-
-  err_mapped:
-    munmap(region_base, j*PAGE_SIZE);
-    free(pfn_err);
-
-    return rc;
-}
-
-int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
-                      unsigned int store_evtchn, unsigned long *store_mfn,
-                      domid_t store_domid, unsigned int console_evtchn,
-                      unsigned long *console_mfn, domid_t console_domid,
-                      unsigned int hvm, unsigned int pae, int superpages,
-                      int checkpointed_stream,
-                      struct restore_callbacks *callbacks)
-{
-    DECLARE_DOMCTL;
-    xc_dominfo_t info;
-    int rc = 1, frc, i, j, n, m, pae_extended_cr3 = 0, ext_vcpucontext = 0;
-    uint32_t vcpuextstate_size = 0;
-    unsigned long mfn, pfn;
-    int nraces = 0;
-
-    /* The new domain's shared-info frame number. */
-    unsigned long shared_info_frame;
-    unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
-    shared_info_any_t *old_shared_info = 
-        (shared_info_any_t *)shared_info_page;
-    shared_info_any_t *new_shared_info;
-
-    /* A copy of the CPU context of the guest. */
-    DECLARE_HYPERCALL_BUFFER(vcpu_guest_context_any_t, ctxt);
-
-    /* A copy of the CPU eXtended States of the guest. */
-    DECLARE_HYPERCALL_BUFFER(void, buffer);
-
-    /* A table containing the type of each PFN (/not/ MFN!). */
-    unsigned long *pfn_type = NULL;
-
-    /* A table of MFNs to map in the current region */
-    xen_pfn_t *region_mfn = NULL;
-
-    /* A copy of the pfn-to-mfn table frame list. */
-    xen_pfn_t *p2m_frame_list = NULL;
-    
-    /* A temporary mapping of the guest's start_info page. */
-    start_info_any_t *start_info;
-
-    /* Our mapping of the current region (batch) */
-    char *region_base;
-
-    struct xc_mmu *mmu = NULL;
-
-    struct mmuext_op pin[MAX_PIN_BATCH];
-    unsigned int nr_pins;
-
-    uint64_t vcpumap[XC_SR_MAX_VCPUS/64] = { 1ULL };
-    unsigned int max_vcpu_id = 0;
-    int new_ctxt_format = 0;
-
-    pagebuf_t pagebuf;
-    tailbuf_t tailbuf, tmptail;
-    struct toolstack_data_t tdata, tdatatmp;
-    void* vcpup;
-    uint64_t console_pfn = 0;
-
-    int orig_io_fd_flags;
-
-    struct restore_ctx _ctx;
-    struct restore_ctx *ctx = &_ctx;
-    struct domain_info_context *dinfo = &ctx->dinfo;
-
-    if ( getenv("XG_MIGRATION_V2") )
-    {
-        return xc_domain_restore2(
-            xch, io_fd, dom, store_evtchn, store_mfn,
-            store_domid, console_evtchn, console_mfn, console_domid,
-            hvm,  pae,  superpages, checkpointed_stream, callbacks);
-    }
-
-    DPRINTF("%s: starting restore of new domid %u", __func__, dom);
-
-    pagebuf_init(&pagebuf);
-    memset(&tailbuf, 0, sizeof(tailbuf));
-    tailbuf.ishvm = hvm;
-    memset(&tdata, 0, sizeof(tdata));
-
-    memset(ctx, 0, sizeof(*ctx));
-
-    ctx->superpages = superpages;
-    ctx->hvm = hvm;
-    ctx->last_checkpoint = !checkpointed_stream;
-
-    ctxt = xc_hypercall_buffer_alloc(xch, ctxt, sizeof(*ctxt));
-
-    if ( ctxt == NULL )
-    {
-        PERROR("Unable to allocate VCPU ctxt buffer");
-        return 1;
-    }
-
-
-    if ( (orig_io_fd_flags = fcntl(io_fd, F_GETFL, 0)) < 0 ) {
-        PERROR("unable to read IO FD flags");
-        goto out;
-    }
-
-    if ( RDEXACT(io_fd, &dinfo->p2m_size, sizeof(unsigned long)) )
-    {
-        PERROR("read: p2m_size");
-        goto out;
-    }
-    DPRINTF("%s: p2m_size = %lx\n", __func__, dinfo->p2m_size);
-
-    if ( !get_platform_info(xch, dom,
-                            &ctx->max_mfn, &ctx->hvirt_start, &ctx->pt_levels, &dinfo->guest_width) )
-    {
-        ERROR("Unable to get platform info.");
-        return 1;
-    }
-    
-    /* The *current* word size of the guest isn't very interesting; for now
-     * assume the guest will be the same as we are.  We'll fix that later
-     * if we discover otherwise. */
-    dinfo->guest_width = sizeof(unsigned long);
-    ctx->pt_levels = (dinfo->guest_width == 8) ? 4 : 3;
-    
-    if ( !hvm ) 
-    {
-        /* Load the p2m frame list, plus potential extended info chunk */
-        p2m_frame_list = load_p2m_frame_list(xch, ctx,
-            io_fd, &pae_extended_cr3, &ext_vcpucontext,
-            &vcpuextstate_size);
-
-        if ( !p2m_frame_list )
-            goto out;
-
-        /* Now that we know the word size, tell Xen about it */
-        memset(&domctl, 0, sizeof(domctl));
-        domctl.domain = dom;
-        domctl.cmd    = XEN_DOMCTL_set_address_size;
-        domctl.u.address_size.size = dinfo->guest_width * 8;
-        frc = do_domctl(xch, &domctl);
-        if ( frc != 0 )
-        {
-            PERROR("Unable to set guest address size.");
-            goto out;
-        }
-    }
-
-    /* We want zeroed memory so use calloc rather than malloc. */
-    ctx->p2m   = calloc(dinfo->p2m_size, sizeof(xen_pfn_t));
-    pfn_type   = calloc(dinfo->p2m_size, sizeof(unsigned long));
-
-    region_mfn = malloc(ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT));
-    ctx->p2m_batch = malloc(ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT));
-    if (!ctx->hvm && ctx->superpages)
-    {
-        ctx->p2m_saved_batch =
-            malloc(ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT));
-        if ( ctx->p2m_saved_batch == NULL )
-        {
-            ERROR("saved batch memory alloc failed");
-            errno = ENOMEM;
-            goto out;
-        }
-    }
-
-    if ( (ctx->p2m == NULL) || (pfn_type == NULL) ||
-         (region_mfn == NULL) || (ctx->p2m_batch == NULL) )
-    {
-        ERROR("memory alloc failed");
-        errno = ENOMEM;
-        goto out;
-    }
-
-    memset(region_mfn, 0,
-           ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT)); 
-    memset(ctx->p2m_batch, 0,
-           ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT)); 
-
-    /* Get the domain's shared-info frame. */
-    if ( xc_domain_getinfo(xch, (domid_t)dom, 1, &info) != 1 )
-    {
-        PERROR("Could not get information on new domain");
-        goto out;
-    }
-    shared_info_frame = info.shared_info_frame;
-
-    /* Mark all PFNs as invalid; we allocate on demand */
-    for ( pfn = 0; pfn < dinfo->p2m_size; pfn++ )
-        ctx->p2m[pfn] = INVALID_P2M_ENTRY;
-
-    mmu = xc_alloc_mmu_updates(xch, dom);
-    if ( mmu == NULL )
-    {
-        PERROR("Could not initialise for MMU updates");
-        goto out;
-    }
-
-    xc_set_progress_prefix(xch, "Reloading memory pages");
-    xc_report_progress_step(xch, 0, dinfo->p2m_size);
-
-    /*
-     * Now simply read each saved frame into its new machine frame.
-     * We uncanonicalise page tables as we go.
-     */
-
-    n = m = 0;
- loadpages:
-    for ( ; ; )
-    {
-        int j, curbatch, invalid_pages;
-
-        xc_report_progress_step(xch, n, dinfo->p2m_size);
-
-        if ( !ctx->completed ) {
-            pagebuf.nr_physpages = pagebuf.nr_pages = 0;
-            pagebuf.compbuf_pos = pagebuf.compbuf_size = 0;
-            if ( pagebuf_get_one(xch, ctx, &pagebuf, io_fd, dom) < 0 ) {
-                PERROR("Error when reading batch");
-                goto out;
-            }
-        }
-        j = pagebuf.nr_pages;
-
-        DBGPRINTF("batch %d\n",j);
-
-        if ( j == 0 ) {
-            /* catch vcpu updates */
-            if (pagebuf.new_ctxt_format) {
-                max_vcpu_id = pagebuf.max_vcpu_id;
-                memcpy(vcpumap, pagebuf.vcpumap, vcpumap_sz(max_vcpu_id));
-            }
-            /* should this be deferred? does it change? */
-            if ( pagebuf.identpt )
-                xc_hvm_param_set(xch, dom, HVM_PARAM_IDENT_PT, pagebuf.identpt);
-            if ( pagebuf.paging_ring_pfn )
-                xc_hvm_param_set(xch, dom, HVM_PARAM_PAGING_RING_PFN, pagebuf.paging_ring_pfn);
-            if ( pagebuf.monitor_ring_pfn )
-                xc_hvm_param_set(xch, dom, HVM_PARAM_MONITOR_RING_PFN, pagebuf.monitor_ring_pfn);
-            if ( pagebuf.sharing_ring_pfn )
-                xc_hvm_param_set(xch, dom, HVM_PARAM_SHARING_RING_PFN, pagebuf.sharing_ring_pfn);
-            if ( pagebuf.vm86_tss )
-                xc_hvm_param_set(xch, dom, HVM_PARAM_VM86_TSS, pagebuf.vm86_tss);
-            if ( pagebuf.console_pfn )
-                console_pfn = pagebuf.console_pfn;
-            if ( pagebuf.vm_generationid_addr )
-                xc_hvm_param_set(xch, dom, HVM_PARAM_VM_GENERATION_ID_ADDR,
-                                 pagebuf.vm_generationid_addr);
-
-            break;  /* our work here is done */
-        }
-
-        /* break pagebuf into batches */
-        curbatch = 0;
-        invalid_pages = 0;
-        while ( curbatch < j ) {
-            int brc;
-
-            brc = apply_batch(xch, dom, ctx, region_mfn, pfn_type,
-                              pae_extended_cr3, mmu, &pagebuf, curbatch,
-                              &invalid_pages);
-            if ( brc < 0 )
-                goto out;
-
-            nraces += brc;
-
-            curbatch += MAX_BATCH_SIZE;
-        }
-
-        pagebuf.nr_physpages = pagebuf.nr_pages = 0;
-        pagebuf.compbuf_pos = pagebuf.compbuf_size = 0;
-
-        n += j; /* crude stats */
-
-        /* 
-         * Discard cache for portion of file read so far up to last
-         *  page boundary every 16MB or so.
-         */
-        m += j;
-        if ( m > MAX_PAGECACHE_USAGE )
-        {
-            discard_file_cache(xch, io_fd, 0 /* no flush */);
-            m = 0;
-        }
-    }
-
-    /*
-     * Ensure we flush all machphys updates before potential PAE-specific
-     * reallocations below.
-     */
-    if ( !hvm && xc_flush_mmu_updates(xch, mmu) )
-    {
-        PERROR("Error doing flush_mmu_updates()");
-        goto out;
-    }
-
-    // DPRINTF("Received all pages (%d races)\n", nraces);
-
-    if ( !ctx->completed ) {
-
-        if ( buffer_tail(xch, ctx, &tailbuf, io_fd, max_vcpu_id, vcpumap,
-                         ext_vcpucontext, vcpuextstate_size) < 0 ) {
-            ERROR ("error buffering image tail");
-            goto out;
-        }
-
-        ctx->completed = 1;
-
-        /*
-         * If more checkpoints are expected then shift into
-         * nonblocking mode for the remainder.
-         */
-        if ( !ctx->last_checkpoint )
-            fcntl(io_fd, F_SETFL, orig_io_fd_flags | O_NONBLOCK);
-
-        /*
-         * If sender had sent enable compression flag, switch to compressed
-         * checkpoints mode once the first checkpoint is received.
-         */
-        if (ctx->compressing)
-            pagebuf.compressing = 1;
-    }
-
-    if (pagebuf.viridian != 0)
-        xc_hvm_param_set(xch, dom, HVM_PARAM_VIRIDIAN, pagebuf.viridian);
-
-    /*
-     * If we are migrating in from a host that does not support
-     * secondary emulators then nr_ioreq_server_pages will be 0, since
-     * there will be no XC_SAVE_ID_HVM_NR_IOREQ_SERVER_PAGES chunk in
-     * the image.
-     * If we are migrating from a host that does support secondary
-     * emulators then the XC_SAVE_ID_HVM_NR_IOREQ_SERVER_PAGES chunk
-     * will exist and is guaranteed to have a non-zero value. The
-     * existence of that chunk also implies the existence of the
-     * XC_SAVE_ID_HVM_IOREQ_SERVER_PFN chunk, which is also guaranteed
-     * to have a non-zero value.
-     */
-    if (!pagebuf.nr_ioreq_server_pages ^ !pagebuf.ioreq_server_pfn) {
-        ERROR("Inconsistent IOREQ Server settings (nr=%"PRIx64", pfn=%"PRIx64")",
-              pagebuf.nr_ioreq_server_pages, pagebuf.ioreq_server_pfn);
-    } else {
-        if (pagebuf.nr_ioreq_server_pages != 0 &&
-            pagebuf.ioreq_server_pfn != 0) {
-            xc_hvm_param_set(xch, dom, HVM_PARAM_NR_IOREQ_SERVER_PAGES,
-                             pagebuf.nr_ioreq_server_pages);
-            xc_hvm_param_set(xch, dom, HVM_PARAM_IOREQ_SERVER_PFN,
-                             pagebuf.ioreq_server_pfn);
-        }
-    }
-
-    if (pagebuf.acpi_ioport_location == 1) {
-        DBGPRINTF("Use new firmware ioport from the checkpoint\n");
-        xc_hvm_param_set(xch, dom, HVM_PARAM_ACPI_IOPORTS_LOCATION, 1);
-    } else if (pagebuf.acpi_ioport_location == 0) {
-        DBGPRINTF("Use old firmware ioport from the checkpoint\n");
-    } else {
-        ERROR("Error, unknow acpi ioport location (%"PRId64")", pagebuf.acpi_ioport_location);
-    }
-
-    tdatatmp = tdata;
-    tdata = pagebuf.tdata;
-    pagebuf.tdata = tdatatmp;
-
-    if ( ctx->last_checkpoint )
-    {
-        // DPRINTF("Last checkpoint, finishing\n");
-        goto finish;
-    }
-
-    // DPRINTF("Buffered checkpoint\n");
-
-    if ( pagebuf_get(xch, ctx, &pagebuf, io_fd, dom) ) {
-        PERROR("error when buffering batch, finishing");
-        /*
-         * Remus: discard the current incomplete checkpoint and restore
-         * backup from the last complete checkpoint.
-         */
-        goto finish;
-    }
-    memset(&tmptail, 0, sizeof(tmptail));
-    tmptail.ishvm = hvm;
-    if ( buffer_tail(xch, ctx, &tmptail, io_fd, max_vcpu_id, vcpumap,
-                     ext_vcpucontext, vcpuextstate_size) < 0 ) {
-        ERROR ("error buffering image tail, finishing");
-        /*
-         * Remus: discard the current incomplete checkpoint and restore
-         * backup from the last complete checkpoint.
-         */
-        goto finish;
-    }
-    tailbuf_free(&tailbuf);
-    memcpy(&tailbuf, &tmptail, sizeof(tailbuf));
-
-    goto loadpages;
-
-  /* With Remus: restore from last complete checkpoint */
-  finish:
-    if ( hvm )
-        goto finish_hvm;
-
-    if ( (ctx->pt_levels == 3) && !pae_extended_cr3 )
-    {
-        /*
-        ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
-        ** is a little awkward and involves (a) finding all such PGDs and
-        ** replacing them with 'lowmem' versions; (b) upating the p2m[]
-        ** with the new info; and (c) canonicalizing all the L1s using the
-        ** (potentially updated) p2m[].
-        **
-        ** This is relatively slow (and currently involves two passes through
-        ** the pfn_type[] array), but at least seems to be correct. May wish
-        ** to consider more complex approaches to optimize this later.
-        */
-
-        int j, k;
-        
-        /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
-        for ( i = 0; i < dinfo->p2m_size; i++ )
-        {
-            if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
-                  XEN_DOMCTL_PFINFO_L3TAB) &&
-                 (ctx->p2m[i] > 0xfffffUL) )
-            {
-                unsigned long new_mfn;
-                uint64_t l3ptes[4];
-                uint64_t *l3tab;
-
-                l3tab = (uint64_t *)
-                    xc_map_foreign_range(xch, dom, PAGE_SIZE,
-                                         PROT_READ, ctx->p2m[i]);
-                if ( l3tab == NULL )
-                {
-                    PERROR("xc_map_foreign_range failed (for l3tab)");
-                    goto out;
-                }
-
-                for ( j = 0; j < 4; j++ )
-                    l3ptes[j] = l3tab[j];
-
-                munmap(l3tab, PAGE_SIZE);
-
-                new_mfn = xc_make_page_below_4G(xch, dom, ctx->p2m[i]);
-                if ( !new_mfn )
-                {
-                    PERROR("Couldn't get a page below 4GB :-(");
-                    goto out;
-                }
-
-                ctx->p2m[i] = new_mfn;
-                if ( xc_add_mmu_update(xch, mmu,
-                                       (((unsigned long long)new_mfn)
-                                        << PAGE_SHIFT) |
-                                       MMU_MACHPHYS_UPDATE, i) )
-                {
-                    PERROR("Couldn't m2p on PAE root pgdir");
-                    goto out;
-                }
-
-                l3tab = (uint64_t *)
-                    xc_map_foreign_range(xch, dom, PAGE_SIZE,
-                                         PROT_READ | PROT_WRITE, ctx->p2m[i]);
-                if ( l3tab == NULL )
-                {
-                    PERROR("xc_map_foreign_range failed (for l3tab, 2nd)");
-                    goto out;
-                }
-
-                for ( j = 0; j < 4; j++ )
-                    l3tab[j] = l3ptes[j];
-
-                munmap(l3tab, PAGE_SIZE);
-            }
-        }
-
-        /* Second pass: find all L1TABs and uncanonicalize them */
-        j = 0;
-
-        for ( i = 0; i < dinfo->p2m_size; i++ )
-        {
-            if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
-                  XEN_DOMCTL_PFINFO_L1TAB) )
-            {
-                region_mfn[j] = ctx->p2m[i];
-                j++;
-            }
-
-            if ( (i == (dinfo->p2m_size-1)) || (j == MAX_BATCH_SIZE) )
-            {
-                region_base = xc_map_foreign_pages(
-                    xch, dom, PROT_READ | PROT_WRITE, region_mfn, j);
-                if ( region_base == NULL )
-                {
-                    PERROR("map batch failed");
-                    goto out;
-                }
-
-                for ( k = 0; k < j; k++ )
-                {
-                    if ( !uncanonicalize_pagetable(
-                        xch, dom, ctx,
-                        region_base + k*PAGE_SIZE) )
-                    {
-                        ERROR("failed uncanonicalize pt!");
-                        goto out;
-                    }
-                }
-
-                munmap(region_base, j*PAGE_SIZE);
-                j = 0;
-            }
-        }
-
-        if ( xc_flush_mmu_updates(xch, mmu) )
-        {
-            PERROR("Error doing xc_flush_mmu_updates()");
-            goto out;
-        }
-    }
-
-    /*
-     * Pin page tables. Do this after writing to them as otherwise Xen
-     * will barf when doing the type-checking.
-     */
-    nr_pins = 0;
-    for ( i = 0; i < dinfo->p2m_size; i++ )
-    {
-        if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
-            continue;
-
-        switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
-        {
-        case XEN_DOMCTL_PFINFO_L1TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
-            break;
-
-        case XEN_DOMCTL_PFINFO_L2TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
-            break;
-
-        case XEN_DOMCTL_PFINFO_L3TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
-            break;
-
-        case XEN_DOMCTL_PFINFO_L4TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
-            break;
-
-        default:
-            continue;
-        }
-
-        pin[nr_pins].arg1.mfn = ctx->p2m[i];
-        nr_pins++;
-
-        /* Batch full? Then flush. */
-        if ( nr_pins == MAX_PIN_BATCH )
-        {
-            if ( xc_mmuext_op(xch, pin, nr_pins, dom) < 0 )
-            {
-                PERROR("Failed to pin batch of %d page tables", nr_pins);
-                goto out;
-            }
-            nr_pins = 0;
-        }
-    }
-
-    /* Flush final partial batch. */
-    if ( (nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0) )
-    {
-        PERROR("Failed to pin batch of %d page tables", nr_pins);
-        goto out;
-    }
-
-    DPRINTF("Memory reloaded (%ld pages)\n", ctx->nr_pfns);
-
-    /* Get the list of PFNs that are not in the psuedo-phys map */
-    {
-        int nr_frees = 0;
-
-        for ( i = 0; i < tailbuf.u.pv.pfncount; i++ )
-        {
-            unsigned long pfn = tailbuf.u.pv.pfntab[i];
-
-            if ( ctx->p2m[pfn] != INVALID_P2M_ENTRY )
-            {
-                /* pfn is not in physmap now, but was at some point during
-                   the save/migration process - need to free it */
-                tailbuf.u.pv.pfntab[nr_frees++] = ctx->p2m[pfn];
-                ctx->p2m[pfn]  = INVALID_P2M_ENTRY; /* not in pseudo-physical map */
-            }
-        }
-
-        if ( nr_frees > 0 )
-        {
-            if ( (frc = xc_domain_decrease_reservation(xch, dom, nr_frees, 0, tailbuf.u.pv.pfntab)) != nr_frees )
-            {
-                PERROR("Could not decrease reservation : %d", frc);
-                goto out;
-            }
-            else
-                DPRINTF("Decreased reservation by %d pages\n", tailbuf.u.pv.pfncount);
-        }
-    }
-
-    vcpup = tailbuf.u.pv.vcpubuf;
-    for ( i = 0; i <= max_vcpu_id; i++ )
-    {
-        if ( !(vcpumap[i/64] & (1ULL << (i%64))) )
-            continue;
-
-        memcpy(ctxt, vcpup, ((dinfo->guest_width == 8) ? sizeof(ctxt->x64)
-                              : sizeof(ctxt->x32)));
-        vcpup += (dinfo->guest_width == 8) ? sizeof(ctxt->x64) : sizeof(ctxt->x32);
-
-        DPRINTF("read VCPU %d\n", i);
-
-        if ( !new_ctxt_format )
-            SET_FIELD(ctxt, flags,
-                      GET_FIELD(ctxt, flags, dinfo->guest_width) | VGCF_online,
-                      dinfo->guest_width);
-
-        if ( i == 0 )
-        {
-            /*
-             * Uncanonicalise the start info frame number and poke in
-             * updated values into the start info itself.
-             *
-             * The start info MFN is the 3rd argument to the
-             * HYPERVISOR_sched_op hypercall when op==SCHEDOP_shutdown
-             * and reason==SHUTDOWN_suspend, it is canonicalised in
-             * xc_domain_save and therefore the PFN is found in the
-             * edx register.
-             */
-            pfn = GET_FIELD(ctxt, user_regs.edx, dinfo->guest_width);
-            if ( (pfn >= dinfo->p2m_size) ||
-                 (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) )
-            {
-                ERROR("Suspend record frame number is bad");
-                goto out;
-            }
-            mfn = ctx->p2m[pfn];
-            SET_FIELD(ctxt, user_regs.edx, mfn, dinfo->guest_width);
-            start_info = xc_map_foreign_range(
-                xch, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
-            if ( start_info == NULL )
-            {
-                PERROR("xc_map_foreign_range failed (for start_info)");
-                goto out;
-            }
-
-            SET_FIELD(start_info, nr_pages, dinfo->p2m_size, dinfo->guest_width);
-            SET_FIELD(start_info, shared_info, shared_info_frame<<PAGE_SHIFT, dinfo->guest_width);
-            SET_FIELD(start_info, flags, 0, dinfo->guest_width);
-            if ( GET_FIELD(start_info, store_mfn, dinfo->guest_width) > dinfo->p2m_size )
-            {
-                ERROR("Suspend record xenstore frame number is bad");
-                munmap(start_info, PAGE_SIZE);
-                goto out;
-            }
-            *store_mfn = ctx->p2m[GET_FIELD(start_info, store_mfn, dinfo->guest_width)];
-            SET_FIELD(start_info, store_mfn, *store_mfn, dinfo->guest_width);
-            SET_FIELD(start_info, store_evtchn, store_evtchn, dinfo->guest_width);
-            if ( GET_FIELD(start_info, console.domU.mfn, dinfo->guest_width) > dinfo->p2m_size )
-            {
-                ERROR("Suspend record console frame number is bad");
-                munmap(start_info, PAGE_SIZE);
-                goto out;
-            }
-            *console_mfn = ctx->p2m[GET_FIELD(start_info, console.domU.mfn, dinfo->guest_width)];
-            SET_FIELD(start_info, console.domU.mfn, *console_mfn, dinfo->guest_width);
-            SET_FIELD(start_info, console.domU.evtchn, console_evtchn, dinfo->guest_width);
-            munmap(start_info, PAGE_SIZE);
-        }
-        /* Uncanonicalise each GDT frame number. */
-        if ( GET_FIELD(ctxt, gdt_ents, dinfo->guest_width) > 8192 )
-        {
-            ERROR("GDT entry count out of range");
-            goto out;
-        }
-
-        for ( j = 0; (512*j) < GET_FIELD(ctxt, gdt_ents, dinfo->guest_width); j++ )
-        {
-            pfn = GET_FIELD(ctxt, gdt_frames[j], dinfo->guest_width);
-            if ( (pfn >= dinfo->p2m_size) ||
-                 (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) )
-            {
-                ERROR("GDT frame number %i (0x%lx) is bad", 
-                      j, (unsigned long)pfn);
-                goto out;
-            }
-            SET_FIELD(ctxt, gdt_frames[j], ctx->p2m[pfn], dinfo->guest_width);
-        }
-        /* Uncanonicalise the page table base pointer. */
-        pfn = UNFOLD_CR3(GET_FIELD(ctxt, ctrlreg[3], dinfo->guest_width));
-
-        if ( pfn >= dinfo->p2m_size )
-        {
-            ERROR("PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
-                  pfn, dinfo->p2m_size, pfn_type[pfn]);
-            goto out;
-        }
-
-        if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
-             ((unsigned long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) )
-        {
-            ERROR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
-                  pfn, dinfo->p2m_size, pfn_type[pfn],
-                  (unsigned long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
-            goto out;
-        }
-        SET_FIELD(ctxt, ctrlreg[3], FOLD_CR3(ctx->p2m[pfn]), dinfo->guest_width);
-
-        /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
-        if ( (ctx->pt_levels == 4) && (ctxt->x64.ctrlreg[1] & 1) )
-        {
-            pfn = UNFOLD_CR3(ctxt->x64.ctrlreg[1] & ~1);
-            if ( pfn >= dinfo->p2m_size )
-            {
-                ERROR("User PT base is bad: pfn=%lu p2m_size=%lu",
-                      pfn, dinfo->p2m_size);
-                goto out;
-            }
-            if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
-                 ((unsigned long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) )
-            {
-                ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
-                      pfn, dinfo->p2m_size, pfn_type[pfn],
-                      (unsigned long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
-                goto out;
-            }
-            ctxt->x64.ctrlreg[1] = FOLD_CR3(ctx->p2m[pfn]);
-        }
-        frc = xc_vcpu_setcontext(xch, dom, i, ctxt);
-        if ( frc != 0 )
-        {
-            PERROR("Couldn't build vcpu%d", i);
-            goto out;
-        }
-
-        if ( !ext_vcpucontext )
-            goto vcpu_ext_state_restore;
-        memcpy(&domctl.u.ext_vcpucontext, vcpup, 128);
-        vcpup += 128;
-        domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext;
-        domctl.domain = dom;
-        frc = xc_domctl(xch, &domctl);
-        if ( frc != 0 )
-        {
-            PERROR("Couldn't set extended vcpu%d info", i);
-            goto out;
-        }
-
- vcpu_ext_state_restore:
-        if ( !vcpuextstate_size )
-            continue;
-
-        memcpy(&domctl.u.vcpuextstate.xfeature_mask, vcpup,
-               sizeof(domctl.u.vcpuextstate.xfeature_mask));
-        vcpup += sizeof(domctl.u.vcpuextstate.xfeature_mask);
-        memcpy(&domctl.u.vcpuextstate.size, vcpup,
-               sizeof(domctl.u.vcpuextstate.size));
-        vcpup += sizeof(domctl.u.vcpuextstate.size);
-
-        buffer = xc_hypercall_buffer_alloc(xch, buffer,
-                                           domctl.u.vcpuextstate.size);
-        if ( !buffer )
-        {
-            PERROR("Could not allocate buffer to restore eXtended States");
-            goto out;
-        }
-        memcpy(buffer, vcpup, domctl.u.vcpuextstate.size);
-        vcpup += domctl.u.vcpuextstate.size;
-
-        domctl.cmd = XEN_DOMCTL_setvcpuextstate;
-        domctl.domain = dom;
-        domctl.u.vcpuextstate.vcpu = i;
-        set_xen_guest_handle(domctl.u.vcpuextstate.buffer, buffer);
-        frc = xc_domctl(xch, &domctl);
-        if ( frc != 0 )
-        {
-            PERROR("Couldn't set eXtended States for vcpu%d", i);
-            goto out;
-        }
-        xc_hypercall_buffer_free(xch, buffer);
-    }
-
-    memcpy(shared_info_page, tailbuf.u.pv.shared_info_page, PAGE_SIZE);
-
-    DPRINTF("Completed checkpoint load\n");
-
-    /* Restore contents of shared-info page. No checking needed. */
-    new_shared_info = xc_map_foreign_range(
-        xch, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame);
-    if ( new_shared_info == NULL )
-    {
-        PERROR("xc_map_foreign_range failed (for new_shared_info)");
-        goto out;
-    }
-
-    /* restore saved vcpu_info and arch specific info */
-    MEMCPY_FIELD(new_shared_info, old_shared_info, vcpu_info, dinfo->guest_width);
-    MEMCPY_FIELD(new_shared_info, old_shared_info, arch, dinfo->guest_width);
-
-    /* clear any pending events and the selector */
-    MEMSET_ARRAY_FIELD(new_shared_info, evtchn_pending, 0, dinfo->guest_width);
-    for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ )
-           SET_FIELD(new_shared_info, vcpu_info[i].evtchn_pending_sel, 0, dinfo->guest_width);
-
-    /* mask event channels */
-    MEMSET_ARRAY_FIELD(new_shared_info, evtchn_mask, 0xff, dinfo->guest_width);
-
-    /* leave wallclock time. set by hypervisor */
-    munmap(new_shared_info, PAGE_SIZE);
-
-    /* Uncanonicalise the pfn-to-mfn table frame-number list. */
-    for ( i = 0; i < P2M_FL_ENTRIES; i++ )
-    {
-        pfn = p2m_frame_list[i];
-        if ( (pfn >= dinfo->p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) )
-        {
-            ERROR("PFN-to-MFN frame number %i (%#lx) is bad", i, pfn);
-            goto out;
-        }
-        p2m_frame_list[i] = ctx->p2m[pfn];
-    }
-
-    /* Copy the P2M we've constructed to the 'live' P2M */
-    if ( !(ctx->live_p2m = xc_map_foreign_pages(xch, dom, PROT_WRITE,
-                                           p2m_frame_list, P2M_FL_ENTRIES)) )
-    {
-        PERROR("Couldn't map p2m table");
-        goto out;
-    }
-
-    /* If the domain we're restoring has a different word size to ours,
-     * we need to adjust the live_p2m assignment appropriately */
-    if ( dinfo->guest_width > sizeof (xen_pfn_t) )
-        for ( i = dinfo->p2m_size - 1; i >= 0; i-- )
-            ((int64_t *)ctx->live_p2m)[i] = (long)ctx->p2m[i];
-    else if ( dinfo->guest_width < sizeof (xen_pfn_t) )
-        for ( i = 0; i < dinfo->p2m_size; i++ )   
-            ((uint32_t *)ctx->live_p2m)[i] = ctx->p2m[i];
-    else
-        memcpy(ctx->live_p2m, ctx->p2m, dinfo->p2m_size * sizeof(xen_pfn_t));
-    munmap(ctx->live_p2m, P2M_FL_ENTRIES * PAGE_SIZE);
-
-    frc = xc_dom_gnttab_seed(xch, dom, *console_mfn, *store_mfn,
-                             console_domid, store_domid);
-    if (frc != 0)
-    {
-        ERROR("error seeding grant table");
-        goto out;
-    }
-
-    DPRINTF("Domain ready to be built.\n");
-    rc = 0;
-    goto out;
-
-  finish_hvm:
-    if ( tdata.data != NULL )
-    {
-        if ( callbacks != NULL && callbacks->toolstack_restore != NULL )
-        {
-            frc = callbacks->toolstack_restore(dom, tdata.data, tdata.len,
-                                               callbacks->data);
-            free(tdata.data);
-            if ( frc < 0 )
-            {
-                PERROR("error calling toolstack_restore");
-                goto out;
-            }
-        } else {
-            rc = -1;
-            ERROR("toolstack data available but no callback provided\n");
-            free(tdata.data);
-            goto out;
-        }
-    }
-
-    /* Dump the QEMU state to a state file for QEMU to load */
-    if ( dump_qemu(xch, dom, &tailbuf.u.hvm) ) {
-        PERROR("Error dumping QEMU state to file");
-        goto out;
-    }
-
-    /* These comms pages need to be zeroed at the start of day */
-    if ( xc_clear_domain_page(xch, dom, tailbuf.u.hvm.magicpfns[0]) ||
-         xc_clear_domain_page(xch, dom, tailbuf.u.hvm.magicpfns[1]) ||
-         xc_clear_domain_page(xch, dom, tailbuf.u.hvm.magicpfns[2]) )
-    {
-        PERROR("error zeroing magic pages");
-        goto out;
-    }
-
-    if ( (frc = xc_hvm_param_set(xch, dom,
-                                 HVM_PARAM_IOREQ_PFN, tailbuf.u.hvm.magicpfns[0]))
-         || (frc = xc_hvm_param_set(xch, dom,
-                                    HVM_PARAM_BUFIOREQ_PFN, tailbuf.u.hvm.magicpfns[1]))
-         || (frc = xc_hvm_param_set(xch, dom,
-                                    HVM_PARAM_STORE_PFN, tailbuf.u.hvm.magicpfns[2]))
-         || (frc = xc_hvm_param_set(xch, dom,
-                                    HVM_PARAM_PAE_ENABLED, pae))
-         || (frc = xc_hvm_param_set(xch, dom,
-                                    HVM_PARAM_STORE_EVTCHN,
-                                    store_evtchn))
-         || (frc = xc_hvm_param_set(xch, dom,
-                                    HVM_PARAM_CONSOLE_EVTCHN,
-                                    console_evtchn)) )
-    {
-        PERROR("error setting HVM params: %i", frc);
-        goto out;
-    }
-    *store_mfn = tailbuf.u.hvm.magicpfns[2];
-
-    if ( console_pfn ) {
-        if ( xc_clear_domain_page(xch, dom, console_pfn) ) {
-            PERROR("error zeroing console page");
-            goto out;
-        }
-        if ( (frc = xc_hvm_param_set(xch, dom,
-                                    HVM_PARAM_CONSOLE_PFN, console_pfn)) ) {
-            PERROR("error setting HVM param: %i", frc);
-            goto out;
-        }
-        *console_mfn = console_pfn;
-    }
-
-    frc = xc_domain_hvm_setcontext(xch, dom, tailbuf.u.hvm.hvmbuf,
-                                   tailbuf.u.hvm.reclen);
-    if ( frc )
-    {
-        PERROR("error setting the HVM context");
-        goto out;
-    }
-
-    frc = xc_dom_gnttab_hvm_seed(xch, dom, *console_mfn, *store_mfn,
-                                 console_domid, store_domid);
-    if (frc != 0)
-    {
-        ERROR("error seeding grant table");
-        goto out;
-    }
-
-    /* HVM success! */
-    rc = 0;
-
- out:
-    if ( (rc != 0) && (dom != 0) )
-        xc_domain_destroy(xch, dom);
-    xc_hypercall_buffer_free(xch, ctxt);
-    free(mmu);
-    free(ctx->p2m);
-    free(pfn_type);
-    free(region_mfn);
-    free(ctx->p2m_batch);
-    pagebuf_free(&pagebuf);
-    tailbuf_free(&tailbuf);
-
-    /* discard cache for save file  */
-    discard_file_cache(xch, io_fd, 1 /*flush*/);
-
-    fcntl(io_fd, F_SETFL, orig_io_fd_flags);
-
-    DPRINTF("Restore exit of domid %u with rc=%d\n", dom, rc);
-
-    return rc;
-}
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xc_domain_save.c b/tools/libxc/xc_domain_save.c
deleted file mode 100644
index 3222473..0000000
--- a/tools/libxc/xc_domain_save.c
+++ /dev/null
@@ -1,2198 +0,0 @@
-/******************************************************************************
- * xc_linux_save.c
- *
- * Save the state of a running Linux session.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
- *
- * Copyright (c) 2003, K A Fraser.
- */
-
-#include <inttypes.h>
-#include <time.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/time.h>
-#include <assert.h>
-
-#include "xc_private.h"
-#include "xc_bitops.h"
-#include "xc_dom.h"
-#include "xg_private.h"
-#include "xg_save_restore.h"
-
-#include <xen/hvm/params.h>
-
-/*
-** Default values for important tuning parameters. Can override by passing
-** non-zero replacement values to xc_domain_save().
-**
-** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
-**
-*/
-#define DEF_MAX_ITERS   29   /* limit us to 30 times round loop   */
-#define DEF_MAX_FACTOR   3   /* never send more than 3x p2m_size  */
-
-struct save_ctx {
-    unsigned long hvirt_start; /* virtual starting address of the hypervisor */
-    unsigned int pt_levels; /* #levels of page tables used by the current guest */
-    unsigned long max_mfn; /* max mfn of the whole machine */
-    xen_pfn_t *live_p2m; /* Live mapping of the table mapping each PFN to its current MFN. */
-    xen_pfn_t *live_m2p; /* Live mapping of system MFN to PFN table. */
-    unsigned long m2p_mfn0;
-    struct domain_info_context dinfo;
-};
-
-/* buffer for output */
-struct outbuf {
-    void* buf;
-    size_t size;
-    size_t pos;
-    int write_count;
-};
-
-#define OUTBUF_SIZE (16384 * 1024)
-
-/* grep fodder: machine_to_phys */
-
-#define mfn_to_pfn(_mfn)  (ctx->live_m2p[(_mfn)])
-
-#define pfn_to_mfn(_pfn)                                            \
-  ((xen_pfn_t) ((dinfo->guest_width==8)                               \
-                ? (((uint64_t *)ctx->live_p2m)[(_pfn)])                  \
-                : ((((uint32_t *)ctx->live_p2m)[(_pfn)]) == 0xffffffffU  \
-                   ? (-1UL) : (((uint32_t *)ctx->live_p2m)[(_pfn)]))))
-
-/*
- * Returns TRUE if the given machine frame number has a unique mapping
- * in the guest's pseudophysical map.
- */
-#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn)          \
-    (((_mfn) < (ctx->max_mfn)) &&                \
-     ((mfn_to_pfn(_mfn) < (dinfo->p2m_size)) &&   \
-      (pfn_to_mfn(mfn_to_pfn(_mfn)) == (_mfn))))
-
-#define SUPERPAGE_PFN_SHIFT  9
-#define SUPERPAGE_NR_PFNS    (1UL << SUPERPAGE_PFN_SHIFT)
-
-#define SUPER_PAGE_START(pfn)    (((pfn) & (SUPERPAGE_NR_PFNS-1)) == 0 )
-
-static uint64_t tv_to_us(struct timeval *new)
-{
-    return (new->tv_sec * 1000000) + new->tv_usec;
-}
-
-static uint64_t llgettimeofday(void)
-{
-    struct timeval now;
-    gettimeofday(&now, NULL);
-    return tv_to_us(&now);
-}
-
-static uint64_t tv_delta(struct timeval *new, struct timeval *old)
-{
-    return (((new->tv_sec - old->tv_sec)*1000000) +
-            (new->tv_usec - old->tv_usec));
-}
-
-static int noncached_write(xc_interface *xch,
-                           struct outbuf* ob,
-                           int fd, void *buffer, int len) 
-{
-    int rc = (write_exact(fd, buffer, len) == 0) ? len : -1;
-
-    ob->write_count += len;
-    if ( ob->write_count >= (MAX_PAGECACHE_USAGE * PAGE_SIZE) )
-    {
-        /* Time to discard cache - dont care if this fails */
-        int saved_errno = errno;
-        discard_file_cache(xch, fd, 0 /* no flush */);
-        errno = saved_errno;
-        ob->write_count = 0;
-    }
-
-    return rc;
-}
-
-static int outbuf_init(xc_interface *xch, struct outbuf* ob, size_t size)
-{
-    memset(ob, 0, sizeof(*ob));
-
-    if ( !(ob->buf = malloc(size)) ) {
-        DPRINTF("error allocating output buffer of size %zu\n", size);
-        return -1;
-    }
-
-    ob->size = size;
-
-    return 0;
-}
-
-static int outbuf_free(struct outbuf *ob)
-{
-    free(ob->buf);
-    ob->buf = NULL;
-    return 0;
-}
-
-static inline int outbuf_write(xc_interface *xch,
-                               struct outbuf* ob, void* buf, size_t len)
-{
-    if ( len > ob->size - ob->pos ) {
-        errno = ERANGE;
-        DBGPRINTF("outbuf_write: %zu > %zu@%zu\n", len, ob->size - ob->pos, ob->pos);
-        return -1;
-    }
-
-    memcpy(ob->buf + ob->pos, buf, len);
-    ob->pos += len;
-
-    return 0;
-}
-
-/* prep for nonblocking I/O */
-static int outbuf_flush(xc_interface *xch, struct outbuf* ob, int fd)
-{
-    int rc;
-    int cur = 0;
-
-    if ( !ob->pos )
-        return 0;
-
-    rc = write(fd, ob->buf, ob->pos);
-    while (rc < 0 || cur + rc < ob->pos) {
-        if (rc < 0 && errno != EAGAIN && errno != EINTR) {
-            DPRINTF("error flushing output: %d\n", errno);
-            return -1;
-        }
-        if (rc > 0)
-            cur += rc;
-
-        rc = write(fd, ob->buf + cur, ob->pos - cur);
-    }
-
-    ob->pos = 0;
-
-    return 0;
-}
-
-/* if there's no room in the buffer, flush it and try again. */
-static inline int outbuf_hardwrite(xc_interface *xch,
-                                   struct outbuf* ob, int fd, void* buf,
-                                   size_t len)
-{
-    if ( !len )
-        return 0;
-
-    if ( !outbuf_write(xch, ob, buf, len) )
-        return 0;
-
-    if ( outbuf_flush(xch, ob, fd) < 0 )
-        return -1;
-
-    return outbuf_write(xch, ob, buf, len);
-}
-
-/* start buffering output once we've reached checkpoint mode. */
-static inline int write_buffer(xc_interface *xch,
-                               int dobuf, struct outbuf* ob, int fd, void* buf,
-                               size_t len)
-{
-    if ( dobuf )
-        return outbuf_hardwrite(xch, ob, fd, buf, len);
-    else
-        return write_exact(fd, buf, len);
-}
-
-/* like write_buffer for noncached, which returns number of bytes written */
-static inline int write_uncached(xc_interface *xch,
-                                   int dobuf, struct outbuf* ob, int fd,
-                                   void* buf, size_t len)
-{
-    if ( dobuf )
-        return outbuf_hardwrite(xch, ob, fd, buf, len) ? -1 : len;
-    else
-        return noncached_write(xch, ob, fd, buf, len);
-}
-
-static int write_compressed(xc_interface *xch, comp_ctx *compress_ctx,
-                            int dobuf, struct outbuf* ob, int fd)
-{
-    int rc = 0;
-    int header = sizeof(int) + sizeof(unsigned long);
-    int marker = XC_SAVE_ID_COMPRESSED_DATA;
-    unsigned long compbuf_len = 0;
-
-    for(;;)
-    {
-        /* check for available space (atleast 8k) */
-        if ((ob->pos + header + XC_PAGE_SIZE * 2) > ob->size)
-        {
-            if (outbuf_flush(xch, ob, fd) < 0)
-            {
-                ERROR("Error when flushing outbuf intermediate");
-                return -1;
-            }
-        }
-
-        rc = xc_compression_compress_pages(xch, compress_ctx,
-                                           ob->buf + ob->pos + header,
-                                           ob->size - ob->pos - header,
-                                           &compbuf_len);
-        if (!rc)
-            break;
-
-        if (outbuf_hardwrite(xch, ob, fd, &marker, sizeof(marker)) < 0)
-        {
-            PERROR("Error when writing marker (errno %d)", errno);
-            return -1;
-        }
-
-        if (outbuf_hardwrite(xch, ob, fd, &compbuf_len, sizeof(compbuf_len)) < 0)
-        {
-            PERROR("Error when writing compbuf_len (errno %d)", errno);
-            return -1;
-        }
-
-        ob->pos += (size_t) compbuf_len;
-        if (!dobuf && outbuf_flush(xch, ob, fd) < 0)
-        {
-            ERROR("Error when writing compressed chunk");
-            return -1;
-        }
-    }
-
-    return 0;
-}
-
-struct time_stats {
-    struct timeval wall;
-    long long d0_cpu, d1_cpu;
-};
-
-static int print_stats(xc_interface *xch, uint32_t domid, int pages_sent,
-                       struct time_stats *last,
-                       xc_shadow_op_stats_t *stats, int print)
-{
-    struct time_stats now;
-
-    gettimeofday(&now.wall, NULL);
-
-    now.d0_cpu = xc_domain_get_cpu_usage(xch, 0, /* FIXME */ 0)/1000;
-    now.d1_cpu = xc_domain_get_cpu_usage(xch, domid, /* FIXME */ 0)/1000;
-
-    if ( (now.d0_cpu == -1) || (now.d1_cpu == -1) )
-        DPRINTF("ARRHHH!!\n");
-
-    if ( print )
-    {
-        long long wall_delta;
-        long long d0_cpu_delta;
-        long long d1_cpu_delta;
-
-        wall_delta = tv_delta(&now.wall,&last->wall)/1000;
-        if ( wall_delta == 0 )
-            wall_delta = 1;
-
-        d0_cpu_delta = (now.d0_cpu - last->d0_cpu)/1000;
-        d1_cpu_delta = (now.d1_cpu - last->d1_cpu)/1000;
-
-        DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
-                "dirtied %dMb/s %" PRId32 " pages\n",
-                wall_delta,
-                (int)((d0_cpu_delta*100)/wall_delta),
-                (int)((d1_cpu_delta*100)/wall_delta),
-                (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
-                (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
-                stats->dirty_count);
-    }
-
-    *last = now;
-
-    return 0;
-}
-
-
-static int analysis_phase(xc_interface *xch, uint32_t domid, struct save_ctx *ctx,
-                          xc_hypercall_buffer_t *arr, int runs)
-{
-    long long start, now;
-    xc_shadow_op_stats_t stats;
-    int j;
-    struct domain_info_context *dinfo = &ctx->dinfo;
-
-    start = llgettimeofday();
-
-    for ( j = 0; j < runs; j++ )
-    {
-        int i;
-
-        xc_shadow_control(xch, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
-                          arr, dinfo->p2m_size, NULL, 0, NULL);
-        DPRINTF("#Flush\n");
-        for ( i = 0; i < 40; i++ )
-        {
-            usleep(50000);
-            now = llgettimeofday();
-            xc_shadow_control(xch, domid, XEN_DOMCTL_SHADOW_OP_PEEK,
-                              NULL, 0, NULL, 0, &stats);
-            DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n",
-                    ((now-start)+500)/1000,
-                    stats.fault_count, stats.dirty_count);
-        }
-    }
-
-    return -1;
-}
-
-static int suspend_and_state(int (*suspend)(void*), void* data,
-                             xc_interface *xch, int io_fd, int dom,
-                             xc_dominfo_t *info)
-{
-    if ( !(*suspend)(data) )
-    {
-        ERROR("Suspend request failed");
-        return -1;
-    }
-
-    if ( (xc_domain_getinfo(xch, dom, 1, info) != 1) ||
-         !info->shutdown || (info->shutdown_reason != SHUTDOWN_suspend) )
-    {
-        ERROR("Domain not in suspended state");
-        return -1;
-    }
-
-    return 0;
-}
-
-/*
-** Map the top-level page of MFNs from the guest. The guest might not have
-** finished resuming from a previous restore operation, so we wait a while for
-** it to update the MFN to a reasonable value.
-*/
-static void *map_frame_list_list(xc_interface *xch, uint32_t dom,
-                                 struct save_ctx *ctx,
-                                 shared_info_any_t *shinfo)
-{
-    int count = 100;
-    void *p;
-    struct domain_info_context *dinfo = &ctx->dinfo;
-    uint64_t fll = GET_FIELD(shinfo, arch.pfn_to_mfn_frame_list_list, dinfo->guest_width);
-
-    while ( count-- && (fll == 0) )
-    {
-        usleep(10000);
-        fll = GET_FIELD(shinfo, arch.pfn_to_mfn_frame_list_list, dinfo->guest_width);
-    }
-
-    if ( fll == 0 )
-    {
-        ERROR("Timed out waiting for frame list updated.");
-        return NULL;
-    }
-
-    p = xc_map_foreign_range(xch, dom, PAGE_SIZE, PROT_READ, fll);
-    if ( p == NULL )
-        PERROR("Couldn't map p2m_frame_list_list (errno %d)", errno);
-
-    return p;
-}
-
-/*
-** During transfer (or in the state file), all page-table pages must be
-** converted into a 'canonical' form where references to actual mfns
-** are replaced with references to the corresponding pfns.
-**
-** This function performs the appropriate conversion, taking into account
-** which entries do not require canonicalization (in particular, those
-** entries which map the virtual address reserved for the hypervisor).
-*/
-static int canonicalize_pagetable(struct save_ctx *ctx,
-                           unsigned long type, unsigned long pfn,
-                           const void *spage, void *dpage)
-{
-    struct domain_info_context *dinfo = &ctx->dinfo;
-    int i, pte_last, xen_start, xen_end, race = 0; 
-    uint64_t pte;
-
-    /*
-    ** We need to determine which entries in this page table hold
-    ** reserved hypervisor mappings. This depends on the current
-    ** page table type as well as the number of paging levels.
-    */
-    xen_start = xen_end = pte_last = PAGE_SIZE / 8;
-
-    if ( (ctx->pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L3TAB) )
-        xen_start = L3_PAGETABLE_ENTRIES_PAE;
-
-    /*
-    ** In PAE only the L2 mapping the top 1GB contains Xen mappings.
-    ** We can spot this by looking for the guest's mappingof the m2p.
-    ** Guests must ensure that this check will fail for other L2s.
-    */
-    if ( (ctx->pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L2TAB) )
-    {
-        int hstart;
-        uint64_t he;
-
-        hstart = (ctx->hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
-        he = ((const uint64_t *) spage)[hstart];
-
-        if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == ctx->m2p_mfn0 )
-        {
-            /* hvirt starts with xen stuff... */
-            xen_start = hstart;
-        }
-        else if ( ctx->hvirt_start != 0xf5800000 )
-        {
-            /* old L2s from before hole was shrunk... */
-            hstart = (0xf5800000 >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
-            he = ((const uint64_t *) spage)[hstart];
-            if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == ctx->m2p_mfn0 )
-                xen_start = hstart;
-        }
-    }
-
-    if ( (ctx->pt_levels == 4) && (type == XEN_DOMCTL_PFINFO_L4TAB) )
-    {
-        /*
-        ** XXX SMH: should compute these from hvirt_start (which we have)
-        ** and hvirt_end (which we don't)
-        */
-        xen_start = 256;
-        xen_end   = 272;
-    }
-
-    /* Now iterate through the page table, canonicalizing each PTE */
-    for (i = 0; i < pte_last; i++ )
-    {
-        unsigned long pfn, mfn;
-
-        pte = ((const uint64_t*)spage)[i];
-
-        if ( (i >= xen_start) && (i < xen_end) )
-            pte = 0;
-
-        if ( pte & _PAGE_PRESENT )
-        {
-            mfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
-            if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
-            {
-                /* This will happen if the type info is stale which
-                   is quite feasible under live migration */
-                pfn  = 0;  /* zap it - we'll retransmit this page later */
-                /* XXX: We can't spot Xen mappings in compat-mode L2es 
-                 * from 64-bit tools, but the only thing in them is the
-                 * compat m2p, so we quietly zap them.  This doesn't
-                 * count as a race, so don't report it. */
-                if ( !(type == XEN_DOMCTL_PFINFO_L2TAB 
-                       && sizeof (unsigned long) > dinfo->guest_width) )
-                     race = 1;  /* inform the caller; fatal if !live */ 
-            }
-            else
-                pfn = mfn_to_pfn(mfn);
-
-            pte &= ~MADDR_MASK_X86;
-            pte |= (uint64_t)pfn << PAGE_SHIFT;
-
-            /*
-             * PAE guest L3Es can contain these flags when running on
-             * a 64bit hypervisor. We zap these here to avoid any
-             * surprise at restore time...
-             */
-            if ( (ctx->pt_levels == 3) &&
-                 (type == XEN_DOMCTL_PFINFO_L3TAB) &&
-                 (pte & (_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED)) )

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.