[Xen-changelog] [xen-unstable] hvm: Lazy memory allocation during HVM restore. General cleanup and reduce gross assumptions about memory-map layout.
# HG changeset patch # User kfraser@xxxxxxxxxxxxxxxxxxxxx # Date 1175699908 -3600 # Node ID 3f6ad86d37bd361069148c5a23a5006ce5d13ee0 # Parent e74bf030365829522dae5603d02fb0b198e2e808 hvm: Lazy memory allocation during HVM restore. General cleanup and reduce gross assumptions about memory-map layout. Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx> --- tools/libxc/ia64/xc_ia64_linux_restore.c | 12 - tools/libxc/xc_hvm_restore.c | 299 +++++++++++++-------------- tools/libxc/xc_hvm_save.c | 336 +++++++++++++++---------------- tools/libxc/xc_linux_restore.c | 13 - tools/libxc/xenguest.h | 8 tools/libxc/xg_private.c | 2 tools/python/xen/xend/XendCheckpoint.py | 7 tools/xcutils/xc_restore.c | 34 +-- 8 files changed, 339 insertions(+), 372 deletions(-) diff -r e74bf0303658 -r 3f6ad86d37bd tools/libxc/ia64/xc_ia64_linux_restore.c --- a/tools/libxc/ia64/xc_ia64_linux_restore.c Wed Apr 04 15:57:49 2007 +0100 +++ b/tools/libxc/ia64/xc_ia64_linux_restore.c Wed Apr 04 16:18:28 2007 +0100 @@ -20,9 +20,6 @@ static unsigned long p2m_size; /* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */ static unsigned long nr_pfns; -/* largest possible value of nr_pfns (i.e. domain's maximum memory size) */ -static unsigned long max_nr_pfns; - static ssize_t read_exact(int fd, void *buf, size_t count) { @@ -62,8 +59,7 @@ read_page(int xc_handle, int io_fd, uint } int -xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, - unsigned long p2msize, unsigned long maxnrpfns, +xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, unsigned long p2msize, unsigned int store_evtchn, unsigned long *store_mfn, unsigned int console_evtchn, unsigned long *console_mfn) { @@ -86,7 +82,6 @@ xc_linux_restore(int xc_handle, int io_f start_info_t *start_info; p2m_size = p2msize; - max_nr_pfns = maxnrpfns; /* For info only */ nr_pfns = 0; @@ -106,11 +101,6 @@ xc_linux_restore(int xc_handle, int io_f /* needed for build domctl, but might as well do early */ ERROR("Unable to mlock ctxt"); return 1; - } - - if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_nr_pfns)) != 0) { - errno = ENOMEM; - goto out; } /* Get pages. */ diff -r e74bf0303658 -r 3f6ad86d37bd tools/libxc/xc_hvm_restore.c --- a/tools/libxc/xc_hvm_restore.c Wed Apr 04 15:57:49 2007 +0100 +++ b/tools/libxc/xc_hvm_restore.c Wed Apr 04 16:18:28 2007 +0100 @@ -31,18 +31,6 @@ #include <xen/hvm/ioreq.h> #include <xen/hvm/params.h> #include <xen/hvm/e820.h> - -/* max mfn of the whole machine */ -static unsigned long max_mfn; - -/* virtual starting address of the hypervisor */ -static unsigned long hvirt_start; - -/* #levels of page tables used by the currrent guest */ -static unsigned int pt_levels; - -/* A list of PFNs that exist, used when allocating memory to the guest */ -static xen_pfn_t *pfns = NULL; static ssize_t read_exact(int fd, void *buf, size_t count) @@ -50,23 +38,33 @@ read_exact(int fd, void *buf, size_t cou int r = 0, s; unsigned char *b = buf; - while (r < count) { + while ( r < count ) + { s = read(fd, &b[r], count - r); - if ((s == -1) && (errno == EINTR)) + if ( (s == -1) && (errno == EINTR) ) continue; - if (s <= 0) { + if ( s <= 0 ) break; - } r += s; } return (r == count) ? 
1 : 0; } -int xc_hvm_restore(int xc_handle, int io_fd, - uint32_t dom, unsigned long max_pfn, - unsigned int store_evtchn, unsigned long *store_mfn, - unsigned int pae, unsigned int apic) +#define BPL (sizeof(long)*8) +#define test_bit(bit, map) !!((map)[(bit)/BPL] & (1UL << ((bit) % BPL))) +#define set_bit(bit, map) ((map)[(bit)/BPL] |= (1UL << ((bit) % BPL))) +static int test_and_set_bit(unsigned long nr, unsigned long *map) +{ + int rc = test_bit(nr, map); + if ( !rc ) + set_bit(nr, map); + return rc; +} + +int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom, + unsigned int store_evtchn, unsigned long *store_mfn, + unsigned int pae, unsigned int apic) { DECLARE_DOMCTL; @@ -81,172 +79,157 @@ int xc_hvm_restore(int xc_handle, int io unsigned int rc = 1, n, i; uint32_t rec_len, nr_vcpus; uint8_t *hvm_buf = NULL; - unsigned long long v_end; - unsigned long shared_page_nr; + + /* Magic frames: ioreqs and xenstore comms. */ + uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */ unsigned long pfn; - unsigned int prev_pc, this_pc; int verify = 0; /* Types of the pfns in the current region */ unsigned long region_pfn_type[MAX_BATCH_SIZE]; + xen_pfn_t pfn_alloc_batch[MAX_BATCH_SIZE]; + unsigned int pfn_alloc_batch_size; /* The size of an array big enough to contain all guest pfns */ - unsigned long pfn_array_size = max_pfn + 1; - - /* Number of pages of memory the guest has. *Not* the same as max_pfn. */ - unsigned long nr_pages = max_pfn; - /* MMIO hole doesn't contain RAM */ - if ( nr_pages >= HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT ) - nr_pages -= HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT; - /* VGA hole doesn't contain RAM */ - nr_pages -= 0x20; - - /* XXX: Unlikely to be true, but matches previous behaviour. :( */ - v_end = (nr_pages + 0x20) << PAGE_SHIFT; - - DPRINTF("xc_hvm_restore:dom=%d, nr_pages=0x%lx, store_evtchn=%d, " - "*store_mfn=%ld, pae=%u, apic=%u.\n", - dom, nr_pages, store_evtchn, *store_mfn, pae, apic); - - if(!get_platform_info(xc_handle, dom, - &max_mfn, &hvirt_start, &pt_levels)) { - ERROR("Unable to get platform info."); - return 1; - } - - DPRINTF("xc_hvm_restore start: nr_pages = %lx, max_pfn = %lx, " - "max_mfn = %lx, hvirt_start=%lx, pt_levels=%d\n", - nr_pages, max_pfn, max_mfn, hvirt_start, pt_levels); - - if (mlock(&ctxt, sizeof(ctxt))) { + unsigned long max_pfn = 0xfffffUL; /* initial memory map guess: 4GB */ + unsigned long *pfn_bitmap = NULL, *new_pfn_bitmap; + + DPRINTF("xc_hvm_restore:dom=%d, store_evtchn=%d, " + "pae=%u, apic=%u.\n", dom, store_evtchn, pae, apic); + + DPRINTF("xc_hvm_restore start: max_pfn = %lx\n", max_pfn); + + if ( mlock(&ctxt, sizeof(ctxt)) ) + { /* needed for build dom0 op, but might as well do early */ ERROR("Unable to mlock ctxt"); return 1; } - - pfns = malloc(pfn_array_size * sizeof(xen_pfn_t)); - if (pfns == NULL) { - ERROR("memory alloc failed"); - errno = ENOMEM; - goto out; - } - - if(xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(nr_pages)) != 0) { - errno = ENOMEM; - goto out; - } - - for ( i = 0; i < pfn_array_size; i++ ) - pfns[i] = i; - for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < pfn_array_size; i++ ) - pfns[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT; - - /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */ - rc = xc_domain_memory_populate_physmap( - xc_handle, dom, (nr_pages > 0xa0) ? 
0xa0 : nr_pages, - 0, 0, &pfns[0x00]); - if ( (rc == 0) && (nr_pages > 0xc0) ) - rc = xc_domain_memory_populate_physmap( - xc_handle, dom, nr_pages - 0xa0, 0, 0, &pfns[0xc0]); - if ( rc != 0 ) - { - PERROR("Could not allocate memory for HVM guest.\n"); - goto out; - } - - - /**********XXXXXXXXXXXXXXXX******************/ - if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) { + if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 ) + { ERROR("Could not get domain info"); return 1; } domctl.cmd = XEN_DOMCTL_getdomaininfo; domctl.domain = (domid_t)dom; - if (xc_domctl(xc_handle, &domctl) < 0) { + if ( xc_domctl(xc_handle, &domctl) < 0 ) + { ERROR("Could not get information on new domain"); goto out; } - prev_pc = 0; + pfn_bitmap = calloc((max_pfn+1)/8, 1); + if ( pfn_bitmap == NULL ) + { + ERROR("Could not allocate pfn bitmap"); + goto out; + } n = 0; - while (1) { - + for ( ; ; ) + { int j; - this_pc = (n * 100) / nr_pages; - if ( (this_pc - prev_pc) >= 5 ) - { - PPRINTF("\b\b\b\b%3d%%", this_pc); - prev_pc = this_pc; - } - - if (!read_exact(io_fd, &j, sizeof(int))) { + if ( !read_exact(io_fd, &j, sizeof(int)) ) + { ERROR("HVM restore Error when reading batch size"); goto out; } PPRINTF("batch %d\n",j); - if (j == -1) { + if ( j == -1 ) + { verify = 1; DPRINTF("Entering page verify mode\n"); continue; } - if (j == 0) + if ( j == 0 ) break; /* our work here is done */ - if (j > MAX_BATCH_SIZE) { + if ( j > MAX_BATCH_SIZE ) + { ERROR("Max batch size exceeded. Giving up."); goto out; } - if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) { + if ( !read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long)) ) + { ERROR("Error when reading region pfn types"); goto out; } - region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_WRITE, region_pfn_type, j); - + pfn_alloc_batch_size = 0; for ( i = 0; i < j; i++ ) { - void *page; - pfn = region_pfn_type[i]; if ( pfn & XEN_DOMCTL_PFINFO_LTAB_MASK ) continue; - if ( pfn > max_pfn ) + while ( pfn > max_pfn ) { - ERROR("pfn out of range"); - goto out; + if ( max_pfn >= 0xfffffff ) + { + ERROR("Maximum PFN beyond reason (1TB) %lx\n", pfn); + goto out; + } + max_pfn = 2*max_pfn + 1; + new_pfn_bitmap = realloc(pfn_bitmap, (max_pfn+1)/8); + if ( new_pfn_bitmap == NULL ) + { + ERROR("Could not realloc pfn bitmap for max_pfn=%lx\n", + max_pfn); + goto out; + } + pfn_bitmap = new_pfn_bitmap; + memset(&pfn_bitmap[(max_pfn+1)/(2*BPL)], 0, (max_pfn+1)/(2*8)); } - if ( pfn >= 0xa0 && pfn < 0xc0) { - ERROR("hvm restore:pfn in vga hole"); - goto out; - } - + if ( !test_and_set_bit(pfn, pfn_bitmap) ) + pfn_alloc_batch[pfn_alloc_batch_size++] = pfn; + } + + if ( pfn_alloc_batch_size != 0 ) + { + rc = xc_domain_memory_populate_physmap( + xc_handle, dom, pfn_alloc_batch_size, 0, 0, pfn_alloc_batch); + if ( rc != 0 ) + { + PERROR("Could not allocate %u pages for HVM guest.\n", + pfn_alloc_batch_size); + goto out; + } + } + + region_base = xc_map_foreign_batch( + xc_handle, dom, PROT_WRITE, region_pfn_type, j); + + for ( i = 0; i < j; i++ ) + { + void *page; + + pfn = region_pfn_type[i]; + if ( pfn & XEN_DOMCTL_PFINFO_LTAB_MASK ) + continue; /* In verify mode, we use a copy; otherwise we work in place */ page = verify ? 
(void *)buf : (region_base + i*PAGE_SIZE); - if (!read_exact(io_fd, page, PAGE_SIZE)) { + if ( !read_exact(io_fd, page, PAGE_SIZE) ) + { ERROR("Error when reading page (%x)", i); goto out; } - if (verify) { - + if ( verify ) + { int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE); - - if (res) { - + if ( res ) + { int v; DPRINTF("************** pfn=%lx gotcs=%08lx " @@ -254,8 +237,8 @@ int xc_hvm_restore(int xc_handle, int io csum_page(region_base + i*PAGE_SIZE), csum_page(buf)); - for (v = 0; v < 4; v++) { - + for ( v = 0; v < 4; v++ ) + { unsigned long *p = (unsigned long *) (region_base + i*PAGE_SIZE); if (buf[v] != p[v]) @@ -265,82 +248,90 @@ int xc_hvm_restore(int xc_handle, int io } } /* end of 'batch' for loop */ + munmap(region_base, j*PAGE_SIZE); - n+= j; /* crude stats */ - - }/*while 1*/ + n += j; /* crude stats */ + } xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae); xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn); - if ( v_end > HVM_BELOW_4G_RAM_END ) - shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1; - else - shared_page_nr = (v_end >> PAGE_SHIFT) - 1; - - /* Ensure we clear these pages */ - if ( xc_clear_domain_page(xc_handle, dom, shared_page_nr) || - xc_clear_domain_page(xc_handle, dom, shared_page_nr-1) || - xc_clear_domain_page(xc_handle, dom, shared_page_nr-2) ) { + if ( !read_exact(io_fd, magic_pfns, sizeof(magic_pfns)) ) + { + ERROR("error reading magic page addresses\n"); + goto out; + } + + if ( xc_clear_domain_page(xc_handle, dom, magic_pfns[0]) || + xc_clear_domain_page(xc_handle, dom, magic_pfns[1]) || + xc_clear_domain_page(xc_handle, dom, magic_pfns[2]) ) + { rc = -1; goto out; } - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1); - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2); - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr); - - /* caculate the store_mfn , wrong val cause hang when introduceDomain */ - *store_mfn = (v_end >> PAGE_SHIFT) - 2; - DPRINTF("hvm restore: calculate new store_mfn=0x%lx, v_end=0x%llx.\n", - *store_mfn, v_end); - - if (!read_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) { + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, magic_pfns[0]); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1]); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, magic_pfns[2]); + *store_mfn = magic_pfns[2]; + DPRINTF("hvm restore: calculate new store_mfn=0x%lx.\n", *store_mfn); + + if ( !read_exact(io_fd, &nr_vcpus, sizeof(uint32_t)) ) + { ERROR("error read nr vcpu !\n"); goto out; } DPRINTF("hvm restore:get nr_vcpus=%d.\n", nr_vcpus); - for (i =0; i < nr_vcpus; i++) { - if (!read_exact(io_fd, &rec_len, sizeof(uint32_t))) { + for ( i = 0; i < nr_vcpus; i++ ) + { + if ( !read_exact(io_fd, &rec_len, sizeof(uint32_t)) ) + { ERROR("error read vcpu context size!\n"); goto out; } - if (rec_len != sizeof(ctxt)) { + if ( rec_len != sizeof(ctxt) ) + { ERROR("vcpu context size dismatch!\n"); goto out; } - if (!read_exact(io_fd, &(ctxt), sizeof(ctxt))) { + if ( !read_exact(io_fd, &(ctxt), sizeof(ctxt)) ) + { ERROR("error read vcpu context.\n"); goto out; } - if ( (rc = xc_vcpu_setcontext(xc_handle, dom, i, &ctxt)) ) { + if ( (rc = xc_vcpu_setcontext(xc_handle, dom, i, &ctxt)) ) + { ERROR("Could not set vcpu context, rc=%d", rc); goto out; } } /* restore hvm context including pic/pit/shpage */ - if (!read_exact(io_fd, &rec_len, sizeof(uint32_t))) { + if ( !read_exact(io_fd, &rec_len, sizeof(uint32_t)) ) + { 
ERROR("error read hvm context size!\n"); goto out; } hvm_buf = malloc(rec_len); - if (hvm_buf == NULL) { + if ( hvm_buf == NULL ) + { ERROR("memory alloc for hvm context buffer failed"); errno = ENOMEM; goto out; } - if (!read_exact(io_fd, hvm_buf, rec_len)) { + if ( !read_exact(io_fd, hvm_buf, rec_len) ) + { ERROR("error read hvm buffer!\n"); goto out; } - if (( rc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_len))) { + if ( (rc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_len)) ) + { ERROR("error set hvm buffer!\n"); goto out; } @@ -351,8 +342,8 @@ int xc_hvm_restore(int xc_handle, int io out: if ( (rc != 0) && (dom != 0) ) xc_domain_destroy(xc_handle, dom); - free(pfns); free(hvm_buf); + free(pfn_bitmap); DPRINTF("Restore exit with rc=%d\n", rc); diff -r e74bf0303658 -r 3f6ad86d37bd tools/libxc/xc_hvm_save.c --- a/tools/libxc/xc_hvm_save.c Wed Apr 04 15:57:49 2007 +0100 +++ b/tools/libxc/xc_hvm_save.c Wed Apr 04 16:18:28 2007 +0100 @@ -45,15 +45,6 @@ #define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */ #define DEF_MAX_FACTOR 3 /* never send more than 3x nr_pfns */ -/* max mfn of the whole machine */ -static unsigned long max_mfn; - -/* virtual starting address of the hypervisor */ -static unsigned long hvirt_start; - -/* #levels of page tables used by the current guest */ -static unsigned int pt_levels; - /* Shared-memory bitmaps for getting log-dirty bits from qemu */ static unsigned long *qemu_bitmaps[2]; static int qemu_active; @@ -274,9 +265,8 @@ int xc_hvm_save(int xc_handle, int io_fd xc_dominfo_t info; int rc = 1, i, j, last_iter, iter = 0; - int live = (flags & XCFLAGS_LIVE); - int debug = (flags & XCFLAGS_DEBUG); - int stdvga = (flags & XCFLAGS_STDVGA); + int live = !!(flags & XCFLAGS_LIVE); + int debug = !!(flags & XCFLAGS_DEBUG); int sent_last_iter, skip_this_iter; /* The highest guest-physical frame number used by the current guest */ @@ -285,8 +275,8 @@ int xc_hvm_save(int xc_handle, int io_fd /* The size of an array big enough to contain all guest pfns */ unsigned long pfn_array_size; - /* Other magic frames: ioreqs and xenstore comms */ - unsigned long ioreq_pfn, bufioreq_pfn, store_pfn; + /* Magic frames: ioreqs and xenstore comms. */ + uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */ /* A copy of the CPU context of the guest. */ vcpu_guest_context_t ctxt; @@ -313,104 +303,71 @@ int xc_hvm_save(int xc_handle, int io_fd xc_shadow_op_stats_t stats; - unsigned long total_sent = 0; + unsigned long total_sent = 0; DPRINTF("xc_hvm_save: dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, " "live=%d, debug=%d.\n", dom, max_iters, max_factor, flags, live, debug); /* If no explicit control parameters given, use defaults */ - if(!max_iters) - max_iters = DEF_MAX_ITERS; - if(!max_factor) - max_factor = DEF_MAX_FACTOR; + max_iters = max_iters ? : DEF_MAX_ITERS; + max_factor = max_factor ? 
: DEF_MAX_FACTOR; initialize_mbit_rate(); - if(!get_platform_info(xc_handle, dom, - &max_mfn, &hvirt_start, &pt_levels)) { - ERROR("HVM:Unable to get platform info."); + if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 ) + { + ERROR("HVM: Could not get domain info"); return 1; } - - if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) { - ERROR("HVM:Could not get domain info"); + nr_vcpus = info.nr_online_vcpus; + + if ( mlock(&ctxt, sizeof(ctxt)) ) + { + ERROR("HVM: Unable to mlock ctxt"); return 1; } - nr_vcpus = info.nr_online_vcpus; - - if (mlock(&ctxt, sizeof(ctxt))) { - ERROR("HVM:Unable to mlock ctxt"); - return 1; - } /* Only have to worry about vcpu 0 even for SMP */ - if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) { - ERROR("HVM:Could not get vcpu context"); - goto out; - } - - /* cheesy sanity check */ - if ((info.max_memkb >> (PAGE_SHIFT - 10)) > max_mfn) { - ERROR("Invalid HVM state record -- pfn count out of range: %lu", - (info.max_memkb >> (PAGE_SHIFT - 10))); - goto out; - } - - if ( xc_get_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, &store_pfn) - || xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, &ioreq_pfn) - || xc_get_hvm_param(xc_handle, dom, - HVM_PARAM_BUFIOREQ_PFN, &bufioreq_pfn) ) - { - ERROR("HVM: Could not read magic PFN parameters"); - goto out; - } - DPRINTF("saved hvm domain info:max_memkb=0x%lx, max_mfn=0x%lx, " - "nr_pages=0x%lx\n", info.max_memkb, max_mfn, info.nr_pages); - - if (live) { - - if (xc_shadow_control(xc_handle, dom, - XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, - NULL, 0, NULL, 0, NULL) < 0) { + if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) ) + { + ERROR("HVM: Could not get vcpu context"); + goto out; + } + + DPRINTF("saved hvm domain info: max_memkb=0x%lx, nr_pages=0x%lx\n", + info.max_memkb, info.nr_pages); + + if ( live ) + { + /* Live suspend. Enable log-dirty mode. */ + if ( xc_shadow_control(xc_handle, dom, + XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, + NULL, 0, NULL, 0, NULL) < 0 ) + { ERROR("Couldn't enable shadow mode"); goto out; } - last_iter = 0; - DPRINTF("hvm domain live migration debug start: logdirty enable.\n"); - } else { - /* This is a non-live suspend. Issue the call back to get the - domain suspended */ - - last_iter = 1; - - /* suspend hvm domain */ - if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt)) { + } + else + { + /* This is a non-live suspend. Suspend the domain .*/ + if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt) ) + { ERROR("HVM Domain appears not to have suspended"); goto out; } } - DPRINTF("after 1st handle hvm domain nr_pages=0x%lx, " + last_iter = !live; + + max_pfn = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom); + + DPRINTF("after 1st handle hvm domain max_pfn=0x%lx, " "max_memkb=0x%lx, live=%d.\n", - info.nr_pages, info.max_memkb, live); - - /* Calculate the highest PFN of "normal" memory: - * HVM memory is sequential except for the VGA and MMIO holes. */ - max_pfn = info.nr_pages - 1; - /* If the domain has a Cirrus framebuffer and we haven't already - * suspended qemu-dm, it will have 8MB of framebuffer memory - * still allocated, which we don't want to copy: qemu will save it - * for us later */ - if ( live && !stdvga ) - max_pfn -= 0x800; - /* Skip the VGA hole from 0xa0000 to 0xc0000 */ - max_pfn += 0x20; - /* Skip the MMIO hole: 256MB just below 4GB */ - if ( max_pfn >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) ) - max_pfn += (HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT); + max_pfn, info.max_memkb, live); /* Size of any array that covers 0 ... 
max_pfn */ pfn_array_size = max_pfn + 1; @@ -420,15 +377,15 @@ int xc_hvm_save(int xc_handle, int io_fd /* calculate the power of 2 order of pfn_array_size, e.g. 15->4 16->4 17->5 */ - for (i = pfn_array_size-1, order_nr = 0; i ; i >>= 1, order_nr++) + for ( i = pfn_array_size-1, order_nr = 0; i ; i >>= 1, order_nr++ ) continue; /* Setup to_send / to_fix and to_skip bitmaps */ to_send = malloc(BITMAP_SIZE); to_skip = malloc(BITMAP_SIZE); - - if (live) { + if ( live ) + { /* Get qemu-dm logging dirty pages too */ void *seg = init_qemu_maps(dom, BITMAP_SIZE); qemu_bitmaps[0] = seg; @@ -445,44 +402,47 @@ int xc_hvm_save(int xc_handle, int io_fd } hvm_buf = malloc(hvm_buf_size); - if (!to_send ||!to_skip ||!hvm_buf) { + if ( !to_send || !to_skip || !hvm_buf ) + { ERROR("Couldn't allocate memory"); goto out; } memset(to_send, 0xff, BITMAP_SIZE); - if (lock_pages(to_send, BITMAP_SIZE)) { + if ( lock_pages(to_send, BITMAP_SIZE) ) + { ERROR("Unable to lock to_send"); return 1; } /* (to fix is local only) */ - if (lock_pages(to_skip, BITMAP_SIZE)) { + if ( lock_pages(to_skip, BITMAP_SIZE) ) + { ERROR("Unable to lock to_skip"); return 1; } analysis_phase(xc_handle, dom, pfn_array_size, to_skip, 0); - /* We want zeroed memory so use calloc rather than malloc. */ pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch)); - - if (pfn_batch == NULL) { + if ( pfn_batch == NULL ) + { ERROR("failed to alloc memory for pfn_batch array"); errno = ENOMEM; goto out; } /* Start writing out the saved-domain record. */ - if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) { + if ( !write_exact(io_fd, &max_pfn, sizeof(unsigned long)) ) + { ERROR("write: max_pfn"); goto out; } - while(1) { - + for ( ; ; ) + { unsigned int prev_pc, sent_this_iter, N, batch; iter++; @@ -493,51 +453,56 @@ int xc_hvm_save(int xc_handle, int io_fd DPRINTF("Saving HVM domain memory pages: iter %d 0%%", iter); - while( N < pfn_array_size ){ - + while ( N < pfn_array_size ) + { unsigned int this_pc = (N * 100) / pfn_array_size; int rc; - if ((this_pc - prev_pc) >= 5) { + if ( (this_pc - prev_pc) >= 5 ) + { DPRINTF("\b\b\b\b%3d%%", this_pc); prev_pc = this_pc; } - /* slightly wasteful to peek the whole array evey time, - but this is fast enough for the moment. */ - if (!last_iter && (rc = xc_shadow_control( + if ( !last_iter ) + { + /* Slightly wasteful to peek the whole array evey time, + but this is fast enough for the moment. */ + rc = xc_shadow_control( xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, - pfn_array_size, NULL, 0, NULL)) != pfn_array_size) { - ERROR("Error peeking HVM shadow bitmap"); - goto out; - } - + pfn_array_size, NULL, 0, NULL); + if ( rc != pfn_array_size ) + { + ERROR("Error peeking HVM shadow bitmap"); + goto out; + } + } /* load pfn_batch[] with the mfn of all the pages we're doing in this batch. 
*/ - for (batch = 0; batch < MAX_BATCH_SIZE && N < pfn_array_size; N++){ - + for ( batch = 0; + (batch < MAX_BATCH_SIZE) && (N < pfn_array_size); + N++ ) + { int n = permute(N, pfn_array_size, order_nr); - if (0&&debug) { + if ( 0 && debug ) DPRINTF("%d pfn= %08lx %d \n", iter, (unsigned long)n, test_bit(n, to_send)); - } - - if (!last_iter && test_bit(n, to_send)&& test_bit(n, to_skip)) + + if ( !last_iter && + test_bit(n, to_send) && + test_bit(n, to_skip) ) skip_this_iter++; /* stats keeping */ - if (!((test_bit(n, to_send) && !test_bit(n, to_skip)) || - (test_bit(n, to_send) && last_iter))) + if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) || + (test_bit(n, to_send) && last_iter)) ) continue; /* Skip PFNs that aren't really there */ - if ((n >= 0xa0 && n < 0xc0) /* VGA hole */ - || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) - && n < (1ULL << 32) >> PAGE_SHIFT) /* 4G MMIO hole */ - || n == store_pfn - || n == ioreq_pfn - || n == bufioreq_pfn) + if ( (n >= 0xa0 && n < 0xc0) /* VGA hole */ + || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) && + n < (1ULL << 32) >> PAGE_SHIFT) /* 4G MMIO hole */ ) continue; /* @@ -551,24 +516,27 @@ int xc_hvm_save(int xc_handle, int io_fd batch++; } - if (batch == 0) + if ( batch == 0 ) goto skip; /* vanishingly unlikely... */ - /* map_foreign use pfns now !*/ - if ((region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_READ, pfn_batch, batch)) == 0) { + region_base = xc_map_foreign_batch( + xc_handle, dom, PROT_READ, pfn_batch, batch); + if ( region_base == 0 ) + { ERROR("map batch failed"); goto out; } /* write num of pfns */ - if(!write_exact(io_fd, &batch, sizeof(unsigned int))) { + if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) ) + { ERROR("Error when writing to state file (2)"); goto out; } /* write all the pfns */ - if(!write_exact(io_fd, pfn_batch, sizeof(unsigned long)*batch)) { + if ( !write_exact(io_fd, pfn_batch, sizeof(unsigned long)*batch) ) + { ERROR("Error when writing to state file (3)"); goto out; } @@ -598,21 +566,23 @@ int xc_hvm_save(int xc_handle, int io_fd DPRINTF("\r %d: sent %d, skipped %d, ", iter, sent_this_iter, skip_this_iter ); - if (last_iter) { + if ( last_iter ) + { print_stats( xc_handle, dom, sent_this_iter, &stats, 1); - DPRINTF("Total pages sent= %ld (%.2fx)\n", total_sent, ((float)total_sent)/pfn_array_size ); } - if (last_iter && debug){ + if ( last_iter && debug ) + { int minusone = -1; memset(to_send, 0xff, BITMAP_SIZE); debug = 0; DPRINTF("Entering debug resend-all mode\n"); /* send "-1" to put receiver into debug mode */ - if(!write_exact(io_fd, &minusone, sizeof(int))) { + if ( !write_exact(io_fd, &minusone, sizeof(int)) ) + { ERROR("Error when writing to state file (6)"); goto out; } @@ -620,22 +590,22 @@ int xc_hvm_save(int xc_handle, int io_fd continue; } - if (last_iter) break; - - if (live) { - - - if( - ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || - (iter >= max_iters) || - (sent_this_iter+skip_this_iter < 50) || - (total_sent > pfn_array_size*max_factor) ) { - + if ( last_iter ) + break; + + if ( live ) + { + if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || + (iter >= max_iters) || + (sent_this_iter+skip_this_iter < 50) || + (total_sent > pfn_array_size*max_factor) ) + { DPRINTF("Start last iteration for HVM domain\n"); last_iter = 1; - if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info, - &ctxt)) { + if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, + &ctxt)) + { ERROR("Domain appears not to have suspended"); goto out; } @@ -645,25 +615,30 
@@ int xc_hvm_save(int xc_handle, int io_fd (unsigned long)ctxt.user_regs.edx); } - if (xc_shadow_control(xc_handle, dom, - XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, - pfn_array_size, NULL, - 0, &stats) != pfn_array_size) { + if ( xc_shadow_control(xc_handle, dom, + XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, + pfn_array_size, NULL, + 0, &stats) != pfn_array_size ) + { ERROR("Error flushing shadow PT"); goto out; } /* Pull in the dirty bits from qemu too */ - if (!last_iter) { + if ( !last_iter ) + { qemu_active = qemu_non_active; qemu_non_active = qemu_active ? 0 : 1; qemu_flip_buffer(dom, qemu_active); - for (j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++) { + for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ ) + { to_send[j] |= qemu_bitmaps[qemu_non_active][j]; qemu_bitmaps[qemu_non_active][j] = 0; } - } else { - for (j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++) + } + else + { + for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ ) to_send[j] |= qemu_bitmaps[qemu_active][j]; } @@ -671,8 +646,6 @@ int xc_hvm_save(int xc_handle, int io_fd print_stats(xc_handle, dom, sent_this_iter, &stats, 1); } - - } /* end of while 1 */ @@ -680,52 +653,74 @@ int xc_hvm_save(int xc_handle, int io_fd /* Zero terminate */ i = 0; - if (!write_exact(io_fd, &i, sizeof(int))) { + if ( !write_exact(io_fd, &i, sizeof(int)) ) + { ERROR("Error when writing to state file (6)"); goto out; } + /* Save magic-page locations. */ + memset(magic_pfns, 0, sizeof(magic_pfns)); + xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, + (unsigned long *)&magic_pfns[0]); + xc_get_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, + (unsigned long *)&magic_pfns[1]); + xc_get_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, + (unsigned long *)&magic_pfns[2]); + if ( !write_exact(io_fd, magic_pfns, sizeof(magic_pfns)) ) + { + ERROR("Error when writing to state file (7)"); + goto out; + } /* save vcpu/vmcs context */ - if (!write_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) { + if ( !write_exact(io_fd, &nr_vcpus, sizeof(uint32_t)) ) + { ERROR("error write nr vcpus"); goto out; } /*XXX: need a online map to exclude down cpu */ - for (i = 0; i < nr_vcpus; i++) { - - if (xc_vcpu_getcontext(xc_handle, dom, i, &ctxt)) { + for ( i = 0; i < nr_vcpus; i++ ) + { + if ( xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) ) + { ERROR("HVM:Could not get vcpu context"); goto out; } rec_size = sizeof(ctxt); DPRINTF("write %d vcpucontext of total %d.\n", i, nr_vcpus); - if (!write_exact(io_fd, &rec_size, sizeof(uint32_t))) { + if ( !write_exact(io_fd, &rec_size, sizeof(uint32_t)) ) + { ERROR("error write vcpu ctxt size"); goto out; } - if (!write_exact(io_fd, &(ctxt), sizeof(ctxt)) ) { + if ( !write_exact(io_fd, &(ctxt), sizeof(ctxt)) ) + { ERROR("write vmcs failed!\n"); goto out; } } if ( (rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf, - hvm_buf_size)) == -1) { + hvm_buf_size)) == -1 ) + { ERROR("HVM:Could not get hvm buffer"); goto out; } - if (!write_exact(io_fd, &rec_size, sizeof(uint32_t))) { + if ( !write_exact(io_fd, &rec_size, sizeof(uint32_t)) ) + { ERROR("error write hvm buffer size"); goto out; } - if ( !write_exact(io_fd, hvm_buf, rec_size) ) { + if ( !write_exact(io_fd, hvm_buf, rec_size) ) + { ERROR("write HVM info failed!\n"); + goto out; } /* Success! 
*/ @@ -733,12 +728,11 @@ int xc_hvm_save(int xc_handle, int io_fd out: - if (live) { - if(xc_shadow_control(xc_handle, dom, - XEN_DOMCTL_SHADOW_OP_OFF, - NULL, 0, NULL, 0, NULL) < 0) { + if ( live ) + { + if ( xc_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_OFF, + NULL, 0, NULL, 0, NULL) < 0 ) DPRINTF("Warning - couldn't disable shadow mode"); - } } free(hvm_buf); diff -r e74bf0303658 -r 3f6ad86d37bd tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Wed Apr 04 15:57:49 2007 +0100 +++ b/tools/libxc/xc_linux_restore.c Wed Apr 04 16:18:28 2007 +0100 @@ -27,9 +27,6 @@ static unsigned long p2m_size; /* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */ static unsigned long nr_pfns; - -/* largest possible value of nr_pfns (i.e. domain's maximum memory size) */ -static unsigned long max_nr_pfns; /* Live mapping of the table mapping each PFN to its current MFN. */ static xen_pfn_t *live_p2m = NULL; @@ -145,7 +142,7 @@ static int uncanonicalize_pagetable(int int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, - unsigned long p2msize, unsigned long maxnrpfns, + unsigned long p2msize, unsigned int store_evtchn, unsigned long *store_mfn, unsigned int console_evtchn, unsigned long *console_mfn) { @@ -198,7 +195,6 @@ int xc_linux_restore(int xc_handle, int int new_ctxt_format = 0; p2m_size = p2msize; - max_nr_pfns = maxnrpfns; /* For info only */ nr_pfns = 0; @@ -334,11 +330,6 @@ int xc_linux_restore(int xc_handle, int goto out; } shared_info_frame = domctl.u.getdomaininfo.shared_info_frame; - - if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_nr_pfns)) != 0) { - errno = ENOMEM; - goto out; - } /* Mark all PFNs as invalid; we allocate on demand */ for ( pfn = 0; pfn < p2m_size; pfn++ ) @@ -747,7 +738,7 @@ int xc_linux_restore(int xc_handle, int } DPRINTF("\b\b\b\b100%%\n"); - DPRINTF("Memory reloaded (%ld pages of max %ld)\n", nr_pfns, max_nr_pfns); + DPRINTF("Memory reloaded (%ld pages)\n", nr_pfns); /* Get the list of PFNs that are not in the psuedo-phys map */ { diff -r e74bf0303658 -r 3f6ad86d37bd tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Wed Apr 04 15:57:49 2007 +0100 +++ b/tools/libxc/xenguest.h Wed Apr 04 16:18:28 2007 +0100 @@ -50,7 +50,7 @@ int xc_hvm_save(int xc_handle, int io_fd * @return 0 on success, -1 on failure */ int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, - unsigned long p2m_size, unsigned long max_nr_pfns, + unsigned long p2m_size, unsigned int store_evtchn, unsigned long *store_mfn, unsigned int console_evtchn, unsigned long *console_mfn); @@ -61,9 +61,9 @@ int xc_linux_restore(int xc_handle, int * @return 0 on success, -1 on failure */ int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom, - unsigned long max_pfn, unsigned int store_evtchn, - unsigned long *store_mfn, - unsigned int pae, unsigned int apic); + unsigned int store_evtchn, + unsigned long *store_mfn, + unsigned int pae, unsigned int apic); /** * This function will create a domain for a paravirtualized Linux diff -r e74bf0303658 -r 3f6ad86d37bd tools/libxc/xg_private.c --- a/tools/libxc/xg_private.c Wed Apr 04 15:57:49 2007 +0100 +++ b/tools/libxc/xg_private.c Wed Apr 04 16:18:28 2007 +0100 @@ -211,7 +211,7 @@ __attribute__((weak)) __attribute__((weak)) int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom, - unsigned long max_pfn, unsigned int store_evtchn, + unsigned int store_evtchn, unsigned long *store_mfn, unsigned int pae, unsigned int apic) { diff -r e74bf0303658 -r 3f6ad86d37bd 
tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Wed Apr 04 15:57:49 2007 +0100 +++ b/tools/python/xen/xend/XendCheckpoint.py Wed Apr 04 16:18:28 2007 +0100 @@ -91,7 +91,7 @@ def save(fd, dominfo, network, live, dst # more information. cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd), str(dominfo.getDomid()), "0", "0", - str(int(live) | (int(hvm) << 2) | (int(stdvga) << 3)) ] + str(int(live) | (int(hvm) << 2)) ] log.debug("[xc_save]: %s", string.join(cmd)) def saveInputHandler(line, tochild): @@ -187,7 +187,6 @@ def restore(xd, fd, dominfo = None, paus assert console_port nr_pfns = (dominfo.getMemoryTarget() + 3) / 4 - max_nr_pfns = (dominfo.getMemoryMaximum() + 3) / 4 # if hvm, pass mem size to calculate the store_mfn image_cfg = dominfo.info.get('image', {}) @@ -221,8 +220,10 @@ def restore(xd, fd, dominfo = None, paus shadow_cur = xc.shadow_mem_control(dominfo.getDomid(), shadow) dominfo.info['shadow_memory'] = shadow_cur + xc.domain_setmaxmem(dominfo.getDomid(), dominfo.getMemoryMaximum()) + cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE), - fd, dominfo.getDomid(), p2m_size, max_nr_pfns, + fd, dominfo.getDomid(), p2m_size, store_port, console_port, int(is_hvm), pae, apic]) log.debug("[xc_restore]: %s", string.join(cmd)) diff -r e74bf0303658 -r 3f6ad86d37bd tools/xcutils/xc_restore.c --- a/tools/xcutils/xc_restore.c Wed Apr 04 15:57:49 2007 +0100 +++ b/tools/xcutils/xc_restore.c Wed Apr 04 16:18:28 2007 +0100 @@ -21,37 +21,37 @@ main(int argc, char **argv) unsigned int xc_fd, io_fd, domid, store_evtchn, console_evtchn; unsigned int hvm, pae, apic; int ret; - unsigned long p2m_size, max_nr_pfns, store_mfn, console_mfn; + unsigned long p2m_size, store_mfn, console_mfn; - if (argc != 10) - errx(1, "usage: %s iofd domid p2m_size max_nr_pfns store_evtchn " + if ( argc != 9 ) + errx(1, "usage: %s iofd domid p2m_size store_evtchn " "console_evtchn hvm pae apic", argv[0]); xc_fd = xc_interface_open(); - if (xc_fd < 0) + if ( xc_fd < 0 ) errx(1, "failed to open control interface"); io_fd = atoi(argv[1]); domid = atoi(argv[2]); p2m_size = atoi(argv[3]); - max_nr_pfns = atoi(argv[4]); - store_evtchn = atoi(argv[5]); - console_evtchn = atoi(argv[6]); - hvm = atoi(argv[7]); - pae = atoi(argv[8]); - apic = atoi(argv[9]); + store_evtchn = atoi(argv[4]); + console_evtchn = atoi(argv[5]); + hvm = atoi(argv[6]); + pae = atoi(argv[7]); + apic = atoi(argv[8]); - if (hvm) { - ret = xc_hvm_restore(xc_fd, io_fd, domid, max_nr_pfns, store_evtchn, - &store_mfn, pae, apic); - } else + if ( hvm ) + ret = xc_hvm_restore(xc_fd, io_fd, domid, store_evtchn, + &store_mfn, pae, apic); + else ret = xc_linux_restore(xc_fd, io_fd, domid, p2m_size, - max_nr_pfns, store_evtchn, &store_mfn, + store_evtchn, &store_mfn, console_evtchn, &console_mfn); - if (ret == 0) { + if ( ret == 0 ) + { printf("store-mfn %li\n", store_mfn); - if (!hvm) + if ( !hvm ) printf("console-mfn %li\n", console_mfn); fflush(stdout); } _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
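
The core of this changeset is the new allocation path in xc_hvm_restore(): instead of pre-populating the whole physmap from a guessed memory-map layout, the restorer tracks which PFNs it has seen in a bitmap that starts from a 4GB guess and doubles on demand (max_pfn = 2*max_pfn + 1), then allocates each batch of previously unseen PFNs with xc_domain_memory_populate_physmap(). The standalone sketch below mirrors that logic only; populate_batch() is a made-up stub standing in for the libxc call, and the PFN stream is invented for illustration.

/* Minimal sketch of the demand-grown PFN bitmap used by the new
 * xc_hvm_restore() allocation path.  populate_batch() is a stub standing
 * in for xc_domain_memory_populate_physmap(); everything else mirrors the
 * patch: start with a 4GB guess, double the bitmap whenever a larger PFN
 * arrives, and only allocate each PFN the first time it is seen. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BPL (sizeof(unsigned long) * 8)
#define test_bit(bit, map) (!!((map)[(bit)/BPL] & (1UL << ((bit) % BPL))))
#define set_bit(bit, map)  ((map)[(bit)/BPL] |= (1UL << ((bit) % BPL)))

static int test_and_set_bit(unsigned long nr, unsigned long *map)
{
    int rc = test_bit(nr, map);
    if ( !rc )
        set_bit(nr, map);
    return rc;
}

/* Stand-in for xc_domain_memory_populate_physmap(). */
static int populate_batch(unsigned long *batch, unsigned int n)
{
    printf("allocating %u new pfns (first=%#lx)\n", n, n ? batch[0] : 0);
    return 0;
}

int main(void)
{
    unsigned long max_pfn = 0xfffffUL;   /* initial memory-map guess: 4GB */
    unsigned long *bitmap = calloc((max_pfn + 1) / 8, 1);
    unsigned long stream[] = { 0x100, 0x100, 0x2000, 0x123456 }; /* invented PFNs */
    unsigned long batch[16];             /* toy batch; real code uses MAX_BATCH_SIZE */
    unsigned int i, batch_size = 0;

    if ( bitmap == NULL )
        return 1;

    for ( i = 0; i < sizeof(stream) / sizeof(stream[0]); i++ )
    {
        unsigned long pfn = stream[i];

        /* Grow the bitmap geometrically until it covers this PFN. */
        while ( pfn > max_pfn )
        {
            unsigned long *nb = realloc(bitmap, (max_pfn + 1) / 4);
            if ( nb == NULL )
                return 1;
            bitmap = nb;
            /* Zero the newly added upper half. */
            memset((char *)bitmap + (max_pfn + 1) / 8, 0, (max_pfn + 1) / 8);
            max_pfn = 2 * max_pfn + 1;
        }

        /* Allocate each PFN only the first time it appears in the stream. */
        if ( !test_and_set_bit(pfn, bitmap) )
            batch[batch_size++] = pfn;
    }

    if ( batch_size != 0 )
        populate_batch(batch, batch_size);

    free(bitmap);
    return 0;
}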
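
The patch also changes the save-image layout: after the zero batch terminator, xc_hvm_save() now writes a three-entry uint64_t record (ioreq_pfn, bufioreq_pfn, store_pfn) ahead of the vcpu count, and xc_hvm_restore() reads it back instead of deriving those frames from v_end. A rough sketch of that ordering, using a memory buffer in place of the save/restore file descriptor and invented PFN values:

/* Sketch of the new "magic PFN" record in the HVM save image:
 * zero batch terminator, then { ioreq_pfn, bufioreq_pfn, store_pfn },
 * then the vcpu count.  A memory buffer stands in for the io_fd. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    unsigned char image[64];           /* stands in for the save/restore fd */
    size_t off = 0;

    /* Save side: terminator, then the magic PFNs, then the vcpu count. */
    int terminator = 0;
    uint64_t magic_pfns[3] = { 0xfeff0, 0xfeff1, 0xfeff2 }; /* ioreq, bufioreq, store (invented) */
    uint32_t nr_vcpus = 1;

    /* Restore-side copies. */
    int t;
    uint64_t m[3];
    uint32_t v;

    memcpy(image + off, &terminator, sizeof(terminator)); off += sizeof(terminator);
    memcpy(image + off, magic_pfns, sizeof(magic_pfns));  off += sizeof(magic_pfns);
    memcpy(image + off, &nr_vcpus, sizeof(nr_vcpus));     off += sizeof(nr_vcpus);

    /* Restore side: parse the same fields back, in the same order. */
    off = 0;
    memcpy(&t, image + off, sizeof(t)); off += sizeof(t);
    memcpy(m, image + off, sizeof(m));  off += sizeof(m);
    memcpy(&v, image + off, sizeof(v)); off += sizeof(v);

    printf("terminator=%d ioreq=%#llx bufioreq=%#llx store=%#llx vcpus=%u\n",
           t, (unsigned long long)m[0], (unsigned long long)m[1],
           (unsigned long long)m[2], v);
    return 0;
}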