[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [XenPPC][Patch] xc_ppc_linux_save.c xc_ppc_linux_restore.c
These two files need to be put under tools/libxc/powerpc64 . /****************************************************************************** * xc_ppc_linux_save.c * * Save the state of a running Linux session. * * Copyright (c) 2003, K A Fraser. * Rewritten for ppc */ #include <inttypes.h> #include <time.h> #include <stdlib.h> #include <unistd.h> #include <sys/time.h> #include "htab.h" #include "xg_private.h" /* ** Default values for important tuning parameters. Can override by passing ** non-zero replacement values to xc_linux_save(). ** ** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too. ** */ #define DEF_MAX_ITERS (4 - 1) /* limit us to 4 times round loop */ #define DEF_MAX_FACTOR 3 /* never send more than 3x nr_pfns */ /* ** During (live) save/migrate, we maintain a number of bitmaps to track ** which pages we have to send, and to skip. */ #define DECOR 0x80000000 // indicates htab address //#define DECOR 0x400000 // indicates htab address #define LOG_PTE_SIZE 4 #define INVALID_MFN (~0ULL) #define BITS_PER_LONG (sizeof(unsigned long) * 8) #define BITMAP_ENTRY(_nr,_bmap) \ ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG] #define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG) static inline int test_bit (int nr, volatile void * addr) { return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1; } static inline void clear_bit (int nr, volatile void * addr) { BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr)); } static inline void set_bit ( int nr, volatile void * addr) { BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr)); } /* total number of pages used by the current guest */ static unsigned long max_pfn; static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd, int dom, xc_dominfo_t *info) { int i = 0; DPRINTF("suspend_and_state()\n"); if (!(*suspend)(dom)) { ERROR("Suspend request failed"); return -1; } retry: if (xc_domain_getinfo(xc_handle, dom, 1, info) != 1) { ERROR("Could not get domain info"); return -1; } if (info->shutdown && info->shutdown_reason == SHUTDOWN_suspend) return 0; // success if (info->paused) { // try unpausing domain, wait, and retest xc_domain_unpause(xc_handle, dom); ERROR("Domain was paused. Wait and re-test."); usleep(10000); // 10ms goto retry; } if(++i < 100) { ERROR("Retry suspend domain."); usleep(10000); // 10ms goto retry; } ERROR("Unable to suspend domain."); return -1; } static inline ssize_t write_exact(int fd, void *buf, size_t count) { if (write(fd, buf, count) != count) return 0; return 1; } int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, uint32_t max_factor, uint32_t flags, int (*suspend)(int)) { DECLARE_DOMCTL; xc_dominfo_t info; int rc = 1; int debug = 0; /* The new domain's shared-info frame number. */ unsigned long shared_info_frame; /* A copy of the CPU context of the guest. */ vcpu_guest_context_t ctxt; xen_pfn_t *page_array = NULL; /* Live mapping of shared info structure */ shared_info_t *live_shinfo = NULL; DPRINTF("xc_linux_save (ppc): started dom=%d\n", dom); if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) { ERROR("Could not get domain info"); return 1; } shared_info_frame = info.shared_info_frame; /* Map the shared info frame */ live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, shared_info_frame); if (!live_shinfo) { ERROR("Couldn't map live_shinfo"); goto out; } max_pfn = info.max_memkb >> (PAGE_SHIFT - 10); DPRINTF("max_pfn: 0x%08lx\n", max_pfn); page_array = malloc(max_pfn * sizeof(xen_pfn_t)); if (page_array == NULL) { ERROR("Could not allocate memory"); goto out; } /* This is expected by xm restore. */ if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) { ERROR("write: max_pfn"); goto out; } /* xc_linux_restore starts to read here. */ /* Write a version number. This can avoid searching for a stupid bug if the format change. The version is hard-coded, don't forget to change the restore code too! */ { unsigned long version = 1; if (!write_exact(io_fd, &version, sizeof(unsigned long))) { ERROR("write: version"); goto out; } } #if 0 domctl.cmd = XEN_DOMCTL_arch_setup; domctl.domain = (domid_t)dom; domctl.u.arch_setup.flags = XEN_DOMAINSETUP_query; if (xc_domctl(xc_handle, &domctl) < 0) { ERROR("Could not get domain setup"); goto out; } if (!write_exact(io_fd, &domctl.u.arch_setup, sizeof(domctl.u.arch_setup))) { ERROR("write: domain setup"); goto out; } #endif /* This is a non-live suspend. Issue the call back to get the domain suspended */ if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) { ERROR("Domain appears not to have suspended"); goto out; } { char *mem; xen_pfn_t pfn; unsigned int total_sent = 0; if (xc_get_pfn_list(xc_handle, dom, page_array, max_pfn) != max_pfn) { ERROR("Could not get the page frame list"); goto out; } /* Start writing out the saved-domain record. */ for (pfn = 0; pfn < max_pfn; pfn++){ // for (pfn = 0; pfn < 16; pfn++){ if (page_array[pfn] == INVALID_MFN) continue; if (debug) fprintf(stderr, "xc_linux_save: page %llx (%llu/%lu)\n", page_array[pfn], pfn, max_pfn); // DPRINTF( "xc_linux_save: page %llx (%llu/%lu)\n", // page_array[pfn], pfn, max_pfn); mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[pfn]); if (mem == NULL) { /* The page may have move. It will be remarked dirty. FIXME: to be tracked. */ fprintf(stderr, "cannot map page %llx: %s\n", page_array[pfn], strerror (errno)); continue; } else { if (pfn <8) DPRINTF("memory addres %lx \n", (ulong)mem); } if (!write_exact(io_fd, &pfn, sizeof(pfn))) { ERROR("Error when writing to state file (4)"); goto out; } if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) { ERROR("Error when writing to state file (5)"); goto out; } munmap(mem, PAGE_SIZE); total_sent++; } DPRINTF("total_sent: %d\n", total_sent); } fprintf (stderr, "All memory is saved\n"); /* terminate memory dump */ { xen_pfn_t pfn = INVALID_MFN; if (!write_exact(io_fd, &pfn, sizeof(pfn))) { ERROR("Error when writing to state file (6)"); goto out; } } { int i, k, num_ptes; char *mem, *temp, *copy; unsigned long N; unsigned int total_sent = 0; uint64_t htab_raddr; ulong htab_mfn, htab_pages; xen_pfn_t htab_rpn; union pte *ppte; num_ptes = xc_get_shadow_list(xc_handle, dom, &htab_raddr); if (num_ptes == -1){ ERROR("Could not get the shadow list"); goto out; } temp = malloc(PAGE_SIZE * 2); if (temp == NULL){ ERROR("Could not allocate temp memory"); goto out; } copy = (char *)(((ulong)temp + (PAGE_SIZE - 1)) & (~(PAGE_SIZE-1))); htab_mfn = htab_raddr >> PAGE_SHIFT; htab_pages = num_ptes / (PAGE_SIZE / (1UL << LOG_PTE_SIZE)); // DPRINTF("htab_pages: 0x%08lx htab_addr: %llx htab_mfn %lx\n", htab_pages,htab_raddr, htab_mfn); if (!write_exact(io_fd, &num_ptes, sizeof(num_ptes))) { ERROR("Error when writing to state file (4)"); goto out; } /* Replace rpn with guest pfn, then write out htab, by page */ for (N = 0; N < htab_pages; N++, htab_mfn++) { mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, htab_mfn | DECOR); if (mem == NULL){ ERROR("Cannot map htab_mfn 0x%08lx: %s\n", htab_mfn, strerror (errno)); goto out; } //else DPRINTF("success map htab to guest: %lx \n", (unsigned long)mem); memcpy(copy, mem, PAGE_SIZE); /*** TBD - improve search of page_array[] for htab_rpn ***/ ppte = (union pte *)copy; for (i = 0; i < (PAGE_SIZE / (1UL << LOG_PTE_SIZE)); i++, ppte++){ // DPRINTF(" htab : i %d, ppte->vsid %llx, rpn %llx \n", i , (unsigned long long)ppte->words.vsid, (unsigned long long)ppte->words.rpn); if (ppte->bits.v == 1){ // valid htab entry htab_rpn = ppte->bits.rpn; for (k = 0; k < max_pfn; k++){ if (htab_rpn == page_array[k]) break; } if (k >= max_pfn){ ERROR("htab_rpn: 0x%016llx not found in page_array[] %d", htab_rpn, i); ppte->bits.v = 0; goto out; } ppte->bits.rpn = k; } else { // invalid htab entry ppte->words.rpn = 0; } } if (write(io_fd, copy, PAGE_SIZE) != PAGE_SIZE) { ERROR("Error when writing to state file (7)"); goto out; } munmap(mem, PAGE_SIZE); total_sent++; } free(temp); DPRINTF("htab_sent: %d\n", total_sent); } #if 0 /* terminate htab dump*/ { xen_pfn_t pfn = INVALID_MFN; if (!write_exact(io_fd, &pfn, sizeof(pfn))) { ERROR("Error when writing to state file (8)"); goto out; } } #endif /*** TBD - save vcpu context for all vcpus ***/ if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) { ERROR("Could not get vcpu context"); goto out; } if (!write_exact(io_fd, &ctxt, sizeof(ctxt))) { ERROR("Error when writing to state file (1)"); goto out; } #if 0 mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, ctxt.privregs_pfn); if (mem == NULL) { ERROR("cannot map privreg page"); goto out; } if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) { ERROR("Error when writing privreg to state file (5)"); goto out; } munmap(mem, PAGE_SIZE); #endif if (!write_exact(io_fd, live_shinfo, PAGE_SIZE)) { ERROR("Error when writing to state file (1)"); goto out; } /* Success! */ rc = 0; DPRINTF("Domain ready to be built.\n"); domctl.cmd = XEN_DOMCTL_setvcpucontext; domctl.domain = (domid_t)dom; domctl.u.vcpucontext.vcpu = 0; set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt); rc = xc_domctl(xc_handle, &domctl); if (rc != 0) { ERROR("Couldn't build the domain"); goto out; } out: free(page_array); if (live_shinfo) munmap(live_shinfo, PAGE_SIZE); DPRINTF("Save exit rc=%d\n",rc); return !!rc; } /* * Local variables: * mode: C * c-set-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ /****************************************************************************** * xc_ppc_linux_restore.c * * Restore the state of a Linux session. * * Copyright (c) 2003, K A Fraser. * Rewritten for ppc */ #include <inttypes.h> #include <stdlib.h> #include <unistd.h> #include "htab.h" #include "xg_private.h" #define DECOR 0x80000000 // indicates htab address #define LOG_PTE_SIZE 4 #define INVALID_MFN (~0ULL) #define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10)) //*** static unsigned char pg_buf[PAGE_SIZE]; /* total number of pages used by the current guest */ static unsigned long max_pfn; static ssize_t read_exact(int fd, void *buf, size_t count) { int r = 0, s; unsigned char *b = buf; while (r < count) { s = read(fd, &b[r], count - r); if ((s == -1) && (errno == EINTR)) continue; if (s <= 0) { break; } r += s; } return (r == count) ? 1 : 0; } static int read_page(int xc_handle, int io_fd, uint32_t dom, xen_pfn_t mfn) { void *mem; mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, mfn); if (mem == NULL) { ERROR("cannot map page"); return -1; } if (!read_exact(io_fd, mem, PAGE_SIZE)) { ERROR("Error when reading from state file (5)"); return -1; } munmap(mem, PAGE_SIZE); return 0; } int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, unsigned long nr_pfns, unsigned int store_evtchn, unsigned long *store_mfn, unsigned int console_evtchn, unsigned long *console_mfn) { DECLARE_DOMCTL; int rc = 1, i; xen_pfn_t mfn = INVALID_MFN, pfn; unsigned long ver; int prntctr = 0; /* The new domain's shared-info frame number. */ unsigned long shared_info_frame; unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */ shared_info_t *shared_info = (shared_info_t *)shared_info_page; /* A copy of the CPU context of the guest. */ vcpu_guest_context_t ctxt; xen_pfn_t shared_info_pfn, *page_array = NULL; /* A temporary mapping of the guest's start_info page. */ start_info_t *start_info; max_pfn = nr_pfns; DPRINTF("xc_linux_restore start: max_pfn = %ld\n", max_pfn); if (!read_exact(io_fd, &ver, sizeof(unsigned long))) { ERROR("Error when reading version"); goto out; } if (ver != 1) { ERROR("version of save doesn't match"); goto out; } if (mlock(&ctxt, sizeof(ctxt))) { /* needed for build domctl, but might as well do early */ ERROR("Unable to mlock ctxt"); return 1; } /* Get the domain's shared-info frame. */ domctl.cmd = XEN_DOMCTL_getdomaininfo; domctl.domain = (domid_t)dom; if (xc_domctl(xc_handle, &domctl) < 0) { ERROR("Could not get information on new domain"); goto out; } shared_info_frame = domctl.u.getdomaininfo.shared_info_frame; if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) { errno = ENOMEM; goto out; } #if 0 if (xc_domain_memory_increase_reservation(xc_handle, dom, max_pfn, 0, 0, NULL) != 0) { ERROR("Failed to increase reservation by %ld KB", PFN_TO_KB(max_pfn)); errno = ENOMEM; goto out; } DPRINTF("Increased domain reservation by %ld KB\n", PFN_TO_KB(max_pfn)); #endif #if 0 if (!read_exact(io_fd, &domctl.u.arch_setup, sizeof(domctl.u.arch_setup))) { ERROR("read: domain setup"); goto out; } /* Build firmware (will be overwritten). */ domctl.domain = (domid_t)dom; domctl.u.arch_setup.flags &= ~XEN_DOMAINSETUP_query; domctl.u.arch_setup.bp = ((nr_pfns - 3) << PAGE_SHIFT) + sizeof (start_info_t); domctl.u.arch_setup.maxmem = (nr_pfns - 3) << PAGE_SHIFT; domctl.cmd = XEN_DOMCTL_arch_setup; if (xc_domctl(xc_handle, &domctl)) goto out; #endif /* Get pages. */ page_array = malloc(max_pfn * sizeof(xen_pfn_t)); if (page_array == NULL ) { ERROR("Could not allocate memory"); goto out; } if (xc_get_pfn_list(xc_handle, dom, page_array, max_pfn) != max_pfn) { ERROR("Could not get the page frame list"); goto out; } DPRINTF("Reloading memory pages: 0%%\n"); while (1) { if (!read_exact(io_fd, &pfn, sizeof(xen_pfn_t))) { ERROR("Error when reading batch size"); goto out; } if (pfn == INVALID_MFN) break; if (pfn > max_pfn){ DPRINTF("pfn: 0x%016llx\n", pfn); continue; } mfn = page_array[pfn]; if (prntctr++ < 8) DPRINTF("xc_linux_restore: page %llu/%lu at %llx\n", pfn, max_pfn, mfn); #if 0 if (!read_exact(io_fd, pg_buf, PAGE_SIZE)) { //*** ERROR("Error when reading batch size"); goto out; } #endif if (read_page(xc_handle, io_fd, dom, mfn) < 0) goto out; } DPRINTF("Received all pages\n"); { int i, num_ptes, htab_ptes; uint64_t htab_raddr; ulong htab_mfn, htab_pages; unsigned long N; char *mem, *temp, *copy; unsigned int total_sent = 0; xen_pfn_t htab_rpn; union pte *ppte; htab_ptes = xc_get_shadow_list(xc_handle, dom, &htab_raddr); if (htab_ptes == -1){ ERROR("Could not get the shadow list"); goto out; } if (!read_exact(io_fd, &num_ptes, sizeof(num_ptes))) { ERROR("Error when reading num_ptes"); goto out; } if (num_ptes != htab_ptes){ ERROR("num_ptes != htab_ptes: %d %d htab_raddr: 0x%016llx", num_ptes, htab_ptes, htab_raddr); goto out; } temp = malloc(PAGE_SIZE * 2); if (temp == NULL){ ERROR("Could not allocate temp memory"); goto out; } copy = (char *)(((ulong)temp + (PAGE_SIZE - 1)) & (~(PAGE_SIZE-1))); htab_mfn = htab_raddr >> PAGE_SHIFT; htab_pages = htab_ptes / (PAGE_SIZE / (1UL << LOG_PTE_SIZE)); //DPRINTF("htab_pages: 0x%08lx\n", htab_pages); //DPRINTF("htab_mfn: 0x%08lx\n", htab_mfn); /* Replace guest pfn with rfn, then copy to htab, by page */ for (N = 0; N < htab_pages; N++, htab_mfn++) { mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, htab_mfn | DECOR); if (mem == NULL){ ERROR("Cannot map htab_mfn 0x%08lx: %s\n", htab_mfn, strerror (errno)); goto out; } if (!read_exact(io_fd, copy, PAGE_SIZE)) { ERROR("Error when reading htab page"); goto out; } ppte = (union pte *)copy; for (i = 0; i < (PAGE_SIZE / (1UL << LOG_PTE_SIZE)); i++, ppte++){ if (ppte->bits.v == 1){ // valid htab entry htab_rpn = ppte->bits.rpn; // guest's pfn if (htab_rpn >= max_pfn){ ERROR("htab_rpn: 0x%016llx not found in page_array[]", htab_rpn); goto out; } ppte->bits.rpn = page_array[htab_rpn]; // guest's rpn } else { // invalid htab entry ppte->words.rpn = 0; } } memcpy(mem, copy, PAGE_SIZE); munmap(mem, PAGE_SIZE); total_sent++; } free(temp); DPRINTF("htab_get: %d\n", total_sent); } if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) { ERROR("Error when reading ctxt"); goto out; } /* First to initialize. */ domctl.cmd = XEN_DOMCTL_setvcpucontext; domctl.domain = (domid_t)dom; domctl.u.vcpucontext.vcpu = 0; set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt); if (xc_domctl(xc_handle, &domctl) != 0) { ERROR("Couldn't set vcpu context"); goto out; } // DPRINTF("MSR saved %llx \n",(unsigned long long)ctxt.user_regs.msr); #if 0 /* Second to set registers... */ //*** ctxt.flags = VGCF_EXTRA_REGS; domctl.cmd = XEN_DOMCTL_setvcpucontext; domctl.domain = (domid_t)dom; domctl.u.vcpucontext.vcpu = 0; set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt); if (xc_domctl(xc_handle, &domctl) != 0) { ERROR("Couldn't set vcpu context"); goto out; } #endif /* Just a check. */ if (xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, &ctxt)) { ERROR("Could not get vcpu context"); goto out; } #if 0 /* Then get privreg page. */ if (read_page(xc_handle, io_fd, dom, ctxt.privregs_pfn) < 0) { ERROR("Could not read vcpu privregs"); goto out; } #endif /* Read shared info. */ shared_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame); if (shared_info == NULL) { ERROR("cannot map page"); goto out; } if (!read_exact(io_fd, shared_info, PAGE_SIZE)) { ERROR("Error when reading shared_info page"); goto out; } /* clear any pending events and the selector */ memset(&(shared_info->evtchn_pending[0]), 0, sizeof (shared_info->evtchn_pending)); for (i = 0; i < MAX_VIRT_CPUS; i++) shared_info->vcpu_info[i].evtchn_pending_sel = 0; #if 1 mfn = shared_info_frame - 3 ; // mfn = page_array[shared_info->arch.start_info_pfn]; DPRINTF("start_info mfn %llx \n", (unsigned long long) mfn); #endif munmap (shared_info, PAGE_SIZE); for (i=0;i<max_pfn;i++) if (page_array[i] == shared_info_frame) break; if ( i >= max_pfn) { ERROR(); goto out; } shared_info_pfn = (unsigned long) i; // rc = 0; goto out; //*** /* Uncanonicalise the suspend-record frame number and poke resume rec. */ start_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn); start_info->nr_pages = max_pfn; // DPRINTF("shared_info_mfn %llx \n", (unsigned long long)(shared_info_pfn << PAGE_SHIFT)); // DPRINTF("start_info->shared_info%llx \n", (unsigned long long)start_info->shared_info); start_info->shared_info = shared_info_pfn << PAGE_SHIFT; start_info->flags = 0; *store_mfn = page_array[start_info->store_mfn]; start_info->store_evtchn = store_evtchn; *console_mfn = page_array[start_info->console.domU.mfn]; start_info->console.domU.evtchn = console_evtchn; munmap(start_info, PAGE_SIZE); /* * Safety checking of saved context: * 1. user_regs is fine, as Xen checks that on context switch. * 2. fpu_ctxt is fine, as it can't hurt Xen. * 3. trap_ctxt needs the code selectors checked. * 4. ldt base must be page-aligned, no more than 8192 ents, ... * 5. gdt already done, and further checking is done by Xen. * 6. check that kernel_ss is safe. * 7. pt_base is already done. * 8. debugregs are checked by Xen. * 9. callback code selectors need checking. */ DPRINTF("Domain ready to be built.\n"); rc = 0; out: if ((rc != 0) && (dom != 0)) xc_domain_destroy(xc_handle, dom); #if 0 else { DPRINTF("Domain puased: "); xc_domain_pause(xc_handle,dom); DPRINTF("Domain unpuased: "); xc_domain_unpause(xc_handle,dom); } #endif free (page_array); DPRINTF("Restore exit with rc=%d\n", rc); return rc; } _______________________________________________ Xen-ppc-devel mailing list Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-ppc-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |