[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [XenPPC][Patch 1/3] xen patches for xm save/restore
Hi Hollis, Thanks for your comments. I rebuilt the patches for xm save/restore and attached them below. I think that should work for you. The code has been tested for the basic function of guest domain save and restore. -- Yi Ge <kudva@xxxxxxxxxxxxxx> # HG changeset patch # User gy@xxxxxxxxxxxxxxxxxxxxxxxxxxxx # Date 1162500817 18000 # Node ID f21efe9b5210b1002819e7da602a26dba7787b0c # Parent f4d382795e57b926cd82256bcb3a74c539731796 [XenPPC] kernel support for Xen domain save/restore. Signed-off-by: Dan E Poff <poff@xxxxxxxxxxxxxx> Signed-off-by: Yi Ge <kudva@xxxxxxxxxxxxxx> diff -r f4d382795e57 -r f21efe9b5210 arch/powerpc/platforms/xen/hcall.c --- a/arch/powerpc/platforms/xen/hcall.c Wed Oct 25 17:22:54 2006 -0400 +++ b/arch/powerpc/platforms/xen/hcall.c Thu Nov 02 15:53:37 2006 -0500 @@ -256,6 +256,7 @@ static int xenppc_privcmd_domctl(privcmd case XEN_DOMCTL_pausedomain: case XEN_DOMCTL_unpausedomain: case XEN_DOMCTL_getdomaininfo: + case XEN_DOMCTL_getshadowlist: break; case XEN_DOMCTL_getmemlist: ret = xencomm_create( diff -r f4d382795e57 -r f21efe9b5210 arch/powerpc/platforms/xen/reboot.c --- a/arch/powerpc/platforms/xen/reboot.c Wed Oct 25 17:22:54 2006 -0400 +++ b/arch/powerpc/platforms/xen/reboot.c Thu Nov 02 15:53:37 2006 -0500 @@ -1,9 +1,19 @@ #include <linux/module.h> +#include <linux/kernel.h> #include <xen/interface/xen.h> #include <xen/interface/io/console.h> #include <xen/xencons.h> #include <asm/hypervisor.h> #include <asm/machdep.h> +#include <asm/mmu_context.h> +#include <xen/cpu_hotplug.h> +#include <xen/xenbus.h> +#include <xen/gnttab.h> +#include <xen/evtchn.h> + +#define SHUTDOWN_INVALID -1 + +static int shutting_down ; static void domain_machine_restart(char * __unused) { @@ -31,3 +41,86 @@ void xen_reboot_init(struct machdep_call ppc_md.halt = domain_machine_power_off; } } + +static void switch_idle_mm(void) +{ + struct mm_struct *mm = current->active_mm; + + if (mm == &init_mm) + return; + + atomic_inc(&init_mm.mm_count); + switch_mm(mm, 
&init_mm, current); + current->active_mm = &init_mm; + mmdrop(mm); +} + +int ppc_do_suspend(void *ignore) +{ + int err; + enum system_states temp_state; + + BUG_ON(smp_processor_id() != 0); + BUG_ON(in_interrupt()); + +#ifndef CONFIG_PPC_XEN + if (xen_feature(XENFEAT_auto_translated_physmap)) { + printk(KERN_WARNING "Cannot suspend in " + "auto_translated_physmap mode.\n"); + return -EOPNOTSUPP; + } +#endif + err = smp_suspend(); + if (err) + return err; + + xenbus_suspend(); + + preempt_disable(); + + __cli(); + + temp_state = system_state; + system_state = SYSTEM_SUSPEND_DISK; + + preempt_enable(); + + gnttab_suspend(); + + HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; + + xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); + xen_start_info->console_mfn = mfn_to_pfn(xen_start_info->console_mfn); + + /* + * We'll stop somewhere inside this hypercall. When it returns, + * we'll start resuming after the restore. + */ + HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); + + shutting_down = SHUTDOWN_INVALID; + + printk("Here we're resuming \n"); //DEBUG + + HYPERVISOR_shared_info = (shared_info_t *)__va(xen_start_info->shared_info); + memset(empty_zero_page, 0, PAGE_SIZE); + + gnttab_resume(); + + irq_resume(); + + switch_idle_mm(); + + system_state = temp_state; + + __sti(); + + xencons_resume(); + + xenbus_resume(); + + smp_resume(); + + return err; +} + diff -r f4d382795e57 -r f21efe9b5210 drivers/xen/core/reboot.c --- a/drivers/xen/core/reboot.c Wed Oct 25 17:22:54 2006 -0400 +++ b/drivers/xen/core/reboot.c Thu Nov 02 15:53:37 2006 -0500 @@ -151,7 +151,7 @@ static int __do_suspend(void *ignore) HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); shutting_down = SHUTDOWN_INVALID; - + set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); @@ -192,13 +192,15 @@ static int __do_suspend(void *ignore) return err; } -#else /* CONFIG_PPC_XEN */ + +#else +int 
ppc_do_suspend(void *ignore); static int __do_suspend(void *ignore) { printk("SUSPEND!!??\n"); - return 0; -} -#endif /* CONFIG_PPC_XEN */ + return ppc_do_suspend(ignore); +} +#endif static int shutdown_process(void *__unused) { diff -r f4d382795e57 -r f21efe9b5210 include/asm-powerpc/system.h --- a/include/asm-powerpc/system.h Wed Oct 25 17:22:54 2006 -0400 +++ b/include/asm-powerpc/system.h Thu Nov 02 15:53:37 2006 -0500 @@ -433,5 +433,12 @@ extern void account_system_vtime(struct extern void account_system_vtime(struct task_struct *); #endif +#ifndef __cli + +#define __cli() local_irq_disable() +#define __sti() local_irq_enable() +#endif /* __cli */ + + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_SYSTEM_H */ diff -r f4d382795e57 -r f21efe9b5210 include/asm-powerpc/xen/asm/hypercall.h --- a/include/asm-powerpc/xen/asm/hypercall.h Wed Oct 25 17:22:54 2006 -0400 +++ b/include/asm-powerpc/xen/asm/hypercall.h Thu Nov 02 15:53:37 2006 -0500 @@ -60,6 +60,16 @@ static inline int HYPERVISOR_shutdown(un return HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); } + +static inline int HYPERVISOR_suspend(unsigned long srec) +{ + struct sched_shutdown sched_shutdown = { + .reason = SHUTDOWN_suspend + }; + + return HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); +} + static inline int HYPERVISOR_set_timer_op(unsigned long arg) { return plpar_hcall_norets(XEN_MARK(__HYPERVISOR_set_timer_op), arg); diff -r f4d382795e57 -r f21efe9b5210 include/xen/interface/arch-powerpc.h --- a/include/xen/interface/arch-powerpc.h Wed Oct 25 17:22:54 2006 -0400 +++ b/include/xen/interface/arch-powerpc.h Thu Nov 02 15:53:37 2006 -0500 @@ -29,7 +29,6 @@ #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) #define XEN_GUEST_HANDLE(name) __guest_handle_ ## name -#define XEN_GUEST_HANDLE_64(name) __guest_handle_ ## name #define set_xen_guest_handle(hnd, val) \ do { \ if (sizeof ((hnd).__pad)) \ @@ -42,9 +41,6 @@ #endif #ifndef __ASSEMBLY__ - -typedef uint64_t 
uint64_aligned_t; - /* Guest handles for primitive C types. */ __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); __DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); @@ -98,11 +94,66 @@ typedef struct cpu_user_regs cpu_user_re typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ /* XXX timebase */ +#define NUM_SLB_ENTRIES 64 +struct slb_entry { + uint64_t slb_vsid; + uint64_t slb_esid; +}; +typedef struct slb_entry slb_entry_t; + +#ifndef HAS_VMX +#define HAS_VMX 1 +#endif + +#ifndef HAS_FLOAT +#define HAS_FLOAT 1 +#endif + +#ifdef HAS_VMX +typedef struct { + uint32_t u[4]; +} __attribute__((aligned(16))) _vector128; +#endif /* HAS_VMX */ + + /* ONLY used to communicate with dom0! See also struct exec_domain. */ struct vcpu_guest_context { cpu_user_regs_t user_regs; /* User-level CPU registers */ + slb_entry_t slb_entries[NUM_SLB_ENTRIES]; /* Segment Lookaside Buffer */ + + /* Special-Purpose Registers */ + uint64_t sprg[4]; + uint64_t timebase; + uint64_t dar; + uint64_t dsisr; + + struct cpu_vcpu_tag { + uint64_t hid4; + } cpu; /* CPU-specific bits */ + + uint32_t dec; + + /* XXX etc */ +#ifdef HAS_FLOAT +#define NUM_FPRS 32 + double fprs[NUM_FPRS]; +#endif +#ifdef HAS_VMX + _vector128 vrs[32]; + _vector128 vscr; + uint32_t vrsave; +#endif + +#if 0 + struct xencomm *xencomm; + + /* I/O-port access bitmap. */ + u8 *iobmp; /* Guest kernel virtual address of the bitmap. */ + int iobmp_limit; /* Number of ports represented in the bitmap. */ + int iopl; /* Current IOPL for this VCPU. 
*/ +#endif + uint64_t sdr1; /* Pagetable base */ - /* XXX etc */ }; typedef struct vcpu_guest_context vcpu_guest_context_t; DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); diff -r f4d382795e57 -r f21efe9b5210 include/xen/interface/domctl.h --- a/include/xen/interface/domctl.h Wed Oct 25 17:22:54 2006 -0400 +++ b/include/xen/interface/domctl.h Thu Nov 02 15:53:37 2006 -0500 @@ -354,6 +354,17 @@ struct xen_domctl_real_mode_area { }; typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t); + +#define XEN_DOMCTL_getshadowlist 29 +struct xen_domctl_getshadowlist { + /* OUT variables. */ + /* Start of htab array */ + uint64_t htab_map; + /* Number of ptes within htab */ + uint htab_num_ptes; +}; +typedef struct xen_domctl_getshadowlist xen_domctl_getshadowlist_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getshadowlist_t); struct xen_domctl { uint32_t cmd; @@ -381,6 +392,7 @@ struct xen_domctl { struct xen_domctl_arch_setup arch_setup; struct xen_domctl_settimeoffset settimeoffset; struct xen_domctl_real_mode_area real_mode_area; + struct xen_domctl_getshadowlist getshadowlist; uint8_t pad[128]; } u; }; # HG changeset patch # User gy@xxxxxxxxxxxxxxxxxxxxxxxxxxxx # Date 1162500350 18000 # Node ID 42e39f025aed363c4ae02adac55516d806af3531 # Parent 9148f7816d00bc45a8795a5119db9949894a3f89 [XenPPC] To enable the basic functions of domain save/restore in XenPPC. 
Signed-off-by: Dan E Poff <poff@xxxxxxxxxxxxxx> Signed-off-by: Yi Ge <kudva@xxxxxxxxxxxxxx> diff -r 9148f7816d00 -r 42e39f025aed config/powerpc64.mk --- a/config/powerpc64.mk Tue Oct 24 19:11:00 2006 -0400 +++ b/config/powerpc64.mk Thu Nov 02 15:45:50 2006 -0500 @@ -3,3 +3,4 @@ CONFIG_POWERPC_$(XEN_OS) := y CFLAGS += -DELFSIZE=64 LIBDIR := lib +CONFIG_XCUTILS :=y diff -r 9148f7816d00 -r 42e39f025aed tools/libxc/powerpc64/Makefile --- a/tools/libxc/powerpc64/Makefile Tue Oct 24 19:11:00 2006 -0400 +++ b/tools/libxc/powerpc64/Makefile Thu Nov 02 15:45:50 2006 -0500 @@ -2,5 +2,7 @@ GUEST_SRCS-y += powerpc64/xc_linux_build GUEST_SRCS-y += powerpc64/xc_linux_build.c GUEST_SRCS-y += powerpc64/xc_prose_build.c GUEST_SRCS-y += powerpc64/utils.c +GUEST_SRCS-y += powerpc64/xc_ppc_linux_save.c +GUEST_SRCS-y += powerpc64/xc_ppc_linux_restore.c CTRL_SRCS-y += powerpc64/xc_memory.c diff -r 9148f7816d00 -r 42e39f025aed tools/libxc/xc_private.c --- a/tools/libxc/xc_private.c Tue Oct 24 19:11:00 2006 -0400 +++ b/tools/libxc/xc_private.c Thu Nov 02 15:45:50 2006 -0500 @@ -306,6 +306,25 @@ int xc_get_pfn_list(int xc_handle, return (ret < 0) ? -1 : domctl.u.getmemlist.num_pfns; } + +int xc_get_shadow_list( int xc_handle, + uint32_t domid, + uint64_t *htab_raddr) +{ + DECLARE_DOMCTL; + int ret; + + domctl.cmd = XEN_DOMCTL_getshadowlist; + domctl.domain = (domid_t)domid; + + DPRINTF("xc_get_shadow_list() running \n"); + + ret = do_domctl(xc_handle, &domctl); + *htab_raddr = domctl.u.getshadowlist.htab_map; + + return (ret < 0) ? 
-1 : domctl.u.getshadowlist.htab_num_ptes; +} + #endif long xc_get_tot_pages(int xc_handle, uint32_t domid) diff -r 9148f7816d00 -r 42e39f025aed tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Tue Oct 24 19:11:00 2006 -0400 +++ b/tools/libxc/xenctrl.h Thu Nov 02 15:45:50 2006 -0500 @@ -529,6 +529,8 @@ int xc_get_pfn_list(int xc_handle, uint3 int xc_get_pfn_list(int xc_handle, uint32_t domid, xen_pfn_t *pfn_buf, unsigned long max_pfns); +int xc_get_shadow_list(int xc_handle, uint32_t domid, uint64_t *mfn_htab_map); + int xc_ia64_get_pfn_list(int xc_handle, uint32_t domid, xen_pfn_t *pfn_buf, unsigned int start_page, unsigned int nr_pages); diff -r 9148f7816d00 -r 42e39f025aed tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Tue Oct 24 19:11:00 2006 -0400 +++ b/tools/python/xen/xend/XendDomainInfo.py Thu Nov 02 15:45:50 2006 -0500 @@ -270,6 +270,7 @@ def restore(config): vm = findDomainClass()(parseConfig(config), None, None, False, False, True) try: vm.construct() + vm.allocMem2() vm.storeVmDetails() vm.createDevices() vm.createChannels() @@ -1369,6 +1370,66 @@ class XendDomainInfo: xc.domain_memory_increase_reservation(self.domid, reservation, 0, 0) + + + def allocMem2(self): + # Use architecture- and image-specific calculations to determine + # the various headrooms necessary, given the raw configured + # values. + # reservation, maxmem, memory, and shadow are all in KiB. + log.debug("allocMem2"); + + maxmem = self.info['maxmem'] * 1024 + memory = self.info['memory'] * 1024 + shadow = self.info['shadow_memory'] * 1024 + + log.debug("maxmem: 0x%08x", maxmem) + log.debug("memory: 0x%08x shadow: 0x%08x", memory, shadow) + + # Round shadow up to a multiple of a MiB, as shadow_mem_control + # takes MiB and we must not round down and end up under-providing. 
+ shadow = ((shadow + 1023) / 1024) * 1024 + + # set memory limit + xc.domain_setmaxmem(self.domid, maxmem) + + # Make sure there's enough RAM available for the domain + balloon.free(memory + shadow) + + # Set up the shadow memory, i.e. the PowerPC hash table + shadow_cur = xc.shadow_mem_control(self.domid, shadow / 1024) + self.info['shadow_memory'] = shadow_cur + + rma_log = self.info['powerpc_rma_log'] + if rma_log == 0: + # use smallest RMA size available + rma_log = self.getRealModeLogs()[0] + + if rma_log not in self.getRealModeLogs(): + raise ValueError("rma_log(%d) must be one of" % rma_log, + self.getRealModeLogs()) + + self.info['powerpc_rma_log'] = rma_log # store info for FlatDeviceTree + + rma_kb = (1 << rma_log) / 1024 + if memory < rma_kb: + raise ValueError("Domain memory must be at least %d KB" % rma_kb) + + # allocate the RMA + xc.alloc_real_mode_area(self.domid, rma_log) + + # now allocate the remaining memory as large-order allocations + memory -= rma_kb + extent_log = 24 # 16 MB + extent_size = 1 << extent_log + page_log = 12 # 4 KB + extent_order = extent_log - page_log + for i in range(0, memory * 1024, extent_size): + log.debug("increase_reservation(%d, 0x%x, %d)", self.domid, + extent_size >> 10, extent_order) + xc.domain_memory_increase_reservation(self.domid, extent_size >> 10, + extent_order) + ## public: diff -r 9148f7816d00 -r 42e39f025aed xen/arch/powerpc/domain.c --- a/xen/arch/powerpc/domain.c Tue Oct 24 19:11:00 2006 -0400 +++ b/xen/arch/powerpc/domain.c Thu Nov 02 15:45:50 2006 -0500 @@ -143,7 +143,32 @@ void free_vcpu_struct(struct vcpu *v) int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_t *c) { + int i; + memcpy(&v->arch.ctxt, &c->user_regs, sizeof(c->user_regs)); + + for ( i = 0; i < NUM_SLB_ENTRIES; i++) { + memcpy(&v->arch.slb_entries[i], &c->slb_entries[i], sizeof(struct slb_entry)); + } + + for ( i = 0; i< 4; i++) v->arch.sprg[i] = c->sprg[i]; + + v->arch.timebase = c->timebase; + v->arch.dar = c->dar; + 
v->arch.dsisr = c->dsisr; + + memcpy( &v->arch.cpu, &c->cpu, sizeof(struct cpu_vcpu)); + v->arch.dec = c->dec; + +#ifdef HAS_FLOAT + memcpy( v->arch.fprs, c->fprs, sizeof(double)*NUM_FPRS); +#endif /* HAS_FLOAT */ + +#ifdef HAS_VMX + memcpy( &v->arch.vrs, &c->vrs, sizeof(vector128)*32); + memcpy( &v->arch.vscr, &c->vscr, sizeof(vector128)); + v->arch.vrsave = c->vrsave; +#endif /* HAS_VMX */ printf("Domain[%d].%d: initializing\n", v->domain->domain_id, v->vcpu_id); diff -r 9148f7816d00 -r 42e39f025aed xen/arch/powerpc/domctl.c --- a/xen/arch/powerpc/domctl.c Tue Oct 24 19:11:00 2006 -0400 +++ b/xen/arch/powerpc/domctl.c Thu Nov 02 15:45:50 2006 -0500 @@ -29,10 +29,37 @@ #include <public/sysctl.h> #include <asm/processor.h> +#define DECOR 0x80000000 // indicates htab address + + void arch_getdomaininfo_ctxt(struct vcpu *, vcpu_guest_context_t *); void arch_getdomaininfo_ctxt(struct vcpu *v, vcpu_guest_context_t *c) { + int i; + memcpy(&c->user_regs, &v->arch.ctxt, sizeof(struct cpu_user_regs)); + for (i = 0; i < NUM_SLB_ENTRIES; i++) { + memcpy(&c->slb_entries[i],&v->arch.slb_entries[i],sizeof(struct slb_entry)); + } + + for (i = 0; i < 4; i++) c->sprg[i] = v->arch.sprg[i] ; + c->timebase = v->arch.timebase; + c->dar = v->arch.dar; + c->dsisr = v->arch.dsisr; + memcpy(&c->cpu,&v->arch.cpu,sizeof(struct cpu_vcpu)); + c->dec = v->arch.dec; + +#ifdef HAS_FLOAT + memcpy(c->fprs,v->arch.fprs,sizeof(double)*NUM_FPRS); +#endif /* HAS_FLOAT */ + +#ifdef HAS_VMX + memcpy(c->vrs, v->arch.vrs, sizeof(vector128)*32); + memcpy(&c->vscr, &v->arch.vscr, sizeof(vector128)); + c->vrsave = v->arch.vrsave; +#endif /* HAS_VMX */ + + /* XXX fill in rest of vcpu_guest_context_t */ } @@ -109,6 +136,52 @@ long arch_do_domctl(struct xen_domctl *d } break; + case XEN_DOMCTL_getshadowlist: + { + struct domain *d = find_domain_by_id(domctl->domain); + uint num_ptes; + + printk(" XEN_DOMCTL_getshadowlist: \n"); + + ret = -EINVAL; + if ( d != NULL) + { + ret = 0; + + 
domctl->u.getshadowlist.htab_map = (uint64_t)(d->arch.htab.map); + printk("htab_map: 0x%016lx\n",(uint64_t)(d->arch.htab.map)); + + num_ptes = 1UL << d->arch.htab.log_num_ptes; + domctl->u.getshadowlist.htab_num_ptes = num_ptes; + + printk("num_ptes : %d \n",num_ptes); + copy_to_guest(u_domctl, domctl, 1); +#if 0 + // Print out Shadow Htab pages + { + uint64_t htab_rpn; + int i =0; + uint64_t htab_addr = domctl->u.getshadowlist.htab_map; + union pte *ppte = (union pte *)htab_addr; + + + printk("htab valid entry: \n"); + for( i= 0 ; i< (PAGE_SIZE/ (1UL<< LOG_PTE_SIZE)) ; i++,ppte++) + //for( i= 0 ; i< num_ptes ; i++,ppte++) + { + // if (ppte->bits.v == 1) + { + htab_rpn = ppte->bits.rpn; + printk("htab : i %d, vsid %llx, rpn %llx \n", i, (unsigned long long)ppte->words.vsid, (unsigned long long)ppte->words.rpn); + } + } + } +#endif + put_domain(d); + } + } + break; + default: ret = -ENOSYS; break; diff -r 9148f7816d00 -r 42e39f025aed xen/arch/powerpc/mm.c --- a/xen/arch/powerpc/mm.c Tue Oct 24 19:11:00 2006 -0400 +++ b/xen/arch/powerpc/mm.c Thu Nov 02 15:45:50 2006 -0500 @@ -36,6 +36,8 @@ #define MEM_LOG(_f, _a...) ((void)0) #endif +#define DECOR 0x80000000UL + /* Frame table and its size in pages. 
*/ struct page_info *frame_table; unsigned long max_page; @@ -386,6 +388,13 @@ ulong pfn2mfn(struct domain *d, ulong pf ulong foreign_map_pfn = 1UL << cpu_foreign_map_order(); /* quick tests first */ + if (pfn & DECOR) + { + //t = PFN_TYPE_FOREIGN; + mfn = pfn & ~DECOR; + //DPRINTK("pfn2mfn DECOR %lx mfn %lx\n",pfn,mfn); + } + else if (pfn & foreign_map_pfn) { t = PFN_TYPE_FOREIGN; mfn = pfn & ~(foreign_map_pfn); diff -r 9148f7816d00 -r 42e39f025aed xen/include/asm-powerpc/domain.h --- a/xen/include/asm-powerpc/domain.h Tue Oct 24 19:11:00 2006 -0400 +++ b/xen/include/asm-powerpc/domain.h Thu Nov 02 15:45:50 2006 -0500 @@ -48,10 +48,6 @@ struct arch_domain { uint large_page_order[4]; } __cacheline_aligned; -struct slb_entry { - ulong slb_vsid; - ulong slb_esid; -}; #define SLB_ESID_VALID (1ULL << (63 - 36)) #define SLB_ESID_CLASS (1ULL << (63 - 56)) #define SLB_ESID_MASK (~0ULL << (63 - 35)) @@ -60,9 +56,9 @@ struct slb_entry { struct xencomm; -typedef struct { - u32 u[4]; -} __attribute__((aligned(16))) vector128; +#ifdef HAS_VMX +typedef _vector128 vector128; +#endif /* HAS_VMX */ struct arch_vcpu { cpu_user_regs_t ctxt; /* User-level CPU registers */ diff -r 9148f7816d00 -r 42e39f025aed xen/include/asm-powerpc/htab.h --- a/xen/include/asm-powerpc/htab.h Tue Oct 24 19:11:00 2006 -0400 +++ b/xen/include/asm-powerpc/htab.h Thu Nov 02 15:45:50 2006 -0500 @@ -69,68 +69,68 @@ union pte { struct pte_words { - ulong vsid; - ulong rpn; + uint64_t vsid; + uint64_t rpn; } words; struct pte_bits { /* *INDENT-OFF* */ /* high word */ - ulong avpn: 57; /* [0-56] abbreviated virtual page number */ - ulong lock: 1; /* [57] hypervisor lock bit */ - ulong res: 1; /* [58] reserved for hypervisor */ - ulong bolted: 1; /* [59] XXX software-reserved; temp hack */ - ulong sw: 1; /* [60] reserved for software */ - ulong l: 1; /* [61] Large Page */ - ulong h: 1; /* [62] hash function id */ - ulong v: 1; /* [63] valid */ + uint64_t avpn: 57; /* [0-56] abbreviated virtual page number */ + 
uint64_t lock: 1; /* [57] hypervisor lock bit */ + uint64_t res: 1; /* [58] reserved for hypervisor */ + uint64_t bolted: 1; /* [59] XXX software-reserved; temp hack */ + uint64_t sw: 1; /* [60] reserved for software */ + uint64_t l: 1; /* [61] Large Page */ + uint64_t h: 1; /* [62] hash function id */ + uint64_t v: 1; /* [63] valid */ /* low word */ - ulong pp0: 1; /* [0] page protection bit 0 (current PowerPC + uint64_t pp0: 1; /* [0] page protection bit 0 (current PowerPC * specification says it can always be 0) */ - ulong ts: 1; /* [1] tag select */ - ulong rpn: 50; /* [2-51] real page number */ - ulong res2: 2; /* [52,53] reserved */ - ulong ac: 1; /* [54] address compare */ - ulong r: 1; /* [55] referenced */ - ulong c: 1; /* [56] changed */ - ulong w: 1; /* [57] write through */ - ulong i: 1; /* [58] cache inhibited */ - ulong m: 1; /* [59] memory coherent */ - ulong g: 1; /* [60] guarded */ - ulong n: 1; /* [61] no-execute */ - ulong pp1: 2; /* [62,63] page protection bits 1:2 */ + uint64_t ts: 1; /* [1] tag select */ + uint64_t rpn: 50; /* [2-51] real page number */ + uint64_t res2: 2; /* [52,53] reserved */ + uint64_t ac: 1; /* [54] address compare */ + uint64_t r: 1; /* [55] referenced */ + uint64_t c: 1; /* [56] changed */ + uint64_t w: 1; /* [57] write through */ + uint64_t i: 1; /* [58] cache inhibited */ + uint64_t m: 1; /* [59] memory coherent */ + uint64_t g: 1; /* [60] guarded */ + uint64_t n: 1; /* [61] no-execute */ + uint64_t pp1: 2; /* [62,63] page protection bits 1:2 */ /* *INDENT-ON* */ } bits; }; union ptel { - ulong word; + uint64_t word; struct ptel_bits { /* *INDENT-OFF* */ - ulong pp0: 1; /* page protection bit 0 (current PPC + uint64_t pp0: 1; /* page protection bit 0 (current PPC * AS says it can always be 0) */ - ulong ts: 1; /* tag select */ - ulong rpn: 50; /* real page number */ - ulong res2: 2; /* reserved */ - ulong ac: 1; /* address compare */ - ulong r: 1; /* referenced */ - ulong c: 1; /* changed */ - ulong w: 1; /* write 
through */ - ulong i: 1; /* cache inhibited */ - ulong m: 1; /* memory coherent */ - ulong g: 1; /* guarded */ - ulong n: 1; /* no-execute */ - ulong pp1: 2; /* page protection bits 1:2 */ + uint64_t ts: 1; /* tag select */ + uint64_t rpn: 50; /* real page number */ + uint64_t res2: 2; /* reserved */ + uint64_t ac: 1; /* address compare */ + uint64_t r: 1; /* referenced */ + uint64_t c: 1; /* changed */ + uint64_t w: 1; /* write through */ + uint64_t i: 1; /* cache inhibited */ + uint64_t m: 1; /* memory coherent */ + uint64_t g: 1; /* guarded */ + uint64_t n: 1; /* no-execute */ + uint64_t pp1: 2; /* page protection bits 1:2 */ /* *INDENT-ON* */ } bits; }; struct domain_htab { - ulong sdr1; + uint64_t sdr1; uint log_num_ptes; /* log number of PTEs in HTAB. */ uint order; /* order for freeing. */ union pte *map; /* access the htab like an array */ - ulong *shadow; /* idx -> logical translation array */ + uint64_t *shadow; /* idx -> logical translation array */ }; #endif diff -r 9148f7816d00 -r 42e39f025aed xen/include/public/arch-powerpc.h --- a/xen/include/public/arch-powerpc.h Tue Oct 24 19:11:00 2006 -0400 +++ b/xen/include/public/arch-powerpc.h Thu Nov 02 15:45:50 2006 -0500 @@ -94,11 +94,66 @@ typedef struct cpu_user_regs cpu_user_re typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ /* XXX timebase */ +#define NUM_SLB_ENTRIES 64 +struct slb_entry { + uint64_t slb_vsid; + uint64_t slb_esid; +}; +typedef struct slb_entry slb_entry_t; + +#ifndef HAS_VMX +#define HAS_VMX 1 +#endif + +#ifndef HAS_FLOAT +#define HAS_FLOAT 1 +#endif + +#ifdef HAS_VMX +typedef struct { + uint32_t u[4]; +} __attribute__((aligned(16))) _vector128; +#endif /* HAS_VMX */ + + /* ONLY used to communicate with dom0! See also struct exec_domain. 
*/ struct vcpu_guest_context { cpu_user_regs_t user_regs; /* User-level CPU registers */ + slb_entry_t slb_entries[NUM_SLB_ENTRIES]; /* Segment Lookaside Buffer */ + + /* Special-Purpose Registers */ + uint64_t sprg[4]; + uint64_t timebase; + uint64_t dar; + uint64_t dsisr; + + struct cpu_vcpu_tag { + uint64_t hid4; + } cpu; /* CPU-specific bits */ + + uint32_t dec; + + /* XXX etc */ +#ifdef HAS_FLOAT +#define NUM_FPRS 32 + double fprs[NUM_FPRS]; +#endif +#ifdef HAS_VMX + _vector128 vrs[32]; + _vector128 vscr; + uint32_t vrsave; +#endif + +#if 0 + struct xencomm *xencomm; + + /* I/O-port access bitmap. */ + u8 *iobmp; /* Guest kernel virtual address of the bitmap. */ + int iobmp_limit; /* Number of ports represented in the bitmap. */ + int iopl; /* Current IOPL for this VCPU. */ +#endif + uint64_t sdr1; /* Pagetable base */ - /* XXX etc */ }; typedef struct vcpu_guest_context vcpu_guest_context_t; DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); diff -r 9148f7816d00 -r 42e39f025aed xen/include/public/domctl.h --- a/xen/include/public/domctl.h Tue Oct 24 19:11:00 2006 -0400 +++ b/xen/include/public/domctl.h Thu Nov 02 15:45:50 2006 -0500 @@ -354,6 +354,17 @@ struct xen_domctl_real_mode_area { }; typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t); + +#define XEN_DOMCTL_getshadowlist 29 +struct xen_domctl_getshadowlist { + /* OUT variables */ + /* Start of htab array */ + uint64_t htab_map; + /* Number of ptes within htab */ + uint32_t htab_num_ptes; +}; +typedef struct xen_domctl_getshadowlist xen_domctl_getshadowlist_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getshadowlist_t); struct xen_domctl { uint32_t cmd; @@ -381,6 +392,7 @@ struct xen_domctl { struct xen_domctl_arch_setup arch_setup; struct xen_domctl_settimeoffset settimeoffset; struct xen_domctl_real_mode_area real_mode_area; + struct xen_domctl_getshadowlist getshadowlist; uint8_t pad[128]; } u; }; diff -r 9148f7816d00 -r 42e39f025aed 
tools/libxc/powerpc64/xc_ppc_linux_restore.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/powerpc64/xc_ppc_linux_restore.c Thu Nov 02 15:45:50 2006 -0500 @@ -0,0 +1,416 @@ +/****************************************************************************** + * xc_ppc_linux_restore.c + * + * Restore the state of a Linux session. + * + * Copyright (c) 2003, K A Fraser. + * Rewritten for ppc + */ + +#include <inttypes.h> +#include <stdlib.h> +#include <unistd.h> +#include "htab.h" + +#include "xg_private.h" + +#define DECOR 0x80000000 // indicates htab address +#define LOG_PTE_SIZE 4 + +#define INVALID_MFN (~0ULL) + +#define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10)) + +//*** static unsigned char pg_buf[PAGE_SIZE]; + +/* total number of pages used by the current guest */ +static unsigned long max_pfn; + +static ssize_t +read_exact(int fd, void *buf, size_t count) +{ + int r = 0, s; + unsigned char *b = buf; + + while (r < count) { + s = read(fd, &b[r], count - r); + if ((s == -1) && (errno == EINTR)) + continue; + if (s <= 0) { + break; + } + r += s; + } + + return (r == count) ? 1 : 0; +} + +static int +read_page(int xc_handle, int io_fd, uint32_t dom, xen_pfn_t mfn) +{ + void *mem; + + mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ|PROT_WRITE, mfn); + if (mem == NULL) { + ERROR("cannot map page"); + return -1; + } + if (!read_exact(io_fd, mem, PAGE_SIZE)) { + ERROR("Error when reading from state file (5)"); + return -1; + } + munmap(mem, PAGE_SIZE); + return 0; +} + +int +xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, + unsigned long nr_pfns, unsigned int store_evtchn, + unsigned long *store_mfn, unsigned int console_evtchn, + unsigned long *console_mfn) +{ + DECLARE_DOMCTL; + int rc = 1, i; + xen_pfn_t mfn = INVALID_MFN, pfn; + unsigned long ver; + int prntctr = 0; + + /* The new domain's shared-info frame number. 
*/ + unsigned long shared_info_frame; + unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */ + shared_info_t *shared_info = (shared_info_t *)shared_info_page; + + /* A copy of the CPU context of the guest. */ + vcpu_guest_context_t ctxt; + + xen_pfn_t shared_info_pfn, *page_array = NULL; + + /* A temporary mapping of the guest's start_info page. */ + start_info_t *start_info; + + max_pfn = nr_pfns; + + DPRINTF("xc_linux_restore start: max_pfn = %ld\n", max_pfn); + + if (!read_exact(io_fd, &ver, sizeof(unsigned long))) { + ERROR("Error when reading version"); + goto out; + } + if (ver != 1) { + ERROR("version of save doesn't match"); + goto out; + } + + if (mlock(&ctxt, sizeof(ctxt))) { + /* needed for build domctl, but might as well do early */ + ERROR("Unable to mlock ctxt"); + return 1; + } + + /* Get the domain's shared-info frame. */ + domctl.cmd = XEN_DOMCTL_getdomaininfo; + domctl.domain = (domid_t)dom; + if (xc_domctl(xc_handle, &domctl) < 0) { + ERROR("Could not get information on new domain"); + goto out; + } + shared_info_frame = domctl.u.getdomaininfo.shared_info_frame; + + if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) { + errno = ENOMEM; + goto out; + } + +#if 0 + if (xc_domain_memory_increase_reservation(xc_handle, dom, max_pfn, + 0, 0, NULL) != 0) { + ERROR("Failed to increase reservation by %ld KB", PFN_TO_KB(max_pfn)); + errno = ENOMEM; + goto out; + } + + DPRINTF("Increased domain reservation by %ld KB\n", PFN_TO_KB(max_pfn)); +#endif + +#if 0 + if (!read_exact(io_fd, &domctl.u.arch_setup, sizeof(domctl.u.arch_setup))) { + ERROR("read: domain setup"); + goto out; + } + + /* Build firmware (will be overwritten). 
*/ + domctl.domain = (domid_t)dom; + domctl.u.arch_setup.flags &= ~XEN_DOMAINSETUP_query; + domctl.u.arch_setup.bp = ((nr_pfns - 3) << PAGE_SHIFT) + + sizeof (start_info_t); + domctl.u.arch_setup.maxmem = (nr_pfns - 3) << PAGE_SHIFT; + + domctl.cmd = XEN_DOMCTL_arch_setup; + if (xc_domctl(xc_handle, &domctl)) + goto out; +#endif + + /* Get pages. */ + page_array = malloc(max_pfn * sizeof(xen_pfn_t)); + if (page_array == NULL ) { + ERROR("Could not allocate memory"); + goto out; + } + + if (xc_get_pfn_list(xc_handle, dom, + page_array, max_pfn) != max_pfn) { + ERROR("Could not get the page frame list"); + goto out; + } + + DPRINTF("Reloading memory pages: 0%%\n"); + + while (1) { + if (!read_exact(io_fd, &pfn, sizeof(xen_pfn_t))) { + ERROR("Error when reading batch size"); + goto out; + } + if (pfn == INVALID_MFN) + break; + + if (pfn > max_pfn){ + DPRINTF("pfn: 0x%016llx\n", pfn); + continue; + } + + mfn = page_array[pfn]; + + if (prntctr++ < 8) + DPRINTF("xc_linux_restore: page %llu/%lu at %llx\n", pfn, max_pfn, mfn); + +#if 0 + if (!read_exact(io_fd, pg_buf, PAGE_SIZE)) { //*** + ERROR("Error when reading batch size"); + goto out; + } +#endif + + if (read_page(xc_handle, io_fd, dom, mfn) < 0) + goto out; + } + + DPRINTF("Received all pages\n"); + + { + int i, num_ptes, htab_ptes; + uint64_t htab_raddr; + ulong htab_mfn, htab_pages; + unsigned long N; + char *mem, *temp, *copy; + unsigned int total_sent = 0; + xen_pfn_t htab_rpn; + union pte *ppte; + + htab_ptes = xc_get_shadow_list(xc_handle, dom, &htab_raddr); + if (htab_ptes == -1){ + ERROR("Could not get the shadow list"); + goto out; + } + + if (!read_exact(io_fd, &num_ptes, sizeof(num_ptes))) { + ERROR("Error when reading num_ptes"); + goto out; + } + + if (num_ptes != htab_ptes){ + ERROR("num_ptes != htab_ptes: %d %d htab_raddr: 0x%016llx", + num_ptes, htab_ptes, htab_raddr); + goto out; + } + + temp = malloc(PAGE_SIZE * 2); + if (temp == NULL){ + ERROR("Could not allocate temp memory"); + goto out; + } + + 
copy = (char *)(((ulong)temp + (PAGE_SIZE - 1)) & (~(PAGE_SIZE-1))); + + htab_mfn = htab_raddr >> PAGE_SHIFT; + htab_pages = htab_ptes / (PAGE_SIZE / (1UL << LOG_PTE_SIZE)); + //DPRINTF("htab_pages: 0x%08lx\n", htab_pages); + //DPRINTF("htab_mfn: 0x%08lx\n", htab_mfn); + + /* Replace guest pfn with rfn, then copy to htab, by page */ + for (N = 0; N < htab_pages; N++, htab_mfn++) { + mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ|PROT_WRITE, htab_mfn | DECOR); + if (mem == NULL){ + ERROR("Cannot map htab_mfn 0x%08lx: %s\n", + htab_mfn, strerror (errno)); + goto out; + } + + if (!read_exact(io_fd, copy, PAGE_SIZE)) { + ERROR("Error when reading htab page"); + goto out; + } + + ppte = (union pte *)copy; + for (i = 0; i < (PAGE_SIZE / (1UL << LOG_PTE_SIZE)); i++, ppte++){ + if (ppte->bits.v == 1){ // valid htab entry + htab_rpn = ppte->bits.rpn; // guest's pfn + + if (htab_rpn >= max_pfn){ + ERROR("htab_rpn: 0x%016llx not found in page_array[]", + htab_rpn); + goto out; + } + + ppte->bits.rpn = page_array[htab_rpn]; // guest's rpn + + } else { // invalid htab entry + ppte->words.rpn = 0; + } + } + + memcpy(mem, copy, PAGE_SIZE); + + munmap(mem, PAGE_SIZE); + total_sent++; + } + + free(temp); + + DPRINTF("htab_get: %d\n", total_sent); + } + + + if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) { + ERROR("Error when reading ctxt"); + goto out; + } + + + /* First to initialize. */ + domctl.cmd = XEN_DOMCTL_setvcpucontext; + domctl.domain = (domid_t)dom; + domctl.u.vcpucontext.vcpu = 0; + set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt); + if (xc_domctl(xc_handle, &domctl) != 0) { + ERROR("Couldn't set vcpu context"); + goto out; + } + +// DPRINTF("MSR saved %llx \n",(unsigned long long)ctxt.user_regs.msr); +#if 0 + /* Second to set registers... 
*/ +//*** ctxt.flags = VGCF_EXTRA_REGS; + domctl.cmd = XEN_DOMCTL_setvcpucontext; + domctl.domain = (domid_t)dom; + domctl.u.vcpucontext.vcpu = 0; + set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt); + if (xc_domctl(xc_handle, &domctl) != 0) { + ERROR("Couldn't set vcpu context"); + goto out; + } +#endif + + /* Just a check. */ + if (xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, &ctxt)) { + ERROR("Could not get vcpu context"); + goto out; + } + +#if 0 + /* Then get privreg page. */ + if (read_page(xc_handle, io_fd, dom, ctxt.privregs_pfn) < 0) { + ERROR("Could not read vcpu privregs"); + goto out; + } +#endif + + /* Read shared info. */ + shared_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ|PROT_WRITE, shared_info_frame); + if (shared_info == NULL) { + ERROR("cannot map page"); + goto out; + } + if (!read_exact(io_fd, shared_info, PAGE_SIZE)) { + ERROR("Error when reading shared_info page"); + goto out; + } + + /* clear any pending events and the selector */ + memset(&(shared_info->evtchn_pending[0]), 0, + sizeof (shared_info->evtchn_pending)); + for (i = 0; i < MAX_VIRT_CPUS; i++) + shared_info->vcpu_info[i].evtchn_pending_sel = 0; + + +#if 1 + mfn = shared_info_frame - 3 ; +// mfn = page_array[shared_info->arch.start_info_pfn]; + DPRINTF("start_info mfn %llx \n", (unsigned long long) mfn); +#endif + + munmap (shared_info, PAGE_SIZE); + + for (i=0;i<max_pfn;i++) + if (page_array[i] == shared_info_frame) break; + if ( i >= max_pfn) + { + ERROR(); + goto out; + } + shared_info_pfn = (unsigned long) i; + + // rc = 0; goto out; //*** + + /* Uncanonicalise the suspend-record frame number and poke resume rec. 
*/ + start_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ | PROT_WRITE, mfn); + start_info->nr_pages = max_pfn; + // DPRINTF("shared_info_mfn %llx \n", (unsigned long long)(shared_info_pfn << PAGE_SHIFT)); + // DPRINTF("start_info->shared_info%llx \n", (unsigned long long)start_info->shared_info); + start_info->shared_info = shared_info_pfn << PAGE_SHIFT; + start_info->flags = 0; + *store_mfn = page_array[start_info->store_mfn]; + start_info->store_evtchn = store_evtchn; + *console_mfn = page_array[start_info->console.domU.mfn]; + start_info->console.domU.evtchn = console_evtchn; + munmap(start_info, PAGE_SIZE); + + + /* + * Safety checking of saved context: + * 1. user_regs is fine, as Xen checks that on context switch. + * 2. fpu_ctxt is fine, as it can't hurt Xen. + * 3. trap_ctxt needs the code selectors checked. + * 4. ldt base must be page-aligned, no more than 8192 ents, ... + * 5. gdt already done, and further checking is done by Xen. + * 6. check that kernel_ss is safe. + * 7. pt_base is already done. + * 8. debugregs are checked by Xen. + * 9. callback code selectors need checking. + */ + DPRINTF("Domain ready to be built.\n"); + + rc = 0; + + out: + if ((rc != 0) && (dom != 0)) + xc_domain_destroy(xc_handle, dom); +#if 0 + else + { + DPRINTF("Domain puased: "); + xc_domain_pause(xc_handle,dom); + DPRINTF("Domain unpuased: "); + xc_domain_unpause(xc_handle,dom); + } + +#endif + free (page_array); + + DPRINTF("Restore exit with rc=%d\n", rc); + + return rc; +} diff -r 9148f7816d00 -r 42e39f025aed tools/libxc/powerpc64/xc_ppc_linux_save.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/powerpc64/xc_ppc_linux_save.c Thu Nov 02 15:45:50 2006 -0500 @@ -0,0 +1,432 @@ +/****************************************************************************** + * xc_ppc_linux_save.c + * + * Save the state of a running Linux session. + * + * Copyright (c) 2003, K A Fraser. 
+ * Rewritten for ppc + */ + +#include <inttypes.h> +#include <time.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/time.h> +#include "htab.h" + +#include "xg_private.h" + +/* +** Default values for important tuning parameters. Can override by passing +** non-zero replacement values to xc_linux_save(). +** +** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too. +** +*/ +#define DEF_MAX_ITERS (4 - 1) /* limit us to 4 times round loop */ +#define DEF_MAX_FACTOR 3 /* never send more than 3x nr_pfns */ + +/* +** During (live) save/migrate, we maintain a number of bitmaps to track +** which pages we have to send, and to skip. +*/ + +#define DECOR 0x80000000 // indicates htab address +//#define DECOR 0x400000 // indicates htab address +#define LOG_PTE_SIZE 4 + +#define INVALID_MFN (~0ULL) + +#define BITS_PER_LONG (sizeof(unsigned long) * 8) + +#define BITMAP_ENTRY(_nr,_bmap) \ + ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG] + +#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG) + +static inline int test_bit (int nr, volatile void * addr) +{ + return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1; +} + +static inline void clear_bit (int nr, volatile void * addr) +{ + BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr)); +} + +static inline void set_bit ( int nr, volatile void * addr) +{ + BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr)); +} + +/* total number of pages used by the current guest */ +static unsigned long max_pfn; + + +static int +suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd, + int dom, xc_dominfo_t *info) +{ + int i = 0; + + DPRINTF("suspend_and_state()\n"); + if (!(*suspend)(dom)) { + ERROR("Suspend request failed"); + return -1; + } + +retry: + if (xc_domain_getinfo(xc_handle, dom, 1, info) != 1) { + ERROR("Could not get domain info"); + return -1; + } + + if (info->shutdown && info->shutdown_reason == SHUTDOWN_suspend) + return 0; // success + + if (info->paused) { + // try unpausing domain, wait, 
and retest + xc_domain_unpause(xc_handle, dom); + + ERROR("Domain was paused. Wait and re-test."); + usleep(10000); // 10ms + + goto retry; + } + + + if(++i < 100) { + ERROR("Retry suspend domain."); + usleep(10000); // 10ms + goto retry; + } + + ERROR("Unable to suspend domain."); + + return -1; +} + +static inline ssize_t +write_exact(int fd, void *buf, size_t count) +{ + if (write(fd, buf, count) != count) + return 0; + return 1; +} + +int +xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, + uint32_t max_factor, uint32_t flags, int (*suspend)(int)) +{ + DECLARE_DOMCTL; + xc_dominfo_t info; + + int rc = 1; + int debug = 0; + + /* The new domain's shared-info frame number. */ + unsigned long shared_info_frame; + + /* A copy of the CPU context of the guest. */ + vcpu_guest_context_t ctxt; + + xen_pfn_t *page_array = NULL; + + /* Live mapping of shared info structure */ + shared_info_t *live_shinfo = NULL; + + + DPRINTF("xc_linux_save (ppc): started dom=%d\n", dom); + + if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) { + ERROR("Could not get domain info"); + return 1; + } + + shared_info_frame = info.shared_info_frame; + + /* Map the shared info frame */ + live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ, shared_info_frame); + if (!live_shinfo) { + ERROR("Couldn't map live_shinfo"); + goto out; + } + + max_pfn = info.max_memkb >> (PAGE_SHIFT - 10); + + DPRINTF("max_pfn: 0x%08lx\n", max_pfn); + + page_array = malloc(max_pfn * sizeof(xen_pfn_t)); + if (page_array == NULL) { + ERROR("Could not allocate memory"); + goto out; + } + + /* This is expected by xm restore. */ + if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) { + ERROR("write: max_pfn"); + goto out; + } + + /* xc_linux_restore starts to read here. */ + /* Write a version number. This can avoid searching for a stupid bug + if the format change. + The version is hard-coded, don't forget to change the restore code + too! 
*/ + { + unsigned long version = 1; + + if (!write_exact(io_fd, &version, sizeof(unsigned long))) { + ERROR("write: version"); + goto out; + } + } + +#if 0 + domctl.cmd = XEN_DOMCTL_arch_setup; + domctl.domain = (domid_t)dom; + domctl.u.arch_setup.flags = XEN_DOMAINSETUP_query; + if (xc_domctl(xc_handle, &domctl) < 0) { + ERROR("Could not get domain setup"); + goto out; + } + if (!write_exact(io_fd, &domctl.u.arch_setup, + sizeof(domctl.u.arch_setup))) { + ERROR("write: domain setup"); + goto out; + } +#endif + + /* This is a non-live suspend. Issue the call back to get the + domain suspended */ + + if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) { + ERROR("Domain appears not to have suspended"); + goto out; + } + + + { + char *mem; + xen_pfn_t pfn; + unsigned int total_sent = 0; + + if (xc_get_pfn_list(xc_handle, dom, + page_array, max_pfn) != max_pfn) { + ERROR("Could not get the page frame list"); + goto out; + } + + /* Start writing out the saved-domain record. */ + for (pfn = 0; pfn < max_pfn; pfn++){ + // for (pfn = 0; pfn < 16; pfn++){ + if (page_array[pfn] == INVALID_MFN) + continue; + + if (debug) + fprintf(stderr, "xc_linux_save: page %llx (%llu/%lu)\n", + page_array[pfn], pfn, max_pfn); + + // DPRINTF( "xc_linux_save: page %llx (%llu/%lu)\n", + // page_array[pfn], pfn, max_pfn); + mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ|PROT_WRITE, page_array[pfn]); + if (mem == NULL) { + /* The page may have move. + It will be remarked dirty. + FIXME: to be tracked. 
*/ + fprintf(stderr, "cannot map page %llx: %s\n", + page_array[pfn], strerror (errno)); + continue; + } + else + { + if (pfn <8) + DPRINTF("memory addres %lx \n", (ulong)mem); + } + + if (!write_exact(io_fd, &pfn, sizeof(pfn))) { + ERROR("Error when writing to state file (4)"); + goto out; + } + + if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) { + ERROR("Error when writing to state file (5)"); + goto out; + } + munmap(mem, PAGE_SIZE); + total_sent++; + } + + DPRINTF("total_sent: %d\n", total_sent); + } + + fprintf (stderr, "All memory is saved\n"); + + /* terminate memory dump */ + { + xen_pfn_t pfn = INVALID_MFN; + if (!write_exact(io_fd, &pfn, sizeof(pfn))) { + ERROR("Error when writing to state file (6)"); + goto out; + } + } + + { + int i, k, num_ptes; + char *mem, *temp, *copy; + unsigned long N; + unsigned int total_sent = 0; + uint64_t htab_raddr; + ulong htab_mfn, htab_pages; + xen_pfn_t htab_rpn; + union pte *ppte; + + num_ptes = xc_get_shadow_list(xc_handle, dom, &htab_raddr); + if (num_ptes == -1){ + ERROR("Could not get the shadow list"); + goto out; + } + + temp = malloc(PAGE_SIZE * 2); + if (temp == NULL){ + ERROR("Could not allocate temp memory"); + goto out; + } + + copy = (char *)(((ulong)temp + (PAGE_SIZE - 1)) & (~(PAGE_SIZE-1))); + + htab_mfn = htab_raddr >> PAGE_SHIFT; + htab_pages = num_ptes / (PAGE_SIZE / (1UL << LOG_PTE_SIZE)); + // DPRINTF("htab_pages: 0x%08lx htab_addr: %llx htab_mfn %lx\n", htab_pages,htab_raddr, htab_mfn); + + if (!write_exact(io_fd, &num_ptes, sizeof(num_ptes))) { + ERROR("Error when writing to state file (4)"); + goto out; + } + /* Replace rpn with guest pfn, then write out htab, by page */ + for (N = 0; N < htab_pages; N++, htab_mfn++) { + mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ|PROT_WRITE, htab_mfn | DECOR); + if (mem == NULL){ + ERROR("Cannot map htab_mfn 0x%08lx: %s\n", + htab_mfn, strerror (errno)); + goto out; + } + //else DPRINTF("success map htab to guest: %lx \n", (unsigned 
long)mem); + + memcpy(copy, mem, PAGE_SIZE); + /*** TBD - improve search of page_array[] for htab_rpn ***/ + ppte = (union pte *)copy; + for (i = 0; i < (PAGE_SIZE / (1UL << LOG_PTE_SIZE)); i++, ppte++){ +// DPRINTF(" htab : i %d, ppte->vsid %llx, rpn %llx \n", i , (unsigned long long)ppte->words.vsid, (unsigned long long)ppte->words.rpn); + if (ppte->bits.v == 1){ // valid htab entry + htab_rpn = ppte->bits.rpn; + for (k = 0; k < max_pfn; k++){ + if (htab_rpn == page_array[k]) + break; + } + + if (k >= max_pfn){ + ERROR("htab_rpn: 0x%016llx not found in page_array[] %d", + htab_rpn, i); + ppte->bits.v = 0; + goto out; + } + + ppte->bits.rpn = k; + } else { // invalid htab entry + ppte->words.rpn = 0; + } + } + + if (write(io_fd, copy, PAGE_SIZE) != PAGE_SIZE) { + ERROR("Error when writing to state file (7)"); + goto out; + } + munmap(mem, PAGE_SIZE); + total_sent++; + } + + free(temp); + + DPRINTF("htab_sent: %d\n", total_sent); + } +#if 0 + /* terminate htab dump*/ + { + xen_pfn_t pfn = INVALID_MFN; + if (!write_exact(io_fd, &pfn, sizeof(pfn))) { + ERROR("Error when writing to state file (8)"); + goto out; + } + } +#endif + /*** TBD - save vcpu context for all vcpus ***/ + if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) { + ERROR("Could not get vcpu context"); + goto out; + } + + if (!write_exact(io_fd, &ctxt, sizeof(ctxt))) { + ERROR("Error when writing to state file (1)"); + goto out; + } + +#if 0 + mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ|PROT_WRITE, ctxt.privregs_pfn); + if (mem == NULL) { + ERROR("cannot map privreg page"); + goto out; + } + if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) { + ERROR("Error when writing privreg to state file (5)"); + goto out; + } + munmap(mem, PAGE_SIZE); +#endif + + if (!write_exact(io_fd, live_shinfo, PAGE_SIZE)) { + ERROR("Error when writing to state file (1)"); + goto out; + } + + /* Success! 
*/ + rc = 0; + + + DPRINTF("Domain ready to be built.\n"); + + domctl.cmd = XEN_DOMCTL_setvcpucontext; + domctl.domain = (domid_t)dom; + domctl.u.vcpucontext.vcpu = 0; + set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt); + rc = xc_domctl(xc_handle, &domctl); + + if (rc != 0) { + ERROR("Couldn't build the domain"); + goto out; + } + + out: + + free(page_array); + if (live_shinfo) + munmap(live_shinfo, PAGE_SIZE); + + DPRINTF("Save exit rc=%d\n",rc); + + return !!rc; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ _______________________________________________ Xen-ppc-devel mailing list Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-ppc-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |