[Xen-devel] [PATCH 4/8] HVM save restore: vcpu context support
Signed-off-by: Zhai Edwin <edwin.zhai@xxxxxxxxx>

Save/restore the HVM vcpu context, such as the VMCS.

diff -r ee20d1905bde xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/arch/x86/domain.c     Thu Jan 11 16:46:59 2007 +0800
@@ -573,6 +573,7 @@ int arch_set_info_guest(
     else
     {
         hvm_load_cpu_guest_regs(v, &v->arch.guest_context.user_regs);
+        hvm_load_cpu_context(v, &v->arch.guest_context.hvmcpu_ctxt);
     }
 
     if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
diff -r ee20d1905bde xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/arch/x86/domctl.c     Thu Jan 11 16:49:34 2007 +0800
@@ -322,8 +322,10 @@ void arch_get_info_guest(struct vcpu *v,
 
     if ( is_hvm_vcpu(v) )
     {
-        if ( !IS_COMPAT(v->domain) )
+        if ( !IS_COMPAT(v->domain) ) {
             hvm_store_cpu_guest_regs(v, &c.nat->user_regs, c.nat->ctrlreg);
+            hvm_save_cpu_context(v, &c.nat->hvmcpu_ctxt);
+        }
 #ifdef CONFIG_COMPAT
         else
         {
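Once the HVM context rides inside vcpu_guest_context, a save/restore tool can
move it with the ordinary vcpu-context hypercalls. A minimal toolstack-side
sketch, assuming the libxc wrappers of this era (xc_interface_open,
xc_vcpu_getcontext, xc_vcpu_setcontext) and a paused target vcpu; the helper
name and the reduced error handling are illustrative only:

    #include <stdint.h>
    #include <xenctrl.h>

    static int copy_hvm_vcpu_ctxt(uint32_t src_dom, uint32_t dst_dom,
                                  uint32_t vcpu)
    {
        vcpu_guest_context_t ctxt;
        int xc = xc_interface_open(), rc = -1;

        if ( xc < 0 )
            return -1;

        /* Pull the whole context; hvmcpu_ctxt now travels inside it. */
        if ( xc_vcpu_getcontext(xc, src_dom, vcpu, &ctxt) != 0 )
            goto out;

        /* ctxt.hvmcpu_ctxt.valid is 0x55885588 iff the save side ran. */
        if ( xc_vcpu_setcontext(xc, dst_dom, vcpu, &ctxt) != 0 )
            goto out;

        rc = 0;
     out:
        xc_interface_close(xc);
        return rc;
    }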
diff -r ee20d1905bde xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Thu Jan 11 16:48:01 2007 +0800
@@ -429,6 +429,299 @@ static void vmx_store_cpu_guest_regs(
     vmx_vmcs_exit(v);
 }
 
+static int __get_instruction_length(void);
+int vmx_vmcs_save(struct vcpu *v, struct vmcs_data *c)
+{
+    unsigned long inst_len;
+
+    inst_len = __get_instruction_length();
+    c->eip = __vmread(GUEST_RIP);
+
+#ifdef HVM_DEBUG_SUSPEND
+    printk("vmx_vmcs_save: inst_len=0x%lx, eip=0x%"PRIx64".\n",
+           inst_len, c->eip);
+#endif
+
+    c->esp = __vmread(GUEST_RSP);
+    c->eflags = __vmread(GUEST_RFLAGS);
+
+    c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
+    c->cr3 = v->arch.hvm_vmx.cpu_cr3;
+    c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
+
+#ifdef HVM_DEBUG_SUSPEND
+    printk("vmx_vmcs_save: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
+           c->cr3, c->cr0, c->cr4);
+#endif
+
+    c->idtr_limit = __vmread(GUEST_IDTR_LIMIT);
+    c->idtr_base = __vmread(GUEST_IDTR_BASE);
+
+    c->gdtr_limit = __vmread(GUEST_GDTR_LIMIT);
+    c->gdtr_base = __vmread(GUEST_GDTR_BASE);
+
+    c->cs_sel = __vmread(GUEST_CS_SELECTOR);
+    c->cs_limit = __vmread(GUEST_CS_LIMIT);
+    c->cs_base = __vmread(GUEST_CS_BASE);
+    c->cs_arbytes = __vmread(GUEST_CS_AR_BYTES);
+
+    c->ds_sel = __vmread(GUEST_DS_SELECTOR);
+    c->ds_limit = __vmread(GUEST_DS_LIMIT);
+    c->ds_base = __vmread(GUEST_DS_BASE);
+    c->ds_arbytes = __vmread(GUEST_DS_AR_BYTES);
+
+    c->es_sel = __vmread(GUEST_ES_SELECTOR);
+    c->es_limit = __vmread(GUEST_ES_LIMIT);
+    c->es_base = __vmread(GUEST_ES_BASE);
+    c->es_arbytes = __vmread(GUEST_ES_AR_BYTES);
+
+    c->ss_sel = __vmread(GUEST_SS_SELECTOR);
+    c->ss_limit = __vmread(GUEST_SS_LIMIT);
+    c->ss_base = __vmread(GUEST_SS_BASE);
+    c->ss_arbytes = __vmread(GUEST_SS_AR_BYTES);
+
+    c->fs_sel = __vmread(GUEST_FS_SELECTOR);
+    c->fs_limit = __vmread(GUEST_FS_LIMIT);
+    c->fs_base = __vmread(GUEST_FS_BASE);
+    c->fs_arbytes = __vmread(GUEST_FS_AR_BYTES);
+
+    c->gs_sel = __vmread(GUEST_GS_SELECTOR);
+    c->gs_limit = __vmread(GUEST_GS_LIMIT);
+    c->gs_base = __vmread(GUEST_GS_BASE);
+    c->gs_arbytes = __vmread(GUEST_GS_AR_BYTES);
+
+    c->tr_sel = __vmread(GUEST_TR_SELECTOR);
+    c->tr_limit = __vmread(GUEST_TR_LIMIT);
+    c->tr_base = __vmread(GUEST_TR_BASE);
+    c->tr_arbytes = __vmread(GUEST_TR_AR_BYTES);
+
+    c->ldtr_sel = __vmread(GUEST_LDTR_SELECTOR);
+    c->ldtr_limit = __vmread(GUEST_LDTR_LIMIT);
+    c->ldtr_base = __vmread(GUEST_LDTR_BASE);
+    c->ldtr_arbytes = __vmread(GUEST_LDTR_AR_BYTES);
+
+    c->sysenter_cs = __vmread(GUEST_SYSENTER_CS);
+    c->sysenter_esp = __vmread(GUEST_SYSENTER_ESP);
+    c->sysenter_eip = __vmread(GUEST_SYSENTER_EIP);
+
+    return 1;
+}
+
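The eight per-segment blocks above follow a single read pattern; purely as an
illustration (not part of the patch), they could be driven from a table. The
seg_map structure and SEG() macro below are hypothetical; GUEST_*_SELECTOR
etc. and __vmread() are the accessors the patch itself uses, and ARRAY_SIZE()
is the usual Xen helper:

    static void vmx_save_segments(struct vmcs_data *c)
    {
        const struct seg_map {
            unsigned int sel_f, limit_f, base_f, ar_f; /* VMCS encodings */
            uint32_t *sel, *limit, *arbytes;           /* destinations   */
            uint64_t *base;
        } map[] = {
    #define SEG(s, S) { GUEST_##S##_SELECTOR, GUEST_##S##_LIMIT,        \
                        GUEST_##S##_BASE, GUEST_##S##_AR_BYTES,         \
                        &c->s##_sel, &c->s##_limit, &c->s##_arbytes,    \
                        &c->s##_base }
            SEG(cs, CS), SEG(ds, DS), SEG(es, ES), SEG(ss, SS),
            SEG(fs, FS), SEG(gs, GS), SEG(tr, TR), SEG(ldtr, LDTR),
    #undef SEG
        };
        unsigned int i;

        for ( i = 0; i < ARRAY_SIZE(map); i++ )
        {
            *map[i].sel     = __vmread(map[i].sel_f);
            *map[i].limit   = __vmread(map[i].limit_f);
            *map[i].base    = __vmread(map[i].base_f);
            *map[i].arbytes = __vmread(map[i].ar_f);
        }
    }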
+int vmx_vmcs_restore(struct vcpu *v, struct vmcs_data *c)
+{
+    unsigned long mfn, old_base_mfn;
+
+    vmx_vmcs_enter(v);
+
+    __vmwrite(GUEST_RIP, c->eip);
+    __vmwrite(GUEST_RSP, c->esp);
+    __vmwrite(GUEST_RFLAGS, c->eflags);
+
+    v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
+    __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
+
+#ifdef HVM_DEBUG_SUSPEND
+    printk("vmx_vmcs_restore: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
+           c->cr3, c->cr0, c->cr4);
+#endif
+
+    if (!vmx_paging_enabled(v)) {
+        printk("vmx_vmcs_restore: paging not enabled.\n");
+        goto skip_cr3;
+    }
+
+    if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) {
+        /*
+         * This is a simple TLB flush, implying the guest has
+         * removed some translation or changed page attributes.
+         * We simply invalidate the shadow.
+         */
+        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
+        if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
+            goto bad_cr3;
+        }
+    } else {
+        /*
+         * If different, make a shadow. Check if the PDBR is valid
+         * first.
+         */
+        HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64"", c->cr3);
+        /* current != vcpu, as this is not called by arch_vmx_do_launch */
+        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
+        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) {
+            goto bad_cr3;
+        }
+        old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+        v->arch.guest_table = pagetable_from_pfn(mfn);
+        if (old_base_mfn)
+            put_page(mfn_to_page(old_base_mfn));
+        /*
+         * arch.shadow_table should now hold the next CR3 for shadow
+         */
+        v->arch.hvm_vmx.cpu_cr3 = c->cr3;
+    }
+
+ skip_cr3:
+#if defined(__x86_64__)
+    if (vmx_long_mode_enabled(v)) {
+        unsigned long vm_entry_value;
+        vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
+        vm_entry_value |= VM_ENTRY_IA32E_MODE;
+        __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
+    }
+#endif
+
+    __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
+    v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
+    __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
+
+    __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
+    __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
+
+    __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
+    __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
+
+    __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
+    __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
+    __vmwrite(GUEST_CS_BASE, c->cs_base);
+    __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes);
+
+    __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
+    __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
+    __vmwrite(GUEST_DS_BASE, c->ds_base);
+    __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes);
+
+    __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
+    __vmwrite(GUEST_ES_LIMIT, c->es_limit);
+    __vmwrite(GUEST_ES_BASE, c->es_base);
+    __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes);
+
+    __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
+    __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
+    __vmwrite(GUEST_SS_BASE, c->ss_base);
+    __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes);
+
+    __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
+    __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
+    __vmwrite(GUEST_FS_BASE, c->fs_base);
+    __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes);
+
+    __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
+    __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
+    __vmwrite(GUEST_GS_BASE, c->gs_base);
+    __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes);
+
+    __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
+    __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
+    __vmwrite(GUEST_TR_BASE, c->tr_base);
+    __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes);
+
+    __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
+    __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
+    __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
+    __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes);
+
+    __vmwrite(GUEST_SYSENTER_CS, c->sysenter_cs);
+    __vmwrite(GUEST_SYSENTER_ESP, c->sysenter_esp);
+    __vmwrite(GUEST_SYSENTER_EIP, c->sysenter_eip);
+
+    vmx_vmcs_exit(v);
+
+    shadow_update_paging_modes(v);
+    return 0;
+
+ bad_cr3:
+    gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n", c->cr3);
+    vmx_vmcs_exit(v);
+    return -EINVAL;
+}
+
+#ifdef HVM_DEBUG_SUSPEND
+static void dump_msr_state(struct vmx_msr_state *m)
+{
+    int i = 0;
+
+    printk("**** msr state ****\n");
+    printk("shadow_gs=0x%lx, flags=0x%lx, msr_items:", m->shadow_gs, m->flags);
+    for (i = 0; i < VMX_MSR_COUNT; i++)
+        printk("0x%lx,", m->msrs[i]);
+    printk("\n");
+}
+#else
+static void dump_msr_state(struct vmx_msr_state *m)
+{
+}
+#endif
+
+void vmx_save_cpu_state(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    struct vmcs_data *data = &ctxt->data;
+    struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
+    unsigned long guest_flags = guest_state->flags;
+    int i = 0;
+
+    data->shadow_gs = guest_state->shadow_gs;
+    data->vmxassist_enabled = v->arch.hvm_vmx.vmxassist_enabled;
+
+    /* save msrs */
+    data->flags = guest_flags;
+    for (i = 0; i < VMX_MSR_COUNT; i++)
+        data->msr_items[i] = guest_state->msrs[i];
+
+    dump_msr_state(guest_state);
+}
+
+void vmx_load_cpu_state(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    int i = 0;
+    struct vmcs_data *data = &ctxt->data;
+    struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
+
+    /* restore msrs */
+    guest_state->flags = data->flags;
+    for (i = 0; i < VMX_MSR_COUNT; i++)
+        guest_state->msrs[i] = data->msr_items[i];
+
+    guest_state->shadow_gs = data->shadow_gs;
+
+    /* XXX: no need to restore msrs here; current != vcpu, as this is
+     * not called by arch_vmx_do_launch */
+/*    vmx_restore_guest_msrs(v);*/
+
+    v->arch.hvm_vmx.vmxassist_enabled = data->vmxassist_enabled;
+
+    dump_msr_state(guest_state);
+}
+
+void vmx_save_vmcs_ctxt(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    struct vmcs_data *data = &ctxt->data;
+
+    vmx_save_cpu_state(v, ctxt);
+
+    vmx_vmcs_enter(v);
+    vmx_vmcs_save(v, data);
+    vmx_vmcs_exit(v);
+}
+
+void vmx_load_vmcs_ctxt(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    vmx_load_cpu_state(v, ctxt);
+
+    if (vmx_vmcs_restore(v, &ctxt->data)) {
+        printk("vmx_vmcs_restore failed!\n");
+        domain_crash(v->domain);
+    }
+
+    /* only load the vmcs once */
+    ctxt->valid = 0;
+}
+
 /*
  * The VMX spec (section 4.3.1.2, Checks on Guest Segment
  * Registers) says that virtual-8086 mode guests' segment
@@ -750,6 +1043,9 @@ static void vmx_setup_hvm_funcs(void)
     hvm_funcs.store_cpu_guest_regs = vmx_store_cpu_guest_regs;
     hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs;
+
+    hvm_funcs.save_cpu_ctxt = vmx_save_vmcs_ctxt;
+    hvm_funcs.load_cpu_ctxt = vmx_load_vmcs_ctxt;
 
     hvm_funcs.paging_enabled = vmx_paging_enabled;
     hvm_funcs.long_mode_enabled = vmx_long_mode_enabled;
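The hvmcpu_context container registered here is vendor-neutral, even though
its payload is the VMX-specific struct vmcs_data; an SVM port would
presumably register its own pair of hooks against the same hvm_funcs slots.
A sketch under that assumption — every svm_* name below is hypothetical, not
from this patch:

    /* Hypothetical SVM counterparts to the VMX hooks above. */
    static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvmcpu_context *ctxt)
    {
        /* ... fill ctxt->data from the vcpu's VMCB here ... */
    }

    static void svm_load_vmcb_ctxt(struct vcpu *v, struct hvmcpu_context *ctxt)
    {
        /* ... program the VMCB from ctxt->data here ... */
    }

    static void svm_setup_hvm_funcs(void)
    {
        hvm_funcs.save_cpu_ctxt = svm_save_vmcb_ctxt;
        hvm_funcs.load_cpu_ctxt = svm_load_vmcb_ctxt;
    }

Generic code never needs to know which implementation is behind the hooks,
which is what lets arch_get_info_guest()/arch_set_info_guest() stay
vendor-agnostic.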
diff -r ee20d1905bde xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h     Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/include/asm-x86/hvm/hvm.h     Thu Jan 11 16:48:01 2007 +0800
@@ -79,6 +79,13 @@ struct hvm_function_table {
         struct vcpu *v, struct cpu_user_regs *r, unsigned long *crs);
     void (*load_cpu_guest_regs)(
         struct vcpu *v, struct cpu_user_regs *r);
+
+    /* save and load hvm guest cpu context for save/restore */
+    void (*save_cpu_ctxt)(
+        struct vcpu *v, struct hvmcpu_context *ctxt);
+    void (*load_cpu_ctxt)(
+        struct vcpu *v, struct hvmcpu_context *ctxt);
+
     /*
      * Examine specifics of the guest state:
      * 1) determine whether paging is enabled,
@@ -157,6 +164,35 @@ hvm_load_cpu_guest_regs(struct vcpu *v, 
     hvm_funcs.load_cpu_guest_regs(v, r);
 }
 
+void hvm_set_guest_time(struct vcpu *v, u64 gtime);
+u64 hvm_get_guest_time(struct vcpu *v);
+
+static inline void
+hvm_save_cpu_context(
+    struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    hvm_funcs.save_cpu_ctxt(v, ctxt);
+
+    /* save guest time */
+    ctxt->gtime = hvm_get_guest_time(v);
+
+    /* set the valid flag so the whole vmcs is recovered on restore */
+    ctxt->valid = 0x55885588;
+}
+
+static inline void
+hvm_load_cpu_context(
+    struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    if ( ctxt->valid != 0x55885588 )
+        return;
+
+    hvm_funcs.load_cpu_ctxt(v, ctxt);
+
+    /* restore guest time */
+    hvm_set_guest_time(v, ctxt->gtime);
+}
+
 static inline int
 hvm_paging_enabled(struct vcpu *v)
 {
@@ -222,8 +258,6 @@ void hvm_cpuid(unsigned int input, unsig
 void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
                unsigned int *ecx, unsigned int *edx);
 void hvm_stts(struct vcpu *v);
-void hvm_set_guest_time(struct vcpu *v, u64 gtime);
-u64 hvm_get_guest_time(struct vcpu *v);
 void hvm_migrate_timers(struct vcpu *v);
 void hvm_do_resume(struct vcpu *v);
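The 0x55885588 magic set here, together with the valid = 0 store in
vmx_load_vmcs_ctxt() above, gives the context at-most-once load semantics:
restore is applied only to an image the save side actually produced, and only
on the first load attempt. A standalone mock-up of that handshake (the magic
constant is the patch's; everything else below is illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define HVM_CTXT_VALID 0x55885588u   /* the patch's 'valid' marker */

    struct mock_ctxt {
        uint32_t valid;                  /* stands in for hvmcpu_context.valid */
    };

    static void load_ctxt(struct mock_ctxt *c)
    {
        if ( c->valid != HVM_CTXT_VALID )
            return;              /* no saved image: plain vcpu init path */
        printf("loading saved VMCS image\n");
        c->valid = 0;            /* consume it: "only load the vmcs once" */
    }

    int main(void)
    {
        struct mock_ctxt c = { .valid = HVM_CTXT_VALID };
        load_ctxt(&c);           /* applies the saved image */
        load_ctxt(&c);           /* second call is a no-op */
        return 0;
    }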
diff -r ee20d1905bde xen/include/public/arch-x86/xen.h
--- a/xen/include/public/arch-x86/xen.h Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/include/public/arch-x86/xen.h Thu Jan 11 16:51:03 2007 +0800
@@ -107,6 +107,70 @@ DEFINE_XEN_GUEST_HANDLE(trap_info_t);
 DEFINE_XEN_GUEST_HANDLE(trap_info_t);
 
 typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
+
+/*
+ * World vmcs state
+ */
+struct vmcs_data {
+    uint64_t eip;        /* execution pointer */
+    uint64_t esp;        /* stack pointer */
+    uint64_t eflags;     /* flags register */
+    uint64_t cr0;
+    uint64_t cr3;        /* page table directory */
+    uint64_t cr4;
+    uint32_t idtr_limit; /* idt */
+    uint64_t idtr_base;
+    uint32_t gdtr_limit; /* gdt */
+    uint64_t gdtr_base;
+    uint32_t cs_sel;     /* cs selector */
+    uint32_t cs_limit;
+    uint64_t cs_base;
+    uint32_t cs_arbytes;
+    uint32_t ds_sel;     /* ds selector */
+    uint32_t ds_limit;
+    uint64_t ds_base;
+    uint32_t ds_arbytes;
+    uint32_t es_sel;     /* es selector */
+    uint32_t es_limit;
+    uint64_t es_base;
+    uint32_t es_arbytes;
+    uint32_t ss_sel;     /* ss selector */
+    uint32_t ss_limit;
+    uint64_t ss_base;
+    uint32_t ss_arbytes;
+    uint32_t fs_sel;     /* fs selector */
+    uint32_t fs_limit;
+    uint64_t fs_base;
+    uint32_t fs_arbytes;
+    uint32_t gs_sel;     /* gs selector */
+    uint32_t gs_limit;
+    uint64_t gs_base;
+    uint32_t gs_arbytes;
+    uint32_t tr_sel;     /* task selector */
+    uint32_t tr_limit;
+    uint64_t tr_base;
+    uint32_t tr_arbytes;
+    uint32_t ldtr_sel;   /* ldtr selector */
+    uint32_t ldtr_limit;
+    uint64_t ldtr_base;
+    uint32_t ldtr_arbytes;
+    uint32_t sysenter_cs;
+    uint64_t sysenter_esp;
+    uint64_t sysenter_eip;
+    /* msr for em64t */
+    uint64_t shadow_gs;
+    uint64_t flags;
+    /* same size as VMX_MSR_COUNT */
+    uint64_t msr_items[6];
+    uint64_t vmxassist_enabled;
+};
+typedef struct vmcs_data vmcs_data_t;
+
+struct hvmcpu_context {
+    uint32_t valid;
+    struct vmcs_data data;
+    uint64_t gtime;
+};
 
 /*
  * The following is all CPU context. Note that the fpu_ctxt block is filled
@@ -154,6 +218,7 @@ struct vcpu_guest_context {
 #endif
 #endif
     unsigned long vm_assist; /* VMASST_TYPE_* bitmap */
+    struct hvmcpu_context hvmcpu_ctxt; /* whole vmcs region */
 #ifdef __x86_64__
     /* Segment base addresses. */
     uint64_t fs_base;
diff -r ee20d1905bde xen/include/xlat.lst
--- a/xen/include/xlat.lst      Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/include/xlat.lst      Thu Jan 11 16:51:35 2007 +0800
@@ -8,6 +8,8 @@
 ?	vcpu_time_info			xen.h
 !	cpu_user_regs			arch-x86/xen-@arch@.h
 !	trap_info			arch-x86/xen.h
+!	hvmcpu_context			arch-x86/xen.h
+!	vmcs_data			arch-x86/xen.h
 !	vcpu_guest_context		arch-x86/xen.h
 ?	acm_getdecision			acm_ops.h
 !	ctl_cpumap			domctl.h
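Since vmcs_data and hvmcpu_context are now part of the public ABI (and, per
the "!" entries above, need 32-/64-bit translation shims for compat guests),
accidental layout changes are worth catching at compile time. A sketch using
the negative-bitfield BUILD_BUG_ON idiom familiar from Linux/Xen; the
specific invariants checked are illustrative assumptions, not requirements
stated by the patch:

    #include <stdint.h>
    #include <stddef.h>

    /* classic negative-bitfield trick; Xen carries an equivalent macro */
    #define BUILD_BUG_ON(cond) ((void)sizeof(struct { int : -!!(cond); }))

    /* assumes struct vmcs_data / struct hvmcpu_context from the hunk above */
    static inline void hvm_ctxt_abi_checks(void)
    {
        /* msr_items[] must stay in step with VMX_MSR_COUNT (6 here) */
        BUILD_BUG_ON(sizeof(((struct vmcs_data *)0)->msr_items)
                     != 6 * sizeof(uint64_t));
        /* 'valid' must stay first so stale images are cheaply rejected */
        BUILD_BUG_ON(offsetof(struct hvmcpu_context, valid) != 0);
    }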
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel