
[Xen-devel] [PATCH 4/8] HVM save restore: vcpu context support

Signed-off-by: Zhai Edwin <edwin.zhai@xxxxxxxxx>

Save/restore the HVM vcpu context, in particular the VMCS.
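
The heart of the patch is a stamp-and-check handshake: the save side
stamps the context with a magic cookie (and the guest time), while the
load side ignores an unstamped context and clears the stamp after use so
a VMCS is only loaded once. A standalone sketch of just that protocol
follows; struct toy_ctxt is a deliberately simplified stand-in for the
real struct hvmcpu_context added to arch-x86/xen.h below.

#include <stdint.h>
#include <stdio.h>

#define HVM_CTXT_MAGIC 0x55885588u   /* same cookie the patch uses */

/* Trimmed-down stand-in for struct hvmcpu_context. */
struct toy_ctxt {
    uint32_t valid;
    uint64_t gtime;
};

static void toy_save(struct toy_ctxt *c, uint64_t now)
{
    c->gtime = now;               /* cf. hvm_save_cpu_context() */
    c->valid = HVM_CTXT_MAGIC;    /* mark: full VMCS snapshot follows */
}

static int toy_load(struct toy_ctxt *c)
{
    if ( c->valid != HVM_CTXT_MAGIC )
        return -1;                /* not an HVM snapshot: ignore it */
    /* ...restore VMCS state, then the guest time... */
    c->valid = 0;                 /* cf. vmx_load_vmcs_ctxt(): load once */
    return 0;
}

int main(void)
{
    struct toy_ctxt c = { 0, 0 };
    printf("load before save: %d\n", toy_load(&c));  /* -1 */
    toy_save(&c, 12345);
    printf("first load:  %d\n", toy_load(&c));       /*  0 */
    printf("second load: %d\n", toy_load(&c));       /* -1 */
    return 0;
}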

diff -r ee20d1905bde xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/arch/x86/domain.c     Thu Jan 11 16:46:59 2007 +0800
@@ -573,6 +573,7 @@ int arch_set_info_guest(
     else
     {
         hvm_load_cpu_guest_regs(v, &v->arch.guest_context.user_regs);
+        hvm_load_cpu_context(v, &v->arch.guest_context.hvmcpu_ctxt);
     }
 
     if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
diff -r ee20d1905bde xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/arch/x86/domctl.c     Thu Jan 11 16:49:34 2007 +0800
@@ -322,8 +322,10 @@ void arch_get_info_guest(struct vcpu *v,
 
     if ( is_hvm_vcpu(v) )
     {
-        if ( !IS_COMPAT(v->domain) )
+        if ( !IS_COMPAT(v->domain) ) {
             hvm_store_cpu_guest_regs(v, &c.nat->user_regs, c.nat->ctrlreg);
+            hvm_save_cpu_context(v, &c.nat->hvmcpu_ctxt);
+        }
 #ifdef CONFIG_COMPAT
         else
         {
diff -r ee20d1905bde xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Thu Jan 11 16:48:01 2007 +0800
@@ -429,6 +429,299 @@ static void vmx_store_cpu_guest_regs(
     vmx_vmcs_exit(v);
 }
 
+static int __get_instruction_length(void);
+int vmx_vmcs_save(struct vcpu *v, struct vmcs_data *c)
+{
+    unsigned long inst_len;
+
+    inst_len = __get_instruction_length();
+    c->eip = __vmread(GUEST_RIP);
+
+#ifdef HVM_DEBUG_SUSPEND
+    printk("vmx_vmcs_save: inst_len=0x%lx, eip=0x%"PRIx64".\n", 
+            inst_len, c->eip);
+#endif
+
+    c->esp = __vmread(GUEST_RSP);
+    c->eflags = __vmread(GUEST_RFLAGS);
+
+    c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
+    c->cr3 = v->arch.hvm_vmx.cpu_cr3;
+    c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
+
+#ifdef HVM_DEBUG_SUSPEND
+    printk("vmx_vmcs_save: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
+            c->cr3,
+            c->cr0,
+            c->cr4);
+#endif
+
+    c->idtr_limit = __vmread(GUEST_IDTR_LIMIT);
+    c->idtr_base = __vmread(GUEST_IDTR_BASE);
+
+    c->gdtr_limit = __vmread(GUEST_GDTR_LIMIT);
+    c->gdtr_base = __vmread(GUEST_GDTR_BASE);
+
+    c->cs_sel = __vmread(GUEST_CS_SELECTOR);
+    c->cs_limit = __vmread(GUEST_CS_LIMIT);
+    c->cs_base = __vmread(GUEST_CS_BASE);
+    c->cs_arbytes = __vmread(GUEST_CS_AR_BYTES);
+
+    c->ds_sel = __vmread(GUEST_DS_SELECTOR);
+    c->ds_limit = __vmread(GUEST_DS_LIMIT);
+    c->ds_base = __vmread(GUEST_DS_BASE);
+    c->ds_arbytes = __vmread(GUEST_DS_AR_BYTES);
+
+    c->es_sel = __vmread(GUEST_ES_SELECTOR);
+    c->es_limit = __vmread(GUEST_ES_LIMIT);
+    c->es_base = __vmread(GUEST_ES_BASE);
+    c->es_arbytes = __vmread(GUEST_ES_AR_BYTES);
+
+    c->ss_sel = __vmread(GUEST_SS_SELECTOR);
+    c->ss_limit = __vmread(GUEST_SS_LIMIT);
+    c->ss_base = __vmread(GUEST_SS_BASE);
+    c->ss_arbytes = __vmread(GUEST_SS_AR_BYTES);
+
+    c->fs_sel = __vmread(GUEST_FS_SELECTOR);
+    c->fs_limit = __vmread(GUEST_FS_LIMIT);
+    c->fs_base = __vmread(GUEST_FS_BASE);
+    c->fs_arbytes = __vmread(GUEST_FS_AR_BYTES);
+
+    c->gs_sel = __vmread(GUEST_GS_SELECTOR);
+    c->gs_limit = __vmread(GUEST_GS_LIMIT);
+    c->gs_base = __vmread(GUEST_GS_BASE);
+    c->gs_arbytes = __vmread(GUEST_GS_AR_BYTES);
+
+    c->tr_sel = __vmread(GUEST_TR_SELECTOR);
+    c->tr_limit = __vmread(GUEST_TR_LIMIT);
+    c->tr_base = __vmread(GUEST_TR_BASE);
+    c->tr_arbytes = __vmread(GUEST_TR_AR_BYTES);
+
+    c->ldtr_sel = __vmread(GUEST_LDTR_SELECTOR);
+    c->ldtr_limit = __vmread(GUEST_LDTR_LIMIT);
+    c->ldtr_base = __vmread(GUEST_LDTR_BASE);
+    c->ldtr_arbytes = __vmread(GUEST_LDTR_AR_BYTES);
+
+    c->sysenter_cs = __vmread(GUEST_SYSENTER_CS);
+    c->sysenter_esp = __vmread(GUEST_SYSENTER_ESP);
+    c->sysenter_eip = __vmread(GUEST_SYSENTER_EIP);
+
+    return 1;
+}
+
+int vmx_vmcs_restore(struct vcpu *v, struct vmcs_data *c)
+{
+    unsigned long mfn, old_base_mfn;
+
+    vmx_vmcs_enter(v);
+
+    __vmwrite(GUEST_RIP, c->eip);
+    __vmwrite(GUEST_RSP, c->esp);
+    __vmwrite(GUEST_RFLAGS, c->eflags);
+
+    v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
+    __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
+
+#ifdef HVM_DEBUG_SUSPEND
+    printk("vmx_vmcs_restore: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
+            c->cr3,
+            c->cr0,
+            c->cr4);
+#endif
+
+    if (!vmx_paging_enabled(v)) {
+        printk("vmx_vmcs_restore: paging not enabled.\n");
+        goto skip_cr3;
+    }
+
+    if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) {
+        /*
+         * This is a simple TLB flush, implying the guest has
+         * removed some translation or changed page attributes.
+         * We simply invalidate the shadow.
+         */
+        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
+        if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
+            goto bad_cr3;
+        }
+    } else {
+        /*
+         * If different, make a shadow. Check if the PDBR is valid
+         * first.
+         */
+        HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64"", c->cr3);
+        /* current!=vcpu as not called by arch_vmx_do_launch */
+        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
+        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) {
+            goto bad_cr3;
+        }
+        old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+        v->arch.guest_table = pagetable_from_pfn(mfn);
+        if (old_base_mfn)
+             put_page(mfn_to_page(old_base_mfn));
+        /*
+         * arch.shadow_table should now hold the next CR3 for shadow
+         */
+        v->arch.hvm_vmx.cpu_cr3 = c->cr3;
+    }
+
+ skip_cr3:
+#if defined(__x86_64__)
+    if (vmx_long_mode_enabled(v)) {
+        unsigned long vm_entry_value;
+        vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
+        vm_entry_value |= VM_ENTRY_IA32E_MODE;
+        __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
+    }
+#endif
+
+    __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
+    v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
+    __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
+
+    __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
+    __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
+
+    __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
+    __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
+
+    __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
+    __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
+    __vmwrite(GUEST_CS_BASE, c->cs_base);
+    __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes);
+
+    __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
+    __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
+    __vmwrite(GUEST_DS_BASE, c->ds_base);
+    __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes);
+
+    __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
+    __vmwrite(GUEST_ES_LIMIT, c->es_limit);
+    __vmwrite(GUEST_ES_BASE, c->es_base);
+    __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes);
+
+    __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
+    __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
+    __vmwrite(GUEST_SS_BASE, c->ss_base);
+    __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes);
+
+    __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
+    __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
+    __vmwrite(GUEST_FS_BASE, c->fs_base);
+    __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes);
+
+    __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
+    __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
+    __vmwrite(GUEST_GS_BASE, c->gs_base);
+    __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes);
+
+    __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
+    __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
+    __vmwrite(GUEST_TR_BASE, c->tr_base);
+    __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes);
+
+    __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
+    __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
+    __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
+    __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes);
+
+    __vmwrite(GUEST_SYSENTER_CS, c->sysenter_cs);
+    __vmwrite(GUEST_SYSENTER_ESP, c->sysenter_esp);
+    __vmwrite(GUEST_SYSENTER_EIP, c->sysenter_eip);
+
+    vmx_vmcs_exit(v);
+
+    shadow_update_paging_modes(v);
+    return 0;
+
+ bad_cr3:
+    gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n", c->cr3);
+    vmx_vmcs_exit(v);
+    return -EINVAL;
+}
+
+#ifdef HVM_DEBUG_SUSPEND
+static void dump_msr_state(struct vmx_msr_state *m)
+{
+    int i = 0;
+    printk("**** msr state ****\n");
+    printk("shadow_gs=0x%lx, flags=0x%lx, msr_items:", m->shadow_gs, m->flags);
+    for (i = 0; i < VMX_MSR_COUNT; i++)
+        printk("0x%lx,", m->msrs[i]);
+    printk("\n");
+}
+#else
+static void dump_msr_state(struct vmx_msr_state *m)
+{
+}
+#endif
+
+void vmx_save_cpu_state(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    struct vmcs_data *data = &ctxt->data;
+    struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
+    unsigned long guest_flags = guest_state->flags;
+    int i = 0;
+
+    data->shadow_gs = guest_state->shadow_gs;
+    data->vmxassist_enabled = v->arch.hvm_vmx.vmxassist_enabled;
+    /* save msrs */
+    data->flags = guest_flags;
+    for (i = 0; i < VMX_MSR_COUNT; i++)
+        data->msr_items[i] = guest_state->msrs[i];
+
+    dump_msr_state(guest_state);
+}
+
+void vmx_load_cpu_state(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    int i = 0;
+    struct vmcs_data *data = &ctxt->data;
+    struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
+
+    /* restore msrs */
+    guest_state->flags = data->flags;
+    for (i = 0; i < VMX_MSR_COUNT; i++)
+        guest_state->msrs[i] = data->msr_items[i];
+
+    guest_state->shadow_gs = data->shadow_gs;
+
+    /* XXX: no need to load the MSRs into hardware here; current != vcpu, as this is not called from arch_vmx_do_launch */
+    /* vmx_restore_guest_msrs(v); */
+
+    v->arch.hvm_vmx.vmxassist_enabled = data->vmxassist_enabled;
+
+    dump_msr_state(guest_state);
+}
+
+void vmx_save_vmcs_ctxt(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    struct vmcs_data *data = &ctxt->data;
+
+    vmx_save_cpu_state(v, ctxt);
+
+    vmx_vmcs_enter(v);
+
+    vmx_vmcs_save(v, data);
+
+    vmx_vmcs_exit(v);
+
+}
+
+void vmx_load_vmcs_ctxt(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    vmx_load_cpu_state(v, ctxt);
+
+    if (vmx_vmcs_restore(v, &ctxt->data)) {
+        printk("vmx_vmcs_restore failed!\n");
+        domain_crash(v->domain);
+    }
+
+    /* only load vmcs once */
+    ctxt->valid = 0;
+
+}
+
 /*
  * The VMX spec (section 4.3.1.2, Checks on Guest Segment
  * Registers) says that virtual-8086 mode guests' segment
@@ -750,6 +1043,9 @@ static void vmx_setup_hvm_funcs(void)
 
     hvm_funcs.store_cpu_guest_regs = vmx_store_cpu_guest_regs;
     hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs;
+
+    hvm_funcs.save_cpu_ctxt = vmx_save_vmcs_ctxt;
+    hvm_funcs.load_cpu_ctxt = vmx_load_vmcs_ctxt;
 
     hvm_funcs.paging_enabled = vmx_paging_enabled;
     hvm_funcs.long_mode_enabled = vmx_long_mode_enabled;
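
A note on the vmx.c hunk above: the eight per-segment save blocks in
vmx_vmcs_save() are fully mechanical, so a token-pasting macro could
collapse them. A sketch of the save side -- illustrative only, not part
of this patch; it relies on vmx.c's existing __vmread() and the
GUEST_*_* VMCS field names:

#define SAVE_SEG(name, NAME)                                    \
    do {                                                        \
        c->name##_sel     = __vmread(GUEST_##NAME##_SELECTOR);  \
        c->name##_limit   = __vmread(GUEST_##NAME##_LIMIT);     \
        c->name##_base    = __vmread(GUEST_##NAME##_BASE);      \
        c->name##_arbytes = __vmread(GUEST_##NAME##_AR_BYTES);  \
    } while (0)

    /* Replaces the hand-written blocks in vmx_vmcs_save(): */
    SAVE_SEG(cs, CS);   SAVE_SEG(ds, DS);   SAVE_SEG(es, ES);
    SAVE_SEG(ss, SS);   SAVE_SEG(fs, FS);   SAVE_SEG(gs, GS);
    SAVE_SEG(tr, TR);   SAVE_SEG(ldtr, LDTR);

A matching LOAD_SEG() built on __vmwrite() would cover the restore side
in vmx_vmcs_restore() the same way.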
diff -r ee20d1905bde xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h     Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/include/asm-x86/hvm/hvm.h     Thu Jan 11 16:48:01 2007 +0800
@@ -79,6 +79,13 @@ struct hvm_function_table {
         struct vcpu *v, struct cpu_user_regs *r, unsigned long *crs);
     void (*load_cpu_guest_regs)(
         struct vcpu *v, struct cpu_user_regs *r);
+
+    /* save/load HVM guest CPU context, for domain save/restore */
+    void (*save_cpu_ctxt)(
+        struct vcpu *v, struct hvmcpu_context *ctxt);
+    void (*load_cpu_ctxt)(
+        struct vcpu *v, struct hvmcpu_context *ctxt);
+
     /*
      * Examine specifics of the guest state:
      * 1) determine whether paging is enabled,
@@ -157,6 +164,35 @@ hvm_load_cpu_guest_regs(struct vcpu *v, 
     hvm_funcs.load_cpu_guest_regs(v, r);
 }
 
+void hvm_set_guest_time(struct vcpu *v, u64 gtime);
+u64 hvm_get_guest_time(struct vcpu *v);
+
+static inline void
+hvm_save_cpu_context(
+        struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    hvm_funcs.save_cpu_ctxt(v, ctxt);
+
+    /* save guest time */
+    ctxt->gtime = hvm_get_guest_time(v);
+
+    /* set valid flag so the whole VMCS is recovered on restore */
+    ctxt->valid = 0x55885588;
+}
+
+static inline void
+hvm_load_cpu_context(
+        struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+    if ( ctxt->valid != 0x55885588 )
+        return;
+
+    hvm_funcs.load_cpu_ctxt(v, ctxt);
+
+    /* restore guest time */
+    hvm_set_guest_time(v, ctxt->gtime);
+}
+
 static inline int
 hvm_paging_enabled(struct vcpu *v)
 {
@@ -222,8 +258,6 @@ void hvm_cpuid(unsigned int input, unsig
 void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
                                    unsigned int *ecx, unsigned int *edx);
 void hvm_stts(struct vcpu *v);
-void hvm_set_guest_time(struct vcpu *v, u64 gtime);
-u64 hvm_get_guest_time(struct vcpu *v);
 void hvm_migrate_timers(struct vcpu *v);
 void hvm_do_resume(struct vcpu *v);
 
diff -r ee20d1905bde xen/include/public/arch-x86/xen.h
--- a/xen/include/public/arch-x86/xen.h Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/include/public/arch-x86/xen.h Thu Jan 11 16:51:03 2007 +0800
@@ -107,6 +107,70 @@ DEFINE_XEN_GUEST_HANDLE(trap_info_t);
 DEFINE_XEN_GUEST_HANDLE(trap_info_t);
 
 typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
+
+/*
+ * Guest VMCS state, saved/restored as part of the HVM vcpu context
+ */
+struct vmcs_data {
+    uint64_t  eip;        /* execution pointer */
+    uint64_t  esp;        /* stack pointer */
+    uint64_t  eflags;     /* flags register */
+    uint64_t  cr0;
+    uint64_t  cr3;        /* page table directory */
+    uint64_t  cr4;
+    uint32_t  idtr_limit; /* idt */
+    uint64_t  idtr_base;
+    uint32_t  gdtr_limit; /* gdt */
+    uint64_t  gdtr_base;
+    uint32_t  cs_sel;     /* cs selector */
+    uint32_t  cs_limit;
+    uint64_t  cs_base;
+    uint32_t  cs_arbytes;
+    uint32_t  ds_sel;     /* ds selector */
+    uint32_t  ds_limit;
+    uint64_t  ds_base;
+    uint32_t  ds_arbytes;
+    uint32_t  es_sel;     /* es selector */
+    uint32_t  es_limit;
+    uint64_t  es_base;
+    uint32_t  es_arbytes;
+    uint32_t  ss_sel;     /* ss selector */
+    uint32_t  ss_limit;
+    uint64_t  ss_base;
+    uint32_t  ss_arbytes;
+    uint32_t  fs_sel;     /* fs selector */
+    uint32_t  fs_limit;
+    uint64_t  fs_base;
+    uint32_t  fs_arbytes;
+    uint32_t  gs_sel;     /* gs selector */
+    uint32_t  gs_limit;
+    uint64_t  gs_base;
+    uint32_t  gs_arbytes;
+    uint32_t  tr_sel;     /* task selector */
+    uint32_t  tr_limit;
+    uint64_t  tr_base;
+    uint32_t  tr_arbytes;
+    uint32_t  ldtr_sel;   /* ldtr selector */
+    uint32_t  ldtr_limit;
+    uint64_t  ldtr_base;
+    uint32_t  ldtr_arbytes;
+    uint32_t  sysenter_cs;
+    uint64_t  sysenter_esp;
+    uint64_t  sysenter_eip;
+    /* msr for em64t */
+    uint64_t shadow_gs;
+    uint64_t flags;
+    /* array length must match VMX_MSR_COUNT */
+    uint64_t msr_items[6];
+    uint64_t vmxassist_enabled;
+};
+typedef struct vmcs_data vmcs_data_t;
+
+struct hvmcpu_context {
+    uint32_t valid;
+    struct vmcs_data data;
+    uint64_t gtime;
+};
 
 /*
  * The following is all CPU context. Note that the fpu_ctxt block is filled 
@@ -154,6 +218,7 @@ struct vcpu_guest_context {
 #endif
 #endif
     unsigned long vm_assist;                /* VMASST_TYPE_* bitmap */
+    struct hvmcpu_context hvmcpu_ctxt;      /* HVM vcpu (VMCS) context */
 #ifdef __x86_64__
     /* Segment base addresses. */
     uint64_t      fs_base;
diff -r ee20d1905bde xen/include/xlat.lst
--- a/xen/include/xlat.lst      Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/include/xlat.lst      Thu Jan 11 16:51:35 2007 +0800
@@ -8,6 +8,8 @@
 ?      vcpu_time_info                  xen.h
 !      cpu_user_regs                   arch-x86/xen-@arch@.h
 !      trap_info                       arch-x86/xen.h
+!      hvmcpu_context                  arch-x86/xen.h
+!      vmcs_data                       arch-x86/xen.h
 !      vcpu_guest_context              arch-x86/xen.h
 ?      acm_getdecision                 acm_ops.h
 !      ctl_cpumap                      domctl.h
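
For reference, a toolstack-side round trip of the new field through the
existing vcpu-context domctls might look like the sketch below. It
assumes the libxc wrappers of this vintage (xc_vcpu_getcontext() /
xc_vcpu_setcontext()); real save/restore code does far more (guest
memory, shared info, device model state).

#include <stdint.h>
#include <xenctrl.h>    /* xc_vcpu_getcontext(), xc_vcpu_setcontext() */

/* Fetch one HVM vcpu's context -- now carrying hvmcpu_ctxt -- and push
 * it back, as a save/restore pair would.  Illustrative only. */
static int roundtrip_vcpu(int xc_handle, uint32_t domid, uint32_t vcpu)
{
    vcpu_guest_context_t ctxt;

    if ( xc_vcpu_getcontext(xc_handle, domid, vcpu, &ctxt) )
        return -1;

    /* For an HVM vcpu, ctxt.hvmcpu_ctxt.valid is now 0x55885588,
     * telling the restore side that a full VMCS snapshot is present
     * (see hvm_save_cpu_context() in the hvm.h hunk). */

    return xc_vcpu_setcontext(xc_handle, domid, vcpu, &ctxt);
}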
