[Xen-changelog] [xen-unstable] Nested VMX: Switch shadow/virtual VMCS between n1/n2 guests.
# HG changeset patch
# User Eddie Dong <eddie.dong@xxxxxxxxx>
# Date 1307607849 -28800
# Node ID c95338e40c50999e64053ccea0dcd07c23449269
# Parent  b2b400ec30816e237d427e94c50fd8e169cbd943
Nested VMX: Switch shadow/virtual VMCS between n1/n2 guests.

Signed-off-by: Qing He <qing.he@xxxxxxxxx>
Signed-off-by: Eddie Dong <eddie.dong@xxxxxxxxx>
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
Committed-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
---

diff -r b2b400ec3081 -r c95338e40c50 xen/arch/x86/hvm/vmx/entry.S
--- a/xen/arch/x86/hvm/vmx/entry.S      Thu Jun 09 16:24:09 2011 +0800
+++ b/xen/arch/x86/hvm/vmx/entry.S      Thu Jun 09 16:24:09 2011 +0800
@@ -119,6 +119,7 @@
         .globl vmx_asm_do_vmentry
 vmx_asm_do_vmentry:
         call vmx_intr_assist
+        call nvmx_switch_guest

         get_current(bx)
         cli
diff -r b2b400ec3081 -r c95338e40c50 xen/arch/x86/hvm/vmx/vvmx.c
--- a/xen/arch/x86/hvm/vmx/vvmx.c       Thu Jun 09 16:24:09 2011 +0800
+++ b/xen/arch/x86/hvm/vmx/vvmx.c       Thu Jun 09 16:24:09 2011 +0800
@@ -473,6 +473,41 @@
     set_shadow_control(v, SECONDARY_VM_EXEC_CONTROL, value);
 }

+static void nvmx_update_pin_control(struct vcpu *v, unsigned long host_cntrl)
+{
+    u32 shadow_cntrl;
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+
+    shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, PIN_BASED_VM_EXEC_CONTROL);
+    shadow_cntrl &= ~PIN_BASED_PREEMPT_TIMER;
+    shadow_cntrl |= host_cntrl;
+    __vmwrite(PIN_BASED_VM_EXEC_CONTROL, shadow_cntrl);
+}
+
+static void nvmx_update_exit_control(struct vcpu *v, unsigned long host_cntrl)
+{
+    u32 shadow_cntrl;
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+
+    shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_CONTROLS);
+    shadow_cntrl &= ~(VM_EXIT_SAVE_DEBUG_CNTRLS
+                      | VM_EXIT_SAVE_GUEST_PAT
+                      | VM_EXIT_SAVE_GUEST_EFER
+                      | VM_EXIT_SAVE_PREEMPT_TIMER);
+    shadow_cntrl |= host_cntrl;
+    __vmwrite(VM_EXIT_CONTROLS, shadow_cntrl);
+}
+
+static void nvmx_update_entry_control(struct vcpu *v)
+{
+    u32 shadow_cntrl;
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+
+    shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, VM_ENTRY_CONTROLS);
+    shadow_cntrl &= ~(VM_ENTRY_LOAD_GUEST_PAT | VM_ENTRY_LOAD_GUEST_EFER);
+    __vmwrite(VM_ENTRY_CONTROLS, shadow_cntrl);
+}
+
 void nvmx_update_exception_bitmap(struct vcpu *v, unsigned long value)
 {
     set_shadow_control(v, EXCEPTION_BITMAP, value);
@@ -525,6 +560,348 @@
 }

 /*
+ * Context synchronized between shadow and virtual VMCS.
+ */
+static unsigned long vmcs_gstate_field[] = {
+    /* 16 BITS */
+    GUEST_ES_SELECTOR,
+    GUEST_CS_SELECTOR,
+    GUEST_SS_SELECTOR,
+    GUEST_DS_SELECTOR,
+    GUEST_FS_SELECTOR,
+    GUEST_GS_SELECTOR,
+    GUEST_LDTR_SELECTOR,
+    GUEST_TR_SELECTOR,
+    /* 64 BITS */
+    VMCS_LINK_POINTER,
+    GUEST_IA32_DEBUGCTL,
+#ifndef CONFIG_X86_64
+    VMCS_LINK_POINTER_HIGH,
+    GUEST_IA32_DEBUGCTL_HIGH,
+#endif
+    /* 32 BITS */
+    GUEST_ES_LIMIT,
+    GUEST_CS_LIMIT,
+    GUEST_SS_LIMIT,
+    GUEST_DS_LIMIT,
+    GUEST_FS_LIMIT,
+    GUEST_GS_LIMIT,
+    GUEST_LDTR_LIMIT,
+    GUEST_TR_LIMIT,
+    GUEST_GDTR_LIMIT,
+    GUEST_IDTR_LIMIT,
+    GUEST_ES_AR_BYTES,
+    GUEST_CS_AR_BYTES,
+    GUEST_SS_AR_BYTES,
+    GUEST_DS_AR_BYTES,
+    GUEST_FS_AR_BYTES,
+    GUEST_GS_AR_BYTES,
+    GUEST_LDTR_AR_BYTES,
+    GUEST_TR_AR_BYTES,
+    GUEST_INTERRUPTIBILITY_INFO,
+    GUEST_ACTIVITY_STATE,
+    GUEST_SYSENTER_CS,
+    /* natural */
+    GUEST_ES_BASE,
+    GUEST_CS_BASE,
+    GUEST_SS_BASE,
+    GUEST_DS_BASE,
+    GUEST_FS_BASE,
+    GUEST_GS_BASE,
+    GUEST_LDTR_BASE,
+    GUEST_TR_BASE,
+    GUEST_GDTR_BASE,
+    GUEST_IDTR_BASE,
+    GUEST_DR7,
+    /*
+     * Following guest states are in local cache (cpu_user_regs)
+     GUEST_RSP,
+     GUEST_RIP,
+     */
+    GUEST_RFLAGS,
+    GUEST_PENDING_DBG_EXCEPTIONS,
+    GUEST_SYSENTER_ESP,
+    GUEST_SYSENTER_EIP,
+};
+
+/*
+ * Context: shadow -> virtual VMCS
+ */
+static unsigned long vmcs_ro_field[] = {
+    GUEST_PHYSICAL_ADDRESS,
+    VM_INSTRUCTION_ERROR,
+    VM_EXIT_REASON,
+    VM_EXIT_INTR_INFO,
+    VM_EXIT_INTR_ERROR_CODE,
+    IDT_VECTORING_INFO,
+    IDT_VECTORING_ERROR_CODE,
+    VM_EXIT_INSTRUCTION_LEN,
+    VMX_INSTRUCTION_INFO,
+    EXIT_QUALIFICATION,
+    GUEST_LINEAR_ADDRESS
+};
+
+static struct vmcs_host_to_guest {
+    unsigned long host_field;
+    unsigned long guest_field;
+} vmcs_h2g_field[] = {
+    {HOST_ES_SELECTOR, GUEST_ES_SELECTOR},
+    {HOST_CS_SELECTOR, GUEST_CS_SELECTOR},
+    {HOST_SS_SELECTOR, GUEST_SS_SELECTOR},
+    {HOST_DS_SELECTOR, GUEST_DS_SELECTOR},
+    {HOST_FS_SELECTOR, GUEST_FS_SELECTOR},
+    {HOST_GS_SELECTOR, GUEST_GS_SELECTOR},
+    {HOST_TR_SELECTOR, GUEST_TR_SELECTOR},
+    {HOST_SYSENTER_CS, GUEST_SYSENTER_CS},
+    {HOST_FS_BASE, GUEST_FS_BASE},
+    {HOST_GS_BASE, GUEST_GS_BASE},
+    {HOST_TR_BASE, GUEST_TR_BASE},
+    {HOST_GDTR_BASE, GUEST_GDTR_BASE},
+    {HOST_IDTR_BASE, GUEST_IDTR_BASE},
+    {HOST_SYSENTER_ESP, GUEST_SYSENTER_ESP},
+    {HOST_SYSENTER_EIP, GUEST_SYSENTER_EIP},
+};
+
+static void vvmcs_to_shadow(void *vvmcs, unsigned int field)
+{
+    u64 value;
+
+    value = __get_vvmcs(vvmcs, field);
+    __vmwrite(field, value);
+}
+
+static void shadow_to_vvmcs(void *vvmcs, unsigned int field)
+{
+    u64 value;
+    int rc;
+
+    value = __vmread_safe(field, &rc);
+    if ( !rc )
+        __set_vvmcs(vvmcs, field, value);
+}
+
+static void load_shadow_control(struct vcpu *v)
+{
+    /*
+     * Set shadow controls: PIN_BASED, CPU_BASED, EXIT, ENTRY
+     * and EXCEPTION
+     * Enforce the removed features
+     */
+    nvmx_update_pin_control(v, vmx_pin_based_exec_control);
+    vmx_update_cpu_exec_control(v);
+    nvmx_update_exit_control(v, vmx_vmexit_control);
+    nvmx_update_entry_control(v);
+    vmx_update_exception_bitmap(v);
+}
+
+static void load_shadow_guest_state(struct vcpu *v)
+{
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+    void *vvmcs = nvcpu->nv_vvmcx;
+    int i;
+
+    /* vvmcs.gstate to shadow vmcs.gstate */
+    for ( i = 0; i < ARRAY_SIZE(vmcs_gstate_field); i++ )
+        vvmcs_to_shadow(vvmcs, vmcs_gstate_field[i]);
+
+    hvm_set_cr0(__get_vvmcs(vvmcs, GUEST_CR0));
+    hvm_set_cr4(__get_vvmcs(vvmcs, GUEST_CR4));
+    hvm_set_cr3(__get_vvmcs(vvmcs, GUEST_CR3));
+
+    vvmcs_to_shadow(vvmcs, VM_ENTRY_INTR_INFO);
+    vvmcs_to_shadow(vvmcs, VM_ENTRY_EXCEPTION_ERROR_CODE);
+    vvmcs_to_shadow(vvmcs, VM_ENTRY_INSTRUCTION_LEN);
+
+    vvmcs_to_shadow(vvmcs, CR0_READ_SHADOW);
+    vvmcs_to_shadow(vvmcs, CR4_READ_SHADOW);
+    vvmcs_to_shadow(vvmcs, CR0_GUEST_HOST_MASK);
+    vvmcs_to_shadow(vvmcs, CR4_GUEST_HOST_MASK);
+
+    /* TODO: PDPTRs for nested ept */
+    /* TODO: CR3 target control */
+}
+
+static void virtual_vmentry(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+    void *vvmcs = nvcpu->nv_vvmcx;
+#ifdef __x86_64__
+    unsigned long lm_l1, lm_l2;
+#endif
+
+    vmx_vmcs_switch(v->arch.hvm_vmx.vmcs, nvcpu->nv_n2vmcx);
+
+    nestedhvm_vcpu_enter_guestmode(v);
+    nvcpu->nv_vmentry_pending = 0;
+    nvcpu->nv_vmswitch_in_progress = 1;
+
+#ifdef __x86_64__
+    /*
+     * EFER handling:
+     * hvm_set_efer won't work if CR0.PG = 1, so we change the value
+     * directly to make hvm_long_mode_enabled(v) work in L2.
+     * An additional update_paging_modes is also needed if there is
+     * a 32/64 switch. v->arch.hvm_vcpu.guest_efer doesn't need to be
+     * saved, since its value on vmexit is determined by L1's
+     * exit_controls.
+     */
+    lm_l1 = !!hvm_long_mode_enabled(v);
+    lm_l2 = !!(__get_vvmcs(vvmcs, VM_ENTRY_CONTROLS) &
+                           VM_ENTRY_IA32E_MODE);
+
+    if ( lm_l2 )
+        v->arch.hvm_vcpu.guest_efer |= EFER_LMA | EFER_LME;
+    else
+        v->arch.hvm_vcpu.guest_efer &= ~(EFER_LMA | EFER_LME);
+#endif
+
+    load_shadow_control(v);
+    load_shadow_guest_state(v);
+
+#ifdef __x86_64__
+    if ( lm_l1 != lm_l2 )
+        paging_update_paging_modes(v);
+#endif
+
+    regs->eip = __get_vvmcs(vvmcs, GUEST_RIP);
+    regs->esp = __get_vvmcs(vvmcs, GUEST_RSP);
+    regs->eflags = __get_vvmcs(vvmcs, GUEST_RFLAGS);
+
+    /* TODO: EPT_POINTER */
+}
+
+static void sync_vvmcs_guest_state(struct vcpu *v, struct cpu_user_regs *regs)
+{
+    int i;
+    unsigned long mask;
+    unsigned long cr;
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+    void *vvmcs = nvcpu->nv_vvmcx;
+
+    /* copy shadow vmcs.gstate back to vvmcs.gstate */
+    for ( i = 0; i < ARRAY_SIZE(vmcs_gstate_field); i++ )
+        shadow_to_vvmcs(vvmcs, vmcs_gstate_field[i]);
+    /* RIP, RSP are in user regs */
+    __set_vvmcs(vvmcs, GUEST_RIP, regs->eip);
+    __set_vvmcs(vvmcs, GUEST_RSP, regs->esp);
+
+    /* SDM 20.6.6: L2 guest execution may change GUEST CR0/CR4 */
+    mask = __get_vvmcs(vvmcs, CR0_GUEST_HOST_MASK);
+    if ( ~mask )
+    {
+        cr = __get_vvmcs(vvmcs, GUEST_CR0);
+        cr = (cr & mask) | (__vmread(GUEST_CR0) & ~mask);
+        __set_vvmcs(vvmcs, GUEST_CR0, cr);
+    }
+
+    mask = __get_vvmcs(vvmcs, CR4_GUEST_HOST_MASK);
+    if ( ~mask )
+    {
+        cr = __get_vvmcs(vvmcs, GUEST_CR4);
+        cr = (cr & mask) | (__vmread(GUEST_CR4) & ~mask);
+        __set_vvmcs(vvmcs, GUEST_CR4, cr);
+    }
+
+    /* CR3 sync if exec doesn't want cr3 load exiting: i.e. nested EPT */
+    if ( !(__n2_exec_control(v) & CPU_BASED_CR3_LOAD_EXITING) )
+        shadow_to_vvmcs(vvmcs, GUEST_CR3);
+}
+
+static void sync_vvmcs_ro(struct vcpu *v)
+{
+    int i;
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+
+    for ( i = 0; i < ARRAY_SIZE(vmcs_ro_field); i++ )
+        shadow_to_vvmcs(nvcpu->nv_vvmcx, vmcs_ro_field[i]);
+}
+
+static void load_vvmcs_host_state(struct vcpu *v)
+{
+    int i;
+    u64 r;
+    void *vvmcs = vcpu_nestedhvm(v).nv_vvmcx;
+
+    for ( i = 0; i < ARRAY_SIZE(vmcs_h2g_field); i++ )
+    {
+        r = __get_vvmcs(vvmcs, vmcs_h2g_field[i].host_field);
+        __vmwrite(vmcs_h2g_field[i].guest_field, r);
+    }
+
+    hvm_set_cr0(__get_vvmcs(vvmcs, HOST_CR0));
+    hvm_set_cr4(__get_vvmcs(vvmcs, HOST_CR4));
+    hvm_set_cr3(__get_vvmcs(vvmcs, HOST_CR3));
+
+    __set_vvmcs(vvmcs, VM_ENTRY_INTR_INFO, 0);
+}
+
+static void virtual_vmexit(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+#ifdef __x86_64__
+    unsigned long lm_l1, lm_l2;
+#endif
+
+    sync_vvmcs_ro(v);
+    sync_vvmcs_guest_state(v, regs);
+
+    vmx_vmcs_switch(v->arch.hvm_vmx.vmcs, nvcpu->nv_n1vmcx);
+
+    nestedhvm_vcpu_exit_guestmode(v);
+    nvcpu->nv_vmexit_pending = 0;
+
+#ifdef __x86_64__
+    lm_l2 = !!hvm_long_mode_enabled(v);
+    lm_l1 = !!(__get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_CONTROLS) &
+                           VM_EXIT_IA32E_MODE);
+
+    if ( lm_l1 )
+        v->arch.hvm_vcpu.guest_efer |= EFER_LMA | EFER_LME;
+    else
+        v->arch.hvm_vcpu.guest_efer &= ~(EFER_LMA | EFER_LME);
+#endif
+
+    vmx_update_cpu_exec_control(v);
+    vmx_update_exception_bitmap(v);
+
+    load_vvmcs_host_state(v);
+
+#ifdef __x86_64__
+    if ( lm_l1 != lm_l2 )
+        paging_update_paging_modes(v);
+#endif
+
+    regs->eip = __get_vvmcs(nvcpu->nv_vvmcx, HOST_RIP);
+    regs->esp = __get_vvmcs(nvcpu->nv_vvmcx, HOST_RSP);
+    regs->eflags = __vmread(GUEST_RFLAGS);
+
+    vmreturn(regs, VMSUCCEED);
+}
+
+asmlinkage void nvmx_switch_guest(void)
+{
+    struct vcpu *v = current;
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
+
+    /*
+     * A softirq may interrupt us between handling a virtual vmentry
+     * and the true vmentry.  If, during this window, an L1 virtual
+     * interrupt causes another virtual vmexit, we cannot let that
+     * happen or VM_ENTRY_INTR_INFO will be lost.
+     */
+    if ( unlikely(nvcpu->nv_vmswitch_in_progress) )
+        return;
+
+    if ( nestedhvm_vcpu_in_guestmode(v) && nvcpu->nv_vmexit_pending )
+        virtual_vmexit(regs);
+    else if ( !nestedhvm_vcpu_in_guestmode(v) && nvcpu->nv_vmentry_pending )
+        virtual_vmentry(regs);
+}
+
+/*
  * VMX instructions handling
  */
diff -r b2b400ec3081 -r c95338e40c50 xen/include/asm-x86/hvm/vmx/vmcs.h
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h        Thu Jun 09 16:24:09 2011 +0800
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h        Thu Jun 09 16:24:09 2011 +0800
@@ -160,6 +160,7 @@
 #define PIN_BASED_PREEMPT_TIMER         0x00000040
 extern u32 vmx_pin_based_exec_control;

+#define VM_EXIT_SAVE_DEBUG_CNTRLS       0x00000004
 #define VM_EXIT_IA32E_MODE              0x00000200
 #define VM_EXIT_ACK_INTR_ON_EXIT        0x00008000
 #define VM_EXIT_SAVE_GUEST_PAT          0x00040000
diff -r b2b400ec3081 -r c95338e40c50 xen/include/asm-x86/hvm/vmx/vvmx.h
--- a/xen/include/asm-x86/hvm/vmx/vvmx.h        Thu Jun 09 16:24:09 2011 +0800
+++ b/xen/include/asm-x86/hvm/vmx/vvmx.h        Thu Jun 09 16:24:09 2011 +0800
@@ -165,6 +165,7 @@
 void nvmx_update_secondary_exec_control(struct vcpu *v, unsigned long value);
 void nvmx_update_exception_bitmap(struct vcpu *v, unsigned long value);
+asmlinkage void nvmx_switch_guest(void);

 #endif /* __ASM_X86_HVM_VVMX_H__ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
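
A note on the nvmx_update_pin_control / nvmx_update_exit_control / nvmx_update_entry_control helpers added by the patch: each one builds the value written to the shadow VMCS by taking the control word L1 programmed into the virtual VMCS, clearing the bits Xen does not expose to a nested guest (preemption timer, PAT/EFER save controls), and then OR-ing in the bits the host itself requires. The standalone sketch below illustrates that merge with made-up bit assignments; shadow_control() and the *_BIT macros are hypothetical names for illustration only, not Xen code or real VMX encodings.

/*
 * Illustrative sketch only -- not part of the changeset above.
 * shadow = (controls requested by L1 & ~features hidden from L1) | host-required bits
 */
#include <stdint.h>
#include <stdio.h>

/* Hypothetical bit assignments for the example; not the real VMX encodings. */
#define EXT_INTR_EXIT_BIT   (1u << 0)   /* pretend the host always needs this  */
#define NMI_EXIT_BIT        (1u << 3)   /* pretend L1 asked for this           */
#define PREEMPT_TIMER_BIT   (1u << 6)   /* pretend Xen hides this from L1      */

static uint32_t shadow_control(uint32_t l1_value, uint32_t removed,
                               uint32_t host_required)
{
    /* L1 can never enable a control hidden from it, nor disable a host one. */
    return (l1_value & ~removed) | host_required;
}

int main(void)
{
    uint32_t l1_value = NMI_EXIT_BIT | PREEMPT_TIMER_BIT; /* L1 tried to enable both */
    uint32_t result = shadow_control(l1_value, PREEMPT_TIMER_BIT, EXT_INTR_EXIT_BIT);

    /* Prints 0x9: the hidden timer bit is stripped, the host bit is forced on. */
    printf("shadow control = %#x\n", result);
    return 0;
}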
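Likewise, sync_vvmcs_guest_state() merges CR0/CR4 on virtual vmexit using the guest/host mask from the virtual VMCS: bits set in the mask are owned by L1, so the virtual-VMCS value is kept; bits clear in the mask are owned by L2, so the value the L2 guest actually produced (recorded by hardware in the shadow VMCS) is taken. A minimal standalone sketch of that merge with hypothetical example values follows; merge_cr() is an illustrative helper, not a Xen function.

/*
 * Illustrative sketch only -- not part of the changeset above.
 * CR0/CR4 merge on virtual vmexit: mask bits belong to L1 (keep the
 * virtual-VMCS value), clear bits belong to L2 (take the value from
 * the shadow VMCS).
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t merge_cr(uint64_t vvmcs_cr, uint64_t shadow_cr, uint64_t mask)
{
    return (vvmcs_cr & mask) | (shadow_cr & ~mask);
}

int main(void)
{
    /* Hypothetical example: L1 owns CR0.PG (bit 31); L2 owns CR0.TS (bit 3). */
    uint64_t mask       = 1ULL << 31;                 /* CR0_GUEST_HOST_MASK set by L1 */
    uint64_t vvmcs_cr0  = 1ULL << 31;                 /* L1's view: PG=1, TS=0         */
    uint64_t shadow_cr0 = (1ULL << 31) | (1ULL << 3); /* L2 set TS while it ran        */

    /* Prints 0x80000008: PG kept from L1, TS picked up from L2. */
    printf("merged CR0 = %#llx\n",
           (unsigned long long)merge_cr(vvmcs_cr0, shadow_cr0, mask));
    return 0;
}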