[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] Nested VMX: interrupt/exception handling for n2 guest
# HG changeset patch
# User Eddie Dong <eddie.dong@xxxxxxxxx>
# Date 1307607849 -28800
# Node ID 3ded99964cdf2a9939f5e938ae110ee67e40412a
# Parent c95338e40c50999e64053ccea0dcd07c23449269
Nested VMX: interrupt/exception handling for n2 guest

Signed-off-by: Qing He <qing.he@xxxxxxxxx>
Signed-off-by: Eddie Dong <eddie.dong@xxxxxxxxx>
Acked-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
Committed-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
---

diff -r c95338e40c50 -r 3ded99964cdf xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c	Thu Jun 09 16:24:09 2011 +0800
+++ b/xen/arch/x86/hvm/vmx/intr.c	Thu Jun 09 16:24:09 2011 +0800
@@ -35,6 +35,7 @@
 #include <asm/hvm/vmx/vmcs.h>
 #include <asm/hvm/vpic.h>
 #include <asm/hvm/vlapic.h>
+#include <asm/hvm/nestedhvm.h>
 #include <public/hvm/ioreq.h>
 #include <asm/hvm/trace.h>
 
@@ -109,6 +110,96 @@
     }
 }
 
+/*
+ * Injecting interrupts for nested virtualization
+ *
+ *  When injecting virtual interrupts (originated from L0), there are
+ *  two major possibilities, within L1 context and within L2 context
+ *   1. L1 context (in_nesting == 0)
+ *     Everything is the same as without nested, check RFLAGS.IF to
+ *     see if the injection can be done, using VMCS to inject the
+ *     interrupt
+ *
+ *   2. L2 context (in_nesting == 1)
+ *     Causes a virtual VMExit, RFLAGS.IF is ignored, whether to ack
+ *     irq according to intr_ack_on_exit, shouldn't block normally,
+ *     except for:
+ *    a. context transition
+ *     interrupt needs to be blocked at virtual VMEntry time
+ *    b. L2 idtv reinjection
+ *     if L2 idtv is handled within L0 (e.g. L0 shadow page fault),
+ *     it needs to be reinjected without exiting to L1, interrupt
+ *     injection should be blocked as well at this point.
+ *
+ *  Unfortunately, interrupt blocking in L2 won't work with simple
+ *  intr_window_open (which depends on L2's IF). To solve this,
+ *  the following algorithm can be used:
+ *   v->arch.hvm_vmx.exec_control.VIRTUAL_INTR_PENDING now denotes
+ *   only L0 control, physical control may be different from it.
+ *       - if in L1, it behaves normally, intr window is written
+ *         to physical control as it is
+ *       - if in L2, replace it to MTF (or NMI window) if possible
+ *       - if MTF/NMI window is not used, intr window can still be
+ *         used but may have negative impact on interrupt performance.
+ */
+
+enum hvm_intblk nvmx_intr_blocked(struct vcpu *v)
+{
+    int r = hvm_intblk_none;
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+
+    if ( nestedhvm_vcpu_in_guestmode(v) )
+    {
+        if ( nvcpu->nv_vmexit_pending ||
+             nvcpu->nv_vmswitch_in_progress ||
+             (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK) )
+            r = hvm_intblk_rflags_ie;
+    }
+    else if ( nvcpu->nv_vmentry_pending )
+        r = hvm_intblk_rflags_ie;
+
+    return r;
+}
+
+static int nvmx_intr_intercept(struct vcpu *v, struct hvm_intack intack)
+{
+    u32 exit_ctrl;
+
+    if ( nvmx_intr_blocked(v) != hvm_intblk_none )
+    {
+        enable_intr_window(v, intack);
+        return 1;
+    }
+
+    if ( nestedhvm_vcpu_in_guestmode(v) )
+    {
+        if ( intack.source == hvm_intsrc_pic ||
+             intack.source == hvm_intsrc_lapic )
+        {
+            vmx_inject_extint(intack.vector);
+
+            exit_ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx,
+                                    VM_EXIT_CONTROLS);
+            if ( exit_ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
+            {
+                /* for now, duplicate the ack path in vmx_intr_assist */
+                hvm_vcpu_ack_pending_irq(v, intack);
+                pt_intr_post(v, intack);
+
+                intack = hvm_vcpu_has_pending_irq(v);
+                if ( unlikely(intack.source != hvm_intsrc_none) )
+                    enable_intr_window(v, intack);
+            }
+            else
+                enable_intr_window(v, intack);
+
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
 asmlinkage void vmx_intr_assist(void)
 {
     struct hvm_intack intack;
@@ -132,6 +223,9 @@
         if ( likely(intack.source == hvm_intsrc_none) )
             goto out;
 
+        if ( unlikely(nvmx_intr_intercept(v, intack)) )
+            goto out;
+
         intblk = hvm_interrupt_blocked(v, intack);
         if ( intblk == hvm_intblk_tpr )
         {
diff -r c95338e40c50 -r 3ded99964cdf xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c	Thu Jun 09 16:24:09 2011 +0800
+++ b/xen/arch/x86/hvm/vmx/vmx.c	Thu Jun 09 16:24:09 2011 +0800
@@ -1240,6 +1240,31 @@
                          __ept_sync_domain, d, 1);
 }
 
+void nvmx_enqueue_n2_exceptions(struct vcpu *v,
+            unsigned long intr_fields, int error_code)
+{
+    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+
+    if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) ) {
+        /* enqueue the exception till the VMCS switch back to L1 */
+        nvmx->intr.intr_info = intr_fields;
+        nvmx->intr.error_code = error_code;
+        vcpu_nestedhvm(v).nv_vmexit_pending = 1;
+        return;
+    }
+    else
+        gdprintk(XENLOG_ERR, "Double Fault on Nested Guest: exception %lx %x"
+                 " on %lx %x\n", intr_fields, error_code,
+                 nvmx->intr.intr_info, nvmx->intr.error_code);
+}
+
+static int nvmx_vmexit_exceptions(struct vcpu *v, unsigned int trapnr,
+                      int errcode, unsigned long cr2)
+{
+    nvmx_enqueue_n2_exceptions(v, trapnr, errcode);
+    return NESTEDHVM_VMEXIT_DONE;
+}
+
 static void __vmx_inject_exception(int trap, int type, int error_code)
 {
     unsigned long intr_fields;
@@ -1269,11 +1294,16 @@
 
 void vmx_inject_hw_exception(int trap, int error_code)
 {
-    unsigned long intr_info = __vmread(VM_ENTRY_INTR_INFO);
+    unsigned long intr_info;
     struct vcpu *curr = current;
 
     int type = X86_EVENTTYPE_HW_EXCEPTION;
 
+    if ( nestedhvm_vcpu_in_guestmode(curr) )
+        intr_info = vcpu_2_nvmx(curr).intr.intr_info;
+    else
+        intr_info = __vmread(VM_ENTRY_INTR_INFO);
+
     switch ( trap )
     {
     case TRAP_debug:
@@ -1305,7 +1335,16 @@
             error_code = 0;
     }
 
-    __vmx_inject_exception(trap, type, error_code);
+    if ( nestedhvm_vcpu_in_guestmode(curr) &&
+         nvmx_intercepts_exception(curr, trap, error_code) )
+    {
+        nvmx_enqueue_n2_exceptions (curr,
+            INTR_INFO_VALID_MASK | (type<<8) | trap,
+            error_code);
+        return;
+    }
+    else
+        __vmx_inject_exception(trap, type, error_code);
 
     if ( trap == TRAP_page_fault )
         HVMTRACE_LONG_2D(PF_INJECT, error_code,
@@ -1316,12 +1355,40 @@
 
 void vmx_inject_extint(int trap)
 {
+    struct vcpu *v = current;
+    u32 pin_based_cntrl;
+
+    if ( nestedhvm_vcpu_in_guestmode(v) ) {
+        pin_based_cntrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx,
+                                      PIN_BASED_VM_EXEC_CONTROL);
+        /* Test the control bit (bitwise &), not mere non-zero-ness. */
+        if ( pin_based_cntrl & PIN_BASED_EXT_INTR_MASK ) {
+            nvmx_enqueue_n2_exceptions (v,
+               INTR_INFO_VALID_MASK | (X86_EVENTTYPE_EXT_INTR<<8) | trap,
+               HVM_DELIVER_NO_ERROR_CODE);
+            return;
+        }
+    }
     __vmx_inject_exception(trap, X86_EVENTTYPE_EXT_INTR,
                            HVM_DELIVER_NO_ERROR_CODE);
 }
 
 void vmx_inject_nmi(void)
 {
+    struct vcpu *v = current;
+    u32 pin_based_cntrl;
+
+    if ( nestedhvm_vcpu_in_guestmode(v) ) {
+        pin_based_cntrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx,
+                                      PIN_BASED_VM_EXEC_CONTROL);
+        /* Test the control bit (bitwise &), not mere non-zero-ness. */
+        if ( pin_based_cntrl & PIN_BASED_NMI_EXITING ) {
+            nvmx_enqueue_n2_exceptions (v,
+               INTR_INFO_VALID_MASK | (X86_EVENTTYPE_NMI<<8) | TRAP_nmi,
+               HVM_DELIVER_NO_ERROR_CODE);
+            return;
+        }
+    }
     __vmx_inject_exception(2, X86_EVENTTYPE_NMI,
                            HVM_DELIVER_NO_ERROR_CODE);
 }
@@ -1421,7 +1488,10 @@
     .nhvm_vcpu_reset      = nvmx_vcpu_reset,
     .nhvm_vcpu_guestcr3   = nvmx_vcpu_guestcr3,
     .nhvm_vcpu_hostcr3    = nvmx_vcpu_hostcr3,
-    .nhvm_vcpu_asid       = nvmx_vcpu_asid
+    .nhvm_vcpu_asid       = nvmx_vcpu_asid,
+    .nhvm_vmcx_guest_intercepts_trap = nvmx_intercepts_exception,
+    .nhvm_vcpu_vmexit_trap = nvmx_vmexit_exceptions,
+    .nhvm_intr_blocked    = nvmx_intr_blocked
 };
 
 struct hvm_function_table * __init start_vmx(void)
@@ -2232,7 +2302,8 @@
     hvm_maybe_deassert_evtchn_irq();
 
     idtv_info = __vmread(IDT_VECTORING_INFO);
-    if ( exit_reason != EXIT_REASON_TASK_SWITCH )
+    if ( !nestedhvm_vcpu_in_guestmode(v) &&
+         exit_reason != EXIT_REASON_TASK_SWITCH )
         vmx_idtv_reinject(idtv_info);
 
     switch ( exit_reason )
@@ -2584,6 +2655,9 @@
         domain_crash(v->domain);
         break;
     }
+
+    if ( nestedhvm_vcpu_in_guestmode(v) )
+        nvmx_idtv_handling();
 }
 
 asmlinkage void vmx_vmenter_helper(void)
diff -r c95338e40c50 -r 3ded99964cdf xen/arch/x86/hvm/vmx/vvmx.c
--- a/xen/arch/x86/hvm/vmx/vvmx.c	Thu Jun 09 16:24:09 2011 +0800
+++ b/xen/arch/x86/hvm/vmx/vvmx.c	Thu Jun 09 16:24:09 2011 +0800
@@ -393,6 +393,27 @@
     regs->eflags = eflags;
 }
 
+int nvmx_intercepts_exception(struct vcpu *v, unsigned int trap,
+                               int error_code)
+{
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+    u32 exception_bitmap, pfec_match=0, pfec_mask=0;
+    int r;
+
+    ASSERT ( trap < 32 );
+
+    exception_bitmap = __get_vvmcs(nvcpu->nv_vvmcx, EXCEPTION_BITMAP);
+    r = exception_bitmap & (1u << trap) ? 1: 0;
+
+    if ( trap == TRAP_page_fault ) {
+        pfec_match = __get_vvmcs(nvcpu->nv_vvmcx, PAGE_FAULT_ERROR_CODE_MATCH);
+        pfec_mask  = __get_vvmcs(nvcpu->nv_vvmcx, PAGE_FAULT_ERROR_CODE_MASK);
+        if ( (error_code & pfec_mask) != pfec_match )
+            r = !r;
+    }
+    return r;
+}
+
 /*
  * Nested VMX uses "strict" condition to exit from
  * L2 guest if either L1 VMM or L0 VMM expect to exit.
@@ -464,6 +485,7 @@
         __vmwrite(IO_BITMAP_B, virt_to_maddr(bitmap) + PAGE_SIZE);
     }
 
+    /* TODO: change L0 intr window to MTF or NMI window */
     __vmwrite(CPU_BASED_VM_EXEC_CONTROL, shadow_cntrl);
 }
 
@@ -836,6 +858,42 @@
     __set_vvmcs(vvmcs, VM_ENTRY_INTR_INFO, 0);
 }
 
+static void sync_exception_state(struct vcpu *v)
+{
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+
+    if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) )
+        return;
+
+    switch ( (nvmx->intr.intr_info & INTR_INFO_INTR_TYPE_MASK) >> 8 )
+    {
+    case X86_EVENTTYPE_EXT_INTR:
+        /* rename exit_reason to EXTERNAL_INTERRUPT */
+        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_REASON,
+                    EXIT_REASON_EXTERNAL_INTERRUPT);
+        __set_vvmcs(nvcpu->nv_vvmcx, EXIT_QUALIFICATION, 0);
+        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO,
+                    nvmx->intr.intr_info);
+        break;
+
+    case X86_EVENTTYPE_HW_EXCEPTION:
+    case X86_EVENTTYPE_SW_INTERRUPT:
+    case X86_EVENTTYPE_SW_EXCEPTION:
+        /* throw to L1 */
+        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO,
+                    nvmx->intr.intr_info);
+        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_ERROR_CODE,
+                    nvmx->intr.error_code);
+        break;
+    case X86_EVENTTYPE_NMI:
+    default:
+        gdprintk(XENLOG_ERR, "Exception state %lx not handled\n",
+               nvmx->intr.intr_info);
+        break;
+    }
+}
+
 static void virtual_vmexit(struct cpu_user_regs *regs)
 {
     struct vcpu *v = current;
@@ -846,6 +904,7 @@
 
     sync_vvmcs_ro(v);
     sync_vvmcs_guest_state(v, regs);
+    sync_exception_state(v);
 
     vmx_vmcs_switch(v->arch.hvm_vmx.vmcs, nvcpu->nv_n1vmcx);
 
@@ -1158,3 +1217,38 @@
     return X86EMUL_OKAY;
 }
 
+void nvmx_idtv_handling(void)
+{
+    struct vcpu *v = current;
+    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+    unsigned int idtv_info = __vmread(IDT_VECTORING_INFO);
+
+    if ( likely(!(idtv_info & INTR_INFO_VALID_MASK)) )
+        return;
+
+    /*
+     * If L0 can solve the fault that causes idt vectoring, it should
+     * be reinjected, otherwise, pass to L1.
+     */
+    if ( (__vmread(VM_EXIT_REASON) != EXIT_REASON_EPT_VIOLATION &&
+          !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK)) ||
+         (__vmread(VM_EXIT_REASON) == EXIT_REASON_EPT_VIOLATION &&
+          !nvcpu->nv_vmexit_pending) )
+    {
+        __vmwrite(VM_ENTRY_INTR_INFO, idtv_info & ~INTR_INFO_RESVD_BITS_MASK);
+        if ( idtv_info & INTR_INFO_DELIVER_CODE_MASK )
+           __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
+                        __vmread(IDT_VECTORING_ERROR_CODE));
+        /*
+         * SDM 23.2.4, if L1 tries to inject a software interrupt
+         * and the delivery fails, VM_EXIT_INSTRUCTION_LEN receives
+         * the value of previous VM_ENTRY_INSTRUCTION_LEN.
+         *
+         * This means EXIT_INSTRUCTION_LEN is always valid here, for
+         * software interrupts both injected by L1, and generated in L2.
+         */
+        __vmwrite(VM_ENTRY_INSTRUCTION_LEN, __vmread(VM_EXIT_INSTRUCTION_LEN));
+    }
+}
+
diff -r c95338e40c50 -r 3ded99964cdf xen/include/asm-x86/hvm/vmx/vvmx.h
--- a/xen/include/asm-x86/hvm/vmx/vvmx.h	Thu Jun 09 16:24:09 2011 +0800
+++ b/xen/include/asm-x86/hvm/vmx/vvmx.h	Thu Jun 09 16:24:09 2011 +0800
@@ -93,6 +93,9 @@
 uint64_t nvmx_vcpu_guestcr3(struct vcpu *v);
 uint64_t nvmx_vcpu_hostcr3(struct vcpu *v);
 uint32_t nvmx_vcpu_asid(struct vcpu *v);
+enum hvm_intblk nvmx_intr_blocked(struct vcpu *v);
+int nvmx_intercepts_exception(struct vcpu *v,
+                              unsigned int trap, int error_code);
 
 int nvmx_handle_vmxon(struct cpu_user_regs *regs);
 int nvmx_handle_vmxoff(struct cpu_user_regs *regs);
@@ -166,6 +169,7 @@
                                       unsigned long value);
 void nvmx_update_exception_bitmap(struct vcpu *v, unsigned long value);
 asmlinkage void nvmx_switch_guest(void);
+void nvmx_idtv_handling(void);
 
 #endif /* __ASM_X86_HVM_VVMX_H__ */
 
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |