
[Xen-devel] [PATCH 16 of 20] interrupt/exception handling for n2 guest

# HG changeset patch
# User Eddie Dong <eddie.dong@xxxxxxxxx>
# Date 1307003601 -28800
# Node ID f14f451a780e60e920c057e44fa1bc3ee40495a7
# Parent  bd15acfc9b822ccf27b5c7603e600e5e11733907
interrupt/exception handling for n2 guest

Signed-off-by: Qing He <qing.he@xxxxxxxxx>
Signed-off-by: Eddie Dong <eddie.dong@xxxxxxxxx>
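
For review convenience, a condensed sketch of the injection policy this
patch implements. The enum and the stub flags below are illustrative
only; the real checks are nvmx_intr_blocked() and nvmx_intr_intercept()
in the intr.c hunk:

    #include <stdbool.h>

    enum inject_path {
        INJECT_NORMAL,       /* L1 context: usual RFLAGS.IF-gated path */
        INJECT_DEFERRED,     /* transition in flight: open an intr window */
        INJECT_VIRT_VMEXIT,  /* L2 context: reflect the irq to L1 */
    };

    /* stand-ins for nestedhvm_vcpu_in_guestmode() and friends */
    static bool in_l2_guest_mode;
    static bool vvmcs_transition_busy;  /* virtual VMEntry/VMExit pending */
    static bool from_pic_or_lapic;      /* intack.source is vPIC or vLAPIC */

    static enum inject_path choose_path(void)
    {
        if ( vvmcs_transition_busy )
            return INJECT_DEFERRED;
        if ( in_l2_guest_mode && from_pic_or_lapic )
            return INJECT_VIRT_VMEXIT;
        return INJECT_NORMAL;
    }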

diff -r bd15acfc9b82 -r f14f451a780e xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c       Thu Jun 02 16:33:21 2011 +0800
+++ b/xen/arch/x86/hvm/vmx/intr.c       Thu Jun 02 16:33:21 2011 +0800
@@ -35,6 +35,7 @@
 #include <asm/hvm/vmx/vmcs.h>
 #include <asm/hvm/vpic.h>
 #include <asm/hvm/vlapic.h>
+#include <asm/hvm/nestedhvm.h>
 #include <public/hvm/ioreq.h>
 #include <asm/hvm/trace.h>
 
@@ -109,6 +110,102 @@ static void enable_intr_window(struct vc
     }
 }
 
+/*
+ * Injecting interrupts for nested virtualization
+ *
+ *  When injecting virtual interrupts (originating from L0), there are
+ *  two major cases: within L1 context and within L2 context.
+ *   1. L1 context (in_nesting == 0)
+ *     Everything is the same as without nesting: check RFLAGS.IF to
+ *     see whether the injection can be done, and use the VMCS to
+ *     inject the interrupt.
+ *
+ *   2. L2 context (in_nesting == 1)
+ *     Cause a virtual VMExit; RFLAGS.IF is ignored, and whether to
+ *     ack the irq is governed by intr_ack_on_exit. Injection should
+ *     not normally be blocked, except for:
+ *    a. context transition
+ *     the interrupt needs to be blocked at virtual VMEntry time
+ *    b. L2 idtv reinjection
+ *     if an L2 idtv event is handled within L0 (e.g. an L0 shadow
+ *     page fault), it needs to be reinjected without exiting to L1;
+ *     interrupt injection must be blocked at this point as well.
+ *
+ *  Unfortunately, interrupt blocking in L2 won't work with a simple
+ *  intr_window_open check (which depends on L2's IF). To solve this,
+ *  the following algorithm can be used:
+ *   v->arch.hvm_vmx.exec_control.VIRTUAL_INTR_PENDING now denotes
+ *   only the L0 control; the physical control may differ from it.
+ *       - if in L1, it behaves normally: the intr window is written
+ *         to the physical control as it is
+ *       - if in L2, replace it with MTF (or the NMI window) if possible
+ *       - if neither MTF nor the NMI window is used, the intr window
+ *         can still be used, but may hurt interrupt performance.
+ */
+
+enum hvm_intblk nvmx_intr_blocked(struct vcpu *v)
+{
+    int r = hvm_intblk_none;
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+
+    if ( nestedhvm_vcpu_in_guestmode(v) )
+    {
+        if ( nvcpu->nv_vmexit_pending ||
+             nvcpu->nv_vmswitch_in_progress ||
+             (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK) )
+            r = hvm_intblk_rflags_ie;
+    }
+    else if ( nvcpu->nv_vmentry_pending )
+        r = hvm_intblk_rflags_ie;
+
+    return r;
+}
+
+static int nvmx_intr_intercept(struct vcpu *v, struct hvm_intack intack)
+{
+    u32 exit_ctrl;
+
+    /*
+     * TODO:
+     *   - if L1 intr-window exiting == 0
+     *   - vNMI
+     */
+
+    if ( nvmx_intr_blocked(v) != hvm_intblk_none )
+    {
+        enable_intr_window(v, intack);
+        return 1;
+    }
+
+    if ( nestedhvm_vcpu_in_guestmode(v) )
+    {
+        if ( intack.source == hvm_intsrc_pic ||
+             intack.source == hvm_intsrc_lapic )
+        {
+            vmx_inject_extint(intack.vector);
+
+            exit_ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx,
+                            VM_EXIT_CONTROLS);
+            if ( exit_ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
+            {
+                /* for now, duplicate the ack path in vmx_intr_assist */
+                hvm_vcpu_ack_pending_irq(v, intack);
+                pt_intr_post(v, intack);
+
+                intack = hvm_vcpu_has_pending_irq(v);
+                if ( unlikely(intack.source != hvm_intsrc_none) )
+                    enable_intr_window(v, intack);
+            }
+            else
+                enable_intr_window(v, intack);
+
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
 asmlinkage void vmx_intr_assist(void)
 {
     struct hvm_intack intack;
@@ -132,6 +229,9 @@ asmlinkage void vmx_intr_assist(void)
         if ( likely(intack.source == hvm_intsrc_none) )
             goto out;
 
+        if ( unlikely(nvmx_intr_intercept(v, intack)) )
+            goto out;
+
         intblk = hvm_interrupt_blocked(v, intack);
         if ( intblk == hvm_intblk_tpr )
         {
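
A note on the ack path in nvmx_intr_intercept() above: whether the irq
is consumed at virtual-VMExit time follows L1's "acknowledge interrupt
on exit" VM-exit control. A minimal sketch of that policy, assuming the
SDM bit position; the helper names are hypothetical stand-ins for
hvm_vcpu_ack_pending_irq()/pt_intr_post() and enable_intr_window():

    #include <stdint.h>
    #include <stdbool.h>

    #define VM_EXIT_ACK_INTR_ON_EXIT (1u << 15) /* VM-exit controls bit 15 */

    static void ack_and_post(void)        { /* consume vector from vPIC/vLAPIC */ }
    static void request_intr_window(void) { /* ask for intr-window exiting */ }

    static void deliver_virtual_vmexit(uint32_t l1_exit_controls,
                                       bool more_irqs_pending)
    {
        if ( l1_exit_controls & VM_EXIT_ACK_INTR_ON_EXIT )
        {
            /* L1 wants the vector acked as part of the VM exit */
            ack_and_post();
            if ( more_irqs_pending )
                request_intr_window();
        }
        else
            /* leave it pending; L1 will pick it up via an intr window */
            request_intr_window();
    }
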
diff -r bd15acfc9b82 -r f14f451a780e xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Thu Jun 02 16:33:21 2011 +0800
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Thu Jun 02 16:33:21 2011 +0800
@@ -1243,6 +1243,31 @@ void ept_sync_domain(struct domain *d)
                      __ept_sync_domain, d, 1);
 }
 
+void nvmx_enqueue_n2_exceptions(struct vcpu *v,
+                                unsigned long intr_fields, int error_code)
+{
+    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+
+    if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) ) {
+        /* enqueue the exception until the VMCS switches back to L1 */
+        nvmx->intr.intr_info = intr_fields;
+        nvmx->intr.error_code = error_code;
+        vcpu_nestedhvm(v).nv_vmexit_pending = 1;
+        return;
+    }
+    else
+        gdprintk(XENLOG_ERR, "Double fault on nested guest: exception %lx %x "
+                 "on %lx %x\n", intr_fields, error_code,
+                 nvmx->intr.intr_info, nvmx->intr.error_code);
+}
+
+static int nvmx_vmexit_exceptions(struct vcpu *v, unsigned int trapnr,
+                      int errcode, unsigned long cr2)
+{
+    nvmx_enqueue_n2_exceptions(v, trapnr, errcode);
+    return NESTEDHVM_VMEXIT_DONE;
+}
+
 static void __vmx_inject_exception(int trap, int type, int error_code)
 {
     unsigned long intr_fields;
@@ -1272,11 +1297,16 @@ static void __vmx_inject_exception(int t
 
 void vmx_inject_hw_exception(int trap, int error_code)
 {
-    unsigned long intr_info = __vmread(VM_ENTRY_INTR_INFO);
+    unsigned long intr_info;
     struct vcpu *curr = current;
 
     int type = X86_EVENTTYPE_HW_EXCEPTION;
 
+    if ( nestedhvm_vcpu_in_guestmode(curr) )
+        intr_info = vcpu_2_nvmx(curr).intr.intr_info;
+    else
+        intr_info = __vmread(VM_ENTRY_INTR_INFO);
+
     switch ( trap )
     {
     case TRAP_debug:
@@ -1308,7 +1338,16 @@ void vmx_inject_hw_exception(int trap, i
             error_code = 0;
     }
 
-    __vmx_inject_exception(trap, type, error_code);
+    if ( nestedhvm_vcpu_in_guestmode(curr) &&
+         nvmx_intercepts_exception(curr, trap, error_code) )
+    {
+        nvmx_enqueue_n2_exceptions(curr,
+            INTR_INFO_VALID_MASK | (type << 8) | trap,
+            error_code);
+        return;
+    }
+    else
+        __vmx_inject_exception(trap, type, error_code);
 
     if ( trap == TRAP_page_fault )
         HVMTRACE_LONG_2D(PF_INJECT, error_code,
@@ -1319,12 +1358,38 @@ void vmx_inject_hw_exception(int trap, i
 
 void vmx_inject_extint(int trap)
 {
+    struct vcpu *v = current;
+    u32 pin_based_cntrl;
+
+    if ( nestedhvm_vcpu_in_guestmode(v) ) {
+        pin_based_cntrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx,
+                                      PIN_BASED_VM_EXEC_CONTROL);
+        if ( pin_based_cntrl & PIN_BASED_EXT_INTR_MASK ) {
+            nvmx_enqueue_n2_exceptions(v,
+                INTR_INFO_VALID_MASK | (X86_EVENTTYPE_EXT_INTR << 8) | trap,
+                HVM_DELIVER_NO_ERROR_CODE);
+            return;
+        }
+    }
     __vmx_inject_exception(trap, X86_EVENTTYPE_EXT_INTR,
                            HVM_DELIVER_NO_ERROR_CODE);
 }
 
 void vmx_inject_nmi(void)
 {
+    struct vcpu *v = current;
+    u32 pin_based_cntrl;
+
+    if ( nestedhvm_vcpu_in_guestmode(v) ) {
+        pin_based_cntrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx,
+                                      PIN_BASED_VM_EXEC_CONTROL);
+        if ( pin_based_cntrl & PIN_BASED_NMI_EXITING ) {
+            nvmx_enqueue_n2_exceptions(v,
+                INTR_INFO_VALID_MASK | (X86_EVENTTYPE_NMI << 8) | TRAP_nmi,
+                HVM_DELIVER_NO_ERROR_CODE);
+            return;
+        }
+    }
     __vmx_inject_exception(2, X86_EVENTTYPE_NMI,
                            HVM_DELIVER_NO_ERROR_CODE);
 }
@@ -1424,7 +1489,10 @@ static struct hvm_function_table __read_
     .nhvm_vcpu_reset      = nvmx_vcpu_reset,
     .nhvm_vcpu_guestcr3   = nvmx_vcpu_guestcr3,
     .nhvm_vcpu_hostcr3    = nvmx_vcpu_hostcr3,
-    .nhvm_vcpu_asid       = nvmx_vcpu_asid
+    .nhvm_vcpu_asid       = nvmx_vcpu_asid,
+    .nhvm_vmcx_guest_intercepts_trap = nvmx_intercepts_exception,
+    .nhvm_vcpu_vmexit_trap = nvmx_vmexit_exceptions,
+    .nhvm_intr_blocked    = nvmx_intr_blocked
 };
 
 struct hvm_function_table * __init start_vmx(void)
@@ -2237,7 +2305,8 @@ asmlinkage void vmx_vmexit_handler(struc
     hvm_maybe_deassert_evtchn_irq();
 
     idtv_info = __vmread(IDT_VECTORING_INFO);
-    if ( exit_reason != EXIT_REASON_TASK_SWITCH )
+    if ( !nestedhvm_vcpu_in_guestmode(v) &&
+         exit_reason != EXIT_REASON_TASK_SWITCH )
         vmx_idtv_reinject(idtv_info);
 
     switch ( exit_reason )
@@ -2585,6 +2654,9 @@ asmlinkage void vmx_vmexit_handler(struc
         domain_crash(v->domain);
         break;
     }
+
+    if ( nestedhvm_vcpu_in_guestmode(v) )
+        nvmx_idtv_handling();
 }
 
 asmlinkage void vmx_vmenter_helper(void)
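
The pin-based control tests above need a bitwise AND: a logical AND
would evaluate true whenever any pin-based control is set, because
PIN_BASED_EXT_INTR_MASK is a non-zero constant. A self-contained
demonstration; the bit positions follow the Intel SDM and are restated
here rather than taken from Xen's headers:

    #include <assert.h>
    #include <stdint.h>

    #define PIN_BASED_EXT_INTR_MASK  (1u << 0)  /* external-interrupt exiting */
    #define PIN_BASED_NMI_EXITING    (1u << 3)  /* NMI exiting */

    int main(void)
    {
        uint32_t ctrl = PIN_BASED_NMI_EXITING;  /* ext-intr exiting clear */

        /* bitwise test: correctly reports ext-intr exiting as disabled */
        assert( !(ctrl & PIN_BASED_EXT_INTR_MASK) );

        /* logical test: "true" anyway, as both operands are non-zero */
        assert( ctrl && PIN_BASED_EXT_INTR_MASK );

        return 0;
    }
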
diff -r bd15acfc9b82 -r f14f451a780e xen/arch/x86/hvm/vmx/vvmx.c
--- a/xen/arch/x86/hvm/vmx/vvmx.c       Thu Jun 02 16:33:21 2011 +0800
+++ b/xen/arch/x86/hvm/vmx/vvmx.c       Thu Jun 02 16:33:21 2011 +0800
@@ -392,6 +392,27 @@ static void vmreturn(struct cpu_user_reg
     regs->eflags = eflags;
 }
 
+int nvmx_intercepts_exception(struct vcpu *v, unsigned int trap,
+                               int error_code)
+{
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+    u32 exception_bitmap, pfec_match = 0, pfec_mask = 0;
+    int r;
+
+    ASSERT(trap < 32);
+
+    exception_bitmap = __get_vvmcs(nvcpu->nv_vvmcx, EXCEPTION_BITMAP);
+    r = (exception_bitmap & (1u << trap)) ? 1 : 0;
+
+    if ( trap == TRAP_page_fault ) {
+        pfec_match = __get_vvmcs(nvcpu->nv_vvmcx, PAGE_FAULT_ERROR_CODE_MATCH);
+        pfec_mask  = __get_vvmcs(nvcpu->nv_vvmcx, PAGE_FAULT_ERROR_CODE_MASK);
+        if ( (error_code & pfec_mask) != pfec_match )
+            r = !r;
+    }
+    return r;
+}
+
 /*
  * Nested VMX uses "strict" condition to exit from 
  * L2 guest if either L1 VMM or L0 VMM expect to exit.
@@ -465,6 +486,7 @@ void nvmx_update_exec_control(struct vcp
         __vmwrite(IO_BITMAP_B, virt_to_maddr(bitmap) + PAGE_SIZE);
     }
 
+    /* TODO: change L0 intr window to MTF or NMI window */
     __vmwrite(CPU_BASED_VM_EXEC_CONTROL, shadow_cntrl);
 }
 
@@ -868,6 +890,42 @@ static void load_vvmcs_host_state(struct
     __set_vvmcs(vvmcs, VM_ENTRY_INTR_INFO, 0);
 }
 
+static void sync_exception_state(struct vcpu *v)
+{
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+
+    if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) )
+        return;
+
+    switch ( nvmx->intr.intr_info & INTR_INFO_INTR_TYPE_MASK )
+    {
+    case X86_EVENTTYPE_EXT_INTR:
+        /* overwrite the exit reason with EXTERNAL_INTERRUPT */
+        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_REASON,
+                    EXIT_REASON_EXTERNAL_INTERRUPT);
+        __set_vvmcs(nvcpu->nv_vvmcx, EXIT_QUALIFICATION, 0);
+        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO,
+                    nvmx->intr.intr_info);
+        break;
+
+    case X86_EVENTTYPE_HW_EXCEPTION:
+    case X86_EVENTTYPE_SW_INTERRUPT:
+    case X86_EVENTTYPE_SW_EXCEPTION:
+        /* throw to L1 */
+        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO,
+                    nvmx->intr.intr_info);
+        __set_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_ERROR_CODE,
+                    nvmx->intr.error_code);
+        break;
+    case X86_EVENTTYPE_NMI:
+    default:
+        gdprintk(XENLOG_ERR, "Exception state %lx not handled\n",
+                 nvmx->intr.intr_info);
+        break;
+    }
+}
+
 static void virtual_vmexit(struct cpu_user_regs *regs)
 {
     struct vcpu *v = current;
@@ -878,6 +936,7 @@ static void virtual_vmexit(struct cpu_us
 
     sync_vvmcs_ro(v);
     sync_vvmcs_guest_state(v, regs);
+    sync_exception_state(v);
 
     vmx_vmcs_switch(v, v->arch.hvm_vmx.vmcs, nvcpu->nv_n1vmcx);
 
@@ -1169,3 +1228,40 @@ int nvmx_handle_vmwrite(struct cpu_user_
     return X86EMUL_OKAY;
 }
 
+void nvmx_idtv_handling(void)
+{
+    struct vcpu *v = current;
+    struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+    unsigned int idtv_info = __vmread(IDT_VECTORING_INFO);
+
+    if ( likely(!(idtv_info & INTR_INFO_VALID_MASK)) )
+        return;
+
+    /*
+     * If L0 can handle the fault that caused the IDT vectoring, the
+     * event is reinjected directly; otherwise it is passed on to L1.
+     */
+    if ( (__vmread(VM_EXIT_REASON) != EXIT_REASON_EPT_VIOLATION &&
+          !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK)) ||
+         (__vmread(VM_EXIT_REASON) == EXIT_REASON_EPT_VIOLATION &&
+          !nvcpu->nv_vmexit_pending) )
+    {
+        __vmwrite(VM_ENTRY_INTR_INFO, idtv_info & ~INTR_INFO_RESVD_BITS_MASK);
+        if ( idtv_info & INTR_INFO_DELIVER_CODE_MASK )
+            __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
+                      __vmread(IDT_VECTORING_ERROR_CODE));
+        /*
+         * Per SDM 23.2.4, if L1 tries to inject a software interrupt
+         * and the delivery fails, VM_EXIT_INSTRUCTION_LEN receives
+         * the value of the previous VM_ENTRY_INSTRUCTION_LEN.
+         *
+         * This means EXIT_INSTRUCTION_LEN is always valid here, for
+         * software interrupts both injected by L1 and generated in L2.
+         */
+        __vmwrite(VM_ENTRY_INSTRUCTION_LEN, __vmread(VM_EXIT_INSTRUCTION_LEN));
+    }
+
+    /* TODO: NMI */
+}
+
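
The page-fault special case in nvmx_intercepts_exception() mirrors the
hardware rule: with exception-bitmap bit 14 set, a #PF is intercepted
only if (PFEC & PFEC_MASK) == PFEC_MATCH; with the bit clear, only on a
mismatch. A standalone restatement with a few check values (the
function and the values here are illustrative, not Xen code):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    static bool l1_intercepts_pf(bool bitmap_bit14, uint32_t pfec,
                                 uint32_t pfec_match, uint32_t pfec_mask)
    {
        bool r = bitmap_bit14;                   /* bit 14: #PF in the bitmap */

        if ( (pfec & pfec_mask) != pfec_match )  /* PFEC match failed */
            r = !r;                              /* the bitmap bit inverts */
        return r;
    }

    int main(void)
    {
        /* bit set: intercept on match, not on mismatch */
        assert( l1_intercepts_pf(true, 0x2, 0x2, 0xf) );
        assert( !l1_intercepts_pf(true, 0x4, 0x2, 0xf) );
        /* bit clear: intercept only on mismatch */
        assert( l1_intercepts_pf(false, 0x4, 0x2, 0xf) );
        return 0;
    }
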
diff -r bd15acfc9b82 -r f14f451a780e xen/include/asm-x86/hvm/vmx/vvmx.h
--- a/xen/include/asm-x86/hvm/vmx/vvmx.h        Thu Jun 02 16:33:21 2011 +0800
+++ b/xen/include/asm-x86/hvm/vmx/vvmx.h        Thu Jun 02 16:33:21 2011 +0800
@@ -93,6 +93,9 @@ int nvmx_vcpu_reset(struct vcpu *v);
 uint64_t nvmx_vcpu_guestcr3(struct vcpu *v);
 uint64_t nvmx_vcpu_hostcr3(struct vcpu *v);
 uint32_t nvmx_vcpu_asid(struct vcpu *v);
+enum hvm_intblk nvmx_intr_blocked(struct vcpu *v);
+int nvmx_intercepts_exception(struct vcpu *v,
+                              unsigned int trap, int error_code);
 
 int nvmx_handle_vmxon(struct cpu_user_regs *regs);
 int nvmx_handle_vmxoff(struct cpu_user_regs *regs);
@@ -166,6 +169,7 @@ void nvmx_update_secondary_exec_control(
                                         unsigned long value);
 void nvmx_update_exception_bitmap(struct vcpu *v, unsigned long value);
 asmlinkage void nvmx_switch_guest(void);
+void nvmx_idtv_handling(void);
 
 #endif /* __ASM_X86_HVM_VVMX_H__ */
 
