
Re: [Xen-devel] [V10 PATCH 23/23] PVH xen: introduce vmexit handler for PVH



On Mon, Aug 12, 2013 at 5:00 PM, George Dunlap <dunlapg@xxxxxxxxx> wrote:
> Overall I have the same comment here as I had for the VMCS patch: the
> code looks 98% identical.  Substantial differences seem to be:
>  - emulation of privileged ops
>  - cpuid
>  - cr4 handling
>
> It seems like it would be much better to share the codepath and just
> put "is_pvh_domain()" in the places where it needs to be different.

So below is a patch which, I think, should be functionally mostly
equivalent to the patch you have -- but a *lot* shorter, and also
*very* clear about how PVH is different from normal HVM.  I think this
is definitely the better approach.
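
To make the shape concrete, the pattern is roughly the toy sketch
below (compilable plain C, not Xen code; is_pvh_vcpu() and the
handler names just mirror the real tree):

    #include <stdbool.h>
    #include <stdio.h>

    enum guest_type { GUEST_HVM, GUEST_PVH };
    struct vcpu { enum guest_type type; };

    static bool is_pvh_vcpu(const struct vcpu *v)
    {
        return v->type == GUEST_PVH;
    }

    /* One exit handler shared by both guest types; branch only at
     * the few points where PVH behaviour differs. */
    static void handle_cpuid_exit(struct vcpu *v)
    {
        if ( is_pvh_vcpu(v) )
            puts("pv_cpuid(regs)");      /* PV-style leaf filtering */
        else
            puts("vmx_do_cpuid(regs)");  /* normal HVM path */
    }

    int main(void)
    {
        struct vcpu hvm = { GUEST_HVM }, pvh = { GUEST_PVH };
        handle_cpuid_exit(&hvm);
        handle_cpuid_exit(&pvh);
        return 0;
    }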

-George

    PVH xen: introduce vmexit handler for PVH

    This version unifies the PVH and HVM vmexit handlers.

    A couple of notes:
    - No check for cr3 accesses; that's a HAP/shadow issue, not a PVH one
    - Debug trap and EPT violation now crash the guest, as with HVM
    - Don't know what to do if a hypercall returns HVM_HCALL_invalidate
    - Don't know what to do on task switch
    - Removed single_step=0 on MTF; may not be correct.

    Signed-off-by: Mukesh Rathor <mukesh.rathor@xxxxxxxxxx>
    Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index c742d7b..e9f9ef6 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1776,7 +1776,15 @@ int hvm_set_cr0(unsigned long value)
          (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PG )
         goto gpf;

-    if ( (value & X86_CR0_PG) && !(old_value & X86_CR0_PG) )
+    /* A PVH guest is not expected to change to real mode. */
+    if ( is_pvh_vcpu(v)
+         && (value & (X86_CR0_PE | X86_CR0_PG)) != (X86_CR0_PG | X86_CR0_PE) )
+    {
+        printk(XENLOG_G_WARNING
+               "PVH attempting to turn off PE/PG. CR0:%lx\n", value);
+        goto gpf;
+    }
+    else if ( (value & X86_CR0_PG) && !(old_value & X86_CR0_PG) )
     {
         if ( v->arch.hvm_vcpu.guest_efer & EFER_LME )
         {
@@ -1953,6 +1963,11 @@ int hvm_set_cr4(unsigned long value)
      * Modifying CR4.{PSE,PAE,PGE,SMEP}, or clearing CR4.PCIDE
      * invalidate all TLB entries.
      */
+    /*
+     * PVH: I assume this is suitable -- it subsumes the conditions
+     * from the custom PVH handler:
+     *  (old_val ^ new) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)
+     */
     if ( ((old_cr ^ value) &
           (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE | X86_CR4_SMEP)) ||
          (!(value & X86_CR4_PCIDE) && (old_cr & X86_CR4_PCIDE)) )
diff --git a/xen/arch/x86/hvm/vmx/pvh.c b/xen/arch/x86/hvm/vmx/pvh.c
index 8e61d23..a5a8ee1 100644
--- a/xen/arch/x86/hvm/vmx/pvh.c
+++ b/xen/arch/x86/hvm/vmx/pvh.c
@@ -20,10 +20,6 @@
 #include <asm/hvm/nestedhvm.h>
 #include <asm/xstate.h>

-/* Implemented in the next patch */
-void vmx_pvh_vmexit_handler(struct cpu_user_regs *regs)
-{
-}

 /*
  * Set vmcs fields in support of vcpu_op -> VCPUOP_initialise hcall.  Called
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index bbfa130..37ec385 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1244,6 +1244,12 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
              */
             v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_SMEP;
         }
+        if ( is_pvh_vcpu(v) )
+        {
+            /* What is this for? */
+            v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_VMXE | X86_CR4_MCE;
+        }
+
         __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
         __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]);
         break;
@@ -2242,7 +2248,8 @@ static void ept_handle_violation(unsigned long qualification, paddr_t gpa)
         __trace_var(TRC_HVM_NPF, 0, sizeof(_d), &_d);
     }

-    ret = hvm_hap_nested_page_fault(gpa,
+    ret = is_pvh_domain(d) ? 0 :
+        hvm_hap_nested_page_fault(gpa,
                                    qualification & EPT_GLA_VALID       ? 1 : 0,
                                    qualification & EPT_GLA_VALID
                                      ? __vmread(GUEST_LINEAR_ADDRESS) : ~0ull,
@@ -2490,12 +2497,6 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
     if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
         return vmx_failed_vmentry(exit_reason, regs);

-    if ( is_pvh_vcpu(v) )
-    {
-        vmx_pvh_vmexit_handler(regs);
-        return;
-    }
-
     if ( v->arch.hvm_vmx.vmx_realmode )
     {
         /* Put RFLAGS back the way the guest wants it */
@@ -2654,8 +2655,16 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
             /* Already handled above. */
             break;
         case TRAP_invalid_op:
-            HVMTRACE_1D(TRAP, vector);
-            vmx_vmexit_ud_intercept(regs);
+            if ( is_pvh_domain(d) )
+            {
+                if ( guest_kernel_mode(current, regs) || !emulate_forced_invalid_op(regs) )
+                    hvm_inject_hw_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE);
+            }
+            else
+            {
+                HVMTRACE_1D(TRAP, vector);
+                vmx_vmexit_ud_intercept(regs);
+            }
             break;
         default:
             HVMTRACE_1D(TRAP, vector);
@@ -2685,6 +2694,8 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
         };
         int32_t ecode = -1, source;

+        /* PVH FIXME: What to do? */
+
         exit_qualification = __vmread(EXIT_QUALIFICATION);
         source = (exit_qualification >> 30) & 3;
         /* Vectored event should fill in interrupt information. */
@@ -2704,8 +2715,8 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
         break;
     }
     case EXIT_REASON_CPUID:
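+        /* For PVH, filter cpuid leaves via the PV path. */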
+        is_pvh_domain(d) ? pv_cpuid(regs) : vmx_do_cpuid(regs);
         vmx_update_guest_eip(); /* Safe: CPUID */
-        vmx_do_cpuid(regs);
         break;
     case EXIT_REASON_HLT:
         vmx_update_guest_eip(); /* Safe: HLT */
@@ -2731,6 +2742,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
         if ( rc != HVM_HCALL_preempted )
         {
             vmx_update_guest_eip(); /* Safe: VMCALL */
+            /* PVH FIXME: What to do? */
             if ( rc == HVM_HCALL_invalidate )
                 send_invalidate_req();
         }
@@ -2750,7 +2762,28 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
     case EXIT_REASON_MSR_READ:
     {
         uint64_t msr_content;
-        if ( hvm_msr_read_intercept(regs->ecx, &msr_content) == X86EMUL_OKAY )
+        if ( is_pvh_vcpu(v) )
+        {
+            u64 msr_content = 0;
+
+            switch ( regs->ecx )
+            {
+            case MSR_IA32_MISC_ENABLE:
+                rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
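+                /* Report BTS/PEBS (debug store) as unavailable to the guest. */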
+                msr_content |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL |
+                    MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
+                break;
+
+            default:
+                /* PVH fixme: see hvm_msr_read_intercept(). */
+                rdmsrl(regs->ecx, msr_content);
+                break;
+            }
+            regs->eax = (uint32_t)msr_content;
+            regs->edx = (uint32_t)(msr_content >> 32);
+            vmx_update_guest_eip(); /* Safe: RDMSR */
+        }
+        else if ( hvm_msr_read_intercept(regs->ecx, &msr_content) == X86EMUL_OKAY )
         {
             regs->eax = (uint32_t)msr_content;
             regs->edx = (uint32_t)(msr_content >> 32);
@@ -2853,21 +2886,42 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
     }

     case EXIT_REASON_IO_INSTRUCTION:
-        exit_qualification = __vmread(EXIT_QUALIFICATION);
-        if ( exit_qualification & 0x10 )
+        if ( is_pvh_vcpu(v) )
         {
-            /* INS, OUTS */
-            if ( !handle_mmio() )
-                hvm_inject_hw_exception(TRAP_gp_fault, 0);
+            /*
+             * Note: A PVH guest sets IOPL natively by setting bits in
+             *       eflags, not via the hypercall a PV guest uses.
+             */
+            struct segment_register seg;
+            int requested = (regs->rflags & X86_EFLAGS_IOPL) >> 12;
+            int curr_lvl = (regs->rflags & X86_EFLAGS_VM) ? 3 : 0;
+
+            if ( curr_lvl == 0 )
+            {
+                hvm_get_segment_register(current, x86_seg_ss, &seg);
+                curr_lvl = seg.attr.fields.dpl;
+            }
+            if ( requested < curr_lvl || !emulate_privileged_op(regs) )
+                hvm_inject_hw_exception(TRAP_gp_fault, regs->error_code);
         }
         else
         {
-            /* IN, OUT */
-            uint16_t port = (exit_qualification >> 16) & 0xFFFF;
-            int bytes = (exit_qualification & 0x07) + 1;
-            int dir = (exit_qualification & 0x08) ? IOREQ_READ : IOREQ_WRITE;
-            if ( handle_pio(port, bytes, dir) )
-                vmx_update_guest_eip(); /* Safe: IN, OUT */
+            exit_qualification = __vmread(EXIT_QUALIFICATION);
+            if ( exit_qualification & 0x10 )
+            {
+                /* INS, OUTS */
+                if ( !handle_mmio() )
+                    hvm_inject_hw_exception(TRAP_gp_fault, 0);
+            }
+            else
+            {
+                /* IN, OUT */
+                uint16_t port = (exit_qualification >> 16) & 0xFFFF;
+                int bytes = (exit_qualification & 0x07) + 1;
+                int dir = (exit_qualification & 0x08) ? IOREQ_READ : IOREQ_WRITE;
+                if ( handle_pio(port, bytes, dir) )
+                    vmx_update_guest_eip(); /* Safe: IN, OUT */
+            }
         }
         break;

@@ -2890,6 +2944,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
     case EXIT_REASON_MONITOR_TRAP_FLAG:
         v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
         vmx_update_cpu_exec_control(v);
+        /* PVH code set hvm_vcpu.single_step = 0 -- is that necessary? */
         if ( v->arch.hvm_vcpu.single_step ) {
           hvm_memory_event_single_step(regs->eip);
           if ( v->domain->debugger_attached )
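
For reference, the IOPL check in the PVH IO-instruction path above
boils down to the following (standalone C sketch, not Xen code; IOPL
lives in rflags bits 12-13, and a port access is allowed when
CPL <= IOPL):

    #include <stdint.h>
    #include <stdio.h>

    #define X86_EFLAGS_IOPL 0x3000u   /* bits 12-13 of rflags */

    static unsigned int iopl_of(uint64_t rflags)
    {
        return (unsigned int)((rflags & X86_EFLAGS_IOPL) >> 12);
    }

    int main(void)
    {
        uint64_t rflags = 0x3000;   /* guest set IOPL == 3 in rflags */
        unsigned int cpl = 3;       /* e.g. running in user mode */

        /* Mirrors "requested < curr_lvl" in the patch: insufficient
         * IOPL means inject #GP rather than emulate the instruction. */
        printf("IOPL=%u CPL=%u -> %s\n", iopl_of(rflags), cpl,
               cpl <= iopl_of(rflags) ? "emulate" : "#GP");
        return 0;
    }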

Attachment: pvh-vmexit-unified.diff
Description: Binary data
