
[Xen-devel] [PATCH 6/6] x86/VT-x: Fix 64bit HVM guests on Harpertown cores



c/s fd32dcfe4c "x86/vmx: Don't leak EFER.NXE into guest context" had an
unintended consequence on Harpertown cores which, as it turns out, don't
load MSR_EFER fully from the MSR Load List - on reentry to the guest,
EFER.SCE is clear irrespective of the value in the load list.
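
For reference, the VM-entry MSR-load area is an array of 16-byte
entries.  The sketch below mirrors Xen's struct vmx_msr_entry; the
comments describe the erratum, not any new API:

    struct vmx_msr_entry {
        uint32_t index;   /* MSR index, e.g. MSR_EFER (0xc0000080) */
        uint32_t mbz;     /* reserved, must be zero */
        uint64_t data;    /* value loaded into the MSR on VM entry */
    };

On working hardware, an entry with index == MSR_EFER loads the full
64bit value on VMRESUME; on Harpertown, EFER.NXE is honoured but
EFER.SCE is not.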

This is catastrophic for 64bit guests (with EFER.SCE clear, SYSCALL
raises #UD, so guest userspace dies immediately), and is far worse than
the EFER.NXE leakage which the change was trying to fix.

Introduce cpu_bug_msr_ll_efer_sce to encapsulate this partial revert.
Avoid adding MSR_EFER to the Load Lists on impacted hardware, and
reintroduce the logic to use the guest's EFER.SCE setting.

In the common case of running 64bit HVM guests, these extra adjustments
to EFER should only be hit during guest boot.
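
Concretely, on impacted hardware the EFER value Xen runs guests with is
a hybrid of the two EFERs.  A minimal sketch of the computation (names
match the vmx.c hunk below; bit positions are architectural):

    #define EFER_SCE (1UL <<  0)   /* SYSCALL enable */
    #define EFER_NX  (1UL << 11)   /* no-execute enable */

    /* The guest gets its own SCE; everything else, inc. NX, is Xen's. */
    static inline uint64_t hybrid_efer(uint64_t xen_efer, uint64_t guest_efer)
    {
        return (xen_efer & ~EFER_SCE) | (guest_efer & EFER_SCE);
    }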

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
CC: Jan Beulich <JBeulich@xxxxxxxx>
CC: Wei Liu <wei.liu2@xxxxxxxxxx>
CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>
CC: Jun Nakajima <jun.nakajima@xxxxxxxxx>
CC: Kevin Tian <kevin.tian@xxxxxxxxx>

This is RFC for now, because the test lab is full and I don't have a
Harpertown CPU to hand.  I'm fairly sure the change is complete, and
will test when one becomes available, but I don't expect to make any
code changes.
---
 xen/arch/x86/cpu/intel.c          |  7 +++++++
 xen/arch/x86/hvm/vmx/vmx.c        | 30 +++++++++++++++++++++++++++++-
 xen/include/asm-x86/cpufeature.h  |  1 +
 xen/include/asm-x86/cpufeatures.h |  1 +
 xen/include/asm-x86/hvm/hvm.h     | 10 ++++++++--
 5 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c
index 65fa3d6..7d1cb1d 100644
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -331,6 +331,13 @@ static void init_intel(struct cpuinfo_x86 *c)
        /* Work around errata */
        Intel_errata_workarounds(c);
 
+       /*
+        * Harpertown cores don't context switch MSR_EFER correctly when it
+        * appears in an MSR Load List.
+        */
+       if (c->x86 == 0x6 && c->x86_model == 0x17)
+               setup_force_cpu_cap(X86_BUG_MSR_LL_EFER_SCE);
+
        if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
                (c->x86 == 0x6 && c->x86_model >= 0x0e))
                __set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 64af8bf..051f5c1 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -899,6 +899,15 @@ static void vmx_fpu_leave(struct vcpu *v)
 
 static void vmx_ctxt_switch_from(struct vcpu *v)
 {
+    /* Always restore to Xen's SCE setting when leaving VT-x context. */
+    if ( unlikely(cpu_bug_msr_ll_efer_sce) )
+    {
+        uint64_t efer = read_efer();
+
+        if ( !(efer & EFER_SCE) )
+            write_efer(efer | EFER_SCE);
+    }
+
     /*
      * Return early if trying to do a context switch without VMX enabled,
      * this can happen when the hypervisor shuts down with HVM guests
@@ -931,6 +940,14 @@ static void vmx_ctxt_switch_from(struct vcpu *v)
 
 static void vmx_ctxt_switch_to(struct vcpu *v)
 {
+    /*
+     * When we can't context switch MSR_EFER correctly, we need to load the
+     * guest's SCE value to ensure that guest userspace functions correctly.
+     */
+    if ( unlikely(cpu_bug_msr_ll_efer_sce) &&
+         !(v->arch.hvm.guest_efer & EFER_SCE) )
+        write_efer(read_efer() & ~EFER_SCE);
+
     vmx_restore_guest_msrs(v);
     vmx_restore_dr(v);
 
@@ -1684,7 +1701,18 @@ static void vmx_update_guest_efer(struct vcpu *v)
     /* We expect to use EFER loading in the common case, but... */
     if ( likely(cpu_has_vmx_efer) )
         __vmwrite(GUEST_EFER, guest_efer);
-
+    /*
+     * On Harpertown cores, putting MSR_EFER in the MSR Load List causes
+     * EFER.SCE to be loaded as 0 when VMRESUME completes (although EFER.NX is
+     * loaded correctly).  This is catastrophic for 64bit guests.  In such
+     * cases, we run with the guest's choice of SCE in EFER, and the guest gets
+     * Xen's choice of NX.
+     */
+    else if ( unlikely(cpu_bug_msr_ll_efer_sce) )
+    {
+        if ( (guest_efer ^ xen_efer) & EFER_SCE )
+            write_efer((xen_efer & ~EFER_SCE) | (guest_efer & EFER_SCE));
+    }
     /* ... on Gen1 VT-x hardware, we have to use MSR load/save lists instead. */
     else
     {
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index 0f3bb5a..4daf52a 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -126,6 +126,7 @@
 #define cpu_bug_amd_erratum_121 boot_cpu_has(X86_BUG_AMD_ERRATUM_121)
 #define cpu_bug_null_seg        boot_cpu_has(X86_BUG_NULL_SEG)
 #define cpu_bug_fpu_err_ptr     boot_cpu_has(X86_BUG_FPU_PTR_LEAK)
+#define cpu_bug_msr_ll_efer_sce boot_cpu_has(X86_BUG_MSR_LL_EFER_SCE)
 
 enum _cache_type {
     CACHE_TYPE_NULL = 0,
diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h
index e7d4171..435abea 100644
--- a/xen/include/asm-x86/cpufeatures.h
+++ b/xen/include/asm-x86/cpufeatures.h
@@ -41,6 +41,7 @@ XEN_CPUFEATURE(XEN_LBR,           X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */
 #define X86_BUG_AMD_ERRATUM_121   X86_BUG( 0) /* Hang on fetch across non-canonical boundary. */
 #define X86_BUG_NULL_SEG          X86_BUG( 1) /* NULL-ing a selector preserves the base and limit. */
 #define X86_BUG_FPU_PTR_LEAK      X86_BUG( 2) /* (F)XRSTOR doesn't load FOP/FIP/FDP. */
+#define X86_BUG_MSR_LL_EFER_SCE   X86_BUG( 3) /* MSR Load List clears EFER.SCE. */
 
 /* Total number of capability words, inc synth and bug words. */
 #define NCAPINTS (FSCAPINTS + X86_NR_SYNTH + X86_NR_BUG) /* N 32-bit words worth of info */
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 0a10b51..3c2c1ec 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -366,8 +366,14 @@ unsigned long hvm_cr4_guest_valid_bits(const struct domain *d, bool restore);
     (hvm_paging_enabled(v) && ((v)->arch.hvm.guest_cr[4] & X86_CR4_SMEP))
 #define hvm_smap_enabled(v) \
     (hvm_paging_enabled(v) && ((v)->arch.hvm.guest_cr[4] & X86_CR4_SMAP))
-#define hvm_nx_enabled(v) \
-    ((v)->arch.hvm.guest_efer & EFER_NX)
+/*
+ * A consequence of not being able to put MSR_EFER in the MSR Load List is
+ * that we can't context switch EFER.NXE correctly for guests.  The guest gets
+ * Xen's value, and has no choice in the matter.
+ */
+#define hvm_nx_enabled(v)                                         \
+    ((likely(!cpu_bug_msr_ll_efer_sce) ? (v)->arch.hvm.guest_efer \
+                                       : read_efer()) & EFER_NX)
 #define hvm_pku_enabled(v) \
     (hvm_paging_enabled(v) && ((v)->arch.hvm.guest_cr[4] & X86_CR4_PKE))
 
-- 
2.1.4

