 
	
| [Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v2 3/3] x86/vmx: fix vmentry failure with TSX bits in LBR
 During VM entry, H/W will automatically load guest's MSRs from MSR-load
area in the same way as they would be written by WRMSR.
However, under the following conditions:
    1. LBR (Last Branch Record) MSRs were placed in the MSR-load area
    2. Address format of LBR includes TSX bits 61:62
    3. CPU has TSX support disabled
VM entry will fail with a message in the log similar to:
    (XEN) [   97.239514] d1v0 vmentry failure (reason 0x80000022): MSR loading 
(entry 3)
    (XEN) [   97.239516]   msr 00000680 val 1fff800000102e60 (mbz 0)
This happens because of the following behaviour:
    - When capturing branches, LBR H/W will always clear bits 61:62
      regardless of the sign extension
    - For WRMSR, bits 61:62 are considered the part of the sign extension
This bug affects only certain pCPUs (e.g. Haswell) with vCPUs that
use LBR.  Fix it by sign-extending TSX bits in all LBR entries during
VM entry in affected cases.
LBR MSRs are currently not Live Migrated. In order to implement such
functionality, the MSR levelling work has to be done first because
hosts can have different LBR formats.
Signed-off-by: Sergey Dyasli <sergey.dyasli@xxxxxxxxxx>
---
v1 --> v2:
- LBR_FORMAT_MSR_IA32_PERF_CAP define is added
- enum for LBR_FORMAT_* is added
- LBR_FROM_SIGNEXT_2MSB is now a define instead of static const
- u32/64 replaced with uint32/64_t
- Renamed "last_in_from_ip" --> "lbr_lastint_from"
- Added defines for the layout of struct lbr_info[]
- Added a comment to vmx_lbr_tsx_fixup() about MSR array being sorted
- Fixed tabs for cpu_has_* defines
- arch_vmx_struct::lbr_tsx_fixup_enabled is moved to fill up 1-byte hole
- Made lbr_from_start, lbr_from_end and lbr_lastint_from
  global static __read_mostly.
  Moved their initialization into vmx_lbr_tsx_fixup_check().
 - Information about Live Migration is added to the commit message
 xen/arch/x86/hvm/vmx/vmx.c         | 90 ++++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/cpufeature.h   |  3 ++
 xen/include/asm-x86/hvm/vmx/vmcs.h |  1 +
 xen/include/asm-x86/msr-index.h    |  2 +
 4 files changed, 96 insertions(+)
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 42f4fbd..3547b78 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2284,6 +2284,8 @@ static void pi_notification_interrupt(struct 
cpu_user_regs *regs)
     raise_softirq(VCPU_KICK_SOFTIRQ);
 }
 
+static void __init vmx_lbr_tsx_fixup_check(void);
+
 const struct hvm_function_table * __init start_vmx(void)
 {
     set_in_cr4(X86_CR4_VMXE);
@@ -2353,6 +2355,8 @@ const struct hvm_function_table * __init start_vmx(void)
 
     setup_vmcs_dump();
 
+    vmx_lbr_tsx_fixup_check();
+
     return &vmx_function_table;
 }
 
@@ -2513,6 +2517,14 @@ static int vmx_cr_access(unsigned long 
exit_qualification)
     return X86EMUL_OKAY;
 }
 
+/* This defines the layout of struct lbr_info[] */
+#define LBR_LASTINT_FROM_IDX    0
+#define LBR_LASTINT_TO_IDX      1
+#define LBR_LASTBRANCH_TOS_IDX  2
+#define LBR_LASTBRANCH_FROM_IDX 3
+#define LBR_LASTBRANCH_TO_IDX   4
+#define LBR_LASTBRANCH_INFO     5
+
 static const struct lbr_info {
     u32 base, count;
 } p4_lbr[] = {
@@ -2618,6 +2630,56 @@ static const struct lbr_info *last_branch_msr_get(void)
     return NULL;
 }
 
+enum
+{
+    LBR_FORMAT_32                 = 0x0, /* 32-bit record format */
+    LBR_FORMAT_LIP                = 0x1, /* 64-bit LIP record format */
+    LBR_FORMAT_EIP                = 0x2, /* 64-bit EIP record format */
+    LBR_FORMAT_EIP_FLAGS          = 0x3, /* 64-bit EIP, Flags */
+    LBR_FORMAT_EIP_FLAGS_TSX      = 0x4, /* 64-bit EIP, Flags, TSX */
+    LBR_FORMAT_EIP_FLAGS_TSX_INFO = 0x5, /* 64-bit EIP, Flags, TSX, LBR_INFO */
+    LBR_FORMAT_EIP_FLAGS_CYCLES   = 0x6, /* 64-bit EIP, Flags, Cycles */
+};
+
+#define LBR_FROM_SIGNEXT_2MSB  ((1ULL << 59) | (1ULL << 60))
+
+static bool __read_mostly vmx_lbr_tsx_fixup_needed;
+static uint32_t __read_mostly lbr_from_start;
+static uint32_t __read_mostly lbr_from_end;
+static uint32_t __read_mostly lbr_lastint_from;
+
+static void __init vmx_lbr_tsx_fixup_check(void)
+{
+    bool tsx_support = cpu_has_hle || cpu_has_rtm;
+    uint64_t caps;
+    uint32_t lbr_format;
+
+    /* Fixup is needed only when TSX support is disabled ... */
+    if ( tsx_support )
+        return;
+
+    if ( !cpu_has_pdcm )
+        return;
+
+    rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
+    lbr_format = caps & LBR_FORMAT_MSR_IA32_PERF_CAP;
+
+    /* ... and the address format of LBR includes TSX bits 61:62 */
+    if ( lbr_format == LBR_FORMAT_EIP_FLAGS_TSX )
+    {
+        const struct lbr_info *lbr = last_branch_msr_get();
+
+        if ( lbr == NULL )
+            return;
+
+        lbr_lastint_from = lbr[LBR_LASTINT_FROM_IDX].base;
+        lbr_from_start = lbr[LBR_LASTBRANCH_FROM_IDX].base;
+        lbr_from_end = lbr_from_start + lbr[LBR_LASTBRANCH_FROM_IDX].count;
+
+        vmx_lbr_tsx_fixup_needed = true;
+    }
+}
+
 static int is_last_branch_msr(u32 ecx)
 {
     const struct lbr_info *lbr = last_branch_msr_get();
@@ -2876,7 +2938,11 @@ static int vmx_msr_write_intercept(unsigned int msr, 
uint64_t msr_content)
             for ( ; (rc == 0) && lbr->count; lbr++ )
                 for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
                     if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 )
+                    {
                         vmx_disable_intercept_for_msr(v, lbr->base + i, 
MSR_TYPE_R | MSR_TYPE_W);
+                        if ( vmx_lbr_tsx_fixup_needed )
+                            v->arch.hvm_vmx.lbr_tsx_fixup_enabled = true;
+                    }
         }
 
         if ( (rc < 0) ||
@@ -3897,6 +3963,27 @@ out:
     }
 }
 
+static void vmx_lbr_tsx_fixup(void)
+{
+    struct vcpu *curr = current;
+    unsigned int msr_count = curr->arch.hvm_vmx.msr_count;
+    struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
+    struct vmx_msr_entry *msr;
+
+    if ( (msr = vmx_find_msr(lbr_from_start, VMX_GUEST_MSR)) != NULL )
+    {
+        /*
+         * Sign extend into bits 61:62 while preserving bit 63
+         * The loop relies on the fact that MSR array is sorted.
+         */
+        for ( ; msr < msr_area + msr_count && msr->index < lbr_from_end; msr++ 
)
+            msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
+    }
+
+    if ( (msr = vmx_find_msr(lbr_lastint_from, VMX_GUEST_MSR)) != NULL )
+        msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
+}
+
 void vmx_vmenter_helper(const struct cpu_user_regs *regs)
 {
     struct vcpu *curr = current;
@@ -3953,6 +4040,9 @@ void vmx_vmenter_helper(const struct cpu_user_regs *regs)
     }
 
  out:
+    if ( unlikely(curr->arch.hvm_vmx.lbr_tsx_fixup_enabled) )
+        vmx_lbr_tsx_fixup();
+
     HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
 
     __vmwrite(GUEST_RIP,    regs->rip);
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index 39ad582..3b187ac 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -78,6 +78,9 @@
 #define cpu_has_cmp_legacy     boot_cpu_has(X86_FEATURE_CMP_LEGACY)
 #define cpu_has_tbm            boot_cpu_has(X86_FEATURE_TBM)
 #define cpu_has_itsc           boot_cpu_has(X86_FEATURE_ITSC)
+#define cpu_has_hle            boot_cpu_has(X86_FEATURE_HLE)
+#define cpu_has_rtm            boot_cpu_has(X86_FEATURE_RTM)
+#define cpu_has_pdcm           boot_cpu_has(X86_FEATURE_PDCM)
 
 enum _cache_type {
     CACHE_TYPE_NULL = 0,
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h 
b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 903af51..2c9c3c9 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -147,6 +147,7 @@ struct arch_vmx_struct {
     uint8_t              vmx_realmode;
     /* Are we emulating rather than VMENTERing? */
     uint8_t              vmx_emulate;
+    bool                 lbr_tsx_fixup_enabled;
     /* Bitmask of segments that we can't safely use in virtual 8086 mode */
     uint16_t             vm86_segment_mask;
     /* Shadow CS, SS, DS, ES, FS, GS, TR while in virtual 8086 mode */
diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
index 98dbff1..8841419 100644
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -55,6 +55,8 @@
 #define MSR_IA32_PEBS_ENABLE           0x000003f1
 #define MSR_IA32_DS_AREA               0x00000600
 #define MSR_IA32_PERF_CAPABILITIES     0x00000345
+/* Lower 6 bits define the format of the address in the LBR stack */
+#define LBR_FORMAT_MSR_IA32_PERF_CAP   0x3f
 
 #define MSR_IA32_BNDCFGS               0x00000d90
 #define IA32_BNDCFGS_ENABLE            0x00000001
-- 
2.9.3
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel
 
 
 | 
|  | Lists.xenproject.org is hosted with RackSpace, monitoring our |