Re: [Xen-devel] [PATCH v2 3/3] x86/vmx: fix vmentry failure with TSX bits in LBR



> From: Sergey Dyasli [mailto:sergey.dyasli@xxxxxxxxxx]
> Sent: Friday, February 17, 2017 11:43 PM
> 
> During VM entry, H/W will automatically load the guest's MSRs from the
> MSR-load area, in the same way as they would be written by WRMSR.
> 
> However, under the following conditions:
> 
>     1. LBR (Last Branch Record) MSRs were placed in the MSR-load area
>     2. Address format of LBR includes TSX bits 61:62
>     3. CPU has TSX support disabled
> 
> VM entry will fail with a message in the log similar to:
> 
>     (XEN) [   97.239514] d1v0 vmentry failure (reason 0x80000022): MSR loading (entry 3)
>     (XEN) [   97.239516]   msr 00000680 val 1fff800000102e60 (mbz 0)
> 
> This happens because of the following behaviour:
> 
>     - When capturing branches, LBR H/W will always clear bits 61:62
>       regardless of the sign extension
>     - For WRMSR, bits 61:62 are considered part of the sign extension
> 
> This bug affects only certain pCPUs (e.g. Haswell) with vCPUs that
> use LBR.  Fix it by sign-extending TSX bits in all LBR entries during
> VM entry in affected cases.
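
To double check my understanding of the failure: with TSX disabled,
WRMSR treats bits 61:62 as part of the sign extension, while LBR H/W
records them as zero, so a "from" address with bits 59:60 set but bits
61:62 clear is rejected as non-canonical. Copying bits 59:60 into bits
61:62 repairs exactly that. A minimal standalone sketch of the
arithmetic, using the value from the log above:

    #include <inttypes.h>
    #include <stdio.h>

    /* Bits 59:60, to be copied into bits 61:62 (as in the patch). */
    #define LBR_FROM_SIGNEXT_2MSB  ((1ULL << 59) | (1ULL << 60))

    int main(void)
    {
        /* MSR 00000680 value from the vmentry failure message. */
        uint64_t val = 0x1fff800000102e60ULL;

        /* Sign extend into bits 61:62 while preserving bit 63. */
        val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2;

        /* Prints 7fff800000102e60: bits 61:62 now match bits 59:60. */
        printf("%016" PRIx64 "\n", val);
        return 0;
    }
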
> 
> LBR MSRs are currently not live migrated. Implementing that requires
> the MSR levelling work to be done first, because hosts can have
> different LBR formats.
> 
> Signed-off-by: Sergey Dyasli <sergey.dyasli@xxxxxxxxxx>
> ---
> v1 --> v2:
> - LBR_FORMAT_MSR_IA32_PERF_CAP define is added
> - enum for LBR_FORMAT_* is added
> - LBR_FROM_SIGNEXT_2MSB is now a define instead of static const
> - u32/64 replaced with uint32/64_t
> - Renamed "last_in_from_ip" --> "lbr_lastint_from"
> - Added defines for the layout of struct lbr_info[]
> - Added a comment to vmx_lbr_tsx_fixup() about MSR array being sorted
> - Fixed tabs for cpu_has_* defines
> - arch_vmx_struct::lbr_tsx_fixup_enabled is moved to fill up 1-byte hole
> - Made lbr_from_start, lbr_from_end and lbr_lastint_from
>   global static __read_mostly.
>   Moved their initialization into vmx_lbr_tsx_fixup_check().
> - Information about live migration is added to the commit message
> 
>  xen/arch/x86/hvm/vmx/vmx.c         | 90 ++++++++++++++++++++++++++++++++++++++
>  xen/include/asm-x86/cpufeature.h   |  3 ++
>  xen/include/asm-x86/hvm/vmx/vmcs.h |  1 +
>  xen/include/asm-x86/msr-index.h    |  2 +
>  4 files changed, 96 insertions(+)
> 
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 42f4fbd..3547b78 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -2284,6 +2284,8 @@ static void pi_notification_interrupt(struct cpu_user_regs *regs)
>      raise_softirq(VCPU_KICK_SOFTIRQ);
>  }
> 
> +static void __init vmx_lbr_tsx_fixup_check(void);
> +
>  const struct hvm_function_table * __init start_vmx(void)
>  {
>      set_in_cr4(X86_CR4_VMXE);
> @@ -2353,6 +2355,8 @@ const struct hvm_function_table * __init start_vmx(void)
> 
>      setup_vmcs_dump();
> 
> +    vmx_lbr_tsx_fixup_check();
> +
>      return &vmx_function_table;
>  }
> 
> @@ -2513,6 +2517,14 @@ static int vmx_cr_access(unsigned long exit_qualification)
>      return X86EMUL_OKAY;
>  }
> 
> +/* This defines the layout of struct lbr_info[] */
> +#define LBR_LASTINT_FROM_IDX    0
> +#define LBR_LASTINT_TO_IDX      1
> +#define LBR_LASTBRANCH_TOS_IDX  2
> +#define LBR_LASTBRANCH_FROM_IDX 3
> +#define LBR_LASTBRANCH_TO_IDX   4
> +#define LBR_LASTBRANCH_INFO     5
> +
>  static const struct lbr_info {
>      u32 base, count;
>  } p4_lbr[] = {
> @@ -2618,6 +2630,56 @@ static const struct lbr_info *last_branch_msr_get(void)
>      return NULL;
>  }
> 
> +enum
> +{
> +    LBR_FORMAT_32                 = 0x0, /* 32-bit record format */
> +    LBR_FORMAT_LIP                = 0x1, /* 64-bit LIP record format */
> +    LBR_FORMAT_EIP                = 0x2, /* 64-bit EIP record format */
> +    LBR_FORMAT_EIP_FLAGS          = 0x3, /* 64-bit EIP, Flags */
> +    LBR_FORMAT_EIP_FLAGS_TSX      = 0x4, /* 64-bit EIP, Flags, TSX */
> +    LBR_FORMAT_EIP_FLAGS_TSX_INFO = 0x5, /* 64-bit EIP, Flags, TSX, LBR_INFO */
> +    LBR_FORMAT_EIP_FLAGS_CYCLES   = 0x6, /* 64-bit EIP, Flags, Cycles */
> +};
> +
> +#define LBR_FROM_SIGNEXT_2MSB  ((1ULL << 59) | (1ULL << 60))
> +
> +static bool __read_mostly vmx_lbr_tsx_fixup_needed;
> +static uint32_t __read_mostly lbr_from_start;
> +static uint32_t __read_mostly lbr_from_end;
> +static uint32_t __read_mostly lbr_lastint_from;
> +
> +static void __init vmx_lbr_tsx_fixup_check(void)
> +{
> +    bool tsx_support = cpu_has_hle || cpu_has_rtm;
> +    uint64_t caps;
> +    uint32_t lbr_format;
> +
> +    /* Fixup is needed only when TSX support is disabled ... */
> +    if ( tsx_support )
> +        return;

i.e. the fixup is needed only when !tsx_support; clearer to word the
comment around the actual check.

> +
> +    if ( !cpu_has_pdcm )
> +        return;

Combine the two ifs into one.
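Something like (same logic, one early exit):

    /* Fixup is needed only when TSX support is disabled ... */
    if ( tsx_support || !cpu_has_pdcm )
        return;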

> +
> +    rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
> +    lbr_format = caps & LBR_FORMAT_MSR_IA32_PERF_CAP;
> +
> +    /* ... and the address format of LBR includes TSX bits 61:62 */

What does the "..." refer to?

> +    if ( lbr_format == LBR_FORMAT_EIP_FLAGS_TSX )
> +    {
> +        const struct lbr_info *lbr = last_branch_msr_get();
> +
> +        if ( lbr == NULL )
> +            return;

Move the above check before the rdmsrl, to at least avoid one
unnecessary MSR read when LBR is not available.

> +
> +        lbr_lastint_from = lbr[LBR_LASTINT_FROM_IDX].base;
> +        lbr_from_start = lbr[LBR_LASTBRANCH_FROM_IDX].base;
> +        lbr_from_end = lbr_from_start + lbr[LBR_LASTBRANCH_FROM_IDX].count;
> +
> +        vmx_lbr_tsx_fixup_needed = true;
> +    }
> +}
> +
>  static int is_last_branch_msr(u32 ecx)
>  {
>      const struct lbr_info *lbr = last_branch_msr_get();
> @@ -2876,7 +2938,11 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
>              for ( ; (rc == 0) && lbr->count; lbr++ )
>                  for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
>                      if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 )
> +                    {
>                          vmx_disable_intercept_for_msr(v, lbr->base + i, MSR_TYPE_R | MSR_TYPE_W);
> +                        if ( vmx_lbr_tsx_fixup_needed )
> +                            v->arch.hvm_vmx.lbr_tsx_fixup_enabled = true;
> +                    }
>          }
> 
>          if ( (rc < 0) ||
> @@ -3897,6 +3963,27 @@ out:
>      }
>  }
> 
> +static void vmx_lbr_tsx_fixup(void)
> +{
> +    struct vcpu *curr = current;
> +    unsigned int msr_count = curr->arch.hvm_vmx.msr_count;
> +    struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
> +    struct vmx_msr_entry *msr;
> +
> +    if ( (msr = vmx_find_msr(lbr_from_start, VMX_GUEST_MSR)) != NULL )

Curious whether NULL is a valid situation here. If not, we need to
handle it. Otherwise please ignore this comment.

> +    {
> +        /*
> +         * Sign extend into bits 61:62 while preserving bit 63.
> +         * The loop relies on the fact that the MSR array is sorted.
> +         */
> +        for ( ; msr < msr_area + msr_count && msr->index < lbr_from_end; msr++ )
> +            msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
> +    }
> +
> +    if ( (msr = vmx_find_msr(lbr_lastint_from, VMX_GUEST_MSR)) != NULL )
> +        msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
> +}
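
On the sorted-array comment: vmx_find_msr() locates the entry for
lbr_from_start, and with the array sorted by index the remaining "from"
entries follow contiguously, so the loop stops at the first index >=
lbr_from_end. A standalone toy model (the 0x680/0x690 bounds are
assumptions for illustration):

    #include <stdint.h>
    #include <stdio.h>

    struct msr_entry { uint32_t index; uint64_t data; };

    int main(void)
    {
        struct msr_entry area[] = {        /* sorted by index */
            { 0x1d9, 0 }, { 0x680, 0 }, { 0x681, 0 }, { 0x6c0, 0 },
        };
        unsigned int count = sizeof(area) / sizeof(area[0]);
        uint32_t from_end = 0x690;         /* lbr_from_end */
        struct msr_entry *msr = &area[1];  /* found entry for 0x680 */

        /* Visits 0x680 and 0x681 only; stops on reaching 0x6c0. */
        for ( ; msr < area + count && msr->index < from_end; msr++ )
            printf("would fix up MSR %#x\n", (unsigned int)msr->index);

        return 0;
    }
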
> +
>  void vmx_vmenter_helper(const struct cpu_user_regs *regs)
>  {
>      struct vcpu *curr = current;
> @@ -3953,6 +4040,9 @@ void vmx_vmenter_helper(const struct cpu_user_regs *regs)
>      }
> 
>   out:
> +    if ( unlikely(curr->arch.hvm_vmx.lbr_tsx_fixup_enabled) )
> +        vmx_lbr_tsx_fixup();
> +
>      HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
> 
>      __vmwrite(GUEST_RIP,    regs->rip);
> diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
> index 39ad582..3b187ac 100644
> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -78,6 +78,9 @@
>  #define cpu_has_cmp_legacy   boot_cpu_has(X86_FEATURE_CMP_LEGACY)
>  #define cpu_has_tbm          boot_cpu_has(X86_FEATURE_TBM)
>  #define cpu_has_itsc         boot_cpu_has(X86_FEATURE_ITSC)
> +#define cpu_has_hle          boot_cpu_has(X86_FEATURE_HLE)
> +#define cpu_has_rtm          boot_cpu_has(X86_FEATURE_RTM)
> +#define cpu_has_pdcm         boot_cpu_has(X86_FEATURE_PDCM)
> 
>  enum _cache_type {
>      CACHE_TYPE_NULL = 0,
> diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
> index 903af51..2c9c3c9 100644
> --- a/xen/include/asm-x86/hvm/vmx/vmcs.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
> @@ -147,6 +147,7 @@ struct arch_vmx_struct {
>      uint8_t              vmx_realmode;
>      /* Are we emulating rather than VMENTERing? */
>      uint8_t              vmx_emulate;
> +    bool                 lbr_tsx_fixup_enabled;
>      /* Bitmask of segments that we can't safely use in virtual 8086 mode */
>      uint16_t             vm86_segment_mask;
>      /* Shadow CS, SS, DS, ES, FS, GS, TR while in virtual 8086 mode */
> diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
> index 98dbff1..8841419 100644
> --- a/xen/include/asm-x86/msr-index.h
> +++ b/xen/include/asm-x86/msr-index.h
> @@ -55,6 +55,8 @@
>  #define MSR_IA32_PEBS_ENABLE         0x000003f1
>  #define MSR_IA32_DS_AREA             0x00000600
>  #define MSR_IA32_PERF_CAPABILITIES   0x00000345
> +/* Lower 6 bits define the format of the address in the LBR stack */
> +#define LBR_FORMAT_MSR_IA32_PERF_CAP 0x3f
> 
>  #define MSR_IA32_BNDCFGS             0x00000d90
>  #define IA32_BNDCFGS_ENABLE          0x00000001
> --
> 2.9.3

