[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen master] x86/xsave: fix migration from xsave-capable to xsave-incapable host



commit 4cc1344447a0458df5d222960f2adf1b65084fa8
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Mon Sep 9 14:36:54 2013 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Mon Sep 9 14:36:54 2013 +0200

    x86/xsave: fix migration from xsave-capable to xsave-incapable host
    
    With CPUID features suitably masked this is supposed to work, but was
    completely broken (i.e. the case wasn't even considered when the
    original xsave save/restore code was written).
    
    First of all, xsave_enabled() wrongly returned the value of
    cpu_has_xsave, i.e. not even taking into consideration attributes of
    the vCPU in question. Instead this function ought to check whether the
    guest ever enabled xsave support (by writing a [non-zero] value to
    XCR0). As a result of this, a vCPU's xcr0 and xcr0_accum must no longer
    be initialized to XSTATE_FP_SSE (since that's a valid value a guest
    could write to XCR0), and the xsave/xrstor as well as the context
    switch code need to suitably account for this (by always enforcing at
    least this part of the state to be saved/loaded).
    
    This involves undoing large parts of c/s 22945:13a7d1f7f62c ("x86: add
    strictly sanity check for XSAVE/XRSTOR") - we need to cleanly
    distinguish between hardware capabilities and vCPU used features.
    
    Next both HVM and PV save code needed tweaking to not always save the
    full state supported by the underlying hardware, but just the parts
    that the guest actually used. Similarly the restore code should bail
    not just on state being restored that the hardware cannot handle, but
    also on inconsistent save state (inconsistent XCR0 settings or size of
    saved state not in line with XCR0).
    
    And finally the PV extended context get/set code needs to use slightly
    different logic than the HVM one, as here we can't just key off of
    xsave_enabled() (i.e. avoid doing anything if a guest doesn't use
    xsave) because the tools use this function to determine host
    capabilities as well as read/write vCPU state. The set operation in
    particular needs to be capable of cleanly dealing with input that
    consists of only the xcr0 and xcr0_accum values (if they're both zero
    then no further data is required).
    
    While for things to work correctly both sides (saving _and_ restoring
    host) need to run with the fixed code, afaict no breakage should occur
    if either side isn't up to date (other than the breakage that this
    patch attempts to fix).
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Reviewed-by: Yang Zhang <yang.z.zhang@xxxxxxxxx>
    Acked-by: Keir Fraser <keir@xxxxxxx>
---
 xen/arch/x86/domain.c         |   12 ++++--
 xen/arch/x86/domctl.c         |   51 +++++++++++++---------
 xen/arch/x86/hvm/hvm.c        |   94 ++++++++++++++++++++++++++--------------
 xen/arch/x86/hvm/vmx/vmcs.c   |    3 +-
 xen/arch/x86/i387.c           |   21 +++++----
 xen/arch/x86/traps.c          |    4 +-
 xen/arch/x86/xstate.c         |   63 ++++++++++++++++++++++-----
 xen/include/asm-x86/domain.h  |    6 +-
 xen/include/asm-x86/hvm/hvm.h |    2 +-
 xen/include/asm-x86/xstate.h  |    4 +-
 10 files changed, 172 insertions(+), 88 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 874742c..f7b0308 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -618,7 +618,7 @@ unsigned long pv_guest_cr4_fixup(const struct vcpu *v, 
unsigned long guest_cr4)
         hv_cr4_mask &= ~X86_CR4_DE;
     if ( cpu_has_fsgsbase && !is_pv_32bit_domain(v->domain) )
         hv_cr4_mask &= ~X86_CR4_FSGSBASE;
-    if ( xsave_enabled(v) )
+    if ( cpu_has_xsave )
         hv_cr4_mask &= ~X86_CR4_OSXSAVE;
 
     if ( (guest_cr4 & hv_cr4_mask) != (hv_cr4 & hv_cr4_mask) )
@@ -1351,9 +1351,13 @@ static void __context_switch(void)
     if ( !is_idle_vcpu(n) )
     {
         memcpy(stack_regs, &n->arch.user_regs, CTXT_SWITCH_STACK_BYTES);
-        if ( xsave_enabled(n) && n->arch.xcr0 != get_xcr0() &&
-             !set_xcr0(n->arch.xcr0) )
-            BUG();
+        if ( cpu_has_xsave )
+        {
+            u64 xcr0 = n->arch.xcr0 ?: XSTATE_FP_SSE;
+
+            if ( xcr0 != get_xcr0() && !set_xcr0(xcr0) )
+                BUG();
+        }
         vcpu_restore_fpu_eager(n);
         n->arch.ctxt_switch_to(n);
     }
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index c2a04c4..e75918a 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1047,11 +1047,8 @@ long arch_do_domctl(
         struct xen_domctl_vcpuextstate *evc;
         struct vcpu *v;
         uint32_t offset = 0;
-        uint64_t _xfeature_mask = 0;
-        uint64_t _xcr0, _xcr0_accum;
-        void *receive_buf = NULL, *_xsave_area;
 
-#define PV_XSAVE_SIZE (2 * sizeof(uint64_t) + xsave_cntxt_size)
+#define PV_XSAVE_SIZE(xcr0) (2 * sizeof(uint64_t) + xstate_ctxt_size(xcr0))
 
         evc = &domctl->u.vcpuextstate;
 
@@ -1062,15 +1059,16 @@ long arch_do_domctl(
 
         if ( domctl->cmd == XEN_DOMCTL_getvcpuextstate )
         {
+            unsigned int size = PV_XSAVE_SIZE(v->arch.xcr0_accum);
+
             if ( !evc->size && !evc->xfeature_mask )
             {
                 evc->xfeature_mask = xfeature_mask;
-                evc->size = PV_XSAVE_SIZE;
+                evc->size = size;
                 ret = 0;
                 goto vcpuextstate_out;
             }
-            if ( evc->size != PV_XSAVE_SIZE ||
-                 evc->xfeature_mask != xfeature_mask )
+            if ( evc->size != size || evc->xfeature_mask != xfeature_mask )
             {
                 ret = -EINVAL;
                 goto vcpuextstate_out;
@@ -1093,7 +1091,7 @@ long arch_do_domctl(
             offset += sizeof(v->arch.xcr0_accum);
             if ( copy_to_guest_offset(domctl->u.vcpuextstate.buffer,
                                       offset, (void *)v->arch.xsave_area,
-                                      xsave_cntxt_size) )
+                                      size - 2 * sizeof(uint64_t)) )
             {
                 ret = -EFAULT;
                 goto vcpuextstate_out;
@@ -1101,13 +1099,14 @@ long arch_do_domctl(
         }
         else
         {
-            ret = -EINVAL;
+            void *receive_buf;
+            uint64_t _xcr0, _xcr0_accum;
+            const struct xsave_struct *_xsave_area;
 
-            _xfeature_mask = evc->xfeature_mask;
-            /* xsave context must be restored on compatible target CPUs */
-            if ( (_xfeature_mask & xfeature_mask) != _xfeature_mask )
-                goto vcpuextstate_out;
-            if ( evc->size > PV_XSAVE_SIZE || evc->size < 2 * sizeof(uint64_t) 
)
+            ret = -EINVAL;
+            if ( evc->size < 2 * sizeof(uint64_t) ||
+                 evc->size > 2 * sizeof(uint64_t) +
+                             xstate_ctxt_size(xfeature_mask) )
                 goto vcpuextstate_out;
 
             receive_buf = xmalloc_bytes(evc->size);
@@ -1128,20 +1127,30 @@ long arch_do_domctl(
             _xcr0_accum = *(uint64_t *)(receive_buf + sizeof(uint64_t));
             _xsave_area = receive_buf + 2 * sizeof(uint64_t);
 
-            if ( !(_xcr0 & XSTATE_FP) || _xcr0 & ~xfeature_mask )
+            if ( _xcr0_accum )
             {
-                xfree(receive_buf);
-                goto vcpuextstate_out;
+                if ( evc->size >= 2 * sizeof(uint64_t) + XSTATE_AREA_MIN_SIZE )
+                    ret = validate_xstate(_xcr0, _xcr0_accum,
+                                          _xsave_area->xsave_hdr.xstate_bv,
+                                          evc->xfeature_mask);
             }
-            if ( (_xcr0 & _xcr0_accum) != _xcr0 )
+            else if ( !_xcr0 )
+                ret = 0;
+            if ( ret )
             {
                 xfree(receive_buf);
                 goto vcpuextstate_out;
             }
 
-            v->arch.xcr0 = _xcr0;
-            v->arch.xcr0_accum = _xcr0_accum;
-            memcpy(v->arch.xsave_area, _xsave_area, evc->size - 2 * 
sizeof(uint64_t) );
+            if ( evc->size <= PV_XSAVE_SIZE(_xcr0_accum) )
+            {
+                v->arch.xcr0 = _xcr0;
+                v->arch.xcr0_accum = _xcr0_accum;
+                memcpy(v->arch.xsave_area, _xsave_area,
+                       evc->size - 2 * sizeof(uint64_t));
+            }
+            else
+                ret = -EINVAL;
 
             xfree(receive_buf);
         }
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 1fcaed0..ebf1838 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -906,14 +906,12 @@ static int hvm_load_cpu_ctxt(struct domain *d, 
hvm_domain_context_t *h)
     hvm_set_segment_register(v, x86_seg_ldtr, &seg);
 
     /* In case xsave-absent save file is restored on a xsave-capable host */
-    if ( xsave_enabled(v) )
+    if ( cpu_has_xsave && !xsave_enabled(v) )
     {
         struct xsave_struct *xsave_area = v->arch.xsave_area;
 
         memcpy(v->arch.xsave_area, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
         xsave_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE;
-        v->arch.xcr0_accum = XSTATE_FP_SSE;
-        v->arch.xcr0 = XSTATE_FP_SSE;
     }
     else
         memcpy(v->arch.fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
@@ -957,7 +955,9 @@ static int hvm_load_cpu_ctxt(struct domain *d, 
hvm_domain_context_t *h)
 HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt,
                           1, HVMSR_PER_VCPU);
 
-#define HVM_CPU_XSAVE_SIZE  (3 * sizeof(uint64_t) + xsave_cntxt_size)
+#define HVM_CPU_XSAVE_SIZE(xcr0) (offsetof(struct hvm_hw_cpu_xsave, \
+                                           save_area) + \
+                                  xstate_ctxt_size(xcr0))
 
 static int hvm_save_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h)
 {
@@ -969,20 +969,20 @@ static int hvm_save_cpu_xsave_states(struct domain *d, 
hvm_domain_context_t *h)
 
     for_each_vcpu ( d, v )
     {
+        unsigned int size = HVM_CPU_XSAVE_SIZE(v->arch.xcr0_accum);
+
         if ( !xsave_enabled(v) )
             continue;
-        if ( _hvm_init_entry(h, CPU_XSAVE_CODE, v->vcpu_id, 
HVM_CPU_XSAVE_SIZE) )
+        if ( _hvm_init_entry(h, CPU_XSAVE_CODE, v->vcpu_id, size) )
             return 1;
         ctxt = (struct hvm_hw_cpu_xsave *)&h->data[h->cur];
-        h->cur += HVM_CPU_XSAVE_SIZE;
-        memset(ctxt, 0, HVM_CPU_XSAVE_SIZE);
+        h->cur += size;
 
         ctxt->xfeature_mask = xfeature_mask;
         ctxt->xcr0 = v->arch.xcr0;
         ctxt->xcr0_accum = v->arch.xcr0_accum;
-        if ( v->fpu_initialised )
-            memcpy(&ctxt->save_area,
-                v->arch.xsave_area, xsave_cntxt_size);
+        memcpy(&ctxt->save_area, v->arch.xsave_area,
+               size - offsetof(struct hvm_hw_cpu_xsave, save_area));
     }
 
     return 0;
@@ -990,11 +990,11 @@ static int hvm_save_cpu_xsave_states(struct domain *d, 
hvm_domain_context_t *h)
 
 static int hvm_load_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h)
 {
-    int vcpuid;
+    unsigned int vcpuid, size;
+    int err;
     struct vcpu *v;
     struct hvm_hw_cpu_xsave *ctxt;
     struct hvm_save_descriptor *desc;
-    uint64_t _xfeature_mask;
 
     /* Which vcpu is this? */
     vcpuid = hvm_load_instance(h);
@@ -1006,47 +1006,74 @@ static int hvm_load_cpu_xsave_states(struct domain *d, 
hvm_domain_context_t *h)
     }
 
     /* Fails since we can't restore an img saved on xsave-capable host. */
-    if ( !xsave_enabled(v) )
-        return -EINVAL;
+    if ( !cpu_has_xsave )
+        return -EOPNOTSUPP;
 
     /* Customized checking for entry since our entry is of variable length */
     desc = (struct hvm_save_descriptor *)&h->data[h->cur];
     if ( sizeof (*desc) > h->size - h->cur)
     {
         printk(XENLOG_G_WARNING
-               "HVM%d restore: not enough data left to read descriptor"
-               "for type %u\n", d->domain_id, CPU_XSAVE_CODE);
-        return -1;
+               "HVM%d.%d restore: not enough data left to read xsave 
descriptor\n",
+               d->domain_id, vcpuid);
+        return -ENODATA;
     }
     if ( desc->length + sizeof (*desc) > h->size - h->cur)
     {
         printk(XENLOG_G_WARNING
-               "HVM%d restore: not enough data left to read %u bytes "
-               "for type %u\n", d->domain_id, desc->length, CPU_XSAVE_CODE);
-        return -1;
+               "HVM%d.%d restore: not enough data left to read %u xsave 
bytes\n",
+               d->domain_id, vcpuid, desc->length);
+        return -ENODATA;
+    }
+    if ( desc->length < offsetof(struct hvm_hw_cpu_xsave, save_area) +
+                        XSTATE_AREA_MIN_SIZE )
+    {
+        printk(XENLOG_G_WARNING
+               "HVM%d.%d restore mismatch: xsave length %u < %zu\n",
+               d->domain_id, vcpuid, desc->length,
+               offsetof(struct hvm_hw_cpu_xsave,
+                        save_area) + XSTATE_AREA_MIN_SIZE);
+        return -EINVAL;
     }
-    if ( CPU_XSAVE_CODE != desc->typecode || (desc->length > 
HVM_CPU_XSAVE_SIZE) )
+    size = HVM_CPU_XSAVE_SIZE(xfeature_mask);
+    if ( desc->length > size )
     {
         printk(XENLOG_G_WARNING
-               "HVM%d restore mismatch: expected type %u with max length %u, "
-               "saw type %u length %u\n", d->domain_id, CPU_XSAVE_CODE,
-               (unsigned int)HVM_CPU_XSAVE_SIZE,
-               desc->typecode, desc->length);
-        return -1;
+               "HVM%d.%d restore mismatch: xsave length %u > %u\n",
+               d->domain_id, vcpuid, desc->length, size);
+        return -EOPNOTSUPP;
     }
     h->cur += sizeof (*desc);
-    /* Checking finished */
 
     ctxt = (struct hvm_hw_cpu_xsave *)&h->data[h->cur];
     h->cur += desc->length;
 
-    _xfeature_mask = ctxt->xfeature_mask;
-    if ( (_xfeature_mask & xfeature_mask) != _xfeature_mask )
-        return -EINVAL;
+    err = validate_xstate(ctxt->xcr0, ctxt->xcr0_accum,
+                          ctxt->save_area.xsave_hdr.xstate_bv,
+                          ctxt->xfeature_mask);
+    if ( err )
+    {
+        printk(XENLOG_G_WARNING
+               "HVM%d.%d restore: inconsistent xsave state (feat=%#"PRIx64
+               " accum=%#"PRIx64" xcr0=%#"PRIx64" bv=%#"PRIx64" err=%d)\n",
+               d->domain_id, vcpuid, ctxt->xfeature_mask, ctxt->xcr0_accum,
+               ctxt->xcr0, ctxt->save_area.xsave_hdr.xstate_bv, err);
+        return err;
+    }
+    size = HVM_CPU_XSAVE_SIZE(ctxt->xcr0_accum);
+    if ( desc->length > size )
+    {
+        printk(XENLOG_G_WARNING
+               "HVM%d.%d restore mismatch: xsave length %u > %u\n",
+               d->domain_id, vcpuid, desc->length, size);
+        return -EOPNOTSUPP;
+    }
+    /* Checking finished */
 
     v->arch.xcr0 = ctxt->xcr0;
     v->arch.xcr0_accum = ctxt->xcr0_accum;
-    memcpy(v->arch.xsave_area, &ctxt->save_area, xsave_cntxt_size);
+    memcpy(v->arch.xsave_area, &ctxt->save_area,
+           desc->length - offsetof(struct hvm_hw_cpu_xsave, save_area));
 
     return 0;
 }
@@ -1060,7 +1087,8 @@ static int __init 
__hvm_register_CPU_XSAVE_save_and_restore(void)
                         "CPU_XSAVE",
                         hvm_save_cpu_xsave_states,
                         hvm_load_cpu_xsave_states,
-                        HVM_CPU_XSAVE_SIZE + sizeof (struct 
hvm_save_descriptor),
+                        HVM_CPU_XSAVE_SIZE(xfeature_mask) +
+                            sizeof(struct hvm_save_descriptor),
                         HVMSR_PER_VCPU);
     return 0;
 }
@@ -2767,7 +2795,7 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, 
unsigned int *ebx,
             __clear_bit(X86_FEATURE_APIC & 31, edx);
 
         /* Fix up OSXSAVE. */
-        if ( xsave_enabled(v) )
+        if ( cpu_has_xsave )
             *ecx |= (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSXSAVE) ?
                      cpufeat_mask(X86_FEATURE_OSXSAVE) : 0;
 
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 0b40b54..58d38b1 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -944,8 +944,7 @@ static int construct_vmcs(struct vcpu *v)
     /* Host control registers. */
     v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS;
     __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
-    __vmwrite(HOST_CR4,
-              mmu_cr4_features | (xsave_enabled(v) ? X86_CR4_OSXSAVE : 0));
+    __vmwrite(HOST_CR4, mmu_cr4_features);
 
     /* Host CS:RIP. */
     __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
index 1230a52..7649274 100644
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -38,14 +38,15 @@ static inline void fpu_xrstor(struct vcpu *v, uint64_t mask)
 {
     bool_t ok;
 
+    ASSERT(v->arch.xsave_area);
     /*
      * XCR0 normally represents what guest OS set. In case of Xen itself, 
-     * we set all supported feature mask before doing save/restore.
+     * we set the accumulated feature mask before doing save/restore.
      */
-    ok = set_xcr0(v->arch.xcr0_accum);
+    ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
     ASSERT(ok);
     xrstor(v, mask);
-    ok = set_xcr0(v->arch.xcr0);
+    ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
     ASSERT(ok);
 }
 
@@ -137,13 +138,15 @@ static inline void fpu_xsave(struct vcpu *v)
 {
     bool_t ok;
 
-    /* XCR0 normally represents what guest OS set. In case of Xen itself,
-     * we set all accumulated feature mask before doing save/restore.
+    ASSERT(v->arch.xsave_area);
+    /*
+     * XCR0 normally represents what guest OS set. In case of Xen itself,
+     * we set the accumulated feature mask before doing save/restore.
      */
-    ok = set_xcr0(v->arch.xcr0_accum);
+    ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
     ASSERT(ok);
     xsave(v, v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY);
-    ok = set_xcr0(v->arch.xcr0);
+    ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
     ASSERT(ok);
 }
 
@@ -232,7 +235,7 @@ void vcpu_restore_fpu_lazy(struct vcpu *v)
     if ( v->fpu_dirtied )
         return;
 
-    if ( xsave_enabled(v) )
+    if ( cpu_has_xsave )
         fpu_xrstor(v, XSTATE_LAZY);
     else if ( v->fpu_initialised )
     {
@@ -262,7 +265,7 @@ void vcpu_save_fpu(struct vcpu *v)
     /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */
     clts();
 
-    if ( xsave_enabled(v) )
+    if ( cpu_has_xsave )
         fpu_xsave(v);
     else if ( cpu_has_fxsr )
         fpu_fxsave(v);
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 9db42c82..72e8566 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -817,7 +817,7 @@ static void pv_cpuid(struct cpu_user_regs *regs)
         __clear_bit(X86_FEATURE_PDCM % 32, &c);
         __clear_bit(X86_FEATURE_PCID % 32, &c);
         __clear_bit(X86_FEATURE_DCA % 32, &c);
-        if ( !xsave_enabled(current) )
+        if ( !cpu_has_xsave )
         {
             __clear_bit(X86_FEATURE_XSAVE % 32, &c);
             __clear_bit(X86_FEATURE_AVX % 32, &c);
@@ -842,7 +842,7 @@ static void pv_cpuid(struct cpu_user_regs *regs)
         break;
 
     case 0x0000000d: /* XSAVE */
-        if ( !xsave_enabled(current) )
+        if ( !cpu_has_xsave )
             goto unsupported;
         break;
 
diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index 7e459d0..c6b8dcc 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -21,7 +21,7 @@ bool_t __read_mostly cpu_has_xsaveopt;
  * the supported and enabled features on the processor, including the
  * XSAVE.HEADER. We only enable XCNTXT_MASK that we have known.
  */
-u32 xsave_cntxt_size;
+static u32 __read_mostly xsave_cntxt_size;
 
 /* A 64-bit bitmask of the XSAVE/XRSTOR features supported by processor. */
 u64 xfeature_mask;
@@ -206,13 +206,13 @@ void xrstor(struct vcpu *v, uint64_t mask)
 
 bool_t xsave_enabled(const struct vcpu *v)
 {
-    if ( cpu_has_xsave )
-    {
-        ASSERT(xsave_cntxt_size >= XSTATE_AREA_MIN_SIZE);
-        ASSERT(v->arch.xsave_area);
-    }
+    if ( !cpu_has_xsave )
+        return 0;
 
-    return cpu_has_xsave;      
+    ASSERT(xsave_cntxt_size >= XSTATE_AREA_MIN_SIZE);
+    ASSERT(v->arch.xsave_area);
+
+    return !!v->arch.xcr0_accum;
 }
 
 int xstate_alloc_save_area(struct vcpu *v)
@@ -238,8 +238,8 @@ int xstate_alloc_save_area(struct vcpu *v)
     save_area->fpu_sse.mxcsr = MXCSR_DEFAULT;
 
     v->arch.xsave_area = save_area;
-    v->arch.xcr0 = XSTATE_FP_SSE;
-    v->arch.xcr0_accum = XSTATE_FP_SSE;
+    v->arch.xcr0 = 0;
+    v->arch.xcr0_accum = 0;
 
     return 0;
 }
@@ -257,7 +257,11 @@ void xstate_init(bool_t bsp)
     u64 feature_mask;
 
     if ( boot_cpu_data.cpuid_level < XSTATE_CPUID )
+    {
+        BUG_ON(!bsp);
+        setup_clear_cpu_cap(X86_FEATURE_XSAVE);
         return;
+    }
 
     cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
 
@@ -277,7 +281,6 @@ void xstate_init(bool_t bsp)
     set_in_cr4(X86_CR4_OSXSAVE);
     if ( !set_xcr0(feature_mask) )
         BUG();
-    cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
 
     if ( bsp )
     {
@@ -286,14 +289,14 @@ void xstate_init(bool_t bsp)
          * xsave_cntxt_size is the max size required by enabled features.
          * We know FP/SSE and YMM about eax, and nothing about edx at present.
          */
-        xsave_cntxt_size = ebx;
+        xsave_cntxt_size = xstate_ctxt_size(feature_mask);
         printk("%s: using cntxt_size: %#x and states: %#"PRIx64"\n",
             __func__, xsave_cntxt_size, xfeature_mask);
     }
     else
     {
         BUG_ON(xfeature_mask != feature_mask);
-        BUG_ON(xsave_cntxt_size != ebx);
+        BUG_ON(xsave_cntxt_size != xstate_ctxt_size(feature_mask));
     }
 
     /* Check XSAVEOPT feature. */
@@ -304,6 +307,42 @@ void xstate_init(bool_t bsp)
         BUG_ON(!cpu_has_xsaveopt != !(eax & XSTATE_FEATURE_XSAVEOPT));
 }
 
+unsigned int xstate_ctxt_size(u64 xcr0)
+{
+    u32 ebx = 0;
+
+    if ( xcr0 )
+    {
+        u64 act_xcr0 = get_xcr0();
+        u32 eax, ecx, edx;
+        bool_t ok = set_xcr0(xcr0);
+
+        ASSERT(ok);
+        cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+        ASSERT(ebx <= ecx);
+        ok = set_xcr0(act_xcr0);
+        ASSERT(ok);
+    }
+
+    return ebx;
+}
+
+int validate_xstate(u64 xcr0, u64 xcr0_accum, u64 xstate_bv, u64 xfeat_mask)
+{
+    if ( (xcr0_accum & ~xfeat_mask) ||
+         (xstate_bv & ~xcr0_accum) ||
+         (xcr0 & ~xcr0_accum) ||
+         !(xcr0 & XSTATE_FP) ||
+         ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) ||
+         ((xcr0_accum & XSTATE_YMM) && !(xcr0_accum & XSTATE_SSE)) )
+        return -EINVAL;
+
+    if ( xcr0_accum & ~xfeature_mask )
+        return -EOPNOTSUPP;
+
+    return 0;
+}
+
 int handle_xsetbv(u32 index, u64 new_bv)
 {
     struct vcpu *curr = current;
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index d79464d..909f449 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -456,9 +456,9 @@ unsigned long pv_guest_cr4_fixup(const struct vcpu *, 
unsigned long guest_cr4);
 #define pv_guest_cr4_to_real_cr4(v)                         \
     (((v)->arch.pv_vcpu.ctrlreg[4]                          \
       | (mmu_cr4_features                                   \
-         & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP))      \
-      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)          \
-      | ((xsave_enabled(v))? X86_CR4_OSXSAVE : 0))          \
+         & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP |      \
+            X86_CR4_OSXSAVE))                               \
+      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0))         \
      & ~X86_CR4_DE)
 #define real_cr4_to_pv_guest_cr4(c)                         \
     ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD        \
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 083f813..3376418 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -368,7 +368,7 @@ static inline int hvm_event_pending(struct vcpu *v)
         ((nestedhvm_enabled((_v)->domain) && cpu_has_vmx)\
                       ? X86_CR4_VMXE : 0)  |             \
         (cpu_has_pcid ? X86_CR4_PCIDE : 0) |             \
-        (xsave_enabled(_v) ? X86_CR4_OSXSAVE : 0))))
+        (cpu_has_xsave ? X86_CR4_OSXSAVE : 0))))
 
 /* These exceptions must always be intercepted. */
 #define HVM_TRAP_MASK ((1U << TRAP_machine_check) | (1U << TRAP_invalid_op))
diff --git a/xen/include/asm-x86/xstate.h b/xen/include/asm-x86/xstate.h
index 9bea08b..5617963 100644
--- a/xen/include/asm-x86/xstate.h
+++ b/xen/include/asm-x86/xstate.h
@@ -34,7 +34,6 @@
 #define XSTATE_NONLAZY (XSTATE_LWP)
 #define XSTATE_LAZY    (XSTATE_ALL & ~XSTATE_NONLAZY)
 
-extern unsigned int xsave_cntxt_size;
 extern u64 xfeature_mask;
 
 /* extended state save area */
@@ -77,11 +76,14 @@ uint64_t get_xcr0(void);
 void xsave(struct vcpu *v, uint64_t mask);
 void xrstor(struct vcpu *v, uint64_t mask);
 bool_t xsave_enabled(const struct vcpu *v);
+int __must_check validate_xstate(u64 xcr0, u64 xcr0_accum, u64 xstate_bv,
+                                 u64 xfeat_mask);
 int __must_check handle_xsetbv(u32 index, u64 new_bv);
 
 /* extended state init and cleanup functions */
 void xstate_free_save_area(struct vcpu *v);
 int xstate_alloc_save_area(struct vcpu *v);
 void xstate_init(bool_t bsp);
+unsigned int xstate_ctxt_size(u64 xcr0);
 
 #endif /* __ASM_XSTATE_H */
--
generated by git-patchbot for /home/xen/git/xen.git#master

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.