
[Xen-changelog] [xen-unstable] x86: Xsave support for PV guests.



# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1288772120 0
# Node ID a3ec4b3b685e05e9890c37be0763b3c33f6dc722
# Parent  ee4d52f0d16a91583d511c18aa679a7fec6be86c
x86: Xsave support for PV guests.

Signed-off-by: Shan Haitao <haitao.shan@xxxxxxxxx>
Signed-off-by: Han Weidong <weidong.han@xxxxxxxxx>
---
 tools/libxc/xc_cpuid_x86.c     |    1 
 xen/arch/x86/domain.c          |   30 +++++++++++++++++++++++-
 xen/arch/x86/hvm/hvm.c         |   13 ----------
 xen/arch/x86/hvm/vmx/vmx.c     |    8 ++----
 xen/arch/x86/i387.c            |   16 ++++++++----
 xen/arch/x86/traps.c           |   51 ++++++++++++++++++++++++++++++++++-------
 xen/include/asm-x86/domain.h   |   20 +++++++++++++++-
 xen/include/asm-x86/hvm/vcpu.h |    9 -------
 xen/include/asm-x86/i387.h     |   22 +++++++++++++----
 9 files changed, 122 insertions(+), 48 deletions(-)

diff -r ee4d52f0d16a -r a3ec4b3b685e tools/libxc/xc_cpuid_x86.c
--- a/tools/libxc/xc_cpuid_x86.c        Tue Nov 02 07:35:52 2010 +0000
+++ b/tools/libxc/xc_cpuid_x86.c        Wed Nov 03 08:15:20 2010 +0000
@@ -323,7 +323,6 @@ static void xc_cpuid_pv_policy(
         clear_bit(X86_FEATURE_XTPR, regs[2]);
         clear_bit(X86_FEATURE_PDCM, regs[2]);
         clear_bit(X86_FEATURE_DCA, regs[2]);
-        clear_bit(X86_FEATURE_XSAVE, regs[2]);
         set_bit(X86_FEATURE_HYPERVISOR, regs[2]);
         break;
     case 0x80000001:
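
With XSAVE no longer cleared from the PV CPUID policy, a PV kernel can detect the
feature the same way native code does: CPUID leaf 1 reports XSAVE support in ECX
bit 26, and ECX bit 27 (OSXSAVE) reflects whether the OS has set CR4.OSXSAVE. A
minimal user-space sketch of that check, using GCC's <cpuid.h> (the program is
illustrative only and not part of this patch):

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if ( !__get_cpuid(1, &eax, &ebx, &ecx, &edx) )
            return 1;

        /* CPUID.1:ECX bit 26 = XSAVE supported, bit 27 = CR4.OSXSAVE set by OS. */
        printf("XSAVE supported: %s\n", (ecx & (1u << 26)) ? "yes" : "no");
        printf("OSXSAVE enabled: %s\n", (ecx & (1u << 27)) ? "yes" : "no");
        return 0;
    }
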
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/arch/x86/domain.c     Wed Nov 03 08:15:20 2010 +0000
@@ -343,10 +343,26 @@ int vcpu_initialise(struct vcpu *v)
 
     paging_vcpu_init(v);
 
+    if ( cpu_has_xsave )
+    {
+        /* XSAVE/XRSTOR requires the save area be 64-byte-boundary aligned. */
+        void *xsave_area = _xmalloc(xsave_cntxt_size, 64);
+        if ( xsave_area == NULL )
+            return -ENOMEM;
+
+        xsave_init_save_area(xsave_area);
+        v->arch.xsave_area = xsave_area;
+        v->arch.xcr0 = XSTATE_FP_SSE;
+        v->arch.xcr0_accum = XSTATE_FP_SSE;
+    }
+
     if ( is_hvm_domain(d) )
     {
         if ( (rc = hvm_vcpu_initialise(v)) != 0 )
+        {
+            xfree(v->arch.xsave_area);
             return rc;
+        }
     }
     else
     {
@@ -376,13 +392,21 @@ int vcpu_initialise(struct vcpu *v)
 
     spin_lock_init(&v->arch.shadow_ldt_lock);
 
-    return (is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0);
+    rc = 0;
+    if ( is_pv_32on64_vcpu(v) )
+        rc = setup_compat_l4(v);
+    if ( rc )
+        xfree(v->arch.xsave_area);
+
+    return rc;
 }
 
 void vcpu_destroy(struct vcpu *v)
 {
     if ( is_pv_32on64_vcpu(v) )
         release_compat_l4(v);
+
+    xfree(v->arch.xsave_area);
 
     if ( is_hvm_vcpu(v) )
         hvm_vcpu_destroy(v);
@@ -592,6 +616,8 @@ unsigned long pv_guest_cr4_fixup(const s
         hv_cr4_mask &= ~X86_CR4_DE;
     if ( cpu_has_fsgsbase && !is_pv_32bit_domain(v->domain) )
         hv_cr4_mask &= ~X86_CR4_FSGSBASE;
+    if ( cpu_has_xsave )
+        hv_cr4_mask &= ~X86_CR4_OSXSAVE;
 
     if ( (guest_cr4 & hv_cr4_mask) != (hv_cr4 & hv_cr4_mask) )
         gdprintk(XENLOG_WARNING,
@@ -1367,6 +1393,8 @@ static void __context_switch(void)
         memcpy(stack_regs,
                &n->arch.guest_context.user_regs,
                CTXT_SWITCH_STACK_BYTES);
+        if ( cpu_has_xsave && n->arch.xcr0 != get_xcr0() )
+            set_xcr0(n->arch.xcr0);
         n->arch.ctxt_switch_to(n);
     }
 
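
The allocation above uses _xmalloc(xsave_cntxt_size, 64) because XSAVE/XRSTOR
require a 64-byte-aligned save area; an unaligned pointer faults. Outside the
hypervisor the same constraint can be reproduced with posix_memalign(); a rough
user-space analogue follows (the size value is a placeholder, and the zeroing
merely stands in for Xen's xsave_init_save_area()):

    #include <stdlib.h>
    #include <string.h>
    #include <assert.h>

    int main(void)
    {
        size_t xsave_cntxt_size = 832;   /* placeholder; the real value comes from CPUID leaf 0xD */
        void *xsave_area = NULL;

        /* XSAVE/XRSTOR fault if the save area is not 64-byte aligned. */
        if ( posix_memalign(&xsave_area, 64, xsave_cntxt_size) != 0 )
            return 1;

        assert(((unsigned long)xsave_area & 63) == 0);
        memset(xsave_area, 0, xsave_cntxt_size);   /* start from a clean, known state */

        free(xsave_area);
        return 0;
    }
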
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/arch/x86/hvm/hvm.c    Wed Nov 03 08:15:20 2010 +0000
@@ -805,18 +805,6 @@ int hvm_vcpu_initialise(struct vcpu *v)
 
     hvm_asid_flush_vcpu(v);
 
-    if ( cpu_has_xsave )
-    {
-        /* XSAVE/XRSTOR requires the save area be 64-byte-boundary aligned. */
-        void *xsave_area = _xmalloc(xsave_cntxt_size, 64);
-        if ( xsave_area == NULL )
-            return -ENOMEM;
-
-        xsave_init_save_area(xsave_area);
-        v->arch.hvm_vcpu.xsave_area = xsave_area;
-        v->arch.hvm_vcpu.xcr0 = XSTATE_FP_SSE;
-    }
-
     if ( (rc = vlapic_init(v)) != 0 )
         goto fail1;
 
@@ -879,7 +867,6 @@ void hvm_vcpu_destroy(struct vcpu *v)
     hvm_vcpu_cacheattr_destroy(v);
     vlapic_destroy(v);
     hvm_funcs.vcpu_destroy(v);
-    xfree(v->arch.hvm_vcpu.xsave_area);
 
     /* Event channel is already freed by evtchn_destroy(). */
     /*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Wed Nov 03 08:15:20 2010 +0000
@@ -652,10 +652,7 @@ static void vmx_ctxt_switch_to(struct vc
     struct domain *d = v->domain;
     unsigned long old_cr4 = read_cr4(), new_cr4 = mmu_cr4_features;
 
-    /* HOST_CR4 in VMCS is always mmu_cr4_features and
-     * CR4_OSXSAVE(if supported). Sync CR4 now. */
-    if ( cpu_has_xsave )
-        new_cr4 |= X86_CR4_OSXSAVE;
+    /* HOST_CR4 in VMCS is always mmu_cr4_features. Sync CR4 now. */
     if ( old_cr4 != new_cr4 )
         write_cr4(new_cr4);
 
@@ -2215,7 +2212,8 @@ static int vmx_handle_xsetbv(u64 new_bv)
     if ( (xfeature_mask & XSTATE_YMM & new_bv) && !(new_bv & XSTATE_SSE) )
         goto err;
 
-    v->arch.hvm_vcpu.xcr0 = new_bv;
+    v->arch.xcr0 = new_bv;
+    v->arch.xcr0_accum |= new_bv;
     set_xcr0(new_bv);
     return 0;
 err:
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/arch/x86/i387.c
--- a/xen/arch/x86/i387.c       Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/arch/x86/i387.c       Wed Nov 03 08:15:20 2010 +0000
@@ -33,9 +33,14 @@ void save_init_fpu(struct vcpu *v)
     if ( cr0 & X86_CR0_TS )
         clts();
 
-    if ( cpu_has_xsave && is_hvm_vcpu(v) )
-    {
+    if ( cpu_has_xsave )
+    {
+        /* XCR0 normally represents what the guest OS set. In the case of Xen
+         * itself, we set the accumulated feature mask before save/restore.
+         */
+        set_xcr0(v->arch.xcr0_accum);
         xsave(v);
+        set_xcr0(v->arch.xcr0);
     }
     else if ( cpu_has_fxsr )
     {
@@ -144,6 +149,9 @@ u32 xsave_cntxt_size;
 /* A 64-bit bitmask of the XSAVE/XRSTOR features supported by processor. */
 u64 xfeature_mask;
 
+/* Cached xcr0 for fast read */
+DEFINE_PER_CPU(uint64_t, xcr0);
+
 void xsave_init(void)
 {
     u32 eax, ebx, ecx, edx;
@@ -171,13 +179,11 @@ void xsave_init(void)
     BUG_ON(ecx < min_size);
 
     /*
-     * We will only enable the features we know for hvm guest. Here we use
-     * set/clear CR4_OSXSAVE and re-run cpuid to get xsave_cntxt_size.
+     * Set CR4_OSXSAVE and run "cpuid" to get xsave_cntxt_size.
      */
     set_in_cr4(X86_CR4_OSXSAVE);
     set_xcr0(eax & XCNTXT_MASK);
     cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
-    clear_in_cr4(X86_CR4_OSXSAVE);
 
     if ( cpu == 0 )
     {
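
xsave_init() sizes the save area by enabling CR4.OSXSAVE, loading XCR0, and
re-reading CPUID leaf 0xD (XSTATE_CPUID): sub-leaf 0 returns the supported
feature mask in EDX:EAX, the size needed for the features currently enabled in
XCR0 in EBX, and the maximum size for all supported features in ECX. Once an OS
has enabled OSXSAVE, the same leaf can be read from user space; a small sketch
with GCC's __cpuid_count macro (illustrative only, not part of the patch):

    #include <cpuid.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* CPUID.(EAX=0xD, ECX=0): extended state enumeration main leaf. */
        __cpuid_count(0xd, 0, eax, ebx, ecx, edx);

        uint64_t supported = ((uint64_t)edx << 32) | eax;
        printf("supported xfeature mask:              %#llx\n", (unsigned long long)supported);
        printf("size for features enabled in XCR0:    %u bytes\n", ebx);
        printf("max size for all supported features:  %u bytes\n", ecx);
        return 0;
    }
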
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/arch/x86/traps.c      Wed Nov 03 08:15:20 2010 +0000
@@ -795,7 +795,6 @@ static void pv_cpuid(struct cpu_user_reg
         __clear_bit(X86_FEATURE_XTPR % 32, &c);
         __clear_bit(X86_FEATURE_PDCM % 32, &c);
         __clear_bit(X86_FEATURE_DCA % 32, &c);
-        __clear_bit(X86_FEATURE_XSAVE % 32, &c);
         if ( !cpu_has_apic )
            __clear_bit(X86_FEATURE_X2APIC % 32, &c);
         __set_bit(X86_FEATURE_HYPERVISOR % 32, &c);
@@ -1715,7 +1714,7 @@ static int emulate_privileged_op(struct 
     enum { lm_seg_none, lm_seg_fs, lm_seg_gs } lm_ovr = lm_seg_none;
     int rc;
     unsigned int port, i, data_sel, ar, data, bpmatch = 0;
-    unsigned int op_bytes, op_default, ad_bytes, ad_default;
+    unsigned int op_bytes, op_default, ad_bytes, ad_default, opsize_prefix = 0;
 #define rd_ad(reg) (ad_bytes >= sizeof(regs->reg) \
                     ? regs->reg \
                     : ad_bytes == 4 \
@@ -1751,6 +1750,7 @@ static int emulate_privileged_op(struct 
         switch ( opcode = insn_fetch(u8, code_base, eip, code_limit) )
         {
         case 0x66: /* operand-size override */
+            opsize_prefix = 1;
             op_bytes = op_default ^ 6; /* switch between 2/4 bytes */
             continue;
         case 0x67: /* address-size override */
@@ -2051,13 +2051,48 @@ static int emulate_privileged_op(struct 
         goto fail;
     switch ( opcode )
     {
-    case 0x1: /* RDTSCP */
-        if ( (v->arch.guest_context.ctrlreg[4] & X86_CR4_TSD) &&
-             !guest_kernel_mode(v, regs) )
+    case 0x1: /* RDTSCP and XSETBV */
+        switch ( insn_fetch(u8, code_base, eip, code_limit) )
+        {
+        case 0xf9: /* RDTSCP */
+            if ( (v->arch.guest_context.ctrlreg[4] & X86_CR4_TSD) &&
+                 !guest_kernel_mode(v, regs) )
+                goto fail;
+            pv_soft_rdtsc(v, regs, 1);
+            break;
+        case 0xd1: /* XSETBV */
+        {
+            u64 new_xfeature = (u32)regs->eax | ((u64)regs->edx << 32);
+
+            if ( lock || rep_prefix || opsize_prefix
+                 || !(v->arch.guest_context.ctrlreg[4] & X86_CR4_OSXSAVE) )
+            {
+                do_guest_trap(TRAP_invalid_op, regs, 0);
+                goto skip;
+            }
+
+            if ( !guest_kernel_mode(v, regs) )
+                goto fail;
+
+            switch ( (u32)regs->ecx )
+            {
+                case XCR_XFEATURE_ENABLED_MASK:
+                    /* bit 0 of XCR0 must be set and reserved bit must not be set */
+                    if ( !(new_xfeature & XSTATE_FP) || (new_xfeature & ~xfeature_mask) )
+                        goto fail;
+
+                    v->arch.xcr0 = new_xfeature;
+                    v->arch.xcr0_accum |= new_xfeature;
+                    set_xcr0(new_xfeature);
+                    break;
+                default:
+                    goto fail;
+            }
+            break;
+        }
+        default:
             goto fail;
-        if ( insn_fetch(u8, code_base, eip, code_limit) != 0xf9 )
-            goto fail;
-        pv_soft_rdtsc(v, regs, 1);
+        }
         break;
 
     case 0x06: /* CLTS */
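
The XSETBV emulation above applies the same checks the hardware applies to XCR0
writes: bit 0 (x87) must be set and no bit outside the platform's supported mask
may be set; the companion vmx_handle_xsetbv() change additionally refuses YMM
without SSE. Collected into one stand-alone helper, the validation might look
like the sketch below (the function name xcr0_is_valid and the main() driver are
invented for illustration; the XSTATE_* bit positions are architectural):

    #include <stdbool.h>
    #include <stdint.h>

    #define XSTATE_FP   (1ULL << 0)   /* x87 state     */
    #define XSTATE_SSE  (1ULL << 1)   /* SSE/XMM state */
    #define XSTATE_YMM  (1ULL << 2)   /* AVX/YMM state */

    /* Return true if new_bv is an acceptable XCR0 value given the
     * feature mask the platform advertises (xfeature_mask in Xen). */
    static bool xcr0_is_valid(uint64_t new_bv, uint64_t supported_mask)
    {
        if ( !(new_bv & XSTATE_FP) )                           /* bit 0 is mandatory   */
            return false;
        if ( new_bv & ~supported_mask )                        /* no unsupported bits  */
            return false;
        if ( (new_bv & XSTATE_YMM) && !(new_bv & XSTATE_SSE) ) /* YMM requires SSE     */
            return false;
        return true;
    }

    int main(void)
    {
        uint64_t mask = XSTATE_FP | XSTATE_SSE | XSTATE_YMM;
        return xcr0_is_valid(XSTATE_FP | XSTATE_SSE, mask) ? 0 : 1;
    }
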
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/include/asm-x86/domain.h      Wed Nov 03 08:15:20 2010 +0000
@@ -400,6 +400,23 @@ struct arch_vcpu
     pagetable_t monitor_table;          /* (MFN) hypervisor PT (for HVM) */
     unsigned long cr3;                  /* (MA) value to install in HW CR3 */
 
+    /*
+     * The save area for Processor Extended States and the bitmask of the
+     * XSAVE/XRSTOR features. They are used as follows: 1) when a vcpu (which
+     * has dirtied FPU/SSE state) is scheduled out, we XSAVE the state here;
+     * 2) in the #NM handler, we XRSTOR the state we XSAVE-ed.
+     */
+    void *xsave_area;
+    uint64_t xcr0;
+    /* Accumulated eXtended features mask used by Xen itself for XSAVE/XRSTOR,
+     * as we can never know whether the guest OS depends on content
+     * preservation whenever the guest OS clears one feature flag (for
+     * example, temporarily).
+     * However, the guest should not be able to touch eXtended state before
+     * it explicitly enables it via xcr0.
+     */
+    uint64_t xcr0_accum;
+
     /* Current LDT details. */
     unsigned long shadow_ldt_mapcnt;
     spinlock_t shadow_ldt_lock;
@@ -435,7 +452,8 @@ unsigned long pv_guest_cr4_fixup(const s
 #define pv_guest_cr4_to_real_cr4(v)                         \
     (((v)->arch.guest_context.ctrlreg[4]                    \
       | (mmu_cr4_features & (X86_CR4_PGE | X86_CR4_PSE))    \
-      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0))         \
+      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)         \
+      | ((cpu_has_xsave)? X86_CR4_OSXSAVE : 0))              \
       & ~X86_CR4_DE)
 #define real_cr4_to_pv_guest_cr4(c) \
     ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | X86_CR4_OSXSAVE))
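
xcr0_accum exists alongside xcr0 because of the save/restore bracketing used in
i387.c: a guest may enable a feature, use it, and later clear its XCR0 bit while
still expecting the saved state to survive a context switch, so Xen saves and
restores with the union of everything the guest ever enabled and runs the guest
with only its current xcr0. A schematic of that pattern, with stand-in stubs for
set_xcr0()/xsave()/xrstor() (the stubs and struct vcpu_xstate are illustrative,
not Xen's implementation):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t hw_xcr0;                       /* stand-in for the real XCR0 */
    static void set_xcr0(uint64_t v) { hw_xcr0 = v; }
    static void xsave(void)  { printf("xsave  with XCR0=%#llx\n", (unsigned long long)hw_xcr0); }
    static void xrstor(void) { printf("xrstor with XCR0=%#llx\n", (unsigned long long)hw_xcr0); }

    struct vcpu_xstate {
        uint64_t xcr0;        /* what the guest currently has in XCR0     */
        uint64_t xcr0_accum;  /* every bit the guest has ever set in XCR0 */
    };

    static void guest_xsetbv(struct vcpu_xstate *v, uint64_t new_bv)
    {
        v->xcr0 = new_bv;
        v->xcr0_accum |= new_bv;   /* never forget a feature the guest used */
        set_xcr0(new_bv);
    }

    static void save_guest_state(struct vcpu_xstate *v)
    {
        set_xcr0(v->xcr0_accum);   /* widen XCR0 so all dirtied state is saved */
        xsave();
        set_xcr0(v->xcr0);         /* narrow back to what the guest expects */
    }

    static void restore_guest_state(struct vcpu_xstate *v)
    {
        set_xcr0(v->xcr0_accum);   /* restore everything that was saved */
        xrstor();
        set_xcr0(v->xcr0);
    }

    int main(void)
    {
        struct vcpu_xstate v = { 0, 0 };
        guest_xsetbv(&v, 0x7);     /* guest enables x87+SSE+YMM             */
        guest_xsetbv(&v, 0x3);     /* guest drops YMM, YMM state must persist */
        save_guest_state(&v);      /* saved with xcr0_accum = 0x7            */
        restore_guest_state(&v);
        return 0;
    }
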
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h    Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/include/asm-x86/hvm/vcpu.h    Wed Nov 03 08:15:20 2010 +0000
@@ -48,15 +48,6 @@ struct hvm_vcpu {
      *  CR3:      Always used and kept up to date by paging subsystem.
      */
     unsigned long       hw_cr[5];
-
-    /*
-     * The save area for Processor Extended States and the bitmask of the
-     * XSAVE/XRSTOR features. They are used by: 1) when a vcpu (which has
-     * dirtied FPU/SSE) is scheduled out we XSAVE the states here; 2) in
-     * #NM handler, we XRSTOR the states we XSAVE-ed;
-     */
-    void *xsave_area;
-    uint64_t xcr0;
 
     struct vlapic       vlapic;
     s64                 cache_tsc_offset;
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/include/asm-x86/i387.h
--- a/xen/include/asm-x86/i387.h        Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/include/asm-x86/i387.h        Wed Nov 03 08:15:20 2010 +0000
@@ -49,6 +49,8 @@ struct xsave_struct
 #define REX_PREFIX
 #endif
 
+DECLARE_PER_CPU(uint64_t, xcr0);
+
 static inline void xsetbv(u32 index, u64 xfeatures)
 {
     u32 hi = xfeatures >> 32;
@@ -60,14 +62,20 @@ static inline void xsetbv(u32 index, u64
 
 static inline void set_xcr0(u64 xfeatures)
 {
+    this_cpu(xcr0) = xfeatures;
     xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures);
+}
+
+static inline uint64_t get_xcr0(void)
+{
+    return this_cpu(xcr0);
 }
 
 static inline void xsave(struct vcpu *v)
 {
     struct xsave_struct *ptr;
 
-    ptr =(struct xsave_struct *)v->arch.hvm_vcpu.xsave_area;
+    ptr =(struct xsave_struct *)v->arch.xsave_area;
 
     asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27"
         :
@@ -79,7 +87,7 @@ static inline void xrstor(struct vcpu *v
 {
     struct xsave_struct *ptr;
 
-    ptr =(struct xsave_struct *)v->arch.hvm_vcpu.xsave_area;
+    ptr =(struct xsave_struct *)v->arch.xsave_area;
 
     asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f"
         :
@@ -108,14 +116,18 @@ static inline void setup_fpu(struct vcpu
     if ( !v->fpu_dirtied )
     {
         v->fpu_dirtied = 1;
-        if ( cpu_has_xsave && is_hvm_vcpu(v) )
+        if ( cpu_has_xsave )
         {
             if ( !v->fpu_initialised )
                 v->fpu_initialised = 1;
 
-            set_xcr0(v->arch.hvm_vcpu.xcr0 | XSTATE_FP_SSE);
+            /* XCR0 normally represents what the guest OS set. In the case of
+             * Xen itself, we set the accumulated feature mask before doing
+             * save/restore.
+             */
+            set_xcr0(v->arch.xcr0_accum);
             xrstor(v);
-            set_xcr0(v->arch.hvm_vcpu.xcr0);
+            set_xcr0(v->arch.xcr0);
         }
         else
         {
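
get_xcr0() above returns the per-CPU value cached by set_xcr0() rather than
executing XGETBV on every read. When the hardware value itself is wanted (for
example, to sanity-check from user space once OSXSAVE is enabled), XCR0 can be
read with XGETBV, encoded 0x0f 0x01 0xd0 with ECX selecting register 0, in the
same .byte style this patch uses for XSAVE/XRSTOR. A small illustrative sketch,
not part of the patch:

    #include <stdint.h>
    #include <stdio.h>

    /* Read an extended control register; index 0 is XCR0.
     * XGETBV (0x0f 0x01 0xd0) is legal in user mode once the OS
     * has set CR4.OSXSAVE. */
    static inline uint64_t xgetbv(uint32_t index)
    {
        uint32_t lo, hi;
        asm volatile ( ".byte 0x0f,0x01,0xd0"
                       : "=a" (lo), "=d" (hi)
                       : "c" (index) );
        return ((uint64_t)hi << 32) | lo;
    }

    int main(void)
    {
        /* Raises SIGILL if XSAVE/OSXSAVE is not enabled on this CPU. */
        printf("XCR0 = %#llx\n", (unsigned long long)xgetbv(0));
        return 0;
    }
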



 

