[Xen-changelog] [xen-unstable] x86: Xsave support for PV guests.
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1288772120 0
# Node ID a3ec4b3b685e05e9890c37be0763b3c33f6dc722
# Parent ee4d52f0d16a91583d511c18aa679a7fec6be86c
x86: Xsave support for PV guests.

Signed-off-by: Shan Haitao <haitao.shan@xxxxxxxxx>
Signed-off-by: Han Weidong <weidong.han@xxxxxxxxx>
---
 tools/libxc/xc_cpuid_x86.c     |    1 
 xen/arch/x86/domain.c          |   30 +++++++++++++++++++++++-
 xen/arch/x86/hvm/hvm.c         |   13 ----------
 xen/arch/x86/hvm/vmx/vmx.c     |    8 ++----
 xen/arch/x86/i387.c            |   16 ++++++++----
 xen/arch/x86/traps.c           |   51 ++++++++++++++++++++++++++++++++++-------
 xen/include/asm-x86/domain.h   |   20 +++++++++++++++-
 xen/include/asm-x86/hvm/vcpu.h |    9 -------
 xen/include/asm-x86/i387.h     |   22 +++++++++++++----
 9 files changed, 122 insertions(+), 48 deletions(-)

diff -r ee4d52f0d16a -r a3ec4b3b685e tools/libxc/xc_cpuid_x86.c
--- a/tools/libxc/xc_cpuid_x86.c    Tue Nov 02 07:35:52 2010 +0000
+++ b/tools/libxc/xc_cpuid_x86.c    Wed Nov 03 08:15:20 2010 +0000
@@ -323,7 +323,6 @@ static void xc_cpuid_pv_policy(
         clear_bit(X86_FEATURE_XTPR, regs[2]);
         clear_bit(X86_FEATURE_PDCM, regs[2]);
         clear_bit(X86_FEATURE_DCA, regs[2]);
-        clear_bit(X86_FEATURE_XSAVE, regs[2]);
         set_bit(X86_FEATURE_HYPERVISOR, regs[2]);
         break;
     case 0x80000001:
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c    Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/arch/x86/domain.c    Wed Nov 03 08:15:20 2010 +0000
@@ -343,10 +343,26 @@ int vcpu_initialise(struct vcpu *v)
 
     paging_vcpu_init(v);
 
+    if ( cpu_has_xsave )
+    {
+        /* XSAVE/XRSTOR requires the save area be 64-byte-boundary aligned. */
+        void *xsave_area = _xmalloc(xsave_cntxt_size, 64);
+        if ( xsave_area == NULL )
+            return -ENOMEM;
+
+        xsave_init_save_area(xsave_area);
+        v->arch.xsave_area = xsave_area;
+        v->arch.xcr0 = XSTATE_FP_SSE;
+        v->arch.xcr0_accum = XSTATE_FP_SSE;
+    }
+
     if ( is_hvm_domain(d) )
     {
         if ( (rc = hvm_vcpu_initialise(v)) != 0 )
+        {
+            xfree(v->arch.xsave_area);
             return rc;
+        }
     }
     else
     {
@@ -376,13 +392,21 @@ int vcpu_initialise(struct vcpu *v)
 
     spin_lock_init(&v->arch.shadow_ldt_lock);
 
-    return (is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0);
+    rc = 0;
+    if ( is_pv_32on64_vcpu(v) )
+        rc = setup_compat_l4(v);
+    if ( rc )
+        xfree(v->arch.xsave_area);
+
+    return rc;
 }
 
 void vcpu_destroy(struct vcpu *v)
 {
     if ( is_pv_32on64_vcpu(v) )
         release_compat_l4(v);
+
+    xfree(v->arch.xsave_area);
 
     if ( is_hvm_vcpu(v) )
         hvm_vcpu_destroy(v);
@@ -592,6 +616,8 @@ unsigned long pv_guest_cr4_fixup(const s
         hv_cr4_mask &= ~X86_CR4_DE;
     if ( cpu_has_fsgsbase && !is_pv_32bit_domain(v->domain) )
         hv_cr4_mask &= ~X86_CR4_FSGSBASE;
+    if ( cpu_has_xsave )
+        hv_cr4_mask &= ~X86_CR4_OSXSAVE;
 
     if ( (guest_cr4 & hv_cr4_mask) != (hv_cr4 & hv_cr4_mask) )
         gdprintk(XENLOG_WARNING,
@@ -1367,6 +1393,8 @@ static void __context_switch(void)
         memcpy(stack_regs,
                &n->arch.guest_context.user_regs,
                CTXT_SWITCH_STACK_BYTES);
+        if ( cpu_has_xsave && n->arch.xcr0 != get_xcr0() )
+            set_xcr0(n->arch.xcr0);
         n->arch.ctxt_switch_to(n);
     }
 
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/arch/x86/hvm/hvm.c    Wed Nov 03 08:15:20 2010 +0000
@@ -805,18 +805,6 @@ int hvm_vcpu_initialise(struct vcpu *v)
 
     hvm_asid_flush_vcpu(v);
 
-    if ( cpu_has_xsave )
-    {
-        /* XSAVE/XRSTOR requires the save area be 64-byte-boundary aligned. */
-        void *xsave_area = _xmalloc(xsave_cntxt_size, 64);
-        if ( xsave_area == NULL )
-            return -ENOMEM;
-
-        xsave_init_save_area(xsave_area);
-        v->arch.hvm_vcpu.xsave_area = xsave_area;
-        v->arch.hvm_vcpu.xcr0 = XSTATE_FP_SSE;
-    }
-
     if ( (rc = vlapic_init(v)) != 0 )
         goto fail1;
 
@@ -879,7 +867,6 @@ void hvm_vcpu_destroy(struct vcpu *v)
     hvm_vcpu_cacheattr_destroy(v);
     vlapic_destroy(v);
     hvm_funcs.vcpu_destroy(v);
-    xfree(v->arch.hvm_vcpu.xsave_area);
 
     /* Event channel is already freed by evtchn_destroy(). */
     /*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c    Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/arch/x86/hvm/vmx/vmx.c    Wed Nov 03 08:15:20 2010 +0000
@@ -652,10 +652,7 @@ static void vmx_ctxt_switch_to(struct vc
     struct domain *d = v->domain;
     unsigned long old_cr4 = read_cr4(), new_cr4 = mmu_cr4_features;
 
-    /* HOST_CR4 in VMCS is always mmu_cr4_features and
-     * CR4_OSXSAVE(if supported). Sync CR4 now. */
-    if ( cpu_has_xsave )
-        new_cr4 |= X86_CR4_OSXSAVE;
+    /* HOST_CR4 in VMCS is always mmu_cr4_features. Sync CR4 now. */
     if ( old_cr4 != new_cr4 )
         write_cr4(new_cr4);
 
@@ -2215,7 +2212,8 @@ static int vmx_handle_xsetbv(u64 new_bv)
     if ( (xfeature_mask & XSTATE_YMM & new_bv) && !(new_bv & XSTATE_SSE) )
         goto err;
 
-    v->arch.hvm_vcpu.xcr0 = new_bv;
+    v->arch.xcr0 = new_bv;
+    v->arch.xcr0_accum |= new_bv;
     set_xcr0(new_bv);
     return 0;
 err:
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/arch/x86/i387.c
--- a/xen/arch/x86/i387.c    Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/arch/x86/i387.c    Wed Nov 03 08:15:20 2010 +0000
@@ -33,9 +33,14 @@ void save_init_fpu(struct vcpu *v)
     if ( cr0 & X86_CR0_TS )
         clts();
 
-    if ( cpu_has_xsave && is_hvm_vcpu(v) )
-    {
+    if ( cpu_has_xsave )
+    {
+        /* XCR0 normally represents what guest OS set. In case of Xen itself,
+         * we set all accumulated feature mask before doing save/restore.
+         */
+        set_xcr0(v->arch.xcr0_accum);
         xsave(v);
+        set_xcr0(v->arch.xcr0);
     }
     else if ( cpu_has_fxsr )
     {
@@ -144,6 +149,9 @@ u32 xsave_cntxt_size;
 /* A 64-bit bitmask of the XSAVE/XRSTOR features supported by processor. */
 u64 xfeature_mask;
 
+/* Cached xcr0 for fast read */
+DEFINE_PER_CPU(uint64_t, xcr0);
+
 void xsave_init(void)
 {
     u32 eax, ebx, ecx, edx;
@@ -171,13 +179,11 @@ void xsave_init(void)
     BUG_ON(ecx < min_size);
 
     /*
-     * We will only enable the features we know for hvm guest. Here we use
-     * set/clear CR4_OSXSAVE and re-run cpuid to get xsave_cntxt_size.
+     * Set CR4_OSXSAVE and run "cpuid" to get xsave_cntxt_size.
      */
     set_in_cr4(X86_CR4_OSXSAVE);
     set_xcr0(eax & XCNTXT_MASK);
     cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
-    clear_in_cr4(X86_CR4_OSXSAVE);
 
     if ( cpu == 0 )
     {
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c    Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/arch/x86/traps.c    Wed Nov 03 08:15:20 2010 +0000
@@ -795,7 +795,6 @@ static void pv_cpuid(struct cpu_user_reg
         __clear_bit(X86_FEATURE_XTPR % 32, &c);
         __clear_bit(X86_FEATURE_PDCM % 32, &c);
         __clear_bit(X86_FEATURE_DCA % 32, &c);
-        __clear_bit(X86_FEATURE_XSAVE % 32, &c);
         if ( !cpu_has_apic )
             __clear_bit(X86_FEATURE_X2APIC % 32, &c);
         __set_bit(X86_FEATURE_HYPERVISOR % 32, &c);
@@ -1715,7 +1714,7 @@ static int emulate_privileged_op(struct
     enum { lm_seg_none, lm_seg_fs, lm_seg_gs } lm_ovr = lm_seg_none;
     int rc;
     unsigned int port, i, data_sel, ar, data, bpmatch = 0;
-    unsigned int op_bytes, op_default, ad_bytes, ad_default;
+    unsigned int op_bytes, op_default, ad_bytes, ad_default, opsize_prefix= 0;
 #define rd_ad(reg) (ad_bytes >= sizeof(regs->reg) \
                     ? regs->reg \
                     : ad_bytes == 4 \
@@ -1751,6 +1750,7 @@ static int emulate_privileged_op(struct
         switch ( opcode = insn_fetch(u8, code_base, eip, code_limit) )
         {
         case 0x66: /* operand-size override */
+            opsize_prefix = 1;
            op_bytes = op_default ^ 6; /* switch between 2/4 bytes */
            continue;
         case 0x67: /* address-size override */
@@ -2051,13 +2051,48 @@ static int emulate_privileged_op(struct
         goto fail;
     switch ( opcode )
     {
-    case 0x1: /* RDTSCP */
-        if ( (v->arch.guest_context.ctrlreg[4] & X86_CR4_TSD) &&
-             !guest_kernel_mode(v, regs) )
+    case 0x1: /* RDTSCP and XSETBV */
+        switch ( insn_fetch(u8, code_base, eip, code_limit) )
+        {
+        case 0xf9: /* RDTSCP */
+            if ( (v->arch.guest_context.ctrlreg[4] & X86_CR4_TSD) &&
+                 !guest_kernel_mode(v, regs) )
+                goto fail;
+            pv_soft_rdtsc(v, regs, 1);
+            break;
+        case 0xd1: /* XSETBV */
+        {
+            u64 new_xfeature = (u32)regs->eax | ((u64)regs->edx << 32);
+
+            if ( lock || rep_prefix || opsize_prefix
+                 || !(v->arch.guest_context.ctrlreg[4] & X86_CR4_OSXSAVE) )
+            {
+                do_guest_trap(TRAP_invalid_op, regs, 0);
+                goto skip;
+            }
+
+            if ( !guest_kernel_mode(v, regs) )
+                goto fail;
+
+            switch ( (u32)regs->ecx )
+            {
+            case XCR_XFEATURE_ENABLED_MASK:
+                /* bit 0 of XCR0 must be set and reserved bit must not be set */
+                if ( !(new_xfeature & XSTATE_FP) || (new_xfeature & ~xfeature_mask) )
+                    goto fail;
+
+                v->arch.xcr0 = new_xfeature;
+                v->arch.xcr0_accum |= new_xfeature;
+                set_xcr0(new_xfeature);
+                break;
+            default:
+                goto fail;
+            }
+            break;
+        }
+        default:
             goto fail;
-        if ( insn_fetch(u8, code_base, eip, code_limit) != 0xf9 )
-            goto fail;
-        pv_soft_rdtsc(v, regs, 1);
+        }
         break;
 
     case 0x06: /* CLTS */
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h    Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/include/asm-x86/domain.h    Wed Nov 03 08:15:20 2010 +0000
@@ -400,6 +400,23 @@ struct arch_vcpu
     pagetable_t monitor_table;          /* (MFN) hypervisor PT (for HVM) */
     unsigned long cr3;                  /* (MA) value to install in HW CR3 */
 
+    /*
+     * The save area for Processor Extended States and the bitmask of the
+     * XSAVE/XRSTOR features. They are used by: 1) when a vcpu (which has
+     * dirtied FPU/SSE) is scheduled out we XSAVE the states here; 2) in
+     * #NM handler, we XRSTOR the states we XSAVE-ed;
+     */
+    void *xsave_area;
+    uint64_t xcr0;
+    /* Accumulated eXtended features mask for using XSAVE/XRESTORE by Xen
+     * itself, as we can never know whether guest OS depends on content
+     * preservation whenever guest OS clears one feature flag (for example,
+     * temporarily).
+     * However, processor should not be able to touch eXtended states before
+     * it explicitly enables it via xcr0.
+     */
+    uint64_t xcr0_accum;
+
     /* Current LDT details. */
     unsigned long shadow_ldt_mapcnt;
     spinlock_t shadow_ldt_lock;
@@ -435,7 +452,8 @@ unsigned long pv_guest_cr4_fixup(const s
 #define pv_guest_cr4_to_real_cr4(v)                         \
     (((v)->arch.guest_context.ctrlreg[4]                    \
       | (mmu_cr4_features & (X86_CR4_PGE | X86_CR4_PSE))    \
-      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0))         \
+      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)          \
+      | ((cpu_has_xsave)? X86_CR4_OSXSAVE : 0))             \
      & ~X86_CR4_DE)
 #define real_cr4_to_pv_guest_cr4(c) \
     ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | X86_CR4_OSXSAVE))
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h    Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/include/asm-x86/hvm/vcpu.h    Wed Nov 03 08:15:20 2010 +0000
@@ -48,15 +48,6 @@ struct hvm_vcpu {
      * CR3: Always used and kept up to date by paging subsystem.
      */
     unsigned long hw_cr[5];
-
-    /*
-     * The save area for Processor Extended States and the bitmask of the
-     * XSAVE/XRSTOR features. They are used by: 1) when a vcpu (which has
-     * dirtied FPU/SSE) is scheduled out we XSAVE the states here; 2) in
-     * #NM handler, we XRSTOR the states we XSAVE-ed;
-     */
-    void *xsave_area;
-    uint64_t xcr0;
 
     struct vlapic vlapic;
     s64 cache_tsc_offset;
diff -r ee4d52f0d16a -r a3ec4b3b685e xen/include/asm-x86/i387.h
--- a/xen/include/asm-x86/i387.h    Tue Nov 02 07:35:52 2010 +0000
+++ b/xen/include/asm-x86/i387.h    Wed Nov 03 08:15:20 2010 +0000
@@ -49,6 +49,8 @@ struct xsave_struct
 #define REX_PREFIX
 #endif
 
+DECLARE_PER_CPU(uint64_t, xcr0);
+
 static inline void xsetbv(u32 index, u64 xfeatures)
 {
     u32 hi = xfeatures >> 32;
@@ -60,14 +62,20 @@ static inline void xsetbv(u32 index, u64
 
 static inline void set_xcr0(u64 xfeatures)
 {
+    this_cpu(xcr0) = xfeatures;
     xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures);
+}
+
+static inline uint64_t get_xcr0(void)
+{
+    return this_cpu(xcr0);
 }
 
 static inline void xsave(struct vcpu *v)
 {
     struct xsave_struct *ptr;
 
-    ptr =(struct xsave_struct *)v->arch.hvm_vcpu.xsave_area;
+    ptr =(struct xsave_struct *)v->arch.xsave_area;
 
     asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27"
                   :
@@ -79,7 +87,7 @@ static inline void xrstor(struct vcpu *v
 {
     struct xsave_struct *ptr;
 
-    ptr =(struct xsave_struct *)v->arch.hvm_vcpu.xsave_area;
+    ptr =(struct xsave_struct *)v->arch.xsave_area;
 
     asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f"
                   :
@@ -108,14 +116,18 @@ static inline void setup_fpu(struct vcpu
 
     if ( !v->fpu_dirtied )
     {
         v->fpu_dirtied = 1;
-        if ( cpu_has_xsave && is_hvm_vcpu(v) )
+        if ( cpu_has_xsave )
         {
             if ( !v->fpu_initialised )
                 v->fpu_initialised = 1;
-            set_xcr0(v->arch.hvm_vcpu.xcr0 | XSTATE_FP_SSE);
+            /* XCR0 normally represents what guest OS set. In case of Xen
+             * itself, we set all supported feature mask before doing
+             * save/restore.
+             */
+            set_xcr0(v->arch.xcr0_accum);
             xrstor(v);
-            set_xcr0(v->arch.hvm_vcpu.xcr0);
+            set_xcr0(v->arch.xcr0);
         }
         else
         {

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog