[Xen-devel] [V5 2/4] x86/xsaves: enable xsaves/xrstors/xsavec in xen
This patch uses xsaves/xrstors instead of xsaveopt/xrstor to perform the
xsave_area switching, so that Xen itself can benefit from them when
available. Since xsaves/xrstors operate on the compacted format only,
format conversion support is added for use during guest migration.

Signed-off-by: Shuai Ruan <shuai.ruan@xxxxxxxxxxxxxxx>
---
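A note on the compacted format relied on above: with xsavec/xsaves, a
component is no longer stored at the fixed offset that
CPUID.(EAX=0xD,ECX=i).EBX reports for the standard layout. Components 0
and 1 (x87, SSE) stay in the legacy 512-byte region; every other enabled
component is packed in ascending bit order of xcomp_bv, sized by
CPUID.(EAX=0xD,ECX=i).EAX and aligned to a 64-byte boundary when
CPUID.(EAX=0xD,ECX=i).ECX bit 1 is set. setup_xstate_comp(), called from
xstate_init() below and added elsewhere in this series, precomputes these
offsets. The following stand-alone user-space sketch of the same
computation is illustrative only; it is not Xen code, and it assumes a
recent GCC or clang for <cpuid.h> and __get_cpuid_count():

#include <stdint.h>
#include <stdio.h>
#include <cpuid.h>

#define XSTATE_CPUID 0x0000000d

int main(void)
{
    uint32_t eax, ebx, ecx, edx;
    /* Legacy FXSAVE region (512 bytes) plus the xsave header (64 bytes). */
    uint32_t offset = 512 + 64;

    /* Components 0/1 (x87, SSE) live in the legacy region; walk the rest,
     * assuming every enumerated component is enabled in xcomp_bv. */
    for ( unsigned int i = 2; i < 63; i++ )
    {
        if ( !__get_cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx) ||
             !eax )
            continue;      /* component i not enumerated on this CPU */
        if ( ecx & 2 )     /* ECX bit 1: 64-byte alignment when compacted */
            offset = (offset + 63) & ~63u;
        printf("xstate %2u: size %4u, compacted offset %4u\n", i, eax, offset);
        offset += eax;
    }
    return 0;
}

Run on an XSAVEC-capable machine, this should print the per-component
offsets corresponding to what setup_xstate_comp() derives.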
 xen/arch/x86/domain.c        |   3 +
 xen/arch/x86/domctl.c        |  38 +++++++++++--
 xen/arch/x86/hvm/hvm.c       |  21 +++++--
 xen/arch/x86/i387.c          |   4 ++
 xen/arch/x86/traps.c         |   7 +--
 xen/arch/x86/xstate.c        | 132 ++++++++++++++++++++++++++++++-------------
 xen/include/asm-x86/xstate.h |   4 --
 7 files changed, 151 insertions(+), 58 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 045f6ff..b25094b 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1529,6 +1529,9 @@ static void __context_switch(void)
             if ( xcr0 != get_xcr0() && !set_xcr0(xcr0) )
                 BUG();
         }
+        if ( cpu_has_xsaves )
+            if ( is_hvm_vcpu(n) )
+                set_msr_xss(n->arch.hvm_vcpu.msr_xss);
         vcpu_restore_fpu_eager(n);
         n->arch.ctxt_switch_to(n);
     }
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index bf62a88..e2cd0d4 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -867,7 +867,7 @@ long arch_do_domctl(
         if ( domctl->cmd == XEN_DOMCTL_getvcpuextstate )
         {
             unsigned int size;
-
+            void * xsave_area;
             ret = 0;
             vcpu_pause(v);
@@ -896,9 +896,30 @@ long arch_do_domctl(
                 ret = -EFAULT;

             offset += sizeof(v->arch.xcr0_accum);
-            if ( !ret && copy_to_guest_offset(evc->buffer, offset,
-                                              (void *)v->arch.xsave_area,
-                                              size - 2 * sizeof(uint64_t)) )
+
+            if ( !ret && (cpu_has_xsaves || cpu_has_xsavec) &&
+                 xsave_area_compressed(v->arch.xsave_area) )
+            {
+                xsave_area = xmalloc_bytes(size);
+                if ( !xsave_area )
+                {
+                    ret = -ENOMEM;
+                    vcpu_unpause(v);
+                    goto vcpuextstate_out;
+                }
+
+                save_xsave_states(v, xsave_area,
+                                  evc->size - 2*sizeof(uint64_t));
+
+                if ( !ret && copy_to_guest_offset(evc->buffer, offset,
+                                                  xsave_area, size -
+                                                  2 * sizeof(uint64_t)) )
+                    ret = -EFAULT;
+                xfree(xsave_area);
+            }
+            else if ( !ret && copy_to_guest_offset(evc->buffer, offset,
+                                                   (void *)v->arch.xsave_area,
+                                                   size - 2 * sizeof(uint64_t)) )
                 ret = -EFAULT;

             vcpu_unpause(v);
@@ -954,8 +975,13 @@ long arch_do_domctl(
             v->arch.xcr0_accum = _xcr0_accum;
             if ( _xcr0_accum & XSTATE_NONLAZY )
                 v->arch.nonlazy_xstate_used = 1;
-            memcpy(v->arch.xsave_area, _xsave_area,
-                   evc->size - 2 * sizeof(uint64_t));
+            if ( (cpu_has_xsaves || cpu_has_xsavec) &&
+                 !xsave_area_compressed(_xsave_area) )
+                load_xsave_states(v, _xsave_area,
+                                  evc->size - 2*sizeof(uint64_t));
+            else
+                memcpy(v->arch.xsave_area, (void *)_xsave_area,
+                       evc->size - 2 * sizeof(uint64_t));
             vcpu_unpause(v);
         }
         else
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 615fa89..ad0a53b 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2148,8 +2148,13 @@ static int hvm_save_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h)
         ctxt->xfeature_mask = xfeature_mask;
         ctxt->xcr0 = v->arch.xcr0;
         ctxt->xcr0_accum = v->arch.xcr0_accum;
-        memcpy(&ctxt->save_area, v->arch.xsave_area,
-               size - offsetof(struct hvm_hw_cpu_xsave, save_area));
+        if ( (cpu_has_xsaves || cpu_has_xsavec) &&
+             (xsave_area_compressed(v->arch.xsave_area)) )
+            save_xsave_states(v, &ctxt->save_area,
+                              size - offsetof(typeof(*ctxt), save_area));
+        else
+            memcpy(&ctxt->save_area, v->arch.xsave_area,
+                   size - offsetof(struct hvm_hw_cpu_xsave, save_area));
     }

     return 0;
@@ -2248,9 +2253,15 @@ static int hvm_load_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h)
     v->arch.xcr0_accum = ctxt->xcr0_accum;
     if ( ctxt->xcr0_accum & XSTATE_NONLAZY )
         v->arch.nonlazy_xstate_used = 1;
-    memcpy(v->arch.xsave_area, &ctxt->save_area,
-           min(desc->length, size) - offsetof(struct hvm_hw_cpu_xsave,
-           save_area));
+    if ( (cpu_has_xsaves || cpu_has_xsavec) &&
+         !xsave_area_compressed((struct xsave_struct *)&ctxt->save_area) )
+        load_xsave_states(v, &ctxt->save_area,
+                          min(desc->length, size) -
+                          offsetof(struct hvm_hw_cpu_xsave,save_area));
+    else
+        memcpy(v->arch.xsave_area, &ctxt->save_area,
+               min(desc->length, size) - offsetof(struct hvm_hw_cpu_xsave,
+               save_area));

     return 0;
 }
diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
index 14f2a79..736197f 100644
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -309,7 +309,11 @@ int vcpu_init_fpu(struct vcpu *v)
         return rc;

     if ( v->arch.xsave_area )
+    {
         v->arch.fpu_ctxt = &v->arch.xsave_area->fpu_sse;
+        if ( cpu_has_xsaves || cpu_has_xsavec )
+            v->arch.xsave_area->xsave_hdr.xcomp_bv |= XSTATE_COMPACTION_ENABLED;
+    }
     else
     {
         v->arch.fpu_ctxt = _xzalloc(sizeof(v->arch.xsave_area->fpu_sse), 16);
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 9f5a6c6..e3a84c5 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -935,10 +935,9 @@ void pv_cpuid(struct cpu_user_regs *regs)
             goto unsupported;
         if ( regs->_ecx == 1 )
         {
-            a &= XSTATE_FEATURE_XSAVEOPT |
-                 XSTATE_FEATURE_XSAVEC |
-                 (cpu_has_xgetbv1 ? XSTATE_FEATURE_XGETBV1 : 0) |
-                 (cpu_has_xsaves ? XSTATE_FEATURE_XSAVES : 0);
+            a &= cpufeat_mask(X86_FEATURE_XSAVEOPT) |
+                 cpufeat_mask(X86_FEATURE_XSAVEC) |
+                 (cpu_has_xgetbv1 ? cpufeat_mask(X86_FEATURE_XGETBV1) : 0);
             if ( !cpu_has_xsaves )
                 b = c = d = 0;
         }
diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index ff03b31..ae59a60 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -245,7 +245,15 @@ void xsave(struct vcpu *v, uint64_t mask)
         typeof(ptr->fpu_sse.fip.sel) fcs = ptr->fpu_sse.fip.sel;
         typeof(ptr->fpu_sse.fdp.sel) fds = ptr->fpu_sse.fdp.sel;

-        if ( cpu_has_xsaveopt )
+        if ( cpu_has_xsaves )
+            asm volatile ( ".byte 0x48,0x0f,0xc7,0x2f"
+                           : "=m" (*ptr)
+                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        else if ( cpu_has_xsavec )
+            asm volatile ( ".byte 0x48,0x0f,0xc7,0x27"
+                           : "=m" (*ptr)
+                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        else if ( cpu_has_xsaveopt )
         {
             /*
              * xsaveopt may not write the FPU portion even when the respective
@@ -298,7 +306,15 @@ void xsave(struct vcpu *v, uint64_t mask)
     }
     else
     {
-        if ( cpu_has_xsaveopt )
+        if ( cpu_has_xsaves )
+            asm volatile ( ".byte 0x48,0x0f,0xc7,0x2f"
+                           : "=m" (*ptr)
+                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        else if ( cpu_has_xsavec )
+            asm volatile ( ".byte 0x48,0x0f,0xc7,0x27"
+                           : "=m" (*ptr)
+                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        else if ( cpu_has_xsaveopt )
             asm volatile ( ".byte 0x0f,0xae,0x37"
                            : "=m" (*ptr)
                            : "a" (lmask), "d" (hmask), "D" (ptr) );
@@ -341,36 +357,68 @@ void xrstor(struct vcpu *v, uint64_t mask)
     switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) )
     {
     default:
-        asm volatile ( "1: .byte 0x48,0x0f,0xae,0x2f\n"
-                       ".section .fixup,\"ax\" \n"
-                       "2: mov %5,%%ecx \n"
-                       "   xor %1,%1 \n"
-                       "   rep stosb \n"
-                       "   lea %2,%0 \n"
-                       "   mov %3,%1 \n"
-                       "   jmp 1b \n"
-                       ".previous \n"
-                       _ASM_EXTABLE(1b, 2b)
-                       : "+&D" (ptr), "+&a" (lmask)
-                       : "m" (*ptr), "g" (lmask), "d" (hmask),
-                         "m" (xsave_cntxt_size)
-                       : "ecx" );
-        break;
+        if ( cpu_has_xsaves )
+            asm volatile ( "1: .byte 0x48,0x0f,0xc7,0x1f\n"
+                           ".section .fixup,\"ax\" \n"
+                           "2: mov %5,%%ecx \n"
+                           "   xor %1,%1 \n"
+                           "   rep stosb \n"
+                           "   lea %2,%0 \n"
+                           "   mov %3,%1 \n"
+                           "   jmp 1b \n"
+ ".previous \n" + _ASM_EXTABLE(1b, 2b) + : "+&D" (ptr), "+&a" (lmask) + : "m" (*ptr), "g" (lmask), "d" (hmask), + "m" (xsave_cntxt_size) + : "ecx" ); + else + asm volatile ( "1: .byte 0x48,0x0f,0xae,0x2f\n" + ".section .fixup,\"ax\" \n" + "2: mov %5,%%ecx \n" + " xor %1,%1 \n" + " rep stosb \n" + " lea %2,%0 \n" + " mov %3,%1 \n" + " jmp 1b \n" + ".previous \n" + _ASM_EXTABLE(1b, 2b) + : "+&D" (ptr), "+&a" (lmask) + : "m" (*ptr), "g" (lmask), "d" (hmask), + "m" (xsave_cntxt_size) + : "ecx" ); + break; case 4: case 2: - asm volatile ( "1: .byte 0x0f,0xae,0x2f\n" - ".section .fixup,\"ax\" \n" - "2: mov %5,%%ecx \n" - " xor %1,%1 \n" - " rep stosb \n" - " lea %2,%0 \n" - " mov %3,%1 \n" - " jmp 1b \n" - ".previous \n" - _ASM_EXTABLE(1b, 2b) - : "+&D" (ptr), "+&a" (lmask) - : "m" (*ptr), "g" (lmask), "d" (hmask), - "m" (xsave_cntxt_size) - : "ecx" ); + if ( cpu_has_xsaves ) + asm volatile ( "1: .byte 0x48,0x0f,0xc7,0x1f\n" + ".section .fixup,\"ax\" \n" + "2: mov %5,%%ecx \n" + " xor %1,%1 \n" + " rep stosb \n" + " lea %2,%0 \n" + " mov %3,%1 \n" + " jmp 1b \n" + ".previous \n" + _ASM_EXTABLE(1b, 2b) + : "+&D" (ptr), "+&a" (lmask) + : "m" (*ptr), "g" (lmask), "d" (hmask), + "m" (xsave_cntxt_size) + : "ecx" ); + else + asm volatile ( "1: .byte 0x0f,0xae,0x2f\n" + ".section .fixup,\"ax\" \n" + "2: mov %5,%%ecx \n" + " xor %1,%1 \n" + " rep stosb \n" + " lea %2,%0 \n" + " mov %3,%1 \n" + " jmp 1b \n" + ".previous \n" + _ASM_EXTABLE(1b, 2b) + : "+&D" (ptr), "+&a" (lmask) + : "m" (*ptr), "g" (lmask), "d" (hmask), + "m" (xsave_cntxt_size) + : "ecx" ); break; } } @@ -495,18 +543,24 @@ void xstate_init(bool_t bsp) cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); if ( bsp ) { - cpu_has_xsaveopt = !!(eax & XSTATE_FEATURE_XSAVEOPT); - cpu_has_xsavec = !!(eax & XSTATE_FEATURE_XSAVEC); - /* XXX cpu_has_xgetbv1 = !!(eax & XSTATE_FEATURE_XGETBV1); */ - /* XXX cpu_has_xsaves = !!(eax & XSTATE_FEATURE_XSAVES); */ + cpu_has_xsaveopt = !!(eax & cpufeat_mask(X86_FEATURE_XSAVEOPT)); + cpu_has_xsavec = !!(eax & cpufeat_mask(X86_FEATURE_XSAVEC)); + cpu_has_xgetbv1 = !!(eax & cpufeat_mask(X86_FEATURE_XGETBV1)); + cpu_has_xsaves = !!(eax & cpufeat_mask(X86_FEATURE_XSAVES)); } else { - BUG_ON(!cpu_has_xsaveopt != !(eax & XSTATE_FEATURE_XSAVEOPT)); - BUG_ON(!cpu_has_xsavec != !(eax & XSTATE_FEATURE_XSAVEC)); - /* XXX BUG_ON(!cpu_has_xgetbv1 != !(eax & XSTATE_FEATURE_XGETBV1)); */ - /* XXX BUG_ON(!cpu_has_xsaves != !(eax & XSTATE_FEATURE_XSAVES)); */ + BUG_ON(!cpu_has_xsaveopt != !(eax & cpufeat_mask(X86_FEATURE_XSAVEOPT))); + BUG_ON(!cpu_has_xsavec != !(eax & cpufeat_mask(X86_FEATURE_XSAVEC))); + BUG_ON(!cpu_has_xgetbv1 != !(eax & cpufeat_mask(X86_FEATURE_XGETBV1))); + BUG_ON(!cpu_has_xsaves != !(eax & cpufeat_mask(X86_FEATURE_XSAVES))); } + + if( setup_xstate_features(bsp) ) + BUG(); + + if ( bsp && (cpu_has_xsaves || cpu_has_xsavec) ) + setup_xstate_comp(); } static bool_t valid_xcr0(u64 xcr0) diff --git a/xen/include/asm-x86/xstate.h b/xen/include/asm-x86/xstate.h index a256525..715f096 100644 --- a/xen/include/asm-x86/xstate.h +++ b/xen/include/asm-x86/xstate.h @@ -15,10 +15,6 @@ #define MXCSR_DEFAULT 0x1f80 #define XSTATE_CPUID 0x0000000d -#define XSTATE_FEATURE_XSAVEOPT (1 << 0) /* sub-leaf 1, eax[bit 0] */ -#define XSTATE_FEATURE_XSAVEC (1 << 1) /* sub-leaf 1, eax[bit 1] */ -#define XSTATE_FEATURE_XGETBV1 (1 << 2) /* sub-leaf 1, eax[bit 2] */ -#define XSTATE_FEATURE_XSAVES (1 << 3) /* sub-leaf 1, eax[bit 3] */ #define XCR_XFEATURE_ENABLED_MASK 0x00000000 /* index of XCR0 */ -- 1.9.1 
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel