--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -55,28 +55,54 @@ static inline void fpu_fxrstor(struct vc
      * possibility, which may occur if the block was passed to us by control
      * tools, by silently clearing the block.
      */
-    asm volatile (
-        /* See above for why the operands/constraints are this way. */
-        "1: " REX64_PREFIX "fxrstor (%2)\n"
-        ".section .fixup,\"ax\" \n"
-        "2: push %%"__OP"ax     \n"
-        "   push %%"__OP"cx     \n"
-        "   push %%"__OP"di     \n"
-        "   lea  %0,%%"__OP"di  \n"
-        "   mov  %1,%%ecx       \n"
-        "   xor  %%eax,%%eax    \n"
-        "   rep ; stosl         \n"
-        "   pop  %%"__OP"di     \n"
-        "   pop  %%"__OP"cx     \n"
-        "   pop  %%"__OP"ax     \n"
-        "   jmp  1b             \n"
-        ".previous              \n"
-        _ASM_EXTABLE(1b, 2b)
-        :
-        : "m" (*fpu_ctxt),
-          "i" (sizeof(v->arch.xsave_area->fpu_sse)/4)
-          ,"cdaSDb" (fpu_ctxt)
-        );
+    switch ( __builtin_expect(fpu_ctxt[FPU_WORD_SIZE_OFFSET], 8) )
+    {
+    default:
+        asm volatile (
+            /* See below for why the operands/constraints are this way. */
+            "1: " REX64_PREFIX "fxrstor (%2)\n"
+            ".section .fixup,\"ax\" \n"
+            "2: push %%"__OP"ax     \n"
+            "   push %%"__OP"cx     \n"
+            "   push %%"__OP"di     \n"
+            "   mov  %2,%%"__OP"di  \n"
+            "   mov  %1,%%ecx       \n"
+            "   xor  %%eax,%%eax    \n"
+            "   rep ; stosl         \n"
+            "   pop  %%"__OP"di     \n"
+            "   pop  %%"__OP"cx     \n"
+            "   pop  %%"__OP"ax     \n"
+            "   jmp  1b             \n"
+            ".previous              \n"
+            _ASM_EXTABLE(1b, 2b)
+            :
+            : "m" (*fpu_ctxt),
+              "i" (sizeof(v->arch.xsave_area->fpu_sse)/4),
+              "cdaSDb" (fpu_ctxt) );
+        break;
+    case 4: case 2:
+        asm volatile (
+            "1: fxrstor (%2)\n"
+            ".section .fixup,\"ax\" \n"
+            "2: push %%"__OP"ax     \n"
+            "   push %%"__OP"cx     \n"
+            "   push %%"__OP"di     \n"
+            "   mov  %2,%%"__OP"di  \n"
+            "   mov  %1,%%ecx       \n"
+            "   xor  %%eax,%%eax    \n"
+            "   rep ; stosl         \n"
+            "   pop  %%"__OP"di     \n"
+            "   pop  %%"__OP"cx     \n"
+            "   pop  %%"__OP"ax     \n"
+            "   jmp  1b             \n"
+            ".previous              \n"
+            _ASM_EXTABLE(1b, 2b)
+            :
+            : "m" (*fpu_ctxt),
+              "i" (sizeof(v->arch.xsave_area->fpu_sse)/4),
+              "r" (fpu_ctxt) );
+        break;
+    }
 }
 
 /* Restore x87 extended state */
@@ -105,15 +131,24 @@ static inline void fpu_xsave(struct vcpu
 static inline void fpu_fxsave(struct vcpu *v)
 {
     char *fpu_ctxt = v->arch.fpu_ctxt;
+    int word_size = guest_word_size(v);
 
-    /*
-     * The only way to force fxsaveq on a wide range of gas versions. On
-     * older versions the rex64 prefix works only if we force an
-     * addressing mode that doesn't require extended registers.
-     */
-    asm volatile (
-        REX64_PREFIX "fxsave (%1)"
-        : "=m" (*fpu_ctxt) : "cdaSDb" (fpu_ctxt) );
+    switch ( __builtin_expect(word_size, 8) )
+    {
+    default:
+        /*
+         * The only way to force fxsaveq on a wide range of gas versions.
+         * On older versions the rex64 prefix works only if we force an
+         * addressing mode that doesn't require extended registers.
+         */
+        asm volatile ( REX64_PREFIX "fxsave (%1)"
+                       : "=m" (*fpu_ctxt) : "cdaSDb" (fpu_ctxt) );
+        break;
+    case 4: case 2:
+        asm volatile ( "fxsave %0" : "=m" (*fpu_ctxt) );
+        break;
+    }
+    fpu_ctxt[FPU_WORD_SIZE_OFFSET] = word_size;
 
     /* Clear exception flags if FSW.ES is set. */
     if ( unlikely(fpu_ctxt[2] & 0x80) )
@@ -253,6 +288,39 @@ void vcpu_destroy_fpu(struct vcpu *v)
         xfree(v->arch.fpu_ctxt);
 }
 
+int guest_word_size(struct vcpu *v)
+{
+    int mode;
+
+    if ( !is_hvm_vcpu(v) )
+    {
+        if ( is_pv_32bit_vcpu(v) )
+            return 4;
+
+        asm ( "1: lar %1,%0      \n"
+              "   jnz 2f         \n"
+              "3:                \n"
+              ".section .fixup,\"ax\"\n"
+              "2: xor %0,%0      \n"
+              "   jmp 3b         \n"
+              ".previous         \n"
+              _ASM_EXTABLE(1b, 2b)
+              : "=r" (mode)
+              : "m" (guest_cpu_user_regs()->cs) );
+
+        return !(mode & _SEGMENT_S) || (mode & _SEGMENT_L) ? 8 : 4;
+    }
+
+    switch ( mode = hvm_guest_x86_mode(v) )
+    {
+    case 0: /* real mode */
+    case 1: /* virtual 8086 mode */
+        return 2;
+    }
+
+    return mode;
+}
+
 /*
  * Local variables:
  * mode: C
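(Editor's illustration, not part of the patch.) The switch()es above matter
because the operand size changes the FXSAVE image format itself: with REX.W
-- the "default:" case -- bytes 8-15 of the image hold a full 64-bit FIP and
no FCS selector, while the non-REX form used for "case 4: case 2:" stores a
32-bit FIP in bytes 8-11 and the FCS selector in bytes 12-13, i.e. exactly
the selector information a 32-bit guest would otherwise lose. The
hypothetical user-space sketch below makes the two layouts visible. It
assumes a 64-bit build and binutils new enough to know the fxsave64 mnemonic
(older gas needs the REX64_PREFIX trick described in fpu_fxsave()'s
comment). Caveats: CPUs that deprecate FCS/FDS (CPUID.(EAX=7,ECX=0):EBX[13])
store a zero selector, and AMD CPUs write FIP/FDP/FOP only while an unmasked
x87 exception is pending, so the fields may read as zero there.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* FXSAVE needs a 16-byte aligned, 512-byte save area. */
static uint8_t img[512] __attribute__((aligned(16)));

int main(void)
{
    uint64_t fip64;
    uint32_t fip32;
    uint16_t fcs;

    asm volatile ( "fldpi" );          /* run an x87 insn so FIP/FCS latch */

    /* REX.W form: full 64-bit FIP at bytes 8-15, no selector field. */
    asm volatile ( "fxsave64 %0" : "=m" (img) );
    memcpy(&fip64, img + 8, sizeof(fip64));
    printf("fxsave64: FIP=%#llx (no selector field)\n",
           (unsigned long long)fip64);

    /* Non-REX form: 32-bit FIP at bytes 8-11, FCS at bytes 12-13. */
    asm volatile ( "fxsave %0" : "=m" (img) );
    memcpy(&fip32, img + 8, sizeof(fip32));
    memcpy(&fcs, img + 12, sizeof(fcs));
    printf("fxsave:   FIP=%#x FCS=%#x\n", fip32, (unsigned)fcs);

    asm volatile ( "fstp %%st(0)" ::: "st" );   /* drop the fldpi result */
    return 0;
}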
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -56,32 +56,54 @@ void xsave(struct vcpu *v, uint64_t mask
 {
     struct xsave_struct *ptr = v->arch.xsave_area;
     uint32_t hmask = mask >> 32;
     uint32_t lmask = mask;
+    int word_size = guest_word_size(v);
 
-    if ( cpu_has_xsaveopt )
-        asm volatile (
-            ".byte " REX_PREFIX "0x0f,0xae,0x37"
-            :
-            : "a" (lmask), "d" (hmask), "D"(ptr)
-            : "memory" );
-    else
-        asm volatile (
-            ".byte " REX_PREFIX "0x0f,0xae,0x27"
-            :
-            : "a" (lmask), "d" (hmask), "D"(ptr)
-            : "memory" );
+    switch ( __builtin_expect(word_size, 8) )
+    {
+    default:
+        if ( cpu_has_xsaveopt )
+            asm volatile ( ".byte 0x48,0x0f,0xae,0x37"
+                           : "=m" (*ptr)
+                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        else
+            asm volatile ( ".byte 0x48,0x0f,0xae,0x27"
+                           : "=m" (*ptr)
+                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        break;
+    case 4: case 2:
+        if ( cpu_has_xsaveopt )
+            asm volatile ( ".byte 0x0f,0xae,0x37"
+                           : "=m" (*ptr)
+                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        else
+            asm volatile ( ".byte 0x0f,0xae,0x27"
+                           : "=m" (*ptr)
+                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        break;
+    }
+    ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] = word_size;
 }
 
 void xrstor(struct vcpu *v, uint64_t mask)
 {
     uint32_t hmask = mask >> 32;
     uint32_t lmask = mask;
     struct xsave_struct *ptr = v->arch.xsave_area;
 
-    asm volatile (
-        ".byte " REX_PREFIX "0x0f,0xae,0x2f"
-        :
-        : "m" (*ptr), "a" (lmask), "d" (hmask), "D"(ptr) );
+    switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) )
+    {
+    default:
+        asm volatile ( ".byte 0x48,0x0f,0xae,0x2f"
+                       :
+                       : "m" (*ptr), "a" (lmask), "d" (hmask), "D" (ptr) );
+        break;
+    case 4: case 2:
+        asm volatile ( ".byte 0x0f,0xae,0x2f"
+                       :
+                       : "m" (*ptr), "a" (lmask), "d" (hmask), "D" (ptr) );
+        break;
+    }
 }
 
 bool_t xsave_enabled(const struct vcpu *v)
--- a/xen/include/asm-x86/xstate.h
+++ b/xen/include/asm-x86/xstate.h
@@ -34,8 +34,6 @@
 #define XSTATE_NONLAZY (XSTATE_LWP)
 #define XSTATE_LAZY (XSTATE_ALL & ~XSTATE_NONLAZY)
 
-#define REX_PREFIX "0x48, "
-
 /* extended state variables */
 DECLARE_PER_CPU(uint64_t, xcr0);
 
@@ -88,4 +86,14 @@ void xstate_free_save_area(struct vcpu *
 int xstate_alloc_save_area(struct vcpu *v);
 void xstate_init(void);
 
+/* Byte offset within the FXSAVE area of the stored guest word size. */
+#define FPU_WORD_SIZE_OFFSET 511
+
+/*
+ * Used EXCLUSIVELY to determine the needed operand size override on
+ * XSAVE/FXSAVE. Any other use would need to make sure that the context
+ * is suitable for all operations this involves.
+ */
+int guest_word_size(struct vcpu *);
+
 #endif /* __ASM_XSTATE_H */
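(Editor's illustration, not part of the patch.) The contract that
FPU_WORD_SIZE_OFFSET establishes -- the save side stamps the operand size it
used into a software-available byte of the image, and every restore side
dispatches on that stamp rather than on the current VCPU -- can be sketched
in isolation. Bytes 464-511 of the FXSAVE layout are documented as available
to software and are ignored by FXRSTOR, which is what makes the stamp at
byte 511 safe. The toy below is hypothetical throughout (all names are
invented) and again assumes binutils that know the fxsave64/fxrstor64
mnemonics rather than the opcode-byte forms used in the hypervisor.

#include <stdint.h>

#define FPU_WORD_SIZE_OFFSET 511   /* mirrors the xstate.h addition above */

/* 16-byte aligned, 512-byte FXSAVE image. */
static uint8_t image[512] __attribute__((aligned(16)));

/* Save with the operand size the (hypothetical) guest needs and stamp
 * that size into byte 511; FXRSTOR ignores bytes 464-511, so the stamp
 * survives restores. */
static void save_with_stamp(int word_size)
{
    if ( word_size == 8 )
        asm volatile ( "fxsave64 %0" : "=m" (image) );
    else
        asm volatile ( "fxsave %0" : "=m" (image) );
    image[FPU_WORD_SIZE_OFFSET] = word_size;
}

/* Restore strictly according to the stamp, never according to the
 * current caller -- the invariant fpu_fxrstor()/xrstor() rely on. */
static void restore_by_stamp(void)
{
    if ( image[FPU_WORD_SIZE_OFFSET] == 8 )
        asm volatile ( "fxrstor64 %0" : : "m" (image) );
    else
        asm volatile ( "fxrstor %0" : : "m" (image) );
}

int main(void)
{
    save_with_stamp(8);     /* 64-bit image, stamped "8" */
    restore_by_stamp();     /* consumed as fxrstor64 */
    save_with_stamp(4);     /* 32-bit image, stamped "4" */
    restore_by_stamp();     /* consumed as plain fxrstor */
    return 0;
}

Restoring by stamp is also what keeps an image coherent when it was produced
elsewhere, e.g. handed in by control tools: any unexpected stamp value falls
into the default: path of fpu_fxrstor() above, whose fault fixup silently
clears a corrupted block.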