x86/xsave: fix nonlazy state handling Nonlazy xstates should be xsaved each time vcpu_save_fpu() is called. Operations on nonlazy xstates will not trigger an #NM exception, so whenever a vcpu is scheduled in its nonlazy state gets restored, and whenever it is scheduled out that state should get saved. Currently this bug affects the AMD LWP feature, and later it would also affect the Intel MPX feature. With the bugfix both LWP and MPX will work fine. Signed-off-by: Liu Jinsong Furthermore, during restore we also need to set nonlazy_xstate_used according to the incoming accumulated XCR0. Also adjust the changes to i387.c such that there won't be a pointless clts()/stts() pair. Signed-off-by: Jan Beulich --- a/xen/arch/x86/domctl.c +++ b/xen/arch/x86/domctl.c @@ -1146,6 +1146,8 @@ long arch_do_domctl( { v->arch.xcr0 = _xcr0; v->arch.xcr0_accum = _xcr0_accum; + if ( _xcr0_accum & XSTATE_NONLAZY ) + v->arch.nonlazy_xstate_used = 1; memcpy(v->arch.xsave_area, _xsave_area, evc->size - 2 * sizeof(uint64_t)); } --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1119,6 +1119,8 @@ static int hvm_load_cpu_xsave_states(str v->arch.xcr0 = ctxt->xcr0; v->arch.xcr0_accum = ctxt->xcr0_accum; + if ( ctxt->xcr0_accum & XSTATE_NONLAZY ) + v->arch.nonlazy_xstate_used = 1; memcpy(v->arch.xsave_area, &ctxt->save_area, desc->length - offsetof(struct hvm_hw_cpu_xsave, save_area)); --- a/xen/arch/x86/i387.c +++ b/xen/arch/x86/i387.c @@ -133,11 +133,22 @@ static inline void fpu_frstor(struct vcp /*******************************/ /* FPU Save Functions */ /*******************************/ + +static inline uint64_t vcpu_xsave_mask(const struct vcpu *v) +{ + if ( v->fpu_dirtied ) + return v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY; + + return v->arch.nonlazy_xstate_used ? XSTATE_NONLAZY : 0; +} + /* Save x87 extended state */ static inline void fpu_xsave(struct vcpu *v) { bool_t ok; + uint64_t mask = vcpu_xsave_mask(v); + ASSERT(mask); ASSERT(v->arch.xsave_area); /* * XCR0 normally represents what guest OS set. 
In case of Xen itself, @@ -145,7 +156,7 @@ static inline void fpu_xsave(struct vcpu */ ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE); ASSERT(ok); - xsave(v, v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY); + xsave(v, mask); ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE); ASSERT(ok); } @@ -257,7 +268,7 @@ void vcpu_restore_fpu_lazy(struct vcpu * */ void vcpu_save_fpu(struct vcpu *v) { - if ( !v->fpu_dirtied ) + if ( !v->fpu_dirtied && !v->arch.nonlazy_xstate_used ) return; ASSERT(!is_idle_vcpu(v));