x86emul: correct handling of FPU insns faulting on memory write When an FPU instruction with a memory destination fails during the memory write, it should not affect FPU register state. Due to the way we emulate FPU (and SIMD) instructions, we can only guarantee this by - backing out changes to the FPU register state in such a case or - doing a descriptor read and/or page walk up front, perhaps with the stubs accessing the actual memory location then. The latter would require a significant change in how the emulator does its guest memory accessing, so for now the former variant is being chosen. Signed-off-by: Jan Beulich --- Note that the state save overhead (unless state hadn't been loaded at all before, which should only be possible if a guest is fiddling with the instruction stream under emulation) is taken for every FPU insn hitting the emulator. We could reduce this to just the ones writing to memory, but that would involve quite a few further changes and resulting code where even more code paths need to match up with one another. 
--- a/tools/fuzz/x86_instruction_emulator/x86-insn-emulator-fuzzer.c +++ b/tools/fuzz/x86_instruction_emulator/x86-insn-emulator-fuzzer.c @@ -433,6 +433,7 @@ static struct x86_emulate_ops fuzz_emulo SET(wbinvd), SET(invlpg), .get_fpu = emul_test_get_fpu, + .put_fpu = emul_test_put_fpu, .cpuid = emul_test_cpuid, }; #undef SET --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -293,6 +293,7 @@ static struct x86_emulate_ops emulops = .read_cr = emul_test_read_cr, .read_msr = read_msr, .get_fpu = emul_test_get_fpu, + .put_fpu = emul_test_put_fpu, }; int main(int argc, char **argv) --- a/tools/tests/x86_emulator/x86_emulate.c +++ b/tools/tests/x86_emulator/x86_emulate.c @@ -138,4 +138,11 @@ int emul_test_get_fpu( return X86EMUL_OKAY; } +void emul_test_put_fpu( + struct x86_emulate_ctxt *ctxt, + enum x86_emulate_fpu_type backout) +{ + /* TBD */ +} + #include "x86_emulate/x86_emulate.c" --- a/tools/tests/x86_emulator/x86_emulate.h +++ b/tools/tests/x86_emulator/x86_emulate.h @@ -178,3 +178,7 @@ int emul_test_get_fpu( void *exception_callback_arg, enum x86_emulate_fpu_type type, struct x86_emulate_ctxt *ctxt); + +void emul_test_put_fpu( + struct x86_emulate_ctxt *ctxt, + enum x86_emulate_fpu_type backout); --- a/xen/arch/x86/hvm/emulate.c +++ b/xen/arch/x86/hvm/emulate.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -1619,6 +1620,35 @@ static int hvmemul_get_fpu( if ( !curr->fpu_dirtied ) hvm_funcs.fpu_dirty_intercept(); + else if ( type == X86EMUL_FPU_fpu ) + { + const typeof(curr->arch.xsave_area->fpu_sse) *fpu_ctxt = + curr->arch.fpu_ctxt; + + /* + * Latch current register state so that we can back out changes + * if needed (namely when a memory write fails after register state + * has already been updated). 
+ * NB: We don't really need the "enable" part of the called function + * (->fpu_dirtied set implies CR0.TS clear), but the additional + * overhead should be low enough to not warrant introduction of yet + * another slightly different function. However, we need to undo the + * ->fpu_dirtied clearing the function does as well as the possible + * masking of all exceptions by FNSTENV. + */ + save_fpu_enable(); + curr->fpu_dirtied = true; + if ( (fpu_ctxt->fcw & 0x3f) != 0x3f ) + { + uint16_t fcw; + + asm ( "fnstcw %0" : "=m" (fcw) ); + if ( (fcw & 0x3f) == 0x3f ) + asm ( "fldcw %0" :: "m" (fpu_ctxt->fcw) ); + else + ASSERT(fcw == fpu_ctxt->fcw); + } + } curr->arch.hvm_vcpu.fpu_exception_callback = exception_callback; curr->arch.hvm_vcpu.fpu_exception_callback_arg = exception_callback_arg; @@ -1627,10 +1657,24 @@ static int hvmemul_get_fpu( } static void hvmemul_put_fpu( - struct x86_emulate_ctxt *ctxt) + struct x86_emulate_ctxt *ctxt, + enum x86_emulate_fpu_type backout) { struct vcpu *curr = current; + curr->arch.hvm_vcpu.fpu_exception_callback = NULL; + + if ( backout == X86EMUL_FPU_fpu ) + { + /* + * To back out changes to the register file simply adjust state such + * that upon next FPU insn use by the guest we'll reload the state + * saved (or freshly loaded) by hvmemul_get_fpu(). 
+ */ + curr->fpu_dirtied = false; + stts(); + hvm_funcs.fpu_leave(curr); + } } static int hvmemul_invlpg( --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -2268,6 +2268,7 @@ static struct hvm_function_table __initd .update_guest_cr = svm_update_guest_cr, .update_guest_efer = svm_update_guest_efer, .update_guest_vendor = svm_update_guest_vendor, + .fpu_leave = svm_fpu_leave, .set_guest_pat = svm_set_guest_pat, .get_guest_pat = svm_get_guest_pat, .set_tsc_offset = svm_set_tsc_offset, --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -2128,6 +2128,7 @@ static struct hvm_function_table __initd .update_guest_cr = vmx_update_guest_cr, .update_guest_efer = vmx_update_guest_efer, .update_guest_vendor = vmx_update_guest_vendor, + .fpu_leave = vmx_fpu_leave, .set_guest_pat = vmx_set_guest_pat, .get_guest_pat = vmx_get_guest_pat, .set_tsc_offset = vmx_set_tsc_offset, --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -964,6 +964,7 @@ static int _get_fpu( { unsigned long cr0; + fail_if(type == X86EMUL_FPU_fpu && !ops->put_fpu); fic->type = type; fail_if(!ops->read_cr); @@ -1025,11 +1026,14 @@ do { static void put_fpu( struct fpu_insn_ctxt *fic, + bool failed_late, struct x86_emulate_ctxt *ctxt, const struct x86_emulate_ops *ops) { - if ( fic->type != X86EMUL_FPU_none && ops->put_fpu ) - ops->put_fpu(ctxt); + if ( unlikely(failed_late) && fic->type == X86EMUL_FPU_fpu ) + ops->put_fpu(ctxt, X86EMUL_FPU_fpu); + else if ( fic->type != X86EMUL_FPU_none && ops->put_fpu ) + ops->put_fpu(ctxt, X86EMUL_FPU_none); } static inline bool fpu_check_write(void) @@ -3714,9 +3718,9 @@ x86_emulate( break; case 0x9b: /* wait/fwait */ - fic.insn_bytes = 1; host_and_vcpu_must_have(fpu); get_fpu(X86EMUL_FPU_wait, &fic); + fic.insn_bytes = 1; asm volatile ( "fwait" ::: "memory" ); check_fpu(&fic); break; @@ -7888,7 +7892,7 @@ x86_emulate( } complete_insn: /* Commit shadow register state. 
 */ - put_fpu(&fic, ctxt, ops); + put_fpu(&fic, false, ctxt, ops); /* Zero the upper 32 bits of %rip if not in 64-bit mode. */ if ( !mode_64bit() ) @@ -7913,7 +7917,7 @@ x86_emulate( done: if ( rc != X86EMUL_OKAY ) - put_fpu(&fic, ctxt, ops); + put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM, ctxt, ops); put_stub(stub); return rc; #undef state --- a/xen/arch/x86/x86_emulate/x86_emulate.h +++ b/xen/arch/x86/x86_emulate/x86_emulate.h @@ -437,9 +437,12 @@ struct x86_emulate_ops * put_fpu: Relinquish the FPU. Unhook from FPU/SIMD exception handlers. * The handler, if installed, must be prepared to get called without * the get_fpu one having got called before! + * @backout: Undo updates to the specified register file (can, besides + * X86EMUL_FPU_none, only be X86EMUL_FPU_fpu at present). */ void (*put_fpu)( - struct x86_emulate_ctxt *ctxt); + struct x86_emulate_ctxt *ctxt, + enum x86_emulate_fpu_type backout); /* invlpg: Invalidate paging structures which map addressed byte. */ int (*invlpg)( --- a/xen/include/asm-x86/hvm/hvm.h +++ b/xen/include/asm-x86/hvm/hvm.h @@ -140,6 +140,8 @@ struct hvm_function_table { void (*update_guest_vendor)(struct vcpu *v); + void (*fpu_leave)(struct vcpu *v); + int (*get_guest_pat)(struct vcpu *v, u64 *); int (*set_guest_pat)(struct vcpu *v, u64);