[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 4/6] x86emul: reduce FPU handling code size
Pulling out the {get,put}_fpu() invocations from individual emulation paths leads to a couple of kb code size reduction in my builds. Note that this is fine exception-wise: - #UD and #NM have implementation defined order relative to one another, - data read #GP/#SS/#PF now properly are delivered after #NM/#UD. Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -874,59 +874,44 @@ do { } while (0) #define emulate_fpu_insn(_op) \ -do{ struct fpu_insn_ctxt fic; \ - get_fpu(X86EMUL_FPU_fpu, &fic); \ asm volatile ( \ "movb $2f-1f,%0 \n" \ "1: " _op " \n" \ "2: \n" \ - : "=m" (fic.insn_bytes) : : "memory" ); \ - put_fpu(&fic); \ -} while (0) + : "=m" (fic.insn_bytes) : : "memory" ) #define emulate_fpu_insn_memdst(_op, _arg) \ -do{ struct fpu_insn_ctxt fic; \ - get_fpu(X86EMUL_FPU_fpu, &fic); \ asm volatile ( \ "movb $2f-1f,%0 \n" \ "1: " _op " %1 \n" \ "2: \n" \ : "=m" (fic.insn_bytes), "=m" (_arg) \ - : : "memory" ); \ - put_fpu(&fic); \ -} while (0) + : : "memory" ) #define emulate_fpu_insn_memsrc(_op, _arg) \ -do{ struct fpu_insn_ctxt fic; \ - get_fpu(X86EMUL_FPU_fpu, &fic); \ asm volatile ( \ "movb $2f-1f,%0 \n" \ "1: " _op " %1 \n" \ "2: \n" \ : "=m" (fic.insn_bytes) \ - : "m" (_arg) : "memory" ); \ - put_fpu(&fic); \ -} while (0) + : "m" (_arg) : "memory" ) #define emulate_fpu_insn_stub(_bytes...) \ do { \ uint8_t *buf = get_stub(stub); \ unsigned int _nr = sizeof((uint8_t[]){ _bytes }); \ - struct fpu_insn_ctxt fic = { .insn_bytes = _nr }; \ + fic.insn_bytes = _nr; \ memcpy(buf, ((uint8_t[]){ _bytes, 0xc3 }), _nr + 1); \ - get_fpu(X86EMUL_FPU_fpu, &fic); \ stub.func(); \ - put_fpu(&fic); \ put_stub(stub); \ } while (0) #define emulate_fpu_insn_stub_eflags(bytes...) \ do { \ unsigned int nr_ = sizeof((uint8_t[]){ bytes }); \ - struct fpu_insn_ctxt fic_ = { .insn_bytes = nr_ }; \ unsigned long tmp_; \ + fic.insn_bytes = nr_; \ memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1); \ - get_fpu(X86EMUL_FPU_fpu, &fic_); \ asm volatile ( _PRE_EFLAGS("[eflags]", "[mask]", "[tmp]") \ "call *%[func];" \ _POST_EFLAGS("[eflags]", "[mask]", "[tmp]") \ @@ -934,7 +919,6 @@ do { [tmp] "=&r" (tmp_) \ : [func] "rm" (stub.func), \ [mask] "i" (EFLG_ZF|EFLG_PF|EFLG_CF) ); \ - put_fpu(&fic_); \ put_stub(stub); \ } while (0) @@ -2579,6 +2563,7 @@ x86_emulate( struct operand src = { .reg = PTR_POISON }; struct operand dst = { .reg = PTR_POISON }; enum x86_swint_type swint_type; + struct fpu_insn_ctxt fic; struct x86_emulate_stub stub = {}; DECLARE_ALIGNED(mmval_t, mmval); @@ -3272,15 +3257,12 @@ x86_emulate( break; case 0x9b: /* wait/fwait */ - { - struct fpu_insn_ctxt fic = { .insn_bytes = 1 }; - + fic.insn_bytes = 1; host_and_vcpu_must_have(fpu); get_fpu(X86EMUL_FPU_wait, &fic); asm volatile ( "fwait" ::: "memory" ); put_fpu(&fic); break; - } case 0x9c: /* pushf */ generate_exception_if((_regs.eflags & EFLG_VM) && @@ -3670,6 +3652,7 @@ x86_emulate( case 0xd8: /* FPU 0xd8 */ host_and_vcpu_must_have(fpu); + get_fpu(X86EMUL_FPU_fpu, &fic); switch ( modrm ) { case 0xc0 ... 0xc7: /* fadd %stN,%st */ @@ -3715,10 +3698,12 @@ x86_emulate( break; } } + put_fpu(&fic); break; case 0xd9: /* FPU 0xd9 */ host_and_vcpu_must_have(fpu); + get_fpu(X86EMUL_FPU_fpu, &fic); switch ( modrm ) { case 0xfb: /* fsincos */ @@ -3795,10 +3780,12 @@ x86_emulate( generate_exception_if(true, EXC_UD); } } + put_fpu(&fic); break; case 0xda: /* FPU 0xda */ host_and_vcpu_must_have(fpu); + get_fpu(X86EMUL_FPU_fpu, &fic); switch ( modrm ) { case 0xc0 ... 0xc7: /* fcmovb %stN */ @@ -3844,10 +3831,12 @@ x86_emulate( break; } } + put_fpu(&fic); break; case 0xdb: /* FPU 0xdb */ host_and_vcpu_must_have(fpu); + get_fpu(X86EMUL_FPU_fpu, &fic); switch ( modrm ) { case 0xc0 ... 0xc7: /* fcmovnb %stN */ @@ -3907,10 +3896,12 @@ x86_emulate( generate_exception_if(true, EXC_UD); } } + put_fpu(&fic); break; case 0xdc: /* FPU 0xdc */ host_and_vcpu_must_have(fpu); + get_fpu(X86EMUL_FPU_fpu, &fic); switch ( modrm ) { case 0xc0 ... 0xc7: /* fadd %st,%stN */ @@ -3956,10 +3947,12 @@ x86_emulate( break; } } + put_fpu(&fic); break; case 0xdd: /* FPU 0xdd */ host_and_vcpu_must_have(fpu); + get_fpu(X86EMUL_FPU_fpu, &fic); switch ( modrm ) { case 0xc0 ... 0xc7: /* ffree %stN */ @@ -4006,10 +3999,12 @@ x86_emulate( generate_exception_if(true, EXC_UD); } } + put_fpu(&fic); break; case 0xde: /* FPU 0xde */ host_and_vcpu_must_have(fpu); + get_fpu(X86EMUL_FPU_fpu, &fic); switch ( modrm ) { case 0xc0 ... 0xc7: /* faddp %stN */ @@ -4052,10 +4047,12 @@ x86_emulate( break; } } + put_fpu(&fic); break; case 0xdf: /* FPU 0xdf */ host_and_vcpu_must_have(fpu); + get_fpu(X86EMUL_FPU_fpu, &fic); switch ( modrm ) { case 0xe0: @@ -4125,6 +4122,7 @@ x86_emulate( break; } } + put_fpu(&fic); break; case 0xe0 ... 0xe2: /* loop{,z,nz} */ { @@ -4945,8 +4943,8 @@ x86_emulate( case X86EMUL_OPC_VEX_F2(0x0f, 0x11): /* vmovsd xmm,xmm/m64 */ { uint8_t *buf = get_stub(stub); - struct fpu_insn_ctxt fic = { .insn_bytes = 5 }; + fic.insn_bytes = 5; buf[0] = 0x3e; buf[1] = 0x3e; buf[2] = 0x0f; @@ -5215,8 +5213,8 @@ x86_emulate( case X86EMUL_OPC_VEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */ { uint8_t *buf = get_stub(stub); - struct fpu_insn_ctxt fic = { .insn_bytes = 5 }; + fic.insn_bytes = 5; buf[0] = 0x3e; buf[1] = 0x3e; buf[2] = 0x0f; Attachment:
x86emul-FPU-reduce.patch _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |