[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH RFC] x86emul: add read-modify-write hook
In order to correctly emulate read-modify-write insns, especially LOCKed ones, we should not issue reads and writes separately. Use a new hook to combine both, and don't uniformly read the memory destination anymore. Instead, DstMem opcodes without Mov now need to have done so in their respective case blocks. For now, make use of the hook optional for callers; eventually this should become mandatory. Note that, operand-size-wise, only what has been observed to be necessary has been added to the test harness's rmw() function. Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> --- This mainly is in preparation for Andrew's "Improvements to in-hypervisor emulation" session at the meeting in Budapest. Note that this is unlikely to apply as is, as I have it sitting on top of various other pending patches, but it should be good enough for discussing the approach. TBD: not dealing with cmpxchg/cmpxchg{8,16}b yet - need to either alter that hook's behavior or add x86_rmw_cmpxchg (but the latter would require another input to ->rmw()) --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -311,6 +311,139 @@ static int write( return X86EMUL_OKAY; } +static int rmw( + enum x86_rmw_op op, + enum x86_segment seg, + unsigned long offset, + void *p_data, + unsigned int bytes, + uint32_t *eflags, + struct x86_emulate_ctxt *ctxt) +{ + union { + uint8_t u8; + uint16_t u16; + uint32_t u32; + uint64_t u64; + } *ptr = (void *)offset, *data = p_data; + + if ( verbose ) + printf("** %s(%d, %u, %p,, %u,,)\n", __func__, op, seg, ptr, bytes); + + if ( !is_x86_user_segment(seg) ) + return X86EMUL_UNHANDLEABLE; + switch ( op ) + { + bool cf; + +#ifdef __x86_64__ +# define asm64 asm +# define popfl(n) "pop %q" #n +#else +# define asm64(...) 
return X86EMUL_UNHANDLEABLE +# define popfl(n) "pop %" #n +#endif + +#define BINOP(insn) \ + case x86_rmw_##insn: \ + if ( bytes != 4 ) \ + return printf("[%d:%u]", op, bytes), X86EMUL_UNHANDLEABLE; \ + asm ( #insn "l %2,%0; pushf; " popfl(1) \ + : "+m" (ptr->u32), "=rm" (*eflags) \ + : "r" (data->u32) ); \ + break + BINOP(add); + BINOP(and); + BINOP(or); + BINOP(sub); + BINOP(xor); +#undef BINOP + +#define BINOPC(insn) \ + case x86_rmw_##insn: \ + if ( bytes != 4 ) \ + return printf("[%d:%u]", op, bytes), X86EMUL_UNHANDLEABLE; \ + cf = *eflags & X86_EFLAGS_CF; \ + asm ( "shr $1,%3; " #insn "l %2,%0; pushf; " popfl(1) \ + : "+m" (ptr->u32), "=rm" (*eflags) \ + : "r" (data->u32), "q" (cf) ); \ + break + BINOPC(adc); + BINOPC(sbb); +#undef BINOPC + +#define UNOP(insn) \ + case x86_rmw_##insn: \ + if ( bytes != 4 ) \ + return X86EMUL_UNHANDLEABLE; \ + asm ( #insn "l %0; pushf; " popfl(1) \ + : "+m" (ptr->u32), "=rm" (*eflags) ); \ + break + UNOP(dec); + UNOP(inc); + UNOP(neg); +#undef UNOP + +#define BTOP(op) \ + case x86_rmw_bt##op: \ + switch ( bytes ) \ + { \ + case 4: \ + if ( data->u32 >= 32 ) \ + return X86EMUL_UNHANDLEABLE; \ + asm ( "bt" #op "l %2,%0; setc %1" \ + : "+m" (ptr->u32), "=qm" (cf) \ + : "r" (data->u32) ); \ + break; \ + case 8: \ + if ( data->u64 >= 64 ) \ + return X86EMUL_UNHANDLEABLE; \ + asm64 ( "bt" #op "q %2,%0; setc %1" \ + : "+m" (ptr->u64), "=qm" (cf) \ + : "r" (data->u64) ); \ + break; \ + default: \ + return X86EMUL_UNHANDLEABLE; \ + } \ + if ( cf ) \ + *eflags |= X86_EFLAGS_CF; \ + else \ + *eflags &= ~X86_EFLAGS_CF; \ + break + BTOP(c); + BTOP(r); + BTOP(s); +#undef BTOP + + case x86_rmw_not: + if ( bytes != 4 ) + return X86EMUL_UNHANDLEABLE; + asm ( "notl %0; pushf; pop %1" : "+m" (ptr->u32) ); + break; + + case x86_rmw_xadd: + if ( bytes != 2 ) + return X86EMUL_UNHANDLEABLE; + asm ( "xaddw %1,%0; pushf; " popfl(2) + : "+m" (ptr->u16), "+r" (data->u16), "=rm" (*eflags) ); + break; + + case x86_rmw_xchg: + if ( bytes != 4 ) + return 
X86EMUL_UNHANDLEABLE; + asm ( "xchgl %1,%0" : "+m" (ptr->u32), "+r" (data->u32) ); + break; + + default: + return X86EMUL_UNHANDLEABLE; + +#undef asm64 +#undef popfl + } + + return X86EMUL_OKAY; +} + static int cmpxchg( enum x86_segment seg, unsigned long offset, @@ -410,6 +543,7 @@ int main(int argc, char **argv) if ( !stack_exec ) printf("Warning: Stack could not be made executable (%d).\n", errno); + rmw_restart: printf("%-40s", "Testing addl %ecx,(%eax)..."); instr[0] = 0x01; instr[1] = 0x08; regs.eflags = 0x200; @@ -537,25 +671,6 @@ int main(int argc, char **argv) goto fail; printf("okay\n"); - printf("%-40s", "Testing rep movsw..."); - instr[0] = 0xf3; instr[1] = 0x66; instr[2] = 0xa5; - *res = 0x22334455; - regs.eflags = 0x200; - regs.ecx = 23; - regs.eip = (unsigned long)&instr[0]; - regs.esi = (unsigned long)res + 0; - regs.edi = (unsigned long)res + 2; - rc = x86_emulate(&ctxt, &emulops); - if ( (rc != X86EMUL_OKAY) || - (*res != 0x44554455) || - (regs.eflags != 0x200) || - (regs.ecx != 22) || - (regs.esi != ((unsigned long)res + 2)) || - (regs.edi != ((unsigned long)res + 4)) || - (regs.eip != (unsigned long)&instr[0]) ) - goto fail; - printf("okay\n"); - printf("%-40s", "Testing btrl $0x1,(%edi)..."); instr[0] = 0x0f; instr[1] = 0xba; instr[2] = 0x37; instr[3] = 0x01; *res = 0x2233445F; @@ -601,6 +716,48 @@ int main(int argc, char **argv) printf("okay\n"); #endif + printf("%-40s", "Testing xadd %ax,(%ecx)..."); + instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0xc1; instr[3] = 0x01; + regs.eflags = 0x200; + regs.eip = (unsigned long)&instr[0]; + regs.ecx = (unsigned long)res; + regs.eax = 0x12345678; + *res = 0x11111111; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || + (*res != 0x11116789) || + (regs.eax != 0x12341111) || + ((regs.eflags&0x240) != 0x200) || + (regs.eip != (unsigned long)&instr[4]) ) + goto fail; + printf("okay\n"); + + if ( !emulops.rmw ) + { + printf("[Switching to read-modify-write mode]\n"); + emulops.rmw = rmw; + 
goto rmw_restart; + } + + printf("%-40s", "Testing rep movsw..."); + instr[0] = 0xf3; instr[1] = 0x66; instr[2] = 0xa5; + *res = 0x22334455; + regs.eflags = 0x200; + regs.ecx = 23; + regs.eip = (unsigned long)&instr[0]; + regs.esi = (unsigned long)res + 0; + regs.edi = (unsigned long)res + 2; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || + (*res != 0x44554455) || + (regs.eflags != 0x200) || + (regs.ecx != 22) || + (regs.esi != ((unsigned long)res + 2)) || + (regs.edi != ((unsigned long)res + 4)) || + (regs.eip != (unsigned long)&instr[0]) ) + goto fail; + printf("okay\n"); + res[0] = 0x12345678; res[1] = 0x87654321; @@ -726,22 +883,6 @@ int main(int argc, char **argv) #endif printf("okay\n"); - printf("%-40s", "Testing xadd %ax,(%ecx)..."); - instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0xc1; instr[3] = 0x01; - regs.eflags = 0x200; - regs.eip = (unsigned long)&instr[0]; - regs.ecx = (unsigned long)res; - regs.eax = 0x12345678; - *res = 0x11111111; - rc = x86_emulate(&ctxt, &emulops); - if ( (rc != X86EMUL_OKAY) || - (*res != 0x11116789) || - (regs.eax != 0x12341111) || - ((regs.eflags&0x240) != 0x200) || - (regs.eip != (unsigned long)&instr[4]) ) - goto fail; - printf("okay\n"); - printf("%-40s", "Testing dec %ax..."); #ifndef __x86_64__ instr[0] = 0x66; instr[1] = 0x48; --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -3150,6 +3150,7 @@ x86_emulate( struct operand src = { .reg = PTR_POISON }; struct operand dst = { .reg = PTR_POISON }; unsigned long cr4; + enum x86_rmw_op rmw = x86_rmw_none; enum x86_emulate_fpu_type fpu_type = X86EMUL_FPU_none; struct x86_emulate_stub stub = {}; DECLARE_ALIGNED(mmval_t, mmval); @@ -3256,7 +3257,7 @@ x86_emulate( break; } - /* Decode and fetch the destination operand: register or memory. */ + /* Decode (but don't fetch) the destination operand: register or memory. 
*/ switch ( d & DstMask ) { case DstNone: /* case DstImplicit: */ @@ -3339,7 +3340,13 @@ x86_emulate( case 8: dst.val = *(uint64_t *)dst.reg; break; } } - else if ( !(d & Mov) ) /* optimisation - avoid slow emulated read */ + else if ( d & Mov ) /* optimisation - avoid slow emulated read */ + { + /* Lock prefix is allowed only on RMW instructions. */ + generate_exception_if(lock_prefix, EXC_UD); + fail_if(!ops->write); + } + else if ( !ops->rmw ) { fail_if(lock_prefix ? !ops->cmpxchg : !ops->write); if ( (rc = read_ulong(dst.mem.seg, dst.mem.off, @@ -3347,12 +3354,6 @@ x86_emulate( goto done; dst.orig_val = dst.val; } - else - { - /* Lock prefix is allowed only on RMW instructions. */ - generate_exception_if(lock_prefix, EXC_UD); - fail_if(!ops->write); - } break; } @@ -3365,35 +3366,83 @@ x86_emulate( unsigned int i, n; unsigned long dummy; - case 0x00 ... 0x05: add: /* add */ - emulate_2op_SrcV("add", src, dst, _regs.eflags); + case 0x00: case 0x01: add: /* add reg,mem */ + if ( ops->rmw && dst.type == OP_MEM ) + rmw = x86_rmw_add; + else + { + case 0x02 ... 0x05: /* add */ + emulate_2op_SrcV("add", src, dst, _regs.eflags); + } break; - case 0x08 ... 0x0d: or: /* or */ - emulate_2op_SrcV("or", src, dst, _regs.eflags); + case 0x08: case 0x09: or: /* or reg,mem */ + if ( ops->rmw && dst.type == OP_MEM ) + rmw = x86_rmw_or; + else + { + case 0x0a ... 0x0d: /* or */ + emulate_2op_SrcV("or", src, dst, _regs.eflags); + } break; - case 0x10 ... 0x15: adc: /* adc */ - emulate_2op_SrcV("adc", src, dst, _regs.eflags); + case 0x10: case 0x11: adc: /* adc reg,mem */ + if ( ops->rmw && dst.type == OP_MEM ) + rmw = x86_rmw_adc; + else + { + case 0x12 ... 0x15: /* adc */ + emulate_2op_SrcV("adc", src, dst, _regs.eflags); + } break; - case 0x18 ... 0x1d: sbb: /* sbb */ - emulate_2op_SrcV("sbb", src, dst, _regs.eflags); + case 0x18: case 0x19: sbb: /* sbb reg,mem */ + if ( ops->rmw && dst.type == OP_MEM ) + rmw = x86_rmw_sbb; + else + { + case 0x1a ... 
0x1d: /* sbb */ + emulate_2op_SrcV("sbb", src, dst, _regs.eflags); + } break; - case 0x20 ... 0x25: and: /* and */ - emulate_2op_SrcV("and", src, dst, _regs.eflags); + case 0x20: case 0x21: and: /* and reg,mem */ + if ( ops->rmw && dst.type == OP_MEM ) + rmw = x86_rmw_and; + else + { + case 0x22 ... 0x25: /* and */ + emulate_2op_SrcV("and", src, dst, _regs.eflags); + } break; - case 0x28 ... 0x2d: sub: /* sub */ - emulate_2op_SrcV("sub", src, dst, _regs.eflags); + case 0x28: case 0x29: sub: /* sub reg,mem */ + if ( ops->rmw && dst.type == OP_MEM ) + rmw = x86_rmw_sub; + else + { + case 0x2a ... 0x2d: /* sub */ + emulate_2op_SrcV("sub", src, dst, _regs.eflags); + } break; - case 0x30 ... 0x35: xor: /* xor */ - emulate_2op_SrcV("xor", src, dst, _regs.eflags); + case 0x30: case 0x31: xor: /* xor reg,mem */ + if ( ops->rmw && dst.type == OP_MEM ) + rmw = x86_rmw_xor; + else + { + case 0x32 ... 0x35: /* xor */ + emulate_2op_SrcV("xor", src, dst, _regs.eflags); + } break; - case 0x38 ... 0x3d: cmp: /* cmp */ + case 0x38: case 0x39: cmp: /* cmp reg,mem */ + if ( ops->rmw && dst.type == OP_MEM && + (rc = read_ulong(dst.mem.seg, dst.mem.off, &dst.val, + dst.bytes, ctxt, ops)) != X86EMUL_OKAY ) + goto done; + /* fall through */ + case 0x3a ... 0x3d: /* cmp */ generate_exception_if(lock_prefix, EXC_UD); emulate_2op_SrcV("cmp", src, dst, _regs.eflags); dst.type = OP_NONE; @@ -3709,6 +3758,11 @@ x86_emulate( break; case 0x86 ... 0x87: xchg: /* xchg */ + if ( ops->rmw && dst.type == OP_MEM ) + { + rmw = x86_rmw_xchg; + break; + } /* Write back the register source. */ switch ( dst.bytes ) { @@ -4036,6 +4090,13 @@ x86_emulate( case 0xc0 ... 0xc1: grp2: /* Grp2 */ generate_exception_if(lock_prefix, EXC_UD); + + if ( ops->rmw && dst.type == OP_MEM && + (rc = read_ulong(dst.mem.seg, dst.mem.off, &dst.val, + dst.bytes, ctxt, ops)) != X86EMUL_OKAY ) + goto done; + dst.orig_val = dst.val; + switch ( modrm_reg & 7 ) { case 0: /* rol */ @@ -4674,12 +4735,22 @@ x86_emulate( case 0 ... 
1: /* test */ generate_exception_if(lock_prefix, EXC_UD); + if ( ops->rmw && dst.type == OP_MEM && + (rc = read_ulong(dst.mem.seg, dst.mem.off, &dst.val, + dst.bytes, ctxt, ops)) != X86EMUL_OKAY ) + goto done; goto test; case 2: /* not */ - dst.val = ~dst.val; + if ( ops->rmw && dst.type == OP_MEM ) + rmw = x86_rmw_not; + else + dst.val = ~dst.val; break; case 3: /* neg */ - emulate_1op("neg", dst, _regs.eflags); + if ( ops->rmw && dst.type == OP_MEM ) + rmw = x86_rmw_neg; + else + emulate_1op("neg", dst, _regs.eflags); break; case 4: /* mul */ _regs.eflags &= ~(X86_EFLAGS_OF | X86_EFLAGS_CF); @@ -4903,10 +4974,16 @@ x86_emulate( switch ( modrm_reg & 7 ) { case 0: /* inc */ - emulate_1op("inc", dst, _regs.eflags); + if ( ops->rmw && dst.type == OP_MEM ) + rmw = x86_rmw_inc; + else + emulate_1op("inc", dst, _regs.eflags); break; case 1: /* dec */ - emulate_1op("dec", dst, _regs.eflags); + if ( ops->rmw && dst.type == OP_MEM ) + rmw = x86_rmw_dec; + else + emulate_1op("dec", dst, _regs.eflags); break; case 2: /* call (near) */ dst.val = _regs.r(ip); @@ -6613,6 +6690,12 @@ x86_emulate( case X86EMUL_OPC(0x0f, 0xa3): bt: /* bt */ generate_exception_if(lock_prefix, EXC_UD); + + if ( ops->rmw && dst.type == OP_MEM && + (rc = read_ulong(dst.mem.seg, dst.mem.off, &dst.val, + dst.bytes, ctxt, ops)) != X86EMUL_OKAY ) + goto done; + emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags); dst.type = OP_NONE; break; @@ -6624,6 +6707,12 @@ x86_emulate( uint8_t shift, width = dst.bytes << 3; generate_exception_if(lock_prefix, EXC_UD); + + if ( ops->rmw && dst.type == OP_MEM && + (rc = read_ulong(dst.mem.seg, dst.mem.off, &dst.val, + dst.bytes, ctxt, ops)) != X86EMUL_OKAY ) + goto done; + if ( b & 1 ) shift = _regs.cl; else @@ -6655,7 +6744,10 @@ x86_emulate( } case X86EMUL_OPC(0x0f, 0xab): bts: /* bts */ - emulate_2op_SrcV_nobyte("bts", src, dst, _regs.eflags); + if ( ops->rmw && dst.type == OP_MEM ) + rmw = x86_rmw_bts; + else + emulate_2op_SrcV_nobyte("bts", src, dst, 
_regs.eflags); break; case X86EMUL_OPC(0x0f, 0xae): case X86EMUL_OPC_66(0x0f, 0xae): /* Grp15 */ @@ -6779,6 +6871,12 @@ x86_emulate( case X86EMUL_OPC(0x0f, 0xb0): case X86EMUL_OPC(0x0f, 0xb1): /* cmpxchg */ fail_if(!ops->cmpxchg); + + if ( ops->rmw && dst.type == OP_MEM && + (rc = read_ulong(dst.mem.seg, dst.mem.off, &dst.val, + dst.bytes, ctxt, ops)) != X86EMUL_OKAY ) + goto done; + /* Save real source value, then compare EAX against destination. */ src.orig_val = src.val; src.val = _regs.r(ax); @@ -6813,7 +6911,10 @@ x86_emulate( goto les; case X86EMUL_OPC(0x0f, 0xb3): btr: /* btr */ - emulate_2op_SrcV_nobyte("btr", src, dst, _regs.eflags); + if ( ops->rmw && dst.type == OP_MEM ) + rmw = x86_rmw_btr; + else + emulate_2op_SrcV_nobyte("btr", src, dst, _regs.eflags); break; case X86EMUL_OPC(0x0f, 0xb6): /* movzx rm8,r{16,32,64} */ @@ -6847,7 +6948,10 @@ x86_emulate( break; case X86EMUL_OPC(0x0f, 0xbb): btc: /* btc */ - emulate_2op_SrcV_nobyte("btc", src, dst, _regs.eflags); + if ( ops->rmw && dst.type == OP_MEM ) + rmw = x86_rmw_btc; + else + emulate_2op_SrcV_nobyte("btc", src, dst, _regs.eflags); break; case X86EMUL_OPC(0x0f, 0xbc): /* bsf or tzcnt */ @@ -6920,6 +7024,11 @@ x86_emulate( break; case X86EMUL_OPC(0x0f, 0xc0): case X86EMUL_OPC(0x0f, 0xc1): /* xadd */ + if ( ops->rmw && dst.type == OP_MEM ) + { + rmw = x86_rmw_xadd; + break; + } /* Write back the register source. */ switch ( dst.bytes ) { @@ -8482,7 +8591,40 @@ x86_emulate( goto done; } - if ( state->simd_size ) + if ( rmw != x86_rmw_none ) + { + if ( lock_prefix ) + rmw |= x86_rmw_lock; + + rc = ops->rmw(rmw, dst.mem.seg, dst.mem.off, &src.val, dst.bytes, + &_regs.eflags, ctxt); + if ( rc != X86EMUL_OKAY ) + goto done; + + _regs.eflags = (ctxt->regs->eflags & ~X86_EFLAGS_ARITH_MASK) | + (_regs.eflags & X86_EFLAGS_ARITH_MASK); + + /* Some operations require a register to be written. 
*/ + switch ( rmw ) + { + case x86_rmw_xchg: + case x86_rmw_xadd: + switch ( dst.bytes ) + { + case 1: *(uint8_t *)src.reg = (uint8_t)src.val; break; + case 2: *(uint16_t *)src.reg = (uint16_t)src.val; break; + case 4: *src.reg = (uint32_t)src.val; break; /* 64b reg: zero-extend */ + case 8: *src.reg = src.val; break; + } + break; + + default: + break; + } + + dst.type = OP_NONE; + } + else if ( state->simd_size ) { generate_exception_if(!op_bytes, EXC_UD); generate_exception_if(vex.opcx && (d & TwoOp) && vex.reg != 0xf, --- a/xen/arch/x86/x86_emulate/x86_emulate.h +++ b/xen/arch/x86/x86_emulate/x86_emulate.h @@ -114,6 +114,27 @@ struct segment_register { uint64_t base; }; +enum x86_rmw_op { + x86_rmw_none, + x86_rmw_adc, + x86_rmw_add, + x86_rmw_and, + x86_rmw_btc, + x86_rmw_btr, + x86_rmw_bts, + x86_rmw_dec, + x86_rmw_inc, + x86_rmw_neg, + x86_rmw_not, + x86_rmw_or, + x86_rmw_sub, + x86_rmw_sbb, + x86_rmw_xadd, + x86_rmw_xchg, + x86_rmw_xor, + x86_rmw_lock = 0x40, +}; + struct x86_emul_fpu_aux { unsigned long ip, dp; uint16_t cs, ds; @@ -230,6 +251,20 @@ struct x86_emulate_ops struct x86_emulate_ctxt *ctxt); /* + * rmw: Emulate a memory read-modify-write. + * @op: Operation to carry out. + * @bytes: Access length (0 < @bytes <= sizeof(long)). + */ + int (*rmw)( + enum x86_rmw_op op, + enum x86_segment seg, + unsigned long offset, + void *p_data, + unsigned int bytes, + uint32_t *eflags, + struct x86_emulate_ctxt *ctxt); + + /* * cmpxchg: Emulate an atomic (LOCKed) CMPXCHG operation. * @p_old: [IN ] Pointer to value expected to be current at @addr. * [OUT] Pointer to value found at @addr (may always be Attachment:
x86emul-RMW.patch _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |