x86emul: generate and make use of a canonical opcode representation This representation is then being made available to interested callers, to facilitate replacing their custom decoding. This entails combining the three main switch statements into one. Signed-off-by: Jan Beulich --- v2: Extend comments. Use uint8_t cast in X86EMUL_OPC(). Rename X86EMUL_OPC_KIND_MASK to X86EMUL_OPC_ENCODING_MASK. Add X86EMUL_OPC_LEGACY_. --- a/tools/tests/x86_emulator/x86_emulate.c +++ b/tools/tests/x86_emulator/x86_emulate.c @@ -14,6 +14,9 @@ typedef bool bool_t; #define ASSERT assert #define ASSERT_UNREACHABLE() assert(!__LINE__) +#define MASK_EXTR(v, m) (((v) & (m)) / ((m) & -(m))) +#define MASK_INSR(v, m) (((v) * ((m) & -(m))) & (m)) + #define cpu_has_amd_erratum(nr) 0 #define mark_regs_dirty(r) ((void)(r)) --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -1626,7 +1626,6 @@ struct x86_emulate_state { ext_8f09, ext_8f0a, } ext; - uint8_t opcode; uint8_t modrm, modrm_mod, modrm_reg, modrm_rm; uint8_t rex_prefix; bool lock_prefix; @@ -1672,7 +1671,7 @@ x86_decode_onebyte( { int rc = X86EMUL_OKAY; - switch ( state->opcode ) + switch ( ctxt->opcode ) { case 0x9a: /* call (far, absolute) */ case 0xea: /* jmp (far, absolute) */ @@ -1711,11 +1710,9 @@ x86_decode_twobyte( { int rc = X86EMUL_OKAY; - switch ( state->opcode ) + switch ( ctxt->opcode & X86EMUL_OPC_MASK ) { case 0x78: - if ( vex.opcx ) - break; switch ( vex.pfx ) { case vex_66: /* extrq $imm8, $imm8, xmm */ @@ -1724,7 +1721,23 @@ x86_decode_twobyte( imm2 = insn_fetch_type(uint8_t); break; } - break; + /* fall through */ + case 0x10 ... 0x18: + case 0x28 ... 0x2f: + case 0x50 ... 0x77: + case 0x79 ... 0x7f: + case 0xae: + case 0xc2: + case 0xc4 ... 0xc7: + case 0xd0 ... 
0xfe: + ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK); + break; + /* Intentionally not handling here despite being modified by F3: + case 0xb8: jmpe / popcnt + case 0xbc: bsf / tzcnt + case 0xbd: bsr / lzcnt + * They're being dealt with in the execution phase (if at all). + */ } done: @@ -1732,13 +1745,35 @@ x86_decode_twobyte( } static int +x86_decode_0f38( + struct x86_emulate_state *state, + struct x86_emulate_ctxt *ctxt, + const struct x86_emulate_ops *ops) +{ + switch ( ctxt->opcode & X86EMUL_OPC_MASK ) + { + case 0x00 ... 0xef: + case 0xf2 ... 0xff: + ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK); + break; + + case 0xf0: case 0xf1: /* movbe / crc32 */ + if ( rep_prefix() ) + ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK); + break; + } + + return X86EMUL_OKAY; +} + +static int x86_decode( struct x86_emulate_state *state, struct x86_emulate_ctxt *ctxt, const struct x86_emulate_ops *ops) { uint8_t b, d, sib, sib_index, sib_base; - unsigned int def_op_bytes, def_ad_bytes; + unsigned int def_op_bytes, def_ad_bytes, opcode; int rc = X86EMUL_OKAY; memset(state, 0, sizeof(*state)); @@ -1819,29 +1854,31 @@ x86_decode( /* Opcode byte(s). */ d = opcode_table[b]; - if ( d == 0 ) + if ( d == 0 && b == 0x0f) { - /* Two-byte opcode? */ - if ( b == 0x0f ) + /* Two-byte opcode. 
*/ + b = insn_fetch_type(uint8_t); + d = twobyte_table[b]; + switch ( b ) { + default: + opcode = b | MASK_INSR(0x0f, X86EMUL_OPC_EXT_MASK); + ext = ext_0f; + break; + case 0x38: b = insn_fetch_type(uint8_t); - d = twobyte_table[b]; - switch ( b ) - { - default: - ext = ext_0f; - break; - case 0x38: - b = insn_fetch_type(uint8_t); - ext = ext_0f38; - break; - case 0x3a: - b = insn_fetch_type(uint8_t); - ext = ext_0f3a; - break; - } + opcode = b | MASK_INSR(0x0f38, X86EMUL_OPC_EXT_MASK); + ext = ext_0f38; + break; + case 0x3a: + b = insn_fetch_type(uint8_t); + opcode = b | MASK_INSR(0x0f3a, X86EMUL_OPC_EXT_MASK); + ext = ext_0f3a; + break; } } + else + opcode = b; /* ModRM and SIB bytes. */ if ( d & ModRM ) @@ -1870,6 +1907,7 @@ x86_decode( vex.raw[0] = modrm; if ( b == 0xc5 ) { + opcode = X86EMUL_OPC_VEX_; vex.raw[1] = modrm; vex.opcx = vex_0f; vex.x = 1; @@ -1891,31 +1929,44 @@ x86_decode( op_bytes = 8; } } - if ( b == 0x62 ) + switch ( b ) { + case 0x62: + opcode = X86EMUL_OPC_EVEX_; evex.raw[0] = vex.raw[0]; evex.raw[1] = vex.raw[1]; evex.raw[2] = insn_fetch_type(uint8_t); vex.opcx = evex.opcx; + break; + case 0xc4: + opcode = X86EMUL_OPC_VEX_; + break; + default: + opcode = 0; + break; } } if ( mode_64bit() && !vex.r ) rex_prefix |= REX_R; b = insn_fetch_type(uint8_t); + opcode |= b | MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK); ext = vex.opcx; if ( b != 0x8f ) { switch ( ext ) { case vex_0f: + opcode |= MASK_INSR(0x0f, X86EMUL_OPC_EXT_MASK); d = twobyte_table[b]; break; case vex_0f38: + opcode |= MASK_INSR(0x0f38, X86EMUL_OPC_EXT_MASK); d = twobyte_table[0x38]; break; case vex_0f3a: + opcode |= MASK_INSR(0x0f3a, X86EMUL_OPC_EXT_MASK); d = twobyte_table[0x3a]; break; default: @@ -1925,7 +1976,11 @@ x86_decode( } else if ( ext < ext_8f08 + sizeof(xop_table) / sizeof(*xop_table) ) + { + opcode |= MASK_INSR(0x8f08 + ext - ext_8f08, + X86EMUL_OPC_EXT_MASK); d = xop_table[ext - ext_8f08]; + } else { rc = X86EMUL_UNHANDLEABLE; @@ -1995,9 +2050,7 @@ x86_decode( break; 
case ext_0f38: - if ( vex.opcx ) - break; - switch ( b ) + switch ( opcode & X86EMUL_OPC_MASK ) { case 0xf0: /* movbe / crc32 */ d |= repne_prefix() ? ByteOp : Mov; @@ -2006,8 +2059,6 @@ x86_decode( if ( !repne_prefix() ) d = (d & ~(DstMask | SrcMask)) | DstMem | SrcReg | Mov; break; - default: /* Until it is worth making this table based ... */ - return X86EMUL_UNHANDLEABLE; } break; @@ -2166,7 +2217,7 @@ x86_decode( break; } - state->opcode = b; + ctxt->opcode = opcode; state->desc = d; switch ( ext ) @@ -2180,7 +2231,14 @@ x86_decode( break; case ext_0f38: + rc = x86_decode_0f38(state, ctxt, ops); + break; + case ext_0f3a: + if ( !vex.opcx ) + ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK); + break; + case ext_8f08: case ext_8f09: case ext_8f0a: @@ -2222,7 +2280,7 @@ x86_emulate( /* Sync rIP to post decode value. */ _regs.eip = state.eip; - b = state.opcode; + b = ctxt->opcode; d = state.desc; #define state (&state) @@ -2389,24 +2447,7 @@ x86_emulate( break; } - switch ( ext ) - { - case ext_none: - break; - case ext_0f: - goto ext_0f_insn; - case ext_0f38: - goto ext_0f38_insn; - default: - ASSERT_UNREACHABLE(); - case ext_0f3a: - case ext_8f08: - case ext_8f09: - case ext_8f0a: - goto cannot_emulate; - } - - switch ( b ) + switch ( ctxt->opcode ) { struct segment_register cs; @@ -4108,15 +4149,7 @@ x86_emulate( } break; - default: - goto cannot_emulate; - } - goto writeback; - - ext_0f_insn: - switch ( b ) - { - case 0x00: /* Grp6 */ + case X86EMUL_OPC(0x0f, 0x00): /* Grp6 */ fail_if((modrm_reg & 6) != 2); generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1); generate_exception_if(!mode_ring0(), EXC_GP, 0); @@ -4125,7 +4158,7 @@ x86_emulate( goto done; break; - case 0x01: /* Grp7 */ { + case X86EMUL_OPC(0x0f, 0x01): /* Grp7 */ { struct segment_register reg; unsigned long base, limit, cr0, cr0w; @@ -4270,7 +4303,7 @@ x86_emulate( break; } - case 0x05: /* syscall */ { + case X86EMUL_OPC(0x0f, 0x05): /* syscall */ { uint64_t msr_content; struct 
segment_register cs, ss; @@ -4330,7 +4363,7 @@ x86_emulate( break; } - case 0x06: /* clts */ + case X86EMUL_OPC(0x0f, 0x06): /* clts */ generate_exception_if(!mode_ring0(), EXC_GP, 0); fail_if((ops->read_cr == NULL) || (ops->write_cr == NULL)); if ( (rc = ops->read_cr(0, &dst.val, ctxt)) || @@ -4338,42 +4371,64 @@ x86_emulate( goto done; break; - case 0x08: /* invd */ - case 0x09: /* wbinvd */ + case X86EMUL_OPC(0x0f, 0x08): /* invd */ + case X86EMUL_OPC(0x0f, 0x09): /* wbinvd */ generate_exception_if(!mode_ring0(), EXC_GP, 0); fail_if(ops->wbinvd == NULL); if ( (rc = ops->wbinvd(ctxt)) != 0 ) goto done; break; - case 0x0b: /* ud2 */ - case 0xb9: /* ud1 */ - case 0xff: /* ud0 */ + case X86EMUL_OPC(0x0f, 0x0b): /* ud2 */ + case X86EMUL_OPC(0x0f, 0xb9): /* ud1 */ + case X86EMUL_OPC(0x0f, 0xff): /* ud0 */ generate_exception_if(1, EXC_UD, -1); - case 0x0d: /* GrpP (prefetch) */ - case 0x18: /* Grp16 (prefetch/nop) */ - case 0x19 ... 0x1f: /* nop (amd-defined) */ + case X86EMUL_OPC(0x0f, 0x0d): /* GrpP (prefetch) */ + case X86EMUL_OPC(0x0f, 0x18): /* Grp16 (prefetch/nop) */ + case X86EMUL_OPC(0x0f, 0x19) ... 
X86EMUL_OPC(0x0f, 0x1f): /* nop */ break; - case 0x2b: /* {,v}movntp{s,d} xmm,m128 */ - /* vmovntp{s,d} ymm,m256 */ + case X86EMUL_OPC(0x0f, 0x2b): /* movntps xmm,m128 */ + case X86EMUL_OPC_VEX(0x0f, 0x2b): /* vmovntps xmm,m128 */ + /* vmovntps ymm,m256 */ + case X86EMUL_OPC_66(0x0f, 0x2b): /* movntpd xmm,m128 */ + case X86EMUL_OPC_VEX_66(0x0f, 0x2b): /* vmovntpd xmm,m128 */ + /* vmovntpd ymm,m256 */ fail_if(ea.type != OP_MEM); /* fall through */ - case 0x28: /* {,v}movap{s,d} xmm/m128,xmm */ - /* vmovap{s,d} ymm/m256,ymm */ - case 0x29: /* {,v}movap{s,d} xmm,xmm/m128 */ - /* vmovap{s,d} ymm,ymm/m256 */ - fail_if(vex.pfx & VEX_PREFIX_SCALAR_MASK); - /* fall through */ - case 0x10: /* {,v}movup{s,d} xmm/m128,xmm */ - /* vmovup{s,d} ymm/m256,ymm */ - /* {,v}movss xmm/m32,xmm */ - /* {,v}movsd xmm/m64,xmm */ - case 0x11: /* {,v}movup{s,d} xmm,xmm/m128 */ - /* vmovup{s,d} ymm,ymm/m256 */ - /* {,v}movss xmm,xmm/m32 */ - /* {,v}movsd xmm,xmm/m64 */ + case X86EMUL_OPC(0x0f, 0x28): /* movaps xmm/m128,xmm */ + case X86EMUL_OPC_VEX(0x0f, 0x28): /* vmovaps xmm/m128,xmm */ + /* vmovaps ymm/m256,ymm */ + case X86EMUL_OPC_66(0x0f, 0x28): /* movapd xmm/m128,xmm */ + case X86EMUL_OPC_VEX_66(0x0f, 0x28): /* vmovapd xmm/m128,xmm */ + /* vmovapd ymm/m256,ymm */ + case X86EMUL_OPC(0x0f, 0x29): /* movaps xmm,xmm/m128 */ + case X86EMUL_OPC_VEX(0x0f, 0x29): /* vmovaps xmm,xmm/m128 */ + /* vmovaps ymm,ymm/m256 */ + case X86EMUL_OPC_66(0x0f, 0x29): /* movapd xmm,xmm/m128 */ + case X86EMUL_OPC_VEX_66(0x0f, 0x29): /* vmovapd xmm,xmm/m128 */ + /* vmovapd ymm,ymm/m256 */ + case X86EMUL_OPC(0x0f, 0x10): /* movups xmm/m128,xmm */ + case X86EMUL_OPC_VEX(0x0f, 0x10): /* vmovups xmm/m128,xmm */ + /* vmovups ymm/m256,ymm */ + case X86EMUL_OPC_66(0x0f, 0x10): /* movupd xmm/m128,xmm */ + case X86EMUL_OPC_VEX_66(0x0f, 0x10): /* vmovupd xmm/m128,xmm */ + /* vmovupd ymm/m256,ymm */ + case X86EMUL_OPC_F3(0x0f, 0x10): /* movss xmm/m32,xmm */ + case X86EMUL_OPC_VEX_F3(0x0f, 0x10): /* vmovss xmm/m32,xmm */ + 
case X86EMUL_OPC_F2(0x0f, 0x10): /* movsd xmm/m64,xmm */ + case X86EMUL_OPC_VEX_F2(0x0f, 0x10): /* vmovsd xmm/m64,xmm */ + case X86EMUL_OPC(0x0f, 0x11): /* movups xmm,xmm/m128 */ + case X86EMUL_OPC_VEX(0x0f, 0x11): /* vmovups xmm,xmm/m128 */ + /* vmovups ymm,ymm/m256 */ + case X86EMUL_OPC_66(0x0f, 0x11): /* movupd xmm,xmm/m128 */ + case X86EMUL_OPC_VEX_66(0x0f, 0x11): /* vmovupd xmm,xmm/m128 */ + /* vmovupd ymm,ymm/m256 */ + case X86EMUL_OPC_F3(0x0f, 0x11): /* movss xmm,xmm/m32 */ + case X86EMUL_OPC_VEX_F3(0x0f, 0x11): /* vmovss xmm,xmm/m32 */ + case X86EMUL_OPC_F2(0x0f, 0x11): /* movsd xmm,xmm/m64 */ + case X86EMUL_OPC_VEX_F2(0x0f, 0x11): /* vmovsd xmm,xmm/m64 */ { uint8_t *buf = get_stub(stub); struct fpu_insn_ctxt fic = { .insn_bytes = 5 }; @@ -4396,10 +4451,9 @@ x86_emulate( } else { - fail_if((vex.opcx != vex_0f) || - ((vex.reg != 0xf) && - ((ea.type == OP_MEM) || - !(vex.pfx & VEX_PREFIX_SCALAR_MASK)))); + fail_if((vex.reg != 0xf) && + ((ea.type == OP_MEM) || + !(vex.pfx & VEX_PREFIX_SCALAR_MASK))); host_and_vcpu_must_have(avx); get_fpu(X86EMUL_FPU_ymm, &fic); ea.bytes = 16 << vex.l; @@ -4437,10 +4491,10 @@ x86_emulate( break; } - case 0x20: /* mov cr,reg */ - case 0x21: /* mov dr,reg */ - case 0x22: /* mov reg,cr */ - case 0x23: /* mov reg,dr */ + case X86EMUL_OPC(0x0f, 0x20): /* mov cr,reg */ + case X86EMUL_OPC(0x0f, 0x21): /* mov dr,reg */ + case X86EMUL_OPC(0x0f, 0x22): /* mov reg,cr */ + case X86EMUL_OPC(0x0f, 0x23): /* mov reg,dr */ generate_exception_if(ea.type != OP_REG, EXC_UD, -1); generate_exception_if(!mode_ring0(), EXC_GP, 0); modrm_reg |= lock_prefix << 3; @@ -4476,7 +4530,7 @@ x86_emulate( goto done; break; - case 0x30: /* wrmsr */ { + case X86EMUL_OPC(0x0f, 0x30): /* wrmsr */ { uint64_t val = ((uint64_t)_regs.edx << 32) | (uint32_t)_regs.eax; generate_exception_if(!mode_ring0(), EXC_GP, 0); fail_if(ops->write_msr == NULL); @@ -4485,7 +4539,7 @@ x86_emulate( break; } - case 0x31: rdtsc: /* rdtsc */ { + case X86EMUL_OPC(0x0f, 0x31): rdtsc: /* 
rdtsc */ { unsigned long cr4; uint64_t val; if ( !mode_ring0() ) @@ -4503,7 +4557,7 @@ x86_emulate( break; } - case 0x32: /* rdmsr */ { + case X86EMUL_OPC(0x0f, 0x32): /* rdmsr */ { uint64_t val; generate_exception_if(!mode_ring0(), EXC_GP, 0); fail_if(ops->read_msr == NULL); @@ -4514,13 +4568,13 @@ x86_emulate( break; } - case 0x40 ... 0x4f: /* cmovcc */ + case X86EMUL_OPC(0x0f, 0x40) ... X86EMUL_OPC(0x0f, 0x4f): /* cmovcc */ dst.val = src.val; if ( !test_cc(b, _regs.eflags) ) dst.type = OP_NONE; break; - case 0x34: /* sysenter */ { + case X86EMUL_OPC(0x0f, 0x34): /* sysenter */ { uint64_t msr_content; struct segment_register cs, ss; int lm; @@ -4568,7 +4622,7 @@ x86_emulate( break; } - case 0x35: /* sysexit */ { + case X86EMUL_OPC(0x0f, 0x35): /* sysexit */ { uint64_t msr_content; struct segment_register cs, ss; bool_t user64 = !!(rex_prefix & REX_W); @@ -4607,18 +4661,26 @@ x86_emulate( break; } - case 0xe7: /* movntq mm,m64 */ - /* {,v}movntdq xmm,m128 */ - /* vmovntdq ymm,m256 */ + case X86EMUL_OPC(0x0f, 0xe7): /* movntq mm,m64 */ + case X86EMUL_OPC_66(0x0f, 0xe7): /* movntdq xmm,m128 */ + case X86EMUL_OPC_VEX_66(0x0f, 0xe7): /* vmovntdq xmm,m128 */ + /* vmovntdq ymm,m256 */ fail_if(ea.type != OP_MEM); - fail_if(vex.pfx == vex_f3); /* fall through */ - case 0x6f: /* movq mm/m64,mm */ - /* {,v}movdq{a,u} xmm/m128,xmm */ - /* vmovdq{a,u} ymm/m256,ymm */ - case 0x7f: /* movq mm,mm/m64 */ - /* {,v}movdq{a,u} xmm,xmm/m128 */ - /* vmovdq{a,u} ymm,ymm/m256 */ + case X86EMUL_OPC(0x0f, 0x6f): /* movq mm/m64,mm */ + case X86EMUL_OPC_66(0x0f, 0x6f): /* movdqa xmm/m128,xmm */ + case X86EMUL_OPC_F3(0x0f, 0x6f): /* movdqu xmm/m128,xmm */ + case X86EMUL_OPC_VEX_66(0x0f, 0x6f): /* vmovdqa xmm/m128,xmm */ + /* vmovdqa ymm/m256,ymm */ + case X86EMUL_OPC_VEX_F3(0x0f, 0x6f): /* vmovdqu xmm/m128,xmm */ + /* vmovdqu ymm/m256,ymm */ + case X86EMUL_OPC(0x0f, 0x7f): /* movq mm,mm/m64 */ + case X86EMUL_OPC_66(0x0f, 0x7f): /* movdqa xmm,xmm/m128 */ + case X86EMUL_OPC_VEX_66(0x0f, 0x7f): 
/* vmovdqa xmm,xmm/m128 */ + /* vmovdqa ymm,ymm/m256 */ + case X86EMUL_OPC_F3(0x0f, 0x7f): /* movdqu xmm,xmm/m128 */ + case X86EMUL_OPC_VEX_F3(0x0f, 0x7f): /* vmovdqu xmm,xmm/m128 */ + /* vmovdqu ymm,ymm/m256 */ { uint8_t *buf = get_stub(stub); struct fpu_insn_ctxt fic = { .insn_bytes = 5 }; @@ -4654,8 +4716,7 @@ x86_emulate( } else { - fail_if((vex.opcx != vex_0f) || (vex.reg != 0xf) || - ((vex.pfx != vex_66) && (vex.pfx != vex_f3))); + fail_if(vex.reg != 0xf); host_and_vcpu_must_have(avx); get_fpu(X86EMUL_FPU_ymm, &fic); ea.bytes = 16 << vex.l; @@ -4691,24 +4752,24 @@ x86_emulate( break; } - case 0x80 ... 0x8f: /* jcc (near) */ + case X86EMUL_OPC(0x0f, 0x80) ... X86EMUL_OPC(0x0f, 0x8f): /* jcc (near) */ if ( test_cc(b, _regs.eflags) ) jmp_rel((int32_t)src.val); break; - case 0x90 ... 0x9f: /* setcc */ + case X86EMUL_OPC(0x0f, 0x90) ... X86EMUL_OPC(0x0f, 0x9f): /* setcc */ dst.val = test_cc(b, _regs.eflags); break; - case 0xa0: /* push %%fs */ + case X86EMUL_OPC(0x0f, 0xa0): /* push %%fs */ src.val = x86_seg_fs; goto push_seg; - case 0xa1: /* pop %%fs */ + case X86EMUL_OPC(0x0f, 0xa1): /* pop %%fs */ src.val = x86_seg_fs; goto pop_seg; - case 0xa2: /* cpuid */ { + case X86EMUL_OPC(0x0f, 0xa2): /* cpuid */ { unsigned int eax = _regs.eax, ebx = _regs.ebx; unsigned int ecx = _regs.ecx, edx = _regs.edx; fail_if(ops->cpuid == NULL); @@ -4719,15 +4780,15 @@ x86_emulate( break; } - case 0xa3: bt: /* bt */ + case X86EMUL_OPC(0x0f, 0xa3): bt: /* bt */ emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags); dst.type = OP_NONE; break; - case 0xa4: /* shld imm8,r,r/m */ - case 0xa5: /* shld %%cl,r,r/m */ - case 0xac: /* shrd imm8,r,r/m */ - case 0xad: /* shrd %%cl,r,r/m */ { + case X86EMUL_OPC(0x0f, 0xa4): /* shld imm8,r,r/m */ + case X86EMUL_OPC(0x0f, 0xa5): /* shld %%cl,r,r/m */ + case X86EMUL_OPC(0x0f, 0xac): /* shrd imm8,r,r/m */ + case X86EMUL_OPC(0x0f, 0xad): /* shrd %%cl,r,r/m */ { uint8_t shift, width = dst.bytes << 3; generate_exception_if(lock_prefix, EXC_UD, -1); @@ 
-4762,24 +4823,23 @@ x86_emulate( break; } - case 0xa8: /* push %%gs */ + case X86EMUL_OPC(0x0f, 0xa8): /* push %%gs */ src.val = x86_seg_gs; goto push_seg; - case 0xa9: /* pop %%gs */ + case X86EMUL_OPC(0x0f, 0xa9): /* pop %%gs */ src.val = x86_seg_gs; goto pop_seg; - case 0xab: bts: /* bts */ + case X86EMUL_OPC(0x0f, 0xab): bts: /* bts */ emulate_2op_SrcV_nobyte("bts", src, dst, _regs.eflags); break; - case 0xae: /* Grp15 */ + case X86EMUL_OPC(0x0f, 0xae): case X86EMUL_OPC_66(0x0f, 0xae): /* Grp15 */ switch ( modrm_reg & 7 ) { case 7: /* clflush{,opt} */ fail_if(modrm_mod == 3); - fail_if(rep_prefix()); fail_if(ops->wbinvd == NULL); if ( (rc = ops->wbinvd(ctxt)) != 0 ) goto done; @@ -4789,11 +4849,11 @@ x86_emulate( } break; - case 0xaf: /* imul */ + case X86EMUL_OPC(0x0f, 0xaf): /* imul */ emulate_2op_SrcV_srcmem("imul", src, dst, _regs.eflags); break; - case 0xb0 ... 0xb1: /* cmpxchg */ + case X86EMUL_OPC(0x0f, 0xb0): case X86EMUL_OPC(0x0f, 0xb1): /* cmpxchg */ /* Save real source value, then compare EAX against destination. */ src.orig_val = src.val; src.val = _regs.eax; @@ -4812,34 +4872,34 @@ x86_emulate( } break; - case 0xb2: /* lss */ + case X86EMUL_OPC(0x0f, 0xb2): /* lss */ dst.val = x86_seg_ss; goto les; - case 0xb3: btr: /* btr */ + case X86EMUL_OPC(0x0f, 0xb3): btr: /* btr */ emulate_2op_SrcV_nobyte("btr", src, dst, _regs.eflags); break; - case 0xb4: /* lfs */ + case X86EMUL_OPC(0x0f, 0xb4): /* lfs */ dst.val = x86_seg_fs; goto les; - case 0xb5: /* lgs */ + case X86EMUL_OPC(0x0f, 0xb5): /* lgs */ dst.val = x86_seg_gs; goto les; - case 0xb6: /* movzx rm8,r{16,32,64} */ + case X86EMUL_OPC(0x0f, 0xb6): /* movzx rm8,r{16,32,64} */ /* Recompute DstReg as we may have decoded AH/BH/CH/DH. 
*/ dst.reg = decode_register(modrm_reg, &_regs, 0); dst.bytes = op_bytes; dst.val = (uint8_t)src.val; break; - case 0xb7: /* movzx rm16,r{16,32,64} */ + case X86EMUL_OPC(0x0f, 0xb7): /* movzx rm16,r{16,32,64} */ dst.val = (uint16_t)src.val; break; - case 0xba: /* Grp8 */ + case X86EMUL_OPC(0x0f, 0xba): /* Grp8 */ switch ( modrm_reg & 7 ) { case 4: goto bt; @@ -4850,11 +4910,11 @@ x86_emulate( } break; - case 0xbb: btc: /* btc */ + case X86EMUL_OPC(0x0f, 0xbb): btc: /* btc */ emulate_2op_SrcV_nobyte("btc", src, dst, _regs.eflags); break; - case 0xbc: /* bsf or tzcnt */ { + case X86EMUL_OPC(0x0f, 0xbc): /* bsf or tzcnt */ { bool_t zf; #ifdef __GCC_ASM_FLAG_OUTPUTS__ @@ -4886,7 +4946,7 @@ x86_emulate( break; } - case 0xbd: /* bsr or lzcnt */ { + case X86EMUL_OPC(0x0f, 0xbd): /* bsr or lzcnt */ { bool_t zf; #ifdef __GCC_ASM_FLAG_OUTPUTS__ @@ -4922,18 +4982,18 @@ x86_emulate( break; } - case 0xbe: /* movsx rm8,r{16,32,64} */ + case X86EMUL_OPC(0x0f, 0xbe): /* movsx rm8,r{16,32,64} */ /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */ dst.reg = decode_register(modrm_reg, &_regs, 0); dst.bytes = op_bytes; dst.val = (int8_t)src.val; break; - case 0xbf: /* movsx rm16,r{16,32,64} */ + case X86EMUL_OPC(0x0f, 0xbf): /* movsx rm16,r{16,32,64} */ dst.val = (int16_t)src.val; break; - case 0xc0 ... 0xc1: /* xadd */ + case X86EMUL_OPC(0x0f, 0xc0): case X86EMUL_OPC(0x0f, 0xc1): /* xadd */ /* Write back the register source. */ switch ( dst.bytes ) { @@ -4944,14 +5004,14 @@ x86_emulate( } goto add; - case 0xc3: /* movnti */ + case X86EMUL_OPC(0x0f, 0xc3): /* movnti */ /* Ignore the non-temporal hint for now. */ vcpu_must_have_sse2(); generate_exception_if(dst.bytes <= 2, EXC_UD, -1); dst.val = src.val; break; - case 0xc7: /* Grp9 (cmpxchg8b/cmpxchg16b) */ { + case X86EMUL_OPC(0x0f, 0xc7): /* Grp9 (cmpxchg8b/cmpxchg16b) */ { unsigned long old[2], exp[2], new[2]; generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1); @@ -4995,7 +5055,7 @@ x86_emulate( break; } - case 0xc8 ... 
0xcf: /* bswap */ + case X86EMUL_OPC(0x0f, 0xc8) ... X86EMUL_OPC(0x0f, 0xcf): /* bswap */ dst.type = OP_REG; dst.reg = decode_register( (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0); @@ -5016,72 +5076,57 @@ x86_emulate( } break; - default: - goto cannot_emulate; - } - goto writeback; - - ext_0f38_insn: - switch ( b ) - { - case 0xf0: case 0xf1: /* movbe / crc32 */ - generate_exception_if(repe_prefix(), EXC_UD, -1); - if ( repne_prefix() ) + case X86EMUL_OPC(0x0f38, 0xf0): /* movbe m,r */ + case X86EMUL_OPC(0x0f38, 0xf1): /* movbe r,m */ + vcpu_must_have_movbe(); + switch ( op_bytes ) { - /* crc32 */ -#ifdef HAVE_GAS_SSE4_2 - host_and_vcpu_must_have(sse4_2); - dst.bytes = rex_prefix & REX_W ? 8 : 4; - switch ( op_bytes ) - { - case 1: - asm ( "crc32b %1,%k0" : "+r" (dst.val) - : "qm" (*(uint8_t *)&src.val) ); - break; - case 2: - asm ( "crc32w %1,%k0" : "+r" (dst.val) - : "rm" (*(uint16_t *)&src.val) ); - break; - case 4: - asm ( "crc32l %1,%k0" : "+r" (dst.val) - : "rm" (*(uint32_t *)&src.val) ); - break; -# ifdef __x86_64__ - case 8: - asm ( "crc32q %1,%0" : "+r" (dst.val) : "rm" (src.val) ); - break; -# endif - default: - ASSERT_UNREACHABLE(); - } -#else /* !HAVE_GAS_SSE4_2 */ - goto cannot_emulate; + case 2: + asm ( "xchg %h0,%b0" : "=Q" (dst.val) + : "0" (*(uint32_t *)&src.val) ); + break; + case 4: +#ifdef __x86_64__ + asm ( "bswap %k0" : "=r" (dst.val) + : "0" (*(uint32_t *)&src.val) ); + break; + case 8: #endif + asm ( "bswap %0" : "=r" (dst.val) : "0" (src.val) ); + break; + default: + ASSERT_UNREACHABLE(); } - else + break; +#ifdef HAVE_GAS_SSE4_2 + case X86EMUL_OPC_F2(0x0f38, 0xf0): /* crc32 r/m8, r{32,64} */ + case X86EMUL_OPC_F2(0x0f38, 0xf1): /* crc32 r/m{16,32,64}, r{32,64} */ + host_and_vcpu_must_have(sse4_2); + dst.bytes = rex_prefix & REX_W ? 
8 : 4; + switch ( op_bytes ) { - /* movbe */ - vcpu_must_have_movbe(); - switch ( op_bytes ) - { - case 2: - asm ( "xchg %h0,%b0" : "=Q" (dst.val) - : "0" (*(uint32_t *)&src.val) ); - break; - case 4: -#ifdef __x86_64__ - asm ( "bswap %k0" : "=r" (dst.val) - : "0" (*(uint32_t *)&src.val) ); - break; - case 8: -#endif - asm ( "bswap %0" : "=r" (dst.val) : "0" (src.val) ); - break; - default: - ASSERT_UNREACHABLE(); - } + case 1: + asm ( "crc32b %1,%k0" : "+r" (dst.val) + : "qm" (*(uint8_t *)&src.val) ); + break; + case 2: + asm ( "crc32w %1,%k0" : "+r" (dst.val) + : "rm" (*(uint16_t *)&src.val) ); + break; + case 4: + asm ( "crc32l %1,%k0" : "+r" (dst.val) + : "rm" (*(uint32_t *)&src.val) ); + break; +# ifdef __x86_64__ + case 8: + asm ( "crc32q %1,%0" : "+r" (dst.val) : "rm" (src.val) ); + break; +# endif + default: + ASSERT_UNREACHABLE(); } break; +#endif default: goto cannot_emulate; } --- a/xen/arch/x86/x86_emulate/x86_emulate.h +++ b/xen/arch/x86/x86_emulate/x86_emulate.h @@ -415,12 +415,15 @@ struct x86_emulate_ctxt /* Stack pointer width in bits (16, 32 or 64). */ unsigned int sp_size; - /* Set this if writes may have side effects. */ - uint8_t force_writeback; + /* Canonical opcode (see below). */ + unsigned int opcode; /* Software event injection support. */ enum x86_swint_emulation swint_emulate; + /* Set this if writes may have side effects. */ + uint8_t force_writeback; + /* Retirement state, set by the emulator (valid only on X86EMUL_OKAY). 
*/ union { struct { @@ -435,6 +438,60 @@ struct x86_emulate_ctxt void *data; }; +/* + * Encode opcode extensions in the following way: + * 0x0xxxx for one byte opcodes + * 0x0fxxxx for 0f-prefixed opcodes (or their VEX/EVEX equivalents) + * 0x0f38xxxx for 0f38-prefixed opcodes (or their VEX/EVEX equivalents) + * 0x0f3axxxx for 0f3a-prefixed opcodes (or their VEX/EVEX equivalents) + * 0x8f08xxxx for 8f/8-prefixed XOP opcodes + * 0x8f09xxxx for 8f/9-prefixed XOP opcodes + * 0x8f0axxxx for 8f/a-prefixed XOP opcodes + * The low byte represents the base opcode within the respective space, + * and some of bits 8..15 are used for encoding further information (see + * below). + * Hence no separate #define-s get added. + */ +#define X86EMUL_OPC_EXT_MASK 0xffff0000 +#define X86EMUL_OPC(ext, byte) ((uint8_t)(byte) | \ + MASK_INSR((ext), X86EMUL_OPC_EXT_MASK)) +/* + * This includes the 66, F3, and F2 prefixes (see also below) + * as well as VEX/EVEX: + */ +#define X86EMUL_OPC_MASK (0x000000ff | X86EMUL_OPC_PFX_MASK | \ + X86EMUL_OPC_ENCODING_MASK) + +/* + * Note that prefixes 66, F2, and F3 get encoded only when semantically + * meaningful, to reduce the complexity of interpreting this representation.
+ */ +#define X86EMUL_OPC_PFX_MASK 0x00000300 +# define X86EMUL_OPC_66(ext, byte) (X86EMUL_OPC(ext, byte) | 0x00000100) +# define X86EMUL_OPC_F3(ext, byte) (X86EMUL_OPC(ext, byte) | 0x00000200) +# define X86EMUL_OPC_F2(ext, byte) (X86EMUL_OPC(ext, byte) | 0x00000300) + +#define X86EMUL_OPC_ENCODING_MASK 0x00003000 +#define X86EMUL_OPC_LEGACY_ 0x00000000 +#define X86EMUL_OPC_VEX_ 0x00001000 +# define X86EMUL_OPC_VEX(ext, byte) \ + (X86EMUL_OPC(ext, byte) | X86EMUL_OPC_VEX_) +# define X86EMUL_OPC_VEX_66(ext, byte) \ + (X86EMUL_OPC_66(ext, byte) | X86EMUL_OPC_VEX_) +# define X86EMUL_OPC_VEX_F3(ext, byte) \ + (X86EMUL_OPC_F3(ext, byte) | X86EMUL_OPC_VEX_) +# define X86EMUL_OPC_VEX_F2(ext, byte) \ + (X86EMUL_OPC_F2(ext, byte) | X86EMUL_OPC_VEX_) +#define X86EMUL_OPC_EVEX_ 0x00002000 +# define X86EMUL_OPC_EVEX(ext, byte) \ + (X86EMUL_OPC(ext, byte) | X86EMUL_OPC_EVEX_) +# define X86EMUL_OPC_EVEX_66(ext, byte) \ + (X86EMUL_OPC_66(ext, byte) | X86EMUL_OPC_EVEX_) +# define X86EMUL_OPC_EVEX_F3(ext, byte) \ + (X86EMUL_OPC_F3(ext, byte) | X86EMUL_OPC_EVEX_) +# define X86EMUL_OPC_EVEX_F2(ext, byte) \ + (X86EMUL_OPC_F2(ext, byte) | X86EMUL_OPC_EVEX_) + struct x86_emulate_stub { union { void (*func)(void);