x86emul: support BMI2 insns

Note that the adjustment to the mode_64bit() definition is so that we
can avoid "#ifdef __x86_64__" around the 64-bit asm() portions. An
alternative would be single asm()s with a conditional branch over the
(manually encoded) REX64 prefix.

Signed-off-by: Jan Beulich

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -1019,6 +1019,178 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing bzhi %edx,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(bzhi);
+
+        asm volatile ( put_insn(bzhi, "bzhi %%edx, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(bzhi);
+
+        regs.ecx = (unsigned long)res;
+        regs.edx = 0xff13;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != (*res & 0x7ffff) ||
+             regs.edx != 0xff13 || *res != 0xfedcba98 ||
+             (regs.eflags & 0xf6b) != 0x202 || !check_eip(bzhi) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing mulx (%eax),%ecx,%ebx...");
+    if ( cpu_has_bmi2 )
+    {
+        decl_insn(mulx);
+
+        asm volatile ( put_insn(mulx, "mulx (%0), %%ecx, %%ebx")
+                       :: "a" (NULL) );
+        set_insn(mulx);
+
+        regs.eax = (unsigned long)res;
+        regs.edx = 0x12345678;
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != 0x121fa00a ||
+             regs.ecx != 0x35068740 || *res != 0xfedcba98 ||
+             regs.eflags != 0xac3 || !check_eip(mulx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing pdep (%edx),%ecx,%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(pdep);
+
+        asm volatile ( put_insn(pdep, "pdep (%0), %%ecx, %%ebx")
+                       :: "d" (NULL) );
+        set_insn(pdep);
+
+        regs.ecx = 0x8cef;
+        regs.edx = (unsigned long)res;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != 0x850b298 ||
+             regs.ecx != 0x8cef || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(pdep) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing pext (%edx),%ecx,%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(pext);
+
+        asm volatile ( put_insn(pext, "pext (%0), %%ecx, %%ebx")
+                       :: "d" (NULL) );
+        set_insn(pext);
+
+        regs.ecx = 0x137f8cef;
+        regs.edx = (unsigned long)res;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != 0x12f95 ||
+             regs.ecx != 0x137f8cef || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(pext) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
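+
+    /*
+     * Note (illustrative only, not relied upon by the harness): the pdep/
+     * pext result constants checked above can be re-derived with plain-C
+     * models of the two insns, e.g. with hypothetical helpers like:
+     *
+     *   unsigned int pdep32(unsigned int src, unsigned int mask)
+     *   {
+     *       unsigned int res = 0;
+     *
+     *       for ( ; mask; mask &= mask - 1, src >>= 1 )
+     *           if ( src & 1 )
+     *               res |= mask & -mask; // deposit into lowest set mask bit
+     *
+     *       return res;
+     *   }
+     *
+     *   unsigned int pext32(unsigned int src, unsigned int mask)
+     *   {
+     *       unsigned int res = 0, bit = 1;
+     *
+     *       for ( ; mask; mask &= mask - 1, bit <<= 1 )
+     *           if ( src & mask & -mask ) // extract from lowest set mask bit
+     *               res |= bit;
+     *
+     *       return res;
+     *   }
+     *
+     * pdep32(0x8cef, 0xfedcba98) == 0x0850b298 and
+     * pext32(0x137f8cef, 0xfedcba98) == 0x12f95, matching the checks above.
+     */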
+
+    printf("%-40s", "Testing rorx $16,(%ecx),%ebx...");
+    if ( cpu_has_bmi2 )
+    {
+        decl_insn(rorx);
+
+        asm volatile ( put_insn(rorx, "rorx $16, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(rorx);
+
+        regs.ecx = (unsigned long)res;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != 0xba98fedc ||
+             *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(rorx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing sarx %edx,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(sarx);
+
+        asm volatile ( put_insn(sarx, "sarx %%edx, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(sarx);
+
+        regs.ecx = (unsigned long)res;
+        regs.edx = 0xff13;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) ||
+             regs.ebx != ((signed)*res >> (regs.edx & 0x1f)) ||
+             regs.edx != 0xff13 || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(sarx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing shlx %edx,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(shlx);
+
+        asm volatile ( put_insn(shlx, "shlx %%edx, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(shlx);
+
+        regs.ecx = (unsigned long)res;
+        regs.edx = 0xff13;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) ||
+             regs.ebx != (*res << (regs.edx & 0x1f)) ||
+             regs.edx != 0xff13 || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(shlx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing shrx %edx,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(shrx);
+
+        asm volatile ( put_insn(shrx, "shrx %%edx, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(shrx);
+
+        regs.ecx = (unsigned long)res;
+        regs.edx = 0xff13;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) ||
+             regs.ebx != (*res >> (regs.edx & 0x1f)) ||
+             regs.edx != 0xff13 || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(shrx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing adcx/adox ...");
     {
         static const unsigned int data[] = {
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -119,6 +119,12 @@ static inline uint64_t xgetbv(uint32_t x
     (res.b & (1U << 3)) != 0; \
 })
 
+#define cpu_has_bmi2 ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(7, 0, &res, NULL); \
+    (res.b & (1U << 8)) != 0; \
+})
+
 int emul_test_cpuid(
     uint32_t leaf,
     uint32_t subleaf,
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -707,7 +707,11 @@ do{ asm volatile (
 })
 #define truncate_ea(ea) truncate_word((ea), ad_bytes)
 
-#define mode_64bit() (ctxt->addr_size == 64)
+#ifdef __x86_64__
+# define mode_64bit() (ctxt->addr_size == 64)
+#else
+# define mode_64bit() false
+#endif
 
 #define fail_if(p)                              \
 do {                                            \
@@ -1353,6 +1357,7 @@ static bool vcpu_has(
 #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX, 7, ctxt, ops)
 #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
 #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
+#define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
 #define vcpu_has_rtm()         vcpu_has(         7, EBX, 11, ctxt, ops)
 #define vcpu_has_mpx()         vcpu_has(         7, EBX, 14, ctxt, ops)
 #define vcpu_has_adx()         vcpu_has(         7, EBX, 19, ctxt, ops)
@@ -5880,12 +5885,21 @@ x86_emulate(
 #endif
 
     case X86EMUL_OPC_VEX(0x0f38, 0xf2): /* andn r/m,r,r */
+    case X86EMUL_OPC_VEX(0x0f38, 0xf5): /* bzhi r,r/m,r */
+    case X86EMUL_OPC_VEX_F3(0x0f38, 0xf5): /* pext r/m,r,r */
+    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf5): /* pdep r/m,r,r */
     case X86EMUL_OPC_VEX(0x0f38, 0xf7): /* bextr r,r/m,r */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0xf7): /* shlx r,r/m,r */
+    case X86EMUL_OPC_VEX_F3(0x0f38, 0xf7): /* sarx r,r/m,r */
+    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf7): /* shrx r,r/m,r */
     {
         uint8_t *buf = get_stub(stub);
         typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
 
-        host_and_vcpu_must_have(bmi1);
+        if ( b == 0xf5 || vex.pfx )
+            host_and_vcpu_must_have(bmi2);
+        else
+            host_and_vcpu_must_have(bmi1);
         generate_exception_if(vex.l, EXC_UD);
 
         buf[0] = 0xc4;
@@ -5973,6 +5987,33 @@ x86_emulate(
         break;
     }
 
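+    /*
+     * mulx multiplies (E|R)DX by the memory/register source without
+     * affecting EFLAGS, putting the low half of the double-width product
+     * in the VEX.vvvv-encoded register and the high half in the ModRM
+     * reg-encoded one. Roughly, for the 32-bit case (sketch only):
+     *
+     *   uint64_t prod = (uint64_t)_regs._edx * (uint32_t)src.val;
+     *   *ea.reg = (uint32_t)prod;  // low half -> VEX.vvvv register
+     *   dst.val = prod >> 32;      // high half -> ModRM reg register
+     *
+     * This is also where the test harness expectations above come from:
+     * 0x12345678 * 0xfedcba98 = 0x121fa00a35068740.
+     */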
+    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf6): /* mulx r/m,r,r */
+        vcpu_must_have(bmi2);
+        generate_exception_if(vex.l, EXC_UD);
+        ea.reg = decode_register(~vex.reg & (mode_64bit() ? 0xf : 7),
+                                 &_regs, 0);
+        if ( mode_64bit() && vex.w )
+            asm ( "mulq %3" : "=a" (*ea.reg), "=d" (dst.val)
+                            : "0" (src.val), "rm" (_regs.r(dx)) );
+        else
+            asm ( "mull %3" : "=a" (*ea.reg), "=d" (dst.val)
+                            : "0" ((uint32_t)src.val), "rm" (_regs._edx) );
+        break;
+
+    case X86EMUL_OPC_VEX_F2(0x0f3a, 0xf0): /* rorx imm,r/m,r */
+        vcpu_must_have(bmi2);
+        generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
+        if ( ea.type == OP_REG )
+            src.val = *ea.reg;
+        else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val, op_bytes,
+                                   ctxt, ops)) != X86EMUL_OKAY )
+            goto done;
+        if ( mode_64bit() && vex.w )
+            asm ( "rorq %b1,%0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
+        else
+            asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
+        break;
+
     default:
         goto cannot_emulate;
     }
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -58,6 +58,7 @@
 #define cpu_has_avx             boot_cpu_has(X86_FEATURE_AVX)
 #define cpu_has_lwp             boot_cpu_has(X86_FEATURE_LWP)
 #define cpu_has_bmi1            boot_cpu_has(X86_FEATURE_BMI1)
+#define cpu_has_bmi2            boot_cpu_has(X86_FEATURE_BMI2)
 #define cpu_has_mpx             boot_cpu_has(X86_FEATURE_MPX)
 #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
 #define cpu_has_rdtscp          boot_cpu_has(X86_FEATURE_RDTSCP)
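
To illustrate the mode_64bit() remark in the description: with the macro
expanding to a compile-time false on 32-bit builds, the 64-bit arm of each
conditional is dead code, so the "mulq"/"rorq" templates are never emitted
and a 32-bit assembler never sees them. A minimal stand-alone sketch
(ror_demo and its surroundings are made up for illustration; the patch
itself only changes the macro):

    #include <stdbool.h>
    #include <stdint.h>

    #ifdef __x86_64__
    # define mode_64bit() true    /* stands in for (ctxt->addr_size == 64) */
    #else
    # define mode_64bit() false   /* constant: 64-bit arm below is dead */
    #endif

    /* On a 32-bit build the first arm is statically dead, so the "rorq"
     * template is never emitted and needs no "#ifdef __x86_64__". */
    static inline unsigned long ror_demo(unsigned long val, uint8_t cnt)
    {
        if ( mode_64bit() )
            asm ( "rorq %b1,%0" : "+g" (val) : "c" (cnt) );
        else
            asm ( "rorl %b1,%k0" : "+g" (val) : "c" (cnt) );

        return val;
    }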