[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v2 2/6] x86emul: support BMI2 insns



Note that mode_64bit() is adjusted to evaluate to a constant false in
non-64-bit builds, so that the 64-bit asm() portions become dead code
there and we can avoid "#ifdef __x86_64__" around them. An alternative
would be single asm()s with a conditional branch over the (manually
encoded) REX64 prefix.

Note that RORX raising #UD when VEX.VVVV is not all ones matches
observed behavior rather than what the SDM says.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v2: Add remark about VEX.VVVV vs RORX. Use decode_vex_gpr().

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -1019,6 +1019,178 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing bzhi %edx,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(bzhi);
+
+        asm volatile ( put_insn(bzhi, "bzhi %%edx, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(bzhi);
+
+        regs.ecx    = (unsigned long)res;
+        regs.edx    = 0xff13;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != (*res & 0x7ffff) ||
+             regs.edx != 0xff13 || *res != 0xfedcba98 ||
+             (regs.eflags & 0xf6b) != 0x202 || !check_eip(bzhi) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing mulx (%eax),%ecx,%ebx...");
+    if ( cpu_has_bmi2 )
+    {
+        decl_insn(mulx);
+
+        asm volatile ( put_insn(mulx, "mulx (%0), %%ecx, %%ebx")
+                       :: "a" (NULL) );
+        set_insn(mulx);
+
+        regs.eax    = (unsigned long)res;
+        regs.edx    = 0x12345678;
+        regs.eflags = 0xac3;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != 0x121fa00a ||
+             regs.ecx != 0x35068740 || *res != 0xfedcba98 ||
+             regs.eflags != 0xac3 || !check_eip(mulx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing pdep (%edx),%ecx,%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(pdep);
+
+        asm volatile ( put_insn(pdep, "pdep (%0), %%ecx, %%ebx")
+                       :: "d" (NULL) );
+        set_insn(pdep);
+
+        regs.ecx    = 0x8cef;
+        regs.edx    = (unsigned long)res;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != 0x850b298 ||
+             regs.ecx != 0x8cef || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(pdep) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing pext (%edx),%ecx,%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(pext);
+
+        asm volatile ( put_insn(pext, "pext (%0), %%ecx, %%ebx")
+                       :: "d" (NULL) );
+        set_insn(pext);
+
+        regs.ecx    = 0x137f8cef;
+        regs.edx    = (unsigned long)res;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != 0x12f95 ||
+             regs.ecx != 0x137f8cef || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(pext) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing rorx $16,(%ecx),%ebx...");
+    if ( cpu_has_bmi2 )
+    {
+        decl_insn(rorx);
+
+        asm volatile ( put_insn(rorx, "rorx $16, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(rorx);
+
+        regs.ecx    = (unsigned long)res;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.ebx != 0xba98fedc ||
+             *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(rorx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing sarx %edx,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(sarx);
+
+        asm volatile ( put_insn(sarx, "sarx %%edx, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(sarx);
+
+        regs.ecx    = (unsigned long)res;
+        regs.edx    = 0xff13;
+        regs.eflags = 0xa43;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) ||
+             regs.ebx != ((signed)*res >> (regs.edx & 0x1f)) ||
+             regs.edx != 0xff13 || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(sarx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing shlx %edx,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(shlx);
+
+        asm volatile ( put_insn(shlx, "shlx %%edx, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(shlx);
+
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) ||
+             regs.ebx != (*res << (regs.edx & 0x1f)) ||
+             regs.edx != 0xff13 || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(shlx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing shrx %edx,(%ecx),%ebx...");
+    if ( stack_exec && cpu_has_bmi2 )
+    {
+        decl_insn(shrx);
+
+        asm volatile ( put_insn(shrx, "shrx %%edx, (%0), %%ebx")
+                       :: "c" (NULL) );
+        set_insn(shrx);
+
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) ||
+             regs.ebx != (*res >> (regs.edx & 0x1f)) ||
+             regs.edx != 0xff13 || *res != 0xfedcba98 ||
+             regs.eflags != 0xa43 || !check_eip(shrx) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing adcx/adox ...");
     {
         static const unsigned int data[] = {
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -119,6 +119,12 @@ static inline uint64_t xgetbv(uint32_t x
     (res.b & (1U << 3)) != 0; \
 })
 
+#define cpu_has_bmi2 ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(7, 0, &res, NULL); \
+    (res.b & (1U << 8)) != 0; \
+})
+
 int emul_test_cpuid(
     uint32_t leaf,
     uint32_t subleaf,
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -707,7 +707,11 @@ do{ asm volatile (
 })
 #define truncate_ea(ea) truncate_word((ea), ad_bytes)
 
-#define mode_64bit() (ctxt->addr_size == 64)
+#ifdef __x86_64__
+# define mode_64bit() (ctxt->addr_size == 64)
+#else
+# define mode_64bit() false
+#endif
 
 #define fail_if(p)                                      \
 do {                                                    \
@@ -1353,6 +1357,7 @@ static bool vcpu_has(
 #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
 #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
 #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
+#define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
 #define vcpu_has_rtm()         vcpu_has(         7, EBX, 11, ctxt, ops)
 #define vcpu_has_mpx()         vcpu_has(         7, EBX, 14, ctxt, ops)
 #define vcpu_has_adx()         vcpu_has(         7, EBX, 19, ctxt, ops)
@@ -5886,12 +5891,21 @@ x86_emulate(
 #endif
 
     case X86EMUL_OPC_VEX(0x0f38, 0xf2):    /* andn r/m,r,r */
+    case X86EMUL_OPC_VEX(0x0f38, 0xf5):    /* bzhi r,r/m,r */
+    case X86EMUL_OPC_VEX_F3(0x0f38, 0xf5): /* pext r/m,r,r */
+    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf5): /* pdep r/m,r,r */
     case X86EMUL_OPC_VEX(0x0f38, 0xf7):    /* bextr r,r/m,r */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0xf7): /* shlx r,r/m,r */
+    case X86EMUL_OPC_VEX_F3(0x0f38, 0xf7): /* sarx r,r/m,r */
+    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf7): /* shrx r,r/m,r */
     {
         uint8_t *buf = get_stub(stub);
         typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
 
-        host_and_vcpu_must_have(bmi1);
+        if ( b == 0xf5 || vex.pfx )
+            host_and_vcpu_must_have(bmi2);
+        else
+            host_and_vcpu_must_have(bmi1);
         generate_exception_if(vex.l, EXC_UD);
 
         buf[0] = 0xc4;
@@ -5977,6 +5991,32 @@ x86_emulate(
         break;
     }
 
+    case X86EMUL_OPC_VEX_F2(0x0f38, 0xf6): /* mulx r/m,r,r */
+        vcpu_must_have(bmi2);
+        generate_exception_if(vex.l, EXC_UD);
+        ea.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);
+        if ( mode_64bit() && vex.w )
+            asm ( "mulq %3" : "=a" (*ea.reg), "=d" (dst.val)
+                            : "0" (src.val), "rm" (_regs.r(dx)) );
+        else
+            asm ( "mull %3" : "=a" (*ea.reg), "=d" (dst.val)
+                            : "0" ((uint32_t)src.val), "rm" (_regs._edx) );
+        break;
+
+    case X86EMUL_OPC_VEX_F2(0x0f3a, 0xf0): /* rorx imm,r/m,r */
+        vcpu_must_have(bmi2);
+        generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
+        if ( ea.type == OP_REG )
+            src.val = *ea.reg;
+        else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val, op_bytes,
+                                   ctxt, ops)) != X86EMUL_OKAY )
+            goto done;
+        if ( mode_64bit() && vex.w )
+            asm ( "rorq %b1,%0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
+        else
+            asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
+        break;
+
     default:
         goto cannot_emulate;
     }
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -58,6 +58,7 @@
 #define cpu_has_avx             boot_cpu_has(X86_FEATURE_AVX)
 #define cpu_has_lwp             boot_cpu_has(X86_FEATURE_LWP)
 #define cpu_has_bmi1            boot_cpu_has(X86_FEATURE_BMI1)
+#define cpu_has_bmi2            boot_cpu_has(X86_FEATURE_BMI2)
 #define cpu_has_mpx             boot_cpu_has(X86_FEATURE_MPX)
 #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
 #define cpu_has_rdtscp          boot_cpu_has(X86_FEATURE_RDTSCP)


Attachment: x86emul-BMI2.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.