[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v5 13/17] x86emul: support SSE4.2 insns



Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
v5: Change approach for dealing with differing prefix byte counts in
    VEX/non-VEX shared code. _eax -> eax, _edx -> edx.
v3: New.

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -2542,6 +2542,149 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing pcmpestri $0x1a,(%ecx),%xmm2...");
+    if ( stack_exec && cpu_has_sse4_2 )
+    {
+        decl_insn(pcmpestri);
+
+        memcpy(res, "abcdefgh\0\1\2\3\4\5\6\7", 16);
+        asm volatile ( "movq %0, %%xmm2\n"
+                       put_insn(pcmpestri, "pcmpestri $0b00011010, (%1), %%xmm2")
+                       :: "m" (res[0]), "c" (NULL) );
+
+        set_insn(pcmpestri);
+        regs.eax = regs.edx = 12;
+        regs.ecx = (unsigned long)res;
+        regs.eflags = X86_EFLAGS_PF | X86_EFLAGS_AF |
+                      X86_EFLAGS_IF | X86_EFLAGS_OF;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(pcmpestri) ||
+             regs.ecx != 9 ||
+             (regs.eflags & X86_EFLAGS_ARITH_MASK) !=
+             (X86_EFLAGS_CF | X86_EFLAGS_ZF | X86_EFLAGS_SF) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing pcmpestrm $0x5a,(%ecx),%xmm2...");
+    if ( stack_exec && cpu_has_sse4_2 )
+    {
+        decl_insn(pcmpestrm);
+
+        asm volatile ( "movq %0, %%xmm2\n"
+                       put_insn(pcmpestrm, "pcmpestrm $0b01011010, (%1), %%xmm2")
+                       :: "m" (res[0]), "c" (NULL) );
+
+        set_insn(pcmpestrm);
+        regs.ecx = (unsigned long)res;
+        regs.eflags = X86_EFLAGS_PF | X86_EFLAGS_AF |
+                      X86_EFLAGS_IF | X86_EFLAGS_OF;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(pcmpestrm) )
+            goto fail;
+        asm ( "pmovmskb %%xmm0, %0" : "=r" (rc) );
+        if ( rc != 0x0e00 ||
+             (regs.eflags & X86_EFLAGS_ARITH_MASK) !=
+             (X86_EFLAGS_CF | X86_EFLAGS_ZF | X86_EFLAGS_SF) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing pcmpistri $0x1a,(%ecx),%xmm2...");
+    if ( stack_exec && cpu_has_sse4_2 )
+    {
+        decl_insn(pcmpistri);
+
+        asm volatile ( "movq %0, %%xmm2\n"
+                       put_insn(pcmpistri, "pcmpistri $0b00011010, (%1), %%xmm2")
+                       :: "m" (res[0]), "c" (NULL) );
+
+        set_insn(pcmpistri);
+        regs.eflags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+                      X86_EFLAGS_IF | X86_EFLAGS_OF;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(pcmpistri) ||
+             regs.ecx != 16 ||
+             (regs.eflags & X86_EFLAGS_ARITH_MASK) !=
+             (X86_EFLAGS_ZF | X86_EFLAGS_SF) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing pcmpistrm $0x4a,(%ecx),%xmm2...");
+    if ( stack_exec && cpu_has_sse4_2 )
+    {
+        decl_insn(pcmpistrm);
+
+        asm volatile ( "movq %0, %%xmm2\n"
+                       put_insn(pcmpistrm, "pcmpistrm $0b01001010, (%1), %%xmm2")
+                       :: "m" (res[0]), "c" (NULL) );
+
+        set_insn(pcmpistrm);
+        regs.ecx = (unsigned long)res;
+        regs.eflags = X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_IF;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(pcmpistrm) )
+            goto fail;
+        asm ( "pmovmskb %%xmm0, %0" : "=r" (rc) );
+        if ( rc != 0xffff ||
+            (regs.eflags & X86_EFLAGS_ARITH_MASK) !=
+            (X86_EFLAGS_CF | X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vpcmpestri $0x7a,(%esi),%xmm2...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vpcmpestri);
+
+#ifdef __x86_64__
+        /*
+         * gas up to at least 2.27 doesn't honor explicit "rex.w" for
+         * VEX/EVEX encoded instructions, and also doesn't provide any
+         * other means to control VEX.W.
+         */
+        asm volatile ( "movq %0, %%xmm2\n"
+                       put_insn(vpcmpestri,
+                                ".byte 0xC4, 0xE3, 0xF9, 0x61, 0x16, 0x7A")
+                       :: "m" (res[0]) );
+#else
+        asm volatile ( "movq %0, %%xmm2\n"
+                       put_insn(vpcmpestri,
+                                "vpcmpestri $0b01111010, (%1), %%xmm2")
+                       :: "m" (res[0]), "S" (NULL) );
+#endif
+
+        set_insn(vpcmpestri);
+#ifdef __x86_64__
+        regs.rax = ~0U + 1UL;
+        regs.rcx = ~0UL;
+#else
+        regs.eax = 0x7fffffff;
+#endif
+        regs.esi = (unsigned long)res;
+        regs.eflags = X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_SF |
+                      X86_EFLAGS_IF | X86_EFLAGS_OF;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vpcmpestri) ||
+             regs.ecx != 11 ||
+             (regs.eflags & X86_EFLAGS_ARITH_MASK) !=
+             (X86_EFLAGS_ZF | X86_EFLAGS_CF) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing stmxcsr (%edx)...");
     if ( cpu_has_sse )
     {
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -100,6 +100,12 @@ static inline uint64_t xgetbv(uint32_t x
     (res.c & (1U << 19)) != 0; \
 })
 
+#define cpu_has_sse4_2 ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(1, 0, &res, NULL); \
+    (res.c & (1U << 20)) != 0; \
+})
+
 #define cpu_has_popcnt ({ \
     struct cpuid_leaf res; \
     emul_test_cpuid(1, 0, &res, NULL); \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -370,7 +370,7 @@ static const struct {
     [0x2a] = { .simd_size = simd_packed_int, .two_op = 1 },
     [0x2b] = { .simd_size = simd_packed_int },
     [0x30 ... 0x35] = { .simd_size = simd_other, .two_op = 1 },
-    [0x38 ... 0x3f] = { .simd_size = simd_packed_int },
+    [0x37 ... 0x3f] = { .simd_size = simd_packed_int },
     [0x40] = { .simd_size = simd_packed_int },
     [0x41] = { .simd_size = simd_packed_int, .two_op = 1 },
     [0xf0] = { .two_op = 1 },
@@ -400,6 +400,7 @@ static const struct {
     [0x42] = { .simd_size = simd_packed_int },
     [0x4a ... 0x4b] = { .simd_size = simd_packed_fp, .four_op = 1 },
     [0x4c] = { .simd_size = simd_packed_int, .four_op = 1 },
+    [0x60 ... 0x63] = { .simd_size = simd_packed_int, .two_op = 1 },
     [0xf0] = {},
 };
 
@@ -6004,6 +6005,7 @@ x86_emulate(
     case X86EMUL_OPC_VEX_66(0x0f38, 0x28): /* vpmuldq {x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x29): /* vpcmpeqq {x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x2b): /* vpackusdw {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x37): /* vpcmpgtq {x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x38): /* vpminsb {x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x39): /* vpminsd {x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x3a): /* vpminub {x,y}mm/mem,{x,y}mm,{x,y}mm */
@@ -7175,6 +7177,10 @@ x86_emulate(
         }
         goto movdqa;
 
+    case X86EMUL_OPC_66(0x0f38, 0x37): /* pcmpgtq xmm/m128,xmm */
+        host_and_vcpu_must_have(sse4_2);
+        goto simd_0f38_common;
+
     case X86EMUL_OPC(0x0f38, 0xf0): /* movbe m,r */
     case X86EMUL_OPC(0x0f38, 0xf1): /* movbe r,m */
         vcpu_must_have(movbe);
@@ -7472,6 +7478,69 @@ x86_emulate(
         generate_exception_if(vex.w, EXC_UD);
         goto simd_0f_int_imm8;
 
+    case X86EMUL_OPC_66(0x0f3a, 0x60):     /* pcmpestrm $imm8,xmm/m128,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x60): /* vpcmpestrm $imm8,xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f3a, 0x61):     /* pcmpestri $imm8,xmm/m128,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x61): /* vpcmpestri $imm8,xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f3a, 0x62):     /* pcmpistrm $imm8,xmm/m128,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x62): /* vpcmpistrm $imm8,xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f3a, 0x63):     /* pcmpistri $imm8,xmm/m128,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x63): /* vpcmpistri $imm8,xmm/m128,xmm */
+        if ( vex.opcx == vex_none )
+        {
+            host_and_vcpu_must_have(sse4_2);
+            get_fpu(X86EMUL_FPU_xmm, &fic);
+        }
+        else
+        {
+            generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
+            host_and_vcpu_must_have(avx);
+            get_fpu(X86EMUL_FPU_ymm, &fic);
+        }
+
+        opc = init_prefixes(stub);
+        if ( vex.opcx == vex_none )
+            opc++[0] = 0x3a;
+        opc[0] = b;
+        opc[1] = modrm;
+        if ( ea.type == OP_MEM )
+        {
+            /* Convert memory operand to (%rDI). */
+            rex_prefix &= ~REX_B;
+            vex.b = 1;
+            opc[1] &= 0x3f;
+            opc[1] |= 0x07;
+
+            rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 16, ctxt);
+            if ( rc != X86EMUL_OKAY )
+                goto done;
+        }
+        opc[2] = imm1;
+        fic.insn_bytes = PFX_BYTES + 3;
+        opc[3] = 0xc3;
+        if ( vex.opcx == vex_none )
+        {
+            /* Cover for extra prefix byte. */
+            --opc;
+            ++fic.insn_bytes;
+        }
+
+        copy_REX_VEX(opc, rex_prefix, vex);
+#ifdef __x86_64__
+        if ( rex_prefix & REX_W )
+            emulate_stub("=c" (dst.val), "m" (*mmvalp), "D" (mmvalp),
+                         "a" (_regs.rax), "d" (_regs.rdx));
+        else
+#endif
+            emulate_stub("=c" (dst.val), "m" (*mmvalp), "D" (mmvalp),
+                         "a" (_regs.eax), "d" (_regs.edx));
+
+        state->simd_size = simd_none;
+        if ( b & 1 )
+            _regs.r(cx) = (uint32_t)dst.val;
+        dst.type = OP_NONE;
+        break;
+
     case X86EMUL_OPC_VEX_F2(0x0f3a, 0xf0): /* rorx imm,r/m,r */
         vcpu_must_have(bmi2);
         generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);


Attachment: x86emul-SSE42.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.