[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v4 18/44] x86emul: support AVX512{F, BW} packed integer compare insns



Also include VPTEST{,N}M{B,D,Q,W}, as these may once again be used by the
compiler for comparisons against all-zero vectors.

Table entries for a few more insns also get their .d8s field set right
away, again in order to avoid splitting and later re-combining the groups.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v3: New.

--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -151,8 +151,16 @@ static const struct test avx512f_all[] =
     INSN_FP(mul,             0f, 59),
     INSN(pand,         66,   0f, db,    vl,     dq, vl),
     INSN(pandn,        66,   0f, df,    vl,     dq, vl),
+    INSN(pcmp,         66, 0f3a, 1f,    vl,     dq, vl),
+    INSN(pcmpeqd,      66,   0f, 76,    vl,      d, vl),
+    INSN(pcmpeqq,      66, 0f38, 29,    vl,      q, vl),
+    INSN(pcmpgtd,      66,   0f, 66,    vl,      d, vl),
+    INSN(pcmpgtq,      66, 0f38, 37,    vl,      q, vl),
+    INSN(pcmpu,        66, 0f3a, 1e,    vl,     dq, vl),
     INSN(por,          66,   0f, eb,    vl,     dq, vl),
     INSN(pternlog,     66, 0f3a, 25,    vl,     dq, vl),
+    INSN(ptestm,       66, 0f38, 27,    vl,     dq, vl),
+    INSN(ptestnm,      f3, 0f38, 27,    vl,     dq, vl),
     INSN(pxor,         66,   0f, ef,    vl,     dq, vl),
     INSN_PFP(shuf,           0f, c6),
     INSN_FP(sqrt,            0f, 51),
@@ -184,6 +192,14 @@ static const struct test avx512bw_all[]
     INSN(movdqu8,     f2,   0f, 7f,    vl,    b, vl),
     INSN(movdqu16,    f2,   0f, 6f,    vl,    w, vl),
     INSN(movdqu16,    f2,   0f, 7f,    vl,    w, vl),
+    INSN(pcmp,        66, 0f3a, 3f,    vl,   bw, vl),
+    INSN(pcmpeqb,     66,   0f, 74,    vl,    b, vl),
+    INSN(pcmpeqw,     66,   0f, 75,    vl,    w, vl),
+    INSN(pcmpgtb,     66,   0f, 64,    vl,    b, vl),
+    INSN(pcmpgtw,     66,   0f, 65,    vl,    w, vl),
+    INSN(pcmpu,       66, 0f3a, 3e,    vl,   bw, vl),
+    INSN(ptestm,      66, 0f38, 26,    vl,   bw, vl),
+    INSN(ptestnm,     f3, 0f38, 26,    vl,   bw, vl),
 };
 
 static const struct test avx512dq_all[] = {
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -313,14 +313,14 @@ static const struct twobyte_table {
     [0x5a ... 0x5b] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
     [0x5c ... 0x5f] = { DstImplicit|SrcMem|ModRM, simd_any_fp, d8s_vl },
     [0x60 ... 0x62] = { DstImplicit|SrcMem|ModRM, simd_other },
-    [0x63 ... 0x67] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
+    [0x63 ... 0x67] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
     [0x68 ... 0x6a] = { DstImplicit|SrcMem|ModRM, simd_other },
     [0x6b ... 0x6d] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
     [0x6e] = { DstImplicit|SrcMem|ModRM|Mov, simd_none, d8s_dq64 },
     [0x6f] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_int, d8s_vl },
     [0x70] = { SrcImmByte|ModRM|TwoOp, simd_other },
     [0x71 ... 0x73] = { DstImplicit|SrcImmByte|ModRM },
-    [0x74 ... 0x76] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
+    [0x74 ... 0x76] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
     [0x77] = { DstImplicit|SrcNone },
     [0x78] = { ImplicitOps|ModRM },
     [0x79] = { DstReg|SrcMem|ModRM, simd_packed_int },
@@ -444,13 +444,13 @@ static const struct ext0f38_table {
     [0x1b] = { .simd_size = simd_256, .two_op = 1, .d8s = d8s_vl_by_2 },
     [0x1c ... 0x1e] = { .simd_size = simd_packed_int, .two_op = 1 },
     [0x20 ... 0x25] = { .simd_size = simd_other, .two_op = 1 },
-    [0x28 ... 0x29] = { .simd_size = simd_packed_int },
+    [0x26 ... 0x29] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x2a] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_vl },
     [0x2b] = { .simd_size = simd_packed_int },
     [0x2c ... 0x2d] = { .simd_size = simd_packed_fp },
     [0x2e ... 0x2f] = { .simd_size = simd_packed_fp, .to_mem = 1 },
     [0x30 ... 0x35] = { .simd_size = simd_other, .two_op = 1 },
-    [0x36 ... 0x3f] = { .simd_size = simd_packed_int },
+    [0x36 ... 0x3f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x40] = { .simd_size = simd_packed_int },
     [0x41] = { .simd_size = simd_packed_int, .two_op = 1 },
     [0x45 ... 0x47] = { .simd_size = simd_packed_int },
@@ -516,6 +516,7 @@ static const struct ext0f3a_table {
     [0x18] = { .simd_size = simd_128 },
     [0x19] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1 },
     [0x1d] = { .simd_size = simd_other, .to_mem = 1, .two_op = 1 },
+    [0x1e ... 0x1f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x20] = { .simd_size = simd_none },
     [0x21] = { .simd_size = simd_other },
     [0x22] = { .simd_size = simd_none },
@@ -523,6 +524,7 @@ static const struct ext0f3a_table {
     [0x30 ... 0x33] = { .simd_size = simd_other, .two_op = 1 },
     [0x38] = { .simd_size = simd_128 },
     [0x39] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1 },
+    [0x3e ... 0x3f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x40 ... 0x41] = { .simd_size = simd_packed_fp },
     [0x42] = { .simd_size = simd_packed_int },
     [0x44] = { .simd_size = simd_packed_int },
@@ -6569,6 +6571,32 @@ x86_emulate(
         get_fpu(X86EMUL_FPU_mmx);
         goto simd_0f_common;
 
+    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x26): /* vptestnm{b,w} [xyz]mm/mem,[xyz]mm,k{k} */
+    case X86EMUL_OPC_EVEX_F3(0x0f38, 0x27): /* vptestnm{d,q} [xyz]mm/mem,[xyz]mm,k{k} */
+        op_bytes = 16 << evex.lr;
+        /* fall through */
+    case X86EMUL_OPC_EVEX_66(0x0f,   0x64): /* vpcmpgtb [xyz]mm/mem,[xyz]mm,k{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f,   0x65): /* vpcmpgtw [xyz]mm/mem,[xyz]mm,k{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f,   0x66): /* vpcmpgtd [xyz]mm/mem,[xyz]mm,k{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f,   0x74): /* vpcmpeqb [xyz]mm/mem,[xyz]mm,k{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f,   0x75): /* vpcmpeqw [xyz]mm/mem,[xyz]mm,k{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f,   0x76): /* vpcmpeqd [xyz]mm/mem,[xyz]mm,k{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x26): /* vptestm{b,w} [xyz]mm/mem,[xyz]mm,k{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x27): /* vptestm{d,q} [xyz]mm/mem,[xyz]mm,k{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x29): /* vpcmpeqq [xyz]mm/mem,[xyz]mm,k{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x37): /* vpcmpgtq [xyz]mm/mem,[xyz]mm,k{k} */
+        generate_exception_if(!evex.r || !evex.R || evex.z, EXC_UD);
+        if ( b & (ext == ext_0f38 ? 1 : 2) )
+        {
+            generate_exception_if(b != 0x27 && evex.w != (b & 1), EXC_UD);
+            goto avx512f_no_sae;
+        }
+        host_and_vcpu_must_have(avx512bw);
+        generate_exception_if(evex.br, EXC_UD);
+        elem_bytes = 1 << (ext == ext_0f ? b & 1 : evex.w);
+        avx512_vlen_check(false);
+        goto simd_zmm;
+
     CASE_SIMD_PACKED_INT(0x0f, 0x6e):    /* mov{d,q} r/m,{,x}mm */
     case X86EMUL_OPC_VEX_66(0x0f, 0x6e): /* vmov{d,q} r/m,xmm */
     CASE_SIMD_PACKED_INT(0x0f, 0x7e):    /* mov{d,q} {,x}mm,r/m */
@@ -7577,6 +7605,7 @@ x86_emulate(
                               EXC_UD);
         /* fall through */
     case X86EMUL_OPC_EVEX_66(0x0f3a, 0x25): /* vpternlog{d,q} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    avx512f_imm_no_sae:
         host_and_vcpu_must_have(avx512f);
         generate_exception_if(ea.type != OP_MEM && evex.br, EXC_UD);
         avx512_vlen_check(false);
@@ -8750,6 +8779,19 @@ x86_emulate(
         break;
     }
 
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x1e): /* vpcmpu{d,q} $imm8,[xyz]mm/mem,[xyz]mm,k{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x1f): /* vpcmp{d,q} $imm8,[xyz]mm/mem,[xyz]mm,k{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x3e): /* vpcmpu{b,w} $imm8,[xyz]mm/mem,[xyz]mm,k{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x3f): /* vpcmp{b,w} $imm8,[xyz]mm/mem,[xyz]mm,k{k} */
+        generate_exception_if(!evex.r || !evex.R || evex.z, EXC_UD);
+        if ( !(b & 0x20) )
+            goto avx512f_imm_no_sae;
+        host_and_vcpu_must_have(avx512bw);
+        generate_exception_if(evex.br, EXC_UD);
+        elem_bytes = 1 << evex.w;
+        avx512_vlen_check(false);
+        goto simd_imm8_zmm;
+
     case X86EMUL_OPC_66(0x0f3a, 0x20): /* pinsrb $imm8,r32/m8,xmm */
     case X86EMUL_OPC_66(0x0f3a, 0x22): /* pinsr{d,q} $imm8,r/m,xmm */
         host_and_vcpu_must_have(sse4_1);




_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.