[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v5 45/47] x86emul: support AVX512DQ floating point manipulation insns



This completes support of AVX512DQ in the insn emulator.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v5: New.

--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -458,11 +458,17 @@ static const struct test avx512dq_all[]
     INSN(cvttps2uqq,     66,   0f, 78, vl_2,  d, vl),
     INSN(cvtuqq2pd,      f3,   0f, 7a,   vl,  q, vl),
     INSN(cvtuqq2ps,      f2,   0f, 7a,   vl,  q, vl),
+    INSN(fpclass,        66, 0f3a, 66,   vl, sd, vl),
+    INSN(fpclass,        66, 0f3a, 67,   el, sd, el),
     INSN_PFP(or,               0f, 56),
 //       pmovd2m,        f3, 0f38, 39,        d
 //       pmovm2,         f3, 0f38, 38,       dq
 //       pmovq2m,        f3, 0f38, 39,        q
     INSN(pmullq,         66, 0f38, 40,   vl,  q, vl),
+    INSN(range,          66, 0f3a, 50,   vl, sd, vl),
+    INSN(range,          66, 0f3a, 51,   el, sd, el),
+    INSN(reduce,         66, 0f3a, 56,   vl, sd, vl),
+    INSN(reduce,         66, 0f3a, 57,   el, sd, el),
     INSN_PFP(xor,              0f, 57),
 };
 
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -278,10 +278,18 @@ static inline bool _to_bool(byte_vec_t b
 #   define broadcast_octet(x) B(broadcastf32x8_, _mask, x, undef(), ~0)
 #   define insert_octet(x, y, p) B(insertf32x8_, _mask, x, y, p, undef(), ~0)
 #  endif
+#  ifdef __AVX512DQ__
+#   define frac(x) B(reduceps, _mask, x, 0b00001011, undef(), ~0)
+#  endif
 #  define getexp(x) BR(getexpps, _mask, x, undef(), ~0)
 #  define getmant(x) BR(getmantps, _mask, x, 0, undef(), ~0)
-#  define max(x, y) BR_(maxps, _mask, x, y, undef(), ~0)
-#  define min(x, y) BR_(minps, _mask, x, y, undef(), ~0)
+#  ifdef __AVX512DQ__
+#   define max(x, y) BR(rangeps, _mask, x, y, 0b0101, undef(), ~0)
+#   define min(x, y) BR(rangeps, _mask, x, y, 0b0100, undef(), ~0)
+#  else
+#   define max(x, y) BR_(maxps, _mask, x, y, undef(), ~0)
+#   define min(x, y) BR_(minps, _mask, x, y, undef(), ~0)
+#  endif
 #  define mix(x, y) B(movaps, _mask, x, y, (0b0101010101010101 & ALL_TRUE))
 #  define scale(x, y) BR(scalefps, _mask, x, y, undef(), ~0)
 #  if VEC_SIZE == 64 && defined(__AVX512ER__)
@@ -343,10 +351,18 @@ static inline bool _to_bool(byte_vec_t b
 #   define broadcast_quartet(x) B(broadcastf64x4_, , x, undef(), ~0)
 #   define insert_quartet(x, y, p) B(insertf64x4_, _mask, x, y, p, undef(), ~0)
 #  endif
+#  ifdef __AVX512DQ__
+#   define frac(x) B(reducepd, _mask, x, 0b00001011, undef(), ~0)
+#  endif
 #  define getexp(x) BR(getexppd, _mask, x, undef(), ~0)
 #  define getmant(x) BR(getmantpd, _mask, x, 0, undef(), ~0)
-#  define max(x, y) BR_(maxpd, _mask, x, y, undef(), ~0)
-#  define min(x, y) BR_(minpd, _mask, x, y, undef(), ~0)
+#  ifdef __AVX512DQ__
+#   define max(x, y) BR(rangepd, _mask, x, y, 0b0101, undef(), ~0)
+#   define min(x, y) BR(rangepd, _mask, x, y, 0b0100, undef(), ~0)
+#  else
+#   define max(x, y) BR_(maxpd, _mask, x, y, undef(), ~0)
+#   define min(x, y) BR_(minpd, _mask, x, y, undef(), ~0)
+#  endif
 #  define mix(x, y) B(movapd, _mask, x, y, 0b01010101)
 #  define scale(x, y) BR(scalefpd, _mask, x, y, undef(), ~0)
 #  if VEC_SIZE == 64 && defined(__AVX512ER__)
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -3933,6 +3933,39 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+
+    printf("%-40s", "Testing vfpclasspsz $0x46,64(%edx),%k2...");
+    if ( stack_exec && cpu_has_avx512dq )
+    {
+        decl_insn(vfpclassps);
+
+        asm volatile ( put_insn(vfpclassps,
+                                /* 0x46: check for +/- 0 and neg. */
+                                "vfpclasspsz $0x46, 64(%0), %%k2")
+                       :: "d" (NULL) );
+
+        set_insn(vfpclassps);
+        for ( i = 0; i < 3; ++i )
+        {
+            res[16 + i * 5 + 0] = 0x00000000; /* +0 */
+            res[16 + i * 5 + 1] = 0x80000000; /* -0 */
+            res[16 + i * 5 + 2] = 0x80000001; /* -DEN */
+            res[16 + i * 5 + 3] = 0xff000000; /* -FIN */
+            res[16 + i * 5 + 4] = 0x7f000000; /* +FIN */
+        }
+        res[31] = 0;
+        regs.edx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vfpclassps) )
+            goto fail;
+        asm volatile ( "kmovw %%k2, %0" : "=g" (rc) );
+        if ( rc != 0xbdef )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
 #undef decl_insn
 #undef put_insn
 #undef set_insn
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -582,10 +582,16 @@ static const struct ext0f3a_table {
     [0x48 ... 0x49] = { .simd_size = simd_packed_fp, .four_op = 1 },
     [0x4a ... 0x4b] = { .simd_size = simd_packed_fp, .four_op = 1 },
     [0x4c] = { .simd_size = simd_packed_int, .four_op = 1 },
+    [0x50] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0x51] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
     [0x54] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
     [0x55] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
+    [0x56] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
+    [0x57] = { .simd_size = simd_scalar_vexw, .d8s = d8s_dq },
     [0x5c ... 0x5f] = { .simd_size = simd_packed_fp, .four_op = 1 },
     [0x60 ... 0x63] = { .simd_size = simd_packed_int, .two_op = 1 },
+    [0x66] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_vl },
+    [0x67] = { .simd_size = simd_scalar_vexw, .two_op = 1, .d8s = d8s_dq },
     [0x68 ... 0x69] = { .simd_size = simd_packed_fp, .four_op = 1 },
     [0x6a ... 0x6b] = { .simd_size = simd_scalar_opc, .four_op = 1 },
     [0x6c ... 0x6d] = { .simd_size = simd_packed_fp, .four_op = 1 },
@@ -9629,6 +9635,10 @@ x86_emulate(
                               EXC_UD);
         goto avx512f_imm8_no_sae;
 
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x50): /* vrangep{s,d} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x56): /* vreducep{s,d} $imm8,[xyz]mm/mem,[xyz]mm{k} */
+        host_and_vcpu_must_have(avx512dq);
+        /* fall through */
     case X86EMUL_OPC_EVEX_66(0x0f3a, 0x26): /* vgetmantp{s,d} $imm8,[xyz]mm/mem,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f3a, 0x54): /* vfixupimmp{s,d} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
         host_and_vcpu_must_have(avx512f);
@@ -9636,11 +9646,16 @@ x86_emulate(
             avx512_vlen_check(false);
         goto simd_imm8_zmm;
 
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x51): /* vranges{s,d} $imm8,xmm/mem,xmm,xmm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x57): /* vreduces{s,d} $imm8,xmm/mem,xmm,xmm{k} */
+        host_and_vcpu_must_have(avx512dq);
+        /* fall through */
     case X86EMUL_OPC_EVEX_66(0x0f3a, 0x27): /* vgetmants{s,d} $imm8,xmm/mem,xmm,xmm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f3a, 0x55): /* vfixupimms{s,d} $imm8,xmm/mem,xmm,xmm{k} */
         host_and_vcpu_must_have(avx512f);
         if ( ea.type == OP_MEM )
         {
+    simd_imm8_zmm_scalar:
             generate_exception_if(evex.br, EXC_UD);
             avx512_vlen_check(true);
         }
@@ -9793,6 +9808,14 @@ x86_emulate(
         dst.type = OP_NONE;
         break;
 
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x66): /* vfpclassp{d,s} $imm8,[xyz]mm/mem,k{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f3a, 0x67): /* vfpclasss{d,s} $imm8,xmm/mem,k{k} */
+        host_and_vcpu_must_have(avx512dq);
+        generate_exception_if(!evex.r || !evex.R || evex.z, EXC_UD);
+        if ( b & 1 )
+            goto simd_imm8_zmm_scalar;
+        goto avx512f_imm8_no_sae;
+
     case X86EMUL_OPC(0x0f3a, 0xcc):     /* sha1rnds4 $imm8,xmm/m128,xmm */
         host_and_vcpu_must_have(sha);
         op_bytes = 16;




_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.