[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 14/34] x86emul: support AVX512{F, DQ} FP broadcast insns



Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v3: New.

--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -91,6 +91,7 @@ enum esz {
 
 static const struct test avx512f_all[] = {
     INSN_FP(add,             0f, 58),
+    INSN(broadcastss,  66, 0f38, 18,    el,      d, el),
     INSN_FP(cmp,             0f, c2),
     INSN_FP(div,             0f, 5e),
     INSN(fmadd132,     66, 0f38, 98,    vl,     sd, vl),
@@ -162,6 +163,15 @@ static const struct test avx512f_128[] =
     INSN(movq,      66,   0f, d6, el,    q, el),
 };
 
+static const struct test avx512f_no128[] = {
+    INSN(broadcastf32x4, 66, 0f38, 1a, el_4,  d, vl),
+    INSN(broadcastsd,    66, 0f38, 19, el,    q, el),
+};
+
+static const struct test avx512f_512[] = {
+    INSN(broadcastf64x4, 66, 0f38, 1b, el_4, q, vl),
+};
+
 static const struct test avx512bw_all[] = {
     INSN(movdqu8,     f2,   0f, 6f,    vl,   b, vl),
     INSN(movdqu8,     f2,   0f, 7f,    vl,   b, vl),
@@ -176,8 +186,19 @@ static const struct test avx512dq_all[]
     INSN_PFP(xor,              0f, 57),
 };
 
+static const struct test avx512dq_no128[] = {
+    INSN(broadcastf32x2, 66, 0f38, 19, el_2, d, vl),
+    INSN(broadcastf64x2, 66, 0f38, 1a, el_2, q, vl),
+};
+
+static const struct test avx512dq_512[] = {
+    INSN(broadcastf32x8, 66, 0f38, 1b, el_8, d, vl),
+};
+
 static const unsigned char vl_all[] = { VL_512, VL_128, VL_256 };
 static const unsigned char vl_128[] = { VL_128 };
+static const unsigned char vl_no128[] = { VL_512, VL_256 };
+static const unsigned char vl_512[] = { VL_512 };
 
 /*
  * This table, indicating the presence of an immediate (byte) for an opcode
@@ -486,6 +507,10 @@ void evex_disp8_test(void *instr, struct
 
     RUN(avx512f, all);
     RUN(avx512f, 128);
+    RUN(avx512f, no128);
+    RUN(avx512f, 512);
     RUN(avx512bw, all);
     RUN(avx512dq, all);
+    RUN(avx512dq, no128);
+    RUN(avx512dq, 512);
 }
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -234,10 +234,16 @@ enum simd_opsize {
 
     /*
      * 128 bits of integer or floating point data, with no further
-     * formatting information.
+     * formatting information, or with it encoded by EVEX.W.
      */
     simd_128,
 
+    /*
+     * 256 bits of integer or floating point data, with formatting
+     * encoded by EVEX.W.
+     */
+    simd_256,
+
     /* Operand size encoded in non-standard way. */
     simd_other
 };
@@ -430,8 +436,10 @@ static const struct ext0f38_table {
     [0x13] = { .simd_size = simd_other, .two_op = 1 },
     [0x14 ... 0x16] = { .simd_size = simd_packed_fp },
     [0x17] = { .simd_size = simd_packed_int, .two_op = 1 },
-    [0x18 ... 0x19] = { .simd_size = simd_scalar_opc, .two_op = 1 },
-    [0x1a] = { .simd_size = simd_128, .two_op = 1 },
+    [0x18] = { .simd_size = simd_scalar_opc, .two_op = 1, .d8s = 2 },
+    [0x19] = { .simd_size = simd_scalar_opc, .two_op = 1, .d8s = 3 },
+    [0x1a] = { .simd_size = simd_128, .two_op = 1, .d8s = 4 },
+    [0x1b] = { .simd_size = simd_256, .two_op = 1, .d8s = d8s_vl_by_2 },
     [0x1c ... 0x1e] = { .simd_size = simd_packed_int, .two_op = 1 },
     [0x20 ... 0x25] = { .simd_size = simd_other, .two_op = 1 },
     [0x28 ... 0x29] = { .simd_size = simd_packed_int },
@@ -3320,6 +3328,10 @@ x86_decode(
         op_bytes = 16;
         break;
 
+    case simd_256:
+        op_bytes = 32;
+        break;
+
     default:
         op_bytes = 0;
         break;
@@ -7969,6 +7981,42 @@ x86_emulate(
         dst.type = OP_NONE;
         break;
 
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x18): /* vbroadcastss xmm/m32,[xyz]mm{k} */
+        generate_exception_if(evex.w || evex.br, EXC_UD);
+    avx512_broadcast:
+        /*
+         * For the respective code below the main switch() to work we need to
+         * fold op_mask here: A source element gets read whenever any of its
+         * respective destination elements' mask bits is set.
+         */
+        if ( fault_suppression )
+        {
+            n = 1 << ((b & 3) - evex.w);
+            ASSERT(op_bytes == n * elem_bytes);
+            for ( i = n; i < (16 << evex.lr) / elem_bytes; i += n )
+                op_mask |= (op_mask >> i) & ((1 << n) - 1);
+        }
+        goto avx512f_no_sae;
+
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x1b): /* vbroadcastf32x8 m256,zmm{k} */
+                                            /* vbroadcastf64x4 m256,zmm{k} */
+        generate_exception_if(ea.type != OP_MEM || evex.lr != 2, EXC_UD);
+        /* fall through */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x19): /* vbroadcastsd xmm/m64,{y,z}mm{k} */
+                                            /* vbroadcastf32x2 xmm/m64,{y,z}mm{k} */
+        generate_exception_if(!evex.lr || evex.br, EXC_UD);
+        if ( !evex.w )
+            host_and_vcpu_must_have(avx512dq);
+        goto avx512_broadcast;
+
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x1a): /* vbroadcastf32x4 m128,{y,z}mm{k} */
+                                            /* vbroadcastf64x2 m128,{y,z}mm{k} */
+        generate_exception_if(ea.type != OP_MEM || !evex.lr || evex.br,
+                              EXC_UD);
+        if ( evex.w )
+            host_and_vcpu_must_have(avx512dq);
+        goto avx512_broadcast;
+
     case X86EMUL_OPC_66(0x0f38, 0x20): /* pmovsxbw xmm/m64,xmm */
     case X86EMUL_OPC_66(0x0f38, 0x21): /* pmovsxbd xmm/m32,xmm */
     case X86EMUL_OPC_66(0x0f38, 0x22): /* pmovsxbq xmm/m16,xmm */




_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.