[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen staging] x86emul: support AVX512{F, BW} integer unpack insns



commit 8a37b072bcd519bd068c487e4882d965eb376b75
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Tue May 21 08:23:57 2019 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Tue May 21 08:23:57 2019 +0200

    x86emul: support AVX512{F,BW} integer unpack insns
    
    There's once again one extra twobyte_table[] entry which gets its Disp8
    shift value set right away without getting support implemented just yet,
    again to avoid needlessly splitting groups of entries.
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
 tools/tests/x86_emulator/evex-disp8.c  |  8 ++++++++
 tools/tests/x86_emulator/simd.c        |  8 ++++++++
 tools/tests/x86_emulator/simd.h        |  4 ++++
 xen/arch/x86/x86_emulate/x86_emulate.c | 23 ++++++++++++++++++++---
 4 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/tools/tests/x86_emulator/evex-disp8.c b/tools/tests/x86_emulator/evex-disp8.c
index c41621c587..bdc597ecd5 100644
--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -229,6 +229,10 @@ static const struct test avx512f_all[] = {
     INSN(pternlog,     66, 0f3a, 25,    vl,     dq, vl),
     INSN(ptestm,       66, 0f38, 27,    vl,     dq, vl),
     INSN(ptestnm,      f3, 0f38, 27,    vl,     dq, vl),
+    INSN(punpckhdq,    66,   0f, 6a,    vl,      d, vl),
+    INSN(punpckhqdq,   66,   0f, 6d,    vl,      q, vl),
+    INSN(punpckldq,    66,   0f, 62,    vl,      d, vl),
+    INSN(punpcklqdq,   66,   0f, 6c,    vl,      q, vl),
     INSN(pxor,         66,   0f, ef,    vl,     dq, vl),
     INSN_PFP(shuf,           0f, c6),
     INSN_FP(sqrt,            0f, 51),
@@ -327,6 +331,10 @@ static const struct test avx512bw_all[] = {
     INSN(psubw,       66,   0f, f9,    vl,    w, vl),
     INSN(ptestm,      66, 0f38, 26,    vl,   bw, vl),
     INSN(ptestnm,     f3, 0f38, 26,    vl,   bw, vl),
+    INSN(punpckhbw,   66,   0f, 68,    vl,    b, vl),
+    INSN(punpckhwd,   66,   0f, 69,    vl,    w, vl),
+    INSN(punpcklbw,   66,   0f, 60,    vl,    b, vl),
+    INSN(punpcklwd,   66,   0f, 61,    vl,    w, vl),
 };
 
 static const struct test avx512bw_128[] = {
diff --git a/tools/tests/x86_emulator/simd.c b/tools/tests/x86_emulator/simd.c
index 9c2a0a5a5a..47622c6a59 100644
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -300,6 +300,10 @@ static inline bool _to_bool(byte_vec_t bv)
     asm ( "vpbroadcastd %k1, %0" : "=v" (t_) : "r" (x) ); \
     t_; \
 })
+#  if VEC_SIZE == 16
+#   define interleave_hi(x, y) ((vec_t)B(punpckhdq, _mask, (vsi_t)(x), (vsi_t)(y), (vsi_t)undef(), ~0))
+#   define interleave_lo(x, y) ((vec_t)B(punpckldq, _mask, (vsi_t)(x), (vsi_t)(y), (vsi_t)undef(), ~0))
+#  endif
 #  define mix(x, y) ((vec_t)B(movdqa32_, _mask, (vsi_t)(x), (vsi_t)(y), \
                               (0b0101010101010101 & ((1 << ELEM_COUNT) - 1))))
 #  define shrink1(x) ((half_t)B(pmovqd, _mask, (vdi_t)(x), (vsi_half_t){}, ~0))
@@ -317,6 +321,10 @@ static inline bool _to_bool(byte_vec_t bv)
     t_; \
 })
 #  endif
+#  if VEC_SIZE == 16
+#   define interleave_hi(x, y) ((vec_t)B(punpckhqdq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(), ~0))
+#   define interleave_lo(x, y) ((vec_t)B(punpcklqdq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(), ~0))
+#  endif
 #  define mix(x, y) ((vec_t)B(movdqa64_, _mask, (vdi_t)(x), (vdi_t)(y), 0b01010101))
 # endif
 # if INT_SIZE == 4
diff --git a/tools/tests/x86_emulator/simd.h b/tools/tests/x86_emulator/simd.h
index d8f62ba472..8c5a419f46 100644
--- a/tools/tests/x86_emulator/simd.h
+++ b/tools/tests/x86_emulator/simd.h
@@ -252,6 +252,10 @@ OVR(pmovzxwq);
 OVR(pmulld);
 OVR(pmuldq);
 OVR(pmuludq);
+OVR(punpckhdq);
+OVR(punpckhqdq);
+OVR(punpckldq);
+OVR(punpcklqdq);
 # endif
 
 # undef OVR_VFP
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index 154ec1ca26..6ea189fe89 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -312,10 +312,10 @@ static const struct twobyte_table {
     [0x58 ... 0x59] = { DstImplicit|SrcMem|ModRM, simd_any_fp, d8s_vl },
     [0x5a ... 0x5b] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
     [0x5c ... 0x5f] = { DstImplicit|SrcMem|ModRM, simd_any_fp, d8s_vl },
-    [0x60 ... 0x62] = { DstImplicit|SrcMem|ModRM, simd_other },
+    [0x60 ... 0x62] = { DstImplicit|SrcMem|ModRM, simd_other, d8s_vl },
     [0x63 ... 0x67] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
-    [0x68 ... 0x6a] = { DstImplicit|SrcMem|ModRM, simd_other },
-    [0x6b ... 0x6d] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
+    [0x68 ... 0x6a] = { DstImplicit|SrcMem|ModRM, simd_other, d8s_vl },
+    [0x6b ... 0x6d] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
     [0x6e] = { DstImplicit|SrcMem|ModRM|Mov, simd_none, d8s_dq64 },
     [0x6f] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_int, d8s_vl },
     [0x70] = { SrcImmByte|ModRM|TwoOp, simd_other },
@@ -6657,6 +6657,12 @@ x86_emulate(
     case X86EMUL_OPC_EVEX_66(0x0f, 0xf6): /* vpsadbw [xyz]mm/mem,[xyz]mm,[xyz]mm */
         generate_exception_if(evex.opmsk, EXC_UD);
         /* fall through */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0x60): /* vpunpcklbw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0x61): /* vpunpcklwd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0x68): /* vpunpckhbw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0x69): /* vpunpckhwd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+        op_bytes = 16 << evex.lr;
+        /* fall through */
     case X86EMUL_OPC_EVEX_66(0x0f, 0xd1): /* vpsrlw xmm/m128,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f, 0xe1): /* vpsraw xmm/m128,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f, 0xf1): /* vpsllw xmm/m128,[xyz]mm,[xyz]mm{k} */
@@ -6684,6 +6690,13 @@ x86_emulate(
         elem_bytes = 1 << (b & 1);
         goto avx512f_no_sae;
 
+    case X86EMUL_OPC_EVEX_66(0x0f, 0x62): /* vpunpckldq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0x6a): /* vpunpckhdq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+        generate_exception_if(evex.w, EXC_UD);
+        fault_suppression = false;
+        op_bytes = 16 << evex.lr;
+        goto avx512f_no_sae;
+
     case X86EMUL_OPC_EVEX_F3(0x0f38, 0x26): /* vptestnm{b,w} [xyz]mm/mem,[xyz]mm,k{k} */
     case X86EMUL_OPC_EVEX_F3(0x0f38, 0x27): /* vptestnm{d,q} [xyz]mm/mem,[xyz]mm,k{k} */
         op_bytes = 16 << evex.lr;
@@ -6710,6 +6723,10 @@ x86_emulate(
         avx512_vlen_check(false);
         goto simd_zmm;
 
+    case X86EMUL_OPC_EVEX_66(0x0f, 0x6c): /* vpunpcklqdq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0x6d): /* vpunpckhqdq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+        fault_suppression = false;
+        /* fall through */
     case X86EMUL_OPC_EVEX_66(0x0f, 0xd4): /* vpaddq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f, 0xf4): /* vpmuludq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f38, 0x28): /* vpmuldq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
--
generated by git-patchbot for /home/xen/git/xen.git#staging

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.