[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen staging] x86emul: support AVX512{F, BW} zero- and sign-extending moves



commit 3988beb0865f1865a52d48a1c414735bc9ebef1d
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Fri Apr 5 10:41:59 2019 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Fri Apr 5 10:41:59 2019 +0200

    x86emul: support AVX512{F,BW} zero- and sign-extending moves
    
    Note that the testing in simd.c doesn't really follow the ISA extension
    pattern - to fit the scheme, extensions from byte and word granular
    vectors can (currently) sensibly only happen in the AVX512BW case (and
    hence respective abstraction macros will be added there rather than
    here).
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
 tools/tests/x86_emulator/evex-disp8.c  | 12 ++++++++++++
 tools/tests/x86_emulator/simd.c        |  2 ++
 tools/tests/x86_emulator/simd.h        | 10 ++++++++++
 xen/arch/x86/x86_emulate/x86_emulate.c | 33 +++++++++++++++++++++++++++++++--
 4 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/tools/tests/x86_emulator/evex-disp8.c 
b/tools/tests/x86_emulator/evex-disp8.c
index d581076824..f3435fa1e4 100644
--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -177,6 +177,16 @@ static const struct test avx512f_all[] = {
     INSN(pmaxu,        66, 0f38, 3f,    vl,     dq, vl),
     INSN(pmins,        66, 0f38, 39,    vl,     dq, vl),
     INSN(pminu,        66, 0f38, 3b,    vl,     dq, vl),
+    INSN(pmovsxbd,     66, 0f38, 21,    vl_4,    b, vl),
+    INSN(pmovsxbq,     66, 0f38, 22,    vl_8,    b, vl),
+    INSN(pmovsxwd,     66, 0f38, 23,    vl_2,    w, vl),
+    INSN(pmovsxwq,     66, 0f38, 24,    vl_4,    w, vl),
+    INSN(pmovsxdq,     66, 0f38, 25,    vl_2, d_nb, vl),
+    INSN(pmovzxbd,     66, 0f38, 31,    vl_4,    b, vl),
+    INSN(pmovzxbq,     66, 0f38, 32,    vl_8,    b, vl),
+    INSN(pmovzxwd,     66, 0f38, 33,    vl_2,    w, vl),
+    INSN(pmovzxwq,     66, 0f38, 34,    vl_4,    w, vl),
+    INSN(pmovzxdq,     66, 0f38, 35,    vl_2, d_nb, vl),
     INSN(pmuldq,       66, 0f38, 28,    vl,      q, vl),
     INSN(pmulld,       66, 0f38, 40,    vl,      d, vl),
     INSN(pmuludq,      66,   0f, f4,    vl,      q, vl),
@@ -274,6 +284,8 @@ static const struct test avx512bw_all[] = {
     INSN(pminsw,      66,   0f, ea,    vl,    w, vl),
     INSN(pminub,      66,   0f, da,    vl,    b, vl),
     INSN(pminuw,      66, 0f38, 3a,    vl,    w, vl),
+    INSN(pmovsxbw,    66, 0f38, 20,    vl_2,  b, vl),
+    INSN(pmovzxbw,    66, 0f38, 30,    vl_2,  b, vl),
     INSN(pmulhuw,     66,   0f, e4,    vl,    w, vl),
     INSN(pmulhw,      66,   0f, e5,    vl,    w, vl),
     INSN(pmullw,      66,   0f, d5,    vl,    w, vl),
diff --git a/tools/tests/x86_emulator/simd.c b/tools/tests/x86_emulator/simd.c
index d1f3f661a0..10235e8e0a 100644
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -311,10 +311,12 @@ static inline bool _to_bool(byte_vec_t bv)
 #  define max(x, y) B(pmaxsd, _mask, x, y, undef(), ~0)
 #  define min(x, y) B(pminsd, _mask, x, y, undef(), ~0)
 #  define mul_full(x, y) ((vec_t)B(pmuldq, _mask, x, y, (vdi_t)undef(), ~0))
+#  define widen1(x) ((vec_t)B(pmovsxdq, _mask, x, (vdi_t)undef(), ~0))
 # elif UINT_SIZE == 4
 #  define max(x, y) ((vec_t)B(pmaxud, _mask, (vsi_t)(x), (vsi_t)(y), 
(vsi_t)undef(), ~0))
 #  define min(x, y) ((vec_t)B(pminud, _mask, (vsi_t)(x), (vsi_t)(y), 
(vsi_t)undef(), ~0))
 #  define mul_full(x, y) ((vec_t)B(pmuludq, _mask, (vsi_t)(x), (vsi_t)(y), 
(vdi_t)undef(), ~0))
+#  define widen1(x) ((vec_t)B(pmovzxdq, _mask, (vsi_half_t)(x), 
(vdi_t)undef(), ~0))
 # elif INT_SIZE == 8
 #  define max(x, y) ((vec_t)B(pmaxsq, _mask, (vdi_t)(x), (vdi_t)(y), 
(vdi_t)undef(), ~0))
 #  define min(x, y) ((vec_t)B(pminsq, _mask, (vdi_t)(x), (vdi_t)(y), 
(vdi_t)undef(), ~0))
diff --git a/tools/tests/x86_emulator/simd.h b/tools/tests/x86_emulator/simd.h
index 60e1420b3d..fb986be66a 100644
--- a/tools/tests/x86_emulator/simd.h
+++ b/tools/tests/x86_emulator/simd.h
@@ -222,6 +222,16 @@ REN(pxor, , d);
 #  endif
 OVR(movntdq);
 OVR(movntdqa);
+OVR(pmovsxbd);
+OVR(pmovsxbq);
+OVR(pmovsxdq);
+OVR(pmovsxwd);
+OVR(pmovsxwq);
+OVR(pmovzxbd);
+OVR(pmovzxbq);
+OVR(pmovzxdq);
+OVR(pmovzxwd);
+OVR(pmovzxwq);
 OVR(pmulld);
 OVR(pmuldq);
 OVR(pmuludq);
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c 
b/xen/arch/x86/x86_emulate/x86_emulate.c
index 6343103e56..d0d1c04b04 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -443,13 +443,23 @@ static const struct ext0f38_table {
     [0x1a] = { .simd_size = simd_128, .two_op = 1, .d8s = 4 },
     [0x1b] = { .simd_size = simd_256, .two_op = 1, .d8s = d8s_vl_by_2 },
     [0x1c ... 0x1e] = { .simd_size = simd_packed_int, .two_op = 1 },
-    [0x20 ... 0x25] = { .simd_size = simd_other, .two_op = 1 },
+    [0x20] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
+    [0x21] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_4 },
+    [0x22] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_8 },
+    [0x23] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
+    [0x24] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_4 },
+    [0x25] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
     [0x26 ... 0x29] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x2a] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_vl },
     [0x2b] = { .simd_size = simd_packed_int },
     [0x2c ... 0x2d] = { .simd_size = simd_packed_fp },
     [0x2e ... 0x2f] = { .simd_size = simd_packed_fp, .to_mem = 1 },
-    [0x30 ... 0x35] = { .simd_size = simd_other, .two_op = 1 },
+    [0x30] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
+    [0x31] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_4 },
+    [0x32] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_8 },
+    [0x33] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
+    [0x34] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_4 },
+    [0x35] = { .simd_size = simd_other, .two_op = 1, .d8s = d8s_vl_by_2 },
     [0x36 ... 0x3f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x40] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x41] = { .simd_size = simd_packed_int, .two_op = 1 },
@@ -8349,6 +8359,25 @@ x86_emulate(
         op_bytes = 16 >> (pmov_convert_delta[b & 7] - vex.l);
         goto simd_0f_int;
 
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x20): /* vpmovsxbw 
{x,y}mm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x30): /* vpmovzxbw 
{x,y}mm/mem,[xyz]mm{k} */
+        host_and_vcpu_must_have(avx512bw);
+        /* fall through */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x21): /* vpmovsxbd xmm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x22): /* vpmovsxbq xmm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x23): /* vpmovsxwd 
{x,y}mm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x24): /* vpmovsxwq xmm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x25): /* vpmovsxdq 
{x,y}mm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x31): /* vpmovzxbd xmm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x32): /* vpmovzxbq xmm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x33): /* vpmovzxwd 
{x,y}mm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x34): /* vpmovzxwq xmm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f38, 0x35): /* vpmovzxdq 
{x,y}mm/mem,[xyz]mm{k} */
+        generate_exception_if(evex.brs || (evex.w && (b & 7) == 5), EXC_UD);
+        op_bytes = 32 >> (pmov_convert_delta[b & 7] + 1 - evex.lr);
+        elem_bytes = (b & 7) < 3 ? 1 : (b & 7) != 5 ? 2 : 4;
+        goto avx512f_no_sae;
+
     case X86EMUL_OPC_66(0x0f38, 0x2a):     /* movntdqa m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x2a): /* vmovntdqa mem,{x,y}mm */
         generate_exception_if(ea.type != OP_MEM, EXC_UD);
--
generated by git-patchbot for /home/xen/git/xen.git#staging

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.