[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen staging] x86emul: support AVX512F legacy-equivalent packed int/FP conversion insns



commit ed806f3737304b1aa01e8f7dfd01460018f8c023
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Fri May 24 10:22:18 2019 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Fri May 24 10:22:18 2019 +0200

    x86emul: support AVX512F legacy-equivalent packed int/FP conversion insns
    
    ... including the two AVX512DQ forms which share these encodings,
    differing only in EVEX.W being set.
    
    VCVTDQ2PD, sharing its main opcode with others, needs a "manual"
    override of disp8scale.
    
    The simd_size changes for the twobyte_table[] entries are benign to
    pre-existing code, but allow decode_disp8scale() to work as is here.
    
    The placement of the 0xe6 case block, which looks wrong at this point, is
    once again in anticipation of further additions of case labels.
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
 tools/tests/x86_emulator/evex-disp8.c  |  6 +++++
 tools/tests/x86_emulator/simd.c        | 18 +++++++++++--
 tools/tests/x86_emulator/simd.h        |  7 ++++++
 xen/arch/x86/x86_emulate/x86_emulate.c | 46 ++++++++++++++++++++++++++++++++--
 4 files changed, 73 insertions(+), 4 deletions(-)

diff --git a/tools/tests/x86_emulator/evex-disp8.c b/tools/tests/x86_emulator/evex-disp8.c
index c808fed6cc..aff0aa7a27 100644
--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -109,8 +109,12 @@ static const struct test avx512f_all[] = {
     INSN_FP(cmp,             0f, c2),
     INSN(comisd,       66,   0f, 2f,    el,      q, el),
     INSN(comiss,         ,   0f, 2f,    el,      d, el),
+    INSN(cvtdq2pd,     f3,   0f, e6,    vl_2,    d, vl),
+    INSN(cvtdq2ps,       ,   0f, 5b,    vl,      d, vl),
+    INSN(cvtpd2dq,     f2,   0f, e6,    vl,      q, vl),
     INSN(cvtpd2ps,     66,   0f, 5a,    vl,      q, vl),
     INSN(cvtph2ps,     66, 0f38, 13,    vl_2, d_nb, vl),
+    INSN(cvtps2dq,     66,   0f, 5b,    vl,      d, vl),
     INSN(cvtps2pd,       ,   0f, 5a,    vl_2,    d, vl),
     INSN(cvtps2ph,     66, 0f3a, 1d,    vl_2, d_nb, vl),
     INSN(cvtsd2ss,     f2,   0f, 5a,    el,      q, el),
@@ -398,6 +402,8 @@ static const struct test avx512dq_all[] = {
     INSN_PFP(and,              0f, 54),
     INSN_PFP(andn,             0f, 55),
     INSN(broadcasti32x2, 66, 0f38, 59, el_2,  d, vl),
+    INSN(cvtqq2pd,       f3,   0f, e6,   vl,  q, vl),
+    INSN(cvtqq2ps,         ,   0f, 5b,   vl,  q, vl),
     INSN_PFP(or,               0f, 56),
 //       pmovd2m,        f3, 0f38, 39,        d
 //       pmovm2,         f3, 0f38, 38,       dq
diff --git a/tools/tests/x86_emulator/simd.c b/tools/tests/x86_emulator/simd.c
index fd575d6a9e..ee01361b10 100644
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -92,6 +92,13 @@ static inline bool _to_bool(byte_vec_t bv)
 # define to_int(x) ((vec_t){ (int)(x)[0] })
 #elif VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW__)
 # define to_int(x) __builtin_ia32_pi2fd(__builtin_ia32_pf2id(x))
+#elif defined(FLOAT_SIZE) && VEC_SIZE > FLOAT_SIZE && defined(__AVX512F__) && \
+      (VEC_SIZE == 64 || defined(__AVX512VL__))
+# if FLOAT_SIZE == 4
+#  define to_int(x) BR(cvtdq2ps, _mask, BR(cvtps2dq, _mask, x, (vsi_t)undef(), ~0), undef(), ~0)
+# elif FLOAT_SIZE == 8
+#  define to_int(x) B(cvtdq2pd, _mask, BR(cvtpd2dq, _mask, x, (vsi_half_t){}, ~0), undef(), ~0)
+# endif
 #elif VEC_SIZE == 16 && defined(__SSE2__)
 # if FLOAT_SIZE == 4
 #  define to_int(x) __builtin_ia32_cvtdq2ps(__builtin_ia32_cvtps2dq(x))
@@ -1142,15 +1149,21 @@ int simd_test(void)
     touch(src);
     if ( !eq(x * -alt, -src) ) return __LINE__;
 
-# if defined(recip) && defined(to_int)
+# ifdef to_int
 
     touch(src);
+    x = to_int(src);
+    touch(src);
+    if ( !eq(x, src) ) return __LINE__;
+
+#  ifdef recip
+    touch(src);
     x = recip(src);
     touch(src);
     touch(x);
     if ( !eq(to_int(recip(x)), src) ) return __LINE__;
 
-#  ifdef rsqrt
+#   ifdef rsqrt
     x = src * src;
     touch(x);
     y = rsqrt(x);
@@ -1158,6 +1171,7 @@ int simd_test(void)
     if ( !eq(to_int(recip(y)), src) ) return __LINE__;
     touch(src);
     if ( !eq(to_int(y), to_int(recip(src))) ) return __LINE__;
+#   endif
 #  endif
 
 # endif
diff --git a/tools/tests/x86_emulator/simd.h b/tools/tests/x86_emulator/simd.h
index 8cf6ef2767..74e4eaa7c5 100644
--- a/tools/tests/x86_emulator/simd.h
+++ b/tools/tests/x86_emulator/simd.h
@@ -244,6 +244,7 @@ asm ( ".macro override insn    \n\t"
 OVR_INT(broadcast);
 OVR_SFP(broadcast);
 OVR_SFP(comi);
+OVR_VFP(cvtdq2);
 OVR_FP(add);
 OVR_INT(add);
 OVR_BW(adds);
@@ -330,13 +331,19 @@ REN(pandn, , d);
 REN(por, , d);
 REN(pxor, , d);
 #  endif
+OVR(cvtpd2dqx);
+OVR(cvtpd2dqy);
 OVR(cvtpd2psx);
 OVR(cvtpd2psy);
 OVR(cvtph2ps);
+OVR(cvtps2dq);
 OVR(cvtps2pd);
 OVR(cvtps2ph);
 OVR(cvtsd2ss);
 OVR(cvtss2sd);
+OVR(cvttpd2dqx);
+OVR(cvttpd2dqy);
+OVR(cvttps2dq);
 OVR(movddup);
 OVR(movntdq);
 OVR(movntdqa);
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index 2cad8c3741..23a2ab67d9 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -311,7 +311,7 @@ static const struct twobyte_table {
     [0x54 ... 0x57] = { DstImplicit|SrcMem|ModRM, simd_packed_fp, d8s_vl },
     [0x58 ... 0x59] = { DstImplicit|SrcMem|ModRM, simd_any_fp, d8s_vl },
     [0x5a] = { DstImplicit|SrcMem|ModRM|Mov, simd_any_fp, d8s_vl },
-    [0x5b] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
+    [0x5b] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl },
     [0x5c ... 0x5f] = { DstImplicit|SrcMem|ModRM, simd_any_fp, d8s_vl },
     [0x60 ... 0x62] = { DstImplicit|SrcMem|ModRM, simd_other, d8s_vl },
     [0x63 ... 0x67] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
@@ -375,7 +375,7 @@ static const struct twobyte_table {
     [0xe0] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
     [0xe1 ... 0xe2] = { DstImplicit|SrcMem|ModRM, simd_128, 4 },
     [0xe3 ... 0xe5] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
-    [0xe6] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
+    [0xe6] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl },
     [0xe7] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int, d8s_vl },
     [0xe8 ... 0xef] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
     [0xf0] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
@@ -3049,6 +3049,11 @@ x86_decode(
                 if ( disp8scale == 2 && evex.pfx == vex_f3 )
                     disp8scale = 3;
                 break;
+
+            case 0xe6: /* vcvtdq2pd needs special casing */
+                if ( disp8scale && evex.pfx == vex_f3 && !evex.w && !evex.brs )
+                    --disp8scale;
+                break;
             }
             break;
 
@@ -6561,6 +6566,22 @@ x86_emulate(
         op_bytes = 16 << vex.l;
         goto simd_0f_cvt;
 
+    case X86EMUL_OPC_EVEX_66(0x0f, 0x5b): /* vcvtps2dq [xyz]mm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_F3(0x0f, 0x5b): /* vcvttps2dq [xyz]mm/mem,[xyz]mm{k} */
+        generate_exception_if(evex.w, EXC_UD);
+        /* fall through */
+    case X86EMUL_OPC_EVEX(0x0f, 0x5b):    /* vcvtdq2ps [xyz]mm/mem,[xyz]mm{k} */
+                                          /* vcvtqq2ps [xyz]mm/mem,{x,y}mm{k} */
+        if ( evex.w )
+            host_and_vcpu_must_have(avx512dq);
+        else
+            host_and_vcpu_must_have(avx512f);
+        if ( ea.type != OP_REG || !evex.brs )
+            avx512_vlen_check(false);
+        d |= TwoOp;
+        op_bytes = 16 << evex.lr;
+        goto simd_zmm;
+
     CASE_SIMD_PACKED_INT(0x0f, 0x60):    /* punpcklbw {,x}mm/mem,{,x}mm */
     case X86EMUL_OPC_VEX_66(0x0f, 0x60): /* vpunpcklbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
     CASE_SIMD_PACKED_INT(0x0f, 0x61):    /* punpcklwd {,x}mm/mem,{,x}mm */
@@ -7227,6 +7248,27 @@ x86_emulate(
         op_bytes = 8;
         goto simd_0f_xmm;
 
+    case X86EMUL_OPC_EVEX_66(0x0f, 0xe6):   /* vcvttpd2dq [xyz]mm/mem,{x,y}mm{k} */
+    case X86EMUL_OPC_EVEX_F2(0x0f, 0xe6):   /* vcvtpd2dq [xyz]mm/mem,{x,y}mm{k} */
+        generate_exception_if(!evex.w, EXC_UD);
+        /* fall through */
+    case X86EMUL_OPC_EVEX_F3(0x0f, 0xe6):   /* vcvtdq2pd {x,y}mm/mem,[xyz]mm{k} */
+                                            /* vcvtqq2pd [xyz]mm/mem,[xyz]mm{k} */
+        if ( evex.pfx != vex_f3 )
+            host_and_vcpu_must_have(avx512f);
+        else if ( evex.w )
+            host_and_vcpu_must_have(avx512dq);
+        else
+        {
+            host_and_vcpu_must_have(avx512f);
+            generate_exception_if(ea.type != OP_MEM && evex.brs, EXC_UD);
+        }
+        if ( ea.type != OP_REG || !evex.brs )
+            avx512_vlen_check(false);
+        d |= TwoOp;
+        op_bytes = 8 << (evex.w + evex.lr);
+        goto simd_zmm;
+
     case X86EMUL_OPC_F2(0x0f, 0xf0):     /* lddqu m128,xmm */
     case X86EMUL_OPC_VEX_F2(0x0f, 0xf0): /* vlddqu mem,{x,y}mm */
         generate_exception_if(ea.type != OP_MEM, EXC_UD);
--
generated by git-patchbot for /home/xen/git/xen.git#staging

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.