[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen staging] x86emul: support AVX512DQ packed quad-int/FP conversion insns



commit d45ff77f7e5ee65a351a152b633a3346bc9ccbb6
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Fri May 24 10:23:31 2019 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Fri May 24 10:23:31 2019 +0200

    x86emul: support AVX512DQ packed quad-int/FP conversion insns
    
    VCVT{,T}PS2QQ, sharing their main opcodes with others, once again need
    "manual" overrides of disp8scale.
    
    While not directly related here, also add a scalar variant of to_wint()
    to the test harness.
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
 tools/tests/x86_emulator/evex-disp8.c  |  4 +++
 tools/tests/x86_emulator/simd.c        | 52 ++++++++++++++++++++++++++++++++--
 xen/arch/x86/x86_emulate/x86_emulate.c | 14 +++++++++
 3 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/tools/tests/x86_emulator/evex-disp8.c b/tools/tests/x86_emulator/evex-disp8.c
index 847266ced3..0c9989ebfa 100644
--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -410,8 +410,12 @@ static const struct test avx512dq_all[] = {
     INSN_PFP(and,              0f, 54),
     INSN_PFP(andn,             0f, 55),
     INSN(broadcasti32x2, 66, 0f38, 59, el_2,  d, vl),
+    INSN(cvtpd2qq,       66,   0f, 7b,   vl,  q, vl),
+    INSN(cvtps2qq,       66,   0f, 7b, vl_2,  d, vl),
     INSN(cvtqq2pd,       f3,   0f, e6,   vl,  q, vl),
     INSN(cvtqq2ps,         ,   0f, 5b,   vl,  q, vl),
+    INSN(cvttpd2qq,      66,   0f, 7a,   vl,  q, vl),
+    INSN(cvttps2qq,      66,   0f, 7a, vl_2,  d, vl),
     INSN_PFP(or,               0f, 56),
 //       pmovd2m,        f3, 0f38, 39,        d
 //       pmovm2,         f3, 0f38, 38,       dq
diff --git a/tools/tests/x86_emulator/simd.c b/tools/tests/x86_emulator/simd.c
index 91e36bd8cc..c4d9e8ceb7 100644
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -90,14 +90,35 @@ static inline bool _to_bool(byte_vec_t bv)
 
 #if VEC_SIZE == FLOAT_SIZE
 # define to_int(x) ({ int i_ = (x)[0]; touch(i_); ((vec_t){ i_ }); })
+# ifdef __x86_64__
+#  define to_wint(x) ({ long l_ = (x)[0]; touch(l_); ((vec_t){ l_ }); })
+# endif
 #elif VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW__)
 # define to_int(x) __builtin_ia32_pi2fd(__builtin_ia32_pf2id(x))
 #elif defined(FLOAT_SIZE) && VEC_SIZE > FLOAT_SIZE && defined(__AVX512F__) && \
       (VEC_SIZE == 64 || defined(__AVX512VL__))
 # if FLOAT_SIZE == 4
 #  define to_int(x) BR(cvtdq2ps, _mask, BR(cvtps2dq, _mask, x, (vsi_t)undef(), ~0), undef(), ~0)
+#  ifdef __AVX512DQ__
+#   define to_wint(x) ({ \
+    vsf_half_t t_ = low_half(x); \
+    vdi_t lo_, hi_; \
+    touch(t_); \
+    lo_ = BR(cvtps2qq, _mask, t_, (vdi_t)undef(), ~0); \
+    t_ = high_half(x); \
+    touch(t_); \
+    hi_ = BR(cvtps2qq, _mask, t_, (vdi_t)undef(), ~0); \
+    touch(lo_); touch(hi_); \
+    insert_half(insert_half(undef(), \
+                            BR(cvtqq2ps, _mask, lo_, (vsf_half_t){}, ~0), 0), \
+                BR(cvtqq2ps, _mask, hi_, (vsf_half_t){}, ~0), 1); \
+})
+#  endif
 # elif FLOAT_SIZE == 8
 #  define to_int(x) B(cvtdq2pd, _mask, BR(cvtpd2dq, _mask, x, (vsi_half_t){}, ~0), undef(), ~0)
+#  ifdef __AVX512DQ__
+#   define to_wint(x) BR(cvtqq2pd, _mask, BR(cvtpd2qq, _mask, x, (vdi_t)undef(), ~0), undef(), ~0)
+#  endif
 # endif
 #elif VEC_SIZE == 16 && defined(__SSE2__)
 # if FLOAT_SIZE == 4
@@ -121,6 +142,21 @@ static inline bool _to_bool(byte_vec_t bv)
 })
 #endif
 
+#if VEC_SIZE == 16 && FLOAT_SIZE == 4 && defined(__SSE__)
+# define low_half(x) (x)
+# define high_half(x) B_(movhlps, , undef(), x)
+/*
+ * GCC 7 (and perhaps earlier) report a bogus type mismatch for the conditional
+ * expression below. All works well with this no-op wrapper.
+ */
+static inline vec_t movlhps(vec_t x, vec_t y) {
+    return __builtin_ia32_movlhps(x, y);
+}
+# define insert_pair(x, y, p) \
+    ((p) ? movlhps(x, y) \
+         : ({ vec_t t_ = (x); t_[0] = (y)[0]; t_[1] = (y)[1]; t_; }))
+#endif
+
 #if VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW_A__)
 # define max __builtin_ia32_pfmax
 # define min __builtin_ia32_pfmin
@@ -149,13 +185,16 @@ static inline bool _to_bool(byte_vec_t bv)
 # if ELEM_COUNT == 8 /* vextractf{32,64}x4 */ || \
      (ELEM_COUNT == 16 && ELEM_SIZE == 4 && defined(__AVX512DQ__)) /* vextractf32x8 */ || \
      (ELEM_COUNT == 4 && ELEM_SIZE == 8 && defined(__AVX512DQ__)) /* vextractf64x2 */
-#  define low_half(x) ({ \
+#  define _half(x, lh) ({ \
     half_t t_; \
-    asm ( "vextractf%c[w]x%c[n] $0, %[s], %[d]" \
+    asm ( "vextractf%c[w]x%c[n] %[sel], %[s], %[d]" \
           : [d] "=m" (t_) \
-          : [s] "v" (x), [w] "i" (ELEM_SIZE * 8), [n] "i" (ELEM_COUNT / 2) ); \
+          : [s] "v" (x), [sel] "i" (lh), \
+            [w] "i" (ELEM_SIZE * 8), [n] "i" (ELEM_COUNT / 2) ); \
     t_; \
 })
+#  define low_half(x)  _half(x, 0)
+#  define high_half(x) _half(x, 1)
 # endif
 # if (ELEM_COUNT == 16 && ELEM_SIZE == 4) /* vextractf32x4 */ || \
      (ELEM_COUNT == 8 && ELEM_SIZE == 8 && defined(__AVX512DQ__)) /* vextractf64x2 */
@@ -1176,6 +1215,13 @@ int simd_test(void)
 
 # endif
 
+# ifdef to_wint
+    touch(src);
+    x = to_wint(src);
+    touch(src);
+    if ( !eq(x, src) ) return __LINE__;
+# endif
+
 # ifdef sqrt
     x = src * src;
     touch(x);
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index c1c9ae5be3..a48ab2411e 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -325,6 +325,8 @@ static const struct twobyte_table {
     [0x77] = { DstImplicit|SrcNone },
     [0x78] = { ImplicitOps|ModRM },
     [0x79] = { DstReg|SrcMem|ModRM, simd_packed_int },
+    [0x7a] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl },
+    [0x7b] = { DstImplicit|SrcMem|ModRM|Mov, simd_other, d8s_vl },
     [0x7c ... 0x7d] = { DstImplicit|SrcMem|ModRM, simd_other },
     [0x7e] = { DstMem|SrcImplicit|ModRM|Mov, simd_none, d8s_dq64 },
     [0x7f] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int, d8s_vl },
@@ -3051,6 +3053,12 @@ x86_decode(
                     --disp8scale;
                 break;
 
+            case 0x7a: /* vcvttps2qq needs special casing */
+            case 0x7b: /* vcvtps2qq needs special casing */
+                if ( disp8scale && evex.pfx == vex_66 && !evex.w && !evex.brs )
+                    --disp8scale;
+                break;
+
             case 0x7e: /* vmovq xmm/m64,xmm needs special casing */
                 if ( disp8scale == 2 && evex.pfx == vex_f3 )
                     disp8scale = 3;
@@ -7331,7 +7339,13 @@ x86_emulate(
         if ( evex.pfx != vex_f3 )
             host_and_vcpu_must_have(avx512f);
         else if ( evex.w )
+        {
+    case X86EMUL_OPC_EVEX_66(0x0f, 0x7a):   /* vcvttps2qq {x,y}mm/mem,[xyz]mm{k} */
+                                            /* vcvttpd2qq [xyz]mm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(0x0f, 0x7b):   /* vcvtps2qq {x,y}mm/mem,[xyz]mm{k} */
+                                            /* vcvtpd2qq [xyz]mm/mem,[xyz]mm{k} */
             host_and_vcpu_must_have(avx512dq);
+        }
         else
         {
             host_and_vcpu_must_have(avx512f);
--
generated by git-patchbot for /home/xen/git/xen.git#staging

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.