[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen staging] x86emul/test: encourage compiler to use more embedded broadcast



commit b8bc4588b32e8a40354defac29ceb9c90e570af8
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Fri Jun 10 10:24:21 2022 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Fri Jun 10 10:24:21 2022 +0200

    x86emul/test: encourage compiler to use more embedded broadcast
    
    For one it was an oversight to leave dup_{hi,lo}() undefined for 512-bit
    vector size. And then in FMA testing we can also arrange for the
    compiler to (hopefully) recognize broadcasting potential. Plus we can
    replace the broadcast(1) use in the addsub() surrogate with inline
    assembly explicitly using embedded broadcast (even gcc12 still doesn't
    support broadcast for any of the addsub/subadd builtins).
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
 tools/tests/x86_emulator/simd-fma.c | 26 +++++++++++++++++++++++++-
 tools/tests/x86_emulator/simd.c     |  7 +++++++
 tools/tests/x86_emulator/simd.h     |  2 ++
 3 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/tools/tests/x86_emulator/simd-fma.c b/tools/tests/x86_emulator/simd-fma.c
index 11a9dd6a27..d2ccefac9b 100644
--- a/tools/tests/x86_emulator/simd-fma.c
+++ b/tools/tests/x86_emulator/simd-fma.c
@@ -56,13 +56,27 @@ ENTRY(fma_test);
 #endif
 
 #if defined(fmaddsub) && !defined(addsub)
-# define addsub(x, y) fmaddsub(x, broadcast(1), y)
+# ifdef __AVX512F__
+#  define addsub(x, y) ({ \
+    vec_t t_; \
+    typeof(t_[0]) one_ = 1; \
+    asm ( "vfmaddsub231p" ELEM_SFX " %2%{1to%c4%}, %1, %0" \
+          : "=v" (t_) \
+          : "v" (x), "m" (one_), "0" (y), "i" (ELEM_COUNT) ); \
+    t_; \
+})
+# else
+#  define addsub(x, y) fmaddsub(x, broadcast(1), y)
+# endif
 #endif
 
 int fma_test(void)
 {
     unsigned int i;
     vec_t x, y, z, src, inv, one;
+#ifdef __AVX512F__
+    typeof(one[0]) one_ = 1;
+#endif
 
     for ( i = 0; i < ELEM_COUNT; ++i )
     {
@@ -71,6 +85,10 @@ int fma_test(void)
         one[i] = 1;
     }
 
+#ifdef __AVX512F__
+# define one one_
+#endif
+
     x = (src + one) * inv;
     y = (src - one) * inv;
     touch(src);
@@ -93,22 +111,28 @@ int fma_test(void)
     x = src + inv;
     y = src - inv;
     touch(inv);
+    touch(one);
     z = src * one + inv;
     if ( !eq(x, z) ) return __LINE__;
 
     touch(inv);
+    touch(one);
     z = -src * one - inv;
     if ( !eq(-x, z) ) return __LINE__;
 
     touch(inv);
+    touch(one);
     z = src * one - inv;
     if ( !eq(y, z) ) return __LINE__;
 
     touch(inv);
+    touch(one);
     z = -src * one + inv;
     if ( !eq(-y, z) ) return __LINE__;
     touch(inv);
 
+#undef one
+
 #if defined(addsub) && defined(fmaddsub)
     x = addsub(src * inv, one);
     y = addsub(src * inv, -one);
diff --git a/tools/tests/x86_emulator/simd.c b/tools/tests/x86_emulator/simd.c
index 198f7b933e..c805f1cc1e 100644
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -912,6 +912,13 @@ static inline vec_t movlhps(vec_t x, vec_t y) {
 })
 #  endif
 # endif
+#elif VEC_SIZE == 64
+# if FLOAT_SIZE == 4
+#  define dup_hi(x) B(movshdup, _mask, x, undef(), ~0)
+#  define dup_lo(x) B(movsldup, _mask, x, undef(), ~0)
+# elif FLOAT_SIZE == 8
+#  define dup_lo(x) B(movddup, _mask, x, undef(), ~0)
+# endif
 #endif
 #if VEC_SIZE == 16 && defined(__SSSE3__) && !defined(__AVX512VL__)
 # if INT_SIZE == 1
diff --git a/tools/tests/x86_emulator/simd.h b/tools/tests/x86_emulator/simd.h
index 685d78d84b..936952ab52 100644
--- a/tools/tests/x86_emulator/simd.h
+++ b/tools/tests/x86_emulator/simd.h
@@ -49,8 +49,10 @@ float
 # define ELEM_SIZE FLOAT_SIZE
 # if FLOAT_SIZE == 4
 #  define MODE SF
+#  define ELEM_SFX "s"
 # elif FLOAT_SIZE == 8
 #  define MODE DF
+#  define ELEM_SFX "d"
 # endif
 #endif
 #ifndef VEC_SIZE
--
generated by git-patchbot for /home/xen/git/xen.git#staging



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.