[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] x86emul/test: encourage compiler to use more embedded broadcast
For one, it was an oversight to leave dup_{hi,lo}() undefined for the
512-bit vector size. And then in FMA testing we can also arrange for the
compiler to (hopefully) recognize broadcasting potential.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -912,6 +912,13 @@ static inline vec_t movlhps(vec_t x, vec
 })
 #  endif
 # endif
+#elif VEC_SIZE == 64
+# if FLOAT_SIZE == 4
+#  define dup_hi(x) B(movshdup, _mask, x, undef(), ~0)
+#  define dup_lo(x) B(movsldup, _mask, x, undef(), ~0)
+# elif FLOAT_SIZE == 8
+#  define dup_lo(x) B(movddup, _mask, x, undef(), ~0)
+# endif
 #endif
 #if VEC_SIZE == 16 && defined(__SSSE3__) && !defined(__AVX512VL__)
 # if INT_SIZE == 1
--- a/tools/tests/x86_emulator/simd-fma.c
+++ b/tools/tests/x86_emulator/simd-fma.c
@@ -63,6 +63,9 @@ int fma_test(void)
 {
     unsigned int i;
     vec_t x, y, z, src, inv, one;
+#ifdef __AVX512F__
+    typeof(one[0]) one_ = 1;
+#endif
 
     for ( i = 0; i < ELEM_COUNT; ++i )
     {
@@ -71,6 +74,10 @@ int fma_test(void)
         one[i] = 1;
     }
 
+#ifdef __AVX512F__
+# define one one_
+#endif
+
     x = (src + one) * inv;
     y = (src - one) * inv;
     touch(src);
@@ -93,22 +100,28 @@ int fma_test(void)
     x = src + inv;
     y = src - inv;
     touch(inv);
+    touch(one);
     z = src * one + inv;
     if ( !eq(x, z) ) return __LINE__;
 
     touch(inv);
+    touch(one);
     z = -src * one - inv;
     if ( !eq(-x, z) ) return __LINE__;
 
     touch(inv);
+    touch(one);
     z = src * one - inv;
     if ( !eq(y, z) ) return __LINE__;
 
     touch(inv);
+    touch(one);
     z = -src * one + inv;
     if ( !eq(-y, z) ) return __LINE__;
     touch(inv);
 
+#undef one
+
 #if defined(addsub) && defined(fmaddsub)
     x = addsub(src * inv, one);
     y = addsub(src * inv, -one);
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |