[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v5 14/17] x86emul: test coverage for SSE3/SSSE3/SSE4* insns



... and their AVX equivalents. Note that a few instructions aren't
covered (yet), but those all fall into common pattern groups, so I
would hope that for now we can do with what is there.

Just like for SSE/SSE2, MMX insns aren't being covered at all, as
they're not easy to deal with: the compiler refuses to emit them
except through uses of built-in functions.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
v4: New.

--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -22,24 +22,31 @@ sse-flts := 4
 sse2-vecs := $(sse-vecs)
 sse2-ints := 1 2 4 8
 sse2-flts := 4 8
+sse4-vecs := $(sse2-vecs)
+sse4-ints := $(sse2-ints)
+sse4-flts := $(sse2-flts)
 
 # When converting SSE to AVX, have the compiler avoid XMM0 to widen
-# coverage of the VEX.vvvv checks in the emulator.
-sse2avx := -ffixed-xmm0 -Wa,-msse2avx
+# coverage of the VEX.vvvv checks in the emulator. We must not do this,
+# however, for SSE4.1 and later, as there are instructions with XMM0 as
+# an implicit operand.
+sse2avx-sse  := -ffixed-xmm0 -Wa,-msse2avx
+sse2avx-sse2 := $(sse2avx-sse)
+sse2avx-sse4 := -Wa,-msse2avx
 
-simd-cflags := $(foreach flavor,sse sse2, \
+simd-cflags := $(foreach flavor,sse sse2 sse4, \
                  $(foreach vec,$($(flavor)-vecs), \
                    $(foreach int,$($(flavor)-ints), \
                      "-D$(flavor)_$(vec)i$(int) -m$(flavor) -O2 -DVEC_SIZE=$(vec) -DINT_SIZE=$(int)" \
                      "-D$(flavor)_$(vec)u$(int) -m$(flavor) -O2 -DVEC_SIZE=$(vec) -DUINT_SIZE=$(int)" \
-                     "-D$(flavor)_avx_$(vec)i$(int) -m$(flavor) $(sse2avx) -O2 -DVEC_SIZE=$(vec) -DINT_SIZE=$(int)" \
-                     "-D$(flavor)_avx_$(vec)u$(int) -m$(flavor) $(sse2avx) -O2 -DVEC_SIZE=$(vec) -DUINT_SIZE=$(int)") \
+                     "-D$(flavor)_avx_$(vec)i$(int) -m$(flavor) $(sse2avx-$(flavor)) -O2 -DVEC_SIZE=$(vec) -DINT_SIZE=$(int)" \
+                     "-D$(flavor)_avx_$(vec)u$(int) -m$(flavor) $(sse2avx-$(flavor)) -O2 -DVEC_SIZE=$(vec) -DUINT_SIZE=$(int)") \
                    $(foreach flt,$($(flavor)-flts), \
                      "-D$(flavor)_$(vec)f$(flt) -m$(flavor) -O2 -DVEC_SIZE=$(vec) -DFLOAT_SIZE=$(flt)" \
-                     "-D$(flavor)_avx_$(vec)f$(flt) -m$(flavor) $(sse2avx) -O2 -DVEC_SIZE=$(vec) -DFLOAT_SIZE=$(flt)")) \
+                     "-D$(flavor)_avx_$(vec)f$(flt) -m$(flavor) $(sse2avx-$(flavor)) -O2 -DVEC_SIZE=$(vec) -DFLOAT_SIZE=$(flt)")) \
                  $(foreach flt,$($(flavor)-flts), \
                    "-D$(flavor)_f$(flt) -m$(flavor) -mfpmath=sse -O2 -DFLOAT_SIZE=$(flt)" \
-                   "-D$(flavor)_avx_f$(flt) -m$(flavor) -mfpmath=sse $(sse2avx) -O2 -DFLOAT_SIZE=$(flt)"))
+                   "-D$(flavor)_avx_f$(flt) -m$(flavor) -mfpmath=sse $(sse2avx-$(flavor)) -O2 -DFLOAT_SIZE=$(flt)"))
 
 $(addsuffix .h,$(TESTCASES)): %.h: %.c testcase.mk Makefile
        rm -f $@.new $*.bin
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -70,7 +70,9 @@ typedef long long __attribute__((vector_
 #if VEC_SIZE == 8 && defined(__SSE__)
 # define to_bool(cmp) (__builtin_ia32_pmovmskb(cmp) == 0xff)
 #elif VEC_SIZE == 16
-# if defined(__SSE__) && ELEM_SIZE == 4
+# if defined(__SSE4_1__)
+#  define to_bool(cmp) __builtin_ia32_ptestc128(cmp, (vdi_t){} == 0)
+# elif defined(__SSE__) && ELEM_SIZE == 4
 #  define to_bool(cmp) (__builtin_ia32_movmskps(cmp) == 0xf)
 # elif defined(__SSE2__)
 #  if ELEM_SIZE == 8
@@ -182,9 +184,122 @@ static inline bool _to_bool(byte_vec_t b
     __builtin_ia32_maskmovdqu((vqi_t)(y), ~m_, d_); \
 })
 #endif
+#if VEC_SIZE == 16 && defined(__SSE3__)
+# if FLOAT_SIZE == 4
+#  define addsub(x, y) __builtin_ia32_addsubps(x, y)
+#  define dup_hi(x) __builtin_ia32_movshdup(x)
+#  define dup_lo(x) __builtin_ia32_movsldup(x)
+#  define hadd(x, y) __builtin_ia32_haddps(x, y)
+#  define hsub(x, y) __builtin_ia32_hsubps(x, y)
+# elif FLOAT_SIZE == 8
+#  define addsub(x, y) __builtin_ia32_addsubpd(x, y)
+#  define dup_lo(x) ({ \
+    double __attribute__((vector_size(16))) r_; \
+    asm ( "movddup %1,%0" : "=x" (r_) : "m" ((x)[0]) ); \
+    r_; \
+})
+#  define hadd(x, y) __builtin_ia32_haddpd(x, y)
+#  define hsub(x, y) __builtin_ia32_hsubpd(x, y)
+# endif
+#endif
+#if VEC_SIZE == 16 && defined(__SSSE3__)
+# if INT_SIZE == 1
+#  define abs(x) ((vec_t)__builtin_ia32_pabsb128((vqi_t)(x)))
+# elif INT_SIZE == 2
+#  define abs(x) __builtin_ia32_pabsw128(x)
+# elif INT_SIZE == 4
+#  define abs(x) __builtin_ia32_pabsd128(x)
+# endif
+# if INT_SIZE == 1 || UINT_SIZE == 1
+#  define copysignz(x, y) ((vec_t)__builtin_ia32_psignb128((vqi_t)(x), (vqi_t)(y)))
+#  define swap(x) ((vec_t)__builtin_ia32_pshufb128((vqi_t)(x), (vqi_t)(inv - 1)))
+#  define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 8))
+# elif INT_SIZE == 2 || UINT_SIZE == 2
+#  define copysignz(x, y) ((vec_t)__builtin_ia32_psignw128((vhi_t)(x), (vhi_t)(y)))
+#  define hadd(x, y) ((vec_t)__builtin_ia32_phaddw128((vhi_t)(x), (vhi_t)(y)))
+#  define hsub(x, y) ((vec_t)__builtin_ia32_phsubw128((vhi_t)(x), (vhi_t)(y)))
+#  define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 16))
+# elif INT_SIZE == 4 || UINT_SIZE == 4
+#  define copysignz(x, y) ((vec_t)__builtin_ia32_psignd128((vsi_t)(x), (vsi_t)(y)))
+#  define hadd(x, y) ((vec_t)__builtin_ia32_phaddd128((vsi_t)(x), (vsi_t)(y)))
+#  define hsub(x, y) ((vec_t)__builtin_ia32_phsubd128((vsi_t)(x), (vsi_t)(y)))
+#  define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 32))
+# elif INT_SIZE == 8 || UINT_SIZE == 8
+#  define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 64))
+# endif
+#endif
+#if VEC_SIZE == 16 && defined(__SSE4_1__)
+# if INT_SIZE == 1
+#  define max(x, y) ((vec_t)__builtin_ia32_pmaxsb128((vqi_t)(x), (vqi_t)(y)))
+#  define min(x, y) ((vec_t)__builtin_ia32_pminsb128((vqi_t)(x), (vqi_t)(y)))
+#  define widen1(x) ((vec_t)__builtin_ia32_pmovsxbw128((vqi_t)(x)))
+#  define widen2(x) ((vec_t)__builtin_ia32_pmovsxbd128((vqi_t)(x)))
+#  define widen3(x) ((vec_t)__builtin_ia32_pmovsxbq128((vqi_t)(x)))
+# elif INT_SIZE == 2
+#  define widen1(x) ((vec_t)__builtin_ia32_pmovsxwd128(x))
+#  define widen2(x) ((vec_t)__builtin_ia32_pmovsxwq128(x))
+# elif INT_SIZE == 4
+#  define max(x, y) __builtin_ia32_pmaxsd128(x, y)
+#  define min(x, y) __builtin_ia32_pminsd128(x, y)
+#  define mul_full(x, y) ((vec_t)__builtin_ia32_pmuldq128(x, y))
+#  define widen1(x) ((vec_t)__builtin_ia32_pmovsxdq128(x))
+# elif UINT_SIZE == 1
+#  define widen1(x) ((vec_t)__builtin_ia32_pmovzxbw128((vqi_t)(x)))
+#  define widen2(x) ((vec_t)__builtin_ia32_pmovzxbd128((vqi_t)(x)))
+#  define widen3(x) ((vec_t)__builtin_ia32_pmovzxbq128((vqi_t)(x)))
+# elif UINT_SIZE == 2
+#  define max(x, y) ((vec_t)__builtin_ia32_pmaxuw128((vhi_t)(x), (vhi_t)(y)))
+#  define min(x, y) ((vec_t)__builtin_ia32_pminuw128((vhi_t)(x), (vhi_t)(y)))
+#  define widen1(x) ((vec_t)__builtin_ia32_pmovzxwd128((vhi_t)(x)))
+#  define widen2(x) ((vec_t)__builtin_ia32_pmovzxwq128((vhi_t)(x)))
+# elif UINT_SIZE == 4
+#  define max(x, y) ((vec_t)__builtin_ia32_pmaxud128((vsi_t)(x), (vsi_t)(y)))
+#  define min(x, y) ((vec_t)__builtin_ia32_pminud128((vsi_t)(x), (vsi_t)(y)))
+#  define widen1(x) ((vec_t)__builtin_ia32_pmovzxdq128((vsi_t)(x)))
+# endif
+# undef select
+# if defined(INT_SIZE) || defined(UINT_SIZE)
+#  define select(d, x, y, m) \
+    (*(d) = (vec_t)__builtin_ia32_pblendvb128((vqi_t)(y), (vqi_t)(x), (vqi_t)(m)))
+# elif FLOAT_SIZE == 4
+#  define dot_product(x, y) __builtin_ia32_dpps(x, y, 0b11110001)
+#  define select(d, x, y, m) (*(d) = __builtin_ia32_blendvps(y, x, m))
+#  define trunc(x) __builtin_ia32_roundps(x, 0b1011)
+# elif FLOAT_SIZE == 8
+#  define dot_product(x, y) __builtin_ia32_dppd(x, y, 0b00110001)
+#  define select(d, x, y, m) (*(d) = __builtin_ia32_blendvpd(y, x, m))
+#  define trunc(x) __builtin_ia32_roundpd(x, 0b1011)
+# endif
+# if INT_SIZE == 2 || UINT_SIZE == 2
+#  define mix(x, y) ((vec_t)__builtin_ia32_pblendw128((vhi_t)(x), (vhi_t)(y), 0b10101010))
+# elif INT_SIZE == 4 || UINT_SIZE == 4
+#  define mix(x, y) ((vec_t)__builtin_ia32_pblendw128((vhi_t)(x), (vhi_t)(y), 0b11001100))
+# elif INT_SIZE == 8 || UINT_SIZE == 8
+#  define mix(x, y) ((vec_t)__builtin_ia32_pblendw128((vhi_t)(x), (vhi_t)(y), 0b11110000))
+# elif FLOAT_SIZE == 4
+#  define mix(x, y) __builtin_ia32_blendps(x, y, 0b1010)
+# elif FLOAT_SIZE == 8
+#  define mix(x, y) __builtin_ia32_blendpd(x, y, 0b10)
+# endif
+#endif
 #if VEC_SIZE == FLOAT_SIZE
 # define max(x, y) ((vec_t){({ typeof(x[0]) x_ = (x)[0], y_ = (y)[0]; x_ > y_ ? x_ : y_; })})
 # define min(x, y) ((vec_t){({ typeof(x[0]) x_ = (x)[0], y_ = (y)[0]; x_ < y_ ? x_ : y_; })})
+# ifdef __SSE4_1__
+#  if FLOAT_SIZE == 4
+#   define trunc(x) ({ \
+    float __attribute__((vector_size(16))) r_; \
+    asm ( "roundss $0b1011,%1,%0" : "=x" (r_) : "m" (x) ); \
+    (vec_t){ r_[0] }; \
+})
+#  elif FLOAT_SIZE == 8
+#   define trunc(x) ({ \
+    double __attribute__((vector_size(16))) r_; \
+    asm ( "roundsd $0b1011,%1,%0" : "=x" (r_) : "m" (x) ); \
+    (vec_t){ r_[0] }; \
+})
+#  endif
+# endif
 #endif
 
 /*
@@ -290,6 +405,14 @@ int simd_test(void)
     if ( !to_bool(sqrt(x) == src) ) return __LINE__;
 # endif
 
+# ifdef trunc
+    x = 1 / src;
+    y = (vec_t){ 1 };
+    touch(x);
+    z = trunc(x);
+    if ( !to_bool(y == z) ) return __LINE__;
+# endif
+
 #else
 
 # if ELEM_SIZE > 1
@@ -416,6 +539,17 @@ int simd_test(void)
 # endif
 #endif
 
+#ifdef abs
+    x = src * alt;
+    touch(x);
+    if ( !to_bool(abs(x) == src) ) return __LINE__;
+#endif
+
+#ifdef copysignz
+    touch(alt);
+    if ( !to_bool(copysignz((vec_t){} + 1, alt) == alt) ) return __LINE__;
+#endif
+
 #ifdef swap
     touch(src);
     if ( !to_bool(swap(src) == inv) ) return __LINE__;
@@ -435,16 +569,140 @@ int simd_test(void)
     if ( !to_bool(z == ELEM_COUNT / 2) ) return __LINE__;
 #endif
 
+#if defined(INT_SIZE) && defined(widen1) && defined(interleave_lo)
+
+    x = src * alt;
+    y = interleave_lo(x, alt < 0);
+    touch(x);
+    z = widen1(x);
+    touch(x);
+    if ( !to_bool(z == y) ) return __LINE__;
+
+# ifdef widen2
+    y = interleave_lo(alt < 0, alt < 0);
+    y = interleave_lo(z, y);
+    touch(x);
+    z = widen2(x);
+    touch(x);
+    if ( !to_bool(z == y) ) return __LINE__;
+
+#  ifdef widen3
+    y = interleave_lo(alt < 0, alt < 0);
+    y = interleave_lo(y, y);
+    y = interleave_lo(z, y);
+    touch(x);
+    z = widen3(x);
+    touch(x);
+    if ( !to_bool(z == y) ) return __LINE__;
+#  endif
+# endif
+
+#endif
+
+#if defined(UINT_SIZE) && defined(interleave_lo)
+
+    y = interleave_lo(src, (vec_t){});
+    z = interleave_lo(y, (vec_t){});
+
+# ifdef widen1
+    touch(src);
+    x = widen1(src);
+    touch(src);
+    if ( !to_bool(x == y) ) return __LINE__;
+# endif
+
+# ifdef widen2
+    touch(src);
+    x = widen2(src);
+    touch(src);
+    if ( !to_bool(x == z) ) return __LINE__;
+# endif
+
+# ifdef widen3
+    touch(src);
+    x = widen3(src);
+    touch(src);
+    if ( !to_bool(x == interleave_lo(z, (vec_t){})) ) return __LINE__;
+# endif
+
+#endif
+
+#ifdef dup_lo
+    touch(src);
+    x = dup_lo(src);
+    touch(src);
+    if ( !to_bool(x - src == (alt - 1) / 2) ) return __LINE__;
+#endif
+
+#ifdef dup_hi
+    touch(src);
+    x = dup_hi(src);
+    touch(src);
+    if ( !to_bool(x - src == (alt + 1) / 2) ) return __LINE__;
+#endif
+
+    for ( i = 0; i < ELEM_COUNT; ++i )
+        y[i] = (i & 1 ? inv : src)[i];
+
 #ifdef select
 # ifdef UINT_SIZE
     select(&z, src, inv, alt);
 # else
     select(&z, src, inv, alt > 0);
 # endif
-    for ( i = 0; i < ELEM_COUNT; ++i )
-        y[i] = (i & 1 ? inv : src)[i];
     if ( !to_bool(z == y) ) return __LINE__;
 #endif
 
+#ifdef mix
+    touch(src);
+    touch(inv);
+    x = mix(src, inv);
+    if ( !to_bool(x == y) ) return __LINE__;
+
+# ifdef addsub
+    touch(src);
+    touch(inv);
+    x = addsub(src, inv);
+    touch(src);
+    touch(inv);
+    y = mix(src - inv, src + inv);
+    if ( !to_bool(x == y) ) return __LINE__;
+# endif
+#endif
+
+#ifdef rotr
+    x = rotr(src, 1);
+    y = (src & (ELEM_COUNT - 1)) + 1;
+    if ( !to_bool(x == y) ) return __LINE__;
+#endif
+
+#ifdef dot_product
+    touch(src);
+    touch(inv);
+    x = dot_product(src, inv);
+    if ( !to_bool(x == (vec_t){ (ELEM_COUNT * (ELEM_COUNT + 1) *
+                                 (ELEM_COUNT + 2)) / 6 }) ) return __LINE__;
+#endif
+
+#ifdef hadd
+    x = src;
+    for ( i = ELEM_COUNT; i >>= 1; )
+    {
+        touch(x);
+        x = hadd((vec_t){}, x);
+    }
+    if ( x[ELEM_COUNT - 1] != (ELEM_COUNT * (ELEM_COUNT + 1)) / 2 ) return __LINE__;
+
+# ifdef hsub
+    touch(src);
+    touch(inv);
+    x = hsub(src, inv);
+    for ( i = ELEM_COUNT; i >>= 1; )
+        x = hadd(x, (vec_t){});
+    if ( !to_bool(x == 0) ) return __LINE__;
+# endif
+#endif
+
+
     return 0;
 }
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -30,12 +30,18 @@ static bool simd_check_sse2(void)
     return cpu_has_sse2;
 }
 
+static bool simd_check_sse4(void)
+{
+    return cpu_has_sse4_2;
+}
+
 static bool simd_check_avx(void)
 {
     return cpu_has_avx;
 }
 #define simd_check_sse_avx   simd_check_avx
 #define simd_check_sse2_avx  simd_check_avx
+#define simd_check_sse4_avx  simd_check_avx
 
 static void simd_set_regs(struct cpu_user_regs *regs)
 {
@@ -99,6 +105,18 @@ static const struct {
     SIMD(SSE2 packed u32,        sse2,      16u4),
     SIMD(SSE2 packed s64,        sse2,      16i8),
     SIMD(SSE2 packed u64,        sse2,      16u8),
+    SIMD(SSE4 scalar single,     sse4,        f4),
+    SIMD(SSE4 packed single,     sse4,      16f4),
+    SIMD(SSE4 scalar double,     sse4,        f8),
+    SIMD(SSE4 packed double,     sse4,      16f8),
+    SIMD(SSE4 packed s8,         sse4,      16i1),
+    SIMD(SSE4 packed u8,         sse4,      16u1),
+    SIMD(SSE4 packed s16,        sse4,      16i2),
+    SIMD(SSE4 packed u16,        sse4,      16u2),
+    SIMD(SSE4 packed s32,        sse4,      16i4),
+    SIMD(SSE4 packed u32,        sse4,      16u4),
+    SIMD(SSE4 packed s64,        sse4,      16i8),
+    SIMD(SSE4 packed u64,        sse4,      16u8),
     SIMD(SSE/AVX scalar single,  sse_avx,     f4),
     SIMD(SSE/AVX packed single,  sse_avx,   16f4),
     SIMD(SSE2/AVX scalar single, sse2_avx,    f4),
@@ -113,6 +131,18 @@ static const struct {
     SIMD(SSE2/AVX packed u32,    sse2_avx,  16u4),
     SIMD(SSE2/AVX packed s64,    sse2_avx,  16i8),
     SIMD(SSE2/AVX packed u64,    sse2_avx,  16u8),
+    SIMD(SSE4/AVX scalar single, sse4_avx,    f4),
+    SIMD(SSE4/AVX packed single, sse4_avx,  16f4),
+    SIMD(SSE4/AVX scalar double, sse4_avx,    f8),
+    SIMD(SSE4/AVX packed double, sse4_avx,  16f8),
+    SIMD(SSE4/AVX packed s8,     sse4_avx,  16i1),
+    SIMD(SSE4/AVX packed u8,     sse4_avx,  16u1),
+    SIMD(SSE4/AVX packed s16,    sse4_avx,  16i2),
+    SIMD(SSE4/AVX packed u16,    sse4_avx,  16u2),
+    SIMD(SSE4/AVX packed s32,    sse4_avx,  16i4),
+    SIMD(SSE4/AVX packed u32,    sse4_avx,  16u4),
+    SIMD(SSE4/AVX packed s64,    sse4_avx,  16i8),
+    SIMD(SSE4/AVX packed u64,    sse4_avx,  16u8),
 #undef SIMD_
 #undef SIMD
 };
@@ -2682,6 +2712,99 @@ int main(int argc, char **argv)
             goto fail;
         printf("okay\n");
     }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing extrq $4,$56,%xmm2...");
+    if ( stack_exec && cpu_has_sse4a )
+    {
+        decl_insn(extrq_imm);
+
+        res[0] = 0x44332211;
+        res[1] = 0x88776655;
+        asm volatile ( "movq %0, %%xmm2\n"
+                       put_insn(extrq_imm, "extrq $4, $56, %%xmm2")
+                       :: "m" (res[0]) : "memory" );
+
+        set_insn(extrq_imm);
+        rc = x86_emulate(&ctxt, &emulops);
+        asm ( "movq %%xmm2, %0" : "=m" (res[4]) :: "memory" );
+        if ( rc != X86EMUL_OKAY || !check_eip(extrq_imm) ||
+             res[4] != 0x54433221 || res[5] != 0x877665 )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing extrq %xmm3,%xmm2...");
+    if ( stack_exec && cpu_has_sse4a )
+    {
+        decl_insn(extrq_reg);
+
+        res[4] = 56 + (4 << 8);
+        res[5] = 0;
+        asm volatile ( "movq %0, %%xmm2\n"
+                       "movq %1, %%xmm3\n"
+                       put_insn(extrq_reg, "extrq %%xmm3, %%xmm2")
+                       :: "m" (res[0]), "m" (res[4]) : "memory" );
+
+        set_insn(extrq_reg);
+        rc = x86_emulate(&ctxt, &emulops);
+        asm ( "movq %%xmm2, %0" : "=m" (res[4]) :: "memory" );
+        if ( rc != X86EMUL_OKAY || !check_eip(extrq_reg) ||
+             res[4] != 0x54433221 || res[5] != 0x877665 )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing insertq $12,$40,%xmm2,%xmm3...");
+    if ( stack_exec && cpu_has_sse4a )
+    {
+        decl_insn(insertq_imm);
+
+        res[4] = 0xccbbaa99;
+        res[5] = 0x00ffeedd;
+        asm volatile ( "movq %1, %%xmm2\n"
+                       "movq %0, %%xmm3\n"
+                       put_insn(insertq_imm, "insertq $12, $40, %%xmm2, %%xmm3")
+                       :: "m" (res[0]), "m" (res[4]) : "memory" );
+
+        set_insn(insertq_imm);
+        rc = x86_emulate(&ctxt, &emulops);
+        asm ( "movq %%xmm3, %0" : "=m" (res[4]) :: "memory" );
+        if ( rc != X86EMUL_OKAY || !check_eip(insertq_imm) ||
+             res[4] != 0xbaa99211 || res[5] != 0x887ddccb )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing insertq %xmm2,%xmm3...");
+    if ( stack_exec && cpu_has_sse4a )
+    {
+        decl_insn(insertq_reg);
+
+        res[4] = 0xccbbaa99;
+        res[5] = 0x00ffeedd;
+        res[6] = 40 + (12 << 8);
+        res[7] = 0;
+        asm volatile ( "movdqu %1, %%xmm2\n"
+                       "movq %0, %%xmm3\n"
+                       put_insn(insertq_reg, "insertq %%xmm2, %%xmm3")
+                       :: "m" (res[0]), "m" (res[4]) : "memory" );
+
+        set_insn(insertq_reg);
+        rc = x86_emulate(&ctxt, &emulops);
+        asm ( "movq %%xmm3, %0" : "=m" (res[4]) :: "memory" );
+        if ( rc != X86EMUL_OKAY || !check_eip(insertq_reg) ||
+             res[4] != 0xbaa99211 || res[5] != 0x887ddccb )
+            goto fail;
+        printf("okay\n");
+    }
     else
         printf("skipped\n");
 
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -150,6 +150,12 @@ static inline uint64_t xgetbv(uint32_t x
     (res.b & (1U << 8)) != 0; \
 })
 
+#define cpu_has_sse4a ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(0x80000001, 0, &res, NULL); \
+    (res.c & (1U << 6)) != 0; \
+})
+
 #define cpu_has_tbm ({ \
     struct cpuid_leaf res; \
     emul_test_cpuid(0x80000001, 0, &res, NULL); \


Attachment: x86emul-SSE4-test.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.