x86emul: test coverage for SSE3/SSSE3/SSE4* insns ... and their AVX equivalents. Note that a few instructions aren't covered (yet), but those all fall into common pattern groups, so I would hope that for now we can do with what is there. Just like for SSE/SSE2, MMX insns aren't being covered at all, as they're not easy to deal with: The compiler refuses to emit them for anything other than uses of built-in functions. Signed-off-by: Jan Beulich --- v4: New. --- a/tools/tests/x86_emulator/Makefile +++ b/tools/tests/x86_emulator/Makefile @@ -22,24 +22,31 @@ sse-flts := 4 sse2-vecs := $(sse-vecs) sse2-ints := 1 2 4 8 sse2-flts := 4 8 +sse4-vecs := $(sse2-vecs) +sse4-ints := $(sse2-ints) +sse4-flts := $(sse2-flts) # When converting SSE to AVX, have the compiler avoid XMM0 to widen -# coverage of the VEX.vvvv checks in the emulator. -sse2avx := -ffixed-xmm0 -Wa,-msse2avx +# coverage of the VEX.vvvv checks in the emulator. We must not do this, +# however, for SSE4.1 and later, as there are instructions with XMM0 as +# an implicit operand. 
+sse2avx-sse := -ffixed-xmm0 -Wa,-msse2avx +sse2avx-sse2 := $(sse2avx-sse) +sse2avx-sse4 := -Wa,-msse2avx -simd-cflags := $(foreach flavor,sse sse2, \ +simd-cflags := $(foreach flavor,sse sse2 sse4, \ $(foreach vec,$($(flavor)-vecs), \ $(foreach int,$($(flavor)-ints), \ "-D$(flavor)_$(vec)i$(int) -m$(flavor) -O2 -DVEC_SIZE=$(vec) -DINT_SIZE=$(int)" \ "-D$(flavor)_$(vec)u$(int) -m$(flavor) -O2 -DVEC_SIZE=$(vec) -DUINT_SIZE=$(int)" \ - "-D$(flavor)_avx_$(vec)i$(int) -m$(flavor) $(sse2avx) -O2 -DVEC_SIZE=$(vec) -DINT_SIZE=$(int)" \ - "-D$(flavor)_avx_$(vec)u$(int) -m$(flavor) $(sse2avx) -O2 -DVEC_SIZE=$(vec) -DUINT_SIZE=$(int)") \ + "-D$(flavor)_avx_$(vec)i$(int) -m$(flavor) $(sse2avx-$(flavor)) -O2 -DVEC_SIZE=$(vec) -DINT_SIZE=$(int)" \ + "-D$(flavor)_avx_$(vec)u$(int) -m$(flavor) $(sse2avx-$(flavor)) -O2 -DVEC_SIZE=$(vec) -DUINT_SIZE=$(int)") \ $(foreach flt,$($(flavor)-flts), \ "-D$(flavor)_$(vec)f$(flt) -m$(flavor) -O2 -DVEC_SIZE=$(vec) -DFLOAT_SIZE=$(flt)" \ - "-D$(flavor)_avx_$(vec)f$(flt) -m$(flavor) $(sse2avx) -O2 -DVEC_SIZE=$(vec) -DFLOAT_SIZE=$(flt)")) \ + "-D$(flavor)_avx_$(vec)f$(flt) -m$(flavor) $(sse2avx-$(flavor)) -O2 -DVEC_SIZE=$(vec) -DFLOAT_SIZE=$(flt)")) \ $(foreach flt,$($(flavor)-flts), \ "-D$(flavor)_f$(flt) -m$(flavor) -mfpmath=sse -O2 -DFLOAT_SIZE=$(flt)" \ - "-D$(flavor)_avx_f$(flt) -m$(flavor) -mfpmath=sse $(sse2avx) -O2 -DFLOAT_SIZE=$(flt)")) + "-D$(flavor)_avx_f$(flt) -m$(flavor) -mfpmath=sse $(sse2avx-$(flavor)) -O2 -DFLOAT_SIZE=$(flt)")) $(addsuffix .h,$(TESTCASES)): %.h: %.c testcase.mk Makefile rm -f $@.new $*.bin --- a/tools/tests/x86_emulator/simd.c +++ b/tools/tests/x86_emulator/simd.c @@ -70,7 +70,9 @@ typedef long long __attribute__((vector_ #if VEC_SIZE == 8 && defined(__SSE__) # define to_bool(cmp) (__builtin_ia32_pmovmskb(cmp) == 0xff) #elif VEC_SIZE == 16 -# if defined(__SSE__) && ELEM_SIZE == 4 +# if defined(__SSE4_1__) +# define to_bool(cmp) __builtin_ia32_ptestc128(cmp, (vdi_t){} == 0) +# elif defined(__SSE__) && ELEM_SIZE 
== 4 # define to_bool(cmp) (__builtin_ia32_movmskps(cmp) == 0xf) # elif defined(__SSE2__) # if ELEM_SIZE == 8 @@ -182,9 +184,122 @@ static inline bool _to_bool(byte_vec_t b __builtin_ia32_maskmovdqu((vqi_t)(y), ~m_, d_); \ }) #endif +#if VEC_SIZE == 16 && defined(__SSE3__) +# if FLOAT_SIZE == 4 +# define addsub(x, y) __builtin_ia32_addsubps(x, y) +# define dup_hi(x) __builtin_ia32_movshdup(x) +# define dup_lo(x) __builtin_ia32_movsldup(x) +# define hadd(x, y) __builtin_ia32_haddps(x, y) +# define hsub(x, y) __builtin_ia32_hsubps(x, y) +# elif FLOAT_SIZE == 8 +# define addsub(x, y) __builtin_ia32_addsubpd(x, y) +# define dup_lo(x) ({ \ + double __attribute__((vector_size(16))) r_; \ + asm ( "movddup %1,%0" : "=x" (r_) : "m" ((x)[0]) ); \ + r_; \ +}) +# define hadd(x, y) __builtin_ia32_haddpd(x, y) +# define hsub(x, y) __builtin_ia32_hsubpd(x, y) +# endif +#endif +#if VEC_SIZE == 16 && defined(__SSSE3__) +# if INT_SIZE == 1 +# define abs(x) ((vec_t)__builtin_ia32_pabsb128((vqi_t)(x))) +# elif INT_SIZE == 2 +# define abs(x) __builtin_ia32_pabsw128(x) +# elif INT_SIZE == 4 +# define abs(x) __builtin_ia32_pabsd128(x) +# endif +# if INT_SIZE == 1 || UINT_SIZE == 1 +# define copysignz(x, y) ((vec_t)__builtin_ia32_psignb128((vqi_t)(x), (vqi_t)(y))) +# define swap(x) ((vec_t)__builtin_ia32_pshufb128((vqi_t)(x), (vqi_t)(inv - 1))) +# define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 8)) +# elif INT_SIZE == 2 || UINT_SIZE == 2 +# define copysignz(x, y) ((vec_t)__builtin_ia32_psignw128((vhi_t)(x), (vhi_t)(y))) +# define hadd(x, y) ((vec_t)__builtin_ia32_phaddw128((vhi_t)(x), (vhi_t)(y))) +# define hsub(x, y) ((vec_t)__builtin_ia32_phsubw128((vhi_t)(x), (vhi_t)(y))) +# define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 16)) +# elif INT_SIZE == 4 || UINT_SIZE == 4 +# define copysignz(x, y) ((vec_t)__builtin_ia32_psignd128((vsi_t)(x), (vsi_t)(y))) +# define hadd(x, y) ((vec_t)__builtin_ia32_phaddd128((vsi_t)(x), 
(vsi_t)(y))) +# define hsub(x, y) ((vec_t)__builtin_ia32_phsubd128((vsi_t)(x), (vsi_t)(y))) +# define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 32)) +# elif INT_SIZE == 8 || UINT_SIZE == 8 +# define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 64)) +# endif +#endif +#if VEC_SIZE == 16 && defined(__SSE4_1__) +# if INT_SIZE == 1 +# define max(x, y) ((vec_t)__builtin_ia32_pmaxsb128((vqi_t)(x), (vqi_t)(y))) +# define min(x, y) ((vec_t)__builtin_ia32_pminsb128((vqi_t)(x), (vqi_t)(y))) +# define widen1(x) ((vec_t)__builtin_ia32_pmovsxbw128((vqi_t)(x))) +# define widen2(x) ((vec_t)__builtin_ia32_pmovsxbd128((vqi_t)(x))) +# define widen3(x) ((vec_t)__builtin_ia32_pmovsxbq128((vqi_t)(x))) +# elif INT_SIZE == 2 +# define widen1(x) ((vec_t)__builtin_ia32_pmovsxwd128(x)) +# define widen2(x) ((vec_t)__builtin_ia32_pmovsxwq128(x)) +# elif INT_SIZE == 4 +# define max(x, y) __builtin_ia32_pmaxsd128(x, y) +# define min(x, y) __builtin_ia32_pminsd128(x, y) +# define mul_full(x, y) ((vec_t)__builtin_ia32_pmuldq128(x, y)) +# define widen1(x) ((vec_t)__builtin_ia32_pmovsxdq128(x)) +# elif UINT_SIZE == 1 +# define widen1(x) ((vec_t)__builtin_ia32_pmovzxbw128((vqi_t)(x))) +# define widen2(x) ((vec_t)__builtin_ia32_pmovzxbd128((vqi_t)(x))) +# define widen3(x) ((vec_t)__builtin_ia32_pmovzxbq128((vqi_t)(x))) +# elif UINT_SIZE == 2 +# define max(x, y) ((vec_t)__builtin_ia32_pmaxuw128((vhi_t)(x), (vhi_t)(y))) +# define min(x, y) ((vec_t)__builtin_ia32_pminuw128((vhi_t)(x), (vhi_t)(y))) +# define widen1(x) ((vec_t)__builtin_ia32_pmovzxwd128((vhi_t)(x))) +# define widen2(x) ((vec_t)__builtin_ia32_pmovzxwq128((vhi_t)(x))) +# elif UINT_SIZE == 4 +# define max(x, y) ((vec_t)__builtin_ia32_pmaxud128((vsi_t)(x), (vsi_t)(y))) +# define min(x, y) ((vec_t)__builtin_ia32_pminud128((vsi_t)(x), (vsi_t)(y))) +# define widen1(x) ((vec_t)__builtin_ia32_pmovzxdq128((vsi_t)(x))) +# endif +# undef select +# if defined(INT_SIZE) || defined(UINT_SIZE) 
+# define select(d, x, y, m) \ + (*(d) = (vec_t)__builtin_ia32_pblendvb128((vqi_t)(y), (vqi_t)(x), (vqi_t)(m))) +# elif FLOAT_SIZE == 4 +# define dot_product(x, y) __builtin_ia32_dpps(x, y, 0b11110001) +# define select(d, x, y, m) (*(d) = __builtin_ia32_blendvps(y, x, m)) +# define trunc(x) __builtin_ia32_roundps(x, 0b1011) +# elif FLOAT_SIZE == 8 +# define dot_product(x, y) __builtin_ia32_dppd(x, y, 0b00110001) +# define select(d, x, y, m) (*(d) = __builtin_ia32_blendvpd(y, x, m)) +# define trunc(x) __builtin_ia32_roundpd(x, 0b1011) +# endif +# if INT_SIZE == 2 || UINT_SIZE == 2 +# define mix(x, y) ((vec_t)__builtin_ia32_pblendw128((vhi_t)(x), (vhi_t)(y), 0b10101010)) +# elif INT_SIZE == 4 || UINT_SIZE == 4 +# define mix(x, y) ((vec_t)__builtin_ia32_pblendw128((vhi_t)(x), (vhi_t)(y), 0b11001100)) +# elif INT_SIZE == 8 || UINT_SIZE == 8 +# define mix(x, y) ((vec_t)__builtin_ia32_pblendw128((vhi_t)(x), (vhi_t)(y), 0b11110000)) +# elif FLOAT_SIZE == 4 +# define mix(x, y) __builtin_ia32_blendps(x, y, 0b1010) +# elif FLOAT_SIZE == 8 +# define mix(x, y) __builtin_ia32_blendpd(x, y, 0b10) +# endif +#endif #if VEC_SIZE == FLOAT_SIZE # define max(x, y) ((vec_t){({ typeof(x[0]) x_ = (x)[0], y_ = (y)[0]; x_ > y_ ? x_ : y_; })}) # define min(x, y) ((vec_t){({ typeof(x[0]) x_ = (x)[0], y_ = (y)[0]; x_ < y_ ? 
x_ : y_; })}) +# ifdef __SSE4_1__ +# if FLOAT_SIZE == 4 +# define trunc(x) ({ \ + float __attribute__((vector_size(16))) r_; \ + asm ( "roundss $0b1011,%1,%0" : "=x" (r_) : "m" (x) ); \ + (vec_t){ r_[0] }; \ +}) +# elif FLOAT_SIZE == 8 +# define trunc(x) ({ \ + double __attribute__((vector_size(16))) r_; \ + asm ( "roundsd $0b1011,%1,%0" : "=x" (r_) : "m" (x) ); \ + (vec_t){ r_[0] }; \ +}) +# endif +# endif #endif /* @@ -290,6 +405,14 @@ int simd_test(void) if ( !to_bool(sqrt(x) == src) ) return __LINE__; # endif +# ifdef trunc + x = 1 / src; + y = (vec_t){ 1 }; + touch(x); + z = trunc(x); + if ( !to_bool(y == z) ) return __LINE__; +# endif + #else # if ELEM_SIZE > 1 @@ -416,6 +539,17 @@ int simd_test(void) # endif #endif +#ifdef abs + x = src * alt; + touch(x); + if ( !to_bool(abs(x) == src) ) return __LINE__; +#endif + +#ifdef copysignz + touch(alt); + if ( !to_bool(copysignz((vec_t){} + 1, alt) == alt) ) return __LINE__; +#endif + #ifdef swap touch(src); if ( !to_bool(swap(src) == inv) ) return __LINE__; @@ -435,16 +569,140 @@ int simd_test(void) if ( !to_bool(z == ELEM_COUNT / 2) ) return __LINE__; #endif +#if defined(INT_SIZE) && defined(widen1) && defined(interleave_lo) + + x = src * alt; + y = interleave_lo(x, alt < 0); + touch(x); + z = widen1(x); + touch(x); + if ( !to_bool(z == y) ) return __LINE__; + +# ifdef widen2 + y = interleave_lo(alt < 0, alt < 0); + y = interleave_lo(z, y); + touch(x); + z = widen2(x); + touch(x); + if ( !to_bool(z == y) ) return __LINE__; + +# ifdef widen3 + y = interleave_lo(alt < 0, alt < 0); + y = interleave_lo(y, y); + y = interleave_lo(z, y); + touch(x); + z = widen3(x); + touch(x); + if ( !to_bool(z == y) ) return __LINE__; +# endif +# endif + +#endif + +#if defined(UINT_SIZE) && defined(interleave_lo) + + y = interleave_lo(src, (vec_t){}); + z = interleave_lo(y, (vec_t){}); + +# ifdef widen1 + touch(src); + x = widen1(src); + touch(src); + if ( !to_bool(x == y) ) return __LINE__; +# endif + +# ifdef widen2 + touch(src); + 
x = widen2(src); + touch(src); + if ( !to_bool(x == z) ) return __LINE__; +# endif + +# ifdef widen3 + touch(src); + x = widen3(src); + touch(src); + if ( !to_bool(x == interleave_lo(z, (vec_t){})) ) return __LINE__; +# endif + +#endif + +#ifdef dup_lo + touch(src); + x = dup_lo(src); + touch(src); + if ( !to_bool(x - src == (alt - 1) / 2) ) return __LINE__; +#endif + +#ifdef dup_hi + touch(src); + x = dup_hi(src); + touch(src); + if ( !to_bool(x - src == (alt + 1) / 2) ) return __LINE__; +#endif + + for ( i = 0; i < ELEM_COUNT; ++i ) + y[i] = (i & 1 ? inv : src)[i]; + #ifdef select # ifdef UINT_SIZE select(&z, src, inv, alt); # else select(&z, src, inv, alt > 0); # endif - for ( i = 0; i < ELEM_COUNT; ++i ) - y[i] = (i & 1 ? inv : src)[i]; if ( !to_bool(z == y) ) return __LINE__; #endif +#ifdef mix + touch(src); + touch(inv); + x = mix(src, inv); + if ( !to_bool(x == y) ) return __LINE__; + +# ifdef addsub + touch(src); + touch(inv); + x = addsub(src, inv); + touch(src); + touch(inv); + y = mix(src - inv, src + inv); + if ( !to_bool(x == y) ) return __LINE__; +# endif +#endif + +#ifdef rotr + x = rotr(src, 1); + y = (src & (ELEM_COUNT - 1)) + 1; + if ( !to_bool(x == y) ) return __LINE__; +#endif + +#ifdef dot_product + touch(src); + touch(inv); + x = dot_product(src, inv); + if ( !to_bool(x == (vec_t){ (ELEM_COUNT * (ELEM_COUNT + 1) * + (ELEM_COUNT + 2)) / 6 }) ) return __LINE__; +#endif + +#ifdef hadd + x = src; + for ( i = ELEM_COUNT; i >>= 1; ) + { + touch(x); + x = hadd((vec_t){}, x); + } + if ( x[ELEM_COUNT - 1] != (ELEM_COUNT * (ELEM_COUNT + 1)) / 2 ) return __LINE__; + +# ifdef hsub + touch(src); + touch(inv); + x = hsub(src, inv); + for ( i = ELEM_COUNT; i >>= 1; ) + x = hadd(x, (vec_t){}); + if ( !to_bool(x == 0) ) return __LINE__; +# endif +#endif + + return 0; } --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -30,12 +30,18 @@ static bool simd_check_sse2(void) return cpu_has_sse2; } +static bool 
simd_check_sse4(void) +{ + return cpu_has_sse4_2; +} + static bool simd_check_avx(void) { return cpu_has_avx; } #define simd_check_sse_avx simd_check_avx #define simd_check_sse2_avx simd_check_avx +#define simd_check_sse4_avx simd_check_avx static void simd_set_regs(struct cpu_user_regs *regs) { @@ -99,6 +105,18 @@ static const struct { SIMD(SSE2 packed u32, sse2, 16u4), SIMD(SSE2 packed s64, sse2, 16i8), SIMD(SSE2 packed u64, sse2, 16u8), + SIMD(SSE4 scalar single, sse4, f4), + SIMD(SSE4 packed single, sse4, 16f4), + SIMD(SSE4 scalar double, sse4, f8), + SIMD(SSE4 packed double, sse4, 16f8), + SIMD(SSE4 packed s8, sse4, 16i1), + SIMD(SSE4 packed u8, sse4, 16u1), + SIMD(SSE4 packed s16, sse4, 16i2), + SIMD(SSE4 packed u16, sse4, 16u2), + SIMD(SSE4 packed s32, sse4, 16i4), + SIMD(SSE4 packed u32, sse4, 16u4), + SIMD(SSE4 packed s64, sse4, 16i8), + SIMD(SSE4 packed u64, sse4, 16u8), SIMD(SSE/AVX scalar single, sse_avx, f4), SIMD(SSE/AVX packed single, sse_avx, 16f4), SIMD(SSE2/AVX scalar single, sse2_avx, f4), @@ -113,6 +131,18 @@ static const struct { SIMD(SSE2/AVX packed u32, sse2_avx, 16u4), SIMD(SSE2/AVX packed s64, sse2_avx, 16i8), SIMD(SSE2/AVX packed u64, sse2_avx, 16u8), + SIMD(SSE4/AVX scalar single, sse4_avx, f4), + SIMD(SSE4/AVX packed single, sse4_avx, 16f4), + SIMD(SSE4/AVX scalar double, sse4_avx, f8), + SIMD(SSE4/AVX packed double, sse4_avx, 16f8), + SIMD(SSE4/AVX packed s8, sse4_avx, 16i1), + SIMD(SSE4/AVX packed u8, sse4_avx, 16u1), + SIMD(SSE4/AVX packed s16, sse4_avx, 16i2), + SIMD(SSE4/AVX packed u16, sse4_avx, 16u2), + SIMD(SSE4/AVX packed s32, sse4_avx, 16i4), + SIMD(SSE4/AVX packed u32, sse4_avx, 16u4), + SIMD(SSE4/AVX packed s64, sse4_avx, 16i8), + SIMD(SSE4/AVX packed u64, sse4_avx, 16u8), #undef SIMD_ #undef SIMD }; @@ -2682,6 +2712,99 @@ int main(int argc, char **argv) goto fail; printf("okay\n"); } + else + printf("skipped\n"); + + printf("%-40s", "Testing extrq $4,$56,%xmm2..."); + if ( stack_exec && cpu_has_sse4a ) + { + 
decl_insn(extrq_imm); + + res[0] = 0x44332211; + res[1] = 0x88776655; + asm volatile ( "movq %0, %%xmm2\n" + put_insn(extrq_imm, "extrq $4, $56, %%xmm2") + :: "m" (res[0]) : "memory" ); + + set_insn(extrq_imm); + rc = x86_emulate(&ctxt, &emulops); + asm ( "movq %%xmm2, %0" : "=m" (res[4]) :: "memory" ); + if ( rc != X86EMUL_OKAY || !check_eip(extrq_imm) || + res[4] != 0x54433221 || res[5] != 0x877665 ) + goto fail; + printf("okay\n"); + } + else + printf("skipped\n"); + + printf("%-40s", "Testing extrq %xmm3,%xmm2..."); + if ( stack_exec && cpu_has_sse4a ) + { + decl_insn(extrq_reg); + + res[4] = 56 + (4 << 8); + res[5] = 0; + asm volatile ( "movq %0, %%xmm2\n" + "movq %1, %%xmm3\n" + put_insn(extrq_reg, "extrq %%xmm3, %%xmm2") + :: "m" (res[0]), "m" (res[4]) : "memory" ); + + set_insn(extrq_reg); + rc = x86_emulate(&ctxt, &emulops); + asm ( "movq %%xmm2, %0" : "=m" (res[4]) :: "memory" ); + if ( rc != X86EMUL_OKAY || !check_eip(extrq_reg) || + res[4] != 0x54433221 || res[5] != 0x877665 ) + goto fail; + printf("okay\n"); + } + else + printf("skipped\n"); + + printf("%-40s", "Testing insertq $12,$40,%xmm2,%xmm3..."); + if ( stack_exec && cpu_has_sse4a ) + { + decl_insn(insertq_imm); + + res[4] = 0xccbbaa99; + res[5] = 0x00ffeedd; + asm volatile ( "movq %1, %%xmm2\n" + "movq %0, %%xmm3\n" + put_insn(insertq_imm, "insertq $12, $40, %%xmm2, %%xmm3") + :: "m" (res[0]), "m" (res[4]) : "memory" ); + + set_insn(insertq_imm); + rc = x86_emulate(&ctxt, &emulops); + asm ( "movq %%xmm3, %0" : "=m" (res[4]) :: "memory" ); + if ( rc != X86EMUL_OKAY || !check_eip(insertq_imm) || + res[4] != 0xbaa99211 || res[5] != 0x887ddccb ) + goto fail; + printf("okay\n"); + } + else + printf("skipped\n"); + + printf("%-40s", "Testing insertq %xmm2,%xmm3..."); + if ( stack_exec && cpu_has_sse4a ) + { + decl_insn(insertq_reg); + + res[4] = 0xccbbaa99; + res[5] = 0x00ffeedd; + res[6] = 40 + (12 << 8); + res[7] = 0; + asm volatile ( "movdqu %1, %%xmm2\n" + "movq %0, %%xmm3\n" + 
put_insn(insertq_reg, "insertq %%xmm2, %%xmm3") + :: "m" (res[0]), "m" (res[4]) : "memory" ); + + set_insn(insertq_reg); + rc = x86_emulate(&ctxt, &emulops); + asm ( "movq %%xmm3, %0" : "=m" (res[4]) :: "memory" ); + if ( rc != X86EMUL_OKAY || !check_eip(insertq_reg) || + res[4] != 0xbaa99211 || res[5] != 0x887ddccb ) + goto fail; + printf("okay\n"); + } else printf("skipped\n"); --- a/tools/tests/x86_emulator/x86_emulate.h +++ b/tools/tests/x86_emulator/x86_emulate.h @@ -150,6 +150,12 @@ static inline uint64_t xgetbv(uint32_t x (res.b & (1U << 8)) != 0; \ }) +#define cpu_has_sse4a ({ \ + struct cpuid_leaf res; \ + emul_test_cpuid(0x80000001, 0, &res, NULL); \ + (res.c & (1U << 6)) != 0; \ +}) + #define cpu_has_tbm ({ \ struct cpuid_leaf res; \ emul_test_cpuid(0x80000001, 0, &res, NULL); \