[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v4 40/44] x86emul: support AVX512F legacy-equivalent packed int/FP conversion insns
... including the two AVX512DQ forms which share encodings, just with EVEX.W set there. VCVTDQ2PD, sharing its main opcode with others, needs a "manual" override of disp8scale. The simd_size changes for the twobyte_table[] entries are benign to pre-existing code, but allow decode_disp8scale() to work as is here. The placement of the 0xe6 case block, which is wrong at this point, is once again in anticipation of further additions of case labels. Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> --- v4: New. --- a/tools/tests/x86_emulator/evex-disp8.c +++ b/tools/tests/x86_emulator/evex-disp8.c @@ -98,8 +98,12 @@ static const struct test avx512f_all[] = INSN_FP(cmp, 0f, c2), INSN(comisd, 66, 0f, 2f, el, q, el), INSN(comiss, , 0f, 2f, el, d, el), + INSN(cvtdq2pd, f3, 0f, e6, vl_2, d, vl), + INSN(cvtdq2ps, , 0f, 5b, vl, d, vl), + INSN(cvtpd2dq, f2, 0f, e6, vl, q, vl), INSN(cvtpd2ps, 66, 0f, 5a, vl, q, vl), INSN(cvtph2ps, 66, 0f38, 13, vl_2, d_nb, vl), + INSN(cvtps2dq, 66, 0f, 5b, vl, d, vl), INSN(cvtps2pd, , 0f, 5a, vl_2, d, vl), INSN(cvtps2ph, 66, 0f3a, 1d, vl_2, d_nb, vl), INSN(cvtsd2ss, f2, 0f, 5a, el, q, el), @@ -388,6 +392,8 @@ static const struct test avx512dq_all[] INSN_PFP(and, 0f, 54), INSN_PFP(andn, 0f, 55), INSN(broadcasti32x2, 66, 0f38, 59, el_2, d, vl), + INSN(cvtqq2pd, f3, 0f, e6, vl, q, vl), + INSN(cvtqq2ps, , 0f, 5b, vl, q, vl), INSN_PFP(or, 0f, 56), // pmovd2m, f3, 0f38, 39, d // pmovm2, f3, 0f38, 38, dq --- a/tools/tests/x86_emulator/simd.c +++ b/tools/tests/x86_emulator/simd.c @@ -92,6 +92,13 @@ static inline bool _to_bool(byte_vec_t b # define to_int(x) ((vec_t){ (int)(x)[0] }) #elif VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW__) # define to_int(x) __builtin_ia32_pi2fd(__builtin_ia32_pf2id(x)) +#elif defined(FLOAT_SIZE) && VEC_SIZE > FLOAT_SIZE && defined(__AVX512F__) && \ + (VEC_SIZE == 64 || defined(__AVX512VL__)) +# if FLOAT_SIZE == 4 +# define to_int(x) BR(cvtdq2ps, _mask, BR(cvtps2dq, _mask, x, (vsi_t)undef(), ~0), undef(), ~0) +# elif FLOAT_SIZE == 
8 +# define to_int(x) B(cvtdq2pd, _mask, BR(cvtpd2dq, _mask, x, (vsi_half_t){}, ~0), undef(), ~0) +# endif #elif VEC_SIZE == 16 && defined(__SSE2__) # if FLOAT_SIZE == 4 # define to_int(x) __builtin_ia32_cvtdq2ps(__builtin_ia32_cvtps2dq(x)) @@ -1136,15 +1143,21 @@ int simd_test(void) touch(src); if ( !eq(x * -alt, -src) ) return __LINE__; -# if defined(recip) && defined(to_int) +# ifdef to_int touch(src); + x = to_int(src); + touch(src); + if ( !eq(x, src) ) return __LINE__; + +# ifdef recip + touch(src); x = recip(src); touch(src); touch(x); if ( !eq(to_int(recip(x)), src) ) return __LINE__; -# ifdef rsqrt +# ifdef rsqrt x = src * src; touch(x); y = rsqrt(x); @@ -1152,6 +1165,7 @@ int simd_test(void) if ( !eq(to_int(recip(y)), src) ) return __LINE__; touch(src); if ( !eq(to_int(y), to_int(recip(src))) ) return __LINE__; +# endif # endif # endif --- a/tools/tests/x86_emulator/simd.h +++ b/tools/tests/x86_emulator/simd.h @@ -244,6 +244,7 @@ asm ( ".macro override insn \n\t" OVR_INT(broadcast); OVR_SFP(broadcast); OVR_SFP(comi); +OVR_VFP(cvtdq2); OVR_FP(add); OVR_INT(add); OVR_BW(adds); @@ -330,13 +331,19 @@ REN(pandn, , d); REN(por, , d); REN(pxor, , d); # endif +OVR(cvtpd2dqx); +OVR(cvtpd2dqy); OVR(cvtpd2psx); OVR(cvtpd2psy); OVR(cvtph2ps); +OVR(cvtps2dq); OVR(cvtps2pd); OVR(cvtps2ph); OVR(cvtsd2ss); OVR(cvtss2sd); +OVR(cvttpd2dqx); +OVR(cvttpd2dqy); +OVR(cvttps2dq); OVR(movddup); OVR(movntdq); OVR(movntdqa); --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -311,7 +311,7 @@ static const struct twobyte_table { [0x54 ... 0x57] = { DstImplicit|SrcMem|ModRM, simd_packed_fp, d8s_vl }, [0x58 ... 0x59] = { DstImplicit|SrcMem|ModRM, simd_any_fp, d8s_vl }, [0x5a] = { DstImplicit|SrcMem|ModRM|Mov, simd_any_fp, d8s_vl }, - [0x5b] = { DstImplicit|SrcMem|ModRM|Mov, simd_other }, + [0x5b] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl }, [0x5c ... 0x5f] = { DstImplicit|SrcMem|ModRM, simd_any_fp, d8s_vl }, [0x60 ... 
0x62] = { DstImplicit|SrcMem|ModRM, simd_other, d8s_vl }, [0x63 ... 0x67] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl }, @@ -375,7 +375,7 @@ static const struct twobyte_table { [0xe0] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl }, [0xe1 ... 0xe2] = { DstImplicit|SrcMem|ModRM, simd_128, 4 }, [0xe3 ... 0xe5] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl }, - [0xe6] = { DstImplicit|SrcMem|ModRM|Mov, simd_other }, + [0xe6] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl }, [0xe7] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int, d8s_vl }, [0xe8 ... 0xef] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl }, [0xf0] = { DstImplicit|SrcMem|ModRM|Mov, simd_other }, @@ -3069,6 +3069,11 @@ x86_decode( if ( disp8scale == 2 && evex.pfx == vex_f3 ) disp8scale = 3; break; + + case 0xe6: /* vcvtdq2pd needs special casing */ + if ( disp8scale && evex.pfx == vex_f3 && !evex.w && !evex.br ) + --disp8scale; + break; } break; @@ -6553,6 +6558,22 @@ x86_emulate( op_bytes = 16 << vex.l; goto simd_0f_cvt; + case X86EMUL_OPC_EVEX_66(0x0f, 0x5b): /* vcvtps2dq [xyz]mm/mem,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_F3(0x0f, 0x5b): /* vcvttps2dq [xyz]mm/mem,[xyz]mm{k} */ + generate_exception_if(evex.w, EXC_UD); + /* fall through */ + case X86EMUL_OPC_EVEX(0x0f, 0x5b): /* vcvtdq2ps [xyz]mm/mem,[xyz]mm{k} */ + /* vcvtqq2ps [xyz]mm/mem,{x,y}mm{k} */ + if ( evex.w ) + host_and_vcpu_must_have(avx512dq); + else + host_and_vcpu_must_have(avx512f); + if ( ea.type == OP_MEM || !evex.br ) + avx512_vlen_check(false); + d |= TwoOp; + op_bytes = 16 << evex.lr; + goto simd_zmm; + CASE_SIMD_PACKED_INT(0x0f, 0x60): /* punpcklbw {,x}mm/mem,{,x}mm */ case X86EMUL_OPC_VEX_66(0x0f, 0x60): /* vpunpcklbw {x,y}mm/mem,{x,y}mm,{x,y}mm */ CASE_SIMD_PACKED_INT(0x0f, 0x61): /* punpcklwd {,x}mm/mem,{,x}mm */ @@ -7211,6 +7232,27 @@ x86_emulate( op_bytes = 8; goto simd_0f_xmm; + case X86EMUL_OPC_EVEX_66(0x0f, 0xe6): /* vcvttpd2dq [xyz]mm/mem,{x,y}mm{k} */ + case X86EMUL_OPC_EVEX_F2(0x0f, 
0xe6): /* vcvtpd2dq [xyz]mm/mem,{x,y}mm{k} */ + generate_exception_if(!evex.w, EXC_UD); + /* fall through */ + case X86EMUL_OPC_EVEX_F3(0x0f, 0xe6): /* vcvtdq2pd {x,y}mm/mem,[xyz]mm{k} */ + /* vcvtqq2pd [xyz]mm/mem,[xyz]mm{k} */ + if ( evex.pfx != vex_f3 ) + host_and_vcpu_must_have(avx512f); + else if ( evex.w ) + host_and_vcpu_must_have(avx512dq); + else + { + host_and_vcpu_must_have(avx512f); + generate_exception_if(ea.type != OP_MEM && evex.br, EXC_UD); + } + if ( ea.type == OP_MEM || !evex.br ) + avx512_vlen_check(false); + d |= TwoOp; + op_bytes = 8 << (evex.w + evex.lr); + goto simd_zmm; + case X86EMUL_OPC_F2(0x0f, 0xf0): /* lddqu m128,xmm */ case X86EMUL_OPC_VEX_F2(0x0f, 0xf0): /* vlddqu mem,{x,y}mm */ generate_exception_if(ea.type != OP_MEM, EXC_UD); _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |