[PATCH v2 09/10] x86emul: handle AVX512-FP16 conversion to/from (packed) int{32,64} insns
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -612,18 +612,36 @@ static const struct test avx512_fp16_all
     INSN(cmpph,        ,   0f3a, c2,    vl, fp16, vl),
     INSN(cmpsh,      f3,   0f3a, c2,    el, fp16, el),
     INSN(comish,       ,   map5, 2f,    el, fp16, el),
+    INSN(cvtdq2ph,     ,   map5, 5b,    vl,    d, vl),
     INSN(cvtpd2ph,   66,   map5, 5a,    vl,    q, vl),
+    INSN(cvtph2dq,   66,   map5, 5b,  vl_2, fp16, vl),
     INSN(cvtph2pd,     ,   map5, 5a,  vl_4, fp16, vl),
     INSN(cvtph2psx,  66,   map6, 13,  vl_2, fp16, vl),
+    INSN(cvtph2qq,   66,   map5, 7b,  vl_4, fp16, vl),
+    INSN(cvtph2udq,    ,   map5, 79,  vl_2, fp16, vl),
+    INSN(cvtph2uqq,  66,   map5, 79,  vl_4, fp16, vl),
     INSN(cvtph2uw,     ,   map5, 7d,    vl, fp16, vl),
     INSN(cvtph2w,    66,   map5, 7d,    vl, fp16, vl),
     INSN(cvtps2phx,  66,   map5, 1d,    vl,    d, vl),
+    INSN(cvtqq2ph,     ,   map5, 5b,    vl,    q, vl),
     INSN(cvtsd2sh,   f2,   map5, 5a,    el,    q, el),
     INSN(cvtsh2sd,   f3,   map5, 5a,    el, fp16, el),
+    INSN(cvtsh2si,   f3,   map5, 2d,    el, fp16, el),
     INSN(cvtsh2ss,     ,   map6, 13,    el, fp16, el),
+    INSN(cvtsh2usi,  f3,   map5, 79,    el, fp16, el),
+    INSN(cvtsi2sh,   f3,   map5, 2a,    el, dq64, el),
     INSN(cvtss2sh,     ,   map5, 1d,    el,    d, el),
+    INSN(cvttph2dq,  f3,   map5, 5b,  vl_2, fp16, vl),
+    INSN(cvttph2qq,  66,   map5, 7a,  vl_4, fp16, vl),
+    INSN(cvttph2udq,   ,   map5, 78,  vl_2, fp16, vl),
+    INSN(cvttph2uqq, 66,   map5, 78,  vl_4, fp16, vl),
     INSN(cvttph2uw,    ,   map5, 7c,    vl, fp16, vl),
     INSN(cvttph2w,   66,   map5, 7c,    vl, fp16, vl),
+    INSN(cvttsh2si,  f3,   map5, 2c,    el, fp16, el),
+    INSN(cvttsh2usi, f3,   map5, 78,    el, fp16, el),
+    INSN(cvtudq2ph,  f2,   map5, 7a,    vl,    d, vl),
+    INSN(cvtuqq2ph,  f2,   map5, 7a,    vl,    q, vl),
+    INSN(cvtusi2sh,  f3,   map5, 7b,    el, dq64, el),
     INSN(cvtuw2ph,   f2,   map5, 7d,    vl, fp16, vl),
     INSN(cvtw2ph,    f3,   map5, 7d,    vl, fp16, vl),
     INSN(divph,        ,   map5, 5e,    vl, fp16, vl),
--- a/tools/tests/x86_emulator/predicates.c
+++ b/tools/tests/x86_emulator/predicates.c
@@ -2033,6 +2033,9 @@ static const struct evex {
     { { 0x11 }, 2, T, W, pfx_f3, W0, LIG }, /* vmovsh */
     { { 0x1d }, 2, T, R, pfx_66, W0, Ln }, /* vcvtps2phx */
     { { 0x1d }, 2, T, R, pfx_no, W0, LIG }, /* vcvtss2sh */
+    { { 0x2a }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvtsi2sh */
+    { { 0x2c }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvttsh2si */
+    { { 0x2d }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvtsh2si */
     { { 0x2e }, 2, T, R, pfx_no, W0, LIG }, /* vucomish */
     { { 0x2f }, 2, T, R, pfx_no, W0, LIG }, /* vcomish */
     { { 0x51 }, 2, T, R, pfx_no, W0, Ln }, /* vsqrtph */
@@ -2045,6 +2048,10 @@ static const struct evex {
     { { 0x5a }, 2, T, R, pfx_66, W1, Ln }, /* vcvtpd2ph */
     { { 0x5a }, 2, T, R, pfx_f3, W0, LIG }, /* vcvtsh2sd */
     { { 0x5a }, 2, T, R, pfx_f2, W1, LIG }, /* vcvtsd2sh */
+    { { 0x5b }, 2, T, R, pfx_no, W0, Ln }, /* vcvtdq2ph */
+    { { 0x5b }, 2, T, R, pfx_no, W1, Ln }, /* vcvtqq2ph */
+    { { 0x5b }, 2, T, R, pfx_66, W0, Ln }, /* vcvtph2dq */
+    { { 0x5b }, 2, T, R, pfx_f3, W0, Ln }, /* vcvttph2dq */
     { { 0x5c }, 2, T, R, pfx_no, W0, Ln }, /* vsubph */
     { { 0x5c }, 2, T, R, pfx_f3, W0, LIG }, /* vsubsh */
     { { 0x5d }, 2, T, R, pfx_no, W0, Ln }, /* vminph */
@@ -2054,6 +2061,17 @@ static const struct evex {
     { { 0x5f }, 2, T, R, pfx_no, W0, Ln }, /* vmaxph */
     { { 0x5f }, 2, T, R, pfx_f3, W0, LIG }, /* vmaxsh */
     { { 0x6e }, 2, T, R, pfx_66, WIG, L0 }, /* vmovw */
+    { { 0x78 }, 2, T, R, pfx_no, W0, Ln }, /* vcvttph2udq */
+    { { 0x78 }, 2, T, R, pfx_66, W0, Ln }, /* vcvttph2uqq */
+    { { 0x78 }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvttsh2usi */
+    { { 0x79 }, 2, T, R, pfx_no, W0, Ln }, /* vcvtph2udq */
+    { { 0x79 }, 2, T, R, pfx_66, W0, Ln }, /* vcvtph2uqq */
+    { { 0x79 }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvtsh2usi */
+    { { 0x7a }, 2, T, R, pfx_66, W0, Ln }, /* vcvttph2qq */
+    { { 0x7a }, 2, T, R, pfx_f2, W0, Ln }, /* vcvtudq2ph */
+    { { 0x7a }, 2, T, R, pfx_f2, W1, Ln }, /* vcvtuqq2ph */
+    { { 0x7b }, 2, T, R, pfx_66, W0, Ln }, /* vcvtph2qq */
+    { { 0x7b }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvtusi2sh */
     { { 0x7c }, 2, T, R, pfx_no, W0, Ln }, /* vcvttph2uw */
     { { 0x7c }, 2, T, R, pfx_66, W0, Ln }, /* vcvttph2w */
     { { 0x7d }, 2, T, R, pfx_no, W0, Ln }, /* vcvtph2uw */
--- a/xen/arch/x86/x86_emulate/decode.c
+++ b/xen/arch/x86/x86_emulate/decode.c
@@ -1497,12 +1497,25 @@ int x86emul_decode(struct x86_emulate_st
            s->simd_size = simd_scalar_vexw;
            break;

+        case 0x2a: /* vcvtsi2sh */
+            break;
+
+        case 0x2c: case 0x2d: /* vcvt{,t}sh2si */
+            if ( s->evex.pfx == vex_f3 )
+                s->fp16 = true;
+            break;
+
        case 0x2e: case 0x2f: /* v{,u}comish */
            if ( !s->evex.pfx )
                s->fp16 = true;
            s->simd_size = simd_none;
            break;

+        case 0x5b: /* vcvt{d,q}q2ph, vcvt{,t}ph2dq */
+            if ( s->evex.pfx && s->evex.pfx != vex_f2 )
+                s->fp16 = true;
+            break;
+
        case 0x6e: /* vmovw r/m16, xmm */
            d = (d & ~SrcMask) | SrcMem16;
            /* fall through */
@@ -1512,6 +1525,17 @@ int x86emul_decode(struct x86_emulate_st
            s->simd_size = simd_none;
            break;

+        case 0x78: case 0x79: /* vcvt{,t}ph2u{d,q}q, vcvt{,t}sh2usi */
+            if ( s->evex.pfx != vex_f2 )
+                s->fp16 = true;
+            break;
+
+        case 0x7a: /* vcvttph2qq, vcvtu{d,q}q2ph */
+        case 0x7b: /* vcvtph2qq, vcvtusi2sh */
+            if ( s->evex.pfx == vex_66 )
+                s->fp16 = true;
+            break;
+
        case 0x7c: /* vcvttph2{,u}w */
        case 0x7d: /* vcvtph2{,u}w / vcvt{,u}w2ph */
            d = DstReg | SrcMem | TwoOp;
@@ -1524,10 +1548,34 @@ int x86emul_decode(struct x86_emulate_st
        switch ( b )
        {
+        case 0x78:
+        case 0x79:
+            /* vcvt{,t}ph2u{d,q}q need special casing */
+            if ( s->evex.pfx <= vex_66 )
+            {
+                if ( !s->evex.brs )
+                    disp8scale -= 1 + (s->evex.pfx == vex_66);
+                break;
+            }
+            /* vcvt{,t}sh2usi needs special casing: fall through */
+        case 0x2c: case 0x2d: /* vcvt{,t}sh2si need special casing */
+            disp8scale = 1;
+            break;
+
        case 0x5a: /* vcvtph2pd needs special casing */
            if ( !s->evex.pfx && !s->evex.brs )
                disp8scale -= 2;
            break;
+
+        case 0x5b: /* vcvt{,t}ph2dq need special casing */
+            if ( s->evex.pfx && !s->evex.brs )
+                --disp8scale;
+            break;
+
+        case 0x7a: case 0x7b: /* vcvt{,t}ph2qq need special casing */
+            if ( s->evex.pfx == vex_66 )
+                disp8scale = s->evex.brs ? 1 : 2 + s->evex.lr;
+            break;
        }
        break;
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -3577,6 +3577,12 @@ x86_emulate(
        state->simd_size = simd_none;
        goto simd_0f_rm;

+#ifndef X86EMUL_NO_SIMD
+
+    case X86EMUL_OPC_EVEX_F3(5, 0x2a): /* vcvtsi2sh r/m,xmm,xmm */
+    case X86EMUL_OPC_EVEX_F3(5, 0x7b): /* vcvtusi2sh r/m,xmm,xmm */
+        host_and_vcpu_must_have(avx512_fp16);
+        /* fall through */
    CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2a): /* vcvtsi2s{s,d} r/m,xmm,xmm */
    CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x7b): /* vcvtusi2s{s,d} r/m,xmm,xmm */
        generate_exception_if(evex.opmsk || (ea.type != OP_REG && evex.brs),
@@ -3655,7 +3661,9 @@ x86_emulate(
                opc[1] = 0x01;

                rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp,
-                               vex.pfx & VEX_PREFIX_DOUBLE_MASK ? 8 : 4, ctxt);
+                               vex.pfx & VEX_PREFIX_DOUBLE_MASK
+                               ? 8 : 2 << !state->fp16,
+                               ctxt);
                if ( rc != X86EMUL_OKAY )
                    goto done;
            }
@@ -3685,6 +3693,12 @@ x86_emulate(
        state->simd_size = simd_none;
        break;

+    case X86EMUL_OPC_EVEX_F3(5, 0x2c): /* vcvttsh2si xmm/mem,reg */
+    case X86EMUL_OPC_EVEX_F3(5, 0x2d): /* vcvtsh2si xmm/mem,reg */
+    case X86EMUL_OPC_EVEX_F3(5, 0x78): /* vcvttsh2usi xmm/mem,reg */
+    case X86EMUL_OPC_EVEX_F3(5, 0x79): /* vcvtsh2usi xmm/mem,reg */
+        host_and_vcpu_must_have(avx512_fp16);
+        /* fall through */
    CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2c): /* vcvtts{s,d}2si xmm/mem,reg */
    CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2d): /* vcvts{s,d}2si xmm/mem,reg */
    CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x78): /* vcvtts{s,d}2usi xmm/mem,reg */
@@ -3756,8 +3770,6 @@ x86_emulate(
        ASSERT(!state->simd_size);
        break;

-#ifndef X86EMUL_NO_SIMD
-
    case X86EMUL_OPC_EVEX(5, 0x2e): /* vucomish xmm/m16,xmm */
    case X86EMUL_OPC_EVEX(5, 0x2f): /* vcomish xmm/m16,xmm */
        host_and_vcpu_must_have(avx512_fp16);
@@ -7787,6 +7799,38 @@ x86_emulate(
                            2 * evex.w);
        goto avx512f_all_fp;

+    case X86EMUL_OPC_EVEX   (5, 0x5b): /* vcvtdq2ph [xyz]mm/mem,[xy]mm{k} */
+                                       /* vcvtqq2ph [xyz]mm/mem,xmm{k} */
+    case X86EMUL_OPC_EVEX_F2(5, 0x7a): /* vcvtudq2ph [xyz]mm/mem,[xy]mm{k} */
+                                       /* vcvtuqq2ph [xyz]mm/mem,xmm{k} */
+        host_and_vcpu_must_have(avx512_fp16);
+        if ( ea.type != OP_REG || !evex.brs )
+            avx512_vlen_check(false);
+        op_bytes = 16 << evex.lr;
+        goto simd_zmm;
+
+    case X86EMUL_OPC_EVEX_66(5, 0x5b): /* vcvtph2dq [xy]mm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_F3(5, 0x5b): /* vcvttph2dq [xy]mm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX   (5, 0x78): /* vcvttph2udq [xy]mm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX   (5, 0x79): /* vcvtph2udq [xy]mm/mem,[xyz]mm{k} */
+        host_and_vcpu_must_have(avx512_fp16);
+        generate_exception_if(evex.w, EXC_UD);
+        if ( ea.type != OP_REG || !evex.brs )
+            avx512_vlen_check(false);
+        op_bytes = 8 << evex.lr;
+        goto simd_zmm;
+
+    case X86EMUL_OPC_EVEX_66(5, 0x78): /* vcvttph2uqq xmm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(5, 0x79): /* vcvtph2uqq xmm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(5, 0x7a): /* vcvttph2qq xmm/mem,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(5, 0x7b): /* vcvtph2qq xmm/mem,[xyz]mm{k} */
+        host_and_vcpu_must_have(avx512_fp16);
+        generate_exception_if(evex.w, EXC_UD);
+        if ( ea.type != OP_REG || !evex.brs )
+            avx512_vlen_check(false);
+        op_bytes = 4 << (evex.w + evex.lr);
+        goto simd_zmm;
+
    case X86EMUL_OPC_EVEX   (5, 0x7c): /* vcvttph2uw [xyz]mm/mem,[xyz]mm{k} */
    case X86EMUL_OPC_EVEX_66(5, 0x7c): /* vcvttph2w [xyz]mm/mem,[xyz]mm{k} */
    case X86EMUL_OPC_EVEX   (5, 0x7d): /* vcvtph2uw [xyz]mm/mem,[xyz]mm{k} */
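
As an aside for readers less familiar with EVEX disp8 compression: the decode.c
hunks above adjust disp8scale so that an 8-bit displacement is scaled by the
size of the actual memory access rather than by the full vector length. A
minimal standalone sketch of the rule the vcvt{,t}ph2dq special case encodes
follows; the function and parameter names here are invented for illustration
and are not the emulator's actual fields:

#include <stdio.h>

/*
 * Illustrative model of the disp8 scale (log2 of the scaling unit) for
 * vcvt{,t}ph2dq (map5, opcode 0x5b with a 66/F3 prefix): the source is a
 * half-width vector of fp16 elements, so the scale is one less than the
 * full fp16 vector default of 4 + lr.  lr models EVEX.L'L (0 = 128,
 * 1 = 256, 2 = 512 bits); brs models the broadcast bit.
 */
static unsigned int disp8scale_ph2dq(unsigned int lr, unsigned int brs)
{
    if ( brs )
        return 1;          /* broadcast reads a single 2-byte fp16 element */
    return (4 + lr) - 1;   /* half-width source: 8 << lr bytes */
}

int main(void)
{
    for ( unsigned int lr = 0; lr <= 2; ++lr )
        printf("L'L=%u: disp8 unit = %u bytes (no broadcast)\n",
               lr, 1u << disp8scale_ph2dq(lr, 0));
    return 0;
}

This prints 8, 16, and 32 bytes for the three vector lengths, matching the
"--disp8scale" adjustment applied to the default full-vector scale.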
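For the scalar forms, a rough model of the value computation vcvttsh2si
performs (truncating FP16 to int32) can help when sanity-checking test
vectors. The helper below is hand-rolled purely for illustration, is not
taken from the emulator, and glosses over the NaN/overflow cases, which the
real instruction turns into the integer indefinite value (build with -lm):

#include <math.h>
#include <stdint.h>
#include <stdio.h>

/* Decode an IEEE binary16 value into a float (normals, subnormals,
 * zero, infinity, NaN). */
static float fp16_to_float(uint16_t h)
{
    unsigned int exp = (h >> 10) & 0x1f, frac = h & 0x3ff;
    float f;

    if ( exp == 0 )         /* zero / subnormal: frac * 2^-24 */
        f = ldexpf(frac, -24);
    else if ( exp == 0x1f ) /* infinity / NaN */
        f = frac ? nanf("") : INFINITY;
    else                    /* normal: (1024 + frac) * 2^(exp - 25) */
        f = ldexpf(1024 + frac, (int)exp - 25);

    return (h & 0x8000) ? -f : f;
}

int main(void)
{
    /* 0x4248 is 3.140625 in binary16; truncation yields 3. */
    printf("%d\n", (int32_t)fp16_to_float(0x4248));
    return 0;
}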