[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [xen master] x86emul: handle AVX512-FP16 move insns
commit da2e0057c7e6ca3e449ae840839b83798c7d15c7 Author: Jan Beulich <jbeulich@xxxxxxxx> AuthorDate: Mon Jun 5 14:57:10 2023 +0200 Commit: Jan Beulich <jbeulich@xxxxxxxx> CommitDate: Mon Jun 5 14:57:10 2023 +0200 x86emul: handle AVX512-FP16 move insns Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> --- tools/tests/x86_emulator/evex-disp8.c | 8 ++++ tools/tests/x86_emulator/predicates.c | 4 ++ tools/tests/x86_emulator/test_x86_emulator.c | 70 ++++++++++++++++++++++++++++ xen/arch/x86/x86_emulate/decode.c | 11 ++++- xen/arch/x86/x86_emulate/x86_emulate.c | 21 ++++++++- 5 files changed, 112 insertions(+), 2 deletions(-) diff --git a/tools/tests/x86_emulator/evex-disp8.c b/tools/tests/x86_emulator/evex-disp8.c index f526321198..d0159199f8 100644 --- a/tools/tests/x86_emulator/evex-disp8.c +++ b/tools/tests/x86_emulator/evex-disp8.c @@ -622,6 +622,8 @@ static const struct test avx512_fp16_all[] = { INSN(maxsh, f3, map5, 5f, el, fp16, el), INSN(minph, , map5, 5d, vl, fp16, vl), INSN(minsh, f3, map5, 5d, el, fp16, el), + INSN(movsh, f3, map5, 10, el, fp16, el), + INSN(movsh, f3, map5, 11, el, fp16, el), INSN(mulph, , map5, 59, vl, fp16, vl), INSN(mulsh, f3, map5, 59, el, fp16, el), INSN(reduceph, , 0f3a, 56, vl, fp16, vl), @@ -635,6 +637,11 @@ static const struct test avx512_fp16_all[] = { INSN(ucomish, , map5, 2e, el, fp16, el), }; +static const struct test avx512_fp16_128[] = { + INSN(movw, 66, map5, 6e, el, fp16, el), + INSN(movw, 66, map5, 7e, el, fp16, el), +}; + static const struct test gfni_all[] = { INSN(gf2p8affineinvqb, 66, 0f3a, cf, vl, q, vl), INSN(gf2p8affineqb, 66, 0f3a, ce, vl, q, vl), @@ -1039,6 +1046,7 @@ void evex_disp8_test(void *instr, struct x86_emulate_ctxt *ctxt, RUN(avx512_vp2intersect, all); RUN(avx512_vpopcntdq, all); RUN(avx512_fp16, all); + RUN(avx512_fp16, 128); if ( cpu_has_avx512f ) { diff --git a/tools/tests/x86_emulator/predicates.c b/tools/tests/x86_emulator/predicates.c index 746bc2dec1..9a44195458 100644 --- a/tools/tests/x86_emulator/predicates.c +++ b/tools/tests/x86_emulator/predicates.c @@ -2045,6 +2045,8 @@ static const struct evex { { { 0xce }, 3, T, R, pfx_66, W1, Ln }, /* vgf2p8affineqb */ { { 0xcf }, 3, T, R, pfx_66, W1, Ln }, /* vgf2p8affineinvqb */ }, evex_map5[] = { + { { 0x10 }, 2, T, R, pfx_f3, W0, LIG }, /* vmovsh */ + { { 0x11 }, 2, T, W, pfx_f3, W0, LIG }, /* vmovsh */ { { 0x2e }, 2, T, R, pfx_no, W0, LIG }, /* vucomish */ { { 0x2f }, 2, T, R, pfx_no, W0, LIG }, /* vcomish */ { { 0x51 }, 2, T, R, pfx_no, W0, Ln }, /* vsqrtph */ @@ -2061,6 +2063,8 @@ static const struct evex { { { 0x5e }, 2, T, R, pfx_f3, W0, LIG }, /* vdivsh */ { { 0x5f }, 2, T, R, pfx_no, W0, Ln }, /* vmaxph */ { { 0x5f }, 2, T, R, pfx_f3, W0, LIG }, /* vmaxsh */ + { { 0x6e }, 2, T, R, pfx_66, WIG, L0 }, /* vmovw */ + { { 0x7e }, 2, T, W, pfx_66, WIG, L0 }, /* vmovw */ }; static const struct { diff --git a/tools/tests/x86_emulator/test_x86_emulator.c b/tools/tests/x86_emulator/test_x86_emulator.c index c0d908cc4f..bbb0d67a3b 100644 --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -5173,6 +5173,76 @@ int main(int argc, char **argv) else printf("skipped\n"); + printf("%-40s", "Testing vmovsh 8(%ecx),%xmm5..."); + if ( stack_exec && cpu_has_avx512_fp16 ) + { + decl_insn(vmovsh_from_mem); + decl_insn(vmovw_to_gpr); + + asm volatile ( "vpcmpeqw %%ymm5, %%ymm5, %%ymm5\n\t" + put_insn(vmovsh_from_mem, + /* vmovsh 8(%0), %%xmm5 */ + ".byte 0x62, 0xf5, 0x7e, 0x08\n\t" + ".byte 0x10, 0x69, 0x04") + :: "c" (NULL) ); + + set_insn(vmovsh_from_mem); + res[2] = 0x3c00bc00; + regs.ecx = (unsigned long)res; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || !check_eip(vmovsh_from_mem) ) + goto fail; + asm volatile ( "kmovw %2, %%k1\n\t" + "vmovdqu16 %1, %%zmm4%{%%k1%}%{z%}\n\t" + "vpcmpeqw %%zmm4, %%zmm5, %%k0\n\t" + "kmovw %%k0, %0" + : "=g" (rc) + : "m" (res[2]), "r" (1) ); + if ( rc != 0xffff ) + goto fail; + printf("okay\n"); + + printf("%-40s", "Testing vmovsh %xmm4,2(%eax){%k3}..."); + memset(res, ~0, 8); + res[2] = 0xbc00ffff; + memset(res + 3, ~0, 8); + regs.eax = (unsigned long)res; + regs.ecx = ~0; + for ( i = 0; i < 2; ++i ) + { + decl_insn(vmovsh_to_mem); + + asm volatile ( "kmovw %1, %%k3\n\t" + put_insn(vmovsh_to_mem, + /* vmovsh %%xmm4, 2(%0)%{%%k3%} */ + ".byte 0x62, 0xf5, 0x7e, 0x0b\n\t" + ".byte 0x11, 0x60, 0x01") + :: "a" (NULL), "r" (i) ); + + set_insn(vmovsh_to_mem); + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || !check_eip(vmovsh_to_mem) || + memcmp(res, res + 3 - i, 8) ) + goto fail; + } + printf("okay\n"); + + printf("%-40s", "Testing vmovw %xmm5,%ecx..."); + asm volatile ( put_insn(vmovw_to_gpr, + /* vmovw %%xmm5, %0 */ + ".byte 0x62, 0xf5, 0x7d, 0x08\n\t" + ".byte 0x7e, 0xe9") + :: "c" (NULL) ); + set_insn(vmovw_to_gpr); + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || !check_eip(vmovw_to_gpr) || + regs.ecx != 0xbc00 ) + goto fail; + printf("okay\n"); + } + else + printf("skipped\n"); + printf("%-40s", "Testing invpcid 16(%ecx),%%edx..."); if ( stack_exec ) { diff --git a/xen/arch/x86/x86_emulate/decode.c b/xen/arch/x86/x86_emulate/decode.c index 7d97c9ecb0..b0a70340c9 100644 --- a/xen/arch/x86/x86_emulate/decode.c +++ b/xen/arch/x86/x86_emulate/decode.c @@ -575,7 +575,7 @@ static unsigned int decode_disp8scale(enum disp8scale scale, break; case d8s_dq64: - return 2 + (s->op_bytes == 8); + return 1 + !s->fp16 + (s->op_bytes == 8); } switch ( s->simd_size ) @@ -1458,6 +1458,15 @@ int x86emul_decode(struct x86_emulate_state *s, s->fp16 = true; s->simd_size = simd_none; break; + + case 0x6e: /* vmovw r/m16, xmm */ + d = (d & ~SrcMask) | SrcMem16; + /* fall through */ + case 0x7e: /* vmovw xmm, r/m16 */ + if ( s->evex.pfx == vex_66 ) + s->fp16 = true; + s->simd_size = simd_none; + break; } /* Like above re-use twobyte_table[] here. */ diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c index 09f1f9ace5..fa3406ee91 100644 --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -4357,6 +4357,15 @@ x86_emulate( #ifndef X86EMUL_NO_SIMD + case X86EMUL_OPC_EVEX_66(5, 0x7e): /* vmovw xmm,r/m16 */ + ASSERT(dst.bytes >= 4); + if ( dst.type == OP_MEM ) + dst.bytes = 2; + /* fall through */ + case X86EMUL_OPC_EVEX_66(5, 0x6e): /* vmovw r/m16,xmm */ + host_and_vcpu_must_have(avx512_fp16); + generate_exception_if(evex.w, X86_EXC_UD); + /* fall through */ case X86EMUL_OPC_EVEX_66(0x0f, 0x6e): /* vmov{d,q} r/m,xmm */ case X86EMUL_OPC_EVEX_66(0x0f, 0x7e): /* vmov{d,q} xmm,r/m */ generate_exception_if((evex.lr || evex.opmsk || evex.brs || @@ -7748,8 +7757,18 @@ x86_emulate( #ifndef X86EMUL_NO_SIMD + case X86EMUL_OPC_EVEX_F3(5, 0x10): /* vmovsh m16,xmm{k} */ + /* vmovsh xmm,xmm,xmm{k} */ + case X86EMUL_OPC_EVEX_F3(5, 0x11): /* vmovsh xmm,m16{k} */ + /* vmovsh xmm,xmm,xmm{k} */ + generate_exception_if(evex.brs, X86_EXC_UD); + if ( ea.type == OP_MEM ) + d |= TwoOp; + else + { case X86EMUL_OPC_EVEX_F3(5, 0x51): /* vsqrtsh xmm/m16,xmm,xmm{k} */ - d &= ~TwoOp; + d &= ~TwoOp; + } /* fall through */ case X86EMUL_OPC_EVEX(5, 0x51): /* vsqrtph [xyz]mm/mem,[xyz]mm{k} */ CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x58): /* vadd{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ -- generated by git-patchbot for /home/xen/git/xen.git#master
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |