[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [xen master] x86emul: handle AVX512-FP16 fma-like insns
commit 67d458c84a93413e52662d00f1c5434f3c58e088 Author: Jan Beulich <jbeulich@xxxxxxxx> AuthorDate: Mon Jun 5 14:57:47 2023 +0200 Commit: Jan Beulich <jbeulich@xxxxxxxx> CommitDate: Mon Jun 5 14:57:47 2023 +0200 x86emul: handle AVX512-FP16 fma-like insns The Map6 encoding space is a very sparse clone of the "0f38" one. Once again re-use that table, as the entries corresponding to invalid opcodes in Map6 are simply benign with simd_size forced to other than simd_none (preventing undue memory reads in SrcMem handling early in x86_emulate()). Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> --- tools/tests/x86_emulator/evex-disp8.c | 30 ++++++++++++++++++++++++ tools/tests/x86_emulator/predicates.c | 32 +++++++++++++++++++++++++ xen/arch/x86/x86_emulate/decode.c | 37 +++++++++++++++++++++++++++++ xen/arch/x86/x86_emulate/private.h | 2 ++ xen/arch/x86/x86_emulate/x86_emulate.c | 43 ++++++++++++++++++++++++++++++++++ xen/arch/x86/x86_emulate/x86_emulate.h | 1 + 6 files changed, 145 insertions(+) diff --git a/tools/tests/x86_emulator/evex-disp8.c b/tools/tests/x86_emulator/evex-disp8.c index d0159199f8..de4daddc5f 100644 --- a/tools/tests/x86_emulator/evex-disp8.c +++ b/tools/tests/x86_emulator/evex-disp8.c @@ -614,6 +614,36 @@ static const struct test avx512_fp16_all[] = { INSN(comish, , map5, 2f, el, fp16, el), INSN(divph, , map5, 5e, vl, fp16, vl), INSN(divsh, f3, map5, 5e, el, fp16, el), + INSN(fmadd132ph, 66, map6, 98, vl, fp16, vl), + INSN(fmadd132sh, 66, map6, 99, el, fp16, el), + INSN(fmadd213ph, 66, map6, a8, vl, fp16, vl), + INSN(fmadd213sh, 66, map6, a9, el, fp16, el), + INSN(fmadd231ph, 66, map6, b8, vl, fp16, vl), + INSN(fmadd231sh, 66, map6, b9, el, fp16, el), + INSN(fmaddsub132ph, 66, map6, 96, vl, fp16, vl), + INSN(fmaddsub213ph, 66, map6, a6, vl, fp16, vl), + INSN(fmaddsub231ph, 66, map6, b6, vl, fp16, vl), + INSN(fmsub132ph, 66, map6, 9a, vl, fp16, vl), + INSN(fmsub132sh, 66, map6, 9b, el, fp16, el), + INSN(fmsub213ph, 66, map6, aa, vl, fp16, vl), + INSN(fmsub213sh, 66, map6, ab, el, fp16, el), + INSN(fmsub231ph, 66, map6, ba, vl, fp16, vl), + INSN(fmsub231sh, 66, map6, bb, el, fp16, el), + INSN(fmsubadd132ph, 66, map6, 97, vl, fp16, vl), + INSN(fmsubadd213ph, 66, map6, a7, vl, fp16, vl), + INSN(fmsubadd231ph, 66, map6, b7, vl, fp16, vl), + INSN(fnmadd132ph, 66, map6, 9c, vl, fp16, vl), + INSN(fnmadd132sh, 66, map6, 9d, el, fp16, el), + INSN(fnmadd213ph, 66, map6, ac, vl, fp16, vl), + INSN(fnmadd213sh, 66, map6, ad, el, fp16, el), + INSN(fnmadd231ph, 66, map6, bc, vl, fp16, vl), + INSN(fnmadd231sh, 66, map6, bd, el, fp16, el), + INSN(fnmsub132ph, 66, map6, 9e, vl, fp16, vl), + INSN(fnmsub132sh, 66, map6, 9f, el, fp16, el), + INSN(fnmsub213ph, 66, map6, ae, vl, fp16, vl), + INSN(fnmsub213sh, 66, map6, af, el, fp16, el), + INSN(fnmsub231ph, 66, map6, be, vl, fp16, vl), + INSN(fnmsub231sh, 66, map6, bf, el, fp16, el), INSN(fpclassph, , 0f3a, 66, vl, fp16, vl), INSN(fpclasssh, , 0f3a, 67, el, fp16, el), INSN(getmantph, , 0f3a, 26, vl, fp16, vl), diff --git a/tools/tests/x86_emulator/predicates.c b/tools/tests/x86_emulator/predicates.c index 9a44195458..2fff04635e 100644 --- a/tools/tests/x86_emulator/predicates.c +++ b/tools/tests/x86_emulator/predicates.c @@ -2065,6 +2065,37 @@ static const struct evex { { { 0x5f }, 2, T, R, pfx_f3, W0, LIG }, /* vmaxsh */ { { 0x6e }, 2, T, R, pfx_66, WIG, L0 }, /* vmovw */ { { 0x7e }, 2, T, W, pfx_66, WIG, L0 }, /* vmovw */ +}, evex_map6[] = { + { { 0x96 }, 2, T, R, pfx_66, W0, Ln }, /* vfmaddsub132ph */ + { { 0x97 }, 2, T, R, pfx_66, W0, Ln }, /* vfmsubadd132ph */ + { { 0x98 }, 2, T, R, pfx_66, W0, Ln }, /* vfmadd132ph */ + { { 0x99 }, 2, T, R, pfx_66, W0, LIG }, /* vfmadd132sh */ + { { 0x9a }, 2, T, R, pfx_66, W0, Ln }, /* vfmsub132ph */ + { { 0x9b }, 2, T, R, pfx_66, W0, LIG }, /* vfmsub132sh */ + { { 0x9c }, 2, T, R, pfx_66, W0, Ln }, /* vfnmadd132ph */ + { { 0x9d }, 2, T, R, pfx_66, W0, LIG }, /* vfnmadd132sh */ + { { 0x9e }, 2, T, R, pfx_66, W0, Ln }, /* vfnmsub132ph */ + { { 0x9f }, 2, T, R, pfx_66, W0, LIG }, /* vfnmsub132sh */ + { { 0xa6 }, 2, T, R, pfx_66, W0, Ln }, /* vfmaddsub213ph */ + { { 0xa7 }, 2, T, R, pfx_66, W0, Ln }, /* vfmsubadd213ph */ + { { 0xa8 }, 2, T, R, pfx_66, W0, Ln }, /* vfmadd213ph */ + { { 0xa9 }, 2, T, R, pfx_66, W0, LIG }, /* vfmadd213sh */ + { { 0xaa }, 2, T, R, pfx_66, W0, Ln }, /* vfmsub213ph */ + { { 0xab }, 2, T, R, pfx_66, W0, LIG }, /* vfmsub213sh */ + { { 0xac }, 2, T, R, pfx_66, W0, Ln }, /* vfnmadd213ph */ + { { 0xad }, 2, T, R, pfx_66, W0, LIG }, /* vfnmadd213sh */ + { { 0xae }, 2, T, R, pfx_66, W0, Ln }, /* vfnmsub213ph */ + { { 0xaf }, 2, T, R, pfx_66, W0, LIG }, /* vfnmsub213sh */ + { { 0xb6 }, 2, T, R, pfx_66, W0, Ln }, /* vfmaddsub231ph */ + { { 0xb7 }, 2, T, R, pfx_66, W0, Ln }, /* vfmsubadd231ph */ + { { 0xb8 }, 2, T, R, pfx_66, W0, Ln }, /* vfmadd231ph */ + { { 0xb9 }, 2, T, R, pfx_66, W0, LIG }, /* vfmadd231sh */ + { { 0xba }, 2, T, R, pfx_66, W0, Ln }, /* vfmsub231ph */ + { { 0xbb }, 2, T, R, pfx_66, W0, LIG }, /* vfmsub231sh */ + { { 0xbc }, 2, T, R, pfx_66, W0, Ln }, /* vfnmadd231ph */ + { { 0xbd }, 2, T, R, pfx_66, W0, LIG }, /* vfnmadd231sh */ + { { 0xbe }, 2, T, R, pfx_66, W0, Ln }, /* vfnmsub231ph */ + { { 0xbf }, 2, T, R, pfx_66, W0, LIG }, /* vfnmsub231sh */ }; static const struct { @@ -2076,6 +2107,7 @@ static const struct { { evex_0f3a, ARRAY_SIZE(evex_0f3a) }, { NULL, 0 }, { evex_map5, ARRAY_SIZE(evex_map5) }, + { evex_map6, ARRAY_SIZE(evex_map6) }, }; #undef Wn diff --git a/xen/arch/x86/x86_emulate/decode.c b/xen/arch/x86/x86_emulate/decode.c index b0a70340c9..9f68ac299d 100644 --- a/xen/arch/x86/x86_emulate/decode.c +++ b/xen/arch/x86/x86_emulate/decode.c @@ -1224,6 +1224,20 @@ int x86emul_decode(struct x86_emulate_state *s, d = twobyte_table[b].desc; s->simd_size = twobyte_table[b].size ?: simd_other; break; + + case evex_map6: + if ( !evex_encoded() ) + { + rc = X86EMUL_UNRECOGNIZED; + goto done; + } + opcode |= MASK_INSR(6, X86EMUL_OPC_EXT_MASK); + /* + * Re-use twobyte_table[]'s 0x38 entry here, for the + * similarity of the 0F38 entries with map 6. + */ + d = twobyte_table[0x38].desc; + break; } } else if ( s->ext < ext_8f08 + ARRAY_SIZE(xop_table) ) @@ -1473,6 +1487,28 @@ int x86emul_decode(struct x86_emulate_state *s, disp8scale = decode_disp8scale(twobyte_table[b].d8s, s); break; + case ext_map6: + /* + * Re-use ext0f38_table[] here, for the similarity of the entries + * valid in map 6. + */ + d = ext0f38_table[b].to_mem ? DstMem | SrcReg + : DstReg | SrcMem; + if ( ext0f38_table[b].two_op ) + d |= TwoOp; + s->simd_size = ext0f38_table[b].simd_size ?: simd_other; + + switch ( b ) + { + default: + if ( s->evex.pfx == vex_66 ) + s->fp16 = true; + break; + } + + disp8scale = decode_disp8scale(ext0f38_table[b].d8s, s); + break; + case ext_8f09: if ( ext8f09_table[b].two_op ) d |= TwoOp; @@ -1692,6 +1728,7 @@ int x86emul_decode(struct x86_emulate_state *s, break; case ext_map5: + case ext_map6: case ext_8f09: case ext_8f0a: break; diff --git a/xen/arch/x86/x86_emulate/private.h b/xen/arch/x86/x86_emulate/private.h index 8775c7019f..719dad59cd 100644 --- a/xen/arch/x86/x86_emulate/private.h +++ b/xen/arch/x86/x86_emulate/private.h @@ -184,6 +184,7 @@ enum vex_opcx { vex_0f38, vex_0f3a, evex_map5 = 5, + evex_map6, }; enum vex_pfx { @@ -239,6 +240,7 @@ struct x86_emulate_state { ext_0f38 = vex_0f38, ext_0f3a = vex_0f3a, ext_map5 = evex_map5, + ext_map6 = evex_map6, /* * For XOP use values such that the respective instruction field * can be used without adjustment. diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c index fa3406ee91..8fb695c464 100644 --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -7781,6 +7781,49 @@ x86_emulate( generate_exception_if(evex.w, X86_EXC_UD); goto avx512f_all_fp; + case X86EMUL_OPC_EVEX_66(6, 0x96): /* vfmaddsub132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0x97): /* vfmsubadd132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0x98): /* vfmadd132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0x9a): /* vfmsub132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0x9c): /* vfnmadd132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0x9e): /* vfnmsub132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xa6): /* vfmaddsub213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xa7): /* vfmsubadd213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xa8): /* vfmadd213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xaa): /* vfmsub213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xac): /* vfnmadd213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xae): /* vfnmsub213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xb6): /* vfmaddsub231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xb7): /* vfmsubadd231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xb8): /* vfmadd231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xba): /* vfmsub231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xbc): /* vfnmadd231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xbe): /* vfnmsub231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ + host_and_vcpu_must_have(avx512_fp16); + generate_exception_if(evex.w, X86_EXC_UD); + if ( ea.type != OP_REG || !evex.brs ) + avx512_vlen_check(false); + goto simd_zmm; + + case X86EMUL_OPC_EVEX_66(6, 0x99): /* vfmadd132sh xmm/m16,xmm,xmm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0x9b): /* vfmsub132sh xmm/m16,xmm,xmm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0x9d): /* vfnmadd132sh xmm/m16,xmm,xmm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0x9f): /* vfnmsub132sh xmm/m16,xmm,xmm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xa9): /* vfmadd213sh xmm/m16,xmm,xmm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xab): /* vfmsub213sh xmm/m16,xmm,xmm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xad): /* vfnmadd213sh xmm/m16,xmm,xmm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xaf): /* vfnmsub213sh xmm/m16,xmm,xmm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xb9): /* vfmadd231sh xmm/m16,xmm,xmm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xbb): /* vfmsub231sh xmm/m16,xmm,xmm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xbd): /* vfnmadd231sh xmm/m16,xmm,xmm{k} */ + case X86EMUL_OPC_EVEX_66(6, 0xbf): /* vfnmsub231sh xmm/m16,xmm,xmm{k} */ + host_and_vcpu_must_have(avx512_fp16); + generate_exception_if(evex.w || (ea.type != OP_REG && evex.brs), + X86_EXC_UD); + if ( !evex.brs ) + avx512_vlen_check(true); + goto simd_zmm; + case X86EMUL_OPC_XOP(08, 0x85): /* vpmacssww xmm,xmm/m128,xmm,xmm */ case X86EMUL_OPC_XOP(08, 0x86): /* vpmacsswd xmm,xmm/m128,xmm,xmm */ case X86EMUL_OPC_XOP(08, 0x87): /* vpmacssdql xmm,xmm/m128,xmm,xmm */ diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h index a0ed6f050b..40d5054fb7 100644 --- a/xen/arch/x86/x86_emulate/x86_emulate.h +++ b/xen/arch/x86/x86_emulate/x86_emulate.h @@ -611,6 +611,7 @@ struct x86_emulate_ctxt * 0x0f38xxxx for 0f38-prefixed opcodes (or their VEX/EVEX equivalents) * 0x0f3axxxx for 0f3a-prefixed opcodes (or their VEX/EVEX equivalents) * 0x5xxxx for Map5 opcodes (EVEX only) + * 0x6xxxx for Map6 opcodes (EVEX only) * 0x8f08xxxx for 8f/8-prefixed XOP opcodes * 0x8f09xxxx for 8f/9-prefixed XOP opcodes * 0x8f0axxxx for 8f/a-prefixed XOP opcodes -- generated by git-patchbot for /home/xen/git/xen.git#master
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |