|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [xen master] x86emul: handle AVX512-FP16 Map5 arithmetic insns
commit 6b4d5c26a6234943d8b7307df3e66f997c6fe74e
Author: Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Mon Jun 5 14:56:25 2023 +0200
Commit: Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Mon Jun 5 14:56:25 2023 +0200
x86emul: handle AVX512-FP16 Map5 arithmetic insns
This encoding space is a very sparse clone of the "twobyte" one. Re-use
that table, as the entries corresponding to invalid opcodes in Map5 are
simply benign with simd_size forced to other than simd_none (preventing
undue memory reads in SrcMem handling early in x86_emulate()).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
tools/tests/x86_emulator/evex-disp8.c | 26 +++++++++++++++++++++---
tools/tests/x86_emulator/predicates.c | 19 +++++++++++++++++
xen/arch/x86/x86_emulate/decode.c | 37 ++++++++++++++++++++++++++++++++--
xen/arch/x86/x86_emulate/private.h | 6 ++++--
xen/arch/x86/x86_emulate/x86_emulate.c | 25 ++++++++++++++++++++++-
xen/arch/x86/x86_emulate/x86_emulate.h | 1 +
6 files changed, 106 insertions(+), 8 deletions(-)
diff --git a/tools/tests/x86_emulator/evex-disp8.c b/tools/tests/x86_emulator/evex-disp8.c
index 63ed0fca40..f526321198 100644
--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -6,7 +6,7 @@
struct test {
const char *mnemonic;
unsigned int opc:8;
- unsigned int spc:2;
+ unsigned int spc:3;
unsigned int pfx:2;
unsigned int vsz:3;
unsigned int esz:4;
@@ -19,6 +19,10 @@ enum spc {
SPC_0f,
SPC_0f38,
SPC_0f3a,
+ SPC_unused4,
+ SPC_map5,
+ SPC_map6,
+ SPC_unused7,
};
enum pfx {
@@ -603,16 +607,32 @@ static const struct test avx512_vpopcntdq_all[] = {
};
static const struct test avx512_fp16_all[] = {
+ INSN(addph, , map5, 58, vl, fp16, vl),
+ INSN(addsh, f3, map5, 58, el, fp16, el),
INSN(cmpph, , 0f3a, c2, vl, fp16, vl),
INSN(cmpsh, f3, 0f3a, c2, el, fp16, el),
+ INSN(comish, , map5, 2f, el, fp16, el),
+ INSN(divph, , map5, 5e, vl, fp16, vl),
+ INSN(divsh, f3, map5, 5e, el, fp16, el),
INSN(fpclassph, , 0f3a, 66, vl, fp16, vl),
INSN(fpclasssh, , 0f3a, 67, el, fp16, el),
INSN(getmantph, , 0f3a, 26, vl, fp16, vl),
INSN(getmantsh, , 0f3a, 27, el, fp16, el),
+ INSN(maxph, , map5, 5f, vl, fp16, vl),
+ INSN(maxsh, f3, map5, 5f, el, fp16, el),
+ INSN(minph, , map5, 5d, vl, fp16, vl),
+ INSN(minsh, f3, map5, 5d, el, fp16, el),
+ INSN(mulph, , map5, 59, vl, fp16, vl),
+ INSN(mulsh, f3, map5, 59, el, fp16, el),
INSN(reduceph, , 0f3a, 56, vl, fp16, vl),
INSN(reducesh, , 0f3a, 57, el, fp16, el),
INSN(rndscaleph, , 0f3a, 08, vl, fp16, vl),
INSN(rndscalesh, , 0f3a, 0a, el, fp16, el),
+ INSN(sqrtph, , map5, 51, vl, fp16, vl),
+ INSN(sqrtsh, f3, map5, 51, el, fp16, el),
+ INSN(subph, , map5, 5c, vl, fp16, vl),
+ INSN(subsh, f3, map5, 5c, el, fp16, el),
+ INSN(ucomish, , map5, 2e, el, fp16, el),
};
static const struct test gfni_all[] = {
@@ -713,8 +733,8 @@ static void test_one(const struct test *test, enum vl vl,
union evex {
uint8_t raw[3];
struct {
- uint8_t opcx:2;
- uint8_t mbz:2;
+ uint8_t opcx:3;
+ uint8_t mbz:1;
uint8_t R:1;
uint8_t b:1;
uint8_t x:1;
diff --git a/tools/tests/x86_emulator/predicates.c b/tools/tests/x86_emulator/predicates.c
index 50b5c75cbf..746bc2dec1 100644
--- a/tools/tests/x86_emulator/predicates.c
+++ b/tools/tests/x86_emulator/predicates.c
@@ -2044,6 +2044,23 @@ static const struct evex {
{ { 0xc2 }, 3, T, R, pfx_f3, W0, LIG }, /* vcmpsh */
{ { 0xce }, 3, T, R, pfx_66, W1, Ln }, /* vgf2p8affineqb */
{ { 0xcf }, 3, T, R, pfx_66, W1, Ln }, /* vgf2p8affineinvqb */
+}, evex_map5[] = {
+ { { 0x2e }, 2, T, R, pfx_no, W0, LIG }, /* vucomish */
+ { { 0x2f }, 2, T, R, pfx_no, W0, LIG }, /* vcomish */
+ { { 0x51 }, 2, T, R, pfx_no, W0, Ln }, /* vsqrtph */
+ { { 0x51 }, 2, T, R, pfx_f3, W0, LIG }, /* vsqrtsh */
+ { { 0x58 }, 2, T, R, pfx_no, W0, Ln }, /* vaddph */
+ { { 0x58 }, 2, T, R, pfx_f3, W0, LIG }, /* vaddsh */
+ { { 0x59 }, 2, T, R, pfx_no, W0, Ln }, /* vmulph */
+ { { 0x59 }, 2, T, R, pfx_f3, W0, LIG }, /* vmulsh */
+ { { 0x5c }, 2, T, R, pfx_no, W0, Ln }, /* vsubph */
+ { { 0x5c }, 2, T, R, pfx_f3, W0, LIG }, /* vsubsh */
+ { { 0x5d }, 2, T, R, pfx_no, W0, Ln }, /* vminph */
+ { { 0x5d }, 2, T, R, pfx_f3, W0, LIG }, /* vminsh */
+ { { 0x5e }, 2, T, R, pfx_no, W0, Ln }, /* vdivph */
+ { { 0x5e }, 2, T, R, pfx_f3, W0, LIG }, /* vdivsh */
+ { { 0x5f }, 2, T, R, pfx_no, W0, Ln }, /* vmaxph */
+ { { 0x5f }, 2, T, R, pfx_f3, W0, LIG }, /* vmaxsh */
};
static const struct {
@@ -2053,6 +2070,8 @@ static const struct {
{ evex_0f, ARRAY_SIZE(evex_0f) },
{ evex_0f38, ARRAY_SIZE(evex_0f38) },
{ evex_0f3a, ARRAY_SIZE(evex_0f3a) },
+ { NULL, 0 },
+ { evex_map5, ARRAY_SIZE(evex_map5) },
};
#undef Wn
diff --git a/xen/arch/x86/x86_emulate/decode.c b/xen/arch/x86/x86_emulate/decode.c
index 13d71bd7e1..7d97c9ecb0 100644
--- a/xen/arch/x86/x86_emulate/decode.c
+++ b/xen/arch/x86/x86_emulate/decode.c
@@ -1208,9 +1208,22 @@ int x86emul_decode(struct x86_emulate_state *s,
opcode |= MASK_INSR(0x0f3a, X86EMUL_OPC_EXT_MASK);
d = twobyte_table[0x3a].desc;
break;
+
+ case evex_map5:
+ if ( !evex_encoded() )
+ {
default:
- rc = X86EMUL_UNRECOGNIZED;
- goto done;
+ rc = X86EMUL_UNRECOGNIZED;
+ goto done;
+ }
+ opcode |= MASK_INSR(5, X86EMUL_OPC_EXT_MASK);
+ /*
+ * Re-use twobyte_table[] here, for the similarity of
+ * the entries valid in map 5.
+ */
+ d = twobyte_table[b].desc;
+ s->simd_size = twobyte_table[b].size ?: simd_other;
+ break;
}
}
else if ( s->ext < ext_8f08 + ARRAY_SIZE(xop_table) )
@@ -1432,6 +1445,25 @@ int x86emul_decode(struct x86_emulate_state *s,
}
break;
+ case ext_map5:
+ switch ( b )
+ {
+ default:
+ if ( !(s->evex.pfx & VEX_PREFIX_DOUBLE_MASK) )
+ s->fp16 = true;
+ break;
+
+ case 0x2e: case 0x2f: /* v{,u}comish */
+ if ( !s->evex.pfx )
+ s->fp16 = true;
+ s->simd_size = simd_none;
+ break;
+ }
+
+ /* Like above re-use twobyte_table[] here. */
+ disp8scale = decode_disp8scale(twobyte_table[b].d8s, s);
+ break;
+
case ext_8f09:
if ( ext8f09_table[b].two_op )
d |= TwoOp;
@@ -1650,6 +1682,7 @@ int x86emul_decode(struct x86_emulate_state *s,
s->simd_size = ext8f08_table[b].simd_size;
break;
+ case ext_map5:
case ext_8f09:
case ext_8f0a:
break;
diff --git a/xen/arch/x86/x86_emulate/private.h b/xen/arch/x86/x86_emulate/private.h
index 3644b5fbee..8775c7019f 100644
--- a/xen/arch/x86/x86_emulate/private.h
+++ b/xen/arch/x86/x86_emulate/private.h
@@ -183,6 +183,7 @@ enum vex_opcx {
vex_0f = vex_none + 1,
vex_0f38,
vex_0f3a,
+ evex_map5 = 5,
};
enum vex_pfx {
@@ -211,8 +212,8 @@ union vex {
union evex {
uint8_t raw[3];
struct { /* SDM names */
- uint8_t opcx:2; /* mm */
- uint8_t mbz:2;
+ uint8_t opcx:3; /* mmm */
+ uint8_t mbz:1;
uint8_t R:1; /* R' */
uint8_t b:1; /* B */
uint8_t x:1; /* X */
@@ -237,6 +238,7 @@ struct x86_emulate_state {
ext_0f = vex_0f,
ext_0f38 = vex_0f38,
ext_0f3a = vex_0f3a,
+ ext_map5 = evex_map5,
/*
* For XOP use values such that the respective instruction field
* can be used without adjustment.
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index 5f1c439346..09f1f9ace5 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -3723,6 +3723,13 @@ x86_emulate(
ASSERT(!state->simd_size);
break;
+#ifndef X86EMUL_NO_SIMD
+
+ case X86EMUL_OPC_EVEX(5, 0x2e): /* vucomish xmm/m16,xmm */
+ case X86EMUL_OPC_EVEX(5, 0x2f): /* vcomish xmm/m16,xmm */
+ host_and_vcpu_must_have(avx512_fp16);
+ generate_exception_if(evex.w, X86_EXC_UD);
+ /* fall through */
CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x2e): /* vucomis{s,d} xmm/mem,xmm */
CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x2f): /* vcomis{s,d} xmm/mem,xmm */
generate_exception_if((evex.reg != 0xf || !evex.RX || evex.opmsk ||
@@ -3735,9 +3742,11 @@ x86_emulate(
get_fpu(X86EMUL_FPU_zmm);
opc = init_evex(stub);
- op_bytes = 4 << evex.w;
+ op_bytes = 2 << (!state->fp16 + evex.w);
goto vcomi;
+#endif
+
case X86EMUL_OPC(0x0f, 0x30): /* wrmsr */
generate_exception_if(!mode_ring0(), X86_EXC_GP, 0);
fail_if(ops->write_msr == NULL);
@@ -7739,6 +7748,20 @@ x86_emulate(
#ifndef X86EMUL_NO_SIMD
+ case X86EMUL_OPC_EVEX_F3(5, 0x51): /* vsqrtsh xmm/m16,xmm,xmm{k} */
+ d &= ~TwoOp;
+ /* fall through */
+ case X86EMUL_OPC_EVEX(5, 0x51): /* vsqrtph [xyz]mm/mem,[xyz]mm{k} */
+ CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x58): /* vadd{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x59): /* vmul{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x5c): /* vsub{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x5d): /* vmin{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x5e): /* vdiv{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x5f): /* vmax{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ host_and_vcpu_must_have(avx512_fp16);
+ generate_exception_if(evex.w, X86_EXC_UD);
+ goto avx512f_all_fp;
+
case X86EMUL_OPC_XOP(08, 0x85): /* vpmacssww xmm,xmm/m128,xmm,xmm */
case X86EMUL_OPC_XOP(08, 0x86): /* vpmacsswd xmm,xmm/m128,xmm,xmm */
case X86EMUL_OPC_XOP(08, 0x87): /* vpmacssdql xmm,xmm/m128,xmm,xmm */
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h
index 6afbc877f1..a0ed6f050b 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -610,6 +610,7 @@ struct x86_emulate_ctxt
* 0x0fxxxx for 0f-prefixed opcodes (or their VEX/EVEX equivalents)
* 0x0f38xxxx for 0f38-prefixed opcodes (or their VEX/EVEX equivalents)
* 0x0f3axxxx for 0f3a-prefixed opcodes (or their VEX/EVEX equivalents)
+ * 0x5xxxx for Map5 opcodes (EVEX only)
* 0x8f08xxxx for 8f/8-prefixed XOP opcodes
* 0x8f09xxxx for 8f/9-prefixed XOP opcodes
* 0x8f0axxxx for 8f/a-prefixed XOP opcodes
--
generated by git-patchbot for /home/xen/git/xen.git#master
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |