|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH v4 09/16] x86emul: support AVX10.2 scalar compare insns
Simply clone code from their V{,U}COMIS{S,D,H} counterparts.
While there, drop a redundant EVEX.W check from V{,U}COMISH handling.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
SDE: -dmr / -future
---
v4: Update encodings to latest spec version. Series re-ordering
adjustments.
v3: New.
--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -81,6 +81,7 @@ enum esz {
ESZ_w,
ESZ_bw,
ESZ_fp16,
+#define ESZ_bf16 ESZ_fp16
};
#ifndef __i386__
@@ -720,6 +721,14 @@ static const struct test vpclmulqdq_all[
INSN(pclmulqdq, 66, 0f3a, 44, vl, q_nb, vl)
};
+static const struct test avx10_2_all[] = {
+ INSN(comisbf16, 66, map5, 2f, el, bf16, el),
+ INSN_SFP(comx, 0f, 2f),
+ INSN(comxsh, f3, map5, 2f, el, fp16, el),
+ INSN_SFP(ucomx, 0f, 2e),
+ INSN(ucomxsh, f3, map5, 2e, el, fp16, el),
+};
+
static const struct test movrs_all[] = {
INSN(movrsb, f2, map5, 6f, vl, b, vl),
INSN(movrsd, f3, map5, 6f, vl, d_nb, vl),
@@ -1154,4 +1163,6 @@ void evex_disp8_test(void *instr, struct
run(ctxt->addr_size == 64 && cpu_has_movrs, movrs, all);
run(cpu_has_sm4, sm4, all);
}
+
+ run(cpu_has_avx10_2, avx10_2, all);
}
--- a/tools/tests/x86_emulator/predicates.c
+++ b/tools/tests/x86_emulator/predicates.c
@@ -1682,8 +1682,12 @@ static const struct evex {
{ { 0x2d }, 2, T, R, pfx_f2, Wn, LIG }, /* vcvtsd2si */
{ { 0x2e }, 2, T, R, pfx_no, W0, LIG }, /* vucomiss */
{ { 0x2e }, 2, T, R, pfx_66, W1, LIG }, /* vucomisd */
+ { { 0x2e }, 2, T, R, pfx_f2, W1, LIG }, /* vucomxsd */
+ { { 0x2e }, 2, T, R, pfx_f3, W0, LIG }, /* vucomxss */
{ { 0x2f }, 2, T, R, pfx_no, W0, LIG }, /* vcomiss */
{ { 0x2f }, 2, T, R, pfx_66, W1, LIG }, /* vcomisd */
+ { { 0x2f }, 2, T, R, pfx_f2, W1, LIG }, /* vcomxsd */
+ { { 0x2f }, 2, T, R, pfx_f3, W0, LIG }, /* vcomxss */
{ { 0x51 }, 2, T, R, pfx_no, W0, Ln }, /* vsqrtps */
{ { 0x51 }, 2, T, R, pfx_66, W1, Ln }, /* vsqrtpd */
{ { 0x51 }, 2, T, R, pfx_f3, W0, LIG }, /* vsqrtss */
@@ -2102,7 +2106,10 @@ static const struct evex {
{ { 0x2c }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvttsh2si */
{ { 0x2d }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvtsh2si */
{ { 0x2e }, 2, T, R, pfx_no, W0, LIG }, /* vucomish */
+ { { 0x2e }, 2, T, R, pfx_f3, W0, LIG }, /* vucomxsh */
{ { 0x2f }, 2, T, R, pfx_no, W0, LIG }, /* vcomish */
+ { { 0x2f }, 2, T, R, pfx_66, W0, LIG }, /* vcomisbf16 */
+ { { 0x2f }, 2, T, R, pfx_f3, W0, LIG }, /* vcomxsh */
{ { 0x51 }, 2, T, R, pfx_no, W0, Ln }, /* vsqrtph */
{ { 0x51 }, 2, T, R, pfx_f3, W0, LIG }, /* vsqrtsh */
{ { 0x58 }, 2, T, R, pfx_no, W0, Ln }, /* vaddph */
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -215,6 +215,8 @@ void wrpkru(unsigned int val);
#define cpu_has_avx_vnni_int16 (cpu_policy.feat.avx_vnni_int16 && \
xcr0_mask(6))
#define cpu_has_avx10 (cpu_policy.feat.avx10 && xcr0_mask(0xe6))
+#define cpu_has_avx10_2 (cpu_policy.avx10.version >= 2 && \
+ xcr0_mask(0xe6))
#define cpu_has_xgetbv1 (cpu_has_xsave && cpu_policy.xstate.xgetbv1)
--- a/xen/arch/x86/x86_emulate/decode.c
+++ b/xen/arch/x86/x86_emulate/decode.c
@@ -1515,9 +1515,8 @@ int x86emul_decode(struct x86_emulate_st
s->fp16 = true;
break;
- case 0x2e: case 0x2f: /* v{,u}comish */
- if ( !s->evex.pfx )
- s->fp16 = true;
+ case 0x2e: case 0x2f: /* v{,u}com{i,x}sh, vcomisbf16 */
+ s->fp16 = true;
s->simd_size = simd_none;
break;
--- a/xen/arch/x86/x86_emulate/private.h
+++ b/xen/arch/x86/x86_emulate/private.h
@@ -318,7 +318,7 @@ struct x86_emulate_state {
bool lock_prefix;
bool not_64bit; /* Instruction not available in 64bit. */
bool fpu_ctrl; /* Instruction is an FPU control one. */
- bool fp16; /* Instruction has half-precision FP source operand. */
+ bool fp16; /* Instruction has half-precision FP or BF16 source. */
opcode_desc_t desc;
union vex vex;
union evex evex;
@@ -609,10 +609,10 @@ amd_like(const struct x86_emulate_ctxt *
#define vcpu_has_avx_vnni_int16() (ctxt->cpuid->feat.avx_vnni_int16)
#define vcpu_has_user_msr() (ctxt->cpuid->feat.user_msr)
-#define vcpu_has_avx10() (ctxt->cpuid->feat.avx10)
+#define vcpu_has_avx10(minor) (ctxt->cpuid->avx10.version >= (minor))
-#define vcpu_must_have(feat) \
- generate_exception_if(!vcpu_has_##feat(), X86_EXC_UD)
+#define vcpu_must_have(feat, ...) \
+ generate_exception_if(!vcpu_has_##feat(__VA_ARGS__), X86_EXC_UD)
#ifdef __XEN__
/*
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -3825,7 +3825,6 @@ x86_emulate(
case X86EMUL_OPC_EVEX(5, 0x2e): /* vucomish xmm/m16,xmm */
case X86EMUL_OPC_EVEX(5, 0x2f): /* vcomish xmm/m16,xmm */
visa_check(_fp16);
- generate_exception_if(evex.w, X86_EXC_UD);
/* fall through */
CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x2e): /* vucomis{s,d} xmm/mem,xmm */
CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x2f): /* vcomis{s,d} xmm/mem,xmm */
@@ -3834,6 +3833,7 @@ x86_emulate(
evex.w != evex.pfx),
X86_EXC_UD);
visa_check(f);
+ vcomi_evex:
if ( !evex.brs )
avx512_vlen_check(true);
get_fpu(X86EMUL_FPU_zmm);
@@ -3842,6 +3842,17 @@ x86_emulate(
op_bytes = 2 << (!state->fp16 + evex.w);
goto vcomi;
+ CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2e): /* vucomxs{s,d} xmm/mem,xmm */
+ CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2f): /* vcomxs{s,d} xmm/mem,xmm */
+ case X86EMUL_OPC_EVEX_F3(5, 0x2e): /* vucomxsh xmm/m16,xmm */
+ case X86EMUL_OPC_EVEX_66(5, 0x2f): /* vcomisbf16 xmm/m16,xmm */
+ case X86EMUL_OPC_EVEX_F3(5, 0x2f): /* vcomxsh xmm/m16,xmm */
+ generate_exception_if((evex.reg != 0xf || !evex.RX || evex.opmsk ||
+ evex.w != (evex.pfx == vex_f2)),
+ X86_EXC_UD);
+ vcpu_must_have(avx10, 2);
+ goto vcomi_evex;
+
#endif
case X86EMUL_OPC(0x0f, 0x30): /* wrmsr */
@@ -6831,7 +6842,7 @@ x86_emulate(
case X86EMUL_OPC_EVEX_F3(0x0f38, 0xda): /* vsm4key4 [xyz]mm/mem,[xyz]mm,[xyz]mm */
case X86EMUL_OPC_EVEX_F2(0x0f38, 0xda): /* vsm4rnds4 [xyz]mm/mem,[xyz]mm,[xyz]mm */
- vcpu_must_have(avx10);
+ vcpu_must_have(avx10, 1);
vcpu_must_have(sm4);
generate_exception_if(evex.w || evex.brs || evex.opmsk, X86_EXC_UD);
avx512_vlen_check(false);
@@ -7905,7 +7916,7 @@ x86_emulate(
fallthrough;
case X86EMUL_OPC_EVEX_F3(5, 0x6f): /* vmovrs{d,q} mem,[xyz]mm{k} */
generate_exception_if(ea.type != OP_MEM || evex.brs, X86_EXC_UD);
- vcpu_must_have(avx10);
+ vcpu_must_have(avx10, 1);
vcpu_must_have(movrs);
avx512_vlen_check(false);
op_bytes = 16 << evex.lr;
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |