|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen staging] x86emul: support basic AVX512 moves
commit ae9bfa9bc1a05686e68d4d87908312cc76f45c40
Author: Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Tue Nov 20 15:05:12 2018 +0100
Commit: Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Tue Nov 20 15:05:12 2018 +0100
x86emul: support basic AVX512 moves
Note: SDM Vol 2 rev 067 is not really consistent about EVEX.L'L for LIG
insns - the only place where this is made explicit is a table in
the section titled "Vector Length Orthogonality": While they
tolerate 0, 1, and 2, a value of 3 uniformly leads to #UD.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
tools/tests/x86_emulator/test_x86_emulator.c | 402 +++++++++++++++++++++++++++
tools/tests/x86_emulator/x86-emulate.c | 1 +
tools/tests/x86_emulator/x86-emulate.h | 1 +
xen/arch/x86/x86_emulate/x86_emulate.c | 335 ++++++++++++++++++++--
xen/arch/x86/x86_emulate/x86_emulate.h | 1 +
xen/include/asm-x86/cpufeature.h | 1 +
6 files changed, 720 insertions(+), 21 deletions(-)
diff --git a/tools/tests/x86_emulator/test_x86_emulator.c
b/tools/tests/x86_emulator/test_x86_emulator.c
index ed5a3d8853..a0dd1a2cc9 100644
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -1985,6 +1985,53 @@ int main(int argc, char **argv)
else
printf("skipped\n");
+ printf("%-40s", "Testing {evex} vmovq %xmm1,32(%edx)...");
+ if ( stack_exec && cpu_has_avx512f )
+ {
+ decl_insn(evex_vmovq_to_mem);
+
+ asm volatile ( "pcmpgtb %%xmm1, %%xmm1\n"
+ put_insn(evex_vmovq_to_mem, "%{evex%} vmovq %%xmm1, 32(%0)")
+ :: "d" (NULL) );
+
+ memset(res, 0xdb, 64);
+ set_insn(evex_vmovq_to_mem);
+ regs.ecx = 0;
+ regs.edx = (unsigned long)res;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(evex_vmovq_to_mem) ||
+ *((uint64_t *)res + 4) ||
+ memcmp(res, res + 10, 24) ||
+ memcmp(res, res + 6, 8) )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
+ printf("%-40s", "Testing {evex} vmovq 32(%edx),%xmm0...");
+ if ( stack_exec && cpu_has_avx512f )
+ {
+ decl_insn(evex_vmovq_from_mem);
+
+ asm volatile ( "pcmpeqb %%xmm0, %%xmm0\n"
+ put_insn(evex_vmovq_from_mem, "%{evex%} vmovq 32(%0), %%xmm0")
+ :: "d" (NULL) );
+
+ set_insn(evex_vmovq_from_mem);
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(evex_vmovq_from_mem) )
+ goto fail;
+ asm ( "vmovq %1, %%xmm1\n\t"
+ "vpcmpeqq %%zmm0, %%zmm1, %%k0\n"
+ "kmovw %%k0, %0" : "=r" (rc) : "m" (res[8]) );
+ if ( rc != 0xff )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
printf("%-40s", "Testing movdqu %xmm2,(%ecx)...");
if ( stack_exec && cpu_has_sse2 )
{
@@ -2085,6 +2132,118 @@ int main(int argc, char **argv)
else
printf("skipped\n");
+ printf("%-40s", "Testing vmovdqu32 %zmm2,(%ecx){%k1}...");
+ if ( stack_exec && cpu_has_avx512f )
+ {
+ decl_insn(vmovdqu32_to_mem);
+
+ memset(res, 0x55, 128);
+
+ asm volatile ( "vpcmpeqd %%ymm2, %%ymm2, %%ymm2\n\t"
+ "kmovw %1,%%k1\n"
+ put_insn(vmovdqu32_to_mem,
+ "vmovdqu32 %%zmm2, (%0)%{%%k1%}")
+ :: "c" (NULL), "rm" (res[0]) );
+ set_insn(vmovdqu32_to_mem);
+
+ regs.ecx = (unsigned long)res;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( (rc != X86EMUL_OKAY) || memcmp(res + 16, res + 24, 32) ||
+ !check_eip(vmovdqu32_to_mem) )
+ goto fail;
+
+ res[16] = ~0; res[18] = ~0; res[20] = ~0; res[22] = ~0;
+ res[24] = 0; res[26] = 0; res[28] = 0; res[30] = 0;
+ if ( memcmp(res, res + 16, 64) )
+ goto fail;
+
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
+ printf("%-40s", "Testing vmovdqu32 64(%edx),%zmm2{%k2}...");
+ if ( stack_exec && cpu_has_avx512f )
+ {
+ decl_insn(vmovdqu32_from_mem);
+
+ asm volatile ( "knotw %%k1, %%k2\n"
+ put_insn(vmovdqu32_from_mem,
+ "vmovdqu32 64(%0), %%zmm2%{%%k2%}")
+ :: "d" (NULL) );
+
+ set_insn(vmovdqu32_from_mem);
+ regs.ecx = 0;
+ regs.edx = (unsigned long)res;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(vmovdqu32_from_mem) )
+ goto fail;
+ asm ( "vpcmpeqd %1, %%zmm2, %%k0\n\t"
+ "kmovw %%k0, %0" : "=r" (rc) : "m" (res[0]) );
+ if ( rc != 0xffff )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
+ printf("%-40s", "Testing vmovdqu16 %zmm3,(%ecx){%k1}...");
+ if ( stack_exec && cpu_has_avx512bw )
+ {
+ decl_insn(vmovdqu16_to_mem);
+
+ memset(res, 0x55, 128);
+
+ asm volatile ( "vpcmpeqw %%ymm3, %%ymm3, %%ymm3\n\t"
+ "kmovd %1,%%k1\n"
+ put_insn(vmovdqu16_to_mem,
+ "vmovdqu16 %%zmm3, (%0)%{%%k1%}")
+ :: "c" (NULL), "rm" (res[0]) );
+ set_insn(vmovdqu16_to_mem);
+
+ regs.ecx = (unsigned long)res;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( (rc != X86EMUL_OKAY) || memcmp(res + 16, res + 24, 32) ||
+ !check_eip(vmovdqu16_to_mem) )
+ goto fail;
+
+ for ( i = 16; i < 24; ++i )
+ res[i] |= 0x0000ffff;
+ for ( ; i < 32; ++i )
+ res[i] &= 0xffff0000;
+ if ( memcmp(res, res + 16, 64) )
+ goto fail;
+
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
+ printf("%-40s", "Testing vmovdqu16 64(%edx),%zmm3{%k2}...");
+ if ( stack_exec && cpu_has_avx512bw )
+ {
+ decl_insn(vmovdqu16_from_mem);
+
+ asm volatile ( "knotd %%k1, %%k2\n"
+ put_insn(vmovdqu16_from_mem,
+ "vmovdqu16 64(%0), %%zmm3%{%%k2%}")
+ :: "d" (NULL) );
+
+ set_insn(vmovdqu16_from_mem);
+ regs.ecx = 0;
+ regs.edx = (unsigned long)res;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(vmovdqu16_from_mem) )
+ goto fail;
+ asm ( "vpcmpeqw %1, %%zmm3, %%k0\n\t"
+ "kmovd %%k0, %0" : "=r" (rc) : "m" (res[0]) );
+ if ( rc != 0xffffffff )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
printf("%-40s", "Testing movsd %xmm5,(%ecx)...");
memset(res, 0x77, 64);
memset(res + 10, 0x66, 8);
@@ -2186,6 +2345,71 @@ int main(int argc, char **argv)
else
printf("skipped\n");
+ printf("%-40s", "Testing vmovsd %xmm5,16(%ecx){%k3}...");
+ memset(res, 0x88, 128);
+ memset(res + 20, 0x77, 8);
+ if ( stack_exec && cpu_has_avx512f )
+ {
+ decl_insn(vmovsd_masked_to_mem);
+
+ asm volatile ( "vbroadcastsd %0, %%ymm5\n\t"
+ "kxorw %%k3, %%k3, %%k3\n"
+ put_insn(vmovsd_masked_to_mem,
+ "vmovsd %%xmm5, 16(%1)%{%%k3%}")
+ :: "m" (res[20]), "c" (NULL) );
+
+ set_insn(vmovsd_masked_to_mem);
+ regs.ecx = 0;
+ regs.edx = 0;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( (rc != X86EMUL_OKAY) || !check_eip(vmovsd_masked_to_mem) )
+ goto fail;
+
+ asm volatile ( "kmovw %0, %%k3\n" :: "m" (res[20]) );
+
+ set_insn(vmovsd_masked_to_mem);
+ regs.ecx = (unsigned long)res;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( (rc != X86EMUL_OKAY) || !check_eip(vmovsd_masked_to_mem) ||
+ memcmp(res, res + 16, 64) )
+ goto fail;
+
+ printf("okay\n");
+ }
+ else
+ {
+ printf("skipped\n");
+ memset(res + 4, 0x77, 8);
+ }
+
+ printf("%-40s", "Testing vmovaps (%edx),%zmm7{%k3}{z}...");
+ if ( stack_exec && cpu_has_avx512f )
+ {
+ decl_insn(vmovaps_masked_from_mem);
+
+ asm volatile ( "vpcmpeqd %%xmm7, %%xmm7, %%xmm7\n\t"
+ "vbroadcastss %%xmm7, %%zmm7\n"
+ put_insn(vmovaps_masked_from_mem,
+ "vmovaps (%0), %%zmm7%{%%k3%}%{z%}")
+ :: "d" (NULL) );
+
+ set_insn(vmovaps_masked_from_mem);
+ regs.edx = (unsigned long)res;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(vmovaps_masked_from_mem) )
+ goto fail;
+ asm ( "vcmpeqps %1, %%zmm7, %%k0\n\t"
+ "vxorps %%xmm0, %%xmm0, %%xmm0\n\t"
+ "vcmpeqps %%zmm0, %%zmm7, %%k1\n\t"
+ "kxorw %%k1, %%k0, %%k0\n\t"
+ "kmovw %%k0, %0" : "=r" (rc) : "m" (res[16]) );
+ if ( rc != 0xffff )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
printf("%-40s", "Testing movd %mm3,32(%ecx)...");
if ( stack_exec && cpu_has_mmx )
{
@@ -2341,6 +2565,55 @@ int main(int argc, char **argv)
else
printf("skipped\n");
+ printf("%-40s", "Testing {evex} vmovd %xmm3,32(%ecx)...");
+ if ( stack_exec && cpu_has_avx512f )
+ {
+ decl_insn(evex_vmovd_to_mem);
+
+ asm volatile ( "pcmpeqb %%xmm3, %%xmm3\n"
+ put_insn(evex_vmovd_to_mem,
+ "%{evex%} vmovd %%xmm3, 32(%0)")
+ :: "c" (NULL) );
+
+ memset(res, 0xbd, 64);
+ set_insn(evex_vmovd_to_mem);
+ regs.ecx = (unsigned long)res;
+ regs.edx = 0;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(evex_vmovd_to_mem) ||
+ res[8] + 1 ||
+ memcmp(res, res + 9, 28) ||
+ memcmp(res, res + 6, 8) )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
+ printf("%-40s", "Testing {evex} vmovd 32(%ecx),%xmm4...");
+ if ( stack_exec && cpu_has_avx512f )
+ {
+ decl_insn(evex_vmovd_from_mem);
+
+ asm volatile ( "pcmpeqb %%xmm4, %%xmm4\n"
+ put_insn(evex_vmovd_from_mem,
+ "%{evex%} vmovd 32(%0), %%xmm4")
+ :: "c" (NULL) );
+
+ set_insn(evex_vmovd_from_mem);
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(evex_vmovd_from_mem) )
+ goto fail;
+ asm ( "vmovd %1, %%xmm0\n\t"
+ "vpcmpeqd %%zmm4, %%zmm0, %%k0\n\t"
+ "kmovw %%k0, %0" : "=r" (rc) : "m" (res[8]) );
+ if ( rc != 0xffff )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
printf("%-40s", "Testing movd %mm3,%ebx...");
if ( stack_exec && cpu_has_mmx )
{
@@ -2507,6 +2780,57 @@ int main(int argc, char **argv)
else
printf("skipped\n");
+ printf("%-40s", "Testing {evex} vmovd %xmm2,%ebx...");
+ if ( stack_exec && cpu_has_avx512f )
+ {
+ decl_insn(evex_vmovd_to_reg);
+
+ /* See comment next to movd above. */
+ asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n"
+ put_insn(evex_vmovd_to_reg,
+ "%{evex%} vmovd %%xmm2, %%ebx")
+ :: );
+
+ set_insn(evex_vmovd_to_reg);
+#ifdef __x86_64__
+ regs.rbx = 0xbdbdbdbdbdbdbdbdUL;
+#else
+ regs.ebx = 0xbdbdbdbdUL;
+#endif
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( (rc != X86EMUL_OKAY) || !check_eip(evex_vmovd_to_reg) ||
+ regs.ebx != 0xffffffff )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
+ printf("%-40s", "Testing {evex} vmovd %ebx,%xmm1...");
+ if ( stack_exec && cpu_has_avx512f )
+ {
+ decl_insn(evex_vmovd_from_reg);
+
+ /* See comment next to movd above. */
+ asm volatile ( "pcmpgtb %%xmm1, %%xmm1\n"
+ put_insn(evex_vmovd_from_reg,
+ "%{evex%} vmovd %%ebx, %%xmm1")
+ :: );
+
+ set_insn(evex_vmovd_from_reg);
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( (rc != X86EMUL_OKAY) || !check_eip(evex_vmovd_from_reg) )
+ goto fail;
+ asm ( "vmovd %1, %%xmm0\n\t"
+ "vpcmpeqd %%zmm1, %%zmm0, %%k0\n\t"
+ "kmovw %%k0, %0" : "=r" (rc) : "m" (res[8]) );
+ if ( rc != 0xffff )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
#ifdef __x86_64__
printf("%-40s", "Testing movq %mm3,32(%ecx)...");
if ( stack_exec && cpu_has_mmx )
@@ -2584,6 +2908,36 @@ int main(int argc, char **argv)
else
printf("skipped\n");
+ printf("%-40s", "Testing {evex} vmovq %xmm11,32(%ecx)...");
+ if ( stack_exec && cpu_has_avx512f )
+ {
+ decl_insn(evex_vmovq_to_mem2);
+
+ asm volatile ( "pcmpeqb %%xmm11, %%xmm11\n"
+#if 0 /* This may not work, as the assembler might pick opcode D6. */
+ put_insn(evex_vmovq_to_mem2,
+ "{evex} vmovq %%xmm11, 32(%0)")
+#else
+ put_insn(evex_vmovq_to_mem2,
+ ".byte 0x62, 0xf1, 0xfd, 0x08, 0x7e, 0x49, 0x04")
+#endif
+ :: "c" (NULL) );
+
+ memset(res, 0xbd, 64);
+ set_insn(evex_vmovq_to_mem2);
+ regs.ecx = (unsigned long)res;
+ regs.edx = 0;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(evex_vmovq_to_mem2) ||
+ *((long *)res + 4) + 1 ||
+ memcmp(res, res + 10, 24) ||
+ memcmp(res, res + 6, 8) )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
printf("%-40s", "Testing movq %mm3,%rbx...");
if ( stack_exec && cpu_has_mmx )
{
@@ -2643,6 +2997,28 @@ int main(int argc, char **argv)
}
else
printf("skipped\n");
+
+ printf("%-40s", "Testing vmovq %xmm22,%rbx...");
+ if ( stack_exec && cpu_has_avx512f )
+ {
+ decl_insn(evex_vmovq_to_reg);
+
+ /* See comment next to movd above. */
+ asm volatile ( "pcmpeqq %%xmm2, %%xmm2\n\t"
+ "vmovq %%xmm2, %%xmm22\n"
+ put_insn(evex_vmovq_to_reg, "vmovq %%xmm22, %%rbx")
+ :: );
+
+ set_insn(evex_vmovq_to_reg);
+ regs.rbx = 0xbdbdbdbdbdbdbdbdUL;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(evex_vmovq_to_reg) ||
+ regs.rbx + 1 )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
#endif
printf("%-40s", "Testing maskmovq %mm4,%mm4...");
@@ -2815,6 +3191,32 @@ int main(int argc, char **argv)
else
printf("skipped\n");
+ printf("%-40s", "Testing vmovntdqa 64(%ecx),%zmm4...");
+ if ( stack_exec && cpu_has_avx512f )
+ {
+ decl_insn(evex_vmovntdqa);
+
+ asm volatile ( "vpxor %%xmm4, %%xmm4, %%xmm4\n"
+ put_insn(evex_vmovntdqa, "vmovntdqa 64(%0), %%zmm4")
+ :: "c" (NULL) );
+
+ set_insn(evex_vmovntdqa);
+ memset(res, 0x55, 192);
+ memset(res + 16, 0xff, 64);
+ regs.ecx = (unsigned long)res;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(evex_vmovntdqa) )
+ goto fail;
+ asm ( "vpbroadcastd %1, %%zmm2\n\t"
+ "vpcmpeqd %%zmm4, %%zmm2, %%k0\n\t"
+ "kmovw %%k0, %0" : "=r" (rc) : "0" (~0) );
+ if ( rc != 0xffff )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
printf("%-40s", "Testing pcmpestri $0x1a,(%ecx),%xmm2...");
if ( stack_exec && cpu_has_sse4_2 )
{
diff --git a/tools/tests/x86_emulator/x86-emulate.c
b/tools/tests/x86_emulator/x86-emulate.c
index a109e93ad3..8132fe7a7c 100644
--- a/tools/tests/x86_emulator/x86-emulate.c
+++ b/tools/tests/x86_emulator/x86-emulate.c
@@ -222,6 +222,7 @@ int emul_test_get_fpu(
if ( cpu_has_avx )
break;
case X86EMUL_FPU_opmask:
+ case X86EMUL_FPU_zmm:
if ( cpu_has_avx512f )
break;
default:
diff --git a/tools/tests/x86_emulator/x86-emulate.h
b/tools/tests/x86_emulator/x86-emulate.h
index 5635361e11..0d18847ea5 100644
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -132,6 +132,7 @@ static inline bool xcr0_mask(uint64_t mask)
#define cpu_has_avx512f (cp.feat.avx512f && xcr0_mask(0xe6))
#define cpu_has_avx512dq (cp.feat.avx512dq && xcr0_mask(0xe6))
#define cpu_has_avx512bw (cp.feat.avx512bw && xcr0_mask(0xe6))
+#define cpu_has_avx512vl (cp.feat.avx512vl && xcr0_mask(0xe6))
#define cpu_has_xgetbv1 (cpu_has_xsave && cp.xstate.xgetbv1)
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c
b/xen/arch/x86/x86_emulate/x86_emulate.c
index ffd8632479..de6d79b692 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -243,9 +243,27 @@ enum simd_opsize {
};
typedef uint8_t simd_opsize_t;
+enum disp8scale {
+ /* Values 0 ... 4 are explicit sizes. */
+ d8s_bw = 5,
+ d8s_dq,
+ /* EVEX.W ignored outside of 64-bit mode */
+ d8s_dq64,
+ /*
+ * All further values must strictly be last and in the order
+ * given so that arithmetic on the values works.
+ */
+ d8s_vl,
+ d8s_vl_by_2,
+ d8s_vl_by_4,
+ d8s_vl_by_8,
+};
+typedef uint8_t disp8scale_t;
+
static const struct twobyte_table {
opcode_desc_t desc;
- simd_opsize_t size;
+ simd_opsize_t size:4;
+ disp8scale_t d8s:4;
} twobyte_table[256] = {
[0x00] = { ModRM },
[0x01] = { ImplicitOps|ModRM },
@@ -260,8 +278,8 @@ static const struct twobyte_table {
[0x0d] = { ImplicitOps|ModRM },
[0x0e] = { ImplicitOps },
[0x0f] = { ModRM|SrcImmByte },
- [0x10] = { DstImplicit|SrcMem|ModRM|Mov, simd_any_fp },
- [0x11] = { DstMem|SrcImplicit|ModRM|Mov, simd_any_fp },
+ [0x10] = { DstImplicit|SrcMem|ModRM|Mov, simd_any_fp, d8s_vl },
+ [0x11] = { DstMem|SrcImplicit|ModRM|Mov, simd_any_fp, d8s_vl },
[0x12] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
[0x13] = { DstMem|SrcImplicit|ModRM|Mov, simd_other },
[0x14 ... 0x15] = { DstImplicit|SrcMem|ModRM, simd_packed_fp },
@@ -270,10 +288,10 @@ static const struct twobyte_table {
[0x18 ... 0x1f] = { ImplicitOps|ModRM },
[0x20 ... 0x21] = { DstMem|SrcImplicit|ModRM },
[0x22 ... 0x23] = { DstImplicit|SrcMem|ModRM },
- [0x28] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp },
- [0x29] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_fp },
+ [0x28] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl },
+ [0x29] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_fp, d8s_vl },
[0x2a] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
- [0x2b] = { DstMem|SrcImplicit|ModRM|Mov, simd_any_fp },
+ [0x2b] = { DstMem|SrcImplicit|ModRM|Mov, simd_any_fp, d8s_vl },
[0x2c ... 0x2d] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
[0x2e ... 0x2f] = { ImplicitOps|ModRM|TwoOp },
[0x30 ... 0x35] = { ImplicitOps },
@@ -292,8 +310,8 @@ static const struct twobyte_table {
[0x63 ... 0x67] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
[0x68 ... 0x6a] = { DstImplicit|SrcMem|ModRM, simd_other },
[0x6b ... 0x6d] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
- [0x6e] = { DstImplicit|SrcMem|ModRM|Mov },
- [0x6f] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_int },
+ [0x6e] = { DstImplicit|SrcMem|ModRM|Mov, simd_none, d8s_dq64 },
+ [0x6f] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_int, d8s_vl },
[0x70] = { SrcImmByte|ModRM|TwoOp, simd_other },
[0x71 ... 0x73] = { DstImplicit|SrcImmByte|ModRM },
[0x74 ... 0x76] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
@@ -301,8 +319,8 @@ static const struct twobyte_table {
[0x78] = { ImplicitOps|ModRM },
[0x79] = { DstReg|SrcMem|ModRM, simd_packed_int },
[0x7c ... 0x7d] = { DstImplicit|SrcMem|ModRM, simd_other },
- [0x7e] = { DstMem|SrcImplicit|ModRM|Mov },
- [0x7f] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int },
+ [0x7e] = { DstMem|SrcImplicit|ModRM|Mov, simd_none, d8s_dq64 },
+ [0x7f] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int, d8s_vl },
[0x80 ... 0x8f] = { DstImplicit|SrcImm },
[0x90 ... 0x9f] = { ByteOp|DstMem|SrcNone|ModRM|Mov },
[0xa0 ... 0xa1] = { ImplicitOps|Mov },
@@ -344,14 +362,14 @@ static const struct twobyte_table {
[0xd0] = { DstImplicit|SrcMem|ModRM, simd_other },
[0xd1 ... 0xd3] = { DstImplicit|SrcMem|ModRM, simd_other },
[0xd4 ... 0xd5] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
- [0xd6] = { DstMem|SrcImplicit|ModRM|Mov, simd_other },
+ [0xd6] = { DstMem|SrcImplicit|ModRM|Mov, simd_other, 3 },
[0xd7] = { DstReg|SrcImplicit|ModRM|Mov },
[0xd8 ... 0xdf] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
[0xe0] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
[0xe1 ... 0xe2] = { DstImplicit|SrcMem|ModRM, simd_other },
[0xe3 ... 0xe5] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
[0xe6] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
- [0xe7] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int },
+ [0xe7] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int, d8s_vl },
[0xe8 ... 0xef] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
[0xf0] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
[0xf1 ... 0xf3] = { DstImplicit|SrcMem|ModRM, simd_other },
@@ -406,6 +424,7 @@ static const struct ext0f38_table {
uint8_t to_mem:1;
uint8_t two_op:1;
uint8_t vsib:1;
+ disp8scale_t d8s:4;
} ext0f38_table[256] = {
[0x00 ... 0x0b] = { .simd_size = simd_packed_int },
[0x0c ... 0x0f] = { .simd_size = simd_packed_fp },
@@ -418,7 +437,7 @@ static const struct ext0f38_table {
[0x1c ... 0x1e] = { .simd_size = simd_packed_int, .two_op = 1 },
[0x20 ... 0x25] = { .simd_size = simd_other, .two_op = 1 },
[0x28 ... 0x29] = { .simd_size = simd_packed_int },
- [0x2a] = { .simd_size = simd_packed_int, .two_op = 1 },
+ [0x2a] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_vl },
[0x2b] = { .simd_size = simd_packed_int },
[0x2c ... 0x2d] = { .simd_size = simd_other },
[0x2e ... 0x2f] = { .simd_size = simd_other, .to_mem = 1 },
@@ -656,6 +675,22 @@ union evex {
};
};
+#define EVEX_PFX_BYTES 4
+#define init_evex(stub) ({ \
+ uint8_t *buf_ = get_stub(stub); \
+ buf_[0] = 0x62; \
+ buf_ + EVEX_PFX_BYTES; \
+})
+
+#define copy_EVEX(ptr, evex) ({ \
+ if ( !mode_64bit() ) \
+ (evex).reg |= 8; \
+ (ptr)[1 - EVEX_PFX_BYTES] = (evex).raw[0]; \
+ (ptr)[2 - EVEX_PFX_BYTES] = (evex).raw[1]; \
+ (ptr)[3 - EVEX_PFX_BYTES] = (evex).raw[2]; \
+ container_of((ptr) + 1 - EVEX_PFX_BYTES, typeof(evex), raw[0]); \
+})
+
#define rep_prefix() (vex.pfx >= vex_f3)
#define repe_prefix() (vex.pfx == vex_f3)
#define repne_prefix() (vex.pfx == vex_f2)
@@ -768,6 +803,7 @@ typedef union {
uint64_t mmx;
uint64_t __attribute__ ((aligned(16))) xmm[2];
uint64_t __attribute__ ((aligned(32))) ymm[4];
+ uint64_t __attribute__ ((aligned(64))) zmm[8];
} mmval_t;
/*
@@ -1192,6 +1228,11 @@ static int _get_fpu(
switch ( type )
{
+ case X86EMUL_FPU_zmm:
+ if ( !(xcr0 & X86_XCR0_ZMM) || !(xcr0 & X86_XCR0_HI_ZMM) ||
+ !(xcr0 & X86_XCR0_OPMASK) )
+ return X86EMUL_UNHANDLEABLE;
+ /* fall through */
case X86EMUL_FPU_ymm:
if ( !(xcr0 & X86_XCR0_SSE) || !(xcr0 & X86_XCR0_YMM) )
return X86EMUL_UNHANDLEABLE;
@@ -1786,6 +1827,7 @@ static bool vcpu_has(
#define vcpu_has_clwb() vcpu_has( 7, EBX, 24, ctxt, ops)
#define vcpu_has_sha() vcpu_has( 7, EBX, 29, ctxt, ops)
#define vcpu_has_avx512bw() vcpu_has( 7, EBX, 30, ctxt, ops)
+#define vcpu_has_avx512vl() vcpu_has( 7, EBX, 31, ctxt, ops)
#define vcpu_has_rdpid() vcpu_has( 7, ECX, 22, ctxt, ops)
#define vcpu_has_clzero() vcpu_has(0x80000008, EBX, 0, ctxt, ops)
@@ -2159,6 +2201,65 @@ static unsigned long *decode_vex_gpr(
return decode_gpr(regs, ~vex_reg & (mode_64bit() ? 0xf : 7));
}
+static unsigned int decode_disp8scale(enum disp8scale scale,
+ const struct x86_emulate_state *state)
+{
+ switch ( scale )
+ {
+ case d8s_bw:
+ return state->evex.w;
+
+ default:
+ if ( scale < d8s_vl )
+ return scale;
+ if ( state->evex.br )
+ {
+ case d8s_dq:
+ return 2 + state->evex.w;
+ }
+ break;
+
+ case d8s_dq64:
+ return 2 + (state->op_bytes == 8);
+ }
+
+ switch ( state->simd_size )
+ {
+ case simd_any_fp:
+ case simd_single_fp:
+ if ( !(state->evex.pfx & VEX_PREFIX_SCALAR_MASK) )
+ break;
+ /* fall through */
+ case simd_scalar_opc:
+ case simd_scalar_vexw:
+ return 2 + state->evex.w;
+
+ case simd_128:
+ /* These should have an explicit size specified. */
+ ASSERT_UNREACHABLE();
+ return 4;
+
+ default:
+ break;
+ }
+
+ return 4 + state->evex.lr - (scale - d8s_vl);
+}
+
+#define avx512_vlen_check(lig) do { \
+ switch ( evex.lr ) \
+ { \
+ default: \
+ generate_exception(EXC_UD); \
+ case 2: \
+ break; \
+ case 0: case 1: \
+ if ( !(lig) ) \
+ host_and_vcpu_must_have(avx512vl); \
+ break; \
+ } \
+} while ( false )
+
static bool is_aligned(enum x86_segment seg, unsigned long offs,
unsigned int size, struct x86_emulate_ctxt *ctxt,
const struct x86_emulate_ops *ops)
@@ -2408,6 +2509,7 @@ x86_decode_twobyte(
if ( vex.pfx == vex_f3 ) /* movq xmm/m64,xmm */
{
case X86EMUL_OPC_VEX_F3(0, 0x7e): /* vmovq xmm/m64,xmm */
+ case X86EMUL_OPC_EVEX_F3(0, 0x7e): /* vmovq xmm/m64,xmm */
state->desc = DstImplicit | SrcMem | TwoOp;
state->simd_size = simd_other;
/* Avoid the state->desc clobbering of TwoOp below. */
@@ -2478,7 +2580,7 @@ x86_decode_twobyte(
}
/*
- * Scalar forms of most VEX-encoded TwoOp instructions have
+ * Scalar forms of most VEX-/EVEX-encoded TwoOp instructions have
* three operands. Those which do really have two operands
* should have exited earlier.
*/
@@ -2843,6 +2945,8 @@ x86_decode(
if ( d & ModRM )
{
+ unsigned int disp8scale = 0;
+
d &= ~ModRM;
#undef ModRM /* Only its aliases are valid to use from here on. */
modrm_reg = ((rex_prefix & 4) << 1) | ((modrm & 0x38) >> 3);
@@ -2885,6 +2989,9 @@ x86_decode(
break;
case ext_0f:
+ if ( evex_encoded() )
+ disp8scale = decode_disp8scale(twobyte_table[b].d8s, state);
+
switch ( b )
{
case 0x20: /* mov cr,reg */
@@ -2898,6 +3005,11 @@ x86_decode(
*/
modrm_mod = 3;
break;
+
+ case 0x7e: /* vmovq xmm/m64,xmm needs special casing */
+ if ( disp8scale == 2 && evex.pfx == vex_f3 )
+ disp8scale = 3;
+ break;
}
break;
@@ -2909,6 +3021,8 @@ x86_decode(
if ( ext0f38_table[b].vsib )
d |= vSIB;
state->simd_size = ext0f38_table[b].simd_size;
+ if ( evex_encoded() )
+ disp8scale = decode_disp8scale(ext0f38_table[b].d8s, state);
break;
case ext_8f09:
@@ -2977,7 +3091,7 @@ x86_decode(
ea.mem.off = insn_fetch_type(int16_t);
break;
case 1:
- ea.mem.off += insn_fetch_type(int8_t);
+ ea.mem.off += insn_fetch_type(int8_t) << disp8scale;
break;
case 2:
ea.mem.off += insn_fetch_type(int16_t);
@@ -3036,7 +3150,7 @@ x86_decode(
pc_rel = mode_64bit();
break;
case 1:
- ea.mem.off += insn_fetch_type(int8_t);
+ ea.mem.off += insn_fetch_type(int8_t) << disp8scale;
break;
case 2:
ea.mem.off += insn_fetch_type(int32_t);
@@ -3237,10 +3351,11 @@ x86_emulate(
struct x86_emulate_state state;
int rc;
uint8_t b, d, *opc = NULL;
- unsigned int first_byte = 0, insn_bytes = 0;
+ unsigned int first_byte = 0, elem_bytes, insn_bytes = 0;
+ uint64_t op_mask = ~0ULL;
bool singlestep = (_regs.eflags & X86_EFLAGS_TF) &&
!is_branch_step(ctxt, ops);
- bool sfence = false;
+ bool sfence = false, fault_suppression = false;
struct operand src = { .reg = PTR_POISON };
struct operand dst = { .reg = PTR_POISON };
unsigned long cr4;
@@ -3286,6 +3401,7 @@ x86_emulate(
b = ctxt->opcode;
d = state.desc;
#define state (&state)
+ elem_bytes = 4 << evex.w;
generate_exception_if(state->not_64bit && mode_64bit(), EXC_UD);
@@ -3360,6 +3476,28 @@ x86_emulate(
break;
}
+ /* With a memory operand, fetch the mask register in use (if any). */
+ if ( ea.type == OP_MEM && evex.opmsk )
+ {
+ uint8_t *stb = get_stub(stub);
+
+ /* KMOV{W,Q} %k<n>, (%rax) */
+ stb[0] = 0xc4;
+ stb[1] = 0xe1;
+ stb[2] = cpu_has_avx512bw ? 0xf8 : 0x78;
+ stb[3] = 0x91;
+ stb[4] = evex.opmsk << 3;
+ insn_bytes = 5;
+ stb[5] = 0xc3;
+
+ invoke_stub("", "", "+m" (op_mask) : "a" (&op_mask));
+
+ insn_bytes = 0;
+ put_stub(stub);
+
+ fault_suppression = true;
+ }
+
/* Decode (but don't fetch) the destination operand: register or memory. */
switch ( d & DstMask )
{
@@ -5722,6 +5860,41 @@ x86_emulate(
insn_bytes = PFX_BYTES + 2;
break;
+ CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x2b): /* vmovntp{s,d} [xyz]mm,mem */
+ generate_exception_if(ea.type != OP_MEM || evex.opmsk, EXC_UD);
+ sfence = true;
+ fault_suppression = false;
+ /* fall through */
+ CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x10): /* vmovup{s,d} [xyz]mm/mem,[xyz]mm{k} */
+ CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x10): /* vmovs{s,d} mem,xmm{k} */
+ /* vmovs{s,d} xmm,xmm,xmm{k} */
+ CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x11): /* vmovup{s,d} [xyz]mm,[xyz]mm/mem{k} */
+ CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x11): /* vmovs{s,d} xmm,mem{k} */
+ /* vmovs{s,d} xmm,xmm,xmm{k} */
+ CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x28): /* vmovap{s,d} [xyz]mm/mem,[xyz]mm{k} */
+ CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x29): /* vmovap{s,d} [xyz]mm,[xyz]mm/mem{k} */
+ /* vmovs{s,d} to/from memory have only two operands. */
+ if ( (b & ~1) == 0x10 && ea.type == OP_MEM )
+ d |= TwoOp;
+ generate_exception_if(evex.br, EXC_UD);
+ generate_exception_if(evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK),
+ EXC_UD);
+ host_and_vcpu_must_have(avx512f);
+ avx512_vlen_check(evex.pfx & VEX_PREFIX_SCALAR_MASK);
+ simd_zmm:
+ get_fpu(X86EMUL_FPU_zmm);
+ opc = init_evex(stub);
+ opc[0] = b;
+ opc[1] = modrm;
+ if ( ea.type == OP_MEM )
+ {
+ /* convert memory operand to (%rAX) */
+ evex.b = 1;
+ opc[1] &= 0x38;
+ }
+ insn_bytes = EVEX_PFX_BYTES + 2;
+ break;
+
case X86EMUL_OPC_66(0x0f, 0x12): /* movlpd m64,xmm */
case X86EMUL_OPC_VEX_66(0x0f, 0x12): /* vmovlpd m64,xmm,xmm */
CASE_SIMD_PACKED_FP(, 0x0f, 0x13): /* movlp{s,d} xmm,m64 */
@@ -6362,6 +6535,41 @@ x86_emulate(
ASSERT(!state->simd_size);
break;
+ case X86EMUL_OPC_EVEX_66(0x0f, 0x6e): /* vmov{d,q} r/m,xmm */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0x7e): /* vmov{d,q} xmm,r/m */
+ generate_exception_if((evex.lr || evex.opmsk || evex.br ||
+ evex.reg != 0xf || !evex.RX),
+ EXC_UD);
+ host_and_vcpu_must_have(avx512f);
+ get_fpu(X86EMUL_FPU_zmm);
+
+ opc = init_evex(stub);
+ opc[0] = b;
+ /* Convert memory/GPR operand to (%rAX). */
+ evex.b = 1;
+ if ( !mode_64bit() )
+ evex.w = 0;
+ opc[1] = modrm & 0x38;
+ insn_bytes = EVEX_PFX_BYTES + 2;
+ opc[2] = 0xc3;
+
+ copy_EVEX(opc, evex);
+ invoke_stub("", "", "+m" (src.val) : "a" (&src.val));
+ dst.val = src.val;
+
+ put_stub(stub);
+ ASSERT(!state->simd_size);
+ break;
+
+ case X86EMUL_OPC_EVEX_F3(0x0f, 0x7e): /* vmovq xmm/m64,xmm */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */
+ generate_exception_if(evex.lr || !evex.w || evex.opmsk || evex.br,
+ EXC_UD);
+ host_and_vcpu_must_have(avx512f);
+ d |= TwoOp;
+ op_bytes = 8;
+ goto simd_zmm;
+
case X86EMUL_OPC_66(0x0f, 0xe7): /* movntdq xmm,m128 */
case X86EMUL_OPC_VEX_66(0x0f, 0xe7): /* vmovntdq {x,y}mm,mem */
generate_exception_if(ea.type != OP_MEM, EXC_UD);
@@ -6382,6 +6590,30 @@ x86_emulate(
goto simd_0f_avx;
goto simd_0f_sse2;
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xe7): /* vmovntdq [xyz]mm,mem */
+ generate_exception_if(ea.type != OP_MEM || evex.opmsk || evex.w,
+ EXC_UD);
+ sfence = true;
+ fault_suppression = false;
+ /* fall through */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0x6f): /* vmovdqa{32,64} [xyz]mm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_F3(0x0f, 0x6f): /* vmovdqu{32,64} [xyz]mm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0x7f): /* vmovdqa{32,64} [xyz]mm,[xyz]mm/mem{k} */
+ case X86EMUL_OPC_EVEX_F3(0x0f, 0x7f): /* vmovdqu{32,64} [xyz]mm,[xyz]mm/mem{k} */
+ vmovdqa:
+ generate_exception_if(evex.br, EXC_UD);
+ host_and_vcpu_must_have(avx512f);
+ avx512_vlen_check(false);
+ d |= TwoOp;
+ op_bytes = 16 << evex.lr;
+ goto simd_zmm;
+
+ case X86EMUL_OPC_EVEX_F2(0x0f, 0x6f): /* vmovdqu{8,16} [xyz]mm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_F2(0x0f, 0x7f): /* vmovdqu{8,16} [xyz]mm,[xyz]mm/mem{k} */
+ host_and_vcpu_must_have(avx512bw);
+ elem_bytes = 1 << evex.w;
+ goto vmovdqa;
+
case X86EMUL_OPC_VEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */
generate_exception_if(vex.l, EXC_UD);
d |= TwoOp;
@@ -7748,6 +7980,15 @@ x86_emulate(
}
goto movdqa;
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x2a): /* vmovntdqa mem,[xyz]mm */
+ generate_exception_if(ea.type != OP_MEM || evex.opmsk || evex.w,
+ EXC_UD);
+ /* Ignore the non-temporal hint for now, using vmovdqa32 instead. */
+ asm volatile ( "mfence" ::: "memory" );
+ b = 0x6f;
+ evex.opcx = vex_0f;
+ goto vmovdqa;
+
case X86EMUL_OPC_VEX_66(0x0f38, 0x2c): /* vmaskmovps mem,{x,y}mm,{x,y}mm */
case X86EMUL_OPC_VEX_66(0x0f38, 0x2d): /* vmaskmovpd mem,{x,y}mm,{x,y}mm */
case X86EMUL_OPC_VEX_66(0x0f38, 0x2e): /* vmaskmovps {x,y}mm,{x,y}mm,mem */
@@ -8801,17 +9042,27 @@ x86_emulate(
else if ( state->simd_size )
{
generate_exception_if(!op_bytes, EXC_UD);
- generate_exception_if(vex.opcx && (d & TwoOp) && vex.reg != 0xf,
+ generate_exception_if((vex.opcx && (d & TwoOp) &&
+ (vex.reg != 0xf || (evex_encoded() && !evex.RX))),
+ EXC_UD);
if ( !opc )
BUG();
- opc[insn_bytes - PFX_BYTES] = 0xc3;
- copy_REX_VEX(opc, rex_prefix, vex);
+ if ( evex_encoded() )
+ {
+ opc[insn_bytes - EVEX_PFX_BYTES] = 0xc3;
+ copy_EVEX(opc, evex);
+ }
+ else
+ {
+ opc[insn_bytes - PFX_BYTES] = 0xc3;
+ copy_REX_VEX(opc, rex_prefix, vex);
+ }
if ( ea.type == OP_MEM )
{
uint32_t mxcsr = 0;
+ uint64_t full = 0;
if ( op_bytes < 16 ||
(vex.opcx
@@ -8833,6 +9084,45 @@ x86_emulate(
!is_aligned(ea.mem.seg, ea.mem.off, op_bytes,
ctxt, ops),
EXC_GP, 0);
+
+ EXPECT(elem_bytes > 0);
+ if ( evex.br )
+ {
+ ASSERT((d & DstMask) != DstMem);
+ op_bytes = elem_bytes;
+ }
+ if ( evex.opmsk )
+ {
+ ASSERT(!(op_bytes % elem_bytes));
+ full = ~0ULL >> (64 - op_bytes / elem_bytes);
+ op_mask &= full;
+ }
+ if ( fault_suppression )
+ {
+ if ( !op_mask )
+ goto simd_no_mem;
+ if ( !evex.br )
+ {
+ first_byte = __builtin_ctzll(op_mask);
+ op_mask >>= first_byte;
+ full >>= first_byte;
+ first_byte *= elem_bytes;
+ op_bytes = (64 - __builtin_clzll(op_mask)) * elem_bytes;
+ }
+ }
+ /*
+ * Independent of fault suppression we may need to read (parts of)
+ * the memory operand for the purpose of merging without splitting
+ * the write below into multiple ones. Note that the EVEX.Z check
+ * here isn't strictly needed, due to there not currently being
+ * any instructions allowing zeroing-merging on memory writes (and
+ * we raise #UD during DstMem processing far above in this case),
+ * yet conceptually the read is then unnecessary.
+ */
+ if ( evex.opmsk && !evex.z && (d & DstMask) == DstMem &&
+ op_mask != full )
+ d = (d & ~SrcMask) | SrcMem;
+
switch ( d & SrcMask )
{
case SrcMem:
@@ -8874,7 +9164,10 @@ x86_emulate(
}
}
else
+ {
+ simd_no_mem:
dst.type = OP_NONE;
+ }
/* {,v}maskmov{q,dqu}, as an exception, uses rDI. */
if ( likely((ctxt->opcode & ~(X86EMUL_OPC_PFX_MASK |
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h
b/xen/arch/x86/x86_emulate/x86_emulate.h
index 3750f0c91c..55a9e0ed51 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -171,6 +171,7 @@ enum x86_emulate_fpu_type {
X86EMUL_FPU_xmm, /* SSE instruction set (%xmm0-%xmm7/15) */
X86EMUL_FPU_ymm, /* AVX/XOP instruction set (%ymm0-%ymm7/15) */
X86EMUL_FPU_opmask, /* AVX512 opmask instruction set (%k0-%k7) */
+ X86EMUL_FPU_zmm, /* AVX512 instruction set (%zmm0-%zmm7/31) */
/* This sentinel will never be passed to ->get_fpu(). */
X86EMUL_FPU_none
};
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index 7e11a458bd..c2b0f6ae4e 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -105,6 +105,7 @@
#define cpu_has_smap boot_cpu_has(X86_FEATURE_SMAP)
#define cpu_has_sha boot_cpu_has(X86_FEATURE_SHA)
#define cpu_has_avx512bw boot_cpu_has(X86_FEATURE_AVX512BW)
+#define cpu_has_avx512vl boot_cpu_has(X86_FEATURE_AVX512VL)
/* CPUID level 0x80000007.edx */
#define cpu_has_itsc boot_cpu_has(X86_FEATURE_ITSC)
--
generated by git-patchbot for /home/xen/git/xen.git#staging
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |