|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [xen master] x86emul: support AVX-VNNI
commit a780b17c72ef9f40f024a32fcda19f2544acaf2a
Author: Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Fri Oct 30 14:29:06 2020 +0100
Commit: Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Fri Oct 30 14:29:06 2020 +0100
x86emul: support AVX-VNNI
These are VEX-encoded equivalents of the EVEX-encoded AVX512-VNNI ISA
extension.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
tools/libs/light/libxl_cpuid.c | 1 +
tools/misc/xen-cpuid.c | 2 +-
tools/tests/x86_emulator/predicates.c | 4 ++
tools/tests/x86_emulator/test_x86_emulator.c | 55 ++++++++++++++++++++++++++++
tools/tests/x86_emulator/x86-emulate.h | 1 +
xen/arch/x86/x86_emulate/x86_emulate.c | 9 +++++
xen/include/asm-x86/cpufeature.h | 1 +
xen/include/public/arch-x86/cpufeatureset.h | 1 +
xen/tools/gen-cpuid.py | 2 +-
9 files changed, 74 insertions(+), 2 deletions(-)
diff --git a/tools/libs/light/libxl_cpuid.c b/tools/libs/light/libxl_cpuid.c
index 16c077cceb..259612834e 100644
--- a/tools/libs/light/libxl_cpuid.c
+++ b/tools/libs/light/libxl_cpuid.c
@@ -226,6 +226,7 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str)
{"core-caps", 0x00000007, 0, CPUID_REG_EDX, 30, 1},
{"ssbd", 0x00000007, 0, CPUID_REG_EDX, 31, 1},
+ {"avx-vnni", 0x00000007, 1, CPUID_REG_EAX, 4, 1},
{"avx512-bf16", 0x00000007, 1, CPUID_REG_EAX, 5, 1},
{"lahfsahf", 0x80000001, NA, CPUID_REG_ECX, 0, 1},
diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c
index 2446941a47..c81aa93055 100644
--- a/tools/misc/xen-cpuid.c
+++ b/tools/misc/xen-cpuid.c
@@ -175,7 +175,7 @@ static const char *const str_7d0[32] =
static const char *const str_7a1[32] =
{
- /* 4 */ [ 5] = "avx512-bf16",
+ [ 4] = "avx-vnni", [ 5] = "avx512-bf16",
};
static const struct {
diff --git a/tools/tests/x86_emulator/predicates.c b/tools/tests/x86_emulator/predicates.c
index 137681ba78..94b99c94e8 100644
--- a/tools/tests/x86_emulator/predicates.c
+++ b/tools/tests/x86_emulator/predicates.c
@@ -1335,6 +1335,10 @@ static const struct vex {
{ { 0x45 }, 2, T, R, pfx_66, Wn, Ln }, /* vpsrlv{d,q} */
{ { 0x46 }, 2, T, R, pfx_66, W0, Ln }, /* vpsravd */
{ { 0x47 }, 2, T, R, pfx_66, Wn, Ln }, /* vpsllv{d,q} */
+ { { 0x50 }, 2, T, R, pfx_66, W0, Ln }, /* vpdpbusd */
+ { { 0x51 }, 2, T, R, pfx_66, W0, Ln }, /* vpdpbusds */
+ { { 0x52 }, 2, T, R, pfx_66, W0, Ln }, /* vpdpwssd */
+ { { 0x53 }, 2, T, R, pfx_66, W0, Ln }, /* vpdpwssds */
{ { 0x58 }, 2, T, R, pfx_66, W0, Ln }, /* vpbroadcastd */
{ { 0x59 }, 2, T, R, pfx_66, W0, Ln }, /* vpbroadcastq */
{ { 0x5a }, 2, F, R, pfx_66, W0, L1 }, /* vbroadcasti128 */
diff --git a/tools/tests/x86_emulator/test_x86_emulator.c b/tools/tests/x86_emulator/test_x86_emulator.c
index b9663214e2..cbb8f34f58 100644
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -5028,6 +5028,61 @@ int main(int argc, char **argv)
printf("okay\n");
}
+ printf("%-40s", "Testing vpdpwssd (%ecx),%{y,z}mmA,%{y,z}mmB...");
+ if ( stack_exec && cpu_has_avx512_vnni && cpu_has_avx_vnni )
+ {
+ /* Do the same operation two ways and compare the results. */
+ decl_insn(vpdpwssd_vex1);
+ decl_insn(vpdpwssd_vex2);
+ decl_insn(vpdpwssd_evex);
+
+ for ( i = 0; i < 24; ++i )
+ res[i] = i | (~i << 16);
+
+ asm volatile ( "vmovdqu32 32(%0), %%zmm1\n\t"
+ "vextracti64x4 $1, %%zmm1, %%ymm2\n\t"
+ "vpxor %%xmm0, %%xmm0, %%xmm3\n\t"
+ "vpxor %%xmm0, %%xmm0, %%xmm4\n\t"
+ "vpxor %%xmm0, %%xmm0, %%xmm5\n"
+ put_insn(vpdpwssd_vex1,
+ /* "%{vex%} vpdpwssd (%1), %%ymm1, %%ymm3" */
+ ".byte 0xc4, 0xe2, 0x75, 0x52, 0x19") "\n"
+ put_insn(vpdpwssd_vex2,
+ /* "%{vex%} vpdpwssd 32(%1), %%ymm2, %%ymm4" */
+ ".byte 0xc4, 0xe2, 0x6d, 0x52, 0x61, 0x20") "\n"
+ put_insn(vpdpwssd_evex,
+ /* "vpdpwssd (%1), %%zmm1, %%zmm5" */
+ ".byte 0x62, 0xf2, 0x75, 0x48, 0x52, 0x29")
+ :: "r" (res), "c" (NULL) );
+
+ set_insn(vpdpwssd_vex1);
+ regs.ecx = (unsigned long)res;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(vpdpwssd_vex1) )
+ goto fail;
+
+ set_insn(vpdpwssd_vex2);
+ regs.ecx = (unsigned long)res;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(vpdpwssd_vex2) )
+ goto fail;
+
+ set_insn(vpdpwssd_evex);
+ regs.ecx = (unsigned long)res;
+ rc = x86_emulate(&ctxt, &emulops);
+ if ( rc != X86EMUL_OKAY || !check_eip(vpdpwssd_evex) )
+ goto fail;
+
+ asm ( "vinserti64x4 $1, %%ymm4, %%zmm3, %%zmm0\n\t"
+ "vpcmpeqd %%zmm0, %%zmm5, %%k0\n\t"
+ "kmovw %%k0, %0" : "=g" (rc) );
+ if ( rc != 0xffff )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
printf("%-40s", "Testing invpcid 16(%ecx),%%edx...");
if ( stack_exec )
{
diff --git a/tools/tests/x86_emulator/x86-emulate.h b/tools/tests/x86_emulator/x86-emulate.h
index fc52edf8ce..e1a2aaef68 100644
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -170,6 +170,7 @@ static inline bool xcr0_mask(uint64_t mask)
#define cpu_has_avx512_4fmaps (cp.feat.avx512_4fmaps && xcr0_mask(0xe6))
#define cpu_has_avx512_vp2intersect (cp.feat.avx512_vp2intersect && xcr0_mask(0xe6))
#define cpu_has_serialize cp.feat.serialize
+#define cpu_has_avx_vnni (cp.feat.avx_vnni && xcr0_mask(6))
#define cpu_has_avx512_bf16 (cp.feat.avx512_bf16 && xcr0_mask(0xe6))
#define cpu_has_xgetbv1 (cpu_has_xsave && cp.xstate.xgetbv1)
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index fd70551bc2..a35b63634b 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -2008,6 +2008,7 @@ amd_like(const struct x86_emulate_ctxt *ctxt)
#define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps)
#define vcpu_has_avx512_vp2intersect() (ctxt->cpuid->feat.avx512_vp2intersect)
#define vcpu_has_serialize() (ctxt->cpuid->feat.serialize)
+#define vcpu_has_avx_vnni() (ctxt->cpuid->feat.avx_vnni)
#define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16)
#define vcpu_must_have(feat) \
@@ -9453,6 +9454,14 @@ x86_emulate(
generate_exception_if(vex.l, EXC_UD);
goto simd_0f_avx;
+ case X86EMUL_OPC_VEX_66(0x0f38, 0x50): /* vpdpbusd [xy]mm/mem,[xy]mm,[xy]mm */
+ case X86EMUL_OPC_VEX_66(0x0f38, 0x51): /* vpdpbusds [xy]mm/mem,[xy]mm,[xy]mm */
+ case X86EMUL_OPC_VEX_66(0x0f38, 0x52): /* vpdpwssd [xy]mm/mem,[xy]mm,[xy]mm */
+ case X86EMUL_OPC_VEX_66(0x0f38, 0x53): /* vpdpwssds [xy]mm/mem,[xy]mm,[xy]mm */
+ host_and_vcpu_must_have(avx_vnni);
+ generate_exception_if(vex.w, EXC_UD);
+ goto simd_0f_avx;
+
case X86EMUL_OPC_EVEX_66(0x0f38, 0x50): /* vpdpbusd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
case X86EMUL_OPC_EVEX_66(0x0f38, 0x51): /* vpdpbusds [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
case X86EMUL_OPC_EVEX_66(0x0f38, 0x52): /* vpdpwssd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index 4de992781e..ad3d84bdde 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -133,6 +133,7 @@
#define cpu_has_serialize boot_cpu_has(X86_FEATURE_SERIALIZE)
/* CPUID level 0x00000007:1.eax */
+#define cpu_has_avx_vnni boot_cpu_has(X86_FEATURE_AVX_VNNI)
#define cpu_has_avx512_bf16 boot_cpu_has(X86_FEATURE_AVX512_BF16)
/* Synthesized. */
diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
index ef7cca334d..6f7efaad6d 100644
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -273,6 +273,7 @@ XEN_CPUFEATURE(CORE_CAPS, 9*32+30) /* IA32_CORE_CAPABILITIES MSR */
XEN_CPUFEATURE(SSBD, 9*32+31) /*A MSR_SPEC_CTRL.SSBD available */
/* Intel-defined CPU features, CPUID level 0x00000007:1.eax, word 10 */
+XEN_CPUFEATURE(AVX_VNNI, 10*32+ 4) /*A AVX-VNNI Instructions */
XEN_CPUFEATURE(AVX512_BF16, 10*32+ 5) /*A AVX512 BFloat16 Instructions */
#endif /* XEN_CPUFEATURE */
diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py
index 143ee7cae1..50412b9a46 100755
--- a/xen/tools/gen-cpuid.py
+++ b/xen/tools/gen-cpuid.py
@@ -252,7 +252,7 @@ def crunch_numbers(state):
# feature flags. If want to use AVX512, AVX2 must be supported and
# enabled. Certain later extensions, acting on 256-bit vectors of
# integers, better depend on AVX2 than AVX.
- AVX2: [AVX512F, VAES, VPCLMULQDQ],
+ AVX2: [AVX512F, VAES, VPCLMULQDQ, AVX_VNNI],
# AVX512F is taken to mean hardware support for 512bit registers
# (which in practice depends on the EVEX prefix to encode) as well
--
generated by git-patchbot for /home/xen/git/xen.git#master
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |