[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH] x86/cpuid: Add AVX512_4VNNIW and AVX512_4FMAPS support
Add support for two new AVX512 subfeatures for guests. AVX512_4VNNIW: Vector instructions for deep learning enhanced word variable precision. AVX512_4FMAPS: Vector instructions for deep learning floating-point single precision. Signed-off-by: Luwei Kang <luwei.kang@xxxxxxxxx> Signed-off-by: He Chen <he.chen@xxxxxxxxxxxxxxx> --- tools/libxc/xc_cpuid_x86.c | 8 ++++++-- xen/arch/x86/cpu/common.c | 2 +- xen/arch/x86/cpuid.c | 2 +- xen/arch/x86/hvm/hvm.c | 1 + xen/arch/x86/traps.c | 5 +++-- xen/include/asm-x86/cpuid.h | 1 + xen/include/public/arch-x86/cpufeatureset.h | 4 ++++ xen/tools/gen-cpuid.py | 2 +- 8 files changed, 18 insertions(+), 7 deletions(-) diff --git a/tools/libxc/xc_cpuid_x86.c b/tools/libxc/xc_cpuid_x86.c index 2ad9aeb..e9e3691 100644 --- a/tools/libxc/xc_cpuid_x86.c +++ b/tools/libxc/xc_cpuid_x86.c @@ -547,13 +547,15 @@ static void xc_cpuid_hvm_policy(xc_interface *xch, { regs[1] = info->featureset[featureword_of(X86_FEATURE_FSGSBASE)]; regs[2] = info->featureset[featureword_of(X86_FEATURE_PREFETCHWT1)]; + regs[3] = info->featureset[featureword_of(X86_FEATURE_AVX512_4VNNIW)]; } else { regs[1] = 0; regs[2] = 0; + regs[3] = 0; } - regs[0] = regs[3] = 0; + regs[0] = 0; break; case 0x0000000d: @@ -638,13 +640,15 @@ static void xc_cpuid_pv_policy(xc_interface *xch, { regs[1] = info->featureset[featureword_of(X86_FEATURE_FSGSBASE)]; regs[2] = info->featureset[featureword_of(X86_FEATURE_PREFETCHWT1)]; + regs[3] = info->featureset[featureword_of(X86_FEATURE_AVX512_4VNNIW)]; } else { regs[1] = 0; regs[2] = 0; + regs[3] = 0; } - regs[0] = regs[3] = 0; + regs[0] = 0; break; case 0x0000000d: diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c index 3475198..aaaa873 100644 --- a/xen/arch/x86/cpu/common.c +++ b/xen/arch/x86/cpu/common.c @@ -325,7 +325,7 @@ static void generic_identify(struct cpuinfo_x86 *c) cpuid_count(0x00000007, 0, &tmp, &c->x86_capability[cpufeat_word(X86_FEATURE_FSGSBASE)], &c->x86_capability[cpufeat_word(X86_FEATURE_PKU)], - &tmp); + 
&c->x86_capability[cpufeat_word(X86_FEATURE_AVX512_4VNNIW)]); } /* diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c index 63b2db9..3e85a63 100644 --- a/xen/arch/x86/cpuid.c +++ b/xen/arch/x86/cpuid.c @@ -78,7 +78,7 @@ static void __init calculate_raw_featureset(void) cpuid_count(0x7, 0, &tmp, &raw_featureset[FEATURESET_7b0], &raw_featureset[FEATURESET_7c0], - &tmp); + &raw_featureset[FEATURESET_7d0]); if ( max >= 0xd ) cpuid_count(0xd, 1, &raw_featureset[FEATURESET_Da1], diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 704fd64..752e5fb 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -3503,6 +3503,7 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, special_features[FEATURESET_7b0]); *ecx &= hvm_featureset[FEATURESET_7c0]; + *edx &= hvm_featureset[FEATURESET_7d0]; /* Don't expose HAP-only features to non-hap guests. */ if ( !hap_enabled(d) ) diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 14abb62..2469e49 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -1128,6 +1128,7 @@ void pv_cpuid(struct cpu_user_regs *regs) special_features[FEATURESET_7b0]); c &= pv_featureset[FEATURESET_7c0]; + d &= pv_featureset[FEATURESET_7d0]; if ( !is_pvh_domain(currd) ) { @@ -1142,8 +1143,8 @@ void pv_cpuid(struct cpu_user_regs *regs) } } else - b = c = 0; - a = d = 0; + b = c = d = 0; + a = 0; break; case XSTATE_CPUID: diff --git a/xen/include/asm-x86/cpuid.h b/xen/include/asm-x86/cpuid.h index 2372474..ec8bbb5 100644 --- a/xen/include/asm-x86/cpuid.h +++ b/xen/include/asm-x86/cpuid.h @@ -17,6 +17,7 @@ #define FEATURESET_7c0 6 /* 0x00000007:0.ecx */ #define FEATURESET_e7d 7 /* 0x80000007.edx */ #define FEATURESET_e8b 8 /* 0x80000008.ebx */ +#define FEATURESET_7d0 9 /* 0x00000007:0.edx */ #ifndef __ASSEMBLY__ #include <xen/types.h> diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h index 9320c9e..565ccd5 100644 --- 
a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -234,6 +234,10 @@ XEN_CPUFEATURE(EFRO, 7*32+10) /* APERF/MPERF Read Only interface */ /* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */ XEN_CPUFEATURE(CLZERO, 8*32+ 0) /*A CLZERO instruction */ +/* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */ +XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */ +XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single Precision */ + #endif /* XEN_CPUFEATURE */ /* Clean up from a default include. Close the enum (for C). */ diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py index 005cad9..c29f1d3 100755 --- a/xen/tools/gen-cpuid.py +++ b/xen/tools/gen-cpuid.py @@ -253,7 +253,7 @@ def crunch_numbers(state): # 512bit registers, and the instructions themselves. All further AVX512 features # are built on top of AVX512F AVX512F: [AVX512DQ, AVX512IFMA, AVX512PF, AVX512ER, AVX512CD, - AVX512BW, AVX512VL, AVX512VBMI], + AVX512BW, AVX512VL, AVX512VBMI, AVX512_4VNNIW, AVX512_4FMAPS], } deep_features = tuple(sorted(deps.keys())) -- 2.7.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |