[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [xen staging] x86/xstate: enable AMX components
commit e3b5440012721560ff48859ba8c4ea7f72f9ef20 Author: Jan Beulich <jbeulich@xxxxxxxx> AuthorDate: Fri Sep 6 08:40:21 2024 +0200 Commit: Jan Beulich <jbeulich@xxxxxxxx> CommitDate: Fri Sep 6 08:40:21 2024 +0200 x86/xstate: enable AMX components These being controlled by XCR0, enabling support is relatively straightforward. Note however that there won't be any use of them until their dependent ISA extension CPUID flags are exposed, not the least due to recalculate_xstate() handling the dependencies in kind of a reverse manner. Note that xstate_check_sizes() already covers the two new states. Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> --- xen/arch/x86/cpu-policy.c | 3 +++ xen/arch/x86/include/asm/xstate.h | 3 ++- xen/include/public/arch-x86/cpufeatureset.h | 4 ++++ xen/tools/gen-cpuid.py | 9 +++++++-- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c index 888e7ac4d4..1f5e42db5b 100644 --- a/xen/arch/x86/cpu-policy.c +++ b/xen/arch/x86/cpu-policy.c @@ -216,6 +216,9 @@ static void recalculate_xstate(struct cpu_policy *p) if ( p->feat.pku ) xstates |= X86_XCR0_PKRU; + if ( p->feat.amx_tile ) + xstates |= X86_XCR0_TILE_CFG | X86_XCR0_TILE_DATA; + /* Subleaf 0 */ p->xstate.max_size = xstate_uncompressed_size(xstates & ~XSTATE_XSAVES_ONLY); diff --git a/xen/arch/x86/include/asm/xstate.h b/xen/arch/x86/include/asm/xstate.h index ebeb2a3dca..b4ee555953 100644 --- a/xen/arch/x86/include/asm/xstate.h +++ b/xen/arch/x86/include/asm/xstate.h @@ -35,7 +35,8 @@ extern uint32_t mxcsr_mask; XSTATE_NONLAZY) #define XSTATE_ALL (~(1ULL << 63)) -#define XSTATE_NONLAZY (X86_XCR0_BNDREGS | X86_XCR0_BNDCSR | X86_XCR0_PKRU) +#define XSTATE_NONLAZY (X86_XCR0_BNDREGS | X86_XCR0_BNDCSR | X86_XCR0_PKRU | \ + X86_XCR0_TILE_CFG | X86_XCR0_TILE_DATA) #define XSTATE_LAZY (XSTATE_ALL & ~XSTATE_NONLAZY) #define XSTATE_XSAVES_ONLY 0 #define XSTATE_COMPACTION_ENABLED (1ULL << 63) diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h index abab78fa86..420ed21a20 100644 --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -278,8 +278,10 @@ XEN_CPUFEATURE(HYBRID, 9*32+15) /* Heterogeneous platform */ XEN_CPUFEATURE(TSXLDTRK, 9*32+16) /*a TSX load tracking suspend/resume insns */ XEN_CPUFEATURE(ARCH_LBR, 9*32+19) /* Architectural Last Branch Record */ XEN_CPUFEATURE(CET_IBT, 9*32+20) /* CET - Indirect Branch Tracking */ +XEN_CPUFEATURE(AMX_BF16, 9*32+22) /* AMX BFloat16 instruction */ XEN_CPUFEATURE(AVX512_FP16, 9*32+23) /*A AVX512 FP16 instructions */ XEN_CPUFEATURE(AMX_TILE, 9*32+24) /* AMX Tile architecture */ +XEN_CPUFEATURE(AMX_INT8, 9*32+25) /* AMX 8-bit integer instructions */ XEN_CPUFEATURE(IBRSB, 9*32+26) /*A IBRS and IBPB support (used by Intel) */ XEN_CPUFEATURE(STIBP, 9*32+27) /*A STIBP */ XEN_CPUFEATURE(L1D_FLUSH, 9*32+28) /*S MSR_FLUSH_CMD and L1D flush. */ @@ -297,6 +299,7 @@ XEN_CPUFEATURE(FZRM, 10*32+10) /*A Fast Zero-length REP MOVSB */ XEN_CPUFEATURE(FSRS, 10*32+11) /*A Fast Short REP STOSB */ XEN_CPUFEATURE(FSRCS, 10*32+12) /*A Fast Short REP CMPSB/SCASB */ XEN_CPUFEATURE(WRMSRNS, 10*32+19) /*S WRMSR Non-Serialising */ +XEN_CPUFEATURE(AMX_FP16, 10*32+21) /* AMX FP16 instruction */ XEN_CPUFEATURE(AVX_IFMA, 10*32+23) /*A AVX-IFMA Instructions */ /* AMD-defined CPU features, CPUID level 0x80000021.eax, word 11 */ @@ -331,6 +334,7 @@ XEN_CPUFEATURE(MCDT_NO, 13*32+ 5) /*A MCDT_NO */ /* Intel-defined CPU features, CPUID level 0x00000007:1.edx, word 15 */ XEN_CPUFEATURE(AVX_VNNI_INT8, 15*32+ 4) /*A AVX-VNNI-INT8 Instructions */ XEN_CPUFEATURE(AVX_NE_CONVERT, 15*32+ 5) /*A AVX-NE-CONVERT Instructions */ +XEN_CPUFEATURE(AMX_COMPLEX, 15*32+ 8) /* AMX Complex Instructions */ XEN_CPUFEATURE(AVX_VNNI_INT16, 15*32+10) /*A AVX-VNNI-INT16 Instructions */ XEN_CPUFEATURE(PREFETCHI, 15*32+14) /*A PREFETCHIT{0,1} Instructions */ XEN_CPUFEATURE(CET_SSS, 15*32+18) /* CET Supervisor Shadow Stacks safe to use */ diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py index 601eec6089..f7cad255da 100755 --- a/xen/tools/gen-cpuid.py +++ b/xen/tools/gen-cpuid.py @@ -252,7 +252,7 @@ def crunch_numbers(state): # instruction groups which are specified to require XSAVE for state # management. XSAVE: [XSAVEOPT, XSAVEC, XGETBV1, XSAVES, - AVX, MPX, PKU, LWP], + AVX, MPX, PKU, AMX_TILE, LWP], # AVX is taken to mean hardware support for 256bit registers (which in # practice depends on the VEX prefix to encode), and the instructions @@ -274,7 +274,7 @@ def crunch_numbers(state): # superpages, PCID and PKU are only available in 4 level paging. # NO_LMSL indicates the absense of Long Mode Segment Limits, which # have been dropped in hardware. - LM: [CX16, PCID, LAHF_LM, PAGE1GB, PKU, NO_LMSL], + LM: [CX16, PCID, LAHF_LM, PAGE1GB, PKU, NO_LMSL, AMX_TILE], # AMD K6-2+ and K6-III processors shipped with 3DNow+, beyond the # standard 3DNow in the earlier K6 processors. @@ -338,6 +338,11 @@ def crunch_numbers(state): # The behaviour described by RRSBA depend on eIBRS being active. EIBRS: [RRSBA], + + # AMX-TILE means hardware support for tile registers and general non- + # computational instructions. All further AMX features are built on top + # of AMX-TILE. + AMX_TILE: [AMX_BF16, AMX_INT8, AMX_FP16, AMX_COMPLEX], } deep_features = tuple(sorted(deps.keys())) -- generated by git-patchbot for /home/xen/git/xen.git#staging
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |