|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v4 00.9/16] x86/CPUID: enable AVX10 leaf
Le 09/04/2026 à 17:18, Jan Beulich a écrit :
> This requires bumping the number of basic leaves we support. Apart from
> this the logic is modeled as closely as possible after that of leaf 7
> handling.
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> ---
> The gen-cpuid.py adjustment is merely the minimum needed. It's not
> really clear to me whether someone turning off e.g. AVX512BW might then
> also validly expect AVX10 to be turned off.
>
AFAIUI, AVX512BW is a dependency of AVX10, as AVX10 implies that the
former is available.
> Do we want to synthesize AVX10 in the (max?) policies when all necessary
> AVX512* features are available, thus allowing migration from an AVX10
> host to a suitable non-AVX10 one?
>
I guess we do; there is not really a reason to disallow it.
Some software may check for AVX10 only, and not check for individual
AVX512 bits.
> The prior vsz<N> bits are now defined as reserved-at-1: No idea yet how
> to represent this properly.
>
> How a toolstack side equivalent (if any) of the init_dom0_cpuid_policy()
> change would look like is entirely unclear to me. How much should we
> take from the max policy, and how much should we permit/require the user
> to specify (and how would the latter look like)?
>
> While, as per a comment next to the call site of
> recalculate_cpuid_policy(), recalculate_*() are supposed to go away when
> x86_cpu_policies_are_compatible() is complete, some of the checking done
> in recalculate_misc() really wouldn't belong there. We'd need another "is
> self-consistent" checking function.
> ---
> v4: Drop all traces of AVX10/256. Add max_subleaf check to
> x86_cpu_policies_are_compatible(). Add/adjust vsz<N> checks in
> recalculate_misc(). In the max policies, synthesize AVX512* when
> AVX10 is available.
> v3: Re-base.
> v2: Add logic to init_dom0_cpuid_policy(). Drop vsz128 field. Re-base.
>
> --- a/xen/arch/x86/cpu-policy.c
> +++ b/xen/arch/x86/cpu-policy.c
> @@ -211,7 +211,7 @@ static void recalculate_xstate(struct cp
> if ( p->feat.mpx )
> xstates |= X86_XCR0_BNDREGS | X86_XCR0_BNDCSR;
>
> - if ( p->feat.avx512f )
> + if ( p->feat.avx512f || p->feat.avx10 )
In principle, the avx10 check is redundant as avx512f must be set if
avx10 is set.
> xstates |= X86_XCR0_OPMASK | X86_XCR0_ZMM | X86_XCR0_HI_ZMM;
>
> if ( p->feat.pku )
> @@ -272,6 +272,18 @@ static void recalculate_misc(struct cpu_
>
> p->basic.raw[0xc] = EMPTY_LEAF;
>
> + zero_leaves(p->basic.raw, 0xe, 0x23);
> +
> + p->avx10.raw[0].b &= 0x000700ff;
> + p->avx10.raw[0].c = 0;
> + p->avx10.raw[0].d = 0;
> + if ( !p->feat.avx10 || !p->avx10.version ||
> + !p->avx10.vsz512 || !p->avx10.vsz256 || !p->avx10.vsz128 )
> + {
> + p->feat.avx10 = false;
> + memset(p->avx10.raw, 0, sizeof(p->avx10.raw));
> + }
> +
> p->extd.e1d &= ~CPUID_COMMON_1D_FEATURES;
>
> /* Most of Power/RAS hidden from guests. */
> @@ -400,6 +412,7 @@ static void __init guest_common_max_leav
> {
> p->basic.max_leaf = ARRAY_SIZE(p->basic.raw) - 1;
> p->feat.max_subleaf = ARRAY_SIZE(p->feat.raw) - 1;
> + p->avx10.max_subleaf = ARRAY_SIZE(p->avx10.raw) - 1;
> p->extd.max_leaf = 0x80000000U + ARRAY_SIZE(p->extd.raw) - 1;
> }
>
> @@ -408,6 +421,7 @@ static void __init guest_common_default_
> {
> p->basic.max_leaf = host_cpu_policy.basic.max_leaf;
> p->feat.max_subleaf = host_cpu_policy.feat.max_subleaf;
> + p->avx10.max_subleaf = host_cpu_policy.avx10.max_subleaf;
> p->extd.max_leaf = host_cpu_policy.extd.max_leaf;
> }
>
> @@ -503,6 +517,28 @@ static void __init guest_common_max_feat
> * function correctly when migrated here, even if ERMS isn't available.
> */
> __set_bit(X86_FEATURE_ERMS, fs);
> +
> + /*
> + * AVX10 is merely a re-declaration of a combination of AVX512 features.
> + * Synthesize the latter from the former, when available. Doing it the
> + * other way around would also require synthesizing the AVX10 CPUID leaf.
> + */
> + if ( test_bit(X86_FEATURE_AVX10, fs) )
> + {
> + __set_bit(X86_FEATURE_AVX512F, fs);
> + __set_bit(X86_FEATURE_AVX512VL, fs);
> + __set_bit(X86_FEATURE_AVX512BW, fs);
> + __set_bit(X86_FEATURE_AVX512DQ, fs);
> + __set_bit(X86_FEATURE_AVX512_BF16, fs);
> + __set_bit(X86_FEATURE_AVX512_FP16, fs);
> + __set_bit(X86_FEATURE_AVX512CD, fs);
> + __set_bit(X86_FEATURE_AVX512_BITALG, fs);
> + __set_bit(X86_FEATURE_AVX512_IFMA, fs);
> + __set_bit(X86_FEATURE_AVX512_VBMI, fs);
> + __set_bit(X86_FEATURE_AVX512_VBMI2, fs);
> + __set_bit(X86_FEATURE_AVX512_VNNI, fs);
> + __set_bit(X86_FEATURE_AVX512_VPOPCNTDQ, fs);
> + }
> }
>
> static void __init guest_common_default_feature_adjustments(uint32_t *fs)
> @@ -966,6 +1002,7 @@ void recalculate_cpuid_policy(struct dom
>
> p->basic.max_leaf = min(p->basic.max_leaf, max->basic.max_leaf);
> p->feat.max_subleaf = min(p->feat.max_subleaf, max->feat.max_subleaf);
> + p->avx10.max_subleaf = min(p->avx10.max_subleaf, max->avx10.max_subleaf);
> p->extd.max_leaf = 0x80000000U | min(p->extd.max_leaf & 0xffff,
> ((p->x86_vendor &
> (X86_VENDOR_AMD |
>
> X86_VENDOR_HYGON))
> @@ -1012,6 +1049,8 @@ void recalculate_cpuid_policy(struct dom
>
> if ( p->basic.max_leaf < XSTATE_CPUID )
> __clear_bit(X86_FEATURE_XSAVE, fs);
> + if ( p->basic.max_leaf < 0x24 )
> + __clear_bit(X86_FEATURE_AVX10, fs);
>
> sanitise_featureset(fs);
>
> @@ -1081,9 +1120,18 @@ void __init init_dom0_cpuid_policy(struc
> /* Apply dom0-cpuid= command line settings, if provided. */
> if ( dom0_cpuid_cmdline )
> {
> + const struct cpu_policy *max = is_pv_domain(d)
> + ? (IS_ENABLED(CONFIG_PV) ? &pv_max_cpu_policy : NULL)
> + : (IS_ENABLED(CONFIG_HVM) ? &hvm_max_cpu_policy : NULL);
> uint32_t fs[FSCAPINTS];
> unsigned int i;
>
> + if ( !max )
> + {
> + ASSERT_UNREACHABLE();
> + return;
> + }
> +
> x86_cpu_policy_to_featureset(p, fs);
>
> for ( i = 0; i < ARRAY_SIZE(fs); ++i )
> @@ -1093,6 +1141,13 @@ void __init init_dom0_cpuid_policy(struc
> }
>
> x86_cpu_featureset_to_policy(fs, p);
> +
> + /*
> + * Default-off features with their own leaves need those leaves
> + * re-populated from the max policy.
> + */
> + if ( p->feat.avx10 )
> + p->avx10 = max->avx10;
> }
>
> /*
> @@ -1125,6 +1180,8 @@ static void __init __maybe_unused build_
> sizeof(raw_cpu_policy.feat.raw));
> BUILD_BUG_ON(sizeof(raw_cpu_policy.xstate) !=
> sizeof(raw_cpu_policy.xstate.raw));
> + BUILD_BUG_ON(sizeof(raw_cpu_policy.avx10) !=
> + sizeof(raw_cpu_policy.avx10.raw));
> BUILD_BUG_ON(sizeof(raw_cpu_policy.extd) !=
> sizeof(raw_cpu_policy.extd.raw));
> }
> --- a/xen/arch/x86/cpuid.c
> +++ b/xen/arch/x86/cpuid.c
> @@ -226,6 +226,15 @@ void guest_cpuid(const struct vcpu *v, u
> *res = array_access_nospec(p->xstate.raw, subleaf);
> break;
>
> + case 0x24:
> + ASSERT(p->avx10.max_subleaf < ARRAY_SIZE(p->avx10.raw));
> + if ( subleaf > min_t(uint32_t, p->avx10.max_subleaf,
> + ARRAY_SIZE(p->avx10.raw) - 1) )
> + return;
> +
> + *res = array_access_nospec(p->avx10.raw, subleaf);
> + break;
> +
> default:
> *res = array_access_nospec(p->basic.raw, leaf);
> break;
> --- a/xen/arch/x86/lib/cpu-policy/copy-to-buffer.c
> +++ b/xen/arch/x86/lib/cpu-policy/copy-to-buffer.c
> @@ -123,6 +123,13 @@ int x86_cpuid_copy_to_buffer(const struc
> break;
> }
>
> + case 0x24:
> + for ( subleaf = 0;
> + subleaf <= MIN(p->avx10.max_subleaf,
> + ARRAY_SIZE(p->avx10.raw) - 1); ++subleaf )
> + COPY_LEAF(leaf, subleaf, &p->avx10.raw[subleaf]);
> + break;
> +
> default:
> COPY_LEAF(leaf, XEN_CPUID_NO_SUBLEAF, &p->basic.raw[leaf]);
> break;
> --- a/xen/arch/x86/lib/cpu-policy/copy-from-buffer.c
> +++ b/xen/arch/x86/lib/cpu-policy/copy-from-buffer.c
> @@ -108,6 +108,13 @@ int x86_cpuid_copy_from_buffer(struct cp
> array_access_nospec(p->xstate.raw, data.subleaf) = l;
> break;
>
> + case 0x24:
> + if ( data.subleaf >= ARRAY_SIZE(p->avx10.raw) )
> + goto out_of_range;
> +
> + array_access_nospec(p->avx10.raw, data.subleaf) = l;
> + break;
> +
> default:
> if ( data.subleaf != XEN_CPUID_NO_SUBLEAF )
> goto out_of_range;
> --- a/xen/arch/x86/lib/cpu-policy/cpuid.c
> +++ b/xen/arch/x86/lib/cpu-policy/cpuid.c
> @@ -125,6 +125,7 @@ void x86_cpu_policy_fill_native(struct c
> switch ( i )
> {
> case 0x4: case 0x7: case 0xb: case 0xd:
> + case 0x24:
> /* Multi-invocation leaves. Deferred. */
> continue;
> }
> @@ -218,6 +219,15 @@ void x86_cpu_policy_fill_native(struct c
> }
> }
>
> + if ( p->basic.max_leaf >= 0x24 )
> + {
> + cpuid_count_leaf(0x24, 0, &p->avx10.raw[0]);
> +
> + for ( i = 1; i <= MIN(p->avx10.max_subleaf,
> + ARRAY_SIZE(p->avx10.raw) - 1); ++i )
> + cpuid_count_leaf(0x24, i, &p->avx10.raw[i]);
Do we need to split the 0 iteration out? I guess we can just start from
i = 0 instead.
> + }
> +
> /* Extended leaves. */
> cpuid_leaf(0x80000000U, &p->extd.raw[0]);
> for ( i = 1; i <= MIN(p->extd.max_leaf & 0xffffU,
> @@ -287,6 +297,9 @@ void x86_cpu_policy_clear_out_of_range_l
> ARRAY_SIZE(p->xstate.raw) - 1);
> }
>
> + if ( p->basic.max_leaf < 0x24 )
> + memset(p->avx10.raw, 0, sizeof(p->avx10.raw));
> +
> zero_leaves(p->extd.raw,
> ((p->extd.max_leaf >> 16) == 0x8000
> ? (p->extd.max_leaf & 0xffff) + 1 : 0),
> @@ -299,6 +312,8 @@ void __init x86_cpu_policy_bound_max_lea
> min_t(uint32_t, p->basic.max_leaf, ARRAY_SIZE(p->basic.raw) - 1);
> p->feat.max_subleaf =
> min_t(uint32_t, p->feat.max_subleaf, ARRAY_SIZE(p->feat.raw) - 1);
> + p->avx10.max_subleaf =
> + min_t(uint32_t, p->avx10.max_subleaf, ARRAY_SIZE(p->avx10.raw) - 1);
> p->extd.max_leaf = 0x80000000U | min_t(uint32_t, p->extd.max_leaf &
> 0xffff,
> ARRAY_SIZE(p->extd.raw) - 1);
> }
> @@ -326,6 +341,8 @@ void x86_cpu_policy_shrink_max_leaves(st
> */
> p->basic.raw[0xd] = p->xstate.raw[0];
>
> + p->basic.raw[0x24] = p->avx10.raw[0];
> +
> for ( i = p->basic.max_leaf; i; --i )
> if ( p->basic.raw[i].a | p->basic.raw[i].b |
> p->basic.raw[i].c | p->basic.raw[i].d )
> --- a/xen/arch/x86/lib/cpu-policy/policy.c
> +++ b/xen/arch/x86/lib/cpu-policy/policy.c
> @@ -24,6 +24,10 @@ int x86_cpu_policies_are_compatible(cons
> if ( guest->feat.max_subleaf > host->feat.max_subleaf )
> FAIL_CPUID(7, 0);
>
> + if ( guest->avx10.version > host->avx10.version ||
> + guest->avx10.max_subleaf > host->avx10.max_subleaf )
> + FAIL_CPUID(0x24, 0);
> +
> if ( guest->extd.max_leaf > host->extd.max_leaf )
> FAIL_CPUID(0x80000000U, NA);
>
> --- a/xen/include/public/arch-x86/cpufeatureset.h
> +++ b/xen/include/public/arch-x86/cpufeatureset.h
> @@ -366,6 +366,7 @@ XEN_CPUFEATURE(PREFETCHI, 15*32
> XEN_CPUFEATURE(USER_MSR, 15*32+15) /*s U{RD,WR}MSR Instructions
> */
> XEN_CPUFEATURE(UIRET_UIF, 15*32+17) /* UIRET updates UIF */
> XEN_CPUFEATURE(CET_SSS, 15*32+18) /* CET Supervisor Shadow
> Stacks safe to use */
> +XEN_CPUFEATURE(AVX10, 15*32+19) /* AVX10 Converged Vector ISA
> */
> XEN_CPUFEATURE(SLSM, 15*32+24) /* Static Lockstep Mode */
>
> /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.eax, word 16 */
> --- a/xen/include/xen/lib/x86/cpu-policy.h
> +++ b/xen/include/xen/lib/x86/cpu-policy.h
> @@ -59,11 +59,12 @@ unsigned int x86_cpuid_lookup_vendor(uin
> */
> const char *x86_cpuid_vendor_to_str(unsigned int vendor);
>
> -#define CPUID_GUEST_NR_BASIC (0xdu + 1)
> +#define CPUID_GUEST_NR_BASIC (0x24u + 1)
> #define CPUID_GUEST_NR_CACHE (5u + 1)
> #define CPUID_GUEST_NR_FEAT (2u + 1)
> #define CPUID_GUEST_NR_TOPO (1u + 1)
> #define CPUID_GUEST_NR_XSTATE (62u + 1)
> +#define CPUID_GUEST_NR_AVX10 (0u + 1)
Intel specification now defines AVX10.2, which has an additional leaf
(even though the whole leaf is currently marked as "reserved").
> #define CPUID_GUEST_NR_EXTD_INTEL (0x8u + 1)
> #define CPUID_GUEST_NR_EXTD_AMD (0x21u + 1)
> #define CPUID_GUEST_NR_EXTD MAX(CPUID_GUEST_NR_EXTD_INTEL, \
> @@ -264,6 +265,19 @@ struct cpu_policy
> } comp[CPUID_GUEST_NR_XSTATE];
> } xstate;
>
> + /* Structured AVX10 information leaf: 0x00000024[xx] */
> + union {
> + struct cpuid_leaf raw[CPUID_GUEST_NR_AVX10];
> + struct {
> + /* Subleaf 0. */
> + uint32_t max_subleaf;
> + uint32_t version:8, :8;
> + bool vsz128:1, vsz256:1, vsz512:1;
> + uint32_t :13;
> + uint32_t /* c */:32, /* d */:32;
> + };
> + } avx10;
> +
> /* Extended leaves: 0x800000xx */
> union {
> struct cpuid_leaf raw[CPUID_GUEST_NR_EXTD];
> --- a/xen/tools/gen-cpuid.py
> +++ b/xen/tools/gen-cpuid.py
> @@ -294,7 +294,7 @@ def crunch_numbers(state):
> # enabled. Certain later extensions, acting on 256-bit vectors of
> # integers, better depend on AVX2 than AVX.
> AVX2: [AVX512F, VAES, VPCLMULQDQ, AVX_VNNI, AVX_IFMA, AVX_VNNI_INT8,
> - AVX_VNNI_INT16, SHA512, SM4],
> + AVX_VNNI_INT16, SHA512, SM4, AVX10],
>
I think we can instead make AVX10 depend on AVX-512. Especially since
> Any processor that enumerates support for Intel AVX10 will also
enumerate support for Intel AVX, Intel AVX2, and Intel AVX-512 (see
Table 16-2).
with AVX-512 depending on AVX2.
> # AVX512F is taken to mean hardware support for 512bit registers
> # (which in practice depends on the EVEX prefix to encode) as well
>
>
--
Teddy Astie | Vates XCP-ng Developer
XCP-ng & Xen Orchestra - Vates solutions
web: https://vates.tech
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |