Xen project Mailing List

Re: [Xen-devel] [PATCH v3 10/28] xen/x86: Generate deep dependencies of features

To: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>

From: Konrad Rzeszutek Wilk <konrad@xxxxxxxxxx>

Date: Thu, 17 Mar 2016 15:45:40 -0400

Cc: Jan Beulich <JBeulich@xxxxxxxx>, Xen-devel <xen-devel@xxxxxxxxxxxxx>

Delivery-date: Thu, 17 Mar 2016 19:45:52 +0000

List-id: Xen developer discussion <xen-devel.lists.xen.org>

On Tue, Mar 15, 2016 at 03:35:06PM +0000, Andrew Cooper wrote: > Some features depend on other features. Working out and maintaining the exact > dependency tree is complicated, so it is expressed in the automatic generation > script, and flattened for faster runtime use. > > Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> > --- > CC: Jan Beulich <JBeulich@xxxxxxxx> > > v3: > * Vastly more reserch and comments > v2: > * New > --- > xen/arch/x86/cpuid.c | 54 +++++++++++++++++ > xen/include/asm-x86/cpuid.h | 2 + > xen/tools/gen-cpuid.py | 139 > +++++++++++++++++++++++++++++++++++++++++++- > 3 files changed, 194 insertions(+), 1 deletion(-) > > diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c > index 174cfa0..17487f5 100644 > --- a/xen/arch/x86/cpuid.c > +++ b/xen/arch/x86/cpuid.c > @@ -11,6 +11,7 @@ const uint32_t special_features[] = INIT_SPECIAL_FEATURES; > static const uint32_t __initconst pv_featuremask[] = INIT_PV_FEATURES; > static const uint32_t __initconst hvm_shadow_featuremask[] = > INIT_HVM_SHADOW_FEATURES; > static const uint32_t __initconst hvm_hap_featuremask[] = > INIT_HVM_HAP_FEATURES; > +static const uint32_t __initconst deep_features[] = INIT_DEEP_FEATURES; > > uint32_t __read_mostly raw_featureset[FSCAPINTS]; > uint32_t __read_mostly pv_featureset[FSCAPINTS]; > @@ -18,12 +19,34 @@ uint32_t __read_mostly hvm_featureset[FSCAPINTS]; > > static void __init sanitise_featureset(uint32_t *fs) > { > + uint32_t disabled_features[FSCAPINTS]; > unsigned int i; > > for ( i = 0; i < FSCAPINTS; ++i ) > { > /* Clamp to known mask. */ > fs[i] &= known_features[i]; > + > + /* > + * Identify which features with deep dependencies have been > + * disabled. > + */ > + disabled_features[i] = ~fs[i] & deep_features[i]; > + } > + > + for_each_set_bit(i, (void *)disabled_features, > + sizeof(disabled_features) * 8) > + { > + const uint32_t *dfs = lookup_deep_deps(i); > + unsigned int j; > + > + ASSERT(dfs); /* deep_features[] should guarentee this. 
*/ > + > + for ( j = 0; j < FSCAPINTS; ++j ) > + { > + fs[j] &= ~dfs[j]; > + disabled_features[j] &= ~dfs[j]; What if this clears the entries you have already skipped over? Say you are at word 2 (i == 2) and the disabled_features[j] update clears a bit in word 1. The loop (for_each_set_bit) won't notice this. And furthermore, let's assume that the clearing of a bit in word 1 should have done another dependency check which would clear something at word 7. You would miss that? I think? > + } > } > > /* > @@ -162,6 +185,36 @@ void __init calculate_featuresets(void) > calculate_hvm_featureset(); > } > > +const uint32_t * __init lookup_deep_deps(uint32_t feature) > +{ > + static const struct { > + uint32_t feature; > + uint32_t fs[FSCAPINTS]; > + } deep_deps[] __initconst = INIT_DEEP_DEPS; > + unsigned int start = 0, end = ARRAY_SIZE(deep_deps); > + > + BUILD_BUG_ON(ARRAY_SIZE(deep_deps) != NR_DEEP_DEPS); > + > + /* Fast early exit. */ > + if ( !test_bit(feature, deep_features) ) > + return NULL; > + > + /* deep_deps[] is sorted. Perform a binary search. 
*/ > + while ( start < end ) > + { > + unsigned int mid = start + ((end - start) / 2); > + > + if ( deep_deps[mid].feature > feature ) > + end = mid; > + else if ( deep_deps[mid].feature < feature ) > + start = mid + 1; > + else > + return deep_deps[mid].fs; > + } > + > + return NULL; > +} > + > static void __init __maybe_unused build_assertions(void) > { > BUILD_BUG_ON(ARRAY_SIZE(known_features) != FSCAPINTS); > @@ -169,6 +222,7 @@ static void __init __maybe_unused build_assertions(void) > BUILD_BUG_ON(ARRAY_SIZE(pv_featuremask) != FSCAPINTS); > BUILD_BUG_ON(ARRAY_SIZE(hvm_shadow_featuremask) != FSCAPINTS); > BUILD_BUG_ON(ARRAY_SIZE(hvm_hap_featuremask) != FSCAPINTS); > + BUILD_BUG_ON(ARRAY_SIZE(deep_features) != FSCAPINTS); > } > > /* > diff --git a/xen/include/asm-x86/cpuid.h b/xen/include/asm-x86/cpuid.h > index 5041bcd..4725672 100644 > --- a/xen/include/asm-x86/cpuid.h > +++ b/xen/include/asm-x86/cpuid.h > @@ -29,6 +29,8 @@ extern uint32_t hvm_featureset[FSCAPINTS]; > > void calculate_featuresets(void); > > +const uint32_t *lookup_deep_deps(uint32_t feature); > + > #endif /* __ASSEMBLY__ */ > #endif /* !__X86_CPUID_H__ */ > > diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py > index 452c26a..ed368cb 100755 > --- a/xen/tools/gen-cpuid.py > +++ b/xen/tools/gen-cpuid.py > @@ -144,6 +144,127 @@ def crunch_numbers(state): > state.hvm_shadow = featureset_to_uint32s(state.raw_hvm_shadow, > nr_entries) > state.hvm_hap = featureset_to_uint32s(state.raw_hvm_hap, nr_entries) > > + # > + # Feature dependency information. > + # > + # !!! WARNING !!! > + # > + # A lot of this information is derived from the written text of vendors > + # software manuals, rather than directly from a statement. As such, it > + # does contain guesswork and assumptions, and may not accurately match > + # hardware implementations. > + # > + # It is however designed to create an end result for a guest which does > + # plausibly match real hardware. > + # > + # !!! WARNING !!! 
> + # > + # The format of this dictionary is that the feature in the key is a > direct > + # prerequisite of each feature in the value. s/of/for/ ? s/in the/enumerated/ > + # > + # The first consideration is about which functionality is physically > built s/about// > + # on top of other features. The second consideration, which is more > + # subjective, is whether real hardware would ever be found supporting > + # feature X but not Y. > + # > + deps = { > + # FPU is taken to mean support for the x87 regisers as well as the s/regisers/registers/ > + # instructions. MMX is documented to alias the %MM registers over > the > + # x87 %ST registers in hardware. > + FPU: [MMX], > + > + # The PSE36 feature indicates that reserved bits in a PSE superpage > + # may be used as extra physical address bits. > + PSE: [PSE36], > + > + # Entering Long Mode requires that %CR4.PAE is set. The NX pagetable > + # bit is only representable in the 64bit PTE format offered by PAE. > + PAE: [LM, NX], > + > + # APIC is special, but X2APIC does depend on APIC being available in > + # the first place. > + APIC: [X2APIC], > + > + # AMD built MMXExtentions and 3DNow as extentions to MMX. > + MMX: [MMXEXT, _3DNOW] Why the underscore? > + > + # The FXSAVE/FXRSTOR instructions were introduced into hardware > before > + # SSE, which is why they behave differently based on %CR4.OSFXSAVE > and > + # have their own feature bit. AMD however introduce the Fast FXSR s/introduce/introduced/ > + # feature as an optimisation. > + FXSR: [FFXSR], > + > + # SSE is taken to mean support for the %XMM registers as well as the > + # instructions. The SSE extentions were re-specified as core for > + # 64bit support. > + SSE: [SSE2, LM], > + > + # SSE2 was also re-specified as core for 64bit. The AESNI and SHA > + # instruction groups are documented to require checking for SSE2 > + # support as a prerequisite. > + SSE2: [SSE3, LM, AESNI, SHA], > + > + # AMD K10 processors has SSE3 and SSE4A. 
Bobcat/Barcelona processors > + # subsequently included SSSE3, and Bulldozer subsequently included > + # SSE4_1. Intel have never shipped SSE4A. > + SSE3: [SSSE3, SSE4_1, SSE4A], > + SSE4_1: [SSE4_2], > + > + # XSAVE is an extra set of instructions for state management, but > + # doesn't constitue new state itself. Some of the dependent features > + # are instructions built on top of base XSAVE, while others are new > + # instruction groups which are specified to require XSAVE for state > + # management. > + XSAVE: [XSAVEOPT, XSAVEC, XGETBV1, XSAVES, AVX, MPX], > + > + # AVX is taken to mean hardware support for VEX encoded instructions, > + # 256bit registers, and the instructions themselves. Each of these > + # subsequent instruction groups may only be VEX encoded. > + AVX: [FMA, FMA4, F16C, AVX2, XOP], > + > + # CX16 is only encodable in Long Mode. LAHF_LM indicates that the > + # SAHF/LAHF instructions are reintroduced in Long Mode. 1GB > + # superpages are only available in 4 level paging. > + LM: [CX16, LAHF_LM, PAGE1GB], > + > + # AMD K6-2+ and K6-III processors shipped with 3DNow+, beyond the > + # standard 3DNow in the earlier K6 processors. 
> + _3DNOW: [_3DNOWEXT], > + } > + > + deep_features = tuple(sorted(deps.keys())) > + state.deep_deps = {} > + > + for feat in deep_features: > + > + seen = [feat] > + to_process = list(deps[feat]) > + > + while len(to_process): > + > + # To debug, uncomment the following lines: > + # def repl(l): > + # return "[" + ", ".join((state.names[x] for x in l)) + "]" > + # print >>sys.stderr, "Feature %s, seen %s, to_process %s " % \ > + # (state.names[feat], repl(seen), repl(to_process)) > + > + f = to_process.pop(0) > + > + if f in seen: > + raise Fail("ERROR: Cycle found with %s when processing %s" > + % (state.names[f], state.names[feat])) > + > + seen.append(f) > + to_process = list(set(to_process + deps.get(f, []))) > + > + state.deep_deps[feat] = seen[1:] > + > + state.deep_features = featureset_to_uint32s(deps.keys(), nr_entries) > + state.nr_deep_deps = len(state.deep_deps.keys()) > + > + for k, v in state.deep_deps.iteritems(): > + state.deep_deps[k] = featureset_to_uint32s(v, nr_entries) > + > > def write_results(state): > state.output.write( > @@ -170,6 +291,12 @@ def write_results(state): > #define INIT_HVM_SHADOW_FEATURES { \\\n%s\n} > > #define INIT_HVM_HAP_FEATURES { \\\n%s\n} > + > +#define NR_DEEP_DEPS %sU > + > +#define INIT_DEEP_FEATURES { \\\n%s\n} > + > +#define INIT_DEEP_DEPS { \\ > """ % (state.nr_entries, > state.common_1d, > format_uint32s(state.known, 4), > @@ -177,10 +304,20 @@ def write_results(state): > format_uint32s(state.pv, 4), > format_uint32s(state.hvm_shadow, 4), > format_uint32s(state.hvm_hap, 4), > + state.nr_deep_deps, > + format_uint32s(state.deep_features, 4), > )) > > + for dep in sorted(state.deep_deps.keys()): > + state.output.write( > + " { %#xU, /* %s */ { \\\n%s\n }, }, \\\n" > + % (dep, state.names[dep], > + format_uint32s(state.deep_deps[dep], 8) > + )) > + > state.output.write( > -""" > +"""} > + > #endif /* __XEN_X86__FEATURESET_DATA__ */ > """) > > -- > 2.1.4 > > > _______________________________________________ > 
Xen-devel mailing list > Xen-devel@xxxxxxxxxxxxx > http://lists.xen.org/xen-devel _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.