[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] x86: Sync cpu/intel_cacheinfo.c with Linux kernel
# HG changeset patch # User Keir Fraser <keir.fraser@xxxxxxxxxx> # Date 1200920963 0 # Node ID f1947dddb5a05d7b03ec9c8d8d09fe4fd500c485 # Parent 60ee6f97cb19d24214f69627740b3a6719d056e4 x86: Sync cpu/intel_cacheinfo.c with Linux kernel Signed-off-by: Xiaowei Yang <xiaowei.yang@xxxxxxxxx> --- xen/arch/x86/cpu/intel_cacheinfo.c | 347 ++++++++++++++++++++++++++++++++++--- 1 files changed, 321 insertions(+), 26 deletions(-) diff -r 60ee6f97cb19 -r f1947dddb5a0 xen/arch/x86/cpu/intel_cacheinfo.c --- a/xen/arch/x86/cpu/intel_cacheinfo.c Mon Jan 21 13:08:44 2008 +0000 +++ b/xen/arch/x86/cpu/intel_cacheinfo.c Mon Jan 21 13:09:23 2008 +0000 @@ -1,6 +1,16 @@ +/* + * Routines to indentify caches on Intel CPU. + * + * Changes: + * Venkatesh Pallipadi : Adding cache identification through cpuid(4) + * Ashok Raj <ashok.raj@xxxxxxxxx>: Work with CPU hotplug infrastructure. + * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. + */ + #include <xen/config.h> #include <xen/init.h> #include <xen/lib.h> +#include <xen/errno.h> #include <asm/processor.h> #define LVL_1_INST 1 @@ -17,7 +27,7 @@ struct _cache_table }; /* all the cache descriptor types we care about (no TLB or trace cache entries) */ -static struct _cache_table cache_table[] __devinitdata = +static struct _cache_table cache_table[] __cpuinitdata = { { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ @@ -30,13 +40,23 @@ static struct _cache_table cache_table[] { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */ { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */ { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */ + { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */ { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */ + { 0x46, LVL_3, 4096 }, /* 4-way set assoc, 64 byte line size */ + { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */ + { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ + { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */ + { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ + { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ + { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ @@ -44,6 +64,7 @@ static struct _cache_table cache_table[] { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */ { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ + { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */ { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */ { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ @@ -60,15 +81,276 @@ static struct _cache_table cache_table[] { 0x00, 0, 0} }; -unsigned int __devinit init_intel_cacheinfo(struct cpuinfo_x86 *c) + +enum _cache_type +{ + CACHE_TYPE_NULL = 0, + CACHE_TYPE_DATA = 1, + CACHE_TYPE_INST = 2, + CACHE_TYPE_UNIFIED = 3 +}; + +union _cpuid4_leaf_eax { + struct { + enum _cache_type type:5; + unsigned int level:3; + unsigned int is_self_initializing:1; + unsigned int is_fully_associative:1; + unsigned int reserved:4; + unsigned int num_threads_sharing:12; + unsigned int num_cores_on_die:6; + } split; + u32 full; +}; + +union _cpuid4_leaf_ebx { + struct { + unsigned int coherency_line_size:12; + unsigned int physical_line_partition:10; + unsigned int ways_of_associativity:10; + } split; + u32 full; +}; + +union _cpuid4_leaf_ecx { + struct { + unsigned int number_of_sets:32; + } split; + u32 full; +}; + +struct _cpuid4_info { + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + unsigned long size; + cpumask_t shared_cpu_map; +}; + +unsigned short num_cache_leaves; + +/* AMD doesn't have CPUID4. Emulate it here to report the same + information to the user. This makes some assumptions about the machine: + L2 not shared, no SMT etc. that is currently true on AMD CPUs. + + In theory the TLBs could be reported as fake type (they are in "dummy"). + Maybe later */ +union l1_cache { + struct { + unsigned line_size : 8; + unsigned lines_per_tag : 8; + unsigned assoc : 8; + unsigned size_in_kb : 8; + }; + unsigned val; +}; + +union l2_cache { + struct { + unsigned line_size : 8; + unsigned lines_per_tag : 4; + unsigned assoc : 4; + unsigned size_in_kb : 16; + }; + unsigned val; +}; + +union l3_cache { + struct { + unsigned line_size : 8; + unsigned lines_per_tag : 4; + unsigned assoc : 4; + unsigned res : 2; + unsigned size_encoded : 14; + }; + unsigned val; +}; + +static const unsigned short assocs[] = { + [1] = 1, [2] = 2, [4] = 4, [6] = 8, + [8] = 16, [0xa] = 32, [0xb] = 48, + [0xc] = 64, + [0xf] = 0xffff // ?? +}; + +static const unsigned char levels[] = { 1, 1, 2, 3 }; +static const unsigned char types[] = { 1, 2, 3, 3 }; + +static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, + union _cpuid4_leaf_ebx *ebx, + union _cpuid4_leaf_ecx *ecx) +{ + unsigned dummy; + unsigned line_size, lines_per_tag, assoc, size_in_kb; + union l1_cache l1i, l1d; + union l2_cache l2; + union l3_cache l3; + union l1_cache *l1 = &l1d; + + eax->full = 0; + ebx->full = 0; + ecx->full = 0; + + cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val); + cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val); + + switch (leaf) { + case 1: + l1 = &l1i; + case 0: + if (!l1->val) + return; + assoc = l1->assoc; + line_size = l1->line_size; + lines_per_tag = l1->lines_per_tag; + size_in_kb = l1->size_in_kb; + break; + case 2: + if (!l2.val) + return; + assoc = l2.assoc; + line_size = l2.line_size; + lines_per_tag = l2.lines_per_tag; + /* cpu_data has errata corrections for K7 applied */ + size_in_kb = current_cpu_data.x86_cache_size; + break; + case 3: + if (!l3.val) + return; + assoc = l3.assoc; + line_size = l3.line_size; + lines_per_tag = l3.lines_per_tag; + size_in_kb = l3.size_encoded * 512; + break; + default: + return; + } + + eax->split.is_self_initializing = 1; + eax->split.type = types[leaf]; + eax->split.level = levels[leaf]; + if (leaf == 3) + eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1; + else + eax->split.num_threads_sharing = 0; + eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; + + + if (assoc == 0xf) + eax->split.is_fully_associative = 1; + ebx->split.coherency_line_size = line_size - 1; + ebx->split.ways_of_associativity = assocs[assoc] - 1; + ebx->split.physical_line_partition = lines_per_tag - 1; + ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / + (ebx->split.ways_of_associativity + 1) - 1; +} + +static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +{ + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + unsigned edx; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + amd_cpuid4(index, &eax, &ebx, &ecx); + else + cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); + if (eax.split.type == CACHE_TYPE_NULL) + return -EIO; /* better error ? */ + + this_leaf->eax = eax; + this_leaf->ebx = ebx; + this_leaf->ecx = ecx; + this_leaf->size = (ecx.split.number_of_sets + 1) * + (ebx.split.coherency_line_size + 1) * + (ebx.split.physical_line_partition + 1) * + (ebx.split.ways_of_associativity + 1); + return 0; +} + +static int __cpuinit find_num_cache_leaves(void) +{ + unsigned int eax, ebx, ecx, edx; + union _cpuid4_leaf_eax cache_eax; + int i = -1; + + do { + ++i; + /* Do cpuid(4) loop to find out num_cache_leaves */ + cpuid_count(4, i, &eax, &ebx, &ecx, &edx); + cache_eax.full = eax; + } while (cache_eax.split.type != CACHE_TYPE_NULL); + return i; +} + +unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) { unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ - - if (c->cpuid_level > 1) { + unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ + unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ + unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; + + if (c->cpuid_level > 3) { + static int is_initialized; + + if (is_initialized == 0) { + /* Init num_cache_leaves from boot CPU */ + num_cache_leaves = find_num_cache_leaves(); + is_initialized++; + } + + /* + * Whenever possible use cpuid(4), deterministic cache + * parameters cpuid leaf to find the cache details + */ + for (i = 0; i < num_cache_leaves; i++) { + struct _cpuid4_info this_leaf; + + int retval; + + retval = cpuid4_cache_lookup(i, &this_leaf); + if (retval >= 0) { + switch(this_leaf.eax.split.level) { + case 1: + if (this_leaf.eax.split.type == + CACHE_TYPE_DATA) + new_l1d = this_leaf.size/1024; + else if (this_leaf.eax.split.type == + CACHE_TYPE_INST) + new_l1i = this_leaf.size/1024; + break; + case 2: + new_l2 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l2_id = c->apicid >> index_msb; + break; + case 3: + new_l3 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l3_id = c->apicid >> index_msb; + break; + default: + break; + } + } + } + } + /* + * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for + * trace cache + */ + if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { /* supports eax=2 call */ int i, j, n; int regs[4]; unsigned char *dp = (unsigned char *)regs; + int only_trace = 0; + + if (num_cache_leaves != 0 && c->x86 == 15) + only_trace = 1; /* Number of times to iterate */ n = cpuid_eax(2) & 0xFF; @@ -90,6 +372,8 @@ unsigned int __devinit init_intel_cachei while (cache_table[k].descriptor != 0) { if (cache_table[k].descriptor == des) { + if (only_trace && cache_table[k].cache_type != LVL_TRACE) + break; switch (cache_table[k].cache_type) { case LVL_1_INST: l1i += cache_table[k].size; @@ -115,28 +399,39 @@ unsigned int __devinit init_intel_cachei } } } - - if ( trace ) - printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); - else if ( l1i ) - printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); - if ( l1d ) - printk(", L1 D cache: %dK\n", l1d); - else - printk("\n"); - if ( l2 ) - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); - if ( l3 ) - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); - - /* - * This assumes the L3 cache is shared; it typically lives in - * the northbridge. The L1 caches are included by the L2 - * cache, and so should not be included for the purpose of - * SMP switching weights. - */ - c->x86_cache_size = l2 ? l2 : (l1i+l1d); - } + } + + if (new_l1d) + l1d = new_l1d; + + if (new_l1i) + l1i = new_l1i; + + if (new_l2) { + l2 = new_l2; + } + + if (new_l3) { + l3 = new_l3; + } + + if (trace) + printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); + else if ( l1i ) + printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); + + if (l1d) + printk(", L1 D cache: %dK\n", l1d); + else + printk("\n"); + + if (l2) + printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); + + if (l3) + printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); + + c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); return l2; } _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |