[Xen-changelog] [xen-unstable] Cleanup Intel CMCI support.
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1229947640 0
# Node ID aa0fee8a6ef56dacca4fd6ef54851dad806cb5c5
# Parent 4d5203f95498ff83b4fbcd48500c1d2d20b23f91
Cleanup Intel CMCI support.

Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
---
 xen/arch/x86/cpu/mcheck/mce.c       |   10 -
 xen/arch/x86/cpu/mcheck/mce_intel.c |  225 ++++++++++++++----------------------
 xen/arch/x86/smpboot.c              |   61 +++------
 xen/common/stop_machine.c           |   31 +---
 xen/include/asm-x86/processor.h     |    2 
 xen/include/asm-x86/smp.h           |    2 
 xen/include/xen/stop_machine.h      |    4 
 7 files changed, 126 insertions(+), 209 deletions(-)

diff -r 4d5203f95498 -r aa0fee8a6ef5 xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c Mon Dec 22 08:12:33 2008 +0000
+++ b/xen/arch/x86/cpu/mcheck/mce.c Mon Dec 22 12:07:20 2008 +0000
@@ -115,16 +115,6 @@ int mce_available(struct cpuinfo_x86 *c)
 {
     return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
 }
-
-/*Make sure there are no machine check on offlined or suspended CPUs*/
-void mce_disable_cpu(void)
-{
-    if (!mce_available(&current_cpu_data) || mce_disabled == 1)
-        return;
-    printk(KERN_DEBUG "MCE: disable mce on CPU%d\n", smp_processor_id());
-    clear_in_cr4(X86_CR4_MCE);
-}
-
 
 /* This has to be run for each processor */
 void mcheck_init(struct cpuinfo_x86 *c)
diff -r 4d5203f95498 -r aa0fee8a6ef5 xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Dec 22 08:12:33 2008 +0000
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Dec 22 12:07:20 2008 +0000
@@ -18,7 +18,7 @@ extern int firstbank;
 
 #ifdef CONFIG_X86_MCE_THERMAL
 static void unexpected_thermal_interrupt(struct cpu_user_regs *regs)
-{ 
+{
     printk(KERN_ERR "Thermal: CPU%d: Unexpected LVT TMR interrupt!\n",
                 smp_processor_id());
     add_taint(TAINT_MACHINE_CHECK);
@@ -67,11 +67,11 @@ static void intel_init_thermal(struct cp
 
     /* Thermal monitoring */
     if (!cpu_has(c, X86_FEATURE_ACPI))
-        return; /* -ENODEV */ 
+        return; /* -ENODEV */
 
     /* Clock modulation */
     if (!cpu_has(c, X86_FEATURE_ACC))
-        return; /* -ENODEV */ 
+        return; /* -ENODEV */
 
     /* first check if its enabled already, in which case there might
      * be some SMM goo which handles it, so we can't even put a handler
@@ -87,7 +87,7 @@ static void intel_init_thermal(struct cp
     if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13)))
         tm2 = 1;
 
-    /* check whether a vector already exists, temporarily masked? */ 
+    /* check whether a vector already exists, temporarily masked? */
     if (h & APIC_VECTOR_MASK) {
         printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already installed\n",
                  cpu, (h & APIC_VECTOR_MASK));
@@ -95,8 +95,8 @@ static void intel_init_thermal(struct cp
     }
 
     /* The temperature transition interrupt handler setup */
-    h = THERMAL_APIC_VECTOR;    /* our delivery vector */ 
-    h |= (APIC_DM_FIXED | APIC_LVT_MASKED);  /* we'll mask till we're ready */ 
+    h = THERMAL_APIC_VECTOR;    /* our delivery vector */
+    h |= (APIC_DM_FIXED | APIC_LVT_MASKED);  /* we'll mask till we're ready */
     apic_write_around(APIC_LVTTHMR, h);
 
     rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
@@ -121,7 +121,7 @@ static inline void intel_get_extended_ms
     if (nr_intel_ext_msrs == 0)
         return;
 
-    /*this function will called when CAP(9).MCG_EXT_P = 1*/ 
+    /*this function will called when CAP(9).MCG_EXT_P = 1*/
     memset(mc_ext, 0, sizeof(struct mcinfo_extended));
     mc_ext->common.type = MC_TYPE_EXTENDED;
     mc_ext->common.size = sizeof(mc_ext);
@@ -198,7 +198,7 @@ static int machine_check_poll(struct mc_
        struct mcinfo_bank mcb;
        /*For CMCI, only owners checks the owned MSRs*/
        if ( !test_bit(i, __get_cpu_var(mce_banks_owned)) &&
-           (calltype & MC_FLAG_CMCI) )
+            (calltype & MC_FLAG_CMCI) )
            continue;
 
        rdmsrl(MSR_IA32_MC0_STATUS + 4 * i, status);
@@ -277,38 +277,38 @@ static fastcall void intel_machine_check
     u32 mcgstl, mcgsth;
     int i;
 
-        rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
-        if (mcgstl & (1<<0))       /* Recoverable ? */
-                recover=0;
+    rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+    if (mcgstl & (1<<0))       /* Recoverable ? */
+        recover=0;
 
-        printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
-                smp_processor_id(), mcgsth, mcgstl);
+    printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+           smp_processor_id(), mcgsth, mcgstl);
 
     for (i=0; i<nr_mce_banks; i++) {
-                rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
-                if (high & (1<<31)) {
-                        if (high & (1<<29))
-                                recover |= 1;
-                        if (high & (1<<25))
-                                recover |= 2;
-                        printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
-                        high &= ~(1<<31);
-                        if (high & (1<<27)) {
-                                rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
-                                printk ("[%08x%08x]", ahigh, alow);
-                        }
-                        if (high & (1<<26)) {
-                                rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
-                                printk (" at %08x%08x", ahigh, alow);
-                        }
-                        printk ("\n");
-                }
+        rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+        if (high & (1<<31)) {
+            if (high & (1<<29))
+                recover |= 1;
+            if (high & (1<<25))
+                recover |= 2;
+            printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
+            high &= ~(1<<31);
+            if (high & (1<<27)) {
+                rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+                printk ("[%08x%08x]", ahigh, alow);
+            }
+            if (high & (1<<26)) {
+                rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+                printk (" at %08x%08x", ahigh, alow);
+            }
+            printk ("\n");
+        }
     }
 
     if (recover & 2)
-                mc_panic ("CPU context corrupt");
+        mc_panic ("CPU context corrupt");
     if (recover & 1)
-                mc_panic ("Unable to continue");
+        mc_panic ("Unable to continue");
     printk(KERN_EMERG "Attempting to continue.\n");
 
     /*
@@ -317,25 +317,21 @@ static fastcall void intel_machine_check
  * for errors if the OS could not log the error.
  */
     for (i=0; i<nr_mce_banks; i++) {
-                u32 msr;
-                msr = MSR_IA32_MC0_STATUS+i*4;
-                rdmsr (msr, low, high);
-                if (high&(1<<31)) {
-                        /* Clear it */
-                        wrmsr(msr, 0UL, 0UL);
-                        /* Serialize */
-                        wmb();
-                        add_taint(TAINT_MACHINE_CHECK);
-                }
+        u32 msr;
+        msr = MSR_IA32_MC0_STATUS+i*4;
+        rdmsr (msr, low, high);
+        if (high&(1<<31)) {
+            /* Clear it */
+            wrmsr(msr, 0UL, 0UL);
+            /* Serialize */
+            wmb();
+            add_taint(TAINT_MACHINE_CHECK);
+        }
     }
     mcgstl &= ~(1<<2);
     wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
 }
 
-extern void (*cpu_down_handler)(int down_cpu);
-extern void (*cpu_down_rollback_handler)(int down_cpu);
-extern void mce_disable_cpu(void);
 
-static bool_t cmci_clear_lock = 0;
 static DEFINE_SPINLOCK(cmci_discover_lock);
 static DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks);
@@ -350,19 +346,16 @@ static int do_cmci_discover(int i)
     rdmsrl(msr, val);
     /* Some other CPU already owns this bank. */
     if (val & CMCI_EN) {
-        clear_bit(i, __get_cpu_var(mce_banks_owned));
-        goto out;
+        clear_bit(i, __get_cpu_var(mce_banks_owned));
+        goto out;
     }
 
     wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD);
     rdmsrl(msr, val);
     if (!(val & CMCI_EN)) {
-        /*
-         * This bank does not support CMCI. The polling
-         * timer has to handle it.
-         */
-        set_bit(i, __get_cpu_var(no_cmci_banks));
-        return 0;
+        /* This bank does not support CMCI. Polling timer has to handle it. */
+        set_bit(i, __get_cpu_var(no_cmci_banks));
+        return 0;
     }
     set_bit(i, __get_cpu_var(mce_banks_owned));
 out:
@@ -370,23 +363,25 @@ out:
     clear_bit(i, __get_cpu_var(no_cmci_banks));
     return 1;
 }
 
-void cmci_discover(void)
-{
+static void cmci_discover(void)
+{
+    unsigned long flags;
     int i;
     printk(KERN_DEBUG "CMCI: find owner on CPU%d\n", smp_processor_id());
-    spin_lock(&cmci_discover_lock);
-    for (i = 0; i < nr_mce_banks; i++) {
-        /*If the cpu is the bank owner, need not re-discover*/
-        if (test_bit(i, __get_cpu_var(mce_banks_owned)))
-            continue;
-        do_cmci_discover(i);
-    }
-    spin_unlock(&cmci_discover_lock);
+
+    spin_lock_irqsave(&cmci_discover_lock, flags);
+
+    for (i = 0; i < nr_mce_banks; i++)
+        if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
+            do_cmci_discover(i);
+
+    spin_unlock_irqrestore(&cmci_discover_lock, flags);
+
     printk(KERN_DEBUG "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n",
-        smp_processor_id(),
-        *((unsigned long *)__get_cpu_var(mce_banks_owned)),
-        *((unsigned long *)__get_cpu_var(no_cmci_banks)));
+           smp_processor_id(),
+           *((unsigned long *)__get_cpu_var(mce_banks_owned)),
+           *((unsigned long *)__get_cpu_var(no_cmci_banks)));
 }
 
 /*
@@ -402,11 +397,21 @@ void cmci_discover(void)
 
 static void mce_set_owner(void)
 {
-
     if (!cmci_support || mce_disabled == 1)
         return;
 
     cmci_discover();
+}
+
+static void __cpu_mcheck_distribute_cmci(void *unused)
+{
+    cmci_discover();
+}
+
+void cpu_mcheck_distribute_cmci(void)
+{
+    if (cmci_support && !mce_disabled)
+        on_each_cpu(__cpu_mcheck_distribute_cmci, NULL, 0, 0);
 }
 
 static void clear_cmci(void)
@@ -431,62 +436,12 @@ static void clear_cmci(void)
     }
 }
 
-/*we need to re-set cmci owners when cpu_down fail or cpu_up*/
-static void cmci_reenable_cpu(void *h)
-{
-    if (!mce_available(&current_cpu_data) || mce_disabled == 1)
-        return;
-    printk(KERN_DEBUG "CMCI: reenable mce on CPU%d\n", smp_processor_id());
-    mce_set_owner();
-    set_in_cr4(X86_CR4_MCE);
-}
-
-/* When take cpu_down, we need to execute the impacted cmci_owner judge algorithm
- * First, we need to clear the ownership on the dead CPU
- * Then, other CPUs will check whether to take the bank's ownership from down_cpu
- * CPU0 need not and "never" execute this path
-*/
-void __cpu_clear_cmci( int down_cpu)
-{
-    int cpu = smp_processor_id();
-
-    if (!cmci_support && mce_disabled == 1)
-        return;
-
-    if (cpu == 0) {
-        printk(KERN_DEBUG "CMCI: CPU0 need not be cleared\n");
-        return;
-    }
-
-    local_irq_disable();
-    if (cpu == down_cpu){
-        mce_disable_cpu();
+void cpu_mcheck_disable(void)
+{
+    clear_in_cr4(X86_CR4_MCE);
+
+    if (cmci_support && !mce_disabled)
         clear_cmci();
-        wmb();
-        test_and_set_bool(cmci_clear_lock);
-        return;
-    }
-    while (!cmci_clear_lock)
-        cpu_relax();
-    if (cpu != down_cpu)
-        mce_set_owner();
-
-    test_and_clear_bool(cmci_clear_lock);
-    local_irq_enable();
-
-}
-
-void __cpu_clear_cmci_rollback( int down_cpu)
-{
-    cpumask_t down_map;
-    if (!cmci_support || mce_disabled == 1)
-        return;
-
-    cpus_clear(down_map);
-    cpu_set(down_cpu, down_map);
-    printk(KERN_ERR "CMCI: cpu_down fail. "
-        "Reenable cmci on CPU%d\n", down_cpu);
-    on_selected_cpus(down_map, cmci_reenable_cpu, NULL, 1, 1);
 }
 
 static void intel_init_cmci(struct cpuinfo_x86 *c)
@@ -511,11 +466,8 @@ static void intel_init_cmci(struct cpuin
     apic |= (APIC_DM_FIXED | APIC_LVT_MASKED);
     apic_write_around(APIC_CMCI, apic);
 
-    /*now clear mask flag*/ 
     l = apic_read(APIC_CMCI);
     apic_write_around(APIC_CMCI, l & ~APIC_LVT_MASKED);
-    cpu_down_handler = __cpu_clear_cmci;
-    cpu_down_rollback_handler = __cpu_clear_cmci_rollback;
 }
 
 fastcall void smp_cmci_interrupt(struct cpu_user_regs *regs)
@@ -588,7 +540,7 @@ static void mce_init(void)
     set_in_cr4(X86_CR4_MCE);
 
     rdmsr (MSR_IA32_MCG_CAP, l, h);
-    if (l & MCG_CTL_P) /* Control register present ? */ 
+    if (l & MCG_CTL_P) /* Control register present ? */
         wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
 
     for (i = firstbank; i < nr_mce_banks; i++)
@@ -611,15 +563,14 @@
 /*p4/p6 faimily has similar MCA initialization process*/
 void intel_mcheck_init(struct cpuinfo_x86 *c)
 {
-
-        mce_cap_init(c);
-        printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
-                smp_processor_id());
-        /* machine check is available */
-        machine_check_vector = intel_machine_check;
-        mce_init();
-        mce_intel_feature_init(c);
-        mce_set_owner();
+    mce_cap_init(c);
+    printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+            smp_processor_id());
+    /* machine check is available */
+    machine_check_vector = intel_machine_check;
+    mce_init();
+    mce_intel_feature_init(c);
+    mce_set_owner();
 }
 /*
diff -r 4d5203f95498 -r aa0fee8a6ef5 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c Mon Dec 22 08:12:33 2008 +0000
+++ b/xen/arch/x86/smpboot.c Mon Dec 22 12:07:20 2008 +0000
@@ -1237,25 +1237,11 @@ remove_siblinginfo(int cpu)
 }
 
 extern void fixup_irqs(cpumask_t map);
-
-/*
- * Functions called when offline cpu.
- * We need to process some new feature such as
- * CMCI owner change when do cpu hotplug in latest
- * Intel CPU families
-*/
-void (*cpu_down_handler)(int down_cpu) = NULL;
-void (*cpu_down_rollback_handler)(int down_cpu) = NULL;
-
-
-int __cpu_disable(int down_cpu)
+int __cpu_disable(void)
 {
     cpumask_t map = cpu_online_map;
     int cpu = smp_processor_id();
 
-    /*Only down_cpu need to execute this function*/
-    if (cpu != down_cpu)
-        return 0;
     /*
      * Perhaps use cpufreq to drop frequency, but that could go
      * into generic code.
@@ -1278,6 +1264,8 @@ int __cpu_disable(int down_cpu)
 
     time_suspend();
 
+    cpu_mcheck_disable();
+
     remove_siblinginfo(cpu);
 
     cpu_clear(cpu, map);
@@ -1293,28 +1281,25 @@ void __cpu_die(unsigned int cpu)
 void __cpu_die(unsigned int cpu)
 {
     /* We don't do anything here: idle task is faking death itself. */
-    unsigned int i;
-
-    for (i = 0; i < 10; i++) {
+    unsigned int i = 0;
+
+    for (;;) {
         /* They ack this in play_dead by setting CPU_DEAD */
         if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
-            printk ("CPU %d is now offline\n", cpu);
+            printk ("CPU %u is now offline\n", cpu);
             return;
         }
         mdelay(100);
         mb();
         process_pending_timers();
-    }
-    printk(KERN_ERR "CPU %u didn't die...\n", cpu);
-}
-static int take_cpu_down(void *down_cpu)
-{
-
-    if (cpu_down_handler)
-        cpu_down_handler(*(int *)down_cpu);
-    wmb();
-
-    return __cpu_disable(*(int *)down_cpu);
+        if ((++i % 10) == 0)
+            printk(KERN_ERR "CPU %u still not dead...\n", cpu);
+    }
+}
+
+static int take_cpu_down(void *unused)
+{
+    return __cpu_disable();
 }
 
 int cpu_down(unsigned int cpu)
@@ -1340,21 +1325,17 @@ int cpu_down(unsigned int cpu)
     printk("Prepare to bring CPU%d down...\n", cpu);
 
-    err = stop_machine_run(take_cpu_down, &cpu, cpu_online_map);
-    if ( err < 0 )
+    err = stop_machine_run(take_cpu_down, NULL, cpu);
+    if (err < 0)
         goto out;
 
     __cpu_die(cpu);
 
-    if (cpu_online(cpu)) {
-        printk("Bad state (DEAD, but in online map) on CPU%d\n", cpu);
-        err = -EBUSY;
-    }
+    BUG_ON(cpu_online(cpu));
+
+    cpu_mcheck_distribute_cmci();
+
 out:
-    /*if cpu_offline failed, re-check cmci_owner*/
-
-    if ( err < 0 && cpu_down_rollback_handler)
-        cpu_down_rollback_handler(cpu);
     spin_unlock(&cpu_add_remove_lock);
     return err;
 }
 
diff -r 4d5203f95498 -r aa0fee8a6ef5 xen/common/stop_machine.c
--- a/xen/common/stop_machine.c Mon Dec 22 08:12:33 2008 +0000
+++ b/xen/common/stop_machine.c Mon Dec 22 12:07:20 2008 +0000
@@ -45,7 +45,7 @@ struct stopmachine_data {
     enum stopmachine_state state;
     atomic_t done;
 
-    cpumask_t fn_cpus;
+    unsigned int fn_cpu;
     int fn_result;
     int (*fn)(void *);
     void *fn_data;
@@ -63,22 +63,21 @@ static void stopmachine_set_state(enum s
         cpu_relax();
 }
 
-int stop_machine_run(int (*fn)(void *), void *data, cpumask_t cpus)
+int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
 {
     cpumask_t allbutself;
     unsigned int i, nr_cpus;
-    int cur_cpu, ret;
+    int ret;
 
     BUG_ON(!local_irq_is_enabled());
 
     allbutself = cpu_online_map;
-    cur_cpu = smp_processor_id();
-    cpu_clear(cur_cpu, allbutself);
+    cpu_clear(smp_processor_id(), allbutself);
     nr_cpus = cpus_weight(allbutself);
 
     if ( nr_cpus == 0 )
     {
-        BUG_ON(!cpu_isset(cur_cpu, cpus));
+        BUG_ON(cpu != smp_processor_id());
         return (*fn)(data);
     }
 
@@ -92,8 +91,7 @@ int stop_machine_run(int (*fn)(void *),
 
     stopmachine_data.fn = fn;
     stopmachine_data.fn_data = data;
     stopmachine_data.nr_cpus = nr_cpus;
-    stopmachine_data.fn_cpus = cpus;
-    stopmachine_data.fn_result = 0;
+    stopmachine_data.fn_cpu = cpu;
     atomic_set(&stopmachine_data.done, 0);
     stopmachine_data.state = STOPMACHINE_START;
@@ -107,13 +105,8 @@ int stop_machine_run(int (*fn)(void *),
     local_irq_disable();
     stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
 
-    /* callback will run on each cpu of the input map.
-     * If callback fails on any CPU, the stop_machine_run
-     * will return the *ORed* the failure
-     */
-    if ( cpu_isset(cur_cpu, cpus) ){
-        stopmachine_data.fn_result |= (*fn)(data);
-    }
+    if ( cpu == smp_processor_id() )
+        stopmachine_data.fn_result = (*fn)(data);
 
     stopmachine_set_state(STOPMACHINE_INVOKE);
     ret = stopmachine_data.fn_result;
@@ -128,6 +121,7 @@ static void stopmachine_softirq(void)
 static void stopmachine_softirq(void)
 {
     enum stopmachine_state state = STOPMACHINE_START;
+
     smp_mb();
 
     while ( state != STOPMACHINE_EXIT )
@@ -142,11 +136,10 @@ static void stopmachine_softirq(void)
             local_irq_disable();
             break;
         case STOPMACHINE_INVOKE:
-            if ( cpu_isset(smp_processor_id(), stopmachine_data.fn_cpus )) {
-                stopmachine_data.fn_result |=
+            if ( stopmachine_data.fn_cpu == smp_processor_id() )
+                stopmachine_data.fn_result =
                     stopmachine_data.fn(stopmachine_data.fn_data);
-            }
-            break;
+            break;
         default:
             break;
         }
diff -r 4d5203f95498 -r aa0fee8a6ef5 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h Mon Dec 22 08:12:33 2008 +0000
+++ b/xen/include/asm-x86/processor.h Mon Dec 22 12:07:20 2008 +0000
@@ -540,6 +540,8 @@ extern void mtrr_bp_init(void);
 
 void mcheck_init(struct cpuinfo_x86 *c);
 asmlinkage void do_machine_check(struct cpu_user_regs *regs);
+void cpu_mcheck_distribute_cmci(void);
+void cpu_mcheck_disable(void);
 
 int cpuid_hypervisor_leaves( uint32_t idx, uint32_t *eax,
                              uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
diff -r 4d5203f95498 -r aa0fee8a6ef5 xen/include/asm-x86/smp.h
--- a/xen/include/asm-x86/smp.h Mon Dec 22 08:12:33 2008 +0000
+++ b/xen/include/asm-x86/smp.h Mon Dec 22 12:07:20 2008 +0000
@@ -101,7 +101,7 @@ static __inline int logical_smp_processo
 
 #endif
 
-extern int __cpu_disable(int down_cpu);
+extern int __cpu_disable(void);
 extern void __cpu_die(unsigned int cpu);
 
 #endif /* !__ASSEMBLY__ */
diff -r 4d5203f95498 -r aa0fee8a6ef5 xen/include/xen/stop_machine.h
--- a/xen/include/xen/stop_machine.h Mon Dec 22 08:12:33 2008 +0000
+++ b/xen/include/xen/stop_machine.h Mon Dec 22 12:07:20 2008 +0000
@@ -5,7 +5,7 @@
  * stop_machine_run: freeze the machine on all CPUs and run this function
  * @fn: the function to run
  * @data: the data ptr for the @fn()
- * @cpus: cpus to run @fn() on.
+ * @cpu: the cpu to run @fn() on (or any, if @cpu == NR_CPUS).
  *
 * Description: This causes every other cpu to enter a safe point, with
 * each of which disables interrupts, and finally interrupts are disabled
@@ -14,6 +14,6 @@
 * This can be thought of as a very heavy write lock, equivalent to
 * grabbing every spinlock in the kernel.
 */
-int stop_machine_run(int (*fn)(void *), void *data, cpumask_t cpu);
+int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
 
 #endif /* __XEN_STOP_MACHINE_H__ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog