[Xen-changelog] [xen-unstable] Enable CMCI for Intel CPUs
# HG changeset patch # User Keir Fraser <keir.fraser@xxxxxxxxxx> # Date 1229933553 0 # Node ID 4d5203f95498ff83b4fbcd48500c1d2d20b23f91 # Parent 2dffa6ceb0af954e7f3a9ad7e993b8aee7b7de65 Enable CMCI for Intel CPUs Signed-off-by Yunhong Jiang <yunhong.jiang@xxxxxxxxx> Signed-off-by Liping Ke <liping.ke@xxxxxxxxx> --- xen/arch/x86/cpu/mcheck/p4.c | 270 --------- xen/arch/x86/cpu/mcheck/p6.c | 118 ---- xen/arch/x86/apic.c | 33 + xen/arch/x86/cpu/mcheck/Makefile | 3 xen/arch/x86/cpu/mcheck/k7.c | 1 xen/arch/x86/cpu/mcheck/mce.c | 31 - xen/arch/x86/cpu/mcheck/mce.h | 16 xen/arch/x86/cpu/mcheck/mce_intel.c | 681 +++++++++++++++++++++++++ xen/arch/x86/cpu/mcheck/non-fatal.c | 25 xen/arch/x86/cpu/mcheck/x86_mca.h | 19 xen/arch/x86/hvm/vmx/vmx.c | 8 xen/arch/x86/i8259.c | 1 xen/arch/x86/smpboot.c | 34 + xen/common/stop_machine.c | 31 - xen/include/asm-x86/apicdef.h | 2 xen/include/asm-x86/config.h | 2 xen/include/asm-x86/irq.h | 1 xen/include/asm-x86/mach-default/irq_vectors.h | 4 xen/include/asm-x86/msr-index.h | 6 xen/include/asm-x86/smp.h | 2 xen/include/public/arch-x86/xen-mca.h | 15 xen/include/xen/stop_machine.h | 4 22 files changed, 859 insertions(+), 448 deletions(-) diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/apic.c --- a/xen/arch/x86/apic.c Fri Dec 19 14:56:36 2008 +0000 +++ b/xen/arch/x86/apic.c Mon Dec 22 08:12:33 2008 +0000 @@ -99,8 +99,11 @@ void __init apic_intr_init(void) /* Performance Counters Interrupt */ set_intr_gate(PMU_APIC_VECTOR, pmu_apic_interrupt); - /* thermal monitor LVT interrupt */ -#ifdef CONFIG_X86_MCE_P4THERMAL + /* CMCI Correctable Machine Check Interrupt */ + set_intr_gate(CMCI_APIC_VECTOR, cmci_interrupt); + + /* thermal monitor LVT interrupt, for P4 and latest Intel CPU*/ +#ifdef CONFIG_X86_MCE_THERMAL set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); #endif } @@ -172,12 +175,17 @@ void clear_local_APIC(void) } /* lets not touch this if we didn't frob it */ -#ifdef CONFIG_X86_MCE_P4THERMAL +#ifdef CONFIG_X86_MCE_THERMAL if (maxlvt >= 5) { v = apic_read(APIC_LVTTHMR); apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED); } #endif + + if (maxlvt >= 6) { + v = apic_read(APIC_CMCI); + apic_write_around(APIC_CMCI, v | APIC_LVT_MASKED); + } /* * Clean APIC state for other OSs: */ @@ -189,10 +197,13 @@ void clear_local_APIC(void) if (maxlvt >= 4) apic_write_around(APIC_LVTPC, APIC_LVT_MASKED); -#ifdef CONFIG_X86_MCE_P4THERMAL +#ifdef CONFIG_X86_MCE_THERMAL if (maxlvt >= 5) apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED); #endif + if (maxlvt >= 6) + apic_write_around(APIC_CMCI, APIC_LVT_MASKED); + v = GET_APIC_VERSION(apic_read(APIC_LVR)); if (APIC_INTEGRATED(v)) { /* !82489DX */ if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. 
*/ @@ -597,6 +608,7 @@ static struct { unsigned int apic_spiv; unsigned int apic_lvtt; unsigned int apic_lvtpc; + unsigned int apic_lvtcmci; unsigned int apic_lvt0; unsigned int apic_lvt1; unsigned int apic_lvterr; @@ -608,7 +620,7 @@ int lapic_suspend(void) int lapic_suspend(void) { unsigned long flags; - + int maxlvt = get_maxlvt(); if (!apic_pm_state.active) return 0; @@ -620,6 +632,11 @@ int lapic_suspend(void) apic_pm_state.apic_spiv = apic_read(APIC_SPIV); apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); + + if (maxlvt >= 6) { + apic_pm_state.apic_lvtcmci = apic_read(APIC_CMCI); + } + apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); @@ -637,6 +654,7 @@ int lapic_resume(void) { unsigned int l, h; unsigned long flags; + int maxlvt = get_maxlvt(); if (!apic_pm_state.active) return 0; @@ -669,6 +687,11 @@ int lapic_resume(void) apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); + + if (maxlvt >= 6) { + apic_write(APIC_CMCI, apic_pm_state.apic_lvtcmci); + } + apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/Makefile --- a/xen/arch/x86/cpu/mcheck/Makefile Fri Dec 19 14:56:36 2008 +0000 +++ b/xen/arch/x86/cpu/mcheck/Makefile Mon Dec 22 08:12:33 2008 +0000 @@ -3,8 +3,7 @@ obj-y += amd_k8.o obj-y += amd_k8.o obj-y += amd_f10.o obj-y += mce.o +obj-y += mce_intel.o obj-y += non-fatal.o -obj-y += p4.o obj-$(x86_32) += p5.o -obj-$(x86_32) += p6.o obj-$(x86_32) += winchip.o diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/k7.c --- a/xen/arch/x86/cpu/mcheck/k7.c Fri Dec 19 14:56:36 2008 +0000 +++ b/xen/arch/x86/cpu/mcheck/k7.c Mon Dec 22 08:12:33 2008 +0000 @@ -14,6 +14,7 @@ #include <asm/msr.h> #include "mce.h" +#include "x86_mca.h" /* Machine Check Handler For AMD Athlon/Duron */ static fastcall void k7_machine_check(struct cpu_user_regs * regs, long error_code) diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/mce.c --- a/xen/arch/x86/cpu/mcheck/mce.c Fri Dec 19 14:56:36 2008 +0000 +++ b/xen/arch/x86/cpu/mcheck/mce.c Mon Dec 22 08:12:33 2008 +0000 @@ -27,7 +27,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks); /* non- * to physical cpus present in the machine. * The more physical cpus are available, the more entries you need. 
*/ -#define MAX_MCINFO 10 +#define MAX_MCINFO 20 struct mc_machine_notify { struct mc_info mc; @@ -110,6 +110,22 @@ static void amd_mcheck_init(struct cpuin } } +/*check the existence of Machine Check*/ +int mce_available(struct cpuinfo_x86 *c) +{ + return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); +} + +/*Make sure there are no machine check on offlined or suspended CPUs*/ +void mce_disable_cpu(void) +{ + if (!mce_available(¤t_cpu_data) || mce_disabled == 1) + return; + printk(KERN_DEBUG "MCE: disable mce on CPU%d\n", smp_processor_id()); + clear_in_cr4(X86_CR4_MCE); +} + + /* This has to be run for each processor */ void mcheck_init(struct cpuinfo_x86 *c) { @@ -135,11 +151,13 @@ void mcheck_init(struct cpuinfo_x86 *c) #ifndef CONFIG_X86_64 if (c->x86==5) intel_p5_mcheck_init(c); - if (c->x86==6) - intel_p6_mcheck_init(c); #endif - if (c->x86==15) - intel_p4_mcheck_init(c); + /*If it is P6 or P4 family, including CORE 2 DUO series*/ + if (c->x86 == 6 || c->x86==15) + { + printk(KERN_DEBUG "MCE: Intel newly family MC Init\n"); + intel_mcheck_init(c); + } break; #ifndef CONFIG_X86_64 @@ -413,7 +431,7 @@ void x86_mcinfo_dump(struct mc_info *mi) if (mic == NULL) return; if (mic->type != MC_TYPE_BANK) - continue; + goto next; mc_bank = (struct mcinfo_bank *)mic; @@ -426,6 +444,7 @@ void x86_mcinfo_dump(struct mc_info *mi) printk(" at %16"PRIx64, mc_bank->mc_addr); printk("\n"); +next: mic = x86_mcinfo_next(mic); /* next entry */ if ((mic == NULL) || (mic->size == 0)) break; diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/mce.h --- a/xen/arch/x86/cpu/mcheck/mce.h Fri Dec 19 14:56:36 2008 +0000 +++ b/xen/arch/x86/cpu/mcheck/mce.h Mon Dec 22 08:12:33 2008 +0000 @@ -1,14 +1,22 @@ #include <xen/init.h> +#include <asm/types.h> #include <asm/traps.h> +#include <asm/atomic.h> +#include <asm/percpu.h> + /* Init functions */ void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c); void amd_k7_mcheck_init(struct cpuinfo_x86 *c); void amd_k8_mcheck_init(struct cpuinfo_x86 *c); void amd_f10_mcheck_init(struct cpuinfo_x86 *c); -void intel_p4_mcheck_init(struct cpuinfo_x86 *c); + + +void intel_mcheck_timer(struct cpuinfo_x86 *c); void intel_p5_mcheck_init(struct cpuinfo_x86 *c); -void intel_p6_mcheck_init(struct cpuinfo_x86 *c); +void intel_mcheck_init(struct cpuinfo_x86 *c); +void mce_intel_feature_init(struct cpuinfo_x86 *c); + void winchip_mcheck_init(struct cpuinfo_x86 *c); /* Function pointer used in the handlers to collect additional information @@ -19,6 +27,7 @@ extern int (*mc_callback_bank_extended)( uint16_t bank, uint64_t status); +int mce_available(struct cpuinfo_x86 *c); /* Helper functions used for collecting error telemetry */ struct mc_info *x86_mcinfo_getptr(void); void x86_mcinfo_clear(struct mc_info *mi); @@ -26,6 +35,3 @@ void x86_mcinfo_dump(struct mc_info *mi) void x86_mcinfo_dump(struct mc_info *mi); void mc_panic(char *s); -/* Global variables */ -extern int mce_disabled; -extern unsigned int nr_mce_banks; diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/mce_intel.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Dec 22 08:12:33 2008 +0000 @@ -0,0 +1,681 @@ +#include <xen/init.h> +#include <xen/types.h> +#include <xen/irq.h> +#include <xen/event.h> +#include <xen/kernel.h> +#include <xen/smp.h> +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/msr.h> +#include "mce.h" +#include "x86_mca.h" + +DEFINE_PER_CPU(cpu_banks_t, mce_banks_owned); + +static int nr_intel_ext_msrs = 0; +static int 
cmci_support = 0; +extern int firstbank; + +#ifdef CONFIG_X86_MCE_THERMAL +static void unexpected_thermal_interrupt(struct cpu_user_regs *regs) +{ + printk(KERN_ERR "Thermal: CPU%d: Unexpected LVT TMR interrupt!\n", + smp_processor_id()); + add_taint(TAINT_MACHINE_CHECK); +} + +/* P4/Xeon Thermal transition interrupt handler */ +static void intel_thermal_interrupt(struct cpu_user_regs *regs) +{ + u32 l, h; + unsigned int cpu = smp_processor_id(); + static s_time_t next[NR_CPUS]; + + ack_APIC_irq(); + if (NOW() < next[cpu]) + return; + + next[cpu] = NOW() + MILLISECS(5000); + rdmsr(MSR_IA32_THERM_STATUS, l, h); + if (l & 0x1) { + printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu); + printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n", + cpu); + add_taint(TAINT_MACHINE_CHECK); + } else { + printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); + } +} + +/* Thermal interrupt handler for this CPU setup */ +static void (*vendor_thermal_interrupt)(struct cpu_user_regs *regs) + = unexpected_thermal_interrupt; + +fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs) +{ + irq_enter(); + vendor_thermal_interrupt(regs); + irq_exit(); +} + +/* P4/Xeon Thermal regulation detect and init */ +static void intel_init_thermal(struct cpuinfo_x86 *c) +{ + u32 l, h; + int tm2 = 0; + unsigned int cpu = smp_processor_id(); + + /* Thermal monitoring */ + if (!cpu_has(c, X86_FEATURE_ACPI)) + return; /* -ENODEV */ + + /* Clock modulation */ + if (!cpu_has(c, X86_FEATURE_ACC)) + return; /* -ENODEV */ + + /* first check if its enabled already, in which case there might + * be some SMM goo which handles it, so we can't even put a handler + * since it might be delivered via SMI already -zwanem. + */ + rdmsr (MSR_IA32_MISC_ENABLE, l, h); + h = apic_read(APIC_LVTTHMR); + if ((l & (1<<3)) && (h & APIC_DM_SMI)) { + printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",cpu); + return; /* -EBUSY */ + } + + if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13))) + tm2 = 1; + + /* check whether a vector already exists, temporarily masked? */ + if (h & APIC_VECTOR_MASK) { + printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already installed\n", + cpu, (h & APIC_VECTOR_MASK)); + return; /* -EBUSY */ + } + + /* The temperature transition interrupt handler setup */ + h = THERMAL_APIC_VECTOR; /* our delivery vector */ + h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ + apic_write_around(APIC_LVTTHMR, h); + + rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); + wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); + + /* ok we're good to go... */ + vendor_thermal_interrupt = intel_thermal_interrupt; + + rdmsr (MSR_IA32_MISC_ENABLE, l, h); + wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); + + l = apic_read (APIC_LVTTHMR); + apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); + printk (KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", + cpu, tm2 ? 
"TM2" : "TM1"); + return; +} +#endif /* CONFIG_X86_MCE_THERMAL */ + +static inline void intel_get_extended_msrs(struct mcinfo_extended *mc_ext) +{ + if (nr_intel_ext_msrs == 0) + return; + + /*this function will called when CAP(9).MCG_EXT_P = 1*/ + memset(mc_ext, 0, sizeof(struct mcinfo_extended)); + mc_ext->common.type = MC_TYPE_EXTENDED; + mc_ext->common.size = sizeof(mc_ext); + mc_ext->mc_msrs = 10; + + mc_ext->mc_msr[0].reg = MSR_IA32_MCG_EAX; + rdmsrl(MSR_IA32_MCG_EAX, mc_ext->mc_msr[0].value); + mc_ext->mc_msr[1].reg = MSR_IA32_MCG_EBX; + rdmsrl(MSR_IA32_MCG_EBX, mc_ext->mc_msr[1].value); + mc_ext->mc_msr[2].reg = MSR_IA32_MCG_ECX; + rdmsrl(MSR_IA32_MCG_ECX, mc_ext->mc_msr[2].value); + + mc_ext->mc_msr[3].reg = MSR_IA32_MCG_EDX; + rdmsrl(MSR_IA32_MCG_EDX, mc_ext->mc_msr[3].value); + mc_ext->mc_msr[4].reg = MSR_IA32_MCG_ESI; + rdmsrl(MSR_IA32_MCG_ESI, mc_ext->mc_msr[4].value); + mc_ext->mc_msr[5].reg = MSR_IA32_MCG_EDI; + rdmsrl(MSR_IA32_MCG_EDI, mc_ext->mc_msr[5].value); + + mc_ext->mc_msr[6].reg = MSR_IA32_MCG_EBP; + rdmsrl(MSR_IA32_MCG_EBP, mc_ext->mc_msr[6].value); + mc_ext->mc_msr[7].reg = MSR_IA32_MCG_ESP; + rdmsrl(MSR_IA32_MCG_ESP, mc_ext->mc_msr[7].value); + mc_ext->mc_msr[8].reg = MSR_IA32_MCG_EFLAGS; + rdmsrl(MSR_IA32_MCG_EFLAGS, mc_ext->mc_msr[8].value); + mc_ext->mc_msr[9].reg = MSR_IA32_MCG_EIP; + rdmsrl(MSR_IA32_MCG_EIP, mc_ext->mc_msr[9].value); +} + +/* machine_check_poll might be called by following types: + * 1. called when do mcheck_init. + * 2. called in cmci interrupt handler + * 3. called in polling handler + * It will generate a new mc_info item if found CE/UC errors. DOM0 is the + * consumer. +*/ +static int machine_check_poll(struct mc_info *mi, int calltype) +{ + int exceptions = (read_cr4() & X86_CR4_MCE); + int i, nr_unit = 0, uc = 0, pcc = 0; + uint64_t status, addr; + struct mcinfo_global mcg; + struct mcinfo_extended mce; + unsigned int cpu; + struct domain *d; + + cpu = smp_processor_id(); + + if (!mi) { + printk(KERN_ERR "mcheck_poll: Failed to get mc_info entry\n"); + return 0; + } + x86_mcinfo_clear(mi); + + memset(&mcg, 0, sizeof(mcg)); + mcg.common.type = MC_TYPE_GLOBAL; + mcg.common.size = sizeof(mcg); + /*If called from cpu-reset check, don't need to fill them. + *If called from cmci context, we'll try to fill domid by memory addr + */ + mcg.mc_domid = -1; + mcg.mc_vcpuid = -1; + if (calltype == MC_FLAG_POLLED || calltype == MC_FLAG_RESET) + mcg.mc_flags = MC_FLAG_POLLED; + else if (calltype == MC_FLAG_CMCI) + mcg.mc_flags = MC_FLAG_CMCI; + mcg.mc_socketid = phys_proc_id[cpu]; + mcg.mc_coreid = cpu_core_id[cpu]; + mcg.mc_apicid = cpu_physical_id(cpu); + mcg.mc_core_threadid = mcg.mc_apicid & ( 1 << (smp_num_siblings - 1)); + rdmsrl(MSR_IA32_MCG_STATUS, mcg.mc_gstatus); + + for ( i = 0; i < nr_mce_banks; i++ ) { + struct mcinfo_bank mcb; + /*For CMCI, only owners checks the owned MSRs*/ + if ( !test_bit(i, __get_cpu_var(mce_banks_owned)) && + (calltype & MC_FLAG_CMCI) ) + continue; + rdmsrl(MSR_IA32_MC0_STATUS + 4 * i, status); + + if (! (status & MCi_STATUS_VAL) ) + continue; + /* + * Uncorrected events are handled by the exception + * handler when it is enabled. But when the exception + * is disabled such as when mcheck_init, log everything. 
+ */ + if ((status & MCi_STATUS_UC) && exceptions) + continue; + + if (status & MCi_STATUS_UC) + uc = 1; + if (status & MCi_STATUS_PCC) + pcc = 1; + + memset(&mcb, 0, sizeof(mcb)); + mcb.common.type = MC_TYPE_BANK; + mcb.common.size = sizeof(mcb); + mcb.mc_bank = i; + mcb.mc_status = status; + if (status & MCi_STATUS_MISCV) + rdmsrl(MSR_IA32_MC0_MISC + 4 * i, mcb.mc_misc); + if (status & MCi_STATUS_ADDRV) { + rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addr); + d = maddr_get_owner(addr); + if ( d && (calltype == MC_FLAG_CMCI || calltype == MC_FLAG_POLLED) ) + mcb.mc_domid = d->domain_id; + } + if (cmci_support) + rdmsrl(MSR_IA32_MC0_CTL2 + i, mcb.mc_ctrl2); + if (calltype == MC_FLAG_CMCI) + rdtscll(mcb.mc_tsc); + x86_mcinfo_add(mi, &mcb); + nr_unit++; + add_taint(TAINT_MACHINE_CHECK); + /*Clear state for this bank */ + wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0); + printk(KERN_DEBUG "mcheck_poll: bank%i CPU%d status[%lx]\n", + i, cpu, status); + printk(KERN_DEBUG "mcheck_poll: CPU%d, SOCKET%d, CORE%d, APICID[%d], " + "thread[%d]\n", cpu, mcg.mc_socketid, + mcg.mc_coreid, mcg.mc_apicid, mcg.mc_core_threadid); + + } + /*if pcc = 1, uc must be 1*/ + if (pcc) + mcg.mc_flags |= MC_FLAG_UNCORRECTABLE; + else if (uc) + mcg.mc_flags |= MC_FLAG_RECOVERABLE; + else /*correctable*/ + mcg.mc_flags |= MC_FLAG_CORRECTABLE; + + if (nr_unit && nr_intel_ext_msrs && + (mcg.mc_gstatus & MCG_STATUS_EIPV)) { + intel_get_extended_msrs(&mce); + x86_mcinfo_add(mi, &mce); + } + if (nr_unit) + x86_mcinfo_add(mi, &mcg); + /*Clear global state*/ + return nr_unit; +} + +static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code) +{ + /* MACHINE CHECK Error handler will be sent in another patch, + * simply copy old solutions here. This code will be replaced + * by upcoming machine check patches + */ + + int recover=1; + u32 alow, ahigh, high, low; + u32 mcgstl, mcgsth; + int i; + + rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + if (mcgstl & (1<<0)) /* Recoverable ? */ + recover=0; + + printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", + smp_processor_id(), mcgsth, mcgstl); + + for (i=0; i<nr_mce_banks; i++) { + rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); + if (high & (1<<31)) { + if (high & (1<<29)) + recover |= 1; + if (high & (1<<25)) + recover |= 2; + printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); + high &= ~(1<<31); + if (high & (1<<27)) { + rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); + printk ("[%08x%08x]", ahigh, alow); + } + if (high & (1<<26)) { + rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); + printk (" at %08x%08x", ahigh, alow); + } + printk ("\n"); + } + } + + if (recover & 2) + mc_panic ("CPU context corrupt"); + if (recover & 1) + mc_panic ("Unable to continue"); + + printk(KERN_EMERG "Attempting to continue.\n"); + /* + * Do not clear the MSR_IA32_MCi_STATUS if the error is not + * recoverable/continuable.This will allow BIOS to look at the MSRs + * for errors if the OS could not log the error. 
+ */ + for (i=0; i<nr_mce_banks; i++) { + u32 msr; + msr = MSR_IA32_MC0_STATUS+i*4; + rdmsr (msr, low, high); + if (high&(1<<31)) { + /* Clear it */ + wrmsr(msr, 0UL, 0UL); + /* Serialize */ + wmb(); + add_taint(TAINT_MACHINE_CHECK); + } + } + mcgstl &= ~(1<<2); + wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); +} + +extern void (*cpu_down_handler)(int down_cpu); +extern void (*cpu_down_rollback_handler)(int down_cpu); +extern void mce_disable_cpu(void); +static bool_t cmci_clear_lock = 0; +static DEFINE_SPINLOCK(cmci_discover_lock); +static DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks); + +/* + * Discover bank sharing using the algorithm recommended in the SDM. + */ +static int do_cmci_discover(int i) +{ + unsigned msr = MSR_IA32_MC0_CTL2 + i; + u64 val; + + rdmsrl(msr, val); + /* Some other CPU already owns this bank. */ + if (val & CMCI_EN) { + clear_bit(i, __get_cpu_var(mce_banks_owned)); + goto out; + } + wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD); + rdmsrl(msr, val); + + if (!(val & CMCI_EN)) { + /* + * This bank does not support CMCI. The polling + * timer has to handle it. + */ + set_bit(i, __get_cpu_var(no_cmci_banks)); + return 0; + } + set_bit(i, __get_cpu_var(mce_banks_owned)); +out: + clear_bit(i, __get_cpu_var(no_cmci_banks)); + return 1; +} + +void cmci_discover(void) +{ + int i; + + printk(KERN_DEBUG "CMCI: find owner on CPU%d\n", smp_processor_id()); + spin_lock(&cmci_discover_lock); + for (i = 0; i < nr_mce_banks; i++) { + /*If the cpu is the bank owner, need not re-discover*/ + if (test_bit(i, __get_cpu_var(mce_banks_owned))) + continue; + do_cmci_discover(i); + } + spin_unlock(&cmci_discover_lock); + printk(KERN_DEBUG "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n", + smp_processor_id(), + *((unsigned long *)__get_cpu_var(mce_banks_owned)), + *((unsigned long *)__get_cpu_var(no_cmci_banks))); +} + +/* + * Define an owner for each bank. Banks can be shared between CPUs + * and to avoid reporting events multiple times always set up one + * CPU as owner. + * + * The assignment has to be redone when CPUs go offline and + * any of the owners goes away. Also pollers run in parallel so we + * have to be careful to update the banks in a way that doesn't + * lose or duplicate events. 
+ */ + +static void mce_set_owner(void) +{ + + if (!cmci_support || mce_disabled == 1) + return; + + cmci_discover(); +} + +static void clear_cmci(void) +{ + int i; + + if (!cmci_support || mce_disabled == 1) + return; + + printk(KERN_DEBUG "CMCI: clear_cmci support on CPU%d\n", + smp_processor_id()); + + for (i = 0; i < nr_mce_banks; i++) { + unsigned msr = MSR_IA32_MC0_CTL2 + i; + u64 val; + if (!test_bit(i, __get_cpu_var(mce_banks_owned))) + continue; + rdmsrl(msr, val); + if (val & (CMCI_EN|CMCI_THRESHOLD_MASK)) + wrmsrl(msr, val & ~(CMCI_EN|CMCI_THRESHOLD_MASK)); + clear_bit(i, __get_cpu_var(mce_banks_owned)); + } +} + +/*we need to re-set cmci owners when cpu_down fail or cpu_up*/ +static void cmci_reenable_cpu(void *h) +{ + if (!mce_available(¤t_cpu_data) || mce_disabled == 1) + return; + printk(KERN_DEBUG "CMCI: reenable mce on CPU%d\n", smp_processor_id()); + mce_set_owner(); + set_in_cr4(X86_CR4_MCE); +} + +/* When take cpu_down, we need to execute the impacted cmci_owner judge algorithm + * First, we need to clear the ownership on the dead CPU + * Then, other CPUs will check whether to take the bank's ownership from down_cpu + * CPU0 need not and "never" execute this path +*/ +void __cpu_clear_cmci( int down_cpu) +{ + int cpu = smp_processor_id(); + + if (!cmci_support && mce_disabled == 1) + return; + + if (cpu == 0) { + printk(KERN_DEBUG "CMCI: CPU0 need not be cleared\n"); + return; + } + + local_irq_disable(); + if (cpu == down_cpu){ + mce_disable_cpu(); + clear_cmci(); + wmb(); + test_and_set_bool(cmci_clear_lock); + return; + } + while (!cmci_clear_lock) + cpu_relax(); + if (cpu != down_cpu) + mce_set_owner(); + + test_and_clear_bool(cmci_clear_lock); + local_irq_enable(); + +} + +void __cpu_clear_cmci_rollback( int down_cpu) +{ + cpumask_t down_map; + if (!cmci_support || mce_disabled == 1) + return; + + cpus_clear(down_map); + cpu_set(down_cpu, down_map); + printk(KERN_ERR "CMCI: cpu_down fail. 
" + "Reenable cmci on CPU%d\n", down_cpu); + on_selected_cpus(down_map, cmci_reenable_cpu, NULL, 1, 1); +} + +static void intel_init_cmci(struct cpuinfo_x86 *c) +{ + u32 l, apic; + int cpu = smp_processor_id(); + + if (!mce_available(c) || !cmci_support) { + printk(KERN_DEBUG "CMCI: CPU%d has no CMCI support\n", cpu); + return; + } + + apic = apic_read(APIC_CMCI); + if ( apic & APIC_VECTOR_MASK ) + { + printk(KERN_WARNING "CPU%d CMCI LVT vector (%#x) already installed\n", + cpu, ( apic & APIC_VECTOR_MASK )); + return; + } + + apic = CMCI_APIC_VECTOR; + apic |= (APIC_DM_FIXED | APIC_LVT_MASKED); + apic_write_around(APIC_CMCI, apic); + + /*now clear mask flag*/ + l = apic_read(APIC_CMCI); + apic_write_around(APIC_CMCI, l & ~APIC_LVT_MASKED); + cpu_down_handler = __cpu_clear_cmci; + cpu_down_rollback_handler = __cpu_clear_cmci_rollback; +} + +fastcall void smp_cmci_interrupt(struct cpu_user_regs *regs) +{ + int nr_unit; + struct mc_info *mi = x86_mcinfo_getptr(); + int cpu = smp_processor_id(); + + ack_APIC_irq(); + irq_enter(); + printk(KERN_DEBUG "CMCI: cmci_intr happen on CPU%d\n", cpu); + nr_unit = machine_check_poll(mi, MC_FLAG_CMCI); + if (nr_unit) { + x86_mcinfo_dump(mi); + if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA)) + send_guest_global_virq(dom0, VIRQ_MCA); + } + irq_exit(); +} + +void mce_intel_feature_init(struct cpuinfo_x86 *c) +{ + +#ifdef CONFIG_X86_MCE_THERMAL + intel_init_thermal(c); +#endif + intel_init_cmci(c); +} + +static void mce_cap_init(struct cpuinfo_x86 *c) +{ + u32 l, h; + + rdmsr (MSR_IA32_MCG_CAP, l, h); + if ((l & MCG_CMCI_P) && cpu_has_apic) + cmci_support = 1; + + nr_mce_banks = l & 0xff; + if (nr_mce_banks > MAX_NR_BANKS) + printk(KERN_WARNING "MCE: exceed max mce banks\n"); + if (l & MCG_EXT_P) + { + nr_intel_ext_msrs = (l >> MCG_EXT_CNT) & 0xff; + printk (KERN_INFO "CPU%d: Intel Extended MCE MSRs (%d) available\n", + smp_processor_id(), nr_intel_ext_msrs); + } + /* for most of p6 family, bank 0 is an alias bios MSR. + * But after model>1a, bank 0 is available*/ + if ( c->x86 == 6 && c->x86_vendor == X86_VENDOR_INTEL + && c->x86_model < 0x1A) + firstbank = 1; + else + firstbank = 0; +} + +static void mce_init(void) +{ + u32 l, h; + int i, nr_unit; + struct mc_info *mi = x86_mcinfo_getptr(); + clear_in_cr4(X86_CR4_MCE); + /* log the machine checks left over from the previous reset. + * This also clears all registers*/ + + nr_unit = machine_check_poll(mi, MC_FLAG_RESET); + /*in the boot up stage, not expect inject to DOM0, but go print out + */ + if (nr_unit > 0) + x86_mcinfo_dump(mi); + + set_in_cr4(X86_CR4_MCE); + rdmsr (MSR_IA32_MCG_CAP, l, h); + if (l & MCG_CTL_P) /* Control register present ? 
*/ + wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); + + for (i = firstbank; i < nr_mce_banks; i++) + { + /*Some banks are shared across cores, use MCi_CTRL to judge whether + * this bank has been initialized by other cores already.*/ + rdmsr(MSR_IA32_MC0_CTL + 4*i, l, h); + if (!l & !h) + { + /*if ctl is 0, this bank is never initialized*/ + printk(KERN_DEBUG "mce_init: init bank%d\n", i); + wrmsr (MSR_IA32_MC0_CTL + 4*i, 0xffffffff, 0xffffffff); + wrmsr (MSR_IA32_MC0_STATUS + 4*i, 0x0, 0x0); + } + } + if (firstbank) /*if cmci enabled, firstbank = 0*/ + wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); +} + +/*p4/p6 faimily has similar MCA initialization process*/ +void intel_mcheck_init(struct cpuinfo_x86 *c) +{ + + mce_cap_init(c); + printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", + smp_processor_id()); + /* machine check is available */ + machine_check_vector = intel_machine_check; + mce_init(); + mce_intel_feature_init(c); + mce_set_owner(); +} + +/* + * Periodic polling timer for "silent" machine check errors. If the + * poller finds an MCE, poll faster. When the poller finds no more + * errors, poll slower +*/ +static struct timer mce_timer; + +#define MCE_PERIOD 4000 +#define MCE_MIN 2000 +#define MCE_MAX 32000 + +static u64 period = MCE_PERIOD; +static int adjust = 0; + +static void mce_intel_checkregs(void *info) +{ + int nr_unit; + struct mc_info *mi = x86_mcinfo_getptr(); + + if( !mce_available(¤t_cpu_data)) + return; + nr_unit = machine_check_poll(mi, MC_FLAG_POLLED); + if (nr_unit) + { + x86_mcinfo_dump(mi); + adjust++; + if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA)) + send_guest_global_virq(dom0, VIRQ_MCA); + } +} + +static void mce_intel_work_fn(void *data) +{ + on_each_cpu(mce_intel_checkregs, data, 1, 1); + if (adjust) { + period = period / (adjust + 1); + printk(KERN_DEBUG "mcheck_poll: Find error, shorten interval to %ld", + period); + } + else { + period *= 2; + } + if (period > MCE_MAX) + period = MCE_MAX; + if (period < MCE_MIN) + period = MCE_MIN; + set_timer(&mce_timer, NOW() + MILLISECS(period)); + adjust = 0; +} + +void intel_mcheck_timer(struct cpuinfo_x86 *c) +{ + printk(KERN_DEBUG "mcheck_poll: Init_mcheck_timer\n"); + init_timer(&mce_timer, mce_intel_work_fn, NULL, 0); + set_timer(&mce_timer, NOW() + MILLISECS(MCE_PERIOD)); +} + diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/non-fatal.c --- a/xen/arch/x86/cpu/mcheck/non-fatal.c Fri Dec 19 14:56:36 2008 +0000 +++ b/xen/arch/x86/cpu/mcheck/non-fatal.c Mon Dec 22 08:12:33 2008 +0000 @@ -19,8 +19,8 @@ #include <asm/msr.h> #include "mce.h" - -static int firstbank; +#include "x86_mca.h" +int firstbank = 0; static struct timer mce_timer; #define MCE_PERIOD MILLISECS(15000) @@ -61,13 +61,8 @@ static int __init init_nonfatal_mce_chec struct cpuinfo_x86 *c = &boot_cpu_data; /* Check for MCE support */ - if (!cpu_has(c, X86_FEATURE_MCE)) + if (!mce_available(c)) return -ENODEV; - - /* Check for PPro style MCA */ - if (!cpu_has(c, X86_FEATURE_MCA)) - return -ENODEV; - /* * Check for non-fatal errors every MCE_RATE s */ @@ -85,12 +80,20 @@ static int __init init_nonfatal_mce_chec break; case X86_VENDOR_INTEL: - init_timer(&mce_timer, mce_work_fn, NULL, 0); - set_timer(&mce_timer, NOW() + MCE_PERIOD); + /* p5 family is different. 
P4/P6 and latest CPUs shares the + * same polling methods + */ + if ( c->x86 != 5 ) + { + /* some CPUs or banks don't support cmci, we need to + * enable this feature anyway + */ + intel_mcheck_timer(c); + } break; } - printk(KERN_INFO "MCA: Machine check polling timer started.\n"); + printk(KERN_INFO "mcheck_poll: Machine check polling timer started.\n"); return 0; } __initcall(init_nonfatal_mce_checker); diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/p4.c --- a/xen/arch/x86/cpu/mcheck/p4.c Fri Dec 19 14:56:36 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,270 +0,0 @@ -/* - * P4 specific Machine Check Exception Reporting - */ - -#include <xen/init.h> -#include <xen/types.h> -#include <xen/kernel.h> -#include <xen/config.h> -#include <xen/smp.h> -#include <xen/irq.h> -#include <xen/time.h> -#include <asm/processor.h> -#include <asm/system.h> -#include <asm/msr.h> -#include <asm/apic.h> - -#include "mce.h" - -/* as supported by the P4/Xeon family */ -struct intel_mce_extended_msrs { - u32 eax; - u32 ebx; - u32 ecx; - u32 edx; - u32 esi; - u32 edi; - u32 ebp; - u32 esp; - u32 eflags; - u32 eip; - /* u32 *reserved[]; */ -}; - -static int mce_num_extended_msrs = 0; - - -#ifdef CONFIG_X86_MCE_P4THERMAL -static void unexpected_thermal_interrupt(struct cpu_user_regs *regs) -{ - printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", - smp_processor_id()); - add_taint(TAINT_MACHINE_CHECK); -} - -/* P4/Xeon Thermal transition interrupt handler */ -static void intel_thermal_interrupt(struct cpu_user_regs *regs) -{ - u32 l, h; - unsigned int cpu = smp_processor_id(); - static s_time_t next[NR_CPUS]; - - ack_APIC_irq(); - - if (NOW() < next[cpu]) - return; - - next[cpu] = NOW() + MILLISECS(5000); - rdmsr(MSR_IA32_THERM_STATUS, l, h); - if (l & 0x1) { - printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu); - printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n", - cpu); - add_taint(TAINT_MACHINE_CHECK); - } else { - printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); - } -} - -/* Thermal interrupt handler for this CPU setup */ -static void (*vendor_thermal_interrupt)(struct cpu_user_regs *regs) = unexpected_thermal_interrupt; - -fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs) -{ - irq_enter(); - vendor_thermal_interrupt(regs); - irq_exit(); -} - -/* P4/Xeon Thermal regulation detect and init */ -static void intel_init_thermal(struct cpuinfo_x86 *c) -{ - u32 l, h; - unsigned int cpu = smp_processor_id(); - - /* Thermal monitoring */ - if (!cpu_has(c, X86_FEATURE_ACPI)) - return; /* -ENODEV */ - - /* Clock modulation */ - if (!cpu_has(c, X86_FEATURE_ACC)) - return; /* -ENODEV */ - - /* first check if its enabled already, in which case there might - * be some SMM goo which handles it, so we can't even put a handler - * since it might be delivered via SMI already -zwanem. - */ - rdmsr (MSR_IA32_MISC_ENABLE, l, h); - h = apic_read(APIC_LVTTHMR); - if ((l & (1<<3)) && (h & APIC_DM_SMI)) { - printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", - cpu); - return; /* -EBUSY */ - } - - /* check whether a vector already exists, temporarily masked? 
*/ - if (h & APIC_VECTOR_MASK) { - printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " - "installed\n", - cpu, (h & APIC_VECTOR_MASK)); - return; /* -EBUSY */ - } - - /* The temperature transition interrupt handler setup */ - h = THERMAL_APIC_VECTOR; /* our delivery vector */ - h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ - apic_write_around(APIC_LVTTHMR, h); - - rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); - wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); - - /* ok we're good to go... */ - vendor_thermal_interrupt = intel_thermal_interrupt; - - rdmsr (MSR_IA32_MISC_ENABLE, l, h); - wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); - - l = apic_read (APIC_LVTTHMR); - apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); - return; -} -#endif /* CONFIG_X86_MCE_P4THERMAL */ - - -/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ -static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r) -{ - u32 h; - - if (mce_num_extended_msrs == 0) - goto done; - - rdmsr (MSR_IA32_MCG_EAX, r->eax, h); - rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); - rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); - rdmsr (MSR_IA32_MCG_EDX, r->edx, h); - rdmsr (MSR_IA32_MCG_ESI, r->esi, h); - rdmsr (MSR_IA32_MCG_EDI, r->edi, h); - rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); - rdmsr (MSR_IA32_MCG_ESP, r->esp, h); - rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); - rdmsr (MSR_IA32_MCG_EIP, r->eip, h); - - /* can we rely on kmalloc to do a dynamic - * allocation for the reserved registers? - */ -done: - return mce_num_extended_msrs; -} - -static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code) -{ - int recover=1; - u32 alow, ahigh, high, low; - u32 mcgstl, mcgsth; - int i; - struct intel_mce_extended_msrs dbg; - - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); - if (mcgstl & (1<<0)) /* Recoverable ? */ - recover=0; - - printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", - smp_processor_id(), mcgsth, mcgstl); - - if (intel_get_extended_msrs(&dbg)) { - printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", - smp_processor_id(), dbg.eip, dbg.eflags); - printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", - dbg.eax, dbg.ebx, dbg.ecx, dbg.edx); - printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", - dbg.esi, dbg.edi, dbg.ebp, dbg.esp); - } - - for (i=0; i<nr_mce_banks; i++) { - rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); - if (high & (1<<31)) { - if (high & (1<<29)) - recover |= 1; - if (high & (1<<25)) - recover |= 2; - printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); - high &= ~(1<<31); - if (high & (1<<27)) { - rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); - printk ("[%08x%08x]", ahigh, alow); - } - if (high & (1<<26)) { - rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); - printk (" at %08x%08x", ahigh, alow); - } - printk ("\n"); - } - } - - if (recover & 2) - mc_panic ("CPU context corrupt"); - if (recover & 1) - mc_panic ("Unable to continue"); - - printk(KERN_EMERG "Attempting to continue.\n"); - /* - * Do not clear the MSR_IA32_MCi_STATUS if the error is not - * recoverable/continuable.This will allow BIOS to look at the MSRs - * for errors if the OS could not log the error. 
- */ - for (i=0; i<nr_mce_banks; i++) { - u32 msr; - msr = MSR_IA32_MC0_STATUS+i*4; - rdmsr (msr, low, high); - if (high&(1<<31)) { - /* Clear it */ - wrmsr(msr, 0UL, 0UL); - /* Serialize */ - wmb(); - add_taint(TAINT_MACHINE_CHECK); - } - } - mcgstl &= ~(1<<2); - wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); -} - - -void intel_p4_mcheck_init(struct cpuinfo_x86 *c) -{ - u32 l, h; - int i; - - machine_check_vector = intel_machine_check; - wmb(); - - printk (KERN_INFO "Intel machine check architecture supported.\n"); - rdmsr (MSR_IA32_MCG_CAP, l, h); - if (l & (1<<8)) /* Control register present ? */ - wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); - nr_mce_banks = l & 0xff; - - for (i=0; i<nr_mce_banks; i++) { - wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); - wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); - } - - set_in_cr4 (X86_CR4_MCE); - printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", - smp_processor_id()); - - /* Check for P4/Xeon extended MCE MSRs */ - rdmsr (MSR_IA32_MCG_CAP, l, h); - if (l & (1<<9)) {/* MCG_EXT_P */ - mce_num_extended_msrs = (l >> 16) & 0xff; - printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" - " available\n", - smp_processor_id(), mce_num_extended_msrs); - -#ifdef CONFIG_X86_MCE_P4THERMAL - /* Check for P4/Xeon Thermal monitor */ - intel_init_thermal(c); -#endif - } -} diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/p6.c --- a/xen/arch/x86/cpu/mcheck/p6.c Fri Dec 19 14:56:36 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,118 +0,0 @@ -/* - * P6 specific Machine Check Exception Reporting - * (C) Copyright 2002 Alan Cox <alan@xxxxxxxxxx> - */ - -#include <xen/init.h> -#include <xen/types.h> -#include <xen/kernel.h> -#include <xen/smp.h> - -#include <asm/processor.h> -#include <asm/system.h> -#include <asm/msr.h> - -#include "mce.h" - -/* Machine Check Handler For PII/PIII */ -static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code) -{ - int recover=1; - u32 alow, ahigh, high, low; - u32 mcgstl, mcgsth; - int i; - - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); - if (mcgstl & (1<<0)) /* Recoverable ? */ - recover=0; - - printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", - smp_processor_id(), mcgsth, mcgstl); - - for (i=0; i<nr_mce_banks; i++) { - rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); - if (high & (1<<31)) { - if (high & (1<<29)) - recover |= 1; - if (high & (1<<25)) - recover |= 2; - printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); - high &= ~(1<<31); - if (high & (1<<27)) { - rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); - printk ("[%08x%08x]", ahigh, alow); - } - if (high & (1<<26)) { - rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); - printk (" at %08x%08x", ahigh, alow); - } - printk ("\n"); - } - } - - if (recover & 2) - mc_panic ("CPU context corrupt"); - if (recover & 1) - mc_panic ("Unable to continue"); - - printk (KERN_EMERG "Attempting to continue.\n"); - /* - * Do not clear the MSR_IA32_MCi_STATUS if the error is not - * recoverable/continuable.This will allow BIOS to look at the MSRs - * for errors if the OS could not log the error. 
- */ - for (i=0; i<nr_mce_banks; i++) { - unsigned int msr; - msr = MSR_IA32_MC0_STATUS+i*4; - rdmsr (msr,low, high); - if (high & (1<<31)) { - /* Clear it */ - wrmsr (msr, 0UL, 0UL); - /* Serialize */ - wmb(); - add_taint(TAINT_MACHINE_CHECK); - } - } - mcgstl &= ~(1<<2); - wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); -} - -/* Set up machine check reporting for processors with Intel style MCE */ -void intel_p6_mcheck_init(struct cpuinfo_x86 *c) -{ - u32 l, h; - int i; - - /* Check for MCE support */ - if (!cpu_has(c, X86_FEATURE_MCE)) - return; - - /* Check for PPro style MCA */ - if (!cpu_has(c, X86_FEATURE_MCA)) - return; - - /* Ok machine check is available */ - machine_check_vector = intel_machine_check; - wmb(); - - printk (KERN_INFO "Intel machine check architecture supported.\n"); - rdmsr (MSR_IA32_MCG_CAP, l, h); - if (l & (1<<8)) /* Control register present ? */ - wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); - nr_mce_banks = l & 0xff; - - /* - * Following the example in IA-32 SDM Vol 3: - * - MC0_CTL should not be written - * - Status registers on all banks should be cleared on reset - */ - for (i=1; i<nr_mce_banks; i++) - wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); - - for (i=0; i<nr_mce_banks; i++) - wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); - - set_in_cr4 (X86_CR4_MCE); - printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", - smp_processor_id()); -} diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/x86_mca.h --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Fri Dec 19 14:56:36 2008 +0000 +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Mon Dec 22 08:12:33 2008 +0000 @@ -28,7 +28,10 @@ /* Bitfield of the MSR_IA32_MCG_CAP register */ #define MCG_CAP_COUNT 0x00000000000000ffULL #define MCG_CTL_P 0x0000000000000100ULL -/* Bits 9-63 are reserved */ +#define MCG_EXT_P (1UL<<9) +#define MCG_EXT_CNT (16) +#define MCG_CMCI_P (1UL<<10) +/* Other bits are reserved */ /* Bitfield of the MSR_IA32_MCG_STATUS register */ #define MCG_STATUS_RIPV 0x0000000000000001ULL @@ -70,3 +73,17 @@ /* reserved bits */ #define MCi_STATUS_OTHER_RESERVED2 0x0180000000000000ULL +/*Intel Specific bitfield*/ +#define CMCI_THRESHOLD 0x2 + + +#define MAX_NR_BANKS 128 + +typedef DECLARE_BITMAP(cpu_banks_t, MAX_NR_BANKS); +DECLARE_PER_CPU(cpu_banks_t, mce_banks_owned); + +/* Global variables */ +extern int mce_disabled; +extern unsigned int nr_mce_banks; +extern int firstbank; + diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Fri Dec 19 14:56:36 2008 +0000 +++ b/xen/arch/x86/hvm/vmx/vmx.c Mon Dec 22 08:12:33 2008 +0000 @@ -2030,7 +2030,8 @@ static void vmx_do_extint(struct cpu_use fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs); fastcall void smp_error_interrupt(struct cpu_user_regs *regs); fastcall void smp_pmu_apic_interrupt(struct cpu_user_regs *regs); -#ifdef CONFIG_X86_MCE_P4THERMAL + fastcall void smp_cmci_interrupt(struct cpu_user_regs *regs); +#ifdef CONFIG_X86_MCE_THERMAL fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs); #endif @@ -2060,10 +2061,13 @@ static void vmx_do_extint(struct cpu_use case ERROR_APIC_VECTOR: smp_error_interrupt(regs); break; + case CMCI_APIC_VECTOR: + smp_cmci_interrupt(regs); + break; case PMU_APIC_VECTOR: smp_pmu_apic_interrupt(regs); break; -#ifdef CONFIG_X86_MCE_P4THERMAL +#ifdef CONFIG_X86_MCE_THERMAL case THERMAL_APIC_VECTOR: smp_thermal_interrupt(regs); break; diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/i8259.c --- a/xen/arch/x86/i8259.c Fri Dec 19 14:56:36 2008 
+0000 +++ b/xen/arch/x86/i8259.c Mon Dec 22 08:12:33 2008 +0000 @@ -74,6 +74,7 @@ BUILD_SMP_INTERRUPT(spurious_interrupt,S BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) BUILD_SMP_INTERRUPT(pmu_apic_interrupt,PMU_APIC_VECTOR) BUILD_SMP_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) +BUILD_SMP_INTERRUPT(cmci_interrupt, CMCI_APIC_VECTOR) #define IRQ(x,y) \ IRQ##x##y##_interrupt diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/smpboot.c --- a/xen/arch/x86/smpboot.c Fri Dec 19 14:56:36 2008 +0000 +++ b/xen/arch/x86/smpboot.c Mon Dec 22 08:12:33 2008 +0000 @@ -1237,11 +1237,25 @@ remove_siblinginfo(int cpu) } extern void fixup_irqs(cpumask_t map); -int __cpu_disable(void) + +/* + * Functions called when offline cpu. + * We need to process some new feature such as + * CMCI owner change when do cpu hotplug in latest + * Intel CPU families +*/ +void (*cpu_down_handler)(int down_cpu) = NULL; +void (*cpu_down_rollback_handler)(int down_cpu) = NULL; + + +int __cpu_disable(int down_cpu) { cpumask_t map = cpu_online_map; int cpu = smp_processor_id(); + /*Only down_cpu need to execute this function*/ + if (cpu != down_cpu) + return 0; /* * Perhaps use cpufreq to drop frequency, but that could go * into generic code. @@ -1293,10 +1307,14 @@ void __cpu_die(unsigned int cpu) } printk(KERN_ERR "CPU %u didn't die...\n", cpu); } - -static int take_cpu_down(void *unused) -{ - return __cpu_disable(); +static int take_cpu_down(void *down_cpu) +{ + + if (cpu_down_handler) + cpu_down_handler(*(int *)down_cpu); + wmb(); + + return __cpu_disable(*(int *)down_cpu); } int cpu_down(unsigned int cpu) @@ -1322,7 +1340,7 @@ int cpu_down(unsigned int cpu) printk("Prepare to bring CPU%d down...\n", cpu); - err = stop_machine_run(take_cpu_down, NULL, cpu); + err = stop_machine_run(take_cpu_down, &cpu, cpu_online_map); if ( err < 0 ) goto out; @@ -1333,6 +1351,10 @@ int cpu_down(unsigned int cpu) err = -EBUSY; } out: + /*if cpu_offline failed, re-check cmci_owner*/ + + if ( err < 0 && cpu_down_rollback_handler) + cpu_down_rollback_handler(cpu); spin_unlock(&cpu_add_remove_lock); return err; } diff -r 2dffa6ceb0af -r 4d5203f95498 xen/common/stop_machine.c --- a/xen/common/stop_machine.c Fri Dec 19 14:56:36 2008 +0000 +++ b/xen/common/stop_machine.c Mon Dec 22 08:12:33 2008 +0000 @@ -45,7 +45,7 @@ struct stopmachine_data { enum stopmachine_state state; atomic_t done; - unsigned int fn_cpu; + cpumask_t fn_cpus; int fn_result; int (*fn)(void *); void *fn_data; @@ -63,21 +63,22 @@ static void stopmachine_set_state(enum s cpu_relax(); } -int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu) +int stop_machine_run(int (*fn)(void *), void *data, cpumask_t cpus) { cpumask_t allbutself; unsigned int i, nr_cpus; - int ret; + int cur_cpu, ret; BUG_ON(!local_irq_is_enabled()); allbutself = cpu_online_map; - cpu_clear(smp_processor_id(), allbutself); + cur_cpu = smp_processor_id(); + cpu_clear(cur_cpu, allbutself); nr_cpus = cpus_weight(allbutself); if ( nr_cpus == 0 ) { - BUG_ON(cpu != smp_processor_id()); + BUG_ON(!cpu_isset(cur_cpu, cpus)); return (*fn)(data); } @@ -91,7 +92,8 @@ int stop_machine_run(int (*fn)(void *), stopmachine_data.fn = fn; stopmachine_data.fn_data = data; stopmachine_data.nr_cpus = nr_cpus; - stopmachine_data.fn_cpu = cpu; + stopmachine_data.fn_cpus = cpus; + stopmachine_data.fn_result = 0; atomic_set(&stopmachine_data.done, 0); stopmachine_data.state = STOPMACHINE_START; @@ -105,8 +107,13 @@ int stop_machine_run(int (*fn)(void *), local_irq_disable(); 
stopmachine_set_state(STOPMACHINE_DISABLE_IRQ); - if ( cpu == smp_processor_id() ) - stopmachine_data.fn_result = (*fn)(data); + /* callback will run on each cpu of the input map. + * If callback fails on any CPU, the stop_machine_run + * will return the *ORed* the failure + */ + if ( cpu_isset(cur_cpu, cpus) ){ + stopmachine_data.fn_result |= (*fn)(data); + } stopmachine_set_state(STOPMACHINE_INVOKE); ret = stopmachine_data.fn_result; @@ -121,7 +128,6 @@ static void stopmachine_softirq(void) static void stopmachine_softirq(void) { enum stopmachine_state state = STOPMACHINE_START; - smp_mb(); while ( state != STOPMACHINE_EXIT ) @@ -136,10 +142,11 @@ static void stopmachine_softirq(void) local_irq_disable(); break; case STOPMACHINE_INVOKE: - if ( stopmachine_data.fn_cpu == smp_processor_id() ) - stopmachine_data.fn_result = + if ( cpu_isset(smp_processor_id(), stopmachine_data.fn_cpus )) { + stopmachine_data.fn_result |= stopmachine_data.fn(stopmachine_data.fn_data); - break; + } + break; default: break; } diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/asm-x86/apicdef.h --- a/xen/include/asm-x86/apicdef.h Fri Dec 19 14:56:36 2008 +0000 +++ b/xen/include/asm-x86/apicdef.h Mon Dec 22 08:12:33 2008 +0000 @@ -80,6 +80,8 @@ #define APIC_LVTTHMR 0x330 #define APIC_LVTPC 0x340 #define APIC_LVT0 0x350 +#define APIC_CMCI 0x2F0 + #define APIC_LVT_TIMER_BASE_MASK (0x3<<18) #define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3) #define SET_APIC_TIMER_BASE(x) (((x)<<18)) diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/asm-x86/config.h --- a/xen/include/asm-x86/config.h Fri Dec 19 14:56:36 2008 +0000 +++ b/xen/include/asm-x86/config.h Mon Dec 22 08:12:33 2008 +0000 @@ -22,7 +22,7 @@ #define CONFIG_X86_IO_APIC 1 #define CONFIG_X86_PM_TIMER 1 #define CONFIG_HPET_TIMER 1 -#define CONFIG_X86_MCE_P4THERMAL 1 +#define CONFIG_X86_MCE_THERMAL 1 #define CONFIG_NUMA 1 #define CONFIG_DISCONTIGMEM 1 #define CONFIG_NUMA_EMU 1 diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/asm-x86/irq.h --- a/xen/include/asm-x86/irq.h Fri Dec 19 14:56:36 2008 +0000 +++ b/xen/include/asm-x86/irq.h Mon Dec 22 08:12:33 2008 +0000 @@ -33,6 +33,7 @@ fastcall void pmu_apic_interrupt(void); fastcall void pmu_apic_interrupt(void); fastcall void spurious_interrupt(void); fastcall void thermal_interrupt(void); +fastcall void cmci_interrupt(void); void disable_8259A_irq(unsigned int irq); void enable_8259A_irq(unsigned int irq); diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/asm-x86/mach-default/irq_vectors.h --- a/xen/include/asm-x86/mach-default/irq_vectors.h Fri Dec 19 14:56:36 2008 +0000 +++ b/xen/include/asm-x86/mach-default/irq_vectors.h Mon Dec 22 08:12:33 2008 +0000 @@ -10,13 +10,13 @@ #define THERMAL_APIC_VECTOR 0xfa #define LOCAL_TIMER_VECTOR 0xf9 #define PMU_APIC_VECTOR 0xf8 - +#define CMCI_APIC_VECTOR 0xf7 /* * High-priority dynamically-allocated vectors. For interrupts that * must be higher priority than any guest-bound interrupt. */ #define FIRST_HIPRIORITY_VECTOR 0xf0 -#define LAST_HIPRIORITY_VECTOR 0xf7 +#define LAST_HIPRIORITY_VECTOR 0xf6 /* Legacy PIC uses vectors 0xe0-0xef. 
*/ #define FIRST_LEGACY_VECTOR 0xe0 diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/asm-x86/msr-index.h --- a/xen/include/asm-x86/msr-index.h Fri Dec 19 14:56:36 2008 +0000 +++ b/xen/include/asm-x86/msr-index.h Mon Dec 22 08:12:33 2008 +0000 @@ -92,8 +92,10 @@ #define MSR_IA32_MC0_STATUS 0x00000401 #define MSR_IA32_MC0_ADDR 0x00000402 #define MSR_IA32_MC0_MISC 0x00000403 - -#define MSR_IA32_MC1_CTL 0x00000404 +#define MSR_IA32_MC0_CTL2 0x00000280 +#define CMCI_EN (1UL<<30) +#define CMCI_THRESHOLD_MASK 0x7FFF + #define MSR_IA32_MC1_STATUS 0x00000405 #define MSR_IA32_MC1_ADDR 0x00000406 #define MSR_IA32_MC1_MISC 0x00000407 diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/asm-x86/smp.h --- a/xen/include/asm-x86/smp.h Fri Dec 19 14:56:36 2008 +0000 +++ b/xen/include/asm-x86/smp.h Mon Dec 22 08:12:33 2008 +0000 @@ -101,7 +101,7 @@ static __inline int logical_smp_processo #endif -extern int __cpu_disable(void); +extern int __cpu_disable(int down_cpu); extern void __cpu_die(unsigned int cpu); #endif /* !__ASSEMBLY__ */ diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/public/arch-x86/xen-mca.h --- a/xen/include/public/arch-x86/xen-mca.h Fri Dec 19 14:56:36 2008 +0000 +++ b/xen/include/public/arch-x86/xen-mca.h Mon Dec 22 08:12:33 2008 +0000 @@ -106,7 +106,10 @@ struct mcinfo_common { #define MC_FLAG_CORRECTABLE (1 << 0) #define MC_FLAG_UNCORRECTABLE (1 << 1) - +#define MC_FLAG_RECOVERABLE (1 << 2) +#define MC_FLAG_POLLED (1 << 3) +#define MC_FLAG_RESET (1 << 4) +#define MC_FLAG_CMCI (1 << 5) /* contains global x86 mc information */ struct mcinfo_global { struct mcinfo_common common; @@ -115,6 +118,7 @@ struct mcinfo_global { uint16_t mc_domid; uint32_t mc_socketid; /* physical socket of the physical core */ uint16_t mc_coreid; /* physical impacted core */ + uint8_t mc_apicid; uint16_t mc_core_threadid; /* core thread of physical core */ uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ uint64_t mc_gstatus; /* global status */ @@ -132,6 +136,8 @@ struct mcinfo_bank { uint64_t mc_addr; /* bank address, only valid * if addr bit is set in mc_status */ uint64_t mc_misc; + uint64_t mc_ctrl2; + uint64_t mc_tsc; }; @@ -150,7 +156,12 @@ struct mcinfo_extended { * multiple times. */ uint32_t mc_msrs; /* Number of msr with valid values. */ - struct mcinfo_msr mc_msr[5]; + /* + * Currently Intel extended MSR (32/64) including all gp registers + * and E(R)DI, E(R)BP, E(R)SP, E(R)FLAGS, E(R)IP, E(R)MISC, only 10 + * of them might be useful. So expend this array to 10. + */ + struct mcinfo_msr mc_msr[10]; }; #define MCINFO_HYPERCALLSIZE 1024 diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/xen/stop_machine.h --- a/xen/include/xen/stop_machine.h Fri Dec 19 14:56:36 2008 +0000 +++ b/xen/include/xen/stop_machine.h Mon Dec 22 08:12:33 2008 +0000 @@ -5,7 +5,7 @@ * stop_machine_run: freeze the machine on all CPUs and run this function * @fn: the function to run * @data: the data ptr for the @fn() - * @cpu: the cpu to run @fn() on (or any, if @cpu == NR_CPUS). + * @cpus: cpus to run @fn() on. * * Description: This causes every other cpu to enter a safe point, with * each of which disables interrupts, and finally interrupts are disabled @@ -14,6 +14,6 @@ * * This can be thought of as a very heavy write lock, equivalent to * grabbing every spinlock in the kernel. 
*/ -int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu); +int stop_machine_run(int (*fn)(void *), void *data, cpumask_t cpu); #endif /* __XEN_STOP_MACHINE_H__ */