[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC][PATCH 1/2] MCA support for Intel64
Hi, this is the Xen part of the patch. Signed-off-by: Kazuhiro Suzuki <kaz@xxxxxxxxxxxxxx> Thanks, KAZ diff -r f4552d9f6afb xen/arch/x86/cpu/mcheck/amd_f10.c --- a/xen/arch/x86/cpu/mcheck/amd_f10.c Tue Sep 23 17:11:33 2008 +0100 +++ b/xen/arch/x86/cpu/mcheck/amd_f10.c Fri Sep 26 14:30:17 2008 +0900 @@ -82,8 +82,6 @@ } -extern void k8_machine_check(struct cpu_user_regs *regs, long error_code); - /* AMD Family10 machine check */ void amd_f10_mcheck_init(struct cpuinfo_x86 *c) { @@ -91,7 +89,7 @@ uint32_t i; int cpu_nr; - machine_check_vector = k8_machine_check; + machine_check_vector = x86_machine_check; mc_callback_bank_extended = amd_f10_handler; cpu_nr = smp_processor_id(); wmb(); diff -r f4552d9f6afb xen/arch/x86/cpu/mcheck/amd_k8.c --- a/xen/arch/x86/cpu/mcheck/amd_k8.c Tue Sep 23 17:11:33 2008 +0100 +++ b/xen/arch/x86/cpu/mcheck/amd_k8.c Fri Sep 26 14:30:17 2008 +0900 @@ -70,219 +70,6 @@ #include "x86_mca.h" -/* Machine Check Handler for AMD K8 family series */ -void k8_machine_check(struct cpu_user_regs *regs, long error_code) -{ - struct vcpu *vcpu = current; - struct domain *curdom; - struct mc_info *mc_data; - struct mcinfo_global mc_global; - struct mcinfo_bank mc_info; - uint64_t status, addrv, miscv, uc; - uint32_t i; - unsigned int cpu_nr; - uint32_t xen_impacted = 0; -#define DOM_NORMAL 0 -#define DOM0_TRAP 1 -#define DOMU_TRAP 2 -#define DOMU_KILLED 4 - uint32_t dom_state = DOM_NORMAL; - - /* This handler runs as interrupt gate. So IPIs from the - * polling service routine are defered until we finished. - */ - - /* Disable interrupts for the _vcpu_. It may not re-scheduled to - * an other physical CPU or the impacted process in the guest - * continues running with corrupted data, otherwise. 
*/ - vcpu_schedule_lock_irq(vcpu); - - mc_data = x86_mcinfo_getptr(); - cpu_nr = smp_processor_id(); - curdom = vcpu->domain; - - memset(&mc_global, 0, sizeof(mc_global)); - mc_global.common.type = MC_TYPE_GLOBAL; - mc_global.common.size = sizeof(mc_global); - - mc_global.mc_domid = curdom->domain_id; /* impacted domain */ - mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */ - BUG_ON(cpu_nr != vcpu->processor); - mc_global.mc_core_threadid = 0; - mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */ -#if 0 /* TODO: on which socket is this physical core? - It's not clear to me how to figure this out. */ - mc_global.mc_socketid = ???; -#endif - mc_global.mc_flags |= MC_FLAG_UNCORRECTABLE; - rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus); - - /* Quick check, who is impacted */ - xen_impacted = is_idle_domain(curdom); - - /* Dom0 */ - x86_mcinfo_clear(mc_data); - x86_mcinfo_add(mc_data, &mc_global); - - for (i = 0; i < nr_mce_banks; i++) { - struct domain *d; - - rdmsrl(MSR_IA32_MC0_STATUS + 4 * i, status); - - if (!(status & MCi_STATUS_VAL)) - continue; - - /* An error happened in this bank. - * This is expected to be an uncorrectable error, - * since correctable errors get polled. 
- */ - uc = status & MCi_STATUS_UC; - - memset(&mc_info, 0, sizeof(mc_info)); - mc_info.common.type = MC_TYPE_BANK; - mc_info.common.size = sizeof(mc_info); - mc_info.mc_bank = i; - mc_info.mc_status = status; - - addrv = 0; - if (status & MCi_STATUS_ADDRV) { - rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addrv); - - d = maddr_get_owner(addrv); - if (d != NULL) - mc_info.mc_domid = d->domain_id; - } - - miscv = 0; - if (status & MCi_STATUS_MISCV) - rdmsrl(MSR_IA32_MC0_MISC + 4 * i, miscv); - - mc_info.mc_addr = addrv; - mc_info.mc_misc = miscv; - - x86_mcinfo_add(mc_data, &mc_info); /* Dom0 */ - - if (mc_callback_bank_extended) - mc_callback_bank_extended(mc_data, i, status); - - /* clear status */ - wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL); - wmb(); - add_taint(TAINT_MACHINE_CHECK); - } - - status = mc_global.mc_gstatus; - - /* clear MCIP or cpu enters shutdown state - * in case another MCE occurs. */ - status &= ~MCG_STATUS_MCIP; - wrmsrl(MSR_IA32_MCG_STATUS, status); - wmb(); - - /* For the details see the discussion "MCE/MCA concept" on xen-devel. - * The thread started here: - * http://lists.xensource.com/archives/html/xen-devel/2007-05/msg01015.html - */ - - /* MCG_STATUS_RIPV: - * When this bit is not set, then the instruction pointer onto the stack - * to resume at is not valid. If xen is interrupted, then we panic anyway - * right below. Otherwise it is up to the guest to figure out if - * guest kernel or guest userland is affected and should kill either - * itself or the affected process. - */ - - /* MCG_STATUS_EIPV: - * Evaluation of EIPV is the job of the guest. - */ - - if (xen_impacted) { - /* Now we are going to panic anyway. Allow interrupts, so that - * printk on serial console can work. */ - vcpu_schedule_unlock_irq(vcpu); - - /* Uh, that means, machine check exception - * inside Xen occured. 
*/ - printk("Machine check exception occured in Xen.\n"); - - /* if MCG_STATUS_EIPV indicates, the IP on the stack is related - * to the error then it makes sense to print a stack trace. - * That can be useful for more detailed error analysis and/or - * error case studies to figure out, if we can clear - * xen_impacted and kill a DomU instead - * (i.e. if a guest only control structure is affected, but then - * we must ensure the bad pages are not re-used again). - */ - if (status & MCG_STATUS_EIPV) { - printk("MCE: Instruction Pointer is related to the error. " - "Therefore, print the execution state.\n"); - show_execution_state(regs); - } - x86_mcinfo_dump(mc_data); - panic("End of MCE. Use mcelog to decode above error codes.\n"); - } - - /* If Dom0 registered a machine check handler, which is only possible - * with a PV MCA driver, then ... */ - if ( guest_has_trap_callback(dom0, 0, TRAP_machine_check) ) { - dom_state = DOM0_TRAP; - - /* ... deliver machine check trap to Dom0. */ - send_guest_trap(dom0, 0, TRAP_machine_check); - - /* Xen may tell Dom0 now to notify the DomU. - * But this will happen through a hypercall. */ - } else - /* Dom0 did not register a machine check handler, but if DomU - * did so, then... */ - if ( guest_has_trap_callback(curdom, vcpu->vcpu_id, TRAP_machine_check) ) { - dom_state = DOMU_TRAP; - - /* ... deliver machine check trap to DomU */ - send_guest_trap(curdom, vcpu->vcpu_id, TRAP_machine_check); - } else { - /* hmm... noone feels responsible to handle the error. - * So, do a quick check if a DomU is impacted or not. - */ - if (curdom == dom0) { - /* Dom0 is impacted. Since noone can't handle - * this error, panic! */ - x86_mcinfo_dump(mc_data); - panic("MCE occured in Dom0, which it can't handle\n"); - - /* UNREACHED */ - } else { - dom_state = DOMU_KILLED; - - /* Enable interrupts. This basically results in - * calling sti on the *physical* cpu. But after - * domain_crash() the vcpu pointer is invalid. 
- * Therefore, we must unlock the irqs before killing - * it. */ - vcpu_schedule_unlock_irq(vcpu); - - /* DomU is impacted. Kill it and continue. */ - domain_crash(curdom); - } - } - - - switch (dom_state) { - case DOM0_TRAP: - case DOMU_TRAP: - /* Enable interrupts. */ - vcpu_schedule_unlock_irq(vcpu); - - /* guest softirqs and event callbacks are scheduled - * immediately after this handler exits. */ - break; - case DOMU_KILLED: - /* Nothing to do here. */ - break; - default: - BUG(); - } -} /* AMD K8 machine check */ @@ -292,7 +79,7 @@ uint32_t i; int cpu_nr; - machine_check_vector = k8_machine_check; + machine_check_vector = x86_machine_check; cpu_nr = smp_processor_id(); wmb(); diff -r f4552d9f6afb xen/arch/x86/cpu/mcheck/amd_nonfatal.c --- a/xen/arch/x86/cpu/mcheck/amd_nonfatal.c Tue Sep 23 17:11:33 2008 +0100 +++ b/xen/arch/x86/cpu/mcheck/amd_nonfatal.c Fri Sep 26 14:30:17 2008 +0900 @@ -65,117 +65,12 @@ #include "mce.h" #include "x86_mca.h" -static struct timer mce_timer; +static int hw_threshold = 0; -#define MCE_PERIOD MILLISECS(15000) -#define MCE_MIN MILLISECS(2000) -#define MCE_MAX MILLISECS(30000) +extern struct timer mce_timer; -static s_time_t period = MCE_PERIOD; -static int hw_threshold = 0; -static int adjust = 0; - -/* The polling service routine: - * Collects information of correctable errors and notifies - * Dom0 via an event. - */ -void mce_amd_checkregs(void *info) -{ - struct vcpu *vcpu = current; - struct mc_info *mc_data; - struct mcinfo_global mc_global; - struct mcinfo_bank mc_info; - uint64_t status, addrv, miscv; - unsigned int i; - unsigned int event_enabled; - unsigned int cpu_nr; - int error_found; - - /* We don't need a slot yet. Only allocate one on error. 
*/ - mc_data = NULL; - - cpu_nr = smp_processor_id(); - event_enabled = guest_enabled_event(dom0->vcpu[0], VIRQ_MCA); - error_found = 0; - - memset(&mc_global, 0, sizeof(mc_global)); - mc_global.common.type = MC_TYPE_GLOBAL; - mc_global.common.size = sizeof(mc_global); - - mc_global.mc_domid = vcpu->domain->domain_id; /* impacted domain */ - mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */ - BUG_ON(cpu_nr != vcpu->processor); - mc_global.mc_core_threadid = 0; - mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */ -#if 0 /* TODO: on which socket is this physical core? - It's not clear to me how to figure this out. */ - mc_global.mc_socketid = ???; -#endif - mc_global.mc_flags |= MC_FLAG_CORRECTABLE; - rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus); - - for (i = 0; i < nr_mce_banks; i++) { - struct domain *d; - - rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status); - - if (!(status & MCi_STATUS_VAL)) - continue; - - if (mc_data == NULL) { - /* Now we need a slot to fill in error telemetry. */ - mc_data = x86_mcinfo_getptr(); - BUG_ON(mc_data == NULL); - x86_mcinfo_clear(mc_data); - x86_mcinfo_add(mc_data, &mc_global); - } - - memset(&mc_info, 0, sizeof(mc_info)); - mc_info.common.type = MC_TYPE_BANK; - mc_info.common.size = sizeof(mc_info); - mc_info.mc_bank = i; - mc_info.mc_status = status; - - /* Increase polling frequency */ - error_found = 1; - - addrv = 0; - if (status & MCi_STATUS_ADDRV) { - rdmsrl(MSR_IA32_MC0_ADDR + i * 4, addrv); - - d = maddr_get_owner(addrv); - if (d != NULL) - mc_info.mc_domid = d->domain_id; - } - - miscv = 0; - if (status & MCi_STATUS_MISCV) - rdmsrl(MSR_IA32_MC0_MISC + i * 4, miscv); - - mc_info.mc_addr = addrv; - mc_info.mc_misc = miscv; - x86_mcinfo_add(mc_data, &mc_info); - - if (mc_callback_bank_extended) - mc_callback_bank_extended(mc_data, i, status); - - /* clear status */ - wrmsrl(MSR_IA32_MC0_STATUS + i * 4, 0x0ULL); - wmb(); - } - - if (error_found > 0) { - /* If Dom0 enabled the VIRQ_MCA event, then ... 
*/ - if (event_enabled) - /* ... notify it. */ - send_guest_global_virq(dom0, VIRQ_MCA); - else - /* ... or dump it */ - x86_mcinfo_dump(mc_data); - } - - adjust += error_found; -} +extern s_time_t period; +extern int adjust; /* polling service routine invoker: * Adjust poll frequency at runtime. No error means slow polling frequency, @@ -186,7 +81,7 @@ */ static void mce_amd_work_fn(void *data) { - on_each_cpu(mce_amd_checkregs, data, 1, 1); + on_each_cpu(x86_mce_checkregs, data, 1, 1); if (adjust > 0) { if ( !guest_enabled_event(dom0->vcpu[0], VIRQ_MCA) ) { diff -r f4552d9f6afb xen/arch/x86/cpu/mcheck/mce.c --- a/xen/arch/x86/cpu/mcheck/mce.c Tue Sep 23 17:11:33 2008 +0100 +++ b/xen/arch/x86/cpu/mcheck/mce.c Fri Sep 26 14:30:17 2008 +0900 @@ -7,6 +7,8 @@ #include <xen/types.h> #include <xen/kernel.h> #include <xen/config.h> +#include <xen/sched.h> +#include <xen/sched-if.h> #include <xen/smp.h> #include <xen/errno.h> @@ -431,6 +433,226 @@ } while (1); } + +/* Machine Check Handler for AMD K8 family series and Intel P4/Xeon family */ +void x86_machine_check(struct cpu_user_regs *regs, long error_code) +{ + struct vcpu *vcpu = current; + struct domain *curdom; + struct mc_info *mc_data; + struct mcinfo_global mc_global; + struct mcinfo_bank mc_info; + uint64_t status, addrv, miscv, uc; + uint32_t i; + unsigned int cpu_nr; + uint32_t xen_impacted = 0; +#define DOM_NORMAL 0 +#define DOM0_TRAP 1 +#define DOMU_TRAP 2 +#define DOMU_KILLED 4 + uint32_t dom_state = DOM_NORMAL; + + /* This handler runs as interrupt gate. So IPIs from the + * polling service routine are defered until we finished. + */ + + /* Disable interrupts for the _vcpu_. It may not re-scheduled to + * an other physical CPU or the impacted process in the guest + * continues running with corrupted data, otherwise. 
*/ + vcpu_schedule_lock_irq(vcpu); + + mc_data = x86_mcinfo_getptr(); + cpu_nr = smp_processor_id(); + curdom = vcpu->domain; + + memset(&mc_global, 0, sizeof(mc_global)); + mc_global.common.type = MC_TYPE_GLOBAL; + mc_global.common.size = sizeof(mc_global); + + mc_global.mc_domid = curdom->domain_id; /* impacted domain */ + mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */ + BUG_ON(cpu_nr != vcpu->processor); + mc_global.mc_core_threadid = 0; + mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */ +#if 0 /* TODO: on which socket is this physical core? + It's not clear to me how to figure this out. */ + mc_global.mc_socketid = ???; +#endif + mc_global.mc_flags |= MC_FLAG_UNCORRECTABLE; + rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus); + + /* Quick check, who is impacted */ + xen_impacted = is_idle_domain(curdom); + + /* Dom0 */ + x86_mcinfo_clear(mc_data); + x86_mcinfo_add(mc_data, &mc_global); + + for (i = 0; i < nr_mce_banks; i++) { + struct domain *d; + + rdmsrl(MSR_IA32_MC0_STATUS + 4 * i, status); + + if (!(status & MCi_STATUS_VAL)) + continue; + + /* An error happened in this bank. + * This is expected to be an uncorrectable error, + * since correctable errors get polled. 
+ */ + uc = status & MCi_STATUS_UC; + + memset(&mc_info, 0, sizeof(mc_info)); + mc_info.common.type = MC_TYPE_BANK; + mc_info.common.size = sizeof(mc_info); + mc_info.mc_bank = i; + mc_info.mc_status = status; + + addrv = 0; + if (status & MCi_STATUS_ADDRV) { + rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addrv); + + d = maddr_get_owner(addrv); + if (d != NULL) + mc_info.mc_domid = d->domain_id; + } + + miscv = 0; + if (status & MCi_STATUS_MISCV) + rdmsrl(MSR_IA32_MC0_MISC + 4 * i, miscv); + + mc_info.mc_addr = addrv; + mc_info.mc_misc = miscv; + + x86_mcinfo_add(mc_data, &mc_info); /* Dom0 */ + + if (mc_callback_bank_extended) + mc_callback_bank_extended(mc_data, i, status); + + /* clear status */ + wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL); + wmb(); + add_taint(TAINT_MACHINE_CHECK); + } + + /* Never do anything final for the previous reset */ + if (!regs) { + vcpu_schedule_unlock_irq(vcpu); + return; + } + + status = mc_global.mc_gstatus; + + /* clear MCIP or cpu enters shutdown state + * in case another MCE occurs. */ + status &= ~MCG_STATUS_MCIP; + wrmsrl(MSR_IA32_MCG_STATUS, status); + wmb(); + + /* For the details see the discussion "MCE/MCA concept" on xen-devel. + * The thread started here: + * http://lists.xensource.com/archives/html/xen-devel/2007-05/msg01015.html + */ + + /* MCG_STATUS_RIPV: + * When this bit is not set, then the instruction pointer onto the stack + * to resume at is not valid. If xen is interrupted, then we panic anyway + * right below. Otherwise it is up to the guest to figure out if + * guest kernel or guest userland is affected and should kill either + * itself or the affected process. + */ + + /* MCG_STATUS_EIPV: + * Evaluation of EIPV is the job of the guest. + */ + + if (xen_impacted) { + /* Now we are going to panic anyway. Allow interrupts, so that + * printk on serial console can work. */ + vcpu_schedule_unlock_irq(vcpu); + + /* Uh, that means, machine check exception + * inside Xen occured. 
*/ + printk("Machine check exception occured in Xen.\n"); + + /* if MCG_STATUS_EIPV indicates, the IP on the stack is related + * to the error then it makes sense to print a stack trace. + * That can be useful for more detailed error analysis and/or + * error case studies to figure out, if we can clear + * xen_impacted and kill a DomU instead + * (i.e. if a guest only control structure is affected, but then + * we must ensure the bad pages are not re-used again). + */ + if (status & MCG_STATUS_EIPV) { + printk("MCE: Instruction Pointer is related to the error. " + "Therefore, print the execution state.\n"); + show_execution_state(regs); + } + x86_mcinfo_dump(mc_data); + panic("End of MCE. Use mcelog to decode above error codes.\n"); + } + + /* If Dom0 registered a machine check handler, which is only possible + * with a PV MCA driver, then ... */ + if ( guest_has_trap_callback(dom0, 0, TRAP_machine_check) ) { + dom_state = DOM0_TRAP; + + /* ... deliver machine check trap to Dom0. */ + send_guest_trap(dom0, 0, TRAP_machine_check); + + /* Xen may tell Dom0 now to notify the DomU. + * But this will happen through a hypercall. */ + } else + /* Dom0 did not register a machine check handler, but if DomU + * did so, then... */ + if ( guest_has_trap_callback(curdom, vcpu->vcpu_id, TRAP_machine_check) ) { + dom_state = DOMU_TRAP; + + /* ... deliver machine check trap to DomU */ + send_guest_trap(curdom, vcpu->vcpu_id, TRAP_machine_check); + } else { + /* hmm... noone feels responsible to handle the error. + * So, do a quick check if a DomU is impacted or not. + */ + if (curdom == dom0) { + /* Dom0 is impacted. Since noone can't handle + * this error, panic! */ + x86_mcinfo_dump(mc_data); + panic("MCE occured in Dom0, which it can't handle\n"); + + /* UNREACHED */ + } else { + dom_state = DOMU_KILLED; + + /* Enable interrupts. This basically results in + * calling sti on the *physical* cpu. But after + * domain_crash() the vcpu pointer is invalid. 
+ * Therefore, we must unlock the irqs before killing + * it. */ + vcpu_schedule_unlock_irq(vcpu); + + /* DomU is impacted. Kill it and continue. */ + domain_crash(curdom); + } + } + + + switch (dom_state) { + case DOM0_TRAP: + case DOMU_TRAP: + /* Enable interrupts. */ + vcpu_schedule_unlock_irq(vcpu); + + /* guest softirqs and event callbacks are scheduled + * immediately after this handler exits. */ + break; + case DOMU_KILLED: + /* Nothing to do here. */ + break; + default: + BUG(); + } +} /* Machine Check Architecture Hypercall */ diff -r f4552d9f6afb xen/arch/x86/cpu/mcheck/non-fatal.c --- a/xen/arch/x86/cpu/mcheck/non-fatal.c Tue Sep 23 17:11:33 2008 +0100 +++ b/xen/arch/x86/cpu/mcheck/non-fatal.c Fri Sep 26 14:30:17 2008 +0900 @@ -14,16 +14,158 @@ #include <xen/smp.h> #include <xen/timer.h> #include <xen/errno.h> +#include <xen/event.h> #include <asm/processor.h> #include <asm/system.h> #include <asm/msr.h> #include "mce.h" +#include "x86_mca.h" static int firstbank; -static struct timer mce_timer; -#define MCE_PERIOD MILLISECS(15000) +struct timer mce_timer; + +s_time_t period = MCE_PERIOD; +int adjust = 0; + +/* The polling service routine: + * Collects information of correctable errors and notifies + * Dom0 via an event. + */ +void x86_mce_checkregs(void *info) +{ + struct vcpu *vcpu = current; + struct mc_info *mc_data; + struct mcinfo_global mc_global; + struct mcinfo_bank mc_info; + uint64_t status, addrv, miscv; + unsigned int i; + unsigned int event_enabled; + unsigned int cpu_nr; + int error_found; + + /* We don't need a slot yet. Only allocate one on error. 
*/ + mc_data = NULL; + + cpu_nr = smp_processor_id(); + event_enabled = guest_enabled_event(dom0->vcpu[0], VIRQ_MCA); + error_found = 0; + + memset(&mc_global, 0, sizeof(mc_global)); + mc_global.common.type = MC_TYPE_GLOBAL; + mc_global.common.size = sizeof(mc_global); + + mc_global.mc_domid = vcpu->domain->domain_id; /* impacted domain */ + mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */ + BUG_ON(cpu_nr != vcpu->processor); + mc_global.mc_core_threadid = 0; + mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */ +#if 0 /* TODO: on which socket is this physical core? + It's not clear to me how to figure this out. */ + mc_global.mc_socketid = ???; +#endif + mc_global.mc_flags |= MC_FLAG_CORRECTABLE; + rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus); + + for (i = 0; i < nr_mce_banks; i++) { + struct domain *d; + + rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status); + + if (!(status & MCi_STATUS_VAL)) + continue; + + if (mc_data == NULL) { + /* Now we need a slot to fill in error telemetry. */ + mc_data = x86_mcinfo_getptr(); + BUG_ON(mc_data == NULL); + x86_mcinfo_clear(mc_data); + x86_mcinfo_add(mc_data, &mc_global); + } + + memset(&mc_info, 0, sizeof(mc_info)); + mc_info.common.type = MC_TYPE_BANK; + mc_info.common.size = sizeof(mc_info); + mc_info.mc_bank = i; + mc_info.mc_status = status; + + /* Increase polling frequency */ + error_found = 1; + + addrv = 0; + if (status & MCi_STATUS_ADDRV) { + rdmsrl(MSR_IA32_MC0_ADDR + i * 4, addrv); + + d = maddr_get_owner(addrv); + if (d != NULL) + mc_info.mc_domid = d->domain_id; + } + + miscv = 0; + if (status & MCi_STATUS_MISCV) + rdmsrl(MSR_IA32_MC0_MISC + i * 4, miscv); + + mc_info.mc_addr = addrv; + mc_info.mc_misc = miscv; + x86_mcinfo_add(mc_data, &mc_info); + + if (mc_callback_bank_extended) + mc_callback_bank_extended(mc_data, i, status); + + /* clear status */ + wrmsrl(MSR_IA32_MC0_STATUS + i * 4, 0x0ULL); + wmb(); + } + + if (error_found > 0) { + /* If Dom0 enabled the VIRQ_MCA event, then ... 
*/ + if (event_enabled) + /* ... notify it. */ + send_guest_global_virq(dom0, VIRQ_MCA); + else + /* ... or dump it */ + x86_mcinfo_dump(mc_data); + } + + adjust += error_found; +} + +static void p4_mce_work_fn(void *data) +{ + on_each_cpu(x86_mce_checkregs, NULL, 1, 1); + + if (adjust > 0) { + if ( !guest_enabled_event(dom0->vcpu[0], VIRQ_MCA) ) { + /* Dom0 did not enable VIRQ_MCA, so Xen is reporting. */ + printk("MCE: polling routine found correctable error. " + " Use mcelog to parse above error output.\n"); + } + } + + if (adjust > 0) { + /* Increase polling frequency */ + adjust++; /* adjust == 1 must have an effect */ + period /= adjust; + } else { + /* Decrease polling frequency */ + period *= 2; + } + if (period > MCE_MAX) { + /* limit: Poll at least every 30s */ + period = MCE_MAX; + } + if (period < MCE_MIN) { + /* limit: Poll every 2s. + * When this is reached an uncorrectable error + * is expected to happen, if Dom0 does nothing. + */ + period = MCE_MIN; + } + + set_timer(&mce_timer, NOW() + period); + adjust = 0; +} static void mce_checkregs (void *info) { @@ -85,6 +227,11 @@ break; case X86_VENDOR_INTEL: + if (c->x86 == 15) { /* P4/Xeon */ + init_timer(&mce_timer, p4_mce_work_fn, NULL, 0); + set_timer(&mce_timer, NOW() + period); + break; + } init_timer(&mce_timer, mce_work_fn, NULL, 0); set_timer(&mce_timer, NOW() + MCE_PERIOD); break; diff -r f4552d9f6afb xen/arch/x86/cpu/mcheck/p4.c --- a/xen/arch/x86/cpu/mcheck/p4.c Tue Sep 23 17:11:33 2008 +0100 +++ b/xen/arch/x86/cpu/mcheck/p4.c Fri Sep 26 14:30:17 2008 +0900 @@ -15,6 +15,7 @@ #include <asm/apic.h> #include "mce.h" +#include "x86_mca.h" /* as supported by the P4/Xeon family */ struct intel_mce_extended_msrs { @@ -32,6 +33,7 @@ }; static int mce_num_extended_msrs = 0; +static int mce_bootlog = 1; #ifdef CONFIG_X86_MCE_P4THERMAL @@ -158,85 +160,13 @@ return mce_num_extended_msrs; } -static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code) -{ - int recover=1; - u32 
alow, ahigh, high, low; - u32 mcgstl, mcgsth; - int i; - struct intel_mce_extended_msrs dbg; - - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); - if (mcgstl & (1<<0)) /* Recoverable ? */ - recover=0; - - printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", - smp_processor_id(), mcgsth, mcgstl); - - if (intel_get_extended_msrs(&dbg)) { - printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", - smp_processor_id(), dbg.eip, dbg.eflags); - printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", - dbg.eax, dbg.ebx, dbg.ecx, dbg.edx); - printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", - dbg.esi, dbg.edi, dbg.ebp, dbg.esp); - } - - for (i=0; i<nr_mce_banks; i++) { - rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); - if (high & (1<<31)) { - if (high & (1<<29)) - recover |= 1; - if (high & (1<<25)) - recover |= 2; - printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); - high &= ~(1<<31); - if (high & (1<<27)) { - rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); - printk ("[%08x%08x]", ahigh, alow); - } - if (high & (1<<26)) { - rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); - printk (" at %08x%08x", ahigh, alow); - } - printk ("\n"); - } - } - - if (recover & 2) - panic ("CPU context corrupt"); - if (recover & 1) - panic ("Unable to continue"); - - printk(KERN_EMERG "Attempting to continue.\n"); - /* - * Do not clear the MSR_IA32_MCi_STATUS if the error is not - * recoverable/continuable.This will allow BIOS to look at the MSRs - * for errors if the OS could not log the error. 
- */ - for (i=0; i<nr_mce_banks; i++) { - u32 msr; - msr = MSR_IA32_MC0_STATUS+i*4; - rdmsr (msr, low, high); - if (high&(1<<31)) { - /* Clear it */ - wrmsr(msr, 0UL, 0UL); - /* Serialize */ - wmb(); - add_taint(TAINT_MACHINE_CHECK); - } - } - mcgstl &= ~(1<<2); - wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); -} - void intel_p4_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; int i; - machine_check_vector = intel_machine_check; + machine_check_vector = x86_machine_check; wmb(); printk (KERN_INFO "Intel machine check architecture supported.\n"); @@ -244,6 +174,10 @@ if (l & (1<<8)) /* Control register present ? */ wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); nr_mce_banks = l & 0xff; + + /* Log the machine checks left over from the previous reset. + This also clears all registers */ + x86_machine_check(NULL, mce_bootlog ? -1 : -2); for (i=0; i<nr_mce_banks; i++) { wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); diff -r f4552d9f6afb xen/arch/x86/cpu/mcheck/x86_mca.h --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Tue Sep 23 17:11:33 2008 +0100 +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Fri Sep 26 14:30:17 2008 +0900 @@ -70,3 +70,11 @@ /* reserved bits */ #define MCi_STATUS_OTHER_RESERVED2 0x0180000000000000ULL +/* Polling period */ +#define MCE_PERIOD MILLISECS(15000) +#define MCE_MIN MILLISECS(2000) +#define MCE_MAX MILLISECS(30000) + +/* Common routines */ +void x86_machine_check(struct cpu_user_regs *regs, long error_code); +void x86_mce_checkregs(void *info); diff -r f4552d9f6afb xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Tue Sep 23 17:11:33 2008 +0100 +++ b/xen/arch/x86/traps.c Fri Sep 26 14:30:17 2008 +0900 @@ -713,8 +713,10 @@ __clear_bit(X86_FEATURE_VME, &d); __clear_bit(X86_FEATURE_PSE, &d); __clear_bit(X86_FEATURE_PGE, &d); +#ifndef __x86_64__ __clear_bit(X86_FEATURE_MCE, &d); __clear_bit(X86_FEATURE_MCA, &d); +#endif __clear_bit(X86_FEATURE_PSE36, &d); } switch ( (uint32_t)regs->eax ) _______________________________________________ Xen-devel mailing list 
Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |