[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [linux-2.6.18-xen] x86: add MCA logging support in DOM0



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1245149935 -3600
# Node ID 75e5bfa7fbdc175b1e59f27563545421cbe96cd8
# Parent  9242c5b965c181a4eb095570c51b1cc05bd58a33
x86: add MCA logging support in DOM0

When an MCE/CMCI error happens (or is found by polling), Xen sends the
related error information to DOM0. This patch fetches the Xen-logged
information by hypercall and converts the Xen-format log into the
Linux MCELOG format. This makes it possible to use the existing mcelog
tools for native Linux.

With this patch, once MCE/CMCI error log information has been sent to
DOM0, running the mcelog tools in DOM0 gives the same detailed, decoded
MCE information as on native Linux.

Signed-Off-By: Liping Ke <liping.ke@xxxxxxxxx>
Signed-Off-By: Yunhong Jiang <yunhong.jiang@xxxxxxxxx>
Acked-By: Jan Beulich <jbeulich@xxxxxxxxxx>
---
 arch/x86_64/Kconfig                         |   12 +-
 arch/x86_64/kernel/Makefile                 |    1 
 arch/x86_64/kernel/entry-xen.S              |    9 -
 arch/x86_64/kernel/mce.c                    |   24 ++++-
 arch/x86_64/kernel/mce_dom0.c               |  131 ++++++++++++++++++++++++++++
 include/asm-x86_64/mach-xen/asm/hypercall.h |    9 +
 6 files changed, 170 insertions(+), 16 deletions(-)

diff -r 9242c5b965c1 -r 75e5bfa7fbdc arch/x86_64/Kconfig
--- a/arch/x86_64/Kconfig       Tue Jun 16 11:09:39 2009 +0100
+++ b/arch/x86_64/Kconfig       Tue Jun 16 11:58:55 2009 +0100
@@ -471,8 +471,8 @@ config SWIOTLB
        bool
 
 config X86_MCE
-       bool "Machine check support" if EMBEDDED
-       depends on !X86_64_XEN
+       bool "Machine check support"
+       depends on (!XEN_UNPRIVILEGED_GUEST)
        default y
        help
           Include a machine check error handler to report hardware errors.
@@ -482,7 +482,7 @@ config X86_MCE
 
 config X86_MCE_INTEL
        bool "Intel MCE features"
-       depends on X86_MCE && X86_LOCAL_APIC
+       depends on X86_MCE && X86_LOCAL_APIC && !X86_64_XEN
        default y
        help
           Additional support for intel specific MCE features such as
@@ -490,11 +490,15 @@ config X86_MCE_INTEL
 
 config X86_MCE_AMD
        bool "AMD MCE features"
-       depends on X86_MCE && X86_LOCAL_APIC
+       depends on X86_MCE && X86_LOCAL_APIC && !X86_64_XEN
        default y
        help
           Additional support for AMD specific MCE features such as
           the DRAM Error Threshold.
+
+config X86_XEN_MCE
+       def_bool y
+       depends on X86_64_XEN && X86_MCE
 
 config KEXEC
        bool "kexec system call (EXPERIMENTAL)"
diff -r 9242c5b965c1 -r 75e5bfa7fbdc arch/x86_64/kernel/Makefile
--- a/arch/x86_64/kernel/Makefile       Tue Jun 16 11:09:39 2009 +0100
+++ b/arch/x86_64/kernel/Makefile       Tue Jun 16 11:58:55 2009 +0100
@@ -13,6 +13,7 @@ obj-$(CONFIG_STACKTRACE)      += stacktrace.o
 obj-$(CONFIG_STACKTRACE)       += stacktrace.o
 obj-$(CONFIG_X86_MCE)         += mce.o
 obj-$(CONFIG_X86_MCE_INTEL)    += mce_intel.o
+obj-$(CONFIG_X86_XEN_MCE)      += mce_dom0.o
 obj-$(CONFIG_X86_MCE_AMD)      += mce_amd.o
 obj-$(CONFIG_MTRR)             += ../../i386/kernel/cpu/mtrr/
 obj-$(CONFIG_ACPI)             += acpi/
diff -r 9242c5b965c1 -r 75e5bfa7fbdc arch/x86_64/kernel/entry-xen.S
--- a/arch/x86_64/kernel/entry-xen.S    Tue Jun 16 11:09:39 2009 +0100
+++ b/arch/x86_64/kernel/entry-xen.S    Tue Jun 16 11:58:55 2009 +0100
@@ -1258,13 +1258,8 @@ END(spurious_interrupt_bug)
 
 #ifdef CONFIG_X86_MCE
        /* runs on exception stack */
-ENTRY(machine_check)
-       INTR_FRAME
-       pushq $0
-       CFI_ADJUST_CFA_OFFSET 8 
-       paranoidentry do_machine_check
-       jmp paranoid_exit1
-       CFI_ENDPROC
+KPROBE_ENTRY(machine_check)
+       zeroentry do_machine_check
 END(machine_check)
 #endif
 
diff -r 9242c5b965c1 -r 75e5bfa7fbdc arch/x86_64/kernel/mce.c
--- a/arch/x86_64/kernel/mce.c  Tue Jun 16 11:09:39 2009 +0100
+++ b/arch/x86_64/kernel/mce.c  Tue Jun 16 11:58:55 2009 +0100
@@ -276,9 +276,16 @@ void do_machine_check(struct pt_regs * r
 
 /*
  * Periodic polling timer for "silent" machine check errors.
- */
-
+ * We will disable polling in DOM0 since all CMCI/Polling
+ * mechanism will be done in XEN for Intel CPUs
+*/
+
+#if defined (CONFIG_X86_XEN_MCE)
+static int check_interval = 0; /* disable polling */
+#else
 static int check_interval = 5 * 60; /* 5 minutes */
+#endif
+
 static void mcheck_timer(void *data);
 static DECLARE_WORK(mcheck_work, mcheck_timer, NULL);
 
@@ -367,6 +374,7 @@ static void __cpuinit mce_cpu_quirks(str
 
 static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
 {
+#ifndef CONFIG_X86_64_XEN
        switch (c->x86_vendor) {
        case X86_VENDOR_INTEL:
                mce_intel_feature_init(c);
@@ -377,8 +385,8 @@ static void __cpuinit mce_cpu_features(s
        default:
                break;
        }
-}
-
+#endif
+}
 /* 
  * Called for each booted CPU to set up machine checks.
  * Must be called with preempt off. 
@@ -649,6 +657,7 @@ static struct notifier_block mce_cpu_not
 };
 #endif
 
+extern void bind_virq_for_mce(void);
 static __init int mce_init_device(void)
 {
        int err;
@@ -664,6 +673,13 @@ static __init int mce_init_device(void)
 
        register_hotcpu_notifier(&mce_cpu_notifier);
        misc_register(&mce_log_device);
+
+    /*Register vIRQ handler for MCE LOG processing*/
+#if defined(CONFIG_X86_XEN_MCE)
+    printk(KERN_DEBUG "MCE: bind virq for DOM0 Logging\n");
+    bind_virq_for_mce();
+#endif
+
        return err;
 }
 
diff -r 9242c5b965c1 -r 75e5bfa7fbdc arch/x86_64/kernel/mce_dom0.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/x86_64/kernel/mce_dom0.c     Tue Jun 16 11:58:55 2009 +0100
@@ -0,0 +1,131 @@
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <xen/interface/xen.h>
+#include <xen/evtchn.h>
+#include <xen/interface/vcpu.h>
+#include <asm/hypercall.h>
+#include <asm/mce.h>
+
+/*
+ * Convert a Xen-format machine check record into Linux "struct mce"
+ * entries and queue them with mce_log().
+ *
+ * The MC_TYPE_GLOBAL entry of @mi supplies mcgstatus and the CPU number.
+ * Starting at the first MC_TYPE_BANK entry, the entries are then walked
+ * with x86_mcinfo_next() and one struct mce is logged per bank entry; the
+ * walk stops at a NULL or zero-sized entry.
+ *
+ * Returns 0 on success, -1 if @mi carries no MC_TYPE_GLOBAL entry.
+ */
+static int convert_log(struct mc_info *mi)
+{
+       struct mcinfo_common *mic = NULL;
+       struct mcinfo_global *mc_global;
+       struct mcinfo_bank *mc_bank;
+       struct mce m;
+
+       x86_mcinfo_lookup(mic, mi, MC_TYPE_GLOBAL);
+       if (mic == NULL)
+       {
+               printk(KERN_ERR "DOM0_MCE_LOG: global data is NULL\n");
+               return -1;
+       }
+
+       /*
+        * Only some members are filled in below; zero the rest so that
+        * mce_log() is never handed uninitialised stack contents.
+        */
+       memset(&m, 0, sizeof(m));
+
+       mc_global = (struct mcinfo_global*)mic;
+       m.mcgstatus = mc_global->mc_gstatus;
+       m.cpu = mc_global->mc_coreid;/*for test*/
+       x86_mcinfo_lookup(mic, mi, MC_TYPE_BANK);
+       do
+       {
+               if (mic == NULL || mic->size == 0)
+                       break;
+               if (mic->type == MC_TYPE_BANK)
+               {
+                       mc_bank = (struct mcinfo_bank*)mic;
+                       m.misc = mc_bank->mc_misc;
+                       m.status = mc_bank->mc_status;
+                       m.addr = mc_bank->mc_addr;
+                       m.tsc = mc_bank->mc_tsc;
+                       m.res1 = mc_bank->mc_ctrl2;
+                       m.bank = mc_bank->mc_bank;
+                       /* argument order follows the format: CPU, then BANK */
+                       printk(KERN_DEBUG "[CPU%d, BANK%d, addr %llx, state %llx]\n",
+                                               m.cpu, m.bank, m.addr, m.status);
+                       /*log this record*/
+                       mce_log(&m);
+               }
+               mic = x86_mcinfo_next(mic);
+       }while (1);
+
+       return 0;
+}
+
+/*
+ * Buffer for data fetched with XEN_MC_fetch: allocated in
+ * bind_virq_for_mce(), handed to Xen as the fetch target by the vIRQ
+ * handler and then parsed by convert_log().
+ */
+static struct mc_info *g_mi;
+
+/*
+ * Fetch one pending record from the given queue (XEN_MC_URGENT or
+ * XEN_MC_NONURGENT) into g_mi, log it and acknowledge it to Xen.
+ *
+ * Returns 1 if a record was logged and acknowledged, 0 if the fetch
+ * hypercall failed or flagged XEN_MC_NODATA / XEN_MC_FETCHFAILED, and -1
+ * if convert_log() rejected the record (it is then left unacknowledged).
+ */
+static int mce_dom0_fetch_one(xen_mc_t *mc_op, unsigned int queue)
+{
+       int rc;
+
+       mc_op->u.mc_fetch.flags = queue;
+       rc = HYPERVISOR_mca(mc_op);
+       if (rc != 0 ||
+           (mc_op->u.mc_fetch.flags & (XEN_MC_NODATA | XEN_MC_FETCHFAILED)))
+               return 0;
+
+       if (convert_log(g_mi) != 0) {
+               printk(KERN_ERR "MCE_DOM0_LOG: Log conversion failed\n");
+               return -1;
+       }
+
+       /*
+        * The telemetry entry was reserved, with its refcount raised, when
+        * it was filled in. Now that it has been fetched, ACK it so the
+        * refcount is dropped and the entry released.
+        */
+       mc_op->u.mc_fetch.flags = queue | XEN_MC_ACK;
+       rc = HYPERVISOR_mca(mc_op);
+
+       return 1;
+}
+
+/*
+ * DOM0 MCE vIRQ handler: logs the physical machine check information
+ * that Xen has queued. The urgent queue is drained first, then the
+ * nonurgent one; a record that fails conversion ends processing at once.
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t mce_dom0_interrupt(int irq, void *dev_id,
+                                      struct pt_regs *regs)
+{
+       xen_mc_t mc_op;
+       int rc;
+
+       printk(KERN_DEBUG "MCE_DOM0_LOG: enter dom0 mce vIRQ handler\n");
+       mc_op.cmd = XEN_MC_fetch;
+       mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+       set_xen_guest_handle(mc_op.u.mc_fetch.data, g_mi);
+
+       do {
+               rc = mce_dom0_fetch_one(&mc_op, XEN_MC_URGENT);
+       } while (rc > 0);
+       if (rc < 0)
+               return IRQ_HANDLED;
+       printk(KERN_DEBUG "MCE_DOM0_LOG: No more urgent data\n");
+
+       do {
+               rc = mce_dom0_fetch_one(&mc_op, XEN_MC_NONURGENT);
+       } while (rc > 0);
+       if (rc == 0)
+               printk(KERN_DEBUG "MCE_DOM0_LOG: No more nonurgent data\n");
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * Allocate the mc_info buffer used for XEN_MC_fetch and bind VIRQ_MCA to
+ * mce_dom0_interrupt().
+ *
+ * Failures are only reported with printk(); on any failure g_mi is left
+ * NULL and no buffer stays allocated.
+ */
+void bind_virq_for_mce(void)
+{
+       int ret;
+
+       /*
+        * The handler passes g_mi to Xen as soon as it runs, so the buffer
+        * has to exist before the vIRQ is bound.
+        */
+       g_mi = kmalloc(sizeof(*g_mi), GFP_KERNEL);
+       if (g_mi == NULL) {
+               printk(KERN_ERR "MCE_DOM0_LOG: cannot allocate mc_info buffer\n");
+               return;
+       }
+
+       ret = bind_virq_to_irqhandler(VIRQ_MCA, 0,
+               mce_dom0_interrupt, 0, "mce", NULL);
+       if (ret < 0) {
+               printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
+               /* no handler will ever use the buffer; do not leak it */
+               kfree(g_mi);
+               g_mi = NULL;
+       }
+}
+
diff -r 9242c5b965c1 -r 75e5bfa7fbdc include/asm-x86_64/mach-xen/asm/hypercall.h
--- a/include/asm-x86_64/mach-xen/asm/hypercall.h       Tue Jun 16 11:09:39 
2009 +0100
+++ b/include/asm-x86_64/mach-xen/asm/hypercall.h       Tue Jun 16 11:58:55 
2009 +0100
@@ -39,6 +39,7 @@
 
 #include <linux/string.h> /* memcpy() */
 #include <linux/stringify.h>
+#include <xen/interface/arch-x86/xen-mca.h>
 
 #ifndef __HYPERVISOR_H__
 # error "please don't include this file directly"
@@ -215,7 +216,13 @@ HYPERVISOR_platform_op(
        platform_op->interface_version = XENPF_INTERFACE_VERSION;
        return _hypercall1(int, platform_op, platform_op);
 }
-
+/*
+ * Issue the "mca" hypercall with @mc_op as its single argument.
+ * The interface_version member is set to XEN_MCA_INTERFACE_VERSION here
+ * before the call. Returns the hypercall's int result; the function is
+ * marked __must_check.
+ */
+static inline int __must_check
+HYPERVISOR_mca(
+       struct xen_mc *mc_op)
+{
+       mc_op->interface_version = XEN_MCA_INTERFACE_VERSION;
+       return _hypercall1(int, mca, mc_op);
+}
 static inline int __must_check
 HYPERVISOR_set_debugreg(
        unsigned int reg, unsigned long value)

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.