[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] [POWERPC][XEN] Machine Check Processing



# HG changeset patch
# User Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
# Node ID 22e01a4864b0b750922177067860fdcd21e86318
# Parent  a817acb393863d9158c42285d46497925c28ef81
[POWERPC][XEN] Machine Check Processing

This patch allows allows for a processor to report on the cause of the
machine check and possible even recover from it.

Signed-off-by: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
Signed-off-by: Hollis Blanchard <hollisb@xxxxxxxxxx>
---
 xen/arch/powerpc/exceptions.c                  |   10 +++
 xen/arch/powerpc/powerpc64/ppc970.c            |   74 ++++++++++++++++++++++++-
 xen/include/asm-powerpc/powerpc64/ppc970-hid.h |   17 +++++
 xen/include/asm-powerpc/processor.h            |    1 
 4 files changed, 100 insertions(+), 2 deletions(-)

diff -r a817acb39386 -r 22e01a4864b0 xen/arch/powerpc/exceptions.c
--- a/xen/arch/powerpc/exceptions.c     Thu Sep 07 01:30:12 2006 -0400
+++ b/xen/arch/powerpc/exceptions.c     Thu Sep 07 01:48:42 2006 -0400
@@ -25,6 +25,7 @@
 #include <xen/serial.h>
 #include <xen/gdbstub.h>
 #include <asm/time.h>
+#include <asm/processor.h>
 
 #undef DEBUG
 
@@ -54,6 +55,8 @@ void do_dec(struct cpu_user_regs *regs)
 
 void program_exception(struct cpu_user_regs *regs, unsigned long cookie)
 {
+    int recover = 0;
+
 #ifdef CRASH_DEBUG
     __trap_to_gdb(regs, cookie);
 #else /* CRASH_DEBUG */
@@ -62,6 +65,11 @@ void program_exception(struct cpu_user_r
     printk("hid4 0x%016lx\n", regs->hid4);
     printk("---[ backtrace ]---\n");
     show_backtrace(regs->gprs[1], regs->lr, regs->pc);
-    panic("%s: 0x%lx\n", __func__, cookie);
+
+    if (cookie == 0x200)
+        recover = cpu_machinecheck(regs);
+
+    if (!recover)
+        panic("%s: 0x%lx\n", __func__, cookie);
 #endif /* CRASH_DEBUG */
 }
diff -r a817acb39386 -r 22e01a4864b0 xen/arch/powerpc/powerpc64/ppc970.c
--- a/xen/arch/powerpc/powerpc64/ppc970.c       Thu Sep 07 01:30:12 2006 -0400
+++ b/xen/arch/powerpc/powerpc64/ppc970.c       Thu Sep 07 01:48:42 2006 -0400
@@ -17,6 +17,7 @@
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
  *          Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
+ *          Amos Waterland  <apw@xxxxxxxxxx>
  */
 
 #include <xen/config.h>
@@ -30,6 +31,7 @@
 #include <asm/powerpc64/processor.h>
 #include <asm/powerpc64/ppc970-hid.h>
 
+#undef DEBUG
 #undef SERIALIZE
 
 struct rma_settings {
@@ -190,7 +192,7 @@ void cpu_initialize(int cpuid)
     hid5.bits.DCBZ32_ill = 0; /* make dzbz 32byte illeagal */
     mthid5(hid5.word);
 
-#ifdef DUMP_HIDS
+#ifdef DEBUG
     printk("hid0 0x%016lx\n"
            "hid1 0x%016lx\n"
            "hid4 0x%016lx\n"
@@ -238,3 +240,73 @@ void load_cpu_sprs(struct vcpu *v)
 {
     mthid4(v->arch.cpu.hid4.word);
 }
+
+int cpu_machinecheck(struct cpu_user_regs *regs)
+{
+    int recover = 0;
+    u32 dsisr = mfdsisr();
+
+    if (regs->msr & MCK_SRR1_RI)
+        recover = 1;
+
+    printk("MACHINE CHECK: %s Recoverable\n", recover ? "IS": "NOT");
+    printk("SRR1: 0x%016lx\n", regs->msr);
+    if (regs->msr & MCK_SRR1_INSN_FETCH_UNIT)
+        printk("42: Exception caused by Instruction Fetch Unit (IFU) "
+               "detection of a hardware uncorrectable error (UE).\n");
+
+    if (regs->msr & MCK_SRR1_LOAD_STORE)
+        printk("43: Exception caused by load/store detection of error "
+               "(see DSISR)\n");
+
+    switch (regs->msr & MCK_SRR1_CAUSE_MASK) {
+    case MCK_SRR1_CAUSE_SLB_PAR:
+        printk("0b01: Exception caused by an SLB parity error detected "
+               "while translating an instruction fetch address.\n");
+        break;
+    case MCK_SRR1_CAUSE_TLB_PAR:
+        printk("0b10: Exception caused by a TLB parity error detected "
+               "while translating an instruction fetch address.\n");
+        break;
+    case MCK_SRR1_CAUSE_UE:
+        printk("0b11: Exception caused by a hardware uncorrectable "
+               "error (UE) detected while doing a reload of an "
+               "instruction-fetch TLB tablewalk.\n");
+        break;
+    default:
+        break;
+    }
+
+    printk("\nDSIDR: 0x%08x\n", dsisr);
+    if (dsisr & MCK_DSISR_UE)
+        printk("16: Exception caused by a UE deferred error "
+               "(DAR is undefined).\n");
+    
+    if (dsisr & MCK_DSISR_UE_TABLE_WALK)
+        printk("17: Exception caused by a UE deferred error "
+               "during a tablewalk (D-side).\n"); 
+
+    if (dsisr & MCK_DSISR_L1_DCACHE_PAR)
+        printk("18: Exception was caused by a software recoverable "
+               "parity error in the L1 D-cache.\n");
+
+    if (dsisr & MCK_DSISR_L1_DCACHE_TAG_PAR)
+        printk("19: Exception was caused by a software recoverable "
+               "parity error in the L1 D-cache tag.\n");
+
+    if (dsisr & MCK_DSISR_D_ERAT_PAR)
+        printk("20: Exception was caused by a software recoverable parity "
+               "error in the D-ERAT.\n");
+        
+    if (dsisr & MCK_DSISR_TLB_PAR)
+        printk("21: Exception was caused by a software recoverable parity "
+               "error in the TLB.\n");
+
+    if (dsisr & MCK_DSISR_SLB_PAR)
+        printk("23: Exception was caused by an SLB parity error (may not be "
+               "recoverable). This condition could occur if the "
+               "effective segment ID (ESID) fields of two or more SLB "
+               "entries contain the same value.");
+
+    return 0; /* for now lets not recover; */
+}
diff -r a817acb39386 -r 22e01a4864b0 
xen/include/asm-powerpc/powerpc64/ppc970-hid.h
--- a/xen/include/asm-powerpc/powerpc64/ppc970-hid.h    Thu Sep 07 01:30:12 
2006 -0400
+++ b/xen/include/asm-powerpc/powerpc64/ppc970-hid.h    Thu Sep 07 01:48:42 
2006 -0400
@@ -141,4 +141,21 @@ union hid5 {
     ulong word;
 };
 
+#define MCK_SRR1_INSN_FETCH_UNIT    0x0000000000200000 /* 42 */
+#define MCK_SRR1_LOAD_STORE         0x0000000000100000 /* 43 */
+#define MCK_SRR1_CAUSE_MASK         0x00000000000c0000 /* 44:45 */
+#define MCK_SRR1_CAUSE_NONE         0x0000000000000000 /* 0b00 */
+#define MCK_SRR1_CAUSE_SLB_PAR      0x0000000000040000 /* 0b01 */
+#define MCK_SRR1_CAUSE_TLB_PAR      0x0000000000080000 /* 0b10 */
+#define MCK_SRR1_CAUSE_UE           0x00000000000c0000 /* 0b11 */
+#define MCK_SRR1_RI                 MSR_RI
+
+#define MCK_DSISR_UE                0x00008000 /* 16 */
+#define MCK_DSISR_UE_TABLE_WALK     0x00004000 /* 17 */
+#define MCK_DSISR_L1_DCACHE_PAR     0x00002000 /* 18 */
+#define MCK_DSISR_L1_DCACHE_TAG_PAR 0x00001000 /* 19 */
+#define MCK_DSISR_D_ERAT_PAR        0x00000800 /* 20 */
+#define MCK_DSISR_TLB_PAR           0x00000400 /* 21 */
+#define MCK_DSISR_SLB_PAR           0x00000100 /* 23 */
+
 #endif
diff -r a817acb39386 -r 22e01a4864b0 xen/include/asm-powerpc/processor.h
--- a/xen/include/asm-powerpc/processor.h       Thu Sep 07 01:30:12 2006 -0400
+++ b/xen/include/asm-powerpc/processor.h       Thu Sep 07 01:48:42 2006 -0400
@@ -37,6 +37,7 @@ struct domain;
 struct domain;
 struct vcpu;
 struct cpu_user_regs;
+extern int cpu_machinecheck(struct cpu_user_regs *);
 extern void show_registers(struct cpu_user_regs *);
 extern void show_execution_state(struct cpu_user_regs *);
 extern void show_backtrace(ulong sp, ulong lr, ulong pc);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.