[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v2 04/12] x86/mce: handle LMCE locally



LMCE is sent to only one CPU thread, so MCE handler, barriers and
softirq handler should go without waiting for other CPUs, when
handling LMCE. Note LMCE is still broadcast to all vcpus as regular
MCE on Intel CPU right now.

Signed-off-by: Haozhong Zhang <haozhong.zhang@xxxxxxxxx>
---
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>

Changes in v2:
 * Use this_cpu() rather than __get_cpu_var().
 * Drop the per-cpu flag lmce_in_process.
 * Add parameter "bool nowait" to mce_barrier_enter/exit to let callers
   control mce_barrier_enter/exit needs to wait for barrier operations
   on other CPU's.
 * Introduce a new per-cpu flag mce_in_process to indicate whether a
   non-local MCE have not been processed completely. mce_softirq() uses
   this flag to decide whether it needs to synchronize with mce_softirq()
   on other CPU's.
---
 xen/arch/x86/cpu/mcheck/barrier.c  | 14 ++++++------
 xen/arch/x86/cpu/mcheck/barrier.h  | 14 +++++++++---
 xen/arch/x86/cpu/mcheck/mcaction.c |  4 +++-
 xen/arch/x86/cpu/mcheck/mce.c      | 44 +++++++++++++++++++++++++++-----------
 xen/arch/x86/cpu/mcheck/mce.h      |  2 ++
 xen/arch/x86/cpu/mcheck/x86_mca.h  |  4 +++-
 6 files changed, 57 insertions(+), 25 deletions(-)

diff --git a/xen/arch/x86/cpu/mcheck/barrier.c 
b/xen/arch/x86/cpu/mcheck/barrier.c
index 5dce1fb..e6f9ea2 100644
--- a/xen/arch/x86/cpu/mcheck/barrier.c
+++ b/xen/arch/x86/cpu/mcheck/barrier.c
@@ -16,11 +16,11 @@ void mce_barrier_dec(struct mce_softirq_barrier *bar)
     atomic_dec(&bar->val);
 }
 
-void mce_barrier_enter(struct mce_softirq_barrier *bar)
+void mce_barrier_enter(struct mce_softirq_barrier *bar, bool nowait)
 {
     int gen;
 
-    if (!mce_broadcast)
+    if ( !mce_broadcast || nowait )
         return;
     atomic_inc(&bar->ingen);
     gen = atomic_read(&bar->outgen);
@@ -34,11 +34,11 @@ void mce_barrier_enter(struct mce_softirq_barrier *bar)
     }
 }
 
-void mce_barrier_exit(struct mce_softirq_barrier *bar)
+void mce_barrier_exit(struct mce_softirq_barrier *bar, bool nowait)
 {
     int gen;
 
-    if ( !mce_broadcast )
+    if ( !mce_broadcast || nowait )
         return;
     atomic_inc(&bar->outgen);
     gen = atomic_read(&bar->ingen);
@@ -52,8 +52,8 @@ void mce_barrier_exit(struct mce_softirq_barrier *bar)
     }
 }
 
-void mce_barrier(struct mce_softirq_barrier *bar)
+void mce_barrier(struct mce_softirq_barrier *bar, bool nowait)
 {
-    mce_barrier_enter(bar);
-    mce_barrier_exit(bar);
+    mce_barrier_enter(bar, nowait);
+    mce_barrier_exit(bar, nowait);
 }
diff --git a/xen/arch/x86/cpu/mcheck/barrier.h 
b/xen/arch/x86/cpu/mcheck/barrier.h
index 87f7550..934b627 100644
--- a/xen/arch/x86/cpu/mcheck/barrier.h
+++ b/xen/arch/x86/cpu/mcheck/barrier.h
@@ -25,6 +25,14 @@ void mce_barrier_init(struct mce_softirq_barrier *);
 void mce_barrier_dec(struct mce_softirq_barrier *);
 
 /*
+ * If nowait is true, mce_barrier_enter/exit() will return immediately
+ * without touching the barrier. It's used when handling a LMCE which
+ * is received on only one CPU and thus does not invoke
+ * mce_barrier_enter/exit() calls on all CPUs.
+ *
+ * If nowait is false, mce_barrier_enter/exit() will handle the given
+ * barrier as below.
+ *
  * Increment the generation number and the value. The generation number
  * is incremented when entering a barrier. This way, it can be checked
  * on exit if a CPU is trying to re-enter the barrier. This can happen
@@ -36,9 +44,9 @@ void mce_barrier_dec(struct mce_softirq_barrier *);
  * These barrier functions should always be paired, so that the
  * counter value will reach 0 again after all CPUs have exited.
  */
-void mce_barrier_enter(struct mce_softirq_barrier *);
-void mce_barrier_exit(struct mce_softirq_barrier *);
+void mce_barrier_enter(struct mce_softirq_barrier *, bool nowait);
+void mce_barrier_exit(struct mce_softirq_barrier *, bool nowait);
 
-void mce_barrier(struct mce_softirq_barrier *);
+void mce_barrier(struct mce_softirq_barrier *, bool nowait);
 
 #endif /* _MCHECK_BARRIER_H */
diff --git a/xen/arch/x86/cpu/mcheck/mcaction.c 
b/xen/arch/x86/cpu/mcheck/mcaction.c
index dab9eac..ca17d22 100644
--- a/xen/arch/x86/cpu/mcheck/mcaction.c
+++ b/xen/arch/x86/cpu/mcheck/mcaction.c
@@ -96,7 +96,9 @@ mc_memerr_dhandler(struct mca_binfo *binfo,
 
                 bank->mc_addr = gfn << PAGE_SHIFT |
                   (bank->mc_addr & (PAGE_SIZE -1 ));
-                if (fill_vmsr_data(bank, d, global->mc_gstatus,
+                /* TODO: support injecting LMCE */
+                if (fill_vmsr_data(bank, d,
+                                   global->mc_gstatus & ~MCG_STATUS_LMCE,
                                    vmce_vcpuid == VMCE_INJECT_BROADCAST))
                 {
                     mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d "
diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c
index 52b5e29..20ab678 100644
--- a/xen/arch/x86/cpu/mcheck/mce.c
+++ b/xen/arch/x86/cpu/mcheck/mce.c
@@ -42,6 +42,13 @@ DEFINE_PER_CPU_READ_MOSTLY(struct mca_banks *, 
poll_bankmask);
 DEFINE_PER_CPU_READ_MOSTLY(struct mca_banks *, no_cmci_banks);
 DEFINE_PER_CPU_READ_MOSTLY(struct mca_banks *, mce_clear_banks);
 
+/*
+ * Flag to indicate that at least one non-local MCE's on this CPU have
+ * not been completed handled. It's set by mcheck_cmn_handler() and
+ * cleared by mce_softirq().
+ */
+DEFINE_PER_CPU(bool, mce_in_process);
+
 static void intpose_init(void);
 static void mcinfo_clear(struct mc_info *);
 struct mca_banks *mca_allbanks;
@@ -396,6 +403,7 @@ mcheck_mca_logout(enum mca_source who, struct mca_banks 
*bankmask,
         sp->errcnt = errcnt;
         sp->ripv = (gstatus & MCG_STATUS_RIPV) != 0;
         sp->eipv = (gstatus & MCG_STATUS_EIPV) != 0;
+        sp->lmce = (gstatus & MCG_STATUS_LMCE) != 0;
         sp->uc = uc;
         sp->pcc = pcc;
         sp->recoverable = recover;
@@ -459,6 +467,7 @@ void mcheck_cmn_handler(const struct cpu_user_regs *regs)
     uint64_t gstatus;
     mctelem_cookie_t mctc = NULL;
     struct mca_summary bs;
+    bool lmce;
 
     mce_spin_lock(&mce_logout_lock);
 
@@ -502,15 +511,19 @@ void mcheck_cmn_handler(const struct cpu_user_regs *regs)
     }
     mce_spin_unlock(&mce_logout_lock);
 
-    mce_barrier_enter(&mce_trap_bar);
+    lmce = bs.lmce;
+    if ( !lmce )
+        this_cpu(mce_in_process) = true;
+
+    mce_barrier_enter(&mce_trap_bar, lmce);
     if ( mctc != NULL && mce_urgent_action(regs, mctc))
         cpumask_set_cpu(smp_processor_id(), &mce_fatal_cpus);
-    mce_barrier_exit(&mce_trap_bar);
+    mce_barrier_exit(&mce_trap_bar, lmce);
 
     /*
      * Wait until everybody has processed the trap.
      */
-    mce_barrier_enter(&mce_trap_bar);
+    mce_barrier_enter(&mce_trap_bar, lmce);
     if (atomic_read(&severity_cpu) == smp_processor_id())
     {
         /* According to SDM, if no error bank found on any cpus,
@@ -528,16 +541,16 @@ void mcheck_cmn_handler(const struct cpu_user_regs *regs)
         }
         atomic_set(&found_error, 0);
     }
-    mce_barrier_exit(&mce_trap_bar);
+    mce_barrier_exit(&mce_trap_bar, lmce);
 
     /* Clear flags after above fatal check */
-    mce_barrier_enter(&mce_trap_bar);
+    mce_barrier_enter(&mce_trap_bar, lmce);
     gstatus = mca_rdmsr(MSR_IA32_MCG_STATUS);
     if ((gstatus & MCG_STATUS_MCIP) != 0) {
         mce_printk(MCE_CRITICAL, "MCE: Clear MCIP@ last step");
         mca_wrmsr(MSR_IA32_MCG_STATUS, 0);
     }
-    mce_barrier_exit(&mce_trap_bar);
+    mce_barrier_exit(&mce_trap_bar, lmce);
 
     raise_softirq(MACHINE_CHECK_SOFTIRQ);
 }
@@ -1704,10 +1717,11 @@ static void mce_softirq(void)
 {
     int cpu = smp_processor_id();
     unsigned int workcpu;
+    bool nowait = !this_cpu(mce_in_process);
 
     mce_printk(MCE_VERBOSE, "CPU%d enter softirq\n", cpu);
 
-    mce_barrier_enter(&mce_inside_bar);
+    mce_barrier_enter(&mce_inside_bar, nowait);
 
     /*
      * Everybody is here. Now let's see who gets to do the
@@ -1720,10 +1734,10 @@ static void mce_softirq(void)
 
     atomic_set(&severity_cpu, cpu);
 
-    mce_barrier_enter(&mce_severity_bar);
+    mce_barrier_enter(&mce_severity_bar, nowait);
     if (!mctelem_has_deferred(cpu))
         atomic_set(&severity_cpu, cpu);
-    mce_barrier_exit(&mce_severity_bar);
+    mce_barrier_exit(&mce_severity_bar, nowait);
 
     /* We choose severity_cpu for further processing */
     if (atomic_read(&severity_cpu) == cpu) {
@@ -1733,9 +1747,11 @@ static void mce_softirq(void)
         /* Step1: Fill DOM0 LOG buffer, vMCE injection buffer and
          * vMCE MSRs virtualization buffer
          */
-        for_each_online_cpu(workcpu) {
-            mctelem_process_deferred(workcpu, mce_delayed_action);
-        }
+        if (nowait)
+            mctelem_process_deferred(cpu, mce_delayed_action);
+        else
+            for_each_online_cpu(workcpu)
+                mctelem_process_deferred(workcpu, mce_delayed_action);
 
         /* Step2: Send Log to DOM0 through vIRQ */
         if (dom0_vmce_enabled()) {
@@ -1744,7 +1760,9 @@ static void mce_softirq(void)
         }
     }
 
-    mce_barrier_exit(&mce_inside_bar);
+    mce_barrier_exit(&mce_inside_bar, nowait);
+
+    this_cpu(mce_in_process) = false;
 }
 
 /* Machine Check owner judge algorithm:
diff --git a/xen/arch/x86/cpu/mcheck/mce.h b/xen/arch/x86/cpu/mcheck/mce.h
index 32f85c7..9347eb9 100644
--- a/xen/arch/x86/cpu/mcheck/mce.h
+++ b/xen/arch/x86/cpu/mcheck/mce.h
@@ -109,12 +109,14 @@ struct mca_summary {
     int         eipv;   /* meaningful on #MC */
     bool        uc;     /* UC flag */
     bool        pcc;    /* PCC flag */
+    bool        lmce;   /* LMCE flag (Intel only) */
     bool        recoverable; /* software error recoverable flag */
 };
 
 DECLARE_PER_CPU(struct mca_banks *, poll_bankmask);
 DECLARE_PER_CPU(struct mca_banks *, no_cmci_banks);
 DECLARE_PER_CPU(struct mca_banks *, mce_clear_banks);
+DECLARE_PER_CPU(bool, mce_in_process);
 
 extern bool cmci_support;
 extern bool is_mc_panic;
diff --git a/xen/arch/x86/cpu/mcheck/x86_mca.h 
b/xen/arch/x86/cpu/mcheck/x86_mca.h
index 34d1921..de03f82 100644
--- a/xen/arch/x86/cpu/mcheck/x86_mca.h
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h
@@ -42,7 +42,9 @@
 #define MCG_STATUS_RIPV         0x0000000000000001ULL
 #define MCG_STATUS_EIPV         0x0000000000000002ULL
 #define MCG_STATUS_MCIP         0x0000000000000004ULL
-/* Bits 3-63 are reserved */
+#define MCG_STATUS_LMCE         0x0000000000000008ULL  /* Intel specific */
+/* Bits 3-63 are reserved on CPU not supporting LMCE */
+/* Bits 4-63 are reserved on CPU supporting LMCE */
 
 /* Bitfield of MSR_K8_MCi_STATUS registers */
 /* MCA error code */
-- 
2.10.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.