[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen staging] x86/mcheck: allow varying bank counts per CPU



commit 560cf418c8455cd8d79ad353f6f9193a2e2554e4
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Tue Jun 25 17:32:37 2019 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Tue Jun 25 17:32:37 2019 +0200

    x86/mcheck: allow varying bank counts per CPU
    
    Up to now we've been assuming that all CPUs would have the same number
    of reporting banks. However, on upcoming AMD CPUs this isn't the case,
    and one can observe
    
    (XEN) mce.c:666: Different bank number on cpu <N>
    
    indicating that Machine Check support would not be enabled on the
    affected CPUs. Convert the count variable to a per-CPU one, and adjust
    code where needed to cope with the values not being the same. In
    particular the mcabanks_alloc() invocations during AP bringup need to
    now allocate maximum-size bitmaps, because the truly needed size can't
    be known until we actually execute on that CPU, yet mcheck_init() gets
    called too early to do any allocations itself.
    
    Take the liberty and also
    - make mca_cap_init() static,
    - replace several __get_cpu_var() uses when a local variable suitable
      for use with per_cpu() appears,
    - correct which CPU's cpu_data[] entry x86_mc_msrinject_verify() uses,
    - replace a BUG() by panic().
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
 xen/arch/x86/cpu/mcheck/mce.c       | 110 ++++++++++++++++++++++--------------
 xen/arch/x86/cpu/mcheck/mce_amd.c   |   2 +-
 xen/arch/x86/cpu/mcheck/mce_intel.c |  42 ++++++++------
 xen/arch/x86/cpu/mcheck/x86_mca.h   |   2 +-
 xen/arch/x86/hvm/svm/svm.c          |   2 +-
 xen/include/asm-x86/mce.h           |   2 +-
 6 files changed, 98 insertions(+), 62 deletions(-)

diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c
index 2d700036e9..c48070119f 100644
--- a/xen/arch/x86/cpu/mcheck/mce.c
+++ b/xen/arch/x86/cpu/mcheck/mce.c
@@ -34,7 +34,7 @@ bool __read_mostly opt_mce = true;
 boolean_param("mce", opt_mce);
 bool __read_mostly mce_broadcast;
 bool is_mc_panic;
-unsigned int __read_mostly nr_mce_banks;
+DEFINE_PER_CPU_READ_MOSTLY(unsigned int, nr_mce_banks);
 unsigned int __read_mostly firstbank;
 uint8_t __read_mostly cmci_apic_vector;
 
@@ -120,7 +120,7 @@ void mce_recoverable_register(mce_recoverable_t cbfunc)
     mc_recoverable_scan = cbfunc;
 }
 
-struct mca_banks *mcabanks_alloc(void)
+struct mca_banks *mcabanks_alloc(unsigned int nr_mce_banks)
 {
     struct mca_banks *mb;
 
@@ -128,6 +128,13 @@ struct mca_banks *mcabanks_alloc(void)
     if ( !mb )
         return NULL;
 
+    /*
+     * For APs allocations get done by the BSP, i.e. when the bank count may
+     * may not be known yet. A zero bank count is a clear indication of this.
+     */
+    if ( !nr_mce_banks )
+        nr_mce_banks = MCG_CAP_COUNT;
+
     mb->bank_map = xzalloc_array(unsigned long,
                                  BITS_TO_LONGS(nr_mce_banks));
     if ( !mb->bank_map )
@@ -319,7 +326,7 @@ mcheck_mca_logout(enum mca_source who, struct mca_banks 
*bankmask,
      */
     recover = mc_recoverable_scan ? 1 : 0;
 
-    for ( i = 0; i < nr_mce_banks; i++ )
+    for ( i = 0; i < this_cpu(nr_mce_banks); i++ )
     {
         /* Skip bank if corresponding bit in bankmask is clear */
         if ( !mcabanks_test(i, bankmask) )
@@ -565,7 +572,7 @@ void mcheck_mca_clearbanks(struct mca_banks *bankmask)
 {
     int i;
 
-    for ( i = 0; i < nr_mce_banks; i++ )
+    for ( i = 0; i < this_cpu(nr_mce_banks); i++ )
     {
         if ( !mcabanks_test(i, bankmask) )
             continue;
@@ -638,54 +645,56 @@ static void set_poll_bankmask(struct cpuinfo_x86 *c)
 
     if ( cmci_support && opt_mce )
     {
-        mb->num = per_cpu(no_cmci_banks, cpu)->num;
-        bitmap_copy(mb->bank_map, per_cpu(no_cmci_banks, cpu)->bank_map,
-                    nr_mce_banks);
+        const struct mca_banks *cmci = per_cpu(no_cmci_banks, cpu);
+
+        if ( unlikely(cmci->num < mb->num) )
+            bitmap_fill(mb->bank_map, mb->num);
+        bitmap_copy(mb->bank_map, cmci->bank_map, min(mb->num, cmci->num));
     }
     else
     {
-        bitmap_copy(mb->bank_map, mca_allbanks->bank_map, nr_mce_banks);
+        bitmap_copy(mb->bank_map, mca_allbanks->bank_map,
+                    per_cpu(nr_mce_banks, cpu));
         if ( mce_firstbank(c) )
             mcabanks_clear(0, mb);
     }
 }
 
 /* The perbank ctl/status init is platform specific because of AMD's quirk */
-int mca_cap_init(void)
+static int mca_cap_init(void)
 {
     uint64_t msr_content;
+    unsigned int nr, cpu = smp_processor_id();
 
     rdmsrl(MSR_IA32_MCG_CAP, msr_content);
 
     if ( msr_content & MCG_CTL_P ) /* Control register present ? */
         wrmsrl(MSR_IA32_MCG_CTL, 0xffffffffffffffffULL);
 
-    if ( nr_mce_banks && (msr_content & MCG_CAP_COUNT) != nr_mce_banks )
-    {
-        dprintk(XENLOG_WARNING, "Different bank number on cpu %x\n",
-                smp_processor_id());
-        return -ENODEV;
-    }
-    nr_mce_banks = msr_content & MCG_CAP_COUNT;
+    per_cpu(nr_mce_banks, cpu) = nr = MASK_EXTR(msr_content, MCG_CAP_COUNT);
 
-    if ( !nr_mce_banks )
+    if ( !nr )
     {
-        printk(XENLOG_INFO "CPU%u: No MCE banks present. "
-               "Machine check support disabled\n", smp_processor_id());
+        printk(XENLOG_INFO
+               "CPU%u: No MCE banks present. Machine check support disabled\n",
+               cpu);
         return -ENODEV;
     }
 
     /* mcabanks_alloc depends on nr_mce_banks */
-    if ( !mca_allbanks )
+    if ( !mca_allbanks || nr > mca_allbanks->num )
     {
-        int i;
+        unsigned int i;
+        struct mca_banks *all = mcabanks_alloc(nr);
 
-        mca_allbanks = mcabanks_alloc();
-        for ( i = 0; i < nr_mce_banks; i++ )
+        if ( !all )
+            return -ENOMEM;
+        for ( i = 0; i < nr; i++ )
             mcabanks_set(i, mca_allbanks);
+        mcabanks_free(xchg(&mca_allbanks, all));
     }
 
-    return mca_allbanks ? 0 : -ENOMEM;
+    return 0;
 }
 
 static void cpu_bank_free(unsigned int cpu)
@@ -702,8 +711,9 @@ static void cpu_bank_free(unsigned int cpu)
 
 static int cpu_bank_alloc(unsigned int cpu)
 {
-    struct mca_banks *poll = per_cpu(poll_bankmask, cpu) ?: mcabanks_alloc();
-    struct mca_banks *clr = per_cpu(mce_clear_banks, cpu) ?: mcabanks_alloc();
+    unsigned int nr = per_cpu(nr_mce_banks, cpu);
+    struct mca_banks *poll = per_cpu(poll_bankmask, cpu) ?: mcabanks_alloc(nr);
+    struct mca_banks *clr = per_cpu(mce_clear_banks, cpu) ?: 
mcabanks_alloc(nr);
 
     if ( !poll || !clr )
     {
@@ -752,6 +762,7 @@ static struct notifier_block cpu_nfb = {
 void mcheck_init(struct cpuinfo_x86 *c, bool bsp)
 {
     enum mcheck_type inited = mcheck_none;
+    unsigned int cpu = smp_processor_id();
 
     if ( !opt_mce )
     {
@@ -762,8 +773,7 @@ void mcheck_init(struct cpuinfo_x86 *c, bool bsp)
 
     if ( !mce_available(c) )
     {
-        printk(XENLOG_INFO "CPU%i: No machine check support available\n",
-               smp_processor_id());
+        printk(XENLOG_INFO "CPU%i: No machine check support available\n", cpu);
         return;
     }
 
@@ -771,9 +781,13 @@ void mcheck_init(struct cpuinfo_x86 *c, bool bsp)
     if ( mca_cap_init() )
         return;
 
-    /* Early MCE initialisation for BSP. */
-    if ( bsp && cpu_bank_alloc(smp_processor_id()) )
-        BUG();
+    if ( !bsp )
+    {
+        per_cpu(poll_bankmask, cpu)->num = per_cpu(nr_mce_banks, cpu);
+        per_cpu(mce_clear_banks, cpu)->num = per_cpu(nr_mce_banks, cpu);
+    }
+    else if ( cpu_bank_alloc(cpu) )
+        panic("Insufficient memory for MCE bank allocations\n");
 
     switch ( c->x86_vendor )
     {
@@ -1111,24 +1125,22 @@ bool intpose_inval(unsigned int cpu_nr, uint64_t msr)
     return true;
 }
 
-#define IS_MCA_BANKREG(r) \
+#define IS_MCA_BANKREG(r, cpu) \
     ((r) >= MSR_IA32_MC0_CTL && \
-    (r) <= MSR_IA32_MCx_MISC(nr_mce_banks - 1) && \
-    ((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */
+     (r) <= MSR_IA32_MCx_MISC(per_cpu(nr_mce_banks, cpu) - 1) && \
+     ((r) - MSR_IA32_MC0_CTL) % 4) /* excludes MCi_CTL */
 
 static bool x86_mc_msrinject_verify(struct xen_mc_msrinject *mci)
 {
-    struct cpuinfo_x86 *c;
+    const struct cpuinfo_x86 *c = &cpu_data[mci->mcinj_cpunr];
     int i, errs = 0;
 
-    c = &cpu_data[smp_processor_id()];
-
     for ( i = 0; i < mci->mcinj_count; i++ )
     {
         uint64_t reg = mci->mcinj_msr[i].reg;
         const char *reason = NULL;
 
-        if ( IS_MCA_BANKREG(reg) )
+        if ( IS_MCA_BANKREG(reg, mci->mcinj_cpunr) )
         {
             if ( c->x86_vendor == X86_VENDOR_AMD )
             {
@@ -1448,7 +1460,7 @@ long do_mca(XEN_GUEST_HANDLE_PARAM(xen_mc_t) u_xen_mc)
         break;
 
     case XEN_MC_msrinject:
-        if ( nr_mce_banks == 0 )
+        if ( !mca_allbanks || !mca_allbanks->num )
             return x86_mcerr("do_mca inject", -ENODEV);
 
         mc_msrinject = &op->u.mc_msrinject;
@@ -1461,6 +1473,9 @@ long do_mca(XEN_GUEST_HANDLE_PARAM(xen_mc_t) u_xen_mc)
             return x86_mcerr("do_mca inject: target offline",
                              -EINVAL);
 
+        if ( !per_cpu(nr_mce_banks, target) )
+            return x86_mcerr("do_mca inject: no banks", -ENOENT);
+
         if ( mc_msrinject->mcinj_count == 0 )
             return 0;
 
@@ -1521,7 +1536,7 @@ long do_mca(XEN_GUEST_HANDLE_PARAM(xen_mc_t) u_xen_mc)
         break;
 
     case XEN_MC_mceinject:
-        if ( nr_mce_banks == 0 )
+        if ( !mca_allbanks || !mca_allbanks->num )
             return x86_mcerr("do_mca #MC", -ENODEV);
 
         mc_mceinject = &op->u.mc_mceinject;
@@ -1533,6 +1548,9 @@ long do_mca(XEN_GUEST_HANDLE_PARAM(xen_mc_t) u_xen_mc)
         if ( !cpu_online(target) )
             return x86_mcerr("do_mca #MC: target offline", -EINVAL);
 
+        if ( !per_cpu(nr_mce_banks, target) )
+            return x86_mcerr("do_mca #MC: no banks", -ENOENT);
+
         add_taint(TAINT_ERROR_INJECT);
 
         if ( mce_broadcast )
@@ -1548,7 +1566,7 @@ long do_mca(XEN_GUEST_HANDLE_PARAM(xen_mc_t) u_xen_mc)
         cpumask_var_t cmv;
         bool broadcast = op->u.mc_inject_v2.flags & 
XEN_MC_INJECT_CPU_BROADCAST;
 
-        if ( nr_mce_banks == 0 )
+        if ( !mca_allbanks || !mca_allbanks->num )
             return x86_mcerr("do_mca #MC", -ENODEV);
 
         if ( broadcast )
@@ -1570,6 +1588,16 @@ long do_mca(XEN_GUEST_HANDLE_PARAM(xen_mc_t) u_xen_mc)
                         "Not all required CPUs are online\n");
         }
 
+        for_each_cpu(target, cpumap)
+            if ( cpu_online(target) && !per_cpu(nr_mce_banks, target) )
+            {
+                ret = x86_mcerr("do_mca #MC: CPU%u has no banks",
+                                -ENOENT, target);
+                break;
+            }
+        if ( ret )
+            break;
+
         switch ( op->u.mc_inject_v2.flags & XEN_MC_INJECT_TYPE_MASK )
         {
         case XEN_MC_INJECT_TYPE_MCE:
diff --git a/xen/arch/x86/cpu/mcheck/mce_amd.c 
b/xen/arch/x86/cpu/mcheck/mce_amd.c
index 9b2852cc7e..94a5ba4561 100644
--- a/xen/arch/x86/cpu/mcheck/mce_amd.c
+++ b/xen/arch/x86/cpu/mcheck/mce_amd.c
@@ -297,7 +297,7 @@ amd_mcheck_init(struct cpuinfo_x86 *ci)
     x86_mce_vector_register(mcheck_cmn_handler);
     mce_need_clearbank_register(amd_need_clearbank_scan);
 
-    for ( i = 0; i < nr_mce_banks; i++ )
+    for ( i = 0; i < this_cpu(nr_mce_banks); i++ )
     {
         if ( quirkflag == MCEQUIRK_K8_GART && i == 4 )
             mcequirk_amd_apply(quirkflag);
diff --git a/xen/arch/x86/cpu/mcheck/mce_intel.c 
b/xen/arch/x86/cpu/mcheck/mce_intel.c
index 4474a34e34..5367ea90d7 100644
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
@@ -535,16 +535,16 @@ out:
 static void cmci_discover(void)
 {
     unsigned long flags;
-    int i;
+    unsigned int i, cpu = smp_processor_id();
     mctelem_cookie_t mctc;
     struct mca_summary bs;
 
-    mce_printk(MCE_VERBOSE, "CMCI: find owner on CPU%d\n", smp_processor_id());
+    mce_printk(MCE_VERBOSE, "CMCI: find owner on CPU%u\n", cpu);
 
     spin_lock_irqsave(&cmci_discover_lock, flags);
 
-    for ( i = 0; i < nr_mce_banks; i++ )
-        if ( !mcabanks_test(i, __get_cpu_var(mce_banks_owned)) )
+    for ( i = 0; i < per_cpu(nr_mce_banks, cpu); i++ )
+        if ( !mcabanks_test(i, per_cpu(mce_banks_owned, cpu)) )
             do_cmci_discover(i);
 
     spin_unlock_irqrestore(&cmci_discover_lock, flags);
@@ -557,7 +557,7 @@ static void cmci_discover(void)
      */
 
     mctc = mcheck_mca_logout(
-        MCA_CMCI_HANDLER, __get_cpu_var(mce_banks_owned), &bs, NULL);
+        MCA_CMCI_HANDLER, per_cpu(mce_banks_owned, cpu), &bs, NULL);
 
     if ( bs.errcnt && mctc != NULL )
     {
@@ -576,9 +576,9 @@ static void cmci_discover(void)
         mctelem_dismiss(mctc);
 
     mce_printk(MCE_VERBOSE, "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n",
-               smp_processor_id(),
-               *((unsigned long *)__get_cpu_var(mce_banks_owned)->bank_map),
-               *((unsigned long *)__get_cpu_var(no_cmci_banks)->bank_map));
+               cpu,
+               per_cpu(mce_banks_owned, cpu)->bank_map[0],
+               per_cpu(no_cmci_banks, cpu)->bank_map[0]);
 }
 
 /*
@@ -613,24 +613,24 @@ static void cpu_mcheck_distribute_cmci(void)
 
 static void clear_cmci(void)
 {
-    int i;
+    unsigned int i, cpu = smp_processor_id();
 
     if ( !cmci_support || !opt_mce )
         return;
 
-    mce_printk(MCE_VERBOSE, "CMCI: clear_cmci support on CPU%d\n",
-               smp_processor_id());
+    mce_printk(MCE_VERBOSE, "CMCI: clear_cmci support on CPU%u\n", cpu);
 
-    for ( i = 0; i < nr_mce_banks; i++ )
+    for ( i = 0; i < per_cpu(nr_mce_banks, cpu); i++ )
     {
         unsigned msr = MSR_IA32_MCx_CTL2(i);
         u64 val;
-        if ( !mcabanks_test(i, __get_cpu_var(mce_banks_owned)) )
+
+        if ( !mcabanks_test(i, per_cpu(mce_banks_owned, cpu)) )
             continue;
         rdmsrl(msr, val);
         if ( val & (CMCI_EN|CMCI_THRESHOLD_MASK) )
             wrmsrl(msr, val & ~(CMCI_EN|CMCI_THRESHOLD_MASK));
-        mcabanks_clear(i, __get_cpu_var(mce_banks_owned));
+        mcabanks_clear(i, per_cpu(mce_banks_owned, cpu));
     }
 }
 
@@ -826,7 +826,7 @@ static void intel_init_mce(void)
     intel_mce_post_reset();
 
     /* clear all banks */
-    for ( i = firstbank; i < nr_mce_banks; i++ )
+    for ( i = firstbank; i < this_cpu(nr_mce_banks); i++ )
     {
         /*
          * Some banks are shared across cores, use MCi_CTRL to judge whether
@@ -866,8 +866,9 @@ static void cpu_mcabank_free(unsigned int cpu)
 
 static int cpu_mcabank_alloc(unsigned int cpu)
 {
-    struct mca_banks *cmci = mcabanks_alloc();
-    struct mca_banks *owned = mcabanks_alloc();
+    unsigned int nr = per_cpu(nr_mce_banks, cpu);
+    struct mca_banks *cmci = mcabanks_alloc(nr);
+    struct mca_banks *owned = mcabanks_alloc(nr);
 
     if ( !cmci || !owned )
         goto out;
@@ -924,6 +925,13 @@ enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c, 
bool bsp)
         register_cpu_notifier(&cpu_nfb);
         mcheck_intel_therm_init();
     }
+    else
+    {
+        unsigned int cpu = smp_processor_id();
+
+        per_cpu(no_cmci_banks, cpu)->num = per_cpu(nr_mce_banks, cpu);
+        per_cpu(mce_banks_owned, cpu)->num = per_cpu(nr_mce_banks, cpu);
+    }
 
     intel_init_mca(c);
 
diff --git a/xen/arch/x86/cpu/mcheck/x86_mca.h 
b/xen/arch/x86/cpu/mcheck/x86_mca.h
index 0f87bcf63e..8f7fced0fe 100644
--- a/xen/arch/x86/cpu/mcheck/x86_mca.h
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h
@@ -125,7 +125,7 @@ static inline int mcabanks_test(int bit, struct mca_banks* 
banks)
     return test_bit(bit, banks->bank_map);
 }
 
-struct mca_banks *mcabanks_alloc(void);
+struct mca_banks *mcabanks_alloc(unsigned int nr);
 void mcabanks_free(struct mca_banks *banks);
 extern struct mca_banks *mca_allbanks;
 
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 0eac9ce4c6..d81401dbc0 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -2374,7 +2374,7 @@ static int svm_is_erratum_383(struct cpu_user_regs *regs)
         return 0;
     
     /* Clear MCi_STATUS registers */
-    for (i = 0; i < nr_mce_banks; i++)
+    for (i = 0; i < this_cpu(nr_mce_banks); i++)
         wrmsrl(MSR_IA32_MCx_STATUS(i), 0ULL);
     
     rdmsrl(MSR_IA32_MCG_STATUS, msr_content);
diff --git a/xen/include/asm-x86/mce.h b/xen/include/asm-x86/mce.h
index d2933c91bf..6116dbf24b 100644
--- a/xen/include/asm-x86/mce.h
+++ b/xen/include/asm-x86/mce.h
@@ -40,6 +40,6 @@ extern int vmce_rdmsr(uint32_t msr, uint64_t *val);
 extern bool vmce_has_lmce(const struct vcpu *v);
 extern int vmce_enable_mca_cap(struct domain *d, uint64_t cap);
 
-extern unsigned int nr_mce_banks;
+DECLARE_PER_CPU(unsigned int, nr_mce_banks);
 
 #endif
--
generated by git-patchbot for /home/xen/git/xen.git#staging

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.