[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] x86 mcheck: Provide MCA "injection" hypervisor services.



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1237299848 0
# Node ID 372ec886ad0c9c5d470d95f49fa6f012af533eaa
# Parent  9c1be8f2013be449a09f1af34a0b5c8820ce7c55
x86 mcheck: Provide MCA "injection" hypervisor services.

Signed-off-by: Gavin Maltby <gavin.maltby@xxxxxxx>
---
 xen/arch/x86/cpu/mcheck/amd_f10.c      |    6 
 xen/arch/x86/cpu/mcheck/amd_nonfatal.c |    4 
 xen/arch/x86/cpu/mcheck/mce.c          |  274 +++++++++++++++++++++++++++++++--
 xen/arch/x86/cpu/mcheck/mce.h          |   17 ++
 xen/include/public/arch-x86/xen-mca.h  |   21 ++
 xen/include/xen/lib.h                  |    1 
 6 files changed, 309 insertions(+), 14 deletions(-)

diff -r 9c1be8f2013b -r 372ec886ad0c xen/arch/x86/cpu/mcheck/amd_f10.c
--- a/xen/arch/x86/cpu/mcheck/amd_f10.c Tue Mar 17 14:22:50 2009 +0000
+++ b/xen/arch/x86/cpu/mcheck/amd_f10.c Tue Mar 17 14:24:08 2009 +0000
@@ -74,9 +74,9 @@ amd_f10_handler(struct mc_info *mi, uint
        mc_ext.mc_msr[1].reg = MSR_F10_MC4_MISC2;
        mc_ext.mc_msr[2].reg = MSR_F10_MC4_MISC3;
 
-       rdmsrl(MSR_F10_MC4_MISC1, mc_ext.mc_msr[0].value);
-       rdmsrl(MSR_F10_MC4_MISC2, mc_ext.mc_msr[1].value);
-       rdmsrl(MSR_F10_MC4_MISC3, mc_ext.mc_msr[2].value);
+       mca_rdmsrl(MSR_F10_MC4_MISC1, mc_ext.mc_msr[0].value);
+       mca_rdmsrl(MSR_F10_MC4_MISC2, mc_ext.mc_msr[1].value);
+       mca_rdmsrl(MSR_F10_MC4_MISC3, mc_ext.mc_msr[2].value);
        
        x86_mcinfo_add(mi, &mc_ext);
        return MCA_EXTINFO_LOCAL;
diff -r 9c1be8f2013b -r 372ec886ad0c xen/arch/x86/cpu/mcheck/amd_nonfatal.c
--- a/xen/arch/x86/cpu/mcheck/amd_nonfatal.c    Tue Mar 17 14:22:50 2009 +0000
+++ b/xen/arch/x86/cpu/mcheck/amd_nonfatal.c    Tue Mar 17 14:24:08 2009 +0000
@@ -147,7 +147,7 @@ static void mce_amd_work_fn(void *data)
                uint64_t value;
                uint32_t counter;
 
-               rdmsrl(MSR_IA32_MC4_MISC, value);
+               mca_rdmsrl(MSR_IA32_MC4_MISC, value);
                /* Only the error counter field is of interest
                 * Bit field is described in AMD K8 BKDG chapter 6.4.5.5
                 */
@@ -172,7 +172,7 @@ static void mce_amd_work_fn(void *data)
                        value &= ~(0x60FFF00000000ULL);
                        /* Counter enable */
                        value |= (1ULL << 51);
-                       wrmsrl(MSR_IA32_MC4_MISC, value);
+                       mca_wrmsrl(MSR_IA32_MC4_MISC, value);
                        wmb();
                }
        }
diff -r 9c1be8f2013b -r 372ec886ad0c xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c     Tue Mar 17 14:22:50 2009 +0000
+++ b/xen/arch/x86/cpu/mcheck/mce.c     Tue Mar 17 14:24:08 2009 +0000
@@ -27,9 +27,11 @@ unsigned int nr_mce_banks;
 
 EXPORT_SYMBOL_GPL(nr_mce_banks);       /* non-fatal.o */
 
+static void intpose_init(void);
 static void mcinfo_clear(struct mc_info *);
 
-#define        SEG_PL(segsel) ((segsel) & 0x3)
+#define        SEG_PL(segsel)                  ((segsel) & 0x3)
+#define _MC_MSRINJ_F_REQ_HWCR_WREN     (1 << 16)
 
 #if 1  /* XXFM switch to 0 for putback */
 
@@ -109,7 +111,7 @@ mctelem_cookie_t mcheck_mca_logout(enum 
        cpu_nr = smp_processor_id();
        BUG_ON(cpu_nr != v->processor);
 
-       rdmsrl(MSR_IA32_MCG_STATUS, gstatus);
+       mca_rdmsrl(MSR_IA32_MCG_STATUS, gstatus);
 
        memset(&mcg, 0, sizeof (mcg));
        mcg.common.type = MC_TYPE_GLOBAL;
@@ -156,7 +158,7 @@ mctelem_cookie_t mcheck_mca_logout(enum 
                if (!test_bit(i, bankmask))
                        continue;
 
-               rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status);
+               mca_rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status);
                if (!(status & MCi_STATUS_VAL))
                        continue;       /* this bank has no valid telemetry */
 
@@ -189,7 +191,7 @@ mctelem_cookie_t mcheck_mca_logout(enum 
                addr = misc = 0;
 
                if (status & MCi_STATUS_ADDRV) {
-                       rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addr);
+                       mca_rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addr);
                        d = maddr_get_owner(addr);
                        if (d != NULL && (who == MCA_POLLER ||
                            who == MCA_CMCI_HANDLER))
@@ -197,13 +199,13 @@ mctelem_cookie_t mcheck_mca_logout(enum 
                }
 
                if (status & MCi_STATUS_MISCV)
-                       rdmsrl(MSR_IA32_MC0_MISC + 4 * i, misc);
+                       mca_rdmsrl(MSR_IA32_MC0_MISC + 4 * i, misc);
 
                mcb.mc_addr = addr;
                mcb.mc_misc = misc;
 
                if (who == MCA_CMCI_HANDLER) {
-                       rdmsrl(MSR_IA32_MC0_CTL2 + i, mcb.mc_ctrl2);
+                       mca_rdmsrl(MSR_IA32_MC0_CTL2 + i, mcb.mc_ctrl2);
                        rdtscll(mcb.mc_tsc);
                }
 
@@ -221,7 +223,7 @@ mctelem_cookie_t mcheck_mca_logout(enum 
                }
 
                /* Clear status */
-               wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL);
+               mca_wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL);
                wmb();
        }
 
@@ -281,7 +283,7 @@ void mcheck_cmn_handler(struct cpu_user_
 
        /* Read global status;  if it does not indicate machine check
         * in progress then bail as long as we have a valid ip to return to. */
-       rdmsrl(MSR_IA32_MCG_STATUS, gstatus);
+       mca_rdmsrl(MSR_IA32_MCG_STATUS, gstatus);
        ripv = ((gstatus & MCG_STATUS_RIPV) != 0);
        if (!(gstatus & MCG_STATUS_MCIP) && ripv) {
                add_taint(TAINT_MACHINE_CHECK); /* questionable */
@@ -300,7 +302,7 @@ void mcheck_cmn_handler(struct cpu_user_
 
        /* Clear MCIP or another #MC will enter shutdown state */
        gstatus &= ~MCG_STATUS_MCIP;
-       wrmsrl(MSR_IA32_MCG_STATUS, gstatus);
+       mca_wrmsrl(MSR_IA32_MCG_STATUS, gstatus);
        wmb();
 
        /* If no valid errors and our stack is intact, we're done */
@@ -540,6 +542,7 @@ void mcheck_init(struct cpuinfo_x86 *c)
                return;
        }
 
+       intpose_init();
        mctelem_init(sizeof (struct mc_info));
 
        switch (c->x86_vendor) {
@@ -768,6 +771,203 @@ void x86_mc_get_cpu_info(unsigned cpu, u
        }
 }
 
+#define        INTPOSE_NENT    50
+
+static struct intpose_ent {
+       unsigned  int cpu_nr;
+       uint64_t msr;
+       uint64_t val;
+} intpose_arr[INTPOSE_NENT];
+
+static void intpose_init(void)
+{
+       static int done;
+       int i;
+
+       if (done++ > 0)
+               return;
+
+       for (i = 0; i < INTPOSE_NENT; i++) {
+               intpose_arr[i].cpu_nr = -1;
+       }
+
+}
+
+struct intpose_ent *intpose_lookup(unsigned int cpu_nr, uint64_t msr,
+    uint64_t *valp)
+{
+       int i;
+
+       for (i = 0; i < INTPOSE_NENT; i++) {
+               if (intpose_arr[i].cpu_nr == cpu_nr &&
+                   intpose_arr[i].msr == msr) {
+                       if (valp != NULL)
+                               *valp = intpose_arr[i].val;
+                       return &intpose_arr[i];
+               }
+       }
+
+       return NULL;
+}
+
+static void intpose_add(unsigned int cpu_nr, uint64_t msr, uint64_t val)
+{
+       struct intpose_ent *ent;
+       int i;
+
+       if ((ent = intpose_lookup(cpu_nr, msr, NULL)) != NULL) {
+               ent->val = val;
+               return;
+       }
+
+       for (i = 0, ent = &intpose_arr[0]; i < INTPOSE_NENT; i++, ent++) {
+               if (ent->cpu_nr == -1) {
+                       ent->cpu_nr = cpu_nr;
+                       ent->msr = msr;
+                       ent->val = val;
+                       return;
+               }
+       }
+
+       printk("intpose_add: interpose array full - request dropped\n");
+}
+
+void intpose_inval(unsigned int cpu_nr, uint64_t msr)
+{
+       struct intpose_ent *ent;
+
+       if ((ent = intpose_lookup(cpu_nr, msr, NULL)) != NULL) {
+               ent->cpu_nr = -1;
+       }
+}
+
+#define        IS_MCA_BANKREG(r) \
+    ((r) >= MSR_IA32_MC0_CTL && \
+    (r) <= MSR_IA32_MC0_MISC + (nr_mce_banks - 1) * 4 && \
+    ((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */
+
+static int x86_mc_msrinject_verify(struct xen_mc_msrinject *mci)
+{
+       struct cpuinfo_x86 *c;
+       int i, errs = 0;
+
+       c = &cpu_data[smp_processor_id()];
+
+       for (i = 0; i < mci->mcinj_count; i++) {
+               uint64_t reg = mci->mcinj_msr[i].reg;
+               const char *reason = NULL;
+
+               if (IS_MCA_BANKREG(reg)) {
+                       if (c->x86_vendor == X86_VENDOR_AMD) {
+                               /* On AMD we can set MCi_STATUS_WREN in the
+                                * HWCR MSR so that non-zero writes to bank
+                                * MSRs do not #GP.  The injector in dom0
+                                * should set that bit, but we detect when it
+                                * is necessary and set it as a courtesy to
+                                * avoid #GP in the hypervisor. */
+                               mci->mcinj_flags |=
+                                   _MC_MSRINJ_F_REQ_HWCR_WREN;
+                               continue;
+                       } else {
+                               /* No alternative but to interpose, so require
+                                * that the injector explicitly requested it. */
+                               if (!(mci->mcinj_flags &
+                                   MC_MSRINJ_F_INTERPOSE)) {
+                                       reason = "must specify interposition";
+                               }
+                       }
+               } else {
+                       switch (reg) {
+                       /* MSRs acceptable on all x86 cpus */
+                       case MSR_IA32_MCG_STATUS:
+                               break;
+
+                       /* MSRs that the HV will take care of */
+                       case MSR_K8_HWCR:
+                               if (c->x86_vendor == X86_VENDOR_AMD)
+                                       reason = "HV will operate HWCR";
+                               else
+                                       reason ="only supported on AMD";
+                               break;
+
+                       default:
+                               reason = "not a recognized MCA MSR";
+                               break;
+                       }
+               }
+
+               if (reason != NULL) {
+                       printk("HV MSR INJECT ERROR: MSR 0x%llx %s\n",
+                           (unsigned long long)mci->mcinj_msr[i].reg, reason);
+                       errs++;
+               }
+       }
+
+       return !errs;
+}
+
+static uint64_t x86_mc_hwcr_wren(void)
+{
+       uint64_t old;
+
+       rdmsrl(MSR_K8_HWCR, old);
+
+       if (!(old & K8_HWCR_MCi_STATUS_WREN)) {
+               uint64_t new = old | K8_HWCR_MCi_STATUS_WREN;
+               wrmsrl(MSR_K8_HWCR, new);
+       }
+
+       return old;
+}
+
+static void x86_mc_hwcr_wren_restore(uint64_t hwcr)
+{
+       if (!(hwcr & K8_HWCR_MCi_STATUS_WREN))
+               wrmsrl(MSR_K8_HWCR, hwcr);
+}
+
+static void x86_mc_msrinject(void *data)
+{
+       struct xen_mc_msrinject *mci = data;
+       struct mcinfo_msr *msr;
+       struct cpuinfo_x86 *c;
+       uint64_t hwcr = 0;
+       int intpose;
+       int i;
+
+       c = &cpu_data[smp_processor_id()];
+
+       if (mci->mcinj_flags & _MC_MSRINJ_F_REQ_HWCR_WREN)
+               hwcr = x86_mc_hwcr_wren();
+
+       intpose = (mci->mcinj_flags & MC_MSRINJ_F_INTERPOSE) != 0;
+
+       for (i = 0, msr = &mci->mcinj_msr[0];
+           i < mci->mcinj_count; i++, msr++) {
+               printk("HV MSR INJECT (%s) target %u actual %u MSR 0x%llx "
+                   "<-- 0x%llx\n",
+                   intpose ?  "interpose" : "hardware",
+                   mci->mcinj_cpunr, smp_processor_id(),
+                   (unsigned long long)msr->reg,
+                   (unsigned long long)msr->value);
+
+               if (intpose)
+                       intpose_add(mci->mcinj_cpunr, msr->reg, msr->value);
+               else
+                       wrmsrl(msr->reg, msr->value);
+       }
+
+       if (mci->mcinj_flags & _MC_MSRINJ_F_REQ_HWCR_WREN)
+               x86_mc_hwcr_wren_restore(hwcr);
+}
+
+/*ARGSUSED*/
+static void x86_mc_mceinject(void *data)
+{
+       printk("Simulating #MC on cpu %d\n", smp_processor_id());
+       __asm__ __volatile__("int $0x12");
+}
+
 #if BITS_PER_LONG == 64
 
 #define        ID2COOKIE(id)   ((mctelem_cookie_t)(id))
@@ -797,6 +997,9 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
        xen_mc_logical_cpu_t *log_cpus = NULL;
        mctelem_cookie_t mctc;
        mctelem_class_t which;
+       unsigned int target;
+       struct xen_mc_msrinject *mc_msrinject;
+       struct xen_mc_mceinject *mc_mceinject;
 
        if ( copy_from_guest(op, u_xen_mc, 1) )
                return x86_mcerr("do_mca: failed copyin of xen_mc_t", -EFAULT);
@@ -901,6 +1104,59 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
                }
                break;
 
+       case XEN_MC_msrinject:
+               if ( !IS_PRIV(v->domain) )
+                       return x86_mcerr("do_mca inject", -EPERM);
+
+               if (nr_mce_banks == 0)
+                       return x86_mcerr("do_mca inject", -ENODEV);
+
+               mc_msrinject = &op->u.mc_msrinject;
+               target = mc_msrinject->mcinj_cpunr;
+
+               if (target >= NR_CPUS)
+                       return x86_mcerr("do_mca inject: bad target", -EINVAL);
+
+               if (!cpu_isset(target, cpu_online_map))
+                       return x86_mcerr("do_mca inject: target offline",
+                           -EINVAL);
+
+               if (mc_msrinject->mcinj_count == 0)
+                       return 0;
+
+               if (!x86_mc_msrinject_verify(mc_msrinject))
+                       return x86_mcerr("do_mca inject: illegal MSR", -EINVAL);
+
+               add_taint(TAINT_ERROR_INJECT);
+
+               on_selected_cpus(cpumask_of_cpu(target),
+                   x86_mc_msrinject, mc_msrinject, 1, 1);
+
+               break;
+
+       case XEN_MC_mceinject:
+               if ( !IS_PRIV(v->domain) )
+                       return x86_mcerr("do_mca #MC", -EPERM);
+
+               if (nr_mce_banks == 0)
+                       return x86_mcerr("do_mca #MC", -ENODEV);
+
+               mc_mceinject = &op->u.mc_mceinject;
+               target = mc_mceinject->mceinj_cpunr;
+
+               if (target >= NR_CPUS)
+                       return x86_mcerr("do_mca #MC: bad target", -EINVAL);
+                      
+               if (!cpu_isset(target, cpu_online_map))
+                       return x86_mcerr("do_mca #MC: target offline", -EINVAL);
+
+               add_taint(TAINT_ERROR_INJECT);
+
+               on_selected_cpus(cpumask_of_cpu(target),
+                   x86_mc_mceinject, mc_mceinject, 1, 1);
+
+               break;
+
        default:
                return x86_mcerr("do_mca: bad command", -EINVAL);
        }
diff -r 9c1be8f2013b -r 372ec886ad0c xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h     Tue Mar 17 14:22:50 2009 +0000
+++ b/xen/arch/x86/cpu/mcheck/mce.h     Tue Mar 17 14:24:08 2009 +0000
@@ -41,6 +41,23 @@ extern void x86_mce_vector_register(x86_
 /* Common generic MCE handler that implementations may nominate
  * via x86_mce_vector_register. */
 extern void mcheck_cmn_handler(struct cpu_user_regs *, long, cpu_banks_t);
+
+/* Read an MSR, checking for an interposed value first */
+extern struct intpose_ent *intpose_lookup(unsigned int, uint64_t,
+    uint64_t *);
+extern void intpose_inval(unsigned int, uint64_t);
+
+#define mca_rdmsrl(msr, var) do { \
+       if (intpose_lookup(smp_processor_id(), msr, &var) == NULL) \
+               rdmsrl(msr, var); \
+} while (0)
+
+/* Write an MSR, invalidating any interposed value */
+#define        mca_wrmsrl(msr, val) do { \
+       intpose_inval(smp_processor_id(), msr); \
+       wrmsrl(msr, val); \
+} while (0)
+
 
 /* Utility function to "logout" all architectural MCA telemetry from the MCA
  * banks of the current processor.  A cookie is returned which may be
diff -r 9c1be8f2013b -r 372ec886ad0c xen/include/public/arch-x86/xen-mca.h
--- a/xen/include/public/arch-x86/xen-mca.h     Tue Mar 17 14:22:50 2009 +0000
+++ b/xen/include/public/arch-x86/xen-mca.h     Tue Mar 17 14:24:08 2009 +0000
@@ -324,10 +324,31 @@ struct xen_mc_physcpuinfo {
        XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info;
 };
 
+#define XEN_MC_msrinject    4
+#define MC_MSRINJ_MAXMSRS       8
+struct xen_mc_msrinject {
+       /* IN */
+       unsigned int mcinj_cpunr;       /* target processor id */
+       uint32_t mcinj_flags;           /* see MC_MSRINJ_F_* below */
+       uint32_t mcinj_count;           /* 0 .. count-1 in array are valid */
+       uint32_t mcinj_pad0;
+       struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
+};
+
+/* Flags for mcinj_flags above; bits 16-31 are reserved */
+#define MC_MSRINJ_F_INTERPOSE   0x1
+
+#define XEN_MC_mceinject    5
+struct xen_mc_mceinject {
+       unsigned int mceinj_cpunr;      /* target processor id */
+};
+
 typedef union {
     struct xen_mc_fetch        mc_fetch;
     struct xen_mc_notifydomain mc_notifydomain;
     struct xen_mc_physcpuinfo  mc_physcpuinfo;
+    struct xen_mc_msrinject    mc_msrinject;
+    struct xen_mc_mceinject    mc_mceinject;
 } xen_mc_arg_t;
 
 struct xen_mc {
diff -r 9c1be8f2013b -r 372ec886ad0c xen/include/xen/lib.h
--- a/xen/include/xen/lib.h     Tue Mar 17 14:22:50 2009 +0000
+++ b/xen/include/xen/lib.h     Tue Mar 17 14:24:08 2009 +0000
@@ -95,6 +95,7 @@ unsigned long long parse_size_and_unit(c
 #define TAINT_MACHINE_CHECK             (1<<1)
 #define TAINT_BAD_PAGE                  (1<<2)
 #define TAINT_SYNC_CONSOLE              (1<<3)
+#define TAINT_ERROR_INJECT              (1<<4)
 extern int tainted;
 #define TAINT_STRING_MAX_LEN            20
 extern char *print_tainted(char *str);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.