[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] x86 mcheck: Provide MCA "injection" hypervisor services.
# HG changeset patch # User Keir Fraser <keir.fraser@xxxxxxxxxx> # Date 1237299848 0 # Node ID 372ec886ad0c9c5d470d95f49fa6f012af533eaa # Parent 9c1be8f2013be449a09f1af34a0b5c8820ce7c55 x86 mcheck: Provide MCA "injection" hypervisor services. Signed-off-by: Gavin Maltby <gavin.maltby@xxxxxxx> --- xen/arch/x86/cpu/mcheck/amd_f10.c | 6 xen/arch/x86/cpu/mcheck/amd_nonfatal.c | 4 xen/arch/x86/cpu/mcheck/mce.c | 274 +++++++++++++++++++++++++++++++-- xen/arch/x86/cpu/mcheck/mce.h | 17 ++ xen/include/public/arch-x86/xen-mca.h | 21 ++ xen/include/xen/lib.h | 1 6 files changed, 309 insertions(+), 14 deletions(-) diff -r 9c1be8f2013b -r 372ec886ad0c xen/arch/x86/cpu/mcheck/amd_f10.c --- a/xen/arch/x86/cpu/mcheck/amd_f10.c Tue Mar 17 14:22:50 2009 +0000 +++ b/xen/arch/x86/cpu/mcheck/amd_f10.c Tue Mar 17 14:24:08 2009 +0000 @@ -74,9 +74,9 @@ amd_f10_handler(struct mc_info *mi, uint mc_ext.mc_msr[1].reg = MSR_F10_MC4_MISC2; mc_ext.mc_msr[2].reg = MSR_F10_MC4_MISC3; - rdmsrl(MSR_F10_MC4_MISC1, mc_ext.mc_msr[0].value); - rdmsrl(MSR_F10_MC4_MISC2, mc_ext.mc_msr[1].value); - rdmsrl(MSR_F10_MC4_MISC3, mc_ext.mc_msr[2].value); + mca_rdmsrl(MSR_F10_MC4_MISC1, mc_ext.mc_msr[0].value); + mca_rdmsrl(MSR_F10_MC4_MISC2, mc_ext.mc_msr[1].value); + mca_rdmsrl(MSR_F10_MC4_MISC3, mc_ext.mc_msr[2].value); x86_mcinfo_add(mi, &mc_ext); return MCA_EXTINFO_LOCAL; diff -r 9c1be8f2013b -r 372ec886ad0c xen/arch/x86/cpu/mcheck/amd_nonfatal.c --- a/xen/arch/x86/cpu/mcheck/amd_nonfatal.c Tue Mar 17 14:22:50 2009 +0000 +++ b/xen/arch/x86/cpu/mcheck/amd_nonfatal.c Tue Mar 17 14:24:08 2009 +0000 @@ -147,7 +147,7 @@ static void mce_amd_work_fn(void *data) uint64_t value; uint32_t counter; - rdmsrl(MSR_IA32_MC4_MISC, value); + mca_rdmsrl(MSR_IA32_MC4_MISC, value); /* Only the error counter field is of interest * Bit field is described in AMD K8 BKDG chapter 6.4.5.5 */ @@ -172,7 +172,7 @@ static void mce_amd_work_fn(void *data) value &= ~(0x60FFF00000000ULL); /* Counter enable */ value |= (1ULL << 51); - 
wrmsrl(MSR_IA32_MC4_MISC, value); + mca_wrmsrl(MSR_IA32_MC4_MISC, value); wmb(); } } diff -r 9c1be8f2013b -r 372ec886ad0c xen/arch/x86/cpu/mcheck/mce.c --- a/xen/arch/x86/cpu/mcheck/mce.c Tue Mar 17 14:22:50 2009 +0000 +++ b/xen/arch/x86/cpu/mcheck/mce.c Tue Mar 17 14:24:08 2009 +0000 @@ -27,9 +27,11 @@ unsigned int nr_mce_banks; EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ +static void intpose_init(void); static void mcinfo_clear(struct mc_info *); -#define SEG_PL(segsel) ((segsel) & 0x3) +#define SEG_PL(segsel) ((segsel) & 0x3) +#define _MC_MSRINJ_F_REQ_HWCR_WREN (1 << 16) #if 1 /* XXFM switch to 0 for putback */ @@ -109,7 +111,7 @@ mctelem_cookie_t mcheck_mca_logout(enum cpu_nr = smp_processor_id(); BUG_ON(cpu_nr != v->processor); - rdmsrl(MSR_IA32_MCG_STATUS, gstatus); + mca_rdmsrl(MSR_IA32_MCG_STATUS, gstatus); memset(&mcg, 0, sizeof (mcg)); mcg.common.type = MC_TYPE_GLOBAL; @@ -156,7 +158,7 @@ mctelem_cookie_t mcheck_mca_logout(enum if (!test_bit(i, bankmask)) continue; - rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status); + mca_rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status); if (!(status & MCi_STATUS_VAL)) continue; /* this bank has no valid telemetry */ @@ -189,7 +191,7 @@ mctelem_cookie_t mcheck_mca_logout(enum addr = misc = 0; if (status & MCi_STATUS_ADDRV) { - rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addr); + mca_rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addr); d = maddr_get_owner(addr); if (d != NULL && (who == MCA_POLLER || who == MCA_CMCI_HANDLER)) @@ -197,13 +199,13 @@ mctelem_cookie_t mcheck_mca_logout(enum } if (status & MCi_STATUS_MISCV) - rdmsrl(MSR_IA32_MC0_MISC + 4 * i, misc); + mca_rdmsrl(MSR_IA32_MC0_MISC + 4 * i, misc); mcb.mc_addr = addr; mcb.mc_misc = misc; if (who == MCA_CMCI_HANDLER) { - rdmsrl(MSR_IA32_MC0_CTL2 + i, mcb.mc_ctrl2); + mca_rdmsrl(MSR_IA32_MC0_CTL2 + i, mcb.mc_ctrl2); rdtscll(mcb.mc_tsc); } @@ -221,7 +223,7 @@ mctelem_cookie_t mcheck_mca_logout(enum } /* Clear status */ - wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL); + 
mca_wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL); wmb(); } @@ -281,7 +283,7 @@ void mcheck_cmn_handler(struct cpu_user_ /* Read global status; if it does not indicate machine check * in progress then bail as long as we have a valid ip to return to. */ - rdmsrl(MSR_IA32_MCG_STATUS, gstatus); + mca_rdmsrl(MSR_IA32_MCG_STATUS, gstatus); ripv = ((gstatus & MCG_STATUS_RIPV) != 0); if (!(gstatus & MCG_STATUS_MCIP) && ripv) { add_taint(TAINT_MACHINE_CHECK); /* questionable */ @@ -300,7 +302,7 @@ void mcheck_cmn_handler(struct cpu_user_ /* Clear MCIP or another #MC will enter shutdown state */ gstatus &= ~MCG_STATUS_MCIP; - wrmsrl(MSR_IA32_MCG_STATUS, gstatus); + mca_wrmsrl(MSR_IA32_MCG_STATUS, gstatus); wmb(); /* If no valid errors and our stack is intact, we're done */ @@ -540,6 +542,7 @@ void mcheck_init(struct cpuinfo_x86 *c) return; } + intpose_init(); mctelem_init(sizeof (struct mc_info)); switch (c->x86_vendor) { @@ -768,6 +771,203 @@ void x86_mc_get_cpu_info(unsigned cpu, u } } +#define INTPOSE_NENT 50 + +static struct intpose_ent { + unsigned int cpu_nr; + uint64_t msr; + uint64_t val; +} intpose_arr[INTPOSE_NENT]; + +static void intpose_init(void) +{ + static int done; + int i; + + if (done++ > 0) + return; + + for (i = 0; i < INTPOSE_NENT; i++) { + intpose_arr[i].cpu_nr = -1; + } + +} + +struct intpose_ent *intpose_lookup(unsigned int cpu_nr, uint64_t msr, + uint64_t *valp) +{ + int i; + + for (i = 0; i < INTPOSE_NENT; i++) { + if (intpose_arr[i].cpu_nr == cpu_nr && + intpose_arr[i].msr == msr) { + if (valp != NULL) + *valp = intpose_arr[i].val; + return &intpose_arr[i]; + } + } + + return NULL; +} + +static void intpose_add(unsigned int cpu_nr, uint64_t msr, uint64_t val) +{ + struct intpose_ent *ent; + int i; + + if ((ent = intpose_lookup(cpu_nr, msr, NULL)) != NULL) { + ent->val = val; + return; + } + + for (i = 0, ent = &intpose_arr[0]; i < INTPOSE_NENT; i++, ent++) { + if (ent->cpu_nr == -1) { + ent->cpu_nr = cpu_nr; + ent->msr = msr; + ent->val = val; + 
return; + } + } + + printk("intpose_add: interpose array full - request dropped\n"); +} + +void intpose_inval(unsigned int cpu_nr, uint64_t msr) +{ + struct intpose_ent *ent; + + if ((ent = intpose_lookup(cpu_nr, msr, NULL)) != NULL) { + ent->cpu_nr = -1; + } +} + +#define IS_MCA_BANKREG(r) \ + ((r) >= MSR_IA32_MC0_CTL && \ + (r) <= MSR_IA32_MC0_MISC + (nr_mce_banks - 1) * 4 && \ + ((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */ + +static int x86_mc_msrinject_verify(struct xen_mc_msrinject *mci) +{ + struct cpuinfo_x86 *c; + int i, errs = 0; + + c = &cpu_data[smp_processor_id()]; + + for (i = 0; i < mci->mcinj_count; i++) { + uint64_t reg = mci->mcinj_msr[i].reg; + const char *reason = NULL; + + if (IS_MCA_BANKREG(reg)) { + if (c->x86_vendor == X86_VENDOR_AMD) { + /* On AMD we can set MCi_STATUS_WREN in the + * HWCR MSR to allow non-zero writes to banks + * MSRs not to #GP. The injector in dom0 + * should set that bit, but we detect when it + * is necessary and set it as a courtesy to + * avoid #GP in the hypervisor. */ + mci->mcinj_flags |= + _MC_MSRINJ_F_REQ_HWCR_WREN; + continue; + } else { + /* No alternative but to interpose, so require + * that the injector specified as such. 
*/ + if (!(mci->mcinj_flags & + MC_MSRINJ_F_INTERPOSE)) { + reason = "must specify interposition"; + } + } + } else { + switch (reg) { + /* MSRs acceptable on all x86 cpus */ + case MSR_IA32_MCG_STATUS: + break; + + /* MSRs that the HV will take care of */ + case MSR_K8_HWCR: + if (c->x86_vendor == X86_VENDOR_AMD) + reason = "HV will operate HWCR"; + else + reason ="only supported on AMD"; + break; + + default: + reason = "not a recognized MCA MSR"; + break; + } + } + + if (reason != NULL) { + printk("HV MSR INJECT ERROR: MSR 0x%llx %s\n", + (unsigned long long)mci->mcinj_msr[i].reg, reason); + errs++; + } + } + + return !errs; +} + +static uint64_t x86_mc_hwcr_wren(void) +{ + uint64_t old; + + rdmsrl(MSR_K8_HWCR, old); + + if (!(old & K8_HWCR_MCi_STATUS_WREN)) { + uint64_t new = old | K8_HWCR_MCi_STATUS_WREN; + wrmsrl(MSR_K8_HWCR, new); + } + + return old; +} + +static void x86_mc_hwcr_wren_restore(uint64_t hwcr) +{ + if (!(hwcr & K8_HWCR_MCi_STATUS_WREN)) + wrmsrl(MSR_K8_HWCR, hwcr); +} + +static void x86_mc_msrinject(void *data) +{ + struct xen_mc_msrinject *mci = data; + struct mcinfo_msr *msr; + struct cpuinfo_x86 *c; + uint64_t hwcr = 0; + int intpose; + int i; + + c = &cpu_data[smp_processor_id()]; + + if (mci->mcinj_flags & _MC_MSRINJ_F_REQ_HWCR_WREN) + hwcr = x86_mc_hwcr_wren(); + + intpose = (mci->mcinj_flags & MC_MSRINJ_F_INTERPOSE) != 0; + + for (i = 0, msr = &mci->mcinj_msr[0]; + i < mci->mcinj_count; i++, msr++) { + printk("HV MSR INJECT (%s) target %u actual %u MSR 0x%llx " + "<-- 0x%llx\n", + intpose ? 
"interpose" : "hardware", + mci->mcinj_cpunr, smp_processor_id(), + (unsigned long long)msr->reg, + (unsigned long long)msr->value); + + if (intpose) + intpose_add(mci->mcinj_cpunr, msr->reg, msr->value); + else + wrmsrl(msr->reg, msr->value); + } + + if (mci->mcinj_flags & _MC_MSRINJ_F_REQ_HWCR_WREN) + x86_mc_hwcr_wren_restore(hwcr); +} + +/*ARGSUSED*/ +static void x86_mc_mceinject(void *data) +{ + printk("Simulating #MC on cpu %d\n", smp_processor_id()); + __asm__ __volatile__("int $0x12"); +} + #if BITS_PER_LONG == 64 #define ID2COOKIE(id) ((mctelem_cookie_t)(id)) @@ -797,6 +997,9 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u xen_mc_logical_cpu_t *log_cpus = NULL; mctelem_cookie_t mctc; mctelem_class_t which; + unsigned int target; + struct xen_mc_msrinject *mc_msrinject; + struct xen_mc_mceinject *mc_mceinject; if ( copy_from_guest(op, u_xen_mc, 1) ) return x86_mcerr("do_mca: failed copyin of xen_mc_t", -EFAULT); @@ -901,6 +1104,59 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u } break; + case XEN_MC_msrinject: + if ( !IS_PRIV(v->domain) ) + return x86_mcerr("do_mca inject", -EPERM); + + if (nr_mce_banks == 0) + return x86_mcerr("do_mca inject", -ENODEV); + + mc_msrinject = &op->u.mc_msrinject; + target = mc_msrinject->mcinj_cpunr; + + if (target >= NR_CPUS) + return x86_mcerr("do_mca inject: bad target", -EINVAL); + + if (!cpu_isset(target, cpu_online_map)) + return x86_mcerr("do_mca inject: target offline", + -EINVAL); + + if (mc_msrinject->mcinj_count == 0) + return 0; + + if (!x86_mc_msrinject_verify(mc_msrinject)) + return x86_mcerr("do_mca inject: illegal MSR", -EINVAL); + + add_taint(TAINT_ERROR_INJECT); + + on_selected_cpus(cpumask_of_cpu(target), + x86_mc_msrinject, mc_msrinject, 1, 1); + + break; + + case XEN_MC_mceinject: + if ( !IS_PRIV(v->domain) ) + return x86_mcerr("do_mca #MC", -EPERM); + + if (nr_mce_banks == 0) + return x86_mcerr("do_mca #MC", -ENODEV); + + mc_mceinject = &op->u.mc_mceinject; + target = mc_mceinject->mceinj_cpunr; + + if (target >= 
NR_CPUS) + return x86_mcerr("do_mca #MC: bad target", -EINVAL); + + if (!cpu_isset(target, cpu_online_map)) + return x86_mcerr("do_mca #MC: target offline", -EINVAL); + + add_taint(TAINT_ERROR_INJECT); + + on_selected_cpus(cpumask_of_cpu(target), + x86_mc_mceinject, mc_mceinject, 1, 1); + + break; + default: return x86_mcerr("do_mca: bad command", -EINVAL); } diff -r 9c1be8f2013b -r 372ec886ad0c xen/arch/x86/cpu/mcheck/mce.h --- a/xen/arch/x86/cpu/mcheck/mce.h Tue Mar 17 14:22:50 2009 +0000 +++ b/xen/arch/x86/cpu/mcheck/mce.h Tue Mar 17 14:24:08 2009 +0000 @@ -41,6 +41,23 @@ extern void x86_mce_vector_register(x86_ /* Common generic MCE handler that implementations may nominate * via x86_mce_vector_register. */ extern void mcheck_cmn_handler(struct cpu_user_regs *, long, cpu_banks_t); + +/* Read an MSR, checking for an interposed value first */ +extern struct intpose_ent *intpose_lookup(unsigned int, uint64_t, + uint64_t *); +extern void intpose_inval(unsigned int, uint64_t); + +#define mca_rdmsrl(msr, var) do { \ + if (intpose_lookup(smp_processor_id(), msr, &var) == NULL) \ + rdmsrl(msr, var); \ +} while (0) + +/* Write an MSR, invalidating any interposed value */ +#define mca_wrmsrl(msr, val) do { \ + intpose_inval(smp_processor_id(), msr); \ + wrmsrl(msr, val); \ +} while (0) + /* Utility function to "logout" all architectural MCA telemetry from the MCA * banks of the current processor. 
A cookie is returned which may be diff -r 9c1be8f2013b -r 372ec886ad0c xen/include/public/arch-x86/xen-mca.h --- a/xen/include/public/arch-x86/xen-mca.h Tue Mar 17 14:22:50 2009 +0000 +++ b/xen/include/public/arch-x86/xen-mca.h Tue Mar 17 14:24:08 2009 +0000 @@ -324,10 +324,31 @@ struct xen_mc_physcpuinfo { XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info; }; +#define XEN_MC_msrinject 4 +#define MC_MSRINJ_MAXMSRS 8 +struct xen_mc_msrinject { + /* IN */ + unsigned int mcinj_cpunr; /* target processor id */ + uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */ + uint32_t mcinj_count; /* 0 .. count-1 in array are valid */ + uint32_t mcinj_pad0; + struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS]; +}; + +/* Flags for mcinj_flags above; bits 16-31 are reserved */ +#define MC_MSRINJ_F_INTERPOSE 0x1 + +#define XEN_MC_mceinject 5 +struct xen_mc_mceinject { + unsigned int mceinj_cpunr; /* target processor id */ +}; + typedef union { struct xen_mc_fetch mc_fetch; struct xen_mc_notifydomain mc_notifydomain; struct xen_mc_physcpuinfo mc_physcpuinfo; + struct xen_mc_msrinject mc_msrinject; + struct xen_mc_mceinject mc_mceinject; } xen_mc_arg_t; struct xen_mc { diff -r 9c1be8f2013b -r 372ec886ad0c xen/include/xen/lib.h --- a/xen/include/xen/lib.h Tue Mar 17 14:22:50 2009 +0000 +++ b/xen/include/xen/lib.h Tue Mar 17 14:24:08 2009 +0000 @@ -95,6 +95,7 @@ unsigned long long parse_size_and_unit(c #define TAINT_MACHINE_CHECK (1<<1) #define TAINT_BAD_PAGE (1<<2) #define TAINT_SYNC_CONSOLE (1<<3) +#define TAINT_ERROR_INJECT (1<<4) extern int tainted; #define TAINT_STRING_MAX_LEN 20 extern char *print_tainted(char *str); _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |
servers 24x7x365 and backed by RackSpace's Fanatical Support®. |