[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] x86 hvm mce: Support HVM Guest virtual MCA handling.
# HG changeset patch # User Keir Fraser <keir.fraser@xxxxxxxxxx> # Date 1246372839 -3600 # Node ID 7bbbc57163d58c27f1e3883b20d09c72d04351ab # Parent 00502df38143d6c26a6db43f9329634cdef76f3e x86 hvm mce: Support HVM Guest virtual MCA handling. When an MCE# happens, if the error has been contained/recovered by Xen and it impacts one guest domain (DOM0/HVM guest/PV guest), we inject the corresponding vMCE# into the impacted domain. The guest OS then carries on with its own recovery job if it has an MCA handler. Signed-off-by: Liping Ke <liping.ke@xxxxxxxxx> Signed-off-by: Yunhong Jiang <yunhong.jiang@xxxxxxxxx> --- xen/arch/x86/cpu/mcheck/mce_intel.c | 157 ++++++++++++++++++++++++++++-------- xen/arch/x86/cpu/mcheck/mctelem.c | 4 xen/arch/x86/cpu/mcheck/mctelem.h | 2 xen/arch/x86/hvm/hvm.c | 37 +++++--- xen/arch/x86/hvm/irq.c | 7 + xen/arch/x86/hvm/vmx/intr.c | 4 xen/arch/x86/x86_64/traps.c | 9 +- xen/include/asm-x86/domain.h | 1 xen/include/asm-x86/hvm/hvm.h | 4 9 files changed, 169 insertions(+), 56 deletions(-) diff -r 00502df38143 -r 7bbbc57163d5 xen/arch/x86/cpu/mcheck/mce_intel.c --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Tue Jun 30 15:37:14 2009 +0100 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Tue Jun 30 15:40:39 2009 +0100 @@ -10,6 +10,7 @@ #include <public/sysctl.h> #include <asm/system.h> #include <asm/msr.h> +#include <asm/p2m.h> #include "mce.h" #include "x86_mca.h" @@ -224,7 +225,7 @@ static struct bank_entry* alloc_bank_ent for vMCE# MSRs virtualization */ -static int fill_vmsr_data(int cpu, struct mcinfo_bank *mc_bank, +static int fill_vmsr_data(struct mcinfo_bank *mc_bank, uint64_t gstatus) { struct domain *d; struct bank_entry *entry; @@ -240,28 +241,89 @@ static int fill_vmsr_data(int cpu, struc return 0; } + /* For HVM guest, Only when first vMCE is consumed by HVM guest successfully, + * will we generete another node and inject another vMCE + */ + if ( (d->is_hvm) && (d->arch.vmca_msrs.nr_injection > 0) ) + { + printk(KERN_DEBUG "MCE: HVM guest has not handled 
previous" + " vMCE yet!\n"); + return -1; + } entry = alloc_bank_entry(); if (entry == NULL) - return -1; + return -1; + entry->mci_status = mc_bank->mc_status; entry->mci_addr = mc_bank->mc_addr; entry->mci_misc = mc_bank->mc_misc; - entry->cpu = cpu; entry->bank = mc_bank->mc_bank; - spin_lock(&d->arch.vmca_msrs.lock); + spin_lock(&d->arch.vmca_msrs.lock); /* New error Node, insert to the tail of the per_dom data */ list_add_tail(&entry->list, &d->arch.vmca_msrs.impact_header); /* Fill MSR global status */ d->arch.vmca_msrs.mcg_status = gstatus; /* New node impact the domain, need another vMCE# injection*/ d->arch.vmca_msrs.nr_injection++; - spin_unlock(&d->arch.vmca_msrs.lock); - - printk(KERN_DEBUG "MCE: Found error @[CPU%d BANK%d " + spin_unlock(&d->arch.vmca_msrs.lock); + + printk(KERN_DEBUG "MCE: Found error @[BANK%d " "status %"PRIx64" addr %"PRIx64" domid %d]\n ", - entry->cpu, mc_bank->mc_bank, - mc_bank->mc_status, mc_bank->mc_addr, mc_bank->mc_domid); + mc_bank->mc_bank, mc_bank->mc_status, mc_bank->mc_addr, + mc_bank->mc_domid); + } + return 0; +} + +static int inject_mce(struct domain *d) +{ + int cpu = smp_processor_id(); + cpumask_t affinity; + + /* PV guest and HVM guest have different vMCE# injection + * methods*/ + + if ( !test_and_set_bool(d->vcpu[0]->mce_pending) ) + { + if (d->is_hvm) + { + printk(KERN_DEBUG "MCE: inject vMCE to HVM DOM %d\n", + d->domain_id); + vcpu_kick(d->vcpu[0]); + } + /* PV guest including DOM0 */ + else + { + printk(KERN_DEBUG "MCE: inject vMCE to PV DOM%d\n", + d->domain_id); + if (guest_has_trap_callback + (d, 0, TRAP_machine_check)) + { + d->vcpu[0]->cpu_affinity_tmp = + d->vcpu[0]->cpu_affinity; + cpus_clear(affinity); + cpu_set(cpu, affinity); + printk(KERN_DEBUG "MCE: CPU%d set affinity, old %d\n", cpu, + d->vcpu[0]->processor); + vcpu_set_affinity(d->vcpu[0], &affinity); + vcpu_kick(d->vcpu[0]); + } + else + { + printk(KERN_DEBUG "MCE: Kill PV guest with No MCE handler\n"); + domain_crash(d); + } + } + } + else { 
+ /* new vMCE comes while first one has not been injected yet, + * in this case, inject fail. [We can't lose this vMCE for + * the mce node's consistency]. + */ + printk(KERN_DEBUG "There's a pending vMCE waiting to be injected " + " to this DOM%d!\n", d->domain_id); + return -1; } return 0; } @@ -272,7 +334,7 @@ void intel_UCR_handler(struct mcinfo_ban struct mca_handle_result *result) { struct domain *d; - unsigned long mfn; + unsigned long mfn, gfn; uint32_t status; printk(KERN_DEBUG "MCE: Enter EWB UCR recovery action\n"); @@ -280,6 +342,7 @@ void intel_UCR_handler(struct mcinfo_ban if (bank->mc_addr != 0) { mfn = bank->mc_addr >> PAGE_SHIFT; if (!offline_page(mfn, 1, &status)) { + /* This is free page */ if (status & PG_OFFLINE_OFFLINED) result->result = MCA_RECOVERED; else if (status & PG_OFFLINE_PENDING) { @@ -289,9 +352,35 @@ void intel_UCR_handler(struct mcinfo_ban result->owner = status >> PG_OFFLINE_OWNER_SHIFT; printk(KERN_DEBUG "MCE: This error page is ownded" " by DOM %d\n", result->owner); - if (result->owner != 0 && result->owner != DOMID_XEN) { + /* Fill vMCE# injection and vMCE# MSR virtualization " + * "related data */ + bank->mc_domid = result->owner; + if ( result->owner != DOMID_XEN ) { d = get_domain_by_id(result->owner); - domain_crash(d); + gfn = + mfn_to_gmfn(d, ((bank->mc_addr) >> PAGE_SHIFT)); + bank->mc_addr = + gfn << PAGE_SHIFT | (bank->mc_addr & PAGE_MASK); + if (fill_vmsr_data(bank, global->mc_gstatus) == -1) + { + printk(KERN_DEBUG "Fill vMCE# data for DOM%d " + "failed\n", result->owner); + domain_crash(d); + return; + } + /* We will inject vMCE to DOMU*/ + if ( inject_mce(d) < 0 ) + { + printk(KERN_DEBUG "inject vMCE to DOM%d" + " failed\n", d->domain_id); + domain_crash(d); + return; + } + /* Impacted domain go on with domain's recovery job + * if the domain has its own MCA handler. + * For xen, it has contained the error and finished + * its own recovery job. 
+ */ result->result = MCA_RECOVERED; } } @@ -309,7 +398,7 @@ struct mca_error_handler intel_recovery_ * should be committed for dom0 consumption, 0 if it should be * dismissed. */ -static int mce_action(unsigned int cpu, mctelem_cookie_t mctc) +static int mce_action(mctelem_cookie_t mctc) { struct mc_info *local_mi; uint32_t i; @@ -335,9 +424,6 @@ static int mce_action(unsigned int cpu, continue; } mc_bank = (struct mcinfo_bank*)mic; - /* Fill vMCE# injection and vMCE# MSR virtualization related data */ - if (fill_vmsr_data(cpu, mc_bank, mc_global->mc_gstatus) == -1) - break; /* TODO: Add recovery actions here, such as page-offline, etc */ memset(&mca_res, 0x0f, sizeof(mca_res)); @@ -386,7 +472,6 @@ static void mce_softirq(void) { int cpu = smp_processor_id(); unsigned int workcpu; - cpumask_t affinity; printk(KERN_DEBUG "CPU%d enter softirq\n", cpu); @@ -417,27 +502,13 @@ static void mce_softirq(void) * vMCE MSRs virtualization buffer */ for_each_online_cpu(workcpu) { - mctelem_process_deferred(workcpu, mce_action); + mctelem_process_deferred(workcpu, mce_action); } /* Step2: Send Log to DOM0 through vIRQ */ if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA)) { printk(KERN_DEBUG "MCE: send MCE# to DOM0 through virq\n"); send_guest_global_virq(dom0, VIRQ_MCA); - } - - /* Step3: Inject vMCE to impacted DOM. 
Currently we cares DOM0 only */ - if (guest_has_trap_callback - (dom0, 0, TRAP_machine_check) && - !test_and_set_bool(dom0->vcpu[0]->mce_pending)) { - dom0->vcpu[0]->cpu_affinity_tmp = - dom0->vcpu[0]->cpu_affinity; - cpus_clear(affinity); - cpu_set(cpu, affinity); - printk(KERN_DEBUG "MCE: CPU%d set affinity, old %d\n", cpu, - dom0->vcpu[0]->processor); - vcpu_set_affinity(dom0->vcpu[0], &affinity); - vcpu_kick(dom0->vcpu[0]); } } @@ -1057,7 +1128,27 @@ int intel_mce_wrmsr(u32 msr, u64 value) break; case MSR_IA32_MCG_STATUS: d->arch.vmca_msrs.mcg_status = value; - gdprintk(XENLOG_DEBUG, "MCE: wrmsr MCG_CTL %"PRIx64"\n", value); + gdprintk(XENLOG_DEBUG, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", value); + /* For HVM guest, this is the point for deleting vMCE injection node */ + if ( (d->is_hvm) && (d->arch.vmca_msrs.nr_injection >0) ) + { + d->arch.vmca_msrs.nr_injection--; /* Should be 0 */ + if (!list_empty(&d->arch.vmca_msrs.impact_header)) { + entry = list_entry(d->arch.vmca_msrs.impact_header.next, + struct bank_entry, list); + if (entry->mci_status & MCi_STATUS_VAL) + gdprintk(XENLOG_ERR, "MCE: MCi_STATUS MSR should have " + "been cleared before write MCG_STATUS MSR\n"); + + gdprintk(XENLOG_DEBUG, "MCE: Delete HVM last injection " + "Node, nr_injection %u\n", + d->arch.vmca_msrs.nr_injection); + list_del(&entry->list); + } + else + gdprintk(XENLOG_DEBUG, "MCE: Not found HVM guest" + " last injection Node, something Wrong!\n"); + } break; case MSR_IA32_MCG_CAP: gdprintk(XENLOG_WARNING, "MCE: MCG_CAP is read-only\n"); diff -r 00502df38143 -r 7bbbc57163d5 xen/arch/x86/cpu/mcheck/mctelem.c --- a/xen/arch/x86/cpu/mcheck/mctelem.c Tue Jun 30 15:37:14 2009 +0100 +++ b/xen/arch/x86/cpu/mcheck/mctelem.c Tue Jun 30 15:40:39 2009 +0100 @@ -153,7 +153,7 @@ void mctelem_defer(mctelem_cookie_t cook } void mctelem_process_deferred(unsigned int cpu, - int (*fn)(unsigned int, mctelem_cookie_t)) + int (*fn)(mctelem_cookie_t)) { struct mctelem_ent *tep; struct mctelem_ent *head, 
*prev; @@ -189,7 +189,7 @@ void mctelem_process_deferred(unsigned i prev = tep->mcte_prev; tep->mcte_next = tep->mcte_prev = NULL; - ret = fn(cpu, MCTE2COOKIE(tep)); + ret = fn(MCTE2COOKIE(tep)); if (prev != NULL) prev->mcte_next = NULL; tep->mcte_prev = tep->mcte_next = NULL; diff -r 00502df38143 -r 7bbbc57163d5 xen/arch/x86/cpu/mcheck/mctelem.h --- a/xen/arch/x86/cpu/mcheck/mctelem.h Tue Jun 30 15:37:14 2009 +0100 +++ b/xen/arch/x86/cpu/mcheck/mctelem.h Tue Jun 30 15:40:39 2009 +0100 @@ -69,7 +69,7 @@ extern void mctelem_ack(mctelem_class_t, extern void mctelem_ack(mctelem_class_t, mctelem_cookie_t); extern void mctelem_defer(mctelem_cookie_t); extern void mctelem_process_deferred(unsigned int, - int (*)(unsigned int, mctelem_cookie_t)); + int (*)(mctelem_cookie_t)); int mctelem_has_deferred(unsigned int); #endif diff -r 00502df38143 -r 7bbbc57163d5 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Tue Jun 30 15:37:14 2009 +0100 +++ b/xen/arch/x86/hvm/hvm.c Tue Jun 30 15:40:39 2009 +0100 @@ -1771,6 +1771,8 @@ void hvm_rdtsc_intercept(struct cpu_user regs->edx = (uint32_t)(tsc >> 32); } +extern int intel_mce_rdmsr(u32 msr, u32 *lo, u32 *hi); +extern int intel_mce_wrmsr(u32 msr, u64 value); int hvm_msr_read_intercept(struct cpu_user_regs *regs) { uint32_t ecx = regs->ecx; @@ -1779,6 +1781,8 @@ int hvm_msr_read_intercept(struct cpu_us uint64_t *var_range_base, *fixed_range_base; int index, mtrr; uint32_t cpuid[4]; + uint32_t lo, hi; + int ret; var_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.var_ranges; fixed_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.fixed_ranges; @@ -1794,18 +1798,6 @@ int hvm_msr_read_intercept(struct cpu_us case MSR_IA32_APICBASE: msr_content = vcpu_vlapic(v)->hw.apic_base_msr; - break; - - case MSR_IA32_MCG_CAP: - case MSR_IA32_MCG_STATUS: - case MSR_IA32_MC0_STATUS: - case MSR_IA32_MC1_STATUS: - case MSR_IA32_MC2_STATUS: - case MSR_IA32_MC3_STATUS: - case MSR_IA32_MC4_STATUS: - case MSR_IA32_MC5_STATUS: - /* No point in letting the 
guest see real MCEs */ - msr_content = 0; break; case MSR_IA32_CR_PAT: @@ -1858,7 +1850,17 @@ int hvm_msr_read_intercept(struct cpu_us break; default: - return hvm_funcs.msr_read_intercept(regs); + ret = intel_mce_rdmsr(ecx, &lo, &hi); + if ( ret < 0 ) + goto gp_fault; + else if ( ret ) + { + msr_content = ((u64)hi << 32) | lo; + break; + } + /* ret == 0, This is not an MCE MSR, see other MSRs */ + else if (!ret) + return hvm_funcs.msr_read_intercept(regs); } regs->eax = (uint32_t)msr_content; @@ -1884,6 +1886,7 @@ int hvm_msr_write_intercept(struct cpu_u struct vcpu *v = current; int index, mtrr; uint32_t cpuid[4]; + int ret; hvm_cpuid(1, &cpuid[0], &cpuid[1], &cpuid[2], &cpuid[3]); mtrr = !!(cpuid[3] & bitmaskof(X86_FEATURE_MTRR)); @@ -1946,7 +1949,13 @@ int hvm_msr_write_intercept(struct cpu_u break; default: - return hvm_funcs.msr_write_intercept(regs); + ret = intel_mce_wrmsr(ecx, msr_content); + if ( ret < 0 ) + goto gp_fault; + else if ( ret ) + break; + else if (!ret) + return hvm_funcs.msr_write_intercept(regs); } return X86EMUL_OKAY; diff -r 00502df38143 -r 7bbbc57163d5 xen/arch/x86/hvm/irq.c --- a/xen/arch/x86/hvm/irq.c Tue Jun 30 15:37:14 2009 +0100 +++ b/xen/arch/x86/hvm/irq.c Tue Jun 30 15:40:39 2009 +0100 @@ -326,6 +326,9 @@ struct hvm_intack hvm_vcpu_has_pending_i if ( unlikely(v->nmi_pending) ) return hvm_intack_nmi; + if ( unlikely(v->mce_pending) ) + return hvm_intack_mce; + if ( vlapic_accept_pic_intr(v) && plat->vpic[0].int_output ) return hvm_intack_pic(0); @@ -345,6 +348,10 @@ struct hvm_intack hvm_vcpu_ack_pending_i { case hvm_intsrc_nmi: if ( !test_and_clear_bool(v->nmi_pending) ) + intack = hvm_intack_none; + break; + case hvm_intsrc_mce: + if ( !test_and_clear_bool(v->mce_pending) ) intack = hvm_intack_none; break; case hvm_intsrc_pic: diff -r 00502df38143 -r 7bbbc57163d5 xen/arch/x86/hvm/vmx/intr.c --- a/xen/arch/x86/hvm/vmx/intr.c Tue Jun 30 15:37:14 2009 +0100 +++ b/xen/arch/x86/hvm/vmx/intr.c Tue Jun 30 15:40:39 2009 +0100 @@ -157,6 
+157,10 @@ asmlinkage void vmx_intr_assist(void) { vmx_inject_nmi(); } + else if ( intack.source == hvm_intsrc_mce ) + { + vmx_inject_hw_exception(TRAP_machine_check, HVM_DELIVER_NO_ERROR_CODE); + } else { HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0); diff -r 00502df38143 -r 7bbbc57163d5 xen/arch/x86/x86_64/traps.c --- a/xen/arch/x86/x86_64/traps.c Tue Jun 30 15:37:14 2009 +0100 +++ b/xen/arch/x86/x86_64/traps.c Tue Jun 30 15:40:39 2009 +0100 @@ -309,12 +309,13 @@ unsigned long do_iret(void) && !cpus_equal(v->cpu_affinity_tmp, v->cpu_affinity)) vcpu_set_affinity(v, &v->cpu_affinity_tmp); - /*Currently, only inject vMCE to DOM0.*/ + /* inject vMCE to PV_Guest including DOM0. */ if (v->trap_priority >= VCPU_TRAP_NMI) { - printk(KERN_DEBUG "MCE: Return from vMCE# trap!"); - if (d->domain_id == 0 && v->vcpu_id == 0) { + printk(KERN_DEBUG "MCE: Return from vMCE# trap!\n"); + if ( v->vcpu_id == 0 ) { if ( !d->arch.vmca_msrs.nr_injection ) { - printk(KERN_WARNING "MCE: Ret from vMCE#, nr_injection is 0\n"); + printk(KERN_WARNING "MCE: Ret from vMCE#, " + "No injection Node\n"); goto end; } diff -r 00502df38143 -r 7bbbc57163d5 xen/include/asm-x86/domain.h --- a/xen/include/asm-x86/domain.h Tue Jun 30 15:37:14 2009 +0100 +++ b/xen/include/asm-x86/domain.h Tue Jun 30 15:40:39 2009 +0100 @@ -210,7 +210,6 @@ struct p2m_domain; * put into impact_header list. 
*/ struct bank_entry { struct list_head list; - int32_t cpu; uint16_t bank; uint64_t mci_status; uint64_t mci_addr; diff -r 00502df38143 -r 7bbbc57163d5 xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Tue Jun 30 15:37:14 2009 +0100 +++ b/xen/include/asm-x86/hvm/hvm.h Tue Jun 30 15:40:39 2009 +0100 @@ -31,7 +31,8 @@ enum hvm_intsrc { hvm_intsrc_none, hvm_intsrc_pic, hvm_intsrc_lapic, - hvm_intsrc_nmi + hvm_intsrc_nmi, + hvm_intsrc_mce }; struct hvm_intack { uint8_t source; /* enum hvm_intsrc */ @@ -41,6 +42,7 @@ struct hvm_intack { #define hvm_intack_pic(vec) ( (struct hvm_intack) { hvm_intsrc_pic, vec } ) #define hvm_intack_lapic(vec) ( (struct hvm_intack) { hvm_intsrc_lapic, vec } ) #define hvm_intack_nmi ( (struct hvm_intack) { hvm_intsrc_nmi, 2 } ) +#define hvm_intack_mce ( (struct hvm_intack) { hvm_intsrc_mce, 18 } ) enum hvm_intblk { hvm_intblk_none, /* not blocked (deliverable) */ hvm_intblk_shadow, /* MOV-SS or STI shadow */ _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |