[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] Clean up MCA MSR virtualization and vMCE injection
# HG changeset patch # User Keir Fraser <keir.fraser@xxxxxxxxxx> # Date 1271663693 -3600 # Node ID 6233eb0f29ba6b89b4dd14fe3d385e85924cd9f1 # Parent 7ee8bb40200a1922d15036fd8788a364f8aaae8f Clean up MCA MSR virtualization and vMCE injection Move all virtual MCE related work into a separate file. It also tries to do some clean-up of the vMCE code, including: a) rename some functions, e.g. mce_init_msr/mce_rdmsr become vmce_init_msr/vmce_rdmsr, to make them more straightforward, b) make vmca_msrs a pointer in arch_domain, to decrease arch_domain's size, c) extract per-bank MCA MSR access into separate functions (bank_mce_wrmsr/bank_mce_rdmsr) to make it a bit cleaner, d) add a new file xen/include/asm-x86/mce.h for vMCE-related declarations. Signed-off-by: Jiang, Yunhong <yunhong.jiang@xxxxxxxxx> --- xen/arch/x86/cpu/mcheck/Makefile | 1 xen/arch/x86/cpu/mcheck/mce.c | 238 ------------------ xen/arch/x86/cpu/mcheck/mce.h | 28 ++ xen/arch/x86/cpu/mcheck/mce_intel.c | 123 --------- xen/arch/x86/cpu/mcheck/vmce.c | 451 ++++++++++++++++++++++++++++++++++++ xen/arch/x86/domain.c | 3 xen/arch/x86/hvm/hvm.c | 5 xen/arch/x86/traps.c | 15 - xen/common/domain.c | 2 xen/include/asm-x86/domain.h | 29 -- xen/include/asm-x86/mce.h | 36 ++ xen/include/asm-x86/traps.h | 5 12 files changed, 540 insertions(+), 396 deletions(-) diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/cpu/mcheck/Makefile --- a/xen/arch/x86/cpu/mcheck/Makefile Thu Apr 15 19:11:16 2010 +0100 +++ b/xen/arch/x86/cpu/mcheck/Makefile Mon Apr 19 08:54:53 2010 +0100 @@ -7,3 +7,4 @@ obj-y += mce_intel.o obj-y += mce_intel.o obj-y += mce_amd_quirks.o obj-y += non-fatal.o +obj-y += vmce.o diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/cpu/mcheck/mce.c --- a/xen/arch/x86/cpu/mcheck/mce.c Thu Apr 15 19:11:16 2010 +0100 +++ b/xen/arch/x86/cpu/mcheck/mce.c Mon Apr 19 08:54:53 2010 +0100 @@ -31,11 +31,11 @@ unsigned int nr_mce_banks; unsigned int nr_mce_banks; int mce_broadcast = 0; -static uint64_t g_mcg_cap; +uint64_t 
g_mcg_cap; /* Real value in physical CTL MSR */ -static uint64_t h_mcg_ctl = 0UL; -static uint64_t *h_mci_ctrl; +uint64_t h_mcg_ctl = 0UL; +uint64_t *h_mci_ctrl; int firstbank; static void intpose_init(void); @@ -752,234 +752,6 @@ u64 mce_cap_init(void) return value; } -/* Guest vMCE# MSRs virtualization ops (rdmsr/wrmsr) */ -void mce_init_msr(struct domain *d) -{ - d->arch.vmca_msrs.mcg_status = 0x0; - d->arch.vmca_msrs.mcg_cap = g_mcg_cap; - d->arch.vmca_msrs.mcg_ctl = ~(uint64_t)0x0; - d->arch.vmca_msrs.nr_injection = 0; - memset(d->arch.vmca_msrs.mci_ctl, ~0, - sizeof(d->arch.vmca_msrs.mci_ctl)); - INIT_LIST_HEAD(&d->arch.vmca_msrs.impact_header); - spin_lock_init(&d->arch.vmca_msrs.lock); -} - -int mce_rdmsr(uint32_t msr, uint64_t *val) -{ - struct domain *d = current->domain; - int ret = 1; - unsigned int bank; - struct bank_entry *entry = NULL; - - *val = 0; - spin_lock(&d->arch.vmca_msrs.lock); - - switch ( msr ) - { - case MSR_IA32_MCG_STATUS: - *val = d->arch.vmca_msrs.mcg_status; - if (*val) - mce_printk(MCE_VERBOSE, - "MCE: rdmsr MCG_STATUS 0x%"PRIx64"\n", *val); - break; - case MSR_IA32_MCG_CAP: - *val = d->arch.vmca_msrs.mcg_cap; - mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CAP 0x%"PRIx64"\n", - *val); - break; - case MSR_IA32_MCG_CTL: - /* Always 0 if no CTL support */ - *val = d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl; - mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n", - *val); - break; - case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1: - bank = (msr - MSR_IA32_MC0_CTL) / 4; - if ( bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT) ) - { - mce_printk(MCE_QUIET, "MCE: MSR %x is not MCA MSR\n", msr); - ret = 0; - break; - } - switch (msr & (MSR_IA32_MC0_CTL | 3)) - { - case MSR_IA32_MC0_CTL: - *val = d->arch.vmca_msrs.mci_ctl[bank] & - (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL); - mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL 0x%"PRIx64"\n", - bank, *val); - break; - case MSR_IA32_MC0_STATUS: - /* Only error bank is read. 
Non-error banks simply return. */ - if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) - { - entry = list_entry(d->arch.vmca_msrs.impact_header.next, - struct bank_entry, list); - if (entry->bank == bank) { - *val = entry->mci_status; - mce_printk(MCE_VERBOSE, - "MCE: rd MC%u_STATUS in vMCE# context " - "value 0x%"PRIx64"\n", bank, *val); - } - else - entry = NULL; - } - break; - case MSR_IA32_MC0_ADDR: - if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) - { - entry = list_entry(d->arch.vmca_msrs.impact_header.next, - struct bank_entry, list); - if ( entry->bank == bank ) - { - *val = entry->mci_addr; - mce_printk(MCE_VERBOSE, - "MCE: rdmsr MC%u_ADDR in vMCE# context " - "0x%"PRIx64"\n", bank, *val); - } - } - break; - case MSR_IA32_MC0_MISC: - if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) - { - entry = list_entry(d->arch.vmca_msrs.impact_header.next, - struct bank_entry, list); - if ( entry->bank == bank ) - { - *val = entry->mci_misc; - mce_printk(MCE_VERBOSE, - "MCE: rd MC%u_MISC in vMCE# context " - "0x%"PRIx64"\n", bank, *val); - } - } - break; - } - break; - default: - switch ( boot_cpu_data.x86_vendor ) - { - case X86_VENDOR_INTEL: - ret = intel_mce_rdmsr(msr, val); - break; - default: - ret = 0; - break; - } - break; - } - - spin_unlock(&d->arch.vmca_msrs.lock); - return ret; -} - -int mce_wrmsr(u32 msr, u64 val) -{ - struct domain *d = current->domain; - struct bank_entry *entry = NULL; - unsigned int bank; - int ret = 1; - - if ( !g_mcg_cap ) - return 0; - - spin_lock(&d->arch.vmca_msrs.lock); - - switch ( msr ) - { - case MSR_IA32_MCG_CTL: - d->arch.vmca_msrs.mcg_ctl = val; - break; - case MSR_IA32_MCG_STATUS: - d->arch.vmca_msrs.mcg_status = val; - mce_printk(MCE_VERBOSE, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", val); - /* For HVM guest, this is the point for deleting vMCE injection node */ - if ( d->is_hvm && (d->arch.vmca_msrs.nr_injection > 0) ) - { - d->arch.vmca_msrs.nr_injection--; /* Should be 0 */ - if ( 
!list_empty(&d->arch.vmca_msrs.impact_header) ) - { - entry = list_entry(d->arch.vmca_msrs.impact_header.next, - struct bank_entry, list); - if ( entry->mci_status & MCi_STATUS_VAL ) - mce_printk(MCE_QUIET, "MCE: MCi_STATUS MSR should have " - "been cleared before write MCG_STATUS MSR\n"); - - mce_printk(MCE_QUIET, "MCE: Delete HVM last injection " - "Node, nr_injection %u\n", - d->arch.vmca_msrs.nr_injection); - list_del(&entry->list); - xfree(entry); - } - else - mce_printk(MCE_QUIET, "MCE: Not found HVM guest" - " last injection Node, something Wrong!\n"); - } - break; - case MSR_IA32_MCG_CAP: - mce_printk(MCE_QUIET, "MCE: MCG_CAP is read-only\n"); - ret = -1; - break; - case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1: - bank = (msr - MSR_IA32_MC0_CTL) / 4; - if ( bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT) ) - { - mce_printk(MCE_QUIET, "MCE: MSR %x is not MCA MSR\n", msr); - ret = 0; - break; - } - switch ( msr & (MSR_IA32_MC0_CTL | 3) ) - { - case MSR_IA32_MC0_CTL: - d->arch.vmca_msrs.mci_ctl[bank] = val; - break; - case MSR_IA32_MC0_STATUS: - /* Give the first entry of the list, it corresponds to current - * vMCE# injection. When vMCE# is finished processing by the - * the guest, this node will be deleted. - * Only error bank is written. Non-error banks simply return. 
- */ - if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) - { - entry = list_entry(d->arch.vmca_msrs.impact_header.next, - struct bank_entry, list); - if ( entry->bank == bank ) - entry->mci_status = val; - mce_printk(MCE_VERBOSE, - "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n", - bank, val); - } - else - mce_printk(MCE_VERBOSE, - "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, val); - break; - case MSR_IA32_MC0_ADDR: - mce_printk(MCE_QUIET, "MCE: MC%u_ADDR is read-only\n", bank); - ret = -1; - break; - case MSR_IA32_MC0_MISC: - mce_printk(MCE_QUIET, "MCE: MC%u_MISC is read-only\n", bank); - ret = -1; - break; - } - break; - default: - switch ( boot_cpu_data.x86_vendor ) - { - case X86_VENDOR_INTEL: - ret = intel_mce_wrmsr(msr, val); - break; - default: - ret = 0; - break; - } - break; - } - - spin_unlock(&d->arch.vmca_msrs.lock); - return ret; -} - static void mcinfo_clear(struct mc_info *mi) { memset(mi, 0, sizeof(struct mc_info)); @@ -1238,11 +1010,11 @@ int mca_ctl_conflict(struct mcinfo_bank return 1; /* Will MCE happen in host if If host mcg_ctl is 0? 
*/ - if ( ~d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl ) + if ( ~d->arch.vmca_msrs->mcg_ctl & h_mcg_ctl ) return 1; bank_nr = bank->mc_bank; - if (~d->arch.vmca_msrs.mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] ) + if (~d->arch.vmca_msrs->mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] ) return 1; return 0; } diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/cpu/mcheck/mce.h --- a/xen/arch/x86/cpu/mcheck/mce.h Thu Apr 15 19:11:16 2010 +0100 +++ b/xen/arch/x86/cpu/mcheck/mce.h Mon Apr 19 08:54:53 2010 +0100 @@ -164,4 +164,32 @@ int x86_mcinfo_add(struct mc_info *mi, v int x86_mcinfo_add(struct mc_info *mi, void *mcinfo); void x86_mcinfo_dump(struct mc_info *mi); +int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d, + uint64_t gstatus); +int inject_vmce(struct domain *d); +int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d, struct mcinfo_global *global); + +extern uint64_t g_mcg_cap; +/* Real value in physical CTL MSR */ +extern uint64_t h_mcg_ctl; +extern uint64_t *h_mci_ctrl; + +extern unsigned int nr_mce_banks; + +static inline int mce_vendor_bank_msr(uint32_t msr) +{ + if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + (msr > MSR_IA32_MC0_CTL2 && msr < (MSR_IA32_MC0_CTL2 + nr_mce_banks)) ) + return 1; + return 0; +} + +static inline int mce_bank_msr(uint32_t msr) +{ + if ( (msr > MSR_IA32_MC0_CTL2 && + msr < (MSR_IA32_MC0_CTL + 4 * nr_mce_banks - 1)) || + mce_vendor_bank_msr(msr) ) + return 1; + return 0; +} #endif /* _MCE_H */ diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/cpu/mcheck/mce_intel.c --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Thu Apr 15 19:11:16 2010 +0100 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Apr 19 08:54:53 2010 +0100 @@ -11,6 +11,7 @@ #include <asm/system.h> #include <asm/msr.h> #include <asm/p2m.h> +#include <asm/mce.h> #include "mce.h" #include "x86_mca.h" @@ -199,126 +200,6 @@ intel_get_extended_msrs(struct mc_info * return MCA_EXTINFO_GLOBAL; } -/* This node list records errors impacting a domain. 
when one - * MCE# happens, one error bank impacts a domain. This error node - * will be inserted to the tail of the per_dom data for vMCE# MSR - * virtualization. When one vMCE# injection is finished processing - * processed by guest, the corresponding node will be deleted. - * This node list is for GUEST vMCE# MSRS virtualization. - */ -static struct bank_entry* alloc_bank_entry(void) { - struct bank_entry *entry; - - entry = xmalloc(struct bank_entry); - if (!entry) { - printk(KERN_ERR "MCE: malloc bank_entry failed\n"); - return NULL; - } - memset(entry, 0x0, sizeof(entry)); - INIT_LIST_HEAD(&entry->list); - return entry; -} - -/* Fill error bank info for #vMCE injection and GUEST vMCE# - * MSR virtualization data - * 1) Log down how many nr_injections of the impacted. - * 2) Copy MCE# error bank to impacted DOM node list, - for vMCE# MSRs virtualization -*/ - -static int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d, - uint64_t gstatus) { - struct bank_entry *entry; - - /* This error bank impacts one domain, we need to fill domain related - * data for vMCE MSRs virtualization and vMCE# injection */ - if (mc_bank->mc_domid != (uint16_t)~0) { - /* For HVM guest, Only when first vMCE is consumed by HVM guest successfully, - * will we generete another node and inject another vMCE - */ - if ( (d->is_hvm) && (d->arch.vmca_msrs.nr_injection > 0) ) - { - mce_printk(MCE_QUIET, "MCE: HVM guest has not handled previous" - " vMCE yet!\n"); - return -1; - } - entry = alloc_bank_entry(); - if (entry == NULL) - return -1; - - entry->mci_status = mc_bank->mc_status; - entry->mci_addr = mc_bank->mc_addr; - entry->mci_misc = mc_bank->mc_misc; - entry->bank = mc_bank->mc_bank; - - spin_lock(&d->arch.vmca_msrs.lock); - /* New error Node, insert to the tail of the per_dom data */ - list_add_tail(&entry->list, &d->arch.vmca_msrs.impact_header); - /* Fill MSR global status */ - d->arch.vmca_msrs.mcg_status = gstatus; - /* New node impact the domain, need another vMCE# 
injection*/ - d->arch.vmca_msrs.nr_injection++; - spin_unlock(&d->arch.vmca_msrs.lock); - - mce_printk(MCE_VERBOSE,"MCE: Found error @[BANK%d " - "status %"PRIx64" addr %"PRIx64" domid %d]\n ", - mc_bank->mc_bank, mc_bank->mc_status, mc_bank->mc_addr, - mc_bank->mc_domid); - } - return 0; -} - -static int inject_mce(struct domain *d) -{ - int cpu = smp_processor_id(); - cpumask_t affinity; - - /* PV guest and HVM guest have different vMCE# injection - * methods*/ - - if ( !test_and_set_bool(d->vcpu[0]->mce_pending) ) - { - if (d->is_hvm) - { - mce_printk(MCE_VERBOSE, "MCE: inject vMCE to HVM DOM %d\n", - d->domain_id); - vcpu_kick(d->vcpu[0]); - } - /* PV guest including DOM0 */ - else - { - mce_printk(MCE_VERBOSE, "MCE: inject vMCE to PV DOM%d\n", - d->domain_id); - if (guest_has_trap_callback - (d, 0, TRAP_machine_check)) - { - d->vcpu[0]->cpu_affinity_tmp = - d->vcpu[0]->cpu_affinity; - cpus_clear(affinity); - cpu_set(cpu, affinity); - mce_printk(MCE_VERBOSE, "MCE: CPU%d set affinity, old %d\n", cpu, - d->vcpu[0]->processor); - vcpu_set_affinity(d->vcpu[0], &affinity); - vcpu_kick(d->vcpu[0]); - } - else - { - mce_printk(MCE_VERBOSE, "MCE: Kill PV guest with No MCE handler\n"); - domain_crash(d); - } - } - } - else { - /* new vMCE comes while first one has not been injected yet, - * in this case, inject fail. [We can't lose this vMCE for - * the mce node's consistency]. 
- */ - mce_printk(MCE_QUIET, "There's a pending vMCE waiting to be injected " - " to this DOM%d!\n", d->domain_id); - return -1; - } - return 0; -} static void intel_UCR_handler(struct mcinfo_bank *bank, struct mcinfo_global *global, @@ -377,7 +258,7 @@ static void intel_UCR_handler(struct mci return; } /* We will inject vMCE to DOMU*/ - if ( inject_mce(d) < 0 ) + if ( inject_vmce(d) < 0 ) { mce_printk(MCE_QUIET, "inject vMCE to DOM%d" " failed\n", d->domain_id); diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/cpu/mcheck/vmce.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/cpu/mcheck/vmce.c Mon Apr 19 08:54:53 2010 +0100 @@ -0,0 +1,451 @@ +/* + * vmce.c - virtual MCE support + */ + +#include <xen/init.h> +#include <xen/types.h> +#include <xen/irq.h> +#include <xen/event.h> +#include <xen/kernel.h> +#include <xen/delay.h> +#include <xen/smp.h> +#include <xen/mm.h> +#include <asm/processor.h> +#include <public/sysctl.h> +#include <asm/system.h> +#include <asm/msr.h> +#include <asm/p2m.h> +#include "mce.h" +#include "x86_mca.h" + +int vmce_init_msr(struct domain *d) +{ + if ( dom_vmce(d) ) + { + dprintk(XENLOG_G_WARNING, "Domain %d has inited vMCE\n", d->domain_id); + return 0; + } + + /* Allocate the vmca_msrs and mci_ctl togother */ + dom_vmce(d) = xmalloc(struct domain_mca_msrs); + if ( !dom_vmce(d) ) + return -ENOMEM; + + dom_vmce(d)->mci_ctl = xmalloc_array(uint64_t, nr_mce_banks); + if ( !dom_vmce(d)->mci_ctl ) + { + xfree(dom_vmce(d)); + return -ENOMEM; + } + memset(d->arch.vmca_msrs->mci_ctl, ~0, + sizeof(d->arch.vmca_msrs->mci_ctl)); + + dom_vmce(d)->mcg_status = 0x0; + dom_vmce(d)->mcg_cap = g_mcg_cap; + dom_vmce(d)->mcg_ctl = ~(uint64_t)0x0; + dom_vmce(d)->nr_injection = 0; + + INIT_LIST_HEAD(&d->arch.vmca_msrs->impact_header); + spin_lock_init(&d->arch.vmca_msrs->lock); + + return 0; +} + +/* + * Caller should make sure msr is bank msr */ +static int bank_mce_rdmsr(struct domain *d, uint32_t msr, uint64_t *val) +{ + int bank, ret = 1; + 
struct domain_mca_msrs *vmce; + struct bank_entry *entry = NULL; + + if (!d) + return -EINVAL; + vmce = dom_vmce(d); + ASSERT(vmce); + + bank = (msr - MSR_IA32_MC0_CTL) / 4; + if (bank >= nr_mce_banks) + return -1; + + switch (msr & (MSR_IA32_MC0_CTL | 3)) + { + case MSR_IA32_MC0_CTL: + *val = vmce->mci_ctl[bank] & + (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL); + mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL 0x%"PRIx64"\n", + bank, *val); + break; + case MSR_IA32_MC0_STATUS: + /* Only error bank is read. Non-error banks simply return. */ + if ( !list_empty(&vmce->impact_header) ) + { + entry = list_entry(vmce->impact_header.next, + struct bank_entry, list); + if (entry->bank == bank) { + *val = entry->mci_status; + mce_printk(MCE_VERBOSE, + "MCE: rd MC%u_STATUS in vMCE# context " + "value 0x%"PRIx64"\n", bank, *val); + } + else + entry = NULL; + } + break; + case MSR_IA32_MC0_ADDR: + if ( !list_empty(&vmce->impact_header) ) + { + entry = list_entry(vmce->impact_header.next, + struct bank_entry, list); + if ( entry->bank == bank ) + { + *val = entry->mci_addr; + mce_printk(MCE_VERBOSE, + "MCE: rdmsr MC%u_ADDR in vMCE# context " + "0x%"PRIx64"\n", bank, *val); + } + } + break; + case MSR_IA32_MC0_MISC: + if ( !list_empty(&vmce->impact_header) ) + { + entry = list_entry(vmce->impact_header.next, + struct bank_entry, list); + if ( entry->bank == bank ) + { + *val = entry->mci_misc; + mce_printk(MCE_VERBOSE, + "MCE: rd MC%u_MISC in vMCE# context " + "0x%"PRIx64"\n", bank, *val); + } + } + break; + default: + switch ( boot_cpu_data.x86_vendor ) + { + case X86_VENDOR_INTEL: + ret = intel_mce_rdmsr(msr, val); + break; + default: + ret = 0; + break; + } + break; + } + + return ret; +} + +/* + * < 0: Unsupported and will #GP fault to guest + * = 0: Not handled, should be handled by other components + * > 0: Success + */ +int vmce_rdmsr(uint32_t msr, uint64_t *val) +{ + struct domain *d = current->domain; + struct domain_mca_msrs *vmce; + int ret = 1; + + *val = 0; + + vmce = 
dom_vmce(d); + if ( !vmce ) + { + /* XXX more handle here */ + return 0; + } + + spin_lock(&d->arch.vmca_msrs->lock); + + switch ( msr ) + { + case MSR_IA32_MCG_STATUS: + *val = vmce->mcg_status; + if (*val) + mce_printk(MCE_VERBOSE, + "MCE: rdmsr MCG_STATUS 0x%"PRIx64"\n", *val); + break; + case MSR_IA32_MCG_CAP: + *val = vmce->mcg_cap; + mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CAP 0x%"PRIx64"\n", + *val); + break; + case MSR_IA32_MCG_CTL: + /* Always 0 if no CTL support */ + *val = vmce->mcg_ctl & h_mcg_ctl; + mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n", + *val); + break; + default: + if ( mce_bank_msr(msr) ) + ret = bank_mce_rdmsr(d, msr, val); + else + ret = 0; + break; + } + + spin_unlock(&d->arch.vmca_msrs->lock); + return ret; +} + +int bank_mce_wrmsr(struct domain *d, u32 msr, u64 val) +{ + int bank, ret = 1; + struct domain_mca_msrs *vmce; + struct bank_entry *entry = NULL; + + if (!d) + return -EINVAL; + vmce = dom_vmce(d); + ASSERT(vmce && vmce->mci_ctl); + + bank = (msr - MSR_IA32_MC0_CTL) / 4; + if (bank >= nr_mce_banks) + return -EINVAL; + + switch ( msr & (MSR_IA32_MC0_CTL | 3) ) + { + case MSR_IA32_MC0_CTL: + vmce->mci_ctl[bank] = val; + break; + case MSR_IA32_MC0_STATUS: + /* Give the first entry of the list, it corresponds to current + * vMCE# injection. When vMCE# is finished processing by the + * the guest, this node will be deleted. + * Only error bank is written. Non-error banks simply return. 
+ */ + if ( !list_empty(&d->arch.vmca_msrs->impact_header) ) + { + entry = list_entry(d->arch.vmca_msrs->impact_header.next, + struct bank_entry, list); + if ( entry->bank == bank ) + entry->mci_status = val; + mce_printk(MCE_VERBOSE, + "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n", + bank, val); + } + else + mce_printk(MCE_VERBOSE, + "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, val); + break; + case MSR_IA32_MC0_ADDR: + mce_printk(MCE_QUIET, "MCE: MC%u_ADDR is read-only\n", bank); + ret = -1; + break; + case MSR_IA32_MC0_MISC: + mce_printk(MCE_QUIET, "MCE: MC%u_MISC is read-only\n", bank); + ret = -1; + break; + default: + switch ( boot_cpu_data.x86_vendor ) + { + case X86_VENDOR_INTEL: + ret = intel_mce_wrmsr(msr, val); + break; + default: + ret = 0; + break; + } + break; + } + + return ret; +} + +/* + * < 0: Unsupported and will #GP fault to guest + * = 0: Not handled, should be handled by other components + * > 0: Success + */ +int vmce_wrmsr(u32 msr, u64 val) +{ + struct domain *d = current->domain; + struct bank_entry *entry = NULL; + struct domain_mca_msrs *vmce; + int ret = 1; + + if ( !g_mcg_cap ) + return 0; + + vmce = dom_vmce(d); + spin_lock(&vmce->lock); + + switch ( msr ) + { + case MSR_IA32_MCG_CTL: + vmce->mcg_ctl = val; + break; + case MSR_IA32_MCG_STATUS: + vmce->mcg_status = val; + mce_printk(MCE_VERBOSE, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", val); + /* For HVM guest, this is the point for deleting vMCE injection node */ + if ( d->is_hvm && (vmce->nr_injection > 0) ) + { + vmce->nr_injection--; /* Should be 0 */ + if ( !list_empty(&vmce->impact_header) ) + { + entry = list_entry(vmce->impact_header.next, + struct bank_entry, list); + if ( entry->mci_status & MCi_STATUS_VAL ) + mce_printk(MCE_QUIET, "MCE: MCi_STATUS MSR should have " + "been cleared before write MCG_STATUS MSR\n"); + + mce_printk(MCE_QUIET, "MCE: Delete HVM last injection " + "Node, nr_injection %u\n", + vmce->nr_injection); + list_del(&entry->list); + xfree(entry); + } + else + 
mce_printk(MCE_QUIET, "MCE: Not found HVM guest" + " last injection Node, something Wrong!\n"); + } + break; + case MSR_IA32_MCG_CAP: + mce_printk(MCE_QUIET, "MCE: MCG_CAP is read-only\n"); + ret = -1; + break; + default: + if ( mce_bank_msr(msr) ) + ret = bank_mce_wrmsr(d, msr, val); + else + ret = 0; + break; + } + + spin_unlock(&vmce->lock); + return ret; +} + +int inject_vmce(struct domain *d) +{ + int cpu = smp_processor_id(); + cpumask_t affinity; + + /* PV guest and HVM guest have different vMCE# injection + * methods*/ + if ( !test_and_set_bool(d->vcpu[0]->mce_pending) ) + { + if (d->is_hvm) + { + mce_printk(MCE_VERBOSE, "MCE: inject vMCE to HVM DOM %d\n", + d->domain_id); + vcpu_kick(d->vcpu[0]); + } + /* PV guest including DOM0 */ + else + { + mce_printk(MCE_VERBOSE, "MCE: inject vMCE to PV DOM%d\n", + d->domain_id); + if (guest_has_trap_callback + (d, 0, TRAP_machine_check)) + { + d->vcpu[0]->cpu_affinity_tmp = + d->vcpu[0]->cpu_affinity; + cpus_clear(affinity); + cpu_set(cpu, affinity); + mce_printk(MCE_VERBOSE, "MCE: CPU%d set affinity, old %d\n", cpu, + d->vcpu[0]->processor); + vcpu_set_affinity(d->vcpu[0], &affinity); + vcpu_kick(d->vcpu[0]); + } + else + { + mce_printk(MCE_VERBOSE, "MCE: Kill PV guest with No MCE handler\n"); + domain_crash(d); + } + } + } + else { + /* new vMCE comes while first one has not been injected yet, + * in this case, inject fail. [We can't lose this vMCE for + * the mce node's consistency]. + */ + mce_printk(MCE_QUIET, "There's a pending vMCE waiting to be injected " + " to this DOM%d!\n", d->domain_id); + return -1; + } + return 0; +} + +/* This node list records errors impacting a domain. when one + * MCE# happens, one error bank impacts a domain. This error node + * will be inserted to the tail of the per_dom data for vMCE# MSR + * virtualization. When one vMCE# injection is finished processing + * processed by guest, the corresponding node will be deleted. + * This node list is for GUEST vMCE# MSRS virtualization. 
+ */ +static struct bank_entry* alloc_bank_entry(void) { + struct bank_entry *entry; + + entry = xmalloc(struct bank_entry); + if (!entry) { + printk(KERN_ERR "MCE: malloc bank_entry failed\n"); + return NULL; + } + memset(entry, 0x0, sizeof(entry)); + INIT_LIST_HEAD(&entry->list); + return entry; +} + +/* Fill error bank info for #vMCE injection and GUEST vMCE# + * MSR virtualization data + * 1) Log down how many nr_injections of the impacted. + * 2) Copy MCE# error bank to impacted DOM node list, + for vMCE# MSRs virtualization +*/ + +int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d, + uint64_t gstatus) { + struct bank_entry *entry; + + /* This error bank impacts one domain, we need to fill domain related + * data for vMCE MSRs virtualization and vMCE# injection */ + if (mc_bank->mc_domid != (uint16_t)~0) { + /* For HVM guest, Only when first vMCE is consumed by HVM guest successfully, + * will we generete another node and inject another vMCE + */ + if ( (d->is_hvm) && (d->arch.vmca_msrs->nr_injection > 0) ) + { + mce_printk(MCE_QUIET, "MCE: HVM guest has not handled previous" + " vMCE yet!\n"); + return -1; + } + entry = alloc_bank_entry(); + if (entry == NULL) + return -1; + + entry->mci_status = mc_bank->mc_status; + entry->mci_addr = mc_bank->mc_addr; + entry->mci_misc = mc_bank->mc_misc; + entry->bank = mc_bank->mc_bank; + + spin_lock(&d->arch.vmca_msrs->lock); + /* New error Node, insert to the tail of the per_dom data */ + list_add_tail(&entry->list, &d->arch.vmca_msrs->impact_header); + /* Fill MSR global status */ + d->arch.vmca_msrs->mcg_status = gstatus; + /* New node impact the domain, need another vMCE# injection*/ + d->arch.vmca_msrs->nr_injection++; + spin_unlock(&d->arch.vmca_msrs->lock); + + mce_printk(MCE_VERBOSE,"MCE: Found error @[BANK%d " + "status %"PRIx64" addr %"PRIx64" domid %d]\n ", + mc_bank->mc_bank, mc_bank->mc_status, mc_bank->mc_addr, + mc_bank->mc_domid); + } + return 0; +} + +int vmce_domain_inject(struct 
mcinfo_bank *bank, struct domain *d, struct mcinfo_global *global) +{ + int ret; + + ret = fill_vmsr_data(bank, d, global->mc_gstatus); + if (ret < 0) + return ret; + + return inject_vmce(d); +} + diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Thu Apr 15 19:11:16 2010 +0100 +++ b/xen/arch/x86/domain.c Mon Apr 19 08:54:53 2010 +0100 @@ -49,6 +49,7 @@ #include <asm/msr.h> #include <asm/traps.h> #include <asm/nmi.h> +#include <asm/mce.h> #include <xen/numa.h> #include <xen/iommu.h> #ifdef CONFIG_COMPAT @@ -501,7 +502,7 @@ int arch_domain_create(struct domain *d, goto fail; /* For Guest vMCE MSRs virtualization */ - mce_init_msr(d); + vmce_init_msr(d); } if ( is_hvm_domain(d) ) diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Thu Apr 15 19:11:16 2010 +0100 +++ b/xen/arch/x86/hvm/hvm.c Mon Apr 19 08:54:53 2010 +0100 @@ -47,6 +47,7 @@ #include <asm/traps.h> #include <asm/mc146818rtc.h> #include <asm/spinlock.h> +#include <asm/mce.h> #include <asm/hvm/hvm.h> #include <asm/hvm/vpt.h> #include <asm/hvm/support.h> @@ -2061,7 +2062,7 @@ int hvm_msr_read_intercept(struct cpu_us break; default: - ret = mce_rdmsr(ecx, &msr_content); + ret = vmce_rdmsr(ecx, &msr_content); if ( ret < 0 ) goto gp_fault; else if ( ret ) @@ -2160,7 +2161,7 @@ int hvm_msr_write_intercept(struct cpu_u break; default: - ret = mce_wrmsr(ecx, msr_content); + ret = vmce_wrmsr(ecx, msr_content); if ( ret < 0 ) goto gp_fault; else if ( ret ) diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Thu Apr 15 19:11:16 2010 +0100 +++ b/xen/arch/x86/traps.c Mon Apr 19 08:54:53 2010 +0100 @@ -65,6 +65,7 @@ #include <asm/traps.h> #include <asm/hvm/vpt.h> #include <asm/hypercall.h> +#include <asm/mce.h> #include <public/arch-x86/cpuid.h> /* @@ -2295,7 +2296,7 @@ static int emulate_privileged_op(struct if ( wrmsr_hypervisor_regs(regs->ecx, val) ) break; - rc = mce_wrmsr(regs->ecx, val); + rc = 
vmce_wrmsr(regs->ecx, val); if ( rc < 0 ) goto fail; if ( rc ) @@ -2388,7 +2389,7 @@ static int emulate_privileged_op(struct break; } - rc = mce_rdmsr(regs->ecx, &val); + rc = vmce_rdmsr(regs->ecx, &val); if ( rc < 0 ) goto fail; if ( rc ) @@ -2947,19 +2948,19 @@ void async_exception_cleanup(struct vcpu { struct domain *d = curr->domain; - if ( !d->arch.vmca_msrs.nr_injection ) + if ( !d->arch.vmca_msrs->nr_injection ) { printk(XENLOG_WARNING "MCE: ret from vMCE#, " "no injection node\n"); goto end; } - d->arch.vmca_msrs.nr_injection--; - if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) + d->arch.vmca_msrs->nr_injection--; + if ( !list_empty(&d->arch.vmca_msrs->impact_header) ) { struct bank_entry *entry; - entry = list_entry(d->arch.vmca_msrs.impact_header.next, + entry = list_entry(d->arch.vmca_msrs->impact_header.next, struct bank_entry, list); gdprintk(XENLOG_DEBUG, "MCE: delete last injection node\n"); list_del(&entry->list); @@ -2968,7 +2969,7 @@ void async_exception_cleanup(struct vcpu printk(XENLOG_ERR "MCE: didn't found last injection node\n"); /* further injection */ - if ( d->arch.vmca_msrs.nr_injection > 0 && + if ( d->arch.vmca_msrs->nr_injection > 0 && guest_has_trap_callback(d, 0, TRAP_machine_check) && !test_and_set_bool(curr->mce_pending) ) { diff -r 7ee8bb40200a -r 6233eb0f29ba xen/common/domain.c --- a/xen/common/domain.c Thu Apr 15 19:11:16 2010 +0100 +++ b/xen/common/domain.c Mon Apr 19 08:54:53 2010 +0100 @@ -616,6 +616,8 @@ static void complete_domain_destroy(stru xfree(d->pirq_mask); xfree(d->pirq_to_evtchn); + xfree(dom_vmce(d)->mci_ctl); + xfree(dom_vmce(d)); xsm_free_security_domain(d); free_domain_struct(d); diff -r 7ee8bb40200a -r 6233eb0f29ba xen/include/asm-x86/domain.h --- a/xen/include/asm-x86/domain.h Thu Apr 15 19:11:16 2010 +0100 +++ b/xen/include/asm-x86/domain.h Mon Apr 19 08:54:53 2010 +0100 @@ -6,6 +6,7 @@ #include <asm/hvm/vcpu.h> #include <asm/hvm/domain.h> #include <asm/e820.h> +#include <asm/mce.h> #include 
<public/vcpu.h> #define has_32bit_shinfo(d) ((d)->arch.has_32bit_shinfo) @@ -214,32 +215,6 @@ typedef xen_domctl_cpuid_t cpuid_input_t typedef xen_domctl_cpuid_t cpuid_input_t; struct p2m_domain; - -/* Define for GUEST MCA handling */ -#define MAX_NR_BANKS 30 - -/* This entry is for recording bank nodes for the impacted domain, - * put into impact_header list. */ -struct bank_entry { - struct list_head list; - uint16_t bank; - uint64_t mci_status; - uint64_t mci_addr; - uint64_t mci_misc; -}; - -struct domain_mca_msrs -{ - /* Guest should not change below values after DOM boot up */ - uint64_t mcg_cap; - uint64_t mcg_ctl; - uint64_t mcg_status; - uint64_t mci_ctl[MAX_NR_BANKS]; - uint16_t nr_injection; - struct list_head impact_header; - spinlock_t lock; -}; - struct time_scale { int shift; u32 mul_frac; @@ -311,7 +286,7 @@ struct arch_domain cpuid_input_t cpuids[MAX_CPUID_INPUT]; /* For Guest vMCA handling */ - struct domain_mca_msrs vmca_msrs; + struct domain_mca_msrs *vmca_msrs; /* TSC management (emulation, pv, scaling, stats) */ int tsc_mode; /* see include/asm-x86/time.h */ diff -r 7ee8bb40200a -r 6233eb0f29ba xen/include/asm-x86/mce.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/include/asm-x86/mce.h Mon Apr 19 08:54:53 2010 +0100 @@ -0,0 +1,36 @@ +#include <xen/types.h> +#include <public/arch-x86/xen-mca.h> +#ifndef _XEN_X86_MCE_H +#define _XEN_X86_MCE_H +/* Define for GUEST MCA handling */ +#define MAX_NR_BANKS 30 + +/* This entry is for recording bank nodes for the impacted domain, + * put into impact_header list. 
*/ +struct bank_entry { + struct list_head list; + uint16_t bank; + uint64_t mci_status; + uint64_t mci_addr; + uint64_t mci_misc; +}; + +struct domain_mca_msrs +{ + /* Guest should not change below values after DOM boot up */ + uint64_t mcg_cap; + uint64_t mcg_ctl; + uint64_t mcg_status; + uint64_t *mci_ctl; + uint16_t nr_injection; + struct list_head impact_header; + spinlock_t lock; +}; + +#define dom_vmce(x) ((x)->arch.vmca_msrs) + +/* Guest vMCE MSRs virtualization */ +extern int vmce_init_msr(struct domain *d); +extern int vmce_wrmsr(uint32_t msr, uint64_t val); +extern int vmce_rdmsr(uint32_t msr, uint64_t *val); +#endif diff -r 7ee8bb40200a -r 6233eb0f29ba xen/include/asm-x86/traps.h --- a/xen/include/asm-x86/traps.h Thu Apr 15 19:11:16 2010 +0100 +++ b/xen/include/asm-x86/traps.h Mon Apr 19 08:54:53 2010 +0100 @@ -49,9 +49,4 @@ extern int send_guest_trap(struct domain extern int send_guest_trap(struct domain *d, uint16_t vcpuid, unsigned int trap_nr); -/* Guest vMCE MSRs virtualization */ -extern void mce_init_msr(struct domain *d); -extern int mce_wrmsr(uint32_t msr, uint64_t val); -extern int mce_rdmsr(uint32_t msr, uint64_t *val); - #endif /* ASM_TRAP_H */ _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |