[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] RE: [Xen-devel] [PATCH] Clean-up on MCA MSR virtualization and vMCE injection
Sorry, I forgot the attachment. --jyh >-----Original Message----- >From: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx >[mailto:xen-devel-bounces@xxxxxxxxxxxxxxxxxxx] On Behalf Of Jiang, Yunhong >Sent: Friday, April 16, 2010 6:56 PM >To: Keir Fraser; Frank.Vanderlinden@xxxxxxx; Christoph Egger >Cc: xen-devel@xxxxxxxxxxxxxxxxxxx >Subject: [Xen-devel] [PATCH] Clean-up on MCA MSR virtualization and vMCE >injection > >Clean-up on MCA MSR virtualization and vMCE injection > >Move all virtual MCE related work into a separate file. >It also tries to do some clean-up on the vMCE, including: >a) rename some functions, e.g. mce_init_msr/mce_rdmsr to > vmce_init_msr/vmce_rdmsr, to make them more straightforward, >b) make the vmca_msrs be a pointer in arch_domain, > to decrease arch_domain's size >c) extract per-bank MCA MSR access into separate functions > (bank_mce_wrmsr/bank_mce_rdmsr) to make it a bit cleaner. >d) A new header file xen/include/asm-x86/mce.h is added for vmce related declarations. > >Signed-off-by: Jiang, Yunhong <yunhong.jiang@xxxxxxxxx> > >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/cpu/mcheck/Makefile >--- a/xen/arch/x86/cpu/mcheck/Makefile Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/arch/x86/cpu/mcheck/Makefile Fri Apr 16 18:55:03 2010 +0800 >@@ -7,3 +7,4 @@ obj-y += mce_intel.o > obj-y += mce_intel.o > obj-y += mce_amd_quirks.o > obj-y += non-fatal.o >+obj-y += vmce.o >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/cpu/mcheck/mce.c >--- a/xen/arch/x86/cpu/mcheck/mce.c Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/arch/x86/cpu/mcheck/mce.c Fri Apr 16 18:55:03 2010 +0800 >@@ -31,11 +31,11 @@ unsigned int nr_mce_banks; > unsigned int nr_mce_banks; > > int mce_broadcast = 0; >-static uint64_t g_mcg_cap; >+uint64_t g_mcg_cap; > > /* Real value in physical CTL MSR */ >-static uint64_t h_mcg_ctl = 0UL; >-static uint64_t *h_mci_ctrl; >+uint64_t h_mcg_ctl = 0UL; >+uint64_t *h_mci_ctrl; > int firstbank; > > static void intpose_init(void); >@@ -752,234 +752,6 @@ u64 
mce_cap_init(void) > return value; > } > >-/* Guest vMCE# MSRs virtualization ops (rdmsr/wrmsr) */ >-void mce_init_msr(struct domain *d) >-{ >- d->arch.vmca_msrs.mcg_status = 0x0; >- d->arch.vmca_msrs.mcg_cap = g_mcg_cap; >- d->arch.vmca_msrs.mcg_ctl = ~(uint64_t)0x0; >- d->arch.vmca_msrs.nr_injection = 0; >- memset(d->arch.vmca_msrs.mci_ctl, ~0, >- sizeof(d->arch.vmca_msrs.mci_ctl)); >- INIT_LIST_HEAD(&d->arch.vmca_msrs.impact_header); >- spin_lock_init(&d->arch.vmca_msrs.lock); >-} >- >-int mce_rdmsr(uint32_t msr, uint64_t *val) >-{ >- struct domain *d = current->domain; >- int ret = 1; >- unsigned int bank; >- struct bank_entry *entry = NULL; >- >- *val = 0; >- spin_lock(&d->arch.vmca_msrs.lock); >- >- switch ( msr ) >- { >- case MSR_IA32_MCG_STATUS: >- *val = d->arch.vmca_msrs.mcg_status; >- if (*val) >- mce_printk(MCE_VERBOSE, >- "MCE: rdmsr MCG_STATUS 0x%"PRIx64"\n", *val); >- break; >- case MSR_IA32_MCG_CAP: >- *val = d->arch.vmca_msrs.mcg_cap; >- mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CAP 0x%"PRIx64"\n", >- *val); >- break; >- case MSR_IA32_MCG_CTL: >- /* Always 0 if no CTL support */ >- *val = d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl; >- mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n", >- *val); >- break; >- case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1: >- bank = (msr - MSR_IA32_MC0_CTL) / 4; >- if ( bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT) ) >- { >- mce_printk(MCE_QUIET, "MCE: MSR %x is not MCA MSR\n", msr); >- ret = 0; >- break; >- } >- switch (msr & (MSR_IA32_MC0_CTL | 3)) >- { >- case MSR_IA32_MC0_CTL: >- *val = d->arch.vmca_msrs.mci_ctl[bank] & >- (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL); >- mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL >0x%"PRIx64"\n", >- bank, *val); >- break; >- case MSR_IA32_MC0_STATUS: >- /* Only error bank is read. Non-error banks simply return. 
*/ >- if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) >- { >- entry = list_entry(d->arch.vmca_msrs.impact_header.next, >- struct bank_entry, list); >- if (entry->bank == bank) { >- *val = entry->mci_status; >- mce_printk(MCE_VERBOSE, >- "MCE: rd MC%u_STATUS in vMCE# context " >- "value 0x%"PRIx64"\n", bank, *val); >- } >- else >- entry = NULL; >- } >- break; >- case MSR_IA32_MC0_ADDR: >- if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) >- { >- entry = list_entry(d->arch.vmca_msrs.impact_header.next, >- struct bank_entry, list); >- if ( entry->bank == bank ) >- { >- *val = entry->mci_addr; >- mce_printk(MCE_VERBOSE, >- "MCE: rdmsr MC%u_ADDR in vMCE# context " >- "0x%"PRIx64"\n", bank, *val); >- } >- } >- break; >- case MSR_IA32_MC0_MISC: >- if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) >- { >- entry = list_entry(d->arch.vmca_msrs.impact_header.next, >- struct bank_entry, list); >- if ( entry->bank == bank ) >- { >- *val = entry->mci_misc; >- mce_printk(MCE_VERBOSE, >- "MCE: rd MC%u_MISC in vMCE# context " >- "0x%"PRIx64"\n", bank, *val); >- } >- } >- break; >- } >- break; >- default: >- switch ( boot_cpu_data.x86_vendor ) >- { >- case X86_VENDOR_INTEL: >- ret = intel_mce_rdmsr(msr, val); >- break; >- default: >- ret = 0; >- break; >- } >- break; >- } >- >- spin_unlock(&d->arch.vmca_msrs.lock); >- return ret; >-} >- >-int mce_wrmsr(u32 msr, u64 val) >-{ >- struct domain *d = current->domain; >- struct bank_entry *entry = NULL; >- unsigned int bank; >- int ret = 1; >- >- if ( !g_mcg_cap ) >- return 0; >- >- spin_lock(&d->arch.vmca_msrs.lock); >- >- switch ( msr ) >- { >- case MSR_IA32_MCG_CTL: >- d->arch.vmca_msrs.mcg_ctl = val; >- break; >- case MSR_IA32_MCG_STATUS: >- d->arch.vmca_msrs.mcg_status = val; >- mce_printk(MCE_VERBOSE, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", >val); >- /* For HVM guest, this is the point for deleting vMCE injection node >*/ >- if ( d->is_hvm && (d->arch.vmca_msrs.nr_injection > 0) ) >- { >- d->arch.vmca_msrs.nr_injection--; /* 
Should be 0 */ >- if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) >- { >- entry = list_entry(d->arch.vmca_msrs.impact_header.next, >- struct bank_entry, list); >- if ( entry->mci_status & MCi_STATUS_VAL ) >- mce_printk(MCE_QUIET, "MCE: MCi_STATUS MSR should >have " >- "been cleared before write MCG_STATUS >MSR\n"); >- >- mce_printk(MCE_QUIET, "MCE: Delete HVM last injection " >- "Node, nr_injection %u\n", >- d->arch.vmca_msrs.nr_injection); >- list_del(&entry->list); >- xfree(entry); >- } >- else >- mce_printk(MCE_QUIET, "MCE: Not found HVM guest" >- " last injection Node, something Wrong!\n"); >- } >- break; >- case MSR_IA32_MCG_CAP: >- mce_printk(MCE_QUIET, "MCE: MCG_CAP is read-only\n"); >- ret = -1; >- break; >- case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1: >- bank = (msr - MSR_IA32_MC0_CTL) / 4; >- if ( bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT) ) >- { >- mce_printk(MCE_QUIET, "MCE: MSR %x is not MCA MSR\n", msr); >- ret = 0; >- break; >- } >- switch ( msr & (MSR_IA32_MC0_CTL | 3) ) >- { >- case MSR_IA32_MC0_CTL: >- d->arch.vmca_msrs.mci_ctl[bank] = val; >- break; >- case MSR_IA32_MC0_STATUS: >- /* Give the first entry of the list, it corresponds to current >- * vMCE# injection. When vMCE# is finished processing by the >- * the guest, this node will be deleted. >- * Only error bank is written. Non-error banks simply return. 
>- */ >- if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) >- { >- entry = list_entry(d->arch.vmca_msrs.impact_header.next, >- struct bank_entry, list); >- if ( entry->bank == bank ) >- entry->mci_status = val; >- mce_printk(MCE_VERBOSE, >- "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n", >- bank, val); >- } >- else >- mce_printk(MCE_VERBOSE, >- "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, val); >- break; >- case MSR_IA32_MC0_ADDR: >- mce_printk(MCE_QUIET, "MCE: MC%u_ADDR is read-only\n", bank); >- ret = -1; >- break; >- case MSR_IA32_MC0_MISC: >- mce_printk(MCE_QUIET, "MCE: MC%u_MISC is read-only\n", bank); >- ret = -1; >- break; >- } >- break; >- default: >- switch ( boot_cpu_data.x86_vendor ) >- { >- case X86_VENDOR_INTEL: >- ret = intel_mce_wrmsr(msr, val); >- break; >- default: >- ret = 0; >- break; >- } >- break; >- } >- >- spin_unlock(&d->arch.vmca_msrs.lock); >- return ret; >-} >- > static void mcinfo_clear(struct mc_info *mi) > { > memset(mi, 0, sizeof(struct mc_info)); >@@ -1238,11 +1010,11 @@ int mca_ctl_conflict(struct mcinfo_bank > return 1; > > /* Will MCE happen in host if If host mcg_ctl is 0? 
*/ >- if ( ~d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl ) >+ if ( ~d->arch.vmca_msrs->mcg_ctl & h_mcg_ctl ) > return 1; > > bank_nr = bank->mc_bank; >- if (~d->arch.vmca_msrs.mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] ) >+ if (~d->arch.vmca_msrs->mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] ) > return 1; > return 0; > } >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/cpu/mcheck/mce.h >--- a/xen/arch/x86/cpu/mcheck/mce.h Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/arch/x86/cpu/mcheck/mce.h Fri Apr 16 18:55:03 2010 +0800 >@@ -164,4 +164,32 @@ int x86_mcinfo_add(struct mc_info *mi, v > int x86_mcinfo_add(struct mc_info *mi, void *mcinfo); > void x86_mcinfo_dump(struct mc_info *mi); > >+int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d, >+ uint64_t gstatus); >+int inject_vmce(struct domain *d); >+int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d, struct >mcinfo_global *global); >+ >+extern uint64_t g_mcg_cap; >+/* Real value in physical CTL MSR */ >+extern uint64_t h_mcg_ctl; >+extern uint64_t *h_mci_ctrl; >+ >+extern unsigned int nr_mce_banks; >+ >+static inline int mce_vendor_bank_msr(uint32_t msr) >+{ >+ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && >+ (msr > MSR_IA32_MC0_CTL2 && msr < (MSR_IA32_MC0_CTL2 + >nr_mce_banks)) ) >+ return 1; >+ return 0; >+} >+ >+static inline int mce_bank_msr(uint32_t msr) >+{ >+ if ( (msr > MSR_IA32_MC0_CTL2 && >+ msr < (MSR_IA32_MC0_CTL + 4 * nr_mce_banks - 1)) || >+ mce_vendor_bank_msr(msr) ) >+ return 1; >+ return 0; >+} > #endif /* _MCE_H */ >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/cpu/mcheck/mce_intel.c >--- a/xen/arch/x86/cpu/mcheck/mce_intel.c Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Apr 16 18:55:03 2010 >+0800 >@@ -11,6 +11,7 @@ > #include <asm/system.h> > #include <asm/msr.h> > #include <asm/p2m.h> >+#include <asm/mce.h> > #include "mce.h" > #include "x86_mca.h" > >@@ -199,126 +200,6 @@ intel_get_extended_msrs(struct mc_info * > return MCA_EXTINFO_GLOBAL; > 
} > >-/* This node list records errors impacting a domain. when one >- * MCE# happens, one error bank impacts a domain. This error node >- * will be inserted to the tail of the per_dom data for vMCE# MSR >- * virtualization. When one vMCE# injection is finished processing >- * processed by guest, the corresponding node will be deleted. >- * This node list is for GUEST vMCE# MSRS virtualization. >- */ >-static struct bank_entry* alloc_bank_entry(void) { >- struct bank_entry *entry; >- >- entry = xmalloc(struct bank_entry); >- if (!entry) { >- printk(KERN_ERR "MCE: malloc bank_entry failed\n"); >- return NULL; >- } >- memset(entry, 0x0, sizeof(entry)); >- INIT_LIST_HEAD(&entry->list); >- return entry; >-} >- >-/* Fill error bank info for #vMCE injection and GUEST vMCE# >- * MSR virtualization data >- * 1) Log down how many nr_injections of the impacted. >- * 2) Copy MCE# error bank to impacted DOM node list, >- for vMCE# MSRs virtualization >-*/ >- >-static int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d, >- uint64_t gstatus) { >- struct bank_entry *entry; >- >- /* This error bank impacts one domain, we need to fill domain related >- * data for vMCE MSRs virtualization and vMCE# injection */ >- if (mc_bank->mc_domid != (uint16_t)~0) { >- /* For HVM guest, Only when first vMCE is consumed by HVM guest >successfully, >- * will we generete another node and inject another vMCE >- */ >- if ( (d->is_hvm) && (d->arch.vmca_msrs.nr_injection > 0) ) >- { >- mce_printk(MCE_QUIET, "MCE: HVM guest has not handled >previous" >- " vMCE yet!\n"); >- return -1; >- } >- entry = alloc_bank_entry(); >- if (entry == NULL) >- return -1; >- >- entry->mci_status = mc_bank->mc_status; >- entry->mci_addr = mc_bank->mc_addr; >- entry->mci_misc = mc_bank->mc_misc; >- entry->bank = mc_bank->mc_bank; >- >- spin_lock(&d->arch.vmca_msrs.lock); >- /* New error Node, insert to the tail of the per_dom data */ >- list_add_tail(&entry->list, &d->arch.vmca_msrs.impact_header); >- /* Fill 
MSR global status */ >- d->arch.vmca_msrs.mcg_status = gstatus; >- /* New node impact the domain, need another vMCE# injection*/ >- d->arch.vmca_msrs.nr_injection++; >- spin_unlock(&d->arch.vmca_msrs.lock); >- >- mce_printk(MCE_VERBOSE,"MCE: Found error @[BANK%d " >- "status %"PRIx64" addr %"PRIx64" domid %d]\n ", >- mc_bank->mc_bank, mc_bank->mc_status, >mc_bank->mc_addr, >- mc_bank->mc_domid); >- } >- return 0; >-} >- >-static int inject_mce(struct domain *d) >-{ >- int cpu = smp_processor_id(); >- cpumask_t affinity; >- >- /* PV guest and HVM guest have different vMCE# injection >- * methods*/ >- >- if ( !test_and_set_bool(d->vcpu[0]->mce_pending) ) >- { >- if (d->is_hvm) >- { >- mce_printk(MCE_VERBOSE, "MCE: inject vMCE to HVM DOM %d\n", >- d->domain_id); >- vcpu_kick(d->vcpu[0]); >- } >- /* PV guest including DOM0 */ >- else >- { >- mce_printk(MCE_VERBOSE, "MCE: inject vMCE to PV DOM%d\n", >- d->domain_id); >- if (guest_has_trap_callback >- (d, 0, TRAP_machine_check)) >- { >- d->vcpu[0]->cpu_affinity_tmp = >- d->vcpu[0]->cpu_affinity; >- cpus_clear(affinity); >- cpu_set(cpu, affinity); >- mce_printk(MCE_VERBOSE, "MCE: CPU%d set affinity, >old %d\n", cpu, >- d->vcpu[0]->processor); >- vcpu_set_affinity(d->vcpu[0], &affinity); >- vcpu_kick(d->vcpu[0]); >- } >- else >- { >- mce_printk(MCE_VERBOSE, "MCE: Kill PV guest with No MCE >handler\n"); >- domain_crash(d); >- } >- } >- } >- else { >- /* new vMCE comes while first one has not been injected yet, >- * in this case, inject fail. [We can't lose this vMCE for >- * the mce node's consistency]. 
>- */ >- mce_printk(MCE_QUIET, "There's a pending vMCE waiting to be injected >" >- " to this DOM%d!\n", d->domain_id); >- return -1; >- } >- return 0; >-} > > static void intel_UCR_handler(struct mcinfo_bank *bank, > struct mcinfo_global *global, >@@ -377,7 +258,7 @@ static void intel_UCR_handler(struct mci > return; > } > /* We will inject vMCE to DOMU*/ >- if ( inject_mce(d) < 0 ) >+ if ( inject_vmce(d) < 0 ) > { > mce_printk(MCE_QUIET, "inject vMCE to >DOM%d" > " failed\n", d->domain_id); >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/cpu/mcheck/vmce.c >--- /dev/null Thu Jan 01 00:00:00 1970 +0000 >+++ b/xen/arch/x86/cpu/mcheck/vmce.c Fri Apr 16 18:55:03 2010 +0800 >@@ -0,0 +1,451 @@ >+/* >+ * vmce.c - virtual MCE support >+ */ >+ >+#include <xen/init.h> >+#include <xen/types.h> >+#include <xen/irq.h> >+#include <xen/event.h> >+#include <xen/kernel.h> >+#include <xen/delay.h> >+#include <xen/smp.h> >+#include <xen/mm.h> >+#include <asm/processor.h> >+#include <public/sysctl.h> >+#include <asm/system.h> >+#include <asm/msr.h> >+#include <asm/p2m.h> >+#include "mce.h" >+#include "x86_mca.h" >+ >+int vmce_init_msr(struct domain *d) >+{ >+ if ( dom_vmce(d) ) >+ { >+ dprintk(XENLOG_G_WARNING, "Domain %d has inited vMCE\n", >d->domain_id); >+ return 0; >+ } >+ >+ /* Allocate the vmca_msrs and mci_ctl togother */ >+ dom_vmce(d) = xmalloc(struct domain_mca_msrs); >+ if ( !dom_vmce(d) ) >+ return -ENOMEM; >+ >+ dom_vmce(d)->mci_ctl = xmalloc_array(uint64_t, nr_mce_banks); >+ if ( !dom_vmce(d)->mci_ctl ) >+ { >+ xfree(dom_vmce(d)); >+ return -ENOMEM; >+ } >+ memset(d->arch.vmca_msrs->mci_ctl, ~0, >+ sizeof(d->arch.vmca_msrs->mci_ctl)); >+ >+ dom_vmce(d)->mcg_status = 0x0; >+ dom_vmce(d)->mcg_cap = g_mcg_cap; >+ dom_vmce(d)->mcg_ctl = ~(uint64_t)0x0; >+ dom_vmce(d)->nr_injection = 0; >+ >+ INIT_LIST_HEAD(&d->arch.vmca_msrs->impact_header); >+ spin_lock_init(&d->arch.vmca_msrs->lock); >+ >+ return 0; >+} >+ >+/* >+ * Caller should make sure msr is bank msr */ >+static 
int bank_mce_rdmsr(struct domain *d, uint32_t msr, uint64_t *val) >+{ >+ int bank, ret = 1; >+ struct domain_mca_msrs *vmce; >+ struct bank_entry *entry = NULL; >+ >+ if (!d) >+ return -EINVAL; >+ vmce = dom_vmce(d); >+ ASSERT(vmce); >+ >+ bank = (msr - MSR_IA32_MC0_CTL) / 4; >+ if (bank >= nr_mce_banks) >+ return -1; >+ >+ switch (msr & (MSR_IA32_MC0_CTL | 3)) >+ { >+ case MSR_IA32_MC0_CTL: >+ *val = vmce->mci_ctl[bank] & >+ (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL); >+ mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL 0x%"PRIx64"\n", >+ bank, *val); >+ break; >+ case MSR_IA32_MC0_STATUS: >+ /* Only error bank is read. Non-error banks simply return. */ >+ if ( !list_empty(&vmce->impact_header) ) >+ { >+ entry = list_entry(vmce->impact_header.next, >+ struct bank_entry, list); >+ if (entry->bank == bank) { >+ *val = entry->mci_status; >+ mce_printk(MCE_VERBOSE, >+ "MCE: rd MC%u_STATUS in vMCE# context " >+ "value 0x%"PRIx64"\n", bank, *val); >+ } >+ else >+ entry = NULL; >+ } >+ break; >+ case MSR_IA32_MC0_ADDR: >+ if ( !list_empty(&vmce->impact_header) ) >+ { >+ entry = list_entry(vmce->impact_header.next, >+ struct bank_entry, list); >+ if ( entry->bank == bank ) >+ { >+ *val = entry->mci_addr; >+ mce_printk(MCE_VERBOSE, >+ "MCE: rdmsr MC%u_ADDR in vMCE# context " >+ "0x%"PRIx64"\n", bank, *val); >+ } >+ } >+ break; >+ case MSR_IA32_MC0_MISC: >+ if ( !list_empty(&vmce->impact_header) ) >+ { >+ entry = list_entry(vmce->impact_header.next, >+ struct bank_entry, list); >+ if ( entry->bank == bank ) >+ { >+ *val = entry->mci_misc; >+ mce_printk(MCE_VERBOSE, >+ "MCE: rd MC%u_MISC in vMCE# context " >+ "0x%"PRIx64"\n", bank, *val); >+ } >+ } >+ break; >+ default: >+ switch ( boot_cpu_data.x86_vendor ) >+ { >+ case X86_VENDOR_INTEL: >+ ret = intel_mce_rdmsr(msr, val); >+ break; >+ default: >+ ret = 0; >+ break; >+ } >+ break; >+ } >+ >+ return ret; >+} >+ >+/* >+ * < 0: Unsupported and will #GP fault to guest >+ * = 0: Not handled, should be handled by other components >+ * > 
0: Success >+ */ >+int vmce_rdmsr(uint32_t msr, uint64_t *val) >+{ >+ struct domain *d = current->domain; >+ struct domain_mca_msrs *vmce; >+ int ret = 1; >+ >+ *val = 0; >+ >+ vmce = dom_vmce(d); >+ if ( !vmce ) >+ { >+ /* XXX more handle here */ >+ return 0; >+ } >+ >+ spin_lock(&d->arch.vmca_msrs->lock); >+ >+ switch ( msr ) >+ { >+ case MSR_IA32_MCG_STATUS: >+ *val = vmce->mcg_status; >+ if (*val) >+ mce_printk(MCE_VERBOSE, >+ "MCE: rdmsr MCG_STATUS 0x%"PRIx64"\n", *val); >+ break; >+ case MSR_IA32_MCG_CAP: >+ *val = vmce->mcg_cap; >+ mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CAP 0x%"PRIx64"\n", >+ *val); >+ break; >+ case MSR_IA32_MCG_CTL: >+ /* Always 0 if no CTL support */ >+ *val = vmce->mcg_ctl & h_mcg_ctl; >+ mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n", >+ *val); >+ break; >+ default: >+ if ( mce_bank_msr(msr) ) >+ ret = bank_mce_rdmsr(d, msr, val); >+ else >+ ret = 0; >+ break; >+ } >+ >+ spin_unlock(&d->arch.vmca_msrs->lock); >+ return ret; >+} >+ >+int bank_mce_wrmsr(struct domain *d, u32 msr, u64 val) >+{ >+ int bank, ret = 1; >+ struct domain_mca_msrs *vmce; >+ struct bank_entry *entry = NULL; >+ >+ if (!d) >+ return -EINVAL; >+ vmce = dom_vmce(d); >+ ASSERT(vmce && vmce->mci_ctl); >+ >+ bank = (msr - MSR_IA32_MC0_CTL) / 4; >+ if (bank >= nr_mce_banks) >+ return -EINVAL; >+ >+ switch ( msr & (MSR_IA32_MC0_CTL | 3) ) >+ { >+ case MSR_IA32_MC0_CTL: >+ vmce->mci_ctl[bank] = val; >+ break; >+ case MSR_IA32_MC0_STATUS: >+ /* Give the first entry of the list, it corresponds to current >+ * vMCE# injection. When vMCE# is finished processing by the >+ * the guest, this node will be deleted. >+ * Only error bank is written. Non-error banks simply return. 
>+ */ >+ if ( !list_empty(&d->arch.vmca_msrs->impact_header) ) >+ { >+ entry = list_entry(d->arch.vmca_msrs->impact_header.next, >+ struct bank_entry, list); >+ if ( entry->bank == bank ) >+ entry->mci_status = val; >+ mce_printk(MCE_VERBOSE, >+ "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n", >+ bank, val); >+ } >+ else >+ mce_printk(MCE_VERBOSE, >+ "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, val); >+ break; >+ case MSR_IA32_MC0_ADDR: >+ mce_printk(MCE_QUIET, "MCE: MC%u_ADDR is read-only\n", >bank); >+ ret = -1; >+ break; >+ case MSR_IA32_MC0_MISC: >+ mce_printk(MCE_QUIET, "MCE: MC%u_MISC is read-only\n", bank); >+ ret = -1; >+ break; >+ default: >+ switch ( boot_cpu_data.x86_vendor ) >+ { >+ case X86_VENDOR_INTEL: >+ ret = intel_mce_wrmsr(msr, val); >+ break; >+ default: >+ ret = 0; >+ break; >+ } >+ break; >+ } >+ >+ return ret; >+} >+ >+/* >+ * < 0: Unsupported and will #GP fault to guest >+ * = 0: Not handled, should be handled by other components >+ * > 0: Success >+ */ >+int vmce_wrmsr(u32 msr, u64 val) >+{ >+ struct domain *d = current->domain; >+ struct bank_entry *entry = NULL; >+ struct domain_mca_msrs *vmce; >+ int ret = 1; >+ >+ if ( !g_mcg_cap ) >+ return 0; >+ >+ vmce = dom_vmce(d); >+ spin_lock(&vmce->lock); >+ >+ switch ( msr ) >+ { >+ case MSR_IA32_MCG_CTL: >+ vmce->mcg_ctl = val; >+ break; >+ case MSR_IA32_MCG_STATUS: >+ vmce->mcg_status = val; >+ mce_printk(MCE_VERBOSE, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", >val); >+ /* For HVM guest, this is the point for deleting vMCE injection node >*/ >+ if ( d->is_hvm && (vmce->nr_injection > 0) ) >+ { >+ vmce->nr_injection--; /* Should be 0 */ >+ if ( !list_empty(&vmce->impact_header) ) >+ { >+ entry = list_entry(vmce->impact_header.next, >+ struct bank_entry, list); >+ if ( entry->mci_status & MCi_STATUS_VAL ) >+ mce_printk(MCE_QUIET, "MCE: MCi_STATUS MSR should >have " >+ "been cleared before write MCG_STATUS >MSR\n"); >+ >+ mce_printk(MCE_QUIET, "MCE: Delete HVM last injection " >+ "Node, nr_injection %u\n", >+ 
vmce->nr_injection); >+ list_del(&entry->list); >+ xfree(entry); >+ } >+ else >+ mce_printk(MCE_QUIET, "MCE: Not found HVM guest" >+ " last injection Node, something Wrong!\n"); >+ } >+ break; >+ case MSR_IA32_MCG_CAP: >+ mce_printk(MCE_QUIET, "MCE: MCG_CAP is read-only\n"); >+ ret = -1; >+ break; >+ default: >+ if ( mce_bank_msr(msr) ) >+ ret = bank_mce_wrmsr(d, msr, val); >+ else >+ ret = 0; >+ break; >+ } >+ >+ spin_unlock(&vmce->lock); >+ return ret; >+} >+ >+int inject_vmce(struct domain *d) >+{ >+ int cpu = smp_processor_id(); >+ cpumask_t affinity; >+ >+ /* PV guest and HVM guest have different vMCE# injection >+ * methods*/ >+ if ( !test_and_set_bool(d->vcpu[0]->mce_pending) ) >+ { >+ if (d->is_hvm) >+ { >+ mce_printk(MCE_VERBOSE, "MCE: inject vMCE to HVM >DOM %d\n", >+ d->domain_id); >+ vcpu_kick(d->vcpu[0]); >+ } >+ /* PV guest including DOM0 */ >+ else >+ { >+ mce_printk(MCE_VERBOSE, "MCE: inject vMCE to PV DOM%d\n", >+ d->domain_id); >+ if (guest_has_trap_callback >+ (d, 0, TRAP_machine_check)) >+ { >+ d->vcpu[0]->cpu_affinity_tmp = >+ d->vcpu[0]->cpu_affinity; >+ cpus_clear(affinity); >+ cpu_set(cpu, affinity); >+ mce_printk(MCE_VERBOSE, "MCE: CPU%d set affinity, >old %d\n", cpu, >+ d->vcpu[0]->processor); >+ vcpu_set_affinity(d->vcpu[0], &affinity); >+ vcpu_kick(d->vcpu[0]); >+ } >+ else >+ { >+ mce_printk(MCE_VERBOSE, "MCE: Kill PV guest with No MCE >handler\n"); >+ domain_crash(d); >+ } >+ } >+ } >+ else { >+ /* new vMCE comes while first one has not been injected yet, >+ * in this case, inject fail. [We can't lose this vMCE for >+ * the mce node's consistency]. >+ */ >+ mce_printk(MCE_QUIET, "There's a pending vMCE waiting to be injected >" >+ " to this DOM%d!\n", d->domain_id); >+ return -1; >+ } >+ return 0; >+} >+ >+/* This node list records errors impacting a domain. when one >+ * MCE# happens, one error bank impacts a domain. This error node >+ * will be inserted to the tail of the per_dom data for vMCE# MSR >+ * virtualization. 
When one vMCE# injection is finished processing >+ * processed by guest, the corresponding node will be deleted. >+ * This node list is for GUEST vMCE# MSRS virtualization. >+ */ >+static struct bank_entry* alloc_bank_entry(void) { >+ struct bank_entry *entry; >+ >+ entry = xmalloc(struct bank_entry); >+ if (!entry) { >+ printk(KERN_ERR "MCE: malloc bank_entry failed\n"); >+ return NULL; >+ } >+ memset(entry, 0x0, sizeof(entry)); >+ INIT_LIST_HEAD(&entry->list); >+ return entry; >+} >+ >+/* Fill error bank info for #vMCE injection and GUEST vMCE# >+ * MSR virtualization data >+ * 1) Log down how many nr_injections of the impacted. >+ * 2) Copy MCE# error bank to impacted DOM node list, >+ for vMCE# MSRs virtualization >+*/ >+ >+int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d, >+ uint64_t gstatus) { >+ struct bank_entry *entry; >+ >+ /* This error bank impacts one domain, we need to fill domain related >+ * data for vMCE MSRs virtualization and vMCE# injection */ >+ if (mc_bank->mc_domid != (uint16_t)~0) { >+ /* For HVM guest, Only when first vMCE is consumed by HVM guest >successfully, >+ * will we generete another node and inject another vMCE >+ */ >+ if ( (d->is_hvm) && (d->arch.vmca_msrs->nr_injection > 0) ) >+ { >+ mce_printk(MCE_QUIET, "MCE: HVM guest has not handled >previous" >+ " vMCE yet!\n"); >+ return -1; >+ } >+ entry = alloc_bank_entry(); >+ if (entry == NULL) >+ return -1; >+ >+ entry->mci_status = mc_bank->mc_status; >+ entry->mci_addr = mc_bank->mc_addr; >+ entry->mci_misc = mc_bank->mc_misc; >+ entry->bank = mc_bank->mc_bank; >+ >+ spin_lock(&d->arch.vmca_msrs->lock); >+ /* New error Node, insert to the tail of the per_dom data */ >+ list_add_tail(&entry->list, &d->arch.vmca_msrs->impact_header); >+ /* Fill MSR global status */ >+ d->arch.vmca_msrs->mcg_status = gstatus; >+ /* New node impact the domain, need another vMCE# injection*/ >+ d->arch.vmca_msrs->nr_injection++; >+ spin_unlock(&d->arch.vmca_msrs->lock); >+ >+ 
mce_printk(MCE_VERBOSE,"MCE: Found error @[BANK%d " >+ "status %"PRIx64" addr %"PRIx64" domid %d]\n ", >+ mc_bank->mc_bank, mc_bank->mc_status, >mc_bank->mc_addr, >+ mc_bank->mc_domid); >+ } >+ return 0; >+} >+ >+int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d, struct >mcinfo_global *global) >+{ >+ int ret; >+ >+ ret = fill_vmsr_data(bank, d, global->mc_gstatus); >+ if (ret < 0) >+ return ret; >+ >+ return inject_vmce(d); >+} >+ >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/domain.c >--- a/xen/arch/x86/domain.c Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/arch/x86/domain.c Fri Apr 16 18:55:03 2010 +0800 >@@ -49,6 +49,7 @@ > #include <asm/msr.h> > #include <asm/traps.h> > #include <asm/nmi.h> >+#include <asm/mce.h> > #include <xen/numa.h> > #include <xen/iommu.h> > #ifdef CONFIG_COMPAT >@@ -501,7 +502,7 @@ int arch_domain_create(struct domain *d, > goto fail; > > /* For Guest vMCE MSRs virtualization */ >- mce_init_msr(d); >+ vmce_init_msr(d); > } > > if ( is_hvm_domain(d) ) >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/hvm/hvm.c >--- a/xen/arch/x86/hvm/hvm.c Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/arch/x86/hvm/hvm.c Fri Apr 16 18:55:03 2010 +0800 >@@ -47,6 +47,7 @@ > #include <asm/traps.h> > #include <asm/mc146818rtc.h> > #include <asm/spinlock.h> >+#include <asm/mce.h> > #include <asm/hvm/hvm.h> > #include <asm/hvm/vpt.h> > #include <asm/hvm/support.h> >@@ -2061,7 +2062,7 @@ int hvm_msr_read_intercept(struct cpu_us > break; > > default: >- ret = mce_rdmsr(ecx, &msr_content); >+ ret = vmce_rdmsr(ecx, &msr_content); > if ( ret < 0 ) > goto gp_fault; > else if ( ret ) >@@ -2160,7 +2161,7 @@ int hvm_msr_write_intercept(struct cpu_u > break; > > default: >- ret = mce_wrmsr(ecx, msr_content); >+ ret = vmce_wrmsr(ecx, msr_content); > if ( ret < 0 ) > goto gp_fault; > else if ( ret ) >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/arch/x86/traps.c >--- a/xen/arch/x86/traps.c Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/arch/x86/traps.c Fri Apr 16 
18:55:03 2010 +0800 >@@ -65,6 +65,7 @@ > #include <asm/traps.h> > #include <asm/hvm/vpt.h> > #include <asm/hypercall.h> >+#include <asm/mce.h> > #include <public/arch-x86/cpuid.h> > > /* >@@ -2295,7 +2296,7 @@ static int emulate_privileged_op(struct > if ( wrmsr_hypervisor_regs(regs->ecx, val) ) > break; > >- rc = mce_wrmsr(regs->ecx, val); >+ rc = vmce_wrmsr(regs->ecx, val); > if ( rc < 0 ) > goto fail; > if ( rc ) >@@ -2388,7 +2389,7 @@ static int emulate_privileged_op(struct > break; > } > >- rc = mce_rdmsr(regs->ecx, &val); >+ rc = vmce_rdmsr(regs->ecx, &val); > if ( rc < 0 ) > goto fail; > if ( rc ) >@@ -2947,19 +2948,19 @@ void async_exception_cleanup(struct vcpu > { > struct domain *d = curr->domain; > >- if ( !d->arch.vmca_msrs.nr_injection ) >+ if ( !d->arch.vmca_msrs->nr_injection ) > { > printk(XENLOG_WARNING "MCE: ret from vMCE#, " > "no injection node\n"); > goto end; > } > >- d->arch.vmca_msrs.nr_injection--; >- if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) >+ d->arch.vmca_msrs->nr_injection--; >+ if ( !list_empty(&d->arch.vmca_msrs->impact_header) ) > { > struct bank_entry *entry; > >- entry = list_entry(d->arch.vmca_msrs.impact_header.next, >+ entry = list_entry(d->arch.vmca_msrs->impact_header.next, > struct bank_entry, list); > gdprintk(XENLOG_DEBUG, "MCE: delete last injection >node\n"); > list_del(&entry->list); >@@ -2968,7 +2969,7 @@ void async_exception_cleanup(struct vcpu > printk(XENLOG_ERR "MCE: didn't found last injection node\n"); > > /* further injection */ >- if ( d->arch.vmca_msrs.nr_injection > 0 && >+ if ( d->arch.vmca_msrs->nr_injection > 0 && > guest_has_trap_callback(d, 0, TRAP_machine_check) && > !test_and_set_bool(curr->mce_pending) ) > { >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/common/domain.c >--- a/xen/common/domain.c Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/common/domain.c Fri Apr 16 18:55:03 2010 +0800 >@@ -616,6 +616,8 @@ static void complete_domain_destroy(stru > > xfree(d->pirq_mask); > xfree(d->pirq_to_evtchn); 
>+ xfree(dom_vmce(d)->mci_ctl); >+ xfree(dom_vmce(d)); > > xsm_free_security_domain(d); > free_domain_struct(d); >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/include/asm-x86/domain.h >--- a/xen/include/asm-x86/domain.h Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/include/asm-x86/domain.h Fri Apr 16 18:55:03 2010 +0800 >@@ -6,6 +6,7 @@ > #include <asm/hvm/vcpu.h> > #include <asm/hvm/domain.h> > #include <asm/e820.h> >+#include <asm/mce.h> > #include <public/vcpu.h> > > #define has_32bit_shinfo(d) ((d)->arch.has_32bit_shinfo) >@@ -214,32 +215,6 @@ typedef xen_domctl_cpuid_t cpuid_input_t > typedef xen_domctl_cpuid_t cpuid_input_t; > > struct p2m_domain; >- >-/* Define for GUEST MCA handling */ >-#define MAX_NR_BANKS 30 >- >-/* This entry is for recording bank nodes for the impacted domain, >- * put into impact_header list. */ >-struct bank_entry { >- struct list_head list; >- uint16_t bank; >- uint64_t mci_status; >- uint64_t mci_addr; >- uint64_t mci_misc; >-}; >- >-struct domain_mca_msrs >-{ >- /* Guest should not change below values after DOM boot up */ >- uint64_t mcg_cap; >- uint64_t mcg_ctl; >- uint64_t mcg_status; >- uint64_t mci_ctl[MAX_NR_BANKS]; >- uint16_t nr_injection; >- struct list_head impact_header; >- spinlock_t lock; >-}; >- > struct time_scale { > int shift; > u32 mul_frac; >@@ -311,7 +286,7 @@ struct arch_domain > cpuid_input_t cpuids[MAX_CPUID_INPUT]; > > /* For Guest vMCA handling */ >- struct domain_mca_msrs vmca_msrs; >+ struct domain_mca_msrs *vmca_msrs; > > /* TSC management (emulation, pv, scaling, stats) */ > int tsc_mode; /* see include/asm-x86/time.h */ >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/include/asm-x86/mce.h >--- /dev/null Thu Jan 01 00:00:00 1970 +0000 >+++ b/xen/include/asm-x86/mce.h Fri Apr 16 18:55:03 2010 +0800 >@@ -0,0 +1,36 @@ >+#include <xen/types.h> >+#include <public/arch-x86/xen-mca.h> >+#ifndef _XEN_X86_MCE_H >+#define _XEN_X86_MCE_H >+/* Define for GUEST MCA handling */ >+#define MAX_NR_BANKS 30 >+ >+/* This entry is for 
recording bank nodes for the impacted domain, >+ * put into impact_header list. */ >+struct bank_entry { >+ struct list_head list; >+ uint16_t bank; >+ uint64_t mci_status; >+ uint64_t mci_addr; >+ uint64_t mci_misc; >+}; >+ >+struct domain_mca_msrs >+{ >+ /* Guest should not change below values after DOM boot up */ >+ uint64_t mcg_cap; >+ uint64_t mcg_ctl; >+ uint64_t mcg_status; >+ uint64_t *mci_ctl; >+ uint16_t nr_injection; >+ struct list_head impact_header; >+ spinlock_t lock; >+}; >+ >+#define dom_vmce(x) ((x)->arch.vmca_msrs) >+ >+/* Guest vMCE MSRs virtualization */ >+extern int vmce_init_msr(struct domain *d); >+extern int vmce_wrmsr(uint32_t msr, uint64_t val); >+extern int vmce_rdmsr(uint32_t msr, uint64_t *val); >+#endif >diff -r 7ee8bb40200a -r b4fd50c22d9c xen/include/asm-x86/traps.h >--- a/xen/include/asm-x86/traps.h Thu Apr 15 19:11:16 2010 +0100 >+++ b/xen/include/asm-x86/traps.h Fri Apr 16 18:55:03 2010 +0800 >@@ -49,9 +49,4 @@ extern int send_guest_trap(struct domain > extern int send_guest_trap(struct domain *d, uint16_t vcpuid, > unsigned int trap_nr); > >-/* Guest vMCE MSRs virtualization */ >-extern void mce_init_msr(struct domain *d); >-extern int mce_wrmsr(uint32_t msr, uint64_t val); >-extern int mce_rdmsr(uint32_t msr, uint64_t *val); >- > #endif /* ASM_TRAP_H */ > > > >_______________________________________________ >Xen-devel mailing list >Xen-devel@xxxxxxxxxxxxxxxxxxx >http://lists.xensource.com/xen-devel Attachment:
vmce_seperate_file.patch _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |