[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] x86: vMCE emulation
# HG changeset patch # User Liu, Jinsong <jinsong.liu@xxxxxxxxx> # Date 1348653840 -7200 # Node ID 08b7e65a5d936bb766f076dde2d026569ad60e4c # Parent 8278d7d8fa485996f51134c5265fceaf239adf6a x86: vMCE emulation This patch provides virtual MCE support to guests. It emulates a simple and clean MCE MSR interface for the guest by faking capabilities where needed and masking capabilities that are unnecessary: 1. Providing a well-defined MCG_CAP to the guest, filtering out unnecessary caps and providing only the caps the guest needs; 2. Disabling MCG_CTL to avoid model-specific behavior; 3. Sticking all 1's to MCi_CTL for the guest to avoid model-specific behavior; 4. Enabling the CMCI cap but never actually injecting CMCI into the guest, so that the guest does not poll periodically; 5. Masking the MSCOD field of MCi_STATUS to avoid model-specific error codes; 6. Keeping natural semantics by using per-vcpu instead of per-domain variables; 7. Using bank1 and reserving bank0 to work around the 'bank0 quirk' of some very old processors; 8. Cleaning up some vMCE# injection logic which is shared by Intel and AMD but is useless under the new vMCE implementation; 9. 
Keeping compatible w/ the old xen version which has been backported to SLES11 SP2, so that old vMCE would not be blocked when migrating to new vMCE; Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx> - make printing consistent (and non-exploitable) - fix return values of intel_mce_{rd,wr}msr() for out of range banks - miscellaneous cleanup Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> Committed-by: Jan Beulich <jbeulich@xxxxxxxx> --- diff -r 8278d7d8fa48 -r 08b7e65a5d93 xen/arch/x86/cpu/mcheck/mce.h --- a/xen/arch/x86/cpu/mcheck/mce.h Wed Sep 26 11:56:07 2012 +0200 +++ b/xen/arch/x86/cpu/mcheck/mce.h Wed Sep 26 12:04:00 2012 +0200 @@ -169,15 +169,13 @@ void x86_mcinfo_dump(struct mc_info *mi) int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d, uint64_t gstatus); int inject_vmce(struct domain *d); -int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d, - struct mcinfo_global *global); static inline int mce_vendor_bank_msr(const struct vcpu *v, uint32_t msr) { switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: if (msr >= MSR_IA32_MC0_CTL2 && - msr < MSR_IA32_MCx_CTL2(v->arch.mcg_cap & MCG_CAP_COUNT) ) + msr < MSR_IA32_MCx_CTL2(v->arch.vmce.mcg_cap & MCG_CAP_COUNT) ) return 1; break; case X86_VENDOR_AMD: @@ -195,7 +193,7 @@ static inline int mce_vendor_bank_msr(co static inline int mce_bank_msr(const struct vcpu *v, uint32_t msr) { if ( (msr >= MSR_IA32_MC0_CTL && - msr < MSR_IA32_MCx_CTL(v->arch.mcg_cap & MCG_CAP_COUNT)) || + msr < MSR_IA32_MCx_CTL(v->arch.vmce.mcg_cap & MCG_CAP_COUNT)) || mce_vendor_bank_msr(v, msr) ) return 1; return 0; diff -r 8278d7d8fa48 -r 08b7e65a5d93 xen/arch/x86/cpu/mcheck/mce_intel.c --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Wed Sep 26 11:56:07 2012 +0200 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Wed Sep 26 12:04:00 2012 +0200 @@ -982,31 +982,27 @@ enum mcheck_type intel_mcheck_init(struc /* intel specific MCA MSR */ int intel_mce_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) { - int ret = 0; + unsigned int 
bank = msr - MSR_IA32_MC0_CTL2; - if ( msr >= MSR_IA32_MC0_CTL2 && - msr < MSR_IA32_MCx_CTL2(v->arch.mcg_cap & MCG_CAP_COUNT) ) + if ( bank < GUEST_MC_BANK_NUM ) { - mce_printk(MCE_QUIET, "We have disabled CMCI capability, " - "Guest should not write this MSR!\n"); - ret = 1; + v->arch.vmce.bank[bank].mci_ctl2 = val; + mce_printk(MCE_VERBOSE, "MCE: wr MC%u_CTL2 %#"PRIx64"\n", bank, val); } - return ret; + return 1; } int intel_mce_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) { - int ret = 0; + unsigned int bank = msr - MSR_IA32_MC0_CTL2; - if ( msr >= MSR_IA32_MC0_CTL2 && - msr < MSR_IA32_MCx_CTL2(v->arch.mcg_cap & MCG_CAP_COUNT) ) + if ( bank < GUEST_MC_BANK_NUM ) { - mce_printk(MCE_QUIET, "We have disabled CMCI capability, " - "Guest should not read this MSR!\n"); - ret = 1; + *val = v->arch.vmce.bank[bank].mci_ctl2; + mce_printk(MCE_VERBOSE, "MCE: rd MC%u_CTL2 %#"PRIx64"\n", bank, *val); } - return ret; + return 1; } diff -r 8278d7d8fa48 -r 08b7e65a5d93 xen/arch/x86/cpu/mcheck/vmce.c --- a/xen/arch/x86/cpu/mcheck/vmce.c Wed Sep 26 11:56:07 2012 +0200 +++ b/xen/arch/x86/cpu/mcheck/vmce.c Wed Sep 26 12:04:00 2012 +0200 @@ -1,5 +1,22 @@ /* - * vmce.c - virtual MCE support + * vmce.c - provide software emulated vMCE support to guest + * + * Copyright (C) 2010, 2011 Jiang, Yunhong <yunhong.jiang@xxxxxxxxx> + * Copyright (C) 2012, 2013 Liu, Jinsong <jinsong.liu@xxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <xen/init.h> @@ -20,66 +37,67 @@ #include "x86_mca.h" /* - * Emulate 2 banks for guest - * Bank0: reserved for 'bank0 quirk' occur at some very old processors: - * 1). Intel cpu whose family-model value < 06-1A; - * 2). AMD K7 - * Bank1: used to transfer error info to guest + * MCG_SER_P: software error recovery supported + * MCG_TES_P: to avoid MCi_status bit56:53 model specific + * MCG_CMCI_P: expose CMCI capability but never really inject it to guest, + * for sake of performance since guest not polling periodically */ -#define GUEST_BANK_NUM 2 -#define GUEST_MCG_CAP (MCG_TES_P | MCG_SER_P | GUEST_BANK_NUM) +#define INTEL_GUEST_MCG_CAP (MCG_SER_P | \ + MCG_TES_P | \ + MCG_CMCI_P | \ + GUEST_MC_BANK_NUM) -#define dom_vmce(x) ((x)->arch.vmca_msrs) - -int vmce_init_msr(struct domain *d) -{ - dom_vmce(d) = xmalloc(struct domain_mca_msrs); - if ( !dom_vmce(d) ) - return -ENOMEM; - - dom_vmce(d)->mcg_status = 0x0; - dom_vmce(d)->nr_injection = 0; - - INIT_LIST_HEAD(&dom_vmce(d)->impact_header); - spin_lock_init(&dom_vmce(d)->lock); - - return 0; -} - -void vmce_destroy_msr(struct domain *d) -{ - if ( !dom_vmce(d) ) - return; - xfree(dom_vmce(d)); - dom_vmce(d) = NULL; -} +#define AMD_GUEST_MCG_CAP GUEST_MC_BANK_NUM void vmce_init_vcpu(struct vcpu *v) { - v->arch.mcg_cap = GUEST_MCG_CAP; + int i; + + /* global MCA MSRs init */ + if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) + v->arch.vmce.mcg_cap = INTEL_GUEST_MCG_CAP; + else + v->arch.vmce.mcg_cap = AMD_GUEST_MCG_CAP; + + v->arch.vmce.mcg_status = 0; + + /* per-bank MCA MSRs init */ + for ( i = 0; i < GUEST_MC_BANK_NUM; i++ ) + memset(&v->arch.vmce.bank[i], 0, sizeof(struct vmce_bank)); + + spin_lock_init(&v->arch.vmce.lock); } int vmce_restore_vcpu(struct vcpu *v, uint64_t caps) { - if ( 
caps & ~GUEST_MCG_CAP & ~MCG_CAP_COUNT & ~MCG_CTL_P ) + unsigned long guest_mcg_cap; + + if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) + guest_mcg_cap = INTEL_GUEST_MCG_CAP; + else + guest_mcg_cap = AMD_GUEST_MCG_CAP; + + if ( caps & ~guest_mcg_cap & ~MCG_CAP_COUNT & ~MCG_CTL_P ) { dprintk(XENLOG_G_ERR, "%s restore: unsupported MCA capabilities" " %#" PRIx64 " for d%d:v%u (supported: %#Lx)\n", is_hvm_vcpu(v) ? "HVM" : "PV", caps, v->domain->domain_id, - v->vcpu_id, GUEST_MCG_CAP & ~MCG_CAP_COUNT); + v->vcpu_id, guest_mcg_cap & ~MCG_CAP_COUNT); return -EPERM; } - v->arch.mcg_cap = caps; + v->arch.vmce.mcg_cap = caps; return 0; } +/* + * For historic version reason, bank number may greater than GUEST_MC_BANK_NUM, + * when migrating from old vMCE version to new vMCE. + */ static int bank_mce_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) { int ret = 1; unsigned int bank = (msr - MSR_IA32_MC0_CTL) / 4; - struct domain_mca_msrs *vmce = dom_vmce(v->domain); - struct bank_entry *entry; *val = 0; @@ -88,50 +106,33 @@ static int bank_mce_rdmsr(const struct v case MSR_IA32_MC0_CTL: /* stick all 1's to MCi_CTL */ *val = ~0UL; - mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL %#"PRIx64"\n", - bank, *val); + mce_printk(MCE_VERBOSE, "MCE: rd MC%u_CTL %#"PRIx64"\n", bank, *val); break; case MSR_IA32_MC0_STATUS: - /* Only error bank is read. Non-error banks simply return. 
*/ - if ( !list_empty(&vmce->impact_header) ) + if ( bank < GUEST_MC_BANK_NUM ) { - entry = list_entry(vmce->impact_header.next, - struct bank_entry, list); - if ( entry->bank == bank ) - { - *val = entry->mci_status; - mce_printk(MCE_VERBOSE, - "MCE: rd MC%u_STATUS in vMCE# context " - "value %#"PRIx64"\n", bank, *val); - } + *val = v->arch.vmce.bank[bank].mci_status; + if ( *val ) + mce_printk(MCE_VERBOSE, "MCE: rd MC%u_STATUS %#"PRIx64"\n", + bank, *val); } break; case MSR_IA32_MC0_ADDR: - if ( !list_empty(&vmce->impact_header) ) + if ( bank < GUEST_MC_BANK_NUM ) { - entry = list_entry(vmce->impact_header.next, - struct bank_entry, list); - if ( entry->bank == bank ) - { - *val = entry->mci_addr; - mce_printk(MCE_VERBOSE, - "MCE: rdmsr MC%u_ADDR in vMCE# context " - "%#"PRIx64"\n", bank, *val); - } + *val = v->arch.vmce.bank[bank].mci_addr; + if ( *val ) + mce_printk(MCE_VERBOSE, "MCE: rd MC%u_ADDR %#"PRIx64"\n", + bank, *val); } break; case MSR_IA32_MC0_MISC: - if ( !list_empty(&vmce->impact_header) ) + if ( bank < GUEST_MC_BANK_NUM ) { - entry = list_entry(vmce->impact_header.next, - struct bank_entry, list); - if ( entry->bank == bank ) - { - *val = entry->mci_misc; - mce_printk(MCE_VERBOSE, - "MCE: rd MC%u_MISC in vMCE# context " - "%#"PRIx64"\n", bank, *val); - } + *val = v->arch.vmce.bank[bank].mci_misc; + if ( *val ) + mce_printk(MCE_VERBOSE, "MCE: rd MC%u_MISC %#"PRIx64"\n", + bank, *val); } break; default: @@ -157,56 +158,48 @@ static int bank_mce_rdmsr(const struct v */ int vmce_rdmsr(uint32_t msr, uint64_t *val) { - const struct vcpu *cur = current; - struct domain_mca_msrs *vmce = dom_vmce(cur->domain); + struct vcpu *cur = current; int ret = 1; *val = 0; - spin_lock(&vmce->lock); + spin_lock(&cur->arch.vmce.lock); switch ( msr ) { case MSR_IA32_MCG_STATUS: - *val = vmce->mcg_status; + *val = cur->arch.vmce.mcg_status; if (*val) mce_printk(MCE_VERBOSE, - "MCE: rdmsr MCG_STATUS %#"PRIx64"\n", *val); + "MCE: rd MCG_STATUS %#"PRIx64"\n", *val); break; 
case MSR_IA32_MCG_CAP: - *val = cur->arch.mcg_cap; - mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CAP %#"PRIx64"\n", - *val); + *val = cur->arch.vmce.mcg_cap; + mce_printk(MCE_VERBOSE, "MCE: rd MCG_CAP %#"PRIx64"\n", *val); break; case MSR_IA32_MCG_CTL: - /* Stick all 1's when CTL support, and 0's when no CTL support */ - if ( cur->arch.mcg_cap & MCG_CTL_P ) + if ( cur->arch.vmce.mcg_cap & MCG_CTL_P ) *val = ~0ULL; - mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL %#"PRIx64"\n", *val); + mce_printk(MCE_VERBOSE, "MCE: rd MCG_CTL %#"PRIx64"\n", *val); break; default: ret = mce_bank_msr(cur, msr) ? bank_mce_rdmsr(cur, msr, val) : 0; break; } - spin_unlock(&vmce->lock); + spin_unlock(&cur->arch.vmce.lock); + return ret; } +/* + * For historic version reason, bank number may greater than GUEST_MC_BANK_NUM, + * when migratie from old vMCE version to new vMCE. + */ static int bank_mce_wrmsr(struct vcpu *v, u32 msr, u64 val) { int ret = 1; unsigned int bank = (msr - MSR_IA32_MC0_CTL) / 4; - struct domain_mca_msrs *vmce = dom_vmce(v->domain); - struct bank_entry *entry = NULL; - - /* Give the first entry of the list, it corresponds to current - * vMCE# injection. When vMCE# is finished processing by the - * the guest, this node will be deleted. - * Only error bank is written. Non-error banks simply return. 
- */ - if ( !list_empty(&vmce->impact_header) ) - entry = list_entry(vmce->impact_header.next, struct bank_entry, list); switch ( msr & (MSR_IA32_MC0_CTL | 3) ) { @@ -217,50 +210,25 @@ static int bank_mce_wrmsr(struct vcpu *v */ break; case MSR_IA32_MC0_STATUS: - if ( entry && (entry->bank == bank) ) - { - entry->mci_status = val; - mce_printk(MCE_VERBOSE, - "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n", - bank, val); - } - else - mce_printk(MCE_VERBOSE, - "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, val); + mce_printk(MCE_VERBOSE, "MCE: wr MC%u_STATUS %#"PRIx64"\n", bank, val); + if ( val ) + ret = -1; + else if ( bank < GUEST_MC_BANK_NUM ) + v->arch.vmce.bank[bank].mci_status = val; break; case MSR_IA32_MC0_ADDR: - if ( !~val ) - { - mce_printk(MCE_QUIET, - "MCE: wr MC%u_ADDR with all 1s will cause #GP\n", bank); + mce_printk(MCE_VERBOSE, "MCE: wr MC%u_ADDR %#"PRIx64"\n", bank, val); + if ( val ) ret = -1; - } - else if ( entry && (entry->bank == bank) ) - { - entry->mci_addr = val; - mce_printk(MCE_VERBOSE, - "MCE: wr MC%u_ADDR %"PRIx64" in vMCE#\n", bank, val); - } - else - mce_printk(MCE_VERBOSE, - "MCE: wr MC%u_ADDR %"PRIx64"\n", bank, val); + else if ( bank < GUEST_MC_BANK_NUM ) + v->arch.vmce.bank[bank].mci_addr = val; break; case MSR_IA32_MC0_MISC: - if ( !~val ) - { - mce_printk(MCE_QUIET, - "MCE: wr MC%u_MISC with all 1s will cause #GP\n", bank); + mce_printk(MCE_VERBOSE, "MCE: wr MC%u_MISC %#"PRIx64"\n", bank, val); + if ( val ) ret = -1; - } - else if ( entry && (entry->bank == bank) ) - { - entry->mci_misc = val; - mce_printk(MCE_VERBOSE, - "MCE: wr MC%u_MISC %"PRIx64" in vMCE#\n", bank, val); - } - else - mce_printk(MCE_VERBOSE, - "MCE: wr MC%u_MISC %"PRIx64"\n", bank, val); + else if ( bank < GUEST_MC_BANK_NUM ) + v->arch.vmce.bank[bank].mci_misc = val; break; default: switch ( boot_cpu_data.x86_vendor ) @@ -286,52 +254,33 @@ static int bank_mce_wrmsr(struct vcpu *v int vmce_wrmsr(u32 msr, u64 val) { struct vcpu *cur = current; - struct bank_entry *entry = 
NULL; - struct domain_mca_msrs *vmce = dom_vmce(cur->domain); int ret = 1; - spin_lock(&vmce->lock); + spin_lock(&cur->arch.vmce.lock); switch ( msr ) { case MSR_IA32_MCG_CTL: + /* If MCG_CTL exists then stick to all 1's, else ignore. */ break; case MSR_IA32_MCG_STATUS: - vmce->mcg_status = val; - mce_printk(MCE_VERBOSE, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", val); - /* For HVM guest, this is the point for deleting vMCE injection node */ - if ( is_hvm_vcpu(cur) && (vmce->nr_injection > 0) ) - { - vmce->nr_injection--; /* Should be 0 */ - if ( !list_empty(&vmce->impact_header) ) - { - entry = list_entry(vmce->impact_header.next, - struct bank_entry, list); - if ( entry->mci_status & MCi_STATUS_VAL ) - mce_printk(MCE_QUIET, "MCE: MCi_STATUS MSR should have " - "been cleared before write MCG_STATUS MSR\n"); - - mce_printk(MCE_QUIET, "MCE: Delete HVM last injection " - "Node, nr_injection %u\n", - vmce->nr_injection); - list_del(&entry->list); - xfree(entry); - } - else - mce_printk(MCE_QUIET, "MCE: Not found HVM guest" - " last injection Node, something Wrong!\n"); - } + cur->arch.vmce.mcg_status = val; + mce_printk(MCE_VERBOSE, "MCE: wr MCG_STATUS %"PRIx64"\n", val); break; case MSR_IA32_MCG_CAP: - mce_printk(MCE_QUIET, "MCE: MCG_CAP is read-only\n"); - ret = -1; + /* + * According to Intel SDM, IA32_MCG_CAP is a read-only register, + * the effect of writing to the IA32_MCG_CAP is undefined. Here we + * treat writing as 'write not change'. Guest would not surprise. + */ + mce_printk(MCE_VERBOSE, "MCE: MCG_CAP is r/o\n"); break; default: ret = mce_bank_msr(cur, msr) ? 
bank_mce_wrmsr(cur, msr, val) : 0; break; } - spin_unlock(&vmce->lock); + spin_unlock(&cur->arch.vmce.lock); return ret; } @@ -342,7 +291,7 @@ static int vmce_save_vcpu_ctxt(struct do for_each_vcpu( d, v ) { struct hvm_vmce_vcpu ctxt = { - .caps = v->arch.mcg_cap + .caps = v->arch.vmce.mcg_cap }; err = hvm_save_entry(VMCE_VCPU, v->vcpu_id, h, &ctxt); @@ -422,93 +371,38 @@ int inject_vmce(struct domain *d) return 0; } -/* This node list records errors impacting a domain. when one - * MCE# happens, one error bank impacts a domain. This error node - * will be inserted to the tail of the per_dom data for vMCE# MSR - * virtualization. When one vMCE# injection is finished processing - * processed by guest, the corresponding node will be deleted. - * This node list is for GUEST vMCE# MSRS virtualization. - */ -static struct bank_entry* alloc_bank_entry(void) +int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d, + uint64_t gstatus) { - struct bank_entry *entry; + struct vcpu *v = d->vcpu[0]; - entry = xzalloc(struct bank_entry); - if ( entry == NULL ) - { - printk(KERN_ERR "MCE: malloc bank_entry failed\n"); - return NULL; - } - - INIT_LIST_HEAD(&entry->list); - return entry; -} - -/* Fill error bank info for #vMCE injection and GUEST vMCE# - * MSR virtualization data - * 1) Log down how many nr_injections of the impacted. - * 2) Copy MCE# error bank to impacted DOM node list, - * for vMCE# MSRs virtualization - */ -int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d, - uint64_t gstatus) { - struct bank_entry *entry; - - /* This error bank impacts one domain, we need to fill domain related - * data for vMCE MSRs virtualization and vMCE# injection */ if ( mc_bank->mc_domid != (uint16_t)~0 ) { - /* For HVM guest, Only when first vMCE is consumed by HVM guest - * successfully, will we generete another node and inject another vMCE. 
- */ - if ( d->is_hvm && (dom_vmce(d)->nr_injection > 0) ) + if ( v->arch.vmce.mcg_status & MCG_STATUS_MCIP ) { - mce_printk(MCE_QUIET, "MCE: HVM guest has not handled previous" + mce_printk(MCE_QUIET, "MCE: guest has not handled previous" " vMCE yet!\n"); return -1; } - entry = alloc_bank_entry(); - if ( entry == NULL ) - return -1; + spin_lock(&v->arch.vmce.lock); - entry->mci_status = mc_bank->mc_status; - entry->mci_addr = mc_bank->mc_addr; - entry->mci_misc = mc_bank->mc_misc; - entry->bank = mc_bank->mc_bank; + v->arch.vmce.mcg_status = gstatus; + /* + * 1. Skip bank 0 to avoid 'bank 0 quirk' of old processors + * 2. Filter MCi_STATUS MSCOD model specific error code to guest + */ + v->arch.vmce.bank[1].mci_status = mc_bank->mc_status & + MCi_STATUS_MSCOD_MASK; + v->arch.vmce.bank[1].mci_addr = mc_bank->mc_addr; + v->arch.vmce.bank[1].mci_misc = mc_bank->mc_misc; - spin_lock(&dom_vmce(d)->lock); - /* New error Node, insert to the tail of the per_dom data */ - list_add_tail(&entry->list, &dom_vmce(d)->impact_header); - /* Fill MSR global status */ - dom_vmce(d)->mcg_status = gstatus; - /* New node impact the domain, need another vMCE# injection*/ - dom_vmce(d)->nr_injection++; - spin_unlock(&dom_vmce(d)->lock); - - mce_printk(MCE_VERBOSE,"MCE: Found error @[BANK%d " - "status %"PRIx64" addr %"PRIx64" domid %d]\n ", - mc_bank->mc_bank, mc_bank->mc_status, mc_bank->mc_addr, - mc_bank->mc_domid); + spin_unlock(&v->arch.vmce.lock); } return 0; } -#if 0 /* currently unused */ -int vmce_domain_inject( - struct mcinfo_bank *bank, struct domain *d, struct mcinfo_global *global) -{ - int ret; - - ret = fill_vmsr_data(bank, d, global->mc_gstatus); - if ( ret < 0 ) - return ret; - - return inject_vmce(d); -} -#endif - static int is_hvm_vmce_ready(struct mcinfo_bank *bank, struct domain *d) { struct vcpu *v; diff -r 8278d7d8fa48 -r 08b7e65a5d93 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Wed Sep 26 11:56:07 2012 +0200 +++ b/xen/arch/x86/domain.c Wed Sep 26 12:04:00 
2012 +0200 @@ -577,9 +577,6 @@ int arch_domain_create(struct domain *d, if ( (rc = iommu_domain_init(d)) != 0 ) goto fail; - - /* For Guest vMCE MSRs virtualization */ - vmce_init_msr(d); } if ( is_hvm_domain(d) ) @@ -606,7 +603,6 @@ int arch_domain_create(struct domain *d, fail: d->is_dying = DOMDYING_dead; - vmce_destroy_msr(d); cleanup_domain_irq_mapping(d); free_xenheap_page(d->shared_info); if ( paging_initialised ) @@ -629,7 +625,6 @@ void arch_domain_destroy(struct domain * else xfree(d->arch.pv_domain.e820); - vmce_destroy_msr(d); free_domain_pirqs(d); if ( !is_idle_domain(d) ) iommu_domain_destroy(d); diff -r 8278d7d8fa48 -r 08b7e65a5d93 xen/arch/x86/domctl.c --- a/xen/arch/x86/domctl.c Wed Sep 26 11:56:07 2012 +0200 +++ b/xen/arch/x86/domctl.c Wed Sep 26 12:04:00 2012 +0200 @@ -1066,7 +1066,7 @@ long arch_do_domctl( evc->syscall32_callback_eip = 0; evc->syscall32_disables_events = 0; } - evc->mcg_cap = v->arch.mcg_cap; + evc->mcg_cap = v->arch.vmce.mcg_cap; } else { diff -r 8278d7d8fa48 -r 08b7e65a5d93 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Wed Sep 26 11:56:07 2012 +0200 +++ b/xen/arch/x86/traps.c Wed Sep 26 12:04:00 2012 +0200 @@ -3141,50 +3141,6 @@ void async_exception_cleanup(struct vcpu break; ASSERT(trap <= VCPU_TRAP_LAST); - /* inject vMCE to PV_Guest including DOM0. 
*/ - if ( trap == VCPU_TRAP_MCE ) - { - gdprintk(XENLOG_DEBUG, "MCE: Return from vMCE# trap!\n"); - if ( curr->vcpu_id == 0 ) - { - struct domain *d = curr->domain; - - if ( !d->arch.vmca_msrs->nr_injection ) - { - printk(XENLOG_WARNING "MCE: ret from vMCE#, " - "no injection node\n"); - goto end; - } - - d->arch.vmca_msrs->nr_injection--; - if ( !list_empty(&d->arch.vmca_msrs->impact_header) ) - { - struct bank_entry *entry; - - entry = list_entry(d->arch.vmca_msrs->impact_header.next, - struct bank_entry, list); - gdprintk(XENLOG_DEBUG, "MCE: delete last injection node\n"); - list_del(&entry->list); - } - else - printk(XENLOG_ERR "MCE: didn't found last injection node\n"); - - /* further injection */ - if ( d->arch.vmca_msrs->nr_injection > 0 && - guest_has_trap_callback(d, 0, TRAP_machine_check) && - !test_and_set_bool(curr->mce_pending) ) - { - int cpu = smp_processor_id(); - - cpumask_copy(curr->cpu_affinity_tmp, curr->cpu_affinity); - printk(XENLOG_DEBUG "MCE: CPU%d set affinity, old %d\n", - cpu, curr->processor); - vcpu_set_affinity(curr, cpumask_of(cpu)); - } - } - } - -end: /* Restore previous asynchronous exception mask. */ curr->async_exception_mask = curr->async_exception_state(trap).old_mask; } diff -r 8278d7d8fa48 -r 08b7e65a5d93 xen/include/asm-x86/domain.h --- a/xen/include/asm-x86/domain.h Wed Sep 26 11:56:07 2012 +0200 +++ b/xen/include/asm-x86/domain.h Wed Sep 26 12:04:00 2012 +0200 @@ -296,9 +296,6 @@ struct arch_domain struct PITState vpit; - /* For Guest vMCA handling */ - struct domain_mca_msrs *vmca_msrs; - /* TSC management (emulation, pv, scaling, stats) */ int tsc_mode; /* see include/asm-x86/time.h */ bool_t vtsc; /* tsc is emulated (may change after migrate) */ @@ -438,8 +435,8 @@ struct arch_vcpu * and thus should be saved/restored. 
*/ bool_t nonlazy_xstate_used; - uint64_t mcg_cap; - + struct vmce vmce; + struct paging_vcpu paging; uint32_t gdbsx_vcpu_event; diff -r 8278d7d8fa48 -r 08b7e65a5d93 xen/include/asm-x86/mce.h --- a/xen/include/asm-x86/mce.h Wed Sep 26 11:56:07 2012 +0200 +++ b/xen/include/asm-x86/mce.h Wed Sep 26 12:04:00 2012 +0200 @@ -3,28 +3,35 @@ #ifndef _XEN_X86_MCE_H #define _XEN_X86_MCE_H -/* This entry is for recording bank nodes for the impacted domain, - * put into impact_header list. */ -struct bank_entry { - struct list_head list; - uint16_t bank; +/* + * Emulate 2 banks for guest + * Bank0: reserved for 'bank0 quirk' occur at some very old processors: + * 1). Intel cpu whose family-model value < 06-1A; + * 2). AMD K7 + * Bank1: used to transfer error info to guest + */ +#define GUEST_MC_BANK_NUM 2 + +/* Filter MSCOD model specific error code to guest */ +#define MCi_STATUS_MSCOD_MASK (~(0xffffULL << 16)) + +/* No mci_ctl since it stick all 1's */ +struct vmce_bank { uint64_t mci_status; uint64_t mci_addr; uint64_t mci_misc; + uint64_t mci_ctl2; }; -struct domain_mca_msrs -{ - /* Guest should not change below values after DOM boot up */ +/* No mcg_ctl since it not expose to guest */ +struct vmce { + uint64_t mcg_cap; uint64_t mcg_status; - uint16_t nr_injection; - struct list_head impact_header; spinlock_t lock; + struct vmce_bank bank[GUEST_MC_BANK_NUM]; }; /* Guest vMCE MSRs virtualization */ -extern int vmce_init_msr(struct domain *d); -extern void vmce_destroy_msr(struct domain *d); extern void vmce_init_vcpu(struct vcpu *); extern int vmce_restore_vcpu(struct vcpu *, uint64_t caps); extern int vmce_wrmsr(uint32_t msr, uint64_t val); _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |