[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] x86/mca: simplify mce action, and some update of mem err handler for
# HG changeset patch # User Liu, Jinsong <jinsong.liu@xxxxxxxxx> # Date 1305547923 -3600 # Node ID 216c2efeb246041cc39e4dcb675368e7e3182b85 # Parent f9bb0bbea7c27e256275cfe58c16c0077542984e x86/mca: simplify mce action, and some update of mem err handler for future SRAR extension This patch simplify mce_action(). Basically the 2 set of mce status flags MCA_... and MCER_... are highly functional similar. This patch remove the redundant middle-level flag MCA_..., and its related data structure and function, so that mce_action() logic is more clean and simpler. This patch also update mem err handler. intel_memerr_dhandler() will be commonly used by SRAO and SRAR, so for the extension of future SRAR case, this patch remove the default fail flag from intel_memerr_dhandler() outside to SRAO/SRAR level, since only SRAO/SRAR handler itself has the knowledge of how to handle the failure when mem err handler fail. Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx> --- diff -r f9bb0bbea7c2 -r 216c2efeb246 xen/arch/x86/cpu/mcheck/mce_intel.c --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Thu May 12 16:42:54 2011 +0100 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon May 16 13:12:03 2011 +0100 @@ -172,23 +172,14 @@ static unsigned int __read_mostly mce_dhandler_num; static unsigned int __read_mostly mce_uhandler_num; -enum mce_result -{ - MCER_NOERROR, - MCER_RECOVERED, - /* Not recoverd, but can continue */ - MCER_CONTINUE, - MCER_RESET, -}; - /* Maybe called in MCE context, no lock, no printk */ static enum mce_result mce_action(struct cpu_user_regs *regs, mctelem_cookie_t mctc) { struct mc_info *local_mi; - enum mce_result ret = MCER_NOERROR; + enum mce_result bank_result = MCER_NOERROR; + enum mce_result worst_result = MCER_NOERROR; struct mcinfo_common *mic = NULL; - struct mca_handle_result mca_res; struct mca_binfo binfo; const struct mca_error_handler *handlers = mce_dhandlers; unsigned int i, handler_num = mce_dhandler_num; @@ -217,7 +208,7 @@ /* Processing bank information */ x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK); - for ( ; ret != MCER_RESET && mic && mic->size; + for ( ; bank_result != MCER_RESET && mic && mic->size; mic = x86_mcinfo_next(mic) ) { if (mic->type != MC_TYPE_BANK) { @@ -225,34 +216,20 @@ } binfo.mib = (struct mcinfo_bank*)mic; binfo.bank = binfo.mib->mc_bank; - memset(&mca_res, 0x0f, sizeof(mca_res)); + bank_result = MCER_NOERROR; for ( i = 0; i < handler_num; i++ ) { if (handlers[i].owned_error(binfo.mib->mc_status)) { - handlers[i].recovery_handler(&binfo, &mca_res); - - if (mca_res.result & MCA_OWNER) - binfo.mib->mc_domid = mca_res.owner; - - if (mca_res.result == MCA_NEED_RESET) - ret = MCER_RESET; - else if (mca_res.result == MCA_RECOVERED) - { - if (ret < MCER_RECOVERED) - ret = MCER_RECOVERED; - } - else if (mca_res.result == MCA_NO_ACTION) - { - if (ret < MCER_CONTINUE) - ret = MCER_CONTINUE; - } + handlers[i].recovery_handler(&binfo, &bank_result); + if (worst_result < bank_result) + worst_result = bank_result; break; } } ASSERT(i != handler_num); } - return ret; + return worst_result; } /* @@ -608,7 +585,7 @@ static void intel_memerr_dhandler( struct mca_binfo *binfo, - struct mca_handle_result *result) + enum mce_result *result) { struct mcinfo_bank *bank = binfo->mib; struct mcinfo_global *global = binfo->mig; @@ -618,7 +595,6 @@ uint64_t mc_status, mc_misc; mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n"); - result->result = MCA_NEED_RESET; mc_status = bank->mc_status; mc_misc = bank->mc_misc; @@ -626,7 +602,6 @@ !(mc_status & MCi_STATUS_MISCV) || ((mc_misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) ) { - result->result |= MCA_NO_ACTION; dprintk(XENLOG_WARNING, "No physical address provided for memory error\n"); return; @@ -644,23 +619,19 @@ /* This is free page */ if (status & PG_OFFLINE_OFFLINED) - result->result = MCA_RECOVERED; + *result = MCER_RECOVERED; else if (status & PG_OFFLINE_PENDING) { /* This page has owner */ if (status & PG_OFFLINE_OWNED) { - result->result |= MCA_OWNER; - result->owner = status >> PG_OFFLINE_OWNER_SHIFT; + bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT; mce_printk(MCE_QUIET, "MCE: This error page is ownded" - " by DOM %d\n", result->owner); - /* Fill vMCE# injection and vMCE# MSR virtualization " - * "related data */ - bank->mc_domid = result->owner; + " by DOM %d\n", bank->mc_domid); /* XXX: Cannot handle shared pages yet * (this should identify all domains and gfn mapping to * the mfn in question) */ - BUG_ON( result->owner == DOMID_COW ); - if ( result->owner != DOMID_XEN ) { - d = get_domain_by_id(result->owner); + BUG_ON( bank->mc_domid == DOMID_COW ); + if ( bank->mc_domid != DOMID_XEN ) { + d = get_domain_by_id(bank->mc_domid); ASSERT(d); gfn = get_gpfn_from_mfn((bank->mc_addr) >> PAGE_SHIFT); @@ -683,7 +654,7 @@ global->mc_gstatus) == -1 ) { mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d " - "failed\n", result->owner); + "failed\n", bank->mc_domid); goto vmce_failed; } @@ -699,7 +670,7 @@ * For xen, it has contained the error and finished * its own recovery job. */ - result->result = MCA_RECOVERED; + *result = MCER_RECOVERED; put_domain(d); return; @@ -718,11 +689,13 @@ static void intel_srao_dhandler( struct mca_binfo *binfo, - struct mca_handle_result *result) + enum mce_result *result) { uint64_t status = binfo->mib->mc_status; /* For unknown srao error code, no action required */ + *result = MCER_CONTINUE; + if ( status & MCi_STATUS_VAL ) { switch ( status & INTEL_MCCOD_MASK ) @@ -744,7 +717,7 @@ static void intel_default_mce_dhandler( struct mca_binfo *binfo, - struct mca_handle_result *result) + enum mce_result *result) { uint64_t status = binfo->mib->mc_status; enum intel_mce_type type; @@ -752,9 +725,9 @@ type = intel_check_mce_type(status); if (type == intel_mce_fatal || type == intel_mce_ucr_srar) - result->result = MCA_NEED_RESET; - else if (type == intel_mce_ucr_srao) - result->result = MCA_NO_ACTION; + *result = MCER_RESET; + else + *result = MCER_CONTINUE; } static const struct mca_error_handler intel_mce_dhandlers[] = { @@ -764,7 +737,7 @@ static void intel_default_mce_uhandler( struct mca_binfo *binfo, - struct mca_handle_result *result) + enum mce_result *result) { uint64_t status = binfo->mib->mc_status; enum intel_mce_type type; @@ -776,10 +749,10 @@ /* Panic if no handler for SRAR error */ case intel_mce_ucr_srar: case intel_mce_fatal: - result->result = MCA_NEED_RESET; + *result = MCER_RESET; break; default: - result->result = MCA_NO_ACTION; + *result = MCER_CONTINUE; break; } } diff -r f9bb0bbea7c2 -r 216c2efeb246 xen/arch/x86/cpu/mcheck/x86_mca.h --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Thu May 12 16:42:54 2011 +0100 +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Mon May 16 13:12:03 2011 +0100 @@ -124,36 +124,6 @@ void mcabanks_free(struct mca_banks *banks); extern struct mca_banks *mca_allbanks; -/* Below interfaces are defined for MCA internal processing: - * a. pre_handler will be called early in MCA ISR context, mainly for early - * need_reset detection for avoiding log missing. Also, it is used to judge - * impacted DOMAIN if possible. - * b. mca_error_handler is actually a (error_action_index, - * recovery_hanlder pointer) pair. The defined recovery_handler - * performs the actual recovery operations such as page_offline, cpu_offline - * in softIRQ context when the per_bank MCA error matching the corresponding - * mca_code index. If pre_handler can't judge the impacted domain, - * recovery_handler must figure it out. -*/ - -/* MCA error has been recovered successfully by the recovery action*/ -#define MCA_RECOVERED (0x1 << 0) -/* MCA error impact the specified DOMAIN in owner field below */ -#define MCA_OWNER (0x1 << 1) -/* MCA error can't be recovered and need reset */ -#define MCA_NEED_RESET (0x1 << 2) -/* MCA error did not have any action yet */ -#define MCA_NO_ACTION (0x1 << 3) - -struct mca_handle_result -{ - uint32_t result; - /* Used one result & MCA_OWNER */ - domid_t owner; - /* Used by mca_error_handler, result & MCA_RECOVRED */ - struct recovery_action *action; -}; - /*Keep bank so that we can get staus even if mib is NULL */ struct mca_binfo { int bank; @@ -163,8 +133,14 @@ struct cpu_user_regs *regs; }; -extern void (*mca_prehandler)( struct cpu_user_regs *regs, - struct mca_handle_result *result); +enum mce_result +{ + MCER_NOERROR, + MCER_RECOVERED, + /* Not recoverd, but can continue */ + MCER_CONTINUE, + MCER_RESET, +}; struct mca_error_handler { @@ -175,7 +151,7 @@ */ int (*owned_error)(uint64_t status); void (*recovery_handler)(struct mca_binfo *binfo, - struct mca_handle_result *result); + enum mce_result *result); }; /* Global variables */ _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |