[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] RE: [Xen-devel] [PATCH 7] MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension
Jan and Keir, Any comments? Thanks Jinsong Liu, Jinsong wrote: > MCA: simplify mce actoin, and some update of mem err handler for > future SRAR extension > > This patch simplify mce_action(). Basically the 2 set of mce status > flags MCA_... and MCER_... are highly functional similar. This patch > remove the redundant middle-level flag MCA_..., and its related data > structure and function, so that mce_action() logic is more clean and > simpler. > This patch also update mem err handler. intel_memerr_dhandler() will > be commonly used by SRAO and SRAR, so for the extension of future > SRAR case, this patch remove the default fail flag from > intel_memerr_dhandler() outside to SRAO/SRAR level, since only > SRAO/SRAR handler itself has the knowledge of how to handle the > failure when mem err handler fail. > > Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx> > > diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/mce_intel.c > --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 13:39:40 2011 > +0800 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 18:15:13 > 2011 +0800 @@ -172,23 +172,14 @@ static unsigned int __read_mostly > mce_dh static unsigned int __read_mostly mce_dhandler_num; > static unsigned int __read_mostly mce_uhandler_num; > > -enum mce_result > -{ > - MCER_NOERROR, > - MCER_RECOVERED, > - /* Not recoverd, but can continue */ > - MCER_CONTINUE, > - MCER_RESET, > -}; > - > /* Maybe called in MCE context, no lock, no printk */ > static enum mce_result mce_action(struct cpu_user_regs *regs, > mctelem_cookie_t mctc) > { > struct mc_info *local_mi; > - enum mce_result ret = MCER_NOERROR; > + enum mce_result bank_result = MCER_NOERROR; > + enum mce_result worst_result = MCER_NOERROR; > struct mcinfo_common *mic = NULL; > - struct mca_handle_result mca_res; > struct mca_binfo binfo; > const struct mca_error_handler *handlers = mce_dhandlers; > unsigned int i, handler_num = mce_dhandler_num; > @@ -217,7 +208,7 @@ static enum mce_result mce_action(struct > /* Processing bank information */ > x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK); > > - for ( ; ret != MCER_RESET && mic && mic->size; > + for ( ; bank_result != MCER_RESET && mic && mic->size; > mic = x86_mcinfo_next(mic) ) > { > if (mic->type != MC_TYPE_BANK) { > @@ -225,34 +216,20 @@ static enum mce_result mce_action(struct > } > binfo.mib = (struct mcinfo_bank*)mic; > binfo.bank = binfo.mib->mc_bank; > - memset(&mca_res, 0x0f, sizeof(mca_res)); > + bank_result = MCER_NOERROR; > for ( i = 0; i < handler_num; i++ ) { > if (handlers[i].owned_error(binfo.mib->mc_status)) > { > - handlers[i].recovery_handler(&binfo, &mca_res); > - > - if (mca_res.result & MCA_OWNER) > - binfo.mib->mc_domid = mca_res.owner; > - > - if (mca_res.result == MCA_NEED_RESET) > - ret = MCER_RESET; > - else if (mca_res.result == MCA_RECOVERED) > - { > - if (ret < MCER_RECOVERED) > - ret = MCER_RECOVERED; > - } > - else if (mca_res.result == MCA_NO_ACTION) > - { > - if (ret < MCER_CONTINUE) > - ret = MCER_CONTINUE; > - } > + handlers[i].recovery_handler(&binfo, &bank_result); > + if (worst_result < bank_result) > + worst_result = bank_result; > break; > } > } > ASSERT(i != handler_num); > } > > - return ret; > + return worst_result; > } > > /* > @@ -608,7 +585,7 @@ struct mcinfo_recovery *mci_add_pageoff_ > > static void intel_memerr_dhandler( > struct mca_binfo *binfo, > - struct mca_handle_result *result) > + enum mce_result *result) > { > struct mcinfo_bank *bank = binfo->mib; > struct mcinfo_global *global = binfo->mig; > @@ -618,7 +595,6 @@ static void intel_memerr_dhandler( > uint64_t mc_status, mc_misc; > > mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n"); > - result->result = MCA_NEED_RESET; > > mc_status = bank->mc_status; > mc_misc = bank->mc_misc; > @@ -626,7 +602,6 @@ static void intel_memerr_dhandler( > !(mc_status & MCi_STATUS_MISCV) || > ((mc_misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) ) > { > - result->result |= MCA_NO_ACTION; > dprintk(XENLOG_WARNING, > "No physical address provided for memory error\n"); > return; > @@ -644,23 +619,19 @@ static void intel_memerr_dhandler( > > /* This is free page */ > if (status & PG_OFFLINE_OFFLINED) > - result->result = MCA_RECOVERED; > + *result = MCER_RECOVERED; > else if (status & PG_OFFLINE_PENDING) { > /* This page has owner */ > if (status & PG_OFFLINE_OWNED) { > - result->result |= MCA_OWNER; > - result->owner = status >> PG_OFFLINE_OWNER_SHIFT; > + bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT; > mce_printk(MCE_QUIET, "MCE: This error page is ownded" > - " by DOM %d\n", result->owner); > - /* Fill vMCE# injection and vMCE# MSR virtualization " > - * "related data */ > - bank->mc_domid = result->owner; > + " by DOM %d\n", bank->mc_domid); > /* XXX: Cannot handle shared pages yet > * (this should identify all domains and gfn mapping to > * the mfn in question) */ > - BUG_ON( result->owner == DOMID_COW ); > - if ( result->owner != DOMID_XEN ) { > - d = get_domain_by_id(result->owner); > + BUG_ON( bank->mc_domid == DOMID_COW ); > + if ( bank->mc_domid != DOMID_XEN ) { > + d = get_domain_by_id(bank->mc_domid); > ASSERT(d); > gfn = get_gpfn_from_mfn((bank->mc_addr) >> > PAGE_SHIFT); > > @@ -683,7 +654,7 @@ static void intel_memerr_dhandler( > global->mc_gstatus) == -1 ) > { > mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d > " - "failed\n", result->owner); > + "failed\n", bank->mc_domid); > goto vmce_failed; > } > > @@ -699,7 +670,7 @@ static void intel_memerr_dhandler( > * For xen, it has contained the error and finished > * its own recovery job. > */ > - result->result = MCA_RECOVERED; > + *result = MCER_RECOVERED; > put_domain(d); > > return; > @@ -718,11 +689,13 @@ static int intel_srao_check(uint64_t sta > > static void intel_srao_dhandler( > struct mca_binfo *binfo, > - struct mca_handle_result *result) > + enum mce_result *result) > { > uint64_t status = binfo->mib->mc_status; > > /* For unknown srao error code, no action required */ > + *result = MCER_CONTINUE; > + > if ( status & MCi_STATUS_VAL ) > { > switch ( status & INTEL_MCCOD_MASK ) > @@ -744,7 +717,7 @@ static int intel_default_check(uint64_t > > static void intel_default_mce_dhandler( > struct mca_binfo *binfo, > - struct mca_handle_result *result) > + enum mce_result *result) > { > uint64_t status = binfo->mib->mc_status; > enum intel_mce_type type; > @@ -752,9 +725,9 @@ static void intel_default_mce_dhandler( > type = intel_check_mce_type(status); > > if (type == intel_mce_fatal || type == intel_mce_ucr_srar) > - result->result = MCA_NEED_RESET; > - else if (type == intel_mce_ucr_srao) > - result->result = MCA_NO_ACTION; > + *result = MCER_RESET; > + else > + *result = MCER_CONTINUE; > } > > static const struct mca_error_handler intel_mce_dhandlers[] = { > @@ -764,7 +737,7 @@ static const struct mca_error_handler in > > static void intel_default_mce_uhandler( > struct mca_binfo *binfo, > - struct mca_handle_result *result) > + enum mce_result *result) > { > uint64_t status = binfo->mib->mc_status; > enum intel_mce_type type; > @@ -776,10 +749,10 @@ static void intel_default_mce_uhandler( > /* Panic if no handler for SRAR error */ > case intel_mce_ucr_srar: > case intel_mce_fatal: > - result->result = MCA_NEED_RESET; > + *result = MCER_RESET; > break; > default: > - result->result = MCA_NO_ACTION; > + *result = MCER_CONTINUE; > break; > } > } > diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/x86_mca.h > --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 13:39:40 2011 +0800 > +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 18:15:13 2011 +0800 > @@ -124,36 +124,6 @@ void mcabanks_free(struct mca_banks *ban > void mcabanks_free(struct mca_banks *banks); > extern struct mca_banks *mca_allbanks; > > -/* Below interfaces are defined for MCA internal processing: > - * a. pre_handler will be called early in MCA ISR context, mainly > for early > - * need_reset detection for avoiding log missing. Also, it is > used to judge > - * impacted DOMAIN if possible. > - * b. mca_error_handler is actually a (error_action_index, > - * recovery_hanlder pointer) pair. The defined recovery_handler > - * performs the actual recovery operations such as page_offline, > cpu_offline > - * in softIRQ context when the per_bank MCA error matching the > corresponding > - * mca_code index. If pre_handler can't judge the impacted domain, > - * recovery_handler must figure it out. > -*/ > - > -/* MCA error has been recovered successfully by the recovery action*/ > -#define MCA_RECOVERED (0x1 << 0) > -/* MCA error impact the specified DOMAIN in owner field below */ > -#define MCA_OWNER (0x1 << 1) > -/* MCA error can't be recovered and need reset */ > -#define MCA_NEED_RESET (0x1 << 2) > -/* MCA error did not have any action yet */ > -#define MCA_NO_ACTION (0x1 << 3) > - > -struct mca_handle_result > -{ > - uint32_t result; > - /* Used one result & MCA_OWNER */ > - domid_t owner; > - /* Used by mca_error_handler, result & MCA_RECOVRED */ > - struct recovery_action *action; > -}; > - > /*Keep bank so that we can get staus even if mib is NULL */ > struct mca_binfo { > int bank; > @@ -163,8 +133,14 @@ struct mca_binfo { > struct cpu_user_regs *regs; > }; > > -extern void (*mca_prehandler)( struct cpu_user_regs *regs, > - struct mca_handle_result *result); > +enum mce_result > +{ > + MCER_NOERROR, > + MCER_RECOVERED, > + /* Not recoverd, but can continue */ > + MCER_CONTINUE, > + MCER_RESET, > +}; > > struct mca_error_handler > { > @@ -175,7 +151,7 @@ struct mca_error_handler > */ > int (*owned_error)(uint64_t status); > void (*recovery_handler)(struct mca_binfo *binfo, > - struct mca_handle_result *result); > + enum mce_result *result); > }; > > /* Global variables */ _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |