[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] RE: [Xen-devel] [PATCH 7] MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension
OK, thanks :) Jinsong Keir Fraser wrote: > Ah no, I found it! I'll take a look at it on Monday. > > -- Keir > > On 14/05/2011 10:45, "Keir Fraser" <keir@xxxxxxx> wrote: > >> I don't seem to have this one in my queue. You should resend. >> >> -- Keir >> >> On 13/05/2011 15:34, "Liu, Jinsong" <jinsong.liu@xxxxxxxxx> wrote: >> >>> Jan and Keir, >>> >>> Any comments? >>> >>> Thanks >>> Jinsong >>> >>> >>> Liu, Jinsong wrote: >>>> MCA: simplify mce actoin, and some update of mem err handler for >>>> future SRAR extension >>>> >>>> This patch simplify mce_action(). Basically the 2 set of mce status >>>> flags MCA_... and MCER_... are highly functional similar. This >>>> patch remove the redundant middle-level flag MCA_..., and its >>>> related data structure and function, so that mce_action() logic is >>>> more clean and simpler. This patch also update mem err handler. >>>> intel_memerr_dhandler() will be commonly used by SRAO and SRAR, so >>>> for the extension of future SRAR case, this patch remove the >>>> default fail flag from intel_memerr_dhandler() outside to >>>> SRAO/SRAR level, since only SRAO/SRAR handler itself has the >>>> knowledge of how to handle the failure when mem err handler fail. >>>> >>>> Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx> >>>> >>>> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/mce_intel.c >>>> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 13:39:40 2011 >>>> +0800 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 18:15:13 >>>> 2011 +0800 @@ -172,23 +172,14 @@ static unsigned int __read_mostly >>>> mce_dh static unsigned int __read_mostly mce_dhandler_num; >>>> static unsigned int __read_mostly mce_uhandler_num; >>>> >>>> -enum mce_result >>>> -{ >>>> - MCER_NOERROR, >>>> - MCER_RECOVERED, >>>> - /* Not recoverd, but can continue */ >>>> - MCER_CONTINUE, >>>> - MCER_RESET, >>>> -}; >>>> - >>>> /* Maybe called in MCE context, no lock, no printk */ >>>> static enum mce_result mce_action(struct cpu_user_regs *regs, >>>> mctelem_cookie_t mctc) >>>> { >>>> struct mc_info *local_mi; >>>> - enum mce_result ret = MCER_NOERROR; >>>> + enum mce_result bank_result = MCER_NOERROR; >>>> + enum mce_result worst_result = MCER_NOERROR; >>>> struct mcinfo_common *mic = NULL; >>>> - struct mca_handle_result mca_res; >>>> struct mca_binfo binfo; >>>> const struct mca_error_handler *handlers = mce_dhandlers; >>>> unsigned int i, handler_num = mce_dhandler_num; >>>> @@ -217,7 +208,7 @@ static enum mce_result mce_action(struct >>>> /* Processing bank information */ >>>> x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK); >>>> >>>> - for ( ; ret != MCER_RESET && mic && mic->size; >>>> + for ( ; bank_result != MCER_RESET && mic && mic->size; >>>> mic = x86_mcinfo_next(mic) ) >>>> { >>>> if (mic->type != MC_TYPE_BANK) { >>>> @@ -225,34 +216,20 @@ static enum mce_result mce_action(struct >>>> } binfo.mib = (struct mcinfo_bank*)mic; >>>> binfo.bank = binfo.mib->mc_bank; >>>> - memset(&mca_res, 0x0f, sizeof(mca_res)); >>>> + bank_result = MCER_NOERROR; >>>> for ( i = 0; i < handler_num; i++ ) { >>>> if (handlers[i].owned_error(binfo.mib->mc_status)) >>>> { >>>> - handlers[i].recovery_handler(&binfo, &mca_res); - >>>> - if (mca_res.result & MCA_OWNER) >>>> - binfo.mib->mc_domid = mca_res.owner; - >>>> - if (mca_res.result == MCA_NEED_RESET) >>>> - ret = MCER_RESET; >>>> - else if (mca_res.result == MCA_RECOVERED) >>>> - { >>>> - if (ret < MCER_RECOVERED) >>>> - ret = MCER_RECOVERED; >>>> - } >>>> - else if (mca_res.result == MCA_NO_ACTION) >>>> - { >>>> - if (ret < MCER_CONTINUE) >>>> - ret = MCER_CONTINUE; >>>> - } >>>> + handlers[i].recovery_handler(&binfo, >>>> &bank_result); + if (worst_result < bank_result) >>>> + worst_result = bank_result; >>>> break; >>>> } >>>> } >>>> ASSERT(i != handler_num); >>>> } >>>> >>>> - return ret; >>>> + return worst_result; >>>> } >>>> >>>> /* >>>> @@ -608,7 +585,7 @@ struct mcinfo_recovery *mci_add_pageoff_ >>>> >>>> static void intel_memerr_dhandler( >>>> struct mca_binfo *binfo, >>>> - struct mca_handle_result *result) >>>> + enum mce_result *result) >>>> { >>>> struct mcinfo_bank *bank = binfo->mib; >>>> struct mcinfo_global *global = binfo->mig; >>>> @@ -618,7 +595,6 @@ static void intel_memerr_dhandler( >>>> uint64_t mc_status, mc_misc; >>>> >>>> mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n"); >>>> - result->result = MCA_NEED_RESET; >>>> >>>> mc_status = bank->mc_status; >>>> mc_misc = bank->mc_misc; >>>> @@ -626,7 +602,6 @@ static void intel_memerr_dhandler( >>>> !(mc_status & MCi_STATUS_MISCV) || >>>> ((mc_misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) ) >>>> { - result->result |= MCA_NO_ACTION; >>>> dprintk(XENLOG_WARNING, >>>> "No physical address provided for memory error\n"); >>>> return; @@ -644,23 +619,19 @@ static void intel_memerr_dhandler( >>>> >>>> /* This is free page */ >>>> if (status & PG_OFFLINE_OFFLINED) >>>> - result->result = MCA_RECOVERED; >>>> + *result = MCER_RECOVERED; >>>> else if (status & PG_OFFLINE_PENDING) { >>>> /* This page has owner */ >>>> if (status & PG_OFFLINE_OWNED) { >>>> - result->result |= MCA_OWNER; >>>> - result->owner = status >> PG_OFFLINE_OWNER_SHIFT; >>>> + bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT; >>>> mce_printk(MCE_QUIET, "MCE: This error page is ownded" >>>> - " by DOM %d\n", result->owner); >>>> - /* Fill vMCE# injection and vMCE# MSR virtualization " >>>> - * "related data */ >>>> - bank->mc_domid = result->owner; >>>> + " by DOM %d\n", bank->mc_domid); >>>> /* XXX: Cannot handle shared pages yet >>>> * (this should identify all domains and gfn mapping >>>> to >>>> * the mfn in question) */ >>>> - BUG_ON( result->owner == DOMID_COW ); >>>> - if ( result->owner != DOMID_XEN ) { >>>> - d = get_domain_by_id(result->owner); >>>> + BUG_ON( bank->mc_domid == DOMID_COW ); >>>> + if ( bank->mc_domid != DOMID_XEN ) { >>>> + d = get_domain_by_id(bank->mc_domid); >>>> ASSERT(d); gfn = >>>> get_gpfn_from_mfn((bank->mc_addr) >> PAGE_SHIFT); >>>> >>>> @@ -683,7 +654,7 @@ static void intel_memerr_dhandler( >>>> global->mc_gstatus) == -1 ) >>>> { >>>> mce_printk(MCE_QUIET, "Fill vMCE# data for >>>> DOM%d " - "failed\n", result->owner); >>>> + "failed\n", bank->mc_domid); >>>> goto vmce_failed; >>>> } >>>> >>>> @@ -699,7 +670,7 @@ static void intel_memerr_dhandler( >>>> * For xen, it has contained the error and >>>> finished >>>> * its own recovery job. >>>> */ >>>> - result->result = MCA_RECOVERED; >>>> + *result = MCER_RECOVERED; >>>> put_domain(d); >>>> >>>> return; >>>> @@ -718,11 +689,13 @@ static int intel_srao_check(uint64_t sta >>>> >>>> static void intel_srao_dhandler( >>>> struct mca_binfo *binfo, >>>> - struct mca_handle_result *result) >>>> + enum mce_result *result) >>>> { >>>> uint64_t status = binfo->mib->mc_status; >>>> >>>> /* For unknown srao error code, no action required */ + >>>> *result = MCER_CONTINUE; + >>>> if ( status & MCi_STATUS_VAL ) >>>> { >>>> switch ( status & INTEL_MCCOD_MASK ) >>>> @@ -744,7 +717,7 @@ static int intel_default_check(uint64_t >>>> >>>> static void intel_default_mce_dhandler( >>>> struct mca_binfo *binfo, >>>> - struct mca_handle_result *result) >>>> + enum mce_result *result) >>>> { >>>> uint64_t status = binfo->mib->mc_status; >>>> enum intel_mce_type type; >>>> @@ -752,9 +725,9 @@ static void intel_default_mce_dhandler( >>>> type = intel_check_mce_type(status); >>>> >>>> if (type == intel_mce_fatal || type == intel_mce_ucr_srar) >>>> - result->result = MCA_NEED_RESET; >>>> - else if (type == intel_mce_ucr_srao) >>>> - result->result = MCA_NO_ACTION; >>>> + *result = MCER_RESET; >>>> + else >>>> + *result = MCER_CONTINUE; >>>> } >>>> >>>> static const struct mca_error_handler intel_mce_dhandlers[] = { >>>> @@ -764,7 +737,7 @@ static const struct mca_error_handler in >>>> >>>> static void intel_default_mce_uhandler( >>>> struct mca_binfo *binfo, >>>> - struct mca_handle_result *result) >>>> + enum mce_result *result) >>>> { >>>> uint64_t status = binfo->mib->mc_status; >>>> enum intel_mce_type type; >>>> @@ -776,10 +749,10 @@ static void intel_default_mce_uhandler( >>>> /* Panic if no handler for SRAR error */ >>>> case intel_mce_ucr_srar: >>>> case intel_mce_fatal: >>>> - result->result = MCA_NEED_RESET; >>>> + *result = MCER_RESET; >>>> break; >>>> default: >>>> - result->result = MCA_NO_ACTION; >>>> + *result = MCER_CONTINUE; >>>> break; >>>> } >>>> } >>>> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/x86_mca.h >>>> --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 13:39:40 2011 >>>> +0800 +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 18:15:13 >>>> 2011 +0800 @@ -124,36 +124,6 @@ void mcabanks_free(struct >>>> mca_banks *ban void mcabanks_free(struct mca_banks *banks); >>>> extern struct mca_banks *mca_allbanks; >>>> >>>> -/* Below interfaces are defined for MCA internal processing: >>>> - * a. pre_handler will be called early in MCA ISR context, mainly >>>> for early >>>> - * need_reset detection for avoiding log missing. Also, it is >>>> used to judge >>>> - * impacted DOMAIN if possible. >>>> - * b. mca_error_handler is actually a (error_action_index, >>>> - * recovery_hanlder pointer) pair. The defined recovery_handler >>>> - * performs the actual recovery operations such as >>>> page_offline, cpu_offline >>>> - * in softIRQ context when the per_bank MCA error matching the >>>> corresponding >>>> - * mca_code index. If pre_handler can't judge the impacted >>>> domain, >>>> - * recovery_handler must figure it out. >>>> -*/ >>>> - >>>> -/* MCA error has been recovered successfully by the recovery >>>> action*/ >>>> -#define MCA_RECOVERED (0x1 << 0) >>>> -/* MCA error impact the specified DOMAIN in owner field below */ >>>> -#define MCA_OWNER (0x1 << 1) >>>> -/* MCA error can't be recovered and need reset */ >>>> -#define MCA_NEED_RESET (0x1 << 2) >>>> -/* MCA error did not have any action yet */ >>>> -#define MCA_NO_ACTION (0x1 << 3) >>>> - >>>> -struct mca_handle_result >>>> -{ >>>> - uint32_t result; >>>> - /* Used one result & MCA_OWNER */ >>>> - domid_t owner; >>>> - /* Used by mca_error_handler, result & MCA_RECOVRED */ >>>> - struct recovery_action *action; >>>> -}; >>>> - >>>> /*Keep bank so that we can get staus even if mib is NULL */ >>>> struct mca_binfo { int bank; >>>> @@ -163,8 +133,14 @@ struct mca_binfo { >>>> struct cpu_user_regs *regs; >>>> }; >>>> >>>> -extern void (*mca_prehandler)( struct cpu_user_regs *regs, >>>> - struct mca_handle_result *result); +enum >>>> mce_result +{ >>>> + MCER_NOERROR, >>>> + MCER_RECOVERED, >>>> + /* Not recoverd, but can continue */ >>>> + MCER_CONTINUE, >>>> + MCER_RESET, >>>> +}; >>>> >>>> struct mca_error_handler >>>> { >>>> @@ -175,7 +151,7 @@ struct mca_error_handler >>>> */ >>>> int (*owned_error)(uint64_t status); >>>> void (*recovery_handler)(struct mca_binfo *binfo, >>>> - struct mca_handle_result *result); >>>> + enum mce_result *result); >>>> }; >>>> >>>> /* Global variables */ _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |