[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 7] MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension
Ah no, I found it! I'll take a look at it on Monday. -- Keir On 14/05/2011 10:45, "Keir Fraser" <keir@xxxxxxx> wrote: > I don't seem to have this one in my queue. You should resend. > > -- Keir > > On 13/05/2011 15:34, "Liu, Jinsong" <jinsong.liu@xxxxxxxxx> wrote: > >> Jan and Keir, >> >> Any comments? >> >> Thanks >> Jinsong >> >> >> Liu, Jinsong wrote: >>> MCA: simplify mce actoin, and some update of mem err handler for >>> future SRAR extension >>> >>> This patch simplify mce_action(). Basically the 2 set of mce status >>> flags MCA_... and MCER_... are highly functional similar. This patch >>> remove the redundant middle-level flag MCA_..., and its related data >>> structure and function, so that mce_action() logic is more clean and >>> simpler. >>> This patch also update mem err handler. intel_memerr_dhandler() will >>> be commonly used by SRAO and SRAR, so for the extension of future >>> SRAR case, this patch remove the default fail flag from >>> intel_memerr_dhandler() outside to SRAO/SRAR level, since only >>> SRAO/SRAR handler itself has the knowledge of how to handle the >>> failure when mem err handler fail. >>> >>> Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx> >>> >>> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/mce_intel.c >>> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 13:39:40 2011 >>> +0800 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 18:15:13 >>> 2011 +0800 @@ -172,23 +172,14 @@ static unsigned int __read_mostly >>> mce_dh static unsigned int __read_mostly mce_dhandler_num; >>> static unsigned int __read_mostly mce_uhandler_num; >>> >>> -enum mce_result >>> -{ >>> - MCER_NOERROR, >>> - MCER_RECOVERED, >>> - /* Not recoverd, but can continue */ >>> - MCER_CONTINUE, >>> - MCER_RESET, >>> -}; >>> - >>> /* Maybe called in MCE context, no lock, no printk */ >>> static enum mce_result mce_action(struct cpu_user_regs *regs, >>> mctelem_cookie_t mctc) >>> { >>> struct mc_info *local_mi; >>> - enum mce_result ret = MCER_NOERROR; >>> + enum mce_result bank_result = MCER_NOERROR; >>> + enum mce_result worst_result = MCER_NOERROR; >>> struct mcinfo_common *mic = NULL; >>> - struct mca_handle_result mca_res; >>> struct mca_binfo binfo; >>> const struct mca_error_handler *handlers = mce_dhandlers; >>> unsigned int i, handler_num = mce_dhandler_num; >>> @@ -217,7 +208,7 @@ static enum mce_result mce_action(struct >>> /* Processing bank information */ >>> x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK); >>> >>> - for ( ; ret != MCER_RESET && mic && mic->size; >>> + for ( ; bank_result != MCER_RESET && mic && mic->size; >>> mic = x86_mcinfo_next(mic) ) >>> { >>> if (mic->type != MC_TYPE_BANK) { >>> @@ -225,34 +216,20 @@ static enum mce_result mce_action(struct >>> } >>> binfo.mib = (struct mcinfo_bank*)mic; >>> binfo.bank = binfo.mib->mc_bank; >>> - memset(&mca_res, 0x0f, sizeof(mca_res)); >>> + bank_result = MCER_NOERROR; >>> for ( i = 0; i < handler_num; i++ ) { >>> if (handlers[i].owned_error(binfo.mib->mc_status)) >>> { >>> - handlers[i].recovery_handler(&binfo, &mca_res); >>> - >>> - if (mca_res.result & MCA_OWNER) >>> - binfo.mib->mc_domid = mca_res.owner; >>> - >>> - if (mca_res.result == MCA_NEED_RESET) >>> - ret = MCER_RESET; >>> - else if (mca_res.result == MCA_RECOVERED) >>> - { >>> - if (ret < MCER_RECOVERED) >>> - ret = MCER_RECOVERED; >>> - } >>> - else if (mca_res.result == MCA_NO_ACTION) >>> - { >>> - if (ret < MCER_CONTINUE) >>> - ret = MCER_CONTINUE; >>> - } >>> + handlers[i].recovery_handler(&binfo, &bank_result); >>> + if (worst_result < bank_result) >>> + worst_result = bank_result; >>> break; >>> } >>> } >>> ASSERT(i != handler_num); >>> } >>> >>> - return ret; >>> + return worst_result; >>> } >>> >>> /* >>> @@ -608,7 +585,7 @@ struct mcinfo_recovery *mci_add_pageoff_ >>> >>> static void intel_memerr_dhandler( >>> struct mca_binfo *binfo, >>> - struct mca_handle_result *result) >>> + enum mce_result *result) >>> { >>> struct mcinfo_bank *bank = binfo->mib; >>> struct mcinfo_global *global = binfo->mig; >>> @@ -618,7 +595,6 @@ static void intel_memerr_dhandler( >>> uint64_t mc_status, mc_misc; >>> >>> mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n"); >>> - result->result = MCA_NEED_RESET; >>> >>> mc_status = bank->mc_status; >>> mc_misc = bank->mc_misc; >>> @@ -626,7 +602,6 @@ static void intel_memerr_dhandler( >>> !(mc_status & MCi_STATUS_MISCV) || >>> ((mc_misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) ) >>> { >>> - result->result |= MCA_NO_ACTION; >>> dprintk(XENLOG_WARNING, >>> "No physical address provided for memory error\n"); >>> return; >>> @@ -644,23 +619,19 @@ static void intel_memerr_dhandler( >>> >>> /* This is free page */ >>> if (status & PG_OFFLINE_OFFLINED) >>> - result->result = MCA_RECOVERED; >>> + *result = MCER_RECOVERED; >>> else if (status & PG_OFFLINE_PENDING) { >>> /* This page has owner */ >>> if (status & PG_OFFLINE_OWNED) { >>> - result->result |= MCA_OWNER; >>> - result->owner = status >> PG_OFFLINE_OWNER_SHIFT; >>> + bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT; >>> mce_printk(MCE_QUIET, "MCE: This error page is ownded" >>> - " by DOM %d\n", result->owner); >>> - /* Fill vMCE# injection and vMCE# MSR virtualization " >>> - * "related data */ >>> - bank->mc_domid = result->owner; >>> + " by DOM %d\n", bank->mc_domid); >>> /* XXX: Cannot handle shared pages yet >>> * (this should identify all domains and gfn mapping to >>> * the mfn in question) */ >>> - BUG_ON( result->owner == DOMID_COW ); >>> - if ( result->owner != DOMID_XEN ) { >>> - d = get_domain_by_id(result->owner); >>> + BUG_ON( bank->mc_domid == DOMID_COW ); >>> + if ( bank->mc_domid != DOMID_XEN ) { >>> + d = get_domain_by_id(bank->mc_domid); >>> ASSERT(d); >>> gfn = get_gpfn_from_mfn((bank->mc_addr) >> >>> PAGE_SHIFT); >>> >>> @@ -683,7 +654,7 @@ static void intel_memerr_dhandler( >>> global->mc_gstatus) == -1 ) >>> { >>> mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d >>> " - "failed\n", result->owner); >>> + "failed\n", bank->mc_domid); >>> goto vmce_failed; >>> } >>> >>> @@ -699,7 +670,7 @@ static void intel_memerr_dhandler( >>> * For xen, it has contained the error and finished >>> * its own recovery job. >>> */ >>> - result->result = MCA_RECOVERED; >>> + *result = MCER_RECOVERED; >>> put_domain(d); >>> >>> return; >>> @@ -718,11 +689,13 @@ static int intel_srao_check(uint64_t sta >>> >>> static void intel_srao_dhandler( >>> struct mca_binfo *binfo, >>> - struct mca_handle_result *result) >>> + enum mce_result *result) >>> { >>> uint64_t status = binfo->mib->mc_status; >>> >>> /* For unknown srao error code, no action required */ >>> + *result = MCER_CONTINUE; >>> + >>> if ( status & MCi_STATUS_VAL ) >>> { >>> switch ( status & INTEL_MCCOD_MASK ) >>> @@ -744,7 +717,7 @@ static int intel_default_check(uint64_t >>> >>> static void intel_default_mce_dhandler( >>> struct mca_binfo *binfo, >>> - struct mca_handle_result *result) >>> + enum mce_result *result) >>> { >>> uint64_t status = binfo->mib->mc_status; >>> enum intel_mce_type type; >>> @@ -752,9 +725,9 @@ static void intel_default_mce_dhandler( >>> type = intel_check_mce_type(status); >>> >>> if (type == intel_mce_fatal || type == intel_mce_ucr_srar) >>> - result->result = MCA_NEED_RESET; >>> - else if (type == intel_mce_ucr_srao) >>> - result->result = MCA_NO_ACTION; >>> + *result = MCER_RESET; >>> + else >>> + *result = MCER_CONTINUE; >>> } >>> >>> static const struct mca_error_handler intel_mce_dhandlers[] = { >>> @@ -764,7 +737,7 @@ static const struct mca_error_handler in >>> >>> static void intel_default_mce_uhandler( >>> struct mca_binfo *binfo, >>> - struct mca_handle_result *result) >>> + enum mce_result *result) >>> { >>> uint64_t status = binfo->mib->mc_status; >>> enum intel_mce_type type; >>> @@ -776,10 +749,10 @@ static void intel_default_mce_uhandler( >>> /* Panic if no handler for SRAR error */ >>> case intel_mce_ucr_srar: >>> case intel_mce_fatal: >>> - result->result = MCA_NEED_RESET; >>> + *result = MCER_RESET; >>> break; >>> default: >>> - result->result = MCA_NO_ACTION; >>> + *result = MCER_CONTINUE; >>> break; >>> } >>> } >>> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/x86_mca.h >>> --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 13:39:40 2011 +0800 >>> +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 18:15:13 2011 +0800 >>> @@ -124,36 +124,6 @@ void mcabanks_free(struct mca_banks *ban >>> void mcabanks_free(struct mca_banks *banks); >>> extern struct mca_banks *mca_allbanks; >>> >>> -/* Below interfaces are defined for MCA internal processing: >>> - * a. pre_handler will be called early in MCA ISR context, mainly >>> for early >>> - * need_reset detection for avoiding log missing. Also, it is >>> used to judge >>> - * impacted DOMAIN if possible. >>> - * b. mca_error_handler is actually a (error_action_index, >>> - * recovery_hanlder pointer) pair. The defined recovery_handler >>> - * performs the actual recovery operations such as page_offline, >>> cpu_offline >>> - * in softIRQ context when the per_bank MCA error matching the >>> corresponding >>> - * mca_code index. If pre_handler can't judge the impacted domain, >>> - * recovery_handler must figure it out. >>> -*/ >>> - >>> -/* MCA error has been recovered successfully by the recovery action*/ >>> -#define MCA_RECOVERED (0x1 << 0) >>> -/* MCA error impact the specified DOMAIN in owner field below */ >>> -#define MCA_OWNER (0x1 << 1) >>> -/* MCA error can't be recovered and need reset */ >>> -#define MCA_NEED_RESET (0x1 << 2) >>> -/* MCA error did not have any action yet */ >>> -#define MCA_NO_ACTION (0x1 << 3) >>> - >>> -struct mca_handle_result >>> -{ >>> - uint32_t result; >>> - /* Used one result & MCA_OWNER */ >>> - domid_t owner; >>> - /* Used by mca_error_handler, result & MCA_RECOVRED */ >>> - struct recovery_action *action; >>> -}; >>> - >>> /*Keep bank so that we can get staus even if mib is NULL */ >>> struct mca_binfo { >>> int bank; >>> @@ -163,8 +133,14 @@ struct mca_binfo { >>> struct cpu_user_regs *regs; >>> }; >>> >>> -extern void (*mca_prehandler)( struct cpu_user_regs *regs, >>> - struct mca_handle_result *result); >>> +enum mce_result >>> +{ >>> + MCER_NOERROR, >>> + MCER_RECOVERED, >>> + /* Not recoverd, but can continue */ >>> + MCER_CONTINUE, >>> + MCER_RESET, >>> +}; >>> >>> struct mca_error_handler >>> { >>> @@ -175,7 +151,7 @@ struct mca_error_handler >>> */ >>> int (*owned_error)(uint64_t status); >>> void (*recovery_handler)(struct mca_binfo *binfo, >>> - struct mca_handle_result *result); >>> + enum mce_result *result); >>> }; >>> >>> /* Global variables */ >> > > _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |