[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] MCA interfaces between XEN/DOM0, let DOM0 know the MCA recovery action
# HG changeset patch # User Keir Fraser <keir.fraser@xxxxxxxxxx> # Date 1237569929 0 # Node ID 891af2c54155afc4ca47a8e8eb8f6865b2f76f0f # Parent cc60defe5b9697ab0e068caa4fd1f8798bfe5104 MCA interfaces between XEN/DOM0, let DOM0 know the MCA recovery action Signed-off-by: Jiang, yunhong <yunhong.jiang@xxxxxxxxx> Signed-off-by: Ke, liping <liping.ke@xxxxxxxxx> --- xen/arch/x86/cpu/mcheck/x86_mca.h | 47 +++++++++++++++++++++++++ xen/include/public/arch-x86/xen-mca.h | 63 ++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) diff -r cc60defe5b96 -r 891af2c54155 xen/arch/x86/cpu/mcheck/x86_mca.h --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Fri Mar 20 17:24:53 2009 +0000 +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Fri Mar 20 17:25:29 2009 +0000 @@ -87,6 +87,53 @@ typedef DECLARE_BITMAP(cpu_banks_t, MAX_ typedef DECLARE_BITMAP(cpu_banks_t, MAX_NR_BANKS); DECLARE_PER_CPU(cpu_banks_t, mce_banks_owned); +/* Below interfaces are defined for MCA internal processing: + * a. pre_handler will be called early in MCA ISR context, mainly for early + * need_reset detection to avoid losing logs. Also, it is used to judge the + * impacted DOMAIN if possible. + * b. mca_error_handler is actually a (error_action_index, + * recovery_handler pointer) pair. The defined recovery_handler + * performs the actual recovery operations such as page_offline, cpu_offline + * in softIRQ context when the per_bank MCA error matches the corresponding + * mca_code index. If pre_handler can't judge the impacted domain, + * recovery_handler must figure it out. 
+*/
+
+/* MCA error has been recovered successfully by the recovery action */
+#define MCA_RECOVERED (0x1 << 0)
+/* MCA error impacts the DOMAIN specified in the owner field below */
+#define MCA_OWNER (0x1 << 1)
+/* MCA error can't be recovered and needs a reset */
+#define MCA_NEED_RESET (0x1 << 2)
+/* MCA error needs further actions in softIRQ context for recovery */
+#define MCA_MORE_ACTION (0x1 << 3)
+
+struct mca_handle_result
+{
+    uint32_t result; /* bitwise OR of the MCA_* flags above */
+    /* Used when result & MCA_OWNER */
+    domid_t owner;
+    /* Used by mca_error_handler, when result & MCA_RECOVERED */
+    struct recovery_action *action;
+};
+
+extern void (*mca_prehandler)( struct cpu_user_regs *regs,
+                struct mca_handle_result *result);
+
+struct mca_error_handler
+{
+    /* Assume the corresponding recovery action can be uniquely
+     * identified by mca_code. Otherwise, we might need to have
+     * a separate function to decode the corresponding actions
+     * for the particular mca error later.
+    */
+    uint16_t mca_code;
+    void (*recovery_handler)( struct mcinfo_bank *bank,
+                struct mcinfo_global *global,
+                struct mcinfo_extended *extension,
+                struct mca_handle_result *result);
+};
+
 /* Global variables */
 extern int mce_disabled;
 extern unsigned int nr_mce_banks;
diff -r cc60defe5b96 -r 891af2c54155 xen/include/public/arch-x86/xen-mca.h
--- a/xen/include/public/arch-x86/xen-mca.h Fri Mar 20 17:24:53 2009 +0000
+++ b/xen/include/public/arch-x86/xen-mca.h Fri Mar 20 17:25:29 2009 +0000
@@ -104,6 +104,7 @@
 #define MC_TYPE_GLOBAL 0
 #define MC_TYPE_BANK 1
 #define MC_TYPE_EXTENDED 2
+#define MC_TYPE_RECOVERY 3
 
 struct mcinfo_common {
     uint16_t type; /* structure type */
@@ -171,6 +172,68 @@ struct mcinfo_extended {
      */
     struct mcinfo_msr mc_msr[10];
 };
+
+/* Recovery Action flags.
Giving recovery result information to DOM0 */ + +/* Xen took a successful recovery action; the error is recovered */ +#define REC_ACTION_RECOVERED (0x1 << 0) +/* No action is performed by XEN */ +#define REC_ACTION_NONE (0x1 << 1) +/* It's possible DOM0 might take action ownership in some cases */ +#define REC_ACTION_NEED_RESET (0x1 << 2) + +/* Different Recovery Action types. If the action is performed successfully, + * the REC_ACTION_RECOVERED flag will be returned. + */ + +/* Page Offline Action */ +#define MC_ACTION_PAGE_OFFLINE (0x1 << 0) +/* CPU offline Action */ +#define MC_ACTION_CPU_OFFLINE (0x1 << 1) +/* L3 cache disable Action */ +#define MC_ACTION_CACHE_SHRINK (0x1 << 2) + +/* The interface below is used between XEN/DOM0 for passing XEN's recovery + * action information to DOM0. + * Usage scenario: After offlining a broken page, XEN might pass its page offline + * recovery action result to DOM0. DOM0 will save the information in + * non-volatile memory for further proactive actions, such as offlining the + * easily broken page earlier when doing the next reboot. +*/ +struct page_offline_action +{ + /* Params for passing the offlined page number to DOM0 */ + uint64_t mfn; + uint64_t status; +}; + +struct cpu_offline_action +{ + /* Params for passing the identity of the offlined CPU to DOM0 */ + uint32_t mc_socketid; + uint16_t mc_coreid; + uint16_t mc_core_threadid; +}; + +#define MAX_UNION_SIZE 16 +struct mc_recovery +{ + uint16_t mc_bank; /* bank number */ + uint8_t action_flags; /* presumably REC_ACTION_* bits -- TODO confirm */ + uint8_t action_types; /* presumably MC_ACTION_* bits -- TODO confirm */ + union { + struct page_offline_action page_retire; + struct cpu_offline_action cpu_offline; + uint8_t pad[MAX_UNION_SIZE]; /* keeps the union at least MAX_UNION_SIZE bytes */ + } action_info; +}; + +struct mcinfo_recovery /* presumably tagged with common.type == MC_TYPE_RECOVERY -- TODO confirm */ +{ + struct mcinfo_common common; + struct mc_recovery mc_action; +}; + #define MCINFO_HYPERCALLSIZE 1024 #define MCINFO_MAXSIZE 768 _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |