x86, amd: Disable GartTlbWlkErr when BIOS forgets it This patch disables GartTlbWlk errors on AMD Fam10h CPUs if the BIOS forgets to do it (or is just too old). Leaving these errors enabled can cause a sync-flood on the CPU, causing a reboot. The AMD BKDG recommends disabling GART TLB Wlk Error completely. Based on a Linux patch from Joerg Roedel; see e.g. https://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=patch;h=5bbc097d890409d8eff4e3f1d26f11a9d6b7c07e Signed-off-by: Jan Beulich --- 2011-11-23.orig/xen/arch/x86/cpu/mcheck/amd_f10.c 2011-12-05 11:38:34.000000000 +0100 +++ 2011-11-23/xen/arch/x86/cpu/mcheck/amd_f10.c 2011-12-05 11:17:03.000000000 +0100 @@ -46,6 +46,7 @@ #include #include "mce.h" +#include "mce_quirks.h" #include "x86_mca.h" @@ -91,9 +92,14 @@ amd_f10_handler(struct mc_info *mi, uint /* AMD Family10 machine check */ enum mcheck_type amd_f10_mcheck_init(struct cpuinfo_x86 *c) { + enum mcequirk_amd_flags quirkflag = mcequirk_lookup_amd_quirkdata(c); + if (amd_k8_mcheck_init(c) == mcheck_none) return mcheck_none; + if (quirkflag == MCEQUIRK_F10_GART) + mcequirk_amd_apply(quirkflag); + x86_mce_callback_register(amd_f10_handler); return mcheck_amd_famXX; --- 2011-11-23.orig/xen/arch/x86/cpu/mcheck/mce_amd_quirks.c 2011-12-05 11:38:34.000000000 +0100 +++ 2011-11-23/xen/arch/x86/cpu/mcheck/mce_amd_quirks.c 2011-12-05 11:41:00.000000000 +0100 @@ -29,6 +29,8 @@ static const struct mce_quirkdata mce_am MCEQUIRK_K7_BANK0 }, { 0xf /* cpu family */, ANY /* all models */, ANY /* all steppings */, MCEQUIRK_K8_GART }, + { 0x10 /* cpu family */, ANY /* all models */, ANY /* all steppings */, + MCEQUIRK_F10_GART }, }; enum mcequirk_amd_flags @@ -54,6 +56,8 @@ mcequirk_lookup_amd_quirkdata(struct cpu int mcequirk_amd_apply(enum mcequirk_amd_flags flags) { + u64 val; + switch (flags) { case MCEQUIRK_K7_BANK0: return 1; /* first bank */ @@ -67,6 +71,10 @@ int mcequirk_amd_apply(enum mcequirk_amd wrmsrl(MSR_IA32_MC4_CTL, ~(1ULL << 10)); 
wrmsrl(MSR_IA32_MC4_STATUS, 0ULL); break; + case MCEQUIRK_F10_GART: + if (rdmsr_safe(MSR_AMD64_MCx_MASK(4), val) == 0) + wrmsr_safe(MSR_AMD64_MCx_MASK(4), val | (1 << 10)); + break; } return 0; --- 2011-11-23.orig/xen/arch/x86/cpu/mcheck/mce_quirks.h 2011-12-05 11:38:34.000000000 +0100 +++ 2011-11-23/xen/arch/x86/cpu/mcheck/mce_quirks.h 2011-12-05 11:11:45.000000000 +0100 @@ -33,8 +33,9 @@ struct mce_quirkdata { */ enum mcequirk_amd_flags { - MCEQUIRK_K7_BANK0 = 0x1, - MCEQUIRK_K8_GART = 0x2, + MCEQUIRK_K7_BANK0 = 1, + MCEQUIRK_K8_GART, + MCEQUIRK_F10_GART }; enum mcequirk_intel_flags { --- 2011-11-23.orig/xen/include/asm-x86/msr-index.h 2011-12-05 11:38:34.000000000 +0100 +++ 2011-11-23/xen/include/asm-x86/msr-index.h 2011-12-05 11:06:39.000000000 +0100 @@ -98,6 +98,8 @@ #define CMCI_EN (1UL<<30) #define CMCI_THRESHOLD_MASK 0x7FFF +#define MSR_AMD64_MC0_MASK 0xc0010044 + #define MSR_IA32_MC1_CTL 0x00000404 #define MSR_IA32_MC1_CTL2 0x00000281 #define MSR_IA32_MC1_STATUS 0x00000405 @@ -151,6 +153,8 @@ #define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x)) #define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x)) +#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x)) + #define MSR_P6_PERFCTR0 0x000000c1 #define MSR_P6_PERFCTR1 0x000000c2 #define MSR_P6_EVNTSEL0 0x00000186