[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH] amd: disable C6 after 1000 days on Fam17h models 30-3fh
As specified on Errata 1474: "A core will fail to exit CC6 after about 1044 days after the last system reset. The time of failure may vary depending on the spread spectrum and REFCLK frequency." Detect when running on AMD Fam17h models 30h-3fh and setup a timer to prevent entering C6 after 1000 days have elapsed. Take into account the TSC value at boot in order to account for any time elapsed before Xen has been booted. Print a message once C6 is disabled in order to let the user know. Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx> --- I think the only 30-3fh model is 31h (Rome/Castle Peak), but I've coded the check as to allow the whole range. --- xen/arch/x86/acpi/cpu_idle.c | 3 ++- xen/arch/x86/cpu/amd.c | 42 ++++++++++++++++++++++++++++++++++ xen/arch/x86/include/asm/amd.h | 2 ++ xen/include/xen/time.h | 1 + 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c index 427c8c89c5c4..452cba3fb953 100644 --- a/xen/arch/x86/acpi/cpu_idle.c +++ b/xen/arch/x86/acpi/cpu_idle.c @@ -50,6 +50,7 @@ #include <public/platform.h> #include <public/sysctl.h> #include <acpi/cpufreq/cpufreq.h> +#include <asm/amd.h> #include <asm/apic.h> #include <asm/cpuidle.h> #include <asm/mwait.h> @@ -643,7 +644,7 @@ bool errata_c6_workaround(void) x86_match_cpu(isr_errata)); } - return (fix_needed && cpu_has_pending_apic_eoi()); + return (fix_needed && cpu_has_pending_apic_eoi()) || amd_disable_c6; } void update_last_cx_stat(struct acpi_processor_power *power, diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c index 0d3143031b5b..728fa61a54bb 100644 --- a/xen/arch/x86/cpu/amd.c +++ b/xen/arch/x86/cpu/amd.c @@ -50,6 +50,7 @@ boolean_param("allow_unsafe", opt_allow_unsafe); bool __read_mostly amd_acpi_c1e_quirk; bool __ro_after_init amd_legacy_ssbd; bool __initdata amd_virt_spec_ctrl; +bool __read_mostly amd_disable_c6; static inline int rdmsr_amd_safe(unsigned int msr, unsigned int *lo, unsigned int *hi) @@ 
-1189,3 +1190,44 @@ const struct cpu_dev amd_cpu_dev = { .c_early_init = early_init_amd, .c_init = init_amd, }; + +static void cf_check disable_c6(void *arg) +{ + printk(XENLOG_WARNING + "Disabling C6 after 1000 days uptime due to AMD errata 1474\n"); + amd_disable_c6 = true; +} + +static int __init cf_check amd_c6_errata(void) +{ + /* + * Errata #1474: A Core May Hang After About 1044 Days + * Set up a timer to disable C6 after 1000 days uptime. + */ + s_time_t delta; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD || + boot_cpu_data.x86 != 0x17 || + (boot_cpu_data.x86_model & 0xf0) != 0x30) + return 0; + + /* + * Deduct current TSC value, this would be relevant if + * kexec'ed for example. + */ + delta = DAYS(1000) - tsc_ticks2ns(rdtsc()); + if (delta > 0) { + static struct timer errata_c6; + + init_timer(&errata_c6, disable_c6, NULL, 0); + set_timer(&errata_c6, NOW() + delta); + } else + disable_c6(NULL); + + return 0; +} +/* + * Must be executed after early_time_init() for tsc_ticks2ns() to have been + * calibrated. That prevents us doing the check in init_amd(). 
+ */ +presmp_initcall(amd_c6_errata); diff --git a/xen/arch/x86/include/asm/amd.h b/xen/arch/x86/include/asm/amd.h index 09ee52dc1c09..c54bc6a8903f 100644 --- a/xen/arch/x86/include/asm/amd.h +++ b/xen/arch/x86/include/asm/amd.h @@ -157,4 +157,6 @@ bool amd_setup_legacy_ssbd(void); void amd_set_legacy_ssbd(bool enable); void amd_set_cpuid_user_dis(bool enable); +extern bool amd_disable_c6; + #endif /* __AMD_H__ */ diff --git a/xen/include/xen/time.h b/xen/include/xen/time.h index b7427460dd13..99a91579438e 100644 --- a/xen/include/xen/time.h +++ b/xen/include/xen/time.h @@ -53,6 +53,7 @@ struct tm wallclock_time(uint64_t *ns); #define SYSTEM_TIME_HZ 1000000000ULL #define NOW() ((s_time_t)get_s_time()) +#define DAYS(_d) ((s_time_t)((_d) * 86400000000000ULL)) #define SECONDS(_s) ((s_time_t)((_s) * 1000000000ULL)) #define MILLISECS(_ms) ((s_time_t)((_ms) * 1000000ULL)) #define MICROSECS(_us) ((s_time_t)((_us) * 1000ULL)) -- 2.40.0
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |