[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] mwait-idle: prevent SKL-H boot failure when C8+C9+C10 enabled



>>> On 30.03.16 at 10:34, <JBeulich@xxxxxxxx> wrote:
> Some SKL-H configurations require "max_cstate=7" to boot.
> While that is an effective workaround, it disables C10.
> 
> This patch detects the problematic configuration,
> and disables C8 and C9, keeping C10 enabled.
> 
> Note that enabling SGX in BIOS SETUP can also prevent this issue,
> if the system BIOS provides that option.
> 
> https://bugzilla.kernel.org/show_bug.cgi?id=109081 
> "Freezes with Intel i7 6700HQ (Skylake), unless intel_idle.max_cstate=7"
> 
> Signed-off-by: Len Brown <len.brown@xxxxxxxxx>

[Linux commit: d70e28f57e14a481977436695b0c9ba165472431]

> Adjust to Xen infrastructure.
> 
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> 
> --- a/xen/arch/x86/cpu/mwait-idle.c
> +++ b/xen/arch/x86/cpu/mwait-idle.c
> @@ -60,7 +60,7 @@
>  #include <asm/msr.h>
>  #include <acpi/cpufreq/cpufreq.h>
>  
> -#define MWAIT_IDLE_VERSION "0.4"
> +#define MWAIT_IDLE_VERSION "0.4.1"
>  #undef PREFIX
>  #define PREFIX "mwait-idle: "
>  
> @@ -100,6 +100,7 @@ static const struct cpuidle_state {
>       unsigned int    target_residency; /* in US */
>  } *cpuidle_state_table;
>  
> +#define CPUIDLE_FLAG_DISABLED                0x1
>  /*
>   * Set this flag for states where the HW flushes the TLB for us
>   * and so we don't need cross-calls to keep it consistent.
> @@ -477,7 +478,7 @@ static const struct cpuidle_state bdw_cs
>       {}
>  };
>  
> -static const struct cpuidle_state skl_cstates[] = {
> +static struct cpuidle_state skl_cstates[] = {
>       {
>               .name = "C1-SKL",
>               .flags = MWAIT2flg(0x00),
> @@ -781,34 +782,84 @@ static const struct x86_cpu_id intel_idl
>  };
>  
>  /*
> - * mwait_idle_state_table_update()
> - *
> - * Update the default state_table for this CPU-id
> + * ivt_idle_state_table_update(void)
>   *
> - * Currently used to access tuned IVT multi-socket targets
> + * Tune IVT multi-socket targets
>   * Assumption: num_sockets == (max_package_num + 1)
>   */
> -static void __init mwait_idle_state_table_update(void)
> +static void __init ivt_idle_state_table_update(void)
>  {
>       /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
> -     if (boot_cpu_data.x86_model == 0x3e) { /* IVT */
> -             unsigned int cpu, max_apicid = boot_cpu_physical_apicid;
> +     unsigned int cpu, max_apicid = boot_cpu_physical_apicid;
>  
> -             for_each_present_cpu(cpu)
> -                     if (max_apicid < x86_cpu_to_apicid[cpu])
> -                             max_apicid = x86_cpu_to_apicid[cpu];
> -             switch (apicid_to_socket(max_apicid)) {
> -             case 0: case 1:
> -                     /* 1 and 2 socket systems use default ivt_cstates */
> -                     break;
> -             case 2: case 3:
> -                     cpuidle_state_table = ivt_cstates_4s;
> -                     break;
> -             default:
> -                     cpuidle_state_table = ivt_cstates_8s;
> -                     break;
> -             }
> +     for_each_present_cpu(cpu)
> +             if (max_apicid < x86_cpu_to_apicid[cpu])
> +                     max_apicid = x86_cpu_to_apicid[cpu];
> +     switch (apicid_to_socket(max_apicid)) {
> +     case 0: case 1:
> +             /* 1 and 2 socket systems use default ivt_cstates */
> +             break;
> +     case 2: case 3:
> +             cpuidle_state_table = ivt_cstates_4s;
> +             break;
> +     default:
> +             cpuidle_state_table = ivt_cstates_8s;
> +             break;
> +     }
> +}
> +
> +/*
> + * sklh_idle_state_table_update(void)
> + *
> + * On SKL-H (model 0x5e) disable C8 and C9 if:
> + * C10 is enabled and SGX disabled
> + */
> +static void sklh_idle_state_table_update(void)
> +{
> +     u64 msr;
> +
> +     /* if PC10 disabled via cmdline max_cstate=7 or shallower */
> +     if (max_cstate <= 7)
> +             return;
> +
> +     /* if PC10 not present in CPUID.MWAIT.EDX */
> +     if ((mwait_substates & (MWAIT_CSTATE_MASK << 28)) == 0)
> +             return;
> +
> +     rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr);
> +
> +     /* PC10 is not enabled in PKG C-state limit */
> +     if ((msr & 0xF) != 8)
> +             return;
> +
> +     /* if SGX is present */
> +     if (boot_cpu_has(X86_FEATURE_SGX)) {
> +             rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
> +
> +             /* if SGX is enabled */
> +             if (msr & (1 << 18))
> +                     return;
>       }
> +
> +     skl_cstates[5].flags |= CPUIDLE_FLAG_DISABLED;  /* C8-SKL */
> +     skl_cstates[6].flags |= CPUIDLE_FLAG_DISABLED;  /* C9-SKL */
> +}
> +
> +/*
> + * mwait_idle_state_table_update()
> + *
> + * Update the default state_table for this CPU-id
> + */
> +static void __init mwait_idle_state_table_update(void)
> +{
> +     switch (boot_cpu_data.x86_model) {
> +     case 0x3e: /* IVT */
> +             ivt_idle_state_table_update();
> +             break;
> +     case 0x5e: /* SKL-H */
> +             sklh_idle_state_table_update();
> +             break;
> +     }
>  }
>  
>  static int __init mwait_idle_probe(void)
> @@ -897,6 +948,14 @@ static int mwait_idle_cpu_init(struct no
>               if (num_substates == 0)
>                       continue;
>  
> +             /* if state marked as disabled, skip it */
> +             if (cpuidle_state_table[cstate].flags &
> +                 CPUIDLE_FLAG_DISABLED) {
> +                     printk(XENLOG_DEBUG PREFIX "state %s is disabled",
> +                            cpuidle_state_table[cstate].name);
> +                     continue;
> +             }
> +
>               if (dev->count >= ACPI_PROCESSOR_MAX_POWER) {
>                       printk(PREFIX "max C-state count of %u reached\n",
>                              ACPI_PROCESSOR_MAX_POWER);
> --- a/xen/include/asm-x86/msr-index.h
> +++ b/xen/include/asm-x86/msr-index.h
> @@ -288,6 +288,7 @@
>  #define MSR_IA32_PLATFORM_ID         0x00000017
>  #define MSR_IA32_EBL_CR_POWERON              0x0000002a
>  #define MSR_IA32_EBC_FREQUENCY_ID    0x0000002c
> +#define MSR_IA32_FEATURE_CONTROL     0x0000003a
>  #define MSR_IA32_TSC_ADJUST          0x0000003b
>  
>  #define MSR_IA32_APICBASE            0x0000001b
> --- a/xen/include/public/arch-x86/cpufeatureset.h
> +++ b/xen/include/public/arch-x86/cpufeatureset.h
> @@ -186,6 +186,7 @@
>  /* Intel-defined CPU features, CPUID level 0x00000007:0.ebx, word 5 */
>  XEN_CPUFEATURE(FSGSBASE,      5*32+ 0) /*   {RD,WR}{FS,GS}BASE instructions 
> */
>  XEN_CPUFEATURE(TSC_ADJUST,    5*32+ 1) /*   TSC_ADJUST MSR available */
> +XEN_CPUFEATURE(SGX,           5*32+ 2) /*   Software Guard extensions */
>  XEN_CPUFEATURE(BMI1,          5*32+ 3) /*   1st bit manipulation extensions 
> */
>  XEN_CPUFEATURE(HLE,           5*32+ 4) /*   Hardware Lock Elision */
>  XEN_CPUFEATURE(AVX2,          5*32+ 5) /*   AVX2 instructions */



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.