[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH v3 4/5] x86/mwait-idle: disable IBRS during long idle
From: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Having IBRS enabled while the SMT sibling is idle unnecessarily slows down the running sibling. OTOH, disabling IBRS around idle takes two MSR writes, which will increase the idle latency. Therefore, only disable IBRS around deeper idle states. Shallow idle states are bounded by the tick in duration, since NOHZ is not allowed for them by virtue of their short target residency. Only do this for mwait-driven idle, since that keeps interrupts disabled across idle, which makes disabling IBRS vs IRQ-entry a non-issue. Note: C6 is a random threshold, most importantly C1 probably shouldn't disable IBRS, benchmarking needed. Suggested-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> Signed-off-by: Borislav Petkov <bp@xxxxxxx> Reviewed-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx> Signed-off-by: Borislav Petkov <bp@xxxxxxx> Origin: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git bf5835bcdb96 Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> --- v3: New. --- a/xen/arch/x86/cpu/mwait-idle.c +++ b/xen/arch/x86/cpu/mwait-idle.c @@ -141,6 +141,12 @@ static const struct cpuidle_state { #define CPUIDLE_FLAG_TLB_FLUSHED 0x10000 /* + * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE + * above. + */ +#define CPUIDLE_FLAG_IBRS 0x20000 + +/* * MWAIT takes an 8-bit "hint" in EAX "suggesting" * the C-state (top nibble) and sub-state (bottom nibble) * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. @@ -530,31 +536,31 @@ static struct cpuidle_state __read_mostl }, { .name = "C6", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 85, .target_residency = 200, }, { .name = "C7s", - .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 124, .target_residency = 800, }, { .name = "C8", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 200, .target_residency = 800, }, { .name = "C9", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 480, .target_residency = 5000, }, { .name = "C10", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 890, .target_residency = 5000, }, @@ -576,7 +582,7 @@ static struct cpuidle_state __read_mostl }, { .name = "C6", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 133, .target_residency = 600, }, @@ -906,6 +912,7 @@ static const struct cpuidle_state snr_cs static void cf_check mwait_idle(void) { unsigned int cpu = smp_processor_id(); + struct cpu_info *info = get_cpu_info(); struct acpi_processor_power *power = processor_powers[cpu]; struct acpi_processor_cx *cx = NULL; unsigned int next_state; @@ -932,8 +939,6 @@ static void cf_check mwait_idle(void) pm_idle_save(); else { - struct cpu_info *info = get_cpu_info(); - spec_ctrl_enter_idle(info); safe_halt(); spec_ctrl_exit_idle(info); @@ -960,6 +965,11 @@ static void cf_check mwait_idle(void) if ((cx->type >= 3) && errata_c6_workaround()) cx = power->safe_state; + if (cx->ibrs_disable) { + ASSERT(!cx->irq_enable_early); + spec_ctrl_enter_idle(info); + } + #if 0 /* XXX Can we/do we need to do something similar on Xen? */ /* * leave_mm() to avoid costly and often unnecessary wakeups @@ -991,6 +1001,10 @@ static void cf_check mwait_idle(void) /* Now back in C0. */ update_idle_stats(power, cx, before, after); + + if (cx->ibrs_disable) + spec_ctrl_exit_idle(info); + local_irq_enable(); TRACE_6D(TRC_PM_IDLE_EXIT, cx->type, after, @@ -1603,6 +1617,8 @@ static int cf_check mwait_idle_cpu_init( /* cstate_restore_tsc() needs to be a no-op */ boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) cx->irq_enable_early = true; + if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) + cx->ibrs_disable = true; dev->count++; } --- a/xen/include/xen/cpuidle.h +++ b/xen/include/xen/cpuidle.h @@ -42,7 +42,8 @@ struct acpi_processor_cx u8 idx; u8 type; /* ACPI_STATE_Cn */ u8 entry_method; /* ACPI_CSTATE_EM_xxx */ - bool irq_enable_early; + bool irq_enable_early:1; + bool ibrs_disable:1; u32 address; u32 latency; u32 target_residency;
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |