[xen master] x86/mwait-idle: disable IBRS during long idle
commit 08acdf9a26153130d7fa47925ceb53c39fcb87da
Author:     Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate: Thu Oct 13 17:55:22 2022 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Thu Oct 13 17:55:22 2022 +0200

    x86/mwait-idle: disable IBRS during long idle

    Having IBRS enabled while the SMT sibling is idle unnecessarily slows
    down the running sibling. OTOH, disabling IBRS around idle takes two
    MSR writes, which will increase the idle latency.

    Therefore, only disable IBRS around deeper idle states. Shallow idle
    states are bounded by the tick in duration, since NOHZ is not allowed
    for them by virtue of their short target residency.

    Only do this for mwait-driven idle, since that keeps interrupts disabled
    across idle, which makes disabling IBRS vs IRQ-entry a non-issue.

    Note: C6 is a random threshold, most importantly C1 probably shouldn't
    disable IBRS, benchmarking needed.

    Suggested-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
    Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
    Signed-off-by: Borislav Petkov <bp@xxxxxxx>
    Reviewed-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
    Signed-off-by: Borislav Petkov <bp@xxxxxxx>
    Origin: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git bf5835bcdb96
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Acked-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
    Release-acked-by: Henry Wang <Henry.Wang@xxxxxxx>
---
 xen/arch/x86/cpu/mwait-idle.c | 32 ++++++++++++++++++++++++--------
 xen/include/xen/cpuidle.h     |  3 ++-
 2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/xen/arch/x86/cpu/mwait-idle.c b/xen/arch/x86/cpu/mwait-idle.c
index 86c47a04c7..f5c83121a8 100644
--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -140,6 +140,12 @@ static const struct cpuidle_state {
  */
 #define CPUIDLE_FLAG_TLB_FLUSHED	0x10000
 
+/*
+ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
+ * above.
+ */
+#define CPUIDLE_FLAG_IBRS		0x20000
+
 /*
  * MWAIT takes an 8-bit "hint" in EAX "suggesting"
  * the C-state (top nibble) and sub-state (bottom nibble)
@@ -530,31 +536,31 @@ static struct cpuidle_state __read_mostly skl_cstates[] = {
 	},
 	{
 		.name = "C6",
-		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 85,
 		.target_residency = 200,
 	},
 	{
 		.name = "C7s",
-		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 124,
 		.target_residency = 800,
 	},
 	{
 		.name = "C8",
-		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 200,
 		.target_residency = 800,
 	},
 	{
 		.name = "C9",
-		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 480,
 		.target_residency = 5000,
 	},
 	{
 		.name = "C10",
-		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 890,
 		.target_residency = 5000,
 	},
@@ -576,7 +582,7 @@ static struct cpuidle_state __read_mostly skx_cstates[] = {
 	},
 	{
 		.name = "C6",
-		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 133,
 		.target_residency = 600,
 	},
@@ -906,6 +912,7 @@ static const struct cpuidle_state snr_cstates[] = {
 static void cf_check mwait_idle(void)
 {
 	unsigned int cpu = smp_processor_id();
+	struct cpu_info *info = get_cpu_info();
 	struct acpi_processor_power *power = processor_powers[cpu];
 	struct acpi_processor_cx *cx = NULL;
 	unsigned int next_state;
@@ -932,8 +939,6 @@ static void cf_check mwait_idle(void)
 			pm_idle_save();
 		else
 		{
-			struct cpu_info *info = get_cpu_info();
-
 			spec_ctrl_enter_idle(info);
 			safe_halt();
 			spec_ctrl_exit_idle(info);
@@ -960,6 +965,11 @@ static void cf_check mwait_idle(void)
 	if ((cx->type >= 3) && errata_c6_workaround())
 		cx = power->safe_state;
 
+	if (cx->ibrs_disable) {
+		ASSERT(!cx->irq_enable_early);
+		spec_ctrl_enter_idle(info);
+	}
+
 #if 0 /* XXX Can we/do we need to do something similar on Xen? */
 	/*
 	 * leave_mm() to avoid costly and often unnecessary wakeups
@@ -991,6 +1001,10 @@ static void cf_check mwait_idle(void)
 
 	/* Now back in C0. */
 	update_idle_stats(power, cx, before, after);
+
+	if (cx->ibrs_disable)
+		spec_ctrl_exit_idle(info);
+
 	local_irq_enable();
 
 	TRACE_6D(TRC_PM_IDLE_EXIT, cx->type, after,
@@ -1603,6 +1617,8 @@ static int cf_check mwait_idle_cpu_init(
 		    /* cstate_restore_tsc() needs to be a no-op */
 		    boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
 			cx->irq_enable_early = true;
+		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS)
+			cx->ibrs_disable = true;
 
 		dev->count++;
 	}
diff --git a/xen/include/xen/cpuidle.h b/xen/include/xen/cpuidle.h
index bd24a31e12..521a8deb04 100644
--- a/xen/include/xen/cpuidle.h
+++ b/xen/include/xen/cpuidle.h
@@ -42,7 +42,8 @@ struct acpi_processor_cx
     u8 idx;
     u8 type;         /* ACPI_STATE_Cn */
     u8 entry_method; /* ACPI_CSTATE_EM_xxx */
-    bool irq_enable_early;
+    bool irq_enable_early:1;
+    bool ibrs_disable:1;
     u32 address;
     u32 latency;
     u32 target_residency;
--
generated by git-patchbot for /home/xen/git/xen.git#master
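Editor's note: the commit above boils down to one pattern: clear the IBRS bit of IA32_SPEC_CTRL before entering a deep C-state and set it again on wakeup, paying the two extra MSR writes only where the state's long target residency amortises them. The following stand-alone C sketch illustrates that pattern only; it is not the Xen code from the patch, and fake_spec_ctrl, mwait_enter() and the deep_state parameter are illustrative placeholders for the real MSR accesses and MONITOR/MWAIT entry.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SPEC_CTRL_IBRS (1ull << 0)      /* IBRS is bit 0 of IA32_SPEC_CTRL */

/* Stand-in for the IA32_SPEC_CTRL MSR; real code would use rdmsr/wrmsr. */
static uint64_t fake_spec_ctrl = SPEC_CTRL_IBRS;   /* IBRS currently on */

static void mwait_enter(unsigned int hint)
{
	/* Placeholder for MONITOR/MWAIT; interrupts would stay disabled here. */
	printf("idle (hint %#x) with SPEC_CTRL=%#llx\n",
	       hint, (unsigned long long)fake_spec_ctrl);
}

static void idle_with_ibrs_toggle(unsigned int mwait_hint, bool deep_state)
{
	uint64_t saved = fake_spec_ctrl;

	/* Two extra MSR writes are only worth it for long-residency states. */
	if (deep_state && (saved & SPEC_CTRL_IBRS))
		fake_spec_ctrl = saved & ~SPEC_CTRL_IBRS;   /* "wrmsr": IBRS off */

	mwait_enter(mwait_hint);                            /* idle, sibling unhindered */

	if (deep_state && (saved & SPEC_CTRL_IBRS))
		fake_spec_ctrl = saved;                     /* "wrmsr": IBRS back on */
}

int main(void)
{
	idle_with_ibrs_toggle(0x20, true);    /* C6-style deep state: toggle IBRS */
	idle_with_ibrs_toggle(0x00, false);   /* shallow state: leave IBRS alone */
	return 0;
}

In the actual patch the deep/shallow decision is not passed at entry time: it is precomputed per C-state via CPUIDLE_FLAG_IBRS and latched into cx->ibrs_disable at init, and the toggling is done by the existing spec_ctrl_enter_idle()/spec_ctrl_exit_idle() helpers.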