[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen staging] x86/mwait-idle: disable IBRS during long idle



commit 08acdf9a26153130d7fa47925ceb53c39fcb87da
Author:     Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate: Thu Oct 13 17:55:22 2022 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Thu Oct 13 17:55:22 2022 +0200

    x86/mwait-idle: disable IBRS during long idle
    
    Having IBRS enabled while the SMT sibling is idle unnecessarily slows
    down the running sibling. OTOH, disabling IBRS around idle takes two
    MSR writes, which will increase the idle latency.
    
    Therefore, only disable IBRS around deeper idle states. Shallow idle
    states are bounded by the tick in duration, since NOHZ is not allowed
    for them by virtue of their short target residency.
    
    Only do this for mwait-driven idle, since that keeps interrupts disabled
    across idle, which makes disabling IBRS vs IRQ-entry a non-issue.
    
    Note: C6 is a random threshold, most importantly C1 probably shouldn't
    disable IBRS, benchmarking needed.
    
    Suggested-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
    Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
    Signed-off-by: Borislav Petkov <bp@xxxxxxx>
    Reviewed-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
    Signed-off-by: Borislav Petkov <bp@xxxxxxx>
    Origin: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git bf5835bcdb96
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Acked-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
    Release-acked-by: Henry Wang <Henry.Wang@xxxxxxx>
---
 xen/arch/x86/cpu/mwait-idle.c | 32 ++++++++++++++++++++++++--------
 xen/include/xen/cpuidle.h     |  3 ++-
 2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/xen/arch/x86/cpu/mwait-idle.c b/xen/arch/x86/cpu/mwait-idle.c
index 86c47a04c7..f5c83121a8 100644
--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -140,6 +140,12 @@ static const struct cpuidle_state {
  */
 #define CPUIDLE_FLAG_TLB_FLUSHED       0x10000
 
+/*
+ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
+ * above.
+ */
+#define CPUIDLE_FLAG_IBRS              0x20000
+
 /*
  * MWAIT takes an 8-bit "hint" in EAX "suggesting"
  * the C-state (top nibble) and sub-state (bottom nibble)
@@ -530,31 +536,31 @@ static struct cpuidle_state __read_mostly skl_cstates[] = {
        },
        {
                .name = "C6",
-               .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
                .exit_latency = 85,
                .target_residency = 200,
        },
        {
                .name = "C7s",
-               .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
                .exit_latency = 124,
                .target_residency = 800,
        },
        {
                .name = "C8",
-               .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
                .exit_latency = 200,
                .target_residency = 800,
        },
        {
                .name = "C9",
-               .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
                .exit_latency = 480,
                .target_residency = 5000,
        },
        {
                .name = "C10",
-               .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
                .exit_latency = 890,
                .target_residency = 5000,
        },
@@ -576,7 +582,7 @@ static struct cpuidle_state __read_mostly skx_cstates[] = {
        },
        {
                .name = "C6",
-               .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
                .exit_latency = 133,
                .target_residency = 600,
        },
@@ -906,6 +912,7 @@ static const struct cpuidle_state snr_cstates[] = {
 static void cf_check mwait_idle(void)
 {
        unsigned int cpu = smp_processor_id();
+       struct cpu_info *info = get_cpu_info();
        struct acpi_processor_power *power = processor_powers[cpu];
        struct acpi_processor_cx *cx = NULL;
        unsigned int next_state;
@@ -932,8 +939,6 @@ static void cf_check mwait_idle(void)
                        pm_idle_save();
                else
                {
-                       struct cpu_info *info = get_cpu_info();
-
                        spec_ctrl_enter_idle(info);
                        safe_halt();
                        spec_ctrl_exit_idle(info);
@@ -960,6 +965,11 @@ static void cf_check mwait_idle(void)
        if ((cx->type >= 3) && errata_c6_workaround())
                cx = power->safe_state;
 
+       if (cx->ibrs_disable) {
+               ASSERT(!cx->irq_enable_early);
+               spec_ctrl_enter_idle(info);
+       }
+
 #if 0 /* XXX Can we/do we need to do something similar on Xen? */
        /*
         * leave_mm() to avoid costly and often unnecessary wakeups
@@ -991,6 +1001,10 @@ static void cf_check mwait_idle(void)
 
        /* Now back in C0. */
        update_idle_stats(power, cx, before, after);
+
+       if (cx->ibrs_disable)
+               spec_ctrl_exit_idle(info);
+
        local_irq_enable();
 
        TRACE_6D(TRC_PM_IDLE_EXIT, cx->type, after,
@@ -1603,6 +1617,8 @@ static int cf_check mwait_idle_cpu_init(
                    /* cstate_restore_tsc() needs to be a no-op */
                    boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
                        cx->irq_enable_early = true;
+               if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS)
+                       cx->ibrs_disable = true;
 
                dev->count++;
        }
diff --git a/xen/include/xen/cpuidle.h b/xen/include/xen/cpuidle.h
index bd24a31e12..521a8deb04 100644
--- a/xen/include/xen/cpuidle.h
+++ b/xen/include/xen/cpuidle.h
@@ -42,7 +42,8 @@ struct acpi_processor_cx
     u8 idx;
     u8 type;         /* ACPI_STATE_Cn */
     u8 entry_method; /* ACPI_CSTATE_EM_xxx */
-    bool irq_enable_early;
+    bool irq_enable_early:1;
+    bool ibrs_disable:1;
     u32 address;
     u32 latency;
     u32 target_residency;
--
generated by git-patchbot for /home/xen/git/xen.git#staging



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.