[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v3 4/5] x86/mwait-idle: disable IBRS during long idle


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Thu, 18 Aug 2022 15:04:51 +0200
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=suse.com; dmarc=pass action=none header.from=suse.com; dkim=pass header.d=suse.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=JmYPLXj1oFALypXZrKMo7JnkebrUsMPgDdXSnPxVHFw=; b=WVPxuo37uieUih/BDu/GG1ddNyaDCk1K4YVx81OanLyX57Mi6qfmsjbHZx8SgYvQB2q02rBiKr/blrOJTYzrE3dUP6YQaFwEiGbHZXrvdW61xxgwWJMcrB3vCeChc+T1OukoX9ISd3I2liiD9E9tuG4c2bfp4EVNGJssdMHRtmZ7mF+XMG2l2eV/iwITuwJHKlJLgC6NwSpwHoeLrWQ02G6TKd8SkJZ/2pmasUsqJI2DqtnJitybH/62G9mTE7jjb1+sdVvW2pHT/ABskClSYVxZXF+ilD5FUNyScAGcxeO5j/58en86whmjkTRVGwHcBBZytcnjYJ6kGtFEQaUPjQ==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=m2HnhP9YZkSDdlmjNmJDXCo/StiUW5T4KzDiHbcUlonLeTTQcvnXC8i4eGtgOmZss5Ke4xMt7ScpJV6cMxYTI03jSY1o3M2raCk9y01v+IVFfc3aM3Pbn8aPOlUo0LB60ffnvnvHmK6NkQSy7Sm72oLCYXR36grhB1OjuikikzM5R0upr2Eq6NtsPyGWcaQhp0qayFrHcCj3PfzkMne3fCWhmZ2wc0CtYjCYA1HfOIZqDnCKxp3EerRrFv4bXBiFX4mMpHoxPAfs8pkN5q2cizXoMxgWCtQQZyfOY3tg8n3ghGzBXU6dZFe8sYiEPR3jIPt4OGTc0r0NfwvC//VuSQ==
  • Authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=suse.com;
  • Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>
  • Delivery-date: Thu, 18 Aug 2022 13:04:58 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>

Having IBRS enabled while the SMT sibling is idle unnecessarily slows
down the running sibling. OTOH, disabling IBRS around idle takes two
MSR writes, which will increase the idle latency.

Therefore, only disable IBRS around deeper idle states. Shallow idle
states are bounded by the tick in duration, since NOHZ is not allowed
for them by virtue of their short target residency.

Only do this for mwait-driven idle, since that keeps interrupts disabled
across idle, which makes disabling IBRS vs IRQ-entry a non-issue.

Note: C6 is a random threshold, most importantly C1 probably shouldn't
disable IBRS, benchmarking needed.

Suggested-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Signed-off-by: Borislav Petkov <bp@xxxxxxx>
Reviewed-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
Signed-off-by: Borislav Petkov <bp@xxxxxxx>
Origin: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
bf5835bcdb96
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v3: New.

--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -141,6 +141,12 @@ static const struct cpuidle_state {
 #define CPUIDLE_FLAG_TLB_FLUSHED       0x10000
 
 /*
+ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
+ * above.
+ */
+#define CPUIDLE_FLAG_IBRS              0x20000
+
+/*
  * MWAIT takes an 8-bit "hint" in EAX "suggesting"
  * the C-state (top nibble) and sub-state (bottom nibble)
  * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
@@ -530,31 +536,31 @@ static struct cpuidle_state __read_mostl
        },
        {
                .name = "C6",
-               .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | 
CPUIDLE_FLAG_IBRS,
                .exit_latency = 85,
                .target_residency = 200,
        },
        {
                .name = "C7s",
-               .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | 
CPUIDLE_FLAG_IBRS,
                .exit_latency = 124,
                .target_residency = 800,
        },
        {
                .name = "C8",
-               .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | 
CPUIDLE_FLAG_IBRS,
                .exit_latency = 200,
                .target_residency = 800,
        },
        {
                .name = "C9",
-               .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | 
CPUIDLE_FLAG_IBRS,
                .exit_latency = 480,
                .target_residency = 5000,
        },
        {
                .name = "C10",
-               .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | 
CPUIDLE_FLAG_IBRS,
                .exit_latency = 890,
                .target_residency = 5000,
        },
@@ -576,7 +582,7 @@ static struct cpuidle_state __read_mostl
        },
        {
                .name = "C6",
-               .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | 
CPUIDLE_FLAG_IBRS,
                .exit_latency = 133,
                .target_residency = 600,
        },
@@ -906,6 +912,7 @@ static const struct cpuidle_state snr_cs
 static void cf_check mwait_idle(void)
 {
        unsigned int cpu = smp_processor_id();
+       struct cpu_info *info = get_cpu_info();
        struct acpi_processor_power *power = processor_powers[cpu];
        struct acpi_processor_cx *cx = NULL;
        unsigned int next_state;
@@ -932,8 +939,6 @@ static void cf_check mwait_idle(void)
                        pm_idle_save();
                else
                {
-                       struct cpu_info *info = get_cpu_info();
-
                        spec_ctrl_enter_idle(info);
                        safe_halt();
                        spec_ctrl_exit_idle(info);
@@ -960,6 +965,11 @@ static void cf_check mwait_idle(void)
        if ((cx->type >= 3) && errata_c6_workaround())
                cx = power->safe_state;
 
+       if (cx->ibrs_disable) {
+               ASSERT(!cx->irq_enable_early);
+               spec_ctrl_enter_idle(info);
+       }
+
 #if 0 /* XXX Can we/do we need to do something similar on Xen? */
        /*
         * leave_mm() to avoid costly and often unnecessary wakeups
@@ -991,6 +1001,10 @@ static void cf_check mwait_idle(void)
 
        /* Now back in C0. */
        update_idle_stats(power, cx, before, after);
+
+       if (cx->ibrs_disable)
+               spec_ctrl_exit_idle(info);
+
        local_irq_enable();
 
        TRACE_6D(TRC_PM_IDLE_EXIT, cx->type, after,
@@ -1603,6 +1617,8 @@ static int cf_check mwait_idle_cpu_init(
                    /* cstate_restore_tsc() needs to be a no-op */
                    boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
                        cx->irq_enable_early = true;
+               if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS)
+                       cx->ibrs_disable = true;
 
                dev->count++;
        }
--- a/xen/include/xen/cpuidle.h
+++ b/xen/include/xen/cpuidle.h
@@ -42,7 +42,8 @@ struct acpi_processor_cx
     u8 idx;
     u8 type;         /* ACPI_STATE_Cn */
     u8 entry_method; /* ACPI_CSTATE_EM_xxx */
-    bool irq_enable_early;
+    bool irq_enable_early:1;
+    bool ibrs_disable:1;
     u32 address;
     u32 latency;
     u32 target_residency;




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.