[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen staging] x86/cpuidle: switch to uniform meaning of "max_cstate="



commit 71d571ddd75f445fc56d67e0620807cbce4c276a
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Mon Jul 22 11:31:38 2019 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Mon Jul 22 11:31:38 2019 +0200

    x86/cpuidle: switch to uniform meaning of "max_cstate="
    
    While the MWAIT idle driver already takes it to mean an actual C state,
    the ACPI idle driver so far used it as a list index. The list index,
    however, is an implementation detail of Xen and affected by firmware
    settings (i.e. not necessarily uniform for a particular system).
    
    While touching this code also avoid invoking menu_get_trace_data()
    when tracing is not active. For consistency do this also for the
    MWAIT driver.
    
    Note that I'm intentionally not adding any sorting logic to set_cx():
    Before and after this patch we assume entries to arrive in order, so
    this would be an orthogonal change.
    
    Take the opportunity and add minimal documentation for the command line
    option.
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Reviewed-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
    Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
 docs/misc/xen-command-line.pandoc |  2 ++
 tools/misc/xenpm.c                | 24 +++++++++++++++++-------
 xen/arch/x86/acpi/cpu_idle.c      | 36 +++++++++++++++++++++++-------------
 xen/arch/x86/cpu/mwait-idle.c     |  3 ++-
 xen/arch/x86/time.c               |  2 +-
 xen/drivers/acpi/pmstat.c         |  1 +
 xen/include/public/sysctl.h       |  1 +
 7 files changed, 47 insertions(+), 22 deletions(-)

diff --git a/docs/misc/xen-command-line.pandoc 
b/docs/misc/xen-command-line.pandoc
index 25c910b232..ec6e2a4357 100644
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -1376,6 +1376,8 @@ This option is ignored in **pv-shim** mode.
 ### max_cstate (x86)
 > `= <integer>`
 
+Specify the deepest C-state CPUs are permitted to be placed in.
+
 ### max_gsi_irqs (x86)
 > `= <integer>`
 
diff --git a/tools/misc/xenpm.c b/tools/misc/xenpm.c
index 65aa4c9d42..7828cb9dca 100644
--- a/tools/misc/xenpm.c
+++ b/tools/misc/xenpm.c
@@ -64,7 +64,7 @@ void show_help(void)
             " set-sched-smt           enable|disable enable/disable scheduler 
smt power saving\n"
             " set-vcpu-migration-delay      <num> set scheduler vcpu migration 
delay in us\n"
             " get-vcpu-migration-delay            get scheduler vcpu migration 
delay\n"
-            " set-max-cstate        <num>         set the C-State limitation 
(<num> >= 0)\n"
+            " set-max-cstate        <num>|'unlimited' set the C-State 
limitation (<num> >= 0)\n"
             " start [seconds]                     start collect Cx/Px 
statistics,\n"
             "                                     output after CTRL-C or 
SIGINT or several seconds.\n"
             " enable-turbo-mode     [cpuid]       enable Turbo Mode for 
processors that support it.\n"
@@ -194,7 +194,11 @@ static int show_max_cstate(xc_interface *xc_handle)
     if ( (ret = xc_get_cpuidle_max_cstate(xc_handle, &value)) )
         return ret;
 
-    printf("Max possible C-state: C%d\n\n", value);
+    if ( value < XEN_SYSCTL_CX_UNLIMITED )
+        printf("Max possible C-state: C%"PRIu32"\n\n", value);
+    else
+        printf("All C-states allowed\n\n");
+
     return 0;
 }
 
@@ -1117,18 +1121,24 @@ void get_vcpu_migration_delay_func(int argc, char 
*argv[])
 void set_max_cstate_func(int argc, char *argv[])
 {
     int value;
+    char buf[12];
 
-    if ( argc != 1 || sscanf(argv[0], "%d", &value) != 1 || value < 0 )
+    if ( argc != 1 ||
+         (sscanf(argv[0], "%d", &value) == 1
+          ? value < 0
+          : (value = XEN_SYSCTL_CX_UNLIMITED, strcmp(argv[0], "unlimited"))) )
     {
-        fprintf(stderr, "Missing or invalid argument(s)\n");
+        fprintf(stderr, "Missing, excess, or invalid argument(s)\n");
         exit(EINVAL);
     }
 
+    snprintf(buf, ARRAY_SIZE(buf), "C%d", value);
+
     if ( !xc_set_cpuidle_max_cstate(xc_handle, (uint32_t)value) )
-        printf("set max_cstate to C%d succeeded\n", value);
+        printf("max C-state set to %s\n", value >= 0 ? buf : argv[0]);
     else
-        fprintf(stderr, "set max_cstate to C%d failed (%d - %s)\n",
-                value, errno, strerror(errno));
+        fprintf(stderr, "Failed to set max C-state to %s (%d - %s)\n",
+                value >= 0 ? buf : argv[0], errno, strerror(errno));
 }
 
 void enable_turbo_mode(int argc, char *argv[])
diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c
index 5931928d6f..6896cfa875 100644
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -103,7 +103,7 @@ bool lapic_timer_init(void)
 }
 
 void (*__read_mostly pm_idle_save)(void);
-unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER - 1;
+unsigned int max_cstate __read_mostly = UINT_MAX;
 integer_param("max_cstate", max_cstate);
 static bool __read_mostly local_apic_timer_c2_ok;
 boolean_param("lapic_timer_c2_ok", local_apic_timer_c2_ok);
@@ -344,7 +344,10 @@ static void dump_cx(unsigned char key)
     unsigned int cpu;
 
     printk("'%c' pressed -> printing ACPI Cx structures\n", key);
-    printk("max cstate: C%u\n", max_cstate);
+    if ( max_cstate < UINT_MAX )
+        printk("max state: C%u\n", max_cstate);
+    else
+        printk("max state: unlimited\n");
     for_each_present_cpu ( cpu )
     {
         struct acpi_processor_power *power = processor_powers[cpu];
@@ -582,13 +585,19 @@ static void acpi_processor_idle(void)
     if ( max_cstate > 0 && power && !sched_has_urgent_vcpu() &&
          (next_state = cpuidle_current_governor->select(power)) > 0 )
     {
-        cx = &power->states[next_state];
-        if ( cx->type == ACPI_STATE_C3 && power->flags.bm_check &&
-             acpi_idle_bm_check() )
-            cx = power->safe_state;
-        if ( cx->idx > max_cstate )
-            cx = &power->states[max_cstate];
-        menu_get_trace_data(&exp, &pred);
+        do {
+            cx = &power->states[next_state];
+        } while ( cx->type > max_cstate && --next_state );
+        if ( next_state )
+        {
+            if ( cx->type == ACPI_STATE_C3 && power->flags.bm_check &&
+                 acpi_idle_bm_check() )
+                cx = power->safe_state;
+            if ( tb_init_done )
+                menu_get_trace_data(&exp, &pred);
+        }
+        else
+            cx = NULL;
     }
     if ( !cx )
     {
@@ -1396,12 +1405,12 @@ int pmstat_reset_cx_stat(uint32_t cpuid)
 
 void cpuidle_disable_deep_cstate(void)
 {
-    if ( max_cstate > 1 )
+    if ( max_cstate > ACPI_STATE_C1 )
     {
         if ( local_apic_timer_c2_ok )
-            max_cstate = 2;
+            max_cstate = ACPI_STATE_C2;
         else
-            max_cstate = 1;
+            max_cstate = ACPI_STATE_C1;
     }
 
     hpet_disable_legacy_broadcast();
@@ -1409,7 +1418,8 @@ void cpuidle_disable_deep_cstate(void)
 
 bool cpuidle_using_deep_cstate(void)
 {
-    return xen_cpuidle && max_cstate > (local_apic_timer_c2_ok ? 2 : 1);
+    return xen_cpuidle && max_cstate > (local_apic_timer_c2_ok ? ACPI_STATE_C2
+                                                               : 
ACPI_STATE_C1);
 }
 
 static int cpu_callback(
diff --git a/xen/arch/x86/cpu/mwait-idle.c b/xen/arch/x86/cpu/mwait-idle.c
index 24b757b8c1..361de1bb35 100644
--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -731,7 +731,8 @@ static void mwait_idle(void)
                } while (cx->type > max_cstate && --next_state);
                if (!next_state)
                        cx = NULL;
-               menu_get_trace_data(&exp, &pred);
+               else if (tb_init_done)
+                       menu_get_trace_data(&exp, &pred);
        }
        if (!cx) {
                if (pm_idle_save)
diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
index 9a6ea8ffcb..d8242295ef 100644
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -2268,7 +2268,7 @@ static void dump_softtsc(unsigned char key)
     else if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC ) )
     {
         printk("TSC has constant rate, ");
-        if (max_cstate <= 2 && tsc_max_warp == 0)
+        if ( max_cstate <= ACPI_STATE_C2 && tsc_max_warp == 0 )
             printk("no deep Cstates, passed warp test, deemed reliable, ");
         else
             printk("deep Cstates possible, so not reliable, ");
diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c
index bb80506ad6..392c9acb1c 100644
--- a/xen/drivers/acpi/pmstat.c
+++ b/xen/drivers/acpi/pmstat.c
@@ -451,6 +451,7 @@ int do_pm_op(struct xen_sysctl_pm_op *op)
 
     case XEN_SYSCTL_pm_op_get_max_cstate:
     {
+        BUILD_BUG_ON(XEN_SYSCTL_CX_UNLIMITED != UINT_MAX);
         op->u.get_max_cstate = acpi_get_cstate_limit();
         break;
     }
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 46e1425da0..015756e1b7 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -370,6 +370,7 @@ struct xen_sysctl_pm_op {
         struct xen_set_cpufreq_para set_para;
         uint64_aligned_t get_avgfreq;
         uint32_t                    set_sched_opt_smt;
+#define XEN_SYSCTL_CX_UNLIMITED 0xffffffff
         uint32_t                    get_max_cstate;
         uint32_t                    set_max_cstate;
     } u;
--
generated by git-patchbot for /home/xen/git/xen.git#staging

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.