|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH] x86/cpuidle: split the max_cstate variable
The admin can control the upper bound wanted not only via command line
option, but also via XEN_SYSCTL_pm_op_set_max_cstate. While decisions how
to set up the system are okay this way as long as we deem the command line
option a strict upper bound, what to do during S3 resume should not be
based on that potentially varying value. Decisions there need to use
solely the strict upper bound we may have enforced ourselves (or which was
forced onto us via command line option).
Rather than altering pit_broadcast_is_available(), drop the function
altogether. It's pretty odd for acpi/cpu_idle.c to call into time.c, just
for that to call into acpi/cpu_idle.c again.
Fixes: 8d24303023ec ("x86: don't write_tsc() non-zero values on CPUs updating
only the lower 32 bits")
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
cpuidle_disable_deep_cstate(), when called from handle_rtc_once(), is
still somewhat of a problem: Boot time and resume time runs of
_disable_pit_irq() may still behave differently because of that.
If we wanted the sysctl to possibly move the maximum C-state beyond what
was given on the command line, things would get yet more complicated, as
we'd then need to re-configure the driver that's in use.
I wonder how useful the ACPI_STATE_C<n> #define-s really are. Plain 1 is
used in e.g. probe_c3_errata(), and the plain 7 doesn't even have a
respective constant (nor would that be suitable, as that's not really an
ACPI state).
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -119,7 +119,7 @@ bool lapic_timer_init(void)
lapic_timer_off = hpet_broadcast_enter;
lapic_timer_on = hpet_broadcast_exit;
}
- else if ( pit_broadcast_is_available() )
+ else if ( cpuidle_usable_deep_cstate() )
{
lapic_timer_off = pit_broadcast_enter;
lapic_timer_on = pit_broadcast_exit;
@@ -131,12 +131,15 @@ bool lapic_timer_init(void)
}
void (*__read_mostly pm_idle_save)(void);
-unsigned int max_cstate __read_mostly = UINT_MAX;
+
+unsigned int max_usable_cstate __read_mostly = UINT_MAX;
+unsigned int max_allowed_cstate __read_mostly = UINT_MAX;
unsigned int max_csubstate __read_mostly = UINT_MAX;
static int __init cf_check parse_cstate(const char *s)
{
- max_cstate = simple_strtoul(s, &s, 0);
+ max_allowed_cstate = simple_strtoul(s, &s, 0);
+ max_usable_cstate = max_allowed_cstate;
if ( *s == ',' )
max_csubstate = simple_strtoul(s + 1, NULL, 0);
return 0;
@@ -413,10 +416,11 @@ static void cf_check dump_cx(unsigned ch
unsigned int cpu;
printk("'%c' pressed -> printing ACPI Cx structures\n", key);
- if ( max_cstate < UINT_MAX )
+ if ( max_cstate() < UINT_MAX )
{
- printk("max state: C%u\n", max_cstate);
- if ( max_csubstate < UINT_MAX )
+ printk("max state: C%u\n", max_cstate());
+ if ( max_allowed_cstate <= max_usable_cstate &&
+ max_csubstate < UINT_MAX )
printk("max sub-state: %u\n", max_csubstate);
else
printk("max sub-state: unlimited\n");
@@ -690,18 +694,18 @@ static void cf_check acpi_processor_idle
u32 exp = 0, pred = 0;
u32 irq_traced[4] = { 0 };
- if ( max_cstate > 0 && power &&
+ if ( max_cstate() > 0 && power &&
(next_state = cpuidle_current_governor->select(power)) > 0 )
{
unsigned int max_state = sched_has_urgent_vcpu() ? ACPI_STATE_C1
- : max_cstate;
+ : max_cstate();
do {
cx = &power->states[next_state];
} while ( (cx->type > max_state ||
cx->entry_method == ACPI_CSTATE_EM_NONE ||
(cx->entry_method == ACPI_CSTATE_EM_FFH &&
- cx->type == max_cstate &&
+ cx->type == max_allowed_cstate &&
(cx->address & MWAIT_SUBSTATE_MASK) > max_csubstate)) &&
--next_state );
if ( next_state )
@@ -1448,7 +1452,7 @@ static void amd_cpuidle_init(struct acpi
for ( i = 0; i < nr; ++i )
{
- if ( cx[i].type > max_cstate )
+ if ( cx[i].type > max_cstate() )
break;
power->states[i + 1] = cx[i];
power->states[i + 1].idx = i + 1;
@@ -1611,21 +1615,22 @@ int pmstat_reset_cx_stat(unsigned int cp
void cpuidle_disable_deep_cstate(void)
{
- if ( max_cstate > ACPI_STATE_C1 )
+ if ( max_usable_cstate > ACPI_STATE_C1 )
{
if ( local_apic_timer_c2_ok )
- max_cstate = ACPI_STATE_C2;
+ max_usable_cstate = ACPI_STATE_C2;
else
- max_cstate = ACPI_STATE_C1;
+ max_usable_cstate = ACPI_STATE_C1;
}
hpet_disable_legacy_broadcast();
}
-bool cpuidle_using_deep_cstate(void)
+bool cpuidle_usable_deep_cstate(void)
{
- return xen_cpuidle && max_cstate > (local_apic_timer_c2_ok ? ACPI_STATE_C2
- :
ACPI_STATE_C1);
+ return xen_cpuidle &&
+ max_usable_cstate > (local_apic_timer_c2_ok ? ACPI_STATE_C2
+ : ACPI_STATE_C1);
}
static int cf_check cpu_callback(
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -384,12 +384,12 @@ static void probe_c3_errata(const struct
};
/* Serialized by the AP bringup code. */
- if ( max_cstate > 1 && (c->apicid & (c->x86_num_siblings - 1)) &&
+ if ( max_usable_cstate > 1 && (c->apicid & (c->x86_num_siblings - 1)) &&
x86_match_cpu(models) )
{
printk(XENLOG_WARNING
"Disabling C-states C3 and C6 due to CPU errata\n");
- max_cstate = 1;
+ max_usable_cstate = 1;
}
}
--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -1045,15 +1045,16 @@ static void cf_check mwait_idle(void)
u64 before, after;
u32 exp = 0, pred = 0, irq_traced[4] = { 0 };
- if (max_cstate > 0 && power &&
+ if (max_cstate() > 0 && power &&
(next_state = cpuidle_current_governor->select(power)) > 0) {
unsigned int max_state = sched_has_urgent_vcpu() ? ACPI_STATE_C1
- : max_cstate;
+ : max_cstate();
do {
cx = &power->states[next_state];
- } while ((cx->type > max_state || (cx->type == max_cstate &&
- MWAIT_HINT2SUBSTATE(cx->address) > max_csubstate)) &&
+ } while ((cx->type > max_state ||
+ (cx->type == max_allowed_cstate &&
+ MWAIT_HINT2SUBSTATE(cx->address) > max_csubstate)) &&
--next_state);
if (!next_state)
cx = NULL;
@@ -1458,7 +1459,7 @@ static void __init sklh_idle_state_table
u64 msr;
/* if PC10 disabled via cmdline max_cstate=7 or shallower */
- if (max_cstate <= 7)
+ if (max_cstate() <= 7)
return;
/* if PC10 not present in CPUID.MWAIT.EDX */
@@ -1623,7 +1624,7 @@ static int __init mwait_idle_probe(void)
!mwait_substates)
return -ENODEV;
- if (!max_cstate || !opt_mwait_idle) {
+ if (!max_cstate() || !opt_mwait_idle) {
pr_debug(PREFIX "disabled\n");
return -EPERM;
}
@@ -1714,8 +1715,8 @@ static int cf_check mwait_idle_cpu_init(
hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
state = MWAIT_HINT2CSTATE(hint) + 1;
- if (state > max_cstate) {
- printk(PREFIX "max C-state %u reached\n", max_cstate);
+ if (state > max_cstate()) {
+ printk(PREFIX "max C-state %u reached\n", max_cstate());
break;
}
--- a/xen/arch/x86/include/asm/time.h
+++ b/xen/arch/x86/include/asm/time.h
@@ -31,7 +31,6 @@ int cpu_frequency_change(u64 freq);
void cf_check pit_broadcast_enter(void);
void cf_check pit_broadcast_exit(void);
-int pit_broadcast_is_available(void);
uint64_t cf_check acpi_pm_tick_to_ns(uint64_t ticks);
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -517,7 +517,7 @@ static int64_t __init cf_check init_hpet
bool disable_hpet = false;
if ( hpet_address && strcmp(opt_clocksource, pts->id) &&
- cpuidle_using_deep_cstate() )
+ cpuidle_usable_deep_cstate() )
{
if ( pci_conf_read16(PCI_SBDF(0, 0, 0x1f, 0),
PCI_VENDOR_ID) == PCI_VENDOR_ID_INTEL )
@@ -2655,7 +2655,7 @@ static int _disable_pit_irq(bool init)
* XXX dom0 may rely on RTC interrupt delivery, so only enable
* hpet_broadcast if FSB mode available or if force_hpet_broadcast.
*/
- if ( cpuidle_using_deep_cstate() && !boot_cpu_has(X86_FEATURE_XEN_ARAT) )
+ if ( cpuidle_usable_deep_cstate() && !boot_cpu_has(X86_FEATURE_XEN_ARAT) )
{
init ? hpet_broadcast_init() : hpet_broadcast_resume();
if ( !hpet_broadcast_is_available() )
@@ -2707,11 +2707,6 @@ void cf_check pit_broadcast_exit(void)
reprogram_timer(this_cpu(timer_deadline));
}
-int pit_broadcast_is_available(void)
-{
- return cpuidle_using_deep_cstate();
-}
-
void send_timer_event(struct vcpu *v)
{
send_guest_vcpu_virq(v, VIRQ_TIMER);
@@ -2999,7 +2994,7 @@ static void cf_check dump_softtsc(unsign
else if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC ) )
{
printk("TSC has constant rate, ");
- if ( max_cstate <= ACPI_STATE_C2 && tsc_max_warp == 0 )
+ if ( max_usable_cstate <= ACPI_STATE_C2 && tsc_max_warp == 0 )
printk("no deep Cstates, passed warp test, deemed reliable, ");
else
printk("deep Cstates possible, so not reliable, ");
--- a/xen/include/xen/acpi.h
+++ b/xen/include/xen/acpi.h
@@ -142,30 +142,33 @@ int acpi_gsi_to_irq (u32 gsi, unsigned i
#ifdef CONFIG_ACPI_CSTATE
/*
- * max_cstate sets the highest legal C-state.
- * max_cstate = 0: C0 okay, but not C1
- * max_cstate = 1: C1 okay, but not C2
- * max_cstate = 2: C2 okay, but not C3 etc.
-
- * max_csubstate sets the highest legal C-state sub-state. Only applies to the
- * highest legal C-state.
- * max_cstate = 1, max_csubstate = 0 ==> C0, C1 okay, but not C1E
- * max_cstate = 1, max_csubstate = 1 ==> C0, C1 and C1E okay, but not C2
- * max_cstate = 2, max_csubstate = 0 ==> C0, C1, C1E, C2 okay, but not C3
- * max_cstate = 2, max_csubstate = 1 ==> C0, C1, C1E, C2 okay, but not C3
+ * max_{allowed,usable}_cstate sets the highest allowed / usable C-state, where
+ * "allowed" is command line / sysctl based.
+ * max_*_cstate = 0: C0 okay, but not C1
+ * max_*_cstate = 1: C1 okay, but not C2
+ * max_*_cstate = 2: C2 okay, but not C3 etc.
+ *
+ * max_csubstate sets the highest allowed C-state sub-state. Only applies to
+ * the highest allowed C-state.
+ * max_allowed_cstate = 1, max_csubstate = 0 ==> C0, C1 okay, but not C1E
+ * max_allowed_cstate = 1, max_csubstate = 1 ==> C0, C1 and C1E okay, but not
C2
+ * max_allowed_cstate = 2, max_csubstate = 0 ==> C0, C1, C1E, C2 okay, but not
C3
+ * max_allowed_cstate = 2, max_csubstate = 1 ==> C0, C1, C1E, C2 okay, but not
C3
*/
-extern unsigned int max_cstate;
+extern unsigned int max_usable_cstate;
+extern unsigned int max_allowed_cstate;
extern unsigned int max_csubstate;
+#define max_cstate() min(max_usable_cstate, max_allowed_cstate)
+
static inline unsigned int acpi_get_cstate_limit(void)
{
- return max_cstate;
+ return max_allowed_cstate;
}
static inline void acpi_set_cstate_limit(unsigned int new_limit)
{
- max_cstate = new_limit;
- return;
+ max_allowed_cstate = new_limit;
}
static inline unsigned int acpi_get_csubstate_limit(void)
--- a/xen/include/xen/cpuidle.h
+++ b/xen/include/xen/cpuidle.h
@@ -89,7 +89,7 @@ struct cpuidle_governor
extern int8_t xen_cpuidle;
extern struct cpuidle_governor *cpuidle_current_governor;
-bool cpuidle_using_deep_cstate(void);
+bool cpuidle_usable_deep_cstate(void);
void cpuidle_disable_deep_cstate(void);
#define CPUIDLE_DRIVER_STATE_START 1
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |