
[Xen-changelog] [xen-unstable] cpuidle: do not enter deep C state if there is urgent VCPU



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1266312465 0
# Node ID ae2b7f1c89c8b29fedfe8f4561a6d8f5efef7278
# Parent  3a0bd7ca6b1146e2165e245cb0d4c2872771de17
cpuidle: do not enter deep C state if there is urgent VCPU

When a VCPU is polling on an event channel, it usually has an urgent task
pending (e.g. it is spinning on a lock). In this case it is better for the
cpuidle driver not to enter a deep C state.

This patch fixes an issue where a SLES 11 SP1 domain0 hangs on boxes with a
large number of CPUs (>= 64 CPUs).
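
For background, a VCPU ends up in its domain's poll_mask while it is blocked
in the SCHEDOP_poll hypercall. A minimal guest-side sketch of how that
typically happens (a pv spinlock slow path waiting on its lock event channel;
the helper name and port variable are illustrative, not part of this
changeset, and the header names are approximate):

    #include <xen/interface/sched.h>   /* struct sched_poll, SCHEDOP_poll */
    #include <asm/xen/hypercall.h>     /* HYPERVISOR_sched_op()           */

    /* Hypothetical helper: block this VCPU until 'lock_port' is signalled. */
    static void spin_wait_on_evtchn(evtchn_port_t lock_port)
    {
        struct sched_poll poll;

        set_xen_guest_handle(poll.ports, &lock_port);
        poll.nr_ports = 1;
        poll.timeout  = 0;      /* no timeout: wait until the port fires */

        /*
         * While this hypercall is pending, the VCPU sits in its domain's
         * poll_mask; that is the condition the patch below treats as
         * "urgent" when deciding whether the CPU may enter a deep C state.
         */
        HYPERVISOR_sched_op(SCHEDOP_poll, &poll);
    }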

Signed-off-by: Yu Ke <ke.yu@xxxxxxxxx>
Signed-off-by: Tian Kevin <kevin.tian@xxxxxxxxx>
Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
---
 xen/arch/x86/acpi/cpu_idle.c |   50 ++++++++++++++++++++++++-------------------
 xen/common/sched_credit.c    |    1 
 xen/common/schedule.c        |   47 +++++++++++++++++++++++++++++++++++++---
 xen/include/xen/sched-if.h   |    1 
 xen/include/xen/sched.h      |    2 +
 5 files changed, 77 insertions(+), 24 deletions(-)

diff -r 3a0bd7ca6b11 -r ae2b7f1c89c8 xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c      Mon Feb 15 17:54:04 2010 +0000
+++ b/xen/arch/x86/acpi/cpu_idle.c      Tue Feb 16 09:27:45 2010 +0000
@@ -41,6 +41,7 @@
 #include <xen/keyhandler.h>
 #include <xen/cpuidle.h>
 #include <xen/trace.h>
+#include <xen/sched-if.h>
 #include <asm/cache.h>
 #include <asm/io.h>
 #include <asm/hpet.h>
@@ -216,6 +217,15 @@ static inline void trace_exit_reason(u32
     }
 }
 
+/* vcpu is urgent if vcpu is polling event channel
+ *
+ * if urgent vcpu exists, CPU should not enter deep C state
+ */
+static int sched_has_urgent_vcpu(void)
+{
+    return atomic_read(&this_cpu(schedule_data).urgent_count);
+}
+
 static void acpi_processor_idle(void)
 {
     struct acpi_processor_power *power = processor_powers[smp_processor_id()];
@@ -226,27 +236,7 @@ static void acpi_processor_idle(void)
     u32 exp = 0, pred = 0;
     u32 irq_traced[4] = { 0 };
 
-    cpufreq_dbs_timer_suspend();
-
-    sched_tick_suspend();
-    /* sched_tick_suspend() can raise TIMER_SOFTIRQ. Process it now. */
-    process_pending_softirqs();
-
-    /*
-     * Interrupts must be disabled during bus mastering calculations and
-     * for C2/C3 transitions.
-     */
-    local_irq_disable();
-
-    if ( softirq_pending(smp_processor_id()) )
-    {
-        local_irq_enable();
-        sched_tick_resume();
-        cpufreq_dbs_timer_resume();
-        return;
-    }
-
-    if ( max_cstate > 0 && power && 
+    if ( max_cstate > 0 && power && !sched_has_urgent_vcpu() &&
          (next_state = cpuidle_current_governor->select(power)) > 0 )
     {
         cx = &power->states[next_state];
@@ -263,6 +253,24 @@ static void acpi_processor_idle(void)
             pm_idle_save();
         else
             acpi_safe_halt();
+        return;
+    }
+
+    cpufreq_dbs_timer_suspend();
+
+    sched_tick_suspend();
+    /* sched_tick_suspend() can raise TIMER_SOFTIRQ. Process it now. */
+    process_pending_softirqs();
+
+    /*
+     * Interrupts must be disabled during bus mastering calculations and
+     * for C2/C3 transitions.
+     */
+    local_irq_disable();
+
+    if ( softirq_pending(smp_processor_id()) )
+    {
+        local_irq_enable();
         sched_tick_resume();
         cpufreq_dbs_timer_resume();
         return;
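
To make the restructuring above easier to follow, here is a simplified sketch
(not part of the patch itself) of the resulting acpi_processor_idle() flow;
elided details are marked with "...":

    static void acpi_processor_idle(void)
    {
        /* ... local variable setup as in the hunks above ... */

        if ( max_cstate > 0 && power && !sched_has_urgent_vcpu() &&
             (next_state = cpuidle_current_governor->select(power)) > 0 )
        {
            cx = &power->states[next_state];
            /* ... validate the selected state ... */
        }

        if ( !cx )
        {
            /* Urgent VCPU, or no usable C state: halt without suspending
             * the scheduler tick or the cpufreq dbs timer. */
            if ( pm_idle_save )
                pm_idle_save();
            else
                acpi_safe_halt();
            return;
        }

        /* Only a real C-state entry pays the suspend/softirq cost. */
        cpufreq_dbs_timer_suspend();
        sched_tick_suspend();
        process_pending_softirqs();
        local_irq_disable();
        if ( softirq_pending(smp_processor_id()) )
        {
            local_irq_enable();
            sched_tick_resume();
            cpufreq_dbs_timer_resume();
            return;
        }

        /* ... enter the selected C state, then resume timers ... */
    }
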
diff -r 3a0bd7ca6b11 -r ae2b7f1c89c8 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Mon Feb 15 17:54:04 2010 +0000
+++ b/xen/common/sched_credit.c Tue Feb 16 09:27:45 2010 +0000
@@ -1060,6 +1060,7 @@ csched_runq_steal(int peer_cpu, int cpu,
                 /* We got a candidate. Grab it! */
                 CSCHED_VCPU_STAT_CRANK(speer, migrate_q);
                 CSCHED_STAT_CRANK(migrate_queued);
+                BUG_ON(vc->is_urgent);
                 __runq_remove(speer);
                 vc->processor = cpu;
                 return speer;
diff -r 3a0bd7ca6b11 -r ae2b7f1c89c8 xen/common/schedule.c
--- a/xen/common/schedule.c     Mon Feb 15 17:54:04 2010 +0000
+++ b/xen/common/schedule.c     Tue Feb 16 09:27:45 2010 +0000
@@ -100,6 +100,29 @@ static inline void trace_continue_runnin
                 (unsigned char *)&d);
 }
 
+static inline void vcpu_urgent_count_update(struct vcpu *v)
+{
+    if ( is_idle_vcpu(v) )
+        return;
+
+    if ( unlikely(v->is_urgent) )
+    {
+        if ( !test_bit(v->vcpu_id, v->domain->poll_mask) )
+        {
+            v->is_urgent = 0;
+            atomic_dec(&per_cpu(schedule_data,v->processor).urgent_count);
+        }
+    }
+    else
+    {
+        if ( unlikely(test_bit(v->vcpu_id, v->domain->poll_mask)) )
+        {
+            v->is_urgent = 1;
+            atomic_inc(&per_cpu(schedule_data,v->processor).urgent_count);
+        }
+    }
+}
+
 static inline void vcpu_runstate_change(
     struct vcpu *v, int new_state, s_time_t new_entry_time)
 {
@@ -107,6 +130,8 @@ static inline void vcpu_runstate_change(
 
     ASSERT(v->runstate.state != new_state);
     ASSERT(spin_is_locked(&per_cpu(schedule_data,v->processor).schedule_lock));
+
+    vcpu_urgent_count_update(v);
 
     trace_runstate_change(v, new_state);
 
@@ -188,6 +213,8 @@ void sched_destroy_vcpu(struct vcpu *v)
     kill_timer(&v->periodic_timer);
     kill_timer(&v->singleshot_timer);
     kill_timer(&v->poll_timer);
+    if ( test_and_clear_bool(v->is_urgent) )
+        atomic_dec(&per_cpu(schedule_data, v->processor).urgent_count);
     SCHED_OP(destroy_vcpu, v);
 }
 
@@ -277,7 +304,7 @@ static void vcpu_migrate(struct vcpu *v)
 static void vcpu_migrate(struct vcpu *v)
 {
     unsigned long flags;
-    int old_cpu;
+    int old_cpu, new_cpu;
 
     vcpu_schedule_lock_irqsave(v, flags);
 
@@ -293,9 +320,23 @@ static void vcpu_migrate(struct vcpu *v)
         return;
     }
 
+    /* Select new CPU. */
+    old_cpu = v->processor;
+    new_cpu = SCHED_OP(pick_cpu, v);
+
+    /*
+     * Transfer urgency status to new CPU before switching CPUs, as once
+     * the switch occurs, v->is_urgent is no longer protected by the per-CPU
+     * scheduler lock we are holding.
+     */
+    if ( unlikely(v->is_urgent) && (old_cpu != new_cpu) )
+    {
+        atomic_inc(&per_cpu(schedule_data, new_cpu).urgent_count);
+        atomic_dec(&per_cpu(schedule_data, old_cpu).urgent_count);
+    }
+
     /* Switch to new CPU, then unlock old CPU. */
-    old_cpu = v->processor;
-    v->processor = SCHED_OP(pick_cpu, v);
+    v->processor = new_cpu;
     spin_unlock_irqrestore(
         &per_cpu(schedule_data, old_cpu).schedule_lock, flags);
 
diff -r 3a0bd7ca6b11 -r ae2b7f1c89c8 xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h        Mon Feb 15 17:54:04 2010 +0000
+++ b/xen/include/xen/sched-if.h        Tue Feb 16 09:27:45 2010 +0000
@@ -16,6 +16,7 @@ struct schedule_data {
     struct vcpu        *idle;           /* idle task for this cpu          */
     void               *sched_priv;
     struct timer        s_timer;        /* scheduling timer                */
+    atomic_t            urgent_count;   /* how many urgent vcpus           */
 } __cacheline_aligned;
 
 DECLARE_PER_CPU(struct schedule_data, schedule_data);
diff -r 3a0bd7ca6b11 -r ae2b7f1c89c8 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Mon Feb 15 17:54:04 2010 +0000
+++ b/xen/include/xen/sched.h   Tue Feb 16 09:27:45 2010 +0000
@@ -115,6 +115,8 @@ struct vcpu
     bool_t           is_initialised;
     /* Currently running on a CPU? */
     bool_t           is_running;
+    /* VCPU should wake fast (do not deep sleep the CPU). */
+    bool_t           is_urgent;
 
 #ifdef VCPU_TRAP_LAST
 #define VCPU_TRAP_NONE    0
