[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-4.1-testing] sched_credit: Use delay to control scheduling frequency



# HG changeset patch
# User Hui Lv <hui.lv@xxxxxxxxx>
# Date 1331112866 0
# Node ID 1f95b55ef427144fc32759445791037f6b2c358d
# Parent  1c0f76eea67d61fe6346e2fed879b6b80fc09beb
sched_credit: Use delay to control scheduling frequency

This patch can improve Xen performance:
1. The "delay method" achieves an 11% overall performance boost on
SPECvirt compared with the original credit scheduler.
2. We have tried a 1ms delay and a 10ms delay; there is no big
difference between these two configurations. (1ms is enough to achieve
good performance.)
3. We have compared response time/latency at different load levels
(low, high, peak); the "delay method" did not increase response time
by much.
4. A 1ms delay reduces context switches by 30% at peak performance,
which is where the benefit comes from. (int sched_ratelimit_us = 1000
is the recommended setting.)

Signed-off-by: Hui Lv <hui.lv@xxxxxxxxx>
Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
Committed-by: Keir Fraser <keir@xxxxxxx>
xen-unstable changeset:   24518:44c2856b1952
xen-unstable date:        Tue Jan 17 11:18:48 2012 +0000
---


diff -r 1c0f76eea67d -r 1f95b55ef427 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Wed Mar 07 09:33:45 2012 +0000
+++ b/xen/common/sched_credit.c Wed Mar 07 09:34:26 2012 +0000
@@ -172,6 +172,7 @@
     uint32_t credit;
     int credit_balance;
     uint32_t runq_sort;
+    unsigned ratelimit_us;
     /* Period of master and tick in milliseconds */
     unsigned tslice_ms, tick_period_us, ticks_per_tslice;
     unsigned credits_per_tslice;
@@ -1298,10 +1299,15 @@
     struct csched_private *prv = CSCHED_PRIV(ops);
     struct csched_vcpu *snext;
     struct task_slice ret;
+    s_time_t runtime, tslice;
 
     CSCHED_STAT_CRANK(schedule);
     CSCHED_VCPU_CHECK(current);
 
+    runtime = now - current->runstate.state_entry_time;
+    if ( runtime < 0 ) /* Does this ever happen? */
+        runtime = 0;
+
     if ( !is_idle_vcpu(scurr->vcpu) )
     {
         /* Update credits of a non-idle VCPU. */
@@ -1314,6 +1320,35 @@
         scurr->pri = CSCHED_PRI_IDLE;
     }
 
+    /* Choices, choices:
+     * - If we have a tasklet, we need to run the idle vcpu no matter what.
+     * - If sched rate limiting is in effect, and the current vcpu has
+     *   run for less than that amount of time, continue the current one,
+     *   but with a shorter timeslice and return it immediately
+     * - Otherwise, choose the one with the highest priority (which may
+     *   be the one currently running)
+     * - If the currently running one is TS_OVER, see if there
+     *   is a higher priority one waiting on the runqueue of another
+     *   cpu and steal it.
+     */
+
+    /* If we have schedule rate limiting enabled, check to see
+     * how long we've run for. */
+    if ( !tasklet_work_scheduled
+         && prv->ratelimit_us
+         && vcpu_runnable(current)
+         && !is_idle_vcpu(current)
+         && runtime < MICROSECS(prv->ratelimit_us) )
+    {
+        snext = scurr;
+        snext->start_time += now;
+        perfc_incr(delay_ms);
+        tslice = MICROSECS(prv->ratelimit_us);
+        ret.migrated = 0;
+        goto out;
+    }
+    tslice = MILLISECS(prv->tslice_ms);
+
     /*
      * Select next runnable local VCPU (ie top of local runq)
      */
@@ -1368,11 +1403,12 @@
     if ( !is_idle_vcpu(snext->vcpu) )
         snext->start_time += now;
 
+out:
     /*
      * Return task to run next...
      */
     ret.time = (is_idle_vcpu(snext->vcpu) ?
-                -1 : MILLISECS(prv->tslice_ms));
+                -1 : tslice);
     ret.task = snext->vcpu;
 
     CSCHED_VCPU_CHECK(ret.task);
@@ -1522,6 +1558,15 @@
     prv->tick_period_us = prv->tslice_ms * 1000 / prv->ticks_per_tslice;
     prv->credits_per_tslice = CSCHED_CREDITS_PER_MSEC * prv->tslice_ms;
 
+    if ( MICROSECS(sched_ratelimit_us) > MILLISECS(sched_credit_tslice_ms) )
+    {
+        printk("WARNING: sched_ratelimit_us >" 
+               "sched_credit_tslice_ms is undefined\n"
+               "Setting ratelimit_us to 1000 * tslice_ms\n");
+        prv->ratelimit_us = 1000 * prv->tslice_ms;
+    }
+    else
+        prv->ratelimit_us = sched_ratelimit_us;
     return 0;
 }
 
diff -r 1c0f76eea67d -r 1f95b55ef427 xen/common/schedule.c
--- a/xen/common/schedule.c     Wed Mar 07 09:33:45 2012 +0000
+++ b/xen/common/schedule.c     Wed Mar 07 09:34:26 2012 +0000
@@ -47,6 +47,11 @@
 bool_t sched_smt_power_savings = 0;
 boolean_param("sched_smt_power_savings", sched_smt_power_savings);
 
+/* Default scheduling rate limit: 1ms 
+ * The behavior when sched_ratelimit_us is greater than sched_credit_tslice_ms is undefined
+ * */
+int sched_ratelimit_us = 1000;
+integer_param("sched_ratelimit_us", sched_ratelimit_us);
 /* Various timer handlers. */
 static void s_timer_fn(void *unused);
 static void vcpu_periodic_timer_fn(void *data);
diff -r 1c0f76eea67d -r 1f95b55ef427 xen/include/xen/perfc_defn.h
--- a/xen/include/xen/perfc_defn.h      Wed Mar 07 09:33:45 2012 +0000
+++ b/xen/include/xen/perfc_defn.h      Wed Mar 07 09:34:26 2012 +0000
@@ -16,6 +16,7 @@
 PERFCOUNTER(sched_run,              "sched: runs through scheduler")
 PERFCOUNTER(sched_ctx,              "sched: context switches")
 
+PERFCOUNTER(delay_ms,               "csched: delay")
 PERFCOUNTER(vcpu_check,             "csched: vcpu_check")
 PERFCOUNTER(schedule,               "csched: schedule")
 PERFCOUNTER(acct_run,               "csched: acct_run")
diff -r 1c0f76eea67d -r 1f95b55ef427 xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h        Wed Mar 07 09:33:45 2012 +0000
+++ b/xen/include/xen/sched-if.h        Wed Mar 07 09:34:26 2012 +0000
@@ -16,6 +16,11 @@
 /* cpus currently in no cpupool */
 extern cpumask_t cpupool_free_cpus;
 
+/* Scheduler generic parameters
+ * */
+extern int sched_ratelimit_us;
+
+
 /*
  * In order to allow a scheduler to remap the lock->cpu mapping,
  * we have a per-cpu pointer, along with a pre-allocated set of

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.