
[Xen-devel] [RFC PATCH 1/1] xen: credit2: rb-tree for runqueues



Replace the sorted linked list used for the credit2 runqueue with an
rb-tree, keyed by vCPU credit. Insertion drops from O(n) to O(log n),
which improves performance and scalability as the number of runnable
vCPUs grows.

Signed-off-by: Praveen Kumar <kpraveen.lkml@xxxxxxxxx>
---
 xen/common/sched_credit2.c | 94 ++++++++++++++++++++++++++++++----------------
 1 file changed, 61 insertions(+), 33 deletions(-)
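
Notes (for discussion, not part of the patch):

The TODO in csched2_runtime() below asks whether the O(log n) rb_first()
walk can be avoided when peeking at the head of the runqueue. One option
is to cache the leftmost node next to the root, making the peek O(1);
Linux has grown rb_root_cached for exactly this, and the same idea can
be open-coded against xen/rbtree.h. The sketch below is hypothetical
(csched2_runq, its leftmost field and runq_insert_cached() do not exist
in this patch) and only illustrates the bookkeeping:

    /* Hypothetical container: the root plus a cached rb_first(&root). */
    struct csched2_runq {
        struct rb_root root;
        struct rb_node *leftmost;
    };

    static void runq_insert_cached(struct csched2_runq *q,
                                   struct csched2_vcpu *svc, int *pos)
    {
        struct rb_node **node = &q->root.rb_node, *parent = NULL;
        bool leftmost = true;

        while ( *node )
        {
            struct csched2_vcpu *entry;

            parent = *node;
            entry = rb_entry(parent, struct csched2_vcpu, runq_elem);
            if ( svc->credit > entry->credit )
                node = &parent->rb_left;
            else
            {
                /* Went right at least once: not the new leftmost. */
                node = &parent->rb_right;
                leftmost = false;
            }
            (*pos)++;
        }
        rb_link_node(&svc->runq_elem, parent, node);
        rb_insert_color(&svc->runq_elem, &q->root);
        if ( leftmost )
            q->leftmost = &svc->runq_elem;
    }

On removal, the cache is refreshed before erasing:

        if ( q->leftmost == &svc->runq_elem )
            q->leftmost = rb_next(&svc->runq_elem);
        rb_erase(&svc->runq_elem, &q->root);

Whether the extra pointer pays off depends on how hot the peek path is;
it is left out of this patch to keep the change minimal.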

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 9a3e71f1c8..3802c2888f 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -25,6 +25,7 @@
 #include <xen/trace.h>
 #include <xen/cpu.h>
 #include <xen/keyhandler.h>
+#include <xen/rbtree.h>
 
 /* Meant only for helping developers during debugging. */
 /* #define d2printk printk */
@@ -471,7 +472,7 @@ custom_param("credit2_runqueue", parse_credit2_runqueue);
 struct csched2_runqueue_data {
     spinlock_t lock;           /* Lock for this runqueue                     */
 
-    struct list_head runq;     /* Ordered list of runnable vms               */
+    struct rb_root runq;       /* Runqueue is an rbtree                      */
     int id;                    /* ID of this runqueue (-1 if invalid)        */
 
     int load;                  /* Instantaneous load (num of non-idle vcpus) */
@@ -536,7 +537,7 @@ struct csched2_vcpu {
     s_time_t load_last_update;         /* Last time average was updated       */
     s_time_t avgload;                  /* Decaying queue load                 */
 
-    struct list_head runq_elem;        /* On the runqueue (rqd->runq)         */
+    struct rb_node runq_elem;          /* On the runqueue (rqd->runq)         */
     struct list_head parked_elem;      /* On the parked_vcpus list            */
     struct list_head rqd_elem;         /* On csched2_runqueue_data's svc list */
     struct csched2_runqueue_data *migrate_rqd; /* Pre-determined migr. target */
@@ -600,6 +601,29 @@ static inline bool has_cap(const struct csched2_vcpu *svc)
     return svc->budget != STIME_MAX;
 }
 
+static void runq_insert_rb(struct rb_root *root,
+                           struct csched2_vcpu *svc,
+                           int *pos)
+{
+    struct csched2_vcpu *entry = NULL;
+    struct rb_node **node = &root->rb_node;
+    struct rb_node *parent = NULL;
+
+    while ( *node )
+    {
+        parent = *node;
+        entry = rb_entry(parent, struct csched2_vcpu, runq_elem);
+        /*
+         * Sort by credit, with the highest-credit vCPU as the leftmost
+         * node, mirroring the head of the old sorted list. Equal credits
+         * go right, so a new vCPU queues behind its peers, just as
+         * list_add_tail() used to arrange.
+         */
+        if ( svc->credit > entry->credit )
+            node = &parent->rb_left;
+        else
+            node = &parent->rb_right;
+
+        (*pos)++;    /* pos now counts tree levels walked; only tracing uses it */
+    }
+    rb_link_node(&svc->runq_elem, parent, node);
+    rb_insert_color(&svc->runq_elem, root);
+}
+
 /*
  * Hyperthreading (SMT) support.
  *
@@ -793,12 +817,12 @@ static s_time_t c2t(struct csched2_runqueue_data *rqd, s_time_t credit, struct c
 
 static inline int vcpu_on_runq(struct csched2_vcpu *svc)
 {
-    return !list_empty(&svc->runq_elem);
+    return !RB_EMPTY_NODE(&svc->runq_elem);
 }
 
-static inline struct csched2_vcpu * runq_elem(struct list_head *elem)
+static inline struct csched2_vcpu * runq_elem(struct rb_node *elem)
 {
-    return list_entry(elem, struct csched2_vcpu, runq_elem);
+    return rb_entry(elem, struct csched2_vcpu, runq_elem);
 }
 
 static void activate_runqueue(struct csched2_private *prv, int rqi)
@@ -812,7 +836,7 @@ static void activate_runqueue(struct csched2_private *prv, int rqi)
     rqd->max_weight = 1;
     rqd->id = rqi;
     INIT_LIST_HEAD(&rqd->svc);
-    INIT_LIST_HEAD(&rqd->runq);
+    rqd->runq = RB_ROOT;
     spin_lock_init(&rqd->lock);
 
     __cpumask_set_cpu(rqi, &prv->active_queues);
@@ -1272,9 +1296,8 @@ update_load(const struct scheduler *ops,
 static void
 runq_insert(const struct scheduler *ops, struct csched2_vcpu *svc)
 {
-    struct list_head *iter;
     unsigned int cpu = svc->vcpu->processor;
-    struct list_head * runq = &c2rqd(ops, cpu)->runq;
+    struct rb_root *runq = &c2rqd(ops, cpu)->runq;
     int pos = 0;
 
     ASSERT(spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock));
@@ -1287,16 +1310,7 @@ runq_insert(const struct scheduler *ops, struct csched2_vcpu *svc)
     ASSERT(!svc->vcpu->is_running);
     ASSERT(!(svc->flags & CSFLAG_scheduled));
 
-    list_for_each( iter, runq )
-    {
-        struct csched2_vcpu * iter_svc = runq_elem(iter);
-
-        if ( svc->credit > iter_svc->credit )
-            break;
-
-        pos++;
-    }
-    list_add_tail(&svc->runq_elem, iter);
+    runq_insert_rb(runq, svc, &pos);
 
     if ( unlikely(tb_init_done) )
     {
@@ -1315,8 +1329,11 @@ runq_insert(const struct scheduler *ops, struct csched2_vcpu *svc)
 
 static inline void runq_remove(struct csched2_vcpu *svc)
 {
+    /* TODO: this ASSERT may be redundant, since all callers already
+     * check vcpu_on_runq() before calling runq_remove(). */
     ASSERT(vcpu_on_runq(svc));
-    list_del_init(&svc->runq_elem);
+    rb_erase(&svc->runq_elem, &svc->rqd->runq);
+    RB_CLEAR_NODE(&svc->runq_elem);
 }
 
 void burn_credits(struct csched2_runqueue_data *rqd, struct csched2_vcpu *, s_time_t);
@@ -1785,6 +1802,7 @@ static void park_vcpu(struct csched2_vcpu *svc)
      * In both cases, we also add it to the list of parked vCPUs of the domain.
      */
     __set_bit(_VPF_parked, &v->pause_flags);
+
     if ( vcpu_on_runq(svc) )
     {
         runq_remove(svc);
@@ -2037,7 +2055,7 @@ csched2_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
         return NULL;
 
     INIT_LIST_HEAD(&svc->rqd_elem);
-    INIT_LIST_HEAD(&svc->runq_elem);
+    RB_CLEAR_NODE(&svc->runq_elem);
 
     svc->sdom = dd;
     svc->vcpu = vc;
@@ -2083,6 +2101,8 @@ csched2_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
     }
     else if ( vcpu_on_runq(svc) )
     {
+        printk(XENLOG_WARNING "%s : %d : vcpu on runq rb !\n", __func__, __LINE__);
+
         ASSERT(svc->rqd == c2rqd(ops, vc->processor));
         update_load(ops, svc->rqd, svc, -1, NOW());
         runq_remove(svc);
@@ -2110,6 +2130,7 @@ csched2_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
 
     if ( unlikely(vcpu_on_runq(svc)) )
     {
+        printk(XENLOG_WARNING "%s : %d : vcpu on runq rb !\n", __func__, __LINE__);
         SCHED_STAT_CRANK(vcpu_wake_onrunq);
         goto out;
     }
@@ -2501,6 +2522,7 @@ static void migrate(const struct scheduler *ops,
         /* It's not running; just move it */
         if ( vcpu_on_runq(svc) )
         {
+            printk(XENLOG_WARNING "%s : %d : vcpu on runq rb !\n", __func__, __LINE__);
             runq_remove(svc);
             update_load(ops, svc->rqd, NULL, -1, now);
             on_runq = 1;
@@ -2759,8 +2781,10 @@ csched2_vcpu_migrate(
     if ( unlikely(!cpumask_test_cpu(new_cpu, cpupool_domain_cpumask(d))) )
     {
         ASSERT(system_state == SYS_STATE_suspend);
+
         if ( vcpu_on_runq(svc) )
         {
+            printk(XENLOG_WARNING "%s : %d : vcpu on runq rb !\n", __func__, __LINE__);
             runq_remove(svc);
             update_load(ops, svc->rqd, NULL, -1, now);
         }
@@ -3103,7 +3127,7 @@ csched2_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
     spinlock_t *lock;
 
     ASSERT(!is_idle_vcpu(vc));
-    ASSERT(list_empty(&svc->runq_elem));
+    ASSERT(RB_EMPTY_NODE(&svc->runq_elem));
 
     /* csched2_cpu_pick() expects the pcpu lock to be held */
     lock = vcpu_schedule_lock_irq(vc);
@@ -3141,7 +3165,7 @@ csched2_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
     spinlock_t *lock;
 
     ASSERT(!is_idle_vcpu(vc));
-    ASSERT(list_empty(&svc->runq_elem));
+    ASSERT(RB_EMPTY_NODE(&svc->runq_elem));
 
     SCHED_STAT_CRANK(vcpu_remove);
 
@@ -3163,7 +3187,7 @@ csched2_runtime(const struct scheduler *ops, int cpu,
     s_time_t time, min_time;
     int rt_credit; /* Proposed runtime measured in credits */
     struct csched2_runqueue_data *rqd = c2rqd(ops, cpu);
-    struct list_head *runq = &rqd->runq;
+    struct rb_root *runq = &rqd->runq;
     struct csched2_private *prv = csched2_priv(ops);
 
     /*
@@ -3201,17 +3225,21 @@ csched2_runtime(const struct scheduler *ops, int cpu,
      * 2) If there's someone waiting whose credit is positive,
      *    run until your credit ~= his.
      */
-    if ( ! list_empty(runq) )
+    if ( ! RB_EMPTY_ROOT(runq) )
     {
-        struct csched2_vcpu *swait = runq_elem(runq->next);
+        /* The leftmost node is the highest-credit waiter: the old list head. */
+        struct rb_node *node = rb_first(runq);
 
+        /* TODO: rb_first() is O(log n) per peek; caching the leftmost node
+         * (see the note after the diffstat) would make it O(1). */
+
+        struct csched2_vcpu *swait = runq_elem(node);
         if ( ! is_idle_vcpu(swait->vcpu)
              && swait->credit > 0 )
         {
             rt_credit = snext->credit - swait->credit;
         }
     }
-
     /*
      * The next guy on the runqueue may actually have a higher credit,
      * if we've tried to avoid migrating him from a different cpu.
@@ -3260,7 +3288,7 @@ runq_candidate(struct csched2_runqueue_data *rqd,
                int cpu, s_time_t now,
                unsigned int *skipped)
 {
-    struct list_head *iter, *temp;
+    struct rb_node *iter = NULL;
     struct csched2_vcpu *snext = NULL;
     struct csched2_private *prv = csched2_priv(per_cpu(scheduler, cpu));
     bool yield = false, soft_aff_preempt = false;
@@ -3345,9 +3373,8 @@ runq_candidate(struct csched2_runqueue_data *rqd,
         snext = csched2_vcpu(idle_vcpu[cpu]);
 
  check_runq:
-    list_for_each_safe( iter, temp, &rqd->runq )
-    {
-        struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, runq_elem);
+    for ( iter = rb_first(&rqd->runq); iter != NULL; iter = rb_next(iter) )
+    {
+        struct csched2_vcpu * svc = rb_entry(iter, struct csched2_vcpu, runq_elem);
 
         if ( unlikely(tb_init_done) )
         {
@@ -3749,7 +3776,8 @@ csched2_dump(const struct scheduler *ops)
     for_each_cpu(i, &prv->active_queues)
     {
         struct csched2_runqueue_data *rqd = prv->rqd + i;
-        struct list_head *iter, *runq = &rqd->runq;
+        struct rb_root *runq = &rqd->runq;
+        struct rb_node *iter;
         int loop = 0;
 
         /* We need the lock to scan the runqueue. */
@@ -3761,8 +3789,8 @@ csched2_dump(const struct scheduler *ops)
             dump_pcpu(ops, j);
 
         printk("RUNQ:\n");
-        list_for_each( iter, runq )
-        {
+        for ( iter = rb_first(runq); iter != NULL; iter = rb_next(iter) )
+        {
             struct csched2_vcpu *svc = runq_elem(iter);
 
             if ( svc )
-- 
2.13.6

