
[Xen-devel] [RFC PATCH 1/1] xen: credit2: rb-tree for runqueues



Replace the sorted linked list used for the credit2 runqueue with an
rb-tree, keyed by vCPU credit. Insertion drops from O(n) to O(log n),
which improves performance and scalability as the number of runnable
vCPUs grows.

Signed-off-by: Praveen Kumar <kpraveen.lkml@xxxxxxxxx>
---
 xen/common/sched_credit2.c | 94 ++++++++++++++++++++++++++++++----------------
 1 file changed, 61 insertions(+), 33 deletions(-)
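
Notes (for discussion, not part of the patch):

The TODO in csched2_runtime() below asks whether the O(log n) rb_first()
walk can be avoided when peeking at the head of the runqueue. One option
is to cache the leftmost node next to the root, making the peek O(1);
Linux has grown rb_root_cached for exactly this, and the same idea can
be open-coded against xen/rbtree.h. The sketch below is hypothetical
(csched2_runq, its leftmost field and runq_insert_cached() do not exist
in this patch) and only illustrates the bookkeeping:

    /* Hypothetical container: the root plus a cached rb_first(&root). */
    struct csched2_runq {
        struct rb_root root;
        struct rb_node *leftmost;
    };

    static void runq_insert_cached(struct csched2_runq *q,
                                   struct csched2_vcpu *svc, int *pos)
    {
        struct rb_node **node = &q->root.rb_node, *parent = NULL;
        bool leftmost = true;

        while ( *node )
        {
            struct csched2_vcpu *entry;

            parent = *node;
            entry = rb_entry(parent, struct csched2_vcpu, runq_elem);
            if ( svc->credit > entry->credit )
                node = &parent->rb_left;
            else
            {
                /* Went right at least once: not the new leftmost. */
                node = &parent->rb_right;
                leftmost = false;
            }
            (*pos)++;
        }
        rb_link_node(&svc->runq_elem, parent, node);
        rb_insert_color(&svc->runq_elem, &q->root);
        if ( leftmost )
            q->leftmost = &svc->runq_elem;
    }

On removal, the cache is refreshed before erasing:

        if ( q->leftmost == &svc->runq_elem )
            q->leftmost = rb_next(&svc->runq_elem);
        rb_erase(&svc->runq_elem, &q->root);

Whether the extra pointer pays off depends on how hot the peek path is;
it is left out of this patch to keep the change minimal.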

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 9a3e71f1c8..3802c2888f 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -25,6 +25,7 @@
 #include <xen/trace.h>
 #include <xen/cpu.h>
 #include <xen/keyhandler.h>
+#include <xen/rbtree.h>
 
 /* Meant only for helping developers during debugging. */
 /* #define d2printk printk */
@@ -471,7 +472,7 @@ custom_param("credit2_runqueue", parse_credit2_runqueue);
 struct csched2_runqueue_data {
     spinlock_t lock;           /* Lock for this runqueue                     */
 
-    struct list_head runq;     /* Ordered list of runnable vms               */
+    struct rb_root runq;       /* Runqueue is an rbtree                      */
     int id;                    /* ID of this runqueue (-1 if invalid)        */
 
     int load;                  /* Instantaneous load (num of non-idle vcpus) */
@@ -536,7 +537,7 @@ struct csched2_vcpu {
     s_time_t load_last_update;         /* Last time average was updated       */
     s_time_t avgload;                  /* Decaying queue load                 */
 
-    struct list_head runq_elem;        /* On the runqueue (rqd->runq)         */
+    struct rb_node runq_elem;          /* On the runqueue (rqd->runq)         */
     struct list_head parked_elem;      /* On the parked_vcpus list            */
     struct list_head rqd_elem;         /* On csched2_runqueue_data's svc list */
     struct csched2_runqueue_data *migrate_rqd; /* Pre-determined migr. target */
@@ -600,6 +601,29 @@ static inline bool has_cap(const struct csched2_vcpu *svc)
     return svc->budget != STIME_MAX;
 }
 
+static void runq_insert_rb(struct rb_root *root,
+                           struct csched2_vcpu *svc,
+                           int *pos)
+{
+    struct csched2_vcpu *entry = NULL;
+    struct rb_node **node = &root->rb_node;
+    struct rb_node *parent = NULL;
+
+    while ( *node )
+    {
+        parent = *node;
+        entry = rb_entry(parent, struct csched2_vcpu, runq_elem);
+        /*
+         * Sort by credit, with the highest-credit vCPU as the leftmost
+         * node, mirroring the head of the old sorted list. Equal credits
+         * go right, so a new vCPU queues behind its peers, just as
+         * list_add_tail() used to arrange.
+         */
+        if ( svc->credit > entry->credit )
+            node = &parent->rb_left;
+        else
+            node = &parent->rb_right;
+
+        (*pos)++;    /* pos now counts tree levels walked; only tracing uses it */
+    }
+    rb_link_node(&svc->runq_elem, parent, node);
+    rb_insert_color(&svc->runq_elem, root);
+}
+
 /*
  * Hyperthreading (SMT) support.
  *
@@ -793,12 +817,12 @@ static s_time_t c2t(struct csched2_runqueue_data *rqd, s_time_t credit, struct c
 
 static inline int vcpu_on_runq(struct csched2_vcpu *svc)
 {
-    return !list_empty(&svc->runq_elem);
+    return !RB_EMPTY_NODE(&svc->runq_elem);
 }
 
-static inline struct csched2_vcpu * runq_elem(struct list_head *elem)
+static inline struct csched2_vcpu * runq_elem(struct rb_node *elem)
 {
-    return list_entry(elem, struct csched2_vcpu, runq_elem);
+    return rb_entry(elem, struct csched2_vcpu, runq_elem);
 }
 
 static void activate_runqueue(struct csched2_private *prv, int rqi)
@@ -812,7 +836,7 @@ static void activate_runqueue(struct csched2_private *prv, int rqi)
     rqd->max_weight = 1;
     rqd->id = rqi;
     INIT_LIST_HEAD(&rqd->svc);
-    INIT_LIST_HEAD(&rqd->runq);
+    rqd->runq = RB_ROOT;
     spin_lock_init(&rqd->lock);
 
     __cpumask_set_cpu(rqi, &prv->active_queues);
@@ -1272,9 +1296,8 @@ update_load(const struct scheduler *ops,
 static void
 runq_insert(const struct scheduler *ops, struct csched2_vcpu *svc)
 {
-    struct list_head *iter;
     unsigned int cpu = svc->vcpu->processor;
-    struct list_head * runq = &c2rqd(ops, cpu)->runq;
+    struct rb_root *runq = &c2rqd(ops, cpu)->runq;
     int pos = 0;
 
     ASSERT(spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock));
@@ -1287,16 +1310,7 @@ runq_insert(const struct scheduler *ops, struct csched2_vcpu *svc)
     ASSERT(!svc->vcpu->is_running);
     ASSERT(!(svc->flags & CSFLAG_scheduled));
 
-    list_for_each( iter, runq )
-    {
-        struct csched2_vcpu * iter_svc = runq_elem(iter);
-
-        if ( svc->credit > iter_svc->credit )
-            break;
-
-        pos++;
-    }
-    list_add_tail(&svc->runq_elem, iter);
+    runq_insert_rb(runq, svc, &pos);
 
     if ( unlikely(tb_init_done) )
     {
@@ -1315,8 +1329,11 @@ runq_insert(const struct scheduler *ops, struct csched2_vcpu *svc)
 
 static inline void runq_remove(struct csched2_vcpu *svc)
 {
+    /* TODO: this ASSERT may be redundant, since all callers already
+     * check vcpu_on_runq() before calling runq_remove(). */
     ASSERT(vcpu_on_runq(svc));
-    list_del_init(&svc->runq_elem);
+    rb_erase(&svc->runq_elem, &svc->rqd->runq);
+    RB_CLEAR_NODE(&svc->runq_elem);
 }
 
 void burn_credits(struct csched2_runqueue_data *rqd, struct csched2_vcpu *, s_time_t);
@@ -1785,6 +1802,7 @@ static void park_vcpu(struct csched2_vcpu *svc)
      * In both cases, we also add it to the list of parked vCPUs of the domain.
      */
     __set_bit(_VPF_parked, &v->pause_flags);
+
     if ( vcpu_on_runq(svc) )
     {
         runq_remove(svc);
@@ -2037,7 +2055,7 @@ csched2_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
         return NULL;
 
     INIT_LIST_HEAD(&svc->rqd_elem);
-    INIT_LIST_HEAD(&svc->runq_elem);
+    RB_CLEAR_NODE(&svc->runq_elem);
 
     svc->sdom = dd;
     svc->vcpu = vc;
@@ -2083,6 +2101,8 @@ csched2_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
     }
     else if ( vcpu_on_runq(svc) )
     {
+        printk(XENLOG_WARNING "%s : %d : vcpu on runq rb !\n", __func__, __LINE__);
+
         ASSERT(svc->rqd == c2rqd(ops, vc->processor));
         update_load(ops, svc->rqd, svc, -1, NOW());
         runq_remove(svc);
@@ -2110,6 +2130,7 @@ csched2_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
 
     if ( unlikely(vcpu_on_runq(svc)) )
     {
+        printk(XENLOG_WARNING "%s : %d : vcpu on runq rb !\n", __func__, __LINE__);
         SCHED_STAT_CRANK(vcpu_wake_onrunq);
         goto out;
     }
@@ -2501,6 +2522,7 @@ static void migrate(const struct scheduler *ops,
         /* It's not running; just move it */
         if ( vcpu_on_runq(svc) )
         {
+            printk(XENLOG_WARNING "%s : %d : vcpu on runq rb !\n", __func__, __LINE__);
             runq_remove(svc);
             update_load(ops, svc->rqd, NULL, -1, now);
             on_runq = 1;
@@ -2759,8 +2781,10 @@ csched2_vcpu_migrate(
     if ( unlikely(!cpumask_test_cpu(new_cpu, cpupool_domain_cpumask(d))) )
     {
         ASSERT(system_state == SYS_STATE_suspend);
+
         if ( vcpu_on_runq(svc) )
         {
+            printk(XENLOG_WARNING "%s : %d : vcpu on runq rb !\n", __func__, __LINE__);
             runq_remove(svc);
             update_load(ops, svc->rqd, NULL, -1, now);
         }
@@ -3103,7 +3127,7 @@ csched2_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
     spinlock_t *lock;
 
     ASSERT(!is_idle_vcpu(vc));
-    ASSERT(list_empty(&svc->runq_elem));
+    ASSERT(RB_EMPTY_NODE(&svc->runq_elem));
 
     /* csched2_cpu_pick() expects the pcpu lock to be held */
     lock = vcpu_schedule_lock_irq(vc);
@@ -3141,7 +3165,7 @@ csched2_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
     spinlock_t *lock;
 
     ASSERT(!is_idle_vcpu(vc));
-    ASSERT(list_empty(&svc->runq_elem));
+    ASSERT(RB_EMPTY_NODE(&svc->runq_elem));
 
     SCHED_STAT_CRANK(vcpu_remove);
 
@@ -3163,7 +3187,7 @@ csched2_runtime(const struct scheduler *ops, int cpu,
     s_time_t time, min_time;
     int rt_credit; /* Proposed runtime measured in credits */
     struct csched2_runqueue_data *rqd = c2rqd(ops, cpu);
-    struct list_head *runq = &rqd->runq;
+    struct rb_root *runq = &rqd->runq;
     struct csched2_private *prv = csched2_priv(ops);
 
     /*
@@ -3201,17 +3225,21 @@ csched2_runtime(const struct scheduler *ops, int cpu,
      * 2) If there's someone waiting whose credit is positive,
      *    run until your credit ~= his.
      */
-    if ( ! list_empty(runq) )
+    if ( ! RB_EMPTY_ROOT(runq) )
     {
-        struct csched2_vcpu *swait = runq_elem(runq->next);
+        /* The leftmost node is the highest-credit waiter: the old list head. */
+        struct rb_node *node = rb_first(runq);
 
+        /* TODO: rb_first() is O(log n) per peek; caching the leftmost node
+         * (see the note after the diffstat) would make it O(1). */
+
+        struct csched2_vcpu *swait = runq_elem(node);
         if ( ! is_idle_vcpu(swait->vcpu)
              && swait->credit > 0 )
         {
             rt_credit = snext->credit - swait->credit;
         }
     }
-
     /*
      * The next guy on the runqueue may actually have a higher credit,
      * if we've tried to avoid migrating him from a different cpu.
@@ -3260,7 +3288,7 @@ runq_candidate(struct csched2_runqueue_data *rqd,
                int cpu, s_time_t now,
                unsigned int *skipped)
 {
-    struct list_head *iter, *temp;
+    struct rb_node *iter = NULL;
     struct csched2_vcpu *snext = NULL;
     struct csched2_private *prv = csched2_priv(per_cpu(scheduler, cpu));
     bool yield = false, soft_aff_preempt = false;
@@ -3345,9 +3373,8 @@ runq_candidate(struct csched2_runqueue_data *rqd,
         snext = csched2_vcpu(idle_vcpu[cpu]);
 
  check_runq:
-    list_for_each_safe( iter, temp, &rqd->runq )
-    {
-        struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, runq_elem);
+    for ( iter = rb_first(&rqd->runq); iter != NULL; iter = rb_next(iter) )
+    {
+        struct csched2_vcpu * svc = rb_entry(iter, struct csched2_vcpu, runq_elem);
 
         if ( unlikely(tb_init_done) )
         {
@@ -3749,7 +3776,8 @@ csched2_dump(const struct scheduler *ops)
     for_each_cpu(i, &prv->active_queues)
     {
         struct csched2_runqueue_data *rqd = prv->rqd + i;
-        struct list_head *iter, *runq = &rqd->runq;
+        struct rb_root *runq = &rqd->runq;
+        struct rb_node *iter;
         int loop = 0;
 
         /* We need the lock to scan the runqueue. */
@@ -3761,8 +3789,8 @@ csched2_dump(const struct scheduler *ops)
             dump_pcpu(ops, j);
 
         printk("RUNQ:\n");
-        list_for_each( iter, runq )
-        {
+        for ( iter = rb_first(runq); iter != NULL; iter = rb_next(iter) )
+        {
             struct csched2_vcpu *svc = runq_elem(iter);
 
             if ( svc )
-- 
2.13.6

