Xen project Mailing List

[Xen-devel] [RFC PATCH v2 1/2] xen: credit2: rb-tree for runqueues

From: Praveen Kumar <kpraveen.lkml@xxxxxxxxx>

Date: Sun, 23 Dec 2018 19:51:33 +0530

Cc: george.dunlap@xxxxxxxxxxxxx, Praveen Kumar <kpraveen.lkml@xxxxxxxxx>, dfaggioli@xxxxxxxx

Delivery-date: Fri, 04 Jan 2019 09:52:15 +0000

List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

The patch optimized the sorted credit2 runq from simple linked list to rb-tree implementation. This way we will gain performance and scalability when the number of vCPUs are huge. Signed-off-by: Praveen Kumar <kpraveen.lkml@xxxxxxxxx> --- Changes since v1: * Renamed the rb_runq_insert * Corrected the next probable runq element from rb_first to rb_last --- xen/common/sched_credit2.c | 88 +++++++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 33 deletions(-) diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c index 623a325ceb..2463a25f87 100644 --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -25,6 +25,7 @@ #include <xen/trace.h> #include <xen/cpu.h> #include <xen/keyhandler.h> +#include <xen/rbtree.h> /* Meant only for helping developers during debugging. */ /* #define d2printk printk */ @@ -471,7 +472,7 @@ custom_param("credit2_runqueue", parse_credit2_runqueue); struct csched2_runqueue_data { spinlock_t lock; /* Lock for this runqueue */ - struct list_head runq; /* Ordered list of runnable vms */ + struct rb_root runq; /* Runqueue is an rbtree */ int id; /* ID of this runqueue (-1 if invalid) */ int load; /* Instantaneous load (num of non-idle vcpus) */ @@ -535,7 +536,7 @@ struct csched2_vcpu { s_time_t load_last_update; /* Last time average was updated */ s_time_t avgload; /* Decaying queue load */ - struct list_head runq_elem; /* On the runqueue (rqd->runq) */ + struct rb_node runq_elem; /* On the runqueue (rqd->runq) */ struct list_head parked_elem; /* On the parked_vcpus list */ struct list_head rqd_elem; /* On csched2_runqueue_data's svc list */ struct csched2_runqueue_data *migrate_rqd; /* Pre-determined migr. target */ @@ -604,6 +605,28 @@ static inline bool has_cap(const struct csched2_vcpu *svc) return svc->budget != STIME_MAX; } +static void rb_runq_insert(struct rb_root *root, + struct csched2_vcpu *svc, + int *pos) +{ + struct csched2_vcpu *entry = NULL; + struct rb_node **node = &root->rb_node; + struct rb_node *parent = NULL; + + while (*node) { + parent = *node; + entry = rb_entry(parent, struct csched2_vcpu, runq_elem); + if ( svc->credit < entry->credit ) + node = &parent->rb_left; + else + node = &parent->rb_right; + + (*pos)++; + } + rb_link_node(&svc->runq_elem, parent, node); + rb_insert_color(&svc->runq_elem, root); +} + /* * Hyperthreading (SMT) support. * @@ -797,12 +820,12 @@ static s_time_t c2t(struct csched2_runqueue_data *rqd, s_time_t credit, struct c static inline int vcpu_on_runq(struct csched2_vcpu *svc) { - return !list_empty(&svc->runq_elem); + return !RB_EMPTY_NODE(&svc->runq_elem); } -static inline struct csched2_vcpu * runq_elem(struct list_head *elem) +static inline struct csched2_vcpu * runq_elem(struct rb_node *elem) { - return list_entry(elem, struct csched2_vcpu, runq_elem); + return rb_entry(elem, struct csched2_vcpu, runq_elem); } static void activate_runqueue(struct csched2_private *prv, int rqi) @@ -816,7 +839,7 @@ static void activate_runqueue(struct csched2_private *prv, int rqi) rqd->max_weight = 1; rqd->id = rqi; INIT_LIST_HEAD(&rqd->svc); - INIT_LIST_HEAD(&rqd->runq); + rqd->runq = RB_ROOT; spin_lock_init(&rqd->lock); __cpumask_set_cpu(rqi, &prv->active_queues); @@ -1276,9 +1299,8 @@ update_load(const struct scheduler *ops, static void runq_insert(const struct scheduler *ops, struct csched2_vcpu *svc) { - struct list_head *iter; unsigned int cpu = svc->vcpu->processor; - struct list_head * runq = &c2rqd(ops, cpu)->runq; + struct rb_root *runq = &c2rqd(ops, cpu)->runq; int pos = 0; ASSERT(spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock)); @@ -1291,16 +1313,7 @@ runq_insert(const struct scheduler *ops, struct csched2_vcpu *svc) ASSERT(!svc->vcpu->is_running); ASSERT(!(svc->flags & CSFLAG_scheduled)); - list_for_each( iter, runq ) - { - struct csched2_vcpu * iter_svc = runq_elem(iter); - - if ( svc->credit > iter_svc->credit ) - break; - - pos++; - } - list_add_tail(&svc->runq_elem, iter); + rb_runq_insert(runq, svc, &pos); if ( unlikely(tb_init_done) ) { @@ -1320,7 +1333,8 @@ runq_insert(const struct scheduler *ops, struct csched2_vcpu *svc) static inline void runq_remove(struct csched2_vcpu *svc) { ASSERT(vcpu_on_runq(svc)); - list_del_init(&svc->runq_elem); + rb_erase(&svc->runq_elem, &svc->rqd->runq); + RB_CLEAR_NODE(&svc->runq_elem); } void burn_credits(struct csched2_runqueue_data *rqd, struct csched2_vcpu *, s_time_t); @@ -1789,6 +1803,7 @@ static void park_vcpu(struct csched2_vcpu *svc) * In both cases, we also add it to the list of parked vCPUs of the domain. */ __set_bit(_VPF_parked, &v->pause_flags); + if ( vcpu_on_runq(svc) ) { runq_remove(svc); @@ -2041,7 +2056,7 @@ csched2_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd) return NULL; INIT_LIST_HEAD(&svc->rqd_elem); - INIT_LIST_HEAD(&svc->runq_elem); + RB_CLEAR_NODE(&svc->runq_elem); svc->sdom = dd; svc->vcpu = vc; @@ -2087,6 +2102,8 @@ csched2_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc) } else if ( vcpu_on_runq(svc) ) { + printk(XENLOG_WARNING "%s : %d : vcpu on runq rb !\n", __func__, __LINE__); + ASSERT(svc->rqd == c2rqd(ops, vc->processor)); update_load(ops, svc->rqd, svc, -1, NOW()); runq_remove(svc); @@ -2114,6 +2131,7 @@ csched2_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) if ( unlikely(vcpu_on_runq(svc)) ) { + printk(XENLOG_WARNING "%s : %d : vcpu on runq rb !\n", __func__, __LINE__); SCHED_STAT_CRANK(vcpu_wake_onrunq); goto out; } @@ -2505,6 +2523,7 @@ static void migrate(const struct scheduler *ops, /* It's not running; just move it */ if ( vcpu_on_runq(svc) ) { + printk(XENLOG_WARNING "%s : %d : vcpu on runq rb !\n", __func__, __LINE__); runq_remove(svc); update_load(ops, svc->rqd, NULL, -1, now); on_runq = 1; @@ -2764,8 +2783,10 @@ csched2_vcpu_migrate( if ( unlikely(!cpumask_test_cpu(new_cpu, cpupool_domain_cpumask(d))) ) { ASSERT(system_state == SYS_STATE_suspend); + if ( vcpu_on_runq(svc) ) { + printk(XENLOG_WARNING "%s : %d : vcpu on runq rb !\n", __func__, __LINE__); runq_remove(svc); update_load(ops, svc->rqd, NULL, -1, now); } @@ -3108,7 +3129,7 @@ csched2_vcpu_insert(const struct scheduler *ops, struct vcpu *vc) spinlock_t *lock; ASSERT(!is_idle_vcpu(vc)); - ASSERT(list_empty(&svc->runq_elem)); + ASSERT(RB_EMPTY_NODE(&svc->runq_elem)); /* csched2_cpu_pick() expects the pcpu lock to be held */ lock = vcpu_schedule_lock_irq(vc); @@ -3146,7 +3167,7 @@ csched2_vcpu_remove(const struct scheduler *ops, struct vcpu *vc) spinlock_t *lock; ASSERT(!is_idle_vcpu(vc)); - ASSERT(list_empty(&svc->runq_elem)); + ASSERT(RB_EMPTY_NODE(&svc->runq_elem)); SCHED_STAT_CRANK(vcpu_remove); @@ -3168,7 +3189,7 @@ csched2_runtime(const struct scheduler *ops, int cpu, s_time_t time, min_time; int rt_credit; /* Proposed runtime measured in credits */ struct csched2_runqueue_data *rqd = c2rqd(ops, cpu); - struct list_head *runq = &rqd->runq; + struct rb_root *runq = &rqd->runq; struct csched2_private *prv = csched2_priv(ops); /* @@ -3206,17 +3227,18 @@ csched2_runtime(const struct scheduler *ops, int cpu, * 2) If there's someone waiting whose credit is positive, * run until your credit ~= his. */ - if ( ! list_empty(runq) ) + if ( ! RB_EMPTY_ROOT(runq) ) { - struct csched2_vcpu *swait = runq_elem(runq->next); + // Find the left most element, which is the most probable candidate + struct rb_node *node = rb_last(runq); + struct csched2_vcpu *swait = runq_elem(node); if ( ! is_idle_vcpu(swait->vcpu) && swait->credit > 0 ) { rt_credit = snext->credit - swait->credit; } } - /* * The next guy on the runqueue may actually have a higher credit, * if we've tried to avoid migrating him from a different cpu. @@ -3265,7 +3287,7 @@ runq_candidate(struct csched2_runqueue_data *rqd, int cpu, s_time_t now, unsigned int *skipped) { - struct list_head *iter, *temp; + struct rb_node *iter = NULL; struct csched2_vcpu *snext = NULL; struct csched2_private *prv = csched2_priv(per_cpu(scheduler, cpu)); bool yield = false, soft_aff_preempt = false; @@ -3350,9 +3372,8 @@ runq_candidate(struct csched2_runqueue_data *rqd, snext = csched2_vcpu(idle_vcpu[cpu]); check_runq: - list_for_each_safe( iter, temp, &rqd->runq ) - { - struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, runq_elem); + for (iter = rb_last(&rqd->runq); iter != NULL; iter = rb_prev(iter)) { + struct csched2_vcpu * svc = rb_entry(iter, struct csched2_vcpu, runq_elem); if ( unlikely(tb_init_done) ) { @@ -3750,7 +3771,8 @@ csched2_dump(const struct scheduler *ops) for_each_cpu(i, &prv->active_queues) { struct csched2_runqueue_data *rqd = prv->rqd + i; - struct list_head *iter, *runq = &rqd->runq; + struct rb_root *runq = &rqd->runq; + struct rb_node *iter; int loop = 0; /* We need the lock to scan the runqueue. */ @@ -3762,8 +3784,8 @@ csched2_dump(const struct scheduler *ops) dump_pcpu(ops, j); printk("RUNQ:\n"); - list_for_each( iter, runq ) - { + + for (iter = rb_last(runq); iter != NULL; iter = rb_prev(iter)) { struct csched2_vcpu *svc = runq_elem(iter); if ( svc ) -- 2.13.7 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.