
[Xen-changelog] [xen-unstable] [XEN] Initial support for multi-core and multi-threaded CPU scheduling.



# HG changeset patch
# User Emmanuel Ackaouy <ack@xxxxxxxxxxxxx>
# Node ID bb6cd7ba259b7552e2f46f986c1580350af10517
# Parent  32e4952c063866165d9cab913f67b57f8e78aded
[XEN] Initial support for multi-core and multi-threaded CPU scheduling.
In multi-core and multi-threaded systems, not all idling "CPUs" are
equal: when idle execution vehicles are available, it is better to
spread VCPUs across distinct sockets and cores before packing them
onto sibling cores and hyperthreads.
Signed-off-by: Emmanuel Ackaouy <ack@xxxxxxxxxxxxx>
---
 xen/common/sched_credit.c  |  210 ++++++++++++++++++++++++++++++++++++++++-----
 xen/common/sched_sedf.c    |    9 +
 xen/common/schedule.c      |    4 
 xen/include/xen/sched-if.h |    1 
 4 files changed, 202 insertions(+), 22 deletions(-)
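
The heart of the change is the pair csched_idler_compare() / csched_cpu_pick()
added to sched_credit.c below. The following standalone sketch reproduces the
same comparison and pruning logic outside of Xen so it can be compiled and run
on its own: the CPU topology and the idler set are modelled with plain 64-bit
masks instead of cpumask_t, and the 2-socket x 2-core x 2-thread layout is
invented purely for illustration, so read it as an approximation of the
patched code rather than an excerpt from it.

/*
 * Standalone sketch of the topology-aware placement heuristic that
 * this patch adds to sched_credit.c.  The topology and idler set are
 * plain 64-bit masks rather than Xen's cpumask_t, and the 2-socket x
 * 2-core x 2-thread layout is made up for illustration.
 */
#include <stdio.h>
#include <stdint.h>

#define NR_CPUS 8
#define MASK(c) (UINT64_C(1) << (c))

/* Hypothetical topology, using Xen's naming: cpu_sibling_map[c] is the
 * set of threads sharing c's core, cpu_core_map[c] the set of CPUs
 * sharing c's socket. */
static const uint64_t cpu_sibling_map[NR_CPUS] = {
    0x03, 0x03, 0x0c, 0x0c, 0x30, 0x30, 0xc0, 0xc0
};
static const uint64_t cpu_core_map[NR_CPUS] = {
    0x0f, 0x0f, 0x0f, 0x0f, 0xf0, 0xf0, 0xf0, 0xf0
};

static int weight(uint64_t m)            /* stands in for cpus_weight() */
{
    int n = 0;
    for ( ; m != 0; m >>= 1 )
        n += (int)(m & 1);
    return n;
}

static int first(uint64_t m)             /* first_cpu(); m must be non-zero */
{
    int c = 0;
    while ( !(m & 1) ) { m >>= 1; c++; }
    return c;
}

/*
 * Mirrors csched_idler_compare(): drop both candidates from the idler
 * set, then compare how many of the remaining idlers each candidate
 * has in its core (same-socket case) or in its socket (different-socket
 * case).  Positive means "one" is the better home for an extra VCPU.
 */
static int idler_compare(uint64_t idlers, int one, int two)
{
    idlers &= ~(MASK(one) | MASK(two));

    if ( cpu_core_map[two] & MASK(one) )
        return weight(idlers & cpu_sibling_map[one]) -
               weight(idlers & cpu_sibling_map[two]);

    return weight(idlers & cpu_core_map[one]) -
           weight(idlers & cpu_core_map[two]);
}

/*
 * Mirrors the pruning loop in csched_cpu_pick(): keep the best idler
 * seen so far and, whenever a candidate loses, discard its whole core
 * or socket so each group is examined at most once.
 */
static int pick_idler(uint64_t idlers, int cpu)
{
    uint64_t cpus = idlers & ~MASK(cpu);

    while ( cpus != 0 )
    {
        int nxt = first(cpus);

        if ( idler_compare(idlers, cpu, nxt) < 0 )
        {
            cpu = nxt;
            cpus &= ~MASK(nxt);
        }
        else if ( cpu_core_map[nxt] & MASK(cpu) )
            cpus &= ~cpu_sibling_map[nxt];  /* same socket: skip nxt's core */
        else
            cpus &= ~cpu_core_map[nxt];     /* other socket: skip it wholesale */
    }

    return cpu;
}

int main(void)
{
    /* CPU 0 is busy, CPUs 1-7 idle.  Starting from CPU 1 (CPU 0's
     * hyperthread), the heuristic should prefer the fully idle second
     * socket over sharing CPU 0's core or socket. */
    uint64_t idlers = 0xfe;

    printf("best idler starting from CPU 1: %d\n", pick_idler(idlers, 1));
    return 0;
}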

diff -r 32e4952c0638 -r bb6cd7ba259b xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Mon Nov 06 16:55:56 2006 +0000
+++ b/xen/common/sched_credit.c Mon Nov 06 17:32:00 2006 +0000
@@ -115,6 +115,10 @@
     _MACRO(steal_peer_idle)                 \
     _MACRO(steal_peer_running)              \
     _MACRO(steal_peer_pinned)               \
+    _MACRO(steal_peer_migrating)            \
+    _MACRO(steal_peer_best_idler)           \
+    _MACRO(steal_loner_candidate)           \
+    _MACRO(steal_loner_signal)              \
     _MACRO(dom_init)                        \
     _MACRO(dom_destroy)                     \
     _MACRO(vcpu_init)                       \
@@ -370,8 +374,42 @@ __csched_vcpu_check(struct vcpu *vc)
 #define CSCHED_VCPU_CHECK(_vc)
 #endif
 
+/*
+ * Indicates which of two given idle CPUs is the more efficient place
+ * to run an additional VCPU.
+ *
+ * Returns:
+ *  0:           both are equally good.
+ *  negative:    "one" is less efficient than "two".
+ *  positive:    "one" is more efficient than "two".
+ */
+static int
+csched_idler_compare(int one, int two)
+{
+    cpumask_t idlers;
+    cpumask_t one_idlers;
+    cpumask_t two_idlers;
+
+    idlers = csched_priv.idlers;
+    cpu_clear(one, idlers);
+    cpu_clear(two, idlers);
+
+    if ( cpu_isset(one, cpu_core_map[two]) )
+    {
+        cpus_and(one_idlers, idlers, cpu_sibling_map[one]);
+        cpus_and(two_idlers, idlers, cpu_sibling_map[two]);
+    }
+    else
+    {
+        cpus_and(one_idlers, idlers, cpu_core_map[one]);
+        cpus_and(two_idlers, idlers, cpu_core_map[two]);
+    }
+
+    return cpus_weight(one_idlers) - cpus_weight(two_idlers);
+}
+
 static inline int
-__csched_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
+__csched_queued_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
 {
     /*
      * Don't pick up work that's in the peer's scheduling tail. Also only pick
@@ -386,6 +424,32 @@ __csched_vcpu_is_stealable(int local_cpu
     if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) )
     {
         CSCHED_STAT_CRANK(steal_peer_pinned);
+        return 0;
+    }
+
+    return 1;
+}
+
+static inline int
+__csched_running_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
+{
+    BUG_ON( is_idle_vcpu(vc) );
+
+    if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) )
+    {
+        CSCHED_STAT_CRANK(steal_peer_pinned);
+        return 0;
+    }
+
+    if ( test_bit(_VCPUF_migrating, &vc->vcpu_flags) )
+    {
+        CSCHED_STAT_CRANK(steal_peer_migrating);
+        return 0;
+    }
+
+    if ( csched_idler_compare(local_cpu, vc->processor) <= 0 )
+    {
+        CSCHED_STAT_CRANK(steal_peer_best_idler);
         return 0;
     }
 
@@ -652,6 +716,64 @@ csched_dom_destroy(struct domain *dom)
     xfree(sdom);
 }
 
+static int
+csched_cpu_pick(struct vcpu *vc)
+{
+    cpumask_t cpus;
+    int cpu, nxt;
+
+    /*
+     * Pick from online CPUs in VCPU's affinity mask, giving a
+     * preference to its current processor if it's in there.
+     */
+    cpus_and(cpus, cpu_online_map, vc->cpu_affinity);
+    ASSERT( !cpus_empty(cpus) );
+    cpu = cpu_isset(vc->processor, cpus) ? vc->processor : first_cpu(cpus);
+
+    /*
+     * Try to find an idle processor within the above constraints.
+     */
+    cpus_and(cpus, cpus, csched_priv.idlers);
+    if ( !cpus_empty(cpus) )
+    {
+        cpu = cpu_isset(cpu, cpus) ? cpu : first_cpu(cpus);
+        cpu_clear(cpu, cpus);
+
+        /*
+         * In multi-core and multi-threaded CPUs, not all idle execution
+         * vehicles are equal!
+         *
+         * We give preference to the idle execution vehicle with the most
+         * idling neighbours in its grouping. This distributes work across
+         * distinct sockets and cores first and guarantees we don't do
+         * something stupid like run two VCPUs on co-hyperthreads while
+         * there are idle cores or sockets.
+         */
+        while ( !cpus_empty(cpus) )
+        {
+            nxt = first_cpu(cpus);
+
+            if ( csched_idler_compare(cpu, nxt) < 0 )
+            {
+                cpu = nxt;
+                cpu_clear(nxt, cpus);
+            }
+            else if ( cpu_isset(cpu, cpu_core_map[nxt]) )
+            {
+                cpus_andnot(cpus, cpus, cpu_sibling_map[nxt]);
+            }
+            else
+            {
+                cpus_andnot(cpus, cpus, cpu_core_map[nxt]);
+            }
+
+            ASSERT( !cpu_isset(nxt, cpus) );
+        }
+    }
+
+    return cpu;
+}
+
 /*
  * This is a O(n) optimized sort of the runq.
  *
@@ -939,7 +1061,7 @@ csched_runq_steal(struct csched_pcpu *sp
         vc = speer->vcpu;
         BUG_ON( is_idle_vcpu(vc) );
 
-        if ( __csched_vcpu_is_stealable(cpu, vc) )
+        if ( __csched_queued_vcpu_is_stealable(cpu, vc) )
         {
             /* We got a candidate. Grab it! */
             __runq_remove(speer);
@@ -959,6 +1081,7 @@ csched_load_balance(int cpu, struct csch
     struct csched_pcpu *spc;
     struct vcpu *peer_vcpu;
     cpumask_t workers;
+    cpumask_t loners;
     int peer_cpu;
 
     if ( snext->pri == CSCHED_PRI_IDLE )
@@ -971,6 +1094,7 @@ csched_load_balance(int cpu, struct csch
     /*
      * Peek at non-idling CPUs in the system
      */
+    cpus_clear(loners);
     cpus_andnot(workers, cpu_online_map, csched_priv.idlers);
     cpu_clear(cpu, workers);
 
@@ -999,13 +1123,12 @@ csched_load_balance(int cpu, struct csch
             continue;
         }
 
+        peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
         spc = CSCHED_PCPU(peer_cpu);
-        peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
 
         if ( unlikely(spc == NULL) )
         {
             CSCHED_STAT_CRANK(steal_peer_down);
-            speer = NULL;
         }
         else if ( unlikely(is_idle_vcpu(peer_vcpu)) )
         {
@@ -1014,26 +1137,72 @@ csched_load_balance(int cpu, struct csch
              * pick up work from it itself.
              */
             CSCHED_STAT_CRANK(steal_peer_idle);
-            speer = NULL;
+        }
+        else if ( is_idle_vcpu(__runq_elem(spc->runq.next)->vcpu) )
+        {
+            if ( snext->pri == CSCHED_PRI_IDLE &&
+                 __csched_running_vcpu_is_stealable(cpu, peer_vcpu) )
+            {
+                CSCHED_STAT_CRANK(steal_loner_candidate);
+                cpu_set(peer_cpu, loners);
+            }
         }
         else
         {
-            /* Try to steal work from an online non-idle CPU. */
+            /* Try to steal work from a remote CPU's runq. */
             speer = csched_runq_steal(spc, cpu, snext->pri);
+            if ( speer != NULL )
+            {
+                spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
+                CSCHED_STAT_CRANK(vcpu_migrate);
+                speer->stats.migrate++;
+                return speer;
+            }
         }
 
         spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
-
-        /* Got one? */
-        if ( speer )
-        {
-            CSCHED_STAT_CRANK(vcpu_migrate);
-            speer->stats.migrate++;
-            return speer;
-        }
-    }
-
-    /* Failed to find more important work */
+    }
+
+    /*
+     * If we failed to find any remotely queued VCPUs to move here,
+     * see if it would be more efficient to move any of the running
+     * remote VCPUs over here.
+     */
+    while ( !cpus_empty(loners) )
+    {
+        /* For each CPU of interest, starting with our neighbour... */
+        peer_cpu = next_cpu(peer_cpu, loners);
+        if ( peer_cpu == NR_CPUS )
+            peer_cpu = first_cpu(loners);
+
+        cpu_clear(peer_cpu, loners);
+
+        if ( !spin_trylock(&per_cpu(schedule_data, peer_cpu).schedule_lock) )
+        {
+            CSCHED_STAT_CRANK(steal_trylock_failed);
+            continue;
+        }
+
+        peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
+        spc = CSCHED_PCPU(peer_cpu);
+
+        if ( !is_idle_vcpu(peer_vcpu) &&
+             is_idle_vcpu(__runq_elem(spc->runq.next)->vcpu) &&
+             __csched_running_vcpu_is_stealable(cpu, peer_vcpu) )
+        {
+            set_bit(_VCPUF_migrating, &peer_vcpu->vcpu_flags);
+            spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
+
+            CSCHED_STAT_CRANK(steal_loner_signal);
+            cpu_raise_softirq(peer_cpu, SCHEDULE_SOFTIRQ);
+        }
+        else
+        {
+            spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
+        }
+    }
+
+    /* Failed to find more important work elsewhere... */
     __runq_remove(snext);
     return snext;
 }
@@ -1139,9 +1308,11 @@ csched_dump_pcpu(int cpu)
     spc = CSCHED_PCPU(cpu);
     runq = &spc->runq;
 
-    printk(" tick=%lu, sort=%d\n",
+    printk(" tick=%lu, sort=%d, sibling=0x%lx, core=0x%lx\n",
             per_cpu(schedule_data, cpu).tick,
-            spc->runq_sort_last);
+            spc->runq_sort_last,
+            cpu_sibling_map[cpu].bits[0],
+            cpu_core_map[cpu].bits[0]);
 
     /* current VCPU */
     svc = CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
@@ -1247,6 +1418,7 @@ struct scheduler sched_credit_def = {
 
     .adjust         = csched_dom_cntl,
 
+    .pick_cpu       = csched_cpu_pick,
     .tick           = csched_tick,
     .do_schedule    = csched_schedule,
 
diff -r 32e4952c0638 -r bb6cd7ba259b xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c   Mon Nov 06 16:55:56 2006 +0000
+++ b/xen/common/sched_sedf.c   Mon Nov 06 17:32:00 2006 +0000
@@ -409,6 +409,14 @@ static void sedf_destroy_domain(struct d
 static void sedf_destroy_domain(struct domain *d)
 {
     xfree(d->sched_priv);
+}
+
+static int sedf_pick_cpu(struct vcpu *v)
+{
+    cpumask_t online_affinity;
+
+    cpus_and(online_affinity, v->cpu_affinity, cpu_online_map);
+    return first_cpu(online_affinity);
 }
 
 /*
@@ -1436,6 +1444,7 @@ struct scheduler sched_sedf_def = {
     .destroy_vcpu   = sedf_destroy_vcpu,
 
     .do_schedule    = sedf_do_schedule,
+    .pick_cpu       = sedf_pick_cpu,
     .dump_cpu_state = sedf_dump_cpu_state,
     .sleep          = sedf_sleep,
     .wake           = sedf_wake,
diff -r 32e4952c0638 -r bb6cd7ba259b xen/common/schedule.c
--- a/xen/common/schedule.c     Mon Nov 06 16:55:56 2006 +0000
+++ b/xen/common/schedule.c     Mon Nov 06 17:32:00 2006 +0000
@@ -203,7 +203,6 @@ void vcpu_wake(struct vcpu *v)
 
 static void vcpu_migrate(struct vcpu *v)
 {
-    cpumask_t online_affinity;
     unsigned long flags;
     int old_cpu;
 
@@ -218,8 +217,7 @@ static void vcpu_migrate(struct vcpu *v)
 
     /* Switch to new CPU, then unlock old CPU. */
     old_cpu = v->processor;
-    cpus_and(online_affinity, v->cpu_affinity, cpu_online_map);
-    v->processor = first_cpu(online_affinity);
+    v->processor = SCHED_OP(pick_cpu, v);
     spin_unlock_irqrestore(
         &per_cpu(schedule_data, old_cpu).schedule_lock, flags);
 
diff -r 32e4952c0638 -r bb6cd7ba259b xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h        Mon Nov 06 16:55:56 2006 +0000
+++ b/xen/include/xen/sched-if.h        Mon Nov 06 17:32:00 2006 +0000
@@ -74,6 +74,7 @@ struct scheduler {
 
     struct task_slice (*do_schedule) (s_time_t);
 
+    int          (*pick_cpu)       (struct vcpu *);
     int          (*adjust)         (struct domain *,
                                     struct xen_domctl_scheduler_op *);
     void         (*dump_settings)  (void);
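
The schedule.c and sched-if.h hunks above replace the hard-coded "first online
CPU in the affinity mask" placement in vcpu_migrate() with a per-scheduler
pick_cpu hook: sedf keeps the old behaviour in sedf_pick_cpu(), while credit
supplies the topology-aware csched_cpu_pick(). Below is a minimal,
self-contained sketch of that dispatch; struct vcpu, struct scheduler and the
SCHED_OP-style call are simplified stand-ins, and the "topology-aware" policy
here is a toy that merely prefers a hard-wired CPU, not the real credit logic.

#include <stdio.h>
#include <stdint.h>

/* Simplified stand-ins for Xen's struct vcpu and struct scheduler. */
struct vcpu {
    int      processor;     /* CPU the VCPU last ran on */
    uint64_t cpu_affinity;  /* bitmask of CPUs it may run on */
};

struct scheduler {
    const char *name;
    int (*pick_cpu)(struct vcpu *v);
};

static const uint64_t cpu_online_map = 0x0f;   /* CPUs 0-3 online (example) */

static int first_cpu(uint64_t mask)
{
    int cpu = 0;
    if ( mask == 0 )
        return -1;
    while ( !(mask & 1) ) { mask >>= 1; cpu++; }
    return cpu;
}

/* sedf-style policy: first online CPU in the VCPU's affinity mask. */
static int simple_pick_cpu(struct vcpu *v)
{
    return first_cpu(v->cpu_affinity & cpu_online_map);
}

/* Toy "credit-style" policy: pretend CPU 2 sits on the emptiest socket,
 * just to show a different placement decision.  The real credit policy
 * is csched_cpu_pick() in the patch above. */
static int topology_aware_pick_cpu(struct vcpu *v)
{
    uint64_t ok = v->cpu_affinity & cpu_online_map;
    return (ok & (UINT64_C(1) << 2)) ? 2 : first_cpu(ok);
}

/* Stand-in for the SCHED_OP(pick_cpu, v) call now made by vcpu_migrate(). */
static void migrate(const struct scheduler *ops, struct vcpu *v)
{
    v->processor = ops->pick_cpu(v);
    printf("%s placed the VCPU on CPU %d\n", ops->name, v->processor);
}

int main(void)
{
    struct vcpu v = { .processor = 0, .cpu_affinity = 0x0e };
    const struct scheduler sedf_like   = { "sedf-like",   simple_pick_cpu };
    const struct scheduler credit_like = { "credit-like", topology_aware_pick_cpu };

    migrate(&sedf_like, &v);
    migrate(&credit_like, &v);
    return 0;
}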

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog