Xen project Mailing List

[Xen-devel] [PATCH 4/6] xen: credit1: treat pCPUs more evenly during balancing.

From: Dario Faggioli <dario.faggioli@xxxxxxxxxx>

Date: Thu, 02 Mar 2017 11:38:27 +0100

Cc: George Dunlap <george.dunlap@xxxxxxxxxxxxx>

Delivery-date: Thu, 02 Mar 2017 10:38:32 +0000

List-id: Xen developer discussion <xen-devel.lists.xen.org>

Right now, we use cpumask_first() for going through the busy pCPUs in csched_load_balance(). This means not all pCPUs have equal chances of seeing their pending work stolen. It also means there is more runqueue lock pressure on lower ID pCPUs. To avoid all this, let's record and remember, for each NUMA node, which pCPU we last stole from, and start from there the following time. Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx> --- Cc: George Dunlap <george.dunlap@xxxxxxxxxxxxx> --- xen/common/sched_credit.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c index 529b6c7..bae29a7 100644 --- a/xen/common/sched_credit.c +++ b/xen/common/sched_credit.c @@ -229,6 +229,7 @@ struct csched_private { uint32_t credit; int credit_balance; uint32_t runq_sort; + uint32_t *balance_bias; unsigned ratelimit_us; /* Period of master and tick in milliseconds */ unsigned tslice_ms, tick_period_us, ticks_per_tslice; @@ -548,6 +549,7 @@ csched_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu) { struct csched_private *prv = CSCHED_PRIV(ops); struct csched_pcpu *spc = pcpu; + unsigned int node = cpu_to_node(cpu); unsigned long flags; /* @@ -571,6 +573,12 @@ csched_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu) prv->master = cpumask_first(prv->cpus); migrate_timer(&prv->master_ticker, prv->master); } + if ( prv->balance_bias[node] == cpu ) + { + cpumask_and(cpumask_scratch, prv->cpus, &node_to_cpumask(node)); + if ( !cpumask_empty(cpumask_scratch) ) + prv->balance_bias[node] = cpumask_first(cpumask_scratch); + } kill_timer(&spc->ticker); if ( prv->ncpus == 0 ) kill_timer(&prv->master_ticker); @@ -610,6 +618,10 @@ init_pdata(struct csched_private *prv, struct csched_pcpu *spc, int cpu) NOW() + MILLISECS(prv->tslice_ms)); } + cpumask_and(cpumask_scratch, prv->cpus, &node_to_cpumask(cpu_to_node(cpu))); + if ( cpumask_weight(cpumask_scratch) 
== 1 ) + prv->balance_bias[cpu_to_node(cpu)] = cpu; + init_timer(&spc->ticker, csched_tick, (void *)(unsigned long)cpu, cpu); set_timer(&spc->ticker, NOW() + MICROSECS(prv->tick_period_us) ); @@ -1696,7 +1708,7 @@ csched_load_balance(struct csched_private *prv, int cpu, struct csched_vcpu *speer; cpumask_t workers; cpumask_t *online; - int peer_cpu, peer_node, bstep; + int peer_cpu, first_cpu, peer_node, bstep; int node = cpu_to_node(cpu); BUG_ON( cpu != snext->vcpu->processor ); @@ -1740,9 +1752,10 @@ csched_load_balance(struct csched_private *prv, int cpu, cpumask_and(&workers, &workers, &node_to_cpumask(peer_node)); __cpumask_clear_cpu(cpu, &workers); - peer_cpu = cpumask_first(&workers); - if ( peer_cpu >= nr_cpu_ids ) + first_cpu = cpumask_cycle(prv->balance_bias[peer_node], &workers); + if ( first_cpu >= nr_cpu_ids ) goto next_node; + peer_cpu = first_cpu; do { /* @@ -1770,12 +1783,18 @@ csched_load_balance(struct csched_private *prv, int cpu, if ( speer != NULL ) { *stolen = 1; + /* + * Next time we'll look for work to steal on this node, we + * will start from the next pCPU, with respect to this one, + * so we don't risk stealing always from the same ones. 
+ */ + prv->balance_bias[peer_node] = peer_cpu; return speer; } peer_cpu = cpumask_cycle(peer_cpu, &workers); - } while( peer_cpu != cpumask_first(&workers) ); + } while( peer_cpu != first_cpu ); next_node: peer_node = cycle_node(peer_node, node_online_map); @@ -2126,6 +2145,14 @@ csched_init(struct scheduler *ops) prv = xzalloc(struct csched_private); if ( prv == NULL ) return -ENOMEM; + + prv->balance_bias = xzalloc_array(uint32_t, MAX_NUMNODES); + if ( prv->balance_bias == NULL ) + { + xfree(prv); + return -ENOMEM; + } + if ( !zalloc_cpumask_var(&prv->cpus) || !zalloc_cpumask_var(&prv->idlers) || !zalloc_cpumask_var(&prv->overloaded) ) @@ -2133,6 +2160,7 @@ csched_init(struct scheduler *ops) free_cpumask_var(prv->overloaded); free_cpumask_var(prv->idlers); free_cpumask_var(prv->cpus); + xfree(prv->balance_bias); xfree(prv); return -ENOMEM; } @@ -2179,6 +2207,7 @@ csched_deinit(struct scheduler *ops) free_cpumask_var(prv->cpus); free_cpumask_var(prv->idlers); free_cpumask_var(prv->overloaded); + xfree(prv->balance_bias); xfree(prv); } } _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.