Xen project Mailing List

Re: [Xen-devel] [PATCH 7/7] xen: credit2: try to avoid tickling cpus subject to ratelimiting

To: Dario Faggioli <dario.faggioli@xxxxxxxxxx>, <xen-devel@xxxxxxxxxxxxxxxxxxxx>

From: George Dunlap <george.dunlap@xxxxxxxxxx>

Date: Tue, 25 Jul 2017 12:31:21 +0100

Cc: George Dunlap <george.dunlap@xxxxxxxxxxxxx>, Anshul Makkar <anshul.makkar@xxxxxxxxxx>

Delivery-date: Tue, 25 Jul 2017 11:31:32 +0000

List-id: Xen developer discussion <xen-devel.lists.xen.org>

On 06/16/2017 03:14 PM, Dario Faggioli wrote: > With context switching ratelimiting enabled, the following > pattern is quite common in a scheduling trace: > > 0.000845622 |||||||||||.x||| d32768v12 csched2:runq_insert d0v13, > position 0 > 0.000845831 |||||||||||.x||| d32768v12 csched2:runq_tickle_new d0v13, > processor = 12, credit = 10135529 > 0.000846546 |||||||||||.x||| d32768v12 csched2:burn_credits d2v7, credit > = 2619231, delta = 255937 > [1] 0.000846739 |||||||||||.x||| d32768v12 csched2:runq_tickle cpu 12 > [...] > [2] 0.000850597 ||||||||||||x||| d32768v12 csched2:schedule cpu 12, rq# 1, > busy, SMT busy, tickled > 0.000850760 ||||||||||||x||| d32768v12 csched2:burn_credits d2v7, credit > = 2614028, delta = 5203 > [3] 0.000851022 ||||||||||||x||| d32768v12 csched2:ratelimit triggered > [4] 0.000851614 ||||||||||||x||| d32768v12 runstate_continue d2v7 > running->running > > Basically, what happens is that runq_tickle() realizes > d0v13 should preempt d2v7, running on cpu 12, as it > has higher credits (10135529 vs. 2619231). It therefore > tickles cpu 12 [1], which, in turn, schedules [2]. > > But --surprise surprise-- d2v7 has run for less than the > ratelimit interval [3], and hence it is _not_ preempted, > and continues to run. This indeed looks fine. Actually, > this is what ratelimiting is there for. Note, however, > that: > 1) we interrupted cpu 12 for nothing; > 2) what if, say on cpu 8, there is a vcpu that has: > + less credit than d0v13 (so d0v13 can well > preempt it), > + more credit than d2v7 (that's why it was not > selected to be preempted), > + run for more than the ratelimiting interval > (so it can really be scheduled out)? > > With this patch, if we are in case 2), we'd realize > that tickling 12 would be pointless, and we'll continue > looking, eventually finding and tickling 8. > > Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx> Well spotted: Reviewed-by: George Dunlap <george.dunlap@xxxxxxxxxx> > --- > Cc: George Dunlap <george.dunlap@xxxxxxxxxxxxx> > Cc: Anshul Makkar <anshul.makkar@xxxxxxxxxx> > --- > xen/common/sched_credit2.c | 30 ++++++++++++++++++++++++++---- > 1 file changed, 26 insertions(+), 4 deletions(-) > > diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c > index bbda790..c45bc03 100644 > --- a/xen/common/sched_credit2.c > +++ b/xen/common/sched_credit2.c > @@ -160,6 +160,8 @@ > #define CSCHED2_MIGRATE_RESIST ((opt_migrate_resist)*MICROSECS(1)) > /* How much to "compensate" a vcpu for L2 migration. */ > #define CSCHED2_MIGRATE_COMPENSATION MICROSECS(50) > +/* How tolerant we should be when peeking at runtime of vcpus on other cpus > */ > +#define CSCHED2_RATELIMIT_TICKLE_TOLERANCE MICROSECS(50) > /* Reset: Value below which credit will be reset. */ > #define CSCHED2_CREDIT_RESET 0 > /* Max timer: Maximum time a guest can be run for. */ > @@ -1167,6 +1169,23 @@ tickle_cpu(unsigned int cpu, struct > csched2_runqueue_data *rqd) > } > > /* > + * What we want to know is whether svc, which we assume to be running on some > + * pcpu, can be interrupted and preempted (which, so far, basically means > + * whether or not it already run for more than the ratelimit, to which we > + * apply some tolerance). > + */ > +static inline bool is_preemptable(const struct csched2_vcpu *svc, > + s_time_t now, s_time_t ratelimit) > +{ > + if ( ratelimit <= CSCHED2_RATELIMIT_TICKLE_TOLERANCE ) > + return true; > + > + ASSERT(svc->vcpu->is_running); > + return now - svc->vcpu->runstate.state_entry_time > > + ratelimit - CSCHED2_RATELIMIT_TICKLE_TOLERANCE; > +} > + > +/* > * Score to preempt the target cpu. Return a negative number if the > * credit isn't high enough; if it is, favor a preemption on cpu in > * this order: > @@ -1180,10 +1199,12 @@ tickle_cpu(unsigned int cpu, struct > csched2_runqueue_data *rqd) > * > * Within the same class, the highest difference of credit. > */ > -static s_time_t tickle_score(struct csched2_runqueue_data *rqd, s_time_t now, > +static s_time_t tickle_score(const struct scheduler *ops, s_time_t now, > struct csched2_vcpu *new, unsigned int cpu) > { > + struct csched2_runqueue_data *rqd = c2rqd(ops, cpu); > struct csched2_vcpu * cur = csched2_vcpu(curr_on_cpu(cpu)); > + struct csched2_private *prv = csched2_priv(ops); > s_time_t score; > > /* > @@ -1191,7 +1212,8 @@ static s_time_t tickle_score(struct > csched2_runqueue_data *rqd, s_time_t now, > * in rqd->idle). However, some of them may be running their idle vcpu, > * if taking care of tasklets. In that case, we want to leave it alone. > */ > - if ( unlikely(is_idle_vcpu(cur->vcpu)) ) > + if ( unlikely(is_idle_vcpu(cur->vcpu) || > + !is_preemptable(cur, now, MICROSECS(prv->ratelimit_us))) ) > return -1; > > burn_credits(rqd, cur, now); > @@ -1348,7 +1370,7 @@ runq_tickle(const struct scheduler *ops, struct > csched2_vcpu *new, s_time_t now) > cpumask_and(&mask, &mask, cpumask_scratch_cpu(cpu)); > if ( __cpumask_test_and_clear_cpu(cpu, &mask) ) > { > - s_time_t score = tickle_score(rqd, now, new, cpu); > + s_time_t score = tickle_score(ops, now, new, cpu); > > if ( score > max ) > { > @@ -1371,7 +1393,7 @@ runq_tickle(const struct scheduler *ops, struct > csched2_vcpu *new, s_time_t now) > /* Already looked at this one above */ > ASSERT(i != cpu); > > - score = tickle_score(rqd, now, new, i); > + score = tickle_score(ops, now, new, i); > > if ( score > max ) > { > _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.