Re: [Xen-devel] [PATCH v2 03/10] xen: credit2: make tickling more deterministic
On 30/09/16 03:53, Dario Faggioli wrote:
> Right now, the following scenario can occur:
> - upon vcpu v wakeup, v itself is put in the runqueue,
> and pcpu X is tickled;
> - pcpu Y schedules (for whatever reason), sees v in
> the runqueue and picks it up.
>
> This may seem ok (or even a good thing), but it's not.
> In fact, if runq_tickle() decided X is where v should
> run, it did it for a reason (load distribution, SMT
> support, cache hotness, affinity, etc), and we really
> should try as hard as possible to stick to that.
>
> Of course, we can't be too strict, or we risk leaving
> vcpus in the runqueue while there is available CPU
> capacity. So, we only leave v in the runqueue --for X
> to pick it up-- if we see that X has been tickled and
> has not scheduled yet, i.e., it still has a real chance
> of actually selecting and scheduling v.
>
> If that is not the case, we schedule it on Y (or, at
> least, we consider that), as running somewhere non-ideal
> is better than not running at all.
>
> The commit also adds performance counters for each of
> the possible situations.
>
> Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
Reviewed-by: George Dunlap <george.dunlap@xxxxxxxxxx>
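
For anyone skimming the thread, here is a minimal sketch (not the actual Xen
code) of the deferral test the commit message describes; the names mirror the
patch below, but the wrapper function itself is hypothetical:

    /*
     * Sketch only: runq_candidate(), running on pcpu 'cpu', skips a vcpu in
     * the runqueue when another pcpu was tickled for it and that pcpu has not
     * been through the scheduler yet (i.e. it is still in rqd->tickled).
     */
    static bool defer_to_tickled_cpu(const struct csched2_vcpu *svc, int cpu,
                                     const cpumask_t *tickled)
    {
        return svc->tickled_cpu != -1 &&   /* some pcpu was tickled for svc */
               svc->tickled_cpu != cpu &&  /* ...and it is not this pcpu    */
               cpumask_test_cpu(svc->tickled_cpu, tickled); /* ...and it has
                                              not gone through schedule yet */
    }

If the test fails (e.g. the tickled pcpu has already scheduled), the current
pcpu picks the vcpu up anyway, since running somewhere non-ideal is better
than sitting in the runqueue.
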
> ---
> Cc: George Dunlap <george.dunlap@xxxxxxxxxx>
> Cc: Anshul Makkar <anshul.makkar@xxxxxxxxxx>
> Cc: Jan Beulich <JBeulich@xxxxxxxx>
> Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
> ---
> Changes from v1:
> * always initialize tickled_cpu to -1, also for idle vcpus (in which cases,
>   it just won't ever change to anything else than that), for improved
>   readability and understandability;
> * the logic for reporting back to csched_schedule() whether any vcpu was
>   skipped within runq_candidate(), and for only resetting the credits if that
>   did not happen, has been moved out of this patch, to another patch.
> ---
> xen/common/sched_credit2.c | 37 ++++++++++++++++++++++++++++++++++++-
> xen/include/xen/perfc_defn.h | 3 +++
> 2 files changed, 39 insertions(+), 1 deletion(-)
>
> diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
> index 5c7d0dc..3986441 100644
> --- a/xen/common/sched_credit2.c
> +++ b/xen/common/sched_credit2.c
> @@ -54,6 +54,7 @@
> #define TRC_CSCHED2_LOAD_CHECK TRC_SCHED_CLASS_EVT(CSCHED2, 16)
> #define TRC_CSCHED2_LOAD_BALANCE TRC_SCHED_CLASS_EVT(CSCHED2, 17)
> #define TRC_CSCHED2_PICKED_CPU TRC_SCHED_CLASS_EVT(CSCHED2, 19)
> +#define TRC_CSCHED2_RUNQ_CANDIDATE TRC_SCHED_CLASS_EVT(CSCHED2, 20)
>
> /*
> * WARNING: This is still in an experimental phase. Status and work can be
> found at the
> @@ -398,6 +399,7 @@ struct csched2_vcpu {
> int credit;
> s_time_t start_time; /* When we were scheduled (used for credit) */
> unsigned flags; /* 16 bits doesn't seem to play well with clear_bit() */
> + int tickled_cpu; /* cpu tickled for picking us up (-1 if none) */
>
> /* Individual contribution to load */
> s_time_t load_last_update; /* Last time average was updated */
> @@ -1049,6 +1051,10 @@ runq_tickle(const struct scheduler *ops, struct csched2_vcpu *new, s_time_t now)
> __cpumask_set_cpu(ipid, &rqd->tickled);
> smt_idle_mask_clear(ipid, &rqd->smt_idle);
> cpu_raise_softirq(ipid, SCHEDULE_SOFTIRQ);
> +
> + if ( unlikely(new->tickled_cpu != -1) )
> + SCHED_STAT_CRANK(tickled_cpu_overwritten);
> + new->tickled_cpu = ipid;
> }
>
> /*
> @@ -1276,6 +1282,7 @@ csched2_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
> svc->credit = CSCHED2_IDLE_CREDIT;
> svc->weight = 0;
> }
> + svc->tickled_cpu = -1;
>
> SCHED_STAT_CRANK(vcpu_alloc);
>
> @@ -2268,6 +2275,17 @@ runq_candidate(struct csched2_runqueue_data *rqd,
> if ( !cpumask_test_cpu(cpu, svc->vcpu->cpu_hard_affinity) )
> continue;
>
> + /*
> + * If a vcpu is meant to be picked up by another processor, and such
> + * processor has not scheduled yet, leave it in the runqueue for him.
> + */
> + if ( svc->tickled_cpu != -1 && svc->tickled_cpu != cpu &&
> + cpumask_test_cpu(svc->tickled_cpu, &rqd->tickled) )
> + {
> + SCHED_STAT_CRANK(deferred_to_tickled_cpu);
> + continue;
> + }
> +
> /* If this is on a different processor, don't pull it unless
> * its credit is at least CSCHED2_MIGRATE_RESIST higher. */
> if ( svc->vcpu->processor != cpu
> @@ -2284,9 +2302,25 @@ runq_candidate(struct csched2_runqueue_data *rqd,
>
> /* In any case, if we got this far, break. */
> break;
> + }
>
> + if ( unlikely(tb_init_done) )
> + {
> + struct {
> + unsigned vcpu:16, dom:16;
> + unsigned tickled_cpu;
> + } d;
> + d.dom = snext->vcpu->domain->domain_id;
> + d.vcpu = snext->vcpu->vcpu_id;
> + d.tickled_cpu = snext->tickled_cpu;
> + __trace_var(TRC_CSCHED2_RUNQ_CANDIDATE, 1,
> + sizeof(d),
> + (unsigned char *)&d);
> }
>
> + if ( unlikely(snext->tickled_cpu != -1 && snext->tickled_cpu != cpu) )
> + SCHED_STAT_CRANK(tickled_cpu_overridden);
> +
> return snext;
> }
>
> @@ -2351,7 +2385,7 @@ csched2_schedule(
> snext = CSCHED2_VCPU(idle_vcpu[cpu]);
> }
> else
> - snext=runq_candidate(rqd, scurr, cpu, now);
> + snext = runq_candidate(rqd, scurr, cpu, now);
>
> /* If switching from a non-idle runnable vcpu, put it
> * back on the runqueue. */
> @@ -2390,6 +2424,7 @@ csched2_schedule(
> }
>
> snext->start_time = now;
> + snext->tickled_cpu = -1;
>
> /* Safe because lock for old processor is held */
> if ( snext->vcpu->processor != cpu )
> diff --git a/xen/include/xen/perfc_defn.h b/xen/include/xen/perfc_defn.h
> index a336c71..4a835b8 100644
> --- a/xen/include/xen/perfc_defn.h
> +++ b/xen/include/xen/perfc_defn.h
> @@ -66,6 +66,9 @@ PERFCOUNTER(runtime_max_timer, "csched2: runtime_max_timer")
> PERFCOUNTER(migrated, "csched2: migrated")
> PERFCOUNTER(migrate_resisted, "csched2: migrate_resisted")
> PERFCOUNTER(credit_reset, "csched2: credit_reset")
> +PERFCOUNTER(deferred_to_tickled_cpu,"csched2: deferred_to_tickled_cpu")
> +PERFCOUNTER(tickled_cpu_overwritten,"csched2: tickled_cpu_overwritten")
> +PERFCOUNTER(tickled_cpu_overridden, "csched2: tickled_cpu_overridden")
>
> PERFCOUNTER(need_flush_tlb_flush, "PG_need_flush tlb flushes")
>
>
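
To summarize the lifecycle of the new field as I read the hunks above
(illustrative comment only, not code taken from the patch):

    /*
     * tickled_cpu lifecycle introduced by this patch:
     *  - csched2_alloc_vdata():  svc->tickled_cpu = -1;    (no pcpu tickled yet)
     *  - runq_tickle():          svc->tickled_cpu = ipid;  (pcpu chosen to run svc;
     *                            tickled_cpu_overwritten counts overwrites)
     *  - runq_candidate() on Y:  defer svc while tickled_cpu is another pcpu that
     *                            is still in rqd->tickled (deferred_to_tickled_cpu),
     *                            otherwise pick it up anyway (tickled_cpu_overridden)
     *  - csched2_schedule():     snext->tickled_cpu = -1;  (svc actually runs)
     */
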
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel