commit 0f8ef8f23718cc24b0bc958979aa789be4ed89d5 Author: Dario Faggioli Date: Tue Mar 2 19:03:05 2021 +0000 Debug patch for suspect scheduler issues. Signed-off-by: Dario Faggioli diff --git a/tools/xentrace/xenalyze.c b/tools/xentrace/xenalyze.c index b7f4e2bea8..8ce705bd48 100644 --- a/tools/xentrace/xenalyze.c +++ b/tools/xentrace/xenalyze.c @@ -7440,6 +7440,17 @@ void sched_process(struct pcpu_info *p) /* TRC_SCHED_VERBOSE */ switch(ri->event) { + case TRC_SCHED_MAX_INTRV: + if(opt.dump_all) { + struct { + unsigned int domid, vcpuid; + unsigned int interv, time, last; + } *r = (typeof(r))ri->d; + + printf(" %s sched_max_interv %u usecs, at %u usecs with d%uv%u (last: %u usecs)\n", + ri->dump_header, r->interv, r->time, r->domid, r->vcpuid, r->last); + } + break; case TRC_SCHED_DOM_ADD: if(opt.dump_all) { struct { @@ -7904,6 +7915,18 @@ void sched_process(struct pcpu_info *p) ri->dump_header, r->domid, r->vcpuid); } break; + case TRC_SCHED_CLASS_EVT(CSCHED2, 24): + if(opt.dump_all) { + struct { + unsigned int domid, vcpuid; + unsigned int limits, now, exec; + } *r = (typeof(r))ri->d; + + printf(" %s csched2:limit_credit_loss[#%u] d%uv%u, at %u, exec'd %u usecs!\n", + ri->dump_header, r->limits, r->domid, r->vcpuid, r->now, r->exec); + } + break; + /* RTDS (TRC_RTDS_xxx) */ case TRC_SCHED_CLASS_EVT(RTDS, 1): /* TICKLE */ if(opt.dump_all) { diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c index ce7c56147b..29aa99db86 100644 --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -57,6 +57,7 @@ #define TRC_CSCHED2_SCHEDULE TRC_SCHED_CLASS_EVT(CSCHED2, 21) #define TRC_CSCHED2_RATELIMIT TRC_SCHED_CLASS_EVT(CSCHED2, 22) #define TRC_CSCHED2_RUNQ_CAND_CHECK TRC_SCHED_CLASS_EVT(CSCHED2, 23) +#define TRC_CSCHED2_LIMIT_CREDITS TRC_SCHED_CLASS_EVT(CSCHED2, 24) /* * TODO: @@ -775,6 +776,11 @@ static int get_fallback_cpu(struct csched2_unit *svc) return cpumask_any(cpumask_scratch_cpu(sched_unit_master(unit))); } +static DEFINE_PER_CPU(unsigned int, limit_credits); +static DEFINE_PER_CPU(s_time_t, limit_credits_time); +static DEFINE_PER_CPU(s_time_t, limit_credits_exec); +static DEFINE_PER_CPU(struct sched_unit *, limit_credits_unit); + /* * Time-to-credit, credit-to-time. * @@ -792,7 +798,17 @@ static void t2c_update(struct csched2_runqueue_data *rqd, s_time_t time, /* Getting to lower credit than CSCHED2_CREDIT_MIN makes no sense. */ val = svc->credit - val; if ( unlikely(val < CSCHED2_CREDIT_MIN) ) + { + this_cpu(limit_credits)++; + this_cpu(limit_credits_time) = NOW(); + this_cpu(limit_credits_exec) = time; + this_cpu(limit_credits_unit) = svc->unit; + TRACE_5D(TRC_CSCHED2_LIMIT_CREDITS, svc->unit->domain->domain_id, + svc->unit->unit_id, this_cpu(limit_credits), + (uint32_t)(this_cpu(limit_credits_time)/MICROSECS(1)), + (uint32_t)(this_cpu(limit_credits_exec)/MICROSECS(1))); svc->credit = CSCHED2_CREDIT_MIN; + } else svc->credit = val; } @@ -3661,6 +3677,12 @@ dump_pcpu(const struct scheduler *ops, int cpu) cpu, c2r(cpu), CPUMASK_PR(per_cpu(cpu_sibling_mask, cpu)), CPUMASK_PR(per_cpu(cpu_core_mask, cpu))); + if ( per_cpu(limit_credits_unit, cpu) != NULL ) { + printk("\tCredit limited: #%u, last at %"PRI_stime" as d%uv%u exec'd %"PRI_stime"\n", + per_cpu(limit_credits, cpu), per_cpu(limit_credits_time, cpu), + per_cpu(limit_credits_unit, cpu)->domain->domain_id, + per_cpu(limit_credits_unit, cpu)->unit_id, per_cpu(limit_credits_exec, cpu)); + } /* current UNIT (nothing to say if that's the idle unit) */ svc = csched2_unit(curr_on_cpu(cpu)); diff --git a/xen/common/schedule.c b/xen/common/schedule.c index 6b1ae7bf8c..4a950f3b57 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -2385,6 +2385,12 @@ static void sched_slave(void) is_idle_unit(next) && !is_idle_unit(prev), now); } +static DEFINE_PER_CPU(s_time_t, last_sched_time); +static DEFINE_PER_CPU(s_time_t, last_sched_interval); +static DEFINE_PER_CPU(s_time_t, max_sched_interval); +static DEFINE_PER_CPU(s_time_t, max_sched_time); +static DEFINE_PER_CPU(struct vcpu *, max_sched_interv_vprev); + /* * The main function * - deschedule the current domain (scheduler independent). @@ -2399,6 +2405,7 @@ static void schedule(void) spinlock_t *lock; int cpu = smp_processor_id(); unsigned int gran; + s_time_t sched_interval; ASSERT_NOT_IN_ATOMIC(); @@ -2408,6 +2415,21 @@ static void schedule(void) lock = pcpu_schedule_lock_irq(cpu); + now = NOW(); + + sched_interval = this_cpu(last_sched_interval) = now - this_cpu(last_sched_time); + if ( sched_interval > this_cpu(max_sched_interval) ) + { + this_cpu(max_sched_interval) = sched_interval; + this_cpu(max_sched_interv_vprev) = vprev; + this_cpu(max_sched_time) = now; + TRACE_5D(TRC_SCHED_MAX_INTRV, vprev->domain->domain_id, vprev->vcpu_id, + (uint32_t)(this_cpu(max_sched_interval) / MICROSECS(1)), + (uint32_t)(this_cpu(max_sched_time) / MICROSECS(1)), + (uint32_t)(this_cpu(last_sched_interval) / MICROSECS(1))); + } + this_cpu(last_sched_time) = now; + sr = get_sched_res(cpu); gran = sr->granularity; @@ -2427,8 +2449,6 @@ static void schedule(void) stop_timer(&sr->s_timer); - now = NOW(); - if ( gran > 1 ) { cpumask_t mask; @@ -3085,7 +3105,7 @@ void scheduler_free(struct scheduler *sched) void schedule_dump(struct cpupool *c) { - unsigned int i; + unsigned int i,j; struct scheduler *sched; cpumask_t *cpus; @@ -3106,11 +3126,30 @@ void schedule_dump(struct cpupool *c) cpus = &cpupool_free_cpus; } - if ( sched->dump_cpu_state != NULL ) - { - printk("CPUs info:\n"); - for_each_cpu (i, cpus) - sched_dump_cpu_state(sched, i); + printk("CPUs info:\n"); + for_each_cpu (i, cpus) { + struct sched_resource *sr = get_sched_res(i); + unsigned long flags; + spinlock_t *lock; + + lock = pcpu_schedule_lock_irqsave(i, &flags); + + printk("CPU[%02d] current=%pv, curr=%pv, prev=%pv\n", i, + get_cpu_current(i), sr->curr ? sr->curr->vcpu_list : NULL, + sr->prev ? sr->prev->vcpu_list : NULL); + printk("\tlast schedule: %"PRI_stime", last_interval=%"PRI_stime", " + "max_interval=%"PRI_stime" at %"PRI_stime" (after running %pv)\n", + per_cpu(last_sched_time, i), per_cpu(last_sched_interval, i), + per_cpu(max_sched_interval, i), per_cpu(max_sched_time, i), + per_cpu(max_sched_interv_vprev, i)); + per_cpu(max_sched_interval, i) = 0; + for_each_cpu (j, sr->cpus) + if ( i != j ) + printk("CPU[%02d] current=%pv\n", j, get_cpu_current(j)); + + pcpu_schedule_unlock_irqrestore(lock, flags, i); + + sched_dump_cpu_state(sched, i); } rcu_read_unlock(&sched_res_rculock); diff --git a/xen/include/public/trace.h b/xen/include/public/trace.h index d5fa4aea8d..5b3faf0fd5 100644 --- a/xen/include/public/trace.h +++ b/xen/include/public/trace.h @@ -117,6 +117,7 @@ #define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15) #define TRC_SCHED_SHUTDOWN_CODE (TRC_SCHED_VERBOSE + 16) #define TRC_SCHED_SWITCH_INFCONT (TRC_SCHED_VERBOSE + 17) +#define TRC_SCHED_MAX_INTRV (TRC_SCHED_VERBOSE + 18) #define TRC_DOM0_DOM_ADD (TRC_DOM0_DOMOPS + 1) #define TRC_DOM0_DOM_REM (TRC_DOM0_DOMOPS + 2)