[Xen-devel] [PATCH] xen: sched/Credit2: fix bug when moving CPUs between two Credit2 cpupools
Whether or not a CPU is assigned to a runqueue (and, if yes, to which
one) within a Credit2 scheduler instance must be tracked on both a
per-cpu and a per-scheduler-instance basis. In fact, when we move a
CPU between cpupools, we first set up its per-cpu data in the new
pool, and only then clean up its per-cpu data from the old pool. In
Credit2, which currently has no per-scheduler per-cpu data (the
cpu-to-runqueue map is stored on a per-cpu basis only), this means the
cleanup of the old per-cpu data can mess with the new per-cpu data,
leading to crashes like these:

 https://www.mail-archive.com/xen-devel@xxxxxxxxxxxxxxxxxxxx/msg23306.html
 https://www.mail-archive.com/xen-devel@xxxxxxxxxxxxxxxxxxxx/msg23350.html

Basically, when csched2_deinit_pdata() is called for CPU 13, to fully
remove that CPU from Pool-0, per_cpu(runq_map, 13) already contains
the id of the runqueue to which the CPU has been assigned in the
scheduler of Pool-1, which means wrong runqueue manipulations happen
in Pool-0's scheduler. Furthermore, at the end of that call, the same
runq_map entry is set to -1, which is what causes the BUG_ON in
csched2_schedule(), on CPU 13, to trigger.

So, instead of reverting a2c4e5ab59d ("xen: credit2: make the cpu to
runqueue map per-cpu"), which we do not want to do as it would bring
back the huge array in struct csched2_private, add a per-cpu,
scheduler-specific data structure, like, for instance, Credit1
already has. For now, it contains only one field: the id of the
runqueue the CPU is assigned to.

Signed-off-by: Dario Faggioli <dfaggioli@xxxxxxxx>
---
Cc: George Dunlap <george.dunlap@xxxxxxxxxx>
Cc: Steven Haigh <netwiz@xxxxxxxxx>
Cc: Juergen Gross <jgross@xxxxxxxx>
Cc: Jan Beulich <JBeulich@xxxxxxxx>
---
This is a bugfix, so it should be considered for backporting to the
stable releases that contain commit a2c4e5ab59d. In case the backport
turns out to be non-trivial, I can help with it.
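To make the failure mode concrete, here is a minimal, self-contained
sketch (illustration only, NOT Xen code: the runq_map array, struct
sched_instance, the toy_* helpers and the sequence in main() are all
invented for this example) of what goes wrong when a single shared
per-cpu map is used and the new pool is set up before the old one is
cleaned up:

#include <assert.h>
#include <stdio.h>

#define NR_CPUS 16

/* One single, shared, per-cpu map: this is the buggy arrangement. */
static int runq_map[NR_CPUS];

/* What each scheduler instance (i.e., each pool) believes. */
struct sched_instance { int runq_of_cpu[NR_CPUS]; };

static void toy_init_pdata(struct sched_instance *s, int cpu, int rqi)
{
    runq_map[cpu] = rqi;           /* silently overwrites the old pool's entry */
    s->runq_of_cpu[cpu] = rqi;
}

static void toy_deinit_pdata(struct sched_instance *s, int cpu)
{
    /*
     * Reads whatever is in the shared map. After "init new, then deinit
     * old", this is the new pool's runqueue id, not this pool's.
     */
    int rqi = runq_map[cpu];

    if ( rqi != s->runq_of_cpu[cpu] )
        printf("cleaning up runqueue %d instead of %d!\n",
               rqi, s->runq_of_cpu[cpu]);
    runq_map[cpu] = -1;            /* and this clobbers the new pool's state */
}

int main(void)
{
    struct sched_instance pool0, pool1;

    toy_init_pdata(&pool0, 13, 0); /* CPU 13 starts in Pool-0, runqueue 0 */
    toy_init_pdata(&pool1, 13, 2); /* move: per-cpu data set up in Pool-1... */
    toy_deinit_pdata(&pool0, 13);  /* ...then cleaned up from Pool-0: wrong! */

    /* Pool-1 is left with -1 in the map: cf. the BUG_ON in csched2_schedule(). */
    assert(runq_map[13] == -1);
    return 0;
}

With the struct csched2_pcpu introduced by the patch below, each
scheduler instance reads and writes only its own runq_id, so the old
pool's cleanup can no longer see, or clobber, the new pool's
assignment.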
---
 xen/common/sched_credit2.c | 107 ++++++++++++++++++++++++++++++--------------
 1 file changed, 73 insertions(+), 34 deletions(-)

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 743848121f..2b16bcea21 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -508,11 +508,10 @@ struct csched2_private {
 
 /*
  * Physical CPU
- *
- * The only per-pCPU information we need to maintain is of which runqueue
- * each CPU is part of.
  */
-static DEFINE_PER_CPU(int, runq_map);
+struct csched2_pcpu {
+    int runq_id;
+};
 
 /*
  * Virtual CPU
@@ -571,6 +570,11 @@ static inline struct csched2_private *csched2_priv(const struct scheduler *ops)
     return ops->sched_data;
 }
 
+static inline struct csched2_pcpu *csched2_pcpu(unsigned int cpu)
+{
+    return per_cpu(schedule_data, cpu).sched_priv;
+}
+
 static inline struct csched2_vcpu *csched2_vcpu(const struct vcpu *v)
 {
     return v->sched_priv;
@@ -584,7 +588,7 @@ static inline struct csched2_dom *csched2_dom(const struct domain *d)
 /* CPU to runq_id macro */
 static inline int c2r(unsigned int cpu)
 {
-    return per_cpu(runq_map, cpu);
+    return csched2_pcpu(cpu)->runq_id;
 }
 
 /* CPU to runqueue struct macro */
@@ -3778,31 +3782,45 @@ csched2_dump(const struct scheduler *ops)
 #undef cpustr
 }
 
+static void *
+csched2_alloc_pdata(const struct scheduler *ops, int cpu)
+{
+    struct csched2_pcpu *spc;
+
+    spc = xzalloc(struct csched2_pcpu);
+    if ( spc == NULL )
+        return ERR_PTR(-ENOMEM);
+
+    /* Not in any runqueue yet */
+    spc->runq_id = -1;
+
+    return spc;
+}
+
 /* Returns the ID of the runqueue the cpu is assigned to. */
 static unsigned
-init_pdata(struct csched2_private *prv, unsigned int cpu)
+init_pdata(struct csched2_private *prv, struct csched2_pcpu *spc,
+           unsigned int cpu)
 {
-    unsigned rqi;
     struct csched2_runqueue_data *rqd;
 
     ASSERT(rw_is_write_locked(&prv->lock));
     ASSERT(!cpumask_test_cpu(cpu, &prv->initialized));
+    /* CPU data needs to be allocated, but still uninitialized. */
+    ASSERT(spc && spc->runq_id == -1);
 
     /* Figure out which runqueue to put it in */
-    rqi = cpu_to_runqueue(prv, cpu);
+    spc->runq_id = cpu_to_runqueue(prv, cpu);
 
-    rqd = prv->rqd + rqi;
+    rqd = prv->rqd + spc->runq_id;
 
-    printk(XENLOG_INFO "Adding cpu %d to runqueue %d\n", cpu, rqi);
-    if ( ! cpumask_test_cpu(rqi, &prv->active_queues) )
+    printk(XENLOG_INFO "Adding cpu %d to runqueue %d\n", cpu, spc->runq_id);
+    if ( ! cpumask_test_cpu(spc->runq_id, &prv->active_queues) )
     {
         printk(XENLOG_INFO " First cpu on runqueue, activating\n");
-        activate_runqueue(prv, rqi);
+        activate_runqueue(prv, spc->runq_id);
     }
 
-    /* Set the runqueue map */
-    per_cpu(runq_map, cpu) = rqi;
-
     __cpumask_set_cpu(cpu, &rqd->idle);
     __cpumask_set_cpu(cpu, &rqd->active);
     __cpumask_set_cpu(cpu, &prv->initialized);
@@ -3811,7 +3829,7 @@ init_pdata(struct csched2_private *prv, unsigned int cpu)
     if ( cpumask_weight(&rqd->active) == 1 )
         rqd->pick_bias = cpu;
 
-    return rqi;
+    return spc->runq_id;
 }
 
 static void
@@ -3822,16 +3840,10 @@ csched2_init_pdata(const struct scheduler *ops, void *pdata, int cpu)
     unsigned long flags;
     unsigned rqi;
 
-    /*
-     * pdata contains what alloc_pdata returned. But since we don't (need to)
-     * implement alloc_pdata, either that's NULL, or something is very wrong!
-     */
-    ASSERT(!pdata);
-
     write_lock_irqsave(&prv->lock, flags);
     old_lock = pcpu_schedule_lock(cpu);
 
-    rqi = init_pdata(prv, cpu);
+    rqi = init_pdata(prv, pdata, cpu);
 
     /* Move the scheduler lock to the new runq lock. */
     per_cpu(schedule_data, cpu).schedule_lock = &prv->rqd[rqi].lock;
@@ -3849,7 +3861,7 @@ csched2_switch_sched(struct scheduler *new_ops, unsigned int cpu,
     struct csched2_vcpu *svc = vdata;
     unsigned rqi;
 
-    ASSERT(!pdata && svc && is_idle_vcpu(svc->vcpu));
+    ASSERT(pdata && svc && is_idle_vcpu(svc->vcpu));
 
     /*
      * We own one runqueue lock already (from schedule_cpu_switch()). This
@@ -3864,7 +3876,7 @@ csched2_switch_sched(struct scheduler *new_ops, unsigned int cpu,
 
     idle_vcpu[cpu]->sched_priv = vdata;
 
-    rqi = init_pdata(prv, cpu);
+    rqi = init_pdata(prv, pdata, cpu);
 
     /*
      * Now that we know what runqueue we'll go in, double check what's said
@@ -3875,7 +3887,7 @@ csched2_switch_sched(struct scheduler *new_ops, unsigned int cpu,
     ASSERT(per_cpu(schedule_data, cpu).schedule_lock != &prv->rqd[rqi].lock);
 
     per_cpu(scheduler, cpu) = new_ops;
-    per_cpu(schedule_data, cpu).sched_priv = NULL; /* no pdata */
+    per_cpu(schedule_data, cpu).sched_priv = pdata;
 
     /*
      * (Re?)route the lock to the per pCPU lock as /last/ thing. In fact,
@@ -3894,7 +3906,7 @@ csched2_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
     unsigned long flags;
     struct csched2_private *prv = csched2_priv(ops);
     struct csched2_runqueue_data *rqd;
-    int rqi;
+    struct csched2_pcpu *spc = pcpu;
 
     write_lock_irqsave(&prv->lock, flags);
 
@@ -3902,17 +3914,24 @@ csched2_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
      * alloc_pdata is not implemented, so pcpu must be NULL. On the other
      * hand, init_pdata must have been called for this pCPU.
      */
-    ASSERT(!pcpu && cpumask_test_cpu(cpu, &prv->initialized));
+    /*
+     * Scheduler specific data for this pCPU must still be there and be
+     * valid. In fact, if we are here:
+     * 1. alloc_pdata must have been called for this cpu, and free_pdata
+     *    must not have been called on it before us,
+     * 2. init_pdata must have been called on this cpu, and deinit_pdata
+     *    (us!) must not have been called on it already.
+     */
+    ASSERT(spc && spc->runq_id != -1);
+    ASSERT(cpumask_test_cpu(cpu, &prv->initialized));
 
     /* Find the old runqueue and remove this cpu from it */
-    rqi = per_cpu(runq_map, cpu);
-
-    rqd = prv->rqd + rqi;
+    rqd = prv->rqd + spc->runq_id;
 
     /* No need to save IRQs here, they're already disabled */
     spin_lock(&rqd->lock);
 
-    printk(XENLOG_INFO "Removing cpu %d from runqueue %d\n", cpu, rqi);
+    printk(XENLOG_INFO "Removing cpu %d from runqueue %d\n", cpu, spc->runq_id);
 
     __cpumask_clear_cpu(cpu, &rqd->idle);
     __cpumask_clear_cpu(cpu, &rqd->smt_idle);
@@ -3921,12 +3940,12 @@ csched2_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
     if ( cpumask_empty(&rqd->active) )
     {
         printk(XENLOG_INFO " No cpus left on runqueue, disabling\n");
-        deactivate_runqueue(prv, rqi);
+        deactivate_runqueue(prv, spc->runq_id);
     }
     else if ( rqd->pick_bias == cpu )
         rqd->pick_bias = cpumask_first(&rqd->active);
 
-    per_cpu(runq_map, cpu) = -1;
+    spc->runq_id = -1;
 
     spin_unlock(&rqd->lock);
 
@@ -3937,6 +3956,24 @@ csched2_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
     return;
 }
 
+static void
+csched2_free_pdata(const struct scheduler *ops, void *pcpu, int cpu)
+{
+    struct csched2_pcpu *spc = pcpu;
+
+    /*
+     * pcpu either points to a valid struct csched2_pcpu, or is NULL (if
+     * CPU bringup failed, and we're being called from CPU_UP_CANCELED).
+     * xfree() does not really mind, but we want to be sure that either
+     * init_pdata has never been called, or deinit_pdata has been called
+     * already.
+     */
+    ASSERT(!pcpu || spc->runq_id == -1);
+    ASSERT(!cpumask_test_cpu(cpu, &csched2_priv(ops)->initialized));
+
+    xfree(pcpu);
+}
+
 static int __init
 csched2_global_init(void)
 {
@@ -4061,8 +4098,10 @@ static const struct scheduler sched_credit2_def = {
     .deinit         = csched2_deinit,
     .alloc_vdata    = csched2_alloc_vdata,
     .free_vdata     = csched2_free_vdata,
+    .alloc_pdata    = csched2_alloc_pdata,
     .init_pdata     = csched2_init_pdata,
     .deinit_pdata   = csched2_deinit_pdata,
+    .free_pdata     = csched2_free_pdata,
     .switch_sched   = csched2_switch_sched,
     .alloc_domdata  = csched2_alloc_domdata,
     .free_domdata   = csched2_free_domdata,
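For completeness, here is a companion sketch of the pdata lifecycle
the fixed code relies on (again illustration only, not Xen code: the
hook names mirror the patch, but these simplified toy_* signatures and
the sequence in main() are invented). Each pool now owns its own
csched2_pcpu, so the "init new, then deinit old" ordering is harmless:

#include <assert.h>
#include <stdlib.h>

struct csched2_pcpu { int runq_id; };

static struct csched2_pcpu *toy_alloc_pdata(void)
{
    struct csched2_pcpu *spc = calloc(1, sizeof(*spc));

    if ( spc == NULL )
        exit(EXIT_FAILURE);         /* stand-in for ERR_PTR(-ENOMEM) */
    spc->runq_id = -1;              /* not in any runqueue yet */
    return spc;
}

static void toy_init_pdata(struct csched2_pcpu *spc, int rqi)
{
    assert(spc->runq_id == -1);     /* allocated, but still uninitialized */
    spc->runq_id = rqi;
}

static void toy_deinit_pdata(struct csched2_pcpu *spc)
{
    assert(spc->runq_id != -1);     /* must still be there and be valid */
    spc->runq_id = -1;
}

static void toy_free_pdata(struct csched2_pcpu *spc)
{
    assert(spc == NULL || spc->runq_id == -1);
    free(spc);
}

int main(void)
{
    struct csched2_pcpu *old_pool = toy_alloc_pdata();
    struct csched2_pcpu *new_pool = toy_alloc_pdata();

    toy_init_pdata(old_pool, 0);    /* CPU lives in Pool-0, runqueue 0 */

    /* The move: the new pool goes first, the old pool is cleaned up after. */
    toy_init_pdata(new_pool, 2);
    toy_deinit_pdata(old_pool);     /* touches only Pool-0's own state now */
    toy_free_pdata(old_pool);

    assert(new_pool->runq_id == 2); /* Pool-1's view survives intact */
    toy_free_pdata(new_pool);
    return 0;
}

The asserts mirror the ASSERT()s the patch adds in init_pdata(),
csched2_deinit_pdata() and csched2_free_pdata(): at every step the id
is either -1 or a valid runqueue, and only the owning scheduler
instance ever touches it.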