[Xen-devel] [PATCH 56/60] xen/sched: protect scheduling resource via rcu
In order to be able to move cpus to cpupools with core scheduling
active it is mandatory to merge multiple cpus into one scheduling
resource or to split a scheduling resource with multiple cpus in it
into multiple scheduling resources. This in turn requires modifying
the cpu <-> scheduling resource relation. In order to be able to free
unused resources, protect struct sched_resource via RCU. This ensures
there are no users left when freeing such a resource.

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
V1: new patch
---
 xen/common/cpupool.c       |   4 +
 xen/common/schedule.c      | 200 ++++++++++++++++++++++++++++++++++++++++-----
 xen/include/xen/sched-if.h |   7 +-
 3 files changed, 190 insertions(+), 21 deletions(-)

diff --git a/xen/common/cpupool.c b/xen/common/cpupool.c
index 35108b9119..1c7722552c 100644
--- a/xen/common/cpupool.c
+++ b/xen/common/cpupool.c
@@ -510,8 +510,10 @@ static int cpupool_cpu_add(unsigned int cpu)
      * (or unplugging would have failed) and that is the default behavior
      * anyway.
      */
+    rcu_read_lock(&sched_res_rculock);
     get_sched_res(cpu)->cpupool = NULL;
     ret = cpupool_assign_cpu_locked(cpupool0, cpu);
+    rcu_read_unlock(&sched_res_rculock);
 
     spin_unlock(&cpupool_lock);
 
@@ -596,7 +598,9 @@ static void cpupool_cpu_remove_forced(unsigned int cpu)
         }
     }
 
+    rcu_read_lock(&sched_res_rculock);
     sched_rm_cpu(cpu);
+    rcu_read_unlock(&sched_res_rculock);
 }
 
 /*
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 0f667068a8..78eb055d07 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -73,6 +73,7 @@ static void poll_timer_fn(void *data);
 /* This is global for now so that private implementations can reach it */
 DEFINE_PER_CPU(struct sched_resource *, sched_res);
 static DEFINE_PER_CPU(unsigned int, sched_res_idx);
+DEFINE_RCU_READ_LOCK(sched_res_rculock);
 
 /* Scratch space for cpumasks. */
 DEFINE_PER_CPU(cpumask_t, cpumask_scratch);
@@ -270,17 +271,25 @@ static inline void vcpu_runstate_change(
 
 void sched_guest_idle(void (*idle) (void), unsigned int cpu)
 {
+    rcu_read_lock(&sched_res_rculock);
     atomic_inc(&get_sched_res(cpu)->urgent_count);
+    rcu_read_unlock(&sched_res_rculock);
+
     idle();
+
+    rcu_read_lock(&sched_res_rculock);
     atomic_dec(&get_sched_res(cpu)->urgent_count);
+    rcu_read_unlock(&sched_res_rculock);
 }
 
 void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate)
 {
-    spinlock_t *lock = likely(v == current)
-                       ? NULL : unit_schedule_lock_irq(v->sched_unit);
+    spinlock_t *lock;
     s_time_t delta;
 
+    rcu_read_lock(&sched_res_rculock);
+
+    lock = likely(v == current) ? NULL : unit_schedule_lock_irq(v->sched_unit);
     memcpy(runstate, &v->runstate, sizeof(*runstate));
     delta = NOW() - runstate->state_entry_time;
     if ( delta > 0 )
@@ -288,6 +297,8 @@ void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate)
 
     if ( unlikely(lock != NULL) )
         unit_schedule_unlock_irq(lock, v->sched_unit);
+
+    rcu_read_unlock(&sched_res_rculock);
 }
 
 uint64_t get_cpu_idle_time(unsigned int cpu)
@@ -493,6 +504,8 @@ int sched_init_vcpu(struct vcpu *v)
         return 0;
     }
 
+    rcu_read_lock(&sched_res_rculock);
+
     /* The first vcpu of an unit can be set via sched_set_res(). */
     sched_set_res(unit, get_sched_res(processor));
 
@@ -500,6 +513,7 @@ int sched_init_vcpu(struct vcpu *v)
     if ( unit->priv == NULL )
     {
         sched_free_unit(unit, v);
+        rcu_read_unlock(&sched_res_rculock);
         return 1;
     }
 
@@ -526,6 +540,8 @@ int sched_init_vcpu(struct vcpu *v)
         sched_insert_unit(dom_scheduler(d), unit);
     }
 
+    rcu_read_unlock(&sched_res_rculock);
+
     return 0;
 }
 
@@ -553,6 +569,7 @@ int sched_move_domain(struct domain *d, struct cpupool *c)
     void *unitdata;
     struct scheduler *old_ops;
     void *old_domdata;
+    int ret = 0;
 
     for_each_sched_unit ( d, unit )
     {
@@ -560,15 +577,21 @@ int sched_move_domain(struct domain *d, struct cpupool *c)
             return -EBUSY;
     }
 
+    rcu_read_lock(&sched_res_rculock);
+
     domdata = sched_alloc_domdata(c->sched, d);
     if ( IS_ERR(domdata) )
-        return PTR_ERR(domdata);
+    {
+        ret = PTR_ERR(domdata);
+        goto out;
+    }
 
     unit_priv = xzalloc_array(void *, d->max_vcpus);
     if ( unit_priv == NULL )
     {
         sched_free_domdata(c->sched, domdata);
-        return -ENOMEM;
+        ret = -ENOMEM;
+        goto out;
     }
 
     for_each_sched_unit ( d, unit )
@@ -580,7 +603,8 @@ int sched_move_domain(struct domain *d, struct cpupool *c)
                 xfree(unit_priv[unit->unit_id]);
             xfree(unit_priv);
             sched_free_domdata(c->sched, domdata);
-            return -ENOMEM;
+            ret = -ENOMEM;
+            goto out;
         }
     }
 
@@ -642,7 +666,10 @@ int sched_move_domain(struct domain *d, struct cpupool *c)
 
     xfree(unit_priv);
 
-    return 0;
+ out:
+    rcu_read_unlock(&sched_res_rculock);
+
+    return ret;
 }
 
 void sched_destroy_vcpu(struct vcpu *v)
@@ -660,9 +687,13 @@ void sched_destroy_vcpu(struct vcpu *v)
      */
     if ( unit->vcpu == v )
     {
+        rcu_read_lock(&sched_res_rculock);
+
         sched_remove_unit(vcpu_scheduler(v), unit);
         sched_free_vdata(vcpu_scheduler(v), unit->priv);
         sched_free_unit(unit, v);
+
+        rcu_read_unlock(&sched_res_rculock);
     }
 }
 
@@ -680,7 +711,12 @@ int sched_init_domain(struct domain *d, int poolid)
     SCHED_STAT_CRANK(dom_init);
     TRACE_1D(TRC_SCHED_DOM_ADD, d->domain_id);
 
+    rcu_read_lock(&sched_res_rculock);
+
     sdom = sched_alloc_domdata(dom_scheduler(d), d);
+
+    rcu_read_unlock(&sched_res_rculock);
+
     if ( IS_ERR(sdom) )
         return PTR_ERR(sdom);
 
@@ -698,9 +734,13 @@ void sched_destroy_domain(struct domain *d)
         SCHED_STAT_CRANK(dom_destroy);
         TRACE_1D(TRC_SCHED_DOM_REM, d->domain_id);
 
+        rcu_read_lock(&sched_res_rculock);
+
         sched_free_domdata(dom_scheduler(d), d->sched_priv);
         d->sched_priv = NULL;
 
+        rcu_read_unlock(&sched_res_rculock);
+
         cpupool_rm_domain(d);
     }
 }
@@ -734,11 +774,15 @@ void vcpu_sleep_nosync(struct vcpu *v)
 
     TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id);
 
+    rcu_read_lock(&sched_res_rculock);
+
     lock = unit_schedule_lock_irqsave(v->sched_unit, &flags);
 
     vcpu_sleep_nosync_locked(v);
 
     unit_schedule_unlock_irqrestore(lock, flags, v->sched_unit);
+
+    rcu_read_unlock(&sched_res_rculock);
 }
 
 void vcpu_sleep_sync(struct vcpu *v)
@@ -759,6 +803,8 @@ void vcpu_wake(struct vcpu *v)
 
     TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
 
+    rcu_read_lock(&sched_res_rculock);
+
     lock = unit_schedule_lock_irqsave(unit, &flags);
 
     if ( likely(vcpu_runnable(v)) )
@@ -779,6 +825,8 @@ void vcpu_wake(struct vcpu *v)
     }
 
     unit_schedule_unlock_irqrestore(lock, flags, unit);
+
+    rcu_read_unlock(&sched_res_rculock);
 }
 
 void vcpu_unblock(struct vcpu *v)
@@ -812,6 +860,8 @@ static void sched_unit_move_locked(struct sched_unit *unit,
     unsigned int old_cpu = unit->res->processor;
     struct vcpu *v;
 
+    rcu_read_lock(&sched_res_rculock);
+
     /*
      * Transfer urgency status to new CPU before switching CPUs, as
      * once the switch occurs, v->is_urgent is no longer protected by
@@ -831,6 +881,8 @@ static void sched_unit_move_locked(struct sched_unit *unit,
      * pointer can't change while the current lock is held.
      */
     sched_migrate(vcpu_scheduler(unit->vcpu), unit, new_cpu);
+
+    rcu_read_unlock(&sched_res_rculock);
 }
 
 /*
@@ -992,6 +1044,8 @@ void restore_vcpu_affinity(struct domain *d)
 
     ASSERT(system_state == SYS_STATE_resume);
 
+    rcu_read_lock(&sched_res_rculock);
+
     for_each_sched_unit ( d, unit )
     {
         spinlock_t *lock;
@@ -1042,6 +1096,8 @@ void restore_vcpu_affinity(struct domain *d)
             sched_move_irqs(unit);
     }
 
+    rcu_read_unlock(&sched_res_rculock);
+
     domain_update_node_affinity(d);
 }
 
@@ -1057,9 +1113,11 @@ int cpu_disable_scheduler(unsigned int cpu)
     cpumask_t online_affinity;
     int ret = 0;
 
+    rcu_read_lock(&sched_res_rculock);
+
     c = get_sched_res(cpu)->cpupool;
     if ( c == NULL )
-        return ret;
+        goto out;
 
     for_each_domain_in_cpupool ( d, c )
     {
@@ -1116,6 +1174,9 @@ int cpu_disable_scheduler(unsigned int cpu)
         }
     }
 
+ out:
+    rcu_read_unlock(&sched_res_rculock);
+
     return ret;
 }
 
@@ -1149,7 +1210,9 @@ void sched_set_affinity(
 {
     struct sched_unit *unit = v->sched_unit;
 
+    rcu_read_lock(&sched_res_rculock);
     sched_adjust_affinity(dom_scheduler(v->domain), unit, hard, soft);
+    rcu_read_unlock(&sched_res_rculock);
 
     if ( hard )
         cpumask_copy(unit->cpu_hard_affinity, hard);
@@ -1169,6 +1232,8 @@ static int vcpu_set_affinity(
     spinlock_t *lock;
     int ret = 0;
 
+    rcu_read_lock(&sched_res_rculock);
+
     lock = unit_schedule_lock_irq(unit);
 
     if ( unit->affinity_broken )
@@ -1197,6 +1262,8 @@ static int vcpu_set_affinity(
 
     sched_unit_migrate_finish(unit);
 
+    rcu_read_unlock(&sched_res_rculock);
+
     return ret;
 }
 
@@ -1323,11 +1390,16 @@ static long do_poll(struct sched_poll *sched_poll)
 long vcpu_yield(void)
 {
     struct vcpu * v=current;
-    spinlock_t *lock = unit_schedule_lock_irq(v->sched_unit);
+    spinlock_t *lock;
+
+    rcu_read_lock(&sched_res_rculock);
+    lock = unit_schedule_lock_irq(v->sched_unit);
 
     sched_yield(vcpu_scheduler(v), v->sched_unit);
     unit_schedule_unlock_irq(lock, v->sched_unit);
 
+    rcu_read_unlock(&sched_res_rculock);
+
     SCHED_STAT_CRANK(vcpu_yield);
     TRACE_2D(TRC_SCHED_YIELD, current->domain->domain_id, current->vcpu_id);
 
@@ -1413,6 +1485,8 @@ int vcpu_pin_override(struct vcpu *v, int cpu)
     spinlock_t *lock;
     int ret = -EINVAL;
 
+    rcu_read_lock(&sched_res_rculock);
+
     lock = unit_schedule_lock_irq(unit);
 
     if ( cpu < 0 )
@@ -1447,6 +1521,8 @@ int vcpu_pin_override(struct vcpu *v, int cpu)
 
     sched_unit_migrate_finish(unit);
 
+    rcu_read_unlock(&sched_res_rculock);
+
     return ret;
 }
 
@@ -1652,9 +1728,13 @@ long sched_adjust(struct domain *d, struct xen_domctl_scheduler_op *op)
 
     /* NB: the pluggable scheduler code needs to take care
      * of locking by itself. */
+    rcu_read_lock(&sched_res_rculock);
+
     if ( (ret = sched_adjust_dom(dom_scheduler(d), d, op)) == 0 )
         TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id);
 
+    rcu_read_unlock(&sched_res_rculock);
+
     return ret;
 }
 
@@ -1675,9 +1755,13 @@ long sched_adjust_global(struct xen_sysctl_scheduler_op *op)
     if ( pool == NULL )
         return -ESRCH;
 
+    rcu_read_lock(&sched_res_rculock);
+
     rc = ((op->sched_id == pool->sched->sched_id)
           ? sched_adjust_cpupool(pool->sched, op) : -EINVAL);
 
+    rcu_read_unlock(&sched_res_rculock);
+
     cpupool_put(pool);
 
     return rc;
@@ -1859,8 +1943,11 @@ static void context_saved(struct sched_unit *unit)
 void sched_context_switched(struct vcpu *vprev, struct vcpu *vnext)
 {
     struct sched_unit *next = vnext->sched_unit;
-    struct sched_resource *sd = get_sched_res(smp_processor_id());
+    struct sched_resource *sd;
 
+    rcu_read_lock(&sched_res_rculock);
+
+    sd = get_sched_res(smp_processor_id());
     /* Clear running flag /after/ writing context to memory. */
     smp_wmb();
 
@@ -1887,6 +1974,8 @@ void sched_context_switched(struct vcpu *vprev, struct vcpu *vnext)
 
     if ( is_idle_vcpu(vprev) && vprev != vnext )
         vprev->sched_unit = sd->sched_unit_idle;
+
+    rcu_read_unlock(&sched_res_rculock);
 }
 
 static void sched_context_switch(struct vcpu *vprev, struct vcpu *vnext,
@@ -1904,6 +1993,8 @@ static void sched_context_switch(struct vcpu *vprev, struct vcpu *vnext,
         vnext->sched_unit =
             get_sched_res(smp_processor_id())->sched_unit_idle;
 
+        rcu_read_unlock(&sched_res_rculock);
+
         trace_continue_running(vnext);
         return continue_running(vprev);
     }
@@ -1917,6 +2008,8 @@ static void sched_context_switch(struct vcpu *vprev, struct vcpu *vnext,
 
     vcpu_periodic_timer_work(vnext);
 
+    rcu_read_unlock(&sched_res_rculock);
+
     context_switch(vprev, vnext);
 }
 
@@ -2047,6 +2140,8 @@ static void sched_slave(void)
 
     ASSERT_NOT_IN_ATOMIC();
 
+    rcu_read_lock(&sched_res_rculock);
+
     lock = pcpu_schedule_lock_irq(cpu);
 
     now = NOW();
@@ -2070,6 +2165,8 @@ static void sched_slave(void)
     {
         pcpu_schedule_unlock_irq(lock, cpu);
 
+        rcu_read_unlock(&sched_res_rculock);
+
         /* Check for failed forced context switch. */
         if ( do_softirq )
             raise_softirq(SCHEDULE_SOFTIRQ);
@@ -2100,13 +2197,16 @@ static void schedule(void)
     struct sched_resource *sd;
     spinlock_t *lock;
     int cpu = smp_processor_id();
-    unsigned int gran = get_sched_res(cpu)->granularity;
+    unsigned int gran;
 
     ASSERT_NOT_IN_ATOMIC();
 
     SCHED_STAT_CRANK(sched_run);
 
+    rcu_read_lock(&sched_res_rculock);
+
     sd = get_sched_res(cpu);
+    gran = sd->granularity;
 
     lock = pcpu_schedule_lock_irq(cpu);
 
@@ -2118,6 +2218,8 @@ static void schedule(void)
          */
         pcpu_schedule_unlock_irq(lock, cpu);
 
+        rcu_read_unlock(&sched_res_rculock);
+
         raise_softirq(SCHEDULE_SOFTIRQ);
         return sched_slave();
     }
@@ -2230,14 +2332,27 @@ static int cpu_schedule_up(unsigned int cpu)
     return 0;
 }
 
+static void sched_res_free(struct rcu_head *head)
+{
+    struct sched_resource *sd = container_of(head, struct sched_resource, rcu);
+
+    xfree(sd);
+}
+
 static void cpu_schedule_down(unsigned int cpu)
 {
-    struct sched_resource *sd = get_sched_res(cpu);
+    struct sched_resource *sd;
+
+    rcu_read_lock(&sched_res_rculock);
+
+    sd = get_sched_res(cpu);
 
     kill_timer(&sd->s_timer);
 
     set_sched_res(cpu, NULL);
-    xfree(sd);
+    call_rcu(&sd->rcu, sched_res_free);
+
+    rcu_read_unlock(&sched_res_rculock);
 }
 
 void sched_rm_cpu(unsigned int cpu)
@@ -2257,6 +2372,8 @@ static int cpu_schedule_callback(
     unsigned int cpu = (unsigned long)hcpu;
     int rc = 0;
 
+    rcu_read_lock(&sched_res_rculock);
+
     /*
      * From the scheduler perspective, bringing up a pCPU requires
      * allocating and initializing the per-pCPU scheduler specific data,
@@ -2303,6 +2420,8 @@ static int cpu_schedule_callback(
         break;
     }
 
+    rcu_read_unlock(&sched_res_rculock);
+
     return !rc ? NOTIFY_DONE : notifier_from_errno(rc);
 }
 
@@ -2392,8 +2511,13 @@ void __init scheduler_init(void)
     idle_domain->max_vcpus = nr_cpu_ids;
     if ( vcpu_create(idle_domain, 0) == NULL )
         BUG();
+
+    rcu_read_lock(&sched_res_rculock);
+
     get_sched_res(0)->curr = idle_vcpu[0]->sched_unit;
     get_sched_res(0)->sched_unit_idle = idle_vcpu[0]->sched_unit;
+
+    rcu_read_unlock(&sched_res_rculock);
 }
 
 /*
@@ -2406,9 +2530,14 @@ int schedule_cpu_add(unsigned int cpu, struct cpupool *c)
     struct vcpu *idle;
     void *ppriv, *vpriv;
     struct scheduler *new_ops = c->sched;
-    struct sched_resource *sd = get_sched_res(cpu);
+    struct sched_resource *sd;
     spinlock_t *old_lock, *new_lock;
    unsigned long flags;
+    int ret = 0;
+
+    rcu_read_lock(&sched_res_rculock);
+
+    sd = get_sched_res(cpu);
 
     ASSERT(cpumask_test_cpu(cpu, &cpupool_free_cpus));
     ASSERT(!cpumask_test_cpu(cpu, c->cpu_valid));
@@ -2428,13 +2557,18 @@ int schedule_cpu_add(unsigned int cpu, struct cpupool *c)
     idle = idle_vcpu[cpu];
     ppriv = sched_alloc_pdata(new_ops, cpu);
     if ( IS_ERR(ppriv) )
-        return PTR_ERR(ppriv);
+    {
+        ret = PTR_ERR(ppriv);
+        goto out;
+    }
+
     vpriv = sched_alloc_vdata(new_ops, idle->sched_unit,
                               idle->domain->sched_priv);
     if ( vpriv == NULL )
     {
         sched_free_pdata(new_ops, ppriv, cpu);
-        return -ENOMEM;
+        ret = -ENOMEM;
+        goto out;
     }
 
     /*
@@ -2473,7 +2607,10 @@ int schedule_cpu_add(unsigned int cpu, struct cpupool *c)
     /* The cpu is added to a pool, trigger it to go pick up some work */
     cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
 
-    return 0;
+ out:
+    rcu_read_unlock(&sched_res_rculock);
+
+    return ret;
 }
 
 /*
@@ -2486,11 +2623,16 @@ int schedule_cpu_rm(unsigned int cpu)
 {
     struct vcpu *idle;
     void *ppriv_old, *vpriv_old;
-    struct sched_resource *sd = get_sched_res(cpu);
-    struct scheduler *old_ops = sd->scheduler;
+    struct sched_resource *sd;
+    struct scheduler *old_ops;
     spinlock_t *old_lock;
     unsigned long flags;
 
+    rcu_read_lock(&sched_res_rculock);
+
+    sd = get_sched_res(cpu);
+    old_ops = sd->scheduler;
+
     ASSERT(sd->cpupool != NULL);
     ASSERT(cpumask_test_cpu(cpu, &cpupool_free_cpus));
     ASSERT(!cpumask_test_cpu(cpu, sd->cpupool->cpu_valid));
@@ -2523,6 +2665,8 @@ int schedule_cpu_rm(unsigned int cpu)
     sd->granularity = 1;
     sd->cpupool = NULL;
 
+    rcu_read_unlock(&sched_res_rculock);
+
     return 0;
 }
 
@@ -2571,6 +2715,8 @@ void schedule_dump(struct cpupool *c)
 
     /* Locking, if necessary, must be handled withing each scheduler */
 
+    rcu_read_lock(&sched_res_rculock);
+
     if ( c != NULL )
     {
         sched = c->sched;
@@ -2590,6 +2736,8 @@ void schedule_dump(struct cpupool *c)
         for_each_cpu (i, cpus)
             sched_dump_cpu_state(sched, i);
     }
+
+    rcu_read_unlock(&sched_res_rculock);
 }
 
 void sched_tick_suspend(void)
@@ -2597,10 +2745,14 @@ void sched_tick_suspend(void)
     struct scheduler *sched;
     unsigned int cpu = smp_processor_id();
 
+    rcu_read_lock(&sched_res_rculock);
+
     sched = get_sched_res(cpu)->scheduler;
     sched_do_tick_suspend(sched, cpu);
     rcu_idle_enter(cpu);
     rcu_idle_timer_start();
+
+    rcu_read_unlock(&sched_res_rculock);
 }
 
 void sched_tick_resume(void)
@@ -2608,10 +2760,14 @@ void sched_tick_resume(void)
     struct scheduler *sched;
     unsigned int cpu = smp_processor_id();
 
+    rcu_read_lock(&sched_res_rculock);
+
     rcu_idle_timer_stop();
     rcu_idle_exit(cpu);
     sched = get_sched_res(cpu)->scheduler;
     sched_do_tick_resume(sched, cpu);
+
+    rcu_read_unlock(&sched_res_rculock);
 }
 
 void wait(void)
@@ -2626,7 +2782,13 @@ void wait(void)
  */
 int sched_has_urgent_vcpu(void)
 {
-    return atomic_read(&get_sched_res(smp_processor_id())->urgent_count);
+    int val;
+
+    rcu_read_lock(&sched_res_rculock);
+    val = atomic_read(&get_sched_res(smp_processor_id())->urgent_count);
+    rcu_read_unlock(&sched_res_rculock);
+
+    return val;
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h
index e04d249dfd..abf6d0522d 100644
--- a/xen/include/xen/sched-if.h
+++ b/xen/include/xen/sched-if.h
@@ -10,6 +10,7 @@
 
 #include <xen/percpu.h>
 #include <xen/err.h>
+#include <xen/rcupdate.h>
 
 /* A global pointer to the initial cpupool (POOL0). */
 extern struct cpupool *cpupool0;
@@ -58,20 +59,22 @@ struct sched_resource {
     unsigned int processor;
     unsigned int granularity;
     const cpumask_t *cpus;           /* cpus covered by this struct     */
+    struct rcu_head rcu;
 };
 
 #define curr_on_cpu(c)    (get_sched_res(c)->curr)
 
 DECLARE_PER_CPU(struct sched_resource *, sched_res);
+extern rcu_read_lock_t sched_res_rculock;
 
 static inline struct sched_resource *get_sched_res(unsigned int cpu)
 {
-    return per_cpu(sched_res, cpu);
+    return rcu_dereference(per_cpu(sched_res, cpu));
 }
 
 static inline void set_sched_res(unsigned int cpu, struct sched_resource *res)
 {
-    per_cpu(sched_res, cpu) = res;
+    rcu_assign_pointer(per_cpu(sched_res, cpu), res);
 }
 
 static inline bool is_idle_unit(const struct sched_unit *unit)
-- 
2.16.4
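For reference, the usage pattern the patch establishes can be condensed into a
short sketch. This is illustrative only: it reuses the identifiers introduced
above (sched_res_rculock, get_sched_res(), set_sched_res(), sched_res_free(),
call_rcu()), the two wrapper functions are hypothetical, and the code is
Xen-internal, so it will not build outside the hypervisor tree.

    /* Read side: dereference the per-cpu sched_resource only inside a
     * read-side critical section, so it cannot be freed underneath us. */
    static unsigned int sketch_read_granularity(unsigned int cpu)
    {
        unsigned int gran;

        rcu_read_lock(&sched_res_rculock);
        gran = get_sched_res(cpu)->granularity; /* rcu_dereference() inside */
        rcu_read_unlock(&sched_res_rculock);

        return gran;
    }

    /* Update side (mirrors cpu_schedule_down() above): unpublish the
     * pointer, then defer the actual xfree() until all current readers
     * have left their read sections. */
    static void sketch_retire_sched_res(unsigned int cpu)
    {
        struct sched_resource *sd;

        rcu_read_lock(&sched_res_rculock);

        sd = get_sched_res(cpu);
        set_sched_res(cpu, NULL);           /* rcu_assign_pointer() inside */
        call_rcu(&sd->rcu, sched_res_free); /* freed after a grace period */

        rcu_read_unlock(&sched_res_rculock);
    }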