Re: [Xen-devel] [PATCH v8] xen: sched: convert RTDS from time to event driven model
I'm focusing on the style and the logic in the replenish handler:

> /*
> @@ -160,6 +180,7 @@ struct rt_private {
>   */
>  struct rt_vcpu {
>      struct list_head q_elem;    /* on the runq/depletedq list */
> +    struct list_head replq_elem;/* on the repl event list */

Missing space before the /*.

>
>      /* Up-pointers */
>      struct rt_dom *sdom;
> @@ -213,8 +234,14 @@ static inline struct list_head *rt_depletedq(const struct scheduler *ops)
>      return &rt_priv(ops)->depletedq;
>  }
>
> +static inline struct list_head *rt_replq(const struct scheduler *ops)
> +{
> +    return &rt_priv(ops)->replq;
> +}
> +
>  /*
> - * Queue helper functions for runq and depletedq
> + * Helper functions for manipulating the runqueue, the depleted queue,
> + * and the replenishment events queue.
>   */
>  static int
>  __vcpu_on_q(const struct rt_vcpu *svc)
> @@ -228,6 +255,18 @@ __q_elem(struct list_head *elem)
>      return list_entry(elem, struct rt_vcpu, q_elem);
>  }
>
> +static struct rt_vcpu *
> +replq_elem(struct list_head *elem)
> +{
> +    return list_entry(elem, struct rt_vcpu, replq_elem);
> +}
> +
> +static int
> +vcpu_on_replq(const struct rt_vcpu *svc)
> +{
> +    return !list_empty(&svc->replq_elem);
> +}
> +
>  /*
>   * Debug related code, dump vcpu/cpu information
>   */
> @@ -288,7 +327,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu)
>  static void
>  rt_dump(const struct scheduler *ops)
>  {
> -    struct list_head *runq, *depletedq, *iter;
> +    struct list_head *runq, *depletedq, *replq, *iter;
>      struct rt_private *prv = rt_priv(ops);
>      struct rt_vcpu *svc;
>      struct rt_dom *sdom;
> @@ -301,6 +340,7 @@ rt_dump(const struct scheduler *ops)
>
>      runq = rt_runq(ops);
>      depletedq = rt_depletedq(ops);
> +    replq = rt_replq(ops);
>
>      printk("Global RunQueue info:\n");
>      list_for_each( iter, runq )
> @@ -316,6 +356,13 @@ rt_dump(const struct scheduler *ops)
>          rt_dump_vcpu(ops, svc);
>      }
>
> +    printk("Global Replenishment Event info:\n");
> +    list_for_each( iter, replq )
> +    {
> +        svc = replq_elem(iter);
> +        rt_dump_vcpu(ops, svc);
> +    }
> +
>      printk("Domain info:\n");
>      list_for_each( iter, &prv->sdom )
>      {
> @@ -380,11 +427,77 @@ rt_update_deadline(s_time_t now, struct rt_vcpu *svc)
>      return;
>  }
>
> +/*
> + * A helper function that only removes a vcpu from a queue
> + * and it returns 1 if the vcpu was in front of the list.
> + */
> +static inline int
> +deadline_queue_remove(struct list_head *queue, struct list_head *elem)
> +{
> +    int pos = 0;

Add an empty line here. Usually, the variable definitions and the main body of a function are separated by an empty line.

> +    if ( queue->next != elem )
> +        pos = 1;
> +
> +    list_del_init(elem);
> +    return !pos;
> +}
> +
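To double-check that we agree on the helper's contract: it always removes the element, and the return value only tells the caller whether the element was at the head, i.e. whether the timer may need re-programming. Below is a quick user-space sketch of that contract; the list primitives are minimal stand-ins for Xen's, not the real ones. Testing the head condition directly (was_front below) would also let us drop the pos/!pos double negation:

#include <stdio.h>

/* Minimal stand-ins for Xen's list primitives (illustration only). */
struct list_head { struct list_head *prev, *next; };

static void list_init(struct list_head *h) { h->prev = h->next = h; }

static void list_add_tail(struct list_head *n, struct list_head *h)
{
    n->prev = h->prev; n->next = h;
    h->prev->next = n; h->prev = n;
}

static void list_del_init(struct list_head *n)
{
    n->prev->next = n->next; n->next->prev = n->prev;
    list_init(n);
}

/* Remove elem from queue; return 1 if it was the queue head. */
static int deadline_queue_remove(struct list_head *queue, struct list_head *elem)
{
    int was_front = (queue->next == elem);

    list_del_init(elem);
    return was_front;
}

int main(void)
{
    struct list_head q, a, b;

    list_init(&q);
    list_add_tail(&a, &q);
    list_add_tail(&b, &q);

    /* Removing the head reports 1: the caller must re-program the timer. */
    printf("%d\n", deadline_queue_remove(&q, &a));  /* 1 */
    /* After a is gone, b is the new head, so this also reports 1. */
    printf("%d\n", deadline_queue_remove(&q, &b));  /* 1 */
    return 0;
}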
>  static inline void
>  __q_remove(struct rt_vcpu *svc)
>  {
> -    if ( __vcpu_on_q(svc) )
> -        list_del_init(&svc->q_elem);
> +    ASSERT( __vcpu_on_q(svc) );
> +    list_del_init(&svc->q_elem);
> +}
> +
> +/*
> + * Removing a vcpu from the replenishment queue could
> + * re-program the timer for the next replenishment event
> + * if it was at the front of the list.
> + */
> +static inline void
> +__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc)
> +{
> +    struct rt_private *prv = rt_priv(ops);
> +    struct list_head *replq = rt_replq(ops);
> +    struct timer* repl_timer = prv->repl_timer;
> +
> +    ASSERT( vcpu_on_replq(svc) );
> +
> +    if ( deadline_queue_remove(replq, &svc->replq_elem) )
> +    {
> +        /* re-arm the timer for the next replenishment event */
> +        if ( !list_empty(replq) )
> +        {
> +            struct rt_vcpu *svc_next = replq_elem(replq->next);
> +            set_timer(repl_timer, svc_next->cur_deadline);
> +        }
> +        else
> +            stop_timer(repl_timer);
> +    }
> +}
> +
> +/*
> + * An utility function that inserts a vcpu to a
> + * queue based on certain order (EDF). The returned
> + * value is 1 if a vcpu has been inserted to the
> + * front of a list.
> + */
> +static int
> +deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct list_head *elem),
> +                      struct rt_vcpu *svc, struct list_head *elem, struct list_head *queue)
> +{
> +    struct list_head *iter;
> +    int pos = 0;
> +
> +    list_for_each(iter, queue)

Space after ( and before ). If unsure about the spacing, we can refer to sched_credit.c for the style as well, besides the CODING_STYLE file. :-)

> +    {
> +        struct rt_vcpu * iter_svc = (*_get_q_elem)(iter);
> +        if ( svc->cur_deadline <= iter_svc->cur_deadline )
> +            break;
> +        pos++;
> +    }
> +    list_add_tail(elem, iter);
> +    return !pos;
> +}
>
>  /*
> @@ -397,27 +510,62 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
>  {
>      struct rt_private *prv = rt_priv(ops);
>      struct list_head *runq = rt_runq(ops);
> -    struct list_head *iter;
>
>      ASSERT( spin_is_locked(&prv->lock) );
> -
>      ASSERT( !__vcpu_on_q(svc) );
> +    ASSERT( vcpu_on_replq(svc) );
>
>      /* add svc to runq if svc still has budget */
>      if ( svc->cur_budget > 0 )
> -    {
> -        list_for_each(iter, runq)
> -        {
> -            struct rt_vcpu * iter_svc = __q_elem(iter);
> -            if ( svc->cur_deadline <= iter_svc->cur_deadline )
> -                break;
> -        }
> -        list_add_tail(&svc->q_elem, iter);
> -    }
> +        deadline_queue_insert(&__q_elem, svc, &svc->q_elem, runq);
>      else
> -    {
>          list_add(&svc->q_elem, &prv->depletedq);
> +}
> +
> +/*
> + * Insert svc into the replenishment event list
> + * in replenishment time order.
> + * vcpus that need to be replished earlier go first.
> + * The timer may be re-programmed if svc is inserted
> + * at the front of the event list.
> + */
> +static void
> +__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
> +{
> +    struct list_head *replq = rt_replq(ops);
> +    struct rt_private *prv = rt_priv(ops);
> +    struct timer *repl_timer = prv->repl_timer;
> +
> +    ASSERT( !vcpu_on_replq(svc) );
> +
> +    if ( deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq) )
> +        set_timer(repl_timer, svc->cur_deadline);
> +}
> +
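On deadline_queue_insert() itself: the getter callback is what lets one helper serve both the run queue (via __q_elem) and the replenishment queue (via replq_elem). Here is a self-contained user-space sketch of the same EDF-ordered insertion, with a mock vcpu struct and a local list_entry(), just to illustrate the ordering and the return value (1 when the new element becomes the head). Returning pos == 0 directly again avoids the double negation:

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *prev, *next; };
static void list_init(struct list_head *h) { h->prev = h->next = h; }
static void list_add_tail(struct list_head *n, struct list_head *h)
{
    n->prev = h->prev; n->next = h;
    h->prev->next = n; h->prev = n;
}

struct vcpu_mock {
    long cur_deadline;
    struct list_head replq_elem;
};

#define list_entry(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

static struct vcpu_mock *replq_elem(struct list_head *elem)
{
    return list_entry(elem, struct vcpu_mock, replq_elem);
}

/*
 * EDF-ordered insertion: walk the queue until the first element with a
 * later deadline, and insert right before it.  Returns 1 if the new
 * element became the queue head (earliest deadline).
 */
static int deadline_queue_insert(struct vcpu_mock *(*get)(struct list_head *),
                                 struct vcpu_mock *svc,
                                 struct list_head *elem,
                                 struct list_head *queue)
{
    struct list_head *iter;
    int pos = 0;

    for ( iter = queue->next; iter != queue; iter = iter->next )
    {
        if ( svc->cur_deadline <= get(iter)->cur_deadline )
            break;
        pos++;
    }
    list_add_tail(elem, iter);   /* inserts elem right before iter */
    return pos == 0;
}

int main(void)
{
    struct list_head q;
    struct vcpu_mock a = { .cur_deadline = 30 }, b = { .cur_deadline = 10 };

    list_init(&q);
    printf("%d\n", deadline_queue_insert(replq_elem, &a, &a.replq_elem, &q)); /* 1 */
    printf("%d\n", deadline_queue_insert(replq_elem, &b, &b.replq_elem, &q)); /* 1: earlier deadline, new head */
    return 0;
}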
> +/*
> + * Removes and re-inserts an event to the replenishment queue.
> + */
> +static void
> +replq_reinsert(const struct scheduler *ops, struct rt_vcpu *svc)
> +{
> +    struct list_head *replq = rt_replq(ops);
> +    struct rt_private *prv = rt_priv(ops);
> +    struct timer *repl_timer = prv->repl_timer;
> +
> +    ASSERT( vcpu_on_replq(svc) );
> +
> +    if ( deadline_queue_remove(replq, &svc->replq_elem) )
> +    {
> +        if ( deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq) )
> +            set_timer(repl_timer, svc->cur_deadline);
> +        else
> +        {
> +            struct rt_vcpu *svc_next = replq_elem(replq->next);
> +            set_timer(repl_timer, svc_next->cur_deadline);
> +        }
> +    }
> +    else if ( deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq) )
> +        set_timer(repl_timer, svc->cur_deadline);
> +}
>
>  /*
> @@ -449,11 +597,18 @@ rt_init(struct scheduler *ops)
>      INIT_LIST_HEAD(&prv->sdom);
>      INIT_LIST_HEAD(&prv->runq);
>      INIT_LIST_HEAD(&prv->depletedq);
> +    INIT_LIST_HEAD(&prv->replq);
>
>      cpumask_clear(&prv->tickled);
>
>      ops->sched_data = prv;
>
> +    /*
> +     * The timer initialization will happen later when
> +     * the first pcpu is added to this pool in alloc_pdata.
> +     */
> +    prv->repl_timer = NULL;
> +
>      return 0;
>
>  no_mem:
> @@ -473,6 +628,10 @@ rt_deinit(struct scheduler *ops)
>          xfree(_cpumask_scratch);
>          _cpumask_scratch = NULL;
>      }
> +
> +    kill_timer(prv->repl_timer);
> +    xfree(prv->repl_timer);
> +
>      ops->sched_data = NULL;
>      xfree(prv);
>  }
> @@ -494,6 +653,17 @@ rt_alloc_pdata(const struct scheduler *ops, int cpu)
>      if ( !alloc_cpumask_var(&_cpumask_scratch[cpu]) )
>          return NULL;
>
> +    if ( prv->repl_timer == NULL )
> +    {
> +        /* Allocate the timer on the first cpu of this pool. */
> +        prv->repl_timer = xzalloc(struct timer);
> +
> +        if ( prv->repl_timer == NULL )
> +            return NULL;
> +
> +        init_timer(prv->repl_timer, repl_handler, (void *)ops, cpu);
> +    }
> +
>      /* 1 indicates alloc. succeed in schedule.c */
>      return (void *)1;
>  }
> @@ -587,6 +757,7 @@ rt_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
>          return NULL;
>
>      INIT_LIST_HEAD(&svc->q_elem);
> +    INIT_LIST_HEAD(&svc->replq_elem);
>      svc->flags = 0U;
>      svc->sdom = dd;
>      svc->vcpu = vc;
> @@ -610,7 +781,8 @@ rt_free_vdata(const struct scheduler *ops, void *priv)
>  }
>
>  /*
> - * This function is called in sched_move_domain() in schedule.c
> + * It is called in sched_move_domain() and sched_init_vcpu
> + * in schedule.c.
>   * When move a domain to a new cpupool.
>   * It inserts vcpus of moving domain to the scheduler's RunQ in
>   * dest. cpupool.
> @@ -628,8 +800,13 @@ rt_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
>      if ( now >= svc->cur_deadline )
>          rt_update_deadline(now, svc);
>
> -    if ( !__vcpu_on_q(svc) && vcpu_runnable(vc) && !vc->is_running )
> -        __runq_insert(ops, svc);
> +    if ( !__vcpu_on_q(svc) && vcpu_runnable(vc) )
> +    {
> +        __replq_insert(ops, svc);
> +
> +        if ( !vc->is_running )
> +            __runq_insert(ops, svc);
> +    }
>      vcpu_schedule_unlock_irq(lock, vc);
>
>      SCHED_STAT_CRANK(vcpu_insert);
> @@ -652,6 +829,10 @@ rt_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
>      lock = vcpu_schedule_lock_irq(vc);
>      if ( __vcpu_on_q(svc) )
>          __q_remove(svc);
> +
> +    if ( vcpu_on_replq(svc) )
> +        __replq_remove(ops,svc);
> +
>      vcpu_schedule_unlock_irq(lock, vc);
>  }
>
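A side note on the timer lifecycle, since it took me a moment to see why rt_init() leaves repl_timer NULL: the scheduler-wide timer needs a cpu to run on, and the pool has no cpus at init time, so the allocation is deferred to the first rt_alloc_pdata() call. A toy user-space model of that deferred-initialization pattern (the struct timer here and the function names are simplified stand-ins, not Xen's real timer API):

#include <stdio.h>
#include <stdlib.h>

/*
 * Toy model of "allocate the timer on the first pcpu": created lazily
 * in the first alloc_pdata() call, torn down later in deinit().
 */
struct timer { int cpu; const char *name; };

struct rt_private_mock {
    struct timer *repl_timer;   /* NULL until the first pcpu arrives */
};

static void *alloc_pdata(struct rt_private_mock *prv, int cpu)
{
    if ( prv->repl_timer == NULL )
    {
        prv->repl_timer = calloc(1, sizeof(*prv->repl_timer));
        if ( prv->repl_timer == NULL )
            return NULL;
        prv->repl_timer->cpu = cpu;       /* bind to the first pool cpu */
        prv->repl_timer->name = "repl";
        printf("timer initialised on cpu %d\n", cpu);
    }
    return (void *)1;                     /* success marker, as in sched_rt.c */
}

int main(void)
{
    struct rt_private_mock prv = { .repl_timer = NULL };

    alloc_pdata(&prv, 2);   /* first pcpu: allocates and binds the timer */
    alloc_pdata(&prv, 5);   /* later pcpus: timer already exists, no-op */
    free(prv.repl_timer);
    return 0;
}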
> @@ -707,7 +888,14 @@ burn_budget(const struct scheduler *ops, struct rt_vcpu *svc, s_time_t now)
>      svc->cur_budget -= delta;
>
>      if ( svc->cur_budget < 0 )

Although this line is not changed in the patch, I'm thinking maybe it should be if ( svc->cur_budget <= 0 ), because budget == 0 also means a depleted budget.

> +    {
>          svc->cur_budget = 0;
> +        /*
> +         * Set __RTDS_was_depleted so the replenishment
> +         * handler can let this vcpu tickle if it was refilled.
> +         */
> +        set_bit(__RTDS_was_depleted, &svc->flags);
> +    }
>
>      /* TRACE */
>      {
> @@ -784,44 +972,6 @@ __runq_pick(const struct scheduler *ops, const cpumask_t *mask)
>  }
>
>  /*
> - * Update vcpu's budget and
> - * sort runq by insert the modifed vcpu back to runq
> - * lock is grabbed before calling this function
> - */
> -static void
> -__repl_update(const struct scheduler *ops, s_time_t now)
> -{
> -    struct list_head *runq = rt_runq(ops);
> -    struct list_head *depletedq = rt_depletedq(ops);
> -    struct list_head *iter;
> -    struct list_head *tmp;
> -    struct rt_vcpu *svc = NULL;
> -
> -    list_for_each_safe(iter, tmp, runq)
> -    {
> -        svc = __q_elem(iter);
> -        if ( now < svc->cur_deadline )
> -            break;
> -
> -        rt_update_deadline(now, svc);
> -        /* reinsert the vcpu if its deadline is updated */
> -        __q_remove(svc);
> -        __runq_insert(ops, svc);
> -    }
> -
> -    list_for_each_safe(iter, tmp, depletedq)
> -    {
> -        svc = __q_elem(iter);
> -        if ( now >= svc->cur_deadline )
> -        {
> -            rt_update_deadline(now, svc);
> -            __q_remove(svc); /* remove from depleted queue */
> -            __runq_insert(ops, svc); /* add to runq */
> -        }
> -    }
> -}
> -
> -/*
>   * schedule function for rt scheduler.
>   * The lock is already grabbed in schedule.c, no need to lock here
>   */
> @@ -840,8 +990,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
>      /* burn_budget would return for IDLE VCPU */
>      burn_budget(ops, scurr, now);
>
> -    __repl_update(ops, now);
> -
>      if ( tasklet_work_scheduled )
>      {
>          trace_var(TRC_RTDS_SCHED_TASKLET, 1, 0, NULL);
> @@ -868,6 +1016,7 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
>          set_bit(__RTDS_delayed_runq_add, &scurr->flags);
>
>      snext->last_start = now;
> +    ret.time = -1; /* if an idle vcpu is picked */
>      if ( !is_idle_vcpu(snext->vcpu) )
>      {
>          if ( snext != scurr )
> @@ -880,9 +1029,8 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
>              snext->vcpu->processor = cpu;
>              ret.migrated = 1;
>          }
> +        ret.time = snext->budget; /* invoke the scheduler next time */
>      }
> -
> -    ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */
>      ret.task = snext->vcpu;
>
>      return ret;
> @@ -903,7 +1051,10 @@ rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
>      if ( curr_on_cpu(vc->processor) == vc )
>          cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ);
>      else if ( __vcpu_on_q(svc) )
> +    {
>          __q_remove(svc);
> +        __replq_remove(ops, svc);
> +    }
>      else if ( svc->flags & RTDS_delayed_runq_add )
>          clear_bit(__RTDS_delayed_runq_add, &svc->flags);
>  }
> @@ -1008,10 +1159,7 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
>  {
>      struct rt_vcpu * const svc = rt_vcpu(vc);
>      s_time_t now = NOW();
> -    struct rt_private *prv = rt_priv(ops);
> -    struct rt_vcpu *snext = NULL; /* highest priority on RunQ */
> -    struct rt_dom *sdom = NULL;
> -    cpumask_t *online;
> +    bool_t missed;
>
>      BUG_ON( is_idle_vcpu(vc) );
>
> @@ -1033,32 +1181,42 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
>      else
>          SCHED_STAT_CRANK(vcpu_wake_not_runnable);
>
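On __RTDS_was_depleted: as I read it, the point of the flag is that the replenishment handler should only tickle a vcpu whose runnability actually changed, i.e. one that went from depleted back to runnable. A small stand-alone model of that handshake, using plain bit operations in place of Xen's set_bit()/test_and_clear_bit(), and with the <= comparison I suggested above:

#include <stdio.h>

/* Toy model of the depletion-flag handshake between burn_budget()
 * and the replenishment handler (user-space sketch, not Xen code). */
#define RTDS_was_depleted  (1u << 0)

struct vcpu_mock {
    long cur_budget;
    unsigned int flags;
};

/* burn_budget(): the budget ran out; remember that this vcpu was depleted. */
static void burn_budget(struct vcpu_mock *svc, long delta)
{
    svc->cur_budget -= delta;
    if ( svc->cur_budget <= 0 )   /* <=, as suggested above */
    {
        svc->cur_budget = 0;
        svc->flags |= RTDS_was_depleted;
    }
}

/* Replenishment: only a vcpu that was depleted needs a tickle, because
 * only its runnability actually changed. */
static void replenish(struct vcpu_mock *svc, long budget)
{
    svc->cur_budget = budget;
    if ( svc->flags & RTDS_was_depleted )
    {
        svc->flags &= ~RTDS_was_depleted;  /* test-and-clear */
        printf("tickle: vcpu went from depleted to runnable\n");
    }
}

int main(void)
{
    struct vcpu_mock v = { .cur_budget = 5, .flags = 0 };

    burn_budget(&v, 5);    /* drains the budget, sets the flag */
    replenish(&v, 10);     /* refills and tickles exactly once */
    replenish(&v, 10);     /* flag not set: no tickle */
    return 0;
}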
> -    /* If context hasn't been saved for this vcpu yet, we can't put it on
> -     * the Runqueue/DepletedQ. Instead, we set a flag so that it will be
> -     * put on the Runqueue/DepletedQ after the context has been saved.
> +    /*
> +     * If a deadline passed while svc was asleep/blocked, we need new
> +     * scheduling parameters (a new deadline and full budget).
> +     */
> +
> +    missed = ( now >= svc->cur_deadline );
> +    if ( missed )
> +        rt_update_deadline(now, svc);
> +
> +    /*
> +     * If context hasn't been saved for this vcpu yet, we can't put it on
> +     * the run-queue/depleted-queue. Instead, we set the appropriate flag,
> +     * the vcpu will be put back on queue after the context has been saved
> +     * (in rt_context_save()).
>       */
>      if ( unlikely(svc->flags & RTDS_scheduled) )
>      {
>          set_bit(__RTDS_delayed_runq_add, &svc->flags);
> +        /*
> +         * The vcpu is waking up already, and we didn't even had the time to
> +         * remove its next replenishment event from the replenishment queue
> +         * when it blocked! No big deal. If we did not miss the deadline in
> +         * the meantime, let's just leave it there. If we did, let's remove it
> +         * and queue a new one (to occur at our new deadline).
> +         */
> +        if ( missed )
> +            replq_reinsert(ops, svc);
>          return;
>      }
>
> -    if ( now >= svc->cur_deadline)
> -        rt_update_deadline(now, svc);
> -
> +    /* Replenishment event got cancelled when we blocked. Add it back. */
> +    __replq_insert(ops, svc);
>      /* insert svc to runq/depletedq because svc is not in queue now */
>      __runq_insert(ops, svc);
>
> -    __repl_update(ops, now);
> -
> -    ASSERT(!list_empty(&prv->sdom));
> -    sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
> -    online = cpupool_domain_cpumask(sdom->dom);
> -    snext = __runq_pick(ops, online); /* pick snext from ALL valid cpus */
> -
> -    runq_tickle(ops, snext);
> -
> -    return;
> +    runq_tickle(ops, svc);
>  }
>
>  /*
> @@ -1069,10 +1227,6 @@ static void
>  rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
>  {
>      struct rt_vcpu *svc = rt_vcpu(vc);
> -    struct rt_vcpu *snext = NULL;
> -    struct rt_dom *sdom = NULL;
> -    struct rt_private *prv = rt_priv(ops);
> -    cpumask_t *online;
>      spinlock_t *lock = vcpu_schedule_lock_irq(vc);
>
>      clear_bit(__RTDS_scheduled, &svc->flags);
> @@ -1084,15 +1238,11 @@ rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
>          likely(vcpu_runnable(vc)) )
>      {
>          __runq_insert(ops, svc);
> -        __repl_update(ops, NOW());
> -
> -        ASSERT(!list_empty(&prv->sdom));
> -        sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
> -        online = cpupool_domain_cpumask(sdom->dom);
> -        snext = __runq_pick(ops, online); /* pick snext from ALL cpus */
> -
> -        runq_tickle(ops, snext);
> +        runq_tickle(ops, svc);
>      }
> +    else
> +        __replq_remove(ops, svc);
> +
>  out:
>      vcpu_schedule_unlock_irq(lock, vc);
>  }
> @@ -1150,6 +1300,101 @@ rt_dom_cntl(
>      return rc;
>  }
>
> +/*
> + * The replenishment timer handler picks vcpus
> + * from the replq and does the actual replenishment.
> + */
> +static void repl_handler(void *data){
> +    unsigned long flags;
> +    s_time_t now = NOW();
> +    struct scheduler *ops = data;
> +    struct rt_private *prv = rt_priv(ops);
> +    struct list_head *replq = rt_replq(ops);
> +    struct list_head *runq = rt_runq(ops);
> +    struct timer *repl_timer = prv->repl_timer;
> +    struct list_head *iter, *tmp;
> +    struct list_head tmp_replq;
> +    struct rt_vcpu *svc = NULL;
> +
> +    spin_lock_irqsave(&prv->lock, flags);

Hmm, I haven't thought hard about the choice between spin_lock_irqsave() and spin_lock_irq() before.

Hi Dario, is it better to use spin_lock_irqsave() or spin_lock_irq() at this place?
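To make my question concrete: the two variants only differ when the handler can be entered with interrupts already disabled. spin_unlock_irq() unconditionally re-enables interrupts, while the irqsave variant restores whatever state the caller had. A user-space model of just the interrupt-state handling (the lock itself is omitted):

#include <stdio.h>

/*
 * Model of why spin_lock_irqsave() is the safe default: the _irq unlock
 * always re-enables interrupts, which is wrong if the caller entered
 * with interrupts already disabled; irqsave restores the caller's state.
 */
static int irqs_enabled = 1;

static unsigned long local_irq_save(void)
{
    unsigned long flags = irqs_enabled;
    irqs_enabled = 0;
    return flags;
}
static void local_irq_restore(unsigned long flags) { irqs_enabled = (int)flags; }
static void local_irq_disable(void) { irqs_enabled = 0; }
static void local_irq_enable(void)  { irqs_enabled = 1; }

static void handler_irq(void)
{
    local_irq_disable();            /* spin_lock_irq() */
    /* ... critical section ... */
    local_irq_enable();             /* spin_unlock_irq(): always re-enables */
}

static void handler_irqsave(void)
{
    unsigned long flags = local_irq_save();   /* spin_lock_irqsave() */
    /* ... critical section ... */
    local_irq_restore(flags);                 /* restores the caller's state */
}

int main(void)
{
    irqs_enabled = 0;               /* caller already has interrupts off */
    handler_irqsave();
    printf("after irqsave: %d (still off, correct)\n", irqs_enabled);

    irqs_enabled = 0;
    handler_irq();
    printf("after irq:     %d (re-enabled, wrong here)\n", irqs_enabled);
    return 0;
}

So if the handler is guaranteed to run with interrupts enabled, spin_lock_irq() is enough; otherwise irqsave is the conservative choice.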
I'm not quite sure if the handler can be called in an interrupt-disabled context. Can it? If interrupts are disabled, how can this handler be triggered? If it cannot happen, can we use spin_lock_irq() instead? When I used spin_lock_irq(save), I just referred to what the credit2 scheduler does, but didn't think hard enough about which one has better performance. IMO, spin_lock_irqsave() is safer, but may take more time. If spin_lock_irq() is enough, we don't need to overuse spin_lock_irqsave(), do we?

> +
> +    stop_timer(repl_timer);
> +
> +    /*
> +     * A temperary queue for updated vcpus.
> +     * It is used to tickle.
> +     */
> +    INIT_LIST_HEAD(&tmp_replq);
> +
> +    list_for_each_safe(iter, tmp, replq)
> +    {
> +        svc = replq_elem(iter);
> +
> +        if ( now < svc->cur_deadline )
> +            break;
> +
> +        rt_update_deadline(now, svc);
> +        /*
> +         * When a vcpu is replenished, it is moved
> +         * to a temporary queue to tickle.
> +         */
> +        list_del(&svc->replq_elem);
> +        list_add(&svc->replq_elem, &tmp_replq);
> +
> +        /*
> +         * If svc is on run queue, we need
> +         * to put it to the correct place since
> +         * its deadline changes.
> +         */
> +        if( __vcpu_on_q(svc) )

Space before (.

> +        {
> +            /* put back to runq */
> +            __q_remove(svc);
> +            __runq_insert(ops, svc);
> +        }
> +    }
> +
> +    /* Iterate through the list of updated vcpus. */
> +    list_for_each_safe(iter, tmp, &tmp_replq)
> +    {
> +        struct vcpu* vc;
> +        svc = replq_elem(iter);
> +        vc = svc->vcpu;
> +
> +        if ( curr_on_cpu(vc->processor) == vc &&
> +            ( !list_empty(runq) ) )

( !list_empty(runq) ) should be indented to line up with curr_on_cpu in the line above.

> +        {
> +            /*
> +             * If the vcpu is running, let the head
> +             * of the run queue tickle if it has a
> +             * higher priority.
> +             */
> +            struct rt_vcpu *next_on_runq = __q_elem(runq->next);
> +            if ( svc->cur_deadline >= next_on_runq->cur_deadline )

It's better to be if ( svc->cur_deadline > next_on_runq->cur_deadline ), to avoid an unnecessary tickle when they have the same priority. We assume a priority tie is broken arbitrarily.

> +                runq_tickle(ops, next_on_runq);
> +        }
> +        else if ( __vcpu_on_q(svc) )
> +        {
> +            /* Only tickle a vcpu that was depleted. */

Change the comment to /* Only need to tickle a vcpu that was depleted. */ to make it clearer.

> +            if ( test_and_clear_bit(__RTDS_was_depleted, &svc->flags) )
> +                runq_tickle(ops, svc);
> +        }
> +
> +        list_del(&svc->replq_elem);
> +        /* Move it back to the replenishment event queue. */
> +        deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq);
> +    }
> +
> +    /*
> +     * Use the vcpu that's on the top of the run queue.
> +     * Otherwise don't program the timer.
> +     */
> +    if( !list_empty(replq) )

Space before (.

> +        set_timer(repl_timer, replq_elem(replq->next)->cur_deadline);
> +
> +    spin_unlock_irqrestore(&prv->lock, flags);
> +}
> +
>  static const struct scheduler sched_rtds_def = {
>      .name = "SMP RTDS Scheduler",
>      .opt_name = "rtds",

Great and expeditious work, Tianyang! This version looks good.

Can you set up a repo with the previous version of the patch and this version, so that I can diff the two versions to make sure I didn't miss anything you modified since the last version?

One more thing we should think about is: how can we "prove/test" the correctness of the scheduler? Can we use xentrace to record the scheduling trace, and then write a user-space program to check that the scheduling trace obeys the priority rules of the scheduler?
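Sketching what I mean by a user-space checker: feed it one record per scheduling decision and flag any decision where the vcpu picked to run had a later deadline than some runnable vcpu. The record layout and field names below are made up for illustration; a real tool would parse xentrace/xenalyze output and would have to account for the arbitrary tie-breaking we discussed:

#include <stdio.h>

/* Hypothetical per-decision record; a real checker would build these
 * from the RTDS trace events instead. */
struct sched_rec {
    long long t;            /* timestamp of the decision */
    int picked;             /* vcpu chosen to run */
    long long picked_dl;    /* its deadline */
    long long best_dl;      /* earliest deadline among runnable vcpus */
};

/* EDF invariant: the chosen vcpu never has a later deadline than a
 * runnable-but-waiting vcpu. Returns the number of violations. */
static int check_edf(const struct sched_rec *log, int n)
{
    int violations = 0;

    for ( int i = 0; i < n; i++ )
        if ( log[i].picked_dl > log[i].best_dl )
        {
            printf("t=%lld: vcpu %d ran with deadline %lld, "
                   "but deadline %lld was runnable\n",
                   log[i].t, log[i].picked, log[i].picked_dl, log[i].best_dl);
            violations++;
        }
    return violations;
}

int main(void)
{
    struct sched_rec log[] = {
        { 100, 3, 500, 500 },   /* OK: the earliest deadline ran */
        { 200, 1, 900, 700 },   /* violation: a deadline-700 vcpu waited */
    };

    return check_edf(log, 2) != 0;
}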
Thanks and Best Regards,

Meng

-----------
Meng Xu
PhD Student in Computer and Information Science
University of Pennsylvania
http://www.cis.upenn.edu/~mengxu/