|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC PATCH 1/4] Implement cbs algorithm, remove extra queues, latency scaling, and weight support from sedf
---
xen/common/sched_sedf.c | 947 +++++++++--------------------------------------
1 file changed, 173 insertions(+), 774 deletions(-)
diff --git a/xen/common/sched_sedf.c b/xen/common/sched_sedf.c
index 0c9011a..2ee4538 100644
--- a/xen/common/sched_sedf.c
+++ b/xen/common/sched_sedf.c
@@ -25,24 +25,16 @@
#define CHECK(_p) ((void)0)
#endif
-#define EXTRA_NONE (0)
-#define EXTRA_AWARE (1)
-#define EXTRA_RUN_PEN (2)
-#define EXTRA_RUN_UTIL (4)
-#define EXTRA_WANT_PEN_Q (8)
-#define EXTRA_PEN_Q (0)
-#define EXTRA_UTIL_Q (1)
+#define SEDF_SOFT_TASK (1)
#define SEDF_ASLEEP (16)
-#define EXTRA_QUANTUM (MICROSECS(500))
-#define WEIGHT_PERIOD (MILLISECS(100))
-#define WEIGHT_SAFETY (MILLISECS(5))
+#define DEFAULT_PERIOD (MILLISECS(20))
+#define DEFAULT_SLICE (MILLISECS(10))
#define PERIOD_MAX MILLISECS(10000) /* 10s */
#define PERIOD_MIN (MICROSECS(10)) /* 10us */
#define SLICE_MIN (MICROSECS(5)) /* 5us */
-#define IMPLY(a, b) (!(a) || (b))
#define EQ(a, b) ((!!(a)) == (!!(b)))
@@ -58,24 +50,14 @@ struct sedf_priv_info {
struct sedf_vcpu_info {
struct vcpu *vcpu;
struct list_head list;
- struct list_head extralist[2];
/* Parameters for EDF */
s_time_t period; /* = relative deadline */
s_time_t slice; /* = worst case execution time */
-
- /* Advaced Parameters */
+ /* Note: Server bandwidth = (slice / period) */
- /* Latency Scaling */
- s_time_t period_orig;
- s_time_t slice_orig;
- s_time_t latency;
-
/* Status of domain */
int status;
- /* Weights for "Scheduling for beginners/ lazy/ etc." ;) */
- short weight;
- short extraweight;
/* Bookkeeping */
s_time_t deadl_abs;
s_time_t sched_start_abs;
@@ -84,28 +66,21 @@ struct sedf_vcpu_info {
s_time_t block_abs;
s_time_t unblock_abs;
- /* Scores for {util, block penalty}-weighted extratime distribution */
- int score[2];
- s_time_t short_block_lost_tot;
-
- /* Statistics */
- s_time_t extra_time_tot;
-
#ifdef SEDF_STATS
s_time_t block_time_tot;
- s_time_t penalty_time_tot;
int block_tot;
int short_block_tot;
int long_block_tot;
- int pen_extra_blocks;
- int pen_extra_slices;
+ s_time_t miss_time;
+ s_time_t over_time;
+ int miss_tot;
+ int over_tot;
#endif
};
struct sedf_cpu_info {
struct list_head runnableq;
struct list_head waitq;
- struct list_head extraq[2];
s_time_t current_slice_expires;
};
@@ -115,102 +90,20 @@ struct sedf_cpu_info {
#define CPU_INFO(cpu) \
((struct sedf_cpu_info *)per_cpu(schedule_data, cpu).sched_priv)
#define LIST(d) (&EDOM_INFO(d)->list)
-#define EXTRALIST(d,i) (&(EDOM_INFO(d)->extralist[i]))
#define RUNQ(cpu) (&CPU_INFO(cpu)->runnableq)
#define WAITQ(cpu) (&CPU_INFO(cpu)->waitq)
-#define EXTRAQ(cpu,i) (&(CPU_INFO(cpu)->extraq[i]))
#define IDLETASK(cpu) (idle_vcpu[cpu])
#define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period)
#define DIV_UP(x,y) (((x) + (y) - 1) / y)
-#define extra_runs(inf) ((inf->status) & 6)
-#define extra_get_cur_q(inf) (((inf->status & 6) >> 1)-1)
#define sedf_runnable(edom) (!(EDOM_INFO(edom)->status & SEDF_ASLEEP))
+#define sedf_soft(edom) (EDOM_INFO(edom)->status & SEDF_SOFT_TASK)
-static void sedf_dump_cpu_state(const struct scheduler *ops, int i);
-
-static inline int extraq_on(struct vcpu *d, int i)
-{
- return ((EXTRALIST(d,i)->next != NULL) &&
- (EXTRALIST(d,i)->next != EXTRALIST(d,i)));
-}
-
-static inline void extraq_del(struct vcpu *d, int i)
-{
- struct list_head *list = EXTRALIST(d,i);
- ASSERT(extraq_on(d,i));
- list_del(list);
- list->next = NULL;
- ASSERT(!extraq_on(d, i));
-}
-
-/*
- * Adds a domain to the queue of processes which are aware of extra time. List
- * is sorted by score, where a lower score means higher priority for an extra
- * slice. It also updates the score, by simply subtracting a fixed value from
- * each entry, in order to avoid overflow. The algorithm works by simply
- * charging each domain that recieved extratime with an inverse of its weight.
- */
-static inline void extraq_add_sort_update(struct vcpu *d, int i, int sub)
-{
- struct list_head *cur;
- struct sedf_vcpu_info *curinf;
-
- ASSERT(!extraq_on(d,i));
-
- /*
- * Iterate through all elements to find our "hole" and on our way
- * update all the other scores.
- */
- list_for_each ( cur, EXTRAQ(d->processor, i) )
- {
- curinf = list_entry(cur,struct sedf_vcpu_info,extralist[i]);
- curinf->score[i] -= sub;
- if ( EDOM_INFO(d)->score[i] < curinf->score[i] )
- break;
- }
-
- /* cur now contains the element, before which we'll enqueue */
- list_add(EXTRALIST(d,i),cur->prev);
-
- /* Continue updating the extraq */
- if ( (cur != EXTRAQ(d->processor,i)) && sub )
- {
- for ( cur = cur->next; cur != EXTRAQ(d->processor,i); cur = cur->next )
- {
- curinf = list_entry(cur,struct sedf_vcpu_info, extralist[i]);
- curinf->score[i] -= sub;
- }
- }
-
- ASSERT(extraq_on(d,i));
-}
-static inline void extraq_check(struct vcpu *d)
-{
- if ( extraq_on(d, EXTRA_UTIL_Q) )
- {
- if ( !(EDOM_INFO(d)->status & EXTRA_AWARE) &&
- !extra_runs(EDOM_INFO(d)) )
- extraq_del(d, EXTRA_UTIL_Q);
- }
- else
- {
- if ( (EDOM_INFO(d)->status & EXTRA_AWARE) && sedf_runnable(d) )
- extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
- }
-}
-
-static inline void extraq_check_add_unblocked(struct vcpu *d, int priority)
-{
- struct sedf_vcpu_info *inf = EDOM_INFO(d);
- if ( inf->status & EXTRA_AWARE )
- /* Put on the weighted extraq without updating any scores */
- extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
-}
+static void sedf_dump_cpu_state(const struct scheduler *ops, int i);
static inline int __task_on_queue(struct vcpu *d)
{
@@ -284,11 +177,7 @@ static inline void __add_to_runqueue_sort(struct vcpu *v)
static void sedf_insert_vcpu(const struct scheduler *ops, struct vcpu *v)
{
- if ( !is_idle_vcpu(v) )
- {
- extraq_check(v);
- }
- else
+ if ( is_idle_vcpu(v) )
{
EDOM_INFO(v)->deadl_abs = 0;
EDOM_INFO(v)->status &= ~SEDF_ASLEEP;
@@ -305,19 +194,23 @@ static void *sedf_alloc_vdata(const struct scheduler
*ops, struct vcpu *v, void
inf->vcpu = v;
- /* Every VCPU gets an equal share of extratime by default */
- inf->deadl_abs = 0;
- inf->latency = 0;
- inf->status = EXTRA_AWARE | SEDF_ASLEEP;
- inf->extraweight = 1;
- /* Upon creation all domain are best-effort */
- inf->period = WEIGHT_PERIOD;
- inf->slice = 0;
+ inf->deadl_abs = 0;
+ inf->cputime = 0;
+ inf->status = SEDF_ASLEEP;
+
+ if (v->domain->domain_id == 0)
+ {
+        /* Domain 0 needs a non-zero slice to boot the machine */
+ inf->period = DEFAULT_PERIOD;
+ inf->slice = DEFAULT_SLICE;
+ }
+ else
+ {
+ inf->period = DEFAULT_PERIOD;
+ inf->slice = 0;
+ }
- inf->period_orig = inf->period; inf->slice_orig = inf->slice;
INIT_LIST_HEAD(&(inf->list));
- INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
- INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));
SCHED_STAT_CRANK(vcpu_init);
@@ -333,8 +226,6 @@ sedf_alloc_pdata(const struct scheduler *ops, int cpu)
BUG_ON(spc == NULL);
INIT_LIST_HEAD(&spc->waitq);
INIT_LIST_HEAD(&spc->runnableq);
- INIT_LIST_HEAD(&spc->extraq[EXTRA_PEN_Q]);
- INIT_LIST_HEAD(&spc->extraq[EXTRA_UTIL_Q]);
return (void *)spc;
}
@@ -410,49 +301,59 @@ static void desched_edf_dom(s_time_t now, struct vcpu* d)
__del_from_queue(d);
- /*
- * Manage bookkeeping (i.e. calculate next deadline, memorise
- * overrun-time of slice) of finished domains.
- */
+#ifdef SEDF_STATS
+ /* Manage deadline misses */
+ if ( unlikely(inf->deadl_abs < now) )
+ {
+ inf->miss_tot++;
+ inf->miss_time += inf->cputime;
+ }
+#endif
+
+ /* Manage overruns */
if ( inf->cputime >= inf->slice )
{
inf->cputime -= inf->slice;
-
- if ( inf->period < inf->period_orig )
- {
- /* This domain runs in latency scaling or burst mode */
- inf->period *= 2;
- inf->slice *= 2;
- if ( (inf->period > inf->period_orig) ||
- (inf->slice > inf->slice_orig) )
- {
- /* Reset slice and period */
- inf->period = inf->period_orig;
- inf->slice = inf->slice_orig;
- }
- }
/* Set next deadline */
inf->deadl_abs += inf->period;
+
+ /* Ensure that the cputime is always less than slice */
+ if ( unlikely(inf->cputime > inf->slice) )
+ {
+#ifdef SEDF_STATS
+ inf->over_tot++;
+ inf->over_time += inf->cputime;
+#endif
+
+ /* Make up for the overage by pushing the deadline
+ into the future */
+ inf->deadl_abs += ((inf->cputime / inf->slice)
+ * inf->period) * 2;
+ inf->cputime -= (inf->cputime / inf->slice) * inf->slice;
+ }
+
+ /* Ensure that the start of the next period is in the future */
+ if ( unlikely(PERIOD_BEGIN(inf) < now) )
+ inf->deadl_abs +=
+ (DIV_UP(now - PERIOD_BEGIN(inf),
+ inf->period)) * inf->period;
}
/* Add a runnable domain to the waitqueue */
if ( sedf_runnable(d) )
{
- __add_to_waitqueue_sort(d);
- }
- else
- {
- /* We have a blocked realtime task -> remove it from exqs too */
- if ( extraq_on(d, EXTRA_PEN_Q) )
- extraq_del(d, EXTRA_PEN_Q);
- if ( extraq_on(d, EXTRA_UTIL_Q) )
- extraq_del(d, EXTRA_UTIL_Q);
+ if( sedf_soft(d) )
+ {
+ __add_to_runqueue_sort(d);
+ }
+ else
+ {
+ __add_to_waitqueue_sort(d);
+ }
}
-
+
ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
- ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
- sedf_runnable(d)));
}
@@ -498,217 +399,12 @@ static void update_queues(
/* Put them back into the queue */
__add_to_waitqueue_sort(curinf->vcpu);
}
- else if ( unlikely((curinf->deadl_abs < now) ||
- (curinf->cputime > curinf->slice)) )
- {
- /*
- * We missed the deadline or the slice was already finished.
- * Might hapen because of dom_adj.
- */
- printk("\tDomain %i.%i exceeded it's deadline/"
- "slice (%"PRIu64" / %"PRIu64") now: %"PRIu64
- " cputime: %"PRIu64"\n",
- curinf->vcpu->domain->domain_id,
- curinf->vcpu->vcpu_id,
- curinf->deadl_abs, curinf->slice, now,
- curinf->cputime);
- __del_from_queue(curinf->vcpu);
-
- /* Common case: we miss one period */
- curinf->deadl_abs += curinf->period;
-
- /*
- * If we are still behind: modulo arithmetic, force deadline
- * to be in future and aligned to period borders.
- */
- if ( unlikely(curinf->deadl_abs < now) )
- curinf->deadl_abs +=
- DIV_UP(now - curinf->deadl_abs,
- curinf->period) * curinf->period;
- ASSERT(curinf->deadl_abs >= now);
-
- /* Give a fresh slice */
- curinf->cputime = 0;
- if ( PERIOD_BEGIN(curinf) > now )
- __add_to_waitqueue_sort(curinf->vcpu);
- else
- __add_to_runqueue_sort(curinf->vcpu);
- }
else
break;
}
}
-/*
- * removes a domain from the head of the according extraQ and
- * requeues it at a specified position:
- * round-robin extratime: end of extraQ
- * weighted ext.: insert in sorted list by score
- * if the domain is blocked / has regained its short-block-loss
- * time it is not put on any queue.
- */
-static void desched_extra_dom(s_time_t now, struct vcpu *d)
-{
- struct sedf_vcpu_info *inf = EDOM_INFO(d);
- int i = extra_get_cur_q(inf);
- unsigned long oldscore;
-
- ASSERT(extraq_on(d, i));
-
- /* Unset all running flags */
- inf->status &= ~(EXTRA_RUN_PEN | EXTRA_RUN_UTIL);
- /* Fresh slice for the next run */
- inf->cputime = 0;
- /* Accumulate total extratime */
- inf->extra_time_tot += now - inf->sched_start_abs;
- /* Remove extradomain from head of the queue. */
- extraq_del(d, i);
-
- /* Update the score */
- oldscore = inf->score[i];
- if ( i == EXTRA_PEN_Q )
- {
- /* Domain was running in L0 extraq */
- /* reduce block lost, probably more sophistication here!*/
- /*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
- inf->short_block_lost_tot -= now - inf->sched_start_abs;
-#if 0
- /* KAF: If we don't exit short-blocking state at this point
- * domain0 can steal all CPU for up to 10 seconds before
- * scheduling settles down (when competing against another
- * CPU-bound domain). Doing this seems to make things behave
- * nicely. Noone gets starved by default.
- */
- if ( inf->short_block_lost_tot <= 0 )
-#endif
- {
- /* We have (over-)compensated our block penalty */
- inf->short_block_lost_tot = 0;
- /* We don't want a place on the penalty queue anymore! */
- inf->status &= ~EXTRA_WANT_PEN_Q;
- goto check_extra_queues;
- }
-
- /*
- * We have to go again for another try in the block-extraq,
- * the score is not used incremantally here, as this is
- * already done by recalculating the block_lost
- */
- inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
- inf->short_block_lost_tot;
- oldscore = 0;
- }
- else
- {
- /*
- * Domain was running in L1 extraq => score is inverse of
- * utilization and is used somewhat incremental!
- */
- if ( !inf->extraweight )
- {
- /* NB: use fixed point arithmetic with 10 bits */
- inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
- inf->slice;
- }
- else
- {
- /*
- * Conversion between realtime utilisation and extrawieght:
- * full (ie 100%) utilization is equivalent to 128 extraweight
- */
- inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
- }
- }
-
- check_extra_queues:
- /* Adding a runnable domain to the right queue and removing blocked ones */
- if ( sedf_runnable(d) )
- {
- /* Add according to score: weighted round robin */
- if (((inf->status & EXTRA_AWARE) && (i == EXTRA_UTIL_Q)) ||
- ((inf->status & EXTRA_WANT_PEN_Q) && (i == EXTRA_PEN_Q)))
- extraq_add_sort_update(d, i, oldscore);
- }
- else
- {
- /* Remove this blocked domain from the waitq! */
- __del_from_queue(d);
- /* Make sure that we remove a blocked domain from the other
- * extraq too. */
- if ( i == EXTRA_PEN_Q )
- {
- if ( extraq_on(d, EXTRA_UTIL_Q) )
- extraq_del(d, EXTRA_UTIL_Q);
- }
- else
- {
- if ( extraq_on(d, EXTRA_PEN_Q) )
- extraq_del(d, EXTRA_PEN_Q);
- }
- }
-
- ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
- ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
- sedf_runnable(d)));
-}
-
-
-static struct task_slice sedf_do_extra_schedule(
- s_time_t now, s_time_t end_xt, struct list_head *extraq[], int cpu)
-{
- struct task_slice ret = { 0 };
- struct sedf_vcpu_info *runinf;
- ASSERT(end_xt > now);
-
- /* Enough time left to use for extratime? */
- if ( end_xt - now < EXTRA_QUANTUM )
- goto return_idle;
-
- if ( !list_empty(extraq[EXTRA_PEN_Q]) )
- {
- /*
- * We still have elements on the level 0 extraq
- * => let those run first!
- */
- runinf = list_entry(extraq[EXTRA_PEN_Q]->next,
- struct sedf_vcpu_info, extralist[EXTRA_PEN_Q]);
- runinf->status |= EXTRA_RUN_PEN;
- ret.task = runinf->vcpu;
- ret.time = EXTRA_QUANTUM;
-#ifdef SEDF_STATS
- runinf->pen_extra_slices++;
-#endif
- }
- else
- {
- if ( !list_empty(extraq[EXTRA_UTIL_Q]) )
- {
- /* Use elements from the normal extraqueue */
- runinf = list_entry(extraq[EXTRA_UTIL_Q]->next,
- struct sedf_vcpu_info,
- extralist[EXTRA_UTIL_Q]);
- runinf->status |= EXTRA_RUN_UTIL;
- ret.task = runinf->vcpu;
- ret.time = EXTRA_QUANTUM;
- }
- else
- goto return_idle;
- }
-
- ASSERT(ret.time > 0);
- ASSERT(sedf_runnable(ret.task));
- return ret;
-
- return_idle:
- ret.task = IDLETASK(cpu);
- ret.time = end_xt - now;
- ASSERT(ret.time > 0);
- ASSERT(sedf_runnable(ret.task));
- return ret;
-}
-
-
static int sedf_init(struct scheduler *ops)
{
struct sedf_priv_info *prv;
@@ -748,8 +444,6 @@ static struct task_slice sedf_do_schedule(
struct list_head *runq = RUNQ(cpu);
struct list_head *waitq = WAITQ(cpu);
struct sedf_vcpu_info *inf = EDOM_INFO(current);
- struct list_head *extraq[] = {
- EXTRAQ(cpu, EXTRA_PEN_Q), EXTRAQ(cpu, EXTRA_UTIL_Q)};
struct sedf_vcpu_info *runinf, *waitinf;
struct task_slice ret;
@@ -770,15 +464,7 @@ static struct task_slice sedf_do_schedule(
if ( inf->status & SEDF_ASLEEP )
inf->block_abs = now;
- if ( unlikely(extra_runs(inf)) )
- {
- /* Special treatment of domains running in extra time */
- desched_extra_dom(now, current);
- }
- else
- {
- desched_edf_dom(now, current);
- }
+ desched_edf_dom(now, current);
check_waitq:
update_queues(now, runq, waitq);
@@ -820,12 +506,9 @@ static struct task_slice sedf_do_schedule(
else
{
waitinf = list_entry(waitq->next,struct sedf_vcpu_info, list);
- /*
- * We could not find any suitable domain
- * => look for domains that are aware of extratime
- */
- ret = sedf_do_extra_schedule(now, PERIOD_BEGIN(waitinf),
- extraq, cpu);
+
+ ret.task = IDLETASK(cpu);
+ ret.time = PERIOD_BEGIN(waitinf) - now;
}
/*
@@ -833,11 +516,8 @@ static struct task_slice sedf_do_schedule(
* still can happen!!!
*/
if ( ret.time < 0)
- {
printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
ret.time);
- ret.time = EXTRA_QUANTUM;
- }
ret.migrated = 0;
@@ -848,7 +528,6 @@ static struct task_slice sedf_do_schedule(
return ret;
}
-
static void sedf_sleep(const struct scheduler *ops, struct vcpu *d)
{
if ( is_idle_vcpu(d) )
@@ -864,13 +543,35 @@ static void sedf_sleep(const struct scheduler *ops,
struct vcpu *d)
{
if ( __task_on_queue(d) )
__del_from_queue(d);
- if ( extraq_on(d, EXTRA_UTIL_Q) )
- extraq_del(d, EXTRA_UTIL_Q);
- if ( extraq_on(d, EXTRA_PEN_Q) )
- extraq_del(d, EXTRA_PEN_Q);
}
}
+/*
+ * Compares two domains in the relation of whether the one is allowed to
+ * interrupt the others execution.
+ * It returns true (!=0) if a switch to the other domain is good.
+ * Priority scheme is as follows:
+ * EDF: early deadline > late deadline
+ */
+static inline int should_switch(struct vcpu *cur,
+ struct vcpu *other,
+ s_time_t now)
+{
+ struct sedf_vcpu_info *cur_inf, *other_inf;
+ cur_inf = EDOM_INFO(cur);
+ other_inf = EDOM_INFO(other);
+
+ /* Always interrupt idle domain. */
+ if ( is_idle_vcpu(cur) )
+ return 1;
+
+ /* Check whether we need to make an earlier scheduling decision */
+ if ( PERIOD_BEGIN(other_inf) <
+ CPU_INFO(other->processor)->current_slice_expires )
+ return 1;
+
+ return 0;
+}
/*
* This function wakes up a domain, i.e. moves them into the waitqueue
@@ -904,8 +605,6 @@ static void sedf_sleep(const struct scheduler *ops, struct
vcpu *d)
*
* -this also doesn't disturb scheduling, but might lead to the fact, that
* the domain can't finish it's workload in the period
- * -in addition to that the domain can be treated prioritised when
- * extratime is available
* -addition: experiments have shown that this may have a HUGE impact on
* performance of other domains, becaus it can lead to excessive context
* switches
@@ -931,10 +630,6 @@ static void sedf_sleep(const struct scheduler *ops, struct
vcpu *d)
* DRB______D___URRRR___D...<prev [Thread] next>
* (D) <- old deadline was here
* -problem: deadlines don't occur isochronous anymore
- * Part 2c (Improved Atropos design)
- * -when a domain unblocks it is given a very short period (=latency hint)
- * and slice length scaled accordingly
- * -both rise again to the original value (e.g. get doubled every period)
*
* 3. Unconservative (i.e. incorrect)
* -to boost the performance of I/O dependent domains it would be possible
@@ -944,136 +639,6 @@ static void sedf_sleep(const struct scheduler *ops,
struct vcpu *d)
* -either behaviour can lead to missed deadlines in other domains as
* opposed to approaches 1,2a,2b
*/
-static void unblock_short_extra_support(
- struct sedf_vcpu_info* inf, s_time_t now)
-{
- /*
- * This unblocking scheme tries to support the domain, by assigning it
- * a priority in extratime distribution according to the loss of time
- * in this slice due to blocking
- */
- s_time_t pen;
-
- /* No more realtime execution in this period! */
- inf->deadl_abs += inf->period;
- if ( likely(inf->block_abs) )
- {
- /* Treat blocked time as consumed by the domain */
- /*inf->cputime += now - inf->block_abs;*/
- /*
- * Penalty is time the domain would have
- * had if it continued to run.
- */
- pen = (inf->slice - inf->cputime);
- if ( pen < 0 )
- pen = 0;
- /* Accumulate all penalties over the periods */
- /*inf->short_block_lost_tot += pen;*/
- /* Set penalty to the current value */
- inf->short_block_lost_tot = pen;
- /* Not sure which one is better.. but seems to work well... */
-
- if ( inf->short_block_lost_tot )
- {
- inf->score[0] = (inf->period << 10) /
- inf->short_block_lost_tot;
-#ifdef SEDF_STATS
- inf->pen_extra_blocks++;
-#endif
- if ( extraq_on(inf->vcpu, EXTRA_PEN_Q) )
- /* Remove domain for possible resorting! */
- extraq_del(inf->vcpu, EXTRA_PEN_Q);
- else
- /*
- * Remember that we want to be on the penalty q
- * so that we can continue when we (un-)block
- * in penalty-extratime
- */
- inf->status |= EXTRA_WANT_PEN_Q;
-
- /* (re-)add domain to the penalty extraq */
- extraq_add_sort_update(inf->vcpu, EXTRA_PEN_Q, 0);
- }
- }
-
- /* Give it a fresh slice in the next period! */
- inf->cputime = 0;
-}
-
-
-static void unblock_long_cons_b(struct sedf_vcpu_info* inf,s_time_t now)
-{
- /* Conservative 2b */
-
- /* Treat the unblocking time as a start of a new period */
- inf->deadl_abs = now + inf->period;
- inf->cputime = 0;
-}
-
-
-#define DOMAIN_EDF 1
-#define DOMAIN_EXTRA_PEN 2
-#define DOMAIN_EXTRA_UTIL 3
-#define DOMAIN_IDLE 4
-static inline int get_run_type(struct vcpu* d)
-{
- struct sedf_vcpu_info* inf = EDOM_INFO(d);
- if (is_idle_vcpu(d))
- return DOMAIN_IDLE;
- if (inf->status & EXTRA_RUN_PEN)
- return DOMAIN_EXTRA_PEN;
- if (inf->status & EXTRA_RUN_UTIL)
- return DOMAIN_EXTRA_UTIL;
- return DOMAIN_EDF;
-}
-
-
-/*
- * Compares two domains in the relation of whether the one is allowed to
- * interrupt the others execution.
- * It returns true (!=0) if a switch to the other domain is good.
- * Current Priority scheme is as follows:
- * EDF > L0 (penalty based) extra-time >
- * L1 (utilization) extra-time > idle-domain
- * In the same class priorities are assigned as following:
- * EDF: early deadline > late deadline
- * L0 extra-time: lower score > higher score
- */
-static inline int should_switch(struct vcpu *cur,
- struct vcpu *other,
- s_time_t now)
-{
- struct sedf_vcpu_info *cur_inf, *other_inf;
- cur_inf = EDOM_INFO(cur);
- other_inf = EDOM_INFO(other);
-
- /* Check whether we need to make an earlier scheduling decision */
- if ( PERIOD_BEGIN(other_inf) <
- CPU_INFO(other->processor)->current_slice_expires )
- return 1;
-
- /* No timing-based switches need to be taken into account here */
- switch ( get_run_type(cur) )
- {
- case DOMAIN_EDF:
- /* Do not interrupt a running EDF domain */
- return 0;
- case DOMAIN_EXTRA_PEN:
- /* Check whether we also want the L0 ex-q with lower score */
- return ((other_inf->status & EXTRA_WANT_PEN_Q) &&
- (other_inf->score[EXTRA_PEN_Q] <
- cur_inf->score[EXTRA_PEN_Q]));
- case DOMAIN_EXTRA_UTIL:
- /* Check whether we want the L0 extraq. Don't
- * switch if both domains want L1 extraq. */
- return !!(other_inf->status & EXTRA_WANT_PEN_Q);
- case DOMAIN_IDLE:
- return 1;
- }
-
- return 1;
-}
-
static void sedf_wake(const struct scheduler *ops, struct vcpu *d)
{
s_time_t now = NOW();
@@ -1087,8 +652,6 @@ static void sedf_wake(const struct scheduler *ops, struct
vcpu *d)
ASSERT(!sedf_runnable(d));
inf->status &= ~SEDF_ASLEEP;
- ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
- ASSERT(!extraq_on(d, EXTRA_PEN_Q));
if ( unlikely(inf->deadl_abs == 0) )
{
@@ -1100,62 +663,65 @@ static void sedf_wake(const struct scheduler *ops,
struct vcpu *d)
inf->block_tot++;
#endif
- if ( unlikely(now < PERIOD_BEGIN(inf)) )
- {
- /* Unblocking in extra-time! */
- if ( inf->status & EXTRA_WANT_PEN_Q )
+ if ( sedf_soft(d) )
+ {
+ /* Apply CBS rule
+ * Where:
+ * c == Remaining server slice == (inf->slice - cpu_time)
+ * d == Server (vcpu) deadline == inf->deadl_abs
+ * r == Wake-up time of vcpu == now
+ * U == Server (vcpu) bandwidth == (inf->slice / inf->period)
+ *
+     * if c >= (d - r) * U --->
+     * (inf->slice - inf->cputime) >= (inf->deadl_abs - now) * (inf->slice / inf->period)
+ *
+ * If true, push deadline back by one period and refresh slice, else
+ * use current slice and deadline.
+ */
+ if((inf->slice - inf->cputime) >=
+ ((inf->deadl_abs - now) * (inf->slice / inf->period)))
{
- /*
- * We have a domain that wants compensation
- * for block penalty and did just block in
- * its compensation time. Give it another
- * chance!
- */
- extraq_add_sort_update(d, EXTRA_PEN_Q, 0);
+ /* Push back deadline by one period */
+ inf->deadl_abs += inf->period;
+ inf->cputime = 0;
}
- extraq_check_add_unblocked(d, 0);
- }
- else
- {
+
+ /* In CBS we don't care if the period has begun,
+ * the task doesn't have to wait for its period
+ * because it'll never request more than its slice
+ * for any given period.
+ */
+ __add_to_runqueue_sort(d);
+ }
+ else {
+ /* Task is a hard task, treat accordingly */
+#ifdef SEDF_STATS
if ( now < inf->deadl_abs )
{
/* Short blocking */
-#ifdef SEDF_STATS
inf->short_block_tot++;
-#endif
- unblock_short_extra_support(inf, now);
-
- extraq_check_add_unblocked(d, 1);
}
else
{
- /* Long unblocking */
-#ifdef SEDF_STATS
+ /* Long unblocking, someone is going to miss their deadline. */
inf->long_block_tot++;
+ }
#endif
- unblock_long_cons_b(inf, now);
- extraq_check_add_unblocked(d, 1);
- }
+ if ( PERIOD_BEGIN(inf) > now )
+ __add_to_waitqueue_sort(d);
+ else
+ __add_to_runqueue_sort(d);
}
-
- if ( PERIOD_BEGIN(inf) > now )
- __add_to_waitqueue_sort(d);
- else
- __add_to_runqueue_sort(d);
#ifdef SEDF_STATS
/* Do some statistics here... */
if ( inf->block_abs != 0 )
{
inf->block_time_tot += now - inf->block_abs;
- inf->penalty_time_tot +=
- PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
}
#endif
- /* Sanity check: make sure each extra-aware domain IS on the util-q! */
- ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
ASSERT(__task_on_queue(d));
/*
* Check whether the awakened task needs to invoke the do_schedule
@@ -1170,35 +736,27 @@ static void sedf_wake(const struct scheduler *ops,
struct vcpu *d)
cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
}
-
/* Print a lot of useful information about a domains in the system */
static void sedf_dump_domain(struct vcpu *d)
{
printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
d->is_running ? 'T':'F');
- printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu"
- " sc=%i xtr(%s)=%"PRIu64" ew=%hu",
- EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
- EDOM_INFO(d)->weight,
- EDOM_INFO(d)->score[EXTRA_UTIL_Q],
- (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
- EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
+ printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64,
+ EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs);
#ifdef SEDF_STATS
- if ( EDOM_INFO(d)->block_time_tot != 0 )
- printk(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
- EDOM_INFO(d)->block_time_tot);
+ printk(" m=%u mt=%"PRIu64"o=%u ot=%"PRIu64,
+ EDOM_INFO(d)->miss_tot, EDOM_INFO(d)->miss_time,
+ EDOM_INFO(d)->over_tot, EDOM_INFO(d)->over_time);
+
if ( EDOM_INFO(d)->block_tot != 0 )
- printk("\n blks=%u sh=%u (%u%%) (shex=%i "\
- "shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
+ printk("\n blks=%u sh=%u (%u%%) "\
+ "l=%u (%u%%) avg: b=%"PRIu64,
EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
(EDOM_INFO(d)->short_block_tot * 100) / EDOM_INFO(d)->block_tot,
- EDOM_INFO(d)->pen_extra_blocks,
- EDOM_INFO(d)->pen_extra_slices,
EDOM_INFO(d)->long_block_tot,
(EDOM_INFO(d)->long_block_tot * 100) / EDOM_INFO(d)->block_tot,
- (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot,
- (EDOM_INFO(d)->penalty_time_tot) / EDOM_INFO(d)->block_tot);
+ (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot);
#endif
printk("\n");
}
@@ -1234,30 +792,6 @@ static void sedf_dump_cpu_state(const struct scheduler
*ops, int i)
sedf_dump_domain(d_inf->vcpu);
}
- queue = EXTRAQ(i,EXTRA_PEN_Q); loop = 0;
- printk("\nEXTRAQ (penalty) rq %lx n: %lx, p: %lx\n",
- (unsigned long)queue, (unsigned long) queue->next,
- (unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue )
- {
- d_inf = list_entry(list, struct sedf_vcpu_info,
- extralist[EXTRA_PEN_Q]);
- printk("%3d: ",loop++);
- sedf_dump_domain(d_inf->vcpu);
- }
-
- queue = EXTRAQ(i,EXTRA_UTIL_Q); loop = 0;
- printk("\nEXTRAQ (utilization) rq %lx n: %lx, p: %lx\n",
- (unsigned long)queue, (unsigned long) queue->next,
- (unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue )
- {
- d_inf = list_entry(list, struct sedf_vcpu_info,
- extralist[EXTRA_UTIL_Q]);
- printk("%3d: ",loop++);
- sedf_dump_domain(d_inf->vcpu);
- }
-
loop = 0;
printk("\nnot on Q\n");
@@ -1279,199 +813,69 @@ static void sedf_dump_cpu_state(const struct scheduler
*ops, int i)
}
-/* Adjusts periods and slices of the domains accordingly to their weights */
-static int sedf_adjust_weights(struct cpupool *c, int nr_cpus, int *sumw,
s_time_t *sumt)
-{
- struct vcpu *p;
- struct domain *d;
- unsigned int cpu;
-
- /*
- * Sum across all weights. Notice that no runq locking is needed
- * here: the caller holds sedf_priv_info.lock and we're not changing
- * anything that is accessed during scheduling.
- */
- rcu_read_lock(&domlist_read_lock);
- for_each_domain_in_cpupool( d, c )
- {
- for_each_vcpu( d, p )
- {
- if ( (cpu = p->processor) >= nr_cpus )
- continue;
-
- if ( EDOM_INFO(p)->weight )
- {
- sumw[cpu] += EDOM_INFO(p)->weight;
- }
- else
- {
- /*
- * Don't modify domains who don't have a weight, but sum
- * up the time they need, projected to a WEIGHT_PERIOD,
- * so that this time is not given to the weight-driven
- * domains
- */
-
- /* Check for overflows */
- ASSERT((WEIGHT_PERIOD < ULONG_MAX)
- && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
- sumt[cpu] +=
- (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) /
- EDOM_INFO(p)->period_orig;
- }
- }
- }
- rcu_read_unlock(&domlist_read_lock);
-
- /*
- * Adjust all slices (and periods) to the new weight. Unlike above, we
- * need to take thr runq lock for the various VCPUs: we're modyfing
- * slice and period which are referenced during scheduling.
- */
- rcu_read_lock(&domlist_read_lock);
- for_each_domain_in_cpupool( d, c )
- {
- for_each_vcpu ( d, p )
- {
- if ( (cpu = p->processor) >= nr_cpus )
- continue;
- if ( EDOM_INFO(p)->weight )
- {
- /* Interrupts already off */
- spinlock_t *lock = vcpu_schedule_lock(p);
-
- EDOM_INFO(p)->period_orig =
- EDOM_INFO(p)->period = WEIGHT_PERIOD;
- EDOM_INFO(p)->slice_orig =
- EDOM_INFO(p)->slice =
- (EDOM_INFO(p)->weight *
- (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[cpu])) / sumw[cpu];
-
- vcpu_schedule_unlock(lock, p);
- }
- }
- }
- rcu_read_unlock(&domlist_read_lock);
-
- return 0;
-}
-
-
/* Set or fetch domain scheduling parameters */
static int sedf_adjust(const struct scheduler *ops, struct domain *p, struct
xen_domctl_scheduler_op *op)
{
struct sedf_priv_info *prv = SEDF_PRIV(ops);
unsigned long flags;
- unsigned int nr_cpus = cpumask_last(&cpu_online_map) + 1;
- int *sumw = xzalloc_array(int, nr_cpus);
- s_time_t *sumt = xzalloc_array(s_time_t, nr_cpus);
+ s_time_t now = NOW();
struct vcpu *v;
int rc = 0;
/*
* Serialize against the pluggable scheduler lock to protect from
* concurrent updates. We need to take the runq lock for the VCPUs
- * as well, since we are touching extraweight, weight, slice and
- * period. As in sched_credit2.c, runq locks nest inside the
- * pluggable scheduler lock.
+ * as well, since we are touching slice and period.
+ *
+ * As in sched_credit2.c, runq locks nest inside the pluggable scheduler
+ * lock.
*/
spin_lock_irqsave(&prv->lock, flags);
if ( op->cmd == XEN_DOMCTL_SCHEDOP_putinfo )
{
- /*
- * These are used in sedf_adjust_weights() but have to be allocated in
- * this function, as we need to avoid nesting xmem_pool_alloc's lock
- * within our prv->lock.
- */
- if ( !sumw || !sumt )
+ /* Check for sane parameters */
+ if ( !op->u.sedf.period )
{
- /* Check for errors here, the _getinfo branch doesn't care */
- rc = -ENOMEM;
+ printk("Period Not set");
+ rc = -EINVAL;
goto out;
}
- /* Check for sane parameters */
- if ( !op->u.sedf.period && !op->u.sedf.weight )
+ /*
+ * Sanity checking
+ */
+ if ( (op->u.sedf.period > PERIOD_MAX) ||
+ (op->u.sedf.period < PERIOD_MIN) ||
+ (op->u.sedf.slice > op->u.sedf.period) ||
+ (op->u.sedf.slice < SLICE_MIN) )
{
+ printk("Insane Parameters: period: %lu\tbudget: %lu\n",
op->u.sedf.period, op->u.sedf.slice);
rc = -EINVAL;
goto out;
}
- if ( op->u.sedf.weight )
+ /* Time-driven domains */
+ for_each_vcpu ( p, v )
{
- if ( (op->u.sedf.extratime & EXTRA_AWARE) &&
- (!op->u.sedf.period) )
+ spinlock_t *lock = vcpu_schedule_lock(v);
+
+ EDOM_INFO(v)->period = op->u.sedf.period;
+ EDOM_INFO(v)->slice = op->u.sedf.slice;
+ if(op->u.sedf.soft)
{
- /* Weight-driven domains with extratime only */
- for_each_vcpu ( p, v )
- {
- /* (Here and everywhere in the following) IRQs are already
off,
- * hence vcpu_spin_lock() is the one. */
- spinlock_t *lock = vcpu_schedule_lock(v);
-
- EDOM_INFO(v)->extraweight = op->u.sedf.weight;
- EDOM_INFO(v)->weight = 0;
- EDOM_INFO(v)->slice = 0;
- EDOM_INFO(v)->period = WEIGHT_PERIOD;
- vcpu_schedule_unlock(lock, v);
- }
+ EDOM_INFO(v)->status |= SEDF_SOFT_TASK;
}
else
{
- /* Weight-driven domains with real-time execution */
- for_each_vcpu ( p, v )
+ /* Correct deadline when switching from a soft to hard vcpu */
+ if( unlikely((EDOM_INFO(v)->deadl_abs - now) >=
(EDOM_INFO(v)->period * 3)) )
{
- spinlock_t *lock = vcpu_schedule_lock(v);
-
- EDOM_INFO(v)->weight = op->u.sedf.weight;
- vcpu_schedule_unlock(lock, v);
+ EDOM_INFO(v)->deadl_abs = (now - EDOM_INFO(v)->cputime) +
(2 * EDOM_INFO(v)->period);
}
+
+ EDOM_INFO(v)->status &= (~SEDF_SOFT_TASK);
}
- }
- else
- {
- /*
- * Sanity checking: note that disabling extra weight requires
- * that we set a non-zero slice.
- */
- if ( (op->u.sedf.period > PERIOD_MAX) ||
- (op->u.sedf.period < PERIOD_MIN) ||
- (op->u.sedf.slice > op->u.sedf.period) ||
- (op->u.sedf.slice < SLICE_MIN) )
- {
- rc = -EINVAL;
- goto out;
- }
-
- /* Time-driven domains */
- for_each_vcpu ( p, v )
- {
- spinlock_t *lock = vcpu_schedule_lock(v);
-
- EDOM_INFO(v)->weight = 0;
- EDOM_INFO(v)->extraweight = 0;
- EDOM_INFO(v)->period_orig =
- EDOM_INFO(v)->period = op->u.sedf.period;
- EDOM_INFO(v)->slice_orig =
- EDOM_INFO(v)->slice = op->u.sedf.slice;
- vcpu_schedule_unlock(lock, v);
- }
- }
-
- rc = sedf_adjust_weights(p->cpupool, nr_cpus, sumw, sumt);
- if ( rc )
- goto out;
-
- for_each_vcpu ( p, v )
- {
- spinlock_t *lock = vcpu_schedule_lock(v);
-
- EDOM_INFO(v)->status =
- (EDOM_INFO(v)->status &
- ~EXTRA_AWARE) | (op->u.sedf.extratime & EXTRA_AWARE);
- EDOM_INFO(v)->latency = op->u.sedf.latency;
- extraq_check(v);
vcpu_schedule_unlock(lock, v);
}
}
@@ -1485,17 +889,12 @@ static int sedf_adjust(const struct scheduler *ops,
struct domain *p, struct xen
op->u.sedf.period = EDOM_INFO(p->vcpu[0])->period;
op->u.sedf.slice = EDOM_INFO(p->vcpu[0])->slice;
- op->u.sedf.extratime = EDOM_INFO(p->vcpu[0])->status & EXTRA_AWARE;
- op->u.sedf.latency = EDOM_INFO(p->vcpu[0])->latency;
- op->u.sedf.weight = EDOM_INFO(p->vcpu[0])->weight;
+ op->u.sedf.soft = sedf_soft(p->vcpu[0]);
}
out:
spin_unlock_irqrestore(&prv->lock, flags);
- xfree(sumt);
- xfree(sumw);
-
return rc;
}
--
1.7.9.5
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace.