|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC PATCH 1/4] Implement cbs algorithm, remove extra queues, latency scaling, and weight support from sedf
---
xen/common/sched_sedf.c | 947 +++++++++--------------------------------------
1 file changed, 173 insertions(+), 774 deletions(-)
diff --git a/xen/common/sched_sedf.c b/xen/common/sched_sedf.c
index 0c9011a..2ee4538 100644
--- a/xen/common/sched_sedf.c
+++ b/xen/common/sched_sedf.c
@@ -25,24 +25,16 @@
#define CHECK(_p) ((void)0)
#endif
-#define EXTRA_NONE (0)
-#define EXTRA_AWARE (1)
-#define EXTRA_RUN_PEN (2)
-#define EXTRA_RUN_UTIL (4)
-#define EXTRA_WANT_PEN_Q (8)
-#define EXTRA_PEN_Q (0)
-#define EXTRA_UTIL_Q (1)
+#define SEDF_SOFT_TASK (1)
#define SEDF_ASLEEP (16)
-#define EXTRA_QUANTUM (MICROSECS(500))
-#define WEIGHT_PERIOD (MILLISECS(100))
-#define WEIGHT_SAFETY (MILLISECS(5))
+#define DEFAULT_PERIOD (MILLISECS(20))
+#define DEFAULT_SLICE (MILLISECS(10))
#define PERIOD_MAX MILLISECS(10000) /* 10s */
#define PERIOD_MIN (MICROSECS(10)) /* 10us */
#define SLICE_MIN (MICROSECS(5)) /* 5us */
-#define IMPLY(a, b) (!(a) || (b))
#define EQ(a, b) ((!!(a)) == (!!(b)))
@@ -58,24 +50,14 @@ struct sedf_priv_info {
struct sedf_vcpu_info {
struct vcpu *vcpu;
struct list_head list;
- struct list_head extralist[2];
/* Parameters for EDF */
s_time_t period; /* = relative deadline */
s_time_t slice; /* = worst case execution time */
-
- /* Advaced Parameters */
+ /* Note: Server bandwidth = (slice / period) */
- /* Latency Scaling */
- s_time_t period_orig;
- s_time_t slice_orig;
- s_time_t latency;
-
/* Status of domain */
int status;
- /* Weights for "Scheduling for beginners/ lazy/ etc." ;) */
- short weight;
- short extraweight;
/* Bookkeeping */
s_time_t deadl_abs;
s_time_t sched_start_abs;
@@ -84,28 +66,21 @@ struct sedf_vcpu_info {
s_time_t block_abs;
s_time_t unblock_abs;
- /* Scores for {util, block penalty}-weighted extratime distribution */
- int score[2];
- s_time_t short_block_lost_tot;
-
- /* Statistics */
- s_time_t extra_time_tot;
-
#ifdef SEDF_STATS
s_time_t block_time_tot;
- s_time_t penalty_time_tot;
int block_tot;
int short_block_tot;
int long_block_tot;
- int pen_extra_blocks;
- int pen_extra_slices;
+ s_time_t miss_time;
+ s_time_t over_time;
+ int miss_tot;
+ int over_tot;
#endif
};
struct sedf_cpu_info {
struct list_head runnableq;
struct list_head waitq;
- struct list_head extraq[2];
s_time_t current_slice_expires;
};
@@ -115,102 +90,20 @@ struct sedf_cpu_info {
#define CPU_INFO(cpu) \
((struct sedf_cpu_info *)per_cpu(schedule_data, cpu).sched_priv)
#define LIST(d) (&EDOM_INFO(d)->list)
-#define EXTRALIST(d,i) (&(EDOM_INFO(d)->extralist[i]))
#define RUNQ(cpu) (&CPU_INFO(cpu)->runnableq)
#define WAITQ(cpu) (&CPU_INFO(cpu)->waitq)
-#define EXTRAQ(cpu,i) (&(CPU_INFO(cpu)->extraq[i]))
#define IDLETASK(cpu) (idle_vcpu[cpu])
#define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period)
#define DIV_UP(x,y) (((x) + (y) - 1) / y)
-#define extra_runs(inf) ((inf->status) & 6)
-#define extra_get_cur_q(inf) (((inf->status & 6) >> 1)-1)
#define sedf_runnable(edom) (!(EDOM_INFO(edom)->status & SEDF_ASLEEP))
+#define sedf_soft(edom) (EDOM_INFO(edom)->status & SEDF_SOFT_TASK)
-static void sedf_dump_cpu_state(const struct scheduler *ops, int i);
-
-static inline int extraq_on(struct vcpu *d, int i)
-{
- return ((EXTRALIST(d,i)->next != NULL) &&
- (EXTRALIST(d,i)->next != EXTRALIST(d,i)));
-}
-
-static inline void extraq_del(struct vcpu *d, int i)
-{
- struct list_head *list = EXTRALIST(d,i);
- ASSERT(extraq_on(d,i));
- list_del(list);
- list->next = NULL;
- ASSERT(!extraq_on(d, i));
-}
-
-/*
- * Adds a domain to the queue of processes which are aware of extra time. List
- * is sorted by score, where a lower score means higher priority for an extra
- * slice. It also updates the score, by simply subtracting a fixed value from
- * each entry, in order to avoid overflow. The algorithm works by simply
- * charging each domain that recieved extratime with an inverse of its weight.
- */
-static inline void extraq_add_sort_update(struct vcpu *d, int i, int sub)
-{
- struct list_head *cur;
- struct sedf_vcpu_info *curinf;
-
- ASSERT(!extraq_on(d,i));
-
- /*
- * Iterate through all elements to find our "hole" and on our way
- * update all the other scores.
- */
- list_for_each ( cur, EXTRAQ(d->processor, i) )
- {
- curinf = list_entry(cur,struct sedf_vcpu_info,extralist[i]);
- curinf->score[i] -= sub;
- if ( EDOM_INFO(d)->score[i] < curinf->score[i] )
- break;
- }
-
- /* cur now contains the element, before which we'll enqueue */
- list_add(EXTRALIST(d,i),cur->prev);
-
- /* Continue updating the extraq */
- if ( (cur != EXTRAQ(d->processor,i)) && sub )
- {
- for ( cur = cur->next; cur != EXTRAQ(d->processor,i); cur = cur->next )
- {
- curinf = list_entry(cur,struct sedf_vcpu_info, extralist[i]);
- curinf->score[i] -= sub;
- }
- }
-
- ASSERT(extraq_on(d,i));
-}
-static inline void extraq_check(struct vcpu *d)
-{
- if ( extraq_on(d, EXTRA_UTIL_Q) )
- {
- if ( !(EDOM_INFO(d)->status & EXTRA_AWARE) &&
- !extra_runs(EDOM_INFO(d)) )
- extraq_del(d, EXTRA_UTIL_Q);
- }
- else
- {
- if ( (EDOM_INFO(d)->status & EXTRA_AWARE) && sedf_runnable(d) )
- extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
- }
-}
-
-static inline void extraq_check_add_unblocked(struct vcpu *d, int priority)
-{
- struct sedf_vcpu_info *inf = EDOM_INFO(d);
- if ( inf->status & EXTRA_AWARE )
- /* Put on the weighted extraq without updating any scores */
- extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
-}
+static void sedf_dump_cpu_state(const struct scheduler *ops, int i);
static inline int __task_on_queue(struct vcpu *d)
{
@@ -284,11 +177,7 @@ static inline void __add_to_runqueue_sort(struct vcpu *v)
static void sedf_insert_vcpu(const struct scheduler *ops, struct vcpu *v)
{
- if ( !is_idle_vcpu(v) )
- {
- extraq_check(v);
- }
- else
+ if ( is_idle_vcpu(v) )
{
EDOM_INFO(v)->deadl_abs = 0;
EDOM_INFO(v)->status &= ~SEDF_ASLEEP;
@@ -305,19 +194,23 @@ static void *sedf_alloc_vdata(const struct scheduler
*ops, struct vcpu *v, void
inf->vcpu = v;
- /* Every VCPU gets an equal share of extratime by default */
- inf->deadl_abs = 0;
- inf->latency = 0;
- inf->status = EXTRA_AWARE | SEDF_ASLEEP;
- inf->extraweight = 1;
- /* Upon creation all domain are best-effort */
- inf->period = WEIGHT_PERIOD;
- inf->slice = 0;
+ inf->deadl_abs = 0;
+ inf->cputime = 0;
+ inf->status = SEDF_ASLEEP;
+
+ if (v->domain->domain_id == 0)
+ {
+        /* Domain 0 needs a non-zero slice to boot the machine */
+ inf->period = DEFAULT_PERIOD;
+ inf->slice = DEFAULT_SLICE;
+ }
+ else
+ {
+ inf->period = DEFAULT_PERIOD;
+ inf->slice = 0;
+ }
- inf->period_orig = inf->period; inf->slice_orig = inf->slice;
INIT_LIST_HEAD(&(inf->list));
- INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
- INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));
SCHED_STAT_CRANK(vcpu_init);
@@ -333,8 +226,6 @@ sedf_alloc_pdata(const struct scheduler *ops, int cpu)
BUG_ON(spc == NULL);
INIT_LIST_HEAD(&spc->waitq);
INIT_LIST_HEAD(&spc->runnableq);
- INIT_LIST_HEAD(&spc->extraq[EXTRA_PEN_Q]);
- INIT_LIST_HEAD(&spc->extraq[EXTRA_UTIL_Q]);
return (void *)spc;
}
@@ -410,49 +301,59 @@ static void desched_edf_dom(s_time_t now, struct vcpu* d)
__del_from_queue(d);
- /*
- * Manage bookkeeping (i.e. calculate next deadline, memorise
- * overrun-time of slice) of finished domains.
- */
+#ifdef SEDF_STATS
+ /* Manage deadline misses */
+ if ( unlikely(inf->deadl_abs < now) )
+ {
+ inf->miss_tot++;
+ inf->miss_time += inf->cputime;
+ }
+#endif
+
+ /* Manage overruns */
if ( inf->cputime >= inf->slice )
{
inf->cputime -= inf->slice;
-
- if ( inf->period < inf->period_orig )
- {
- /* This domain runs in latency scaling or burst mode */
- inf->period *= 2;
- inf->slice *= 2;
- if ( (inf->period > inf->period_orig) ||
- (inf->slice > inf->slice_orig) )
- {
- /* Reset slice and period */
- inf->period = inf->period_orig;
- inf->slice = inf->slice_orig;
- }
- }
/* Set next deadline */
inf->deadl_abs += inf->period;
+
+ /* Ensure that the cputime is always less than slice */
+ if ( unlikely(inf->cputime > inf->slice) )
+ {
+#ifdef SEDF_STATS
+ inf->over_tot++;
+ inf->over_time += inf->cputime;
+#endif
+
+ /* Make up for the overage by pushing the deadline
+ into the future */
+ inf->deadl_abs += ((inf->cputime / inf->slice)
+ * inf->period) * 2;
+ inf->cputime -= (inf->cputime / inf->slice) * inf->slice;
+ }
+
+ /* Ensure that the start of the next period is in the future */
+ if ( unlikely(PERIOD_BEGIN(inf) < now) )
+ inf->deadl_abs +=
+ (DIV_UP(now - PERIOD_BEGIN(inf),
+ inf->period)) * inf->period;
}
/* Add a runnable domain to the waitqueue */
if ( sedf_runnable(d) )
{
- __add_to_waitqueue_sort(d);
- }
- else
- {
- /* We have a blocked realtime task -> remove it from exqs too */
- if ( extraq_on(d, EXTRA_PEN_Q) )
- extraq_del(d, EXTRA_PEN_Q);
- if ( extraq_on(d, EXTRA_UTIL_Q) )
- extraq_del(d, EXTRA_UTIL_Q);
+ if( sedf_soft(d) )
+ {
+ __add_to_runqueue_sort(d);
+ }
+ else
+ {
+ __add_to_waitqueue_sort(d);
+ }
}
-
+
ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
- ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
- sedf_runnable(d)));
}
@@ -498,217 +399,12 @@ static void update_queues(
/* Put them back into the queue */
__add_to_waitqueue_sort(curinf->vcpu);
}
- else if ( unlikely((curinf->deadl_abs < now) ||
- (curinf->cputime > curinf->slice)) )
- {
- /*
- * We missed the deadline or the slice was already finished.
- * Might hapen because of dom_adj.
- */
- printk("\tDomain %i.%i exceeded it's deadline/"
- "slice (%"PRIu64" / %"PRIu64") now: %"PRIu64
- " cputime: %"PRIu64"\n",
- curinf->vcpu->domain->domain_id,
- curinf->vcpu->vcpu_id,
- curinf->deadl_abs, curinf->slice, now,
- curinf->cputime);
- __del_from_queue(curinf->vcpu);
-
- /* Common case: we miss one period */
- curinf->deadl_abs += curinf->period;
-
- /*
- * If we are still behind: modulo arithmetic, force deadline
- * to be in future and aligned to period borders.
- */
- if ( unlikely(curinf->deadl_abs < now) )
- curinf->deadl_abs +=
- DIV_UP(now - curinf->deadl_abs,
- curinf->period) * curinf->period;
- ASSERT(curinf->deadl_abs >= now);
-
- /* Give a fresh slice */
- curinf->cputime = 0;
- if ( PERIOD_BEGIN(curinf) > now )
- __add_to_waitqueue_sort(curinf->vcpu);
- else
- __add_to_runqueue_sort(curinf->vcpu);
- }
else
break;
}
}
-/*
- * removes a domain from the head of the according extraQ and
- * requeues it at a specified position:
- * round-robin extratime: end of extraQ
- * weighted ext.: insert in sorted list by score
- * if the domain is blocked / has regained its short-block-loss
- * time it is not put on any queue.
- */
-static void desched_extra_dom(s_time_t now, struct vcpu *d)
-{
- struct sedf_vcpu_info *inf = EDOM_INFO(d);
- int i = extra_get_cur_q(inf);
- unsigned long oldscore;
-
- ASSERT(extraq_on(d, i));
-
- /* Unset all running flags */
- inf->status &= ~(EXTRA_RUN_PEN | EXTRA_RUN_UTIL);
- /* Fresh slice for the next run */
- inf->cputime = 0;
- /* Accumulate total extratime */
- inf->extra_time_tot += now - inf->sched_start_abs;
- /* Remove extradomain from head of the queue. */
- extraq_del(d, i);
-
- /* Update the score */
- oldscore = inf->score[i];
- if ( i == EXTRA_PEN_Q )
- {
- /* Domain was running in L0 extraq */
- /* reduce block lost, probably more sophistication here!*/
- /*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
- inf->short_block_lost_tot -= now - inf->sched_start_abs;
-#if 0
- /* KAF: If we don't exit short-blocking state at this point
- * domain0 can steal all CPU for up to 10 seconds before
- * scheduling settles down (when competing against another
- * CPU-bound domain). Doing this seems to make things behave
- * nicely. Noone gets starved by default.
- */
- if ( inf->short_block_lost_tot <= 0 )
-#endif
- {
- /* We have (over-)compensated our block penalty */
- inf->short_block_lost_tot = 0;
- /* We don't want a place on the penalty queue anymore! */
- inf->status &= ~EXTRA_WANT_PEN_Q;
- goto check_extra_queues;
- }
-
- /*
- * We have to go again for another try in the block-extraq,
- * the score is not used incremantally here, as this is
- * already done by recalculating the block_lost
- */
- inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
- inf->short_block_lost_tot;
- oldscore = 0;
- }
- else
- {
- /*
- * Domain was running in L1 extraq => score is inverse of
- * utilization and is used somewhat incremental!
- */
- if ( !inf->extraweight )
- {
- /* NB: use fixed point arithmetic with 10 bits */
- inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
- inf->slice;
- }
- else
- {
- /*
- * Conversion between realtime utilisation and extrawieght:
- * full (ie 100%) utilization is equivalent to 128 extraweight
- */
- inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
- }
- }
-
- check_extra_queues:
- /* Adding a runnable domain to the right queue and removing blocked ones */
- if ( sedf_runnable(d) )
- {
- /* Add according to score: weighted round robin */
- if (((inf->status & EXTRA_AWARE) && (i == EXTRA_UTIL_Q)) ||
- ((inf->status & EXTRA_WANT_PEN_Q) && (i == EXTRA_PEN_Q)))
- extraq_add_sort_update(d, i, oldscore);
- }
- else
- {
- /* Remove this blocked domain from the waitq! */
- __del_from_queue(d);
- /* Make sure that we remove a blocked domain from the other
- * extraq too. */
- if ( i == EXTRA_PEN_Q )
- {
- if ( extraq_on(d, EXTRA_UTIL_Q) )
- extraq_del(d, EXTRA_UTIL_Q);
- }
- else
- {
- if ( extraq_on(d, EXTRA_PEN_Q) )
- extraq_del(d, EXTRA_PEN_Q);
- }
- }
-
- ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
- ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
- sedf_runnable(d)));
-}
-
-
-static struct task_slice sedf_do_extra_schedule(
- s_time_t now, s_time_t end_xt, struct list_head *extraq[], int cpu)
-{
- struct task_slice ret = { 0 };
- struct sedf_vcpu_info *runinf;
- ASSERT(end_xt > now);
-
- /* Enough time left to use for extratime? */
- if ( end_xt - now < EXTRA_QUANTUM )
- goto return_idle;
-
- if ( !list_empty(extraq[EXTRA_PEN_Q]) )
- {
- /*
- * We still have elements on the level 0 extraq
- * => let those run first!
- */
- runinf = list_entry(extraq[EXTRA_PEN_Q]->next,
- struct sedf_vcpu_info, extralist[EXTRA_PEN_Q]);
- runinf->status |= EXTRA_RUN_PEN;
- ret.task = runinf->vcpu;
- ret.time = EXTRA_QUANTUM;
-#ifdef SEDF_STATS
- runinf->pen_extra_slices++;
-#endif
- }
- else
- {
- if ( !list_empty(extraq[EXTRA_UTIL_Q]) )
- {
- /* Use elements from the normal extraqueue */
- runinf = list_entry(extraq[EXTRA_UTIL_Q]->next,
- struct sedf_vcpu_info,
- extralist[EXTRA_UTIL_Q]);
- runinf->status |= EXTRA_RUN_UTIL;
- ret.task = runinf->vcpu;
- ret.time = EXTRA_QUANTUM;
- }
- else
- goto return_idle;
- }
-
- ASSERT(ret.time > 0);
- ASSERT(sedf_runnable(ret.task));
- return ret;
-
- return_idle:
- ret.task = IDLETASK(cpu);
- ret.time = end_xt - now;
- ASSERT(ret.time > 0);
- ASSERT(sedf_runnable(ret.task));
- return ret;
-}
-
-
static int sedf_init(struct scheduler *ops)
{
struct sedf_priv_info *prv;
@@ -748,8 +444,6 @@ static struct task_slice sedf_do_schedule(
struct list_head *runq = RUNQ(cpu);
struct list_head *waitq = WAITQ(cpu);
struct sedf_vcpu_info *inf = EDOM_INFO(current);
- struct list_head *extraq[] = {
- EXTRAQ(cpu, EXTRA_PEN_Q), EXTRAQ(cpu, EXTRA_UTIL_Q)};
struct sedf_vcpu_info *runinf, *waitinf;
struct task_slice ret;
@@ -770,15 +464,7 @@ static struct task_slice sedf_do_schedule(
if ( inf->status & SEDF_ASLEEP )
inf->block_abs = now;
- if ( unlikely(extra_runs(inf)) )
- {
- /* Special treatment of domains running in extra time */
- desched_extra_dom(now, current);
- }
- else
- {
- desched_edf_dom(now, current);
- }
+ desched_edf_dom(now, current);
check_waitq:
update_queues(now, runq, waitq);
@@ -820,12 +506,9 @@ static struct task_slice sedf_do_schedule(
else
{
waitinf = list_entry(waitq->next,struct sedf_vcpu_info, list);
- /*
- * We could not find any suitable domain
- * => look for domains that are aware of extratime
- */
- ret = sedf_do_extra_schedule(now, PERIOD_BEGIN(waitinf),
- extraq, cpu);
+
+ ret.task = IDLETASK(cpu);
+ ret.time = PERIOD_BEGIN(waitinf) - now;
}
/*
@@ -833,11 +516,8 @@ static struct task_slice sedf_do_schedule(
* still can happen!!!
*/
if ( ret.time < 0)
- {
printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
ret.time);
- ret.time = EXTRA_QUANTUM;
- }
ret.migrated = 0;
@@ -848,7 +528,6 @@ static struct task_slice sedf_do_schedule(
return ret;
}
-
static void sedf_sleep(const struct scheduler *ops, struct vcpu *d)
{
if ( is_idle_vcpu(d) )
@@ -864,13 +543,35 @@ static void sedf_sleep(const struct scheduler *ops,
struct vcpu *d)
{
if ( __task_on_queue(d) )
__del_from_queue(d);
- if ( extraq_on(d, EXTRA_UTIL_Q) )
- extraq_del(d, EXTRA_UTIL_Q);
- if ( extraq_on(d, EXTRA_PEN_Q) )
- extraq_del(d, EXTRA_PEN_Q);
}
}
+/*
+ * Compares two domains in the relation of whether the one is allowed to
+ * interrupt the others execution.
+ * It returns true (!=0) if a switch to the other domain is good.
+ * Priority scheme is as follows:
+ * EDF: early deadline > late deadline
+ */
+static inline int should_switch(struct vcpu *cur,
+ struct vcpu *other,
+ s_time_t now)
+{
+ struct sedf_vcpu_info *cur_inf, *other_inf;
+ cur_inf = EDOM_INFO(cur);
+ other_inf = EDOM_INFO(other);
+
+ /* Always interrupt idle domain. */
+ if ( is_idle_vcpu(cur) )
+ return 1;
+
+ /* Check whether we need to make an earlier scheduling decision */
+ if ( PERIOD_BEGIN(other_inf) <
+ CPU_INFO(other->processor)->current_slice_expires )
+ return 1;
+
+ return 0;
+}
/*
* This function wakes up a domain, i.e. moves them into the waitqueue
@@ -904,8 +605,6 @@ static void sedf_sleep(const struct scheduler *ops, struct
vcpu *d)
*
* -this also doesn't disturb scheduling, but might lead to the fact, that
* the domain can't finish it's workload in the period
- * -in addition to that the domain can be treated prioritised when
- * extratime is available
* -addition: experiments have shown that this may have a HUGE impact on
* performance of other domains, becaus it can lead to excessive context
* switches
@@ -931,10 +630,6 @@ static void sedf_sleep(const struct scheduler *ops, struct
vcpu *d)
* DRB______D___URRRR___D...<prev [Thread] next>
* (D) <- old deadline was here
* -problem: deadlines don't occur isochronous anymore
- * Part 2c (Improved Atropos design)
- * -when a domain unblocks it is given a very short period (=latency hint)
- * and slice length scaled accordingly
- * -both rise again to the original value (e.g. get doubled every period)
*
* 3. Unconservative (i.e. incorrect)
* -to boost the performance of I/O dependent domains it would be possible
@@ -944,136 +639,6 @@ static void sedf_sleep(const struct scheduler *ops,
struct vcpu *d)
* -either behaviour can lead to missed deadlines in other domains as
* opposed to approaches 1,2a,2b
*/
-static void unblock_short_extra_support(
- struct sedf_vcpu_info* inf, s_time_t now)
-{
- /*
- * This unblocking scheme tries to support the domain, by assigning it
- * a priority in extratime distribution according to the loss of time
- * in this slice due to blocking
- */
- s_time_t pen;
-
- /* No more realtime execution in this period! */
- inf->deadl_abs += inf->period;
- if ( likely(inf->block_abs) )
- {
- /* Treat blocked time as consumed by the domain */
- /*inf->cputime += now - inf->block_abs;*/
- /*
- * Penalty is time the domain would have
- * had if it continued to run.
- */
- pen = (inf->slice - inf->cputime);
- if ( pen < 0 )
- pen = 0;
- /* Accumulate all penalties over the periods */
- /*inf->short_block_lost_tot += pen;*/
- /* Set penalty to the current value */
- inf->short_block_lost_tot = pen;
- /* Not sure which one is better.. but seems to work well... */
-
- if ( inf->short_block_lost_tot )
- {
- inf->score[0] = (inf->period << 10) /
- inf->short_block_lost_tot;
-#ifdef SEDF_STATS
- inf->pen_extra_blocks++;
-#endif
- if ( extraq_on(inf->vcpu, EXTRA_PEN_Q) )
- /* Remove domain for possible resorting! */
- extraq_del(inf->vcpu, EXTRA_PEN_Q);
- else
- /*
- * Remember that we want to be on the penalty q
- * so that we can continue when we (un-)block
- * in penalty-extratime
- */
- inf->status |= EXTRA_WANT_PEN_Q;
-
- /* (re-)add domain to the penalty extraq */
- extraq_add_sort_update(inf->vcpu, EXTRA_PEN_Q, 0);
- }
- }
-
- /* Give it a fresh slice in the next period! */
- inf->cputime = 0;
-}
-
-
-static void unblock_long_cons_b(struct sedf_vcpu_info* inf,s_time_t now)
-{
- /* Conservative 2b */
-
- /* Treat the unblocking time as a start of a new period */
- inf->deadl_abs = now + inf->period;
- inf->cputime = 0;
-}
-
-
-#define DOMAIN_EDF 1
-#define DOMAIN_EXTRA_PEN 2
-#define DOMAIN_EXTRA_UTIL 3
-#define DOMAIN_IDLE 4
-static inline int get_run_type(struct vcpu* d)
-{
- struct sedf_vcpu_info* inf = EDOM_INFO(d);
- if (is_idle_vcpu(d))
- return DOMAIN_IDLE;
- if (inf->status & EXTRA_RUN_PEN)
- return DOMAIN_EXTRA_PEN;
- if (inf->status & EXTRA_RUN_UTIL)
- return DOMAIN_EXTRA_UTIL;
- return DOMAIN_EDF;
-}
-
-
-/*
- * Compares two domains in the relation of whether the one is allowed to
- * interrupt the others execution.
- * It returns true (!=0) if a switch to the other domain is good.
- * Current Priority scheme is as follows:
- * EDF > L0 (penalty based) extra-time >
- * L1 (utilization) extra-time > idle-domain
- * In the same class priorities are assigned as following:
- * EDF: early deadline > late deadline
- * L0 extra-time: lower score > higher score
- */
-static inline int should_switch(struct vcpu *cur,
- struct vcpu *other,
- s_time_t now)
-{
- struct sedf_vcpu_info *cur_inf, *other_inf;
- cur_inf = EDOM_INFO(cur);
- other_inf = EDOM_INFO(other);
-
- /* Check whether we need to make an earlier scheduling decision */
- if ( PERIOD_BEGIN(other_inf) <
- CPU_INFO(other->processor)->current_slice_expires )
- return 1;
-
- /* No timing-based switches need to be taken into account here */
- switch ( get_run_type(cur) )
- {
- case DOMAIN_EDF:
- /* Do not interrupt a running EDF domain */
- return 0;
- case DOMAIN_EXTRA_PEN:
- /* Check whether we also want the L0 ex-q with lower score */
- return ((other_inf->status & EXTRA_WANT_PEN_Q) &&
- (other_inf->score[EXTRA_PEN_Q] <
- cur_inf->score[EXTRA_PEN_Q]));
- case DOMAIN_EXTRA_UTIL:
- /* Check whether we want the L0 extraq. Don't
- * switch if both domains want L1 extraq. */
- return !!(other_inf->status & EXTRA_WANT_PEN_Q);
- case DOMAIN_IDLE:
- return 1;
- }
-
- return 1;
-}
-
static void sedf_wake(const struct scheduler *ops, struct vcpu *d)
{
s_time_t now = NOW();
@@ -1087,8 +652,6 @@ static void sedf_wake(const struct scheduler *ops, struct
vcpu *d)
ASSERT(!sedf_runnable(d));
inf->status &= ~SEDF_ASLEEP;
- ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
- ASSERT(!extraq_on(d, EXTRA_PEN_Q));
if ( unlikely(inf->deadl_abs == 0) )
{
@@ -1100,62 +663,65 @@ static void sedf_wake(const struct scheduler *ops,
struct vcpu *d)
inf->block_tot++;
#endif
- if ( unlikely(now < PERIOD_BEGIN(inf)) )
- {
- /* Unblocking in extra-time! */
- if ( inf->status & EXTRA_WANT_PEN_Q )
+ if ( sedf_soft(d) )
+ {
+ /* Apply CBS rule
+ * Where:
+ * c == Remaining server slice == (inf->slice - cpu_time)
+ * d == Server (vcpu) deadline == inf->deadl_abs
+ * r == Wake-up time of vcpu == now
+ * U == Server (vcpu) bandwidth == (inf->slice / inf->period)
+ *
+     * if c >= (d - r) * U --->
+     * (inf->slice - inf->cputime) >= (inf->deadl_abs - now) * (inf->slice / inf->period)
+ *
+ * If true, push deadline back by one period and refresh slice, else
+ * use current slice and deadline.
+ */
+ if((inf->slice - inf->cputime) >=
+ ((inf->deadl_abs - now) * (inf->slice / inf->period)))
{
- /*
- * We have a domain that wants compensation
- * for block penalty and did just block in
- * its compensation time. Give it another
- * chance!
- */
- extraq_add_sort_update(d, EXTRA_PEN_Q, 0);
+ /* Push back deadline by one period */
+ inf->deadl_abs += inf->period;
+ inf->cputime = 0;
}
- extraq_check_add_unblocked(d, 0);
- }
- else
- {
+
+ /* In CBS we don't care if the period has begun,
+ * the task doesn't have to wait for its period
+ * because it'll never request more than its slice
+ * for any given period.
+ */
+ __add_to_runqueue_sort(d);
+ }
+ else {
+ /* Task is a hard task, treat accordingly */
+#ifdef SEDF_STATS
if ( now < inf->deadl_abs )
{
/* Short blocking */
-#ifdef SEDF_STATS
inf->short_block_tot++;
-#endif
- unblock_short_extra_support(inf, now);
-
- extraq_check_add_unblocked(d, 1);
}
else
{
- /* Long unblocking */
-#ifdef SEDF_STATS
+ /* Long unblocking, someone is going to miss their deadline. */
inf->long_block_tot++;
+ }
#endif
- unblock_long_cons_b(inf, now);
- extraq_check_add_unblocked(d, 1);
- }
+ if ( PERIOD_BEGIN(inf) > now )
+ __add_to_waitqueue_sort(d);
+ else
+ __add_to_runqueue_sort(d);
}
-
- if ( PERIOD_BEGIN(inf) > now )
- __add_to_waitqueue_sort(d);
- else
- __add_to_runqueue_sort(d);
#ifdef SEDF_STATS
/* Do some statistics here... */
if ( inf->block_abs != 0 )
{
inf->block_time_tot += now - inf->block_abs;
- inf->penalty_time_tot +=
- PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
}
#endif
- /* Sanity check: make sure each extra-aware domain IS on the util-q! */
- ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
ASSERT(__task_on_queue(d));
/*
* Check whether the awakened task needs to invoke the do_schedule
@@ -1170,35 +736,27 @@ static void sedf_wake(const struct scheduler *ops,
struct vcpu *d)
cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
}
-
/* Print a lot of useful information about a domains in the system */
static void sedf_dump_domain(struct vcpu *d)
{
printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
d->is_running ? 'T':'F');
- printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu"
- " sc=%i xtr(%s)=%"PRIu64" ew=%hu",
- EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
- EDOM_INFO(d)->weight,
- EDOM_INFO(d)->score[EXTRA_UTIL_Q],
- (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
- EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
+ printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64,
+ EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs);
#ifdef SEDF_STATS
- if ( EDOM_INFO(d)->block_time_tot != 0 )
- printk(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
- EDOM_INFO(d)->block_time_tot);
+ printk(" m=%u mt=%"PRIu64"o=%u ot=%"PRIu64,
+ EDOM_INFO(d)->miss_tot, EDOM_INFO(d)->miss_time,
+ EDOM_INFO(d)->over_tot, EDOM_INFO(d)->over_time);
+
if ( EDOM_INFO(d)->block_tot != 0 )
- printk("\n blks=%u sh=%u (%u%%) (shex=%i "\
- "shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
+ printk("\n blks=%u sh=%u (%u%%) "\
+ "l=%u (%u%%) avg: b=%"PRIu64,
EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
(EDOM_INFO(d)->short_block_tot * 100) / EDOM_INFO(d)->block_tot,
- EDOM_INFO(d)->pen_extra_blocks,
- EDOM_INFO(d)->pen_extra_slices,
EDOM_INFO(d)->long_block_tot,
(EDOM_INFO(d)->long_block_tot * 100) / EDOM_INFO(d)->block_tot,
- (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot,
- (EDOM_INFO(d)->penalty_time_tot) / EDOM_INFO(d)->block_tot);
+ (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot);
#endif
printk("\n");
}
@@ -1234,30 +792,6 @@ static void sedf_dump_cpu_state(const struct scheduler
*ops, int i)
sedf_dump_domain(d_inf->vcpu);
}
- queue = EXTRAQ(i,EXTRA_PEN_Q); loop = 0;
- printk("\nEXTRAQ (penalty) rq %lx n: %lx, p: %lx\n",
- (unsigned long)queue, (unsigned long) queue->next,
- (unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue )
- {
- d_inf = list_entry(list, struct sedf_vcpu_info,
- extralist[EXTRA_PEN_Q]);
- printk("%3d: ",loop++);
- sedf_dump_domain(d_inf->vcpu);
- }
-
- queue = EXTRAQ(i,EXTRA_UTIL_Q); loop = 0;
- printk("\nEXTRAQ (utilization) rq %lx n: %lx, p: %lx\n",
- (unsigned long)queue, (unsigned long) queue->next,
- (unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue )
- {
- d_inf = list_entry(list, struct sedf_vcpu_info,
- extralist[EXTRA_UTIL_Q]);
- printk("%3d: ",loop++);
- sedf_dump_domain(d_inf->vcpu);
- }
-
loop = 0;
printk("\nnot on Q\n");
@@ -1279,199 +813,69 @@ static void sedf_dump_cpu_state(const struct scheduler
*ops, int i)
}
-/* Adjusts periods and slices of the domains accordingly to their weights */
-static int sedf_adjust_weights(struct cpupool *c, int nr_cpus, int *sumw,
s_time_t *sumt)
-{
- struct vcpu *p;
- struct domain *d;
- unsigned int cpu;
-
- /*
- * Sum across all weights. Notice that no runq locking is needed
- * here: the caller holds sedf_priv_info.lock and we're not changing
- * anything that is accessed during scheduling.
- */
- rcu_read_lock(&domlist_read_lock);
- for_each_domain_in_cpupool( d, c )
- {
- for_each_vcpu( d, p )
- {
- if ( (cpu = p->processor) >= nr_cpus )
- continue;
-
- if ( EDOM_INFO(p)->weight )
- {
- sumw[cpu] += EDOM_INFO(p)->weight;
- }
- else
- {
- /*
- * Don't modify domains who don't have a weight, but sum
- * up the time they need, projected to a WEIGHT_PERIOD,
- * so that this time is not given to the weight-driven
- * domains
- */
-
- /* Check for overflows */
- ASSERT((WEIGHT_PERIOD < ULONG_MAX)
- && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
- sumt[cpu] +=
- (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) /
- EDOM_INFO(p)->period_orig;
- }
- }
- }
- rcu_read_unlock(&domlist_read_lock);
-
- /*
- * Adjust all slices (and periods) to the new weight. Unlike above, we
- * need to take thr runq lock for the various VCPUs: we're modyfing
- * slice and period which are referenced during scheduling.
- */
- rcu_read_lock(&domlist_read_lock);
- for_each_domain_in_cpupool( d, c )
- {
- for_each_vcpu ( d, p )
- {
- if ( (cpu = p->processor) >= nr_cpus )
- continue;
- if ( EDOM_INFO(p)->weight )
- {
- /* Interrupts already off */
- spinlock_t *lock = vcpu_schedule_lock(p);
-
- EDOM_INFO(p)->period_orig =
- EDOM_INFO(p)->period = WEIGHT_PERIOD;
- EDOM_INFO(p)->slice_orig =
- EDOM_INFO(p)->slice =
- (EDOM_INFO(p)->weight *
- (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[cpu])) / sumw[cpu];
-
- vcpu_schedule_unlock(lock, p);
- }
- }
- }
- rcu_read_unlock(&domlist_read_lock);
-
- return 0;
-}
-
-
/* Set or fetch domain scheduling parameters */
static int sedf_adjust(const struct scheduler *ops, struct domain *p, struct
xen_domctl_scheduler_op *op)
{
struct sedf_priv_info *prv = SEDF_PRIV(ops);
unsigned long flags;
- unsigned int nr_cpus = cpumask_last(&cpu_online_map) + 1;
- int *sumw = xzalloc_array(int, nr_cpus);
- s_time_t *sumt = xzalloc_array(s_time_t, nr_cpus);
+ s_time_t now = NOW();
struct vcpu *v;
int rc = 0;
/*
* Serialize against the pluggable scheduler lock to protect from
* concurrent updates. We need to take the runq lock for the VCPUs
- * as well, since we are touching extraweight, weight, slice and
- * period. As in sched_credit2.c, runq locks nest inside the
- * pluggable scheduler lock.
+ * as well, since we are touching slice and period.
+ *
+ * As in sched_credit2.c, runq locks nest inside the pluggable scheduler
+ * lock.
*/
spin_lock_irqsave(&prv->lock, flags);
if ( op->cmd == XEN_DOMCTL_SCHEDOP_putinfo )
{
- /*
- * These are used in sedf_adjust_weights() but have to be allocated in
- * this function, as we need to avoid nesting xmem_pool_alloc's lock
- * within our prv->lock.
- */
- if ( !sumw || !sumt )
+ /* Check for sane parameters */
+ if ( !op->u.sedf.period )
{
- /* Check for errors here, the _getinfo branch doesn't care */
- rc = -ENOMEM;
+ printk("Period Not set");
+ rc = -EINVAL;
goto out;
}
- /* Check for sane parameters */
- if ( !op->u.sedf.period && !op->u.sedf.weight )
+ /*
+ * Sanity checking
+ */
+ if ( (op->u.sedf.period > PERIOD_MAX) ||
+ (op->u.sedf.period < PERIOD_MIN) ||
+ (op->u.sedf.slice > op->u.sedf.period) ||
+ (op->u.sedf.slice < SLICE_MIN) )
{
+ printk("Insane Parameters: period: %lu\tbudget: %lu\n",
op->u.sedf.period, op->u.sedf.slice);
rc = -EINVAL;
goto out;
}
- if ( op->u.sedf.weight )
+ /* Time-driven domains */
+ for_each_vcpu ( p, v )
{
- if ( (op->u.sedf.extratime & EXTRA_AWARE) &&
- (!op->u.sedf.period) )
+ spinlock_t *lock = vcpu_schedule_lock(v);
+
+ EDOM_INFO(v)->period = op->u.sedf.period;
+ EDOM_INFO(v)->slice = op->u.sedf.slice;
+ if(op->u.sedf.soft)
{
- /* Weight-driven domains with extratime only */
- for_each_vcpu ( p, v )
- {
- /* (Here and everywhere in the following) IRQs are already
off,
- * hence vcpu_spin_lock() is the one. */
- spinlock_t *lock = vcpu_schedule_lock(v);
-
- EDOM_INFO(v)->extraweight = op->u.sedf.weight;
- EDOM_INFO(v)->weight = 0;
- EDOM_INFO(v)->slice = 0;
- EDOM_INFO(v)->period = WEIGHT_PERIOD;
- vcpu_schedule_unlock(lock, v);
- }
+ EDOM_INFO(v)->status |= SEDF_SOFT_TASK;
}
else
{
- /* Weight-driven domains with real-time execution */
- for_each_vcpu ( p, v )
+ /* Correct deadline when switching from a soft to hard vcpu */
+ if( unlikely((EDOM_INFO(v)->deadl_abs - now) >=
(EDOM_INFO(v)->period * 3)) )
{
- spinlock_t *lock = vcpu_schedule_lock(v);
-
- EDOM_INFO(v)->weight = op->u.sedf.weight;
- vcpu_schedule_unlock(lock, v);
+ EDOM_INFO(v)->deadl_abs = (now - EDOM_INFO(v)->cputime) +
(2 * EDOM_INFO(v)->period);
}
+
+ EDOM_INFO(v)->status &= (~SEDF_SOFT_TASK);
}
- }
- else
- {
- /*
- * Sanity checking: note that disabling extra weight requires
- * that we set a non-zero slice.
- */
- if ( (op->u.sedf.period > PERIOD_MAX) ||
- (op->u.sedf.period < PERIOD_MIN) ||
- (op->u.sedf.slice > op->u.sedf.period) ||
- (op->u.sedf.slice < SLICE_MIN) )
- {
- rc = -EINVAL;
- goto out;
- }
-
- /* Time-driven domains */
- for_each_vcpu ( p, v )
- {
- spinlock_t *lock = vcpu_schedule_lock(v);
-
- EDOM_INFO(v)->weight = 0;
- EDOM_INFO(v)->extraweight = 0;
- EDOM_INFO(v)->period_orig =
- EDOM_INFO(v)->period = op->u.sedf.period;
- EDOM_INFO(v)->slice_orig =
- EDOM_INFO(v)->slice = op->u.sedf.slice;
- vcpu_schedule_unlock(lock, v);
- }
- }
-
- rc = sedf_adjust_weights(p->cpupool, nr_cpus, sumw, sumt);
- if ( rc )
- goto out;
-
- for_each_vcpu ( p, v )
- {
- spinlock_t *lock = vcpu_schedule_lock(v);
-
- EDOM_INFO(v)->status =
- (EDOM_INFO(v)->status &
- ~EXTRA_AWARE) | (op->u.sedf.extratime & EXTRA_AWARE);
- EDOM_INFO(v)->latency = op->u.sedf.latency;
- extraq_check(v);
vcpu_schedule_unlock(lock, v);
}
}
@@ -1485,17 +889,12 @@ static int sedf_adjust(const struct scheduler *ops,
struct domain *p, struct xen
op->u.sedf.period = EDOM_INFO(p->vcpu[0])->period;
op->u.sedf.slice = EDOM_INFO(p->vcpu[0])->slice;
- op->u.sedf.extratime = EDOM_INFO(p->vcpu[0])->status & EXTRA_AWARE;
- op->u.sedf.latency = EDOM_INFO(p->vcpu[0])->latency;
- op->u.sedf.weight = EDOM_INFO(p->vcpu[0])->weight;
+ op->u.sedf.soft = sedf_soft(p->vcpu[0]);
}
out:
spin_unlock_irqrestore(&prv->lock, flags);
- xfree(sumt);
- xfree(sumw);
-
return rc;
}
--
1.7.9.5
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace.