
[Xen-devel] [PATCH RFC V2 43/45] xen/sched: make vcpu_wake() and vcpu_sleep() core scheduling aware



vcpu_wake() and vcpu_sleep() need to be made core scheduling aware:
they might need to switch a single vcpu of an already scheduled item
between running and not running.

Especially when vcpu_sleep() for a vcpu is called by a vcpu of the
same scheduling item, special care must be taken to avoid a deadlock:
the vcpu to be put to sleep must be forced through a context switch
without doing the same for the calling vcpu. For this purpose add a
vcpu flag, handled in sched_slave() and in sched_wait_rendezvous_in(),
which allows a vcpu of the currently running item to switch state at
a higher priority than a normal schedule event.
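
For illustration, the resulting sleep path boils down to the following
(a condensed sketch of the vcpu_sleep_nosync_locked() hunk below; the
runstate update and the locking done by the callers are omitted):

    if ( likely(!item_runnable(item)) )
        /* No vcpu of the item stays runnable: put the whole item asleep. */
        sched_sleep(vcpu_scheduler(v), item);
    else if ( item_running(item) > 1 && v->is_running &&
              !v->force_context_switch )
    {
        /*
         * Other vcpus of the item keep running: mark this vcpu and let
         * the SCHED_SLAVE softirq handler switch out just this one.
         */
        v->force_context_switch = true;
        cpu_raise_softirq(v->processor, SCHED_SLAVE_SOFTIRQ);
    }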

Use the same mechanism when waking up a vcpu of a currently active
item.
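
The wake side is analogous (again a condensed sketch of the vcpu_wake()
hunk below, using the same names):

    sched_wake(vcpu_scheduler(v), item);
    if ( item->is_running && !v->is_running && !v->force_context_switch )
    {
        /*
         * The item is already running on a cpu but this vcpu is not:
         * mark it and let the SCHED_SLAVE softirq handler bring it
         * online without a full schedule event.
         */
        v->force_context_switch = true;
        cpu_raise_softirq(v->processor, SCHED_SLAVE_SOFTIRQ);
    }

In both cases sched_slave() (and sched_wait_rendezvous_in() while waiting
for the rendezvous) checks force_context_switch before doing the normal
rendezvous handling, which is what gives the forced switch its higher
priority.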

While at it, make vcpu_sleep_nosync_locked() static, as it is only used
in schedule.c.

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
RFC V2: add vcpu_sleep() handling and force_context_switch flag
---
 xen/common/schedule.c      | 144 ++++++++++++++++++++++++++++++++++++++++-----
 xen/include/xen/sched-if.h |   9 ++-
 xen/include/xen/sched.h    |   2 +
 3 files changed, 136 insertions(+), 19 deletions(-)

diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 788ecc9e81..49ed2b5900 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -79,21 +79,21 @@ extern const struct scheduler *__start_schedulers_array[], *__end_schedulers_arr
 
 static struct scheduler __read_mostly ops;
 
-static inline struct vcpu *sched_item2vcpu_cpu(struct sched_item *item,
-                                               unsigned int cpu)
+static inline struct vcpu *item2vcpu_cpu(struct sched_item *item,
+                                         unsigned int cpu)
 {
     unsigned int idx = item->item_id + per_cpu(sched_res_idx, cpu);
     const struct domain *d = item->domain;
-    struct vcpu *v;
 
-    if ( idx < d->max_vcpus && d->vcpu[idx] )
-    {
-        v = d->vcpu[idx];
-        if ( v->new_state == RUNSTATE_running )
-            return v;
-    }
+    return (idx < d->max_vcpus && d->vcpu[idx]) ? d->vcpu[idx] : NULL;
+}
 
-    return idle_vcpu[cpu];
+static inline struct vcpu *sched_item2vcpu_cpu(struct sched_item *item,
+                                               unsigned int cpu)
+{
+    struct vcpu *v = item2vcpu_cpu(item, cpu);
+
+    return (v && v->new_state == RUNSTATE_running) ? v : idle_vcpu[cpu];
 }
 
 static inline struct scheduler *dom_scheduler(const struct domain *d)
@@ -644,8 +644,10 @@ void sched_destroy_domain(struct domain *d)
     }
 }
 
-void vcpu_sleep_nosync_locked(struct vcpu *v)
+static void vcpu_sleep_nosync_locked(struct vcpu *v)
 {
+    struct sched_item *item = v->sched_item;
+
     ASSERT(spin_is_locked(per_cpu(sched_res, v->processor)->schedule_lock));
 
     if ( likely(!vcpu_runnable(v)) )
@@ -653,7 +655,14 @@ void vcpu_sleep_nosync_locked(struct vcpu *v)
         if ( v->runstate.state == RUNSTATE_runnable )
             vcpu_runstate_change(v, RUNSTATE_offline, NOW());
 
-        sched_sleep(vcpu_scheduler(v), v->sched_item);
+        if ( likely(!item_runnable(item)) )
+            sched_sleep(vcpu_scheduler(v), item);
+        else if ( item_running(item) > 1 && v->is_running &&
+                  !v->force_context_switch )
+        {
+            v->force_context_switch = true;
+            cpu_raise_softirq(v->processor, SCHED_SLAVE_SOFTIRQ);
+        }
     }
 }
 
@@ -685,16 +694,22 @@ void vcpu_wake(struct vcpu *v)
 {
     unsigned long flags;
     spinlock_t *lock;
+    struct sched_item *item = v->sched_item;
 
     TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
 
-    lock = item_schedule_lock_irqsave(v->sched_item, &flags);
+    lock = item_schedule_lock_irqsave(item, &flags);
 
     if ( likely(vcpu_runnable(v)) )
     {
         if ( v->runstate.state >= RUNSTATE_blocked )
             vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
-        sched_wake(vcpu_scheduler(v), v->sched_item);
+        sched_wake(vcpu_scheduler(v), item);
+        if ( item->is_running && !v->is_running && !v->force_context_switch )
+        {
+            v->force_context_switch = true;
+            cpu_raise_softirq(v->processor, SCHED_SLAVE_SOFTIRQ);
+        }
     }
     else if ( !(v->pause_flags & VPF_blocked) )
     {
@@ -702,7 +717,7 @@ void vcpu_wake(struct vcpu *v)
             vcpu_runstate_change(v, RUNSTATE_offline, NOW());
     }
 
-    item_schedule_unlock_irqrestore(lock, flags, v->sched_item);
+    item_schedule_unlock_irqrestore(lock, flags, item);
 }
 
 void vcpu_unblock(struct vcpu *v)
@@ -1835,6 +1850,61 @@ static void sched_context_switch(struct vcpu *vprev, struct vcpu *vnext,
     context_switch(vprev, vnext);
 }
 
+/*
+ * Force a context switch of a single vcpu of an item.
+ * Might be called either if a vcpu of an already running item is woken up
+ * or if a vcpu of a running item is put asleep with other vcpus of the same
+ * item still running.
+ */
+static struct vcpu *sched_force_context_switch(struct vcpu *vprev,
+                                               struct vcpu *v,
+                                               int cpu, s_time_t now)
+{
+    v->force_context_switch = false;
+
+    if ( vcpu_runnable(v) == v->is_running )
+        return NULL;
+
+    if ( vcpu_runnable(v) )
+    {
+        if ( is_idle_vcpu(vprev) )
+        {
+            vcpu_runstate_change(vprev, RUNSTATE_runnable, now);
+            vprev->sched_item = this_cpu(sched_res)->sched_item_idle;
+        }
+        vcpu_runstate_change(v, RUNSTATE_running, now);
+    }
+    else
+    {
+        /* Make sure not to switch the last vcpu of an item away. */
+        if ( item_running(v->sched_item) == 1 )
+            return NULL;
+
+        vcpu_runstate_change(v, vcpu_runstate_blocked(v), now);
+        v = sched_item2vcpu_cpu(vprev->sched_item, cpu);
+        if ( v != vprev )
+        {
+            if ( is_idle_vcpu(vprev) )
+            {
+                vcpu_runstate_change(vprev, RUNSTATE_runnable, now);
+                vprev->sched_item = this_cpu(sched_res)->sched_item_idle;
+            }
+            else
+            {
+                v->sched_item = vprev->sched_item;
+                vcpu_runstate_change(v, RUNSTATE_running, now);
+            }
+        }
+    }
+
+    v->is_running = 1;
+
+    /* Make sure not to lose another slave call. */
+    raise_softirq(SCHED_SLAVE_SOFTIRQ);
+
+    return v;
+}
+
 /*
  * Rendezvous before taking a scheduling decision.
  * Called with schedule lock held, so all accesses to the rendezvous counter
@@ -1850,6 +1920,7 @@ static struct sched_item *sched_wait_rendezvous_in(struct sched_item *prev,
                                                    s_time_t now)
 {
     struct sched_item *next;
+    struct vcpu *v;
 
     if ( !--prev->rendezvous_in_cnt )
     {
@@ -1858,8 +1929,28 @@ static struct sched_item *sched_wait_rendezvous_in(struct sched_item *prev,
         return next;
     }
 
+    v = item2vcpu_cpu(prev, cpu);
     while ( prev->rendezvous_in_cnt )
     {
+        if ( v && v->force_context_switch )
+        {
+            struct vcpu *vprev = current;
+
+            v = sched_force_context_switch(vprev, v, cpu, now);
+
+            if ( v )
+            {
+                /* We'll come back another time, so adjust rendezvous_in_cnt. */
+                prev->rendezvous_in_cnt++;
+
+                pcpu_schedule_unlock_irq(lock, cpu);
+
+                sched_context_switch(vprev, v, false, now);
+            }
+
+            v = item2vcpu_cpu(prev, cpu);
+        }
+
         pcpu_schedule_unlock_irq(lock, cpu);
         cpu_relax();
         pcpu_schedule_lock_irq(cpu);
@@ -1870,10 +1961,11 @@ static struct sched_item *sched_wait_rendezvous_in(struct sched_item *prev,
 
 static void sched_slave(void)
 {
-    struct vcpu          *vprev = current;
+    struct vcpu          *v, *vprev = current;
     struct sched_item    *prev = vprev->sched_item, *next;
     s_time_t              now;
     spinlock_t           *lock;
+    bool                  do_softirq = false;
     int cpu = smp_processor_id();
 
     ASSERT_NOT_IN_ATOMIC();
@@ -1882,9 +1974,29 @@ static void sched_slave(void)
 
     now = NOW();
 
+    v = item2vcpu_cpu(prev, cpu);
+    if ( v && v->force_context_switch )
+    {
+        v = sched_force_context_switch(vprev, v, cpu, now);
+
+        if ( v )
+        {
+            pcpu_schedule_unlock_irq(lock, cpu);
+
+            sched_context_switch(vprev, v, false, now);
+        }
+
+        do_softirq = true;
+    }
+
     if ( !prev->rendezvous_in_cnt )
     {
         pcpu_schedule_unlock_irq(lock, cpu);
+
+        /* Check for failed forced context switch. */
+        if ( do_softirq )
+            raise_softirq(SCHEDULE_SOFTIRQ);
+
         return;
     }
 
diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h
index 8981d41629..f16d81ab4a 100644
--- a/xen/include/xen/sched-if.h
+++ b/xen/include/xen/sched-if.h
@@ -75,6 +75,11 @@ static inline bool item_runnable(const struct sched_item *item)
     return false;
 }
 
+static inline int vcpu_runstate_blocked(struct vcpu *v)
+{
+    return (v->pause_flags & VPF_blocked) ? RUNSTATE_blocked : RUNSTATE_offline;
+}
+
 static inline bool item_runnable_state(const struct sched_item *item)
 {
     struct vcpu *v;
@@ -84,9 +89,7 @@ static inline bool item_runnable_state(const struct sched_item *item)
     {
         runnable = vcpu_runnable(v);
 
-        v->new_state = runnable ? RUNSTATE_running
-                                : (v->pause_flags & VPF_blocked)
-                                  ? RUNSTATE_blocked : RUNSTATE_offline;
+        v->new_state = runnable ? RUNSTATE_running : vcpu_runstate_blocked(v);
 
         if ( runnable )
             ret = true;
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index b6a2fe28cc..5629602de5 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -186,6 +186,8 @@ struct vcpu
     bool             is_running;
     /* VCPU should wake fast (do not deep sleep the CPU). */
     bool             is_urgent;
+    /* VCPU must context_switch without scheduling its item. */
+    bool             force_context_switch;
 
 #ifdef VCPU_TRAP_LAST
 #define VCPU_TRAP_NONE    0
-- 
2.16.4

