[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen master] xen/schedule: Fix races in vcpu migration



commit 9a36de177c16d6423a07ad61f1c7af5274769aae
Author:     George Dunlap <george.dunlap@xxxxxxxxxx>
AuthorDate: Tue May 1 18:13:27 2018 +0100
Commit:     George Dunlap <george.dunlap@xxxxxxxxxx>
CommitDate: Thu May 3 11:56:48 2018 +0100

    xen/schedule: Fix races in vcpu migration
    
    The current sequence to initiate vcpu migration is inefficent and 
error-prone:
    
    - The initiator sets VPF_migraging with the lock held, then drops the
      lock and calls vcpu_sleep_nosync(), which immediately grabs the lock
      again
    
    - A number of places unnecessarily check for v->pause_flags in between
      those two
    
    - Every call to vcpu_migrate() must be prefaced with
      vcpu_sleep_nosync() or introduce a race condition; this code
      duplication is error-prone
    
    - In the event that v->is_running is true at the beginning of
      vcpu_migrate(), it's almost certain that vcpu_migrate() will end up
      being called in context_switch() as well; we might as well simply
      let it run there and save the duplicated effort (which will be
      non-negligible).
    
    The result is that Credit1 has several races which result in runqueue
    <-> v->processor invariants being violated (triggering ASSERTs in
    debug builds and strange bugs in production builds).
    
    Instead, introduce vcpu_migrate_start() to initiate the process.
    vcpu_migrate_start() is called with the scheduling lock held.  It not
    only sets VPF_migrating, but also calls vcpu_sleep_nosync_locked()
    (which will automatically do nothing if there's nothing to do).
    
    Rename vcpu_migrate() to vcpu_migrate_finish().  Check for v->is_running and
    pause_flags & VPF_migrating at the top and return if appropriate.
    
    Then the way to initiate migration is consistently:
    
    * Grab lock
    * vcpu_migrate_start()
    * Release lock
    * vcpu_migrate_finish()
    
    Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxx>
    Reviewed-by: Dario Faggioli <dfaggioli@xxxxxxxx>
    Tested-by: Olaf Hering <olaf@xxxxxxxxx>
    Release-acked-by: Juergen Gross <jgross@xxxxxxxx>
---
 xen/common/schedule.c | 80 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 55 insertions(+), 25 deletions(-)

diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 7489833361..049f93f7aa 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -593,13 +593,54 @@ static void vcpu_move_nosched(struct vcpu *v, unsigned 
int new_cpu)
     sched_move_irqs(v);
 }
 
-static void vcpu_migrate(struct vcpu *v)
+/*
+ * Initiating migration
+ *
+ * In order to migrate, we need the vcpu in question to have stopped
+ * running and had SCHED_OP(sleep) called (to take it off any
+ * runqueues, for instance); and if it is currently running, it needs
+ * to be scheduled out.  Finally, we need to hold the scheduling locks
+ * for both the processor we're migrating from, and the processor
+ * we're migrating to.
+ *
+ * In order to avoid deadlock while satisfying the final requirement,
+ * we must release any scheduling lock we hold, then try to grab both
+ * locks we want, then double-check to make sure that what we started
+ * to do hasn't been changed in the mean time.
+ *
+ * These steps are encapsulated in the following two functions; they
+ * should be called like this:
+ *
+ *     lock = vcpu_schedule_lock_irq(v);
+ *     vcpu_migrate_start(v);
+ *     vcpu_schedule_unlock_irq(lock, v)
+ *     vcpu_migrate_finish(v);
+ *
+ * vcpu_migrate_finish() will do the work now if it can, or simply
+ * return if it can't (because v is still running); in that case
+ * vcpu_migrate_finish() will be called by context_saved().
+ */
+void vcpu_migrate_start(struct vcpu *v)
+{
+    set_bit(_VPF_migrating, &v->pause_flags);
+    vcpu_sleep_nosync_locked(v);
+}
+
+static void vcpu_migrate_finish(struct vcpu *v)
 {
     unsigned long flags;
     unsigned int old_cpu, new_cpu;
     spinlock_t *old_lock, *new_lock;
     bool_t pick_called = 0;
 
+    /*
+     * If the vcpu is currently running, this will be handled by
+     * context_saved(); and in any case, if the bit is cleared, then
+     * someone else has already done the work so we don't need to.
+     */
+    if ( v->is_running || !test_bit(_VPF_migrating, &v->pause_flags) )
+        return;
+
     old_cpu = new_cpu = v->processor;
     for ( ; ; )
     {
@@ -679,14 +720,11 @@ void vcpu_force_reschedule(struct vcpu *v)
     spinlock_t *lock = vcpu_schedule_lock_irq(v);
 
     if ( v->is_running )
-        set_bit(_VPF_migrating, &v->pause_flags);
+        vcpu_migrate_start(v);
+
     vcpu_schedule_unlock_irq(lock, v);
 
-    if ( v->pause_flags & VPF_migrating )
-    {
-        vcpu_sleep_nosync(v);
-        vcpu_migrate(v);
-    }
+    vcpu_migrate_finish(v);
 }
 
 void restore_vcpu_affinity(struct domain *d)
@@ -841,10 +879,10 @@ int cpu_disable_scheduler(unsigned int cpu)
                  *  * the scheduler will always fine a suitable solution, or
                  *    things would have failed before getting in here.
                  */
-                set_bit(_VPF_migrating, &v->pause_flags);
+                vcpu_migrate_start(v);
                 vcpu_schedule_unlock_irqrestore(lock, flags, v);
-                vcpu_sleep_nosync(v);
-                vcpu_migrate(v);
+
+                vcpu_migrate_finish(v);
 
                 /*
                  * The only caveat, in this case, is that if a vcpu active in
@@ -908,18 +946,14 @@ static int vcpu_set_affinity(
             ASSERT(which == v->cpu_soft_affinity);
             sched_set_affinity(v, NULL, affinity);
         }
-        set_bit(_VPF_migrating, &v->pause_flags);
+        vcpu_migrate_start(v);
     }
 
     vcpu_schedule_unlock_irq(lock, v);
 
     domain_update_node_affinity(v->domain);
 
-    if ( v->pause_flags & VPF_migrating )
-    {
-        vcpu_sleep_nosync(v);
-        vcpu_migrate(v);
-    }
+    vcpu_migrate_finish(v);
 
     return ret;
 }
@@ -1147,7 +1181,6 @@ int vcpu_pin_override(struct vcpu *v, int cpu)
         {
             sched_set_affinity(v, v->cpu_hard_affinity_saved, NULL);
             v->affinity_broken = 0;
-            set_bit(_VPF_migrating, &v->pause_flags);
             ret = 0;
         }
     }
@@ -1160,20 +1193,18 @@ int vcpu_pin_override(struct vcpu *v, int cpu)
             cpumask_copy(v->cpu_hard_affinity_saved, v->cpu_hard_affinity);
             v->affinity_broken = 1;
             sched_set_affinity(v, cpumask_of(cpu), NULL);
-            set_bit(_VPF_migrating, &v->pause_flags);
             ret = 0;
         }
     }
 
+    if ( ret == 0 )
+        vcpu_migrate_start(v);
+
     vcpu_schedule_unlock_irq(lock, v);
 
     domain_update_node_affinity(v->domain);
 
-    if ( v->pause_flags & VPF_migrating )
-    {
-        vcpu_sleep_nosync(v);
-        vcpu_migrate(v);
-    }
+    vcpu_migrate_finish(v);
 
     return ret;
 }
@@ -1560,8 +1591,7 @@ void context_saved(struct vcpu *prev)
 
     SCHED_OP(vcpu_scheduler(prev), context_saved, prev);
 
-    if ( unlikely(prev->pause_flags & VPF_migrating) )
-        vcpu_migrate(prev);
+    vcpu_migrate_finish(prev);
 }
 
 /* The scheduler timer: force a run through the scheduler */
--
generated by git-patchbot for /home/xen/git/xen.git#master

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.