[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen stable-4.6] credit1: fix a race when picking initial pCPU for a vCPU



commit 5bb458bec9199785eda4f47cf98eb1fde7921515
Author:     Dario Faggioli <dario.faggioli@xxxxxxxxxx>
AuthorDate: Tue Sep 6 11:51:34 2016 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Tue Sep 6 11:51:34 2016 +0200

    credit1: fix a race when picking initial pCPU for a vCPU
    
    In the Credit1 hunk of 9f358ddd69463 ("xen: Have
    schedulers revise initial placement") csched_cpu_pick()
    is called without taking the runqueue lock of the
    (temporary) pCPU that the vCPU has been assigned to
    (e.g., in XEN_DOMCTL_max_vcpus).
    
    However, although 'hidden' in the IS_RUNQ_IDLE() macro,
    that function does access the runq (for doing load
    balancing calculations). Two scenarios are possible:
     1) we are on cpu X, and IS_RUNQ_IDLE() peeks at cpu's
        X own runq;
     2) we are on cpu X, but IS_RUNQ_IDLE() peeks at some
        other cpu's runq.
    
    Scenario 2) absolutely requies that the appropriate
    runq lock is taken. Scenario 1) works even without
    taking the cpu's own runq lock. That is actually what
    happens when when _csched_pick_cpu() is called from
    csched_vcpu_acct() (in turn, called by csched_tick()).
    
    Races have been observed and reported (by both XenServer
    own testing and OSSTest [1]), in the form of
    IS_RUNQ_IDLE() falling over LIST_POISON, because we're
    not currently holding the proper lock, in
    csched_vcpu_insert(), when scenario 1) occurs.
    
    However, for better robustness, from now on we always
    ask for the proper runq lock to be held when calling
    IS_RUNQ_IDLE() (which is also becoming a static inline
    function instead of macro).
    
    In order to comply with that, we take the lock around
    the call to _csched_cpu_pick() in csched_vcpu_acct().
    
    [1] https://lists.xen.org/archives/html/xen-devel/2016-08/msg02144.html
    
    Reported-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
    Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
    Reviewed-by: George Dunlap <george.dunlap@xxxxxxxxxx>
    master commit: 9109bf55084398c4547b8956906410c158eb9a17
    master date: 2016-09-02 14:17:55 +0200
---
 xen/common/sched_credit.c | 53 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 14 deletions(-)

diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
index 304daf4..eabdf0a 100644
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -78,9 +78,6 @@
 #define CSCHED_VCPU(_vcpu)  ((struct csched_vcpu *) (_vcpu)->sched_priv)
 #define CSCHED_DOM(_dom)    ((struct csched_dom *) (_dom)->sched_priv)
 #define RUNQ(_cpu)          (&(CSCHED_PCPU(_cpu)->runq))
-/* Is the first element of _cpu's runq its idle vcpu? */
-#define IS_RUNQ_IDLE(_cpu)  (list_empty(RUNQ(_cpu)) || \
-                             is_idle_vcpu(__runq_elem(RUNQ(_cpu)->next)->vcpu))
 
 
 /*
@@ -251,6 +248,18 @@ __runq_elem(struct list_head *elem)
     return list_entry(elem, struct csched_vcpu, runq_elem);
 }
 
+/* Is the first element of cpu's runq (if any) cpu's idle vcpu? */
+static inline bool_t is_runq_idle(unsigned int cpu)
+{
+    /*
+     * We're peeking at cpu's runq, we must hold the proper lock.
+     */
+    ASSERT(spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock));
+
+    return list_empty(RUNQ(cpu)) ||
+           is_idle_vcpu(__runq_elem(RUNQ(cpu)->next)->vcpu);
+}
+
 static inline void
 __runq_insert(unsigned int cpu, struct csched_vcpu *svc)
 {
@@ -696,7 +705,7 @@ _csched_cpu_pick(const struct scheduler *ops, struct vcpu 
*vc, bool_t commit)
          * runnable vcpu on cpu, we add cpu to the idlers.
          */
         cpumask_and(&idlers, &cpu_online_map, CSCHED_PRIV(ops)->idlers);
-        if ( vc->processor == cpu && IS_RUNQ_IDLE(cpu) )
+        if ( vc->processor == cpu && is_runq_idle(cpu) )
             __cpumask_set_cpu(cpu, &idlers);
         cpumask_and(&cpus, &cpus, &idlers);
 
@@ -862,21 +871,33 @@ csched_vcpu_acct(struct csched_private *prv, unsigned int 
cpu)
     /*
      * Put this VCPU and domain back on the active list if it was
      * idling.
-     *
-     * If it's been active a while, check if we'd be better off
-     * migrating it to run elsewhere (see multi-core and multi-thread
-     * support in csched_cpu_pick()).
      */
     if ( list_empty(&svc->active_vcpu_elem) )
     {
         __csched_vcpu_acct_start(prv, svc);
     }
-    else if ( _csched_cpu_pick(ops, current, 0) != cpu )
+    else
     {
-        SCHED_VCPU_STAT_CRANK(svc, migrate_r);
-        SCHED_STAT_CRANK(migrate_running);
-        set_bit(_VPF_migrating, &current->pause_flags);
-        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+        unsigned int new_cpu;
+        unsigned long flags;
+        spinlock_t *lock = vcpu_schedule_lock_irqsave(current, &flags);
+
+        /*
+         * If it's been active a while, check if we'd be better off
+         * migrating it to run elsewhere (see multi-core and multi-thread
+         * support in csched_cpu_pick()).
+         */
+        new_cpu = _csched_cpu_pick(ops, current, 0);
+
+        vcpu_schedule_unlock_irqrestore(lock, flags, current);
+
+        if ( new_cpu != cpu )
+        {
+            SCHED_VCPU_STAT_CRANK(svc, migrate_r);
+            SCHED_STAT_CRANK(migrate_running);
+            set_bit(_VPF_migrating, &current->pause_flags);
+            cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+        }
     }
 }
 
@@ -909,9 +930,13 @@ csched_vcpu_insert(const struct scheduler *ops, struct 
vcpu *vc)
 
     BUG_ON( is_idle_vcpu(vc) );
 
-    /* This is safe because vc isn't yet being scheduled */
+    /* csched_cpu_pick() looks in vc->processor's runq, so we need the lock. */
+    lock = vcpu_schedule_lock_irq(vc);
+
     vc->processor = csched_cpu_pick(ops, vc);
 
+    spin_unlock_irq(lock);
+
     lock = vcpu_schedule_lock_irq(vc);
 
     if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running )
--
generated by git-patchbot for /home/xen/git/xen.git#stable-4.6

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.