
[xen staging-4.10] xen/events: rework fifo queue locking



commit 1d72d9915edff0dd41f601bbb0b1f83c02ff1689
Author:     Juergen Gross <jgross@xxxxxxxx>
AuthorDate: Tue Dec 1 17:37:21 2020 +0100
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Tue Dec 1 17:37:21 2020 +0100

    xen/events: rework fifo queue locking
    
    Two CPUs entering evtchn_fifo_set_pending() for the same event channel
    can race if the first one is interrupted after setting
    EVTCHN_FIFO_PENDING and the other one manages to set EVTCHN_FIFO_LINKED
    before the first one tests that bit. This can lead to
    evtchn_check_pollers() being called before the event has been put
    properly into the queue, eventually resulting in the guest not seeing
    the event pending and thus blocking forever afterwards.
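
    To see the old ordering in isolation, here is a small standalone model
    (plain pthreads/stdatomic, not Xen code; the on_queue and
    premature_wakeups variables are made up for the model, and masking and
    the queue locks are left out). It mimics "set PENDING, test and claim
    LINKED, then wake pollers if PENDING was clear", so one sender can
    reach the wake-up step while the other has claimed LINKED but not yet
    queued the event:

        #include <pthread.h>
        #include <stdatomic.h>
        #include <stdbool.h>
        #include <stdio.h>

        static atomic_bool pending, linked, on_queue;
        static atomic_int premature_wakeups;

        static void *send_event(void *arg)
        {
            /* set EVTCHN_FIFO_PENDING */
            bool was_pending = atomic_exchange(&pending, true);

            /* test, then claim, EVTCHN_FIFO_LINKED */
            if ( !atomic_load(&linked) && !atomic_exchange(&linked, true) )
            {
                /* window: LINKED is claimed, event not yet on the queue */
                atomic_store(&on_queue, true);
            }

            /* old code: "if ( !was_pending ) evtchn_check_pollers(...);" */
            if ( !was_pending && !atomic_load(&on_queue) )
                atomic_fetch_add(&premature_wakeups, 1);

            return arg;
        }

        int main(void)
        {
            for ( int i = 0; i < 20000; i++ )
            {
                pthread_t a, b;

                atomic_store(&pending, false);
                atomic_store(&linked, false);
                atomic_store(&on_queue, false);

                pthread_create(&a, NULL, send_event, NULL);
                pthread_create(&b, NULL, send_event, NULL);
                pthread_join(a, NULL);
                pthread_join(b, NULL);
            }

            /* a nonzero count means a poller could have been woken while
             * the event was not yet visible on any queue */
            printf("premature wakeups: %d\n",
                   atomic_load(&premature_wakeups));
            return 0;
        }

    Whether the counter actually becomes nonzero depends on scheduling, but
    the window it counts corresponds to the one this patch closes by
    holding the queue lock across the PENDING/LINKED handling.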
    
    Note that commit 5f2df45ead7c1195 ("xen/evtchn: rework per event channel
    lock") merely made the race more obvious; the fifo event channel
    implementation has had this race ever since per-channel locks were
    introduced and used, whenever an unmask operation runs in parallel with
    an event channel send operation.
    
    Using a spinlock for the per event channel lock had turned out to be
    problematic, because some of the paths needing to take the lock are
    called with interrupts off, so the lock would have had to disable
    interrupts, which in turn broke some use cases related to vm events.
    
    To avoid this race, the queue locking in evtchn_fifo_set_pending() needs
    to be reworked to also cover the tests of EVTCHN_FIFO_PENDING,
    EVTCHN_FIFO_MASKED and EVTCHN_FIFO_LINKED. Additionally, when an event
    channel needs to change queues, both queues need to be locked initially,
    in order to avoid having a window with no lock held at all.
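
    The resulting queue-locking pattern (see the hunk for
    evtchn_fifo_set_pending() below) can be sketched standalone roughly as
    follows. This is not Xen code: lock_queues(), struct event and
    current_queue() are made-up names, and plain pthread mutexes stand in
    for the queue spinlocks:

        #include <pthread.h>
        #include <stdatomic.h>
        #include <stdbool.h>

        struct queue {
            pthread_mutex_t lock;
            /* head, tail, priority, ... */
        };

        struct event {
            _Atomic(struct queue *) last_q;  /* stand-in for fifo_lastq */
        };

        /* Queue the event was last linked on; may change under our feet. */
        static struct queue *current_queue(struct event *ev)
        {
            return atomic_load(&ev->last_q);
        }

        /*
         * Lock the target queue q and the queue the event was last on.
         * Lock in address order, so two CPUs handling the same pair of
         * queues cannot deadlock, then re-check that the old queue did not
         * change while we were waiting for its lock; retry a few times.
         */
        static bool lock_queues(struct event *ev, struct queue *q,
                                struct queue **old_q_out)
        {
            for ( int attempt = 0; attempt < 3; attempt++ )
            {
                struct queue *old_q = current_queue(ev);

                if ( q == old_q )
                    pthread_mutex_lock(&q->lock);
                else if ( q < old_q )
                {
                    pthread_mutex_lock(&q->lock);
                    pthread_mutex_lock(&old_q->lock);
                }
                else
                {
                    pthread_mutex_lock(&old_q->lock);
                    pthread_mutex_lock(&q->lock);
                }

                if ( old_q == current_queue(ev) )
                {
                    *old_q_out = old_q;  /* both locks held, state stable */
                    return true;
                }

                if ( q != old_q )
                    pthread_mutex_unlock(&old_q->lock);
                pthread_mutex_unlock(&q->lock);
            }

            return false;  /* give up after too many queue changes */
        }

    In the patch itself the first lock is taken with spin_lock_irqsave()
    and the second with a plain spin_lock() (interrupts are already off by
    then), and after three failed attempts the event is reported as lost,
    as the dropped lock_old_queue() already did.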
    
    Reported-by: Jan Beulich <jbeulich@xxxxxxxx>
    Fixes: 5f2df45ead7c1195 ("xen/evtchn: rework per event channel lock")
    Fixes: de6acb78bf0e137c ("evtchn: use a per-event channel lock for sending events")
    Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
    Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
    master commit: 71ac522909e9302350a88bc378be99affa87067c
    master date: 2020-11-30 14:05:39 +0100
---
 xen/common/event_fifo.c | 128 ++++++++++++++++++++++++++----------------------
 1 file changed, 70 insertions(+), 58 deletions(-)

diff --git a/xen/common/event_fifo.c b/xen/common/event_fifo.c
index b1951a29ad..0a90a8404d 100644
--- a/xen/common/event_fifo.c
+++ b/xen/common/event_fifo.c
@@ -65,38 +65,6 @@ static void evtchn_fifo_init(struct domain *d, struct evtchn *evtchn)
                  d->domain_id, evtchn->port);
 }
 
-static struct evtchn_fifo_queue *lock_old_queue(const struct domain *d,
-                                                struct evtchn *evtchn,
-                                                unsigned long *flags)
-{
-    struct vcpu *v;
-    struct evtchn_fifo_queue *q, *old_q;
-    unsigned int try;
-    union evtchn_fifo_lastq lastq;
-
-    for ( try = 0; try < 3; try++ )
-    {
-        lastq.raw = read_atomic(&evtchn->fifo_lastq);
-        v = d->vcpu[lastq.last_vcpu_id];
-        old_q = &v->evtchn_fifo->queue[lastq.last_priority];
-
-        spin_lock_irqsave(&old_q->lock, *flags);
-
-        v = d->vcpu[lastq.last_vcpu_id];
-        q = &v->evtchn_fifo->queue[lastq.last_priority];
-
-        if ( old_q == q )
-            return old_q;
-
-        spin_unlock_irqrestore(&old_q->lock, *flags);
-    }
-
-    gprintk(XENLOG_WARNING,
-            "dom%d port %d lost event (too many queue changes)\n",
-            d->domain_id, evtchn->port);
-    return NULL;
-}          
-
 static int try_set_link(event_word_t *word, event_word_t *w, uint32_t link)
 {
     event_word_t new, old;
@@ -168,6 +136,9 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn)
     event_word_t *word;
     unsigned long flags;
     bool_t was_pending;
+    struct evtchn_fifo_queue *q, *old_q;
+    unsigned int try;
+    bool linked = true;
 
     port = evtchn->port;
     word = evtchn_fifo_word_from_port(d, port);
@@ -182,17 +153,67 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn)
         return;
     }
 
+    /*
+     * Lock all queues related to the event channel (in case of a queue change
+     * this might be two).
+     * It is mandatory to do that before setting and testing the PENDING bit
+     * and to hold the current queue lock until the event has been put into the
+     * list of pending events in order to avoid waking up a guest without the
+     * event being visibly pending in the guest.
+     */
+    for ( try = 0; try < 3; try++ )
+    {
+        union evtchn_fifo_lastq lastq;
+        const struct vcpu *old_v;
+
+        lastq.raw = read_atomic(&evtchn->fifo_lastq);
+        old_v = d->vcpu[lastq.last_vcpu_id];
+
+        q = &v->evtchn_fifo->queue[evtchn->priority];
+        old_q = &old_v->evtchn_fifo->queue[lastq.last_priority];
+
+        if ( q == old_q )
+            spin_lock_irqsave(&q->lock, flags);
+        else if ( q < old_q )
+        {
+            spin_lock_irqsave(&q->lock, flags);
+            spin_lock(&old_q->lock);
+        }
+        else
+        {
+            spin_lock_irqsave(&old_q->lock, flags);
+            spin_lock(&q->lock);
+        }
+
+        lastq.raw = read_atomic(&evtchn->fifo_lastq);
+        old_v = d->vcpu[lastq.last_vcpu_id];
+        if ( q == &v->evtchn_fifo->queue[evtchn->priority] &&
+             old_q == &old_v->evtchn_fifo->queue[lastq.last_priority] )
+            break;
+
+        if ( q != old_q )
+            spin_unlock(&old_q->lock);
+        spin_unlock_irqrestore(&q->lock, flags);
+    }
+
     was_pending = guest_test_and_set_bit(d, EVTCHN_FIFO_PENDING, word);
 
+    /* If we didn't get the lock bail out. */
+    if ( try == 3 )
+    {
+        gprintk(XENLOG_WARNING,
+                "%pd port %u lost event (too many queue changes)\n",
+                d, evtchn->port);
+        goto done;
+    }
+
     /*
      * Link the event if it unmasked and not already linked.
      */
     if ( !guest_test_bit(d, EVTCHN_FIFO_MASKED, word) &&
          !guest_test_bit(d, EVTCHN_FIFO_LINKED, word) )
     {
-        struct evtchn_fifo_queue *q, *old_q;
         event_word_t *tail_word;
-        bool_t linked = 0;
 
         /*
          * Control block not mapped.  The guest must not unmask an
@@ -203,25 +224,11 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn)
         {
             printk(XENLOG_G_WARNING
                    "%pv has no FIFO event channel control block\n", v);
-            goto done;
+            goto unlock;
         }
 
-        /*
-         * No locking around getting the queue. This may race with
-         * changing the priority but we are allowed to signal the
-         * event once on the old priority.
-         */
-        q = &v->evtchn_fifo->queue[evtchn->priority];
-
-        old_q = lock_old_queue(d, evtchn, &flags);
-        if ( !old_q )
-            goto done;
-
         if ( guest_test_and_set_bit(d, EVTCHN_FIFO_LINKED, word) )
-        {
-            spin_unlock_irqrestore(&old_q->lock, flags);
-            goto done;
-        }
+            goto unlock;
 
         /*
          * If this event was a tail, the old queue is now empty and
@@ -240,8 +247,8 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn)
             lastq.last_priority = q->priority;
             write_atomic(&evtchn->fifo_lastq, lastq.raw);
 
-            spin_unlock_irqrestore(&old_q->lock, flags);
-            spin_lock_irqsave(&q->lock, flags);
+            spin_unlock(&old_q->lock);
+            old_q = q;
         }
 
         /*
@@ -254,6 +261,7 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn)
          * If the queue is empty (i.e., we haven't linked to the new
          * event), head must be updated.
          */
+        linked = false;
         if ( q->tail )
         {
             tail_word = evtchn_fifo_word_from_port(d, q->tail);
@@ -262,15 +270,19 @@ static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn)
         if ( !linked )
             write_atomic(q->head, port);
         q->tail = port;
+    }
 
-        spin_unlock_irqrestore(&q->lock, flags);
+ unlock:
+    if ( q != old_q )
+        spin_unlock(&old_q->lock);
+    spin_unlock_irqrestore(&q->lock, flags);
 
-        if ( !linked
-             && !guest_test_and_set_bit(d, q->priority,
-                                        &v->evtchn_fifo->control_block->ready) )
-            vcpu_mark_events_pending(v);
-    }
  done:
+    if ( !linked &&
+         !guest_test_and_set_bit(d, q->priority,
+                                 &v->evtchn_fifo->control_block->ready) )
+        vcpu_mark_events_pending(v);
+
     if ( !was_pending )
         evtchn_check_pollers(d, port);
 }
--
generated by git-patchbot for /home/xen/git/xen.git#staging-4.10