[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen stable-4.11] xen/evtchn: rework per event channel lock



commit 4438fc14a6c60b265a47c780d6a61d4f3015a297
Author:     Juergen Gross <jgross@xxxxxxxx>
AuthorDate: Tue Dec 1 17:04:43 2020 +0100
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Tue Dec 1 17:04:43 2020 +0100

    xen/evtchn: rework per event channel lock
    
    Currently the lock for a single event channel needs to be taken with
    interrupts off, which causes deadlocks in some cases.
    
    Rework the per event channel lock to be non-blocking for the case of
    sending an event and removing the need for disabling interrupts for
    taking the lock.
    
    The lock is needed for avoiding races between event channel state
    changes (creation, closing, binding) against normal operations (set
    pending, [un]masking, priority changes).
    
    Use a rwlock, but with some restrictions:
    
    - Changing the state of an event channel (creation, closing, binding)
      needs to use write_lock(), with ASSERT()ing that the lock is taken as
      writer only when the state of the event channel is either before or
      after the locked region appropriate (either free or unbound).
    
    - Sending an event needs to use read_trylock() mostly, in case of not
      obtaining the lock the operation is omitted. This is needed as
      sending an event can happen with interrupts off (at least in some
      cases).
    
    - Dumping the event channel state for debug purposes is using
      read_trylock(), too, in order to avoid blocking in case the lock is
      taken as writer for a long time.
    
    - All other cases can use read_lock().
    
    Fixes: e045199c7c9c54 ("evtchn: address races with evtchn_reset()")
    Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
    Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
    Acked-by: Julien Grall <jgrall@xxxxxxxxxx>
    
    xen/events: fix build
    
    Commit 5f2df45ead7c1195 ("xen/evtchn: rework per event channel lock")
    introduced a build failure for NDEBUG builds.
    
    Fixes: 5f2df45ead7c1195 ("xen/evtchn: rework per event channel lock")
    Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    master commit: 5f2df45ead7c1195142f68b7923047a1e9479d54
    master date: 2020-11-10 14:36:15 +0100
    master commit: 53bacb86f496fdb11560d9e3b361bca7de60d268
    master date: 2020-11-11 08:56:21 +0100
---
 xen/arch/x86/irq.c         |   6 +-
 xen/arch/x86/pv/shim.c     |   9 +--
 xen/common/event_channel.c | 141 +++++++++++++++++++++++++++------------------
 xen/include/xen/event.h    |  27 +++++++--
 xen/include/xen/sched.h    |   5 +-
 5 files changed, 116 insertions(+), 72 deletions(-)

diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
index 9878486073..a293270cf2 100644
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -2331,14 +2331,12 @@ static void dump_irqs(unsigned char key)
                 pirq = domain_irq_to_pirq(d, irq);
                 info = pirq_info(d, pirq);
                 evtchn = evtchn_from_port(d, info->evtchn);
-                local_irq_disable();
-                if ( spin_trylock(&evtchn->lock) )
+                if ( evtchn_read_trylock(evtchn) )
                 {
                     pending = evtchn_is_pending(d, evtchn);
                     masked = evtchn_is_masked(d, evtchn);
-                    spin_unlock(&evtchn->lock);
+                    evtchn_read_unlock(evtchn);
                 }
-                local_irq_enable();
                 printk("%u:%3d(%c%c%c)",
                        d->domain_id, pirq, "-P?"[pending],
                        "-M?"[masked], info->masked ? 'M' : '-');
diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c
index 4b7d498c00..f8883bd102 100644
--- a/xen/arch/x86/pv/shim.c
+++ b/xen/arch/x86/pv/shim.c
@@ -616,11 +616,12 @@ void pv_shim_inject_evtchn(unsigned int port)
     if ( port_is_valid(guest, port) )
     {
         struct evtchn *chn = evtchn_from_port(guest, port);
-        unsigned long flags;
 
-        spin_lock_irqsave(&chn->lock, flags);
-        evtchn_port_set_pending(guest, chn->notify_vcpu_id, chn);
-        spin_unlock_irqrestore(&chn->lock, flags);
+        if ( evtchn_read_trylock(chn) )
+        {
+            evtchn_port_set_pending(guest, chn->notify_vcpu_id, chn);
+            evtchn_read_unlock(chn);
+        }
     }
 }
 
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index 0066c8a87f..7f2ad9d826 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -50,6 +50,40 @@
 
 #define consumer_is_xen(e) (!!(e)->xen_consumer)
 
+/*
+ * Lock an event channel exclusively. This is allowed only when the channel is
+ * free or unbound either when taking or when releasing the lock, as any
+ * concurrent operation on the event channel using evtchn_read_trylock() will
+ * just assume the event channel is free or unbound at the moment when the
+ * evtchn_read_trylock() returns false.
+ */
+static inline void evtchn_write_lock(struct evtchn *evtchn)
+{
+    write_lock(&evtchn->lock);
+
+#ifndef NDEBUG
+    evtchn->old_state = evtchn->state;
+#endif
+}
+
+static inline unsigned int old_state(const struct evtchn *evtchn)
+{
+#ifndef NDEBUG
+    return evtchn->old_state;
+#else
+    return ECS_RESERVED; /* Just to allow things to build. */
+#endif
+}
+
+static inline void evtchn_write_unlock(struct evtchn *evtchn)
+{
+    /* Enforce lock discipline. */
+    ASSERT(old_state(evtchn) == ECS_FREE || old_state(evtchn) == ECS_UNBOUND ||
+           evtchn->state == ECS_FREE || evtchn->state == ECS_UNBOUND);
+
+    write_unlock(&evtchn->lock);
+}
+
 /*
  * The function alloc_unbound_xen_event_channel() allows an arbitrary
  * notifier function to be specified. However, very few unique functions
@@ -131,7 +165,7 @@ static struct evtchn *alloc_evtchn_bucket(struct domain *d, 
unsigned int port)
             return NULL;
         }
         chn[i].port = port + i;
-        spin_lock_init(&chn[i].lock);
+        rwlock_init(&chn[i].lock);
     }
     return chn;
 }
@@ -249,7 +283,6 @@ static long evtchn_alloc_unbound(evtchn_alloc_unbound_t 
*alloc)
     int            port;
     domid_t        dom = alloc->dom;
     long           rc;
-    unsigned long  flags;
 
     d = rcu_lock_domain_by_any_id(dom);
     if ( d == NULL )
@@ -265,14 +298,14 @@ static long evtchn_alloc_unbound(evtchn_alloc_unbound_t 
*alloc)
     if ( rc )
         goto out;
 
-    spin_lock_irqsave(&chn->lock, flags);
+    evtchn_write_lock(chn);
 
     chn->state = ECS_UNBOUND;
     if ( (chn->u.unbound.remote_domid = alloc->remote_dom) == DOMID_SELF )
         chn->u.unbound.remote_domid = current->domain->domain_id;
     evtchn_port_init(d, chn);
 
-    spin_unlock_irqrestore(&chn->lock, flags);
+    evtchn_write_unlock(chn);
 
     alloc->port = port;
 
@@ -285,32 +318,26 @@ static long evtchn_alloc_unbound(evtchn_alloc_unbound_t 
*alloc)
 }
 
 
-static unsigned long double_evtchn_lock(struct evtchn *lchn,
-                                        struct evtchn *rchn)
+static void double_evtchn_lock(struct evtchn *lchn, struct evtchn *rchn)
 {
-    unsigned long flags;
-
     if ( lchn <= rchn )
     {
-        spin_lock_irqsave(&lchn->lock, flags);
+        evtchn_write_lock(lchn);
         if ( lchn != rchn )
-            spin_lock(&rchn->lock);
+            evtchn_write_lock(rchn);
     }
     else
     {
-        spin_lock_irqsave(&rchn->lock, flags);
-        spin_lock(&lchn->lock);
+        evtchn_write_lock(rchn);
+        evtchn_write_lock(lchn);
     }
-
-    return flags;
 }
 
-static void double_evtchn_unlock(struct evtchn *lchn, struct evtchn *rchn,
-                                 unsigned long flags)
+static void double_evtchn_unlock(struct evtchn *lchn, struct evtchn *rchn)
 {
     if ( lchn != rchn )
-        spin_unlock(&lchn->lock);
-    spin_unlock_irqrestore(&rchn->lock, flags);
+        evtchn_write_unlock(lchn);
+    evtchn_write_unlock(rchn);
 }
 
 static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
@@ -320,7 +347,6 @@ static long 
evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
     int            lport, rport = bind->remote_port;
     domid_t        rdom = bind->remote_dom;
     long           rc;
-    unsigned long  flags;
 
     if ( rdom == DOMID_SELF )
         rdom = current->domain->domain_id;
@@ -356,7 +382,7 @@ static long 
evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
     if ( rc )
         goto out;
 
-    flags = double_evtchn_lock(lchn, rchn);
+    double_evtchn_lock(lchn, rchn);
 
     lchn->u.interdomain.remote_dom  = rd;
     lchn->u.interdomain.remote_port = rport;
@@ -373,7 +399,7 @@ static long 
evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
      */
     evtchn_port_set_pending(ld, lchn->notify_vcpu_id, lchn);
 
-    double_evtchn_unlock(lchn, rchn, flags);
+    double_evtchn_unlock(lchn, rchn);
 
     bind->local_port = lport;
 
@@ -396,7 +422,6 @@ int evtchn_bind_virq(evtchn_bind_virq_t *bind, 
evtchn_port_t port)
     struct domain *d = current->domain;
     int            virq = bind->virq, vcpu = bind->vcpu;
     int            rc = 0;
-    unsigned long  flags;
 
     if ( (virq < 0) || (virq >= ARRAY_SIZE(v->virq_to_evtchn)) )
         return -EINVAL;
@@ -429,14 +454,14 @@ int evtchn_bind_virq(evtchn_bind_virq_t *bind, 
evtchn_port_t port)
 
     chn = evtchn_from_port(d, port);
 
-    spin_lock_irqsave(&chn->lock, flags);
+    evtchn_write_lock(chn);
 
     chn->state          = ECS_VIRQ;
     chn->notify_vcpu_id = vcpu;
     chn->u.virq         = virq;
     evtchn_port_init(d, chn);
 
-    spin_unlock_irqrestore(&chn->lock, flags);
+    evtchn_write_unlock(chn);
 
     v->virq_to_evtchn[virq] = bind->port = port;
 
@@ -453,7 +478,6 @@ static long evtchn_bind_ipi(evtchn_bind_ipi_t *bind)
     struct domain *d = current->domain;
     int            port, vcpu = bind->vcpu;
     long           rc = 0;
-    unsigned long  flags;
 
     if ( (vcpu < 0) || (vcpu >= d->max_vcpus) ||
          (d->vcpu[vcpu] == NULL) )
@@ -466,13 +490,13 @@ static long evtchn_bind_ipi(evtchn_bind_ipi_t *bind)
 
     chn = evtchn_from_port(d, port);
 
-    spin_lock_irqsave(&chn->lock, flags);
+    evtchn_write_lock(chn);
 
     chn->state          = ECS_IPI;
     chn->notify_vcpu_id = vcpu;
     evtchn_port_init(d, chn);
 
-    spin_unlock_irqrestore(&chn->lock, flags);
+    evtchn_write_unlock(chn);
 
     bind->port = port;
 
@@ -516,7 +540,6 @@ static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
     struct pirq   *info;
     int            port = 0, pirq = bind->pirq;
     long           rc;
-    unsigned long  flags;
 
     if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
         return -EINVAL;
@@ -549,14 +572,14 @@ static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
         goto out;
     }
 
-    spin_lock_irqsave(&chn->lock, flags);
+    evtchn_write_lock(chn);
 
     chn->state  = ECS_PIRQ;
     chn->u.pirq.irq = pirq;
     link_pirq_port(port, chn, v);
     evtchn_port_init(d, chn);
 
-    spin_unlock_irqrestore(&chn->lock, flags);
+    evtchn_write_unlock(chn);
 
     bind->port = port;
 
@@ -577,7 +600,6 @@ int evtchn_close(struct domain *d1, int port1, bool guest)
     struct evtchn *chn1, *chn2;
     int            port2;
     long           rc = 0;
-    unsigned long  flags;
 
  again:
     spin_lock(&d1->event_lock);
@@ -677,14 +699,14 @@ int evtchn_close(struct domain *d1, int port1, bool guest)
         BUG_ON(chn2->state != ECS_INTERDOMAIN);
         BUG_ON(chn2->u.interdomain.remote_dom != d1);
 
-        flags = double_evtchn_lock(chn1, chn2);
+        double_evtchn_lock(chn1, chn2);
 
         evtchn_free(d1, chn1);
 
         chn2->state = ECS_UNBOUND;
         chn2->u.unbound.remote_domid = d1->domain_id;
 
-        double_evtchn_unlock(chn1, chn2, flags);
+        double_evtchn_unlock(chn1, chn2);
 
         goto out;
 
@@ -692,9 +714,9 @@ int evtchn_close(struct domain *d1, int port1, bool guest)
         BUG();
     }
 
-    spin_lock_irqsave(&chn1->lock, flags);
+    evtchn_write_lock(chn1);
     evtchn_free(d1, chn1);
-    spin_unlock_irqrestore(&chn1->lock, flags);
+    evtchn_write_unlock(chn1);
 
  out:
     if ( d2 != NULL )
@@ -714,7 +736,6 @@ int evtchn_send(struct domain *ld, unsigned int lport)
     struct evtchn *lchn, *rchn;
     struct domain *rd;
     int            rport, ret = 0;
-    unsigned long  flags;
 
     if ( !port_is_valid(ld, lport) )
         return -EINVAL;
@@ -727,7 +748,7 @@ int evtchn_send(struct domain *ld, unsigned int lport)
 
     lchn = evtchn_from_port(ld, lport);
 
-    spin_lock_irqsave(&lchn->lock, flags);
+    evtchn_read_lock(lchn);
 
     /* Guest cannot send via a Xen-attached event channel. */
     if ( unlikely(consumer_is_xen(lchn)) )
@@ -762,7 +783,7 @@ int evtchn_send(struct domain *ld, unsigned int lport)
     }
 
 out:
-    spin_unlock_irqrestore(&lchn->lock, flags);
+    evtchn_read_unlock(lchn);
 
     return ret;
 }
@@ -789,9 +810,11 @@ void send_guest_vcpu_virq(struct vcpu *v, uint32_t virq)
 
     d = v->domain;
     chn = evtchn_from_port(d, port);
-    spin_lock(&chn->lock);
-    evtchn_port_set_pending(d, v->vcpu_id, chn);
-    spin_unlock(&chn->lock);
+    if ( evtchn_read_trylock(chn) )
+    {
+        evtchn_port_set_pending(d, v->vcpu_id, chn);
+        evtchn_read_unlock(chn);
+    }
 
  out:
     spin_unlock_irqrestore(&v->virq_lock, flags);
@@ -820,9 +843,11 @@ static void send_guest_global_virq(struct domain *d, 
uint32_t virq)
         goto out;
 
     chn = evtchn_from_port(d, port);
-    spin_lock(&chn->lock);
-    evtchn_port_set_pending(d, chn->notify_vcpu_id, chn);
-    spin_unlock(&chn->lock);
+    if ( evtchn_read_trylock(chn) )
+    {
+        evtchn_port_set_pending(d, chn->notify_vcpu_id, chn);
+        evtchn_read_unlock(chn);
+    }
 
  out:
     spin_unlock_irqrestore(&v->virq_lock, flags);
@@ -832,7 +857,6 @@ void send_guest_pirq(struct domain *d, const struct pirq 
*pirq)
 {
     int port;
     struct evtchn *chn;
-    unsigned long flags;
 
     /*
      * PV guests: It should not be possible to race with __evtchn_close(). The
@@ -847,9 +871,11 @@ void send_guest_pirq(struct domain *d, const struct pirq 
*pirq)
     }
 
     chn = evtchn_from_port(d, port);
-    spin_lock_irqsave(&chn->lock, flags);
-    evtchn_port_set_pending(d, chn->notify_vcpu_id, chn);
-    spin_unlock_irqrestore(&chn->lock, flags);
+    if ( evtchn_read_trylock(chn) )
+    {
+        evtchn_port_set_pending(d, chn->notify_vcpu_id, chn);
+        evtchn_read_unlock(chn);
+    }
 }
 
 static struct domain *global_virq_handlers[NR_VIRQS] __read_mostly;
@@ -1044,15 +1070,17 @@ int evtchn_unmask(unsigned int port)
 {
     struct domain *d = current->domain;
     struct evtchn *evtchn;
-    unsigned long flags;
 
     if ( unlikely(!port_is_valid(d, port)) )
         return -EINVAL;
 
     evtchn = evtchn_from_port(d, port);
-    spin_lock_irqsave(&evtchn->lock, flags);
+
+    evtchn_read_lock(evtchn);
+
     evtchn_port_unmask(d, evtchn);
-    spin_unlock_irqrestore(&evtchn->lock, flags);
+
+    evtchn_read_unlock(evtchn);
 
     return 0;
 }
@@ -1298,7 +1326,6 @@ int alloc_unbound_xen_event_channel(
 {
     struct evtchn *chn;
     int            port, rc;
-    unsigned long  flags;
 
     spin_lock(&ld->event_lock);
 
@@ -1311,14 +1338,14 @@ int alloc_unbound_xen_event_channel(
     if ( rc )
         goto out;
 
-    spin_lock_irqsave(&chn->lock, flags);
+    evtchn_write_lock(chn);
 
     chn->state = ECS_UNBOUND;
     chn->xen_consumer = get_xen_consumer(notification_fn);
     chn->notify_vcpu_id = lvcpu;
     chn->u.unbound.remote_domid = remote_domid;
 
-    spin_unlock_irqrestore(&chn->lock, flags);
+    evtchn_write_unlock(chn);
 
     write_atomic(&ld->xen_evtchns, ld->xen_evtchns + 1);
 
@@ -1350,7 +1377,6 @@ void notify_via_xen_event_channel(struct domain *ld, int 
lport)
 {
     struct evtchn *lchn, *rchn;
     struct domain *rd;
-    unsigned long flags;
 
     if ( !port_is_valid(ld, lport) )
     {
@@ -1365,7 +1391,8 @@ void notify_via_xen_event_channel(struct domain *ld, int 
lport)
 
     lchn = evtchn_from_port(ld, lport);
 
-    spin_lock_irqsave(&lchn->lock, flags);
+    if ( !evtchn_read_trylock(lchn) )
+        return;
 
     if ( likely(lchn->state == ECS_INTERDOMAIN) )
     {
@@ -1375,7 +1402,7 @@ void notify_via_xen_event_channel(struct domain *ld, int 
lport)
         evtchn_port_set_pending(rd, rchn->notify_vcpu_id, rchn);
     }
 
-    spin_unlock_irqrestore(&lchn->lock, flags);
+    evtchn_read_unlock(lchn);
 }
 
 void evtchn_check_pollers(struct domain *d, unsigned int port)
diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
index 87a4aade86..b0c39d402c 100644
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -103,6 +103,21 @@ static inline unsigned int max_evtchns(const struct domain 
*d)
                           : BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d);
 }
 
+static inline void evtchn_read_lock(struct evtchn *evtchn)
+{
+    read_lock(&evtchn->lock);
+}
+
+static inline bool evtchn_read_trylock(struct evtchn *evtchn)
+{
+    return read_trylock(&evtchn->lock);
+}
+
+static inline void evtchn_read_unlock(struct evtchn *evtchn)
+{
+    read_unlock(&evtchn->lock);
+}
+
 static inline bool_t port_is_valid(struct domain *d, unsigned int p)
 {
     if ( p >= read_atomic(&d->valid_evtchns) )
@@ -236,11 +251,10 @@ static inline bool evtchn_port_is_pending(struct domain 
*d, evtchn_port_t port)
 {
     struct evtchn *evtchn = evtchn_from_port(d, port);
     bool rc;
-    unsigned long flags;
 
-    spin_lock_irqsave(&evtchn->lock, flags);
+    evtchn_read_lock(evtchn);
     rc = evtchn_is_pending(d, evtchn);
-    spin_unlock_irqrestore(&evtchn->lock, flags);
+    evtchn_read_unlock(evtchn);
 
     return rc;
 }
@@ -255,11 +269,12 @@ static inline bool evtchn_port_is_masked(struct domain 
*d, evtchn_port_t port)
 {
     struct evtchn *evtchn = evtchn_from_port(d, port);
     bool rc;
-    unsigned long flags;
 
-    spin_lock_irqsave(&evtchn->lock, flags);
+    evtchn_read_lock(evtchn);
+
     rc = evtchn_is_masked(d, evtchn);
-    spin_unlock_irqrestore(&evtchn->lock, flags);
+
+    evtchn_read_unlock(evtchn);
 
     return rc;
 }
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 7e4ad5d51b..3c1284c3da 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -82,7 +82,7 @@ extern domid_t hardware_domid;
 
 struct evtchn
 {
-    spinlock_t lock;
+    rwlock_t lock;
 #define ECS_FREE         0 /* Channel is available for use.                  */
 #define ECS_RESERVED     1 /* Channel is reserved.                           */
 #define ECS_UNBOUND      2 /* Channel is waiting to bind to a remote domain. */
@@ -111,6 +111,9 @@ struct evtchn
         u16 virq;      /* state == ECS_VIRQ */
     } u;
     u8 priority;
+#ifndef NDEBUG
+    u8 old_state;      /* State when taking lock in write mode. */
+#endif
     u8 last_priority;
     u16 last_vcpu_id;
 #ifdef CONFIG_XSM
--
generated by git-patchbot for /home/xen/git/xen.git#stable-4.11



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.