[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [XEN PATCH for-4.13 v3 7/7] libxl_qmp: Have a lock for QMP socket access



This patch workaround the fact that it's not possible to connect
multiple time to a single QMP socket. QEMU listen on the socket with
a backlog value of 1, which mean that on Linux when concurrent thread
call connect() on the socket, they get EAGAIN.

Background:
    This happens when attempting to create a guest with multiple
    pci devices passthrough, libxl creates one connection per device to
    attach and execute connect() on all at once before any single
    connection has finished.

To work around this, we use a new lock.

Reported-by: Sander Eikelenboom <linux@xxxxxxxxxxxxxx>
Signed-off-by: Anthony PERARD <anthony.perard@xxxxxxxxxx>
---

Notes:
    v3:
    - Use new libxl_ev_immediate in qmp_ev_connect's callbacks error path.
    - Add `lock' state into the internal state table.
      And add comments about state changes to the new functions.
    
    v2:
    - Handle error path

 tools/libxl/libxl_internal.c |   5 ++
 tools/libxl/libxl_internal.h |   9 +++
 tools/libxl/libxl_qmp.c      | 118 +++++++++++++++++++++++++++--------
 3 files changed, 107 insertions(+), 25 deletions(-)

diff --git a/tools/libxl/libxl_internal.c b/tools/libxl/libxl_internal.c
index b2084157e4cd..ba5637358e7c 100644
--- a/tools/libxl/libxl_internal.c
+++ b/tools/libxl/libxl_internal.c
@@ -590,6 +590,11 @@ void libxl__ev_devlock_init(libxl__ev_slowlock *lock)
     ev_slowlock_init_internal(lock, "libxl-device-changes-lock");
 }
 
+void libxl__ev_qmplock_init(libxl__ev_slowlock *lock)
+{
+    ev_slowlock_init_internal(lock, "qmp-socket-lock");
+}
+
 static void ev_lock_prepare_fork(libxl__egc *egc, libxl__ev_slowlock *lock);
 static void ev_lock_child_callback(libxl__egc *egc, libxl__ev_child *child,
                                    pid_t pid, int status);
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 400752a7f8fe..8988831ae5f7 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -386,6 +386,9 @@ void libxl__ev_immediate_register(libxl__egc *, 
libxl__ev_immediate *);
  * which may take a significant amount time.
  * It is to be acquired by an ao event callback.
  *
+ * If libxl__ev_devlock is needed, it should be acquired while every
+ * libxl__ev_qmp are Idle for the current domain.
+ *
  * It is to be acquired when adding/removing devices or making changes
  * to them when this is a slow operation and json_lock isn't appropriate.
  *
@@ -426,6 +429,7 @@ struct libxl__ev_slowlock {
     bool held;
 };
 _hidden void libxl__ev_devlock_init(libxl__ev_slowlock *);
+_hidden void libxl__ev_qmplock_init(libxl__ev_slowlock *);
 _hidden void libxl__ev_slowlock_lock(libxl__egc *, libxl__ev_slowlock *);
 _hidden void libxl__ev_slowlock_unlock(libxl__gc *, libxl__ev_slowlock *);
 _hidden void libxl__ev_slowlock_dispose(libxl__gc *, libxl__ev_slowlock *);
@@ -494,6 +498,8 @@ _hidden void libxl__ev_qmp_dispose(libxl__gc *gc, 
libxl__ev_qmp *ev);
 typedef enum {
     /* initial state */
     qmp_state_disconnected = 1,
+    /* waiting for lock */
+    qmp_state_waiting_lock,
     /* connected to QMP socket, waiting for greeting message */
     qmp_state_connecting,
     /* qmp_capabilities command sent, waiting for reply */
@@ -527,6 +533,9 @@ struct libxl__ev_qmp {
     libxl__carefd *cfd;
     libxl__ev_fd efd;
     libxl__qmp_state state;
+    libxl__ev_slowlock lock;
+    libxl__ev_immediate ei;
+    int rc;
     int id;
     int next_id;        /* next id to use */
     /* receive buffer */
diff --git a/tools/libxl/libxl_qmp.c b/tools/libxl/libxl_qmp.c
index f0e0b50bd1c5..c8eac2588a89 100644
--- a/tools/libxl/libxl_qmp.c
+++ b/tools/libxl/libxl_qmp.c
@@ -1082,16 +1082,17 @@ static void dm_state_saved(libxl__egc *egc, 
libxl__ev_qmp *ev,
 /*
  * Possible internal state compared to qmp_state:
  *
- * qmp_state     External   cfd    efd     id     rx_buf* tx_buf* msg*
- * disconnected   Idle       NULL   Idle    reset  free    free    free
- * connecting     Active     open   IN      reset  used    free    set
- * cap.neg        Active     open   IN|OUT  sent   used    cap_neg set
- * cap.neg        Active     open   IN      sent   used    free    set
- * connected      Connected  open   IN      any    used    free    free
- * waiting_reply  Active     open   IN|OUT  sent   used    free    set
- * waiting_reply  Active     open   IN|OUT  sent   used    user's  free
- * waiting_reply  Active     open   IN      sent   used    free    free
- * broken[1]      none[2]    any    Active  any    any     any     any
+ * qmp_state     External   cfd    efd     id     rx_buf* tx_buf* msg* lock
+ * disconnected   Idle       NULL   Idle    reset  free    free    free Idle
+ * waiting_lock   Active     open   Idle    reset  used    free    set  Active
+ * connecting     Active     open   IN      reset  used    free    set  
Acquired
+ * cap.neg        Active     open   IN|OUT  sent   used    cap_neg set  
Acquired
+ * cap.neg        Active     open   IN      sent   used    free    set  
Acquired
+ * connected      Connected  open   IN      any    used    free    free 
Acquired
+ * waiting_reply  Active     open   IN|OUT  sent   used    free    set  
Acquired
+ * waiting_reply  Active     open   IN|OUT  sent   used    user's  free 
Acquired
+ * waiting_reply  Active     open   IN      sent   used    free    free 
Acquired
+ * broken[1]      none[2]    any    Active  any    any     any     any  any
  *
  * [1] When an internal function return an error, it can leave ev_qmp in a
  * `broken` state but only if the caller is another internal function.
@@ -1118,7 +1119,8 @@ static void dm_state_saved(libxl__egc *egc, libxl__ev_qmp 
*ev,
  *     msg_id           0     id assoctiated with the command in `msg`
  *
  * - Allowed internal state transition:
- * disconnected                     -> connecting
+ * disconnected                     -> waiting_lock
+ * waiting_lock                     -> connecting
  * connection                       -> capability_negotiation
  * capability_negotiation/connected -> waiting_reply
  * waiting_reply                    -> connected
@@ -1153,6 +1155,10 @@ static void qmp_ev_ensure_reading_writing(libxl__gc *gc, 
libxl__ev_qmp *ev)
 {
     short events = POLLIN;
 
+    if (ev->state == qmp_state_waiting_lock)
+        /* We can't modifie the efd yet, as it isn't registered. */
+        return;
+
     if (ev->tx_buf)
         events |= POLLOUT;
     else if ((ev->state == qmp_state_waiting_reply) && ev->msg)
@@ -1168,9 +1174,12 @@ static void qmp_ev_set_state(libxl__gc *gc, 
libxl__ev_qmp *ev,
     switch (new_state) {
     case qmp_state_disconnected:
         break;
-    case qmp_state_connecting:
+    case qmp_state_waiting_lock:
         assert(ev->state == qmp_state_disconnected);
         break;
+    case qmp_state_connecting:
+        assert(ev->state == qmp_state_waiting_lock);
+        break;
     case qmp_state_capability_negotiation:
         assert(ev->state == qmp_state_connecting);
         break;
@@ -1231,20 +1240,22 @@ static int 
qmp_error_class_to_libxl_error_code(libxl__gc *gc,
 
 /* Setup connection */
 
-static int qmp_ev_connect(libxl__gc *gc, libxl__ev_qmp *ev)
-    /* disconnected -> connecting but with `msg` free
+static void qmp_ev_lock_aquired(libxl__egc *, libxl__ev_slowlock *,
+                                int rc);
+static void lock_error_callback(libxl__egc *, libxl__ev_immediate *);
+
+static int qmp_ev_connect(libxl__egc *egc, libxl__ev_qmp *ev)
+    /* disconnected -> waiting_lock/connecting but with `msg` free
      * on error: broken */
 {
+    EGC_GC;
     int fd;
-    int rc, r;
-    struct sockaddr_un un;
-    const char *qmp_socket_path;
-
-    assert(ev->state == qmp_state_disconnected);
+    int rc;
 
-    qmp_socket_path = libxl__qemu_qmp_path(gc, ev->domid);
+    /* Convenience aliases */
+    libxl__ev_slowlock *lock = &ev->lock;
 
-    LOGD(DEBUG, ev->domid, "Connecting to %s", qmp_socket_path);
+    assert(ev->state == qmp_state_disconnected);
 
     libxl__carefd_begin();
     fd = socket(AF_UNIX, SOCK_STREAM, 0);
@@ -1258,6 +1269,36 @@ static int qmp_ev_connect(libxl__gc *gc, libxl__ev_qmp 
*ev)
     if (rc)
         goto out;
 
+    qmp_ev_set_state(gc, ev, qmp_state_waiting_lock);
+
+    lock->ao = ev->ao;
+    lock->domid = ev->domid;
+    lock->callback = qmp_ev_lock_aquired;
+    libxl__ev_slowlock_lock(egc, &ev->lock);
+
+    return 0;
+
+out:
+    return rc;
+}
+
+static void qmp_ev_lock_aquired(libxl__egc *egc, libxl__ev_slowlock *lock,
+                                int rc)
+    /* waiting_lock (with `lock' Acquired) -> connecting
+     * on error: broken */
+{
+    libxl__ev_qmp *ev = CONTAINER_OF(lock, *ev, lock);
+    EGC_GC;
+    const char *qmp_socket_path;
+    struct sockaddr_un un;
+    int r;
+
+    if (rc) goto out;
+
+    qmp_socket_path = libxl__qemu_qmp_path(gc, ev->domid);
+
+    LOGD(DEBUG, ev->domid, "Connecting to %s", qmp_socket_path);
+
     rc = libxl__prepare_sockaddr_un(gc, &un, qmp_socket_path,
                                     "QMP socket");
     if (rc)
@@ -1279,10 +1320,33 @@ static int qmp_ev_connect(libxl__gc *gc, libxl__ev_qmp 
*ev)
 
     qmp_ev_set_state(gc, ev, qmp_state_connecting);
 
-    return 0;
+    return;
 
 out:
-    return rc;
+    /* An error occurred and we need to let the caller know.  At this
+     * point, we can only do so via the callback. Unfortunately, the
+     * callback of libxl__ev_slowlock_lock() might be called synchronously,
+     * but libxl__ev_qmp_send() promise that it will not call the callback
+     * synchronously. So we have to arrange to call the callback
+     * asynchronously. */
+    ev->rc = rc;
+    ev->ei.callback = lock_error_callback;
+    libxl__ev_immediate_register(egc, &ev->ei);
+}
+
+static void lock_error_callback(libxl__egc *egc, libxl__ev_immediate *ei)
+    /* broken -> disconnected */
+{
+    EGC_GC;
+    libxl__ev_qmp *ev = CONTAINER_OF(ei, *ev, ei);
+
+    int rc = ev->rc;
+
+    /* On error, deallocate all private resources */
+    libxl__ev_qmp_dispose(gc, ev);
+
+    /* And tell libxl__ev_qmp user about the error */
+    ev->callback(egc, ev, NULL, rc); /* must be last */
 }
 
 /* QMP FD callbacks */
@@ -1779,11 +1843,14 @@ void libxl__ev_qmp_init(libxl__ev_qmp *ev)
     ev->qemu_version.major = -1;
     ev->qemu_version.minor = -1;
     ev->qemu_version.micro = -1;
+
+    libxl__ev_qmplock_init(&ev->lock);
+    ev->rc = 0;
 }
 
 int libxl__ev_qmp_send(libxl__egc *egc, libxl__ev_qmp *ev,
                        const char *cmd, libxl__json_object *args)
-    /* disconnected -> connecting
+    /* disconnected -> waiting_lock/connecting
      * connected -> waiting_reply (with msg set)
      * on error: disconnected */
 {
@@ -1798,7 +1865,7 @@ int libxl__ev_qmp_send(libxl__egc *egc, libxl__ev_qmp *ev,
 
     /* Connect to QEMU if not already connected */
     if (ev->state == qmp_state_disconnected) {
-        rc = qmp_ev_connect(gc, ev);
+        rc = qmp_ev_connect(egc, ev);
         if (rc)
             goto error;
     }
@@ -1830,6 +1897,7 @@ void libxl__ev_qmp_dispose(libxl__gc *gc, libxl__ev_qmp 
*ev)
 
     libxl__ev_fd_deregister(gc, &ev->efd);
     libxl__carefd_close(ev->cfd);
+    libxl__ev_slowlock_dispose(gc, &ev->lock);
 
     libxl__ev_qmp_init(ev);
 }
-- 
Anthony PERARD


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.