[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC 4/4] HACK libxl_exec: Check QEMU status via QMP instead of xenstore



This patch is more of a proof of concept than a real patch, as it
would break qemu-trad.

When qemu is restricted, the qemu on the receiving side can't write
anything to xenstore once the migration is started. So it can't tell
libxl that it is ready to continue running the guest.

For libxl, the only way to find out if qemu is ready on migrate/restore
is to connect to the QMP socket and run "query-status".

This patch succeeds in implementing that, but QMP doesn't fit well with
the libxl__ev_* infrastructure. One main issue is qmp_open(): it
tries to connect to the QMP socket for up to 5 seconds without ever
yielding control back to libxl.

Also right now, xswait is disabled, but libxl could check both
xenstore and QMP at the same time.

Signed-off-by: Anthony PERARD <anthony.perard@xxxxxxxxxx>
---
 tools/libxl/libxl_dm.c       |  3 ++
 tools/libxl/libxl_exec.c     | 95 ++++++++++++++++++++++++++++++++++++++++----
 tools/libxl/libxl_internal.h | 14 +++++++
 3 files changed, 105 insertions(+), 7 deletions(-)

diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
index a3cddce8b7..43314e3309 100644
--- a/tools/libxl/libxl_dm.c
+++ b/tools/libxl/libxl_dm.c
@@ -2350,6 +2350,9 @@ retry_transaction:
     spawn->failure_cb = device_model_startup_failed;
     spawn->detached_cb = device_model_detached;
 
+    // HACK, disable xenstore watch, will instead use QMP
+    spawn->xspath = NULL;
+
     rc = libxl__spawn_spawn(egc, spawn);
     if (rc < 0)
         goto out_close;
diff --git a/tools/libxl/libxl_exec.c b/tools/libxl/libxl_exec.c
index 02e6c917f0..2b5db5197a 100644
--- a/tools/libxl/libxl_exec.c
+++ b/tools/libxl/libxl_exec.c
@@ -274,6 +274,58 @@ void libxl__spawn_init(libxl__spawn_state *ss)
     libxl__xswait_init(&ss->xswait);
 }
 
+static void qmpwait_callback(libxl__egc *egc,
+                          libxl__ev_time *ev,
+                          const struct timeval *requested_abs,
+                          int rc)
+{
+    libxl__spawn_state *ss = CONTAINER_OF(ev, *ss, qmpwait.qmp_ev);
+    libxl__dm_spawn_state *dmss = CONTAINER_OF(ss, *dmss, spawn);
+    STATE_AO_GC(ss->ao);
+
+    if (rc == ERROR_TIMEDOUT) /* As intended */
+        rc = 0;
+    else
+        goto out_err;
+
+    rc = libxl__qmp_query_status(gc, dmss->guest_domid, "running");
+
+    if (rc) {
+        /* retry QMP connection later */
+        libxl__ev_time_register_rel(ss->ao,
+                                    ev,
+                                    qmpwait_callback,
+                                    100);
+        return;
+    }
+
+    libxl__spawn_initiate_detach(gc, ss);
+    return;
+out_err:
+    LOG(DEBUG, "qmpwait failure: %d", rc);
+    ss->failure_cb(egc, ss, rc);
+}
+
+static void qmpwait_report_error(libxl__egc *egc, libxl__qmpwait_state *qmpwa,
+                                int rc)
+{
+    libxl__spawn_state *ss = CONTAINER_OF(qmpwa, *ss, qmpwait);
+    EGC_GC;
+    libxl__ev_time_deregister(gc, &qmpwa->time_ev);
+    libxl__ev_time_deregister(gc, &qmpwa->qmp_ev);
+    qmpwa->callback(egc, &ss->xswait, rc, 0);
+}
+
+static void qmpwait_timeout_callback(libxl__egc *egc, libxl__ev_time *ev,
+                             const struct timeval *requested_abs,
+                             int rc)
+{
+    EGC_GC;
+    libxl__qmpwait_state *qmpwa = CONTAINER_OF(ev, *qmpwa, time_ev);
+    LOG(DEBUG, "%s: qmpwait timeout", qmpwa->what);
+    qmpwait_report_error(egc, qmpwa, rc);
+}
+
 int libxl__spawn_spawn(libxl__egc *egc, libxl__spawn_state *ss)
 {
     STATE_AO_GC(ss->ao);
@@ -284,13 +336,35 @@ int libxl__spawn_spawn(libxl__egc *egc, libxl__spawn_state *ss)
     libxl__spawn_init(ss);
     ss->rc = ss->detaching = 0;
 
-    ss->xswait.ao = ao;
-    ss->xswait.what = GCSPRINTF("%s startup", ss->what);
-    ss->xswait.path = ss->xspath;
-    ss->xswait.timeout_ms = ss->timeout_ms;
-    ss->xswait.callback = spawn_watch_event;
-    rc = libxl__xswait_start(gc, &ss->xswait);
-    if (rc) goto out_err;
+    if (ss->xspath) {
+        ss->xswait.ao = ao;
+        ss->xswait.what = GCSPRINTF("%s startup", ss->what);
+        ss->xswait.path = ss->xspath;
+        ss->xswait.timeout_ms = ss->timeout_ms;
+        ss->xswait.callback = spawn_watch_event;
+        rc = libxl__xswait_start(gc, &ss->xswait);
+        if (rc) goto out_err;
+    } else {
+        libxl__qmpwait_state *qmpwa = &ss->qmpwait;
+
+        ss->qmpwait.ao = ao;
+        ss->qmpwait.what = GCSPRINTF("%s startup (QMP)", ss->what);
+        /*ss->qmpwait.guest_domid = ;*/
+        ss->qmpwait.timeout_ms = ss->timeout_ms;
+        ss->qmpwait.callback = spawn_watch_event;
+
+        libxl__ev_time_init(&qmpwa->time_ev);
+        libxl__ev_time_init(&qmpwa->qmp_ev);
+
+        rc = libxl__ev_time_register_rel(qmpwa->ao, &qmpwa->time_ev,
+                                         qmpwait_timeout_callback,
+                                         qmpwa->timeout_ms);
+        if (rc) goto out_err;
+
+        rc = libxl__ev_time_register_rel(ss->ao, &qmpwa->qmp_ev,
+                                         qmpwait_callback, 100);
+        if (rc) goto out_err;
+    }
 
     pid_t middle = libxl__ev_child_fork(gc, &ss->mid, spawn_middle_death);
     if (middle ==-1) { rc = ERROR_FAIL; goto out_err; }
@@ -343,10 +417,16 @@ int libxl__spawn_spawn(libxl__egc *egc, libxl__spawn_state *ss)
     return rc;
 }
 
+static void libxl__qmpwait_stop(libxl__gc *gc, libxl__qmpwait_state *qmpwa)
+{
+    libxl__ev_time_deregister(gc, &qmpwa->time_ev);
+    libxl__ev_time_deregister(gc, &qmpwa->qmp_ev);
+}
 static void spawn_cleanup(libxl__gc *gc, libxl__spawn_state *ss)
 {
     assert(!libxl__ev_child_inuse(&ss->mid));
     libxl__xswait_stop(gc, &ss->xswait);
+    libxl__qmpwait_stop(gc, &ss->qmpwait);
 }
 
 static void spawn_detach(libxl__gc *gc, libxl__spawn_state *ss)
@@ -359,6 +439,7 @@ static void spawn_detach(libxl__gc *gc, libxl__spawn_state *ss)
     assert(libxl__ev_child_inuse(&ss->mid));
     assert(ss->detaching || ss->rc);
     libxl__xswait_stop(gc, &ss->xswait);
+    libxl__qmpwait_stop(gc, &ss->qmpwait);
 
     pid_t child = ss->mid.pid;
     r = kill(child, SIGKILL);
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index d5e98114d6..fdeeeb5f45 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1550,6 +1550,19 @@ typedef void libxl__spawn_confirm_cb(libxl__egc*, libxl__spawn_state*,
  */
 typedef void libxl__spawn_detached_cb(libxl__egc*, libxl__spawn_state*);
 
+// Struct used for calling the QMP command "query-status" on a starting QEMU.
+typedef struct libxl__qmpwait_state {
+    /* caller must fill these in, and they must all remain valid */
+    libxl__ao *ao;
+    const char *what; /* for error msgs: noun phrase, what we're waiting for */
+    int guest_domid;
+    int timeout_ms; /* as for poll(2) */
+    /* remaining fields are private to qmpwait */
+    libxl__ev_time time_ev;
+    libxl__ev_time qmp_ev;
+    libxl__xswait_callback *callback;
+} libxl__qmpwait_state;
+
 struct libxl__spawn_state {
     /* must be filled in by user and remain valid */
     libxl__ao *ao;
@@ -1567,6 +1580,7 @@ struct libxl__spawn_state {
     int rc; /* might be non-0 whenever we are not Idle */
     libxl__ev_child mid; /* always in use whenever we are not Idle */
     libxl__xswait_state xswait;
+    libxl__qmpwait_state qmpwait;
 };
 
 static inline int libxl__spawn_inuse(const libxl__spawn_state *ss)
-- 
Anthony PERARD


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.