[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 27/27] libxl: abort bootloader invocation when domain dies



Cancel the bootloader (specifically, by sending it a signal) if the
domain is seen to disappear from xenstore.

We use a new utility event source libxl__domaindeathcheck which
provides a convenient wrapper for the xenstore watch.

Signed-off-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>

Changes since v8:
 * Fixed the commit message summary line.

Changes since v7:
 * Add a comment explaining why we use a watch on the domain's
   xenstore path rather than @releaseDomain.
 * Fix typo in error message.
---
 tools/libxl/libxl_bootloader.c |   43 ++++++++++++++++++++++++++++--------
 tools/libxl/libxl_event.c      |   47 ++++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_internal.h   |   29 +++++++++++++++++++++++-
 3 files changed, 108 insertions(+), 11 deletions(-)

diff --git a/tools/libxl/libxl_bootloader.c b/tools/libxl/libxl_bootloader.c
index 8436c07..fb1302b 100644
--- a/tools/libxl/libxl_bootloader.c
+++ b/tools/libxl/libxl_bootloader.c
@@ -31,6 +31,7 @@ static void bootloader_keystrokes_copyfail(libxl__egc *egc,
        libxl__datacopier_state *dc, int onwrite, int errnoval);
 static void bootloader_display_copyfail(libxl__egc *egc,
        libxl__datacopier_state *dc, int onwrite, int errnoval);
+static void bootloader_domaindeath(libxl__egc*, libxl__domaindeathcheck *dc);
 static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child,
                                 pid_t pid, int status);
 
@@ -213,6 +214,7 @@ void libxl__bootloader_init(libxl__bootloader_state *bl)
     bl->ptys[0].master = bl->ptys[0].slave = 0;
     bl->ptys[1].master = bl->ptys[1].slave = 0;
     libxl__ev_child_init(&bl->child);
+    libxl__domaindeathcheck_init(&bl->deathcheck);
     bl->keystrokes.ao = bl->ao;  libxl__datacopier_init(&bl->keystrokes);
     bl->display.ao = bl->ao;     libxl__datacopier_init(&bl->display);
 }
@@ -230,6 +232,7 @@ static void bootloader_cleanup(libxl__egc *egc, 
libxl__bootloader_state *bl)
         free(bl->diskpath);
         bl->diskpath = 0;
     }
+    libxl__domaindeathcheck_stop(gc,&bl->deathcheck);
     libxl__datacopier_kill(&bl->keystrokes);
     libxl__datacopier_kill(&bl->display);
     for (i=0; i<2; i++) {
@@ -256,6 +259,23 @@ static void bootloader_callback(libxl__egc *egc, 
libxl__bootloader_state *bl,
     bl->callback(egc, bl, rc);
 }
 
+/* might be called at any time, provided it's init'd */
+static void bootloader_abort(libxl__egc *egc,
+                             libxl__bootloader_state *bl, int rc)
+{
+    STATE_AO_GC(bl->ao);
+    int r;
+
+    libxl__datacopier_kill(&bl->keystrokes);
+    libxl__datacopier_kill(&bl->display);
+    if (libxl__ev_child_inuse(&bl->child)) {
+        r = kill(bl->child.pid, SIGTERM);
+        if (r) LOGE(WARN, "after failure, failed to kill bootloader [%lu]",
+                    (unsigned long)bl->child.pid);
+    }
+    bl->rc = rc;
+}
+
 /*----- main flow of control -----*/
 
 void libxl__bootloader_run(libxl__egc *egc, libxl__bootloader_state *bl)
@@ -377,6 +397,12 @@ static void bootloader_gotptys(libxl__egc *egc, 
libxl__openpty_state *op)
         goto out;
     }
 
+    bl->deathcheck.what = "stopping bootloader";
+    bl->deathcheck.domid = bl->domid;
+    bl->deathcheck.callback = bootloader_domaindeath;
+    rc = libxl__domaindeathcheck_start(gc, &bl->deathcheck);
+    if (rc) goto out;
+
     if (bl->console_available)
         bl->console_available(egc, bl);
 
@@ -454,18 +480,9 @@ static void bootloader_copyfail(libxl__egc *egc, const 
char *which,
        libxl__bootloader_state *bl, int onwrite, int errnoval)
 {
     STATE_AO_GC(bl->ao);
-    int r;
-
     if (!onwrite && !errnoval)
         LOG(ERROR, "unexpected eof copying %s", which);
-    libxl__datacopier_kill(&bl->keystrokes);
-    libxl__datacopier_kill(&bl->display);
-    if (libxl__ev_child_inuse(&bl->child)) {
-        r = kill(bl->child.pid, SIGTERM);
-        if (r) LOGE(WARN, "after failure, failed to kill bootloader [%lu]",
-                    (unsigned long)bl->child.pid);
-    }
-    bl->rc = ERROR_FAIL;
+    bootloader_abort(egc, bl, ERROR_FAIL);
 }
 static void bootloader_keystrokes_copyfail(libxl__egc *egc,
        libxl__datacopier_state *dc, int onwrite, int errnoval)
@@ -480,6 +497,12 @@ static void bootloader_display_copyfail(libxl__egc *egc,
     bootloader_copyfail(egc, "bootloader output", bl, onwrite, errnoval);
 }
 
+static void bootloader_domaindeath(libxl__egc *egc, libxl__domaindeathcheck 
*dc)
+{
+    libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, deathcheck);
+    bootloader_abort(egc, bl, ERROR_FAIL);
+}
+
 static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child,
                                 pid_t pid, int status)
 {
diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c
index 63719b2..03d0498 100644
--- a/tools/libxl/libxl_event.c
+++ b/tools/libxl/libxl_event.c
@@ -589,6 +589,53 @@ int libxl__ev_devstate_wait(libxl__gc *gc, 
libxl__ev_devstate *ds,
 }
 
 /*
+ * domain death/destruction
+ */
+
+/*
+ * We use a xenstore watch on the domain's path, rather than using an
+ * @releaseDomain watch and asking the hypervisor.  This is simpler
+ * because turning @releaseDomain into domain-specific information is
+ * complicated.
+ *
+ * It is also sufficient for our callers, which are generally trying
+ * to do cleanup of their own execution state on domain death, for the
+ * following reason: if the domain is destroyed then either (a) the
+ * entries in xenstore have already been deleted, in which case the
+ * test here works or (b) they have not in which case something has
+ * gone very badly wrong and we are going to leak those xenstore
+ * entries, in which case trying to avoid leaking other stuff is
+ * futile.
+ */
+
+static void domaindeathcheck_callback(libxl__egc *egc, libxl__ev_xswatch *w,
+                            const char *watch_path, const char *event_path)
+{
+    libxl__domaindeathcheck *dc = CONTAINER_OF(w, *dc, watch);
+    EGC_GC;
+    const char *p = libxl__xs_read(gc, XBT_NULL, watch_path);
+    if (p) return;
+
+    if (errno!=ENOENT) {
+        LIBXL__EVENT_DISASTER(egc,"failed to read xenstore"
+                              " for domain detach check", errno, 0);
+        return;
+    }
+
+    LOG(ERROR,"%s: domain %"PRIu32" removed (%s no longer in xenstore)",
+        dc->what, dc->domid, watch_path);
+    dc->callback(egc, dc);
+}
+
+int libxl__domaindeathcheck_start(libxl__gc *gc,
+                                  libxl__domaindeathcheck *dc)
+{
+    const char *path = GCSPRINTF("/local/domain/%"PRIu32, dc->domid);
+    return libxl__ev_xswatch_register(gc, &dc->watch,
+                                      domaindeathcheck_callback, path);
+}
+
+/*
  * osevent poll
  */
 
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 83fabea..f71e76a 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -866,6 +866,33 @@ _hidden int libxl__ev_devstate_wait(libxl__gc *gc, 
libxl__ev_devstate *ds,
                                     const char *state_path,
                                     int state, int milliseconds);
 
+/*
+ * libxl__ev_domaindeathcheck_register - arranges to call back (once)
+ * if the domain is destroyed.  If the domain dies, we log a message
+ * of the form "<what>: <explanation of the situation, including the domid>".
+ */
+
+typedef struct libxl__domaindeathcheck libxl__domaindeathcheck;
+typedef void libxl___domaindeathcheck_callback(libxl__egc *egc,
+                                         libxl__domaindeathcheck*);
+
+struct libxl__domaindeathcheck {
+    /* must be filled in by caller, and remain valid: */
+    const char *what;
+    uint32_t domid;
+    libxl___domaindeathcheck_callback *callback;
+    /* private */
+    libxl__ev_xswatch watch;
+};
+
+_hidden int libxl__domaindeathcheck_start(libxl__gc *gc,
+                                          libxl__domaindeathcheck *dc);
+
+static inline void libxl__domaindeathcheck_init
+ (libxl__domaindeathcheck *dc) { libxl__ev_xswatch_init(&dc->watch); }
+static inline void libxl__domaindeathcheck_stop(libxl__gc *gc,
+  libxl__domaindeathcheck *dc) { libxl__ev_xswatch_deregister(gc,&dc->watch); }
+
 
 /*
  * libxl__try_phy_backend - Check if there's support for the passed
@@ -1738,6 +1765,7 @@ struct libxl__bootloader_state {
     libxl__openpty_state openpty;
     libxl__openpty_result ptys[2];  /* [0] is for bootloader */
     libxl__ev_child child;
+    libxl__domaindeathcheck deathcheck;
     int nargs, argsspace;
     const char **args;
     libxl__datacopier_state keystrokes, display;
@@ -1750,7 +1778,6 @@ _hidden void 
libxl__bootloader_init(libxl__bootloader_state *bl);
  * If callback is passed rc==0, will have updated st->info appropriately */
 _hidden void libxl__bootloader_run(libxl__egc*, libxl__bootloader_state *st);
 
-
 /*----- Domain creation -----*/
 
 typedef struct libxl__domain_create_state libxl__domain_create_state;
-- 
1.7.2.5


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.