[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 14/15] libxl: do not leak spawned middle children



libxl__spawn_spawn would, when libxl__spawn_detach was called, make
the spawn become idle immediately.  However it still has a child
process which needs to be waited for: the `detachable' spawned
child.

This is wrong because the ultimate in-libxl caller may return to the
application, with a child process still forked but not reaped libxl
contrary to the documented behaviour of libxl.

Instead, replace libxl__spawn_detach with libxl__spawn_initiate_detach
which is asynchronous.  The detachable spawned children are abolished;
instead, we defer calling back to the in-libxl user until the middle
child has been reaped.

Signed-off-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
---
 tools/libxl/libxl_dm.c       |   14 ++++-
 tools/libxl/libxl_exec.c     |  124 ++++++++++++++++++++++++------------------
 tools/libxl/libxl_internal.h |   40 +++++++-------
 3 files changed, 103 insertions(+), 75 deletions(-)

diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
index d987347..284847a 100644
--- a/tools/libxl/libxl_dm.c
+++ b/tools/libxl/libxl_dm.c
@@ -904,6 +904,8 @@ static void device_model_confirm(libxl__egc *egc, 
libxl__spawn_state *spawn,
                                  const char *xsdata);
 static void device_model_startup_failed(libxl__egc *egc,
                                         libxl__spawn_state *spawn);
+static void device_model_detached(libxl__egc *egc,
+                                  libxl__spawn_state *spawn);
 
 /* our "next step" function, called from those callbacks and elsewhere */
 static void device_model_spawn_outcome(libxl__egc *egc,
@@ -1011,6 +1013,7 @@ retry_transaction:
     spawn->midproc_cb = libxl__spawn_record_pid;
     spawn->confirm_cb = device_model_confirm;
     spawn->failure_cb = device_model_startup_failed;
+    spawn->detached_cb = device_model_detached;
 
     rc = libxl__spawn_spawn(egc, spawn);
     if (rc < 0)
@@ -1044,9 +1047,7 @@ static void device_model_confirm(libxl__egc *egc, 
libxl__spawn_state *spawn,
     if (strcmp(xsdata, "running"))
         return;
 
-    libxl__spawn_detach(gc, spawn);
-
-    device_model_spawn_outcome(egc, dmss, 0);
+    libxl__spawn_initiate_detach(gc, spawn);
 }
 
 static void device_model_startup_failed(libxl__egc *egc,
@@ -1056,6 +1057,13 @@ static void device_model_startup_failed(libxl__egc *egc,
     device_model_spawn_outcome(egc, dmss, ERROR_FAIL);
 }
 
+static void device_model_detached(libxl__egc *egc,
+                                  libxl__spawn_state *spawn)
+{
+    libxl__dm_spawn_state *dmss = CONTAINER_OF(spawn, *dmss, spawn);
+    device_model_spawn_outcome(egc, dmss, 0);
+}
+
 static void device_model_spawn_outcome(libxl__egc *egc,
                                        libxl__dm_spawn_state *dmss,
                                        int rc)
diff --git a/tools/libxl/libxl_exec.c b/tools/libxl/libxl_exec.c
index 082bf44..bb85682 100644
--- a/tools/libxl/libxl_exec.c
+++ b/tools/libxl/libxl_exec.c
@@ -238,15 +238,16 @@ err:
 /*
  * Full set of possible states of a libxl__spawn_state and its _detachable:
  *
- *               ss->        ss->        ss->    | ssd->       ssd->
- *               timeout     xswatch     ssd     |  mid         ss
- *  - Undefined   undef       undef       no     |  -           -
- *  - Idle        Idle        Idle        no     |  -           -
- *  - Active      Active      Active      yes    |  Active      yes
- *  - Partial     Active/Idle Active/Idle maybe  |  Active/Idle yes  (if 
exists)
- *  - Detached    -           -           -      |  Active      no
+ *                   detaching failed  mid     timeout      xswatch          
+ *  - Undefined         undef   undef   -        undef        undef
+ *  - Idle              any     any     Idle     Idle         Idle
+ *  - Attached OK       0       0       Active   Active       Active
+ *  - Attached Failed   0       1       Active   Idle         Idle
+ *  - Detaching         1       maybe   Active   Idle         Idle
+ *  - Partial           any     any     Idle     Active/Idle  Active/Idle
  *
- * When in state Detached, the middle process has been sent a SIGKILL.
+ * When in states Detaching or Attached Failed, the middle process has
+ * been sent a SIGKILL.
  */
 
 /* Event callbacks. */
@@ -257,19 +258,18 @@ static void spawn_timeout(libxl__egc *egc, libxl__ev_time 
*ev,
 static void spawn_middle_death(libxl__egc *egc, libxl__ev_child *childw,
                                pid_t pid, int status);
 
-/* Precondition: Partial.  Results: Detached. */
+/* Precondition: Partial.  Results: Idle. */
 static void spawn_cleanup(libxl__gc *gc, libxl__spawn_state *ss);
 
-/* Precondition: Partial; caller has logged failure reason.
- * Results: Caller notified of failure;
- *  after return, ss may be completely invalid as caller may reuse it */
-static void spawn_failed(libxl__egc *egc, libxl__spawn_state *ss);
+/* Precondition: Attached or Detaching; caller has logged failure reason.
+ * Results: Detaching, or Attached Failing */
+static void spawn_fail(libxl__egc *egc, libxl__spawn_state *ss);
 
 void libxl__spawn_init(libxl__spawn_state *ss)
 {
+    libxl__ev_child_init(&ss->mid);
     libxl__ev_time_init(&ss->timeout);
     libxl__ev_xswatch_init(&ss->xswatch);
-    ss->ssd = 0;
 }
 
 int libxl__spawn_spawn(libxl__egc *egc, libxl__spawn_state *ss)
@@ -280,8 +280,7 @@ int libxl__spawn_spawn(libxl__egc *egc, libxl__spawn_state 
*ss)
     int status, rc;
 
     libxl__spawn_init(ss);
-    ss->ssd = libxl__zalloc(0, sizeof(*ss->ssd));
-    libxl__ev_child_init(&ss->ssd->mid);
+    ss->failed = ss->detaching = 0;
 
     rc = libxl__ev_time_register_rel(gc, &ss->timeout,
                                      spawn_timeout, ss->timeout_ms);
@@ -291,7 +290,7 @@ int libxl__spawn_spawn(libxl__egc *egc, libxl__spawn_state 
*ss)
                                     spawn_watch_event, ss->xspath);
     if (rc) goto out_err;
 
-    pid_t middle = libxl__ev_child_fork(gc, &ss->ssd->mid, spawn_middle_death);
+    pid_t middle = libxl__ev_child_fork(gc, &ss->mid, spawn_middle_death);
     if (middle ==-1) { rc = ERROR_FAIL; goto out_err; }
 
     if (middle) {
@@ -344,54 +343,64 @@ int libxl__spawn_spawn(libxl__egc *egc, 
libxl__spawn_state *ss)
 
 static void spawn_cleanup(libxl__gc *gc, libxl__spawn_state *ss)
 {
+    assert(!libxl__ev_child_inuse(&ss->mid));
+    libxl__ev_time_deregister(gc, &ss->timeout);
+    libxl__ev_xswatch_deregister(gc, &ss->xswatch);
+}
+
+static void spawn_detach(libxl__gc *gc, libxl__spawn_state *ss)
+/* Precondition: Attached or Detaching, but caller must have just set
+ * at least one of detaching or failed.
+ * Results: Detaching or Attached Failed */
+{
     int r;
 
+    assert(libxl__ev_child_inuse(&ss->mid));
     libxl__ev_time_deregister(gc, &ss->timeout);
     libxl__ev_xswatch_deregister(gc, &ss->xswatch);
 
-    libxl__spawn_state_detachable *ssd = ss->ssd;
-    if (ssd) {
-        if (libxl__ev_child_inuse(&ssd->mid)) {
-            pid_t child = ssd->mid.pid;
-            r = kill(child, SIGKILL);
-            if (r && errno != ESRCH)
-                LOGE(WARN, "%s: failed to kill intermediate child (pid=%lu)",
-                     ss->what, (unsigned long)child);
-        }
+    pid_t child = ss->mid.pid;
+    r = kill(child, SIGKILL);
+    if (r && errno != ESRCH)
+        LOGE(WARN, "%s: failed to kill intermediate child (pid=%lu)",
+             ss->what, (unsigned long)child);
+}
 
-        /* disconnect the ss and ssd from each other */
-        ssd->ss = 0;
-        ss->ssd = 0;
-    }
+void libxl__spawn_initiate_detach(libxl__gc *gc, libxl__spawn_state *ss)
+{
+    ss->detaching = 1;
+    spawn_detach(gc, ss);
 }
 
-static void spawn_failed(libxl__egc *egc, libxl__spawn_state *ss)
+static void spawn_fail(libxl__egc *egc, libxl__spawn_state *ss)
+/* Caller must have logged.  Must be last thing in calling function,
+ * as it may make the callback.  Precondition: Attached or Detaching. */
 {
     EGC_GC;
-    spawn_cleanup(gc, ss);
-    ss->failure_cb(egc, ss); /* must be last; callback may do anything to ss */
+    ss->failed = 1;
+    spawn_detach(gc, ss);
 }
 
 static void spawn_timeout(libxl__egc *egc, libxl__ev_time *ev,
                           const struct timeval *requested_abs)
 {
-    /* Before event, was Active; is now Partial. */
+    /* Before event, was Attached. */
     EGC_GC;
     libxl__spawn_state *ss = CONTAINER_OF(ev, *ss, timeout);
     LOG(ERROR, "%s: startup timed out", ss->what);
-    spawn_failed(egc, ss); /* must be last */
+    spawn_fail(egc, ss); /* must be last */
 }
 
 static void spawn_watch_event(libxl__egc *egc, libxl__ev_xswatch *xsw,
                               const char *watch_path, const char *event_path)
 {
-    /* On entry, is Active. */
+    /* On entry, is Attached. */
     EGC_GC;
     libxl__spawn_state *ss = CONTAINER_OF(xsw, *ss, xswatch);
     char *p = libxl__xs_read(gc, 0, ss->xspath);
     if (!p && errno != ENOENT) {
         LOG(ERROR, "%s: xenstore read of %s failed", ss->what, ss->xspath);
-        spawn_failed(egc, ss); /* must be last */
+        spawn_fail(egc, ss); /* must be last */
         return;
     }
     ss->confirm_cb(egc, ss, p); /* must be last */
@@ -399,20 +408,22 @@ static void spawn_watch_event(libxl__egc *egc, 
libxl__ev_xswatch *xsw,
 
 static void spawn_middle_death(libxl__egc *egc, libxl__ev_child *childw,
                                pid_t pid, int status)
-    /* Before event, was Active or Detached;
-     * is now Active or Detached except that ssd->mid is Idle */
+    /* On entry, is Attached or Detaching */
 {
     EGC_GC;
-    libxl__spawn_state_detachable *ssd = CONTAINER_OF(childw, *ssd, mid);
-    libxl__spawn_state *ss = ssd->ss;
-
-    if (!WIFEXITED(status)) {
+    libxl__spawn_state *ss = CONTAINER_OF(childw, *ss, mid);
+
+    if ((ss->failed || ss->detaching) &&
+        ((WIFEXITED(status) && WEXITSTATUS(status)==0) ||
+         (WIFSIGNALED(status) && WTERMSIG(status)==SIGKILL))) {
+        /* as expected */
+    } else if (!WIFEXITED(status)) {
+        int loglevel = ss->detaching ? XTL_WARN : XTL_ERROR;
         const char *what =
-            GCSPRINTF("%s intermediate process (startup monitor)",
-                      ss ? ss->what : "(detached)");
-        int loglevel = ss ? XTL_ERROR : XTL_WARN;
+            GCSPRINTF("%s intermediate process (startup monitor)", ss->what);
         libxl_report_child_exitstatus(CTX, loglevel, what, pid, status);
-    } else if (ss) { /* otherwise it was supposed to be a daemon by now */
+        ss->failed = 1;
+    } else {
         if (!status)
             LOG(ERROR, "%s [%ld]: unexpectedly exited with exit status 0,"
                 " when we were waiting for it to confirm startup",
@@ -430,15 +441,22 @@ static void spawn_middle_death(libxl__egc *egc, 
libxl__ev_child *childw,
                 LOG(ERROR, "%s [%ld]: died during startup due to unknown fatal"
                     " signal number %d", ss->what, (unsigned long)pid, sig);
         }
-        ss->ssd = 0; /* detatch the ssd to make the ss be in state Partial */
-        spawn_failed(egc, ss); /* must be last use of ss */
+        ss->failed = 1;
     }
-    free(ssd);
-}
 
-void libxl__spawn_detach(libxl__gc *gc, libxl__spawn_state *ss)
-{
     spawn_cleanup(gc, ss);
+
+    if (ss->failed && !ss->detaching) {
+        ss->failure_cb(egc, ss); /* must be last */
+        return;
+    }
+    
+    if (ss->failed && ss->detaching)
+        LOG(WARN,"%s underlying machinery seemed to fail,"
+            " but its function seems to have been successful", ss->what);
+
+    assert(ss->detaching);
+    ss->detached_cb(egc, ss);
 }
 
 /*
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 22013fd..2592caa 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -981,8 +981,8 @@ _hidden int libxl__device_pci_destroy_all(libxl__gc *gc, 
uint32_t domid);
  *
  * Higher-level double-fork and separate detach eg as for device models
  *
- * Each libxl__spawn_state is in one of the conventional states
- *    Undefined, Idle, Active
+ * Each libxl__spawn_state is in one of these states
+ *    Undefined, Idle, Attached, Detaching
  */
 
 typedef struct libxl__obsolete_spawn_starting libxl__spawn_starting;
@@ -1023,15 +1023,15 @@ _hidden void libxl__spawn_init(libxl__spawn_state*);
  * intermediate or final child; an error message will have been
  * logged.
  *
- * confirm_cb and failure_cb will not be called reentrantly from
- * within libxl__spawn_spawn.
+ * confirm_cb, failure_cb and detached_cb will not be called
+ * reentrantly from within libxl__spawn_spawn.
  *
  * what: string describing the spawned process, used for logging
  *
  * Logs errors.  A copy of "what" is taken. 
  * Return values:
  *  < 0   error, *spawn is now Idle and need not be detached
- *   +1   caller is the parent, *spawn is Active and must eventually be 
detached
+ *   +1   caller is the parent, *spawn is Attached and must be detached
  *    0   caller is now the inner child, should probably call libxl__exec
  *
  * The spawn state must be Undefined or Idle on entry.
@@ -1039,12 +1039,15 @@ _hidden void libxl__spawn_init(libxl__spawn_state*);
 _hidden int libxl__spawn_spawn(libxl__egc *egc, libxl__spawn_state *spawn);
 
 /*
- * libxl__spawn_detach - Detaches the daemonic child.
+ * libxl__spawn_request_detach - Detaches the daemonic child.
  *
  * Works by killing the intermediate process from spawn_spawn.
  * After this function returns, failures of either child are no
  * longer reported via failure_cb.
  *
+ * This is not synchronous: there will be a further callback when
+ * the detach is complete.
+ *
  * If called before the inner child has been created, this may prevent
  * it from running at all.  Thus this should be called only when the
  * inner child has notified that it is ready.  Normally it will be
@@ -1052,10 +1055,10 @@ _hidden int libxl__spawn_spawn(libxl__egc *egc, 
libxl__spawn_state *spawn);
  *
  * Logs errors.
  *
- * The spawn state must be Active or Idle on entry and will be Idle
+ * The spawn state must be Attached entry and will be Detaching
  * on return.
  */
-_hidden void libxl__spawn_detach(libxl__gc *gc, libxl__spawn_state*);
+_hidden void libxl__spawn_initiate_detach(libxl__gc *gc, libxl__spawn_state*);
 
 /*
  * If successful, this should return 0.
@@ -1092,15 +1095,11 @@ typedef void libxl__spawn_failure_cb(libxl__egc*, 
libxl__spawn_state*);
 typedef void libxl__spawn_confirm_cb(libxl__egc*, libxl__spawn_state*,
                                      const char *xsdata);
 
-typedef struct {
-    /* Private to the spawn implementation.
-     */
-    /* This separate struct, from malloc, allows us to "detach"
-     * the child and reap it later, when our user has gone
-     * away and freed its libxl__spawn_state */
-    struct libxl__spawn_state *ss;
-    libxl__ev_child mid;
-} libxl__spawn_state_detachable;
+/*
+ * Called when the detach (requested by libxl__spawn_initiate_detach) has
+ * completed.  On entry to the callback the spawn state is Idle.
+ */
+typedef void libxl__spawn_detached_cb(libxl__egc*, libxl__spawn_state*);
 
 struct libxl__spawn_state {
     /* must be filled in by user and remain valid */
@@ -1112,15 +1111,18 @@ struct libxl__spawn_state {
     libxl__spawn_midproc_cb *midproc_cb;
     libxl__spawn_failure_cb *failure_cb;
     libxl__spawn_confirm_cb *confirm_cb;
+    libxl__spawn_detached_cb *detached_cb;
 
     /* remaining fields are private to libxl_spawn_... */
+    int detaching; /* we are in Detaching */
+    int failed; /* might be true whenever we are not Idle */
+    libxl__ev_child mid; /* always in use whenever we are not Idle */
     libxl__ev_time timeout;
     libxl__ev_xswatch xswatch;
-    libxl__spawn_state_detachable *ssd;
 };
 
 static inline int libxl__spawn_inuse(libxl__spawn_state *ss)
-    { return !!ss->ssd; }
+    { return libxl__ev_child_inuse(&ss->mid); }
 
 /*
  * libxl_spawner_record_pid - Record given pid in xenstore
-- 
1.7.2.5


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.