[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] xl: track child processes for the benefit of libxl


  • To: xen-changelog@xxxxxxxxxxxxxxxxxxx
  • From: Xen patchbot-unstable <patchbot@xxxxxxx>
  • Date: Tue, 29 May 2012 16:44:08 +0000
  • Delivery-date: Tue, 29 May 2012 16:44:19 +0000
  • List-id: "Change log for Mercurial \(receive only\)" <xen-changelog.lists.xen.org>

# HG changeset patch
# User Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
# Date 1338283897 -3600
# Node ID d968bf0e2225f462ac28dd2f1788cdc7c45d42f4
# Parent  3d98fb95e735a92b26fb32fe08330afebe0f1571
xl: track child processes for the benefit of libxl

Each time xl forks, it needs to record the pid, so that its exit
status can be preserved if it happens that libxl's event loop reaped
it.  Consequently we also have a new wrapper for waitpid, which in
that case returns the previously-reaped status.

When we get a console ready callback from libxl, check to see if we
have spawned but not reaped a previous console client, and if so reap
it now.  (This is necessary to prevent improper use of the xlchild
struct, but has the happy consequence of checking the exit status of
the first console client in the pygrub case.)

Refactor the two calls to libxl_ctx_alloc into a new function
xl_ctx_alloc which also sets the child reaped handler callback.

All of this has the effect of suppressing a message
   unknown child [nnnn] unexpected exited status zero
which would sometimes (depending on a race) appear with `xl create -c'
and pygrub.

Signed-off-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Reported-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Cc: Roger Pau Monne <roger.pau@xxxxxxxxxx>
[ ijc -- corrected return codes in xl_reaped_callback to match documented
         convention. ]
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
---


diff -r 3d98fb95e735 -r d968bf0e2225 tools/libxl/xl.c
--- a/tools/libxl/xl.c  Tue May 29 10:31:37 2012 +0100
+++ b/tools/libxl/xl.c  Tue May 29 10:31:37 2012 +0100
@@ -102,22 +102,85 @@ void postfork(void)
     libxl_postfork_child_noexec(ctx); /* in case we don't exit/exec */
     ctx = 0;
 
-    if (libxl_ctx_alloc(&ctx, LIBXL_VERSION, 0, (xentoollog_logger*)logger)) {
-        fprintf(stderr, "cannot reinit xl context after fork\n");
-        exit(-1);
-    }
+    xl_ctx_alloc();
 }
 
-pid_t xl_fork(libxl_ctx *ctx) {
-    pid_t pid;
+pid_t xl_fork(xlchildnum child) {
+    xlchild *ch = &children[child];
+    int i;
 
-    pid = fork();
-    if (pid == -1) {
+    assert(!ch->pid);
+    ch->reaped = 0;
+
+    ch->pid = fork();
+    if (ch->pid == -1) {
         perror("fork failed");
         exit(-1);
     }
 
-    return pid;
+    if (!ch->pid) {
+        /* We are in the child now.  So all these children are not ours. */
+        for (i=0; i<child_max; i++)
+            children[i].pid = 0;
+    }
+
+    return ch->pid;
+}
+
+pid_t xl_waitpid(xlchildnum child, int *status, int flags)
+{
+    xlchild *ch = &children[child];
+    pid_t got = ch->pid;
+    assert(got);
+    if (ch->reaped) {
+        *status = ch->status;
+        ch->pid = 0;
+        return got;
+    }
+    for (;;) {
+        got = waitpid(ch->pid, status, flags);
+        if (got < 0 && errno == EINTR) continue;
+        if (got > 0) {
+            assert(got == ch->pid);
+            ch->pid = 0;
+        }
+        return got;
+    }
+}
+
+int xl_child_pid(xlchildnum child)
+{
+    xlchild *ch = &children[child];
+    return ch->pid;
+}
+
+static int xl_reaped_callback(pid_t got, int status, void *user)
+{
+    int i;
+    assert(got);
+    for (i=0; i<child_max; i++) {
+        xlchild *ch = &children[i];
+        if (ch->pid == got) {
+            ch->reaped = 1;
+            ch->status = status;
+            return 0;
+        }
+    }
+    return ERROR_UNKNOWN_CHILD;
+}
+
+static const libxl_childproc_hooks childproc_hooks = {
+    .chldowner = libxl_sigchld_owner_libxl,
+    .reaped_callback = xl_reaped_callback,
+};
+
+void xl_ctx_alloc(void) {
+    if (libxl_ctx_alloc(&ctx, LIBXL_VERSION, 0, (xentoollog_logger*)logger)) {
+        fprintf(stderr, "cannot init xl context\n");
+        exit(1);
+    }
+
+    libxl_childproc_setmode(ctx, &childproc_hooks, 0);
 }
 
 int main(int argc, char **argv)
@@ -159,10 +222,7 @@ int main(int argc, char **argv)
     logger = xtl_createlogger_stdiostream(stderr, minmsglevel,  0);
     if (!logger) exit(1);
 
-    if (libxl_ctx_alloc(&ctx, LIBXL_VERSION, 0, (xentoollog_logger*)logger)) {
-        fprintf(stderr, "cannot init xl context\n");
-        exit(1);
-    }
+    xl_ctx_alloc();
 
     /* Read global config file options */
     ret = asprintf(&config_file, "%s/xl.conf", libxl_xen_config_dir_path());
diff -r 3d98fb95e735 -r d968bf0e2225 tools/libxl/xl.h
--- a/tools/libxl/xl.h  Tue May 29 10:31:37 2012 +0100
+++ b/tools/libxl/xl.h  Tue May 29 10:31:37 2012 +0100
@@ -15,6 +15,8 @@
 #ifndef XL_H
 #define XL_H
 
+#include <assert.h>
+
 #include "xentoollog.h"
 
 struct cmd_spec {
@@ -108,8 +110,32 @@ struct cmd_spec *cmdtable_lookup(const c
 
 extern libxl_ctx *ctx;
 extern xentoollog_logger_stdiostream *logger;
-pid_t xl_fork(libxl_ctx *ctx); /* like fork, but prints and dies if it fails */
-void postfork(void);
+
+void xl_ctx_alloc(void);
+
+/* child processes */
+
+typedef struct {
+    /* every struct like this must be in XLCHILD_LIST */
+    pid_t pid; /* 0: not in use */
+    int reaped; /* valid iff pid!=0 */
+    int status; /* valid iff reaped */
+} xlchild;
+
+typedef enum {
+    child_console, child_waitdaemon, child_migration,
+    child_max
+} xlchildnum;
+
+extern xlchild children[child_max];
+
+pid_t xl_fork(xlchildnum); /* like fork, but prints and dies if it fails */
+void postfork(void); /* needed only if we aren't going to exec right away */
+
+/* Handles EINTR.  Clears out the xlchild so it can be reused. */
+pid_t xl_waitpid(xlchildnum, int *status, int flags);
+
+int xl_child_pid(xlchildnum); /* returns 0 if child struct is not in use */
 
 /* global options */
 extern int autoballoon;
diff -r 3d98fb95e735 -r d968bf0e2225 tools/libxl/xl_cmdimpl.c
--- a/tools/libxl/xl_cmdimpl.c  Tue May 29 10:31:37 2012 +0100
+++ b/tools/libxl/xl_cmdimpl.c  Tue May 29 10:31:37 2012 +0100
@@ -17,7 +17,6 @@
 #include "libxl_osdeps.h"
 
 #include <stdio.h>
-#include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
@@ -66,6 +65,8 @@ int logfile = 2;
 /* every libxl action in xl uses this same libxl context */
 libxl_ctx *ctx;
 
+xlchild children[child_max];
+
 /* when we operate on a domain, it is this one: */
 static uint32_t domid;
 static const char *common_domname;
@@ -1496,19 +1497,33 @@ static int freemem(libxl_domain_build_in
     return ERROR_NOMEM;
 }
 
+static void console_child_report(void)
+{
+    if (xl_child_pid(child_console)) {
+        int status;
+        pid_t got = xl_waitpid(child_console, &status, 0);
+        if (got < 0)
+            perror("xl: warning, failed to waitpid for console child");
+        else if (status)
+            libxl_report_child_exitstatus(ctx, XTL_ERROR, "console child",
+                                          xl_child_pid(child_console), status);
+    }
+}
+
 static void autoconnect_console(libxl_ctx *ctx_ignored,
                                 libxl_event *ev, void *priv)
 {
-    pid_t *pid = priv;
     uint32_t bldomid = ev->domid;
 
     libxl_event_free(ctx, ev);
 
-    *pid = fork();
-    if (*pid < 0) {
+    console_child_report();
+
+    pid_t pid = xl_fork(child_console);
+    if (pid < 0) {
         perror("unable to fork xenconsole");
         return;
-    } else if (*pid > 0)
+    } else if (pid > 0)
         return;
 
     postfork();
@@ -1580,7 +1595,6 @@ static int create_domain(struct domain_c
     int restore_fd = -1;
     int status = 0;
     const libxl_asyncprogress_how *autoconnect_console_how;
-    pid_t child_console_pid = -1;
     struct save_file_header hdr;
 
     int restoring = (restore_file || (migrate_fd >= 0));
@@ -1741,7 +1755,6 @@ start:
     libxl_asyncprogress_how autoconnect_console_how_buf;
     if ( dom_info->console_autoconnect ) {
         autoconnect_console_how_buf.callback = autoconnect_console;
-        autoconnect_console_how_buf.for_callback = &child_console_pid;
         autoconnect_console_how = &autoconnect_console_how_buf;
     }else{
         autoconnect_console_how = 0;
@@ -1813,19 +1826,17 @@ start:
         pid_t child1, got_child;
         int nullfd;
 
-        child1 = xl_fork(ctx);
+        child1 = xl_fork(child_waitdaemon);
         if (child1) {
             printf("Daemon running with PID %d\n", child1);
 
             for (;;) {
-                got_child = waitpid(child1, &status, 0);
+                got_child = xl_waitpid(child_waitdaemon, &status, 0);
                 if (got_child == child1) break;
                 assert(got_child == -1);
-                if (errno != EINTR) {
-                    perror("failed to wait for daemonizing child");
-                    ret = ERROR_FAIL;
-                    goto out;
-                }
+                perror("failed to wait for daemonizing child");
+                ret = ERROR_FAIL;
+                goto out;
             }
             if (status) {
                 libxl_report_child_exitstatus(ctx, XTL_ERROR,
@@ -1986,10 +1997,7 @@ out:
 
     free(config_data);
 
-waitpid_out:
-    if (child_console_pid > 0 &&
-            waitpid(child_console_pid, &status, 0) < 0 && errno == EINTR)
-        goto waitpid_out;
+    console_child_report();
 
     if (deathw)
         libxl_evdisable_domain_death(ctx, deathw);
@@ -2833,7 +2841,7 @@ static pid_t create_migration_child(cons
     MUST( libxl_pipe(ctx, sendpipe) );
     MUST( libxl_pipe(ctx, recvpipe) );
 
-    child = xl_fork(ctx);
+    child = xl_fork(child_migration);
 
     if (!child) {
         dup2(sendpipe[0], 0);
@@ -2877,19 +2885,20 @@ static int migrate_read_fixedmessage(int
     return 0;
 }
 
-static void migration_child_report(pid_t migration_child, int recv_fd) {
+static void migration_child_report(int recv_fd) {
     pid_t child;
     int status, sr;
     struct timeval now, waituntil, timeout;
     static const struct timeval pollinterval = { 0, 1000 }; /* 1ms */
 
-    if (!migration_child) return;
+    if (!xl_child_pid(child_migration)) return;
 
     CHK_ERRNO( gettimeofday(&waituntil, 0) );
     waituntil.tv_sec += 2;
 
     for (;;) {
-        child = waitpid(migration_child, &status, WNOHANG);
+        pid_t migration_child = xl_child_pid(child_migration);
+        child = xl_waitpid(child_migration, &status, WNOHANG);
 
         if (child == migration_child) {
             if (status)
@@ -2899,7 +2908,6 @@ static void migration_child_report(pid_t
             break;
         }
         if (child == -1) {
-            if (errno == EINTR) continue;
             fprintf(stderr, "wait for migration child [%ld] failed: %s\n",
                     (long)migration_child, strerror(errno));
             break;
@@ -2939,7 +2947,6 @@ static void migration_child_report(pid_t
             }
         }
     }
-    migration_child = 0;
 }
 
 static void migrate_do_preamble(int send_fd, int recv_fd, pid_t child,
@@ -2958,7 +2965,7 @@ static void migrate_do_preamble(int send
                                    "banner", rune);
     if (rc) {
         close(send_fd);
-        migration_child_report(child, recv_fd);
+        migration_child_report(recv_fd);
         exit(-rc);
     }
 
@@ -3083,13 +3090,13 @@ static void migrate_domain(const char *d
 
  failed_suspend:
     close(send_fd);
-    migration_child_report(child, recv_fd);
+    migration_child_report(recv_fd);
     fprintf(stderr, "Migration failed, failed to suspend at sender.\n");
     exit(-ERROR_FAIL);
 
  failed_resume:
     close(send_fd);
-    migration_child_report(child, recv_fd);
+    migration_child_report(recv_fd);
     fprintf(stderr, "Migration failed, resuming at sender.\n");
     libxl_domain_resume(ctx, domid, 0);
     exit(-ERROR_FAIL);
@@ -3104,7 +3111,7 @@ static void migrate_domain(const char *d
  " responsibility to avoid that.  Sorry.\n");
 
     close(send_fd);
-    migration_child_report(child, recv_fd);
+    migration_child_report(recv_fd);
     exit(-ERROR_BADFAIL);
 }
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.