[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] xl: track child processes for the benefit of libxl



On Wed, 2012-05-16 at 21:40 +0100, Ian Jackson wrote:
> Each time xl forks, it needs to record the pid, so that its exit
> status can be preserved if it happens that libxl's event loop reaped
> it.  Consequently we also have a new wrapper for waitpid, which in
> that case returns the previously-reaped status.
> 
> When we get a console ready callback from libxl, check to see if we
> have spawned but not reaped a previous console client, and if so reap
> it now.  (This is necessary to prevent improper use of the xlchild
> struct, but has the happy consequence of checking the exit status of
> the first console client in the pygrub case.)
> 
> Refactor the two calls to libxl_ctx_alloc into a new function
> xl_ctx_alloc which also sets the child reaped handler callback.
> 
> All of this has the effect of suppressing a message
>    unknown child [nnnn] unexpected exited status zero
> which would sometimes (depending on a race) appear with `xl create -c'
> and pygrub.
> 
> Signed-off-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
> Reported-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
> Cc: Roger Pau Monne <roger.pau@xxxxxxxxxx>
> 
> ---
>  tools/libxl/xl.c         |   80 ++++++++++++++++++++++++++++++++++++++-------
>  tools/libxl/xl.h         |   29 +++++++++++++++-
>  tools/libxl/xl_cmdimpl.c |   80 ++++++++++++++++++++++++++-------------------
>  3 files changed, 140 insertions(+), 49 deletions(-)
> 
> diff --git a/tools/libxl/xl.c b/tools/libxl/xl.c
> index 750a61c..66499dd 100644
> --- a/tools/libxl/xl.c
> +++ b/tools/libxl/xl.c
> @@ -102,22 +102,79 @@ void postfork(void)
>      libxl_postfork_child_noexec(ctx); /* in case we don't exit/exec */
>      ctx = 0;
>  
> -    if (libxl_ctx_alloc(&ctx, LIBXL_VERSION, 0, (xentoollog_logger*)logger)) 
> {
> -        fprintf(stderr, "cannot reinit xl context after fork\n");
> -        exit(-1);
> -    }
> +    xl_ctx_alloc();
>  }
>  
> -pid_t xl_fork(libxl_ctx *ctx) {
> -    pid_t pid;
> +pid_t xl_fork(xlchild *ch) {
> +    assert(!ch->pid);
> +    ch->reaped = 0;
>  
> -    pid = fork();
> -    if (pid == -1) {
> +    ch->pid = fork();
> +    if (ch->pid == -1) {
>          perror("fork failed");
>          exit(-1);
>      }
>  
> -    return pid;
> +    if (!ch->pid) {
> +#define CHILD_FORGET(other) \
> +        other.pid = 0;
> +CHILD_LIST(CHILD_FORGET)

This clears every xlchild in the CHILD_LIST? Why? Oh, because this is
now the child and so those aren't our children any more. A comment would
be nice here, took me a little while to figure out.

Is there some reason to not indent CHILD_LIST? (You've done it more than
once, so I guess so)

> +    }
> +
> +    return ch->pid;
> +}
> +
> +pid_t xl_waitpid(xlchild *child, int *status, int flags)
> +{
> +    pid_t got = child->pid;
> +    assert(got);
> +    if (child->reaped) {
> +        *status = child->status;
> +        child->pid = 0;
> +        return got;
> +    }
> +    for (;;) {
> +        got = waitpid(child->pid, status, flags);
> +        if (got < 0 && errno == EINTR) continue;
> +        if (got > 0) {
> +            assert(got == child->pid);
> +            child->pid = 0;
> +        }
> +        return got;
> +    }
> +}
> +
> +static int reaped_callback_child_check(xlchild *ch, pid_t got, int status)
> +{
> +    if (ch->pid != got)
> +        return 0;
> +    ch->reaped = 1;
> +    ch->status = status;
> +    return 1;
> +}
> +
> +static int xl_reaped_callback(pid_t got, int status, void *user)
> +{
> +    assert(got);
> +    return (
> +#define CHILD_CHECK(ch) \
> +            reaped_callback_child_check(&ch, got, status) ||
> +CHILD_LIST(CHILD_CHECK)
> +            0) ? 0 : ERROR_UNKNOWN_CHILD;
> +}
> +
> +static const libxl_childproc_hooks childproc_hooks = {
> +    .chldowner = libxl_sigchld_owner_libxl,
> +    .reaped_callback = xl_reaped_callback,
> +};
> +
> +void xl_ctx_alloc(void) {
> +    if (libxl_ctx_alloc(&ctx, LIBXL_VERSION, 0, (xentoollog_logger*)logger)) 
> {
> +        fprintf(stderr, "cannot init xl context\n");
> +        exit(1);
> +    }
> +
> +    libxl_childproc_setmode(ctx, &childproc_hooks, 0);
>  }
>  
>  int main(int argc, char **argv)
> @@ -159,10 +216,7 @@ int main(int argc, char **argv)
>      logger = xtl_createlogger_stdiostream(stderr, minmsglevel,  0);
>      if (!logger) exit(1);
>  
> -    if (libxl_ctx_alloc(&ctx, LIBXL_VERSION, 0, (xentoollog_logger*)logger)) 
> {
> -        fprintf(stderr, "cannot init xl context\n");
> -        exit(1);
> -    }
> +    xl_ctx_alloc();
>  
>      /* Read global config file options */
>      ret = asprintf(&config_file, "%s/xl.conf", libxl_xen_config_dir_path());
> diff --git a/tools/libxl/xl.h b/tools/libxl/xl.h
> index 5d0d504..a00309c 100644
> --- a/tools/libxl/xl.h
> +++ b/tools/libxl/xl.h
> @@ -15,6 +15,8 @@
>  #ifndef XL_H
>  #define XL_H
>  
> +#include <assert.h>
> +
>  #include "xentoollog.h"
>  
>  struct cmd_spec {
> @@ -105,8 +107,31 @@ struct cmd_spec *cmdtable_lookup(const char *s);
>  
>  extern libxl_ctx *ctx;
>  extern xentoollog_logger_stdiostream *logger;
> -pid_t xl_fork(libxl_ctx *ctx); /* like fork, but prints and dies if it fails 
> */
> -void postfork(void);
> +
> +void xl_ctx_alloc(void);
> +
> +/* child processes */
> +
> +typedef struct {
> +    /* every struct like this must be in XLCHILD_LIST */
> +    pid_t pid; /* 0: not in use */
> +    int reaped;
> +    int status; /* valid iff pid==-1 */
> +} xlchild;
> +
> +/* our child processes */
> +#define CHILD_LIST(_)                           \
> +    _(child_console)                            \
> +    _(child_waitdaemon)                         \
> +    _(migration_child)

Is using "_" like this valid? (i.e. not reserved by C or POSIX or
something)

> +
> +#define CHILD_DECLARE(ch) \
> +extern xlchild ch;
> +CHILD_LIST(CHILD_DECLARE);
> +
> +pid_t xl_fork(xlchild*); /* like fork, but prints and dies if it fails */
> +void postfork(void); /* needed only if we aren't going to exec right away */
> +pid_t xl_waitpid(xlchild*, int *status, int flags); /* handles EINTR */
>  
>  /* global options */
>  extern int autoballoon;
> diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
> index 9f182c2..de1f2d8 100644
> --- a/tools/libxl/xl_cmdimpl.c
> +++ b/tools/libxl/xl_cmdimpl.c
> @@ -17,7 +17,6 @@
>  #include "libxl_osdeps.h"
>  
>  #include <stdio.h>
> -#include <assert.h>
>  #include <stdlib.h>
>  #include <string.h>
>  #include <unistd.h>
> @@ -66,6 +65,13 @@ int logfile = 2;
>  /* every libxl action in xl uses this same libxl context */
>  libxl_ctx *ctx;
>  
> +#define CHILD_DEFINE(ch) \
> +xlchild ch;
> +CHILD_LIST(CHILD_DEFINE);
> +
> +xlchild child_console;
> +xlchild child_waitdaemon;

Aren't these redundant with the definition from the preceding
CHILD_LIST?

This CHILD_LIST thing is clever but it isn't half opaque for the reader.
I'd say we'd be better off open coding them. Either we say:

There are only 3 of them, we can put the functions which deal with them
near each other and have a comment saying "update all these".

or we can add a proper list (LIBXL_LIST based?) which we add them to and
manage explicitly from xlfork and reap/xlwaitpid.

> +
>  /* when we operate on a domain, it is this one: */
>  static uint32_t domid;
>  static const char *common_domname;
> @@ -1457,19 +1463,33 @@ static int freemem(libxl_domain_build_info *b_info)
>      return ERROR_NOMEM;
>  }
>  
> +static void console_child_report(void)
> +{
> +    if (child_console.pid) {
> +        int status;
> +        pid_t got = xl_waitpid(&child_console, &status, 0);
> +        if (got < 0)
> +            perror("xl: warning, failed to waitpid for console child");
> +        else if (status)
> +            libxl_report_child_exitstatus(ctx, XTL_ERROR, "console child",
> +                                          child_console.pid, status);
> +    }
> +}
> +
>  static void autoconnect_console(libxl_ctx *ctx_ignored,
>                                  libxl_event *ev, void *priv)
>  {
> -    pid_t *pid = priv;
>      uint32_t bldomid = ev->domid;
>  
>      libxl_event_free(ctx, ev);
>  
> -    *pid = fork();
> -    if (*pid < 0) {
> +    console_child_report();
> +
> +    pid_t pid = xl_fork(&child_console);

console_child_report doesn't seem to reset child_config.pid and xlfork
has an assert(!foo.pid) in it, so how does this work on the second time?

> +    if (pid < 0) {
>          perror("unable to fork xenconsole");
>          return;
> -    } else if (*pid > 0)
> +    } else if (pid > 0)
>          return;
>  
>      postfork();
>      libxl_evdisable_domain_death(ctx, deathw);
[...]


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.