[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 02/13] libxl: react correctly to bootloader pty master POLLHUP



On Thu, 2012-08-02 at 18:27 +0100, Ian Jackson wrote:
> Receive POLLHUP on the bootloader master pty is not an error.
> Hopefully it means that the bootloader has exited and therefore the
> pty slave side has no process group any more.  (At least NetBSD
> indicates POLLHUP on the master in this case.)
> 
> So send the bootloader SIGTERM; if it has already exited then this has
> no effect (except that on some versions of NetBSD it erroneously
> returns ESRCH and we print a harmless warning) and we will then
> collect the bootloader's exit status and be satisfied.
> 
> However, we remember that we have done this so that if we got POLLHUP
> for some other reason than that the bootloader exited we report
> something resembling a useful message.
> 
> In order to implement this we need to provide a way for users of
> datacopier to handle POLLHUP rather than treating it as fatal.
> 
> We rename bootloader_abort to bootloader_stop since it now no longer
> only applies to error situations.
> 
> Signed-off-by: Roger Pau Monne <roger.pau@xxxxxxxxxx>
> Signed-off-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
> 
> -
> Changes in v5:
>  * Correctly call dc->callback_pollhup, not dc->callback,
>    in datacopier_pollhup_handled.

Therefore:

Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>

> 
> Changes in v4:
>  * Track whether we sent SIGTERM due to POLLHUP so we can report
>    messages properly.
> 
> Changes in v3:
>  * datacopier provides new interface for handling POLLHUP
>  * Do not ignore errors on the xenconsole pty
>  * Rename bootloader_abort.
> ---
>  tools/libxl/libxl_aoutils.c    |   23 +++++++++++++++++++++++
>  tools/libxl/libxl_bootloader.c |   39 +++++++++++++++++++++++++++++----------
>  tools/libxl/libxl_internal.h   |    7 +++++--
>  3 files changed, 57 insertions(+), 12 deletions(-)
> 
> diff --git a/tools/libxl/libxl_aoutils.c b/tools/libxl/libxl_aoutils.c
> index 99972a2..983a60a 100644
> --- a/tools/libxl/libxl_aoutils.c
> +++ b/tools/libxl/libxl_aoutils.c
> @@ -97,11 +97,31 @@ void libxl__datacopier_prefixdata(libxl__egc *egc, 
> libxl__datacopier_state *dc,
>      LIBXL_TAILQ_INSERT_TAIL(&dc->bufs, buf, entry);
>  }
> 
> +static int datacopier_pollhup_handled(libxl__egc *egc,
> +                                      libxl__datacopier_state *dc,
> +                                      short revents, int onwrite)
> +{
> +    STATE_AO_GC(dc->ao);
> +
> +    if (dc->callback_pollhup && (revents & POLLHUP)) {
> +        LOG(DEBUG, "received POLLHUP on %s during copy of %s",
> +            onwrite ? dc->writewhat : dc->readwhat,
> +            dc->copywhat);
> +        libxl__datacopier_kill(dc);
> +        dc->callback_pollhup(egc, dc, onwrite, -1);
> +        return 1;
> +    }
> +    return 0;
> +}
> +
>  static void datacopier_readable(libxl__egc *egc, libxl__ev_fd *ev,
>                                  int fd, short events, short revents) {
>      libxl__datacopier_state *dc = CONTAINER_OF(ev, *dc, toread);
>      STATE_AO_GC(dc->ao);
> 
> +    if (datacopier_pollhup_handled(egc, dc, revents, 0))
> +        return;
> +
>      if (revents & ~POLLIN) {
>          LOG(ERROR, "unexpected poll event 0x%x (should be POLLIN)"
>              " on %s during copy of %s", revents, dc->readwhat, dc->copywhat);
> @@ -163,6 +183,9 @@ static void datacopier_writable(libxl__egc *egc, 
> libxl__ev_fd *ev,
>      libxl__datacopier_state *dc = CONTAINER_OF(ev, *dc, towrite);
>      STATE_AO_GC(dc->ao);
> 
> +    if (datacopier_pollhup_handled(egc, dc, revents, 1))
> +        return;
> +
>      if (revents & ~POLLOUT) {
>          LOG(ERROR, "unexpected poll event 0x%x (should be POLLOUT)"
>              " on %s during copy of %s", revents, dc->writewhat, 
> dc->copywhat);
> diff --git a/tools/libxl/libxl_bootloader.c b/tools/libxl/libxl_bootloader.c
> index ef5a91b..bfc1b56 100644
> --- a/tools/libxl/libxl_bootloader.c
> +++ b/tools/libxl/libxl_bootloader.c
> @@ -215,6 +215,7 @@ void libxl__bootloader_init(libxl__bootloader_state *bl)
>      libxl__domaindeathcheck_init(&bl->deathcheck);
>      bl->keystrokes.ao = bl->ao;  libxl__datacopier_init(&bl->keystrokes);
>      bl->display.ao = bl->ao;     libxl__datacopier_init(&bl->display);
> +    bl->got_pollhup = 0;
>  }
> 
>  static void bootloader_cleanup(libxl__egc *egc, libxl__bootloader_state *bl)
> @@ -275,7 +276,7 @@ static void bootloader_local_detached_cb(libxl__egc *egc,
>  }
> 
>  /* might be called at any time, provided it's init'd */
> -static void bootloader_abort(libxl__egc *egc,
> +static void bootloader_stop(libxl__egc *egc,
>                               libxl__bootloader_state *bl, int rc)
>  {
>      STATE_AO_GC(bl->ao);
> @@ -285,8 +286,8 @@ static void bootloader_abort(libxl__egc *egc,
>      libxl__datacopier_kill(&bl->display);
>      if (libxl__ev_child_inuse(&bl->child)) {
>          r = kill(bl->child.pid, SIGTERM);
> -        if (r) LOGE(WARN, "after failure, failed to kill bootloader [%lu]",
> -                    (unsigned long)bl->child.pid);
> +        if (r) LOGE(WARN, "%sfailed to kill bootloader [%lu]",
> +                    rc ? "after failure, " : "", (unsigned 
> long)bl->child.pid);
>      }
>      bl->rc = rc;
>  }
> @@ -508,7 +509,10 @@ static void bootloader_gotptys(libxl__egc *egc, 
> libxl__openpty_state *op)
>      bl->keystrokes.maxsz = BOOTLOADER_BUF_OUT;
>      bl->keystrokes.copywhat =
>          GCSPRINTF("bootloader input for domain %"PRIu32, bl->domid);
> -    bl->keystrokes.callback = bootloader_keystrokes_copyfail;
> +    bl->keystrokes.callback =         bootloader_keystrokes_copyfail;
> +    bl->keystrokes.callback_pollhup = bootloader_keystrokes_copyfail;
> +        /* pollhup gets called with errnoval==-1 which is not otherwise
> +         * possible since errnos are nonnegative, so it's unambiguous */
>      rc = libxl__datacopier_start(&bl->keystrokes);
>      if (rc) goto out;
> 
> @@ -516,7 +520,8 @@ static void bootloader_gotptys(libxl__egc *egc, 
> libxl__openpty_state *op)
>      bl->display.maxsz = BOOTLOADER_BUF_IN;
>      bl->display.copywhat =
>          GCSPRINTF("bootloader output for domain %"PRIu32, bl->domid);
> -    bl->display.callback = bootloader_display_copyfail;
> +    bl->display.callback =         bootloader_display_copyfail;
> +    bl->display.callback_pollhup = bootloader_display_copyfail;
>      rc = libxl__datacopier_start(&bl->display);
>      if (rc) goto out;
> 
> @@ -562,30 +567,42 @@ static void bootloader_gotptys(libxl__egc *egc, 
> libxl__openpty_state *op)
> 
>  /* perhaps one of these will be called, but perhaps not */
>  static void bootloader_copyfail(libxl__egc *egc, const char *which,
> -       libxl__bootloader_state *bl, int onwrite, int errnoval)
> +        libxl__bootloader_state *bl, int ondisplay, int onwrite, int 
> errnoval)
>  {
>      STATE_AO_GC(bl->ao);
> +    int rc = ERROR_FAIL;
> +
> +    if (errnoval==-1) {
> +        /* POLLHUP */
> +        if (!!ondisplay != !!onwrite) {
> +            rc = 0;
> +            bl->got_pollhup = 1;
> +        } else {
> +            LOG(ERROR, "unexpected POLLHUP on %s", which);
> +        }
> +    }
>      if (!onwrite && !errnoval)
>          LOG(ERROR, "unexpected eof copying %s", which);
> -    bootloader_abort(egc, bl, ERROR_FAIL);
> +
> +    bootloader_stop(egc, bl, rc);
>  }
>  static void bootloader_keystrokes_copyfail(libxl__egc *egc,
>         libxl__datacopier_state *dc, int onwrite, int errnoval)
>  {
>      libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, keystrokes);
> -    bootloader_copyfail(egc, "bootloader input", bl, onwrite, errnoval);
> +    bootloader_copyfail(egc, "bootloader input", bl, 0, onwrite, errnoval);
>  }
>  static void bootloader_display_copyfail(libxl__egc *egc,
>         libxl__datacopier_state *dc, int onwrite, int errnoval)
>  {
>      libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, display);
> -    bootloader_copyfail(egc, "bootloader output", bl, onwrite, errnoval);
> +    bootloader_copyfail(egc, "bootloader output", bl, 1, onwrite, errnoval);
>  }
> 
>  static void bootloader_domaindeath(libxl__egc *egc, libxl__domaindeathcheck 
> *dc)
>  {
>      libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, deathcheck);
> -    bootloader_abort(egc, bl, ERROR_FAIL);
> +    bootloader_stop(egc, bl, ERROR_FAIL);
>  }
> 
>  static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child,
> @@ -599,6 +616,8 @@ static void bootloader_finished(libxl__egc *egc, 
> libxl__ev_child *child,
>      libxl__datacopier_kill(&bl->display);
> 
>      if (status) {
> +        if (bl->got_pollhup && WIFSIGNALED(status) && 
> WTERMSIG(status)==SIGTERM)
> +            LOG(ERROR, "got POLLHUP, sent SIGTERM");
>          LOG(ERROR, "bootloader failed - consult logfile %s", bl->logfile);
>          libxl_report_child_exitstatus(CTX, XTL_ERROR, "bootloader",
>                                        pid, status);
> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> index 58004b3..2d6c71a 100644
> --- a/tools/libxl/libxl_internal.h
> +++ b/tools/libxl/libxl_internal.h
> @@ -2076,7 +2076,9 @@ typedef struct libxl__datacopier_buf 
> libxl__datacopier_buf;
>   *     errnoval==0 means we got eof and all data was written
>   *     errnoval!=0 means we had a read error, logged
>   * onwrite==-1 means some other internal failure, errnoval not valid, logged
> - * in all cases copier is killed before calling this callback */
> + * If we get POLLHUP, we call callback_pollhup(..., onwrite, -1);
> + * or if callback_pollhup==0 this is an internal failure, as above.
> + * In all cases copier is killed before calling this callback */
>  typedef void libxl__datacopier_callback(libxl__egc *egc,
>       libxl__datacopier_state *dc, int onwrite, int errnoval);
> 
> @@ -2095,6 +2097,7 @@ struct libxl__datacopier_state {
>      const char *copywhat, *readwhat, *writewhat; /* for error msgs */
>      FILE *log; /* gets a copy of everything */
>      libxl__datacopier_callback *callback;
> +    libxl__datacopier_callback *callback_pollhup;
>      /* remaining fields are private to datacopier */
>      libxl__ev_fd toread, towrite;
>      ssize_t used;
> @@ -2279,7 +2282,7 @@ struct libxl__bootloader_state {
>      int nargs, argsspace;
>      const char **args;
>      libxl__datacopier_state keystrokes, display;
> -    int rc;
> +    int rc, got_pollhup;
>  };
> 
>  _hidden void libxl__bootloader_init(libxl__bootloader_state *bl);
> --
> 1.7.2.5
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.