[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] Re: [PATCH] libxl: check for early failures of qemu-dm (v2)
This patch makes xl create check whether qemu-dm has started correctly, and causes it to fail immediately with appropriate errors if not. There are other bugfixes too. More specifically: * libxl_create_device_model forks twice rather than once so that the process which calls libxl does not end up being the actual parent of qemu. That avoids the need for the qemu-dm process to be reaped at some indefinite time in the future. * The first fork generates an intermediate process which is responsible for writing the qemu-dm pid to xenstore and then merely waits to collect and report on qemu-dm's exit status during startup. New arguments to libxl_create_device_model allow the preservation of its pid so that a later call can check whether the startup is successful. * xl.c's create_domain checks for errors in its libxl calls. Consequential changes: * libxl_wait_for_device_model now takes a callback function parameter which is called repeatedly in the loop iteration and allows the caller to abort the wait. * libxl_exec no longer calls fork; there is a new libxl_fork. * osdep.[ch] new use #define _GNU_SOURCE. The provided asprintf declarations are suppressed when not needed (currently, always). * There is a hook to override waitpid, which will be necessary for some callers. Remaining problems and other issues I noticed or we found: * The error handling is rather inconsistent still and lacking in places. * xl_logv is declared but not defined. * _GNU_SOURCE should be used throughout. The asprintf implementation should be disabled in favour of the system one. * XL_LOG_ERROR_ERRNO needs to actually print the errno value. * destroy_device_model can kill random dom0 processes (!) * struct libxl_ctx should be defined in libxl_internal.h. Signed-off-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx> (Changes since v1: * Remove new error log level; should be in a future patch * Properly fixed the asprintf problem * Indentation no uses literal tabs) diff -r 49deb113cd40 tools/libxl/Makefile --- a/tools/libxl/Makefile Fri Nov 13 15:46:58 2009 +0000 +++ b/tools/libxl/Makefile Mon Nov 16 12:06:57 2009 +0000 @@ -23,8 +23,7 @@ LIBCONFIG_SOURCE = libconfig-1.3.2 LIBCONFIG_OUTPUT = $(LIBCONFIG_SOURCE)/.libs -LIBXL_OBJS-y = -LIBXL_OBJS-$(CONFIG_Linux) += osdeps.o +LIBXL_OBJS-y = osdeps.o LIBXL_OBJS = flexarray.o libxl.o libxl_dom.o libxl_exec.o libxl_xshelp.o libxl_device.o libxl_internal.o xenguest.o libxl_utils.o $(LIBXL_OBJS-y) CLIENTS = xl diff -r 49deb113cd40 tools/libxl/libxl.c --- a/tools/libxl/libxl.c Fri Nov 13 15:46:58 2009 +0000 +++ b/tools/libxl/libxl.c Mon Nov 16 12:06:57 2009 +0000 @@ -21,8 +21,10 @@ #include <sys/types.h> #include <fcntl.h> #include <sys/select.h> +#include <sys/wait.h> #include <signal.h> #include <unistd.h> /* for write, unlink and close */ +#include <assert.h> #include "libxl.h" #include "libxl_utils.h" #include "libxl_internal.h" @@ -38,6 +40,8 @@ ctx->xch = xc_interface_open(); ctx->xsh = xs_daemon_open(); + + ctx->waitpid_instead= libxl_waitpid_instead_default; return 0; } @@ -496,16 +500,24 @@ return (char **) flexarray_contents(dm_args); } +struct libxl_device_model_starting { + int domid; + pid_t intermediate; +}; + int libxl_create_device_model(struct libxl_ctx *ctx, libxl_device_model_info *info, - libxl_device_nic *vifs, int num_vifs) + libxl_device_nic *vifs, int num_vifs, + libxl_device_model_starting **starting_r) { char *dom_path, *path, *logfile, *logfile_new; - char *kvs[3]; struct stat stat_buf; - int logfile_w, null, pid; + int logfile_w, null; int i; char **args; + pid_t intermediate; + + *starting_r= 0; args = libxl_build_device_model_args(ctx, info, vifs, num_vifs); if (!args) @@ -533,17 +545,121 @@ logfile = libxl_sprintf(ctx, "/var/log/xen/qemu-dm-%s.log", info->dom_name); logfile_w = open(logfile, O_WRONLY|O_CREAT, 0644); null = open("/dev/null", O_RDONLY); - pid = libxl_exec(ctx, null, logfile_w, logfile_w, info->device_model, args); + + if (starting_r) { + *starting_r= libxl_calloc(ctx, sizeof(**starting_r), 1); + if (!*starting_r) return ERROR_NOMEM; + } + + intermediate = libxl_fork(ctx); + if (intermediate==-1) return ERROR_FAIL; + + if (!intermediate) { + struct libxl_ctx clone; + char *kvs[3]; + pid_t child, got; + int status; + + child = libxl_fork(ctx); + if (!child) { + libxl_exec(ctx, null, logfile_w, logfile_w, + info->device_model, args); + } + + if (!starting_r) _exit(0); /* just detach then */ + + clone = *ctx; + clone.xsh = xs_daemon_open(); + /* we mustn't use the parent's handle in the child */ + + kvs[0] = libxl_sprintf(ctx, "image/device-model-pid"); + kvs[1] = libxl_sprintf(ctx, "%d", child); + kvs[2] = NULL; + libxl_xs_writev(ctx, XBT_NULL, dom_path, kvs); + + got = ctx->waitpid_instead(child, &status, 0); + assert(got == child); + + libxl_report_child_exitstatus(ctx, "device model", child, status); + _exit(WIFEXITED(status) ? WEXITSTATUS(status) : + WIFSIGNALED(status) && WTERMSIG(status)<127 + ? WTERMSIG(status)+128 : -1); + } + + if (starting_r) { + (*starting_r)->domid= info->domid; + (*starting_r)->intermediate= intermediate; + } + close(null); close(logfile_w); - kvs[0] = libxl_sprintf(ctx, "image/device-model-pid"); - kvs[1] = libxl_sprintf(ctx, "%d", pid); - kvs[2] = NULL; - libxl_xs_writev(ctx, XBT_NULL, dom_path, kvs); - return 0; } + +static void report_dm_intermediate_status(struct libxl_ctx *ctx, + libxl_device_model_starting *starting, + pid_t got, int status) { + if (!WIFEXITED(status)) + /* intermediate process did the logging itself if it exited */ + libxl_report_child_exitstatus(ctx, + "device model intermediate process" + " (startup monitor)", starting->intermediate, + status); +} + +int libxl_detach_device_model(struct libxl_ctx *ctx, + libxl_device_model_starting *starting) { + int r, status; + int rc = 0; + pid_t got; + + if (starting->intermediate) { + r = kill(starting->intermediate, SIGKILL); + if (r) { + XL_LOG(ctx, XL_LOG_ERROR_ERRNO, "could not kill device model" + "intermediate process [%ld]", + (unsigned long)starting->intermediate); + abort(); /* things are very wrong */ + } + got = ctx->waitpid_instead(starting->intermediate, &status, 0); + assert(got == starting->intermediate); + if (!(WIFSIGNALED(status) && WTERMSIG(status)==SIGKILL)) { + report_dm_intermediate_status(ctx, starting, got, status); + rc = ERROR_FAIL; + } + } + + libxl_free(ctx, starting); + + return rc; +} + +static int check_dm_failure(struct libxl_ctx *ctx, + void *starting_void) { + libxl_device_model_starting *starting = starting_void; + pid_t got; + int status; + + got = ctx->waitpid_instead(starting->intermediate, &status, WNOHANG); + if (!got) return 0; + + assert(got == starting->intermediate); + report_dm_intermediate_status(ctx, starting, got, status); + starting->intermediate= 0; + return ERROR_FAIL; +} + +int libxl_confirm_device_model_startup(struct libxl_ctx *ctx, + libxl_device_model_starting *starting) { + int problem = libxl_wait_for_device_model(ctx, starting->domid, "running", + check_dm_failure, + starting); + int detach = libxl_detach_device_model(ctx, starting); + return problem ? problem : detach; + return ERROR_FAIL; +} + /******************************************************************************/ int libxl_device_disk_add(struct libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disk) @@ -941,7 +1057,7 @@ hvm = is_hvm(ctx, domid); if (hvm) { - if (libxl_wait_for_device_model(ctx, domid, "running") < 0) { + if (libxl_wait_for_device_model(ctx, domid, "running", 0,0) < 0) { return -1; } snprintf(path, sizeof(path), "/local/domain/0/device-model/%d/state", domid); @@ -955,7 +1071,7 @@ pcidev->bus, pcidev->dev, pcidev->func); snprintf(path, sizeof(path), "/local/domain/0/device-model/%d/command", domid); xs_write(ctx->xsh, XBT_NULL, path, "pci-ins", strlen("pci-ins")); - if (libxl_wait_for_device_model(ctx, domid, "pci-inserted") < 0) + if (libxl_wait_for_device_model(ctx, domid, "pci-inserted", 0,0) < 0) XL_LOG(ctx, XL_LOG_ERROR, "Device Model didn't respond in time\n"); snprintf(path, sizeof(path), "/local/domain/0/device-model/%d/parameter", domid); vdevfn = libxl_xs_read(ctx, XBT_NULL, path); @@ -1029,7 +1145,7 @@ hvm = is_hvm(ctx, domid); if (hvm) { - if (libxl_wait_for_device_model(ctx, domid, "running") < 0) { + if (libxl_wait_for_device_model(ctx, domid, "running", 0,0) < 0) { return -1; } snprintf(path, sizeof(path), "/local/domain/0/device-model/%d/state", domid); @@ -1039,7 +1155,7 @@ pcidev->bus, pcidev->dev, pcidev->func); snprintf(path, sizeof(path), "/local/domain/0/device-model/%d/command", domid); xs_write(ctx->xsh, XBT_NULL, path, "pci-rem", strlen("pci-rem")); - if (libxl_wait_for_device_model(ctx, domid, "pci-removed") < 0) { + if (libxl_wait_for_device_model(ctx, domid, "pci-removed", 0,0) < 0) { XL_LOG(ctx, XL_LOG_ERROR, "Device Model didn't respond in time\n"); return -1; } diff -r 49deb113cd40 tools/libxl/libxl.h --- a/tools/libxl/libxl.h Fri Nov 13 15:46:58 2009 +0000 +++ b/tools/libxl/libxl.h Mon Nov 16 12:06:57 2009 +0000 @@ -42,6 +42,11 @@ /* mini-GC */ int alloc_maxsize; void **alloc_ptrs; + + /* for callers who reap children willy-nilly; caller must only + * set this after libxl_init and before any other call - or + * may leave them untouched */ + int (*waitpid_instead)(pid_t pid, int *status, int flags); }; typedef struct { @@ -193,9 +198,19 @@ struct libxl_dominfo * libxl_domain_list(struct libxl_ctx *ctx, int *nb_domain); xc_dominfo_t * libxl_domain_infolist(struct libxl_ctx *ctx, int *nb_domain); +typedef struct libxl_device_model_starting libxl_device_model_starting; int libxl_create_device_model(struct libxl_ctx *ctx, libxl_device_model_info *info, - libxl_device_nic *vifs, int num_vifs); + libxl_device_nic *vifs, int num_vifs, + libxl_device_model_starting **starting_r); + /* Caller must either: pass starting_r==0, or on successful + * return pass *starting_r to libxl_confirm_device_model + * or libxl_detach_device_model */ +int libxl_confirm_device_model_startup(struct libxl_ctx *ctx, + libxl_device_model_starting *starting); +int libxl_detach_device_model(struct libxl_ctx *ctx, + libxl_device_model_starting *starting); + /* DM is detached even if error is returned */ int libxl_device_disk_add(struct libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disk); int libxl_device_disk_clean_shutdown(struct libxl_ctx *ctx, uint32_t domid); diff -r 49deb113cd40 tools/libxl/libxl_device.c --- a/tools/libxl/libxl_device.c Fri Nov 13 15:46:58 2009 +0000 +++ b/tools/libxl/libxl_device.c Mon Nov 16 12:06:57 2009 +0000 @@ -260,12 +260,17 @@ return -1; } -int libxl_wait_for_device_model(struct libxl_ctx *ctx, uint32_t domid, char *state) +int libxl_wait_for_device_model(struct libxl_ctx *ctx, + uint32_t domid, char *state, + int (*check_callback)(struct libxl_ctx *ctx, + void *userdata), + void *check_callback_userdata) { char path[50]; char *p; int watchdog = 100; unsigned int len; + int rc; snprintf(path, sizeof(path), "/local/domain/0/device-model/%d/state", domid); while (watchdog > 0) { @@ -282,6 +287,10 @@ usleep(100000); watchdog--; } + } + if (check_callback) { + rc = check_callback(ctx, check_callback_userdata); + if (rc) return rc; } } XL_LOG(ctx, XL_LOG_ERROR, "Device Model not ready\n"); diff -r 49deb113cd40 tools/libxl/libxl_exec.c --- a/tools/libxl/libxl_exec.c Fri Nov 13 15:46:58 2009 +0000 +++ b/tools/libxl/libxl_exec.c Mon Nov 16 12:06:57 2009 +0000 @@ -15,34 +15,81 @@ * GNU Lesser General Public License for more details. */ +#include "osdeps.h" + #include <stdio.h> +#include <string.h> #include <unistd.h> #include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> #include "libxl.h" #include "libxl_internal.h" -int libxl_exec(struct libxl_ctx *ctx, int stdinfd, int stdoutfd, int stderrfd, - char *arg0, char **args) +pid_t libxl_fork(struct libxl_ctx *ctx) { - int pid, i; + pid_t pid; pid = fork(); if (pid == -1) { - XL_LOG(ctx, XL_LOG_ERROR, "fork failed"); + XL_LOG(ctx, XL_LOG_ERROR_ERRNO, "fork failed"); return -1; } - if (pid == 0) { - /* child */ - if (stdinfd != -1) - dup2(stdinfd, STDIN_FILENO); - if (stdoutfd != -1) - dup2(stdoutfd, STDOUT_FILENO); - if (stderrfd != -1) - dup2(stderrfd, STDERR_FILENO); - for (i = 4; i < 256; i++) - close(i); - execv(arg0, args); - exit(256); - } + return pid; } + +void libxl_exec(struct libxl_ctx *ctx, int stdinfd, int stdoutfd, int stderrfd, + char *arg0, char **args) + /* call this in the child */ +{ + int i; + + if (stdinfd != -1) + dup2(stdinfd, STDIN_FILENO); + if (stdoutfd != -1) + dup2(stdoutfd, STDOUT_FILENO); + if (stderrfd != -1) + dup2(stderrfd, STDERR_FILENO); + for (i = 4; i < 256; i++) + close(i); + execv(arg0, args); + XL_LOG(ctx, XL_LOG_ERROR_ERRNO, "exec %s failed", arg0); + _exit(-1); +} + +void libxl_report_child_exitstatus(struct libxl_ctx *ctx, + const char *what, pid_t pid, int status) { + /* treats all exit statuses as errors; if that's not what you want, + * check status yourself first */ + + if (WIFEXITED(status)) { + int st= WEXITSTATUS(status); + if (st) + XL_LOG(ctx, XL_LOG_ERROR, "%s [%ld] exited" + " with error status %d", what, (unsigned long)pid, st); + else + XL_LOG(ctx, XL_LOG_ERROR, "%s [%ld] unexpectedly" + " exited status zero", what, (unsigned long)pid); + } else if (WIFSIGNALED(status)) { + int sig= WTERMSIG(status); + const char *str= strsignal(sig); + const char *coredump= WCOREDUMP(status) ? " (core dumped)" : ""; + if (str) + XL_LOG(ctx, XL_LOG_ERROR, "%s [%ld] died due to" + " fatal signal %s%s", what, (unsigned long)pid, + str, coredump); + else + XL_LOG(ctx, XL_LOG_ERROR, "%s [%ld] died due to unknown" + " fatal signal number %d%s", what, (unsigned long)pid, + sig, coredump); + } else { + XL_LOG(ctx, XL_LOG_ERROR, "%s [%ld] gave unknown" + " wait status 0x%x", what, (unsigned long)pid, status); + } +} + +pid_t libxl_waitpid_instead_default(pid_t pid, int *status, int flags) { + return waitpid(pid,status,flags); +} diff -r 49deb113cd40 tools/libxl/libxl_internal.h --- a/tools/libxl/libxl_internal.h Fri Nov 13 15:46:58 2009 +0000 +++ b/tools/libxl/libxl_internal.h Mon Nov 16 12:06:57 2009 +0000 @@ -45,6 +45,7 @@ #define XL_LOG_WARNING 1 #define XL_LOG_ERROR 0 +void xl_logv(struct libxl_ctx *ctx, int loglevel, const char *file, int line, const char *func, char *fmt, va_list al); void xl_log(struct libxl_ctx *ctx, int loglevel, const char *file, int line, const char *func, char *fmt, ...); typedef struct { @@ -126,7 +127,11 @@ char **bents, char **fents); int libxl_device_destroy(struct libxl_ctx *ctx, char *be_path, int force); int libxl_devices_destroy(struct libxl_ctx *ctx, uint32_t domid, int force); -int libxl_wait_for_device_model(struct libxl_ctx *ctx, uint32_t domid, char *state); +int libxl_wait_for_device_model(struct libxl_ctx *ctx, + uint32_t domid, char *state, + int (*check_callback)(struct libxl_ctx *ctx, + void *userdata), + void *check_callback_userdata); int libxl_wait_for_backend(struct libxl_ctx *ctx, char *be_path, char *state); int libxl_device_pci_flr(struct libxl_ctx *ctx, unsigned int domain, unsigned int bus, unsigned int dev, unsigned int func); @@ -137,8 +142,12 @@ int vcpus, int store_evtchn, unsigned long *store_mfn); /* xl_exec */ -int libxl_exec(struct libxl_ctx *ctx, int stdinfd, int stdoutfd, int stderrfd, - char *arg0, char **args); +pid_t libxl_fork(struct libxl_ctx *ctx); +void libxl_exec(struct libxl_ctx *ctx, int stdinfd, int stdoutfd, int stderrfd, + char *arg0, char **args); +void libxl_report_child_exitstatus(struct libxl_ctx *ctx, + const char *what, pid_t pid, int status); +pid_t libxl_waitpid_instead_default(pid_t pid, int *status, int flags); #endif diff -r 49deb113cd40 tools/libxl/osdeps.c --- a/tools/libxl/osdeps.c Fri Nov 13 15:46:58 2009 +0000 +++ b/tools/libxl/osdeps.c Mon Nov 16 12:06:57 2009 +0000 @@ -13,11 +13,15 @@ * GNU Lesser General Public License for more details. */ +#include "osdeps.h" + #include <unistd.h> #include <stdarg.h> #include <stdio.h> #include <sys/time.h> #include <stdlib.h> + +#ifdef NEED_OWN_ASPRINTF int vasprintf(char **buffer, const char *fmt, va_list ap) { @@ -60,3 +64,5 @@ va_end (ap); return status; } + +#endif diff -r 49deb113cd40 tools/libxl/osdeps.h --- a/tools/libxl/osdeps.h Fri Nov 13 15:46:58 2009 +0000 +++ b/tools/libxl/osdeps.h Mon Nov 16 12:06:57 2009 +0000 @@ -16,9 +16,10 @@ #ifndef LIBXL_OSDEP #define LIBXL_OSDEP +#define _GNU_SOURCE + +#ifdef NEED_OWN_ASPRINTF #include <stdarg.h> - -#if defined(__linux__) int asprintf(char **buffer, char *fmt, ...); int vasprintf(char **buffer, const char *fmt, va_list ap); #endif diff -r 49deb113cd40 tools/libxl/xl.c --- a/tools/libxl/xl.c Fri Nov 13 15:46:58 2009 +0000 +++ b/tools/libxl/xl.c Mon Nov 16 12:06:57 2009 +0000 @@ -572,6 +572,15 @@ config_destroy(&config); } +#define MUST( call ) ({ \ + int must_rc = (call); \ + if (must_rc) { \ + fprintf(stderr,"xl: fatal error: %s:%d, rc=%d: %s\n", \ + __FILE__,__LINE__, must_rc, #call); \ + exit(-must_rc); \ + } \ + }) + static void create_domain(int debug, const char *filename) { struct libxl_ctx ctx; @@ -584,30 +593,35 @@ libxl_device_pci *pcidevs = NULL; int num_disks = 0, num_vifs = 0, num_pcidevs = 0; int i; + libxl_device_model_starting *dm_starting; printf("Parsing config file %s\n", filename); parse_config_file(filename, &info1, &info2, &disks, &num_disks, &vifs, &num_vifs, &pcidevs, &num_pcidevs, &dm_info); if (debug) printf_info(&info1, &info2, disks, num_disks, vifs, num_vifs, pcidevs, num_pcidevs, &dm_info); - libxl_ctx_init(&ctx); - libxl_ctx_set_log(&ctx, log_callback, NULL); - libxl_domain_make(&ctx, &info1, &domid); - libxl_domain_build(&ctx, &info2, domid); + MUST( libxl_ctx_init(&ctx) ); + MUST( libxl_ctx_set_log(&ctx, log_callback, NULL) ); + MUST( libxl_domain_make(&ctx, &info1, &domid) ); + MUST( libxl_domain_build(&ctx, &info2, domid) ); device_model_info_domid_fixup(&dm_info, domid); for (i = 0; i < num_disks; i++) { disk_info_domid_fixup(disks + i, domid); - libxl_device_disk_add(&ctx, domid, &disks[i]); + MUST( libxl_device_disk_add(&ctx, domid, &disks[i]) ); } for (i = 0; i < num_vifs; i++) { nic_info_domid_fixup(vifs + i, domid); - libxl_device_nic_add(&ctx, domid, &vifs[i]); + MUST( libxl_device_nic_add(&ctx, domid, &vifs[i]) ); } - libxl_create_device_model(&ctx, &dm_info, vifs, num_vifs); + + MUST( libxl_create_device_model(&ctx, &dm_info, vifs, num_vifs, + &dm_starting) ); for (i = 0; i < num_pcidevs; i++) libxl_device_pci_add(&ctx, domid, &pcidevs[i]); + MUST( libxl_confirm_device_model_startup(&ctx, dm_starting) ); + libxl_domain_unpause(&ctx, domid); } _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |