[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] libxenlight: fix suspend\resume
On Fri, 27 Nov 2009, Stefano Stabellini wrote: > Hi all, > this patch fixes the current suspend\resume implementation in > libxenlight and creates the correspondent commands in xl. > > The patch applies after Tomasz' console patch. > > Signed-off-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx> > resending because the previous version had a bug if the device model state was missing. Signed-off-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx> --- diff -r c34a58c843a6 tools/libxl/libxl.c --- a/tools/libxl/libxl.c Fri Nov 27 16:24:45 2009 +0000 +++ b/tools/libxl/libxl.c Fri Nov 27 16:37:36 2009 +0000 @@ -217,29 +217,36 @@ } int libxl_domain_restore(struct libxl_ctx *ctx, libxl_domain_build_info *info, - uint32_t domid, int fd) + uint32_t domid, int fd, libxl_domain_build_state *state, + libxl_device_model_info *dm_info) { - libxl_domain_build_state state; char **vments = NULL, **localents = NULL; - memset(&state, '\0', sizeof(state)); - - build_pre(ctx, domid, info, &state); - restore_common(ctx, domid, info, &state, fd); + build_pre(ctx, domid, info, state); + restore_common(ctx, domid, info, state, fd); if (info->hvm) { - vments = libxl_calloc(ctx, 4, sizeof(char *)); + vments = libxl_calloc(ctx, 5, sizeof(char *)); vments[0] = "rtc/timeoffset"; vments[1] = (info->u.hvm.timeoffset) ? 
info->u.hvm.timeoffset : ""; + vments[2] = "image/ostype"; + vments[3] = "hvm"; } else { - localents = libxl_calloc(ctx, 4 * 2, sizeof(char *)); - localents[0] = "serial/0/limit"; - localents[1] = libxl_sprintf(ctx, "%d", 65536); - localents[2] = "console/port"; - localents[3] = libxl_sprintf(ctx, "%d", state.console_port); - localents[4] = "console/ring-ref"; - localents[5] = libxl_sprintf(ctx, "%ld", state.console_mfn); + vments = libxl_calloc(ctx, 9, sizeof(char *)); + vments[0] = "image/ostype"; + vments[1] = "linux"; + vments[2] = "image/kernel"; + vments[3] = (char*) info->kernel; + vments[4] = "image/ramdisk"; + vments[5] = (char*) info->u.pv.ramdisk; + vments[6] = "image/cmdline"; + vments[7] = (char*) info->u.pv.cmdline; } - build_post(ctx, domid, info, &state, vments, localents); + build_post(ctx, domid, info, state, vments, localents); + if (info->hvm) + asprintf(&(dm_info->saved_state), "/var/lib/xen/qemu-save.%d", domid); + else + dm_info->saved_state = NULL; + return 0; } @@ -299,17 +306,37 @@ return info; } +static int libxl_save_device_model(struct libxl_ctx *ctx, uint32_t domid, int fd) +{ + int fd2, c; + char buf[1024]; + char *filename = libxl_sprintf(ctx, "/var/lib/xen/qemu-save.%d", domid); + + XL_LOG(ctx, XL_LOG_DEBUG, "Saving device model state to %s", filename); + libxl_xs_write(ctx, XBT_NULL, libxl_sprintf(ctx, "/local/domain/0/device-model/%d/command", domid), "save", strlen("save")); + libxl_wait_for_device_model(ctx, domid, "paused", NULL, NULL); + + write(fd, QEMU_SIGNATURE, strlen(QEMU_SIGNATURE)); + fd2 = open(filename, O_RDONLY); + while ((c = read(fd2, buf, sizeof(buf))) != 0) { + write(fd, buf, c); + } + close(fd2); + unlink(filename); + return 0; +} + int libxl_domain_suspend(struct libxl_ctx *ctx, libxl_domain_suspend_info *info, uint32_t domid, int fd) { - int hvm = 1; - int live = 0; - int debug = 0; - char savesig[] = "XenSavedDomain\n"; + int hvm = is_hvm(ctx, domid); + int live = info != NULL && info->flags & 
XL_SUSPEND_LIVE; + int debug = info != NULL && info->flags & XL_SUSPEND_LIVE; - write(fd, savesig, strlen(savesig)); core_suspend(ctx, domid, fd, hvm, live, debug); + if (hvm) + libxl_save_device_model(ctx, domid, fd); return 0; } @@ -322,7 +349,19 @@ int libxl_domain_unpause(struct libxl_ctx *ctx, uint32_t domid) { + char path[50]; + char *state; + + if (is_hvm(ctx, domid)) { + snprintf(path, sizeof(path), "/local/domain/0/device-model/%d/state", domid); + state = libxl_xs_read(ctx, XBT_NULL, path); + if (state != NULL && !strcmp(state, "paused")) { + libxl_xs_write(ctx, XBT_NULL, libxl_sprintf(ctx, "/local/domain/0/device-model/%d/command", domid), "continue", strlen("continue")); + libxl_wait_for_device_model(ctx, domid, "running", NULL, NULL); + } + } xc_domain_unpause(ctx->xch, domid); + return 0; } @@ -581,6 +620,10 @@ vifs[i].devid, vifs[i].ifname, vifs[i].bridge)); } } + } + if (info->saved_state) { + flexarray_set(dm_args, num++, "-loadvm"); + flexarray_set(dm_args, num++, info->saved_state); } for (i = 0; info->extra && info->extra[i] != NULL; i++) flexarray_set(dm_args, num++, info->extra[i]); diff -r c34a58c843a6 tools/libxl/libxl.h --- a/tools/libxl/libxl.h Fri Nov 27 16:24:45 2009 +0000 +++ b/tools/libxl/libxl.h Fri Nov 27 16:37:36 2009 +0000 @@ -94,6 +94,8 @@ } libxl_domain_build_state; typedef struct { +#define XL_SUSPEND_DEBUG 1 +#define XL_SUSPEND_LIVE 2 int flags; int (*suspend_callback)(void *, int); } libxl_domain_suspend_info; @@ -107,6 +109,7 @@ int domid; char *dom_name; char *device_model; + char *saved_state; libxl_qemu_machine_type type; int videoram; /* size of the videoram in MB */ bool stdvga; /* stdvga enabled or disabled */ @@ -254,7 +257,8 @@ int libxl_domain_make(struct libxl_ctx *ctx, libxl_domain_create_info *info, uint32_t *domid); int libxl_domain_build(struct libxl_ctx *ctx, libxl_domain_build_info *info, uint32_t domid, /* out */ libxl_domain_build_state *state); int libxl_domain_restore(struct libxl_ctx *ctx, 
libxl_domain_build_info *info, - uint32_t domid, int fd); + uint32_t domid, int fd, libxl_domain_build_state *state, + libxl_device_model_info *dm_info); int libxl_domain_suspend(struct libxl_ctx *ctx, libxl_domain_suspend_info *info, uint32_t domid, int fd); int libxl_domain_shutdown(struct libxl_ctx *ctx, uint32_t domid, int req); diff -r c34a58c843a6 tools/libxl/libxl_dom.c --- a/tools/libxl/libxl_dom.c Fri Nov 27 16:24:45 2009 +0000 +++ b/tools/libxl/libxl_dom.c Fri Nov 27 16:37:36 2009 +0000 @@ -163,71 +163,36 @@ state->store_port, &state->store_mfn, state->console_port, &state->console_mfn, info->hvm, info->u.hvm.pae, 0); +#if defined(__i386__) || defined(__x86_64__) + xc_cpuid_apply_policy(ctx->xch, domid); +#endif return 0; } -/* the following code is extremely ugly and racy without forking. - we intend to fix the re-entrancy of the underlying code instead of forking */ -static struct libxl_ctx *global_suspend_ctx = NULL; -static struct suspendinfo { - int xch; +struct suspendinfo { + struct libxl_ctx *ctx; int xce; /* event channel handle */ int suspend_eventchn; int domid; int hvm; unsigned int flags; -} si; +}; -void core_suspend_switch_qemu_logdirty(int domid, unsigned int enable) +static void core_suspend_switch_qemu_logdirty(int domid, unsigned int enable) { - struct xs_handle *xs; - char *path, *ret_path, *cmd_path, *ret_str, *cmd_str, **watch; - unsigned int len; - struct timeval tv; - fd_set fdset; - struct libxl_ctx *ctx = global_suspend_ctx; + struct xs_handle *xsh; + char path[64]; - xs = xs_daemon_open(); - if (!xs) - return; - path = libxl_sprintf(ctx, "/local/domain/0/device-model/%i/logdirty", domid); - if (!path) - return; - ret_path = libxl_sprintf(ctx, "%s/ret", path); - if (!ret_path) - return; - cmd_path = libxl_sprintf(ctx, "%s/cmd", path); - if (!ret_path) - return; + snprintf(path, sizeof(path), "/local/domain/0/device-model/%u/logdirty/cmd", domid); - /* Watch for qemu's return value */ - if (!xs_watch(xs, ret_path, 
"qemu-logdirty-ret")) - return; + xsh = xs_daemon_open(); - cmd_str = (enable == 0) ? "disable" : "enable"; + if (enable) + xs_write(xsh, XBT_NULL, path, "enable", strlen("enable")); + else + xs_write(xsh, XBT_NULL, path, "disable", strlen("disable")); - /* Tell qemu that we want it to start logging dirty page to Xen */ - if (!xs_write(xs, XBT_NULL, cmd_path, cmd_str, strlen(cmd_str))) - return; - - /* Wait a while for qemu to signal that it has service logdirty command */ -read_again: - tv.tv_sec = 5; - tv.tv_usec = 0; - FD_ZERO(&fdset); - FD_SET(xs_fileno(xs), &fdset); - - if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1) - return; - - watch = xs_read_watch(xs, &len); - free(watch); - - ret_str = xs_read(xs, XBT_NULL, ret_path, &len); - if (ret_str == NULL || strcmp(ret_str, cmd_str)) - /* Watch fired but value is not yet right */ - goto read_again; - free(ret_str); + xs_daemon_close(xsh); } static int core_suspend_callback(void *data) @@ -235,46 +200,78 @@ struct suspendinfo *si = data; unsigned long s_state = 0; int ret; + char *path, *state = "suspend"; + int watchdog = 60; if (si->hvm) - xc_get_hvm_param(si->xch, si->domid, HVM_PARAM_ACPI_S_STATE, &s_state); + xc_get_hvm_param(si->ctx->xch, si->domid, HVM_PARAM_ACPI_S_STATE, &s_state); if ((s_state == 0) && (si->suspend_eventchn >= 0)) { - ret = xc_evtchn_notify(si->xch, si->suspend_eventchn); + ret = xc_evtchn_notify(si->xce, si->suspend_eventchn); if (ret < 0) { + XL_LOG(si->ctx, XL_LOG_ERROR, "xc_evtchn_notify failed ret=%d", ret); return 0; } - ret = xc_await_suspend(si->xch, si->suspend_eventchn); + ret = xc_await_suspend(si->xce, si->suspend_eventchn); if (ret < 0) { + XL_LOG(si->ctx, XL_LOG_ERROR, "xc_await_suspend failed ret=%d", ret); return 0; } return 1; } - /* need to shutdown (to suspend) the domain here */ - return 0; + path = libxl_sprintf(si->ctx, "%s/control/shutdown", libxl_xs_get_dompath(si->ctx, si->domid)); + libxl_xs_write(si->ctx, XBT_NULL, path, "suspend", 
strlen("suspend")); + if (si->hvm) { + unsigned long hvm_pvdrv, hvm_s_state; + xc_get_hvm_param(si->ctx->xch, si->domid, HVM_PARAM_CALLBACK_IRQ, &hvm_pvdrv); + xc_get_hvm_param(si->ctx->xch, si->domid, HVM_PARAM_ACPI_S_STATE, &hvm_s_state); + if (!hvm_pvdrv || hvm_s_state) { + XL_LOG(si->ctx, XL_LOG_DEBUG, "Calling xc_domain_shutdown on the domain"); + xc_domain_shutdown(si->ctx->xch, si->domid, SHUTDOWN_suspend); + } + } + XL_LOG(si->ctx, XL_LOG_DEBUG, "wait for the guest to suspend"); + while (!strcmp(state, "suspend") && watchdog > 0) { + int nb_domain, i; + xc_dominfo_t *list = NULL; + usleep(100000); + list = libxl_domain_infolist(si->ctx, &nb_domain); + for (i = 0; i < nb_domain; i++) { + if (si->domid == list[i].domid) { + if (list[i].shutdown != 0 && list[i].shutdown_reason == SHUTDOWN_suspend) { + free(list); + return 1; + } + } + } + free(list); + state = libxl_xs_read(si->ctx, XBT_NULL, path); + watchdog--; + } + if (!strcmp(state, "suspend")) { + XL_LOG(si->ctx, XL_LOG_ERROR, "guest didn't suspend in time"); + libxl_xs_write(si->ctx, XBT_NULL, path, "", 1); + } + return 1; } -static struct save_callbacks callbacks; int core_suspend(struct libxl_ctx *ctx, uint32_t domid, int fd, int hvm, int live, int debug) { int flags; int port; + struct save_callbacks callbacks; + struct suspendinfo si; flags = (live) ? XCFLAGS_LIVE : 0 - | (debug) ? XCFLAGS_DEBUG : 0; - - /* crappy global lock until we make everything clean */ - while (global_suspend_ctx) { - sleep(1); - } - global_suspend_ctx = ctx; + | (debug) ? XCFLAGS_DEBUG : 0 + | (hvm) ? 
XCFLAGS_HVM : 0; si.domid = domid; si.flags = flags; si.hvm = hvm; - si.suspend_eventchn = si.xce = -1; - si.xch = ctx->xch; + si.ctx = ctx; + si.suspend_eventchn = -1; si.xce = xc_evtchn_open(); if (si.xce < 0) @@ -284,28 +281,28 @@ port = xs_suspend_evtchn_port(si.domid); if (port < 0) { + XL_LOG(ctx, XL_LOG_WARNING, "Failed to get the suspend evtchn port"); } else { - si.suspend_eventchn = xc_suspend_evtchn_init(si.xch, si.xce, si.domid, port); + si.suspend_eventchn = xc_suspend_evtchn_init(si.ctx->xch, si.xce, si.domid, port); - if (si.suspend_eventchn < 0) { - } + if (si.suspend_eventchn < 0) + XL_LOG(ctx, XL_LOG_WARNING, "Suspend event channel initialization failed"); } } + memset(&callbacks, 0, sizeof(callbacks)); callbacks.suspend = core_suspend_callback; - callbacks.postcopy = NULL; - callbacks.checkpoint = NULL; callbacks.data = &si; xc_domain_save(ctx->xch, fd, domid, 0, 0, flags, &callbacks, hvm, - core_suspend_switch_qemu_logdirty); + &core_suspend_switch_qemu_logdirty); if (si.suspend_eventchn > 0) xc_suspend_evtchn_release(si.xce, si.suspend_eventchn); if (si.xce > 0) xc_evtchn_close(si.xce); - global_suspend_ctx = NULL; return 0; } + diff -r c34a58c843a6 tools/libxl/libxl_internal.h --- a/tools/libxl/libxl_internal.h Fri Nov 27 16:24:45 2009 +0000 +++ b/tools/libxl/libxl_internal.h Fri Nov 27 16:37:36 2009 +0000 @@ -30,6 +30,7 @@ #define LIBXL_DESTROY_TIMEOUT 10 #define LIBXL_XENCONSOLE_LIMIT 1048576 #define LIBXL_XENCONSOLE_PROTOCOL "vt100" +#define QEMU_SIGNATURE "QemuDeviceModelRecord" #define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) diff -r c34a58c843a6 tools/libxl/xl.c --- a/tools/libxl/xl.c Fri Nov 27 16:24:45 2009 +0000 +++ b/tools/libxl/xl.c Fri Nov 27 16:37:36 2009 +0000 @@ -31,6 +31,7 @@ #include <sys/select.h> #include <arpa/inet.h> #include <xenctrl.h> + #include "libxl.h" #include "libxl_utils.h" @@ -577,7 +578,7 @@ } \ }) -static void create_domain(int debug, const char *filename) +static void create_domain(int debug, const char 
*config_file, const char *restore_file, int paused) { struct libxl_ctx ctx; uint32_t domid; @@ -595,9 +596,10 @@ int i, fd; int need_daemon = 1; libxl_device_model_starting *dm_starting = 0; + memset(&dm_info, 0x00, sizeof(dm_info)); - printf("Parsing config file %s\n", filename); - parse_config_file(filename, &info1, &info2, &disks, &num_disks, &vifs, &num_vifs, &pcidevs, &num_pcidevs, &vfbs, &num_vfbs, &vkbs, &num_vkbs, &dm_info); + printf("Parsing config file %s\n", config_file); + parse_config_file(config_file, &info1, &info2, &disks, &num_disks, &vifs, &num_vifs, &pcidevs, &num_pcidevs, &vfbs, &num_vfbs, &vkbs, &num_vkbs, &dm_info); if (debug) printf_info(&info1, &info2, disks, num_disks, vifs, num_vifs, pcidevs, num_pcidevs, vfbs, num_vfbs, vkbs, num_vkbs, &dm_info); @@ -607,7 +609,20 @@ libxl_ctx_init(&ctx); libxl_ctx_set_log(&ctx, log_callback, NULL); libxl_domain_make(&ctx, &info1, &domid); - libxl_domain_build(&ctx, &info2, domid, &state); + + if (!restore_file || !need_daemon) { + if (dm_info.saved_state) { + free(dm_info.saved_state); + dm_info.saved_state = NULL; + } + libxl_domain_build(&ctx, &info2, domid, &state); + } else { + int restore_fd; + + restore_fd = open(restore_file, O_RDONLY); + libxl_domain_restore(&ctx, &info2, domid, restore_fd, &state, &dm_info); + close(restore_fd); + } for (i = 0; i < num_disks; i++) { disk_info_domid_fixup(disks + i, domid); @@ -642,7 +657,8 @@ for (i = 0; i < num_pcidevs; i++) libxl_device_pci_add(&ctx, domid, &pcidevs[i]); - libxl_domain_unpause(&ctx, domid); + if (!paused) + libxl_domain_unpause(&ctx, domid); if (need_daemon) { char *fullname, *name; @@ -714,6 +730,8 @@ printf(" pause pause execution of a domain\n\n"); printf(" unpause unpause a paused domain\n\n"); printf(" console attach to domain's console\n\n"); + printf(" save save a domain state to restore later\n\n"); + printf(" restore restore a domain from a saved state\n\n"); } else if(!strcmp(command, "create")) { printf("Usage: xl create 
<ConfigFile> [options] [vars]\n\n"); printf("Create a domain based on <ConfigFile>.\n\n"); @@ -738,6 +756,18 @@ } else if(!strcmp(command, "unpause")) { printf("Usage: xl unpause <Domain>\n\n"); printf("Unpause a paused domain.\n\n"); + } else if(!strcmp(command, "save")) { + printf("Usage: xl save [options] <Domain> <CheckpointFile>\n\n"); + printf("Save a domain state to restore later.\n\n"); + printf("Options:\n\n"); + printf("-h Print this help.\n"); + printf("-c Leave domain running after creating the snapshot.\n"); + } else if(!strcmp(command, "restore")) { + printf("Usage: xl restore [options] <ConfigFile> <CheckpointFile>\n\n"); + printf("Restore a domain from a saved state.\n\n"); + printf("Options:\n\n"); + printf("-h Print this help.\n"); + printf("-p Do not unpause domain after restoring it.\n"); } else if(!strcmp(command, "destroy")) { printf("Usage: xl destroy <Domain>\n\n"); printf("Terminate a domain immediately.\n\n"); @@ -1017,6 +1047,101 @@ free(info); } +int save_domain(char *p, char *filename, int checkpoint) +{ + struct libxl_ctx ctx; + uint32_t domid; + int fd; + + libxl_ctx_init(&ctx); + libxl_ctx_set_log(&ctx, log_callback, NULL); + + if (libxl_param_to_domid(&ctx, p, &domid) < 0) { + fprintf(stderr, "%s is an invalid domain identifier\n", p); + exit(2); + } + fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0644); + if (fd < 0) { + fprintf(stderr, "Failed to open temp file %s for writing\n", filename); + exit(2); + } + libxl_domain_suspend(&ctx, NULL, domid, fd); + close(fd); + + if (checkpoint) + libxl_domain_unpause(&ctx, domid); + else + libxl_domain_destroy(&ctx, domid, 0); + + exit(0); +} + +int main_restore(int argc, char **argv) +{ + char *checkpoint_file = NULL; + char *config_file = NULL; + int paused = 0, debug = 0; + int opt; + + while ((opt = getopt(argc, argv, "hpd")) != -1) { + switch (opt) { + case 'p': + paused = 1; + break; + case 'd': + debug = 1; + break; + case 'h': + help("restore"); + exit(0); + default: + 
fprintf(stderr, "option not supported\n"); + break; + } + } + + if (optind >= argc - 1) { + help("restore"); + exit(2); + } + + config_file = argv[optind]; + checkpoint_file = argv[optind + 1]; + create_domain(debug, config_file, checkpoint_file, paused); + exit(0); +} + +int main_save(int argc, char **argv) +{ + char *filename = NULL, *p = NULL; + int checkpoint = 0; + int opt; + + while ((opt = getopt(argc, argv, "hc")) != -1) { + switch (opt) { + case 'c': + checkpoint = 1; + break; + case 'h': + help("save"); + exit(0); + default: + fprintf(stderr, "option not supported\n"); + break; + } + } + + if (optind >= argc - 1) { + help("save"); + exit(2); + } + + p = argv[optind]; + filename = argv[optind + 1]; + save_domain(p, filename, checkpoint); + exit(0); +} + int main_pause(int argc, char **argv) { int opt; @@ -1142,7 +1267,7 @@ } filename = argv[optind]; - create_domain(debug, filename); + create_domain(debug, filename, NULL, 0); exit(0); } @@ -1171,6 +1296,10 @@ main_unpause(argc - 1, argv + 1); } else if (!strcmp(argv[1], "console")) { main_console(argc - 1, argv + 1); + } else if (!strcmp(argv[1], "save")) { + main_save(argc - 1, argv + 1); + } else if (!strcmp(argv[1], "restore")) { + main_restore(argc - 1, argv + 1); } else if (!strcmp(argv[1], "help")) { if (argc > 2) help(argv[2]); _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
Lists.xenproject.org is hosted with RackSpace, monitoring our […]