[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 28/29] xl: split out migration related code
Include COLO / Remus code because they are built on top of the existing migration protocol. Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx> --- tools/xl/Makefile | 2 +- tools/xl/xl_cmdimpl.c | 715 ----------------------------------------------- tools/xl/xl_migrate.c | 754 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 755 insertions(+), 716 deletions(-) create mode 100644 tools/xl/xl_migrate.c diff --git a/tools/xl/Makefile b/tools/xl/Makefile index 9982b936c4..8c30fdcf9e 100644 --- a/tools/xl/Makefile +++ b/tools/xl/Makefile @@ -20,7 +20,7 @@ XL_OBJS += xl_tmem.o xl_parse.o xl_cpupool.o xl_flask.o XL_OBJS += xl_vtpm.o xl_block.o xl_nic.o xl_usb.o XL_OBJS += xl_sched.o xl_pci.o xl_vcpu.o xl_cd.o xl_mem.o XL_OBJS += xl_psr.o xl_info.o xl_console.o xl_misc.o -XL_OBJS += xl_vmcontrol.o xl_saverestore.o +XL_OBJS += xl_vmcontrol.o xl_saverestore.o xl_migrate.o $(XL_OBJS): CFLAGS += $(CFLAGS_libxentoollog) $(XL_OBJS): CFLAGS += $(CFLAGS_XL) diff --git a/tools/xl/xl_cmdimpl.c b/tools/xl/xl_cmdimpl.c index c41ae31814..bd7f8edb0a 100644 --- a/tools/xl/xl_cmdimpl.c +++ b/tools/xl/xl_cmdimpl.c @@ -98,721 +98,6 @@ void help(const char *command) } } -#ifndef LIBXL_HAVE_NO_SUSPEND_RESUME - -static pid_t create_migration_child(const char *rune, int *send_fd, - int *recv_fd) -{ - int sendpipe[2], recvpipe[2]; - pid_t child; - - if (!rune || !send_fd || !recv_fd) - return -1; - - MUST( libxl_pipe(ctx, sendpipe) ); - MUST( libxl_pipe(ctx, recvpipe) ); - - child = xl_fork(child_migration, "migration transport process"); - - if (!child) { - dup2(sendpipe[0], 0); - dup2(recvpipe[1], 1); - close(sendpipe[0]); close(sendpipe[1]); - close(recvpipe[0]); close(recvpipe[1]); - execlp("sh","sh","-c",rune,(char*)0); - perror("failed to exec sh"); - exit(EXIT_FAILURE); - } - - close(sendpipe[0]); - close(recvpipe[1]); - *send_fd = sendpipe[1]; - *recv_fd = recvpipe[0]; - - /* if receiver dies, we get an error and can clean up - rather than just dying */ - signal(SIGPIPE, SIG_IGN); - - return child; -} - -static int migrate_read_fixedmessage(int fd, const void *msg, int msgsz, - const char *what, const char *rune) { - char buf[msgsz]; - const char *stream; - int rc; - - stream = rune ? "migration receiver stream" : "migration stream"; - rc = libxl_read_exactly(ctx, fd, buf, msgsz, stream, what); - if (rc) return 1; - - if (memcmp(buf, msg, msgsz)) { - fprintf(stderr, "%s contained unexpected data instead of %s\n", - stream, what); - if (rune) - fprintf(stderr, "(command run was: %s )\n", rune); - return 1; - } - return 0; -} - -static void migration_child_report(int recv_fd) { - pid_t child; - int status, sr; - struct timeval now, waituntil, timeout; - static const struct timeval pollinterval = { 0, 1000 }; /* 1ms */ - - if (!xl_child_pid(child_migration)) return; - - CHK_SYSCALL(gettimeofday(&waituntil, 0)); - waituntil.tv_sec += 2; - - for (;;) { - pid_t migration_child = xl_child_pid(child_migration); - child = xl_waitpid(child_migration, &status, WNOHANG); - - if (child == migration_child) { - if (status) - xl_report_child_exitstatus(XTL_INFO, child_migration, - migration_child, status); - break; - } - if (child == -1) { - fprintf(stderr, "wait for migration child [%ld] failed: %s\n", - (long)migration_child, strerror(errno)); - break; - } - assert(child == 0); - - CHK_SYSCALL(gettimeofday(&now, 0)); - if (timercmp(&now, &waituntil, >)) { - fprintf(stderr, "migration child [%ld] not exiting, no longer" - " waiting (exit status will be unreported)\n", - (long)migration_child); - break; - } - timersub(&waituntil, &now, &timeout); - - if (recv_fd >= 0) { - fd_set readfds, exceptfds; - FD_ZERO(&readfds); - FD_ZERO(&exceptfds); - FD_SET(recv_fd, &readfds); - FD_SET(recv_fd, &exceptfds); - sr = select(recv_fd+1, &readfds,0,&exceptfds, &timeout); - } else { - if (timercmp(&timeout, &pollinterval, >)) - timeout = pollinterval; - sr = select(0,0,0,0, &timeout); - } - if (sr > 0) { - recv_fd = -1; - } else if (sr == 0) { - } else if (sr == -1) { - if (errno != EINTR) { - fprintf(stderr, "migration child [%ld] exit wait select" - " failed unexpectedly: %s\n", - (long)migration_child, strerror(errno)); - break; - } - } - } -} - -static void migrate_do_preamble(int send_fd, int recv_fd, pid_t child, - uint8_t *config_data, int config_len, - const char *rune) -{ - int rc = 0; - - if (send_fd < 0 || recv_fd < 0) { - fprintf(stderr, "migrate_do_preamble: invalid file descriptors\n"); - exit(EXIT_FAILURE); - } - - rc = migrate_read_fixedmessage(recv_fd, migrate_receiver_banner, - sizeof(migrate_receiver_banner)-1, - "banner", rune); - if (rc) { - close(send_fd); - migration_child_report(recv_fd); - exit(EXIT_FAILURE); - } - - save_domain_core_writeconfig(send_fd, "migration stream", - config_data, config_len); - -} - -static void migrate_domain(uint32_t domid, const char *rune, int debug, - const char *override_config_file) -{ - pid_t child = -1; - int rc; - int send_fd = -1, recv_fd = -1; - char *away_domname; - char rc_buf; - uint8_t *config_data; - int config_len, flags = LIBXL_SUSPEND_LIVE; - - save_domain_core_begin(domid, override_config_file, - &config_data, &config_len); - - if (!config_len) { - fprintf(stderr, "No config file stored for running domain and " - "none supplied - cannot migrate.\n"); - exit(EXIT_FAILURE); - } - - child = create_migration_child(rune, &send_fd, &recv_fd); - - migrate_do_preamble(send_fd, recv_fd, child, config_data, config_len, - rune); - - xtl_stdiostream_adjust_flags(logger, XTL_STDIOSTREAM_HIDE_PROGRESS, 0); - - if (debug) - flags |= LIBXL_SUSPEND_DEBUG; - rc = libxl_domain_suspend(ctx, domid, send_fd, flags, NULL); - if (rc) { - fprintf(stderr, "migration sender: libxl_domain_suspend failed" - " (rc=%d)\n", rc); - if (rc == ERROR_GUEST_TIMEDOUT) - goto failed_suspend; - else - goto failed_resume; - } - - //fprintf(stderr, "migration sender: Transfer complete.\n"); - // Should only be printed when debugging as it's a bit messy with - // progress indication. - - rc = migrate_read_fixedmessage(recv_fd, migrate_receiver_ready, - sizeof(migrate_receiver_ready), - "ready message", rune); - if (rc) goto failed_resume; - - xtl_stdiostream_adjust_flags(logger, 0, XTL_STDIOSTREAM_HIDE_PROGRESS); - - /* right, at this point we are about give the destination - * permission to rename and resume, so we must first rename the - * domain away ourselves */ - - fprintf(stderr, "migration sender: Target has acknowledged transfer.\n"); - - if (common_domname) { - xasprintf(&away_domname, "%s--migratedaway", common_domname); - rc = libxl_domain_rename(ctx, domid, common_domname, away_domname); - if (rc) goto failed_resume; - } - - /* point of no return - as soon as we have tried to say - * "go" to the receiver, it's not safe to carry on. We leave - * the domain renamed to %s--migratedaway in case that's helpful. - */ - - fprintf(stderr, "migration sender: Giving target permission to start.\n"); - - rc = libxl_write_exactly(ctx, send_fd, - migrate_permission_to_go, - sizeof(migrate_permission_to_go), - "migration stream", "GO message"); - if (rc) goto failed_badly; - - rc = migrate_read_fixedmessage(recv_fd, migrate_report, - sizeof(migrate_report), - "success/failure report message", rune); - if (rc) goto failed_badly; - - rc = libxl_read_exactly(ctx, recv_fd, - &rc_buf, 1, - "migration ack stream", "success/failure status"); - if (rc) goto failed_badly; - - if (rc_buf) { - fprintf(stderr, "migration sender: Target reports startup failure" - " (status code %d).\n", rc_buf); - - rc = migrate_read_fixedmessage(recv_fd, migrate_permission_to_go, - sizeof(migrate_permission_to_go), - "permission for sender to resume", - rune); - if (rc) goto failed_badly; - - fprintf(stderr, "migration sender: Trying to resume at our end.\n"); - - if (common_domname) { - libxl_domain_rename(ctx, domid, away_domname, common_domname); - } - rc = libxl_domain_resume(ctx, domid, 1, 0); - if (!rc) fprintf(stderr, "migration sender: Resumed OK.\n"); - - fprintf(stderr, "Migration failed due to problems at target.\n"); - exit(EXIT_FAILURE); - } - - fprintf(stderr, "migration sender: Target reports successful startup.\n"); - libxl_domain_destroy(ctx, domid, 0); /* bang! */ - fprintf(stderr, "Migration successful.\n"); - exit(EXIT_SUCCESS); - - failed_suspend: - close(send_fd); - migration_child_report(recv_fd); - fprintf(stderr, "Migration failed, failed to suspend at sender.\n"); - exit(EXIT_FAILURE); - - failed_resume: - close(send_fd); - migration_child_report(recv_fd); - fprintf(stderr, "Migration failed, resuming at sender.\n"); - libxl_domain_resume(ctx, domid, 1, 0); - exit(EXIT_FAILURE); - - failed_badly: - fprintf(stderr, - "** Migration failed during final handshake **\n" - "Domain state is now undefined !\n" - "Please CHECK AT BOTH ENDS for running instances, before renaming and\n" - " resuming at most one instance. Two simultaneous instances of the domain\n" - " would probably result in SEVERE DATA LOSS and it is now your\n" - " responsibility to avoid that. Sorry.\n"); - - close(send_fd); - migration_child_report(recv_fd); - exit(EXIT_FAILURE); -} - -static void migrate_receive(int debug, int daemonize, int monitor, - int pause_after_migration, - int send_fd, int recv_fd, - libxl_checkpointed_stream checkpointed, - char *colo_proxy_script) -{ - uint32_t domid; - int rc, rc2; - char rc_buf; - char *migration_domname; - struct domain_create dom_info; - - signal(SIGPIPE, SIG_IGN); - /* if we get SIGPIPE we'd rather just have it as an error */ - - fprintf(stderr, "migration target: Ready to receive domain.\n"); - - CHK_ERRNOVAL(libxl_write_exactly( - ctx, send_fd, migrate_receiver_banner, - sizeof(migrate_receiver_banner)-1, - "migration ack stream", "banner") ); - - memset(&dom_info, 0, sizeof(dom_info)); - dom_info.debug = debug; - dom_info.daemonize = daemonize; - dom_info.monitor = monitor; - dom_info.paused = 1; - dom_info.migrate_fd = recv_fd; - dom_info.send_back_fd = send_fd; - dom_info.migration_domname_r = &migration_domname; - dom_info.checkpointed_stream = checkpointed; - dom_info.colo_proxy_script = colo_proxy_script; - - rc = create_domain(&dom_info); - if (rc < 0) { - fprintf(stderr, "migration target: Domain creation failed" - " (code %d).\n", rc); - exit(EXIT_FAILURE); - } - - domid = rc; - - switch (checkpointed) { - case LIBXL_CHECKPOINTED_STREAM_REMUS: - case LIBXL_CHECKPOINTED_STREAM_COLO: - { - const char *ha = checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO ? - "COLO" : "Remus"; - /* If we are here, it means that the sender (primary) has crashed. - * TODO: Split-Brain Check. - */ - fprintf(stderr, "migration target: %s Failover for domain %u\n", - ha, domid); - - /* - * If domain renaming fails, lets just continue (as we need the domain - * to be up & dom names may not matter much, as long as its reachable - * over network). - * - * If domain unpausing fails, destroy domain ? Or is it better to have - * a consistent copy of the domain (memory, cpu state, disk) - * on atleast one physical host ? Right now, lets just leave the domain - * as is and let the Administrator decide (or troubleshoot). - */ - if (migration_domname) { - rc = libxl_domain_rename(ctx, domid, migration_domname, - common_domname); - if (rc) - fprintf(stderr, "migration target (%s): " - "Failed to rename domain from %s to %s:%d\n", - ha, migration_domname, common_domname, rc); - } - - if (checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO) - /* The guest is running after failover in COLO mode */ - exit(rc ? -ERROR_FAIL: 0); - - rc = libxl_domain_unpause(ctx, domid); - if (rc) - fprintf(stderr, "migration target (%s): " - "Failed to unpause domain %s (id: %u):%d\n", - ha, common_domname, domid, rc); - - exit(rc ? EXIT_FAILURE : EXIT_SUCCESS); - } - default: - /* do nothing */ - break; - } - - fprintf(stderr, "migration target: Transfer complete," - " requesting permission to start domain.\n"); - - rc = libxl_write_exactly(ctx, send_fd, - migrate_receiver_ready, - sizeof(migrate_receiver_ready), - "migration ack stream", "ready message"); - if (rc) exit(EXIT_FAILURE); - - rc = migrate_read_fixedmessage(recv_fd, migrate_permission_to_go, - sizeof(migrate_permission_to_go), - "GO message", 0); - if (rc) goto perhaps_destroy_notify_rc; - - fprintf(stderr, "migration target: Got permission, starting domain.\n"); - - if (migration_domname) { - rc = libxl_domain_rename(ctx, domid, migration_domname, common_domname); - if (rc) goto perhaps_destroy_notify_rc; - } - - if (!pause_after_migration) { - rc = libxl_domain_unpause(ctx, domid); - if (rc) goto perhaps_destroy_notify_rc; - } - - fprintf(stderr, "migration target: Domain started successsfully.\n"); - rc = 0; - - perhaps_destroy_notify_rc: - rc2 = libxl_write_exactly(ctx, send_fd, - migrate_report, sizeof(migrate_report), - "migration ack stream", - "success/failure report"); - if (rc2) exit(EXIT_FAILURE); - - rc_buf = -rc; - assert(!!rc_buf == !!rc); - rc2 = libxl_write_exactly(ctx, send_fd, &rc_buf, 1, - "migration ack stream", - "success/failure code"); - if (rc2) exit(EXIT_FAILURE); - - if (rc) { - fprintf(stderr, "migration target: Failure, destroying our copy.\n"); - - rc2 = libxl_domain_destroy(ctx, domid, 0); - if (rc2) { - fprintf(stderr, "migration target: Failed to destroy our copy" - " (code %d).\n", rc2); - exit(EXIT_FAILURE); - } - - fprintf(stderr, "migration target: Cleanup OK, granting sender" - " permission to resume.\n"); - - rc2 = libxl_write_exactly(ctx, send_fd, - migrate_permission_to_go, - sizeof(migrate_permission_to_go), - "migration ack stream", - "permission to sender to have domain back"); - if (rc2) exit(EXIT_FAILURE); - } - - exit(EXIT_SUCCESS); -} - - -int main_migrate_receive(int argc, char **argv) -{ - int debug = 0, daemonize = 1, monitor = 1, pause_after_migration = 0; - libxl_checkpointed_stream checkpointed = LIBXL_CHECKPOINTED_STREAM_NONE; - int opt; - char *script = NULL; - static struct option opts[] = { - {"colo", 0, 0, 0x100}, - /* It is a shame that the management code for disk is not here. */ - {"coloft-script", 1, 0, 0x200}, - COMMON_LONG_OPTS - }; - - SWITCH_FOREACH_OPT(opt, "Fedrp", opts, "migrate-receive", 0) { - case 'F': - daemonize = 0; - break; - case 'e': - daemonize = 0; - monitor = 0; - break; - case 'd': - debug = 1; - break; - case 'r': - checkpointed = LIBXL_CHECKPOINTED_STREAM_REMUS; - break; - case 0x100: - checkpointed = LIBXL_CHECKPOINTED_STREAM_COLO; - break; - case 0x200: - script = optarg; - break; - case 'p': - pause_after_migration = 1; - break; - } - - if (argc-optind != 0) { - help("migrate-receive"); - return EXIT_FAILURE; - } - migrate_receive(debug, daemonize, monitor, pause_after_migration, - STDOUT_FILENO, STDIN_FILENO, - checkpointed, script); - - return EXIT_SUCCESS; -} - -int main_migrate(int argc, char **argv) -{ - uint32_t domid; - const char *config_filename = NULL; - const char *ssh_command = "ssh"; - char *rune = NULL; - char *host; - int opt, daemonize = 1, monitor = 1, debug = 0, pause_after_migration = 0; - static struct option opts[] = { - {"debug", 0, 0, 0x100}, - {"live", 0, 0, 0x200}, - COMMON_LONG_OPTS - }; - - SWITCH_FOREACH_OPT(opt, "FC:s:ep", opts, "migrate", 2) { - case 'C': - config_filename = optarg; - break; - case 's': - ssh_command = optarg; - break; - case 'F': - daemonize = 0; - break; - case 'e': - daemonize = 0; - monitor = 0; - break; - case 'p': - pause_after_migration = 1; - break; - case 0x100: /* --debug */ - debug = 1; - break; - case 0x200: /* --live */ - /* ignored for compatibility with xm */ - break; - } - - domid = xfind_domain(argv[optind]); - host = argv[optind + 1]; - - bool pass_tty_arg = progress_use_cr || (isatty(2) > 0); - - if (!ssh_command[0]) { - rune= host; - } else { - char verbose_buf[minmsglevel_default+3]; - int verbose_len; - verbose_buf[0] = ' '; - verbose_buf[1] = '-'; - memset(verbose_buf+2, 'v', minmsglevel_default); - verbose_buf[sizeof(verbose_buf)-1] = 0; - if (minmsglevel == minmsglevel_default) { - verbose_len = 0; - } else { - verbose_len = (minmsglevel_default - minmsglevel) + 2; - } - xasprintf(&rune, "exec %s %s xl%s%.*s migrate-receive%s%s%s", - ssh_command, host, - pass_tty_arg ? " -t" : "", - verbose_len, verbose_buf, - daemonize ? "" : " -e", - debug ? " -d" : "", - pause_after_migration ? " -p" : ""); - } - - migrate_domain(domid, rune, debug, config_filename); - return EXIT_SUCCESS; -} -#endif - -#ifndef LIBXL_HAVE_NO_SUSPEND_RESUME -int main_remus(int argc, char **argv) -{ - uint32_t domid; - int opt, rc, daemonize = 1; - const char *ssh_command = "ssh"; - char *host = NULL, *rune = NULL; - libxl_domain_remus_info r_info; - int send_fd = -1, recv_fd = -1; - pid_t child = -1; - uint8_t *config_data; - int config_len; - - memset(&r_info, 0, sizeof(libxl_domain_remus_info)); - - SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:ec", NULL, "remus", 2) { - case 'i': - r_info.interval = atoi(optarg); - break; - case 'F': - libxl_defbool_set(&r_info.allow_unsafe, true); - break; - case 'b': - libxl_defbool_set(&r_info.blackhole, true); - break; - case 'u': - libxl_defbool_set(&r_info.compression, false); - break; - case 'n': - libxl_defbool_set(&r_info.netbuf, false); - break; - case 'N': - r_info.netbufscript = optarg; - break; - case 'd': - libxl_defbool_set(&r_info.diskbuf, false); - break; - case 's': - ssh_command = optarg; - break; - case 'e': - daemonize = 0; - break; - case 'c': - libxl_defbool_set(&r_info.colo, true); - } - - domid = xfind_domain(argv[optind]); - host = argv[optind + 1]; - - /* Defaults */ - libxl_defbool_setdefault(&r_info.blackhole, false); - libxl_defbool_setdefault(&r_info.colo, false); - if (!libxl_defbool_val(r_info.colo) && !r_info.interval) - r_info.interval = 200; - - if (libxl_defbool_val(r_info.colo)) { - if (r_info.interval || libxl_defbool_val(r_info.blackhole) || - !libxl_defbool_is_default(r_info.netbuf) || - !libxl_defbool_is_default(r_info.diskbuf)) { - perror("option -c is conflict with -i, -d, -n or -b"); - exit(-1); - } - - if (libxl_defbool_is_default(r_info.compression)) { - perror("COLO can't be used with memory compression. " - "Disable memory checkpoint compression now..."); - libxl_defbool_set(&r_info.compression, false); - } - } - - if (!r_info.netbufscript) { - if (libxl_defbool_val(r_info.colo)) - r_info.netbufscript = default_colo_proxy_script; - else - r_info.netbufscript = default_remus_netbufscript; - } - - if (libxl_defbool_val(r_info.blackhole)) { - send_fd = open("/dev/null", O_RDWR, 0644); - if (send_fd < 0) { - perror("failed to open /dev/null"); - exit(EXIT_FAILURE); - } - } else { - - if (!ssh_command[0]) { - rune = host; - } else { - if (!libxl_defbool_val(r_info.colo)) { - xasprintf(&rune, "exec %s %s xl migrate-receive %s %s", - ssh_command, host, - "-r", - daemonize ? "" : " -e"); - } else { - xasprintf(&rune, "exec %s %s xl migrate-receive %s %s %s %s", - ssh_command, host, - "--colo", - r_info.netbufscript ? "--coloft-script" : "", - r_info.netbufscript ? r_info.netbufscript : "", - daemonize ? "" : " -e"); - } - } - - save_domain_core_begin(domid, NULL, &config_data, &config_len); - - if (!config_len) { - fprintf(stderr, "No config file stored for running domain and " - "none supplied - cannot start remus.\n"); - exit(EXIT_FAILURE); - } - - child = create_migration_child(rune, &send_fd, &recv_fd); - - migrate_do_preamble(send_fd, recv_fd, child, config_data, config_len, - rune); - - if (ssh_command[0]) - free(rune); - } - - /* Point of no return */ - rc = libxl_domain_remus_start(ctx, &r_info, domid, send_fd, recv_fd, 0); - - /* check if the domain exists. User may have xl destroyed the - * domain to force failover - */ - if (libxl_domain_info(ctx, 0, domid)) { - fprintf(stderr, "%s: Primary domain has been destroyed.\n", - libxl_defbool_val(r_info.colo) ? "COLO" : "Remus"); - close(send_fd); - return EXIT_SUCCESS; - } - - /* If we are here, it means remus setup/domain suspend/backup has - * failed. Try to resume the domain and exit gracefully. - * TODO: Split-Brain check. - */ - if (rc == ERROR_GUEST_TIMEDOUT) - fprintf(stderr, "Failed to suspend domain at primary.\n"); - else { - fprintf(stderr, "%s: Backup failed? resuming domain at primary.\n", - libxl_defbool_val(r_info.colo) ? "COLO" : "Remus"); - libxl_domain_resume(ctx, domid, 1, 0); - } - - close(send_fd); - return EXIT_FAILURE; -} -#endif - /* * Local variables: * mode: C diff --git a/tools/xl/xl_migrate.c b/tools/xl/xl_migrate.c new file mode 100644 index 0000000000..6b5dff2d1a --- /dev/null +++ b/tools/xl/xl_migrate.c @@ -0,0 +1,754 @@ +/* + * Copyright 2009-2017 Citrix Ltd and other contributors + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#include <fcntl.h> +#include <inttypes.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/utsname.h> +#include <time.h> +#include <unistd.h> + +#include <libxl.h> +#include <libxl_utils.h> +#include <libxlutil.h> + +#include "xl.h" +#include "xl_utils.h" +#include "xl_parse.h" + +extern const char *common_domname; + +#ifndef LIBXL_HAVE_NO_SUSPEND_RESUME + +static pid_t create_migration_child(const char *rune, int *send_fd, + int *recv_fd) +{ + int sendpipe[2], recvpipe[2]; + pid_t child; + + if (!rune || !send_fd || !recv_fd) + return -1; + + MUST( libxl_pipe(ctx, sendpipe) ); + MUST( libxl_pipe(ctx, recvpipe) ); + + child = xl_fork(child_migration, "migration transport process"); + + if (!child) { + dup2(sendpipe[0], 0); + dup2(recvpipe[1], 1); + close(sendpipe[0]); close(sendpipe[1]); + close(recvpipe[0]); close(recvpipe[1]); + execlp("sh","sh","-c",rune,(char*)0); + perror("failed to exec sh"); + exit(EXIT_FAILURE); + } + + close(sendpipe[0]); + close(recvpipe[1]); + *send_fd = sendpipe[1]; + *recv_fd = recvpipe[0]; + + /* if receiver dies, we get an error and can clean up + rather than just dying */ + signal(SIGPIPE, SIG_IGN); + + return child; +} + +static int migrate_read_fixedmessage(int fd, const void *msg, int msgsz, + const char *what, const char *rune) { + char buf[msgsz]; + const char *stream; + int rc; + + stream = rune ? "migration receiver stream" : "migration stream"; + rc = libxl_read_exactly(ctx, fd, buf, msgsz, stream, what); + if (rc) return 1; + + if (memcmp(buf, msg, msgsz)) { + fprintf(stderr, "%s contained unexpected data instead of %s\n", + stream, what); + if (rune) + fprintf(stderr, "(command run was: %s )\n", rune); + return 1; + } + return 0; +} + +static void migration_child_report(int recv_fd) { + pid_t child; + int status, sr; + struct timeval now, waituntil, timeout; + static const struct timeval pollinterval = { 0, 1000 }; /* 1ms */ + + if (!xl_child_pid(child_migration)) return; + + CHK_SYSCALL(gettimeofday(&waituntil, 0)); + waituntil.tv_sec += 2; + + for (;;) { + pid_t migration_child = xl_child_pid(child_migration); + child = xl_waitpid(child_migration, &status, WNOHANG); + + if (child == migration_child) { + if (status) + xl_report_child_exitstatus(XTL_INFO, child_migration, + migration_child, status); + break; + } + if (child == -1) { + fprintf(stderr, "wait for migration child [%ld] failed: %s\n", + (long)migration_child, strerror(errno)); + break; + } + assert(child == 0); + + CHK_SYSCALL(gettimeofday(&now, 0)); + if (timercmp(&now, &waituntil, >)) { + fprintf(stderr, "migration child [%ld] not exiting, no longer" + " waiting (exit status will be unreported)\n", + (long)migration_child); + break; + } + timersub(&waituntil, &now, &timeout); + + if (recv_fd >= 0) { + fd_set readfds, exceptfds; + FD_ZERO(&readfds); + FD_ZERO(&exceptfds); + FD_SET(recv_fd, &readfds); + FD_SET(recv_fd, &exceptfds); + sr = select(recv_fd+1, &readfds,0,&exceptfds, &timeout); + } else { + if (timercmp(&timeout, &pollinterval, >)) + timeout = pollinterval; + sr = select(0,0,0,0, &timeout); + } + if (sr > 0) { + recv_fd = -1; + } else if (sr == 0) { + } else if (sr == -1) { + if (errno != EINTR) { + fprintf(stderr, "migration child [%ld] exit wait select" + " failed unexpectedly: %s\n", + (long)migration_child, strerror(errno)); + break; + } + } + } +} + +static void migrate_do_preamble(int send_fd, int recv_fd, pid_t child, + uint8_t *config_data, int config_len, + const char *rune) +{ + int rc = 0; + + if (send_fd < 0 || recv_fd < 0) { + fprintf(stderr, "migrate_do_preamble: invalid file descriptors\n"); + exit(EXIT_FAILURE); + } + + rc = migrate_read_fixedmessage(recv_fd, migrate_receiver_banner, + sizeof(migrate_receiver_banner)-1, + "banner", rune); + if (rc) { + close(send_fd); + migration_child_report(recv_fd); + exit(EXIT_FAILURE); + } + + save_domain_core_writeconfig(send_fd, "migration stream", + config_data, config_len); + +} + +static void migrate_domain(uint32_t domid, const char *rune, int debug, + const char *override_config_file) +{ + pid_t child = -1; + int rc; + int send_fd = -1, recv_fd = -1; + char *away_domname; + char rc_buf; + uint8_t *config_data; + int config_len, flags = LIBXL_SUSPEND_LIVE; + + save_domain_core_begin(domid, override_config_file, + &config_data, &config_len); + + if (!config_len) { + fprintf(stderr, "No config file stored for running domain and " + "none supplied - cannot migrate.\n"); + exit(EXIT_FAILURE); + } + + child = create_migration_child(rune, &send_fd, &recv_fd); + + migrate_do_preamble(send_fd, recv_fd, child, config_data, config_len, + rune); + + xtl_stdiostream_adjust_flags(logger, XTL_STDIOSTREAM_HIDE_PROGRESS, 0); + + if (debug) + flags |= LIBXL_SUSPEND_DEBUG; + rc = libxl_domain_suspend(ctx, domid, send_fd, flags, NULL); + if (rc) { + fprintf(stderr, "migration sender: libxl_domain_suspend failed" + " (rc=%d)\n", rc); + if (rc == ERROR_GUEST_TIMEDOUT) + goto failed_suspend; + else + goto failed_resume; + } + + //fprintf(stderr, "migration sender: Transfer complete.\n"); + // Should only be printed when debugging as it's a bit messy with + // progress indication. + + rc = migrate_read_fixedmessage(recv_fd, migrate_receiver_ready, + sizeof(migrate_receiver_ready), + "ready message", rune); + if (rc) goto failed_resume; + + xtl_stdiostream_adjust_flags(logger, 0, XTL_STDIOSTREAM_HIDE_PROGRESS); + + /* right, at this point we are about give the destination + * permission to rename and resume, so we must first rename the + * domain away ourselves */ + + fprintf(stderr, "migration sender: Target has acknowledged transfer.\n"); + + if (common_domname) { + xasprintf(&away_domname, "%s--migratedaway", common_domname); + rc = libxl_domain_rename(ctx, domid, common_domname, away_domname); + if (rc) goto failed_resume; + } + + /* point of no return - as soon as we have tried to say + * "go" to the receiver, it's not safe to carry on. We leave + * the domain renamed to %s--migratedaway in case that's helpful. + */ + + fprintf(stderr, "migration sender: Giving target permission to start.\n"); + + rc = libxl_write_exactly(ctx, send_fd, + migrate_permission_to_go, + sizeof(migrate_permission_to_go), + "migration stream", "GO message"); + if (rc) goto failed_badly; + + rc = migrate_read_fixedmessage(recv_fd, migrate_report, + sizeof(migrate_report), + "success/failure report message", rune); + if (rc) goto failed_badly; + + rc = libxl_read_exactly(ctx, recv_fd, + &rc_buf, 1, + "migration ack stream", "success/failure status"); + if (rc) goto failed_badly; + + if (rc_buf) { + fprintf(stderr, "migration sender: Target reports startup failure" + " (status code %d).\n", rc_buf); + + rc = migrate_read_fixedmessage(recv_fd, migrate_permission_to_go, + sizeof(migrate_permission_to_go), + "permission for sender to resume", + rune); + if (rc) goto failed_badly; + + fprintf(stderr, "migration sender: Trying to resume at our end.\n"); + + if (common_domname) { + libxl_domain_rename(ctx, domid, away_domname, common_domname); + } + rc = libxl_domain_resume(ctx, domid, 1, 0); + if (!rc) fprintf(stderr, "migration sender: Resumed OK.\n"); + + fprintf(stderr, "Migration failed due to problems at target.\n"); + exit(EXIT_FAILURE); + } + + fprintf(stderr, "migration sender: Target reports successful startup.\n"); + libxl_domain_destroy(ctx, domid, 0); /* bang! */ + fprintf(stderr, "Migration successful.\n"); + exit(EXIT_SUCCESS); + + failed_suspend: + close(send_fd); + migration_child_report(recv_fd); + fprintf(stderr, "Migration failed, failed to suspend at sender.\n"); + exit(EXIT_FAILURE); + + failed_resume: + close(send_fd); + migration_child_report(recv_fd); + fprintf(stderr, "Migration failed, resuming at sender.\n"); + libxl_domain_resume(ctx, domid, 1, 0); + exit(EXIT_FAILURE); + + failed_badly: + fprintf(stderr, + "** Migration failed during final handshake **\n" + "Domain state is now undefined !\n" + "Please CHECK AT BOTH ENDS for running instances, before renaming and\n" + " resuming at most one instance. Two simultaneous instances of the domain\n" + " would probably result in SEVERE DATA LOSS and it is now your\n" + " responsibility to avoid that. Sorry.\n"); + + close(send_fd); + migration_child_report(recv_fd); + exit(EXIT_FAILURE); +} + +static void migrate_receive(int debug, int daemonize, int monitor, + int pause_after_migration, + int send_fd, int recv_fd, + libxl_checkpointed_stream checkpointed, + char *colo_proxy_script) +{ + uint32_t domid; + int rc, rc2; + char rc_buf; + char *migration_domname; + struct domain_create dom_info; + + signal(SIGPIPE, SIG_IGN); + /* if we get SIGPIPE we'd rather just have it as an error */ + + fprintf(stderr, "migration target: Ready to receive domain.\n"); + + CHK_ERRNOVAL(libxl_write_exactly( + ctx, send_fd, migrate_receiver_banner, + sizeof(migrate_receiver_banner)-1, + "migration ack stream", "banner") ); + + memset(&dom_info, 0, sizeof(dom_info)); + dom_info.debug = debug; + dom_info.daemonize = daemonize; + dom_info.monitor = monitor; + dom_info.paused = 1; + dom_info.migrate_fd = recv_fd; + dom_info.send_back_fd = send_fd; + dom_info.migration_domname_r = &migration_domname; + dom_info.checkpointed_stream = checkpointed; + dom_info.colo_proxy_script = colo_proxy_script; + + rc = create_domain(&dom_info); + if (rc < 0) { + fprintf(stderr, "migration target: Domain creation failed" + " (code %d).\n", rc); + exit(EXIT_FAILURE); + } + + domid = rc; + + switch (checkpointed) { + case LIBXL_CHECKPOINTED_STREAM_REMUS: + case LIBXL_CHECKPOINTED_STREAM_COLO: + { + const char *ha = checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO ? + "COLO" : "Remus"; + /* If we are here, it means that the sender (primary) has crashed. + * TODO: Split-Brain Check. + */ + fprintf(stderr, "migration target: %s Failover for domain %u\n", + ha, domid); + + /* + * If domain renaming fails, lets just continue (as we need the domain + * to be up & dom names may not matter much, as long as its reachable + * over network). + * + * If domain unpausing fails, destroy domain ? Or is it better to have + * a consistent copy of the domain (memory, cpu state, disk) + * on atleast one physical host ? Right now, lets just leave the domain + * as is and let the Administrator decide (or troubleshoot). + */ + if (migration_domname) { + rc = libxl_domain_rename(ctx, domid, migration_domname, + common_domname); + if (rc) + fprintf(stderr, "migration target (%s): " + "Failed to rename domain from %s to %s:%d\n", + ha, migration_domname, common_domname, rc); + } + + if (checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO) + /* The guest is running after failover in COLO mode */ + exit(rc ? -ERROR_FAIL: 0); + + rc = libxl_domain_unpause(ctx, domid); + if (rc) + fprintf(stderr, "migration target (%s): " + "Failed to unpause domain %s (id: %u):%d\n", + ha, common_domname, domid, rc); + + exit(rc ? EXIT_FAILURE : EXIT_SUCCESS); + } + default: + /* do nothing */ + break; + } + + fprintf(stderr, "migration target: Transfer complete," + " requesting permission to start domain.\n"); + + rc = libxl_write_exactly(ctx, send_fd, + migrate_receiver_ready, + sizeof(migrate_receiver_ready), + "migration ack stream", "ready message"); + if (rc) exit(EXIT_FAILURE); + + rc = migrate_read_fixedmessage(recv_fd, migrate_permission_to_go, + sizeof(migrate_permission_to_go), + "GO message", 0); + if (rc) goto perhaps_destroy_notify_rc; + + fprintf(stderr, "migration target: Got permission, starting domain.\n"); + + if (migration_domname) { + rc = libxl_domain_rename(ctx, domid, migration_domname, common_domname); + if (rc) goto perhaps_destroy_notify_rc; + } + + if (!pause_after_migration) { + rc = libxl_domain_unpause(ctx, domid); + if (rc) goto perhaps_destroy_notify_rc; + } + + fprintf(stderr, "migration target: Domain started successsfully.\n"); + rc = 0; + + perhaps_destroy_notify_rc: + rc2 = libxl_write_exactly(ctx, send_fd, + migrate_report, sizeof(migrate_report), + "migration ack stream", + "success/failure report"); + if (rc2) exit(EXIT_FAILURE); + + rc_buf = -rc; + assert(!!rc_buf == !!rc); + rc2 = libxl_write_exactly(ctx, send_fd, &rc_buf, 1, + "migration ack stream", + "success/failure code"); + if (rc2) exit(EXIT_FAILURE); + + if (rc) { + fprintf(stderr, "migration target: Failure, destroying our copy.\n"); + + rc2 = libxl_domain_destroy(ctx, domid, 0); + if (rc2) { + fprintf(stderr, "migration target: Failed to destroy our copy" + " (code %d).\n", rc2); + exit(EXIT_FAILURE); + } + + fprintf(stderr, "migration target: Cleanup OK, granting sender" + " permission to resume.\n"); + + rc2 = libxl_write_exactly(ctx, send_fd, + migrate_permission_to_go, + sizeof(migrate_permission_to_go), + "migration ack stream", + "permission to sender to have domain back"); + if (rc2) exit(EXIT_FAILURE); + } + + exit(EXIT_SUCCESS); +} + + +int main_migrate_receive(int argc, char **argv) +{ + int debug = 0, daemonize = 1, monitor = 1, pause_after_migration = 0; + libxl_checkpointed_stream checkpointed = LIBXL_CHECKPOINTED_STREAM_NONE; + int opt; + char *script = NULL; + static struct option opts[] = { + {"colo", 0, 0, 0x100}, + /* It is a shame that the management code for disk is not here. */ + {"coloft-script", 1, 0, 0x200}, + COMMON_LONG_OPTS + }; + + SWITCH_FOREACH_OPT(opt, "Fedrp", opts, "migrate-receive", 0) { + case 'F': + daemonize = 0; + break; + case 'e': + daemonize = 0; + monitor = 0; + break; + case 'd': + debug = 1; + break; + case 'r': + checkpointed = LIBXL_CHECKPOINTED_STREAM_REMUS; + break; + case 0x100: + checkpointed = LIBXL_CHECKPOINTED_STREAM_COLO; + break; + case 0x200: + script = optarg; + break; + case 'p': + pause_after_migration = 1; + break; + } + + if (argc-optind != 0) { + help("migrate-receive"); + return EXIT_FAILURE; + } + migrate_receive(debug, daemonize, monitor, pause_after_migration, + STDOUT_FILENO, STDIN_FILENO, + checkpointed, script); + + return EXIT_SUCCESS; +} + +int main_migrate(int argc, char **argv) +{ + uint32_t domid; + const char *config_filename = NULL; + const char *ssh_command = "ssh"; + char *rune = NULL; + char *host; + int opt, daemonize = 1, monitor = 1, debug = 0, pause_after_migration = 0; + static struct option opts[] = { + {"debug", 0, 0, 0x100}, + {"live", 0, 0, 0x200}, + COMMON_LONG_OPTS + }; + + SWITCH_FOREACH_OPT(opt, "FC:s:ep", opts, "migrate", 2) { + case 'C': + config_filename = optarg; + break; + case 's': + ssh_command = optarg; + break; + case 'F': + daemonize = 0; + break; + case 'e': + daemonize = 0; + monitor = 0; + break; + case 'p': + pause_after_migration = 1; + break; + case 0x100: /* --debug */ + debug = 1; + break; + case 0x200: /* --live */ + /* ignored for compatibility with xm */ + break; + } + + domid = xfind_domain(argv[optind]); + host = argv[optind + 1]; + + bool pass_tty_arg = progress_use_cr || (isatty(2) > 0); + + if (!ssh_command[0]) { + rune= host; + } else { + char verbose_buf[minmsglevel_default+3]; + int verbose_len; + verbose_buf[0] = ' '; + verbose_buf[1] = '-'; + memset(verbose_buf+2, 'v', minmsglevel_default); + verbose_buf[sizeof(verbose_buf)-1] = 0; + if (minmsglevel == minmsglevel_default) { + verbose_len = 0; + } else { + verbose_len = (minmsglevel_default - minmsglevel) + 2; + } + xasprintf(&rune, "exec %s %s xl%s%.*s migrate-receive%s%s%s", + ssh_command, host, + pass_tty_arg ? " -t" : "", + verbose_len, verbose_buf, + daemonize ? "" : " -e", + debug ? " -d" : "", + pause_after_migration ? " -p" : ""); + } + + migrate_domain(domid, rune, debug, config_filename); + return EXIT_SUCCESS; +} + +int main_remus(int argc, char **argv) +{ + uint32_t domid; + int opt, rc, daemonize = 1; + const char *ssh_command = "ssh"; + char *host = NULL, *rune = NULL; + libxl_domain_remus_info r_info; + int send_fd = -1, recv_fd = -1; + pid_t child = -1; + uint8_t *config_data; + int config_len; + + memset(&r_info, 0, sizeof(libxl_domain_remus_info)); + + SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:ec", NULL, "remus", 2) { + case 'i': + r_info.interval = atoi(optarg); + break; + case 'F': + libxl_defbool_set(&r_info.allow_unsafe, true); + break; + case 'b': + libxl_defbool_set(&r_info.blackhole, true); + break; + case 'u': + libxl_defbool_set(&r_info.compression, false); + break; + case 'n': + libxl_defbool_set(&r_info.netbuf, false); + break; + case 'N': + r_info.netbufscript = optarg; + break; + case 'd': + libxl_defbool_set(&r_info.diskbuf, false); + break; + case 's': + ssh_command = optarg; + break; + case 'e': + daemonize = 0; + break; + case 'c': + libxl_defbool_set(&r_info.colo, true); + } + + domid = xfind_domain(argv[optind]); + host = argv[optind + 1]; + + /* Defaults */ + libxl_defbool_setdefault(&r_info.blackhole, false); + libxl_defbool_setdefault(&r_info.colo, false); + if (!libxl_defbool_val(r_info.colo) && !r_info.interval) + r_info.interval = 200; + + if (libxl_defbool_val(r_info.colo)) { + if (r_info.interval || libxl_defbool_val(r_info.blackhole) || + !libxl_defbool_is_default(r_info.netbuf) || + !libxl_defbool_is_default(r_info.diskbuf)) { + perror("option -c is conflict with -i, -d, -n or -b"); + exit(-1); + } + + if (libxl_defbool_is_default(r_info.compression)) { + perror("COLO can't be used with memory compression. " + "Disable memory checkpoint compression now..."); + libxl_defbool_set(&r_info.compression, false); + } + } + + if (!r_info.netbufscript) { + if (libxl_defbool_val(r_info.colo)) + r_info.netbufscript = default_colo_proxy_script; + else + r_info.netbufscript = default_remus_netbufscript; + } + + if (libxl_defbool_val(r_info.blackhole)) { + send_fd = open("/dev/null", O_RDWR, 0644); + if (send_fd < 0) { + perror("failed to open /dev/null"); + exit(EXIT_FAILURE); + } + } else { + + if (!ssh_command[0]) { + rune = host; + } else { + if (!libxl_defbool_val(r_info.colo)) { + xasprintf(&rune, "exec %s %s xl migrate-receive %s %s", + ssh_command, host, + "-r", + daemonize ? "" : " -e"); + } else { + xasprintf(&rune, "exec %s %s xl migrate-receive %s %s %s %s", + ssh_command, host, + "--colo", + r_info.netbufscript ? "--coloft-script" : "", + r_info.netbufscript ? r_info.netbufscript : "", + daemonize ? "" : " -e"); + } + } + + save_domain_core_begin(domid, NULL, &config_data, &config_len); + + if (!config_len) { + fprintf(stderr, "No config file stored for running domain and " + "none supplied - cannot start remus.\n"); + exit(EXIT_FAILURE); + } + + child = create_migration_child(rune, &send_fd, &recv_fd); + + migrate_do_preamble(send_fd, recv_fd, child, config_data, config_len, + rune); + + if (ssh_command[0]) + free(rune); + } + + /* Point of no return */ + rc = libxl_domain_remus_start(ctx, &r_info, domid, send_fd, recv_fd, 0); + + /* check if the domain exists. User may have xl destroyed the + * domain to force failover + */ + if (libxl_domain_info(ctx, 0, domid)) { + fprintf(stderr, "%s: Primary domain has been destroyed.\n", + libxl_defbool_val(r_info.colo) ? "COLO" : "Remus"); + close(send_fd); + return EXIT_SUCCESS; + } + + /* If we are here, it means remus setup/domain suspend/backup has + * failed. Try to resume the domain and exit gracefully. + * TODO: Split-Brain check. + */ + if (rc == ERROR_GUEST_TIMEDOUT) + fprintf(stderr, "Failed to suspend domain at primary.\n"); + else { + fprintf(stderr, "%s: Backup failed? resuming domain at primary.\n", + libxl_defbool_val(r_info.colo) ? "COLO" : "Remus"); + libxl_domain_resume(ctx, domid, 1, 0); + } + + close(send_fd); + return EXIT_FAILURE; +} +#endif + + +/* + * Local variables: + * mode: C + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ -- 2.11.0 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |