[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 2 of 2 V6] libxl: Remus - xl remus command



# HG changeset patch
# User Shriram Rajagopalan <rshriram@xxxxxxxxx>
# Date 1337283430 25200
# Node ID 92bf8bd9ae5783a8126ffae75da9425db7c6e3d0
# Parent  496ff6ce5bb63a2f034d2a861f34cfa8cbf06552
libxl: Remus - xl remus command

xl remus acts as a frontend to enable remus for a given domain.
 * At the moment, only memory checkpointing and blackhole replication is
   supported. Support for disk checkpointing and network buffering will
   be added in future.
 * Replication is done over ssh connection currently (like live migration
   with xl). Future versions will have an option to use simple tcp socket
   based replication channel (for both Remus & live migration).

Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx>
Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>

diff -r 496ff6ce5bb6 -r 92bf8bd9ae57 docs/man/xl.pod.1
--- a/docs/man/xl.pod.1 Thu May 17 12:37:07 2012 -0700
+++ b/docs/man/xl.pod.1 Thu May 17 12:37:10 2012 -0700
@@ -381,6 +381,41 @@
 
 =back
 
+=item B<remus> [I<OPTIONS>] I<domain-id> I<host>
+
+Enable Remus HA for domain. By default B<xl> relies on ssh as a transport
+mechanism between the two hosts.
+
+B<OPTIONS>
+
+=over 4
+
+=item B<-i> I<MS>
+
+Checkpoint domain memory every MS milliseconds (default 200ms).
+
+=item B<-b>
+
+Do not checkpoint the disk. Replicate memory checkpoints to /dev/null
+(blackhole).  Network output buffering remains enabled (unless --no-net is
+supplied).  Generally useful for debugging.
+
+=item B<-u>
+
+Disable memory checkpoint compression.
+
+=item B<-s> I<sshcommand>
+
+Use <sshcommand> instead of ssh.  String will be passed to sh.
+If empty, run <host> instead of ssh <host> xl migrate-receive -r [-e].
+
+=item B<-e>
+
+On the new host, do not wait in the background (on <host>) for the death
+of the domain. See the corresponding option of the I<create> subcommand.
+
+=back
+
 =item B<pause> I<domain-id>
 
 Pause a domain.  When in a paused state the domain will still consume
diff -r 496ff6ce5bb6 -r 92bf8bd9ae57 tools/libxl/xl.h
--- a/tools/libxl/xl.h  Thu May 17 12:37:07 2012 -0700
+++ b/tools/libxl/xl.h  Thu May 17 12:37:10 2012 -0700
@@ -95,6 +95,7 @@
 int main_getenforce(int argc, char **argv);
 int main_setenforce(int argc, char **argv);
 int main_loadpolicy(int argc, char **argv);
+int main_remus(int argc, char **argv);
 
 void help(const char *command);
 
diff -r 496ff6ce5bb6 -r 92bf8bd9ae57 tools/libxl/xl_cmdimpl.c
--- a/tools/libxl/xl_cmdimpl.c  Thu May 17 12:37:07 2012 -0700
+++ b/tools/libxl/xl_cmdimpl.c  Thu May 17 12:37:10 2012 -0700
@@ -2966,7 +2966,7 @@
 }
 
 static void migrate_receive(int debug, int daemonize, int monitor,
-                            int send_fd, int recv_fd)
+                            int send_fd, int recv_fd, int remus)
 {
     int rc, rc2;
     char rc_buf;
@@ -3001,6 +3001,41 @@
         exit(-rc);
     }
 
+    if (remus) {
+        /* If we are here, it means that the sender (primary) has crashed.
+         * TODO: Split-Brain Check.
+         */
+        fprintf(stderr, "migration target: Remus Failover for domain %u\n",
+                domid);
+
+        /*
+         * If domain renaming fails, lets just continue (as we need the domain
+         * to be up & dom names may not matter much, as long as its reachable
+         * over network).
+         *
+         * If domain unpausing fails, destroy domain ? Or is it better to have
+         * a consistent copy of the domain (memory, cpu state, disk)
+         * on atleast one physical host ? Right now, lets just leave the domain
+         * as is and let the Administrator decide (or troubleshoot).
+         */
+        if (migration_domname) {
+            rc = libxl_domain_rename(ctx, domid, migration_domname,
+                                     common_domname);
+            if (rc)
+                fprintf(stderr, "migration target (Remus): "
+                        "Failed to rename domain from %s to %s:%d\n",
+                        migration_domname, common_domname, rc);
+        }
+
+        rc = libxl_domain_unpause(ctx, domid);
+        if (rc)
+            fprintf(stderr, "migration target (Remus): "
+                    "Failed to unpause domain %s (id: %u):%d\n",
+                    common_domname, domid, rc);
+
+        exit(rc ? -ERROR_FAIL: 0);
+    }
+
     fprintf(stderr, "migration target: Transfer complete,"
             " requesting permission to start domain.\n");
 
@@ -3128,10 +3163,10 @@
 
 int main_migrate_receive(int argc, char **argv)
 {
-    int debug = 0, daemonize = 1, monitor = 1;
+    int debug = 0, daemonize = 1, monitor = 1, remus = 0;
     int opt;
 
-    while ((opt = def_getopt(argc, argv, "Fed", "migrate-receive", 0)) != -1) {
+    while ((opt = def_getopt(argc, argv, "Fedr", "migrate-receive", 0)) != -1) 
{
         switch (opt) {
         case 0: case 2:
             return opt;
@@ -3145,6 +3180,9 @@
         case 'd':
             debug = 1;
             break;
+        case 'r':
+            remus = 1;
+            break;
         }
     }
 
@@ -3153,7 +3191,8 @@
         return 2;
     }
     migrate_receive(debug, daemonize, monitor,
-                    STDOUT_FILENO, STDIN_FILENO);
+                    STDOUT_FILENO, STDIN_FILENO,
+                    remus);
 
     return 0;
 }
@@ -6315,6 +6354,102 @@
     return ret;
 }
 
+int main_remus(int argc, char **argv)
+{
+    int opt, rc, daemonize = 1;
+    const char *ssh_command = "ssh";
+    char *host = NULL, *rune = NULL, *domain = NULL;
+    libxl_domain_remus_info r_info;
+    int send_fd = -1, recv_fd = -1;
+    pid_t child = -1;
+    uint8_t *config_data;
+    int config_len;
+
+    memset(&r_info, 0, sizeof(libxl_domain_remus_info));
+    /* Defaults */
+    r_info.interval = 200;
+    r_info.blackhole = 0;
+    r_info.compression = 1;
+
+    while ((opt = def_getopt(argc, argv, "bui:s:e", "remus", 2)) != -1) {
+        switch (opt) {
+        case 0: case 2:
+            return opt;
+
+        case 'i':
+           r_info.interval = atoi(optarg);
+            break;
+        case 'b':
+            r_info.blackhole = 1;
+            break;
+        case 'u':
+           r_info.compression = 0;
+            break;
+        case 's':
+            ssh_command = optarg;
+            break;
+        case 'e':
+            daemonize = 0;
+            break;
+        }
+    }
+
+    domain = argv[optind];
+    host = argv[optind + 1];
+
+    if (r_info.blackhole) {
+        find_domain(domain);
+        send_fd = open("/dev/null", O_RDWR, 0644);
+        if (send_fd < 0) {
+            perror("failed to open /dev/null");
+            exit(-1);
+        }
+    } else {
+
+        if (!ssh_command[0]) {
+            rune = host;
+        } else {
+            if (asprintf(&rune, "exec %s %s xl migrate-receive -r %s",
+                         ssh_command, host,
+                         daemonize ? "" : " -e") < 0)
+                return 1;
+        }
+
+        save_domain_core_begin(domain, NULL, &config_data, &config_len);
+
+        if (!config_len) {
+            fprintf(stderr, "No config file stored for running domain and "
+                    "none supplied - cannot start remus.\n");
+            exit(1);
+        }
+
+        child = create_migration_child(rune, &send_fd, &recv_fd);
+
+        migrate_do_preamble(send_fd, recv_fd, child, config_data, config_len,
+                            rune);
+    }
+
+    /* Point of no return */
+    rc = libxl_domain_remus_start(ctx, &r_info, domid, send_fd, recv_fd);
+
+    /* If we are here, it means backup has failed/domain suspend failed.
+     * Try to resume the domain and exit gracefully.
+     * TODO: Split-Brain check.
+     */
+    fprintf(stderr, "remus sender: libxl_domain_suspend failed"
+            " (rc=%d)\n", rc);
+
+    if (rc == ERROR_GUEST_TIMEDOUT)
+        fprintf(stderr, "Failed to suspend domain at primary.\n");
+    else {
+        fprintf(stderr, "Remus: Backup failed? resuming domain at primary.\n");
+        libxl_domain_resume(ctx, domid, 1);
+    }
+
+    close(send_fd);
+    return -ERROR_FAIL;
+}
+
 /*
  * Local variables:
  * mode: C
diff -r 496ff6ce5bb6 -r 92bf8bd9ae57 tools/libxl/xl_cmdtable.c
--- a/tools/libxl/xl_cmdtable.c Thu May 17 12:37:07 2012 -0700
+++ b/tools/libxl/xl_cmdtable.c Thu May 17 12:37:10 2012 -0700
@@ -427,6 +427,20 @@
       "Loads a new policy int the Flask Xen security module",
       "<policy file>",
     },
+    { "remus",
+      &main_remus, 0, 1,
+      "Enable Remus HA for domain",
+      "[options] <Domain> [<host>]",
+      "-i MS                   Checkpoint domain memory every MS milliseconds 
(def. 200ms).\n"
+      "-b                      Replicate memory checkpoints to /dev/null 
(blackhole)\n"
+      "-u                      Disable memory checkpoint compression.\n"
+      "-s <sshcommand>         Use <sshcommand> instead of ssh.  String will 
be passed\n"
+      "                        to sh. If empty, run <host> instead of \n"
+      "                        ssh <host> xl migrate-receive -r [-e]\n"
+      "-e                      Do not wait in the background (on <host>) for 
the death\n"
+      "                        of the domain."
+
+    },
 };
 
 int cmdtable_len = sizeof(cmd_table)/sizeof(struct cmd_spec);

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.