[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v10 25/31] COLO: use qemu block replication



Use QEMU block replication as our block replication solution.
Note that the guest must be paused before COLO starts; otherwise
the disks will not be consistent between the primary and the secondary.

Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
Signed-off-by: Yang Hongyang <hongyang.yang@xxxxxxxxxxxx>
---
 tools/libxl/Makefile             |   1 +
 tools/libxl/libxl_colo_qdisk.c   | 226 +++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_colo_restore.c |  42 +++++++-
 tools/libxl/libxl_colo_save.c    |  54 +++++++++-
 tools/libxl/libxl_internal.h     |  13 +++
 5 files changed, 331 insertions(+), 5 deletions(-)
 create mode 100644 tools/libxl/libxl_colo_qdisk.c

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 2016393..28d54d0 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -66,6 +66,7 @@ endif
 
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
 LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o
+LIBXL_OBJS-y += libxl_colo_qdisk.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o
diff --git a/tools/libxl/libxl_colo_qdisk.c b/tools/libxl/libxl_colo_qdisk.c
new file mode 100644
index 0000000..6179947
--- /dev/null
+++ b/tools/libxl/libxl_colo_qdisk.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright (C) 2015 FUJITSU LIMITED
+ * Author: Wen Congyang <wency@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+typedef struct libxl__colo_qdisk {
+    bool setuped; /* set after this disk's replication setup was issued */
+} libxl__colo_qdisk;
+
+/* ========== init() and cleanup() ========== */
+int init_subkind_qdisk(libxl__checkpoint_devices_state *cds)
+{
+    /*
+     * Nothing is initialised here: we don't know yet whether qemu block
+     * replication is used, so it cannot be started at this point.
+     */
+    return 0; /* unconditional success */
+}
+
+void cleanup_subkind_qdisk(libxl__checkpoint_devices_state *cds)
+{   /* nothing to clean up: init_subkind_qdisk() sets nothing up */
+}
+
+/* ========== setup() and teardown() ========== */
+/*
+ * Set up COLO block replication for one disk.
+ *
+ * A disk matches only if it uses the qdisk backend and has colo_enable
+ * set; otherwise ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH is reported.
+ * Primary side: only css->qdisk_used is recorded here.
+ * Secondary side: the NBD server is started for the first matching disk
+ * (later disks must name the same host and port) and this disk's export
+ * is added to it.
+ *
+ * The result is delivered via dev->aodev.callback with dev->aodev.rc set.
+ */
+static void colo_qdisk_setup(libxl__egc *egc, libxl__checkpoint_device *dev,
+                             bool primary)
+{
+    libxl__checkpoint_devices_state *const cds = dev->cds;
+    const libxl_device_disk *disk = dev->backend_dev;
+    const char *host = disk->colo_host;
+    const char *port = disk->colo_port;
+    const char *export_name = disk->colo_export;
+    const int domid = cds->domid;
+    libxl__colo_qdisk *colo_qdisk = NULL;
+    libxl__colo_restore_state *crs;
+    int rc = 0;
+
+    STATE_AO_GC(dev->cds->ao);
+
+    /* Only qdisk-backed disks with COLO enabled are handled by these ops. */
+    if (!(disk->backend == LIBXL_DISK_BACKEND_QDISK &&
+          libxl_defbool_val(disk->colo_enable))) {
+        rc = ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH;
+        goto out;
+    }
+
+    dev->matched = true;
+
+    GCNEW(colo_qdisk);
+    dev->concrete_data = colo_qdisk;
+
+    if (primary) {
+        libxl__colo_save_state *css = cds->concrete_data;
+
+        /* NBD server is not ready, so we cannot start block replication now */
+        css->qdisk_used = true;
+        goto out;
+    }
+
+    /* Secondary side from here on. */
+    crs = cds->concrete_data;
+
+    if (crs->qdisk_used) {
+        /* The server is already running: this disk must use the same one. */
+        if (strcmp(crs->host, host) || strcmp(crs->port, port)) {
+            LOG(ERROR, "The host and port of all disks must be the same");
+            rc = ERROR_FAIL;
+            goto out;
+        }
+    } else {
+        /* First matching disk: start nbd server and remember its address. */
+        if (libxl__qmp_nbd_server_start(gc, domid, host, port)) {
+            rc = ERROR_FAIL;
+            goto out;
+        }
+        crs->host = host;
+        crs->port = port;
+    }
+
+    crs->qdisk_used = true;
+
+    if (libxl__qmp_nbd_server_add(gc, domid, export_name))
+        rc = ERROR_FAIL;
+
+    colo_qdisk->setuped = true;
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(egc, &dev->aodev);
+}
+
+/*
+ * Undo colo_qdisk_setup()/colo_qdisk_save_preresume() for one disk.
+ *
+ * Primary side: if the disk was set up, remove the replication child
+ * "children.1" from it.  Secondary side: stop the NBD server if
+ * crs->qdisk_used is set.
+ * NOTE(review): the stop is issued for every disk being torn down;
+ * confirm that repeated stops are harmless with more than one disk.
+ *
+ * The result is delivered via dev->aodev.callback with dev->aodev.rc set.
+ */
+static void colo_qdisk_teardown(libxl__egc *egc, libxl__checkpoint_device *dev,
+                                bool primary)
+{
+    const libxl__colo_qdisk *colo_qdisk = dev->concrete_data;
+    const libxl_device_disk *disk = dev->backend_dev;
+    libxl__checkpoint_devices_state *const cds = dev->cds;
+    const char *export_name = disk->colo_export;
+    const int domid = cds->domid;
+    int rc = 0;
+
+    EGC_GC;
+
+    if (!primary) {
+        libxl__colo_restore_state *crs = cds->concrete_data;
+
+        if (crs->qdisk_used && libxl__qmp_nbd_server_stop(gc, domid))
+            rc = ERROR_FAIL;
+    } else if (colo_qdisk->setuped) {
+        /*
+         * There is no way to get the child name, but we know it is children.1
+         */
+        if (libxl__qmp_x_blockdev_change(gc, domid, export_name,
+                                         "children.1", NULL))
+            rc = ERROR_FAIL;
+    }
+
+    dev->aodev.rc = rc;
+    dev->aodev.callback(egc, &dev->aodev);
+}
+
+/* ========== checkpointing APIs ========== */
+/*
+ * Primary-side preresume hook: attach the NBD export described by the
+ * disk's colo_host/colo_port/colo_export to this disk as a replication
+ * child.  This cannot be done in setup() because the NBD server is not
+ * ready then, so it is done the first time this hook runs and skipped on
+ * later calls once it has succeeded.
+ *
+ * The result is delivered via dev->aodev.callback with dev->aodev.rc set
+ * to 0 or ERROR_FAIL.
+ */
+static void colo_qdisk_save_preresume(libxl__egc *egc,
+                                      libxl__checkpoint_device *dev)
+{
+    libxl__colo_qdisk *colo_qdisk = dev->concrete_data;
+    const libxl_device_disk *disk = dev->backend_dev;
+    int rc = 0;
+    char *node = NULL;
+    char *cmd = NULL;
+
+    /* Convenience aliases */
+    const int domid = dev->cds->domid;
+    const char *host = disk->colo_host;
+    const char *port = disk->colo_port;
+    const char *export_name = disk->colo_export;
+
+    EGC_GC;
+
+    if (colo_qdisk->setuped)
+        goto out;
+
+    /* qmp command doesn't support the driver "nbd" */
+    node = GCSPRINTF("colo_node%d",
+                     libxl__device_disk_dev_number(disk->vdev, NULL, NULL));
+    cmd = GCSPRINTF("drive_add buddy driver=replication,mode=primary,"
+                    "file.driver=nbd,file.host=%s,file.port=%s,"
+                    "file.export=%s,node-name=%s,if=none",
+                    host, port, export_name, node);
+    if (libxl__qmp_hmp(gc, domid, cmd)) {
+        /* drive_add failed, so do not try to attach the node. */
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    if (libxl__qmp_x_blockdev_change(gc, domid, export_name, NULL, node)) {
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    /*
+     * Mark the disk as set up only after both commands succeeded:
+     * teardown uses this flag to decide whether to remove "children.1".
+     */
+    colo_qdisk->setuped = true;
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(egc, &dev->aodev);
+}
+
+/* ======== primary ======== */
+static void colo_qdisk_save_setup(libxl__egc *egc,
+                                  libxl__checkpoint_device *dev)
+{
+    colo_qdisk_setup(egc, dev, true); /* primary = true */
+}
+
+static void colo_qdisk_save_teardown(libxl__egc *egc,
+                                   libxl__checkpoint_device *dev)
+{
+    colo_qdisk_teardown(egc, dev, true); /* primary = true */
+}
+
+const libxl__checkpoint_device_instance_ops colo_save_device_qdisk = {
+    .kind = LIBXL__DEVICE_KIND_VBD,
+    .setup = colo_qdisk_save_setup,         /* only records qdisk_used */
+    .teardown = colo_qdisk_save_teardown,
+    .preresume = colo_qdisk_save_preresume, /* attaches the NBD child */
+};
+
+/* ======== secondary ======== */
+static void colo_qdisk_restore_setup(libxl__egc *egc,
+                                     libxl__checkpoint_device *dev)
+{
+    colo_qdisk_setup(egc, dev, false); /* primary = false: secondary side */
+}
+
+static void colo_qdisk_restore_teardown(libxl__egc *egc,
+                                      libxl__checkpoint_device *dev)
+{
+    colo_qdisk_teardown(egc, dev, false); /* primary = false: secondary side */
+}
+
+const libxl__checkpoint_device_instance_ops colo_restore_device_qdisk = {
+    .kind = LIBXL__DEVICE_KIND_VBD,
+    .setup = colo_qdisk_restore_setup,       /* starts NBD server, adds export */
+    .teardown = colo_qdisk_restore_teardown, /* stops the NBD server */
+}; /* no .preresume hook is set for the secondary side */
diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c
index 0ef5b5f..eff80ca 100644
--- a/tools/libxl/libxl_colo_restore.c
+++ b/tools/libxl/libxl_colo_restore.c
@@ -50,7 +50,10 @@ static void libxl__colo_restore_domain_checkpoint_callback(void *data);
 static void libxl__colo_restore_domain_wait_checkpoint_callback(void *data);
 static void libxl__colo_restore_domain_suspend_callback(void *data);
 
+extern const libxl__checkpoint_device_instance_ops colo_restore_device_qdisk;
+
 static const libxl__checkpoint_device_instance_ops *colo_restore_ops[] = {
+    &colo_restore_device_qdisk,
     NULL,
 };
 
@@ -150,7 +153,11 @@ static int init_device_subkind(libxl__checkpoint_devices_state *cds)
     int rc;
     STATE_AO_GC(cds->ao);
 
+    rc = init_subkind_qdisk(cds);
+    if (rc)  goto out;
+
     rc = 0;
+out:
     return rc;
 }
 
@@ -158,6 +165,8 @@ static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
 {
     /* cleanup device subkind-specific state in the libxl ctx */
     STATE_AO_GC(cds->ao);
+
+    cleanup_subkind_qdisk(cds);
 }
 
 
@@ -214,6 +223,8 @@ void libxl__colo_restore_setup(libxl__egc *egc,
     GCNEW(crcs);
     crs->crcs = crcs;
     crcs->crs = crs;
+    crs->qdisk_setuped = false;
+    crs->qdisk_used = false;
 
     /* setup dsps */
     crcs->dsps.ao = ao;
@@ -305,6 +316,11 @@ void libxl__colo_restore_teardown(libxl__egc *egc, void *dcs_void,
     }
     libxl__xc_domain_restore_done(egc, dcs, ret, retval, errnoval);
 
+    if (crs->qdisk_setuped) {
+        libxl__qmp_stop_replication(gc, crs->domid, false);
+        crs->qdisk_setuped = false;
+    }
+
     crcs->saved_rc = rc;
     if (!crcs->teardown_devices) {
         colo_restore_teardown_devices_done(egc, &dcs->cds, 0);
@@ -582,6 +598,13 @@ static void colo_restore_preresume_cb(libxl__egc *egc,
         goto out;
     }
 
+    if (crs->qdisk_setuped) {
+        if (libxl__qmp_do_checkpoint(gc, crs->domid)) {
+            LOG(ERROR, "doing checkpoint fails");
+            goto out;
+        }
+    }
+
     colo_restore_resume_vm(egc, crcs);
 
     return;
@@ -739,8 +762,8 @@ static void colo_setup_checkpoint_devices(libxl__egc *egc,
 
     STATE_AO_GC(crs->ao);
 
-    /* TODO: disk/nic support */
-    cds->device_kind_flags = 0;
+    /* TODO: nic support */
+    cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD);
     cds->callback = colo_restore_setup_cds_done;
     cds->ao = ao;
     cds->domid = crs->domid;
@@ -777,6 +800,14 @@ static void colo_restore_setup_cds_done(libxl__egc *egc,
         goto out;
     }
 
+    if (crs->qdisk_used && !crs->qdisk_setuped) {
+        if (libxl__qmp_start_replication(gc, crs->domid, false)) {
+            LOG(ERROR, "starting replication fails");
+            goto out;
+        }
+        crs->qdisk_setuped = true;
+    }
+
     colo_send_svm_ready(egc, crcs);
 
     return;
@@ -931,13 +962,18 @@ static void colo_suspend_vm_done(libxl__egc *egc,
 
     crcs->status = LIBXL_COLO_SUSPENDED;
 
+    if (libxl__qmp_get_replication_error(gc, crs->domid)) {
+        LOG(ERROR, "replication error occurs when secondary vm is running");
+        goto out;
+    }
+
     cds->callback = colo_restore_postsuspend_cb;
     libxl__checkpoint_devices_postsuspend(egc, cds);
 
     return;
 
 out:
-    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, !rc);
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, 0);
 }
 
 static void colo_restore_postsuspend_cb(libxl__egc *egc,
diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c
index 8b18a91..5f8456c 100644
--- a/tools/libxl/libxl_colo_save.c
+++ b/tools/libxl/libxl_colo_save.c
@@ -19,7 +19,10 @@
 #include "libxl_internal.h"
 #include "libxl_colo.h"
 
+extern const libxl__checkpoint_device_instance_ops colo_save_device_qdisk;
+
 static const libxl__checkpoint_device_instance_ops *colo_ops[] = {
+    &colo_save_device_qdisk,
     NULL,
 };
 
@@ -30,7 +33,11 @@ static int init_device_subkind(libxl__checkpoint_devices_state *cds)
     int rc;
     STATE_AO_GC(cds->ao);
 
+    rc = init_subkind_qdisk(cds);
+    if (rc) goto out;
+
     rc = 0;
+out:
     return rc;
 }
 
@@ -38,6 +45,8 @@ static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
 {
     /* cleanup device subkind-specific state in the libxl ctx */
     STATE_AO_GC(cds->ao);
+
+    cleanup_subkind_qdisk(cds);
 }
 
 /* ================= colo: setup save environment ================= */
@@ -71,9 +80,12 @@ void libxl__colo_save_setup(libxl__egc *egc, libxl__colo_save_state *css)
     css->send_fd = dss->fd;
     css->recv_fd = dss->recv_fd;
     css->svm_running = false;
+    css->paused = true;
+    css->qdisk_setuped = false;
+    css->qdisk_used = false;
 
-    /* TODO: disk/nic support */
-    cds->device_kind_flags = 0;
+    /* TODO: nic support */
+    cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD);
     cds->ops = colo_ops;
     cds->callback = colo_save_setup_done;
     cds->ao = ao;
@@ -153,6 +165,11 @@ void libxl__colo_save_teardown(libxl__egc *egc,
 
     libxl__stream_read_abort(egc, &css->srs, 1);
 
+    if (css->qdisk_setuped) {
+        libxl__qmp_stop_replication(gc, dss->domid, true);
+        css->qdisk_setuped = false;
+    }
+
     dss->cds.callback = colo_teardown_done;
     libxl__checkpoint_devices_teardown(egc, &dss->cds);
     return;
@@ -287,6 +304,11 @@ static void colo_read_svm_suspended_done(libxl__egc *egc,
         goto out;
     }
 
+    if (!css->paused && libxl__qmp_get_replication_error(gc, dss->domid)) {
+        LOG(ERROR, "replication error occurs when primary vm is running");
+        goto out;
+    }
+
     ok = 1;
 
 out:
@@ -384,12 +406,40 @@ static void colo_preresume_cb(libxl__egc *egc,
         goto out;
     }
 
+    if (css->qdisk_used && !css->qdisk_setuped) {
+        if (libxl__qmp_start_replication(gc, dss->domid, true)) {
+            LOG(ERROR, "starting replication fails");
+            goto out;
+        }
+        css->qdisk_setuped = true;
+    }
+
+    if (!css->paused) {
+        if (libxl__qmp_do_checkpoint(gc, dss->domid)) {
+            LOG(ERROR, "doing checkpoint fails");
+            goto out;
+        }
+    }
+
     /* Resumes the domain and the device model */
     if (libxl__domain_resume(gc, dss->domid, /* Fast Suspend */1)) {
         LOG(ERROR, "cannot resume primary vm");
         goto out;
     }
 
+    /*
+     * The guest should be paused before doing colo because there is
+     * no disk migration.
+     */
+    if (css->paused) {
+        rc = libxl_domain_unpause(CTX, dss->domid);
+        if (rc) {
+            LOG(ERROR, "cannot unpause primary vm");
+            goto out;
+        }
+        css->paused = false;
+    }
+
     /* read CHECKPOINT_SVM_RESUMED */
     css->callback = colo_read_svm_resumed_done;
     css->srs.checkpoint_callback = colo_common_read_stream_done;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 227b1d3..3af5fdd 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2902,6 +2902,8 @@ int init_subkind_nic(libxl__checkpoint_devices_state *cds);
 void cleanup_subkind_nic(libxl__checkpoint_devices_state *cds);
 int init_subkind_drbd_disk(libxl__checkpoint_devices_state *cds);
 void cleanup_subkind_drbd_disk(libxl__checkpoint_devices_state *cds);
+int init_subkind_qdisk(libxl__checkpoint_devices_state *cds);
+void cleanup_subkind_qdisk(libxl__checkpoint_devices_state *cds);
 
 typedef void libxl__checkpoint_callback(libxl__egc *,
                                         libxl__checkpoint_devices_state *,
@@ -3119,6 +3121,11 @@ struct libxl__colo_save_state {
     libxl__stream_read_state srs;
     void (*callback)(libxl__egc *, libxl__colo_save_state *, int);
     bool svm_running;
+    bool paused;
+
+    /* private, used by qdisk block replication */
+    bool qdisk_used;
+    bool qdisk_setuped;
 };
 
 /*----- Domain suspend (save) state structure -----*/
@@ -3522,6 +3529,12 @@ struct libxl__colo_restore_state {
     /* private, colo restore checkpoint state */
     libxl__domain_create_cb *saved_cb;
     void *crcs;
+
+    /* private, used by qdisk block replication */
+    bool qdisk_used;
+    bool qdisk_setuped;
+    const char *host;
+    const char *port;
 };
 
 struct libxl__domain_create_state {
-- 
2.5.0




_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.