[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v13 21/26] COLO: use qemu block replication
From: Wen Congyang <wency@xxxxxxxxxxxxxx> Use qemu block replication as our block replication solution. Note that the guest must be paused before starting COLO; otherwise, the disk won't be consistent between the primary and the secondary. Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx> Signed-off-by: Yang Hongyang <hongyang.yang@xxxxxxxxxxxx> Signed-off-by: Changlong Xie <xiecl.fnst@xxxxxxxxxxxxxx> Acked-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx> --- tools/libxl/Makefile | 1 + tools/libxl/libxl_colo.h | 15 +++ tools/libxl/libxl_colo_qdisk.c | 230 +++++++++++++++++++++++++++++++++++++++ tools/libxl/libxl_colo_restore.c | 42 ++++++- tools/libxl/libxl_colo_save.c | 54 ++++++++- tools/libxl/libxl_internal.h | 5 + 6 files changed, 342 insertions(+), 5 deletions(-) create mode 100644 tools/libxl/libxl_colo_qdisk.c diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile index c5ef3f0..701c069 100644 --- a/tools/libxl/Makefile +++ b/tools/libxl/Makefile @@ -66,6 +66,7 @@ endif LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o +LIBXL_OBJS-y += libxl_colo_qdisk.o LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h index feec7f1..90345f4 100644 --- a/tools/libxl/libxl_colo.h +++ b/tools/libxl/libxl_colo.h @@ -19,6 +19,7 @@ struct libxl__ao; struct libxl__egc; struct libxl__colo_save_state; +struct libxl__checkpoint_devices_state; enum { LIBXL_COLO_SETUPED, @@ -26,6 +27,10 @@ enum { LIBXL_COLO_RESUMED, }; +typedef struct libxl__colo_qdisk { + bool setuped; +} libxl__colo_qdisk; + typedef struct libxl__domain_create_state libxl__domain_create_state; typedef void libxl__domain_create_cb(struct libxl__egc *egc, libxl__domain_create_state *dcs, @@ -47,8 +52,18 @@ struct libxl__colo_restore_state { /* private, colo restore checkpoint state */ 
libxl__domain_create_cb *saved_cb; void *crcs; + + /* private, used by qdisk block replication */ + bool qdisk_used; + bool qdisk_setuped; + const char *host; + const char *port; }; +int init_subkind_qdisk(struct libxl__checkpoint_devices_state *cds); + +void cleanup_subkind_qdisk(struct libxl__checkpoint_devices_state *cds); + extern void libxl__colo_restore_setup(struct libxl__egc *egc, libxl__colo_restore_state *crs); extern void libxl__colo_restore_teardown(struct libxl__egc *egc, void *dcs_void, diff --git a/tools/libxl/libxl_colo_qdisk.c b/tools/libxl/libxl_colo_qdisk.c new file mode 100644 index 0000000..c23b81b --- /dev/null +++ b/tools/libxl/libxl_colo_qdisk.c @@ -0,0 +1,230 @@ +/* + * Copyright (C) 2016 FUJITSU LIMITED + * Author: Wen Congyang <wency@xxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#include "libxl_osdeps.h" /* must come before any other headers */ + +#include "libxl_internal.h" + +/* ========== init() and cleanup() ========== */ + +int init_subkind_qdisk(libxl__checkpoint_devices_state *cds) +{ + /* + * We don't know if we use qemu block replication, so + * we cannot start block replication here. 
+ */ + return 0; +} + +void cleanup_subkind_qdisk(libxl__checkpoint_devices_state *cds) +{ +} + +/* ========== setup() and teardown() ========== */ + +static void colo_qdisk_setup(libxl__egc *egc, libxl__checkpoint_device *dev, + bool primary) +{ + const libxl_device_disk *disk = dev->backend_dev; + int ret, rc = 0; + libxl__colo_qdisk *colo_qdisk = NULL; + char port[32]; + + /* Convenience aliases */ + libxl__checkpoint_devices_state *const cds = dev->cds; + const char *host = disk->colo_host; + const char *export_name = disk->colo_export; + const int domid = cds->domid; + + STATE_AO_GC(dev->cds->ao); + + if (disk->backend != LIBXL_DISK_BACKEND_QDISK || + !libxl_defbool_val(disk->colo_enable) || + !host || !export_name || (disk->colo_port <= 0) || + !disk->active_disk || !disk->hidden_disk) { + rc = ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH; + goto out; + } + + dev->matched = true; + + GCNEW(colo_qdisk); + dev->concrete_data = colo_qdisk; + + if (primary) { + libxl__colo_save_state *css = cds->concrete_data; + + css->qdisk_used = true; + /* NBD server is not ready, so we cannot start block replication now */ + goto out; + } else { + libxl__colo_restore_state *crs = cds->concrete_data; + sprintf(port, "%d", disk->colo_port); + + if (!crs->qdisk_used) { + /* start nbd server */ + ret = libxl__qmp_nbd_server_start(gc, domid, host, port); + if (ret) { + rc = ERROR_FAIL; + goto out; + } + crs->host = host; + crs->port = port; + } else { + if (strcmp(crs->host, host) || strcmp(crs->port, port)) { + LOG(ERROR, "The host and port of all disks must be the same"); + rc = ERROR_FAIL; + goto out; + } + } + + crs->qdisk_used = true; + + ret = libxl__qmp_nbd_server_add(gc, domid, export_name); + if (ret) + rc = ERROR_FAIL; + + colo_qdisk->setuped = true; + } + +out: + dev->aodev.rc = rc; + dev->aodev.callback(egc, &dev->aodev); +} + +static void colo_qdisk_teardown(libxl__egc *egc, libxl__checkpoint_device *dev, + bool primary) +{ + int ret, rc = 0; + const libxl__colo_qdisk 
*colo_qdisk = dev->concrete_data; + const libxl_device_disk *disk = dev->backend_dev; + + /* Convenience aliases */ + libxl__checkpoint_devices_state *const cds = dev->cds; + const int domid = cds->domid; + const char *export_name = disk->colo_export; + + EGC_GC; + + if (primary) { + if (!colo_qdisk->setuped) + goto out; + + /* + * There is no way to get the child name, but we know it is children.1 + */ + ret = libxl__qmp_x_blockdev_change(gc, domid, export_name, + "children.1", NULL); + if (ret) + rc = ERROR_FAIL; + } else { + libxl__colo_restore_state *crs = cds->concrete_data; + + if (crs->qdisk_used) { + ret = libxl__qmp_nbd_server_stop(gc, domid); + if (ret) + rc = ERROR_FAIL; + } + } + +out: + dev->aodev.rc = rc; + dev->aodev.callback(egc, &dev->aodev); +} + +/* ========== checkpointing APIs ========== */ + +static void colo_qdisk_save_preresume(libxl__egc *egc, + libxl__checkpoint_device *dev) +{ + libxl__colo_qdisk *colo_qdisk = dev->concrete_data; + const libxl_device_disk *disk = dev->backend_dev; + int ret, rc = 0; + char *node = NULL; + char *cmd = NULL; + + /* Convenience aliases */ + const int domid = dev->cds->domid; + const char *host = disk->colo_host; + int port = disk->colo_port; + const char *export_name = disk->colo_export; + + EGC_GC; + + if (colo_qdisk->setuped) + goto out; + + /* qmp command doesn't support the driver "nbd" */ + node = GCSPRINTF("colo_node%d", + libxl__device_disk_dev_number(disk->vdev, NULL, NULL)); + cmd = GCSPRINTF("drive_add buddy driver=replication,mode=primary," + "file.driver=nbd,file.host=%s,file.port=%d," + "file.export=%s,node-name=%s,if=none", + host, port, export_name, node); + ret = libxl__qmp_hmp(gc, domid, cmd); + if (ret) + rc = ERROR_FAIL; + + ret = libxl__qmp_x_blockdev_change(gc, domid, export_name, NULL, node); + if (ret) + rc = ERROR_FAIL; + + colo_qdisk->setuped = true; + +out: + dev->aodev.rc = rc; + dev->aodev.callback(egc, &dev->aodev); +} + +/* ======== primary ======== */ + +static void 
colo_qdisk_save_setup(libxl__egc *egc, + libxl__checkpoint_device *dev) +{ + colo_qdisk_setup(egc, dev, true); +} + +static void colo_qdisk_save_teardown(libxl__egc *egc, + libxl__checkpoint_device *dev) +{ + colo_qdisk_teardown(egc, dev, true); +} + +const libxl__checkpoint_device_instance_ops colo_save_device_qdisk = { + .kind = LIBXL__DEVICE_KIND_VBD, + .setup = colo_qdisk_save_setup, + .teardown = colo_qdisk_save_teardown, + .preresume = colo_qdisk_save_preresume, +}; + +/* ======== secondary ======== */ + +static void colo_qdisk_restore_setup(libxl__egc *egc, + libxl__checkpoint_device *dev) +{ + colo_qdisk_setup(egc, dev, false); +} + +static void colo_qdisk_restore_teardown(libxl__egc *egc, + libxl__checkpoint_device *dev) +{ + colo_qdisk_teardown(egc, dev, false); +} + +const libxl__checkpoint_device_instance_ops colo_restore_device_qdisk = { + .kind = LIBXL__DEVICE_KIND_VBD, + .setup = colo_qdisk_restore_setup, + .teardown = colo_qdisk_restore_teardown, +}; diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c index 04b02d8..2ab69ed 100644 --- a/tools/libxl/libxl_colo_restore.c +++ b/tools/libxl/libxl_colo_restore.c @@ -37,7 +37,10 @@ struct libxl__colo_restore_checkpoint_state { int); }; +extern const libxl__checkpoint_device_instance_ops colo_restore_device_qdisk; + static const libxl__checkpoint_device_instance_ops *colo_restore_ops[] = { + &colo_restore_device_qdisk, NULL, }; @@ -137,7 +140,11 @@ static int init_device_subkind(libxl__checkpoint_devices_state *cds) int rc; STATE_AO_GC(cds->ao); + rc = init_subkind_qdisk(cds); + if (rc) goto out; + rc = 0; +out: return rc; } @@ -145,6 +152,8 @@ static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds) { /* cleanup device subkind-specific state in the libxl ctx */ STATE_AO_GC(cds->ao); + + cleanup_subkind_qdisk(cds); } /* ================ colo: setup restore environment ================ */ @@ -213,6 +222,8 @@ void libxl__colo_restore_setup(libxl__egc *egc, 
GCNEW(crcs); crs->crcs = crcs; crcs->crs = crs; + crs->qdisk_setuped = false; + crs->qdisk_used = false; /* setup dsps */ crcs->dsps.ao = ao; @@ -301,6 +312,11 @@ void libxl__colo_restore_teardown(libxl__egc *egc, void *dcs_void, } libxl__xc_domain_restore_done(egc, dcs, ret, retval, errnoval); + if (crs->qdisk_setuped) { + libxl__qmp_stop_replication(gc, crs->domid, false); + crs->qdisk_setuped = false; + } + crcs->saved_rc = rc; if (!crcs->teardown_devices) { colo_restore_teardown_devices_done(egc, &dcs->cds, 0); @@ -573,6 +589,13 @@ static void colo_restore_preresume_cb(libxl__egc *egc, goto out; } + if (crs->qdisk_setuped) { + if (libxl__qmp_do_checkpoint(gc, crs->domid)) { + LOG(ERROR, "doing checkpoint fails"); + goto out; + } + } + colo_restore_resume_vm(egc, crcs); return; @@ -730,8 +753,8 @@ static void colo_setup_checkpoint_devices(libxl__egc *egc, STATE_AO_GC(crs->ao); - /* TODO: disk/nic support */ - cds->device_kind_flags = 0; + /* TODO: nic support */ + cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD); cds->callback = colo_restore_setup_cds_done; cds->ao = ao; cds->domid = crs->domid; @@ -768,6 +791,14 @@ static void colo_restore_setup_cds_done(libxl__egc *egc, goto out; } + if (crs->qdisk_used && !crs->qdisk_setuped) { + if (libxl__qmp_start_replication(gc, crs->domid, false)) { + LOG(ERROR, "starting replication fails"); + goto out; + } + crs->qdisk_setuped = true; + } + colo_send_svm_ready(egc, crcs); return; @@ -922,13 +953,18 @@ static void colo_suspend_vm_done(libxl__egc *egc, crcs->status = LIBXL_COLO_SUSPENDED; + if (libxl__qmp_get_replication_error(gc, crs->domid)) { + LOG(ERROR, "replication error occurs when secondary vm is running"); + goto out; + } + cds->callback = colo_restore_postsuspend_cb; libxl__checkpoint_devices_postsuspend(egc, cds); return; out: - libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, !rc); + libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, 0); } static void 
colo_restore_postsuspend_cb(libxl__egc *egc, diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c index cca6bde..d73e632 100644 --- a/tools/libxl/libxl_colo_save.c +++ b/tools/libxl/libxl_colo_save.c @@ -18,7 +18,10 @@ #include "libxl_internal.h" +extern const libxl__checkpoint_device_instance_ops colo_save_device_qdisk; + static const libxl__checkpoint_device_instance_ops *colo_ops[] = { + &colo_save_device_qdisk, NULL, }; @@ -30,7 +33,11 @@ static int init_device_subkind(libxl__checkpoint_devices_state *cds) int rc; STATE_AO_GC(cds->ao); + rc = init_subkind_qdisk(cds); + if (rc) goto out; + rc = 0; +out: return rc; } @@ -38,6 +45,8 @@ static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds) { /* cleanup device subkind-specific state in the libxl ctx */ STATE_AO_GC(cds->ao); + + cleanup_subkind_qdisk(cds); } /* ================= colo: setup save environment ================= */ @@ -79,9 +88,12 @@ void libxl__colo_save_setup(libxl__egc *egc, libxl__colo_save_state *css) css->send_fd = dss->fd; css->recv_fd = dss->recv_fd; css->svm_running = false; + css->paused = true; + css->qdisk_setuped = false; + css->qdisk_used = false; - /* TODO: disk/nic support */ - cds->device_kind_flags = 0; + /* TODO: nic support */ + cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD); cds->ops = colo_ops; cds->callback = colo_save_setup_done; cds->ao = ao; @@ -163,6 +175,11 @@ void libxl__colo_save_teardown(libxl__egc *egc, libxl__stream_read_abort(egc, &css->srs, 1); + if (css->qdisk_setuped) { + libxl__qmp_stop_replication(gc, dss->domid, true); + css->qdisk_setuped = false; + } + dss->cds.callback = colo_teardown_done; libxl__checkpoint_devices_teardown(egc, &dss->cds); return; @@ -291,6 +308,11 @@ static void colo_read_svm_suspended_done(libxl__egc *egc, goto out; } + if (!css->paused && libxl__qmp_get_replication_error(gc, dss->domid)) { + LOG(ERROR, "replication error occurs when primary vm is running"); + goto out; + } + ok = 1; out: @@ 
-389,12 +411,40 @@ static void colo_preresume_cb(libxl__egc *egc, goto out; } + if (css->qdisk_used && !css->qdisk_setuped) { + if (libxl__qmp_start_replication(gc, dss->domid, true)) { + LOG(ERROR, "starting replication fails"); + goto out; + } + css->qdisk_setuped = true; + } + + if (!css->paused) { + if (libxl__qmp_do_checkpoint(gc, dss->domid)) { + LOG(ERROR, "doing checkpoint fails"); + goto out; + } + } + /* Resumes the domain and the device model */ if (libxl__domain_resume(gc, dss->domid, /* Fast Suspend */1)) { LOG(ERROR, "cannot resume primary vm"); goto out; } + /* + * The guest should be paused before doing colo because there is + * no disk migration. + */ + if (css->paused) { + rc = libxl_domain_unpause(CTX, dss->domid); + if (rc) { + LOG(ERROR, "cannot unpause primary vm"); + goto out; + } + css->paused = false; + } + /* read CHECKPOINT_SVM_RESUMED */ css->callback = colo_read_svm_resumed_done; css->srs.checkpoint_callback = colo_common_read_stream_done; diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 148df05..c3366d7 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -3211,6 +3211,11 @@ struct libxl__colo_save_state { libxl__stream_read_state srs; void (*callback)(libxl__egc *, libxl__colo_save_state *, int); bool svm_running; + bool paused; + + /* private, used by qdisk block replication */ + bool qdisk_used; + bool qdisk_setuped; }; typedef struct libxl__logdirty_switch { -- 1.9.3 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |