|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC Patch v2 18/45] secondary vm suspend/resume/checkpoint code
The secondary vm runs in colo mode, so we repeat the following steps
again and again:
1. Resume secondary vm
a. Send LIBXL_COLO_SVM_READY to master
b. If it is being resumed for the first time, call libxl__xc_domain_restore_done()
to build the secondary vm, and also enable the secondary vm's logdirty.
Otherwise, call libxl__domain_resume() to resume the secondary vm.
c. Send LIBXL_COLO_SVM_RESUMED to master
2. Wait for a new checkpoint
a. Read LIBXL_COLO_NEW_CHECKPOINT from master
3. Suspend secondary vm
a. Suspend secondary vm
b. Get secondary vm's dirty page information
c. Send LIBXL_COLO_SVM_SUSPENDED to master
d. Send secondary vm's dirty page information to master(count + pfn list)
Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
---
tools/libxc/xenguest.h | 20 +
tools/libxl/Makefile | 1 +
tools/libxl/libxl_colo.h | 38 ++
tools/libxl/libxl_colo_restore.c | 883 +++++++++++++++++++++++++++++++++++++
tools/libxl/libxl_create.c | 116 ++++-
tools/libxl/libxl_dom.c | 2 +-
tools/libxl/libxl_internal.h | 22 +
tools/libxl/libxl_save_callout.c | 6 +-
tools/libxl/libxl_save_msgs_gen.pl | 6 +-
9 files changed, 1087 insertions(+), 7 deletions(-)
create mode 100644 tools/libxl/libxl_colo.h
create mode 100644 tools/libxl/libxl_colo_restore.c
diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
index 40bbac8..d3061c7 100644
--- a/tools/libxc/xenguest.h
+++ b/tools/libxc/xenguest.h
@@ -91,6 +91,26 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t
dom, uint32_t max_iter
/* callbacks provided by xc_domain_restore */
struct restore_callbacks {
+ /* Called after a new checkpoint to suspend the guest.
+ */
+ int (*suspend)(void* data);
+
+ /* Called after the secondary vm is ready to resume.
+ * Callback function resumes the guest & the device model,
+ * returns to xc_domain_restore.
+ */
+ int (*postcopy)(void* data);
+
+ /* callback to wait for a new checkpoint
+ *
+ * returns:
+ * 0: terminate checkpointing gracefully
+ * 1: take another checkpoint */
+ int (*checkpoint)(void* data);
+
+ /* Enable qemu-dm logging dirty pages to xen */
+ int (*switch_qemu_logdirty)(int domid, unsigned enable, void *data); /*
HVM only */
+
/* callback to restore toolstack specific data */
int (*toolstack_restore)(uint32_t domid, const uint8_t *buf,
uint32_t size, void* data);
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 5427461..c026bdd 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -57,6 +57,7 @@ LIBXL_OBJS-y += libxl_nonetbuffer.o
endif
LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
+LIBXL_OBJS-y += libxl_colo_restore.o
LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o
LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
new file mode 100644
index 0000000..91df275
--- /dev/null
+++ b/tools/libxl/libxl_colo.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author: Wen Congyang <wency@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#ifndef LIBXL_COLO_H
+#define LIBXL_COLO_H
+
+/*
+ * values to control suspend/resume primary vm and secondary vm
+ * at the same time
+ */
+enum {
+ LIBXL_COLO_NEW_CHECKPOINT = 1,
+ LIBXL_COLO_SVM_SUSPENDED,
+ LIBXL_COLO_SVM_READY,
+ LIBXL_COLO_SVM_RESUMED,
+};
+
+extern void libxl__colo_restore_done(libxl__egc *egc, void *dcs_void,
+ int ret, int retval, int errnoval);
+extern void libxl__colo_restore_setup(libxl__egc *egc,
+ libxl__colo_restore_state *crs);
+extern void libxl__colo_restore_teardown(libxl__egc *egc,
+ libxl__colo_restore_state *crs,
+ int rc);
+
+#endif
diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c
new file mode 100644
index 0000000..ebbd6b9
--- /dev/null
+++ b/tools/libxl/libxl_colo_restore.c
@@ -0,0 +1,883 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author: Wen Congyang <wency@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+#include "libxl_colo.h"
+#include "xg_private.h"
+#include "xc_bitops.h"
+
+enum {
+ LIBXL_COLO_SETUPED,
+ LIBXL_COLO_SUSPENDED,
+ LIBXL_COLO_RESUMED,
+};
+
+typedef struct libxl__colo_restore_checkpoint_state
libxl__colo_restore_checkpoint_state;
+struct libxl__colo_restore_checkpoint_state {
+ xc_hypercall_buffer_t _dirty_bitmap;
+ xc_hypercall_buffer_t *dirty_bitmap;
+ unsigned long p2m_size;
+ libxl__domain_suspend_state2 dss2;
+ /* for sending data to master */
+ libxl__datacopier_state dc;
+ /* for reading data from master */
+ libxl__datareader_state drs;
+ uint8_t section;
+ libxl__logdirty_switch lds;
+ libxl__colo_restore_state *crs;
+ int status;
+
+ void (*callback)(libxl__egc *,
+ libxl__colo_restore_checkpoint_state *,
+ int);
+
+ /*
+ * 0: secondary vm's dirty bitmap for domain @domid
+ * 1: secondary vm is ready(domain @domid)
+ * 2: secondary vm is resumed(domain @domid)
+ */
+ const char *copywhat[3];
+};
+
+
+static void libxl__colo_restore_domain_resume_callback(void *data);
+static void libxl__colo_restore_domain_checkpoint_callback(void *data);
+static void libxl__colo_restore_domain_suspend_callback(void *data);
+
+/* ===================== colo: common functions ===================== */
+static void colo_enable_logdirty(libxl__colo_restore_state *crs, libxl__egc
*egc)
+{
+ libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+ libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+ /* Convenience aliases */
+ const uint32_t domid = crs->domid;
+ libxl__logdirty_switch *const lds = &crcs->lds;
+
+ STATE_AO_GC(crs->ao);
+
+ /* we need to know which pages are dirty to restore the guest */
+ if (xc_shadow_control(CTX->xch, domid,
+ XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
+ NULL, 0, NULL, 0, NULL) < 0) {
+ LOG(ERROR, "cannot enable secondary vm's logdirty");
+ lds->callback(egc, lds, ERROR_FAIL);
+ return;
+ }
+
+ if (crs->hvm) {
+ libxl__domain_common_switch_qemu_logdirty(domid, 1, lds, egc);
+ return;
+ }
+
+ lds->callback(egc, lds, 0);
+}
+
+static void colo_disable_logdirty(libxl__colo_restore_state *crs,
+ libxl__egc *egc)
+{
+ libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+ libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+ /* Convenience aliases */
+ const uint32_t domid = crs->domid;
+ libxl__logdirty_switch *const lds = &crcs->lds;
+
+ STATE_AO_GC(crs->ao);
+
+ /* turn off logdirty tracking; a failure here is only worth a warning */
+ if (xc_shadow_control(CTX->xch, domid, XEN_DOMCTL_SHADOW_OP_OFF,
+ NULL, 0, NULL, 0, NULL) < 0)
+ LOG(WARN, "cannot disable secondary vm's logdirty");
+
+ if (crs->hvm) {
+ libxl__domain_common_switch_qemu_logdirty(domid, 0, lds, egc);
+ return;
+ }
+
+ lds->callback(egc, lds, 0);
+}
+
+static void colo_resume_vm(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state *crcs)
+{
+ libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+ int rc;
+
+ /* Convenience aliases */
+ libxl__colo_restore_state *const crs = crcs->crs;
+
+ STATE_AO_GC(crs->ao);
+
+ if (!crs->saved_cb) {
+ /* TODO: sync mmu for hvm? */
+ rc = libxl__domain_resume(gc, crs->domid, 0, 1);
+ if (rc)
+ LOG(ERROR, "cannot resume secondary vm");
+
+ crcs->callback(egc, crcs, rc);
+ return;
+ }
+
+ /*
+ * TODO: get store mfn and console mfn
+ * We should call the callback restore_results in
+ * xc_domain_restore() before resuming the guest.
+ */
+ libxl__xc_domain_restore_done(egc, dcs, 0, 0, 0);
+
+ return;
+}
+
+
+/* ================ colo: setup restore environment ================ */
+static void libxl__colo_domain_create_cb(libxl__egc *egc,
+ libxl__domain_create_state *dcs,
+ int rc, uint32_t domid);
+
+static int init_dss2(libxl__domain_suspend_state2 *dss2)
+{
+ int rc = ERROR_FAIL;
+ libxl_domain_type type;
+
+ STATE_AO_GC(dss2->ao);
+
+ type = libxl__domain_type(gc, dss2->domid);
+ if (type == LIBXL_DOMAIN_TYPE_INVALID)
+ goto out;
+
+ libxl__xswait_init(&dss2->pvcontrol);
+ libxl__ev_evtchn_init(&dss2->guest_evtchn);
+ libxl__ev_xswatch_init(&dss2->guest_watch);
+ libxl__ev_time_init(&dss2->guest_timeout);
+
+ if (type == LIBXL_DOMAIN_TYPE_HVM)
+ dss2->hvm = 1;
+ else
+ dss2->hvm = 0;
+
+ dss2->guest_evtchn.port = -1;
+ dss2->guest_evtchn_lockfd = -1;
+ dss2->guest_responded = 0;
+ dss2->dm_savefile = libxl__device_model_savefile(gc, dss2->domid);
+ dss2->save_dm = 0;
+
+ /* Secondary vm is not created, so we cannot get evtchn port */
+
+ rc = 0;
+
+out:
+ return rc;
+}
+
+void libxl__colo_restore_setup(libxl__egc *egc,
+ libxl__colo_restore_state *crs)
+{
+ libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+ libxl__colo_restore_checkpoint_state *crcs;
+ DECLARE_HYPERCALL_BUFFER(unsigned long, dirty_bitmap);
+ int rc = ERROR_FAIL;
+ int bsize;
+
+ /* Convenience aliases */
+ libxl__srm_restore_autogen_callbacks *const callbacks =
+ &dcs->shs.callbacks.restore.a;
+ const int domid = crs->domid;
+
+ STATE_AO_GC(crs->ao);
+
+ GCNEW(crcs);
+ crs->crcs = crcs;
+ crcs->crs = crs;
+
+ crcs->p2m_size = xc_domain_maximum_gpfn(CTX->xch, domid) + 1;
+
+ crcs->copywhat[0] = GCSPRINTF("secondary vm's dirty bitmap for domain
%"PRIu32,
+ domid);
+ crcs->copywhat[1] = GCSPRINTF("secondary vm is ready(domain %"PRIu32")",
+ domid);
+ crcs->copywhat[2] = GCSPRINTF("secondary vm is resumed(domain %"PRIu32")",
+ domid);
+
+ bsize = bitmap_size(crcs->p2m_size);
+ dirty_bitmap = xc_hypercall_buffer_alloc_pages(CTX->xch, dirty_bitmap,
+ NRPAGES(bsize));
+ if (!dirty_bitmap) {
+ rc = ERROR_NOMEM;
+ goto err;
+ }
+ memset(dirty_bitmap, 0, bsize);
+ crcs->_dirty_bitmap = *HYPERCALL_BUFFER(dirty_bitmap);
+ crcs->dirty_bitmap = &crcs->_dirty_bitmap;
+
+ /* setup dss2 */
+ crcs->dss2.ao = ao;
+ crcs->dss2.domid = domid;
+ if (init_dss2(&crcs->dss2))
+ goto err_init_dss2;
+
+ callbacks->suspend = libxl__colo_restore_domain_suspend_callback;
+ callbacks->postcopy = libxl__colo_restore_domain_resume_callback;
+ callbacks->checkpoint = libxl__colo_restore_domain_checkpoint_callback;
+
+ /*
+ * Secondary vm is running in colo mode, so we need to call
+ * libxl__xc_domain_restore_done() to create secondary vm.
+ * But we will exit in domain_create_cb(). So replace the
+ * callback here.
+ */
+ crs->saved_cb = dcs->callback;
+ dcs->callback = libxl__colo_domain_create_cb;
+ crcs->status = LIBXL_COLO_SETUPED;
+
+ logdirty_init(&crcs->lds);
+ crcs->lds.ao = ao;
+
+ rc = 0;
+
+out:
+ crs->callback(egc, crs, rc);
+ return;
+
+err_init_dss2:
+ xc_hypercall_buffer_free_pages(CTX->xch, dirty_bitmap, NRPAGES(bsize));
+ crcs->dirty_bitmap = NULL;
+err:
+ goto out;
+}
+
+static void libxl__colo_domain_create_cb(libxl__egc *egc,
+ libxl__domain_create_state *dcs,
+ int rc, uint32_t domid)
+{
+ libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
+
+ crcs->callback(egc, crcs, rc);
+}
+
+
+/* ================ colo: teardown restore environment ================ */
+static void do_failover_done(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state* crcs,
+ int rc);
+static void colo_disable_logdirty_done(libxl__egc *egc,
+ libxl__logdirty_switch *lds,
+ int rc);
+
+static void do_failover(libxl__egc *egc, libxl__colo_restore_state *crs)
+{
+ libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+ /* Convenience aliases */
+ const int status = crcs->status;
+ libxl__logdirty_switch *const lds = &crcs->lds;
+
+ STATE_AO_GC(crs->ao);
+
+ switch(status) {
+ case LIBXL_COLO_SETUPED:
+ /* logdirty has not been enabled yet, so there is nothing to disable */
+ colo_resume_vm(egc, crcs);
+ return;
+ case LIBXL_COLO_SUSPENDED:
+ case LIBXL_COLO_RESUMED:
+ /* disable logdirty first */
+ lds->callback = colo_disable_logdirty_done;
+ colo_disable_logdirty(crs, egc);
+ return;
+ default:
+ LOG(ERROR, "invalid status: %d", status);
+ crcs->callback(egc, crcs, ERROR_FAIL);
+ }
+}
+
+void libxl__colo_restore_teardown(libxl__egc *egc,
+ libxl__colo_restore_state *crs,
+ int rc)
+{
+ libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+ DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
crcs->dirty_bitmap);
+ int bsize = bitmap_size(crcs->p2m_size);
+ libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+
+ EGC_GC;
+
+ if (!dirty_bitmap)
+ goto do_failover;
+
+ xc_hypercall_buffer_free_pages(CTX->xch, dirty_bitmap, NRPAGES(bsize));
+
+do_failover:
+ if (!rc) {
+ crcs->callback = do_failover_done;
+ do_failover(egc, crs);
+ return;
+ }
+
+ if (crs->saved_cb) {
+ dcs->callback = crs->saved_cb;
+ crs->saved_cb = NULL;
+ }
+ crs->callback(egc, crs, rc);
+}
+
+static void do_failover_done(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state* crcs,
+ int rc)
+{
+ libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+
+ /* Convenience aliases */
+ libxl__colo_restore_state *const crs = crcs->crs;
+
+ STATE_AO_GC(crs->ao);
+
+ if (rc)
+ LOG(ERROR, "cannot do failover");
+
+ if (crs->saved_cb) {
+ dcs->callback = crs->saved_cb;
+ crs->saved_cb = NULL;
+ }
+
+ crs->callback(egc, crs, rc);
+}
+
+static void colo_disable_logdirty_done(libxl__egc *egc,
+ libxl__logdirty_switch *lds,
+ int rc)
+{
+ libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);
+
+ STATE_AO_GC(lds->ao);
+
+ if (rc)
+ LOG(WARN, "cannot disable logdirty");
+
+ if (crcs->status == LIBXL_COLO_SUSPENDED) {
+ colo_resume_vm(egc, crcs);
+ return;
+ }
+
+ /* If we cannot disable logdirty, we still can do failover */
+ crcs->callback(egc, crcs, 0);
+}
+
+/*
+ * checkpoint callbacks are called in the following order:
+ * 1. resume
+ * 2. checkpoint
+ * 3. suspend
+ */
+static void colo_common_send_data_done(libxl__egc *egc,
+ libxl__datacopier_state *dc,
+ int onwrite, int errnoval);
+/* ===================== colo: resume secondary vm ===================== */
+/*
+ * Do the following things when resuming secondary vm:
+ * 1. write LIBXL_COLO_SVM_READY
+ * 2. resume secondary vm
+ * 3. write LIBXL_COLO_SVM_RESUMED
+ */
+static void colo_send_svm_ready_done(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state
*crcs,
+ int rc);
+static void colo_resume_vm_done(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state *crcs,
+ int rc);
+static void colo_write_svm_resumed(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state *crcs);
+static void colo_enable_logdirty_done(libxl__egc *egc,
+ libxl__logdirty_switch *lds,
+ int retval);
+static void colo_reenable_logdirty(libxl__egc *egc,
+ libxl__logdirty_switch *lds,
+ int rc);
+static void colo_reenable_logdirty_done(libxl__egc *egc,
+ libxl__logdirty_switch *lds,
+ int rc);
+
+/*
+ * Restore-side "postcopy" callback (installed in libxl__colo_restore_setup()):
+ * first step of resuming the secondary vm.
+ *
+ * Starts the datacopier in crcs->dc on crs->send_fd and queues a one-byte
+ * LIBXL_COLO_SVM_READY message for the master.  colo_common_send_data_done()
+ * is installed as the datacopier callback and crcs->callback is set to
+ * colo_send_svm_ready_done() as the next step.
+ *
+ * @data: the libxl__save_helper_state embedded in the domain create state.
+ *
+ * If the datacopier cannot be started, the save helper is answered with 0.
+ */
+static void libxl__colo_restore_domain_resume_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__domain_create_state *dcs = CONTAINER_OF(shs, *dcs, shs);
+    libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
+    uint8_t section = LIBXL_COLO_SVM_READY;
+    int rc;
+
+    /* Convenience aliases */
+    libxl__colo_restore_state *const crs = &dcs->crs;
+    const int send_fd = crs->send_fd;
+    libxl__datacopier_state *const dc = &crcs->dc;
+
+    STATE_AO_GC(crs->ao);
+
+    /* write-only copier: there is no input fd, only prefixed data is sent */
+    memset(dc, 0, sizeof(*dc));
+    dc->ao = ao;
+    dc->readfd = -1;
+    dc->writefd = send_fd;
+    dc->maxsz = INT_MAX;
+    dc->copywhat = crcs->copywhat[1];
+    dc->writewhat = "colo stream";
+    dc->callback = colo_common_send_data_done;
+    crcs->callback = colo_send_svm_ready_done;
+
+    rc = libxl__datacopier_start(dc);
+    if (rc) {
+        LOG(ERROR, "libxl__datacopier_start() fails");
+        goto out;
+    }
+
+    /* tell master that secondary vm is ready */
+    libxl__datacopier_prefixdata(shs->egc, dc, &section, sizeof(section));
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(shs->egc, shs, 0);
+}
+
+static void colo_send_svm_ready_done(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state
*crcs,
+ int rc)
+{
+ crcs->callback = colo_resume_vm_done;
+ colo_resume_vm(egc, crcs);
+
+ return;
+}
+
+static void colo_resume_vm_done(libxl__egc *egc,
+ libxl__colo_restore_checkpoint_state *crcs,
+ int rc)
+{
+ libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+
+ /* Convenience aliases */
+ libxl__colo_restore_state *const crs = crcs->crs;
+ libxl__logdirty_switch *const lds = &crcs->lds;
+ libxl__save_helper_state *const shs = &dcs->shs;
+
+ STATE_AO_GC(crs->ao);
+
+ if (rc) {
+ LOG(ERROR, "cannot resume secondary vm");
+ goto out;
+ }
+
+ crcs->status = LIBXL_COLO_RESUMED;
+
+ /* avoid calling libxl__xc_domain_restore_done() more than once */
+ if (crs->saved_cb) {
+ dcs->callback = crs->saved_cb;
+ crs->saved_cb = NULL;
+
+ lds->callback = colo_enable_logdirty_done;
+ colo_enable_logdirty(crs, egc);
+ return;
+ }
+
+ colo_write_svm_resumed(egc, crcs);
+ return;
+
+out:
+ libxl__xc_domain_saverestore_async_callback_done(shs->egc, shs, 0);
+}
+
+/*
+ * Last step of resuming the secondary vm: queue a one-byte
+ * LIBXL_COLO_SVM_RESUMED message for the master on crs->send_fd.
+ *
+ * crcs->callback is cleared, so colo_common_send_data_done() answers the
+ * save helper itself once the data has been written instead of chaining
+ * to a further step.
+ *
+ * If the datacopier cannot be started, the save helper is answered with 0.
+ */
+static void colo_write_svm_resumed(libxl__egc *egc,
+                                   libxl__colo_restore_checkpoint_state *crcs)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+    uint8_t section = LIBXL_COLO_SVM_RESUMED;
+    int rc;
+
+    /* Convenience aliases */
+    libxl__colo_restore_state *const crs = crcs->crs;
+    const int send_fd = crs->send_fd;
+    libxl__datacopier_state *const dc = &crcs->dc;
+    libxl__save_helper_state *const shs = &dcs->shs;
+
+    STATE_AO_GC(crs->ao);
+
+    /* write-only copier: there is no input fd, only prefixed data is sent */
+    memset(dc, 0, sizeof(*dc));
+    dc->ao = ao;
+    dc->readfd = -1;
+    dc->writefd = send_fd;
+    dc->maxsz = INT_MAX;
+    dc->copywhat = crcs->copywhat[2];
+    dc->writewhat = "colo stream";
+    dc->callback = colo_common_send_data_done;
+    /* TODO: configure network */
+    crcs->callback = NULL;
+
+    rc = libxl__datacopier_start(dc);
+    if (rc) {
+        LOG(ERROR, "libxl__datacopier_start() fails");
+        goto out;
+    }
+
+    /* tell master that secondary vm is resumed */
+    libxl__datacopier_prefixdata(egc, dc, &section, sizeof(section));
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(shs->egc, shs, 0);
+}
+
+static void colo_enable_logdirty_done(libxl__egc *egc,
+ libxl__logdirty_switch *lds,
+ int rc)
+{
+ libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);
+ libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+
+ /* Convenience aliases */
+ libxl__colo_restore_state *const crs = crcs->crs;
+ libxl__save_helper_state *const shs = &dcs->shs;
+ const uint32_t domid = crs->domid;
+
+ STATE_AO_GC(crs->ao);
+
+ if (rc) {
+ /*
+ * log-dirty already enabled? There's no test op,
+ * so attempt to disable then reenable it
+ */
+ lds->callback = colo_reenable_logdirty;
+ colo_disable_logdirty(crs, egc);
+ return;
+ }
+
+ /* We have enabled secondary vm's logdirty, so we can unpause it now */
+ rc = libxl__domain_unpause(gc, domid);
+ if (rc) {
+ LOG(ERROR, "cannot unpause secondary vm");
+ goto out;
+ }
+
+ colo_write_svm_resumed(egc, crcs);
+
+ return;
+
+out:
+ libxl__xc_domain_saverestore_async_callback_done(shs->egc, shs, 0);
+}
+
+static void colo_reenable_logdirty(libxl__egc *egc,
+ libxl__logdirty_switch *lds,
+ int rc)
+{
+ libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);
+ libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+
+ /* Convenience aliases */
+ libxl__colo_restore_state *const crs = crcs->crs;
+ libxl__save_helper_state *const shs = &dcs->shs;
+
+ STATE_AO_GC(crs->ao);
+
+ if (rc) {
+ LOG(ERROR, "cannot enable logdirty");
+ goto out;
+ }
+
+ lds->callback = colo_reenable_logdirty_done;
+ colo_enable_logdirty(crs, egc);
+
+ return;
+
+out:
+ libxl__xc_domain_saverestore_async_callback_done(shs->egc, shs, 0);
+}
+
+static void colo_reenable_logdirty_done(libxl__egc *egc,
+ libxl__logdirty_switch *lds,
+ int rc)
+{
+ libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);
+ libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+
+ /* Convenience aliases */
+ libxl__save_helper_state *const shs = &dcs->shs;
+ const uint32_t domid = crcs->crs->domid;
+
+ STATE_AO_GC(crcs->crs->ao);
+
+ if (rc) {
+ LOG(ERROR, "cannot enable logdirty");
+ goto out;
+ }
+
+ /* We have enabled secondary vm's logdirty, so we can unpause it now */
+ rc = libxl__domain_unpause(gc, domid);
+ if (rc) {
+ LOG(ERROR, "cannot unpause secondary vm");
+ goto out;
+ }
+
+ colo_write_svm_resumed(egc, crcs);
+
+ return;
+
+out:
+ libxl__xc_domain_saverestore_async_callback_done(shs->egc, shs, 0);
+}
+
+
+/* ===================== colo: wait new checkpoint ===================== */
+static void colo_stream_read_done(libxl__egc *egc,
+ libxl__datareader_state *drs,
+ ssize_t real_size, int errnoval);
+
+static void libxl__colo_restore_domain_checkpoint_callback(void *data)
+{
+ libxl__save_helper_state *shs = data;
+ libxl__domain_create_state *dcs = CONTAINER_OF(shs, *dcs, shs);
+ libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
+
+ /* Convenience aliases */
+ const int recv_fd = dcs->crs.recv_fd;
+ libxl__datareader_state *const drs = &crcs->drs;
+
+ STATE_AO_GC(dcs->crs.ao);
+
+ memset(drs, 0, sizeof(*drs));
+ drs->ao = ao;
+ drs->readfd = recv_fd;
+ drs->readsize = sizeof(crcs->section);
+ drs->readwhat = "colo stream";
+ drs->callback = colo_stream_read_done;
+ drs->buf = &crcs->section;
+
+ if (libxl__datareader_start(drs)) {
+ LOG(ERROR, "libxl__datareader_start() fails");
+ goto out;
+ }
+
+ return;
+
+out:
+ libxl__xc_domain_saverestore_async_callback_done(shs->egc, shs, 0);
+}
+
+static void colo_stream_read_done(libxl__egc *egc,
+ libxl__datareader_state *drs,
+ ssize_t real_size, int errnoval)
+{
+ libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(drs, *crcs, drs);
+ libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+ int ok = 0;
+
+ /* Convenience aliases */
+ libxl__save_helper_state *const shs = &dcs->shs;
+
+ STATE_AO_GC(drs->ao);
+
+ if (real_size < drs->readsize) {
+ LOG(ERROR, "reading data fails: %lld", (long long)real_size);
+ goto out;
+ }
+
+ if (crcs->section != LIBXL_COLO_NEW_CHECKPOINT) {
+ LOG(ERROR, "invalid section: %d", crcs->section);
+ goto out;
+ }
+
+ ok = 1;
+
+out:
+ libxl__xc_domain_saverestore_async_callback_done(shs->egc, shs, ok);
+}
+
+
+/* ===================== colo: suspend secondary vm ===================== */
+/*
+ * Do the following things when suspending secondary vm:
+ * 1. suspend secondary vm
+ * 2. get secondary vm's dirty page information
+ * 3. send LIBXL_COLO_SVM_SUSPENDED
+ * 4. send secondary vm's dirty page information(count + pfn list)
+ */
+static void colo_suspend_vm_done(libxl__egc *egc,
+ libxl__domain_suspend_state2 *dss2,
+ int ok);
+static void colo_append_pfn_type(libxl__egc *egc,
+ libxl__datacopier_state *dc,
+ unsigned long *dirty_bitmap,
+ unsigned long p2m_size);
+
+static void libxl__colo_restore_domain_suspend_callback(void *data)
+{
+ libxl__save_helper_state *shs = data;
+ libxl__domain_create_state *dcs = CONTAINER_OF(shs, *dcs, shs);
+ libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
+
+ STATE_AO_GC(dcs->ao);
+
+ /* Convenience aliases */
+ libxl__domain_suspend_state2 *const dss2 = &crcs->dss2;
+
+ /* suspend secondary vm */
+ dss2->callback_common_done = colo_suspend_vm_done;
+
+ libxl__domain_suspend2(shs->egc, dss2);
+}
+
+/*
+ * Completion callback for the libxl__domain_suspend2() call started by
+ * libxl__colo_restore_domain_suspend_callback().
+ *
+ * On success it records LIBXL_COLO_SUSPENDED, fetches (and clears) the
+ * secondary vm's dirty bitmap with XEN_DOMCTL_SHADOW_OP_CLEAN, and queues
+ * for the master, in this order:
+ *   - a one-byte LIBXL_COLO_SVM_SUSPENDED message,
+ *   - the number of dirty pages as a uint64_t,
+ *   - the list of dirty pfns (see colo_append_pfn_type()).
+ * crcs->callback is cleared, so colo_common_send_data_done() answers the
+ * save helper once the data has been written.
+ *
+ * @ok: zero if the suspend failed.
+ *
+ * On any failure the save helper is answered with 0.
+ */
+static void colo_suspend_vm_done(libxl__egc *egc,
+                                 libxl__domain_suspend_state2 *dss2,
+                                 int ok)
+{
+    libxl__colo_restore_checkpoint_state *crcs =
+        CONTAINER_OF(dss2, *crcs, dss2);
+    libxl__colo_restore_state *crs = crcs->crs;
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
+                                    crcs->dirty_bitmap);
+    uint8_t section = LIBXL_COLO_SVM_SUSPENDED;
+    /* same type as p2m_size, so the loop bound comparison is not mixed-sign */
+    unsigned long i;
+    int rc;
+    uint64_t count;
+
+    /* Convenience aliases */
+    const int send_fd = crs->send_fd;
+    const unsigned long p2m_size = crcs->p2m_size;
+    const uint32_t domid = crs->domid;
+    libxl__datacopier_state *const dc = &crcs->dc;
+
+    STATE_AO_GC(crs->ao);
+
+    if (!ok) {
+        LOG(ERROR, "cannot suspend secondary vm");
+        goto out;
+    }
+
+    crcs->status = LIBXL_COLO_SUSPENDED;
+
+    /*
+     * Secondary vm is running, so there are some dirty pages
+     * that are non-dirty in master. Get dirty bitmap and
+     * send it to master.
+     */
+    if (xc_shadow_control(CTX->xch, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
+                          HYPERCALL_BUFFER(dirty_bitmap), p2m_size,
+                          NULL, 0, NULL) != p2m_size) {
+        LOG(ERROR, "getting secondary vm's dirty bitmap fails");
+        goto out;
+    }
+
+    /* the master is told how many pfns follow before the list itself */
+    count = 0;
+    for (i = 0; i < p2m_size; i++) {
+        if (test_bit(i, dirty_bitmap))
+            count++;
+    }
+
+    memset(dc, 0, sizeof(*dc));
+    dc->ao = ao;
+    dc->readfd = -1;
+    dc->writefd = send_fd;
+    dc->maxsz = INT_MAX;
+    dc->copywhat = crcs->copywhat[0];
+    dc->writewhat = "colo stream";
+    dc->callback = colo_common_send_data_done;
+    crcs->callback = NULL;
+
+    rc = libxl__datacopier_start(dc);
+    if (rc) {
+        LOG(ERROR, "libxl__datacopier_start() fails");
+        goto out;
+    }
+
+    /* tell master that secondary vm is suspended */
+    libxl__datacopier_prefixdata(egc, dc, &section, sizeof(section));
+
+    /* send dirty pages to master */
+    libxl__datacopier_prefixdata(egc, dc, &count, sizeof(count));
+    colo_append_pfn_type(egc, dc, dirty_bitmap, p2m_size);
+    return;
+
+out:
+    ok = 0;
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->shs, ok);
+}
+
+/*
+ * Queue the pfn of every bit set in @dirty_bitmap on datacopier @dc.
+ *
+ * Each pfn is sent as a uint64_t.  The pfns are handed to
+ * libxl__datacopier_prefixdata() in batches of at most
+ * sizeof(buf->buf) / sizeof(uint64_t) entries, with a final partial
+ * batch for whatever is left over.
+ *
+ * @p2m_size: number of bits of @dirty_bitmap to examine.
+ */
+static void colo_append_pfn_type(libxl__egc *egc,
+                                 libxl__datacopier_state *dc,
+                                 unsigned long *dirty_bitmap,
+                                 unsigned long p2m_size)
+{
+    /* same type as p2m_size, so the loop bound comparison is not mixed-sign */
+    unsigned long i;
+    int count;
+    /* Hack, buf->buf is private member... (buf is only used in sizeof) */
+    libxl__datacopier_buf *buf = NULL;
+    int max_batch = sizeof(buf->buf) / sizeof(uint64_t);
+    int buf_size = max_batch * sizeof(uint64_t);
+    uint64_t *pfn;
+
+    STATE_AO_GC(dc->ao);
+
+    /* NOGC allocation: released explicitly with free() below */
+    pfn = libxl__zalloc(NOGC, buf_size);
+
+    count = 0;
+    for (i = 0; i < p2m_size; i++) {
+        if (!test_bit(i, dirty_bitmap))
+            continue;
+
+        pfn[count++] = i;
+        if (count == max_batch) {
+            /* batch is full: queue it and start refilling from the front */
+            libxl__datacopier_prefixdata(egc, dc, pfn, buf_size);
+            count = 0;
+        }
+    }
+
+    if (count)
+        libxl__datacopier_prefixdata(egc, dc, pfn, count * sizeof(uint64_t));
+
+    free(pfn);
+}
+
+
+/* ===================== colo: common callback ===================== */
+static void colo_common_send_data_done(libxl__egc *egc,
+ libxl__datacopier_state *dc,
+ int onwrite, int errnoval)
+{
+ libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(dc, *crcs, dc);
+ libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+ int ok;
+ STATE_AO_GC(dc->ao);
+
+ if (onwrite == -1) {
+ LOG(ERROR, "sending data fails");
+ ok = 0;
+ goto out;
+ }
+
+ if (errnoval) {
+ /* failure happens when reading/writing, do failover? */
+ ok = 2;
+ goto out;
+ }
+
+ if (!crcs->callback) {
+ /* Everything is OK */
+ ok = 1;
+ goto out;
+ }
+
+ crcs->callback(egc, crcs, 0);
+ return;
+
+out:
+ libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->shs, ok);
+}
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index e29a107..fef9b36 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -19,6 +19,7 @@
#include "libxl_internal.h"
#include "libxl_arch.h"
+#include "libxl_colo.h"
#include <xc_dom.h>
#include <xenguest.h>
@@ -898,6 +899,96 @@ static void domcreate_console_available(libxl__egc *egc,
dcs->aop_console_how.for_event));
}
+static void libxl__colo_restore_teardown_done(libxl__egc *egc,
+ libxl__colo_restore_state *crs,
+ int rc)
+{
+ libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+ STATE_AO_GC(crs->ao);
+
+ /* convenience aliases */
+ libxl__save_helper_state *const shs = &dcs->shs;
+ const int domid = crs->domid;
+ const libxl_ctx *const ctx = libxl__gc_owner(gc);
+ xc_interface *const xch = ctx->xch;
+
+ if (!rc)
+ /* failover, no need to destroy the secondary vm */
+ goto out;
+
+ if (shs->retval)
+ /*
+ * shs->retval stores the return value of xc_domain_restore().
+ * If it is not 0, we have destroyed the secondary vm in
+ * xc_domain_restore();
+ */
+ goto out;
+
+ xc_domain_destroy(xch, domid);
+
+out:
+ dcs->callback(egc, dcs, rc, crs->domid);
+}
+
+void libxl__colo_restore_done(libxl__egc *egc, void *dcs_void,
+ int ret, int retval, int errnoval)
+{
+ libxl__domain_create_state *dcs = dcs_void;
+ int rc = 1;
+
+ /* convenience aliases */
+ libxl__colo_restore_state *const crs = &dcs->crs;
+ STATE_AO_GC(crs->ao);
+
+ /* teardown and failover */
+ crs->callback = libxl__colo_restore_teardown_done;
+
+ if (ret == 0 && retval == 0)
+ rc = 0;
+
+ LOG(INFO, "%s", rc ? "colo fails" : "failover");
+ libxl__colo_restore_teardown(egc, crs, rc);
+}
+
+static void libxl__colo_restore_cp_done(libxl__egc *egc,
+ libxl__colo_restore_state *crs,
+ int rc)
+{
+ libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+ int ok = 0;
+
+ /* convenience aliases */
+ libxl__save_helper_state *const shs = &dcs->shs;
+
+ if (!rc)
+ ok = 1;
+
+ libxl__xc_domain_saverestore_async_callback_done(shs->egc, shs, ok);
+}
+
+static void libxl__colo_restore_setup_done(libxl__egc *egc,
+ libxl__colo_restore_state *crs,
+ int rc)
+{
+ libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+
+ /* convenience aliases */
+ const int hvm = crs->hvm;
+ const int superpages = crs->superpages;
+ const int pae = crs->pae;
+ STATE_AO_GC(crs->ao);
+
+ if (rc) {
+ LOG(ERROR, "colo restore setup fails: %d", rc);
+ libxl__xc_domain_restore_done(egc, dcs, rc, 0, 0);
+ return;
+ }
+
+ crs->callback = libxl__colo_restore_cp_done;
+ libxl__xc_domain_restore(egc, dcs,
+ hvm, pae, superpages);
+}
+
static void domcreate_bootloader_done(libxl__egc *egc,
libxl__bootloader_state *bl,
int rc)
@@ -913,6 +1004,8 @@ static void domcreate_bootloader_done(libxl__egc *egc,
libxl__domain_build_state *const state = &dcs->build_state;
libxl__srm_restore_autogen_callbacks *const callbacks =
&dcs->shs.callbacks.restore.a;
+ const int checkpointed_stream = dcs->checkpointed_stream;
+ libxl__colo_restore_state *const crs = &dcs->crs;
if (rc) {
domcreate_rebuild_done(egc, dcs, rc);
@@ -941,6 +1034,13 @@ static void domcreate_bootloader_done(libxl__egc *egc,
/* Restore */
+ /* COLO only supports HVM now */
+ if (info->type != LIBXL_DOMAIN_TYPE_HVM &&
+ checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO) {
+ rc = ERROR_FAIL;
+ goto out;
+ }
+
rc = libxl__build_pre(gc, domid, d_config, state);
if (rc)
goto out;
@@ -963,8 +1063,20 @@ static void domcreate_bootloader_done(libxl__egc *egc,
rc = ERROR_INVAL;
goto out;
}
- libxl__xc_domain_restore(egc, dcs,
- hvm, pae, superpages);
+
+ if (checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO) {
+ crs->ao = ao;
+ crs->domid = domid;
+ crs->send_fd = dcs->send_fd;
+ crs->recv_fd = restore_fd;
+ crs->hvm = hvm;
+ crs->superpages = superpages;
+ crs->pae = pae;
+ crs->callback = libxl__colo_restore_setup_done;
+ libxl__colo_restore_setup(egc, crs);
+ } else
+ libxl__xc_domain_restore(egc, dcs,
+ hvm, pae, superpages);
return;
out:
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 4e71ec5..769952c 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -862,7 +862,7 @@ static void switch_logdirty_xswatch(libxl__egc *egc,
libxl__ev_xswatch*,
static void switch_logdirty_done(libxl__egc *egc,
libxl__logdirty_switch *lds, int ok);
-static void logdirty_init(libxl__logdirty_switch *lds)
+void logdirty_init(libxl__logdirty_switch *lds)
{
lds->cmd_path = 0;
libxl__ev_xswatch_init(&lds->watch);
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index b04e4b9..d2e3176 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2741,6 +2741,7 @@ struct libxl__logdirty_switch {
libxl__ev_xswatch watch;
libxl__ev_time timeout;
};
+_hidden void logdirty_init(libxl__logdirty_switch *lds);
/*
* libxl__domain_suspend_state is for saving guest, not
@@ -3032,6 +3033,26 @@ typedef void libxl__domain_create_cb(libxl__egc *egc,
libxl__domain_create_state*,
int rc, uint32_t domid);
+/* colo related structure */
+typedef struct libxl__colo_restore_state libxl__colo_restore_state;
+typedef void libxl__colo_callback(libxl__egc *,
+ libxl__colo_restore_state *, int rc);
+struct libxl__colo_restore_state {
+ /* must be set by the caller of libxl__colo_restore_(setup|teardown) */
+ libxl__ao *ao;
+ uint32_t domid;
+ int send_fd;
+ int recv_fd;
+ int hvm;
+ int pae;
+ int superpages;
+ libxl__colo_callback *callback;
+
+ /* private, colo restore checkpoint state */
+ libxl__domain_create_cb *saved_cb;
+ void *crcs;
+};
+
struct libxl__domain_create_state {
/* filled in by user */
libxl__ao *ao;
@@ -3044,6 +3065,7 @@ struct libxl__domain_create_state {
int guest_domid;
int checkpointed_stream;
libxl__domain_build_state build_state;
+ libxl__colo_restore_state crs;
libxl__bootloader_state bl;
libxl__stub_dm_spawn_state dmss;
/* If we're not doing stubdom, we use only dmss.dm,
diff --git a/tools/libxl/libxl_save_callout.c b/tools/libxl/libxl_save_callout.c
index 0c09d94..e251181 100644
--- a/tools/libxl/libxl_save_callout.c
+++ b/tools/libxl/libxl_save_callout.c
@@ -15,6 +15,7 @@
#include "libxl_osdeps.h"
#include "libxl_internal.h"
+#include "libxl_colo.h"
/* stream_fd is as from the caller (eventually, the application).
* It may be 0, 1 or 2, in which case we need to dup it elsewhere.
@@ -65,7 +66,10 @@ void libxl__xc_domain_restore(libxl__egc *egc,
libxl__domain_create_state *dcs,
dcs->shs.ao = ao;
dcs->shs.domid = domid;
dcs->shs.recv_callback = libxl__srm_callout_received_restore;
- dcs->shs.completion_callback = libxl__xc_domain_restore_done;
+ if (dcs->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO)
+ dcs->shs.completion_callback = libxl__colo_restore_done;
+ else
+ dcs->shs.completion_callback = libxl__xc_domain_restore_done;
dcs->shs.caller_state = dcs;
dcs->shs.need_results = 1;
dcs->shs.toolstack_data_file = 0;
diff --git a/tools/libxl/libxl_save_msgs_gen.pl
b/tools/libxl/libxl_save_msgs_gen.pl
index 41ee000..0239cac 100755
--- a/tools/libxl/libxl_save_msgs_gen.pl
+++ b/tools/libxl/libxl_save_msgs_gen.pl
@@ -24,9 +24,9 @@ our @msgs = (
STRING doing_what),
'unsigned long', 'done',
'unsigned long', 'total'] ],
- [ 3, 'scxA', "suspend", [] ],
- [ 4, 'scxA', "postcopy", [] ],
- [ 5, 'scxA', "checkpoint", [] ],
+ [ 3, 'srcxA', "suspend", [] ],
+ [ 4, 'srcxA', "postcopy", [] ],
+ [ 5, 'srcxA', "checkpoint", [] ],
[ 6, 'scxA', "switch_qemu_logdirty", [qw(int domid
unsigned enable)] ],
# toolstack_save done entirely `by hand'
--
1.9.3
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |