[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v13 12/26] secondary vm suspend/resume/checkpoint code
From: Wen Congyang <wency@xxxxxxxxxxxxxx> Secondary vm is running in colo mode, so we will do the following things again and again: 1. Resume secondary vm a. Send CHECKPOINT_SVM_READY to master. b. If it is not the first resume, call libxl__checkpoint_devices_preresume(). c. If it is the first resume (resume right after live migration), - call libxl__xc_domain_restore_done() to build the secondary vm. - enable secondary vm's logdirty. - call libxl__domain_resume() to resume secondary vm. - call libxl__checkpoint_devices_setup() to set up checkpoint devices. d. Send CHECKPOINT_SVM_RESUMED to master. 2. Wait for a new checkpoint a. Call libxl__checkpoint_devices_commit(). b. Read CHECKPOINT_NEW from master. 3. Suspend secondary vm a. Suspend secondary vm. b. Call libxl__checkpoint_devices_postsuspend(). c. Send CHECKPOINT_SVM_SUSPENDED to master. 4. Checkpoint a. Read emulator xenstore data and emulator context. b. REC_TYPE_CHECKPOINT_END Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx> Signed-off-by: Yang Hongyang <hongyang.yang@xxxxxxxxxxxx> Signed-off-by: Changlong Xie <xiecl.fnst@xxxxxxxxxxxxxx> --- tools/libxc/include/xenguest.h | 20 + tools/libxc/xc_sr_save.c | 3 +- tools/libxl/Makefile | 1 + tools/libxl/libxl_colo.h | 55 ++ tools/libxl/libxl_colo_restore.c | 1029 ++++++++++++++++++++++++++++++++++++ tools/libxl/libxl_create.c | 45 ++ tools/libxl/libxl_internal.h | 10 +- tools/libxl/libxl_save_callout.c | 6 +- tools/libxl/libxl_save_msgs_gen.pl | 11 +- tools/libxl/libxl_stream_read.c | 12 + tools/libxl/libxl_types.idl | 1 + 11 files changed, 1180 insertions(+), 13 deletions(-) create mode 100644 tools/libxl/libxl_colo.h create mode 100644 tools/libxl/libxl_colo_restore.c diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h index b4f4bfb..3193d0f 100644 --- a/tools/libxc/include/xenguest.h +++ b/tools/libxc/include/xenguest.h @@ -78,6 +78,7 @@ struct save_callbacks { typedef enum { XC_MIG_STREAM_NONE, /* plain stream */ XC_MIG_STREAM_REMUS, + 
XC_MIG_STREAM_COLO, } xc_migration_stream_t; /** @@ -97,6 +98,16 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter /* callbacks provided by xc_domain_restore */ struct restore_callbacks { + /* Called after a new checkpoint to suspend the guest. + */ + int (*suspend)(void* data); + + /* Called after the secondary vm is ready to resume. + * Callback function resumes the guest & the device model, + * returns to xc_domain_restore. + */ + int (*postcopy)(void* data); + /* A checkpoint record has been found in the stream. * returns: */ #define XGR_CHECKPOINT_ERROR 0 /* Terminate processing */ @@ -104,6 +115,15 @@ struct restore_callbacks { #define XGR_CHECKPOINT_FAILOVER 2 /* Failover and resume VM */ int (*checkpoint)(void* data); + /* + * Called after the checkpoint callback. + * + * returns: + * 0: terminate checkpointing gracefully + * 1: take another checkpoint + */ + int (*wait_checkpoint)(void* data); + /* to be provided as the last argument to each callback function */ void* data; }; diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c index 1ccdbbb..d3d95d4 100644 --- a/tools/libxc/xc_sr_save.c +++ b/tools/libxc/xc_sr_save.c @@ -846,7 +846,8 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, /* If altering migration_stream update this assert too. */ assert(stream_type == XC_MIG_STREAM_NONE || - stream_type == XC_MIG_STREAM_REMUS); + stream_type == XC_MIG_STREAM_REMUS || + stream_type == XC_MIG_STREAM_COLO); /* * TODO: Find some time to better tweak the live migration algorithm. 
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile index 8fa7b87..35a07a7 100644 --- a/tools/libxl/Makefile +++ b/tools/libxl/Makefile @@ -65,6 +65,7 @@ LIBXL_OBJS-y += libxl_no_convert_callout.o endif LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o +LIBXL_OBJS-y += libxl_colo_restore.o LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h new file mode 100644 index 0000000..f2b98cc --- /dev/null +++ b/tools/libxl/libxl_colo.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2016 FUJITSU LIMITED + * Author: Wen Congyang <wency@xxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ */ + +#ifndef LIBXL_COLO_H +#define LIBXL_COLO_H + +struct libxl__ao; +struct libxl__egc; + +enum { + LIBXL_COLO_SETUPED, + LIBXL_COLO_SUSPENDED, + LIBXL_COLO_RESUMED, +}; + +typedef struct libxl__domain_create_state libxl__domain_create_state; +typedef void libxl__domain_create_cb(struct libxl__egc *egc, + libxl__domain_create_state *dcs, + int rc, uint32_t domid); + +typedef struct libxl__colo_restore_state libxl__colo_restore_state; +typedef void libxl__colo_callback(struct libxl__egc *egc, + libxl__colo_restore_state *crs, int rc); + +struct libxl__colo_restore_state { + /* must set by caller of libxl__colo_(setup|teardown) */ + struct libxl__ao *ao; + uint32_t domid; + int send_back_fd; + int recv_fd; + int hvm; + libxl__colo_callback *callback; + + /* private, colo restore checkpoint state */ + libxl__domain_create_cb *saved_cb; + void *crcs; +}; + +extern void libxl__colo_restore_setup(struct libxl__egc *egc, + libxl__colo_restore_state *crs); +extern void libxl__colo_restore_teardown(struct libxl__egc *egc, void *dcs_void, + int ret, int retval, int errnoval); +#endif diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c new file mode 100644 index 0000000..a8f74a7 --- /dev/null +++ b/tools/libxl/libxl_colo_restore.c @@ -0,0 +1,1029 @@ +/* + * Copyright (C) 2016 FUJITSU LIMITED + * Author: Wen Congyang <wency@xxxxxxxxxxxxxx> + * Yang Hongyang <hongyang.yang@xxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ */ + +#include "libxl_osdeps.h" /* must come before any other headers */ + +#include "libxl_internal.h" +#include "libxl_sr_stream_format.h" + +typedef struct libxl__colo_restore_checkpoint_state libxl__colo_restore_checkpoint_state; +struct libxl__colo_restore_checkpoint_state { + libxl__domain_suspend_state dsps; + libxl__logdirty_switch lds; + libxl__colo_restore_state *crs; + libxl__stream_write_state sws; + int status; + bool preresume; + /* used for teardown */ + int teardown_devices; + int saved_rc; + char *state_file; + + void (*callback)(libxl__egc *, + libxl__colo_restore_checkpoint_state *, + int); +}; + +static const libxl__checkpoint_device_instance_ops *colo_restore_ops[] = { + NULL, +}; + +/* ===================== colo: common functions ===================== */ + +static void colo_enable_logdirty(libxl__colo_restore_state *crs, libxl__egc *egc) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + libxl__colo_restore_checkpoint_state *crcs = crs->crcs; + + /* Convenience aliases */ + const uint32_t domid = crs->domid; + libxl__logdirty_switch *const lds = &crcs->lds; + + EGC_GC; + + /* we need to know which pages are dirty to restore the guest */ + if (xc_shadow_control(CTX->xch, domid, + XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, + NULL, 0, NULL, 0, NULL) < 0) { + LOG(ERROR, "cannot enable secondary vm's logdirty"); + lds->callback(egc, lds, ERROR_FAIL); + return; + } + + if (crs->hvm) { + libxl__domain_common_switch_qemu_logdirty(egc, domid, 1, lds); + return; + } + + lds->callback(egc, lds, 0); +} + +static void colo_disable_logdirty(libxl__colo_restore_state *crs, + libxl__egc *egc) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + libxl__colo_restore_checkpoint_state *crcs = crs->crcs; + + /* Convenience aliases */ + const uint32_t domid = crs->domid; + libxl__logdirty_switch *const lds = &crcs->lds; + + EGC_GC; + + /* we need to know which pages are dirty to restore the guest */ + if 
(xc_shadow_control(CTX->xch, domid, XEN_DOMCTL_SHADOW_OP_OFF, + NULL, 0, NULL, 0, NULL) < 0) + LOG(WARN, "cannot disable secondary vm's logdirty"); + + if (crs->hvm) { + libxl__domain_common_switch_qemu_logdirty(egc, domid, 0, lds); + return; + } + + lds->callback(egc, lds, 0); +} + +static void colo_resume_vm(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs, + int restore_device_model) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs); + int rc; + + /* Convenience aliases */ + libxl__colo_restore_state *const crs = crcs->crs; + + EGC_GC; + + if (!crs->saved_cb) { + /* TODO: sync mmu for hvm? */ + if (restore_device_model) { + rc = libxl__qmp_restore(gc, crs->domid, crcs->state_file); + if (rc) { + LOG(ERROR, "cannot restore device model for secondary vm"); + crcs->callback(egc, crcs, rc); + return; + } + } + rc = libxl__domain_resume(gc, crs->domid, 0); + if (rc) + LOG(ERROR, "cannot resume secondary vm"); + + crcs->callback(egc, crcs, rc); + return; + } + + /* + * TODO: get store gfn and console gfn + * We should call the callback restore_results in + * xc_domain_restore() before resuming the guest. 
+ */ + libxl__xc_domain_restore_done(egc, dcs, 0, 0, 0); + + return; +} + +static int init_device_subkind(libxl__checkpoint_devices_state *cds) +{ + /* init device subkind-specific state in the libxl ctx */ + int rc; + STATE_AO_GC(cds->ao); + + rc = 0; + return rc; +} + +static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds) +{ + /* cleanup device subkind-specific state in the libxl ctx */ + STATE_AO_GC(cds->ao); +} + +/* ================ colo: setup restore environment ================ */ + +static void libxl__colo_domain_create_cb(libxl__egc *egc, + libxl__domain_create_state *dcs, + int rc, uint32_t domid); + +static int init_dsps(libxl__domain_suspend_state *dsps) +{ + int rc = ERROR_FAIL; + libxl_domain_type type; + + STATE_AO_GC(dsps->ao); + + libxl__xswait_init(&dsps->pvcontrol); + libxl__ev_evtchn_init(&dsps->guest_evtchn); + libxl__ev_xswatch_init(&dsps->guest_watch); + libxl__ev_time_init(&dsps->guest_timeout); + + type = libxl__domain_type(gc, dsps->domid); + if (type == LIBXL_DOMAIN_TYPE_INVALID) + goto out; + + dsps->type = type; + + dsps->guest_evtchn.port = -1; + dsps->guest_evtchn_lockfd = -1; + dsps->guest_responded = 0; + dsps->dm_savefile = libxl__device_model_savefile(gc, dsps->domid); + + /* Secondary vm is not created, so we cannot get evtchn port */ + + rc = 0; + +out: + return rc; +} + +/* + * checkpoint callbacks are called in the following order: + * 1. resume + * 2. wait checkpoint + * 3. suspend + * 4. 
checkpoint + */ +static void libxl__colo_restore_domain_resume_callback(void *data); +static void libxl__colo_restore_domain_wait_checkpoint_callback(void *data); +static void libxl__colo_restore_domain_suspend_callback(void *data); +static void libxl__colo_restore_domain_checkpoint_callback(void *data); + +void libxl__colo_restore_setup(libxl__egc *egc, + libxl__colo_restore_state *crs) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + libxl__colo_restore_checkpoint_state *crcs; + int rc = ERROR_FAIL; + + /* Convenience aliases */ + libxl__srm_restore_autogen_callbacks *const callbacks = + &dcs->srs.shs.callbacks.restore.a; + const int domid = crs->domid; + + STATE_AO_GC(crs->ao); + + GCNEW(crcs); + crs->crcs = crcs; + crcs->crs = crs; + + /* setup dsps */ + crcs->dsps.ao = ao; + crcs->dsps.domid = domid; + if (init_dsps(&crcs->dsps)) + goto out; + + callbacks->postcopy = libxl__colo_restore_domain_resume_callback; + callbacks->wait_checkpoint = libxl__colo_restore_domain_wait_checkpoint_callback; + callbacks->suspend = libxl__colo_restore_domain_suspend_callback; + callbacks->checkpoint = libxl__colo_restore_domain_checkpoint_callback; + + /* + * Secondary vm is running in colo mode, so we need to call + * libxl__xc_domain_restore_done() to create secondary vm. + * But we will exit in domain_create_cb(). So replace the + * callback here. 
+ */ + crs->saved_cb = dcs->callback; + dcs->callback = libxl__colo_domain_create_cb; + crcs->state_file = GCSPRINTF(LIBXL_DEVICE_MODEL_RESTORE_FILE".%d", domid); + crcs->status = LIBXL_COLO_SETUPED; + + libxl__logdirty_init(&crcs->lds); + crcs->lds.ao = ao; + + crcs->sws.fd = crs->send_back_fd; + crcs->sws.ao = ao; + crcs->sws.back_channel = true; + + dcs->cds.concrete_data = crs; + + libxl__stream_write_start(egc, &crcs->sws); + + rc = 0; + +out: + crs->callback(egc, crs, rc); + return; +} + +static void libxl__colo_domain_create_cb(libxl__egc *egc, + libxl__domain_create_state *dcs, + int rc, uint32_t domid) +{ + libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs; + + crcs->callback(egc, crcs, rc); +} + +/* ================ colo: teardown restore environment ================ */ + +static void colo_restore_teardown_devices_done(libxl__egc *egc, + libxl__checkpoint_devices_state *cds, int rc); +static void do_failover(libxl__egc *egc, libxl__colo_restore_state *crs); +static void do_failover_done(libxl__egc *egc, + libxl__colo_restore_checkpoint_state* crcs, + int rc); +static void colo_disable_logdirty_done(libxl__egc *egc, + libxl__logdirty_switch *lds, + int rc); +static void libxl__colo_restore_teardown_done(libxl__egc *egc, + libxl__colo_restore_state *crs, + int rc); + +void libxl__colo_restore_teardown(libxl__egc *egc, void *dcs_void, + int ret, int retval, int errnoval) +{ + libxl__domain_create_state *dcs = dcs_void; + libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs; + int rc = 1; + + /* convenience aliases */ + libxl__colo_restore_state *const crs = &dcs->crs; + EGC_GC; + + if (ret == 0 && retval == 0) + rc = 0; + + LOG(INFO, "%s", rc ? 
"colo fails" : "failover"); + + libxl__stream_write_abort(egc, &crcs->sws, 1); + if (crs->saved_cb) { + /* crcs->status is LIBXL_COLO_SETUPED */ + dcs->srs.completion_callback = NULL; + } + libxl__xc_domain_restore_done(egc, dcs, ret, retval, errnoval); + + crcs->saved_rc = rc; + if (!crcs->teardown_devices) { + colo_restore_teardown_devices_done(egc, &dcs->cds, 0); + return; + } + + dcs->cds.callback = colo_restore_teardown_devices_done; + libxl__checkpoint_devices_teardown(egc, &dcs->cds); +} + +static void colo_restore_teardown_devices_done(libxl__egc *egc, + libxl__checkpoint_devices_state *cds, int rc) +{ + libxl__colo_restore_state *crs = cds->concrete_data; + libxl__colo_restore_checkpoint_state *crcs = crs->crcs; + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + + EGC_GC; + + if (rc) + LOG(ERROR, "COLO: failed to teardown device for guest with domid %u," + " rc %d", cds->domid, rc); + + if (crcs->teardown_devices) + cleanup_device_subkind(cds); + + rc = crcs->saved_rc; + if (!rc) { + crcs->callback = do_failover_done; + do_failover(egc, crs); + return; + } + + libxl__colo_restore_teardown_done(egc, crs, rc); +} + +static void do_failover(libxl__egc *egc, libxl__colo_restore_state *crs) +{ + libxl__colo_restore_checkpoint_state *crcs = crs->crcs; + + /* Convenience aliases */ + const int status = crcs->status; + libxl__logdirty_switch *const lds = &crcs->lds; + + EGC_GC; + + switch(status) { + case LIBXL_COLO_SETUPED: + /* + * We will come here only when reading emulator xenstore data or + * emulator context fails, and libxl__xc_domain_restore_done() + * is not called. In this case, the migration is not finished, + * so we cannot do failover. 
+ */ + LOG(ERROR, "migration fails"); + crcs->callback(egc, crcs, ERROR_FAIL); + return; + case LIBXL_COLO_SUSPENDED: + case LIBXL_COLO_RESUMED: + /* disable logdirty first */ + lds->callback = colo_disable_logdirty_done; + colo_disable_logdirty(crs, egc); + return; + default: + LOG(ERROR, "invalid status: %d", status); + crcs->callback(egc, crcs, ERROR_FAIL); + } +} + +static void do_failover_done(libxl__egc *egc, + libxl__colo_restore_checkpoint_state* crcs, + int rc) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs); + + /* Convenience aliases */ + libxl__colo_restore_state *const crs = crcs->crs; + + EGC_GC; + + if (rc) + LOG(ERROR, "cannot do failover"); + + libxl__colo_restore_teardown_done(egc, crs, rc); +} + +static void colo_disable_logdirty_done(libxl__egc *egc, + libxl__logdirty_switch *lds, + int rc) +{ + libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds); + + EGC_GC; + + if (rc) + LOG(WARN, "cannot disable logdirty"); + + if (crcs->status == LIBXL_COLO_SUSPENDED) { + /* + * failover when reading state from master, so no need to + * call libxl__qmp_restore(). 
+ */ + colo_resume_vm(egc, crcs, 0); + return; + } + + /* If we cannot disable logdirty, we still can do failover */ + crcs->callback(egc, crcs, 0); +} + +static void libxl__colo_restore_teardown_done(libxl__egc *egc, + libxl__colo_restore_state *crs, + int rc) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + EGC_GC; + + /* convenience aliases */ + const int domid = crs->domid; + const libxl_ctx *const ctx = libxl__gc_owner(gc); + xc_interface *const xch = ctx->xch; + + if (!rc) + /* failover, no need to destroy the secondary vm */ + goto out; + + xc_domain_destroy(xch, domid); + +out: + if (crs->saved_cb) { + dcs->callback = crs->saved_cb; + crs->saved_cb = NULL; + } + + dcs->callback(egc, dcs, rc, crs->domid); +} + +static void colo_common_write_stream_done(libxl__egc *egc, + libxl__stream_write_state *stream, + int rc); +static void colo_common_read_stream_done(libxl__egc *egc, + libxl__stream_read_state *stream, + int rc); + +/* ======================== colo: checkpoint ======================= */ + +/* + * Do the following things when resuming secondary vm: + * 1. read emulator xenstore data + * 2. read emulator context + * 3. REC_TYPE_CHECKPOINT_END + */ +static void libxl__colo_restore_domain_checkpoint_callback(void *data) +{ + libxl__save_helper_state *shs = data; + libxl__stream_read_state *srs = CONTAINER_OF(shs, *srs, shs); + libxl__domain_create_state *dcs = CONTAINER_OF(srs, *dcs, srs); + libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs; + + crcs->callback = NULL; + dcs->srs.checkpoint_callback = colo_common_read_stream_done; + libxl__stream_read_start_checkpoint(shs->egc, &dcs->srs); +} + +/* ===================== colo: resume secondary vm ===================== */ + +/* + * Do the following things when resuming secondary vm the first time: + * 1. resume secondary vm + * 2. enable log dirty + * 3. setup checkpoint devices + * 4. write CHECKPOINT_SVM_READY + * 5. unpause secondary vm + * 6. 
write CHECKPOINT_SVM_RESUMED + * + * Do the following things when resuming secondary vm: + * 1. write CHECKPOINT_SVM_READY + * 2. resume secondary vm + * 3. write CHECKPOINT_SVM_RESUMED + */ +static void colo_send_svm_ready(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs); +static void colo_send_svm_ready_done(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs, + int rc); +static void colo_restore_preresume_cb(libxl__egc *egc, + libxl__checkpoint_devices_state *cds, + int rc); +static void colo_restore_resume_vm(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs); +static void colo_resume_vm_done(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs, + int rc); +static void colo_write_svm_resumed(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs); +static void colo_enable_logdirty_done(libxl__egc *egc, + libxl__logdirty_switch *lds, + int retval); +static void colo_reenable_logdirty(libxl__egc *egc, + libxl__logdirty_switch *lds, + int rc); +static void colo_reenable_logdirty_done(libxl__egc *egc, + libxl__logdirty_switch *lds, + int rc); +static void colo_setup_checkpoint_devices(libxl__egc *egc, + libxl__colo_restore_state *crs); +static void colo_restore_setup_cds_done(libxl__egc *egc, + libxl__checkpoint_devices_state *cds, + int rc); +static void colo_unpause_svm(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs); + +static void libxl__colo_restore_domain_resume_callback(void *data) +{ + libxl__save_helper_state *shs = data; + libxl__stream_read_state *srs = CONTAINER_OF(shs, *srs, shs); + libxl__domain_create_state *dcs = CONTAINER_OF(srs, *dcs, srs); + libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs; + + if (crcs->teardown_devices) + colo_send_svm_ready(shs->egc, crcs); + else + colo_restore_resume_vm(shs->egc, crcs); +} + +static void colo_send_svm_ready(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs) +{ + libxl_sr_checkpoint_state srcs = { .id = 
CHECKPOINT_SVM_READY }; + + crcs->callback = colo_send_svm_ready_done; + crcs->sws.checkpoint_callback = colo_common_write_stream_done; + libxl__stream_write_checkpoint_state(egc, &crcs->sws, &srcs); +} + +static void colo_send_svm_ready_done(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs, + int rc) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs); + + /* Convenience aliases */ + libxl__checkpoint_devices_state *cds = &dcs->cds; + + if (!crcs->preresume) { + crcs->preresume = true; + colo_unpause_svm(egc, crcs); + return; + } + + cds->callback = colo_restore_preresume_cb; + libxl__checkpoint_devices_preresume(egc, cds); +} + +static void colo_restore_preresume_cb(libxl__egc *egc, + libxl__checkpoint_devices_state *cds, + int rc) +{ + libxl__colo_restore_state *crs = cds->concrete_data; + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + libxl__colo_restore_checkpoint_state *crcs = crs->crcs; + + /* Convenience aliases */ + libxl__save_helper_state *const shs = &dcs->srs.shs; + + EGC_GC; + + if (rc) { + LOG(ERROR, "preresume fails"); + goto out; + } + + colo_restore_resume_vm(egc, crcs); + + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0); +} + +static void colo_restore_resume_vm(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs) +{ + + crcs->callback = colo_resume_vm_done; + colo_resume_vm(egc, crcs, 1); +} + +static void colo_resume_vm_done(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs, + int rc) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs); + + /* Convenience aliases */ + libxl__colo_restore_state *const crs = crcs->crs; + libxl__logdirty_switch *const lds = &crcs->lds; + libxl__save_helper_state *const shs = &dcs->srs.shs; + + EGC_GC; + + if (rc) { + LOG(ERROR, "cannot resume secondary vm"); + goto out; + } + + crcs->status = LIBXL_COLO_RESUMED; + + /* avoid calling stream->completion_callback() more than once */ + if 
(crs->saved_cb) { + dcs->callback = crs->saved_cb; + crs->saved_cb = NULL; + + dcs->srs.completion_callback = NULL; + + lds->callback = colo_enable_logdirty_done; + colo_enable_logdirty(crs, egc); + return; + } + + colo_write_svm_resumed(egc, crcs); + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0); +} + +static void colo_write_svm_resumed(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs) +{ + libxl_sr_checkpoint_state srcs = { .id = CHECKPOINT_SVM_RESUMED }; + + crcs->callback = NULL; + crcs->sws.checkpoint_callback = colo_common_write_stream_done; + libxl__stream_write_checkpoint_state(egc, &crcs->sws, &srcs); +} + +static void colo_enable_logdirty_done(libxl__egc *egc, + libxl__logdirty_switch *lds, + int rc) +{ + libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds); + + /* Convenience aliases */ + libxl__colo_restore_state *const crs = crcs->crs; + + EGC_GC; + + if (rc) { + /* + * log-dirty already enabled? There's no test op, + * so attempt to disable then reenable it + */ + lds->callback = colo_reenable_logdirty; + colo_disable_logdirty(crs, egc); + return; + } + + colo_setup_checkpoint_devices(egc, crs); +} + +static void colo_reenable_logdirty(libxl__egc *egc, + libxl__logdirty_switch *lds, + int rc) +{ + libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds); + libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs); + + /* Convenience aliases */ + libxl__colo_restore_state *const crs = crcs->crs; + libxl__save_helper_state *const shs = &dcs->srs.shs; + + EGC_GC; + + if (rc) { + LOG(ERROR, "cannot enable logdirty"); + goto out; + } + + lds->callback = colo_reenable_logdirty_done; + colo_enable_logdirty(crs, egc); + + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0); +} + +static void colo_reenable_logdirty_done(libxl__egc *egc, + libxl__logdirty_switch *lds, + int rc) +{ + libxl__colo_restore_checkpoint_state *crcs = 
CONTAINER_OF(lds, *crcs, lds); + libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs); + + /* Convenience aliases */ + libxl__save_helper_state *const shs = &dcs->srs.shs; + + EGC_GC; + + if (rc) { + LOG(ERROR, "cannot enable logdirty"); + goto out; + } + + colo_setup_checkpoint_devices(egc, crcs->crs); + + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0); +} + +/* + * We cannot setup checkpoint devices in libxl__colo_restore_setup(), + * because the guest is not ready. + */ +static void colo_setup_checkpoint_devices(libxl__egc *egc, + libxl__colo_restore_state *crs) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + libxl__colo_restore_checkpoint_state *crcs = crs->crcs; + + /* Convenience aliases */ + libxl__checkpoint_devices_state *cds = &dcs->cds; + libxl__save_helper_state *const shs = &dcs->srs.shs; + + STATE_AO_GC(crs->ao); + + /* TODO: disk/nic support */ + cds->device_kind_flags = 0; + cds->callback = colo_restore_setup_cds_done; + cds->ao = ao; + cds->domid = crs->domid; + cds->ops = colo_restore_ops; + + if (init_device_subkind(cds)) + goto out; + + crcs->teardown_devices = 1; + + libxl__checkpoint_devices_setup(egc, cds); + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0); +} + +static void colo_restore_setup_cds_done(libxl__egc *egc, + libxl__checkpoint_devices_state *cds, + int rc) +{ + libxl__colo_restore_state *crs = cds->concrete_data; + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + libxl__colo_restore_checkpoint_state *crcs = crs->crcs; + + /* Convenience aliases */ + libxl__save_helper_state *const shs = &dcs->srs.shs; + + EGC_GC; + + if (rc) { + LOG(ERROR, "COLO: failed to setup device for guest with domid %u", + cds->domid); + goto out; + } + + colo_send_svm_ready(egc, crcs); + + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0); +} + +static void colo_unpause_svm(libxl__egc *egc, + 
libxl__colo_restore_checkpoint_state *crcs) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs); + int rc; + + /* Convenience aliases */ + const uint32_t domid = crcs->crs->domid; + libxl__save_helper_state *const shs = &dcs->srs.shs; + + EGC_GC; + + /* We have enabled secondary vm's logdirty, so we can unpause it now */ + rc = libxl_domain_unpause(CTX, domid); + if (rc) { + LOG(ERROR, "cannot unpause secondary vm"); + goto out; + } + + colo_write_svm_resumed(egc, crcs); + + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0); +} + +/* ===================== colo: wait new checkpoint ===================== */ + +static void colo_restore_commit_cb(libxl__egc *egc, + libxl__checkpoint_devices_state *cds, + int rc); +static void colo_stream_read_done(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs, + int real_size); + +static void libxl__colo_restore_domain_wait_checkpoint_callback(void *data) +{ + libxl__save_helper_state *shs = data; + libxl__stream_read_state *srs = CONTAINER_OF(shs, *srs, shs); + libxl__domain_create_state *dcs = CONTAINER_OF(srs, *dcs, srs); + + /* Convenience aliases */ + libxl__checkpoint_devices_state *cds = &dcs->cds; + + cds->callback = colo_restore_commit_cb; + libxl__checkpoint_devices_commit(shs->egc, cds); +} + +static void colo_restore_commit_cb(libxl__egc *egc, + libxl__checkpoint_devices_state *cds, + int rc) +{ + libxl__colo_restore_state *crs = cds->concrete_data; + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + libxl__colo_restore_checkpoint_state *crcs = crs->crcs; + + EGC_GC; + + if (rc) { + LOG(ERROR, "commit fails"); + goto out; + } + + crcs->callback = colo_stream_read_done; + dcs->srs.checkpoint_callback = colo_common_read_stream_done; + libxl__stream_read_checkpoint_state(egc, &dcs->srs); + + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, 0); +} + +static void colo_stream_read_done(libxl__egc *egc, + 
libxl__colo_restore_checkpoint_state *crcs, + int id) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs); + int ok = 0; + + EGC_GC; + + if (id != CHECKPOINT_NEW) { + LOG(ERROR, "invalid section: %d", id); + goto out; + } + + ok = 1; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, ok); +} + +/* ===================== colo: suspend secondary vm ===================== */ + +/* + * Do the following things when resuming secondary vm: + * 1. suspend secondary vm + * 2. send CHECKPOINT_SVM_SUSPENDED + */ +static void colo_suspend_vm_done(libxl__egc *egc, + libxl__domain_suspend_state *dsps, + int ok); +static void colo_restore_postsuspend_cb(libxl__egc *egc, + libxl__checkpoint_devices_state *cds, + int rc); + +static void libxl__colo_restore_domain_suspend_callback(void *data) +{ + libxl__save_helper_state *shs = data; + libxl__stream_read_state *srs = CONTAINER_OF(shs, *srs, shs); + libxl__domain_create_state *dcs = CONTAINER_OF(srs, *dcs, srs); + libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs; + + STATE_AO_GC(dcs->ao); + + /* Convenience aliases */ + libxl__domain_suspend_state *const dsps = &crcs->dsps; + + /* suspend secondary vm */ + dsps->callback_common_done = colo_suspend_vm_done; + + libxl__domain_suspend(shs->egc, dsps); +} + +static void colo_suspend_vm_done(libxl__egc *egc, + libxl__domain_suspend_state *dsps, + int rc) +{ + libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(dsps, *crcs, dsps); + libxl__colo_restore_state *crs = crcs->crs; + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + + /* Convenience aliases */ + libxl__checkpoint_devices_state *cds = &dcs->cds; + + EGC_GC; + + if (rc) { + LOG(ERROR, "cannot suspend secondary vm"); + goto out; + } + + crcs->status = LIBXL_COLO_SUSPENDED; + + cds->callback = colo_restore_postsuspend_cb; + libxl__checkpoint_devices_postsuspend(egc, cds); + + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, 
&dcs->srs.shs, !rc); +} + +static void colo_restore_postsuspend_cb(libxl__egc *egc, + libxl__checkpoint_devices_state *cds, + int rc) +{ + libxl__colo_restore_state *crs = cds->concrete_data; + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + libxl__colo_restore_checkpoint_state *crcs = crs->crcs; + libxl_sr_checkpoint_state srcs = { .id = CHECKPOINT_SVM_SUSPENDED }; + + EGC_GC; + + if (rc) { + LOG(ERROR, "postsuspend fails"); + goto out; + } + + crcs->callback = NULL; + crcs->sws.checkpoint_callback = colo_common_write_stream_done; + libxl__stream_write_checkpoint_state(egc, &crcs->sws, &srcs); + + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, !rc); +} + +/* ===================== colo: common callback ===================== */ + +static void colo_common_write_stream_done(libxl__egc *egc, + libxl__stream_write_state *stream, + int rc) +{ + libxl__colo_restore_checkpoint_state *crcs = + CONTAINER_OF(stream, *crcs, sws); + libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs); + int ok; + + EGC_GC; + + if (rc < 0) { + /* TODO: it may be a internal error, but we don't know */ + LOG(ERROR, "sending data fails"); + ok = 2; + goto out; + } + + if (!crcs->callback) { + /* Everythins is OK */ + ok = 1; + goto out; + } + + crcs->callback(egc, crcs, 0); + + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, ok); +} + +static void colo_common_read_stream_done(libxl__egc *egc, + libxl__stream_read_state *stream, + int rc) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(stream, *dcs, srs); + libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs; + int ok; + + EGC_GC; + + if (rc < 0) { + /* TODO: it may be a internal error, but we don't know */ + LOG(ERROR, "reading data fails"); + ok = 2; + goto out; + } + + if (!crcs->callback) { + /* Everythins is OK */ + ok = 1; + goto out; + } + + /* rc contains the id */ + crcs->callback(egc, crcs, rc); + + return; + +out: + 
libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, ok); +} diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c index 4d2b95c..c58dd7e 100644 --- a/tools/libxl/libxl_create.c +++ b/tools/libxl/libxl_create.c @@ -985,6 +985,23 @@ static void domcreate_console_available(libxl__egc *egc, dcs->aop_console_how.for_event)); } +static void libxl__colo_restore_setup_done(libxl__egc *egc, + libxl__colo_restore_state *crs, + int rc) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + + EGC_GC; + + if (rc) { + LOG(ERROR, "colo restore setup fails: %d", rc); + domcreate_stream_done(egc, &dcs->srs, rc); + return; + } + + libxl__stream_read_start(egc, &dcs->srs); +} + static void domcreate_bootloader_done(libxl__egc *egc, libxl__bootloader_state *bl, int rc) @@ -998,6 +1015,8 @@ static void domcreate_bootloader_done(libxl__egc *egc, const int restore_fd = dcs->restore_fd; libxl__domain_build_state *const state = &dcs->build_state; const int checkpointed_stream = dcs->restore_params.checkpointed_stream; + libxl__colo_restore_state *const crs = &dcs->crs; + libxl_domain_build_info *const info = &d_config->b_info; if (rc) { domcreate_rebuild_done(egc, dcs, rc); @@ -1026,6 +1045,22 @@ static void domcreate_bootloader_done(libxl__egc *egc, /* Restore */ + /* COLO only supports HVM now because it does not work very + * well with pv drivers: + * 1. We need to resume vm in the slow path. In this case we + * need to disconnect/reconnect backend and frontend. It + * will take too much time and the performance is very slow. + * 2. PV disk cannot reuse block replication that is implemented + * in QEMU. 
+ */ + if (info->type != LIBXL_DOMAIN_TYPE_HVM && + checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO) { + LOG(ERROR, "COLO only supports HVM, unable to restore domain %d", + domid); + rc = ERROR_FAIL; + goto out; + } + rc = libxl__build_pre(gc, domid, d_config, state); if (rc) goto out; @@ -1039,6 +1074,16 @@ static void domcreate_bootloader_done(libxl__egc *egc, if (restore_fd >= 0) { switch (checkpointed_stream) { + case LIBXL_CHECKPOINTED_STREAM_COLO: + /* colo restore setup */ + crs->ao = ao; + crs->domid = domid; + crs->send_back_fd = dcs->send_back_fd; + crs->recv_fd = restore_fd; + crs->hvm = (info->type == LIBXL_DOMAIN_TYPE_HVM); + crs->callback = libxl__colo_restore_setup_done; + libxl__colo_restore_setup(egc, crs); + break; case LIBXL_CHECKPOINTED_STREAM_REMUS: libxl__remus_restore_setup(egc, dcs); /* fall through */ diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 1fafba8..83ac20a 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -87,6 +87,8 @@ #include "_libxl_types_internal.h" #include "_libxl_types_internal_json.h" +#include "libxl_colo.h" + #define LIBXL_INIT_TIMEOUT 10 #define LIBXL_DESTROY_TIMEOUT 10 #define LIBXL_HOTPLUG_TIMEOUT 40 @@ -3422,12 +3424,6 @@ _hidden int libxl__destroy_qdisk_backend(libxl__gc *gc, uint32_t domid); /*----- Domain creation -----*/ -typedef struct libxl__domain_create_state libxl__domain_create_state; - -typedef void libxl__domain_create_cb(libxl__egc *egc, - libxl__domain_create_state*, - int rc, uint32_t domid); - /* State for manipulating a libxl migration v2 stream */ typedef struct libxl__stream_read_state libxl__stream_read_state; @@ -3510,6 +3506,8 @@ struct libxl__domain_create_state { /* private to domain_create */ int guest_domid; libxl__domain_build_state build_state; + libxl__colo_restore_state crs; + libxl__checkpoint_devices_state cds; libxl__bootloader_state bl; libxl__stub_dm_spawn_state dmss; /* If we're not doing stubdom, we use only 
dmss.dm, diff --git a/tools/libxl/libxl_save_callout.c b/tools/libxl/libxl_save_callout.c index f15c235..2e6267d 100644 --- a/tools/libxl/libxl_save_callout.c +++ b/tools/libxl/libxl_save_callout.c @@ -68,7 +68,11 @@ void libxl__xc_domain_restore(libxl__egc *egc, libxl__domain_create_state *dcs, shs->ao = ao; shs->domid = domid; shs->recv_callback = libxl__srm_callout_received_restore; - shs->completion_callback = libxl__xc_domain_restore_done; + if (dcs->restore_params.checkpointed_stream == + LIBXL_CHECKPOINTED_STREAM_COLO) + shs->completion_callback = libxl__colo_restore_teardown; + else + shs->completion_callback = libxl__xc_domain_restore_done; shs->caller_state = dcs; shs->need_results = 1; diff --git a/tools/libxl/libxl_save_msgs_gen.pl b/tools/libxl/libxl_save_msgs_gen.pl index d6d2967..cbb6ca1 100755 --- a/tools/libxl/libxl_save_msgs_gen.pl +++ b/tools/libxl/libxl_save_msgs_gen.pl @@ -23,14 +23,15 @@ our @msgs = ( STRING doing_what), 'unsigned long', 'done', 'unsigned long', 'total'] ], - [ 3, 'scxA', "suspend", [] ], - [ 4, 'scxA', "postcopy", [] ], + [ 3, 'srcxA', "suspend", [] ], + [ 4, 'srcxA', "postcopy", [] ], [ 5, 'srcxA', "checkpoint", [] ], - [ 6, 'scxA', "switch_qemu_logdirty", [qw(int domid + [ 6, 'rcxA', "wait_checkpoint", [] ], + [ 7, 'scxA', "switch_qemu_logdirty", [qw(int domid unsigned enable)] ], - [ 7, 'r', "restore_results", ['unsigned long', 'store_mfn', + [ 8, 'r', "restore_results", ['unsigned long', 'store_mfn', 'unsigned long', 'console_mfn'] ], - [ 8, 'srW', "complete", [qw(int retval + [ 9, 'srW', "complete", [qw(int retval int errnoval)] ], ); diff --git a/tools/libxl/libxl_stream_read.c b/tools/libxl/libxl_stream_read.c index 302ae53..9659051 100644 --- a/tools/libxl/libxl_stream_read.c +++ b/tools/libxl/libxl_stream_read.c @@ -850,6 +850,18 @@ void libxl__xc_domain_restore_done(libxl__egc *egc, void *dcs_void, */ if (libxl__stream_read_inuse(stream)) { switch (checkpointed_stream) { + case LIBXL_CHECKPOINTED_STREAM_COLO: + if 
(stream->completion_callback) { + /* + * restore, just build the secondary vm, don't close + * the stream + */ + stream->completion_callback(egc, stream, 0); + } else { + /* failover, just close the stream */ + stream_complete(egc, stream, 0); + } + break; case LIBXL_CHECKPOINTED_STREAM_REMUS: /* * Failover from primary. Domain state is currently at a diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl index 59b183c..4717517 100644 --- a/tools/libxl/libxl_types.idl +++ b/tools/libxl/libxl_types.idl @@ -233,6 +233,7 @@ libxl_hdtype = Enumeration("hdtype", [ libxl_checkpointed_stream = Enumeration("checkpointed_stream", [ (0, "NONE"), (1, "REMUS"), + (2, "COLO"), ]) # -- 1.9.3 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |