[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] libxl: Remus - suspend/postflush/commit callbacks
# HG changeset patch # User Shriram Rajagopalan <rshriram@xxxxxxxxx> # Date 1337335244 -3600 # Node ID 2420da9c3793bb96381a5f1716df6688418578dc # Parent 35ca1acdae34bf30c506d4679feee68d87cc2f25 libxl: Remus - suspend/postflush/commit callbacks * Add libxl callback functions for Remus checkpoint suspend, postflush (aka resume) and checkpoint commit callbacks. * suspend callback is a stub that just bounces off libxl__domain_suspend_common_callback - which suspends the domain and saves the device model state to a file. * resume callback currently just resumes the domain (and the device model). * commit callback just writes out the saved device model state to the network and sleeps for the checkpoint interval. * Introduce a new public API, libxl_domain_remus_start (currently a stub) that sets up the network and disk buffer and initiates continuous checkpointing. * Future patches will augment these callbacks/functions with more functionalities like issuing network buffer plug/unplug commands, disk checkpoint commands, etc. Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx> Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx> Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx> --- diff -r 35ca1acdae34 -r 2420da9c3793 tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Fri May 18 11:00:43 2012 +0100 +++ b/tools/libxc/xenguest.h Fri May 18 11:00:44 2012 +0100 @@ -33,10 +33,29 @@ /* callbacks provided by xc_domain_save */ struct save_callbacks { + /* Called after expiration of checkpoint interval, + * to suspend the guest. + */ int (*suspend)(void* data); - /* callback to rendezvous with external checkpoint functions */ + + /* Called after the guest's dirty pages have been + * copied into an output buffer. + * Callback function resumes the guest & the device model, + * returns to xc_domain_save. + * xc_domain_save then flushes the output buffer, while the + * guest continues to run. 
+ */ int (*postcopy)(void* data); - /* returns: + + /* Called after the memory checkpoint has been flushed + * out into the network. Typical actions performed in this + * callback include: + * (a) send the saved device model state (for HVM guests), + * (b) wait for checkpoint ack + * (c) release the network output buffer pertaining to the acked checkpoint. + * (d) sleep for the checkpoint interval. + * + * returns: * 0: terminate checkpointing gracefully * 1: take another checkpoint */ int (*checkpoint)(void* data); diff -r 35ca1acdae34 -r 2420da9c3793 tools/libxl/libxl.c --- a/tools/libxl/libxl.c Fri May 18 11:00:43 2012 +0100 +++ b/tools/libxl/libxl.c Fri May 18 11:00:44 2012 +0100 @@ -619,6 +619,41 @@ libxl_vminfo * libxl_list_vm(libxl_ctx * return ptr; } +/* TODO: Explicit Checkpoint acknowledgements via recv_fd. */ +int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, + uint32_t domid, int send_fd, int recv_fd) +{ + GC_INIT(ctx); + libxl_domain_type type = libxl__domain_type(gc, domid); + int rc = 0; + + if (info == NULL) { + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, + "No remus_info structure supplied for domain %d", domid); + rc = ERROR_INVAL; + goto remus_fail; + } + + /* TBD: Remus setup - i.e. attach qdisc, enable disk buffering, etc */ + + /* Point of no return */ + rc = libxl__domain_suspend_common(gc, domid, send_fd, type, /* live */ 1, + /* debug */ 0, info); + + /* + * With Remus, if we reach this point, it means either + * backup died or some network error occurred preventing us + * from sending checkpoints. + */ + + /* TBD: Remus cleanup - i.e. detach qdisc, release other + * resources. 
+ */ + remus_fail: + GC_FREE; + return rc; +} + int libxl_domain_suspend(libxl_ctx *ctx, libxl_domain_suspend_info *info, uint32_t domid, int fd) { @@ -628,7 +663,9 @@ int libxl_domain_suspend(libxl_ctx *ctx, int debug = info != NULL && info->flags & XL_SUSPEND_DEBUG; int rc = 0; - rc = libxl__domain_suspend_common(gc, domid, fd, type, live, debug); + rc = libxl__domain_suspend_common(gc, domid, fd, type, live, debug, + /* No Remus */ NULL); + if (!rc && type == LIBXL_DOMAIN_TYPE_HVM) rc = libxl__domain_save_device_model(gc, domid, fd); GC_FREE; diff -r 35ca1acdae34 -r 2420da9c3793 tools/libxl/libxl.h --- a/tools/libxl/libxl.h Fri May 18 11:00:43 2012 +0100 +++ b/tools/libxl/libxl.h Fri May 18 11:00:44 2012 +0100 @@ -525,6 +525,8 @@ int libxl_domain_create_restore(libxl_ct void libxl_domain_config_init(libxl_domain_config *d_config); void libxl_domain_config_dispose(libxl_domain_config *d_config); +int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, + uint32_t domid, int send_fd, int recv_fd); int libxl_domain_suspend(libxl_ctx *ctx, libxl_domain_suspend_info *info, uint32_t domid, int fd); diff -r 35ca1acdae34 -r 2420da9c3793 tools/libxl/libxl_dom.c --- a/tools/libxl/libxl_dom.c Fri May 18 11:00:43 2012 +0100 +++ b/tools/libxl/libxl_dom.c Fri May 18 11:00:44 2012 +0100 @@ -566,6 +566,8 @@ struct suspendinfo { int hvm; unsigned int flags; int guest_responded; + int save_fd; /* Migration stream fd (for Remus) */ + int interval; /* checkpoint interval (for Remus) */ }; static int libxl__domain_suspend_common_switch_qemu_logdirty(int domid, unsigned int enable, void *data) @@ -848,9 +850,43 @@ static int libxl__toolstack_save(uint32_ return 0; } +static int libxl__remus_domain_suspend_callback(void *data) +{ + /* TODO: Issue disk and network checkpoint reqs. 
*/ + return libxl__domain_suspend_common_callback(data); +} + +static int libxl__remus_domain_resume_callback(void *data) +{ + struct suspendinfo *si = data; + libxl_ctx *ctx = libxl__gc_owner(si->gc); + + /* Resumes the domain and the device model */ + if (libxl_domain_resume(ctx, si->domid, /* Fast Suspend */1)) + return 0; + + /* TODO: Deal with disk. Start a new network output buffer */ + return 1; +} + +static int libxl__remus_domain_checkpoint_callback(void *data) +{ + struct suspendinfo *si = data; + + /* This would go into tailbuf. */ + if (si->hvm && + libxl__domain_save_device_model(si->gc, si->domid, si->save_fd)) + return 0; + + /* TODO: Wait for disk and memory ack, release network buffer */ + usleep(si->interval * 1000); + return 1; +} + int libxl__domain_suspend_common(libxl__gc *gc, uint32_t domid, int fd, libxl_domain_type type, - int live, int debug) + int live, int debug, + const libxl_domain_remus_info *r_info) { libxl_ctx *ctx = libxl__gc_owner(gc); int flags; @@ -881,10 +917,20 @@ int libxl__domain_suspend_common(libxl__ return ERROR_INVAL; } + memset(&si, 0, sizeof(si)); flags = (live) ? XCFLAGS_LIVE : 0 | (debug) ? XCFLAGS_DEBUG : 0 | (hvm) ? 
XCFLAGS_HVM : 0; + if (r_info != NULL) { + si.interval = r_info->interval; + if (r_info->compression) + flags |= XCFLAGS_CHECKPOINT_COMPRESS; + si.save_fd = fd; + } + else + si.save_fd = -1; + si.domid = domid; si.flags = flags; si.hvm = hvm; @@ -908,7 +954,13 @@ int libxl__domain_suspend_common(libxl__ } memset(&callbacks, 0, sizeof(callbacks)); - callbacks.suspend = libxl__domain_suspend_common_callback; + if (r_info != NULL) { + callbacks.suspend = libxl__remus_domain_suspend_callback; + callbacks.postcopy = libxl__remus_domain_resume_callback; + callbacks.checkpoint = libxl__remus_domain_checkpoint_callback; + } else + callbacks.suspend = libxl__domain_suspend_common_callback; + callbacks.switch_qemu_logdirty = libxl__domain_suspend_common_switch_qemu_logdirty; callbacks.toolstack_save = libxl__toolstack_save; callbacks.data = &si; diff -r 35ca1acdae34 -r 2420da9c3793 tools/libxl/libxl_internal.h --- a/tools/libxl/libxl_internal.h Fri May 18 11:00:43 2012 +0100 +++ b/tools/libxl/libxl_internal.h Fri May 18 11:00:44 2012 +0100 @@ -757,7 +757,8 @@ _hidden int libxl__domain_restore_common int fd); _hidden int libxl__domain_suspend_common(libxl__gc *gc, uint32_t domid, int fd, libxl_domain_type type, - int live, int debug); + int live, int debug, + const libxl_domain_remus_info *r_info); _hidden const char *libxl__device_model_savefile(libxl__gc *gc, uint32_t domid); _hidden int libxl__domain_suspend_device_model(libxl__gc *gc, uint32_t domid); _hidden int libxl__domain_resume_device_model(libxl__gc *gc, uint32_t domid); diff -r 35ca1acdae34 -r 2420da9c3793 tools/libxl/libxl_types.idl --- a/tools/libxl/libxl_types.idl Fri May 18 11:00:43 2012 +0100 +++ b/tools/libxl/libxl_types.idl Fri May 18 11:00:44 2012 +0100 @@ -454,6 +454,12 @@ libxl_sched_sedf_domain = Struct("sched_ ("weight", integer), ]) +libxl_domain_remus_info = Struct("domain_remus_info",[ + ("interval", integer), + ("blackhole", bool), + ("compression", bool), + ]) + libxl_event_type = 
Enumeration("event_type", [ (1, "DOMAIN_SHUTDOWN"), (2, "DOMAIN_DEATH"), _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |