[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v10 3/5] remus: introduce remus device
Introduce remus device, an abstraction layer for remus devices (nic, disk, etc). It provides the following APIs for libxl: >libxl__remus_device_setup setup remus devices, like attach qdisc, enable disk buffering, etc >libxl__remus_device_teardown teardown devices >libxl__remus_device_postsuspend >libxl__remus_device_preresume >libxl__remus_device_commit The above three are for checkpointing. Through the remus device layer, the remus execution flow will be like this: xl remus -> remus device setup |-> remus checkpoint(postsuspend, commit, preresume) ... |-> remus device teardown, failover or abort The remus device layer provides an interface, libxl__remus_device_ops, which a remus device must implement. The whole remus structure: |remus| | |remus device| | |nic| |drbd disks| |qemu disks| ... A device (nic, drbd disks, qemu disks, etc) must implement libxl__remus_device_ops to support remus. Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx> Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx> Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx> --- tools/libxl/Makefile | 2 + tools/libxl/libxl.c | 34 ++++- tools/libxl/libxl_dom.c | 132 ++++++++++++++-- tools/libxl/libxl_internal.h | 113 ++++++++++++++ tools/libxl/libxl_remus_device.c | 303 +++++++++++++++++++++++++++++++++++++ tools/libxl/libxl_save_msgs_gen.pl | 2 +- tools/libxl/libxl_types.idl | 1 + 7 files changed, 572 insertions(+), 15 deletions(-) create mode 100644 tools/libxl/libxl_remus_device.c diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile index a572dca..7a722a8 100644 --- a/tools/libxl/Makefile +++ b/tools/libxl/Makefile @@ -56,6 +56,8 @@ else LIBXL_OBJS-y += libxl_nonetbuffer.o endif +LIBXL_OBJS-y += libxl_remus_device.o + LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c index d59ce0c..2e7a6ea 100644 --- a/tools/libxl/libxl.c +++ b/tools/libxl/libxl.c @@ -713,6 +713,31 @@ out: static void
remus_failover_cb(libxl__egc *egc, libxl__domain_suspend_state *dss, int rc); +static void libxl__remus_setup_failed(libxl__egc *egc, + libxl__remus_state *rs, int rc) +{ + STATE_AO_GC(rs->ao); + libxl__ao_complete(egc,ao,rc); +} + +static void libxl__remus_setup_done(libxl__egc *egc, + libxl__remus_state *rs, int rc) +{ + libxl__domain_suspend_state *dss = CONTAINER_OF(rs, *dss, rs); + STATE_AO_GC(rs->ao); + + if (!rc) { + libxl__domain_suspend(egc, dss); + return; + } + + LOG(ERROR, "Remus: failed to setup device for guest with domid %u", + dss->domid); + rs->saved_rc = rc; + rs->callback = libxl__remus_setup_failed; + libxl__remus_device_teardown(egc, rs); +} + /* TODO: Explicit Checkpoint acknowledgements via recv_fd. */ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, uint32_t domid, int send_fd, int recv_fd, @@ -741,10 +766,15 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, assert(info); - /* TBD: Remus setup - i.e. attach qdisc, enable disk buffering, etc */ + /* Convenience aliases */ + libxl__remus_state *const rs = &dss->rs; + rs->ao = ao; + rs->domid = domid; + rs->saved_rc = 0; + rs->callback = libxl__remus_setup_done; /* Point of no return */ - libxl__domain_suspend(egc, dss); + libxl__remus_device_setup(egc, rs); return AO_INPROGRESS; out: diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c index 661999c..70765a3 100644 --- a/tools/libxl/libxl_dom.c +++ b/tools/libxl/libxl_dom.c @@ -1444,31 +1444,65 @@ static void libxl__remus_domain_suspend_callback(void *data) domain_suspend_callback_common(egc, dss); } +static void remus_device_postsuspend_cb(libxl__egc *egc, + libxl__remus_state *rs, int rc) +{ + int ok = 0; + libxl__domain_suspend_state *dss = CONTAINER_OF(rs, *dss, rs); + + if (!rc) + ok = 1; + libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok); +} + static void remus_domain_suspend_callback_common_done(libxl__egc *egc, libxl__domain_suspend_state *dss, int 
ok) { - /* REMUS TODO: Issue disk and network checkpoint reqs. */ + if (!ok) + goto out; + + libxl__remus_state *const rs = &dss->rs; + rs->callback = remus_device_postsuspend_cb; + libxl__remus_device_postsuspend(egc, rs); + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok); +} + +static void remus_device_preresume_cb(libxl__egc *egc, + libxl__remus_state *rs, int rc) +{ + int ok = 0; + libxl__domain_suspend_state *dss = CONTAINER_OF(rs, *dss, rs); + STATE_AO_GC(dss->ao); + + if (!rc) { + /* Resumes the domain and the device model */ + if (!libxl__domain_resume(gc, dss->domid, /* Fast Suspend */1)) + ok = 1; + } libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok); } -static int libxl__remus_domain_resume_callback(void *data) +static void libxl__remus_domain_resume_callback(void *data) { libxl__save_helper_state *shs = data; libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs); + libxl__egc *egc = shs->egc; STATE_AO_GC(dss->ao); - /* Resumes the domain and the device model */ - if (libxl__domain_resume(gc, dss->domid, /* Fast Suspend */1)) - return 0; - - /* REMUS TODO: Deal with disk. 
Start a new network output buffer */ - return 1; + libxl__remus_state *const rs = &dss->rs; + rs->callback = remus_device_preresume_cb; + libxl__remus_device_preresume(egc, rs); } /*----- remus asynchronous checkpoint callback -----*/ static void remus_checkpoint_dm_saved(libxl__egc *egc, libxl__domain_suspend_state *dss, int rc); +static void remus_next_checkpoint(libxl__egc *egc, libxl__ev_time *ev, + const struct timeval *requested_abs); static void libxl__remus_domain_checkpoint_callback(void *data) { @@ -1485,13 +1519,67 @@ static void libxl__remus_domain_checkpoint_callback(void *data) } } +static void remus_device_commit_cb(libxl__egc *egc, + libxl__remus_state *rs, int rc) +{ + libxl__domain_suspend_state *dss = CONTAINER_OF(rs, *dss, rs); + + STATE_AO_GC(dss->ao); + + if (rc) { + LOG(ERROR, "Failed to do device commit op." + " Terminating Remus.."); + goto out; + } else { + /* Set checkpoint interval timeout */ + rc = libxl__ev_time_register_rel(gc, &rs->timeout, + remus_next_checkpoint, + dss->interval); + if (rc) { + LOG(ERROR, "unable to register timeout for next epoch." + " Terminating Remus.."); + goto out; + } + } + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0); +} + static void remus_checkpoint_dm_saved(libxl__egc *egc, libxl__domain_suspend_state *dss, int rc) { - /* REMUS TODO: Wait for disk and memory ack, release network buffer */ - /* REMUS TODO: make this asynchronous */ - assert(!rc); /* REMUS TODO handle this error properly */ - usleep(dss->interval * 1000); + /* Convenience aliases */ + libxl__remus_state *const rs = &dss->rs; + + STATE_AO_GC(dss->ao); + + if (rc) { + LOG(ERROR, "Failed to save device model. 
Terminating Remus.."); + goto out; + } + + rs->callback = remus_device_commit_cb; + libxl__remus_device_commit(egc, rs); + + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0); +} + +static void remus_next_checkpoint(libxl__egc *egc, libxl__ev_time *ev, + const struct timeval *requested_abs) +{ + libxl__remus_state *rs = CONTAINER_OF(ev, *rs, timeout); + + /* Convenience aliases */ + libxl__domain_suspend_state *const dss = CONTAINER_OF(rs, *dss, rs); + + STATE_AO_GC(dss->ao); + + libxl__ev_time_deregister(gc, &rs->timeout); libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 1); } @@ -1716,6 +1804,13 @@ static void save_device_model_datacopier_done(libxl__egc *egc, dss->save_dm_callback(egc, dss, our_rc); } +static void libxl__remus_teardown_done(libxl__egc *egc, + libxl__remus_state *rs, int rc) +{ + libxl__domain_suspend_state *dss = CONTAINER_OF(rs, *dss, rs); + dss->callback(egc, dss, rc); +} + static void domain_suspend_done(libxl__egc *egc, libxl__domain_suspend_state *dss, int rc) { @@ -1730,6 +1825,19 @@ static void domain_suspend_done(libxl__egc *egc, xc_suspend_evtchn_release(CTX->xch, CTX->xce, domid, dss->guest_evtchn.port, &dss->guest_evtchn_lockfd); + if (dss->remus) { + /* + * With Remus, if we reach this point, it means either + * backup died or some network error occurred preventing us + * from sending checkpoints. Teardown the network buffers and + * release netlink resources. This is an async op. 
+ */ + dss->rs.saved_rc = rc; + dss->rs.callback = libxl__remus_teardown_done; + libxl__remus_device_teardown(egc, &dss->rs); + return; + } + dss->callback(egc, dss, rc); } diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index de0a807..2fdbafa 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -2442,6 +2442,118 @@ typedef struct libxl__save_helper_state { * marshalling and xc callback functions */ } libxl__save_helper_state; +/*----- remus device related state structure -----*/ + +typedef enum libxl__remus_device_kind { + LIBXL__REMUS_DEVICE_NIC, + LIBXL__REMUS_DEVICE_DISK, +} libxl__remus_device_kind; + +typedef struct libxl__remus_state libxl__remus_state; +typedef struct libxl__remus_device libxl__remus_device; +typedef struct libxl__remus_device_state libxl__remus_device_state; +typedef struct libxl__remus_device_ops libxl__remus_device_ops; + +struct libxl__remus_device_ops { + /* + * init device ops private data, etc. must implenment + */ + int (*init)(libxl__remus_device_ops *self, + libxl__remus_state *rs); + /* + * free device ops private data, etc. must implenment + */ + void (*destroy)(libxl__remus_device_ops *self); + /* device ops's private data */ + void *data; + + /* + * checkpoint callbacks, async ops. may not implemented + */ + void (*postsuspend)(libxl__remus_device *dev); + void (*preresume)(libxl__remus_device *dev); + void (*commit)(libxl__remus_device *dev); + + /* + * check whether device ops match the device, async op. must implement + */ + void (*match)(libxl__remus_device_ops *self, + libxl__remus_device *dev); + /* + * setup the remus device, async op. must implement + */ + void (*setup)(libxl__remus_device *dev); + + /* + * teardown the remus device, async op. 
must implement + */ + void (*teardown)(libxl__remus_device *dev); +}; + +struct libxl__remus_device_state { + libxl__ao *ao; + libxl__egc *egc; + + /* devices that have been setuped */ + libxl__remus_device **dev; + + int num_nics; + int num_disks; + + /* for counting devices that have been handled */ + int num_devices; + /* for counting devices that matched and setuped */ + int num_setuped; +}; + +typedef void libxl__remus_device_callback(libxl__egc *, + libxl__remus_device *, + int rc); + +struct libxl__remus_device { + int devid; + /* libxl__device_* which this remus device related to */ + const void *backend_dev; + libxl__remus_device_kind kind; + int ops_index; + libxl__remus_device_ops *ops; + libxl__remus_device_callback *callback; + + /* *kind* of device's private data */ + void *data; + libxl__remus_device_state *rds; + /* for calling scripts */ + libxl__async_exec_state aes; + /* for async func calls */ + libxl__ev_child child; +}; + +typedef void libxl__remus_callback(libxl__egc *, + libxl__remus_state *, int rc); + +struct libxl__remus_state { + libxl__ao *ao; + uint32_t domid; + libxl__remus_callback *callback; + + /* private */ + int saved_rc; + /* context containing device related stuff */ + libxl__remus_device_state dev_state; + + libxl__ev_time timeout; /* used for checkpoint */ +}; + +_hidden void libxl__remus_device_setup(libxl__egc *egc, + libxl__remus_state *rs); +_hidden void libxl__remus_device_teardown(libxl__egc *egc, + libxl__remus_state *rs); +_hidden void libxl__remus_device_postsuspend(libxl__egc *egc, + libxl__remus_state *rs); +_hidden void libxl__remus_device_preresume(libxl__egc *egc, + libxl__remus_state *rs); +_hidden void libxl__remus_device_commit(libxl__egc *egc, + libxl__remus_state *rs); _hidden int libxl__netbuffer_enabled(libxl__gc *gc); /*----- Domain suspend (save) state structure -----*/ @@ -2472,6 +2584,7 @@ struct libxl__domain_suspend_state { int live; int debug; const libxl_domain_remus_info *remus; + 
libxl__remus_state rs; /* private */ libxl__ev_evtchn guest_evtchn; int guest_evtchn_lockfd; diff --git a/tools/libxl/libxl_remus_device.c b/tools/libxl/libxl_remus_device.c new file mode 100644 index 0000000..62c0614 --- /dev/null +++ b/tools/libxl/libxl_remus_device.c @@ -0,0 +1,303 @@ +/* + * Copyright (C) 2014 + * Author: Lai Jiangshan <laijs@xxxxxxxxxxxxxx> + * Yang Hongyang <yanghy@xxxxxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#include "libxl_osdeps.h" /* must come before any other headers */ + +#include "libxl_internal.h" + +static libxl__remus_device_ops *dev_ops[] = { +}; + +static void device_common_cb(libxl__egc *egc, + libxl__remus_device *dev, + int rc) +{ + /* Convenience aliases */ + libxl__remus_device_state *const rds = dev->rds; + libxl__remus_state *const rs = CONTAINER_OF(rds, *rs, dev_state); + + STATE_AO_GC(rs->ao); + + rds->num_devices++; + + if (rc) + rs->saved_rc = ERROR_FAIL; + + if (rds->num_devices == rds->num_setuped) + rs->callback(egc, rs, rs->saved_rc); +} + +void libxl__remus_device_postsuspend(libxl__egc *egc, libxl__remus_state *rs) +{ + int i; + libxl__remus_device *dev; + STATE_AO_GC(rs->ao); + + /* Convenience aliases */ + libxl__remus_device_state *const rds = &rs->dev_state; + + rds->num_devices = 0; + for (i = 0; i < rds->num_setuped; i++) { + dev = rds->dev[i]; + dev->callback = device_common_cb; + if (dev->ops->postsuspend) { + dev->ops->postsuspend(dev); + } else { + rds->num_devices++; + } + } + + if 
(rds->num_devices == rds->num_setuped) + rs->callback(egc, rs, rs->saved_rc); +} + +void libxl__remus_device_preresume(libxl__egc *egc, libxl__remus_state *rs) +{ + int i; + libxl__remus_device *dev; + STATE_AO_GC(rs->ao); + + /* Convenience aliases */ + libxl__remus_device_state *const rds = &rs->dev_state; + + rds->num_devices = 0; + for (i = 0; i < rds->num_setuped; i++) { + dev = rds->dev[i]; + dev->callback = device_common_cb; + if (dev->ops->preresume) { + dev->ops->preresume(dev); + } else { + rds->num_devices++; + } + } + + if (rds->num_devices == rds->num_setuped) + rs->callback(egc, rs, rs->saved_rc); +} + +void libxl__remus_device_commit(libxl__egc *egc, libxl__remus_state *rs) +{ + int i; + libxl__remus_device *dev; + STATE_AO_GC(rs->ao); + + /* + * REMUS TODO: Wait for disk and explicit memory ack (through restore + * callback from remote) before releasing network buffer. + */ + /* Convenience aliases */ + libxl__remus_device_state *const rds = &rs->dev_state; + + rds->num_devices = 0; + for (i = 0; i < rds->num_setuped; i++) { + dev = rds->dev[i]; + dev->callback = device_common_cb; + if (dev->ops->commit) { + dev->ops->commit(dev); + } else { + rds->num_devices++; + } + } + + if (rds->num_devices == rds->num_setuped) + rs->callback(egc, rs, rs->saved_rc); +} + +static void device_setup_cb(libxl__egc *egc, + libxl__remus_device *dev, + int rc) +{ + /* Convenience aliases */ + libxl__remus_device_state *const rds = dev->rds; + libxl__remus_state *const rs = CONTAINER_OF(rds, *rs, dev_state); + + STATE_AO_GC(rs->ao); + + rds->num_devices++; + if (!rc) { + /* remus device has been setuped */ + rds->dev[rds->num_setuped++] = dev; + } else { + /* setup failed */ + rs->saved_rc = ERROR_FAIL; + } + + if (rds->num_devices == (rds->num_nics + rds->num_disks)) + rs->callback(egc, rs, rs->saved_rc); +} + +static void device_match_cb(libxl__egc *egc, + libxl__remus_device *dev, + int rc) +{ + libxl__remus_device_state *const rds = dev->rds; + libxl__remus_state 
*rs = CONTAINER_OF(rds, *rs, dev_state); + + STATE_AO_GC(rs->ao); + + if (rc) { + if (++dev->ops_index >= ARRAY_SIZE(dev_ops) || + rc != ERROR_NOT_MATCH) { + /* the device can not be matched */ + rds->num_devices++; + rs->saved_rc = ERROR_FAIL; + goto out; + } + /* the ops does not match, try next ops */ + dev->ops = dev_ops[dev->ops_index]; + dev->ops->match(dev->ops, dev); + } else { + /* the ops matched, setup the device */ + dev->callback = device_setup_cb; + dev->ops->setup(dev); + } + +out: + if (rds->num_devices == (rds->num_nics + rds->num_disks)) + rs->callback(egc, rs, rs->saved_rc); +} + +static void device_teardown_cb(libxl__egc *egc, + libxl__remus_device *dev, + int rc) +{ + int i; + libxl__remus_device_ops *ops; + libxl__remus_device_state *const rds = dev->rds; + libxl__remus_state *rs = CONTAINER_OF(rds, *rs, dev_state); + + STATE_AO_GC(rs->ao); + + /* ignore teardown errors to teardown as many devs as possible*/ + rds->num_setuped--; + + if (rds->num_setuped == 0) { + /* clean device ops */ + for (i = 0; i < ARRAY_SIZE(dev_ops); i++) { + ops = dev_ops[i]; + ops->destroy(ops); + } + rs->callback(egc, rs, rs->saved_rc); + } +} + +static __attribute__((unused)) void libxl__remus_device_init(libxl__egc *egc, + libxl__remus_device_state *rds, + libxl__remus_device_kind kind, + void *libxl_dev) +{ + libxl__remus_device *dev = NULL; + libxl_device_nic *nic = NULL; + libxl_device_disk *disk = NULL; + + STATE_AO_GC(rds->ao); + GCNEW(dev); + dev->ops_index = 0; /* we will match the ops later */ + dev->backend_dev = libxl_dev; + dev->kind = kind; + dev->rds = rds; + + switch (kind) { + case LIBXL__REMUS_DEVICE_NIC: + nic = libxl_dev; + dev->devid = nic->devid; + break; + case LIBXL__REMUS_DEVICE_DISK: + disk = libxl_dev; + /* there are no dev id for disk devices */ + dev->devid = -1; + break; + default: + return; + } + + libxl__async_exec_init(&dev->aes); + libxl__ev_child_init(&dev->child); + + /* match the ops begin */ + dev->callback = device_match_cb; + 
dev->ops = dev_ops[dev->ops_index]; + dev->ops->match(dev->ops, dev); +} + +void libxl__remus_device_setup(libxl__egc *egc, libxl__remus_state *rs) +{ + int i; + libxl__remus_device_ops *ops; + + /* Convenience aliases */ + libxl__remus_device_state *const rds = &rs->dev_state; + + STATE_AO_GC(rs->ao); + + if (ARRAY_SIZE(dev_ops) == 0) + goto out; + + for (i = 0; i < ARRAY_SIZE(dev_ops); i++) { + ops = dev_ops[i]; + if (ops->init(ops, rs)) { + rs->saved_rc = ERROR_FAIL; + goto out; + } + } + + rds->ao = rs->ao; + rds->egc = egc; + rds->num_devices = 0; + rds->num_nics = 0; + rds->num_disks = 0; + + /* TBD: Remus setup - i.e. attach qdisc, enable disk buffering, etc */ + + GCNEW_ARRAY(rds->dev, rds->num_nics + rds->num_disks); + + /* TBD: CALL libxl__remus_device_init to init remus devices */ + + if (rds->num_nics == 0 && rds->num_disks == 0) + goto out; + + return; + +out: + rs->callback(egc, rs, rs->saved_rc); + return; +} + +void libxl__remus_device_teardown(libxl__egc *egc, libxl__remus_state *rs) +{ + int i; + libxl__remus_device *dev; + + STATE_AO_GC(rs->ao); + + /* Convenience aliases */ + libxl__remus_device_state *const rds = &rs->dev_state; + + if (rds->num_setuped == 0) + goto out; + + for (i = 0; i < rds->num_setuped; i++) { + dev = rds->dev[i]; + dev->callback = device_teardown_cb; + dev->ops->teardown(dev); + } + + return; + +out: + rs->callback(egc, rs, rs->saved_rc); + return; +} diff --git a/tools/libxl/libxl_save_msgs_gen.pl b/tools/libxl/libxl_save_msgs_gen.pl index 745e2ac..36bae04 100755 --- a/tools/libxl/libxl_save_msgs_gen.pl +++ b/tools/libxl/libxl_save_msgs_gen.pl @@ -24,7 +24,7 @@ our @msgs = ( 'unsigned long', 'done', 'unsigned long', 'total'] ], [ 3, 'scxA', "suspend", [] ], - [ 4, 'scxW', "postcopy", [] ], + [ 4, 'scxA', "postcopy", [] ], [ 5, 'scxA', "checkpoint", [] ], [ 6, 'scxA', "switch_qemu_logdirty", [qw(int domid unsigned enable)] ], diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl index 8944686..c757f17 
100644 --- a/tools/libxl/libxl_types.idl +++ b/tools/libxl/libxl_types.idl @@ -43,6 +43,7 @@ libxl_error = Enumeration("error", [ (-12, "OSEVENT_REG_FAIL"), (-13, "BUFFERFULL"), (-14, "UNKNOWN_CHILD"), + (-15, "NOT_MATCH"), ], value_namespace = "") libxl_domain_type = Enumeration("domain_type", [ -- 1.9.1 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |