[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v10 16/31] secondary vm suspend/resume/checkpoint code



On Mon, Feb 22, 2016 at 10:52:20AM +0800, Wen Congyang wrote:
> Secondary vm is running in colo mode. So we will do
> the following things again and again:
> 1. Resume secondary vm
>    a. Send CHECKPOINT_SVM_READY to master.
>    b. If it is not the first resume, call 
> libxl__checkpoint_devices_preresume().
>    c. If it is the first resume(resume right after live migration),
>       - call libxl__xc_domain_restore_done() to build the secondary vm.
>       - enable secondary vm's logdirty.
>       - call libxl__domain_resume() to resume secondary vm.
>       - call libxl__checkpoint_devices_setup() to setup checkpoint devices.
>    d. Send CHECKPOINT_SVM_RESUMED to master.
> 2. Wait a new checkpoint
>    a. Call libxl__checkpoint_devices_commit().
>    b. Read CHECKPOINT_NEW from master.
> 3. Suspend secondary vm
>    a. Suspend secondary vm.
>    b. Call libxl__checkpoint_devices_postsuspend().
>    c. Send CHECKPOINT_SVM_SUSPENDED to master.
> 
> Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
> Signed-off-by: Yang Hongyang <hongyang.yang@xxxxxxxxxxxx>
> ---
>  tools/libxc/xc_sr_common.h       |    2 +
>  tools/libxc/xc_sr_save.c         |    3 +-
>  tools/libxl/Makefile             |    1 +
>  tools/libxl/libxl_colo.h         |   24 +
>  tools/libxl/libxl_colo_restore.c | 1038 
> ++++++++++++++++++++++++++++++++++++++
>  tools/libxl/libxl_create.c       |   37 ++
>  tools/libxl/libxl_internal.h     |   19 +
>  tools/libxl/libxl_save_callout.c |    7 +-
>  tools/libxl/libxl_stream_read.c  |   12 +
>  tools/libxl/libxl_types.idl      |    1 +

There is a bunch of TODOs in libxl_colo.c but I don't think you're in a
better position to judge whether they should be blocker or not.

>  10 files changed, 1142 insertions(+), 2 deletions(-)
>  create mode 100644 tools/libxl/libxl_colo.h
>  create mode 100644 tools/libxl/libxl_colo_restore.c
> 
> diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
> index 5d9f497..2bfed64 100644
> --- a/tools/libxc/xc_sr_common.h
> +++ b/tools/libxc/xc_sr_common.h
> @@ -184,10 +184,12 @@ struct xc_sr_context
>       * migration stream
>       * 0: Plain VM
>       * 1: Remus
> +     * 2: COLO
>       */
>      enum {
>          MIG_STREAM_NONE, /* plain stream */
>          MIG_STREAM_REMUS,
> +        MIG_STREAM_COLO,
>      } migration_stream;
>  
>      union /* Common save or restore data. */
> diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c
> index fe210cc..7393355 100644
> --- a/tools/libxc/xc_sr_save.c
> +++ b/tools/libxc/xc_sr_save.c
> @@ -846,7 +846,8 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t 
> dom,
>  
>      /* If altering migration_stream update this assert too. */
>      assert(checkpointed_stream == MIG_STREAM_NONE ||
> -           checkpointed_stream == MIG_STREAM_REMUS);
> +           checkpointed_stream == MIG_STREAM_REMUS ||
> +           checkpointed_stream == MIG_STREAM_COLO);
>  
>      /*
>       * TODO: Find some time to better tweak the live migration algorithm.

[...]

> +
> +#include "libxl_osdeps.h" /* must come before any other headers */
> +
> +#include "libxl_internal.h"
> +#include "libxl_colo.h"
> +#include "libxl_sr_stream_format.h"
> +
> +enum {
> +    LIBXL_COLO_SETUPED,
> +    LIBXL_COLO_SUSPENDED,
> +    LIBXL_COLO_RESUMED,
> +};
> +
> +typedef struct libxl__colo_restore_checkpoint_state 
> libxl__colo_restore_checkpoint_state;
> +struct libxl__colo_restore_checkpoint_state {
> +    libxl__domain_suspend_state dsps;
> +    libxl__logdirty_switch lds;
> +    libxl__colo_restore_state *crs;
> +    libxl__stream_write_state sws;
> +    int status;
> +    bool preresume;
> +    /* used for teardown */
> +    int teardown_devices;
> +    int saved_rc;
> +    char *state_file;
> +
> +    void (*callback)(libxl__egc *,
> +                     libxl__colo_restore_checkpoint_state *,
> +                     int);
> +};
> +

Shouldn't the enum and struct belong to libxl_colo.h ?

> +
> +static void libxl__colo_restore_domain_resume_callback(void *data);
> +static void libxl__colo_restore_domain_checkpoint_callback(void *data);
> +static void libxl__colo_restore_domain_wait_checkpoint_callback(void *data);
> +static void libxl__colo_restore_domain_suspend_callback(void *data);
> +
> +static const libxl__checkpoint_device_instance_ops *colo_restore_ops[] = {
> +    NULL,
> +};
> +

It would be helpful to list the callbacks at the beginning of the time
in the order they are supposed to occur.

See libxl_create.c for example. Search for "Event callbacks, in this
order".

I've tried to map the algorithm you described in commit message to all
the callbacks, but without some references it is just too time consuming
from my end.

I think what I'm going to do is to make sure the normal path that
doesn't use COLO is not broken and leave the internal to you and Ian (if
he fancies to dig into details).

[...]
> +
> +void libxl__colo_restore_setup(libxl__egc *egc,
> +                               libxl__colo_restore_state *crs)
> +{
> +    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
> +    libxl__colo_restore_checkpoint_state *crcs;
> +    int rc = ERROR_FAIL;
> +
> +    /* Convenience aliases */
> +    libxl__srm_restore_autogen_callbacks *const callbacks =
> +        &dcs->srs.shs.callbacks.restore.a;
> +    const int domid = crs->domid;
> +
> +    STATE_AO_GC(crs->ao);
> +
> +    GCNEW(crcs);
> +    crs->crcs = crcs;
> +    crcs->crs = crs;
> +
> +    /* setup dsps */
> +    crcs->dsps.ao = ao;
> +    crcs->dsps.domid = domid;
> +    if (init_dsps(&crcs->dsps))
> +        goto err;
> +
> +    callbacks->suspend = libxl__colo_restore_domain_suspend_callback;
> +    callbacks->postcopy = libxl__colo_restore_domain_resume_callback;
> +    callbacks->checkpoint = libxl__colo_restore_domain_checkpoint_callback;
> +    callbacks->wait_checkpoint = 
> libxl__colo_restore_domain_wait_checkpoint_callback;
> +
> +    /*
> +     * Secondary vm is running in colo mode, so we need to call
> +     * libxl__xc_domain_restore_done() to create secondary vm.
> +     * But we will exit in domain_create_cb(). So replace the
> +     * callback here.
> +     */
> +    crs->saved_cb = dcs->callback;
> +    dcs->callback = libxl__colo_domain_create_cb;
> +    crcs->state_file = GCSPRINTF(LIBXL_DEVICE_MODEL_RESTORE_FILE".%d", 
> domid);

Can you use a different name space from the normal one?

For example, you can put

 #define LIBXL_COLO_DEVICE_MODEL_RESTORE_FILE    XXXX

in libxl_colo.h and use it in all COLO code.


> +    crcs->status = LIBXL_COLO_SETUPED;
> +
> +    libxl__logdirty_init(&crcs->lds);
> +    crcs->lds.ao = ao;
> +
> +    crcs->sws.fd = crs->send_back_fd;
> +    crcs->sws.ao = ao;
> +    crcs->sws.back_channel = true;
> +
> +    dcs->cds.concrete_data = crs;
> +
> +    libxl__stream_write_start(egc, &crcs->sws);
> +
> +    rc = 0;
> +
> +out:
> +    crs->callback(egc, crs, rc);
> +    return;
> +
> +err:
> +    goto out;
> +}
> +
> +static void libxl__colo_domain_create_cb(libxl__egc *egc,
> +                                         libxl__domain_create_state *dcs,
> +                                         int rc, uint32_t domid)
> +{
> +    libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
> +
> +    crcs->callback(egc, crcs, rc);
> +}
> +
> +
[...]
> +
> +static void colo_disable_logdirty_done(libxl__egc *egc,
> +                                       libxl__logdirty_switch *lds,
> +                                       int rc)
> +{
> +    libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, 
> lds);
> +
> +    EGC_GC;
> +
> +    if (rc)
> +        LOG(WARN, "cannot disable logdirty");
> +
> +    if (crcs->status == LIBXL_COLO_SUSPENDED) {
> +        /*
> +         * failover when reading state from master, so no need to
> +         * call libxl__domain_restore().

You need to update this comment to the right function name.

> +         */
> +        colo_resume_vm(egc, crcs, 0);
> +        return;
> +    }
> +
> +    /* If we cannot disable logdirty, we still can do failover */
> +    crcs->callback(egc, crcs, 0);
> +}
> +
[...]
>  
> +/* colo related structure */
> +typedef struct libxl__colo_restore_state libxl__colo_restore_state;
> +typedef void libxl__colo_callback(libxl__egc *,
> +                                  libxl__colo_restore_state *, int rc);
> +struct libxl__colo_restore_state {
> +    /* must set by caller of libxl__colo_(setup|teardown) */
> +    libxl__ao *ao;
> +    uint32_t domid;
> +    int send_back_fd;
> +    int recv_fd;
> +    int hvm;
> +    libxl__colo_callback *callback;
> +
> +    /* private, colo restore checkpoint state */
> +    libxl__domain_create_cb *saved_cb;
> +    void *crcs;
> +};
>  

And this should go to libxl_colo.h, too? And libxl_internal.h includes
libxl_colo.h?

I just don't want to colo structures and functions scatter in
different places.

>  struct libxl__domain_create_state {
>      /* filled in by user */
> @@ -3486,6 +3503,8 @@ struct libxl__domain_create_state {
>      /* private to domain_create */
>      int guest_domid;
>      libxl__domain_build_state build_state;
> +    libxl__colo_restore_state crs;
> +    libxl__checkpoint_devices_state cds;
>      libxl__bootloader_state bl;
>      libxl__stub_dm_spawn_state dmss;
>          /* If we're not doing stubdom, we use only dmss.dm,
> diff --git a/tools/libxl/libxl_save_callout.c 
> b/tools/libxl/libxl_save_callout.c
> index 0d6949a..b1810b2 100644
> --- a/tools/libxl/libxl_save_callout.c
> +++ b/tools/libxl/libxl_save_callout.c
> @@ -15,6 +15,7 @@
>  #include "libxl_osdeps.h"
>  
>  #include "libxl_internal.h"
> +#include "libxl_colo.h"
>  
>  /* stream_fd is as from the caller (eventually, the application).
>   * It may be 0, 1 or 2, in which case we need to dup it elsewhere.
> @@ -68,7 +69,11 @@ void libxl__xc_domain_restore(libxl__egc *egc, 
> libxl__domain_create_state *dcs,
>      shs->ao = ao;
>      shs->domid = domid;
>      shs->recv_callback = libxl__srm_callout_received_restore;
> -    shs->completion_callback = libxl__xc_domain_restore_done;
> +    if (dcs->restore_params.checkpointed_stream ==
> +                                                
> LIBXL_CHECKPOINTED_STREAM_COLO)

This is very strange line wrap.

> +        shs->completion_callback = libxl__colo_restore_teardown;
> +    else
> +        shs->completion_callback = libxl__xc_domain_restore_done;
>      shs->caller_state = dcs;
>      shs->need_results = 1;
>  
> diff --git a/tools/libxl/libxl_stream_read.c b/tools/libxl/libxl_stream_read.c
> index 5d980d9..d6bd2fe 100644
> --- a/tools/libxl/libxl_stream_read.c
> +++ b/tools/libxl/libxl_stream_read.c
> @@ -846,6 +846,18 @@ void libxl__xc_domain_restore_done(libxl__egc *egc, void 
> *dcs_void,
>       */
>      if (libxl__stream_read_inuse(stream)) {
>          switch (checkpointed_stream) {
> +        case LIBXL_CHECKPOINTED_STREAM_COLO:
> +            if (stream->completion_callback) {
> +                /*
> +                 * restore, just build the secondary vm, don't close
> +                 * the stream
> +                 */
> +                stream->completion_callback(egc, stream, 0);
> +            } else {
> +                /* failover, just close the stream */
> +                stream_complete(egc, stream, 0);
> +            }
> +            break;
>          case LIBXL_CHECKPOINTED_STREAM_REMUS:
>              /*
>               * Failover from primary. Domain state is currently at a
> diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
> index 632c009..33f4a90 100644
> --- a/tools/libxl/libxl_types.idl
> +++ b/tools/libxl/libxl_types.idl
> @@ -232,6 +232,7 @@ libxl_hdtype = Enumeration("hdtype", [
>  libxl_checkpointed_stream = Enumeration("checkpointed_stream", [
>      (0, "NONE"),
>      (1, "REMUS"),
> +    (2, "COLO"),
>      ])
>  
>  #
> -- 
> 2.5.0
> 
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.