[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v10 18/31] libxc/restore: support COLO restore
Call the resume/checkpoint/suspend callbacks while the secondary VM's state is consistent with the primary. Signed-off-by: Yang Hongyang <hongyang.yang@xxxxxxxxxxxx> Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx> CC: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> --- tools/libxc/xc_sr_common.h | 4 +-- tools/libxc/xc_sr_restore.c | 60 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 2 deletions(-) diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h index 2bfed64..a24a9ad 100644 --- a/tools/libxc/xc_sr_common.h +++ b/tools/libxc/xc_sr_common.h @@ -234,13 +234,13 @@ struct xc_sr_context uint32_t guest_page_size; /* Plain VM, or checkpoints over time. */ - bool checkpointed; + int checkpointed; /* Currently buffering records between a checkpoint */ bool buffer_all_records; /* - * With Remus, we buffer the records sent by the primary at checkpoint, + * With Remus/COLO, we buffer the records sent by the primary at checkpoint, * in case the primary will fail, we can recover from the last * checkpoint state. 
* This should be enough for most of the cases because primary only send diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c index aef9bca..2ae8154 100644 --- a/tools/libxc/xc_sr_restore.c +++ b/tools/libxc/xc_sr_restore.c @@ -460,6 +460,49 @@ static int handle_checkpoint(struct xc_sr_context *ctx) else ctx->restore.buffer_all_records = true; + if ( ctx->restore.checkpointed == MIG_STREAM_COLO ) + { +#define HANDLE_CALLBACK_RETURN_VALUE(ret) \ + do { \ + if ( ret == 1 ) \ + rc = 0; /* Success */ \ + else \ + { \ + if ( ret == 2 ) \ + rc = BROKEN_CHANNEL; \ + else \ + rc = -1; /* Some unspecified error */ \ + goto err; \ + } \ + } while (0) + + /* COLO */ + + /* We need to resume guest */ + rc = ctx->restore.ops.stream_complete(ctx); + if ( rc ) + goto err; + + /* TODO: call restore_results */ + + /* Resume secondary vm */ + ret = ctx->restore.callbacks->postcopy(ctx->restore.callbacks->data); + HANDLE_CALLBACK_RETURN_VALUE(ret); + + /* Wait for a new checkpoint */ + ret = ctx->restore.callbacks->wait_checkpoint( + ctx->restore.callbacks->data); + HANDLE_CALLBACK_RETURN_VALUE(ret); + + /* suspend secondary vm */ + ret = ctx->restore.callbacks->suspend(ctx->restore.callbacks->data); + HANDLE_CALLBACK_RETURN_VALUE(ret); + +#undef HANDLE_CALLBACK_RETURN_VALUE + + /* TODO: send dirty pfn list to primary */ + } + err: return rc; } @@ -631,6 +674,15 @@ static int restore(struct xc_sr_context *ctx) } while ( rec.type != REC_TYPE_END ); remus_failover: + + if ( ctx->restore.checkpointed == MIG_STREAM_COLO ) + { + /* With COLO, we have already called stream_complete */ + rc = 0; + IPRINTF("COLO Failover"); + goto done; + } + /* * With Remus, if we reach here, there must be some error on primary, * failover from the last checkpoint state. 
@@ -685,6 +737,14 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, if ( checkpointed_stream ) assert(callbacks->checkpoint); + if ( ctx.restore.checkpointed == MIG_STREAM_COLO ) + { + /* this is COLO restore */ + assert(callbacks->suspend && + callbacks->postcopy && + callbacks->wait_checkpoint); + } + DPRINTF("fd %d, dom %u, hvm %u, pae %u, superpages %d" ", checkpointed_stream %d", io_fd, dom, hvm, pae, superpages, checkpointed_stream); -- 2.5.0 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |