[Xen-devel] [PATCH Remus v3 3/3] libxc/restore: implement Remus checkpointed restore
With Remus, the restore flow should be:
the first full migration stream -> { periodically restore stream }

Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx>
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CC: Ian Campbell <Ian.Campbell@xxxxxxxxxx>
CC: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
CC: Wei Liu <wei.liu2@xxxxxxxxxx>
---
 tools/libxc/xc_sr_common.h  |  13 +++++
 tools/libxc/xc_sr_restore.c | 124 +++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 125 insertions(+), 12 deletions(-)

diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
index f8121e7..3740b89 100644
--- a/tools/libxc/xc_sr_common.h
+++ b/tools/libxc/xc_sr_common.h
@@ -208,6 +208,19 @@ struct xc_sr_context
             /* Plain VM, or checkpoints over time. */
             bool checkpointed;

+            /* Currently buffering records between a checkpoint */
+            bool buffer_all_records;
+
+/*
+ * With Remus, we buffer the records sent by primary at checkpoint,
+ * in case the primary will fail, we can recover from the last
+ * checkpoint state.
+ * This should be enough because primary only send dirty pages at
+ * checkpoint.
+ */
+#define MAX_BUF_RECORDS 1024
+            struct xc_sr_record *buffered_records[MAX_BUF_RECORDS];
+
             /*
              * Xenstore and Console parameters.
              * INPUT:    evtchn & domid
diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c
index 0e512ec..85534a8 100644
--- a/tools/libxc/xc_sr_restore.c
+++ b/tools/libxc/xc_sr_restore.c
@@ -468,10 +468,83 @@ static int handle_page_data(struct xc_sr_context *ctx, struct xc_sr_record *rec)
     return rc;
 }

+static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec);
+static int handle_checkpoint(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    int rc = 0, i;
+    struct xc_sr_record *rec;
+
+    if ( !ctx->restore.checkpointed )
+    {
+        ERROR("Found checkpoint in non-checkpointed stream");
+        rc = -1;
+        goto err;
+    }
+
+    if ( ctx->restore.buffer_all_records )
+    {
+        IPRINTF("All records buffered");
+
+        /*
+         * We need to set buffer_all_records to false in
+         * order to process records instead of buffer records.
+         * buffer_all_records should be set back to true after
+         * we successfully processed all records.
+         */
+        ctx->restore.buffer_all_records = false;
+        i = 0;
+        rec = ctx->restore.buffered_records[i++];
+        while (rec)
+        {
+            rc = process_record(ctx, rec);
+            free(rec);
+            ctx->restore.buffered_records[i-1] = NULL;
+            if ( rc )
+                goto err;
+
+            rec = ctx->restore.buffered_records[i++];
+        }
+        IPRINTF("All records processed");
+        ctx->restore.buffer_all_records = true;
+    }
+    else
+        ctx->restore.buffer_all_records = true;
+
+ err:
+    return rc;
+}
+
 static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
 {
     xc_interface *xch = ctx->xch;
-    int rc = 0;
+    int rc = 0, i;
+    struct xc_sr_record *buf_rec;
+
+    if ( ctx->restore.buffer_all_records &&
+         rec->type != REC_TYPE_END &&
+         rec->type != REC_TYPE_CHECKPOINT )
+    {
+        buf_rec = malloc(sizeof(struct xc_sr_record));
+        if (!buf_rec)
+        {
+            ERROR("Unable to allocate memory for record");
+            return -1;
+        }
+        memcpy(buf_rec, rec, sizeof(struct xc_sr_record));
+
+        for ( i = 0; i < MAX_BUF_RECORDS; i++ )
+            if ( !ctx->restore.buffered_records[i] )
+                break;
+
+        if ( i >= MAX_BUF_RECORDS )
+        {
+            ERROR("There are too many records within a checkpoint");
+            return -1;
+        }
+        ctx->restore.buffered_records[i] = buf_rec;
+        return 0;
+    }

     switch ( rec->type )
     {
@@ -487,6 +560,10 @@ static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
         ctx->restore.verify = true;
         break;

+    case REC_TYPE_CHECKPOINT:
+        rc = handle_checkpoint(ctx);
+        break;
+
     default:
         rc = ctx->restore.ops.process_record(ctx, rec);
         break;
@@ -520,7 +597,7 @@ static int restore(struct xc_sr_context *ctx)
 {
     xc_interface *xch = ctx->xch;
     struct xc_sr_record rec;
-    int rc, saved_rc = 0, saved_errno = 0;
+    int rc, saved_rc = 0, saved_errno = 0, i;

     IPRINTF("Restoring domain");

@@ -541,7 +618,27 @@ static int restore(struct xc_sr_context *ctx)
     {
         rc = read_record(ctx, &rec);
         if ( rc )
-            goto err;
+        {
+            if ( ctx->restore.buffer_all_records )
+                goto err_buf;
+            else
+                goto err;
+        }
+
+#ifdef XG_LIBXL_HVM_COMPAT
+        if ( ctx->dominfo.hvm &&
+             (rec.type == REC_TYPE_END || rec.type == REC_TYPE_CHECKPOINT) )
+        {
+            rc = read_qemu(ctx);
+            if ( rc )
+            {
+                if ( ctx->restore.buffer_all_records )
+                    goto err_buf;
+                else
+                    goto err;
+            }
+        }
+#endif

         rc = process_record(ctx, &rec);
         if ( rc )
@@ -549,15 +646,11 @@ static int restore(struct xc_sr_context *ctx)

     } while ( rec.type != REC_TYPE_END );

-#ifdef XG_LIBXL_HVM_COMPAT
-    if ( ctx->dominfo.hvm )
-    {
-        rc = read_qemu(ctx);
-        if ( rc )
-            goto err;
-    }
-#endif
-
+ err_buf:
+    /*
+     * With Remus, if we reach here, there must be some error on primary,
+     * failover from the last checkpoint state.
+     */
     rc = ctx->restore.ops.stream_complete(ctx);
     if ( rc )
         goto err;
@@ -571,6 +664,13 @@ static int restore(struct xc_sr_context *ctx)
         PERROR("Restore failed");

 done:
+    for ( i = 0; i < MAX_BUF_RECORDS; i++)
+    {
+        if ( ctx->restore.buffered_records[i] ) {
+            free(ctx->restore.buffered_records[i]->data);
+            free(ctx->restore.buffered_records[i]);
+        }
+    }
     free(ctx->restore.populated_pfns);
     rc = ctx->restore.ops.cleanup(ctx);
     if ( rc )
--
1.9.1
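
The control flow this patch adds is easiest to read as a small state machine. The restore side applies the first full migration stream directly (buffer_all_records starts out false); the first CHECKPOINT record switches it into buffering mode, and from then on every record except END and CHECKPOINT is stashed rather than applied. When the next CHECKPOINT arrives, the buffered records are applied as one consistent batch and buffering resumes for the following epoch. The sketch below shows just that buffer-until-checkpoint logic in isolation; it is not libxc code, and the names in it (struct toy_record, TOY_MAX_BUF, apply_record(), the TOY_* record types) are invented for illustration.

/*
 * Standalone sketch of the buffer-until-checkpoint flow (NOT libxc code:
 * struct toy_record, TOY_MAX_BUF, apply_record() and the TOY_* record
 * types are invented for illustration).
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TOY_MAX_BUF 16                    /* stands in for MAX_BUF_RECORDS */

enum { TOY_PAGE_DATA, TOY_CHECKPOINT, TOY_END };

struct toy_record {
    int type;
    char payload[32];
};

static struct toy_record *buffered[TOY_MAX_BUF];
static int buffering;                     /* stands in for buffer_all_records */

/* Stand-in for the real record handlers (page data, tsc info, ...). */
static int apply_record(const struct toy_record *rec)
{
    printf("applying: %s\n", rec->payload);
    return 0;
}

/* At a checkpoint: apply everything buffered, then start buffering again. */
static int toy_handle_checkpoint(void)
{
    int rc = 0, i;

    if ( buffering )
    {
        buffering = 0;                    /* apply now instead of re-buffering */
        for ( i = 0; i < TOY_MAX_BUF && buffered[i]; i++ )
        {
            rc = apply_record(buffered[i]);
            free(buffered[i]);
            buffered[i] = NULL;
            if ( rc )
                return rc;
        }
    }
    buffering = 1;                        /* buffer until the next checkpoint */
    return rc;
}

/* Either buffer the record or apply it, mirroring process_record() above. */
static int toy_process_record(const struct toy_record *rec)
{
    int i;

    if ( buffering && rec->type != TOY_END && rec->type != TOY_CHECKPOINT )
    {
        for ( i = 0; i < TOY_MAX_BUF && buffered[i]; i++ )
            ;                             /* find the first free slot */
        if ( i == TOY_MAX_BUF )
            return -1;                    /* too many records in one epoch */

        buffered[i] = malloc(sizeof(*buffered[i]));
        if ( !buffered[i] )
            return -1;
        memcpy(buffered[i], rec, sizeof(*rec));
        return 0;
    }

    if ( rec->type == TOY_CHECKPOINT )
        return toy_handle_checkpoint();

    if ( rec->type == TOY_END )
        return 0;                         /* end of stream, nothing to apply */

    return apply_record(rec);
}

int main(void)
{
    /* First full stream, one checkpointed epoch of dirty pages, then END. */
    struct toy_record stream[] = {
        { TOY_PAGE_DATA,  "initial page A" },
        { TOY_PAGE_DATA,  "initial page B" },
        { TOY_CHECKPOINT, "" },
        { TOY_PAGE_DATA,  "dirty page A" },
        { TOY_CHECKPOINT, "" },
        { TOY_END,        "" },
    };
    unsigned int i;

    for ( i = 0; i < sizeof(stream) / sizeof(stream[0]); i++ )
        if ( toy_process_record(&stream[i]) )
            return 1;

    return 0;
}

The property this buys, and which the err_buf path in the patch relies on, is that a partially received epoch is never applied: if the stream dies mid-checkpoint, the buffered records are simply discarded and the guest resumes from the last completed checkpoint state.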