Xen project Mailing List

Re: [Xen-devel] [PATCH Remus v3 3/3] libxc/restore: implement Remus checkpointed restore

On 13/05/15 09:35, Yang Hongyang wrote: > With Remus, the restore flow should be: > the first full migration stream -> { periodically restore stream } > > Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx> > Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> > CC: Ian Campbell <Ian.Campbell@xxxxxxxxxx> > CC: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx> > CC: Wei Liu <wei.liu2@xxxxxxxxxx> > --- > tools/libxc/xc_sr_common.h | 13 +++++ > tools/libxc/xc_sr_restore.c | 124 > +++++++++++++++++++++++++++++++++++++++----- > 2 files changed, 125 insertions(+), 12 deletions(-) > > diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h > index f8121e7..3740b89 100644 > --- a/tools/libxc/xc_sr_common.h > +++ b/tools/libxc/xc_sr_common.h > @@ -208,6 +208,19 @@ struct xc_sr_context > /* Plain VM, or checkpoints over time. */ > bool checkpointed; > > + /* Currently buffering records between a checkpoint */ > + bool buffer_all_records; > + > +/* > + * With Remus, we buffer the records sent by primary at checkpoint, "sent by the primary" > + * in case the primary will fail, we can recover from the last > + * checkpoint state. > + * This should be enough because primary only send dirty pages at > + * checkpoint. > + */ > +#define MAX_BUF_RECORDS 1024 As a general point, it occurs to me that this might be better as a linked list, rather than having a hard limit. A burst of activity on the primary could potentially hit this limit > + struct xc_sr_record *buffered_records[MAX_BUF_RECORDS]; Your data handling will be much more simple if this were struct xc_sr_record *buffered_records; and you do a one-time allocation of MAX_BUF_RECORDS * sizeof(struct xc_sr_record), although it will require an index integer as well. It would probably be best to factor out setup() and clean() just like you did for the save side. > + > /* > * Xenstore and Console parameters. > * INPUT: evtchn & domid > diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c > index 0e512ec..85534a8 100644 > --- a/tools/libxc/xc_sr_restore.c > +++ b/tools/libxc/xc_sr_restore.c > @@ -468,10 +468,83 @@ static int handle_page_data(struct xc_sr_context *ctx, > struct xc_sr_record *rec) > return rc; > } > > +static int process_record(struct xc_sr_context *ctx, struct xc_sr_record > *rec); > +static int handle_checkpoint(struct xc_sr_context *ctx) > +{ > + xc_interface *xch = ctx->xch; > + int rc = 0, i; Always an unsigned i for populated_pfnsindexing an array please. > + struct xc_sr_record *rec; > + > + if ( !ctx->restore.checkpointed ) > + { > + ERROR("Found checkpoint in non-checkpointed stream"); > + rc = -1; > + goto err; > + } > + > + if ( ctx->restore.buffer_all_records ) > + { > + IPRINTF("All records buffered"); > + > + /* > + * We need to set buffer_all_records to false in > + * order to process records instead of buffer records. > + * buffer_all_records should be set back to true after > + * we successfully processed all records. > + */ > + ctx->restore.buffer_all_records = false; > + i = 0; > + rec = ctx->restore.buffered_records[i++]; > + while (rec) Style (hypervisor style). > + { > + rc = process_record(ctx, rec); > + free(rec); > + ctx->restore.buffered_records[i-1] = NULL; > + if ( rc ) > + goto err; > + > + rec = ctx->restore.buffered_records[i++]; > + } > + IPRINTF("All records processed"); > + ctx->restore.buffer_all_records = true; > + } > + else > + ctx->restore.buffer_all_records = true; > + > + err: > + return rc; > +} > + > static int process_record(struct xc_sr_context *ctx, struct xc_sr_record > *rec) > { > xc_interface *xch = ctx->xch; > - int rc = 0; > + int rc = 0, i; > + struct xc_sr_record *buf_rec; > + > + if ( ctx->restore.buffer_all_records && > + rec->type != REC_TYPE_END && > + rec->type != REC_TYPE_CHECKPOINT ) > + { > + buf_rec = malloc(sizeof(struct xc_sr_record)); > + if (!buf_rec) > + { > + ERROR("Unable to allocate memory for record"); > + return -1; > + } > + memcpy(buf_rec, rec, sizeof(struct xc_sr_record)); > + > + for ( i = 0; i < MAX_BUF_RECORDS; i++ ) > + if ( !ctx->restore.buffered_records[i] ) > + break; > + > + if ( i >= MAX_BUF_RECORDS ) > + { > + ERROR("There are too many records within a checkpoint"); > + return -1; > + } > + ctx->restore.buffered_records[i] = buf_rec; > + return 0; > + } > > switch ( rec->type ) > { > @@ -487,6 +560,10 @@ static int process_record(struct xc_sr_context *ctx, > struct xc_sr_record *rec) > ctx->restore.verify = true; > break; > > + case REC_TYPE_CHECKPOINT: > + rc = handle_checkpoint(ctx); > + break; > + > default: > rc = ctx->restore.ops.process_record(ctx, rec); > break; > @@ -520,7 +597,7 @@ static int restore(struct xc_sr_context *ctx) > { > xc_interface *xch = ctx->xch; > struct xc_sr_record rec; > - int rc, saved_rc = 0, saved_errno = 0; > + int rc, saved_rc = 0, saved_errno = 0, i; > > IPRINTF("Restoring domain"); > > @@ -541,7 +618,27 @@ static int restore(struct xc_sr_context *ctx) > { > rc = read_record(ctx, &rec); > if ( rc ) > - goto err; > + { > + if ( ctx->restore.buffer_all_records ) > + goto err_buf; Please can we choose a label sufficiently different to "err". "resume_from_checkpoint" perhaps? ~Andrew > + else > + goto err; > + } > + > +#ifdef XG_LIBXL_HVM_COMPAT > + if ( ctx->dominfo.hvm && > + (rec.type == REC_TYPE_END || rec.type == REC_TYPE_CHECKPOINT) ) > + { > + rc = read_qemu(ctx); > + if ( rc ) > + { > + if ( ctx->restore.buffer_all_records ) > + goto err_buf; > + else > + goto err; > + } > + } > +#endif > > rc = process_record(ctx, &rec); > if ( rc ) > @@ -549,15 +646,11 @@ static int restore(struct xc_sr_context *ctx) > > } while ( rec.type != REC_TYPE_END ); > > -#ifdef XG_LIBXL_HVM_COMPAT > - if ( ctx->dominfo.hvm ) > - { > - rc = read_qemu(ctx); > - if ( rc ) > - goto err; > - } > -#endif > - > + err_buf: > + /* > + * With Remus, if we reach here, there must be some error on primary, > + * failover from the last checkpoint state. > + */ > rc = ctx->restore.ops.stream_complete(ctx); > if ( rc ) > goto err; > @@ -571,6 +664,13 @@ static int restore(struct xc_sr_context *ctx) > PERROR("Restore failed"); > > done: > + for ( i = 0; i < MAX_BUF_RECORDS; i++) > + { > + if ( ctx->restore.buffered_records[i] ) { > + free(ctx->restore.buffered_records[i]->data); > + free(ctx->restore.buffered_records[i]); > + } > + } > free(ctx->restore.populated_pfns); > rc = ctx->restore.ops.cleanup(ctx); > if ( rc ) _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.