[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v5 RFC 13/14] tools/libxc: noarch save code
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> Signed-off-by: Frediano Ziglio <frediano.ziglio@xxxxxxxxxx> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx> --- tools/libxc/saverestore/save.c | 545 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 544 insertions(+), 1 deletion(-) diff --git a/tools/libxc/saverestore/save.c b/tools/libxc/saverestore/save.c index f6ad734..9ad43a5 100644 --- a/tools/libxc/saverestore/save.c +++ b/tools/libxc/saverestore/save.c @@ -1,11 +1,554 @@ +#include <assert.h> +#include <arpa/inet.h> + #include "common.h" +/* + * Writes an Image header and Domain header into the stream. + */ +static int write_headers(struct context *ctx, uint16_t guest_type) +{ + xc_interface *xch = ctx->xch; + int32_t xen_version = xc_version(xch, XENVER_version, NULL); + struct ihdr ihdr = + { + .marker = IHDR_MARKER, + .id = htonl(IHDR_ID), + .version = htonl(IHDR_VERSION), + .options = htons(IHDR_OPT_LITTLE_ENDIAN), + }; + struct dhdr dhdr = + { + .type = guest_type, + .page_shift = XC_PAGE_SHIFT, + .xen_major = (xen_version >> 16) & 0xffff, + .xen_minor = (xen_version) & 0xffff, + }; + + if ( xen_version < 0 ) + { + PERROR("Unable to obtain Xen Version"); + return -1; + } + + if ( write_exact(ctx->fd, &ihdr, sizeof(ihdr)) ) + { + PERROR("Unable to write Image Header to stream"); + return -1; + } + + if ( write_exact(ctx->fd, &dhdr, sizeof(dhdr)) ) + { + PERROR("Unable to write Domain Header to stream"); + return -1; + } + + return 0; +} + +/* + * Writes an END record into the stream. + */ +static int write_end_record(struct context *ctx) +{ + struct record end = { REC_TYPE_END, 0, NULL }; + + return write_record(ctx, &end); +} + +/* + * Writes a batch of memory as a PAGE_DATA record into the stream. The batch + * is constructed in ctx->save.batch_pfns. + * + * This function: + * - gets the types for each pfn in the batch. + * - for each pfn with real data: + * - maps and attempts to localise the pages. 
+ * - construct and writes a PAGE_DATA record into the stream. + */ +static int write_batch(struct context *ctx) +{ + xc_interface *xch = ctx->xch; + xen_pfn_t *mfns = NULL, *types = NULL; + void *guest_mapping = NULL; + void **guest_data = NULL; + void **local_pages = NULL; + int *errors = NULL, rc = -1; + unsigned i, p, nr_pages = 0; + unsigned nr_pfns = ctx->save.nr_batch_pfns; + void *page, *orig_page; + uint64_t *rec_pfns = NULL; + struct rec_page_data_header hdr = { 0 }; + struct record rec = + { + .type = REC_TYPE_PAGE_DATA, + }; + + assert(nr_pfns != 0); + + /* Mfns of the batch pfns. */ + mfns = malloc(nr_pfns * sizeof(*mfns)); + /* Types of the batch pfns. */ + types = malloc(nr_pfns * sizeof(*types)); + /* Errors from attempting to map the mfns. */ + errors = malloc(nr_pfns * sizeof(*errors)); + /* Pointers to page data to send. Either mapped mfns or local allocations. */ + guest_data = calloc(nr_pfns, sizeof(*guest_data)); + /* Pointers to locally allocated pages. Need freeing. */ + local_pages = calloc(nr_pfns, sizeof(*local_pages)); + + if ( !mfns || !types || !errors || !guest_data || !local_pages ) + { + ERROR("Unable to allocate arrays for a batch of %u pages", + nr_pfns); + goto err; + } + + for ( i = 0; i < nr_pfns; ++i ) + { + types[i] = mfns[i] = ctx->ops.pfn_to_gfn(ctx, ctx->save.batch_pfns[i]); + + /* Likely a ballooned page. 
*/ + if ( mfns[i] == INVALID_MFN ) + set_bit(ctx->save.batch_pfns[i], ctx->save.deferred_pages); + } + + rc = xc_get_pfn_type_batch(xch, ctx->domid, nr_pfns, types); + if ( rc ) + { + PERROR("Failed to get types for pfn batch"); + goto err; + } + rc = -1; + + for ( i = 0; i < nr_pfns; ++i ) + { + switch ( types[i] ) + { + case XEN_DOMCTL_PFINFO_BROKEN: + case XEN_DOMCTL_PFINFO_XALLOC: + case XEN_DOMCTL_PFINFO_XTAB: + continue; + } + + mfns[nr_pages++] = mfns[i]; + } + + if ( nr_pages > 0 ) + { + guest_mapping = xc_map_foreign_bulk( + xch, ctx->domid, PROT_READ, mfns, errors, nr_pages); + if ( !guest_mapping ) + { + PERROR("Failed to map guest pages"); + goto err; + } + } + + for ( i = 0, p = 0; i < nr_pfns; ++i ) + { + switch ( types[i] ) + { + case XEN_DOMCTL_PFINFO_BROKEN: + case XEN_DOMCTL_PFINFO_XALLOC: + case XEN_DOMCTL_PFINFO_XTAB: + continue; + } + + if ( errors[p] ) + { + ERROR("Mapping of pfn %#lx (mfn %#lx) failed %d", + ctx->save.batch_pfns[i], mfns[p], errors[p]); + goto err; + } + + orig_page = page = guest_mapping + (p * PAGE_SIZE); + rc = ctx->save.ops.normalise_page(ctx, types[i], &page); + if ( rc ) + { + if ( rc == -1 && errno == EAGAIN ) + { + set_bit(ctx->save.batch_pfns[i], ctx->save.deferred_pages); + types[i] = XEN_DOMCTL_PFINFO_XTAB; + --nr_pages; + } + else + goto err; + } + else + guest_data[i] = page; + + if ( page != orig_page ) + local_pages[i] = page; + rc = -1; + + ++p; + } + + rec_pfns = malloc(nr_pfns * sizeof(*rec_pfns)); + if ( !rec_pfns ) + { + ERROR("Unable to allocate %zu bytes of memory for page data pfn list", + nr_pfns * sizeof(*rec_pfns)); + goto err; + } + + hdr.count = nr_pfns; + + rec.length = sizeof(hdr); + rec.length += nr_pfns * sizeof(*rec_pfns); + rec.length += nr_pages * PAGE_SIZE; + + for ( i = 0; i < nr_pfns; ++i ) + rec_pfns[i] = ((uint64_t)(types[i]) << 32) | ctx->save.batch_pfns[i]; + + if ( write_record_header(ctx, &rec) || + write_exact(ctx->fd, &hdr, sizeof(hdr)) || + write_exact(ctx->fd, rec_pfns, nr_pfns 
* sizeof(*rec_pfns)) ) + { + PERROR("Failed to write page_type header to stream"); + goto err; + } + + for ( i = 0; i < nr_pfns; ++i ) + { + if ( guest_data[i] ) + { + if ( write_exact(ctx->fd, guest_data[i], PAGE_SIZE) ) + { + PERROR("Failed to write page into stream"); + goto err; + } + + --nr_pages; + } + } + + /* Sanity check we have sent all the pages we expected to. */ + assert(nr_pages == 0); + rc = ctx->save.nr_batch_pfns = 0; + + err: + free(rec_pfns); + if ( guest_mapping ) + munmap(guest_mapping, nr_pages * PAGE_SIZE); + for ( i = 0; local_pages && i < nr_pfns; ++i ) + free(local_pages[i]); + free(local_pages); + free(guest_data); + free(errors); + free(types); + free(mfns); + + return rc; +} + +/* + * Flush a batch of pfns into the stream. + */ +static int flush_batch(struct context *ctx) +{ + int rc = 0; + + if ( ctx->save.nr_batch_pfns == 0 ) + return rc; + + rc = write_batch(ctx); + + if ( !rc ) + { + VALGRIND_MAKE_MEM_UNDEFINED(ctx->save.batch_pfns, + MAX_BATCH_SIZE * sizeof(*ctx->save.batch_pfns)); + } + + return rc; +} + +/* + * Add a single pfn to the batch, flushing the batch if full. + */ +static int add_to_batch(struct context *ctx, xen_pfn_t pfn) +{ + int rc = 0; + + if ( ctx->save.nr_batch_pfns == MAX_BATCH_SIZE ) + rc = flush_batch(ctx); + + if ( rc == 0 ) + ctx->save.batch_pfns[ctx->save.nr_batch_pfns++] = pfn; + + return rc; +} + +/* + * Pause the domain. + */ +static int pause_domain(struct context *ctx) +{ + xc_interface *xch = ctx->xch; + int rc; + + if ( !ctx->dominfo.paused ) + { + /* TODO: Properly specify the return value from this callback. */ + rc = (ctx->save.callbacks->suspend(ctx->save.callbacks->data) != 1); + if ( rc ) + { + ERROR("Failed to suspend domain"); + return rc; + } + } + + IPRINTF("Domain now paused"); + return 0; +} + +/* + * Send all domain memory. This is the heart of the live migration loop. 
+ */ +static int send_domain_memory(struct context *ctx) +{ + xc_interface *xch = ctx->xch; + DECLARE_HYPERCALL_BUFFER(unsigned long, to_send); + xc_shadow_op_stats_t stats = { -1, -1 }; + unsigned pages_written; + unsigned x, max_iter = 5, dirty_threshold = 50; + xen_pfn_t p; + int rc = -1; + + to_send = xc_hypercall_buffer_alloc_pages( + xch, to_send, NRPAGES(bitmap_size(ctx->save.p2m_size))); + + ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE * sizeof(*ctx->save.batch_pfns)); + ctx->save.deferred_pages = calloc(1, bitmap_size(ctx->save.p2m_size)); + + if ( !ctx->save.batch_pfns || !to_send || !ctx->save.deferred_pages ) + { + ERROR("Unable to allocate memory for to_{send,fix}/batch bitmaps"); + goto out; + } + + if ( xc_shadow_control(xch, ctx->domid, + XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, + NULL, 0, NULL, 0, NULL) < 0 ) + { + PERROR("Failed to enable logdirty"); + goto out; + } + + for ( x = 0, pages_written = 0; x < max_iter ; ++x ) + { + if ( x == 0 ) + { + /* First iteration, send all pages. */ + memset(to_send, 0xff, bitmap_size(ctx->save.p2m_size)); + } + else + { + /* Else consult the dirty bitmap. 
*/ + if ( xc_shadow_control( + xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN, + HYPERCALL_BUFFER(to_send), ctx->save.p2m_size, + NULL, 0, &stats) != ctx->save.p2m_size ) + { + PERROR("Failed to retrieve logdirty bitmap"); + rc = -1; + goto out; + } + else + DPRINTF(" Wrote %u pages; stats: faults %"PRIu32", dirty %"PRIu32, + pages_written, stats.fault_count, stats.dirty_count); + pages_written = 0; + + if ( stats.dirty_count < dirty_threshold ) + break; + } + + DPRINTF("Iteration %u", x); + + for ( p = 0 ; p < ctx->save.p2m_size; ++p ) + { + if ( test_bit(p, to_send) ) + { + rc = add_to_batch(ctx, p); + if ( rc ) + goto out; + ++pages_written; + } + } + + rc = flush_batch(ctx); + if ( rc ) + goto out; + } + + rc = pause_domain(ctx); + if ( rc ) + goto out; + + if ( xc_shadow_control( + xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN, + HYPERCALL_BUFFER(to_send), ctx->save.p2m_size, + NULL, 0, &stats) != ctx->save.p2m_size ) + { + PERROR("Failed to retrieve logdirty bitmap"); + rc = -1; + goto out; + } + + for ( p = 0, pages_written = 0 ; p < ctx->save.p2m_size; ++p ) + { + if ( test_bit(p, to_send) || test_bit(p, ctx->save.deferred_pages) ) + { + rc = add_to_batch(ctx, p); + if ( rc ) + goto out; + ++pages_written; + } + } + + rc = flush_batch(ctx); + if ( rc ) + goto out; + + DPRINTF(" Wrote %u pages", pages_written); + IPRINTF("Sent all pages"); + + out: + xc_hypercall_buffer_free_pages(xch, to_send, + NRPAGES(bitmap_size(ctx->save.p2m_size))); + free(ctx->save.deferred_pages); + free(ctx->save.batch_pfns); + return rc; +} + +/* + * Save a domain. 
+ */ +static int save(struct context *ctx, uint16_t guest_type) +{ + xc_interface *xch = ctx->xch; + int rc, saved_rc = 0, saved_errno = 0; + + IPRINTF("Saving domain %d, type %s", + ctx->domid, dhdr_type_to_str(guest_type)); + + rc = ctx->save.ops.setup(ctx); + if ( rc ) + goto err; + + rc = write_headers(ctx, guest_type); + if ( rc ) + goto err; + + rc = ctx->save.ops.start_of_stream(ctx); + if ( rc ) + goto err; + + rc = send_domain_memory(ctx); + if ( rc ) + goto err; + + /* Refresh domain information now it has paused. */ + if ( (xc_domain_getinfo(xch, ctx->domid, 1, &ctx->dominfo) != 1) || + (ctx->dominfo.domid != ctx->domid) ) + { + PERROR("Unable to refresh domain information"); + rc = -1; + goto err; + } + else if ( (!ctx->dominfo.shutdown || + ctx->dominfo.shutdown_reason != SHUTDOWN_suspend ) && + !ctx->dominfo.paused ) + { + ERROR("Domain has not been suspended"); + rc = -1; + goto err; + } + + rc = ctx->save.ops.end_of_stream(ctx); + if ( rc ) + goto err; + + rc = write_end_record(ctx); + if ( rc ) + goto err; + + xc_shadow_control(xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_OFF, + NULL, 0, NULL, 0, NULL); + + IPRINTF("Save successful"); + goto done; + + err: + saved_errno = errno; + saved_rc = rc; + PERROR("Save failed"); + + done: + rc = ctx->save.ops.cleanup(ctx); + if ( rc ) + PERROR("Failed to clean up"); + + if ( saved_rc ) + { + rc = saved_rc; + errno = saved_errno; + } + + return rc; +}; + int xc_domain_save2(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iters, uint32_t max_factor, uint32_t flags, struct save_callbacks* callbacks, int hvm) { + struct context ctx = + { + .xch = xch, + .fd = io_fd, + }; + + /* GCC 4.4 (of CentOS 6.x vintage) can' t initialise anonymous unions :( */ + ctx.save.callbacks = callbacks; + IPRINTF("In experimental %s", __func__); - return -1; + + if ( xc_domain_getinfo(xch, dom, 1, &ctx.dominfo) != 1 ) + { + PERROR("Failed to get domain info"); + return -1; + } + + if ( ctx.dominfo.domid != dom ) + { + 
ERROR("Domain %d does not exist", dom); + return -1; + } + + ctx.domid = dom; + IPRINTF("Saving domain %d", dom); + + ctx.save.p2m_size = xc_domain_maximum_gpfn(xch, dom) + 1; + if ( ctx.save.p2m_size > ~XEN_DOMCTL_PFINFO_LTAB_MASK ) + { + errno = E2BIG; + ERROR("Cannot save this big a guest"); + return -1; + } + + if ( ctx.dominfo.hvm ) + { + ctx.ops = common_ops_x86_hvm; + ctx.save.ops = save_ops_x86_hvm; + return save(&ctx, DHDR_TYPE_X86_HVM); + } + else + { + ctx.ops = common_ops_x86_pv; + ctx.save.ops = save_ops_x86_pv; + return save(&ctx, DHDR_TYPE_X86_PV); + } } /* -- 1.7.10.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.