[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 1 of 3 V3] tools/libxc: Remus Checkpoint Compression
# HG changeset patch # User Shriram Rajagopalan <rshriram@xxxxxxxxx> # Date 1318545413 25200 # Node ID 47fdd52af616131142ac5faeacd83f40786da59a # Parent 4b0907c6a08c348962bd976c2976257b412408be tools/libxc: Remus Checkpoint Compression Instead of sending dirty pages of guest memory as-is, use a simple compression algorithm that sends a RLE-encoded XOR of the page against its last sent copy. A small LRU cache is used to hold recently dirtied pages. Pagetable pages are sent as-is, as they are canonicalized at sender side and uncanonicalized at receiver. Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx> diff -r 4b0907c6a08c -r 47fdd52af616 tools/libxc/Makefile --- a/tools/libxc/Makefile Tue Oct 11 12:02:58 2011 +0100 +++ b/tools/libxc/Makefile Thu Oct 13 15:36:53 2011 -0700 @@ -42,7 +42,7 @@ GUEST_SRCS-y := GUEST_SRCS-y += xg_private.c xc_suspend.c GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_domain_save.c -GUEST_SRCS-$(CONFIG_MIGRATE) += xc_offline_page.c +GUEST_SRCS-$(CONFIG_MIGRATE) += xc_offline_page.c xc_compression.c GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c vpath %.c ../../xen/common/libelf diff -r 4b0907c6a08c -r 47fdd52af616 tools/libxc/xc_compression.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/xc_compression.c Thu Oct 13 15:36:53 2011 -0700 @@ -0,0 +1,545 @@ +/****************************************************************************** + * xc_compression.c + * + * Checkpoint Compression using Page Delta Algorithm. + * - A LRU cache of recently dirtied guest pages is maintained. + * - For each dirty guest page in the checkpoint, if a previous version of the + * page exists in the cache, XOR both pages and send the non-zero sections + * to the receiver. The cache is then updated with the newer copy of guest page. + * - The receiver will XOR the non-zero sections against its copy of the guest + * page, thereby bringing the guest page up-to-date with the sender side. 
+ * + * Copyright (c) 2011 Shriram Rajagopalan (rshriram@xxxxxxxxx). + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <inttypes.h> +#include <errno.h> +#include "xenctrl.h" +#include "xg_save_restore.h" +#include "xg_private.h" +#include "xc_dom.h" + +/* Page Cache for Delta Compression*/ +#define DELTA_CACHE_SIZE (XC_PAGE_SIZE * 8192) + +/* Internal page buffer to hold dirty pages of a checkpoint, + * to be compressed after the domain is resumed for execution. 
+ */ +#define PAGE_BUFFER_SIZE (XC_PAGE_SIZE * 8192) + +struct cache_page +{ + char *page; + xen_pfn_t pfn; + struct cache_page *next; + struct cache_page *prev; +}; + +struct compression_ctx +{ + /* compression buffer - holds compressed data */ + uint8_t *compbuf; + unsigned long compbuf_size; + unsigned long compbuf_pos; + + /* Page buffer to hold pages to be compressed */ + char *inputbuf; + /* pfns of pages to be compressed */ + xen_pfn_t *sendbuf_pfns; + unsigned int pfns_len; + unsigned int pfns_index; + + /* Compression Cache (LRU) */ + char *cache_base; + struct cache_page **pfn2cache; + struct cache_page *cache; + struct cache_page *page_list_head; + struct cache_page *page_list_tail; + unsigned long dom_pfnlist_size; +}; + +#define RUNFLAG 0 +#define SKIPFLAG ((uint8_t)128) +#define FLAGMASK SKIPFLAG +#define LENMASK ((uint8_t)127) + +/* + * see xg_save_restore.h for details on the compressed stream format. + * delta size = 4 bytes. + * run header = 1 byte (1 bit for runtype, 7bits for run length). + * i.e maximum size of a run = 127 * 4 = 508 bytes. + * Worst case compression: Entire page has changed. + * In the worst case, the size of the compressed page is + * 8 runs of 508 bytes + 1 run of 32 bytes + 9 run headers + * = 4105 bytes. + * We could detect this worst case and send the entire page with a + * FULL_PAGE marker, reducing the total size to 4097 bytes. The cost + * of this size reduction is an additional memcpy, on top of two previous + * memcpy (to the compressed stream and the cache page in the for loop). + * + * We might as well sacrifice an extra 8 bytes instead of a memcpy. + */ +#define WORST_COMP_PAGE_SIZE (XC_PAGE_SIZE + 9) + +/* + * A zero length skip indicates full page. + */ +#define EMPTY_PAGE 0 +#define FULL_PAGE SKIPFLAG +#define FULL_PAGE_SIZE (XC_PAGE_SIZE + 1) +#define MAX_DELTAS (XC_PAGE_SIZE/sizeof(uint32_t)) + +/* + * Add a pagetable page or a new page (uncached) + * if srcpage is a pagetable page, cache_page is null. 
+ * if srcpage is a page that was not previously in the cache, + * cache_page points to a free page slot in the cache where + * this new page can be copied to. + */ +static int add_full_page(comp_ctx *ctx, char *srcpage, char *cache_page) +{ + uint8_t *dest = (ctx->compbuf + ctx->compbuf_pos); + + if ( (ctx->compbuf_pos + FULL_PAGE_SIZE) > ctx->compbuf_size) + return -1; + + if (cache_page) + memcpy(cache_page, srcpage, XC_PAGE_SIZE); + dest[0] = FULL_PAGE; + memcpy(&dest[1], srcpage, XC_PAGE_SIZE); + ctx->compbuf_pos += FULL_PAGE_SIZE; + + return FULL_PAGE_SIZE; +} + +static int compress_page(comp_ctx *ctx, char *srcpage, char *cache_page) +{ + uint8_t *dest = (ctx->compbuf + ctx->compbuf_pos); + uint32_t *new, *old; + + int off, runptr = 0; + int wascopying = 0, copying = 0, bytes_skipped = 0; + int complen = 0, pageoff = 0, runbytes = 0; + + uint8_t runlen = 0; + + if ( (ctx->compbuf_pos + WORST_COMP_PAGE_SIZE) > ctx->compbuf_size) + return -1; + + /* + * There are no alignment issues here since srcpage is + * domU's page passed from xc_domain_save and cache_page is + * a ptr to cache page (cache is page aligned). + */ + new = (uint32_t*)srcpage; + old = (uint32_t*)cache_page; + + for (off = 0; off <= MAX_DELTAS; off++) + { + /* + * At (off == MAX_DELTAS), we are processing the last run + * in the page. Since there is no XORing, make wascopying != copying + * to satisfy the if-block below. + */ + copying = ((off < MAX_DELTAS) ? (old[off] != new[off]) : !wascopying); + + if (runlen) + { + /* switching between run types or current run is full */ + if ( (wascopying != copying) || (runlen == LENMASK) ) + { + runbytes = runlen * sizeof(uint32_t); + runlen |= (wascopying ? 
RUNFLAG : SKIPFLAG); + dest[complen++] = runlen; + + if (wascopying) /* RUNFLAG */ + { + pageoff = runptr * sizeof(uint32_t); + memcpy(dest + complen, srcpage + pageoff, runbytes); + memcpy(cache_page + pageoff, srcpage + pageoff, runbytes); + complen += runbytes; + } + else /* SKIPFLAG */ + { + bytes_skipped += runbytes; + } + + runlen = 0; + runptr = off; + } + } + runlen++; + wascopying = copying; + } + + /* + * Check for empty page. + */ + if (bytes_skipped == XC_PAGE_SIZE) + { + complen = 1; + dest[0] = EMPTY_PAGE; + } + ctx->compbuf_pos += complen; + + return complen; +} + +static +int uncompress_page(xc_interface *xch, char *destpage, + unsigned long *compbuf_pos, uint8_t *compbuf, + unsigned long compbuf_size) +{ + uint8_t *src; + int pagepos = 0; + uint8_t flag, len; + + unsigned long pos = *compbuf_pos; + + if (pos >= compbuf_size) + { + ERROR("Out of bounds exception in compression buffer (a):" + "read ptr:%lu, bufsize = %lu\n", + *compbuf_pos, compbuf_size); + return -1; + } + + src = (compbuf + pos); + if (src[0] == EMPTY_PAGE) + { + pos++; + } + else if (src[0] == FULL_PAGE) + { + pos += FULL_PAGE_SIZE; + + /* Make sure that the input buffer actually has a full 4K page */ + if (pos > compbuf_size) + { + ERROR("Out of bounds exception in compression buffer (b):" + "read ptr %lu, bufsize = %lu\n", + *compbuf_pos, compbuf_size); + return -1; + } + memcpy(destpage, &src[1], XC_PAGE_SIZE); + } + else + { + while (pagepos < XC_PAGE_SIZE) + { + if ((pos + 1) > compbuf_size) + { + ERROR("Out of bounds exception in compression buffer (c):" + "read ptr %lu, bufsize = %lu\n", + pos, compbuf_size); + return -1; + } + + flag = *src & FLAGMASK; + len = *src & LENMASK; + pos++; + src++; + + if (flag == RUNFLAG) + { + if ((pos + len * sizeof(uint32_t)) > compbuf_size) + { + ERROR("Out of bounds exception in compression buffer (d):" + "read ptr %lu, runlen = %u, bufsize = %lu\n", + pos, len * sizeof(uint32_t), compbuf_size); + return -1; + } + + 
memcpy(&destpage[pagepos], src, len * sizeof(uint32_t)); + pos += len * sizeof(uint32_t); + src += len * sizeof(uint32_t); + } + pagepos += len * sizeof(uint32_t); + } + } + + *compbuf_pos = pos; + return 0; +} + +static +char *get_cache_page(comp_ctx *ctx, xen_pfn_t pfn, + int *israw) +{ + struct cache_page *item = NULL; + + item = ctx->pfn2cache[pfn]; + + if (!item) + { + *israw = 1; + + /* If the list is full, evict a page from the tail end. */ + item = ctx->page_list_tail; + if (item->pfn != INVALID_P2M_ENTRY) + ctx->pfn2cache[item->pfn] = NULL; + + item->pfn = pfn; + ctx->pfn2cache[pfn] = item; + } + + /* if requested item is in cache move to head of list */ + if (item != ctx->page_list_head) + { + if (item == ctx->page_list_tail) + { + /* item at tail of list. */ + ctx->page_list_tail = item->prev; + (ctx->page_list_tail)->next = NULL; + } + else + { + /* item in middle of list */ + item->prev->next = item->next; + item->next->prev = item->prev; + } + + item->prev = NULL; + item->next = ctx->page_list_head; + (ctx->page_list_head)->prev = item; + ctx->page_list_head = item; + } + + return (ctx->page_list_head)->page; +} + +/* Remove pagetable pages from cache and move to tail, as free pages */ +static +void invalidate_cache_page(comp_ctx *ctx, xen_pfn_t pfn) +{ + struct cache_page *item = NULL; + + item = ctx->pfn2cache[pfn]; + if (item) + { + if (item != ctx->page_list_tail) + { + /* item at head of list */ + if (item == ctx->page_list_head) + { + ctx->page_list_head = (ctx->page_list_head)->next; + (ctx->page_list_head)->prev = NULL; + } + else /* item in middle of list */ + { + item->prev->next = item->next; + item->next->prev = item->prev; + } + + item->next = NULL; + item->prev = ctx->page_list_tail; + (ctx->page_list_tail)->next = item; + ctx->page_list_tail = item; + } + ctx->pfn2cache[pfn] = NULL; + (ctx->page_list_tail)->pfn = INVALID_P2M_ENTRY; + } +} + +int xc_compression_add_page(xc_interface *xch, comp_ctx *ctx, + char *page, xen_pfn_t pfn, int 
israw) +{ + if (pfn > ctx->dom_pfnlist_size) + { + ERROR("Invalid pfn passed into " + "xc_compression_add_page %" PRIpfn "\n", pfn); + return -2; + } + + /* pagetable page */ + if (israw) + invalidate_cache_page(ctx, pfn); + ctx->sendbuf_pfns[ctx->pfns_len] = israw ? INVALID_P2M_ENTRY : pfn; + memcpy(ctx->inputbuf + ctx->pfns_len * XC_PAGE_SIZE, page, XC_PAGE_SIZE); + ctx->pfns_len++; + + /* check if we have run out of space. If so, + * we need to synchronously compress the pages and flush them out + */ + if (ctx->pfns_len == NRPAGES(PAGE_BUFFER_SIZE)) + return -1; + return 0; +} + +int xc_compression_compress_pages(xc_interface *xch, comp_ctx *ctx, + char *compbuf, unsigned long compbuf_size, + unsigned long *compbuf_len) +{ + char *cache_copy = NULL, *current_page = NULL; + int israw, rc = 1; + + if (!ctx->pfns_len || (ctx->pfns_index == ctx->pfns_len)) { + ctx->pfns_len = ctx->pfns_index = 0; + return 0; + } + + ctx->compbuf_pos = 0; + ctx->compbuf = (uint8_t *)compbuf; + ctx->compbuf_size = compbuf_size; + + for (; ctx->pfns_index < ctx->pfns_len; ctx->pfns_index++) + { + israw = 0; + cache_copy = NULL; + current_page = ctx->inputbuf + ctx->pfns_index * XC_PAGE_SIZE; + + if (ctx->sendbuf_pfns[ctx->pfns_index] == INVALID_P2M_ENTRY) + israw = 1; + else + cache_copy = get_cache_page(ctx, + ctx->sendbuf_pfns[ctx->pfns_index], + &israw); + + if (israw) + rc = (add_full_page(ctx, current_page, cache_copy) >= 0); + else + rc = (compress_page(ctx, current_page, cache_copy) >= 0); + + if ( !rc ) + { + /* Out of space in outbuf! 
flush and come back */ + rc = -1; + break; + } + } + if (compbuf_len) + *compbuf_len = ctx->compbuf_pos; + + return rc; +} + +inline +void xc_compression_reset_pagebuf(xc_interface *xch, comp_ctx *ctx) +{ + ctx->pfns_index = ctx->pfns_len = 0; +} + +int xc_compression_uncompress_page(xc_interface *xch, char *compbuf, + unsigned long compbuf_size, + unsigned long *compbuf_pos, char *dest) +{ + return uncompress_page(xch, dest, compbuf_pos, + (uint8_t *)compbuf, compbuf_size); +} + +void xc_compression_free_context(xc_interface *xch, comp_ctx *ctx) +{ + if (!ctx) return; + + if (ctx->inputbuf) + free(ctx->inputbuf); + if (ctx->sendbuf_pfns) + free(ctx->sendbuf_pfns); + if (ctx->cache_base) + free(ctx->cache_base); + if (ctx->pfn2cache) + free(ctx->pfn2cache); + if (ctx->cache) + free(ctx->cache); + free(ctx); +} + +comp_ctx *xc_compression_create_context(xc_interface *xch, + unsigned long p2m_size) +{ + unsigned long i; + comp_ctx *ctx = NULL; + unsigned long num_cache_pages = DELTA_CACHE_SIZE/XC_PAGE_SIZE; + + ctx = (comp_ctx *)malloc(sizeof(comp_ctx)); + if (!ctx) + { + ERROR("Failed to allocate compression_ctx\n"); + goto error; + } + memset(ctx, 0, sizeof(comp_ctx)); + + if (posix_memalign((void **)&ctx->inputbuf, + XC_PAGE_SIZE, PAGE_BUFFER_SIZE)) + { + ERROR("Failed to allocate page buffer\n"); + goto error; + } + + ctx->sendbuf_pfns = malloc(NRPAGES(PAGE_BUFFER_SIZE) * + sizeof(xen_pfn_t)); + if (!ctx->sendbuf_pfns) + { + ERROR("Could not alloc sendbuf_pfns\n"); + goto error; + } + memset(ctx->sendbuf_pfns, -1, + NRPAGES(PAGE_BUFFER_SIZE) * sizeof(xen_pfn_t)); + + if (posix_memalign((void **)&ctx->cache_base, + XC_PAGE_SIZE, DELTA_CACHE_SIZE)) + { + ERROR("Failed to allocate delta cache\n"); + goto error; + } + + ctx->pfn2cache = calloc(p2m_size, sizeof(struct cache_page *)); + if (!ctx->pfn2cache) + { + ERROR("Could not alloc pfn2cache map\n"); + goto error; + } + + ctx->cache = malloc(num_cache_pages * sizeof(struct cache_page)); + if (!ctx->cache) + { + 
ERROR("Could not alloc compression cache\n"); + goto error; + } + + for (i = 0; i < num_cache_pages; i++) + { + ctx->cache[i].pfn = INVALID_P2M_ENTRY; + ctx->cache[i].page = ctx->cache_base + i * XC_PAGE_SIZE; + ctx->cache[i].prev = (i == 0) ? NULL : &(ctx->cache[i - 1]); + ctx->cache[i].next = ((i+1) == num_cache_pages)? NULL : + &(ctx->cache[i + 1]); + } + ctx->page_list_head = &(ctx->cache[0]); + ctx->page_list_tail = &(ctx->cache[num_cache_pages -1]); + ctx->dom_pfnlist_size = p2m_size; + + return ctx; +error: + xc_compression_free_context(xch, ctx); + return NULL; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r 4b0907c6a08c -r 47fdd52af616 tools/libxc/xc_domain_restore.c --- a/tools/libxc/xc_domain_restore.c Tue Oct 11 12:02:58 2011 +0100 +++ b/tools/libxc/xc_domain_restore.c Thu Oct 13 15:36:53 2011 -0700 @@ -43,6 +43,7 @@ xen_pfn_t *p2m_batch; /* A table of P2M mappings in the current region. */ int completed; /* Set when a consistent image is available */ int last_checkpoint; /* Set when we should commit to the current checkpoint when it completes. 
*/ + int compressing; /* Set when sender signals that pages would be sent compressed (for Remus) */ struct domain_info_context dinfo; }; @@ -663,6 +664,10 @@ /* pages is of length nr_physpages, pfn_types is of length nr_pages */ unsigned int nr_physpages, nr_pages; + /* checkpoint compression state */ + int compressing; + unsigned long compbuf_pos, compbuf_size; + /* Types of the pfns in the current region */ unsigned long* pfn_types; @@ -700,6 +705,7 @@ { int count, countpages, oldcount, i; void* ptmp; + unsigned long compbuf_size; if ( RDEXACT(fd, &count, sizeof(count)) ) { @@ -809,6 +815,40 @@ } return pagebuf_get_one(xch, ctx, buf, fd, dom); + case XC_SAVE_ID_ENABLE_COMPRESSION: + /* We cannot set compression flag directly in pagebuf structure, + * since this pagebuf still has uncompressed pages that are yet to + * be applied. We enable the compression field in pagebuf structure + * after receiving the first tailbuf. + */ + ctx->compressing = 1; + // DPRINTF("compression flag received"); + return pagebuf_get_one(xch, ctx, buf, fd, dom); + + case XC_SAVE_ID_COMPRESSED_DATA: + + /* read the length of compressed chunk coming in */ + if ( RDEXACT(fd, &compbuf_size, sizeof(unsigned long)) ) + { + PERROR("Error when reading compbuf_size"); + return -1; + } + if (!compbuf_size) return 1; + + buf->compbuf_size += compbuf_size; + if (!(ptmp = realloc(buf->pages, buf->compbuf_size))) { + ERROR("Could not (re)allocate compression buffer"); + return -1; + } + buf->pages = ptmp; + + if ( RDEXACT(fd, buf->pages + (buf->compbuf_size - compbuf_size), + compbuf_size) ) { + PERROR("Error when reading compression buffer"); + return -1; + } + return compbuf_size; + default: if ( (count > MAX_BATCH_SIZE) || (count < 0) ) { ERROR("Max batch size exceeded (%d). Giving up.", count); @@ -846,6 +886,13 @@ if (!countpages) return count; + /* If Remus Checkpoint Compression is turned on, we will only be + * receiving the pfn lists now. 
The compressed pages will come in later, + * following a <XC_SAVE_ID_COMPRESSED_DATA, compressedChunkSize> tuple. + */ + if (buf->compressing) + return pagebuf_get_one(xch, ctx, buf, fd, dom); + oldcount = buf->nr_physpages; buf->nr_physpages += countpages; if (!buf->pages) { @@ -874,6 +921,7 @@ int rc; buf->nr_physpages = buf->nr_pages = 0; + buf->compbuf_pos = buf->compbuf_size = 0; do { rc = pagebuf_get_one(xch, ctx, buf, fd, dom); @@ -1091,7 +1139,21 @@ /* In verify mode, we use a copy; otherwise we work in place */ page = pagebuf->verify ? (void *)buf : (region_base + i*PAGE_SIZE); - memcpy(page, pagebuf->pages + (curpage + curbatch) * PAGE_SIZE, PAGE_SIZE); + /* Remus - page decompression */ + if (pagebuf->compressing) + { + if (xc_compression_uncompress_page(xch, pagebuf->pages, + pagebuf->compbuf_size, + &pagebuf->compbuf_pos, + (char *)page)) + { + ERROR("Failed to uncompress page (pfn=%lx)\n", pfn); + goto err_mapped; + } + } + else + memcpy(page, pagebuf->pages + (curpage + curbatch) * PAGE_SIZE, + PAGE_SIZE); pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK; @@ -1353,6 +1415,7 @@ if ( !ctx->completed ) { pagebuf.nr_physpages = pagebuf.nr_pages = 0; + pagebuf.compbuf_pos = pagebuf.compbuf_size = 0; if ( pagebuf_get_one(xch, ctx, &pagebuf, io_fd, dom) < 0 ) { PERROR("Error when reading batch"); goto out; @@ -1395,6 +1458,7 @@ } pagebuf.nr_physpages = pagebuf.nr_pages = 0; + pagebuf.compbuf_pos = pagebuf.compbuf_size = 0; n += j; /* crude stats */ @@ -1438,6 +1502,13 @@ */ if ( !ctx->last_checkpoint ) fcntl(io_fd, F_SETFL, orig_io_fd_flags | O_NONBLOCK); + + /* + * If sender had sent enable compression flag, switch to compressed + * checkpoints mode once the first checkpoint is received. 
+ */ + if (ctx->compressing) + pagebuf.compressing = 1; } if (pagebuf.acpi_ioport_location == 1) { diff -r 4b0907c6a08c -r 47fdd52af616 tools/libxc/xc_domain_save.c --- a/tools/libxc/xc_domain_save.c Tue Oct 11 12:02:58 2011 +0100 +++ b/tools/libxc/xc_domain_save.c Thu Oct 13 15:36:53 2011 -0700 @@ -218,6 +218,56 @@ return noncached_write(xch, ob, fd, buf, len); } +static int write_compressed(xc_interface *xch, comp_ctx *compress_ctx, + int dobuf, struct outbuf* ob, int fd) +{ + int rc = 0; + int header = sizeof(int) + sizeof(unsigned long); + int marker = XC_SAVE_ID_COMPRESSED_DATA; + unsigned long compbuf_len = 0; + + do + { + /* check for available space (atleast 8k) */ + if ((ob->pos + header + XC_PAGE_SIZE * 2) > ob->size) + { + if (outbuf_flush(xch, ob, fd) < 0) + { + ERROR("Error when flushing outbuf intermediate"); + return -1; + } + } + + rc = xc_compression_compress_pages(xch, compress_ctx, + ob->buf + ob->pos + header, + ob->size - ob->pos - header, + &compbuf_len); + if (!rc) + return 0; + + if (outbuf_hardwrite(xch, ob, fd, &marker, sizeof(marker)) < 0) + { + PERROR("Error when writing marker (errno %d)", errno); + return -1; + } + + if (outbuf_hardwrite(xch, ob, fd, &compbuf_len, sizeof(compbuf_len)) < 0) + { + PERROR("Error when writing compbuf_len (errno %d)", errno); + return -1; + } + + ob->pos += (size_t) compbuf_len; + if (!dobuf && outbuf_flush(xch, ob, fd) < 0) + { + ERROR("Error when writing compressed chunk"); + return -1; + } + } while (rc != 0); + + return 0; +} + struct time_stats { struct timeval wall; long long d0_cpu, d1_cpu; @@ -815,11 +865,19 @@ unsigned long mfn; - struct outbuf ob; + struct outbuf ob_pagebuf, ob_tailbuf, *ob = NULL; struct save_ctx _ctx; struct save_ctx *ctx = &_ctx; struct domain_info_context *dinfo = &ctx->dinfo; + /* Compression context */ + comp_ctx *compress_ctx= NULL; + /* Even if XCFLAGS_CHECKPOINT_COMPRESS is set, we enable compression only + * after sending XC_SAVE_ID_ENABLE_COMPRESSION and the tailbuf for 
+ * first time. + */ + int compressing = 0; + int completed = 0; if ( hvm && !callbacks->switch_qemu_logdirty ) @@ -829,7 +887,7 @@ return 1; } - outbuf_init(xch, &ob, OUTBUF_SIZE); + outbuf_init(xch, &ob_pagebuf, OUTBUF_SIZE); memset(ctx, 0, sizeof(*ctx)); @@ -917,6 +975,16 @@ } } + if ( flags & XCFLAGS_CHECKPOINT_COMPRESS ) + { + if (!(compress_ctx = xc_compression_create_context(xch, dinfo->p2m_size))) + { + ERROR("Failed to create compression context"); + goto out; + } + outbuf_init(xch, &ob_tailbuf, OUTBUF_SIZE/4); + } + last_iter = !live; /* pretend we sent all the pages last iteration */ @@ -1025,9 +1093,11 @@ } copypages: -#define wrexact(fd, buf, len) write_buffer(xch, last_iter, &ob, (fd), (buf), (len)) -#define wruncached(fd, live, buf, len) write_uncached(xch, last_iter, &ob, (fd), (buf), (len)) +#define wrexact(fd, buf, len) write_buffer(xch, last_iter, ob, (fd), (buf), (len)) +#define wruncached(fd, live, buf, len) write_uncached(xch, last_iter, ob, (fd), (buf), (len)) +#define wrcompressed(fd) write_compressed(xch, compress_ctx, last_iter, ob, (fd)) + ob = &ob_pagebuf; /* Holds pfn_types, pages/compressed pages */ /* Now write out each data page, canonicalising page tables as we go... */ for ( ; ; ) { @@ -1270,7 +1340,7 @@ { /* If the page is not a normal data page, write out any run of pages we may have previously acumulated */ - if ( run ) + if ( !compressing && run ) { if ( wruncached(io_fd, live, (char*)region_base+(PAGE_SIZE*(j-run)), @@ -1305,7 +1375,41 @@ goto out; } - if ( wruncached(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE ) + if (compressing) + { + int c_err; + /* Mark pagetable page to be sent uncompressed */ + c_err = xc_compression_add_page(xch, compress_ctx, page, + pfn, 1 /* raw page */); + if (c_err == -2) /* OOB PFN */ + { + ERROR("Could not add pagetable page " + "(pfn:%" PRIpfn "to page buffer\n", pfn); + goto out; + } + + if (c_err == -1) + { + /* + * We are out of buffer space to hold dirty + * pages. 
Compress and flush the current buffer + * to make space. This is a corner case, that + * slows down checkpointing as the compression + * happens while domain is suspended. Happens + * seldom and if you find this occurring + * frequently, increase the PAGE_BUFFER_SIZE + * in xc_compression.c. + */ + if (wrcompressed(io_fd) < 0) + { + ERROR("Error when writing compressed" + " data (4b)\n"); + goto out; + } + } + } + else if ( wruncached(io_fd, live, page, + PAGE_SIZE) != PAGE_SIZE ) { PERROR("Error when writing to state file (4b)" " (errno %d)", errno); @@ -1315,7 +1419,34 @@ else { /* We have a normal page: accumulate it for writing. */ - run++; + if (compressing) + { + int c_err; + /* For checkpoint compression, accumulate the page in the + * page buffer, to be compressed later. + */ + c_err = xc_compression_add_page(xch, compress_ctx, spage, + pfn, 0 /* not raw page */); + + if (c_err == -2) /* OOB PFN */ + { + ERROR("Could not add page " + "(pfn:%" PRIpfn "to page buffer\n", pfn); + goto out; + } + + if (c_err == -1) + { + if (wrcompressed(io_fd) < 0) + { + ERROR("Error when writing compressed" + " data (4c)\n"); + goto out; + } + } + } + else + run++; } } /* end of the write out for this batch */ @@ -1423,6 +1554,15 @@ DPRINTF("All memory is saved\n"); + /* After last_iter, buffer the rest of pagebuf & tailbuf data into a + * separate output buffer and flush it after the compressed page chunks. + */ + if (compressing) + { + ob = &ob_tailbuf; + ob->pos = 0; + } + { struct { int id; @@ -1522,6 +1662,25 @@ } } + /* Enable compression logic on both sides by sending this + * one time marker. + * NOTE: We could have simplified this procedure by sending + * the enable/disable compression flag before the beginning of + * the main for loop. But this would break compatibility for + * live migration code, with older versions of xen. So we have + * to enable it after the last_iter, when the XC_SAVE_ID_* + * elements are sent. 
+ */ + if (!compressing && (flags & XCFLAGS_CHECKPOINT_COMPRESS)) + { + i = XC_SAVE_ID_ENABLE_COMPRESSION; + if ( wrexact(io_fd, &i, sizeof(int)) ) + { + PERROR("Error when writing enable_compression marker"); + goto out; + } + } + /* Zero terminate */ i = 0; if ( wrexact(io_fd, &i, sizeof(int)) ) @@ -1766,14 +1925,38 @@ if ( !rc && callbacks->postcopy ) callbacks->postcopy(callbacks->data); + /* guest has been resumed. Now we can compress data + * at our own pace. + */ + if (!rc && compressing) + { + ob = &ob_pagebuf; + if (wrcompressed(io_fd) < 0) + { + ERROR("Error when writing compressed data, after postcopy\n"); + rc = 1; + goto out; + } + /* Copy the tailbuf data into the main outbuf */ + if ( wrexact(io_fd, ob_tailbuf.buf, ob_tailbuf.pos) ) + { + rc = 1; + PERROR("Error when copying tailbuf into outbuf"); + goto out; + } + } + /* Flush last write and discard cache for file. */ - if ( outbuf_flush(xch, &ob, io_fd) < 0 ) { + if ( outbuf_flush(xch, ob, io_fd) < 0 ) { PERROR("Error when flushing output buffer"); rc = 1; } discard_file_cache(xch, io_fd, 1 /* flush */); + /* Enable compression now, finally */ + compressing = (flags & XCFLAGS_CHECKPOINT_COMPRESS); + /* checkpoint_cb can spend arbitrarily long in between rounds */ if (!rc && callbacks->checkpoint && callbacks->checkpoint(callbacks->data) > 0) @@ -1815,6 +1998,9 @@ DPRINTF("Warning - couldn't disable qemu log-dirty mode"); } + if (compress_ctx) + xc_compression_free_context(xch, compress_ctx); + if ( live_shinfo ) munmap(live_shinfo, PAGE_SIZE); diff -r 4b0907c6a08c -r 47fdd52af616 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Tue Oct 11 12:02:58 2011 +0100 +++ b/tools/libxc/xenctrl.h Thu Oct 13 15:36:53 2011 -0700 @@ -1906,4 +1906,64 @@ int verbose); /* Useful for callers who also use libelf. 
*/ +/** + * Checkpoint Compression + */ +typedef struct compression_ctx comp_ctx; +comp_ctx *xc_compression_create_context(xc_interface *xch, + unsigned long p2m_size); +void xc_compression_free_context(xc_interface *xch, comp_ctx *ctx); + +/** + * Add a page to the compression page buffer, to be compressed later. + * + * returns 0 if the page was successfully added to the page buffer + * + * returns -1 if there is no space in buffer. In this case, the + * application should call xc_compression_compress_pages to compress + * the buffer (or at least part of it), thereby freeing some space in + * the page buffer. + * + * returns -2 if the pfn is out of bounds, where the bound is p2m_size + * parameter passed during xc_compression_create_context. + */ +int xc_compression_add_page(xc_interface *xch, comp_ctx *ctx, char *page, + unsigned long pfn, int israw); + +/** + * Delta compresses pages in the page buffer and inserts the + * compressed data into the supplied compression buffer compbuf, whose + * size is compbuf_size. + * After compression, the pages are copied to the internal LRU cache. + * + * This function compresses as many pages as possible into the + * supplied compression buffer. It maintains an internal iterator to + * keep track of pages in the input buffer that are yet to be compressed. + * + * returns -1 if the compression buffer has run out of space. + * returns 1 on success. + * returns 0 if no more pages are left to be compressed. + * When the return value is non-zero, compbuf_len indicates the actual + * amount of data present in compbuf (<=compbuf_size). + */ +int xc_compression_compress_pages(xc_interface *xch, comp_ctx *ctx, + char *compbuf, unsigned long compbuf_size, + unsigned long *compbuf_len); + +/** + * Resets the internal page buffer that holds dirty pages before compression. + * Also resets the iterators. 
+ */ +void xc_compression_reset_pagebuf(xc_interface *xch, comp_ctx *ctx); + +/** + * Caller must supply the compression buffer (compbuf), + * its size (compbuf_size) and a reference to index variable (compbuf_pos) + * that is used internally. Each call pulls out one page from the compressed + * chunk and copies it to dest. + */ +int xc_compression_uncompress_page(xc_interface *xch, char *compbuf, + unsigned long compbuf_size, + unsigned long *compbuf_pos, char *dest); + #endif /* XENCTRL_H */ diff -r 4b0907c6a08c -r 47fdd52af616 tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Tue Oct 11 12:02:58 2011 +0100 +++ b/tools/libxc/xenguest.h Thu Oct 13 15:36:53 2011 -0700 @@ -27,6 +27,7 @@ #define XCFLAGS_DEBUG 2 #define XCFLAGS_HVM 4 #define XCFLAGS_STDVGA 8 +#define XCFLAGS_CHECKPOINT_COMPRESS 16 #define X86_64_B_SIZE 64 #define X86_32_B_SIZE 32 diff -r 4b0907c6a08c -r 47fdd52af616 tools/libxc/xg_save_restore.h --- a/tools/libxc/xg_save_restore.h Tue Oct 11 12:02:58 2011 +0100 +++ b/tools/libxc/xg_save_restore.h Thu Oct 13 15:36:53 2011 -0700 @@ -134,6 +134,8 @@ #define XC_SAVE_ID_HVM_CONSOLE_PFN -8 /* (HVM-only) */ #define XC_SAVE_ID_LAST_CHECKPOINT -9 /* Commit to restoring after completion of current iteration. */ #define XC_SAVE_ID_HVM_ACPI_IOPORTS_LOCATION -10 +#define XC_SAVE_ID_COMPRESSED_DATA -11 /* Marker to indicate arrival of compressed data */ +#define XC_SAVE_ID_ENABLE_COMPRESSION -12 /* Marker to enable compression logic at receiver side */ /* ** We process save/restore/migrate in batches of pages; the below _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |