[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] [HVM][QEMU] Save/restore: enable HVM live migration
# HG changeset patch # User Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx> # Date 1174045190 0 # Node ID 8e76e1b95b127c2bfca94cb3cb660c54bcced8b7 # Parent 422a61ebac541a40d60eee66e5ddf87d4855201e [HVM][QEMU] Save/restore: enable HVM live migration by getting page-dirtying bitmaps from qemu-dm as well as from xen. Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx> --- tools/ioemu/target-i386-dm/exec-dm.c | 16 ++++ tools/ioemu/xenstore.c | 127 ++++++++++++++++++++++++++++++++++ tools/libxc/Makefile | 2 tools/libxc/xc_hvm_save.c | 39 +++++++++- tools/libxc/xenguest.h | 6 + tools/libxc/xg_private.c | 4 - tools/xcutils/Makefile | 6 - tools/xcutils/xc_save.c | 129 ++++++++++++++++++++++++++++++++++- 8 files changed, 314 insertions(+), 15 deletions(-) diff -r 422a61ebac54 -r 8e76e1b95b12 tools/ioemu/target-i386-dm/exec-dm.c --- a/tools/ioemu/target-i386-dm/exec-dm.c Fri Mar 16 10:42:25 2007 +0000 +++ b/tools/ioemu/target-i386-dm/exec-dm.c Fri Mar 16 11:39:50 2007 +0000 @@ -450,6 +450,9 @@ static inline int paddr_is_ram(target_ph #define phys_ram_addr(x) (phys_ram_base + (x)) #endif +extern unsigned long *logdirty_bitmap; +extern unsigned long logdirty_bitmap_size; + void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, int len, int is_write) { @@ -485,9 +488,20 @@ void cpu_physical_memory_rw(target_phys_ l = 1; } } else if (paddr_is_ram(addr)) { - /* Reading from RAM */ + /* Writing to RAM */ ptr = phys_ram_addr(addr); memcpy(ptr, buf, l); + if (logdirty_bitmap != NULL) { + /* Record that we have dirtied this frame */ + unsigned long pfn = addr >> TARGET_PAGE_BITS; + if (pfn / 8 >= logdirty_bitmap_size) { + fprintf(logfile, "dirtying pfn %x >= bitmap size %x\n", + pfn, logdirty_bitmap_size * 8); + } else { + logdirty_bitmap[pfn / HOST_LONG_BITS] + |= 1UL << pfn % HOST_LONG_BITS; + } + } #ifdef __ia64__ sync_icache(ptr, l); #endif diff -r 422a61ebac54 -r 8e76e1b95b12 tools/ioemu/xenstore.c --- a/tools/ioemu/xenstore.c Fri Mar 16 10:42:25 2007 +0000 +++ b/tools/ioemu/xenstore.c Fri Mar 16 11:39:50 2007 +0000 @@ -11,6 +11,11 @@ #include "vl.h" #include "block_int.h" #include <unistd.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> static struct xs_handle *xsh = NULL; static char *hd_filename[MAX_DISKS]; @@ -183,6 +188,13 @@ void xenstore_parse_domain_config(int do } } + /* Set a watch for log-dirty requests from the migration tools */ + if (pasprintf(&buf, "%s/logdirty/next-active", path) != -1) { + xs_watch(xsh, buf, "logdirty"); + fprintf(logfile, "Watching %s\n", buf); + } + + out: free(type); free(params); @@ -201,6 +213,116 @@ int xenstore_fd(void) return -1; } +unsigned long *logdirty_bitmap = NULL; +unsigned long logdirty_bitmap_size; +extern int vga_ram_size, bios_size; + +void xenstore_process_logdirty_event(void) +{ + char *act; + static char *active_path = NULL; + static char *next_active_path = NULL; + static char *seg = NULL; + unsigned int len; + int i; + + fprintf(logfile, "Triggered log-dirty buffer switch\n"); + + if (!seg) { + char *path, *p, *key_ascii, *key_terminated[17] = {0,}; + key_t key; + int shmid; + + /* Find and map the shared memory segment for log-dirty bitmaps */ + if (!(path = xs_get_domain_path(xsh, domid))) { + fprintf(logfile, "Log-dirty: can't get domain path in store\n"); + exit(1); + } + if (!(path = realloc(path, strlen(path) + + strlen("/logdirty/next-active") + 1))) { + fprintf(logfile, "Log-dirty: out of memory\n"); + exit(1); + } + strcat(path, "/logdirty/"); + p = path + strlen(path); + strcpy(p, "key"); + + key_ascii = xs_read(xsh, XBT_NULL, path, &len); + if (!key_ascii) { + /* No key yet: wait for the next watch */ + free(path); + return; + } + strncpy(key_terminated, key_ascii, 16); + free(key_ascii); + key = (key_t) strtoull(key_terminated, NULL, 16); + + /* Figure out how bit the log-dirty bitmaps are */ + logdirty_bitmap_size = ((phys_ram_size + 0x20 + - (vga_ram_size + bios_size)) + >> (TARGET_PAGE_BITS)); /* nr of bits in map*/ + if (logdirty_bitmap_size > HVM_BELOW_4G_MMIO_START >> TARGET_PAGE_BITS) + logdirty_bitmap_size += + HVM_BELOW_4G_MMIO_LENGTH >> TARGET_PAGE_BITS; /* still bits */ + logdirty_bitmap_size = ((logdirty_bitmap_size + HOST_LONG_BITS - 1) + / HOST_LONG_BITS); /* longs */ + logdirty_bitmap_size *= sizeof (unsigned long); /* bytes */ + + /* Map the shared-memory segment */ + if ((shmid = shmget(key, + 2 * logdirty_bitmap_size, + S_IRUSR|S_IWUSR)) == -1 + || (seg = shmat(shmid, NULL, 0)) == (void *)-1) { + fprintf(logfile, "Log-dirty: can't map segment %16.16llx (%s)\n", + (unsigned long long) key, strerror(errno)); + exit(1); + } + + fprintf(logfile, "Log-dirty: mapped segment at %p\n", seg); + + /* Double-check that the bitmaps are the size we expect */ + if (logdirty_bitmap_size != *(uint32_t *)seg) { + fprintf(logfile, "Log-dirty: got %lu, calc %lu\n", + *(uint32_t *)seg, logdirty_bitmap_size); + return; + } + + /* Remember the paths for the next-active and active entries */ + strcpy(p, "active"); + if (!(active_path = strdup(path))) { + fprintf(logfile, "Log-dirty: out of memory\n"); + exit(1); + } + strcpy(p, "next-active"); + if (!(next_active_path = strdup(path))) { + fprintf(logfile, "Log-dirty: out of memory\n"); + exit(1); + } + free(path); + } + + /* Read the required active buffer from the store */ + act = xs_read(xsh, XBT_NULL, next_active_path, &len); + if (!act) { + fprintf(logfile, "Log-dirty: can't read next-active\n"); + exit(1); + } + + /* Switch buffers */ + i = act[0] - '0'; + if (i != 0 && i != 1) { + fprintf(logfile, "Log-dirty: bad next-active entry: %s\n", act); + exit(1); + } + logdirty_bitmap = seg + i * logdirty_bitmap_size; + + /* Ack that we've switched */ + xs_write(xsh, XBT_NULL, active_path, act, len); + free(act); +} + + + void xenstore_process_event(void *opaque) { char **vec, *image = NULL; @@ -209,6 +331,11 @@ void xenstore_process_event(void *opaque vec = xs_read_watch(xsh, &num); if (!vec) return; + + if (!strcmp(vec[XS_WATCH_TOKEN], "logdirty")) { + xenstore_process_logdirty_event(); + goto out; + } if (strncmp(vec[XS_WATCH_TOKEN], "hd", 2) || strlen(vec[XS_WATCH_TOKEN]) != 3) diff -r 422a61ebac54 -r 8e76e1b95b12 tools/libxc/Makefile --- a/tools/libxc/Makefile Fri Mar 16 10:42:25 2007 +0000 +++ b/tools/libxc/Makefile Fri Mar 16 11:39:50 2007 +0000 @@ -57,7 +57,7 @@ GUEST_SRCS-$(CONFIG_IA64) += xc_dom_c CFLAGS += -Werror -Wmissing-prototypes CFLAGS += -fno-strict-aliasing -CFLAGS += $(INCLUDES) -I. +CFLAGS += $(INCLUDES) -I. -I../xenstore # Needed for posix_fadvise64() in xc_linux.c CFLAGS-$(CONFIG_Linux) += -D_GNU_SOURCE diff -r 422a61ebac54 -r 8e76e1b95b12 tools/libxc/xc_hvm_save.c --- a/tools/libxc/xc_hvm_save.c Fri Mar 16 10:42:25 2007 +0000 +++ b/tools/libxc/xc_hvm_save.c Fri Mar 16 11:39:50 2007 +0000 @@ -54,6 +54,11 @@ static unsigned long hvirt_start; /* #levels of page tables used by the current guest */ static unsigned int pt_levels; +/* Shared-memory bitmaps for getting log-dirty bits from qemu */ +static unsigned long *qemu_bitmaps[2]; +static int qemu_active; +static int qemu_non_active; + int xc_hvm_drain_io(int handle, domid_t dom) { DECLARE_HYPERCALL; @@ -77,7 +82,8 @@ int xc_hvm_drain_io(int handle, domid_t */ #define BITS_PER_LONG (sizeof(unsigned long) * 8) -#define BITMAP_SIZE ((pfn_array_size + BITS_PER_LONG - 1) / 8) +#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) +#define BITMAP_SIZE (BITS_TO_LONGS(pfn_array_size) * sizeof(unsigned long)) #define BITMAP_ENTRY(_nr,_bmap) \ ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG] @@ -123,6 +129,7 @@ static inline int permute( int i, int nr return i; } + static uint64_t tv_to_us(struct timeval *new) { @@ -277,7 +284,9 @@ static int suspend_and_state(int (*suspe } int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, - uint32_t max_factor, uint32_t flags, int (*suspend)(int)) + uint32_t max_factor, uint32_t flags, int (*suspend)(int), + void *(*init_qemu_maps)(int, unsigned), + void (*qemu_flip_buffer)(int, int)) { xc_dominfo_t info; @@ -392,8 +401,6 @@ int xc_hvm_save(int xc_handle, int io_fd "nr_pages=0x%lx\n", info.max_memkb, max_mfn, info.nr_pages); if (live) { - ERROR("hvm domain doesn't support live migration now.\n"); - goto out; if (xc_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, @@ -453,6 +460,15 @@ int xc_hvm_save(int xc_handle, int io_fd to_skip = malloc(BITMAP_SIZE); + if (live) { + /* Get qemu-dm logging dirty pages too */ + void *seg = init_qemu_maps(dom, BITMAP_SIZE); + qemu_bitmaps[0] = seg; + qemu_bitmaps[1] = seg + BITMAP_SIZE; + qemu_active = 0; + qemu_non_active = 1; + } + hvm_buf_size = xc_domain_hvm_getcontext(xc_handle, dom, 0, 0); if ( hvm_buf_size == -1 ) { @@ -677,10 +693,23 @@ int xc_hvm_save(int xc_handle, int io_fd goto out; } + /* Pull in the dirty bits from qemu too */ + if (!last_iter) { + qemu_active = qemu_non_active; + qemu_non_active = qemu_active ? 0 : 1; + qemu_flip_buffer(dom, qemu_active); + for (j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++) { + to_send[j] |= qemu_bitmaps[qemu_non_active][j]; + qemu_bitmaps[qemu_non_active][j] = 0; + } + } else { + for (j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++) + to_send[j] |= qemu_bitmaps[qemu_active][j]; + } + sent_last_iter = sent_this_iter; print_stats(xc_handle, dom, sent_this_iter, &stats, 1); - } diff -r 422a61ebac54 -r 8e76e1b95b12 tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Fri Mar 16 10:42:25 2007 +0000 +++ b/tools/libxc/xenguest.h Fri Mar 16 11:39:50 2007 +0000 @@ -32,8 +32,10 @@ int xc_linux_save(int xc_handle, int io_ * @return 0 on success, -1 on failure */ int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, - uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */, - int (*suspend)(int domid)); + uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */, + int (*suspend)(int domid), + void *(*init_qemu_maps)(int, unsigned), + void (*qemu_flip_buffer)(int, int)); /** * This function will restore a saved domain running Linux. diff -r 422a61ebac54 -r 8e76e1b95b12 tools/libxc/xg_private.c --- a/tools/libxc/xg_private.c Fri Mar 16 10:42:25 2007 +0000 +++ b/tools/libxc/xg_private.c Fri Mar 16 11:39:50 2007 +0000 @@ -201,7 +201,9 @@ __attribute__((weak)) __attribute__((weak)) int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, uint32_t max_factor, uint32_t flags, - int (*suspend)(int domid)) + int (*suspend)(int domid), + void *(*init_qemu_maps)(int, unsigned), + void (*qemu_flip_buffer)(int, int)) { errno = ENOSYS; return -1; diff -r 422a61ebac54 -r 8e76e1b95b12 tools/xcutils/Makefile --- a/tools/xcutils/Makefile Fri Mar 16 10:42:25 2007 +0000 +++ b/tools/xcutils/Makefile Fri Mar 16 11:39:50 2007 +0000 @@ -13,7 +13,7 @@ include $(XEN_ROOT)/tools/Rules.mk PROGRAMS_INSTALL_DIR = /usr/$(LIBDIR)/xen/bin -INCLUDES += -I $(XEN_LIBXC) +INCLUDES += -I $(XEN_LIBXC) -I $(XEN_XENSTORE) CFLAGS += -Werror -fno-strict-aliasing CFLAGS += $(INCLUDES) @@ -22,9 +22,9 @@ CFLAGS += -Wp,-MD,.$(@F).d CFLAGS += -Wp,-MD,.$(@F).d PROG_DEP = .*.d -PROGRAMS = xc_restore xc_save readnotes +PROGRAMS = xc_restore xc_save readnotes -LDLIBS = -L$(XEN_LIBXC) -lxenguest -lxenctrl +LDLIBS = -L$(XEN_LIBXC) -L$(XEN_XENSTORE) -lxenguest -lxenctrl -lxenstore .PHONY: all all: build diff -r 422a61ebac54 -r 8e76e1b95b12 tools/xcutils/xc_save.c --- a/tools/xcutils/xc_save.c Fri Mar 16 10:42:25 2007 +0000 +++ b/tools/xcutils/xc_save.c Fri Mar 16 11:39:50 2007 +0000 @@ -12,7 +12,13 @@ #include <stdint.h> #include <string.h> #include <stdio.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <xs.h> #include <xenctrl.h> #include <xenguest.h> @@ -29,6 +35,123 @@ static int suspend(int domid) return (fgets(ans, sizeof(ans), stdin) != NULL && !strncmp(ans, "done\n", 5)); +} + +/* For HVM guests, there are two sources of dirty pages: the Xen shadow + * log-dirty bitmap, which we get with a hypercall, and qemu's version. + * The protocol for getting page-dirtying data from qemu uses a + * double-buffered shared memory interface directly between xc_save and + * qemu-dm. + * + * xc_save calculates the size of the bitmaps and notifies qemu-dm + * through the store that it wants to share the bitmaps. qemu-dm then + * starts filling in the 'active' buffer. + * + * To change the buffers over, xc_save writes the other buffer number to + * the store and waits for qemu to acknowledge that it is now writing to + * the new active buffer. xc_save can then process and clear the old + * active buffer. */ + +static char *qemu_active_path; +static char *qemu_next_active_path; +static struct xs_handle *xs; + +/* Get qemu to change buffers. */ +static void qemu_flip_buffer(int domid, int next_active) +{ + char digit = '0' + next_active; + unsigned int len; + char *active_str, **watch; + struct timeval tv; + fd_set fdset; + + /* Tell qemu that we want it to start writing log-dirty bits to the + * other buffer */ + if (!xs_write(xs, XBT_NULL, qemu_next_active_path, &digit, 1)) { + errx(1, "can't write next-active to store path (%s)\n", + qemu_next_active_path); + exit(1); + } + + /* Wait a while for qemu to signal that it has switched to the new + * active buffer */ + read_again: + tv.tv_sec = 5; + tv.tv_usec = 0; + FD_ZERO(&fdset); + FD_SET(xs_fileno(xs), &fdset); + if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1) { + errx(1, "timed out waiting for qemu to switch buffers\n"); + exit(1); + } + watch = xs_read_watch(xs, &len); + free(watch); + + active_str = xs_read(xs, XBT_NULL, qemu_active_path, &len); + if (active_str == NULL || active_str[0] - '0' != next_active) + /* Watch fired but value is not yet right */ + goto read_again; +} + +static void * init_qemu_maps(int domid, unsigned int bitmap_size) +{ + key_t key; + char key_ascii[17] = {0,}; + int shmid = -1; + void *seg; + char *path, *p; + + /* Make a shared-memory segment */ + while (shmid == -1) + { + key = rand(); /* No security, just a sequence of numbers */ + shmid = shmget(key, 2 * bitmap_size, + IPC_CREAT|IPC_EXCL|S_IRUSR|S_IWUSR); + if (shmid == -1 && errno != EEXIST) + errx(1, "can't get shmem to talk to qemu-dm"); + } + + /* Map it into our address space */ + seg = shmat(shmid, NULL, 0); + if (seg == (void *) -1) + errx(1, "can't map shmem to talk to qemu-dm"); + memset(seg, 0, 2 * bitmap_size); + + /* Write the size of it into the first 32 bits */ + *(uint32_t *)seg = bitmap_size; + + /* Tell qemu about it */ + if ((xs = xs_daemon_open()) == NULL) + errx(1, "Couldn't contact xenstore"); + if (!(path = xs_get_domain_path(xs, domid))) + errx(1, "can't get domain path in store"); + if (!(path = realloc(path, strlen(path) + + strlen("/logdirty/next-active") + 1))) + errx(1, "no memory for constructing xenstore path"); + strcat(path, "/logdirty/"); + p = path + strlen(path); + + strcpy(p, "key"); + snprintf(key_ascii, 17, "%16.16llx", (unsigned long long) key); + if (!xs_write(xs, XBT_NULL, path, key_ascii, 16)) + errx(1, "can't write key (%s) to store path (%s)\n", key_ascii, path); + + /* Watch for qemu's indication of the active buffer, and request it + * to start writing to buffer 0 */ + strcpy(p, "active"); + if (!xs_watch(xs, path, "qemu-active-buffer")) + errx(1, "can't set watch in store (%s)\n", path); + if (!(qemu_active_path = strdup(path))) + errx(1, "no memory for copying xenstore path"); + + strcpy(p, "next-active"); + if (!(qemu_next_active_path = strdup(path))) + errx(1, "no memory for copying xenstore path"); + + qemu_flip_buffer(domid, 0); + + free(path); + return seg; } @@ -52,9 +175,11 @@ main(int argc, char **argv) flags = atoi(argv[5]); if (flags & XCFLAGS_HVM) - ret = xc_hvm_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend); + ret = xc_hvm_save(xc_fd, io_fd, domid, maxit, max_f, flags, + &suspend, &init_qemu_maps, &qemu_flip_buffer); else - ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend); + ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags, + &suspend); xc_interface_close(xc_fd); _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |