[Xen-devel] [RFC Patch v2 10/16] colo: implement restore_callbacks finish_restore()
This patch implements the restore callbacks for COLO:

1. finish_restore(): xc_restore is run from XendCheckpoint.py, and we
   communicate with XendCheckpoint.py and with the master like this:
   a. write "finish\n" to stdout when we are ready to resume the vm
   b. XendCheckpoint.py writes "resume" when the vm is resumed
   c. write "resume" to the master when postresume is done
   d. "continue" is read from the master when a new checkpoint begins
   e. write "suspend" to the master when the vm is suspended
   f. "start" is read from the master when the primary begins to transfer
      dirty pages
   (An illustrative sketch of this handshake is included after the
   diffstat below.)

The SVM is running in COLO mode, so we must suspend it to synchronise its
state and then resume it. p2m_frame_list_list needs to be fixed up before
the SVM is resumed, and its content should be cached again after the SVM
has been suspended.

Signed-off-by: Ye Wei <wei.ye1987@xxxxxxxxx>
Signed-off-by: Jiang Yunhong <yunhong.jiang@xxxxxxxxx>
Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
---
 tools/libxc/Makefile                 |    6 +-
 tools/libxc/xc_domain_restore_colo.c |  335 ++++++++++++++++++++++++++++++++++
 tools/libxc/xc_save_restore_colo.h   |    1 +
 tools/libxl/Makefile                 |    2 +-
 tools/xcutils/Makefile               |    4 +-
 5 files changed, 342 insertions(+), 6 deletions(-)
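Not part of the patch: below is a minimal, self-contained sketch of the
per-checkpoint handshake described above, as seen from the restore
(secondary) side. The descriptor names (ctl_in/ctl_out for the pipe to
XendCheckpoint.py, master_fd for the connection to the primary) and the
read_msg()/write_msg() helpers are assumptions for illustration, not
functions from libxc or from this series.

/*
 * Illustrative only: the per-checkpoint message order on the restore
 * (secondary) side, matching steps a-f in the description above.
 * ctl_in/ctl_out stand for the pipe to XendCheckpoint.py, master_fd for
 * the connection to the primary; none of these names come from the patch.
 */
#include <string.h>
#include <unistd.h>

/* Read exactly len bytes and NUL-terminate; 0 on success, -1 on error. */
static int read_msg(int fd, char *buf, size_t len)
{
    size_t done = 0;
    ssize_t n;

    while (done < len) {
        n = read(fd, buf + done, len - done);
        if (n <= 0)
            return -1;
        done += n;
    }
    buf[len] = '\0';
    return 0;
}

/* Write exactly len bytes; 0 on success, -1 on error. */
static int write_msg(int fd, const char *msg, size_t len)
{
    size_t done = 0;
    ssize_t n;

    while (done < len) {
        n = write(fd, msg + done, len - done);
        if (n <= 0)
            return -1;
        done += n;
    }
    return 0;
}

/* One checkpoint round as seen by the restore process. */
int colo_handshake_round(int ctl_in, int ctl_out, int master_fd)
{
    char buf[16];

    /* a. tell XendCheckpoint.py the SVM is ready to be resumed */
    if (write_msg(ctl_out, "finish\n", 7))
        return -1;

    /* b. XendCheckpoint.py reports that the vm has been resumed */
    if (read_msg(ctl_in, buf, 6) || strcmp(buf, "resume"))
        return -1;

    /* c. tell the master that postresume is done */
    if (write_msg(master_fd, "resume", 6))
        return -1;

    /* d. the master announces a new checkpoint */
    if (read_msg(master_fd, buf, 8) || strcmp(buf, "continue"))
        return -1;

    /* e. after the SVM is suspended, report it to the master */
    if (write_msg(master_fd, "suspend", 7))
        return -1;

    /* f. the master starts transferring dirty pages */
    if (read_msg(master_fd, buf, 5) || strcmp(buf, "start"))
        return -1;

    return 0;
}

In the patch itself the same exchange is driven by read_exact() and
write_exact() on stdin/stdout and on comm_data->io_fd inside
colo_finish_restore().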
diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
index 70994b9..92d11af 100644
--- a/tools/libxc/Makefile
+++ b/tools/libxc/Makefile
@@ -49,7 +49,7 @@ GUEST_SRCS-y += xc_nomigrate.c
 endif
 
 vpath %.c ../../xen/common/libelf
-CFLAGS += -I../../xen/common/libelf
+CFLAGS += -I../../xen/common/libelf -I../xenstore
 
 ELF_SRCS-y += libelf-tools.c libelf-loader.c
 ELF_SRCS-y += libelf-dominfo.c
@@ -199,8 +199,8 @@ xc_dom_bzimageloader.o: CFLAGS += $(call zlib-options,D)
 xc_dom_bzimageloader.opic: CFLAGS += $(call zlib-options,D)
 
 libxenguest.so.$(MAJOR).$(MINOR): COMPRESSION_LIBS = $(call zlib-options,l)
-libxenguest.so.$(MAJOR).$(MINOR): $(GUEST_PIC_OBJS) libxenctrl.so
-	$(CC) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libxenguest.so.$(MAJOR) $(SHLIB_LDFLAGS) -o $@ $(GUEST_PIC_OBJS) $(COMPRESSION_LIBS) -lz $(LDLIBS_libxenctrl) $(PTHREAD_LIBS) $(APPEND_LDFLAGS)
+libxenguest.so.$(MAJOR).$(MINOR): $(GUEST_PIC_OBJS) libxenctrl.so $(LDLIBS_libxenstore)
+	$(CC) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libxenguest.so.$(MAJOR) $(SHLIB_LDFLAGS) -o $@ $(GUEST_PIC_OBJS) $(COMPRESSION_LIBS) -lz $(LDLIBS_libxenctrl) $(PTHREAD_LIBS) $(LDLIBS_libxenstore) $(APPEND_LDFLAGS)
 
 xenctrl_osdep_ENOSYS.so: $(OSDEP_PIC_OBJS) libxenctrl.so
 	$(CC) -g $(LDFLAGS) $(SHLIB_LDFLAGS) -o $@ $(OSDEP_PIC_OBJS) $(LDLIBS_libxenctrl) $(APPEND_LDFLAGS)
diff --git a/tools/libxc/xc_domain_restore_colo.c b/tools/libxc/xc_domain_restore_colo.c
index 70cdd16..6b87a2d 100644
--- a/tools/libxc/xc_domain_restore_colo.c
+++ b/tools/libxc/xc_domain_restore_colo.c
@@ -2,6 +2,7 @@
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <xc_bitops.h>
+#include <xenstore.h>
 
 struct restore_colo_data
 {
@@ -602,3 +603,337 @@ int colo_update_p2m_table(struct restore_data *comm_data, void *data)
 
     return 0;
 }
+
+static int update_pfn_type(xc_interface *xch, uint32_t dom, int count, xen_pfn_t *pfn_batch,
+                           xen_pfn_t *pfn_type_batch, xen_pfn_t *pfn_type)
+{
+    unsigned long k;
+
+    if (xc_get_pfn_type_batch(xch, dom, count, pfn_type_batch))
+    {
+        ERROR("xc_get_pfn_type_batch for slaver failed");
+        return -1;
+    }
+
+    for (k = 0; k < count; k++)
+        pfn_type[pfn_batch[k]] = pfn_type_batch[k] & XEN_DOMCTL_PFINFO_LTAB_MASK;
+
+    return 0;
+}
+
+static int install_fw_network(struct restore_data *comm_data)
+{
+    pid_t pid;
+    xc_interface *xch = comm_data->xch;
+    int status;
+    int rc;
+
+    char vif[20];
+
+    snprintf(vif, sizeof(vif), "vif%u.0", comm_data->dom);
+
+    pid = vfork();
+    if (pid < 0) {
+        ERROR("vfork fails");
+        return -1;
+    }
+
+    if (pid > 0) {
+        rc = waitpid(pid, &status, 0);
+        if (rc != pid || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+            ERROR("getting child status fails");
+            return -1;
+        }
+
+        return 0;
+    }
+
+    execl("/etc/xen/scripts/network-colo", "network-colo", "slaver", "install", vif, "eth0", NULL);
+    ERROR("execl fails");
+    return -1;
+}
+
+static int get_p2m_list(struct restore_data *comm_data,
+                        struct restore_colo_data *colo_data,
+                        xen_pfn_t *p2m_fll,
+                        xen_pfn_t **p2m_frame_list_list_p,
+                        char **p2m_frame_list_p,
+                        int prot)
+{
+    struct domain_info_context *dinfo = comm_data->dinfo;
+    xc_interface *xch = comm_data->xch;
+    uint32_t dom = comm_data->dom;
+    shared_info_t *shinfo = NULL;
+    xc_dominfo_t info;
+    xen_pfn_t *p2m_frame_list_list = NULL;
+    char *p2m_frame_list = NULL;
+    int rc = -1;
+
+    if ( xc_domain_getinfo(xch, dom, 1, &info) != 1 )
+    {
+        ERROR("Could not get domain info");
+        return -1;
+    }
+
+    /* Map the shared info frame */
+    shinfo = xc_map_foreign_range(xch, dom, PAGE_SIZE,
+                                  prot,
+                                  info.shared_info_frame);
+    if ( shinfo == NULL )
+    {
+        ERROR("Couldn't map shared info");
+        return -1;
+    }
+
+    if (p2m_fll == NULL)
+        shinfo->arch.pfn_to_mfn_frame_list_list = colo_data->p2m_fll;
+    else
+        *p2m_fll = shinfo->arch.pfn_to_mfn_frame_list_list;
+
+    p2m_frame_list_list =
+        xc_map_foreign_range(xch, dom, PAGE_SIZE, prot,
+                             shinfo->arch.pfn_to_mfn_frame_list_list);
+    if ( p2m_frame_list_list == NULL )
+    {
+        ERROR("Couldn't map p2m_frame_list_list");
+        goto error;
+    }
+
+    p2m_frame_list = xc_map_foreign_pages(xch, dom, prot,
+                                          p2m_frame_list_list,
+                                          P2M_FLL_ENTRIES);
+    if ( p2m_frame_list == NULL )
+    {
+        ERROR("Couldn't map p2m_frame_list");
+        goto error;
+    }
+
+    *p2m_frame_list_list_p = p2m_frame_list_list;
+    *p2m_frame_list_p = p2m_frame_list;
+    rc = 0;
+
+error:
+    munmap(shinfo, PAGE_SIZE);
+    if (rc && p2m_frame_list_list)
+        munmap(p2m_frame_list_list, PAGE_SIZE);
+
+    return rc;
+}
+
+static int update_p2m_list(struct restore_data *comm_data,
+                           struct restore_colo_data *colo_data)
+{
+    struct domain_info_context *dinfo = comm_data->dinfo;
+    xen_pfn_t *p2m_frame_list_list = NULL;
+    char *p2m_frame_list = NULL;
+    int rc;
+
+    rc = get_p2m_list(comm_data, colo_data, NULL, &p2m_frame_list_list,
+                      &p2m_frame_list, PROT_READ | PROT_WRITE);
+    if (rc)
+        return rc;
+
+    memcpy(p2m_frame_list_list, colo_data->p2m_frame_list_list, PAGE_SIZE);
+    memcpy(p2m_frame_list, colo_data->p2m_frame_list, PAGE_SIZE * P2M_FLL_ENTRIES);
+
+    munmap(p2m_frame_list_list, PAGE_SIZE);
+    munmap(p2m_frame_list, PAGE_SIZE * P2M_FLL_ENTRIES);
+
+    return 0;
+}
+
+static int cache_p2m_list(struct restore_data *comm_data,
+                          struct restore_colo_data *colo_data)
+{
+    struct domain_info_context *dinfo = comm_data->dinfo;
+    xen_pfn_t *p2m_frame_list_list = NULL;
+    char *p2m_frame_list = NULL;
+    int rc;
+
+    rc = get_p2m_list(comm_data, colo_data, &colo_data->p2m_fll,
+                      &p2m_frame_list_list, &p2m_frame_list, PROT_READ);
+    if (rc)
+        return rc;
+
+    memcpy(colo_data->p2m_frame_list_list, p2m_frame_list_list, PAGE_SIZE);
+    memcpy(colo_data->p2m_frame_list, p2m_frame_list, PAGE_SIZE * P2M_FLL_ENTRIES);
+
+    munmap(p2m_frame_list_list, PAGE_SIZE);
+    munmap(p2m_frame_list, PAGE_SIZE * P2M_FLL_ENTRIES);
+
+    return 0;
+}
+
+/* We are ready to start the guest when this function is called. We will
+ * not return until we need to do a new checkpoint or some error occurs.
+ *
+ * communication with python and master
+ *   python code      restore code        master        comment
+ *                                <===    "continue"    a new checkpoint begins
+ *                    "suspend"   ===>                  SVM is suspended
+ *                                        "start"       getting dirty pages begins
+ *      <===          "finish\n"                        SVM is ready
+ *   "resume"         ===>                              SVM is resumed
+ *                    "resume"    ===>                  postresume is done
+ *
+ * return value:
+ *   -1: error
+ *    0: continue to start vm
+ *    1: continue to do a checkpoint
+ */
+int colo_finish_restore(struct restore_data *comm_data, void *data)
+{
+    struct restore_colo_data *colo_data = data;
+    xc_interface *xch = comm_data->xch;
+    uint32_t dom = comm_data->dom;
+    struct domain_info_context *dinfo = comm_data->dinfo;
+    xc_evtchn *xce = colo_data->xce;
+    unsigned long *pfn_batch_slaver = colo_data->pfn_batch_slaver;
+    unsigned long *pfn_type_batch_slaver = colo_data->pfn_type_batch_slaver;
+    unsigned long *pfn_type_slaver = colo_data->pfn_type_slaver;
+
+    unsigned long i, j;
+    int rc;
+    char str[10];
+    int remote_port;
+    int local_port = colo_data->local_port;
+
+    /* fix pfn_to_mfn_frame_list_list */
+    if (!colo_data->first_time)
+    {
+        if (update_p2m_list(comm_data, colo_data) < 0)
+            return -1;
+    }
+
+    /* output the store-mfn & console-mfn */
+    printf("store-mfn %li\n", comm_data->store_mfn);
+    printf("console-mfn %li\n", comm_data->console_mfn);
+
+    /* notify python code checkpoint finish */
+    printf("finish\n");
+    fflush(stdout);
+
+    /* we need to know which pages are dirty to restore the guest */
+    if (xc_shadow_control(xch, dom, XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
+                          NULL, 0, NULL, 0, NULL) < 0 )
+    {
+        ERROR("enabling logdirty fails");
+        return -1;
+    }
+
+    /* wait domain resume, then connect the suspend evtchn */
+    read_exact(0, str, 6);
+    str[6] = '\0';
+    if (strcmp(str, "resume"))
+    {
+        ERROR("read %s, expect resume", str);
+        return -1;
+    }
+
+    if (colo_data->first_time) {
+        if (install_fw_network(comm_data) < 0)
+            return -1;
+    }
+
+    /* notify master vm is resumed */
+    write_exact(comm_data->io_fd, "resume", 6);
+
+    if (colo_data->first_time) {
+        sleep(10);
+        remote_port = xs_suspend_evtchn_port(dom);
+        if (remote_port < 0) {
+            ERROR("getting remote suspend port fails");
+            return -1;
+        }
+
+        local_port = xc_suspend_evtchn_init(xch, xce, dom, remote_port);
+        if (local_port < 0) {
+            ERROR("initializing suspend evtchn fails");
+            return -1;
+        }
+
+        colo_data->local_port = local_port;
+    }
+
+    /* wait for the next checkpoint */
+    read_exact(comm_data->io_fd, str, 8);
+    str[8] = '\0';
+    if (strcmp(str, "continue"))
+    {
+        ERROR("wait for a new checkpoint fails");
+        /* start the guest now? */
+        return 0;
+    }
+
+    /* notify the suspend evtchn */
+    rc = xc_evtchn_notify(xce, local_port);
+    if (rc < 0)
+    {
+        ERROR("notifying the suspend evtchn fails");
+        return -1;
+    }
+
+    rc = xc_await_suspend(xch, xce, local_port);
+    if (rc < 0)
+    {
+        ERROR("waiting suspend fails");
+        return -1;
+    }
+
+    /* notify master suspend is done */
+    write_exact(comm_data->io_fd, "suspend", 7);
+    read_exact(comm_data->io_fd, str, 5);
+    str[5] = '\0';
+    if (strcmp(str, "start"))
+        return -1;
+
+    if (xc_shadow_control(xch, dom, XEN_DOMCTL_SHADOW_OP_CLEAN,
+                          HYPERCALL_BUFFER(dirty_pages), dinfo->p2m_size,
+                          NULL, 0, NULL) != dinfo->p2m_size)
+    {
+        ERROR("getting slaver dirty fails");
+        return -1;
+    }
+
+    if (xc_shadow_control(xch, dom, XEN_DOMCTL_SHADOW_OP_OFF, NULL, 0, NULL,
+                          0, NULL) < 0 )
+    {
+        ERROR("disabling dirty-log fails");
+        return -1;
+    }
+
+    j = 0;
+    for (i = 0; i < colo_data->max_mem_pfn; i++)
+    {
+        if ( !test_bit(i, colo_data->dirty_pages) )
+            continue;
+
+        pfn_batch_slaver[j] = i;
+        pfn_type_batch_slaver[j++] = comm_data->p2m[i];
+        if (j == MAX_BATCH_SIZE)
+        {
+            if (update_pfn_type(xch, dom, j, pfn_batch_slaver,
+                                pfn_type_batch_slaver, pfn_type_slaver))
+            {
+                return -1;
+            }
+            j = 0;
+        }
+    }
+
+    if (j)
+    {
+        if (update_pfn_type(xch, dom, j, pfn_batch_slaver,
+                            pfn_type_batch_slaver, pfn_type_slaver))
+        {
+            return -1;
+        }
+    }
+
+    if (cache_p2m_list(comm_data, colo_data) < 0)
+        return -1;
+
+    colo_data->first_time = 0;
+
+    return 1;
+}
diff --git a/tools/libxc/xc_save_restore_colo.h b/tools/libxc/xc_save_restore_colo.h
index 98e5128..57df750 100644
--- a/tools/libxc/xc_save_restore_colo.h
+++ b/tools/libxc/xc_save_restore_colo.h
@@ -9,5 +9,6 @@ extern void colo_free(struct restore_data *, void *);
 extern char *colo_get_page(struct restore_data *, void *, unsigned long);
 extern int colo_flush_memory(struct restore_data *, void *);
 extern int colo_update_p2m_table(struct restore_data *, void *);
+extern int colo_finish_restore(struct restore_data *, void *);
 
 #endif
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index cf214bb..36b924d 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -192,7 +192,7 @@ xl: $(XL_OBJS) libxlutil.so libxenlight.so
 	$(CC) $(LDFLAGS) -o $@ $(XL_OBJS) libxlutil.so $(LDLIBS_libxenlight) $(LDLIBS_libxenctrl) -lyajl $(APPEND_LDFLAGS)
 
 libxl-save-helper: $(SAVE_HELPER_OBJS) libxenlight.so
-	$(CC) $(LDFLAGS) -o $@ $(SAVE_HELPER_OBJS) $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) $(APPEND_LDFLAGS)
+	$(CC) $(LDFLAGS) -o $@ $(SAVE_HELPER_OBJS) $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) $(LDLIBS_libxenstore) $(APPEND_LDFLAGS)
 
 testidl: testidl.o libxlutil.so libxenlight.so
 	$(CC) $(LDFLAGS) -o $@ testidl.o libxlutil.so $(LDLIBS_libxenlight) $(LDLIBS_libxenctrl) $(APPEND_LDFLAGS)
diff --git a/tools/xcutils/Makefile b/tools/xcutils/Makefile
index 6c502f1..51f3f0e 100644
--- a/tools/xcutils/Makefile
+++ b/tools/xcutils/Makefile
@@ -27,13 +27,13 @@ all: build
 build: $(PROGRAMS)
 
 xc_restore: xc_restore.o
-	$(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) $(APPEND_LDFLAGS)
+	$(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) $(LDLIBS_libxenstore) $(APPEND_LDFLAGS)
 
 xc_save: xc_save.o
 	$(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) $(LDLIBS_libxenstore) $(APPEND_LDFLAGS)
 
 readnotes: readnotes.o
-	$(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) $(APPEND_LDFLAGS)
+	$(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) $(LDLIBS_libxenstore) $(APPEND_LDFLAGS)
 
 lsevtchn: lsevtchn.o
 	$(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS_libxenctrl) $(APPEND_LDFLAGS)
-- 
1.7.4
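As an aside (not part of the patch), here is a hedged sketch of the
batching pattern that colo_finish_restore() uses when it re-queries page
types for the secondary's dirty pfns: walk a dirty bitmap, collect pfns
into a fixed-size batch, and flush each full batch plus the final partial
one. The bitmap helper, EXAMPLE_BATCH_SIZE and the flush callback are
stand-ins for xc_bitops.h, MAX_BATCH_SIZE and xc_get_pfn_type_batch();
they are assumptions for illustration, not the real libxc interfaces.

/*
 * Illustrative only: dirty-pfn batching in the style of
 * colo_finish_restore().  The helpers below are stand-ins.
 */
#define EXAMPLE_BATCH_SIZE 1024

typedef int (*flush_fn)(unsigned long *pfns, int count, void *opaque);

/* Test one bit in a bitmap stored as an array of unsigned long. */
static int bitmap_test(const unsigned long *bm, unsigned long bit)
{
    return (bm[bit / (8 * sizeof(unsigned long))] >>
            (bit % (8 * sizeof(unsigned long)))) & 1;
}

/* Hand every dirty pfn in [0, nr_pfns) to flush(), in fixed-size batches. */
int for_each_dirty_pfn(const unsigned long *dirty_bitmap,
                       unsigned long nr_pfns,
                       flush_fn flush, void *opaque)
{
    unsigned long pfns[EXAMPLE_BATCH_SIZE];
    unsigned long i;
    int j = 0;

    for (i = 0; i < nr_pfns; i++) {
        if (!bitmap_test(dirty_bitmap, i))
            continue;

        pfns[j++] = i;
        if (j == EXAMPLE_BATCH_SIZE) {
            if (flush(pfns, j, opaque))
                return -1;
            j = 0;
        }
    }

    /* flush the final, partially filled batch */
    if (j && flush(pfns, j, opaque))
        return -1;

    return 0;
}

colo_finish_restore() above has the same shape, with pfn_batch_slaver and
pfn_type_batch_slaver as the batch arrays and update_pfn_type() as the
flush step.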