[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC Patch v2 10/16] colo: implement restore_callbacks finish_restore()



This patch implements restore callbacks for colo:
1. finish_restore():
        We run xc_restore in XendCheckpoint.py. We communicate with
        XendCheckpoint.py like this:
        a. write "finish\n" to stdout when we are ready to resume the vm.
        b. XendCheckpoint.py writes "resume" when the vm is resumed
        c. write "resume" to master when postresume is done
        d. "continue" is read from master when a new checkpoint begins
        e. write "suspend" to master when the vm is suspended
        f. "start" is read from master when primary begins to transfer
           dirty pages.

        SVM is running in colo mode, so we should suspend it to sync the state
        and resume it. We need to fix p2m_frame_list_list before resuming the
        SVM. The content of p2m_frame_list_list should be cached after
        suspending the SVM.

Signed-off-by: Ye Wei <wei.ye1987@xxxxxxxxx>
Signed-off-by: Jiang Yunhong <yunhong.jiang@xxxxxxxxx>
Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
---
 tools/libxc/Makefile                 |    6 +-
 tools/libxc/xc_domain_restore_colo.c |  335 ++++++++++++++++++++++++++++++++++
 tools/libxc/xc_save_restore_colo.h   |    1 +
 tools/libxl/Makefile                 |    2 +-
 tools/xcutils/Makefile               |    4 +-
 5 files changed, 342 insertions(+), 6 deletions(-)

diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
index 70994b9..92d11af 100644
--- a/tools/libxc/Makefile
+++ b/tools/libxc/Makefile
@@ -49,7 +49,7 @@ GUEST_SRCS-y += xc_nomigrate.c
 endif
 
 vpath %.c ../../xen/common/libelf
-CFLAGS += -I../../xen/common/libelf
+CFLAGS += -I../../xen/common/libelf -I../xenstore
 
 ELF_SRCS-y += libelf-tools.c libelf-loader.c
 ELF_SRCS-y += libelf-dominfo.c
@@ -199,8 +199,8 @@ xc_dom_bzimageloader.o: CFLAGS += $(call zlib-options,D)
 xc_dom_bzimageloader.opic: CFLAGS += $(call zlib-options,D)
 
 libxenguest.so.$(MAJOR).$(MINOR): COMPRESSION_LIBS = $(call zlib-options,l)
-libxenguest.so.$(MAJOR).$(MINOR): $(GUEST_PIC_OBJS) libxenctrl.so
-       $(CC) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libxenguest.so.$(MAJOR) 
$(SHLIB_LDFLAGS) -o $@ $(GUEST_PIC_OBJS) $(COMPRESSION_LIBS) -lz 
$(LDLIBS_libxenctrl) $(PTHREAD_LIBS) $(APPEND_LDFLAGS)
+libxenguest.so.$(MAJOR).$(MINOR): $(GUEST_PIC_OBJS) libxenctrl.so 
$(LDLIBS_libxenstore)
+       $(CC) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libxenguest.so.$(MAJOR) 
$(SHLIB_LDFLAGS) -o $@ $(GUEST_PIC_OBJS) $(COMPRESSION_LIBS) -lz 
$(LDLIBS_libxenctrl) $(PTHREAD_LIBS) $(LDLIBS_libxenstore) $(APPEND_LDFLAGS)
 
 xenctrl_osdep_ENOSYS.so: $(OSDEP_PIC_OBJS) libxenctrl.so
        $(CC) -g $(LDFLAGS) $(SHLIB_LDFLAGS) -o $@ $(OSDEP_PIC_OBJS) 
$(LDLIBS_libxenctrl) $(APPEND_LDFLAGS)
diff --git a/tools/libxc/xc_domain_restore_colo.c 
b/tools/libxc/xc_domain_restore_colo.c
index 70cdd16..6b87a2d 100644
--- a/tools/libxc/xc_domain_restore_colo.c
+++ b/tools/libxc/xc_domain_restore_colo.c
@@ -2,6 +2,7 @@
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <xc_bitops.h>
+#include <xenstore.h>
 
 struct restore_colo_data
 {
@@ -602,3 +603,337 @@ int colo_update_p2m_table(struct restore_data *comm_data, 
void *data)
 
     return 0;
 }
+
+/*
+ * Call xc_get_pfn_type_batch() on the first 'count' entries of
+ * pfn_type_batch, then store each resulting entry, masked with
+ * XEN_DOMCTL_PFINFO_LTAB_MASK, into pfn_type[] at the index held in the
+ * matching pfn_batch[] slot.
+ *
+ * Returns 0 on success, -1 if xc_get_pfn_type_batch() reports failure.
+ */
+static int update_pfn_type(xc_interface *xch, uint32_t dom, int count,
+                           xen_pfn_t *pfn_batch, xen_pfn_t *pfn_type_batch,
+                           xen_pfn_t *pfn_type)
+{
+    unsigned long idx = 0;
+
+    if (xc_get_pfn_type_batch(xch, dom, count, pfn_type_batch) != 0)
+    {
+        ERROR("xc_get_pfn_type_batch for slaver failed");
+        return -1;
+    }
+
+    while (idx < count)
+    {
+        xen_pfn_t pfn = pfn_batch[idx];
+
+        /* keep only the table-type bits of the reported type */
+        pfn_type[pfn] = pfn_type_batch[idx] & XEN_DOMCTL_PFINFO_LTAB_MASK;
+        idx++;
+    }
+
+    return 0;
+}
+
+/*
+ * Run "/etc/xen/scripts/network-colo slaver install vif<dom>.0 eth0" in a
+ * child process and wait for it to terminate.
+ *
+ * Returns 0 when the script exits normally with status 0, and -1 when
+ * vfork() fails, waitpid() does not report our child, or the child did not
+ * exit with status 0 (which includes the case where execl() itself failed).
+ */
+static int install_fw_network(struct restore_data *comm_data)
+{
+    xc_interface *xch = comm_data->xch;
+    char vif[20];
+    pid_t pid;
+    int status;
+    int rc;
+
+    snprintf(vif, sizeof(vif), "vif%u.0", comm_data->dom);
+
+    pid = vfork();
+    if (pid < 0) {
+        ERROR("vfork fails");
+        return -1;
+    }
+
+    if (pid == 0) {
+        /* child: the variadic sentinel must be a null *pointer* */
+        execl("/etc/xen/scripts/network-colo", "network-colo", "slaver",
+              "install", vif, "eth0", (char *)NULL);
+
+        /*
+         * execl() only returns on failure.  A vfork() child must neither
+         * return from the function that called vfork() nor call anything
+         * but _exit()/exec*(), so leave with a non-zero status and let the
+         * parent report the failure through the waitpid() check below.
+         */
+        _exit(127);
+    }
+
+    /* parent */
+    rc = waitpid(pid, &status, 0);
+    if (rc != pid || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+        ERROR("getting child status fails");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Map the guest's p2m frame-list-list page and the frame-list pages it
+ * refers to, using protection 'prot'.
+ *
+ * The shared info frame is mapped temporarily to reach
+ * arch.pfn_to_mfn_frame_list_list:
+ *   - p2m_fll == NULL: the field is first overwritten with
+ *     colo_data->p2m_fll (so 'prot' has to allow writing);
+ *   - p2m_fll != NULL: the current field value is stored in *p2m_fll.
+ *
+ * On success (return 0) *p2m_frame_list_list_p maps PAGE_SIZE bytes and
+ * *p2m_frame_list_p maps P2M_FLL_ENTRIES pages; the caller must munmap()
+ * both.  On failure (return -1) nothing stays mapped and the output
+ * pointers are left untouched.
+ */
+static int get_p2m_list(struct restore_data *comm_data,
+                        struct restore_colo_data *colo_data,
+                        xen_pfn_t *p2m_fll,
+                        xen_pfn_t **p2m_frame_list_list_p,
+                        char **p2m_frame_list_p,
+                        int prot)
+{
+    /* NOTE(review): dinfo is presumably consumed by P2M_FLL_ENTRIES - confirm */
+    struct domain_info_context *dinfo = comm_data->dinfo;
+    xc_interface *xch = comm_data->xch;
+    uint32_t dom = comm_data->dom;
+    xc_dominfo_t info;
+    shared_info_t *shinfo;
+    xen_pfn_t *fll_map;
+    char *fl_map;
+    int rc = -1;
+
+    if ( xc_domain_getinfo(xch, dom, 1, &info) != 1 )
+    {
+        ERROR("Could not get domain info");
+        return -1;
+    }
+
+    /* Map the shared info frame */
+    shinfo = xc_map_foreign_range(xch, dom, PAGE_SIZE, prot,
+                                  info.shared_info_frame);
+    if ( !shinfo )
+    {
+        ERROR("Couldn't map shared info");
+        return -1;
+    }
+
+    if ( p2m_fll != NULL )
+        *p2m_fll = shinfo->arch.pfn_to_mfn_frame_list_list;
+    else
+        shinfo->arch.pfn_to_mfn_frame_list_list = colo_data->p2m_fll;
+
+    fll_map = xc_map_foreign_range(xch, dom, PAGE_SIZE, prot,
+                                   shinfo->arch.pfn_to_mfn_frame_list_list);
+    if ( !fll_map )
+    {
+        ERROR("Couldn't map p2m_frame_list_list");
+        goto out_shinfo;
+    }
+
+    fl_map = xc_map_foreign_pages(xch, dom, prot, fll_map, P2M_FLL_ENTRIES);
+    if ( !fl_map )
+    {
+        ERROR("Couldn't map p2m_frame_list");
+        munmap(fll_map, PAGE_SIZE);
+        goto out_shinfo;
+    }
+
+    *p2m_frame_list_list_p = fll_map;
+    *p2m_frame_list_p = fl_map;
+    rc = 0;
+
+out_shinfo:
+    /* the shared info mapping is only needed inside this function */
+    munmap(shinfo, PAGE_SIZE);
+
+    return rc;
+}
+
+/*
+ * Write the cached copies held in colo_data (p2m_frame_list_list and
+ * p2m_frame_list) back into the guest.  get_p2m_list() is called with a
+ * NULL p2m_fll, so it also stores colo_data->p2m_fll into the shared info
+ * page before mapping.
+ *
+ * Returns 0 on success, or the non-zero result of get_p2m_list().
+ */
+static int update_p2m_list(struct restore_data *comm_data,
+                           struct restore_colo_data *colo_data)
+{
+    /* NOTE(review): dinfo is presumably consumed by P2M_FLL_ENTRIES - confirm */
+    struct domain_info_context *dinfo = comm_data->dinfo;
+    xen_pfn_t *fll_map = NULL;
+    char *fl_map = NULL;
+    int rc;
+
+    rc = get_p2m_list(comm_data, colo_data, NULL, &fll_map, &fl_map,
+                      PROT_READ | PROT_WRITE);
+    if ( rc != 0 )
+        return rc;
+
+    /* cache -> guest */
+    memcpy(fll_map, colo_data->p2m_frame_list_list, PAGE_SIZE);
+    memcpy(fl_map, colo_data->p2m_frame_list, PAGE_SIZE * P2M_FLL_ENTRIES);
+
+    munmap(fll_map, PAGE_SIZE);
+    munmap(fl_map, PAGE_SIZE * P2M_FLL_ENTRIES);
+
+    return 0;
+}
+
+/*
+ * Copy the guest's p2m frame-list-list page and frame-list pages into the
+ * buffers held in colo_data, and record the guest's current
+ * pfn_to_mfn_frame_list_list value in colo_data->p2m_fll.  The guest pages
+ * are mapped read-only.
+ *
+ * Returns 0 on success, or the non-zero result of get_p2m_list().
+ */
+static int cache_p2m_list(struct restore_data *comm_data,
+                          struct restore_colo_data *colo_data)
+{
+    /* NOTE(review): dinfo is presumably consumed by P2M_FLL_ENTRIES - confirm */
+    struct domain_info_context *dinfo = comm_data->dinfo;
+    xen_pfn_t *fll_map = NULL;
+    char *fl_map = NULL;
+    int rc;
+
+    rc = get_p2m_list(comm_data, colo_data, &colo_data->p2m_fll,
+                      &fll_map, &fl_map, PROT_READ);
+    if ( rc != 0 )
+        return rc;
+
+    /* guest -> cache */
+    memcpy(colo_data->p2m_frame_list_list, fll_map, PAGE_SIZE);
+    memcpy(colo_data->p2m_frame_list, fl_map, PAGE_SIZE * P2M_FLL_ENTRIES);
+
+    munmap(fll_map, PAGE_SIZE);
+    munmap(fl_map, PAGE_SIZE * P2M_FLL_ENTRIES);
+
+    return 0;
+}
+
+/*
+ * We are ready to start the guest when this function is called.  We do
+ * not return until a new checkpoint has to be taken or an error occurs.
+ *
+ * communication with python and master
+ * python code          restore code        master      comment
+ *                                    <===  "continue"  a new checkpoint begins
+ *                      "suspend"     ===>              SVM is suspended
+ *                                    <===  "start"     getting dirty pages begins
+ *               <===   "finish\n"                      SVM is ready
+ * "resume"      ===>                                   SVM is resumed
+ *                      "resume"      ===>              postresume is done
+ *
+ * Tokens from the python code are read from fd 0 and written to stdout;
+ * tokens to/from the master travel over comm_data->io_fd.
+ *
+ * return value:
+ *   -1: error
+ *    0: continue to start vm (the wait for "continue" failed)
+ *    1: continue to do a checkpoint
+ */
+int colo_finish_restore(struct restore_data *comm_data, void *data)
+{
+    struct restore_colo_data *colo_data = data;
+    xc_interface *xch = comm_data->xch;
+    uint32_t dom = comm_data->dom;
+    struct domain_info_context *dinfo = comm_data->dinfo;
+    xc_evtchn *xce = colo_data->xce;
+    unsigned long *pfn_batch_slaver = colo_data->pfn_batch_slaver;
+    unsigned long *pfn_type_batch_slaver = colo_data->pfn_type_batch_slaver;
+    unsigned long *pfn_type_slaver = colo_data->pfn_type_slaver;
+
+    unsigned long i, j;
+    int rc;
+    /* cleared before every read so a failed read never leaves stale text */
+    char str[10] = "";
+    int remote_port;
+    int local_port = colo_data->local_port;
+
+    /* fix pfn_to_mfn_frame_list_list */
+    if (!colo_data->first_time)
+    {
+        if (update_p2m_list(comm_data, colo_data) < 0)
+            return -1;
+    }
+
+    /* output the store-mfn & console-mfn */
+    printf("store-mfn %li\n", comm_data->store_mfn);
+    printf("console-mfn %li\n", comm_data->console_mfn);
+
+    /* notify python code checkpoint finish */
+    printf("finish\n");
+    fflush(stdout);
+
+    /* we need to know which pages are dirty to restore the guest */
+    if (xc_shadow_control(xch, dom, XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
+                          NULL, 0, NULL, 0, NULL) < 0 )
+    {
+        ERROR("enabling logdirty fails");
+        return -1;
+    }
+
+    /* wait domain resume, then connect the suspend evtchn */
+    if (read_exact(0, str, 6))
+    {
+        ERROR("reading resume from python fails");
+        return -1;
+    }
+    str[6] = '\0';
+    if (strcmp(str, "resume"))
+    {
+        ERROR("read %s, expect resume", str);
+        return -1;
+    }
+
+    if (colo_data->first_time) {
+        if (install_fw_network(comm_data) < 0)
+            return -1;
+    }
+
+    /*
+     * notify master vm is resumed.  The result is deliberately not
+     * checked: if the master is gone, the read of "continue" below fails
+     * and takes the "start the guest" path.
+     */
+    write_exact(comm_data->io_fd, "resume", 6);
+
+    if (colo_data->first_time) {
+        /* NOTE(review): fixed 10s delay before looking up the suspend
+         * port - presumably gives the guest time to publish it; confirm */
+        sleep(10);
+        remote_port = xs_suspend_evtchn_port(dom);
+        if (remote_port < 0) {
+            ERROR("getting remote suspend port fails");
+            return -1;
+        }
+
+        local_port = xc_suspend_evtchn_init(xch, xce, dom, remote_port);
+        if (local_port < 0) {
+            ERROR("initializing suspend evtchn fails");
+            return -1;
+        }
+
+        colo_data->local_port = local_port;
+    }
+
+    /* wait for the next checkpoint */
+    memset(str, 0, sizeof(str));
+    if (read_exact(comm_data->io_fd, str, 8) || strcmp(str, "continue"))
+    {
+        ERROR("wait for a new checkpoint fails");
+        /* start the guest now? */
+        return 0;
+    }
+
+    /* notify the suspend evtchn */
+    rc = xc_evtchn_notify(xce, local_port);
+    if (rc < 0)
+    {
+        ERROR("notifying the suspend evtchn fails");
+        return -1;
+    }
+
+    rc = xc_await_suspend(xch, xce, local_port);
+    if (rc < 0)
+    {
+        ERROR("waiting suspend fails");
+        return -1;
+    }
+
+    /* notify master suspend is done */
+    if (write_exact(comm_data->io_fd, "suspend", 7))
+    {
+        ERROR("notifying master of suspend fails");
+        return -1;
+    }
+
+    memset(str, 0, sizeof(str));
+    if (read_exact(comm_data->io_fd, str, 5) || strcmp(str, "start"))
+    {
+        ERROR("waiting for start from master fails");
+        return -1;
+    }
+
+    /*
+     * NOTE(review): HYPERCALL_BUFFER(dirty_pages) needs a matching
+     * hypercall buffer declaration for 'dirty_pages'; none is visible in
+     * this function, while the scan below reads colo_data->dirty_pages -
+     * confirm both refer to the same bitmap.
+     */
+    if (xc_shadow_control(xch, dom, XEN_DOMCTL_SHADOW_OP_CLEAN,
+                          HYPERCALL_BUFFER(dirty_pages), dinfo->p2m_size,
+                          NULL, 0, NULL) != dinfo->p2m_size)
+    {
+        ERROR("getting slaver dirty fails");
+        return -1;
+    }
+
+    if (xc_shadow_control(xch, dom, XEN_DOMCTL_SHADOW_OP_OFF, NULL, 0, NULL,
+                          0, NULL) < 0 )
+    {
+        ERROR("disabling dirty-log fails");
+        return -1;
+    }
+
+    /* refresh the cached page type of every dirty pfn, one batch at a time */
+    j = 0;
+    for (i = 0; i < colo_data->max_mem_pfn; i++)
+    {
+        if ( !test_bit(i, colo_data->dirty_pages) )
+            continue;
+
+        pfn_batch_slaver[j] = i;
+        pfn_type_batch_slaver[j++] = comm_data->p2m[i];
+        if (j == MAX_BATCH_SIZE)
+        {
+            if (update_pfn_type(xch, dom, j, pfn_batch_slaver,
+                                pfn_type_batch_slaver, pfn_type_slaver))
+            {
+                return -1;
+            }
+            j = 0;
+        }
+    }
+
+    /* flush the final, partially filled batch */
+    if (j)
+    {
+        if (update_pfn_type(xch, dom, j, pfn_batch_slaver,
+                            pfn_type_batch_slaver, pfn_type_slaver))
+        {
+            return -1;
+        }
+    }
+
+    /* remember the p2m lists of the suspended SVM for the next resume */
+    if (cache_p2m_list(comm_data, colo_data) < 0)
+        return -1;
+
+    colo_data->first_time = 0;
+
+    return 1;
+}
diff --git a/tools/libxc/xc_save_restore_colo.h 
b/tools/libxc/xc_save_restore_colo.h
index 98e5128..57df750 100644
--- a/tools/libxc/xc_save_restore_colo.h
+++ b/tools/libxc/xc_save_restore_colo.h
@@ -9,5 +9,6 @@ extern void colo_free(struct restore_data *, void *);
 extern char *colo_get_page(struct restore_data *, void *, unsigned long);
 extern int colo_flush_memory(struct restore_data *, void *);
 extern int colo_update_p2m_table(struct restore_data *, void *);
+extern int colo_finish_restore(struct restore_data *, void *);
 
 #endif
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index cf214bb..36b924d 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -192,7 +192,7 @@ xl: $(XL_OBJS) libxlutil.so libxenlight.so
        $(CC) $(LDFLAGS) -o $@ $(XL_OBJS) libxlutil.so $(LDLIBS_libxenlight) 
$(LDLIBS_libxenctrl) -lyajl $(APPEND_LDFLAGS)
 
 libxl-save-helper: $(SAVE_HELPER_OBJS) libxenlight.so
-       $(CC) $(LDFLAGS) -o $@ $(SAVE_HELPER_OBJS) $(LDLIBS_libxenctrl) 
$(LDLIBS_libxenguest) $(APPEND_LDFLAGS)
+       $(CC) $(LDFLAGS) -o $@ $(SAVE_HELPER_OBJS) $(LDLIBS_libxenctrl) 
$(LDLIBS_libxenguest) $(LDLIBS_libxenstore) $(APPEND_LDFLAGS)
 
 testidl: testidl.o libxlutil.so libxenlight.so
        $(CC) $(LDFLAGS) -o $@ testidl.o libxlutil.so $(LDLIBS_libxenlight) 
$(LDLIBS_libxenctrl) $(APPEND_LDFLAGS)
diff --git a/tools/xcutils/Makefile b/tools/xcutils/Makefile
index 6c502f1..51f3f0e 100644
--- a/tools/xcutils/Makefile
+++ b/tools/xcutils/Makefile
@@ -27,13 +27,13 @@ all: build
 build: $(PROGRAMS)
 
 xc_restore: xc_restore.o
-       $(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) 
$(APPEND_LDFLAGS)
+       $(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) 
$(LDLIBS_libxenstore) $(APPEND_LDFLAGS)
 
 xc_save: xc_save.o
        $(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) 
$(LDLIBS_libxenstore) $(APPEND_LDFLAGS)
 
 readnotes: readnotes.o
-       $(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) 
$(APPEND_LDFLAGS)
+       $(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) 
$(LDLIBS_libxenstore) $(APPEND_LDFLAGS)
 
 lsevtchn: lsevtchn.o
        $(CC) $(LDFLAGS) $^ -o $@ $(LDLIBS_libxenctrl) $(APPEND_LDFLAGS)
-- 
1.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.