[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] VM generation ID save/restore and migrate



On Fri, Dec 2, 2011 at 6:42 AM, Paul Durrant <paul.durrant@xxxxxxxxxx> wrote:
# HG changeset patch
# User Paul Durrant <paul.durrant@xxxxxxxxxx>
# Date 1322836937 0
# Node ID de5432066adc888a704bbbce9b18de3a60859cff
# Parent  62ff6a318c5db7779fdbf4ddfdfc9506e86fa701
VM generation ID save/restore and migrate.

Add code to track the address of the VM generation id buffer across a
save/restore or migrate, and to increment the generation id as necessary.
The address of the buffer is written into xenstore by hvmloader at
boot time. It must be read from xenstore by the caller of
xc_domain_save() and then written back again by the caller of
xc_domain_restore().

Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx>

diff -r 62ff6a318c5d -r de5432066adc tools/libxc/ia64/xc_ia64_linux_restore.c
--- a/tools/libxc/ia64/xc_ia64_linux_restore.c  Wed Nov 30 16:59:58 2011 -0800
+++ b/tools/libxc/ia64/xc_ia64_linux_restore.c  Fri Dec 02 14:42:17 2011 +0000
@@ -548,7 +548,8 @@ int
 xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
                  unsigned int store_evtchn, unsigned long *store_mfn,
                  unsigned int console_evtchn, unsigned long *console_mfn,
-                  unsigned int hvm, unsigned int pae, int superpages)
+                  unsigned int hvm, unsigned int pae, int superpages,
+                  int increment_gid, unsigned long *vm_gid_addr)
 {
    DECLARE_DOMCTL;
    int rc = 1;
diff -r 62ff6a318c5d -r de5432066adc tools/libxc/ia64/xc_ia64_linux_save.c
--- a/tools/libxc/ia64/xc_ia64_linux_save.c     Wed Nov 30 16:59:58 2011 -0800
+++ b/tools/libxc/ia64/xc_ia64_linux_save.c     Fri Dec 02 14:42:17 2011 +0000
@@ -382,7 +382,8 @@ out:
 int
 xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iters,
               uint32_t max_factor, uint32_t flags,
-               struct save_callbacks* callbacks, int hvm)
+               struct save_callbacks* callbacks, int hvm,
+               unsigned long vm_gid_addr)
 {
    DECLARE_DOMCTL;
    xc_dominfo_t info;
diff -r 62ff6a318c5d -r de5432066adc tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c   Wed Nov 30 16:59:58 2011 -0800
+++ b/tools/libxc/xc_domain_restore.c   Fri Dec 02 14:42:17 2011 +0000
@@ -676,6 +676,7 @@ typedef struct {
    uint64_t console_pfn;
    uint64_t acpi_ioport_location;
    uint64_t viridian;
+    uint64_t vm_gid_addr;
 } pagebuf_t;

 static int pagebuf_init(pagebuf_t* buf)
@@ -820,6 +821,17 @@ static int pagebuf_get_one(xc_interface
        }
        return pagebuf_get_one(xch, ctx, buf, fd, dom);

+    case XC_SAVE_ID_HVM_GENERATION_ID_ADDR:
+        /* Skip padding 4 bytes then read the generation id buffer location. */
+        if ( RDEXACT(fd, &buf->vm_gid_addr, sizeof(uint32_t)) ||
+             RDEXACT(fd, &buf->vm_gid_addr, sizeof(uint64_t)) )
+        {
+            PERROR("error read the generation id buffer location");
+            return -1;
+        }
+        DPRINTF("read generation id buffer address");
+        return pagebuf_get_one(xch, ctx, buf, fd, dom);
+
    default:
        if ( (count > MAX_BATCH_SIZE) || (count < 0) ) {
            ERROR("Max batch size exceeded (%d). Giving up.", count);
@@ -1186,7 +1198,8 @@ static int apply_batch(xc_interface *xch
 int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
                      unsigned int store_evtchn, unsigned long *store_mfn,
                      unsigned int console_evtchn, unsigned long *console_mfn,
-                      unsigned int hvm, unsigned int pae, int superpages)
+                      unsigned int hvm, unsigned int pae, int superpages,
+                      int no_increment_gid, unsigned long *vm_gid_addr)
 {
    DECLARE_DOMCTL;
    int rc = 1, frc, i, j, n, m, pae_extended_cr3 = 0, ext_vcpucontext = 0;
@@ -1386,6 +1399,39 @@ int xc_domain_restore(xc_interface *xch,
                xc_set_hvm_param(xch, dom, HVM_PARAM_VM86_TSS, pagebuf.vm86_tss);
            if ( pagebuf.console_pfn )
                console_pfn = pagebuf.console_pfn;
+            if ( pagebuf.vm_gid_addr ) {
+                if ( !no_increment_gid ) {
+                    unsigned int offset;
+                    unsigned char *buf;
+                    unsigned long long gid;
+
+                    /*
+                     * Map the VM generation id buffer and inject the new value.
+                     */
+
+                    pfn = pagebuf.vm_gid_addr >> PAGE_SHIFT;
+                    offset = pagebuf.vm_gid_addr & (PAGE_SIZE - 1);
+
+                    if ( (pfn >= dinfo->p2m_size) ||
+                         (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) )
+                    {
+                        ERROR("generation id buffer frame is bad");
+                        goto out;
+                    }
+
+                    mfn = ctx->p2m[pfn];
+                    buf = xc_map_foreign_range(xch, dom, PAGE_SIZE,
+                                               PROT_READ | PROT_WRITE, mfn);
+
+                    gid = *(unsigned long long *)(buf + offset);
+                    *(unsigned long long *)(buf + offset) = gid + 1;
+
+                    munmap(buf, PAGE_SIZE);
+                }
+
+                *vm_gid_addr = pagebuf.vm_gid_addr;
+            }
+
            break;  /* our work here is done */
        }

diff -r 62ff6a318c5d -r de5432066adc tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c      Wed Nov 30 16:59:58 2011 -0800
+++ b/tools/libxc/xc_domain_save.c      Fri Dec 02 14:42:17 2011 +0000
@@ -754,7 +754,8 @@ static int save_tsc_info(xc_interface *x

 int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iters,
                   uint32_t max_factor, uint32_t flags,
-                   struct save_callbacks* callbacks, int hvm)
+                   struct save_callbacks* callbacks, int hvm,
+                   unsigned long vm_gid_addr)
 {
    xc_dominfo_t info;
    DECLARE_DOMCTL;
@@ -1460,6 +1461,16 @@ int xc_domain_save(xc_interface *xch, in
            uint64_t data;
        } chunk = { 0, };

+        chunk.id = XC_SAVE_ID_HVM_GENERATION_ID_ADDR;
+        chunk.data = vm_gid_addr;
+
+        if ( (chunk.data != 0) &&
+             wrexact(io_fd, &chunk, sizeof(chunk)) )
+        {
+            PERROR("Error when writing the generation id buffer location for guest");
+            goto out;
+        }
+
        chunk.id = XC_SAVE_ID_HVM_IDENT_PT;
        chunk.data = 0;
        xc_get_hvm_param(xch, dom, HVM_PARAM_IDENT_PT,
diff -r 62ff6a318c5d -r de5432066adc tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h    Wed Nov 30 16:59:58 2011 -0800
+++ b/tools/libxc/xenguest.h    Fri Dec 02 14:42:17 2011 +0000
@@ -57,7 +57,8 @@ struct save_callbacks {
 */
 int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iters,
                   uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
-                   struct save_callbacks* callbacks, int hvm);
+                   struct save_callbacks* callbacks, int hvm,
+                   unsigned long vm_gid_addr);


 /**
@@ -71,12 +72,15 @@ int xc_domain_save(xc_interface *xch, in
 * @parm hvm non-zero if this is a HVM restore
 * @parm pae non-zero if this HVM domain has PAE support enabled
 * @parm superpages non-zero to allocate guest memory with superpages
+ * @parm gid the new generation id of the VM
+ * @parm vm_gid_addr returned with the address of the generation id buffer
 * @return 0 on success, -1 on failure
 */
 int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
                      unsigned int store_evtchn, unsigned long *store_mfn,
                      unsigned int console_evtchn, unsigned long *console_mfn,
-                      unsigned int hvm, unsigned int pae, int superpages);
+                      unsigned int hvm, unsigned int pae, int superpages,
+                      int increment_gid, unsigned long *vm_gid_addr);
 /**
 * xc_domain_restore writes a file to disk that contains the device
 * model saved state.
diff -r 62ff6a318c5d -r de5432066adc tools/libxc/xg_save_restore.h
--- a/tools/libxc/xg_save_restore.h     Wed Nov 30 16:59:58 2011 -0800
+++ b/tools/libxc/xg_save_restore.h     Fri Dec 02 14:42:17 2011 +0000
@@ -135,6 +135,7 @@
 #define XC_SAVE_ID_LAST_CHECKPOINT    -9 /* Commit to restoring after completion of current iteration. */
 #define XC_SAVE_ID_HVM_ACPI_IOPORTS_LOCATION -10
 #define XC_SAVE_ID_HVM_VIRIDIAN       -11
+#define XC_SAVE_ID_HVM_GENERATION_ID_ADDR -12

 /*
 ** We process save/restore/migrate in batches of pages; the below
diff -r 62ff6a318c5d -r de5432066adc tools/libxl/libxl_create.c
--- a/tools/libxl/libxl_create.c        Wed Nov 30 16:59:58 2011 -0800
+++ b/tools/libxl/libxl_create.c        Fri Dec 02 14:42:17 2011 +0000
@@ -101,6 +101,7 @@ int libxl_init_build_info(libxl_ctx *ctx
        b_info->u.hvm.vpt_align = 1;
        b_info->u.hvm.timer_mode = 1;
        b_info->u.hvm.nested_hvm = 0;
+        b_info->u.hvm.no_increment_gid = 0;
        break;
    case LIBXL_DOMAIN_TYPE_PV:
        b_info->u.pv.slack_memkb = 8 * 1024;
diff -r 62ff6a318c5d -r de5432066adc tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c   Wed Nov 30 16:59:58 2011 -0800
+++ b/tools/libxl/libxl_dom.c   Fri Dec 02 14:42:17 2011 +0000
@@ -125,7 +125,7 @@ int libxl__build_post(libxl__gc *gc, uin
    if (info->cpuid != NULL)
        libxl_cpuid_set(ctx, domid, info->cpuid);

-    ents = libxl__calloc(gc, 12 + (info->max_vcpus * 2) + 2, sizeof(char *));
+    ents = libxl__calloc(gc, 14 + (info->max_vcpus * 2) + 2, sizeof(char *));
    ents[0] = "memory/static-max";
    ents[1] = libxl__sprintf(gc, "%d", info->max_memkb);
    ents[2] = "memory/target";
@@ -138,9 +138,11 @@ int libxl__build_post(libxl__gc *gc, uin
    ents[9] = libxl__sprintf(gc, "%"PRIu32, state->store_port);
    ents[10] = "store/ring-ref";
    ents[11] = libxl__sprintf(gc, "%lu", state->store_mfn);
+    ents[12] = "data/generation-id";
+    ents[13] = libxl__sprintf(gc, "0x%lx", state->vm_gid_addr);
    for (i = 0; i < info->max_vcpus; i++) {
-        ents[12+(i*2)]   = libxl__sprintf(gc, "cpu/%d/availability", i);
-        ents[12+(i*2)+1] = (i && info->cur_vcpus && !(info->cur_vcpus & (1 << i)))
+        ents[14+(i*2)]   = libxl__sprintf(gc, "cpu/%d/availability", i);
+        ents[14+(i*2)+1] = (i && info->cur_vcpus && !(info->cur_vcpus & (1 << i)))
                            ? "offline" : "online";
    }

@@ -357,16 +359,19 @@ int libxl__domain_restore_common(libxl__
    /* read signature */
    int rc;
    int hvm, pae, superpages;
+    int no_increment_gid;
    switch (info->type) {
    case LIBXL_DOMAIN_TYPE_HVM:
        hvm = 1;
        superpages = 1;
        pae = info->u.hvm.pae;
+        no_increment_gid = info->u.hvm.no_increment_gid;
        break;
    case LIBXL_DOMAIN_TYPE_PV:
        hvm = 0;
        superpages = 0;
        pae = 1;
+        no_increment_gid = 0;
        break;
    default:
        return ERROR_INVAL;
@@ -374,7 +379,7 @@ int libxl__domain_restore_common(libxl__
    rc = xc_domain_restore(ctx->xch, fd, domid,
                           state->store_port, &state->store_mfn,
                           state->console_port, &state->console_mfn,
-                           hvm, pae, superpages);
+                           hvm, pae, superpages, no_increment_gid, &state->vm_gid_addr);
    if ( rc ) {
        LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "restoring domain");
        return ERROR_FAIL;
@@ -540,12 +545,22 @@ int libxl__domain_suspend_common(libxl__
    struct save_callbacks callbacks;
    struct suspendinfo si;
    int hvm, rc = ERROR_FAIL;
+    unsigned long vm_gid_addr;

    switch (type) {
-    case LIBXL_DOMAIN_TYPE_HVM:
+    case LIBXL_DOMAIN_TYPE_HVM: {
+        char *path;
+        char *addr;
+
+        path = libxl__sprintf(gc, "%s/data/generation-id", libxl__xs_get_dompath(gc, domid));
+        addr = libxl__xs_read(gc, XBT_NULL, path);
+
+        vm_gid_addr = (addr) ? strtoul(addr, NULL, 0) : 0;
        hvm = 1;
        break;
+    }
    case LIBXL_DOMAIN_TYPE_PV:
+        vm_gid_addr = 0;
        hvm = 0;
        break;
    default:
@@ -583,7 +598,8 @@ int libxl__domain_suspend_common(libxl__
    callbacks.switch_qemu_logdirty = libxl__domain_suspend_common_switch_qemu_logdirty;
    callbacks.data = &si;

-    rc = xc_domain_save(ctx->xch, fd, domid, 0, 0, flags, &callbacks, hvm);
+    rc = xc_domain_save(ctx->xch, fd, domid, 0, 0, flags, &callbacks,
+                        hvm, vm_gid_addr);
    if ( rc ) {
        LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "saving domain: %s",
                         si.guest_responded ?
diff -r 62ff6a318c5d -r de5432066adc tools/libxl/libxl_internal.h
--- a/tools/libxl/libxl_internal.h      Wed Nov 30 16:59:58 2011 -0800
+++ b/tools/libxl/libxl_internal.h      Fri Dec 02 14:42:17 2011 +0000
@@ -199,6 +199,7 @@ typedef struct {

    uint32_t console_port;
    unsigned long console_mfn;
+    unsigned long vm_gid_addr;
 } libxl__domain_build_state;

 _hidden int libxl__build_pre(libxl__gc *gc, uint32_t domid,
diff -r 62ff6a318c5d -r de5432066adc tools/libxl/libxl_types.idl
--- a/tools/libxl/libxl_types.idl       Wed Nov 30 16:59:58 2011 -0800
+++ b/tools/libxl/libxl_types.idl       Fri Dec 02 14:42:17 2011 +0000
@@ -183,6 +183,7 @@ libxl_domain_build_info = Struct("domain
                                       ("vpt_align", bool),
                                       ("timer_mode", integer),
                                       ("nested_hvm", bool),
+                                       ("no_increment_gid", bool),
                                       ])),
                 ("pv", Struct(None, [("kernel", libxl_file_reference),
                                      ("slack_memkb", uint32),
diff -r 62ff6a318c5d -r de5432066adc tools/libxl/xl_cmdimpl.c
--- a/tools/libxl/xl_cmdimpl.c  Wed Nov 30 16:59:58 2011 -0800
+++ b/tools/libxl/xl_cmdimpl.c  Fri Dec 02 14:42:17 2011 +0000
@@ -360,6 +360,7 @@ static void printf_info(int domid,
        printf("\t\t\t(vpt_align %d)\n", b_info->u.hvm.vpt_align);
        printf("\t\t\t(timer_mode %d)\n", b_info->u.hvm.timer_mode);
        printf("\t\t\t(nestedhvm %d)\n", b_info->u.hvm.nested_hvm);
+        printf("\t\t\t(no_increment_gid %d)\n", b_info->u.hvm.no_increment_gid);

        printf("\t\t\t(device_model %s)\n", dm_info->device_model ? : "default");
        printf("\t\t\t(videoram %d)\n", dm_info->videoram);
@@ -1362,6 +1363,7 @@ struct domain_create {
    const char *restore_file;
    int migrate_fd; /* -1 means none */
    char **migration_domname_r; /* from malloc */
+    int no_increment_gid;
 };

 static int freemem(libxl_domain_build_info *b_info, libxl_device_model_info *dm_info)
@@ -1571,6 +1573,8 @@ static int create_domain(struct domain_c
        }
    }

+    d_config.b_info.u.hvm.no_increment_gid = dom_info->no_increment_gid;
+
    if (debug || dom_info->dryrun)
        printf_info(-1, &d_config, &d_config.dm_info);

@@ -2796,6 +2800,7 @@ static void migrate_receive(int debug, i
    dom_info.restore_file = "incoming migration stream";
    dom_info.migrate_fd = 0; /* stdin */
    dom_info.migration_domname_r = &migration_domname;
+    dom_info.no_increment_gid = 1;

    rc = create_domain(&dom_info);
    if (rc < 0) {
diff -r 62ff6a318c5d -r de5432066adc tools/python/xen/lowlevel/checkpoint/libcheckpoint.c
--- a/tools/python/xen/lowlevel/checkpoint/libcheckpoint.c      Wed Nov 30 16:59:58 2011 -0800
+++ b/tools/python/xen/lowlevel/checkpoint/libcheckpoint.c      Fri Dec 02 14:42:17 2011 +0000
@@ -174,6 +174,7 @@ int checkpoint_start(checkpoint_state* s
 {
    int hvm, rc;
    int flags = XCFLAGS_LIVE;
+    unsigned long vm_gid_addr;

    if (!s->domid) {
       s->errstr = "checkpoint state not opened";
@@ -184,14 +185,25 @@ int checkpoint_start(checkpoint_state* s

    hvm = s->domtype > dt_pv;
    if (hvm) {
+       char path[128];
+       char *addr;
+
+       sprintf(path, "/local/domain/%u/data/generation-id", s->domid);
+       addr = xs_read(s->xsh, XBT_NULL, path, NULL);
+
+       vm_gid_addr = (addr) ? strtoul(addr, NULL, 0) : 0;
+       free(addr);
+
       flags |= XCFLAGS_HVM;
       if (switch_qemu_logdirty(s, 1))
           return -1;
+    } else {
+       vm_gid_addr = 0;
    }

    callbacks->switch_qemu_logdirty = noop_switch_logdirty;

-    rc = xc_domain_save(s->xch, fd, s->domid, 0, 0, flags, callbacks, hvm);
+    rc = xc_domain_save(s->xch, fd, s->domid, 0, 0, flags, callbacks, hvm, vm_gid_addr);

    if (hvm)
       switch_qemu_logdirty(s, 0);
diff -r 62ff6a318c5d -r de5432066adc tools/xcutils/xc_restore.c
--- a/tools/xcutils/xc_restore.c        Wed Nov 30 16:59:58 2011 -0800
+++ b/tools/xcutils/xc_restore.c        Fri Dec 02 14:42:17 2011 +0000
@@ -23,7 +23,8 @@ main(int argc, char **argv)
    xc_interface *xch;
    int io_fd, ret;
    int superpages;
-    unsigned long store_mfn, console_mfn;
+    unsigned long store_mfn, console_mfn, vm_gid_addr;
+    int no_increment_gid;

    if ( (argc != 8) && (argc != 9) )
        errx(1, "usage: %s iofd domid store_evtchn "
@@ -40,19 +41,25 @@ main(int argc, char **argv)
    hvm  = atoi(argv[5]);
    pae  = atoi(argv[6]);
    apic = atoi(argv[7]);
-    if ( argc == 9 )
+    if ( argc >= 9 )
           superpages = atoi(argv[8]);
    else
           superpages = !!hvm;
+    if ( argc >= 10 )
+           no_increment_gid = !atoi(argv[9]);
+    else
+           no_increment_gid = 0;

    ret = xc_domain_restore(xch, io_fd, domid, store_evtchn, &store_mfn,
-                            console_evtchn, &console_mfn, hvm, pae, superpages);
+                            console_evtchn, &console_mfn, hvm, pae, superpages,
+                            no_increment_gid, &vm_gid_addr);

    if ( ret == 0 )
    {
       printf("store-mfn %li\n", store_mfn);
        if ( !hvm )
            printf("console-mfn %li\n", console_mfn);
+       printf("vm-gid-addr %lx\n", vm_gid_addr);
       fflush(stdout);
    }

diff -r 62ff6a318c5d -r de5432066adc tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c   Wed Nov 30 16:59:58 2011 -0800
+++ b/tools/xcutils/xc_save.c   Fri Dec 02 14:42:17 2011 +0000
@@ -169,6 +169,10 @@ main(int argc, char **argv)
    unsigned int maxit, max_f;
    int io_fd, ret, port;
    struct save_callbacks callbacks;
+    char path[128];
+    struct xs_handle *xs;
+    char *addr;
+    unsigned long vm_gid_addr;

    if (argc != 6)
        errx(1, "usage: %s iofd domid maxit maxf flags", argv[0]);
@@ -207,8 +211,21 @@ main(int argc, char **argv)
    memset(&callbacks, 0, sizeof(callbacks));
    callbacks.suspend = suspend;
    callbacks.switch_qemu_logdirty = switch_qemu_logdirty;
+
+    sprintf(path, "/local/domain/%d/data/generation-id", si.domid);
+
+    if ((xs = xs_daemon_open()) == NULL)
+        errx(1, "Couldn't contact xenstore");
+
+    addr = xs_read(xs, XBT_NULL, path, NULL);
+
+    xs_daemon_close(xs);
+
+    vm_gid_addr = (addr) ? strtoul(addr, NULL, 0) : 0;
+    free(addr);
+
    ret = xc_domain_save(si.xch, io_fd, si.domid, maxit, max_f, si.flags,
-                         &callbacks, !!(si.flags & XCFLAGS_HVM));
+                         &callbacks, !!(si.flags & XCFLAGS_HVM), vm_gid_addr);

    if (si.suspend_evtchn > 0)
        xc_suspend_evtchn_release(si.xch, si.xce, si.domid, si.suspend_evtchn);

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

You might have to rebase the entire patch.
The compression patch (pushed into staging just before you sent this one)
touches the same diff context that this code does (esp. the SAVE_IDs,
code in xc_domain_restore, in tools/python/xen/lowlevel/checkpoint/checkpoint.c, etc.)

Assuming that the latest regression in staging/xen-unstable is resolved, I think
most of this code won't apply cleanly.

shriram
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.