[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH RFC v2 18/23] libxl/migration: implement the sender side of postcopy live migration



From: Joshua Otto <jtotto@xxxxxxxxxxxx>

To make the libxl sender capable of supporting postcopy live migration:
- Add a postcopy transition callback chain through the stream writer (this
  callback chain is nearly identical to the checkpoint callback chain, and
  differs meaningfully only in its failure/completion behaviour)
- Wire this callback chain up to the xc postcopy callback entries in the domain
  save logic.
- Introduce a new libxl API function, libxl_domain_live_migrate(),
  taking the same parameters as libxl_domain_suspend() as well as a
  recv_fd to enable bi-directional communication between the sender and
  receiver and a boolean out-parameter to enable the caller to reason
  about the safety of recovery from a postcopy failure.  (The
  libxl_domain_live_migrate() and libxl_domain_suspend() parameter lists
  will likely only continue to diverge over time, so it makes good sense
  to split them now.)

No mechanism is introduced yet to enable library clients to induce a postcopy
live migration - this will follow after the libxl postcopy receiver logic.

Signed-off-by: Joshua Otto <jtotto@xxxxxxxxxxxx>
---
 docs/specs/libxl-migration-stream.pandoc | 19 ++++++++-
 tools/libxl/libxl.h                      |  7 ++++
 tools/libxl/libxl_dom_save.c             | 25 +++++++++++-
 tools/libxl/libxl_domain.c               | 29 +++++++++++++-
 tools/libxl/libxl_internal.h             | 21 ++++++++--
 tools/libxl/libxl_sr_stream_format.h     | 13 +++---
 tools/libxl/libxl_stream_write.c         | 69 ++++++++++++++++++++++++++++++--
 tools/xl/xl_migrate.c                    |  6 ++-
 8 files changed, 169 insertions(+), 20 deletions(-)

diff --git a/docs/specs/libxl-migration-stream.pandoc b/docs/specs/libxl-migration-stream.pandoc
index a1ba1ac..8d00cd7 100644
--- a/docs/specs/libxl-migration-stream.pandoc
+++ b/docs/specs/libxl-migration-stream.pandoc
@@ -2,7 +2,8 @@
 % Andrew Cooper <<andrew.cooper3@xxxxxxxxxx>>
   Wen Congyang <<wency@xxxxxxxxxxxxxx>>
   Yang Hongyang <<hongyang.yang@xxxxxxxxxxxx>>
-% Revision 2
+  Joshua Otto <<jtotto@xxxxxxxxxxxx>>
+% Revision 3
 
 Introduction
 ============
@@ -123,7 +124,9 @@ type         0x00000000: END
 
              0x00000005: CHECKPOINT_STATE
 
-             0x00000006 - 0x7FFFFFFF: Reserved for future _mandatory_
+             0x00000006: POSTCOPY_TRANSITION_END
+
+             0x00000007 - 0x7FFFFFFF: Reserved for future _mandatory_
              records.
 
              0x80000000 - 0xFFFFFFFF: Reserved for future _optional_
@@ -304,6 +307,18 @@ While Secondary is running in below loop:
     b. Send _CHECKPOINT\_SVM\_SUSPENDED_ to primary
 4. Checkpoint
 
+POSTCOPY\_TRANSITION\_END
+-------------------------
+
+A postcopy transition end record marks the end of a postcopy transition in a
+libxl live migration stream.  It indicates that control of the stream should be
+returned to libxc for the postcopy memory migration phase.
+
+     0     1     2     3     4     5     6     7 octet
+    +-------------------------------------------------+
+
+The postcopy transition end record contains no fields; its body_length is 0.
+
 Future Extensions
 =================
 
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index cf8687a..5e48862 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -1387,6 +1387,13 @@ int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd,
 #define LIBXL_SUSPEND_DEBUG 1
 #define LIBXL_SUSPEND_LIVE 2
 
+int libxl_domain_live_migrate(libxl_ctx *ctx, uint32_t domid, int send_fd,
+                              int flags, /* LIBXL_SUSPEND_* */
+                              int recv_fd,
+                              bool *postcopy_transitioned, /* OUT */
+                              const libxl_asyncop_how *ao_how)
+                              LIBXL_EXTERNAL_CALLERS_ONLY;
+
 /* @param suspend_cancel [from xenctrl.h:xc_domain_resume( @param fast )]
  *   If this parameter is true, use co-operative resume. The guest
  *   must support this.
diff --git a/tools/libxl/libxl_dom_save.c b/tools/libxl/libxl_dom_save.c
index eb1271e..75ab523 100644
--- a/tools/libxl/libxl_dom_save.c
+++ b/tools/libxl/libxl_dom_save.c
@@ -350,10 +350,31 @@ static int libxl__save_live_migration_precopy_policy(
     return XGS_POLICY_CONTINUE_PRECOPY;
 }
 
+static void postcopy_transition_done(libxl__egc *egc,
+                                     libxl__stream_write_state *sws, int rc);
+
 static void libxl__save_live_migration_postcopy_transition_callback(void *user)
 {
-    /* XXX we're not yet ready to deal with this */
-    assert(0);
+    libxl__save_helper_state *shs = user;
+    libxl__stream_write_state *sws = CONTAINER_OF(shs, *sws, shs);
+    sws->postcopy_transition_callback = postcopy_transition_done;
+    libxl__stream_write_start_postcopy_transition(shs->egc, sws);
+}
+
+static void postcopy_transition_done(libxl__egc *egc,
+                                     libxl__stream_write_state *sws,
+                                     int rc)
+{
+    libxl__domain_save_state *dss = sws->dss;
+
+    /* Past here, it's _possible_ that the domain may execute at the
+     * destination, so - unless we're given positive confirmation by the
+     * destination that it failed to resume there - we must assume it has. */
+    assert(dss->postcopy_transitioned);
+    *dss->postcopy_transitioned = !rc;
+
+    /* Return control to libxc. */
+    libxl__xc_domain_saverestore_async_callback_done(egc, &sws->shs, !rc);
 }
 
 /*----- main code for saving, in order of execution -----*/
diff --git a/tools/libxl/libxl_domain.c b/tools/libxl/libxl_domain.c
index 08eccd0..fc37f47 100644
--- a/tools/libxl/libxl_domain.c
+++ b/tools/libxl/libxl_domain.c
@@ -486,8 +486,9 @@ static void domain_suspend_cb(libxl__egc *egc,
 
 }
 
-int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags,
-                         const libxl_asyncop_how *ao_how)
+static int do_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags,
+                             int recv_fd, bool *postcopy_transitioned,
+                             const libxl_asyncop_how *ao_how)
 {
     AO_CREATE(ctx, domid, ao_how);
     int rc;
@@ -506,6 +507,8 @@ int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags,
 
     dss->domid = domid;
     dss->fd = fd;
+    dss->recv_fd = recv_fd;
+    dss->postcopy_transitioned = postcopy_transitioned;
     dss->type = type;
     dss->live = flags & LIBXL_SUSPEND_LIVE;
     dss->debug = flags & LIBXL_SUSPEND_DEBUG;
@@ -523,6 +526,28 @@ int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags,
     return AO_CREATE_FAIL(rc);
 }
 
+int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags,
+                         const libxl_asyncop_how *ao_how)
+{
+    return do_domain_suspend(ctx, domid, fd, flags, -1, NULL, ao_how);
+}
+
+int libxl_domain_live_migrate(libxl_ctx *ctx, uint32_t domid, int send_fd,
+                              int flags, int recv_fd,
+                              bool *postcopy_transitioned,
+                              const libxl_asyncop_how *ao_how)
+{
+    if (!postcopy_transitioned) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    flags |= LIBXL_SUSPEND_LIVE;
+
+    return do_domain_suspend(ctx, domid, send_fd, flags, recv_fd,
+                             postcopy_transitioned, ao_how);
+}
+
 int libxl_domain_pause(libxl_ctx *ctx, uint32_t domid)
 {
     int ret;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 30d5492..c8ea3ba 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3204,17 +3204,25 @@ struct libxl__stream_write_state {
     void (*completion_callback)(libxl__egc *egc,
                                 libxl__stream_write_state *sws,
                                 int rc);
-    void (*checkpoint_callback)(libxl__egc *egc,
-                                libxl__stream_write_state *sws,
-                                int rc);
+    /* Checkpointing and postcopy live migration are mutually exclusive. */
+    union {
+        void (*checkpoint_callback)(libxl__egc *egc,
+                                    libxl__stream_write_state *sws,
+                                    int rc);
+        void (*postcopy_transition_callback)(libxl__egc *egc,
+                                             libxl__stream_write_state *sws,
+                                             int rc);
+    };
     /* Private */
     int rc;
     bool running;
     enum {
         SWS_PHASE_NORMAL,
         SWS_PHASE_CHECKPOINT,
-        SWS_PHASE_CHECKPOINT_STATE
+        SWS_PHASE_CHECKPOINT_STATE,
+        SWS_PHASE_POSTCOPY_TRANSITION
     } phase;
+    bool postcopy_transitioned;
     bool sync_teardown;  /* Only used to coordinate shutdown on error path. */
     libxl__save_helper_state shs;
 
@@ -3237,6 +3245,10 @@ _hidden void libxl__stream_write_init(libxl__stream_write_state *stream);
 _hidden void libxl__stream_write_start(libxl__egc *egc,
                                        libxl__stream_write_state *stream);
 _hidden void
+libxl__stream_write_start_postcopy_transition(
+    libxl__egc *egc,
+    libxl__stream_write_state *stream);
+_hidden void
 libxl__stream_write_start_checkpoint(libxl__egc *egc,
                                      libxl__stream_write_state *stream);
 _hidden void
@@ -3300,6 +3312,7 @@ struct libxl__domain_save_state {
     int fd;
     int fdfl; /* original flags on fd */
     int recv_fd;
+    bool *postcopy_transitioned;
     libxl_domain_type type;
     int live;
     int debug;
diff --git a/tools/libxl/libxl_sr_stream_format.h b/tools/libxl/libxl_sr_stream_format.h
index 75f5190..a789126 100644
--- a/tools/libxl/libxl_sr_stream_format.h
+++ b/tools/libxl/libxl_sr_stream_format.h
@@ -31,12 +31,13 @@ typedef struct libxl__sr_rec_hdr
 /* All records must be aligned up to an 8 octet boundary */
 #define REC_ALIGN_ORDER              3U
 
-#define REC_TYPE_END                    0x00000000U
-#define REC_TYPE_LIBXC_CONTEXT          0x00000001U
-#define REC_TYPE_EMULATOR_XENSTORE_DATA 0x00000002U
-#define REC_TYPE_EMULATOR_CONTEXT       0x00000003U
-#define REC_TYPE_CHECKPOINT_END         0x00000004U
-#define REC_TYPE_CHECKPOINT_STATE       0x00000005U
+#define REC_TYPE_END                     0x00000000U
+#define REC_TYPE_LIBXC_CONTEXT           0x00000001U
+#define REC_TYPE_EMULATOR_XENSTORE_DATA  0x00000002U
+#define REC_TYPE_EMULATOR_CONTEXT        0x00000003U
+#define REC_TYPE_CHECKPOINT_END          0x00000004U
+#define REC_TYPE_CHECKPOINT_STATE        0x00000005U
+#define REC_TYPE_POSTCOPY_TRANSITION_END 0x00000006U
 
 typedef struct libxl__sr_emulator_hdr
 {
diff --git a/tools/libxl/libxl_stream_write.c b/tools/libxl/libxl_stream_write.c
index 8f2a1c9..1c4b1f1 100644
--- a/tools/libxl/libxl_stream_write.c
+++ b/tools/libxl/libxl_stream_write.c
@@ -22,6 +22,9 @@
  * Entry points from outside:
  *  - libxl__stream_write_start()
  *     - Start writing a stream from the start.
+ *  - libxl__stream_write_start_postcopy_transition()
+ *     - Write the records required to permit postcopy resumption at the
+ *       migration target.
  *  - libxl__stream_write_start_checkpoint()
  *     - Write the records which form a checkpoint into a stream.
  *
@@ -65,6 +68,9 @@ static void stream_complete(libxl__egc *egc,
                             libxl__stream_write_state *stream, int rc);
 static void stream_done(libxl__egc *egc,
                         libxl__stream_write_state *stream, int rc);
+static void postcopy_transition_done(libxl__egc *egc,
+                                     libxl__stream_write_state *stream,
+                                     int rc);
 static void checkpoint_done(libxl__egc *egc,
                             libxl__stream_write_state *stream,
                             int rc);
@@ -91,7 +97,9 @@ static void emulator_context_record_done(libxl__egc *egc,
                                          libxl__stream_write_state *stream);
 static void write_phase_end_record(libxl__egc *egc,
                                    libxl__stream_write_state *stream);
-
+static void postcopy_transition_end_record_done(
+    libxl__egc *egc,
+    libxl__stream_write_state *stream);
 static void checkpoint_end_record_done(libxl__egc *egc,
                                        libxl__stream_write_state *stream);
 
@@ -211,6 +219,7 @@ void libxl__stream_write_init(libxl__stream_write_state *stream)
     stream->rc = 0;
     stream->running = false;
     stream->phase = SWS_PHASE_NORMAL;
+    stream->postcopy_transitioned = false;
     stream->sync_teardown = false;
     FILLZERO(stream->dc);
     stream->record_done_callback = NULL;
@@ -287,6 +296,22 @@ void libxl__stream_write_start(libxl__egc *egc,
     stream_complete(egc, stream, rc);
 }
 
+void libxl__stream_write_start_postcopy_transition(
+    libxl__egc *egc,
+    libxl__stream_write_state *stream)
+{
+    libxl__domain_save_state *dss = stream->dss;
+
+    assert(stream->running);
+    assert(dss->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_NONE);
+    assert(stream->phase == SWS_PHASE_NORMAL);
+    assert(!stream->postcopy_transitioned);
+
+    stream->phase = SWS_PHASE_POSTCOPY_TRANSITION;
+
+    write_emulator_xenstore_record(egc, stream);
+}
+
 void libxl__stream_write_start_checkpoint(libxl__egc *egc,
                                           libxl__stream_write_state *stream)
 {
@@ -369,7 +394,7 @@ void libxl__xc_domain_save_done(libxl__egc *egc, void *dss_void,
      * If the stream is not still alive, we must not continue any work.
      */
     if (libxl__stream_write_inuse(stream)) {
-        if (dss->checkpointed_stream != LIBXL_CHECKPOINTED_STREAM_NONE)
+        if (dss->checkpointed_stream != LIBXL_CHECKPOINTED_STREAM_NONE) {
             /*
              * For remus, if libxl__xc_domain_save_done() completes,
              * there was an error sending data to the secondary.
@@ -377,8 +402,17 @@ void libxl__xc_domain_save_done(libxl__egc *egc, void *dss_void,
              * return value (Please refer to libxl__remus_teardown())
              */
             stream_complete(egc, stream, 0);
-        else
+        } else if (stream->postcopy_transitioned) {
+            /*
+             * If, on the other hand, this is a normal migration that had a
+             * postcopy migration stage, we're completely done at this point and
+             * want to report any error received here to our caller.
+             */
+            assert(stream->phase == SWS_PHASE_NORMAL);
+            write_phase_end_record(egc, stream);
+        } else {
             write_emulator_xenstore_record(egc, stream);
+        }
     }
 }
 
@@ -550,6 +584,11 @@ static void write_phase_end_record(libxl__egc *egc,
         what     = "checkpoint end record";
         cb       = checkpoint_end_record_done;
         break;
+    case SWS_PHASE_POSTCOPY_TRANSITION:
+        rec.type = REC_TYPE_POSTCOPY_TRANSITION_END;
+        what     = "postcopy transition end record";
+        cb       = postcopy_transition_end_record_done;
+        break;
     default:
         /* SWS_PHASE_CHECKPOINT_STATE has no end record */
         assert(false);
@@ -558,6 +597,13 @@ static void write_phase_end_record(libxl__egc *egc,
     setup_write(egc, stream, what, &rec, NULL, cb);
 }
 
+static void postcopy_transition_end_record_done(
+    libxl__egc *egc,
+    libxl__stream_write_state *stream)
+{
+    postcopy_transition_done(egc, stream, 0);
+}
+
 static void checkpoint_end_record_done(libxl__egc *egc,
                                        libxl__stream_write_state *stream)
 {
@@ -600,6 +646,13 @@ static void stream_complete(libxl__egc *egc,
          */
         checkpoint_state_done(egc, stream, rc);
         break;
+    case SWS_PHASE_POSTCOPY_TRANSITION:
+        /*
+         * To deal with errors during the postcopy transition, we use the same
+         * strategy as during checkpoints.
+         */
+        postcopy_transition_done(egc, stream, rc);
+        break;
     }
 }
 
@@ -627,6 +680,16 @@ static void stream_done(libxl__egc *egc,
     }
 }
 
+static void postcopy_transition_done(libxl__egc *egc,
+                                     libxl__stream_write_state *stream,
+                                     int rc)
+{
+    assert(stream->phase == SWS_PHASE_POSTCOPY_TRANSITION);
+    stream->postcopy_transitioned = true;
+    stream->phase = SWS_PHASE_NORMAL;
+    stream->postcopy_transition_callback(egc, stream, rc);
+}
+
 static void checkpoint_done(libxl__egc *egc,
                             libxl__stream_write_state *stream,
                             int rc)
diff --git a/tools/xl/xl_migrate.c b/tools/xl/xl_migrate.c
index 1f0e87d..9656204 100644
--- a/tools/xl/xl_migrate.c
+++ b/tools/xl/xl_migrate.c
@@ -186,6 +186,7 @@ static void migrate_domain(uint32_t domid, const char *rune, int debug,
     char rc_buf;
     uint8_t *config_data;
     int config_len, flags = LIBXL_SUSPEND_LIVE;
+    bool postcopy_transitioned;
 
     save_domain_core_begin(domid, override_config_file,
                            &config_data, &config_len);
@@ -205,7 +206,10 @@ static void migrate_domain(uint32_t domid, const char *rune, int debug,
 
     if (debug)
         flags |= LIBXL_SUSPEND_DEBUG;
-    rc = libxl_domain_suspend(ctx, domid, send_fd, flags, NULL);
+    rc = libxl_domain_live_migrate(ctx, domid, send_fd, flags,
+                                   recv_fd, &postcopy_transitioned, NULL);
+    assert(!postcopy_transitioned);
+
     if (rc) {
         fprintf(stderr, "migration sender: libxl_domain_suspend failed"
                 " (rc=%d)\n", rc);
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.