[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] libxc: document save/restore protocol



# HG changeset patch
# User Ian Campbell <ian.campbell@xxxxxxxxxx>
# Date 1283535491 -3600
# Node ID 9aa2e9cc1b24bc516dcff13181dc4bd2d9e51922
# Parent  4e98698360910f68571a8d82b06681aa5c06d96c
libxc: document save/restore protocol

Reverse engineered from the code; it likely contains inaccuracies, but
I think it provides a base to work from.

Add symbolic names for the minus-flags.

Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Signed-off-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
---
 tools/libxc/xc_domain_restore.c |   16 ++---
 tools/libxc/xc_domain_save.c    |   20 +++----
 tools/libxc/xg_save_restore.h   |  113 +++++++++++++++++++++++++++++++++++++++-
 3 files changed, 130 insertions(+), 19 deletions(-)

diff -r 4e9869836091 -r 9aa2e9cc1b24 tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c   Fri Sep 03 18:22:31 2010 +0100
+++ b/tools/libxc/xc_domain_restore.c   Fri Sep 03 18:38:11 2010 +0100
@@ -683,11 +683,11 @@ static int pagebuf_get_one(xc_interface 
     if (!count) {
         // DPRINTF("Last batch read\n");
         return 0;
-    } else if (count == -1) {
+    } else if (count == XC_SAVE_ID_ENABLE_VERIFY_MODE) {
         DPRINTF("Entering page verify mode\n");
         buf->verify = 1;
         return pagebuf_get_one(xch, ctx, buf, fd, dom);
-    } else if (count == -2) {
+    } else if (count == XC_SAVE_ID_VCPU_INFO) {
         buf->new_ctxt_format = 1;
         if ( RDEXACT(fd, &buf->max_vcpu_id, sizeof(buf->max_vcpu_id)) ||
              buf->max_vcpu_id >= 64 || RDEXACT(fd, &buf->vcpumap,
@@ -697,7 +697,7 @@ static int pagebuf_get_one(xc_interface 
         }
         // DPRINTF("Max VCPU ID: %d, vcpumap: %llx\n", buf->max_vcpu_id, buf->vcpumap);
         return pagebuf_get_one(xch, ctx, buf, fd, dom);
-    } else if (count == -3) {
+    } else if (count == XC_SAVE_ID_HVM_IDENT_PT) {
         /* Skip padding 4 bytes then read the EPT identity PT location. */
         if ( RDEXACT(fd, &buf->identpt, sizeof(uint32_t)) ||
              RDEXACT(fd, &buf->identpt, sizeof(uint64_t)) )
@@ -707,7 +707,7 @@ static int pagebuf_get_one(xc_interface 
         }
         // DPRINTF("EPT identity map address: %llx\n", buf->identpt);
         return pagebuf_get_one(xch, ctx, buf, fd, dom);
-    } else if ( count == -4 )  {
+    } else if ( count == XC_SAVE_ID_HVM_VM86_TSS )  {
         /* Skip padding 4 bytes then read the vm86 TSS location. */
         if ( RDEXACT(fd, &buf->vm86_tss, sizeof(uint32_t)) ||
              RDEXACT(fd, &buf->vm86_tss, sizeof(uint64_t)) )
@@ -717,7 +717,7 @@ static int pagebuf_get_one(xc_interface 
         }
         // DPRINTF("VM86 TSS location: %llx\n", buf->vm86_tss);
         return pagebuf_get_one(xch, ctx, buf, fd, dom);
-    } else if ( count == -5 ) {
+    } else if ( count == XC_SAVE_ID_TMEM ) {
         DPRINTF("xc_domain_restore start tmem\n");
         if ( xc_tmem_restore(xch, dom, fd) ) {
             PERROR("error reading/restoring tmem");
@@ -725,13 +725,13 @@ static int pagebuf_get_one(xc_interface 
         }
         return pagebuf_get_one(xch, ctx, buf, fd, dom);
     }
-    else if ( count == -6 ) {
+    else if ( count == XC_SAVE_ID_TMEM_EXTRA ) {
         if ( xc_tmem_restore_extra(xch, dom, fd) ) {
             PERROR("error reading/restoring tmem extra");
             return -1;
         }
         return pagebuf_get_one(xch, ctx, buf, fd, dom);
-    } else if ( count == -7 ) {
+    } else if ( count == XC_SAVE_ID_TSC_INFO ) {
         uint32_t tsc_mode, khz, incarn;
         uint64_t nsec;
         if ( RDEXACT(fd, &tsc_mode, sizeof(uint32_t)) ||
@@ -743,7 +743,7 @@ static int pagebuf_get_one(xc_interface 
             return -1;
         }
         return pagebuf_get_one(xch, ctx, buf, fd, dom);
-    } else if (count == -8 ) {
+    } else if (count == XC_SAVE_ID_HVM_CONSOLE_PFN ) {
         /* Skip padding 4 bytes then read the console pfn location. */
         if ( RDEXACT(fd, &buf->console_pfn, sizeof(uint32_t)) ||
              RDEXACT(fd, &buf->console_pfn, sizeof(uint64_t)) )
diff -r 4e9869836091 -r 9aa2e9cc1b24 tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c      Fri Sep 03 18:22:31 2010 +0100
+++ b/tools/libxc/xc_domain_save.c      Fri Sep 03 18:38:11 2010 +0100
@@ -861,7 +861,7 @@ static xen_pfn_t *map_and_save_p2m_table
 /* must be done AFTER suspend_and_state() */
 static int save_tsc_info(xc_interface *xch, uint32_t dom, int io_fd)
 {
-    int marker = -7;
+    int marker = XC_SAVE_ID_TSC_INFO;
     uint32_t tsc_mode, khz, incarn;
     uint64_t nsec;
 
@@ -1142,7 +1142,7 @@ int xc_domain_save(xc_interface *xch, in
 
     print_stats(xch, dom, 0, &stats, 0);
 
-    tmem_saved = xc_tmem_save(xch, dom, io_fd, live, -5);
+    tmem_saved = xc_tmem_save(xch, dom, io_fd, live, XC_SAVE_ID_TMEM);
     if ( tmem_saved == -1 )
     {
         PERROR("Error when writing to state file (tmem)");
@@ -1474,13 +1474,13 @@ int xc_domain_save(xc_interface *xch, in
 
         if ( last_iter && debug )
         {
-            int minusone = -1;
+            int id = XC_SAVE_ID_ENABLE_VERIFY_MODE;
             memset(to_send, 0xff, BITMAP_SIZE);
             debug = 0;
             DPRINTF("Entering debug resend-all mode\n");
 
             /* send "-1" to put receiver into debug mode */
-            if ( wrexact(io_fd, &minusone, sizeof(int)) )
+            if ( wrexact(io_fd, &id, sizeof(int)) )
             {
                 PERROR("Error when writing to state file (6)");
                 goto out;
@@ -1511,7 +1511,7 @@ int xc_domain_save(xc_interface *xch, in
 
                 DPRINTF("SUSPEND shinfo %08lx\n", info.shared_info_frame);
                 if ( (tmem_saved > 0) &&
-                     (xc_tmem_save_extra(xch,dom,io_fd,-6) == -1) )
+                     (xc_tmem_save_extra(xch,dom,io_fd,XC_SAVE_ID_TMEM_EXTRA) 
== -1) )
                 {
                         PERROR("Error when writing to state file (tmem)");
                         goto out;
@@ -1545,10 +1545,10 @@ int xc_domain_save(xc_interface *xch, in
 
     {
         struct {
-            int minustwo;
+            int id;
             int max_vcpu_id;
             uint64_t vcpumap;
-        } chunk = { -2, info.max_vcpu_id };
+        } chunk = { XC_SAVE_ID_VCPU_INFO, info.max_vcpu_id };
 
         if ( info.max_vcpu_id >= 64 )
         {
@@ -1580,7 +1580,7 @@ int xc_domain_save(xc_interface *xch, in
             uint64_t data;
         } chunk = { 0, };
 
-        chunk.id = -3;
+        chunk.id = XC_SAVE_ID_HVM_IDENT_PT;
         xc_get_hvm_param(xch, dom, HVM_PARAM_IDENT_PT,
                          (unsigned long *)&chunk.data);
 
@@ -1591,7 +1591,7 @@ int xc_domain_save(xc_interface *xch, in
             goto out;
         }
 
-        chunk.id = -4;
+        chunk.id = XC_SAVE_ID_HVM_VM86_TSS;
         xc_get_hvm_param(xch, dom, HVM_PARAM_VM86_TSS,
                          (unsigned long *)&chunk.data);
 
@@ -1602,7 +1602,7 @@ int xc_domain_save(xc_interface *xch, in
             goto out;
         }
 
-        chunk.id = -8;
+        chunk.id = XC_SAVE_ID_HVM_CONSOLE_PFN;
         xc_get_hvm_param(xch, dom, HVM_PARAM_CONSOLE_PFN,
                          (unsigned long *)&chunk.data);
 
diff -r 4e9869836091 -r 9aa2e9cc1b24 tools/libxc/xg_save_restore.h
--- a/tools/libxc/xg_save_restore.h     Fri Sep 03 18:22:31 2010 +0100
+++ b/tools/libxc/xg_save_restore.h     Fri Sep 03 18:38:11 2010 +0100
@@ -1,5 +1,5 @@
 /*
- * Defintions and utilities for save / restore.
+ * Definitions and utilities for save / restore.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -20,6 +20,117 @@
 
 #include <xen/foreign/x86_32.h>
 #include <xen/foreign/x86_64.h>
+
+/*
+ * SAVE/RESTORE/MIGRATE PROTOCOL
+ * =============================
+ *
+ * The general form of a stream of chunks is a header followed by a
+ * body consisting of a variable number of chunks (terminated by a
+ * chunk with type 0) followed by a tail.
+ *
+ * For a rolling/checkpoint (e.g. Remus) migration, the body and
+ * tail phases can be repeated until an external event
+ * (e.g. failure) causes the process to terminate and commit to the
+ * most recent complete checkpoint.
+ *
+ * HEADER
+ * ------
+ *
+ * unsigned long        : p2m_size
+ *
+ * extended-info (PV-only, optional):
+ *
+ *   If first unsigned long == ~0UL then extended info is present,
+ *   otherwise unsigned long is part of p2m. Note that p2m_size above
+ *   does not include the length of the extended info.
+ *
+ *   extended-info:
+ *
+ *     unsigned long    : signature == ~0UL
+ *     uint32_t         : number of bytes remaining in extended-info
+ *
+ *     1 or more extended-info blocks of form:
+ *     char[4]          : block identifier
+ *     uint32_t         : block data size
+ *     bytes            : block data
+ *
+ *     defined extended-info blocks:
+ *     "vcpu"           : VCPU context info containing vcpu_guest_context_t.
+ *                        The precise variant of the context structure
+ *                        (e.g. 32 vs 64 bit) is distinguished by
+ *                        the block size.
+ *     "extv"           : Presence indicates use of extended VCPU context in
+ *                        tail, data size is 0.
+ *
+ * p2m (PV-only):
+ *
+ *   consists of p2m_size bytes comprising an array of xen_pfn_t sized entries.
+ *
+ * BODY PHASE
+ * ----------
+ *
+ * A series of chunks with a common header:
+ *   int              : chunk type
+ *
+ * If the chunk type is +ve then chunk contains guest memory data, and the
+ * type contains the number of pages in the batch:
+ *
+ *     unsigned long[]  : PFN array, length == number of pages in batch
+ *                        Each entry consists of XEN_DOMCTL_PFINFO_*
+ *                        in bits 31-28 and the PFN number in bits 27-0.
+ *     page data        : PAGE_SIZE bytes for each page marked present in PFN
+ *                        array
+ *
+ * If the chunk type is -ve then chunk consists of one of a number of
+ * metadata types.  See definitions of XC_SAVE_ID_* below.
+ *
+ * If chunk type is 0 then body phase is complete.
+ *
+ * TAIL PHASE
+ * ----------
+ *
+ * Content differs for PV and HVM guests.
+ *
+ * HVM TAIL:
+ *
+ *  "Magic" pages:
+ *     uint64_t         : I/O req PFN
+ *     uint64_t         : Buffered I/O req PFN
+ *     uint64_t         : Store PFN
+ *  Xen HVM Context:
+ *     uint32_t         : Length of context in bytes
+ *     bytes            : Context data
+ *  Qemu context:
+ *     char[21]         : Signature:
+ *       "QemuDeviceModelRecord" : Read Qemu save data until EOF
+ *       "RemusDeviceModelState" : uint32_t length field followed by that many
+ *                                 bytes of Qemu save data
+ *
+ * PV TAIL:
+ *
+ *  Unmapped PFN list   : list of all the PFNs that were not in map at the close
+ *     unsigned int     : Number of unmapped pages
+ *     unsigned long[]  : PFNs of unmapped pages
+ *
+ *  VCPU context data   : A series of VCPU records, one per present VCPU
+ *                        Maximum and present map supplied in XC_SAVE_ID_VCPU_INFO
+ *     bytes            : VCPU context structure. Size is determined by size
+ *                        provided in extended-info header
+ *     bytes[128]       : Extended VCPU context (present IFF "extv" block
+ *                        present in extended-info header)
+ *
+ *  Shared Info Page    : 4096 bytes of shared info page
+ */
+
+#define XC_SAVE_ID_ENABLE_VERIFY_MODE -1 /* Switch to validation phase. */
+#define XC_SAVE_ID_VCPU_INFO          -2 /* Additional VCPU info */
+#define XC_SAVE_ID_HVM_IDENT_PT       -3 /* (HVM-only) */
+#define XC_SAVE_ID_HVM_VM86_TSS       -4 /* (HVM-only) */
+#define XC_SAVE_ID_TMEM               -5
+#define XC_SAVE_ID_TMEM_EXTRA         -6
+#define XC_SAVE_ID_TSC_INFO           -7
+#define XC_SAVE_ID_HVM_CONSOLE_PFN    -8 /* (HVM-only) */
 
 /*
 ** We process save/restore/migrate in batches of pages; the below

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.