[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v9 05/15] tools/libxc: noarch common code



Add the context structure used to keep state during the save/restore
process.

Define the set of architecture or domain type specific operations with a
set of callbacks (save_ops, and restore_ops).

Add common functions for writing records.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CC: Ian Campbell <Ian.Campbell@xxxxxxxxxx>
CC: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
CC: Wei Liu <wei.liu2@xxxxxxxxxx>
---
 tools/libxc/xc_sr_common.c |   41 ++++++
 tools/libxc/xc_sr_common.h |  309 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 350 insertions(+)

diff --git a/tools/libxc/xc_sr_common.c b/tools/libxc/xc_sr_common.c
index 294a626..59e0c5d 100644
--- a/tools/libxc/xc_sr_common.c
+++ b/tools/libxc/xc_sr_common.c
@@ -1,3 +1,5 @@
+#include <assert.h>
+
 #include "xc_sr_common.h"
 
 static const char *dhdr_types[] =
@@ -46,6 +48,45 @@ const char *rec_type_to_str(uint32_t type)
     return "Reserved";
 }
 
+/*
+ * Write 'rec' to the stream as a single record whose payload is rec->data
+ * followed by the 'sz' bytes at 'buf'.
+ *
+ * The iovec handed to writev_exact() carries, in order: the record type, the
+ * combined (unpadded) length, rec->data, buf, and zero bytes making the
+ * payload up to the ROUNDUP(..., REC_ALIGN_ORDER) boundary.
+ *
+ * Returns 0 on success, or -1 if the padded length exceeds REC_LENGTH_MAX or
+ * if writev_exact() reports a failure.
+ */
+int write_split_record(struct xc_sr_context *ctx, struct xc_sr_record *rec,
+                       void *buf, size_t sz)
+{
+    /* Padding source, sized for the largest pad that rounding up can need. */
+    static const char zeroes[(1u << REC_ALIGN_ORDER) - 1] = { 0 };
+
+    xc_interface *xch = ctx->xch;
+    /*
+     * NOTE(review): combined_length has the width of rec->length, so a very
+     * large 'sz' would be truncated here before the size check below sees
+     * it.  Presumably callers only pass small blobs - confirm.
+     */
+    typeof(rec->length) combined_length = rec->length + sz;
+    size_t record_length = ROUNDUP(combined_length, REC_ALIGN_ORDER);
+    struct iovec parts[] =
+    {
+        { &rec->type,       sizeof(rec->type) },
+        { &combined_length, sizeof(combined_length) },
+        { rec->data,        rec->length },
+        { buf,              sz },
+        { (void*)zeroes,    record_length - combined_length },
+    };
+
+    if ( record_length > REC_LENGTH_MAX )
+    {
+        /* Report the padded length that was actually tested. */
+        ERROR("Record (0x%08x, %s) length %#zx exceeds max (%#x)", rec->type,
+              rec_type_to_str(rec->type), record_length, REC_LENGTH_MAX);
+        return -1;
+    }
+
+    /* A non-zero length without a matching buffer is a caller bug. */
+    if ( rec->length )
+        assert(rec->data);
+    if ( sz )
+        assert(buf);
+
+    if ( writev_exact(ctx->fd, parts, ARRAY_SIZE(parts)) )
+        goto err;
+
+    return 0;
+
+ err:
+    PERROR("Unable to write record to stream");
+    return -1;
+}
+
 static void __attribute__((unused)) build_assertions(void)
 {
     XC_BUILD_BUG_ON(sizeof(struct xc_sr_ihdr) != 24);
diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
index b65e52b..e3b1990 100644
--- a/tools/libxc/xc_sr_common.h
+++ b/tools/libxc/xc_sr_common.h
@@ -1,7 +1,12 @@
 #ifndef __COMMON__H
 #define __COMMON__H
 
+#include <stdbool.h>
+
 #include "xg_private.h"
+#include "xg_save_restore.h"
+#include "xc_dom.h"
+#include "xc_bitops.h"
 
 #include "xc_sr_stream_format.h"
 
@@ -11,6 +16,310 @@
 /* String representation of Record types. */
 const char *rec_type_to_str(uint32_t type);
 
+struct xc_sr_context;
+struct xc_sr_record;
+
+/**
+ * Save operations.  To be implemented for each type of guest, for use by the
+ * common save algorithm.
+ *
+ * Every function must be implemented, even if only with a no-op stub.
+ */
+struct xc_sr_save_ops
+{
+    /* Convert a PFN to GFN.  May return ~0UL for an invalid mapping. */
+    xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
+
+    /**
+     * Optionally transform the contents of a page from being specific to the
+     * sending environment, to being generic for the stream.
+     *
+     * The page of data pointed to by '*page' may be a read-only mapping of a
+     * running guest; it must not be modified.  If no transformation is
+     * required, the callee should leave '*page' untouched.
+     *
+     * If a transformation is required, the callee should allocate itself a
+     * local page using malloc() and return it via '*page'.
+     *
+     * The caller shall free() '*page' in all cases.  In the case that the
+     * callee encounters an error, it should *NOT* free() the memory it
+     * allocated for '*page'.
+     *
+     * It is valid to fail with EAGAIN if the transformation is not able to be
+     * completed at this point.  The page shall be retried later.
+     *
+     * @returns 0 for success, -1 for failure, with errno appropriately set.
+     */
+    int (*normalise_page)(struct xc_sr_context *ctx, xen_pfn_t type,
+                          void **page);
+
+    /**
+     * Set up local environment to save a domain.  This is called before
+     * any records are written to the stream.  (Typically querying running
+     * domain state, setting up mappings etc.)
+     */
+    int (*setup)(struct xc_sr_context *ctx);
+
+    /**
+     * Write records which need to be at the start of the stream.  This is
+     * called after the Image and Domain headers are written.  (Any records
+     * which need to be ahead of the memory.)
+     */
+    int (*start_of_stream)(struct xc_sr_context *ctx);
+
+    /**
+     * Write records which need to be at the end of the stream, following the
+     * complete memory contents.  The caller shall handle writing the END
+     * record into the stream.  (Any records which need to be after the memory
+     * is complete.)
+     */
+    int (*end_of_stream)(struct xc_sr_context *ctx);
+
+    /**
+     * Clean up the local environment.  Will be called exactly once, either
+     * after a successful save, or upon encountering an error.
+     */
+    int (*cleanup)(struct xc_sr_context *ctx);
+};
+
+
+/**
+ * Restore operations.  To be implemented for each type of guest, for use by
+ * the common restore algorithm.
+ *
+ * Every function must be implemented, even if only with a no-op stub.
+ */
+struct xc_sr_restore_ops
+{
+    /* Convert a PFN to GFN.  May return ~0UL for an invalid mapping. */
+    xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
+
+    /* Check to see whether a PFN is valid. */
+    bool (*pfn_is_valid)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
+
+    /* Set the GFN of a PFN. */
+    void (*set_gfn)(struct xc_sr_context *ctx, xen_pfn_t pfn, xen_pfn_t gfn);
+
+    /* Set the type of a PFN. */
+    void (*set_page_type)(struct xc_sr_context *ctx, xen_pfn_t pfn,
+                          xen_pfn_t type);
+
+    /**
+     * Optionally transform the contents of a page from being generic in the
+     * stream, to being specific to the restoring environment.
+     *
+     * 'page' is expected to be modified in-place if a transformation is
+     * required.
+     *
+     * @returns 0 for success, -1 for failure, with errno appropriately set.
+     */
+    int (*localise_page)(struct xc_sr_context *ctx, uint32_t type, void *page);
+
+    /**
+     * Set up local environment to restore a domain.  This is called before
+     * any records are read from the stream.
+     */
+    int (*setup)(struct xc_sr_context *ctx);
+
+    /**
+     * Process an individual record from the stream.  The caller shall take
+     * care of processing common records (e.g. END, PAGE_DATA).
+     *
+     * @returns 0 for success, -1 for failure, or the sentinel value
+     * RECORD_NOT_PROCESSED.
+     */
+#define RECORD_NOT_PROCESSED 1
+    int (*process_record)(struct xc_sr_context *ctx, struct xc_sr_record *rec);
+
+    /**
+     * Perform any actions required after the stream has been finished. Called
+     * after the END record has been received.
+     */
+    int (*stream_complete)(struct xc_sr_context *ctx);
+
+    /**
+     * Clean up the local environment.  Will be called exactly once, either
+     * after a successful restore, or upon encountering an error.
+     */
+    int (*cleanup)(struct xc_sr_context *ctx);
+};
+
+/*
+ * x86 PV per-vcpu storage structure for blobs heading Xen-wards.
+ *
+ * NOTE(review): each '<name>sz' field presumably holds the size in bytes of
+ * the blob at the matching '<name>' pointer - confirm against the users.
+ */
+struct xc_sr_x86_pv_restore_vcpu
+{
+    void *basic, *extd, *xsave, *msr;
+    size_t basicsz, extdsz, xsavesz, msrsz;
+};
+
+/*
+ * State carried through a single save or restore operation.
+ *
+ * The first anonymous union holds either the common save data or the common
+ * restore data; the second holds data specific to the type of guest (x86 PV
+ * or x86 HVM).
+ */
+struct xc_sr_context
+{
+    xc_interface *xch;
+    uint32_t domid;
+    /* Stream file descriptor; write_split_record() writes records to it. */
+    int fd;
+
+    xc_dominfo_t dominfo;
+
+    union /* Common save or restore data. */
+    {
+        struct /* Save data. */
+        {
+            struct xc_sr_save_ops ops;
+            struct save_callbacks *callbacks;
+
+            /* Live migrate vs nonlive suspend. */
+            bool live;
+
+            /* Further debugging information in the stream. */
+            bool debug;
+
+            /* Parameters for tweaking live migration. */
+            unsigned max_iterations;
+            unsigned dirty_threshold;
+
+            unsigned long p2m_size;
+
+            xen_pfn_t *batch_pfns;
+            unsigned nr_batch_pfns;
+            unsigned long *deferred_pages;
+            unsigned long nr_deferred_pages;
+        } save;
+
+        struct /* Restore data. */
+        {
+            struct xc_sr_restore_ops ops;
+            struct restore_callbacks *callbacks;
+
+            /* From Image Header. */
+            uint32_t format_version;
+
+            /* From Domain Header. */
+            uint32_t guest_type;
+            uint32_t guest_page_size;
+
+            /*
+             * Xenstore and Console parameters.
+             * INPUT:  evtchn & domid
+             * OUTPUT: gfn
+             */
+            xen_pfn_t    xenstore_gfn,    console_gfn;
+            unsigned int xenstore_evtchn, console_evtchn;
+            domid_t      xenstore_domid,  console_domid;
+
+            /* Bitmap of currently populated PFNs during restore. */
+            unsigned long *populated_pfns;
+            xen_pfn_t max_populated_pfn;
+
+            /* Sender has invoked verify mode on the stream. */
+            bool verify;
+        } restore;
+    };
+
+    union /* Guest-arch specific data. */
+    {
+        struct /* x86 PV guest. */
+        {
+            /* 4 or 8; 32 or 64 bit domain */
+            unsigned int width;
+            /* 3 or 4 pagetable levels */
+            unsigned int levels;
+
+            /* Maximum Xen frame */
+            xen_pfn_t max_mfn;
+            /* Read-only machine to phys map */
+            xen_pfn_t *m2p;
+            /* first mfn of the compat m2p (Only needed for 32bit PV guests) */
+            xen_pfn_t compat_m2p_mfn0;
+            /* Number of m2p frames mapped */
+            unsigned long nr_m2p_frames;
+
+            /* Maximum guest frame */
+            xen_pfn_t max_pfn;
+
+            /* Number of frames making up the p2m */
+            unsigned int p2m_frames;
+            /* Guest's phys to machine map.  Mapped read-only (save) or
+             * allocated locally (restore).  Uses guest unsigned longs. */
+            void *p2m;
+            /* The guest pfns containing the p2m leaves */
+            xen_pfn_t *p2m_pfns;
+
+            /* Read-only mapping of the guest's shared info page */
+            shared_info_any_t *shinfo;
+
+            union
+            {
+                struct
+                {
+                    /* State machine for the order of received records. */
+                    bool seen_pv_info;
+
+                    /* Types for each page (bounded by max_pfn). */
+                    uint32_t *pfn_types;
+
+                    /* Vcpu context blobs. */
+                    struct xc_sr_x86_pv_restore_vcpu *vcpus;
+                    unsigned nr_vcpus;
+                } restore;
+            };
+        } x86_pv;
+
+        struct /* x86 HVM guest. */
+        {
+            union
+            {
+                struct
+                {
+                    /* Whether qemu enabled logdirty mode, and we should
+                     * disable on cleanup. */
+                    bool qemu_enabled_logdirty;
+                } save;
+
+                struct
+                {
+                    /* HVM context blob. */
+                    void *context;
+                    size_t contextsz;
+                } restore;
+            };
+        } x86_hvm;
+    };
+};
+
+
+/*
+ * In-memory description of one stream record, as consumed by
+ * write_record() and write_split_record().
+ */
+struct xc_sr_record
+{
+    /* Record type; rec_type_to_str() gives its string representation. */
+    uint32_t type;
+    /* Number of bytes of payload at 'data'. */
+    uint32_t length;
+    /* Payload.  Must be valid when 'length' is non-zero; ignored otherwise. */
+    void *data;
+};
+
+/*
+ * Writes a split record to the stream, applying correct padding where
+ * appropriate.  It is common when sending records containing blobs from Xen
+ * that the header and blob data are separate.  This function accepts a second
+ * buffer and length, and will merge it with the main record when sending.
+ *
+ * Records with a non-zero length must provide a valid data field; records
+ * with a 0 length shall have their data field ignored.  The same applies to
+ * 'buf' and 'sz'.
+ *
+ * Returns 0 on success and non-zero (-1) on failure.
+ */
+int write_split_record(struct xc_sr_context *ctx, struct xc_sr_record *rec,
+                       void *buf, size_t sz);
+
+/*
+ * Convenience wrapper for a record whose payload is held entirely in
+ * rec->data: forwards to write_split_record() with no second buffer, so the
+ * padding rules and the data/length requirements are those of that function.
+ *
+ * Returns 0 on success and non0 on failure.
+ */
+static inline int write_record(struct xc_sr_context *ctx,
+                               struct xc_sr_record *rec)
+{
+    void *const no_second_buf = NULL;
+    const size_t no_second_len = 0;
+
+    return write_split_record(ctx, rec, no_second_buf, no_second_len);
+}
+
 #endif
 /*
  * Local variables:
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.