[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v12 20/26] Support colo mode for qemu disk



From: Wen Congyang <wency@xxxxxxxxxxxxxx>

Usage: disk = 
['...,colo,colo-host=xxx,colo-port=xxx,colo-export=xxx,active-disk=xxx,hidden-disk=xxx...']
For QEMU block replication details:
http://wiki.qemu.org/Features/BlockReplication

Note: this patch only introduces the COLO framework; it does not implement
the COLO operations.

Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
Signed-off-by: Yang Hongyang <hongyang.yang@xxxxxxxxxxxx>
Signed-off-by: Changlong Xie <xiecl.fnst@xxxxxxxxxxxxxx>
---
 docs/man/xl.pod.1                   |  38 ++++++++--
 docs/misc/xl-disk-configuration.txt |  53 ++++++++++++++
 tools/libxl/libxl.c                 |  51 +++++++++++++-
 tools/libxl/libxl_create.c          |  26 ++++++-
 tools/libxl/libxl_device.c          |  11 +++
 tools/libxl/libxl_dm.c              | 136 +++++++++++++++++++++++++++++++++---
 tools/libxl/libxl_types.idl         |   7 ++
 tools/libxl/libxlu_disk_l.l         |  17 +++++
 8 files changed, 323 insertions(+), 16 deletions(-)

diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1
index a0da263..16ebfae 100644
--- a/docs/man/xl.pod.1
+++ b/docs/man/xl.pod.1
@@ -450,12 +450,40 @@ Print huge (!) amount of debug during the migration 
process.
 Enable Remus HA or COLO HA for domain. By default B<xl> relies on ssh as a
 transport mechanism between the two hosts.
 
-N.B: Remus support in xl is still in experimental (proof-of-concept) phase.
-     Disk replication support is limited to DRBD disks.
+B<NOTES>
+
+=over 4
+
+Remus support in xl is still in experimental (proof-of-concept) phase.
+Disk replication support is limited to DRBD disks.
+
+COLO support in xl is still in experimental (proof-of-concept) phase.
+There is no support for network, so the guest will confuse its network
+peers at the moment.
+
+=back
+
+B<EXAMPLE>
 
-     COLO support in xl is still in experimental (proof-of-concept) phase.
-     There is no support for network or disk, so the guest will corrupt its
-     disk and confuse its network peers at the moment.
+=over 4
+
+(a) An example for COLO replication's configuration: disk =['...,colo,colo-host
+=xxx,colo-port=xxx,colo-export=xxx,active-disk=xxx,hidden-disk=xxx...']
+
+=item B<colo-host>      :Secondary host's ip address.
+
+=item B<colo-port>      :Secondary host's port. An NBD server runs on the
+secondary host and listens on this port.
+
+=item B<colo-export>    :Nbd server's disk export name of secondary host.
+
+=item B<active-disk>    :Writes from the secondary guest are buffered in this
+disk; it is used by the secondary.
+
+=item B<hidden-disk>    :Primary's modified contents will be buffered in this
+disk, and it's used by secondary.
+
+=back
 
 B<OPTIONS>
 
diff --git a/docs/misc/xl-disk-configuration.txt 
b/docs/misc/xl-disk-configuration.txt
index 29f6ddb..6e73975 100644
--- a/docs/misc/xl-disk-configuration.txt
+++ b/docs/misc/xl-disk-configuration.txt
@@ -234,6 +234,59 @@ were intentionally created non-sparse to avoid 
fragmentation of the
 file.
 
 
+===============
+COLO PARAMETERS
+===============
+
+
+colo
+----
+
+Enable COLO HA for disk. For a better understanding of block replication
+in QEMU, please refer to:
+http://wiki.qemu.org/Features/BlockReplication
+
+
+colo-host
+---------
+
+Description:           Secondary host's address
+Mandatory:             Yes when COLO enabled
+
+
+colo-port
+---------
+
+Description:           Secondary port
+                       We will run an NBD server on the secondary host,
+                       and the NBD server will listen on this port.
+Mandatory:             Yes when COLO enabled
+
+
+colo-export
+-----------
+
+Description:           We will run a nbd server on secondary host,
+                       exportname is the nbd server's disk export name.
+Mandatory:             Yes when COLO enabled
+
+
+active-disk
+-----------
+
+Description:           This is used by secondary. Secondary guest's write
+                       will be buffered in this disk.
+Mandatory:             Yes when COLO enabled
+
+
+hidden-disk
+-----------
+
+Description:           This is used by secondary. It buffers the original
+                       content that is modified by the primary VM.
+Mandatory:             Yes when COLO enabled
+
+
 ============================================
 DEPRECATED PARAMETERS, PREFIXES AND SYNTAXES
 ============================================
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index f255496..5482219 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -2307,6 +2307,8 @@ int libxl__device_disk_setdefault(libxl__gc *gc, 
libxl_device_disk *disk)
     int rc;
 
     libxl_defbool_setdefault(&disk->discard_enable, !!disk->readwrite);
+    libxl_defbool_setdefault(&disk->colo_enable, false);
+    libxl_defbool_setdefault(&disk->colo_restore_enable, false);
 
     rc = libxl__resolve_domid(gc, disk->backend_domname, &disk->backend_domid);
     if (rc < 0) return rc;
@@ -2505,6 +2507,18 @@ static void device_disk_add(libxl__egc *egc, uint32_t 
domid,
                 flexarray_append(back, "params");
                 flexarray_append(back, GCSPRINTF("%s:%s",
                               
libxl__device_disk_string_of_format(disk->format), disk->pdev_path));
+                if (libxl_defbool_val(disk->colo_enable)) {
+                    flexarray_append(back, "colo-host");
+                    flexarray_append(back, libxl__sprintf(gc, "%s", 
disk->colo_host));
+                    flexarray_append(back, "colo-port");
+                    flexarray_append(back, libxl__sprintf(gc, "%d", 
disk->colo_port));
+                    flexarray_append(back, "colo-export");
+                    flexarray_append(back, libxl__sprintf(gc, "%s", 
disk->colo_export));
+                    flexarray_append(back, "active-disk");
+                    flexarray_append(back, libxl__sprintf(gc, "%s", 
disk->active_disk));
+                    flexarray_append(back, "hidden-disk");
+                    flexarray_append(back, libxl__sprintf(gc, "%s", 
disk->hidden_disk));
+                }
                 assert(device->backend_kind == LIBXL__DEVICE_KIND_QDISK);
                 break;
             default:
@@ -2620,7 +2634,12 @@ static int libxl__device_disk_from_xs_be(libxl__gc *gc,
         goto cleanup;
     }
 
-    /* "params" may not be present; but everything else must be. */
+    /*
+     * "params" may not be present; but everything else must be.
+     * colo related entries (colo-host, colo-port, colo-export,
+     * active-disk and hidden-disk) are present only if colo is
+     * enabled.
+     */
     tmp = xs_read(ctx->xsh, XBT_NULL,
                   GCSPRINTF("%s/params", be_path), &len);
     if (tmp && strchr(tmp, ':')) {
@@ -2630,6 +2649,36 @@ static int libxl__device_disk_from_xs_be(libxl__gc *gc,
         disk->pdev_path = tmp;
     }
 
+    tmp = xs_read(ctx->xsh, XBT_NULL,
+                  GCSPRINTF("%s/colo-host", be_path), &len);
+    if (tmp) {
+        libxl_defbool_set(&disk->colo_enable, true);
+        disk->colo_host = tmp;
+
+        tmp = xs_read(ctx->xsh, XBT_NULL,
+                      GCSPRINTF("%s/colo-port", be_path), &len);
+        if (!tmp) {
+            LOG(ERROR, "Missing xenstore node %s/colo-port", be_path);
+            goto cleanup;
+        }
+        disk->colo_port = atoi(tmp);
+
+#define XS_READ_COLO(param, item) do {                                  \
+        tmp = xs_read(ctx->xsh, XBT_NULL,                               \
+                      GCSPRINTF("%s/"#param"", be_path), &len);         \
+        if (!tmp) {                                                     \
+            LOG(ERROR, "Missing xenstore node %s/"#param"", be_path);   \
+            goto cleanup;                                               \
+        }                                                               \
+        disk->item = tmp;                                               \
+} while (0)
+        XS_READ_COLO(colo-export, colo_export);
+        XS_READ_COLO(active-disk, active_disk);
+        XS_READ_COLO(hidden-disk, hidden_disk);
+#undef XS_READ_COLO
+    } else {
+        libxl_defbool_set(&disk->colo_enable, false);
+    }
 
     tmp = libxl__xs_read(gc, XBT_NULL,
                          GCSPRINTF("%s/type", be_path));
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index e26a483..6a96a7d 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -1807,12 +1807,30 @@ static void domain_create_cb(libxl__egc *egc,
 
     libxl__ao_complete(egc, ao, rc);
 }
-    
+
+
+static void set_disk_colo_restore(libxl_domain_config *d_config)
+{
+    int i;
+
+    for (i = 0; i < d_config->num_disks; i++)
+        libxl_defbool_set(&d_config->disks[i].colo_restore_enable, true);
+}
+
+static void unset_disk_colo_restore(libxl_domain_config *d_config)
+{
+    int i;
+
+    for (i = 0; i < d_config->num_disks; i++)
+        libxl_defbool_set(&d_config->disks[i].colo_restore_enable, false);
+}
+
 int libxl_domain_create_new(libxl_ctx *ctx, libxl_domain_config *d_config,
                             uint32_t *domid,
                             const libxl_asyncop_how *ao_how,
                             const libxl_asyncprogress_how *aop_console_how)
 {
+    unset_disk_colo_restore(d_config);
     return do_domain_create(ctx, d_config, domid, -1, -1, NULL,
                             ao_how, aop_console_how);
 }
@@ -1824,6 +1842,12 @@ int libxl_domain_create_restore(libxl_ctx *ctx, 
libxl_domain_config *d_config,
                                 const libxl_asyncop_how *ao_how,
                                 const libxl_asyncprogress_how *aop_console_how)
 {
+    if (params->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO) {
+        set_disk_colo_restore(d_config);
+    } else {
+        unset_disk_colo_restore(d_config);
+    }
+
     return do_domain_create(ctx, d_config, domid, restore_fd, send_back_fd,
                             params, ao_how, aop_console_how);
 }
diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c
index 8bb5e93..49527d0 100644
--- a/tools/libxl/libxl_device.c
+++ b/tools/libxl/libxl_device.c
@@ -196,6 +196,9 @@ static int disk_try_backend(disk_try_backend_args *a,
             goto bad_format;
         }
 
+        if (libxl_defbool_val(a->disk->colo_enable))
+            goto bad_colo;
+
         if (a->disk->backend_domid != LIBXL_TOOLSTACK_DOMID) {
             LOG(DEBUG, "Disk vdev=%s, is using a storage driver domain, "
                        "skipping physical device check", a->disk->vdev);
@@ -218,6 +221,9 @@ static int disk_try_backend(disk_try_backend_args *a,
     case LIBXL_DISK_BACKEND_TAP:
         if (a->disk->script) goto bad_script;
 
+        if (libxl_defbool_val(a->disk->colo_enable))
+            goto bad_colo;
+
         if (a->disk->is_cdrom) {
             LOG(DEBUG, "Disk vdev=%s, backend tap unsuitable for cdroms",
                        a->disk->vdev);
@@ -256,6 +262,11 @@ static int disk_try_backend(disk_try_backend_args *a,
     LOG(DEBUG, "Disk vdev=%s, backend %s not compatible with script=...",
         a->disk->vdev, libxl_disk_backend_to_string(backend));
     return 0;
+
+ bad_colo:
+    LOG(DEBUG, "Disk vdev=%s, backend %s not compatible with colo",
+        a->disk->vdev, libxl_disk_backend_to_string(backend));
+    return 0;
 }
 
 int libxl__device_disk_set_backend(libxl__gc *gc, libxl_device_disk *disk) {
diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
index 2226004..67bb6ac 100644
--- a/tools/libxl/libxl_dm.c
+++ b/tools/libxl/libxl_dm.c
@@ -754,6 +754,8 @@ static int libxl__dm_runas_helper(libxl__gc *gc, const char 
*username)
 /* colo mode */
 enum {
     LIBXL__COLO_NONE = 0,
+    LIBXL__COLO_PRIMARY,
+    LIBXL__COLO_SECONDARY,
 };
 
 static char *qemu_disk_scsi_drive_string(libxl__gc *gc, const char *pdev_path,
@@ -762,15 +764,61 @@ static char *qemu_disk_scsi_drive_string(libxl__gc *gc, 
const char *pdev_path,
                                          int colo_mode)
 {
     char *drive = NULL;
+    const char *exportname = disk->colo_export;
+    const char *active_disk = disk->active_disk;
+    const char *hidden_disk = disk->hidden_disk;
 
-    if (colo_mode == LIBXL__COLO_NONE)
-    {
+    switch (colo_mode) {
+    case LIBXL__COLO_NONE:
         drive = libxl__sprintf
             (gc, "file=%s,if=scsi,bus=0,unit=%d,format=%s,cache=writeback",
              pdev_path, unit, format);
-        return drive;
-    } else
+        break;
+    case LIBXL__COLO_PRIMARY:
+        /*
+         * primary:
+         *  -drive if=scsi,bus=0,unit=x,cache=writeback,driver=quorum,\
+         *  id=exportname,\
+         *  children.0.file.filename=pdev_path,\
+         *  children.0.driver=format,\
+         *  read-pattern=fifo,\
+         *  vote-threshold=1
+         */
+        drive = GCSPRINTF(
+            "if=scsi,bus=0,unit=%d,cache=writeback,driver=quorum,"
+            "id=%s,"
+            "children.0.file.filename=%s,"
+            "children.0.driver=%s,"
+            "read-pattern=fifo,"
+            "vote-threshold=1",
+            unit, exportname, pdev_path, format);
+        break;
+    case LIBXL__COLO_SECONDARY:
+        /*
+         * secondary:
+         *  -drive if=scsi,bus=0,unit=x,cache=writeback,driver=replication,\
+         *  mode=secondary,\
+         *  file.driver=qcow2,\
+         *  file.file.filename=active_disk,\
+         *  file.backing.driver=qcow2,\
+         *  file.backing.file.filename=hidden_disk,\
+         *  file.backing.backing=exportname,
+         */
+        drive = GCSPRINTF(
+            "if=scsi,bus=0,unit=%d,cache=writeback,driver=replication,"
+            "mode=secondary,"
+            "file.driver=qcow2,"
+            "file.file.filename=%s,"
+            "file.backing.driver=qcow2,"
+            "file.backing.file.filename=%s,"
+            "file.backing.backing=%s",
+            unit, active_disk, hidden_disk, exportname);
+        break;
+    default:
         abort();
+    }
+
+    return drive;
 }
 
 static char *qemu_disk_ide_drive_string(libxl__gc *gc, const char *pdev_path,
@@ -779,15 +827,61 @@ static char *qemu_disk_ide_drive_string(libxl__gc *gc, 
const char *pdev_path,
                                         int colo_mode)
 {
     char *drive = NULL;
+    const char *exportname = disk->colo_export;
+    const char *active_disk = disk->active_disk;
+    const char *hidden_disk = disk->hidden_disk;
 
-    if (colo_mode == LIBXL__COLO_NONE)
-    {
+    switch (colo_mode) {
+    case LIBXL__COLO_NONE:
         drive = GCSPRINTF
             ("file=%s,if=ide,index=%d,media=disk,format=%s,cache=writeback",
              pdev_path, unit, format);
-        return drive;
-    }
+        break;
+    case LIBXL__COLO_PRIMARY:
+        /*
+         * primary:
+         *  -drive if=ide,index=x,media=disk,cache=writeback,driver=quorum,\
+         *  id=exportname,\
+         *  children.0.file.filename=pdev_path,\
+         *  children.0.driver=format,\
+         *  read-pattern=fifo,\
+         *  vote-threshold=1
+         */
+        drive = GCSPRINTF(
+            "if=ide,index=%d,media=disk,cache=writeback,driver=quorum,"
+            "id=%s,"
+            "children.0.file.filename=%s,"
+            "children.0.driver=%s,"
+            "read-pattern=fifo,"
+            "vote-threshold=1",
+             unit, exportname, pdev_path, format);
+        break;
+    case LIBXL__COLO_SECONDARY:
+        /*
+         * secondary:
+         *  -drive 
if=ide,index=x,media=disk,cache=writeback,driver=replication,\
+         *  mode=secondary,\
+         *  file.driver=qcow2,\
+         *  file.file.filename=active_disk,\
+         *  file.backing.driver=qcow2,\
+         *  file.backing.file.filename=hidden_disk,\
+         *  file.backing.backing=exportname,
+         */
+        drive = GCSPRINTF(
+            "if=ide,index=%d,media=disk,cache=writeback,driver=replication,"
+            "mode=secondary,"
+            "file.driver=qcow2,"
+            "file.file.filename=%s,"
+            "file.backing.driver=qcow2,"
+            "file.backing.file.filename=%s,"
+            "file.backing.backing=%s",
+            unit, active_disk, hidden_disk, exportname);
+        break;
+    default:
         abort();
+    }
+
+    return drive;
 }
 
 static int libxl__build_device_model_args_new(libxl__gc *gc,
@@ -1249,8 +1343,24 @@ static int libxl__build_device_model_args_new(libxl__gc 
*gc,
                  * hd[a-d] and ignore the rest.
                  */
 
-                colo_mode = LIBXL__COLO_NONE;
+                if (libxl_defbool_val(disks[i].colo_enable)) {
+                    if (libxl_defbool_val(disks[i].colo_restore_enable))
+                        colo_mode = LIBXL__COLO_SECONDARY;
+                    else
+                        colo_mode = LIBXL__COLO_PRIMARY;
+                } else {
+                    colo_mode = LIBXL__COLO_NONE;
+                }
+
                 if (strncmp(disks[i].vdev, "sd", 2) == 0) {
+                    if (colo_mode == LIBXL__COLO_SECONDARY) {
+                        drive = libxl__sprintf
+                            (gc, "if=none,driver=%s,file=%s,id=%s",
+                             format, pdev_path, disks[i].colo_export);
+
+                        flexarray_append(dm_args, "-drive");
+                        flexarray_append(dm_args, drive);
+                    }
                     drive = qemu_disk_scsi_drive_string(gc, pdev_path, disk,
                                                         format,
                                                         &disks[i],
@@ -1277,6 +1387,14 @@ static int libxl__build_device_model_args_new(libxl__gc 
*gc,
                         LOG(ERROR, "qemu-xen doesn't support read-only IDE 
disk drivers");
                         return ERROR_INVAL;
                     }
+                    if (colo_mode == LIBXL__COLO_SECONDARY) {
+                        drive = libxl__sprintf
+                            (gc, "if=none,driver=%s,file=%s,id=%s",
+                             format, pdev_path, disks[i].colo_export);
+
+                        flexarray_append(dm_args, "-drive");
+                        flexarray_append(dm_args, drive);
+                    }
                     drive = qemu_disk_ide_drive_string(gc, pdev_path, disk,
                                                        format,
                                                        &disks[i],
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 9b0a537..a206d68 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -575,6 +575,13 @@ libxl_device_disk = Struct("device_disk", [
     ("is_cdrom", integer),
     ("direct_io_safe", bool),
     ("discard_enable", libxl_defbool),
+    ("colo_enable", libxl_defbool),
+    ("colo_restore_enable", libxl_defbool),
+    ("colo_host", string),
+    ("colo_port", integer),
+    ("colo_export", string),
+    ("active_disk", string),
+    ("hidden_disk", string)
     ])
 
 libxl_device_nic = Struct("device_nic", [
diff --git a/tools/libxl/libxlu_disk_l.l b/tools/libxl/libxlu_disk_l.l
index 1a5deb5..cf2eec2 100644
--- a/tools/libxl/libxlu_disk_l.l
+++ b/tools/libxl/libxlu_disk_l.l
@@ -113,6 +113,16 @@ static void setbackendtype(DiskParseContext *dpc, const 
char *str) {
     else xlu__disk_err(dpc,str,"unknown value for backendtype");
 }
 
+/* Sets ->colo_port from the string.  COLO needs this. */
+static void setcoloport(DiskParseContext *dpc, const char *str) {
+    int port = atoi(str);
+    if (port) {
+       dpc->disk->colo_port = port;
+    } else {
+       xlu__disk_err(dpc,str,"unknown value for colo_port");
+    }
+}
+
 #define DEPRECATE(usewhatinstead) /* not currently reported */
 
 /* Handles a vdev positional parameter which includes a devtype. */
@@ -176,6 +186,13 @@ script=[^,]*,?     { STRIP(','); SAVESTRING("script", 
script, FROMEQUALS); }
 direct-io-safe,? { DPC->disk->direct_io_safe = 1; }
 discard,?      { libxl_defbool_set(&DPC->disk->discard_enable, true); }
 no-discard,?   { libxl_defbool_set(&DPC->disk->discard_enable, false); }
+colo,?         { libxl_defbool_set(&DPC->disk->colo_enable, true); }
+no-colo,?      { libxl_defbool_set(&DPC->disk->colo_enable, false); }
+colo-host=[^,]*,?      { STRIP(','); SAVESTRING("colo-host", colo_host, 
FROMEQUALS); }
+colo-port=[^,]*,?      { STRIP(','); setcoloport(DPC, FROMEQUALS); }
+colo-export=[^,]*,?    { STRIP(','); SAVESTRING("colo-export", colo_export, 
FROMEQUALS); }
+active-disk=[^,]*,?    { STRIP(','); SAVESTRING("active-disk", active_disk, 
FROMEQUALS); }
+hidden-disk=[^,]*,?    { STRIP(','); SAVESTRING("hidden-disk", hidden_disk, 
FROMEQUALS); }
 
  /* the target magic parameter, eats the rest of the string */
 
-- 
1.9.3




_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.