[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v17 4/8] remus drbd: Implement remus drbd replicated disk



Implement the remus DRBD replicated checkpointing disk on top of the
generic remus devices framework.

Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx>
---
 tools/hotplug/Linux/Makefile         |   1 +
 tools/hotplug/Linux/block-drbd-probe |  85 ++++++++++++
 tools/libxl/Makefile                 |   2 +-
 tools/libxl/libxl.c                  |   2 +
 tools/libxl/libxl_internal.h         |   3 +
 tools/libxl/libxl_remus_disk_drbd.c  | 243 +++++++++++++++++++++++++++++++++++
 6 files changed, 335 insertions(+), 1 deletion(-)
 create mode 100755 tools/hotplug/Linux/block-drbd-probe
 create mode 100644 tools/libxl/libxl_remus_disk_drbd.c

diff --git a/tools/hotplug/Linux/Makefile b/tools/hotplug/Linux/Makefile
index 721f8c0..15d1b37 100644
--- a/tools/hotplug/Linux/Makefile
+++ b/tools/hotplug/Linux/Makefile
@@ -24,6 +24,7 @@ XEN_SCRIPTS += xen-hotplug-cleanup
 XEN_SCRIPTS += external-device-migrate
 XEN_SCRIPTS += vscsi
 XEN_SCRIPTS += block-iscsi
+XEN_SCRIPTS += block-drbd-probe
 XEN_SCRIPTS += $(XEN_SCRIPTS-y)
 
 XEN_SCRIPT_DATA = xen-script-common.sh locking.sh logging.sh
diff --git a/tools/hotplug/Linux/block-drbd-probe 
b/tools/hotplug/Linux/block-drbd-probe
new file mode 100755
index 0000000..3a3d446
--- /dev/null
+++ b/tools/hotplug/Linux/block-drbd-probe
@@ -0,0 +1,85 @@
+#! /bin/bash
+#
+# Copyright (C) 2014 FUJITSU LIMITED
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+# Usage:
+#     block-drbd-probe devicename
+#
+# Return value:
+#     0: the device is a drbd device
+#     1: the device is not a drbd device
+#     2: unknown error
+#     3: the drbd device does not use protocol D
+#     4: the drbd device is not ready (not Connected, or not Primary/Secondary)
+
+drbd_res=""    # resource name of the probed device; set by get_res_name on success
+# get_res_name DEV: 0 and set drbd_res if DEV is a drbd device, 1 if it is not, 2 if no resource owns it
+function get_res_name()
+{
+    local drbd_dev="$1"
+    local drbd_dev_list=($(drbdadm sh-dev all))
+    local drbd_res_list=($(drbdadm sh-resource all))
+    local temp_drbd_dev temp_drbd_res
+    local found=0
+
+    for temp_drbd_dev in "${drbd_dev_list[@]}"; do
+        if [[ "$temp_drbd_dev" == "$drbd_dev" ]]; then
+            found=1
+            break
+        fi
+    done
+
+    if [[ $found -eq 0 ]]; then
+        return 1    # not among the devices printed by "drbdadm sh-dev all"
+    fi
+
+    for temp_drbd_res in "${drbd_res_list[@]}"; do
+        temp_drbd_dev=$(drbdadm sh-dev "$temp_drbd_res")
+        if [[ "$temp_drbd_dev" == "$drbd_dev" ]]; then
+            drbd_res="$temp_drbd_res"
+            return 0
+        fi
+    done
+
+    # the device was listed above, but no resource reports it as its device
+    return 2
+}
+
+get_res_name "$1"
+rc=$?
+if [[ $rc -ne 0 ]]; then
+    exit $rc
+fi
+
+# check protocol: only "protocol D" is accepted (exit 3 otherwise)
+drbdsetup "$1" show | grep -q "protocol D;"
+if [[ $? -ne 0 ]]; then
+    exit 3
+fi
+
+# check connect status: must be "Connected" (exit 4 otherwise)
+state=$(drbdadm cstate "$drbd_res")
+if [[ "$state" != "Connected" ]]; then
+    exit 4
+fi
+
+# check role: must be reported as "Primary/Secondary" (exit 4 otherwise)
+role=$(drbdadm role "$drbd_res")
+if [[ "$role" != "Primary/Secondary" ]]; then
+    exit 4
+fi
+
+exit 0
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 202f1bb..ba10ab7 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -56,7 +56,7 @@ else
 LIBXL_OBJS-y += libxl_nonetbuffer.o
 endif
 
-LIBXL_OBJS-y += libxl_remus_device.o
+LIBXL_OBJS-y += libxl_remus_device.o libxl_remus_disk_drbd.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index cd936c2..78a4752 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -790,6 +790,7 @@ static void remus_failover_cb(libxl__egc *egc,
 
 static const libxl__remus_device_subkind_ops *remus_ops[] = {
     &remus_device_nic,
+    &remus_device_drbd_disk,
     NULL,
 };
 
@@ -831,6 +832,7 @@ int libxl_domain_remus_start(libxl_ctx *ctx, 
libxl_domain_remus_info *info,
         }
         rds->device_kind_flags |= LIBXL__REMUS_DEVICE_NIC;
     }
+    rds->device_kind_flags |= LIBXL__REMUS_DEVICE_DISK;
 
     rds->ao = ao;
     rds->egc = egc;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 5f2ee05..980f300 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -140,6 +140,7 @@ typedef struct libxl__aop_occurred libxl__aop_occurred;
 typedef struct libxl__osevent_hook_nexus libxl__osevent_hook_nexus;
 typedef struct libxl__osevent_hook_nexi libxl__osevent_hook_nexi;
 typedef struct libxl__remus_netbuf_state libxl__remus_netbuf_state;
+typedef struct libxl__remus_drbd_state libxl__remus_drbd_state;
 
 _hidden void libxl__alloc_failed(libxl_ctx *, const char *func,
                          size_t nmemb, size_t size) __attribute__((noreturn));
@@ -377,6 +378,7 @@ struct libxl__ctx {
     libxl_version_info version_info;
 
     libxl__remus_netbuf_state *rns;
+    libxl__remus_drbd_state *drbd_state;
 };
 
 typedef struct {
@@ -2664,6 +2666,7 @@ _hidden void libxl__remus_devices_commit(libxl__egc *egc,
                                          libxl__remus_device_state *rds);
 
 extern const libxl__remus_device_subkind_ops remus_device_nic;
+extern const libxl__remus_device_subkind_ops remus_device_drbd_disk;
 
 _hidden int libxl__netbuffer_enabled(libxl__gc *gc);
 
diff --git a/tools/libxl/libxl_remus_disk_drbd.c 
b/tools/libxl/libxl_remus_disk_drbd.c
new file mode 100644
index 0000000..88a1984
--- /dev/null
+++ b/tools/libxl/libxl_remus_disk_drbd.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+/*** drbd implementation ***/
+static const int DRBD_SEND_CHECKPOINT = 20;     /* postsuspend ioctl */
+static const int DRBD_WAIT_CHECKPOINT_ACK = 30; /* preresume ioctl */
+
+struct libxl__remus_drbd_state {    /* stored in CTX->drbd_state */
+    libxl__ao *ao;                  /* ao captured by drbd_init() */
+    char *drbd_probe_script;        /* "<script dir>/block-drbd-probe" */
+};
+
+typedef struct libxl__remus_drbd_disk {    /* kept in dev->concrete_data */
+    int ctl_fd;         /* fd from open() of path; target of the ioctls */
+    int ackwait;        /* nonzero: preresume issues DRBD_WAIT_CHECKPOINT_ACK */
+    const char *path;   /* pdev_path of the backend disk */
+
+    libxl__async_exec_state aes;  /* NOTE(review): unused; code uses dev->aes */
+    libxl__ev_child child;        /* NOTE(review): unused; code uses dev->child */
+} libxl__remus_drbd_disk;
+
+/*----- helper functions, for async calls -----*/
+static void drbd_async_call(libxl__remus_device *dev,
+                            void func(libxl__remus_device *),
+                            libxl__ev_child_callback callback)
+{
+    /*
+     * Run func(dev) in a forked child; callback is handed to
+     * libxl__ev_child_fork() together with dev->child.  func must not
+     * return.  If the fork fails, dev->callback gets ERROR_FAIL.
+     */
+    int child_pid = -1;
+    STATE_AO_GC(dev->rds->ao);
+
+    child_pid = libxl__ev_child_fork(gc, &dev->child, callback);
+
+    if (child_pid == -1) {
+        LOG(ERROR, "unable to fork");
+        dev->callback(dev->rds->egc, dev, ERROR_FAIL);
+        return;
+    }
+
+    if (child_pid == 0) {
+        /* in the child: func is expected to terminate the process itself */
+        func(dev);
+        abort();
+    }
+}
+
+/*----- init() and cleanup() -----*/
+static int drbd_init(libxl__remus_device_state *rds)
+{
+    /* Allocate the drbd state with GCNEW and publish it in the ctx. */
+    libxl__remus_drbd_state *st;
+    STATE_AO_GC(rds->ao);
+
+    GCNEW(st);
+    st->ao = ao;
+    st->drbd_probe_script =
+        GCSPRINTF("%s/block-drbd-probe", libxl__xen_script_dir_path());
+    CTX->drbd_state = st;
+
+    return 0;   /* no failure path in this function */
+}
+
+static void drbd_cleanup(libxl__remus_device_state *rds)
+{
+    /* nothing to do here */
+}
+
+/*----- match(), setup() and teardown() -----*/
+
+/* callbacks */
+static void match_async_exec_cb(libxl__egc *egc,
+                                libxl__async_exec_state *aes,
+                                int status);
+
+/* implementations */
+
+static void match_async_exec(libxl__egc *egc, libxl__remus_device *dev)
+{   /* Start "block-drbd-probe <pdev_path>" asynchronously for dev's disk. */
+    int arraysize, nr = 0;
+    const libxl_device_disk *disk = dev->backend_dev;
+    libxl__async_exec_state *aes = &dev->aes;
+    STATE_AO_GC(dev->rds->ao);
+
+    libxl__remus_drbd_state *drbd_state = CTX->drbd_state;
+    /* setup env & args */
+    arraysize = 1;
+    GCNEW_ARRAY(aes->env, arraysize);
+    aes->env[nr++] = NULL;    /* env holds only the NULL terminator */
+    assert(nr <= arraysize);
+
+    arraysize = 3;
+    nr = 0;
+    GCNEW_ARRAY(aes->args, arraysize);
+    aes->args[nr++] = drbd_state->drbd_probe_script;
+    aes->args[nr++] = disk->pdev_path;    /* device name handed to the script */
+    aes->args[nr++] = NULL;
+    assert(nr <= arraysize);
+
+    aes->ao = drbd_state->ao;
+    aes->what = GCSPRINTF("%s %s", aes->args[0], aes->args[1]);
+    aes->timeout_ms = LIBXL_HOTPLUG_TIMEOUT * 1000; /* presumably s -> ms; confirm */
+    aes->callback = match_async_exec_cb;    /* gets the resulting status */
+    aes->stdfds[0] = -1;
+    aes->stdfds[1] = -1;
+    aes->stdfds[2] = -1;
+
+    if (libxl__async_exec_start(gc, aes))
+        goto out;
+
+    return;
+
+out:
+    dev->callback(egc, dev, ERROR_FAIL);    /* exec start returned nonzero */
+}
+
+static void drbd_match(libxl__remus_device *dev)
+{   /* .match op: start the asynchronous block-drbd-probe run for dev */
+    match_async_exec(dev->rds->egc, dev);
+}
+
+static void match_async_exec_cb(libxl__egc *egc,
+                                libxl__async_exec_state *aes,
+                                int status)
+{
+    /* Probe finished: status 0 -> match, anything else -> "not match".
+     * NOTE(review): the script's error exits (2, 3, 4) are therefore also
+     * reported as a plain mismatch - confirm this is intended.
+     */
+    libxl__remus_device *dev = CONTAINER_OF(aes, *dev, aes);
+
+    dev->callback(egc, dev, status ? ERROR_REMUS_DEVOPS_NOT_MATCH : 0);
+}
+
+static void drbd_setup(libxl__remus_device *dev)
+{
+    /* Allocate the per-disk data and open the device used for the ioctls. */
+    const libxl_device_disk *disk = dev->backend_dev;
+    libxl__remus_drbd_disk *rdd;
+    STATE_AO_GC(dev->rds->ao);
+
+    GCNEW(rdd);
+    rdd->ackwait = 0;
+    rdd->path = disk->pdev_path;
+    rdd->ctl_fd = open(rdd->path, O_RDONLY);
+    dev->concrete_data = rdd;
+
+    /* report the outcome of open() */
+    dev->callback(dev->rds->egc, dev, rdd->ctl_fd < 0 ? ERROR_FAIL : 0);
+}
+
+static void drbd_teardown(libxl__remus_device *dev)
+{
+    /* Close the fd stored by drbd_setup() and report completion. */
+    libxl__remus_drbd_disk *rdd = dev->concrete_data;
+    close(rdd->ctl_fd);
+    dev->callback(dev->rds->egc, dev, 0);
+}
+
+/*----- checkpointing APIs -----*/
+
+/* callbacks */
+static void chekpoint_async_call_done(libxl__egc *egc,
+                                      libxl__ev_child *child,
+                                      pid_t pid, int status);
+
+/* API implementations */
+
+/* Synchronous op: the ioctl issued here is not expected to wait or block. */
+static void drbd_postsuspend(libxl__remus_device *dev)
+{
+    libxl__remus_drbd_disk *disk_data = dev->concrete_data;
+
+    /* Issue DRBD_SEND_CHECKPOINT unless ackwait is already set; a result
+     * <= 0 sets ackwait (NOTE(review): includes -1 on failure - confirm). */
+    if (!disk_data->ackwait &&
+        ioctl(disk_data->ctl_fd, DRBD_SEND_CHECKPOINT, 0) <= 0)
+        disk_data->ackwait = 1;
+    dev->callback(dev->rds->egc, dev, 0);
+}
+
+static void drbd_preresume_async(libxl__remus_device *dev)
+{
+    /* Runs in the child forked via drbd_async_call(): issue
+     * DRBD_WAIT_CHECKPOINT_ACK if ackwait is set, then exit with status 0
+     * (the parent stores the exit status back into ackwait). */
+    libxl__remus_drbd_disk *disk_data = dev->concrete_data;
+
+    if (disk_data->ackwait)
+        ioctl(disk_data->ctl_fd, DRBD_WAIT_CHECKPOINT_ACK, 0);
+
+    _exit(0);
+}
+
+static void drbd_preresume(libxl__remus_device *dev)
+{   /* .preresume op: run drbd_preresume_async() in a forked child */
+    drbd_async_call(dev, drbd_preresume_async, chekpoint_async_call_done);
+}
+
+static void chekpoint_async_call_done(libxl__egc *egc,
+                                      libxl__ev_child *child,
+                                      pid_t pid, int status)
+{
+    /* Child-exit callback for the fork made by drbd_async_call(). */
+    libxl__remus_device *dev = CONTAINER_OF(child, *dev, child);
+    libxl__remus_drbd_disk *disk_data = dev->concrete_data;
+    STATE_AO_GC(dev->rds->ao);
+    if (!WIFEXITED(status)) {
+        dev->callback(egc, dev, ERROR_FAIL);
+        return;
+    }
+    disk_data->ackwait = WEXITSTATUS(status);
+    dev->callback(egc, dev, 0);
+}
+
+const libxl__remus_device_subkind_ops remus_device_drbd_disk = {
+    .kind = LIBXL__REMUS_DEVICE_DISK,
+    .init = drbd_init,                /* allocates CTX->drbd_state */
+    .cleanup = drbd_cleanup,          /* currently does nothing */
+    .match = drbd_match,              /* runs block-drbd-probe on the disk */
+    .setup = drbd_setup,              /* opens the disk's pdev_path */
+    .teardown = drbd_teardown,        /* closes that fd */
+    .postsuspend = drbd_postsuspend,  /* DRBD_SEND_CHECKPOINT ioctl */
+    .preresume = drbd_preresume,      /* DRBD_WAIT_CHECKPOINT_ACK in a child */
+};
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.