[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v10] remus drbd: Implement remus drbd replicated disk



Implement the Remus DRBD replicated checkpointing disk device on top of
the generic Remus device framework.

Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx>
---
 tools/hotplug/Linux/Makefile         |   1 +
 tools/hotplug/Linux/block-drbd-probe |  84 ++++++++++
 tools/libxl/Makefile                 |   2 +-
 tools/libxl/libxl_internal.h         |   1 +
 tools/libxl/libxl_remus_device.c     |  23 ++-
 tools/libxl/libxl_remus_disk_drbd.c  | 290 +++++++++++++++++++++++++++++++++++
 6 files changed, 394 insertions(+), 7 deletions(-)
 create mode 100755 tools/hotplug/Linux/block-drbd-probe
 create mode 100644 tools/libxl/libxl_remus_disk_drbd.c

diff --git a/tools/hotplug/Linux/Makefile b/tools/hotplug/Linux/Makefile
index 13e1f5f..5dd8599 100644
--- a/tools/hotplug/Linux/Makefile
+++ b/tools/hotplug/Linux/Makefile
@@ -23,6 +23,7 @@ XEN_SCRIPTS += xen-hotplug-cleanup
 XEN_SCRIPTS += external-device-migrate
 XEN_SCRIPTS += vscsi
 XEN_SCRIPTS += block-iscsi
+XEN_SCRIPTS += block-drbd-probe
 XEN_SCRIPTS += $(XEN_SCRIPTS-y)
 
 XEN_SCRIPT_DATA = xen-script-common.sh locking.sh logging.sh
diff --git a/tools/hotplug/Linux/block-drbd-probe 
b/tools/hotplug/Linux/block-drbd-probe
new file mode 100755
index 0000000..163ad04
--- /dev/null
+++ b/tools/hotplug/Linux/block-drbd-probe
@@ -0,0 +1,84 @@
+#! /bin/bash
+#
+# Copyright (C) 2014 FUJITSU LIMITED
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+# Usage:
+#     block-drbd-probe devicename
+#
+# Return value:
+#     0: the device is a drbd device
+#     1: the device is not a drbd device
+#     2: unknown error
+#     3: the drbd device does not use protocol D
+#     4: the drbd device is not ready (not Connected or not Primary/Secondary)
+
+drbd_res=
+
+# Look up the drbd resource that owns device $1 and store it in $drbd_res.
+# Returns 0 on success, 1 if $1 is not a drbd device, 2 if no resource matches.
+function get_res_name() {
+    local drbd_dev=$1
+    local drbd_dev_list=($(drbdadm sh-dev all))
+    local drbd_res_list=($(drbdadm sh-resource all))
+    local temp_drbd_dev temp_drbd_res
+    local found=0
+
+    for temp_drbd_dev in "${drbd_dev_list[@]}"; do
+        if [[ "$temp_drbd_dev" == "$drbd_dev" ]]; then
+            found=1
+            break
+        fi
+    done
+    if [[ $found -eq 0 ]]; then
+        return 1
+    fi
+
+    for temp_drbd_res in "${drbd_res_list[@]}"; do
+        temp_drbd_dev=$(drbdadm sh-dev "$temp_drbd_res")
+        if [[ "$temp_drbd_dev" == "$drbd_dev" ]]; then
+            drbd_res="$temp_drbd_res"
+            return 0
+        fi
+    done
+
+    # The device is listed by drbdadm but no resource reports it.
+    return 2
+}
+
+# Resolve the drbd resource behind the device. Exit with get_res_name's own
+# status on failure: "exit $?" placed after a [[ ]] test would instead see
+# the test's status (0) and wrongly report a drbd device.
+get_res_name "$1" || exit $?
+
+# check protocol
+drbdsetup "$1" show | grep -q "protocol D;"
+if [[ $? -ne 0 ]]; then
+    exit 3
+fi
+
+# check connect status
+state=$(drbdadm cstate "$drbd_res")
+if [[ "$state" != "Connected" ]]; then
+    exit 4
+fi
+
+# check role
+role=$(drbdadm role "$drbd_res")
+if [[ "$role" != "Primary/Secondary" ]]; then
+    exit 4
+fi
+
+exit 0
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 7a722a8..6f4d9b4 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -56,7 +56,7 @@ else
 LIBXL_OBJS-y += libxl_nonetbuffer.o
 endif
 
-LIBXL_OBJS-y += libxl_remus_device.o
+LIBXL_OBJS-y += libxl_remus_device.o libxl_remus_disk_drbd.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 85fa713..9d344e8 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2499,6 +2499,7 @@ struct libxl__remus_device_state {
 
     libxl_device_nic *nics;
     int num_nics;
+    libxl_device_disk *disks;
     int num_disks;
 
     /* for counting devices that have been handled */
diff --git a/tools/libxl/libxl_remus_device.c b/tools/libxl/libxl_remus_device.c
index 5f07266..040441a 100644
--- a/tools/libxl/libxl_remus_device.c
+++ b/tools/libxl/libxl_remus_device.c
@@ -19,8 +19,10 @@
 #include "libxl_internal.h"
 
 extern libxl__remus_device_ops remus_device_nic;
+extern libxl__remus_device_ops remus_device_drbd_disk;
 static libxl__remus_device_ops *dev_ops[] = {
     &remus_device_nic,
+    &remus_device_drbd_disk,
 };
 
 static void device_common_cb(libxl__egc *egc,
@@ -194,6 +196,13 @@ static void device_teardown_cb(libxl__egc *egc,
         rds->nics = NULL;
         rds->num_nics = 0;
 
+        /* clean disk */
+        for (i = 0; i < rds->num_disks; i++)
+            libxl_device_disk_dispose(&rds->disks[i]);
+        free(rds->disks);
+        rds->disks = NULL;
+        rds->num_disks = 0;
+
         /* clean device ops */
         for (i = 0; i < ARRAY_SIZE(dev_ops); i++) {
             ops = dev_ops[i];
@@ -269,15 +278,15 @@ void libxl__remus_device_setup(libxl__egc *egc, 
libxl__remus_state *rs)
     rds->num_nics = 0;
     rds->num_disks = 0;
 
-    /* TBD: Remus setup - i.e. attach qdisc, enable disk buffering, etc */
-
     if (rs->netbufscript) {
         rds->nics = libxl_device_nic_list(CTX, rs->domid, &rds->num_nics);
     }
+    rds->disks = libxl_device_disk_list(CTX, rs->domid, &rds->num_disks);
 
-    GCNEW_ARRAY(rds->dev, rds->num_nics + rds->num_disks);
+    if (rds->num_nics == 0 && rds->num_disks == 0)
+        goto out;
 
-    /* TBD: CALL libxl__remus_device_init to init remus devices */
+    GCNEW_ARRAY(rds->dev, rds->num_nics + rds->num_disks);
 
     if (rs->netbufscript && rds->nics) {
         for (i = 0; i < rds->num_nics; i++) {
@@ -286,8 +295,10 @@ void libxl__remus_device_setup(libxl__egc *egc, 
libxl__remus_state *rs)
         }
     }
 
-    if (rds->num_nics == 0 && rds->num_disks == 0)
-        goto out;
+    for (i = 0; i < rds->num_disks; i++) {
+        libxl__remus_device_init(egc, rds,
+                                 LIBXL__REMUS_DEVICE_DISK, &rds->disks[i]);
+    }
 
     return;
 
diff --git a/tools/libxl/libxl_remus_disk_drbd.c 
b/tools/libxl/libxl_remus_disk_drbd.c
new file mode 100644
index 0000000..f35a406
--- /dev/null
+++ b/tools/libxl/libxl_remus_disk_drbd.c
@@ -0,0 +1,290 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+/*** drbd implementation; the constants are ioctl() requests for ctl_fd ***/
+const int DRBD_SEND_CHECKPOINT = 20;
+const int DRBD_WAIT_CHECKPOINT_ACK = 30;
+
+typedef struct libxl__remus_drbd_disk {
+    libxl__remus_device remus_dev;
+    int ctl_fd; /* result of open(path, O_RDONLY); target of the ioctls */
+    int ackwait; /* nonzero: preresume issues DRBD_WAIT_CHECKPOINT_ACK */
+    const char *path; /* the disk's pdev_path */
+} libxl__remus_drbd_disk;
+
+typedef struct libxl__remus_drbd_state {
+    libxl__ao *ao; /* ao recorded by drbd_init() */
+    char *drbd_probe_script; /* "<xen script dir>/block-drbd-probe" */
+} libxl__remus_drbd_state;
+
+/*
+ * Fork a child that runs func(dev); callback is handed to
+ * libxl__ev_child_fork() for that child. func is expected not to return.
+ * If the fork fails, dev->callback is invoked with ERROR_FAIL.
+ */
+static void drbd_async_call(libxl__remus_device *dev,
+                            void func(libxl__remus_device *),
+                            libxl__ev_child_callback callback)
+{
+    int child_pid;
+    STATE_AO_GC(dev->rds->ao);
+
+    child_pid = libxl__ev_child_fork(gc, &dev->child, callback);
+    if (child_pid == -1) {
+        LOG(ERROR, "unable to fork");
+        dev->callback(dev->rds->egc, dev, ERROR_FAIL);
+        return;
+    }
+
+    if (child_pid == 0) {
+        /* In the child: func must terminate the process itself. */
+        func(dev);
+        abort();
+    }
+    /* Parent: completion is reported later through callback. */
+}
+
+/* Child-exit handler shared by postsuspend/preresume: a child that exited
+ * normally reports the new ackwait value through its exit status. */
+static void chekpoint_async_call_done(libxl__egc *egc,
+                                      libxl__ev_child *child,
+                                      pid_t pid, int status)
+{
+    libxl__remus_device *dev = CONTAINER_OF(child, *dev, child);
+    libxl__remus_drbd_disk *rdd = dev->data;
+    const int exited = WIFEXITED(status);
+    STATE_AO_GC(dev->rds->ao);
+
+    if (exited)
+        rdd->ackwait = WEXITSTATUS(status);
+    dev->callback(egc, dev, exited ? 0 : ERROR_FAIL);
+}
+
+/* Child body: unless ackwait is already set, issue DRBD_SEND_CHECKPOINT;
+ * the resulting ackwait value is handed back as the exit status. */
+static void drbd_postsuspend_async(libxl__remus_device *dev)
+{
+    libxl__remus_drbd_disk *rdd = dev->data;
+    int pending = rdd->ackwait;
+
+    if (pending == 0 && ioctl(rdd->ctl_fd, DRBD_SEND_CHECKPOINT, 0) <= 0)
+        pending = 1;
+    _exit(pending);
+}
+
+/* Run drbd_postsuspend_async() in a forked child. */
+static void drbd_postsuspend(libxl__remus_device *dev)
+{
+    drbd_async_call(dev, drbd_postsuspend_async, chekpoint_async_call_done);
+}
+
+/* Child body: when ackwait is set, issue DRBD_WAIT_CHECKPOINT_ACK (its
+ * result is not examined). The exit status, i.e. the new ackwait, is 0. */
+static void drbd_preresume_async(libxl__remus_device *dev)
+{
+    libxl__remus_drbd_disk *rdd = dev->data;
+
+    if (rdd->ackwait != 0)
+        ioctl(rdd->ctl_fd, DRBD_WAIT_CHECKPOINT_ACK, 0);
+
+    _exit(0);
+}
+
+/* Run drbd_preresume_async() in a forked child. */
+static void drbd_preresume(libxl__remus_device *dev)
+{
+    drbd_async_call(dev, drbd_preresume_async, chekpoint_async_call_done);
+}
+
+/* Allocate the per-ops drbd state (ao + probe script path) and attach it
+ * to self->data. Always returns 0. */
+static int drbd_init(libxl__remus_device_ops *self,
+                     libxl__remus_state *rs)
+{
+    libxl__remus_drbd_state *state;
+    STATE_AO_GC(rs->ao);
+
+    GCNEW(state);
+    state->ao = ao;
+    state->drbd_probe_script = GCSPRINTF("%s/block-drbd-probe",
+                                         libxl__xen_script_dir_path());
+    self->data = state;
+
+    return 0;
+}
+
+static void drbd_destroy(libxl__remus_device_ops *self)
+{
+    /* Nothing to do: drbd_init() allocates only via GCNEW/GCSPRINTF. */
+}
+
+/* Completion of the probe script run.
+ * status == 0: report a match (rc 0) through dev->callback;
+ * any other status: report ERROR_NOT_MATCH. */
+static void match_async_exec_cb(libxl__egc *egc,
+                                libxl__async_exec_state *aes,
+                                int status)
+{
+    libxl__remus_device *dev = CONTAINER_OF(aes, *dev, aes);
+    int rc = status ? ERROR_NOT_MATCH : 0;
+
+    dev->callback(egc, dev, rc);
+}
+
+/*
+ * Second matching step: run "<probe script> <pdev_path>" through
+ * libxl__async_exec_start(). match_async_exec_cb() is installed as the
+ * completion callback. If the exec cannot be started, dev->callback is
+ * invoked with ERROR_FAIL.
+ */
+static void match_async_exec(libxl__egc *egc, libxl__remus_device *dev)
+{
+    enum { ENV_SLOTS = 1, ARG_SLOTS = 3 };
+    const libxl_device_disk *disk = dev->backend_dev;
+    libxl__remus_drbd_state *drbd_state = dev->ops->data;
+    libxl__async_exec_state *aes = &dev->aes;
+    STATE_AO_GC(drbd_state->ao);
+
+    /* env: nothing but the terminating NULL */
+    GCNEW_ARRAY(aes->env, ENV_SLOTS);
+    aes->env[0] = NULL;
+
+    /* args: script, device path, terminating NULL */
+    GCNEW_ARRAY(aes->args, ARG_SLOTS);
+    aes->args[0] = drbd_state->drbd_probe_script;
+    aes->args[1] = disk->pdev_path;
+    aes->args[2] = NULL;
+
+    /* remaining exec parameters */
+    aes->ao = drbd_state->ao;
+    aes->what = GCSPRINTF("%s %s", aes->args[0], aes->args[1]);
+    aes->timeout_ms = LIBXL_HOTPLUG_TIMEOUT * 1000;
+    aes->callback = match_async_exec_cb;
+    aes->stdfds[0] = -1;
+    aes->stdfds[1] = -1;
+    aes->stdfds[2] = -1;
+
+    if (libxl__async_exec_start(gc, aes)) {
+        /* starting the exec reported a failure */
+        dev->callback(egc, dev, ERROR_FAIL);
+    }
+}
+
+/* Exit handler for drbd_match_async(): a child that exited with
+ * -ERROR_NOT_MATCH ends matching, any other normal exit starts the probe
+ * script, and a child that did not exit normally yields ERROR_FAIL. */
+static void match_async_call_done(libxl__egc *egc,
+                                  libxl__ev_child *child,
+                                  pid_t pid, int status)
+{
+    libxl__remus_device *dev = CONTAINER_OF(child, *dev, child);
+    STATE_AO_GC(dev->rds->ao);
+
+    if (!WIFEXITED(status))
+        dev->callback(egc, dev, ERROR_FAIL);
+    else if (-WEXITSTATUS(status) == ERROR_NOT_MATCH)
+        dev->callback(egc, dev, ERROR_NOT_MATCH);
+    else
+        match_async_exec(egc, dev);
+}
+
+/* Child body: exit with -ERROR_NOT_MATCH for non-disk devices, else 0. */
+static void drbd_match_async(libxl__remus_device *dev)
+{
+    _exit(dev->kind == LIBXL__REMUS_DEVICE_DISK ? 0 : -ERROR_NOT_MATCH);
+}
+
+/* First matching step: check the device kind in a forked child;
+ * match_async_call_done() continues from the child's exit status. */
+static void drbd_match(libxl__remus_device_ops *self,
+                       libxl__remus_device *dev)
+{
+    drbd_async_call(dev, drbd_match_async, match_async_call_done);
+}
+
+/* Exit handler for drbd_setup_async(): report 0 when the child exited
+ * normally, ERROR_FAIL otherwise (e.g. the child called abort()).
+ * The exit code itself is not examined. */
+static void setup_async_call_done(libxl__egc *egc,
+                                 libxl__ev_child *child,
+                                 pid_t pid, int status)
+{
+    libxl__remus_device *dev = CONTAINER_OF(child, *dev, child);
+    int rc = WIFEXITED(status) ? 0 : ERROR_FAIL;
+    STATE_AO_GC(dev->rds->ao);
+
+    dev->callback(egc, dev, rc);
+}
+
+/* Child body: abort() if the parent's open() failed, otherwise exit 0. */
+static void drbd_setup_async(libxl__remus_device *dev)
+{
+    libxl__remus_drbd_disk *rdd = dev->data;
+    if (rdd->ctl_fd < 0)
+        abort();
+    _exit(0);
+}
+
+/* Set up per-disk state; the open() result is checked by a forked child. */
+static void drbd_setup(libxl__remus_device *dev)
+{
+    const libxl_device_disk *disk = dev->backend_dev;
+    libxl__remus_drbd_disk *rdd;
+    STATE_AO_GC(dev->rds->ao);
+
+    GCNEW(rdd);
+    rdd->path = disk->pdev_path;
+    rdd->ackwait = 0;
+    rdd->ctl_fd = open(rdd->path, O_RDONLY);
+    dev->data = rdd;
+    drbd_async_call(dev, drbd_setup_async, setup_async_call_done);
+}
+
+static void teardown_async_call_done(libxl__egc *egc,
+                                     libxl__ev_child *child,
+                                     pid_t pid, int status)
+{
+    libxl__remus_device *dev = CONTAINER_OF(child, *dev, child);
+    STATE_AO_GC(dev->rds->ao);
+    /* teardown always reports success; the child's status is ignored */
+    dev->callback(egc, dev, 0);
+}
+
+static void drbd_teardown_async(libxl__remus_device *dev)
+{
+    _exit(0); /* child body: nothing to do */
+}
+
+static void drbd_teardown(libxl__remus_device *dev)
+{
+    libxl__remus_drbd_disk *drbd_disk = dev->data;
+    /* close the control fd here, then complete through a forked child */
+    close(drbd_disk->ctl_fd);
+    drbd_async_call(dev, drbd_teardown_async, teardown_async_call_done);
+}
+
+libxl__remus_device_ops remus_device_drbd_disk = { /* listed in dev_ops[] */
+    .init = drbd_init,
+    .destroy = drbd_destroy,
+    .postsuspend = drbd_postsuspend,
+    .preresume = drbd_preresume,
+    .match = drbd_match, /* device-kind check, then the probe script */
+    .setup = drbd_setup,
+    .teardown = drbd_teardown,
+};
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.