[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v11 5/6] remus drbd: Implement remus drbd replicated disk



On 06/16/2014 11:33 AM, Shriram Rajagopalan wrote:

On Jun 13, 2014 7:39 AM, "Yang Hongyang" <yanghy@xxxxxxxxxxxxxx> wrote:
 >
 > Implement remus-drbd-replicated-checkpointing-disk based on
 > generic remus devices framework.
 >
 > Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
 > Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
 > Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx>
 > ---
 >  tools/hotplug/Linux/Makefile         |   1 +
 >  tools/hotplug/Linux/block-drbd-probe |  84 ++++++++++++
 >  tools/libxl/Makefile                 |   2 +-
 >  tools/libxl/libxl_internal.h         |   1 +
 >  tools/libxl/libxl_remus_device.c     |  23 +++-
 >  tools/libxl/libxl_remus_disk_drbd.c  | 249 +++++++++++++++++++++++++++++++++++
 >  6 files changed, 353 insertions(+), 7 deletions(-)
 >  create mode 100755 tools/hotplug/Linux/block-drbd-probe
 >  create mode 100644 tools/libxl/libxl_remus_disk_drbd.c
 >
 > diff --git a/tools/hotplug/Linux/Makefile b/tools/hotplug/Linux/Makefile
 > index 13e1f5f..5dd8599 100644
 > --- a/tools/hotplug/Linux/Makefile
 > +++ b/tools/hotplug/Linux/Makefile
 > @@ -23,6 +23,7 @@ XEN_SCRIPTS += xen-hotplug-cleanup
 >  XEN_SCRIPTS += external-device-migrate
 >  XEN_SCRIPTS += vscsi
 >  XEN_SCRIPTS += block-iscsi
 > +XEN_SCRIPTS += block-drbd-probe
 >  XEN_SCRIPTS += $(XEN_SCRIPTS-y)
 >
 >  XEN_SCRIPT_DATA = xen-script-common.sh locking.sh logging.sh
 > diff --git a/tools/hotplug/Linux/block-drbd-probe
b/tools/hotplug/Linux/block-drbd-probe
 > new file mode 100755
 > index 0000000..163ad04
 > --- /dev/null
 > +++ b/tools/hotplug/Linux/block-drbd-probe
 > @@ -0,0 +1,84 @@
 > +#! /bin/bash
 > +#
 > +# Copyright (C) 2014 FUJITSU LIMITED
 > +#
 > +# This library is free software; you can redistribute it and/or
 > +# modify it under the terms of version 2.1 of the GNU Lesser General Public
 > +# License as published by the Free Software Foundation.
 > +#
 > +# This library is distributed in the hope that it will be useful,
 > +# but WITHOUT ANY WARRANTY; without even the implied warranty of
 > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 > +# Lesser General Public License for more details.
 > +#
 > +# You should have received a copy of the GNU Lesser General Public
 > +# License along with this library; if not, write to the Free Software
 > +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 > +#
 > +# Usage:
 > +#     block-drbd-probe devicename
 > +#
 > +# Return value:
 > +#     0: the device is drbd device
 > +#     1: the device is not drbd device
 > +#     2: unkown error
 > +#     3: the drbd device does not use protocol D
 > +#     4: the drbd device is not ready
 > +
 > +drbd_res=
 > +
 > +function get_res_name()
 > +{
 > +    local drbd_dev=$1
 > +    local drbd_dev_list=($(drbdadm sh-dev all))
 > +    local drbd_res_list=($(drbdadm sh-resource all))
 > +    local temp_drbd_dev temp_drbd_res
 > +    local found=0
 > +
 > +    for temp_drbd_dev in ${drbd_dev_list[@]}; do
 > +        if [[ "$temp_drbd_dev" == "$drbd_dev" ]]; then
 > +            found=1
 > +            break
 > +        fi
 > +    done
 > +
 > +    if [[ $found -eq 0 ]]; then
 > +        return 1
 > +    fi
 > +
 > +    for temp_drbd_res in ${drbd_res_list[@]}; do
 > +        temp_drbd_dev=$(drbdadm sh-dev $temp_drbd_res)
 > +        if [[ "$temp_drbd_dev" == "$drbd_dev" ]]; then
 > +            drbd_res="$temp_drbd_res"
 > +            return 0
 > +        fi
 > +    done
 > +
 > +    # OOPS
 > +    return 2
 > +}
 > +
 > +get_res_name $1
 > +if [[ $? -ne 0 ]]; then
 > +    exit $?
 > +fi
 > +
 > +# check protocol
 > +drbdsetup $1 show | grep -q "protocol D;"
 > +if [[ $? -ne 0 ]]; then
 > +    exit 3
 > +fi
 > +
 > +# check connect status
 > +state=$(drbdadm cstate "$drbd_res")
 > +if [[ "$state" != "Connected" ]]; then
 > +    exit 4
 > +fi
 > +
 > +# check role
 > +role=$(drbdadm role "$drbd_res")
 > +if [[ "$role" != "Primary/Secondary" ]]; then
 > +    exit 4
 > +fi
 > +
 > +exit 0
 > diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
 > index 7a722a8..6f4d9b4 100644
 > --- a/tools/libxl/Makefile
 > +++ b/tools/libxl/Makefile
 > @@ -56,7 +56,7 @@ else
 >  LIBXL_OBJS-y += libxl_nonetbuffer.o
 >  endif
 >
 > -LIBXL_OBJS-y += libxl_remus_device.o
 > +LIBXL_OBJS-y += libxl_remus_device.o libxl_remus_disk_drbd.o
 >
 >  LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o
 >  LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
 > diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
 > index 47648cd..2424be0 100644
 > --- a/tools/libxl/libxl_internal.h
 > +++ b/tools/libxl/libxl_internal.h
 > @@ -2580,6 +2580,7 @@ struct libxl__remus_device_state {
 >
 >      libxl_device_nic *nics;
 >      int num_nics;
 > +    libxl_device_disk *disks;
 >      int num_disks;
 >
 >      /* for counting devices that have been handled */
 > diff --git a/tools/libxl/libxl_remus_device.c 
b/tools/libxl/libxl_remus_device.c
 > index c447d7d..9637b1e 100644
 > --- a/tools/libxl/libxl_remus_device.c
 > +++ b/tools/libxl/libxl_remus_device.c
 > @@ -19,8 +19,10 @@
 >  #include "libxl_internal.h"
 >
 >  extern libxl__remus_device_ops remus_device_nic;
 > +extern libxl__remus_device_ops remus_device_drbd_disk;
 >  static libxl__remus_device_ops *dev_ops[] = {
 >      &remus_device_nic,
 > +    &remus_device_drbd_disk,
 >  };
 >
 >  static void device_common_cb(libxl__egc *egc,
 > @@ -197,6 +199,13 @@ static void device_teardown_cb(libxl__egc *egc,
 >          rds->nics = NULL;
 >          rds->num_nics = 0;
 >
 > +        /* clean disk */
 > +        for (i = 0; i < rds->num_disks; i++)
 > +            libxl_device_disk_dispose(&rds->disks[i]);
 > +        free(rds->disks);
 > +        rds->disks = NULL;
 > +        rds->num_disks = 0;
 > +
 >          /* clean device ops */
 >          for (i = 0; i < ARRAY_SIZE(dev_ops); i++) {
 >              ops = dev_ops[i];
 > @@ -272,15 +281,15 @@ void libxl__remus_device_setup(libxl__egc *egc,
libxl__remus_state *rs)
 >      rds->num_nics = 0;
 >      rds->num_disks = 0;
 >
 > -    /* TBD: Remus setup - i.e. attach qdisc, enable disk buffering, etc */
 > -
 >      if (rs->netbufscript) {
 >          rds->nics = libxl_device_nic_list(CTX, rs->domid, &rds->num_nics);
 >      }
 > +    rds->disks = libxl_device_disk_list(CTX, rs->domid, &rds->num_disks);
 >
 > -    GCNEW_ARRAY(rds->dev, rds->num_nics + rds->num_disks);
 > +    if (rds->num_nics == 0 && rds->num_disks == 0)
 > +        goto out;
 >
 > -    /* TBD: CALL libxl__remus_device_init to init remus devices */
 > +    GCNEW_ARRAY(rds->dev, rds->num_nics + rds->num_disks);
 >
 >      if (rs->netbufscript && rds->nics) {
 >          for (i = 0; i < rds->num_nics; i++) {
 > @@ -289,8 +298,10 @@ void libxl__remus_device_setup(libxl__egc *egc,
libxl__remus_state *rs)
 >          }
 >      }
 >
 > -    if (rds->num_nics == 0 && rds->num_disks == 0)
 > -        goto out;
 > +    for (i = 0; i < rds->num_disks; i++) {
 > +        libxl__remus_device_init(egc, rds,
 > +                                 LIBXL__REMUS_DEVICE_DISK, &rds->disks[i]);
 > +    }
 >
 >      return;
 >
 > diff --git a/tools/libxl/libxl_remus_disk_drbd.c
b/tools/libxl/libxl_remus_disk_drbd.c
 > new file mode 100644
 > index 0000000..69fdd34
 > --- /dev/null
 > +++ b/tools/libxl/libxl_remus_disk_drbd.c
 > @@ -0,0 +1,249 @@
 > +/*
 > + * Copyright (C) 2014 FUJITSU LIMITED
 > + * Author Lai Jiangshan <laijs@xxxxxxxxxxxxxx <mailto:laijs@xxxxxxxxxxxxxx>>
 > + *
 > + * This program is free software; you can redistribute it and/or modify
 > + * it under the terms of the GNU Lesser General Public License as published
 > + * by the Free Software Foundation; version 2.1 only. with the special
 > + * exception on linking described in file LICENSE.
 > + *
 > + * This program is distributed in the hope that it will be useful,
 > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 > + * GNU Lesser General Public License for more details.
 > + */
 > +
 > +#include "libxl_osdeps.h" /* must come before any other headers */
 > +
 > +#include "libxl_internal.h"
 > +
 > +/*** drbd implementation ***/
 > +const int DRBD_SEND_CHECKPOINT = 20;
 > +const int DRBD_WAIT_CHECKPOINT_ACK = 30;
 > +
 > +typedef struct libxl__remus_drbd_disk {
 > +    libxl__remus_device remus_dev;
 > +    int ctl_fd;
 > +    int ackwait;
 > +    const char *path;
 > +} libxl__remus_drbd_disk;
 > +
 > +typedef struct libxl__remus_drbd_state {
 > +    libxl__ao *ao;
 > +    char *drbd_probe_script;
 > +} libxl__remus_drbd_state;
 > +
 > +static void drbd_async_call(libxl__remus_device *dev,
 > +                            void func(libxl__remus_device *),
 > +                            libxl__ev_child_callback callback)
 > +{
 > +    int pid = -1;
 > +    STATE_AO_GC(dev->rds->ao);
 > +
 > +    /* Fork and call */
 > +    pid = libxl__ev_child_fork(gc, &dev->child, callback);
 > +    if (pid == -1) {
 > +        LOG(ERROR, "unable to fork");
 > +        goto out;
 > +    }
 > +
 > +    if (!pid) {
 > +        /* child */
 > +        func(dev);
 > +        /* notreached */
 > +        abort();
 > +    }
 > +
 > +    return;
 > +
 > +out:
 > +    dev->callback(dev->rds->egc, dev, ERROR_FAIL);
 > +}
 > +
 > +static void chekpoint_async_call_done(libxl__egc *egc,
 > +                                      libxl__ev_child *child,
 > +                                      pid_t pid, int status)
 > +{
 > +    libxl__remus_device *dev = CONTAINER_OF(child, *dev, child);
 > +    libxl__remus_drbd_disk *rdd = dev->data;
 > +    STATE_AO_GC(dev->rds->ao);
 > +
 > +    if (WIFEXITED(status)) {
 > +        rdd->ackwait = WEXITSTATUS(status);
 > +        dev->callback(egc, dev, 0);
 > +    } else {
 > +        dev->callback(egc, dev, ERROR_FAIL);
 > +    }
 > +}
 > +
 > +/* this op will not wait and block, so implement as sync op */
 > +static void drbd_postsuspend(libxl__remus_device *dev)
 > +{
 > +    libxl__remus_drbd_disk *rdd = dev->data;
 > +
 > +    if (!rdd->ackwait) {
 > +        if (ioctl(rdd->ctl_fd, DRBD_SEND_CHECKPOINT, 0) <= 0)
 > +            rdd->ackwait = 1;
 > +    }
 > +
 > +    dev->callback(dev->rds->egc, dev, 0);
 > +}
 > +
 > +static void drbd_preresume_async(libxl__remus_device *dev)
 > +{
 > +    libxl__remus_drbd_disk *rdd = dev->data;
 > +    int ackwait = rdd->ackwait;
 > +
 > +    if (ackwait) {
 > +        ioctl(rdd->ctl_fd, DRBD_WAIT_CHECKPOINT_ACK, 0);
 > +        ackwait = 0;
 > +    }
 > +
 > +    _exit(ackwait);
 > +}
 > +
 > +static void drbd_preresume(libxl__remus_device *dev)
 > +{
 > +    drbd_async_call(dev, drbd_preresume_async, chekpoint_async_call_done);
 > +}
 > +
 > +static int drbd_init(libxl__remus_device_ops *self,
 > +                     libxl__remus_state *rs)
 > +{
 > +    libxl__remus_drbd_state *drbd_state;
 > +
 > +    STATE_AO_GC(rs->ao);
 > +
 > +    GCNEW(drbd_state);
 > +    self->data = drbd_state;
 > +    drbd_state->ao = ao;
 > +    drbd_state->drbd_probe_script = GCSPRINTF("%s/block-drbd-probe",
 > +                                              libxl__xen_script_dir_path());
 > +
 > +
 > +    return 0;
 > +}
 > +
 > +static void drbd_destroy(libxl__remus_device_ops *self)
 > +{
 > +    return;
 > +}
 > +
 > +static void match_async_exec_cb(libxl__egc *egc,
 > +                                libxl__async_exec_state *aes,
 > +                                int status)
 > +{
 > +    libxl__remus_device *dev = CONTAINER_OF(aes, *dev, aes);
 > +
 > +    if (status) {
 > +        dev->callback(egc, dev, ERROR_NOT_MATCH);
 > +    } else {
 > +        dev->callback(egc, dev, 0);
 > +    }
 > +}
 > +
 > +static void match_async_exec(libxl__egc *egc, libxl__remus_device *dev)
 > +{
 > +    int arraysize, nr = 0;
 > +    const libxl_device_disk *disk = dev->backend_dev;
 > +    libxl__remus_drbd_state *drbd_state = dev->ops->data;
 > +    libxl__async_exec_state *aes = &dev->aes;
 > +    STATE_AO_GC(drbd_state->ao);
 > +
 > +    /* setup env & args */
 > +    arraysize = 1;
 > +    GCNEW_ARRAY(aes->env, arraysize);
 > +    aes->env[nr++] = NULL;
 > +    assert(nr <= arraysize);
 > +
 > +    arraysize = 3;
 > +    nr = 0;
 > +    GCNEW_ARRAY(aes->args, arraysize);
 > +    aes->args[nr++] = drbd_state->drbd_probe_script;
 > +    aes->args[nr++] = disk->pdev_path;
 > +    aes->args[nr++] = NULL;
 > +    assert(nr <= arraysize);
 > +
 > +    aes->ao = drbd_state->ao;
 > +    aes->what = GCSPRINTF("%s %s", aes->args[0], aes->args[1]);
 > +    aes->timeout_ms = LIBXL_HOTPLUG_TIMEOUT * 1000;
 > +    aes->callback = match_async_exec_cb;
 > +    aes->stdfds[0] = -1;
 > +    aes->stdfds[1] = -1;
 > +    aes->stdfds[2] = -1;
 > +
 > +    if (libxl__async_exec_start(gc, aes))
 > +        goto out;
 > +
 > +    return;
 > +
 > +out:
 > +    dev->callback(egc, dev, ERROR_FAIL);
 > +}
 > +
 > +static void match_async_call_done(libxl__egc *egc,
 > +                                  libxl__ev_child *child,
 > +                                  pid_t pid, int status)
 > +{
 > +    libxl__remus_device *dev = CONTAINER_OF(child, *dev, child);
 > +    STATE_AO_GC(dev->rds->ao);
 > +
 > +    if (WIFEXITED(status)) {
 > +        if (-WEXITSTATUS(status) == ERROR_NOT_MATCH) {
 > +            dev->callback(egc, dev, ERROR_NOT_MATCH);
 > +        } else {
 > +            match_async_exec(egc, dev);
 > +        }
 > +    } else {
 > +        dev->callback(egc, dev, ERROR_FAIL);
 > +    }
 > +}
 > +
 > +static void drbd_match_async(libxl__remus_device *dev)
 > +{
 > +    if (dev->kind != LIBXL__REMUS_DEVICE_DISK)
 > +        _exit(-ERROR_NOT_MATCH);
 > +
 > +    _exit(0);
 > +}
 > +
 > +static void drbd_match(libxl__remus_device_ops *self,
 > +                      libxl__remus_device *dev)
 > +{
 > +    drbd_async_call(dev, drbd_match_async, match_async_call_done);
 > +}
 > +
 > +static void drbd_setup(libxl__remus_device *dev)
 > +{
 > +    libxl__remus_drbd_disk *drbd_disk;
 > +    const libxl_device_disk *disk = dev->backend_dev;
 > +    STATE_AO_GC(dev->rds->ao);
 > +
 > +    GCNEW(drbd_disk);
 > +    dev->data = drbd_disk;
 > +    drbd_disk->path = disk->pdev_path;
 > +    drbd_disk->ackwait = 0;
 > +    drbd_disk->ctl_fd = open(drbd_disk->path, O_RDONLY);
 > +    if (drbd_disk->ctl_fd < 0)
 > +        dev->callback(dev->rds->egc, dev, ERROR_FAIL);
 > +    else
 > +        dev->callback(dev->rds->egc, dev, 0);
 > +}
 > +
 > +static void drbd_teardown(libxl__remus_device *dev)
 > +{
 > +    libxl__remus_drbd_disk *drbd_disk = dev->data;
 > +
 > +    close(drbd_disk->ctl_fd);
 > +    dev->callback(dev->rds->egc, dev, 0);
 > +}
 > +
 > +libxl__remus_device_ops remus_device_drbd_disk = {
 > +    .init = drbd_init,
 > +    .destroy = drbd_destroy,
 > +    .postsuspend = drbd_postsuspend,
 > +    .preresume = drbd_preresume,
 > +    .match = drbd_match,
 > +    .setup = drbd_setup,
 > +    .teardown = drbd_teardown,
 > +};
 > --
 > 1.9.1
 >

I am fine with this patch. My only feedback is to add an option to the
xl remus command that explicitly disables disk buffering, similar to the
existing switch that disables network buffering.


Agreed. I will add the option. Thanks for the review!

Acked-by: Shriram Rajagopalan <rshriram@xxxxxxxxx>


--
Thanks,
Yang.

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.