[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v15 3/7] remus netbuffer: implement remus network buffering for nic devices



1. Add two members to libxl_domain_remus_info:
    netbuf: whether netbuf is enabled
    netbufscript: the path of the script which will be run to set up
       and tear down the guest's interface.
2. Introduce the remus-netbuf-setup hotplug script, responsible for
  setting up and tearing down the necessary infrastructure required for
  network output buffering in Remus.  This script is intended to be invoked
  by libxl for each guest interface, when starting or stopping Remus.

  Apart from returning a success/failure indication via the usual hotplug
  entries in xenstore, this script also writes to xenstore the name of
  the IFB device to be used to control the vif's network output.

  The script relies on libnl3 command line utilities to perform various
  setup/teardown functions. The script is confined to Linux platforms only
  since NetBSD does not seem to have libnl3.

  The following steps are taken during init:
    a) establish a dedicated remus context containing libnl related
       state (netlink sockets)

  The following steps are taken for each vif during setup:
    a) call the hotplug script to set up its network buffer and
       init qdisc caches

    b) Obtain handles to plug qdiscs installed on the IFB devices
       chosen by the hotplug scripts.

  And during teardown, the netlink resources are released, followed by
  invocation of hotplug scripts to remove the ifb devices.
3. Implement the remus device interface (setup, teardown, etc.). The
  checkpoint callbacks for netbuffer are implemented as synchronous ops
  because the net ops are quick enough and will not block.

Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx>
Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx>
Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
---
 docs/misc/xenstore-paths.markdown      |   4 +
 tools/hotplug/Linux/Makefile           |   1 +
 tools/hotplug/Linux/remus-netbuf-setup | 203 ++++++++++++++
 tools/libxl/libxl.c                    |  21 +-
 tools/libxl/libxl_internal.h           |  10 +
 tools/libxl/libxl_netbuffer.c          | 475 +++++++++++++++++++++++++++++++++
 tools/libxl/libxl_nonetbuffer.c        |  27 ++
 tools/libxl/libxl_remus_device.c       |  21 +-
 tools/libxl/libxl_types.idl            |   2 +
 9 files changed, 759 insertions(+), 5 deletions(-)
 create mode 100644 tools/hotplug/Linux/remus-netbuf-setup

diff --git a/docs/misc/xenstore-paths.markdown b/docs/misc/xenstore-paths.markdown
index ea67536..d94ea9d 100644
--- a/docs/misc/xenstore-paths.markdown
+++ b/docs/misc/xenstore-paths.markdown
@@ -393,6 +393,10 @@ The guest's virtual time offset from UTC in seconds.
 
 The device model version for a domain.
 
+#### /libxl/$DOMID/remus/netbuf/$DEVID/ifb = STRING [n,INTERNAL]
+
+ifb device used by Remus to buffer network output from the associated vif.
+
 [BLKIF]: http://xenbits.xen.org/docs/unstable/hypercall/include,public,io,blkif.h.html
 [FBIF]: http://xenbits.xen.org/docs/unstable/hypercall/include,public,io,fbif.h.html
 [HVMPARAMS]: http://xenbits.xen.org/docs/unstable/hypercall/include,public,hvm,params.h.html
diff --git a/tools/hotplug/Linux/Makefile b/tools/hotplug/Linux/Makefile
index d5de9e6..721f8c0 100644
--- a/tools/hotplug/Linux/Makefile
+++ b/tools/hotplug/Linux/Makefile
@@ -16,6 +16,7 @@ XEN_SCRIPTS += vif-nat
 XEN_SCRIPTS += vif-openvswitch
 XEN_SCRIPTS += vif2
 XEN_SCRIPTS += vif-setup
+XEN_SCRIPTS-$(CONFIG_REMUS_NETBUF) += remus-netbuf-setup
 XEN_SCRIPTS += block
 XEN_SCRIPTS += block-enbd block-nbd
 XEN_SCRIPTS-$(CONFIG_BLKTAP1) += blktap
diff --git a/tools/hotplug/Linux/remus-netbuf-setup b/tools/hotplug/Linux/remus-netbuf-setup
new file mode 100644
index 0000000..58c46f3
--- /dev/null
+++ b/tools/hotplug/Linux/remus-netbuf-setup
@@ -0,0 +1,203 @@
+#!/bin/bash
+#============================================================================
+# ${XEN_SCRIPT_DIR}/remus-netbuf-setup
+#
+# Script for attaching a network buffer to the specified vif (in any mode).
+# The hotplugging system will call this script when starting remus via libxl
+# API, libxl_domain_remus_start.
+#
+# Usage:
+# remus-netbuf-setup (setup|teardown)
+#
+# Environment vars:
+# vifname     vif interface name (required).
+# XENBUS_PATH path in Xenstore, where the IFB device details will be stored
+#                      or read from (required).
+#             (libxl passes /libxl/<domid>/remus/netbuf/<devid>)
+# IFB         ifb interface to be cleaned up (required). [for teardown op only]
+
+# Written to the store: (setup operation)
+# XENBUS_PATH/ifb=<ifbdevName> the IFB device serving
+#  as the intermediate buffer through which the interface's network output
+#  can be controlled.
+#
+# To install a network buffer on a guest vif (vif1.0) using ifb (ifb0)
+# we need to do the following
+#
+#  ip link set dev ifb0 up
+#  tc qdisc add dev vif1.0 ingress
+#  tc filter add dev vif1.0 parent ffff: proto ip \
+#    prio 10 u32 match u32 0 0 action mirred egress redirect dev ifb0
+#  nl-qdisc-add --dev=ifb0 --parent root plug
+#  nl-qdisc-add --dev=ifb0 --parent root --update plug --limit=10000000
+#                                                (10MB limit on buffer)
+#
+# So order of operations when installing a network buffer on vif1.0
+# 1. find a free ifb and bring up the device
+# 2. redirect traffic from vif1.0 to ifb:
+#   2.1 add ingress qdisc to vif1.0 (to capture outgoing packets from guest)
+#   2.2 use tc filter command with actions mirred egress + redirect
+# 3. install plug_qdisc on ifb device, with which we can buffer/release
+#    guest's network output from vif1.0
+#
+#
+
+#============================================================================
+
+# Unlike other vif scripts, vif-common is not needed here as it executes vif
+#specific setup code such as renaming.
+dir=$(dirname "$0")
+. "$dir/xen-hotplug-common.sh"
+
+findCommand "$@"
+
+if [ "$command" != "setup" -a  "$command" != "teardown" ]
+then
+  echo "Invalid command: $command"
+  log err "Invalid command: $command"
+  exit 1
+fi
+
+evalVariables "$@"
+
+: ${vifname:?}
+: ${XENBUS_PATH:?}
+
+check_libnl_tools() {
+    if ! command -v nl-qdisc-list > /dev/null 2>&1; then
+        fatal "Unable to find nl-qdisc-list tool"
+    fi
+    if ! command -v nl-qdisc-add > /dev/null 2>&1; then
+        fatal "Unable to find nl-qdisc-add tool"
+    fi
+    if ! command -v nl-qdisc-delete > /dev/null 2>&1; then
+        fatal "Unable to find nl-qdisc-delete tool"
+    fi
+}
+
+# Verify, without loading anything, that modinfo knows every kernel module
+# needed for network buffering; abort via fatal on the first one missing.
+# The user/admin is supposed to load ifb during boot time, ensuring that
+# there are enough free ifbs in the system. Other modules will be loaded
+# automatically by tc commands.
+check_modules() {
+    for m in ifb sch_plug sch_ingress act_mirred cls_u32
+    do
+        modinfo "$m" > /dev/null 2>&1 || fatal "Unable to find $m kernel module"
+    done
+}
+
+# Called when the chosen ifb name could not be written to xenstore:
+# run teardown_netbuf for the vif/ifb pair, then abort via fatal.
+#   $1  vif interface name
+#   $2  ifb device name
+xs_write_failed() {
+    local vif=$1
+    local ifb=$2
+    # Use the arguments we were given rather than reaching for the globals.
+    teardown_netbuf "$vif" "$ifb"
+    fatal "failed to write ifb name to xenstore"
+}
+
+# Return 0 if the ifb device named by $1 is free, 1 otherwise.
+# An ifb counts as busy when nl-qdisc-list prints anything for it, or when
+# some domain's /libxl/<domid>/remus/netbuf/<devid>/ifb xenstore node
+# already names it.
+check_ifb() {
+    local installed=`nl-qdisc-list -d "$1"`
+    [ -n "$installed" ] && return 1
+
+    for domid in `xenstore-list "/local/domain" 2>/dev/null || true`
+    do
+        [ "$domid" -eq 0 ] && continue
+        xenstore-exists "/libxl/$domid/remus/netbuf" || continue
+        # Keep the whole command substitution on one logical line: a
+        # newline inside the backticks would break the `|| true` fallback.
+        for devid in \
+            `xenstore-list "/libxl/$domid/remus/netbuf" 2>/dev/null || true`
+        do
+            local path="/libxl/$domid/remus/netbuf/$devid/ifb"
+            xenstore-exists "$path" || continue
+            local ifb=`xenstore-read "$path" 2>/dev/null || true`
+            [ "$ifb" = "$1" ] && return 1
+        done
+    done
+
+    return 0
+}
+
+# Pick the first ifb device that check_ifb reports as free, record its name
+# in xenstore under $XENBUS_PATH/ifb and bring the device up.
+# The chosen name is left in the global $IFB; aborts via fatal if no free
+# ifb exists.
+setup_ifb() {
+
+    for ifb in `ifconfig -a -s|egrep ^ifb|cut -d ' ' -f1`
+    do
+        check_ifb "$ifb" || continue
+        IFB="$ifb"
+        break
+    done
+
+    if [ -z "$IFB" ]
+    then
+        fatal "Unable to find a free IFB device for $vifname"
+    fi
+
+    #not using xenstore_write that automatically exits on error
+    #because we need to cleanup
+    _xenstore_write "$XENBUS_PATH/ifb" "$IFB" || \
+        xs_write_failed "$vifname" "$IFB"
+    do_or_die ip link set dev "$IFB" up
+}
+
+# Redirect the guest's outgoing traffic from the vif to the ifb.
+#   $1  vif interface name
+#   $2  ifb device name
+# Adds an ingress qdisc to the vif, then a u32 filter whose mirred action
+# redirects to the ifb. If the filter cannot be added the ingress qdisc is
+# removed again and the script aborts via fatal.
+redirect_vif_traffic() {
+    local vif=$1
+    local ifb=$2
+
+    do_or_die tc qdisc add dev "$vif" ingress
+
+    # The redirections must stay part of the tc command so that $? below
+    # reflects tc's exit status.
+    tc filter add dev "$vif" parent ffff: proto ip prio 10 \
+        u32 match u32 0 0 action mirred egress redirect dev "$ifb" \
+        >/dev/null 2>&1
+
+    if [ $? -ne 0 ]
+    then
+        do_without_error tc qdisc del dev "$vif" ingress
+        fatal "Failed to redirect traffic from $vif to $ifb"
+    fi
+}
+
+add_plug_qdisc() {
+    local vif=$1
+    local ifb=$2
+
+    nl-qdisc-add --dev="$ifb" --parent root plug >/dev/null 2>&1
+    if [ $? -ne 0 ]
+    then
+        do_without_error tc qdisc del dev "$vif" ingress
+        fatal "Failed to add plug qdisc to $ifb"
+    fi
+
+    #set ifb buffering limit in bytes. Its okay if this command fails
+    nl-qdisc-add --dev="$ifb" --parent root \
+        --update plug --limit=10000000 >/dev/null 2>&1 || true
+}
+
+teardown_netbuf() {
+    local vif=$1
+    local ifb=$2
+
+    if [ "$ifb" ]; then
+        do_without_error ip link set dev "$ifb" down
+        do_without_error nl-qdisc-delete --dev="$ifb" --parent root plug 
>/dev/null 2>&1
+        xenstore-rm -t "$XENBUS_PATH/ifb" 2>/dev/null || true
+    fi
+    do_without_error tc qdisc del dev "$vif" ingress
+    xenstore-rm -t "$XENBUS_PATH/hotplug-status" 2>/dev/null || true
+    xenstore-rm -t "$XENBUS_PATH/hotplug-error" 2>/dev/null || true
+}
+
+# Dispatch on the command, which was validated near the top of the script.
+if [ "$command" = "setup" ]
+then
+    check_libnl_tools
+    check_modules
+
+    # ifb selection and qdisc installation run under the "pickifb" lock.
+    claim_lock "pickifb"
+    setup_ifb
+    redirect_vif_traffic "$vifname" "$IFB"
+    add_plug_qdisc "$vifname" "$IFB"
+    release_lock "pickifb"
+
+    success
+elif [ "$command" = "teardown" ]
+then
+    teardown_netbuf "$vifname" "$IFB"
+fi
+
+log debug "Successful remus-netbuf-setup $command for $vifname, ifb $IFB."
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index bcbd02b..b7d62c1 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -788,6 +788,24 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info,
 
     /* Convenience aliases */
     libxl__remus_state *const rs = &dss->rs;
+
+    /* Setup network buffering */
+    if (info->netbuf) {
+        if (!libxl__netbuffer_enabled(gc)) {
+            LOG(ERROR, "Remus: No support for network buffering");
+            goto out;
+        }
+
+        if (info->netbufscript) {
+            rs->netbufscript =
+                libxl__strdup(gc, info->netbufscript);
+        } else {
+            rs->netbufscript =
+                GCSPRINTF("%s/remus-netbuf-setup",
+                libxl__xen_script_dir_path());
+        }
+    }
+
     rs->ao = ao;
     rs->domid = domid;
     rs->saved_rc = 0;
@@ -811,9 +829,6 @@ static void remus_failover_cb(libxl__egc *egc,
      * from sending checkpoints.
      */
 
-    /* TBD: Remus cleanup - i.e. detach qdisc, release other
-     * resources.
-     */
     libxl__ao_complete(egc, ao, rc);
 }
 
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 8ef20a0..2fe36a6 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -310,6 +310,10 @@ struct libxl__gc {
     libxl_ctx *owner;
 };
 
+/* remus device ops specific structures start */
+typedef struct libxl__remus_netbuf_state libxl__remus_netbuf_state;
+/* remus device ops specific structures end */
+
 struct libxl__ctx {
     xentoollog_logger *lg;
     xc_interface *xch;
@@ -374,6 +378,9 @@ struct libxl__ctx {
     LIBXL_LIST_ENTRY(libxl_ctx) sigchld_users_entry;
 
     libxl_version_info version_info;
+
+    /* remus device ops specific structures */
+    libxl__remus_netbuf_state *rns;
 };
 
 typedef struct {
@@ -2576,6 +2583,7 @@ struct libxl__remus_device_state {
     /* devices that have been setuped */
     libxl__remus_device **dev;
 
+    libxl_device_nic *nics;
     int num_nics;
     int num_disks;
 
@@ -2631,6 +2639,8 @@ struct libxl__remus_state {
     libxl__ao *ao;
     uint32_t domid;
     libxl__remus_callback *callback;
+    /* Script to setup/teardown network buffers */
+    const char *netbufscript;
 
     /* private */
     int saved_rc;
diff --git a/tools/libxl/libxl_netbuffer.c b/tools/libxl/libxl_netbuffer.c
index 52d593c..025ee89 100644
--- a/tools/libxl/libxl_netbuffer.c
+++ b/tools/libxl/libxl_netbuffer.c
@@ -17,11 +17,486 @@
 
 #include "libxl_internal.h"
 
+#include <netlink/cache.h>
+#include <netlink/socket.h>
+#include <netlink/attr.h>
+#include <netlink/route/link.h>
+#include <netlink/route/route.h>
+#include <netlink/route/qdisc.h>
+#include <netlink/route/qdisc/plug.h>
+/* Remus network buffering state; nic_init() stores the instance in CTX->rns. */
+struct libxl__remus_netbuf_state {
+    libxl__ao *ao;                /* copied from rs->ao by nic_init() */
+    uint32_t domid;               /* copied from rs->domid by nic_init() */
+    const char *netbufscript;     /* copied from rs->netbufscript by nic_init() */
+
+    struct nl_sock *nlsock;       /* NETLINK_ROUTE socket opened by nic_init() */
+    struct nl_cache *qdisc_cache; /* qdisc cache built on nlsock by nic_init() */
+};
+/* Per-NIC Remus state; nic_setup() allocates it and stores it in dev->data. */
+typedef struct libxl__remus_device_nic {
+    int devid;                  /* nic->devid of the guest NIC */
+    const char *vif;            /* interface name returned by get_vifname() */
+    const char *ifb;            /* IFB name read from xenstore after setup */
+    struct rtnl_qdisc *qdisc;   /* root qdisc of the IFB, set by init_qdisc() */
+} libxl__remus_device_nic;
+/* This implementation always reports network buffering as available. */
 int libxl__netbuffer_enabled(libxl__gc *gc)
 {
     return 1;
 }
 
+/*----- init() and destroy() -----*/
+/*
+ * Allocate the netbuf state, publish it in CTX->rns, open a NETLINK_ROUTE
+ * socket and build the initial qdisc cache on it.
+ *
+ * Returns 0 on success, ERROR_FAIL if any libnl call fails. Nothing is
+ * released on the failure path here; nic_destroy() checks the socket and
+ * the cache individually before freeing them.
+ * NOTE(review): confirm the caller runs nic_destroy() after a failed init.
+ */
+static int nic_init(const libxl__remus_device_ops *self,
+                    libxl__remus_state *rs)
+{
+    int rc;
+    libxl__remus_netbuf_state *ns;
+
+    STATE_AO_GC(rs->ao);
+
+    GCNEW(ns);
+    CTX->rns = ns;
+
+    ns->nlsock = nl_socket_alloc();
+    if (!ns->nlsock) {
+        LOG(ERROR, "cannot allocate nl socket");
+        goto out;
+    }
+
+    rc = nl_connect(ns->nlsock, NETLINK_ROUTE);
+    if (rc) {
+        LOG(ERROR, "failed to open netlink socket: %s",
+            nl_geterror(rc));
+        goto out;
+    }
+
+    /* get list of all qdiscs installed on network devs. */
+    rc = rtnl_qdisc_alloc_cache(ns->nlsock, &ns->qdisc_cache);
+    if (rc) {
+        LOG(ERROR, "failed to allocate qdisc cache: %s",
+            nl_geterror(rc));
+        goto out;
+    }
+
+    ns->ao = rs->ao;
+    ns->domid = rs->domid;
+    ns->netbufscript = rs->netbufscript;
+
+    return 0;
+
+out:
+    return ERROR_FAIL;
+}
+/*
+ * Release the libnl resources held in CTX->rns: clear and free the qdisc
+ * cache, then close and free the netlink socket. Does nothing when
+ * CTX->rns is NULL; each pointer is reset to NULL once released.
+ */
+static void nic_destroy(const libxl__remus_device_ops *self,
+                        libxl__remus_state *rs)
+{
+    STATE_AO_GC(rs->ao);
+    libxl__remus_netbuf_state *ns = CTX->rns;
+
+    if (!ns)
+        return;
+
+    /* free qdisc cache */
+    if (ns->qdisc_cache) {
+        nl_cache_clear(ns->qdisc_cache);
+        nl_cache_free(ns->qdisc_cache);
+        ns->qdisc_cache = NULL;
+    }
+
+    /* close & free nlsock */
+    if (ns->nlsock) {
+        nl_close(ns->nlsock);
+        nl_socket_free(ns->nlsock);
+        ns->nlsock = NULL;
+    }
+}
+
+/*----- checkpointing APIs -----*/
+
+/* Values for remus_netbuf_op()'s buffer_op argument; these are local
+ * selectors, not the values passed to the kernel. */
+enum {
+    tc_buffer_start,    /* -> rtnl_qdisc_plug_buffer() */
+    tc_buffer_release   /* -> rtnl_qdisc_plug_release_one() */
+};
+
+/* API implementations */
+/*
+ * Apply one plug-qdisc operation to the NIC's IFB device.
+ *
+ * buffer_op selects the change made to remus_nic->qdisc
+ * (tc_buffer_start: rtnl_qdisc_plug_buffer(), anything else:
+ * rtnl_qdisc_plug_release_one()); the qdisc is then submitted with
+ * rtnl_qdisc_add() on netbuf_state->nlsock.
+ *
+ * Returns 0 on success; on any libnl error logs it and returns ERROR_FAIL.
+ */
+static int remus_netbuf_op(libxl__remus_device_nic *remus_nic,
+                           libxl__remus_netbuf_state *netbuf_state,
+                           int buffer_op)
+{
+    int rc;
+
+    STATE_AO_GC(netbuf_state->ao);
+
+    if (buffer_op == tc_buffer_start)
+        rc = rtnl_qdisc_plug_buffer(remus_nic->qdisc);
+    else
+        rc = rtnl_qdisc_plug_release_one(remus_nic->qdisc);
+
+    if (rc)
+        goto out;
+
+    rc = rtnl_qdisc_add(netbuf_state->nlsock,
+                        remus_nic->qdisc,
+                        NLM_F_REQUEST);
+    if (rc)
+        goto out;
+
+    return 0;
+
+out:
+    LOG(ERROR, "Remus: cannot do netbuf op %s on %s:%s",
+        ((buffer_op == tc_buffer_start) ?
+        "start_new_epoch" : "release_prev_epoch"),
+        remus_nic->ifb, nl_geterror(rc));
+    return ERROR_FAIL;
+}
+/*
+ * Postsuspend hook: issue tc_buffer_start for this NIC and report the
+ * result through dev->callback before returning.
+ */
+static void nic_postsuspend(libxl__remus_device *dev)
+{
+    int rc;
+    libxl__remus_device_nic *remus_nic = dev->data;
+    STATE_AO_GC(dev->rds->ao);
+    libxl__remus_netbuf_state *ns = CTX->rns;
+
+    rc = remus_netbuf_op(remus_nic, ns, tc_buffer_start);
+    dev->callback(dev->rds->egc, dev, rc);
+}
+/*
+ * Commit hook: issue tc_buffer_release for this NIC and report the
+ * result through dev->callback before returning.
+ */
+static void nic_commit(libxl__remus_device *dev)
+{
+    int rc;
+    libxl__remus_device_nic *remus_nic = dev->data;
+    STATE_AO_GC(dev->rds->ao);
+    libxl__remus_netbuf_state *ns = CTX->rns;
+
+    rc = remus_netbuf_op(remus_nic, ns, tc_buffer_release);
+    dev->callback(dev->rds->egc, dev, rc);
+}
+
+/*----- main flow of control -----*/
+
+/* helper functions */
+
+/*
+ * Return the interface name to use for this NIC.
+ *
+ * If the backend's xenstore "vifname" node holds a name, use that instead
+ * of the vifX.Y format; if the read succeeds but yields no value, fall
+ * back to libxl__device_nic_devname(). If the read itself fails the
+ * fallback is skipped and vifname (initialised to NULL) is returned as
+ * libxl__xs_read_checked() left it.
+ *
+ * It must ONLY be used for remus because if driver domains
+ * were in use it would constitute a security vulnerability.
+ */
+static const char *get_vifname(libxl__remus_device *dev,
+                               const libxl_device_nic *nic)
+{
+    const char *vifname = NULL;
+    const char *path;
+    int rc;
+
+    STATE_AO_GC(dev->rds->ao);
+
+    /* Convenience aliases */
+    libxl__remus_netbuf_state *netbuf_state = CTX->rns;
+    const uint32_t domid = netbuf_state->domid;
+
+    path = libxl__sprintf(gc, "%s/backend/vif/%d/%d/vifname",
+                          libxl__xs_get_dompath(gc, 0), domid, nic->devid);
+    rc = libxl__xs_read_checked(gc, XBT_NULL, path, &vifname);
+    if (!rc && !vifname) {
+        /* use the default name */
+        vifname = libxl__device_nic_devname(gc, domid,
+                                            nic->devid,
+                                            nic->nictype);
+    }
+
+    return vifname;
+}
+/* Drop the qdisc reference held in remus_nic->qdisc, if any, and clear it. */
+static void free_qdisc(libxl__remus_device_nic *remus_nic)
+{
+    /* free qdiscs */
+    if (remus_nic->qdisc == NULL)
+        return;
+
+    nl_object_put((struct nl_object *)(remus_nic->qdisc));
+    remus_nic->qdisc = NULL;
+}
+/*
+ * Look up the plug qdisc that the netbuf script installed on the NIC's IFB
+ * device and store a handle to it in remus_nic->qdisc.
+ *
+ * Refills the qdisc cache, resolves remus_nic->ifb to an interface index,
+ * fetches the root qdisc for that index and checks that its kind is
+ * "plug". Returns 0 on success and ERROR_FAIL otherwise; the link handle
+ * obtained along the way is always released before returning.
+ */
+static int init_qdisc(libxl__remus_netbuf_state *netbuf_state,
+                      libxl__remus_device_nic *remus_nic)
+{
+    int rc, ifindex;
+    struct rtnl_link *ifb = NULL;
+    struct rtnl_qdisc *qdisc = NULL;
+
+    STATE_AO_GC(netbuf_state->ao);
+
+    /* Now that we have brought up IFB device with plug qdisc for
+     * this vif, so we need to refill the qdisc cache.
+     */
+    rc = nl_cache_refill(netbuf_state->nlsock, netbuf_state->qdisc_cache);
+    if (rc) {
+        LOG(ERROR, "cannot refill qdisc cache: %s", nl_geterror(rc));
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    /* get a handle to the IFB interface */
+    ifb = NULL;
+    rc = rtnl_link_get_kernel(netbuf_state->nlsock, 0,
+                               remus_nic->ifb, &ifb);
+    if (rc) {
+        LOG(ERROR, "cannot obtain handle for %s: %s", remus_nic->ifb,
+            nl_geterror(rc));
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    rc = ERROR_FAIL; /* default result for every failure path below */
+    ifindex = rtnl_link_get_ifindex(ifb);
+    if (!ifindex) {
+        LOG(ERROR, "interface %s has no index", remus_nic->ifb);
+        goto out;
+    }
+
+    /* Get a reference to the root qdisc installed on the IFB, by
+     * querying the qdisc list we obtained earlier. The netbufscript
+     * sets up the plug qdisc as the root qdisc, so we don't have to
+     * search the entire qdisc tree on the IFB dev.
+     *
+     * The reference is kept in remus_nic->qdisc on success and is
+     * dropped with nl_object_put(): below if the sanity check fails,
+     * otherwise in free_qdisc().
+     */
+    qdisc = rtnl_qdisc_get_by_parent(netbuf_state->qdisc_cache, ifindex,
+                                     TC_H_ROOT);
+
+    if (qdisc) {
+        const char *tc_kind = rtnl_tc_get_kind(TC_CAST(qdisc));
+        /* Sanity check: Ensure that the root qdisc is a plug qdisc. */
+        if (!tc_kind || strcmp(tc_kind, "plug")) {
+            nl_object_put((struct nl_object *)qdisc);
+            LOG(ERROR, "plug qdisc is not installed on %s", remus_nic->ifb);
+            goto out;
+        }
+        remus_nic->qdisc = qdisc;
+        rc = 0;
+    } else {
+        LOG(ERROR, "Cannot get qdisc handle from ifb %s", remus_nic->ifb);
+    }
+
+out:
+    if (ifb)
+        rtnl_link_put(ifb);
+
+    return rc;
+}
+
+/* callbacks */
+
+/*
+ * Completion callback for the "setup" run of the netbuf script.
+ *
+ * In return, the script writes the name of IFB device (during setup) to be
+ * used for output buffering into XENBUS_PATH/ifb
+ *
+ * The ifb name is read back first, then the hotplug-error node and the
+ * script's exit status are checked; if all is well init_qdisc() is
+ * called. The outcome is always reported through dev->callback.
+ */
+static void netbuf_setup_script_cb(libxl__egc *egc,
+                                   libxl__async_exec_state *aes,
+                                   int status)
+{
+    libxl__remus_device *dev = CONTAINER_OF(aes, *dev, aes);
+    libxl__remus_device_nic *remus_nic = dev->data;
+    const char *out_path_base, *hotplug_error = NULL;
+    int rc;
+
+    STATE_AO_GC(dev->rds->ao);
+
+    /* Convenience aliases */
+    libxl__remus_netbuf_state *netbuf_state = CTX->rns;
+    const uint32_t domid = netbuf_state->domid;
+    const int devid = remus_nic->devid;
+    const char *const vif = remus_nic->vif;
+    const char **const ifb = &remus_nic->ifb;
+
+    /*
+     * we need to get ifb first because it's needed for teardown
+     */
+    rc = libxl__xs_read_checked(gc, XBT_NULL,
+                                GCSPRINTF("%s/remus/netbuf/%d/ifb",
+                                          libxl__xs_libxl_path(gc, domid),
+                                          devid),
+                                ifb);
+    if (rc)
+        goto out;
+
+    if (!(*ifb)) {
+        LOG(ERROR, "Cannot get ifb dev name for domain %u dev %s",
+            domid, vif);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    out_path_base = GCSPRINTF("%s/remus/netbuf/%d",
+                              libxl__xs_libxl_path(gc, domid), devid);
+
+    rc = libxl__xs_read_checked(gc, XBT_NULL,
+                                GCSPRINTF("%s/hotplug-error", out_path_base),
+                                &hotplug_error);
+    if (rc)
+        goto out;
+
+    if (hotplug_error) {
+        LOG(ERROR, "netbuf script %s setup failed for vif %s: %s",
+            netbuf_state->netbufscript, vif, hotplug_error);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    if (status) {
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    LOG(DEBUG, "%s will buffer packets from vif %s", *ifb, vif);
+    rc = init_qdisc(netbuf_state, remus_nic);
+
+out:
+    dev->callback(egc, dev, rc);
+}
+/*
+ * Completion callback for the "teardown" run of the netbuf script: map a
+ * non-zero status to ERROR_FAIL, release the NIC's qdisc reference either
+ * way, and report the result through dev->callback.
+ */
+static void netbuf_teardown_script_cb(libxl__egc *egc,
+                                      libxl__async_exec_state *aes,
+                                      int status)
+{
+    int rc;
+    libxl__remus_device *dev = CONTAINER_OF(aes, *dev, aes);
+    libxl__remus_device_nic *remus_nic = dev->data;
+
+    if (status)
+        rc = ERROR_FAIL;
+    else
+        rc = 0;
+
+    free_qdisc(remus_nic);
+
+    dev->callback(egc, dev, rc);
+}
+
+/* setup and teardown */
+
+/*
+ * Fill in aes so that it runs the netbuf script for this device with the
+ * given op ("setup" or "teardown").
+ *
+ * the script needs the following env & args
+ * $vifname
+ * $XENBUS_PATH (/libxl/<domid>/remus/netbuf/<devid>/)
+ * $IFB (for teardown)
+ * setup/teardown as command line arg.
+ *
+ * The completion callback is netbuf_teardown_script_cb for "teardown" and
+ * netbuf_setup_script_cb for any other op.
+ */
+static void setup_async_exec(libxl__async_exec_state *aes,
+                             char *op, libxl__remus_device *dev)
+{
+    int arraysize, nr = 0;
+    char **env = NULL, **args = NULL;
+    libxl__remus_device_nic *remus_nic = dev->data;
+    STATE_AO_GC(dev->rds->ao);
+
+    /* Convenience aliases */
+    libxl__remus_netbuf_state *ns = CTX->rns;
+    char *const script = libxl__strdup(gc, ns->netbufscript);
+    const uint32_t domid = ns->domid;
+    const int dev_id = remus_nic->devid;
+    const char *const vif = remus_nic->vif;
+    const char *const ifb = remus_nic->ifb;
+
+    arraysize = 7; /* up to three name/value pairs plus the NULL terminator */
+    GCNEW_ARRAY(env, arraysize);
+    env[nr++] = "vifname";
+    env[nr++] = libxl__strdup(gc, vif);
+    env[nr++] = "XENBUS_PATH";
+    env[nr++] = GCSPRINTF("%s/remus/netbuf/%d",
+                          libxl__xs_libxl_path(gc, domid), dev_id);
+    if (!strcmp(op, "teardown") && ifb) {
+        env[nr++] = "IFB";
+        env[nr++] = libxl__strdup(gc, ifb);
+    }
+    env[nr++] = NULL;
+    assert(nr <= arraysize);
+
+    arraysize = 3; nr = 0; /* script path, op, NULL terminator */
+    GCNEW_ARRAY(args, arraysize);
+    args[nr++] = script;
+    args[nr++] = op;
+    args[nr++] = NULL;
+    assert(nr == arraysize);
+
+    aes->ao = dev->rds->ao;
+    aes->what = GCSPRINTF("%s %s", args[0], args[1]);
+    aes->env = env;
+    aes->args = args;
+    aes->timeout_ms = LIBXL_HOTPLUG_TIMEOUT * 1000;
+    aes->stdfds[0] = -1;
+    aes->stdfds[1] = -1;
+    aes->stdfds[2] = -1;
+
+    if (!strcmp(op, "teardown"))
+        aes->callback = netbuf_teardown_script_cb;
+    else
+        aes->callback = netbuf_setup_script_cb;
+}
+/*
+ * Setup hook: allocate the per-NIC state, resolve the vif name and start
+ * the netbuf script with op "setup". On a successful start the function
+ * returns without calling dev->callback; the completion callback is the
+ * one installed by setup_async_exec(). If the vif name cannot be
+ * determined or the script cannot be started, dev->callback is invoked
+ * here with the error.
+ */
+static void nic_setup(libxl__remus_device *dev)
+{
+    int rc;
+    libxl__remus_device_nic *remus_nic;
+    const libxl_device_nic *nic = dev->backend_dev;
+
+    STATE_AO_GC(dev->rds->ao);
+
+    GCNEW(remus_nic);
+    dev->data = remus_nic;
+    remus_nic->devid = nic->devid;
+    remus_nic->vif = get_vifname(dev, nic);
+    if (!remus_nic->vif) {
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    setup_async_exec(&dev->aes, "setup", dev);
+    rc = libxl__async_exec_start(gc, &dev->aes);
+    if (rc)
+        goto out;
+
+    return;
+
+out:
+    dev->callback(dev->rds->egc, dev, rc);
+}
+/*
+ * Teardown hook: start the netbuf script with op "teardown". On a
+ * successful start the function returns without calling dev->callback;
+ * if the script cannot be started, dev->callback is invoked here with
+ * the error.
+ */
+static void nic_teardown(libxl__remus_device *dev)
+{
+    int rc;
+    STATE_AO_GC(dev->rds->ao);
+
+    setup_async_exec(&dev->aes, "teardown", dev);
+
+    rc = libxl__async_exec_start(gc, &dev->aes);
+    if (rc)
+        goto out;
+
+    return;
+
+out:
+    dev->callback(dev->rds->egc, dev, rc);
+}
+/* Remus device ops for NICs, wiring up the nic_* functions defined above. */
+const libxl__remus_device_ops remus_device_nic = {
+    .kind = LIBXL__REMUS_DEVICE_NIC,
+    .init = nic_init,
+    .destroy = nic_destroy,
+    .postsuspend = nic_postsuspend,
+    .commit = nic_commit,
+    .setup = nic_setup,
+    .teardown = nic_teardown,
+};
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxl/libxl_nonetbuffer.c b/tools/libxl/libxl_nonetbuffer.c
index 1c72a7f..5390274 100644
--- a/tools/libxl/libxl_nonetbuffer.c
+++ b/tools/libxl/libxl_nonetbuffer.c
@@ -22,6 +22,33 @@ int libxl__netbuffer_enabled(libxl__gc *gc)
     return 0;
 }
 
+static void nic_match(const libxl__remus_device_ops *self,
+                      libxl__remus_device *dev)
+{
+    STATE_AO_GC(dev->rds->ao);
+    /* Stub: every device is reported as ERROR_FAIL through dev->callback. */
+    dev->callback(dev->rds->egc, dev, ERROR_FAIL);
+}
+/* Stub init: nothing to set up, always returns 0. */
+static int nic_init(const libxl__remus_device_ops *self,
+                    libxl__remus_state *rs)
+{
+    return 0;
+}
+/* Stub destroy: nothing to release. */
+static void nic_destroy(const libxl__remus_device_ops *self,
+                        libxl__remus_state *rs)
+{
+    return;
+}
+/* Remus NIC ops for this stub file: only init, destroy and match are set. */
+const libxl__remus_device_ops remus_device_nic = {
+    .kind = LIBXL__REMUS_DEVICE_NIC,
+    .init = nic_init,
+    .destroy = nic_destroy,
+    .match = nic_match,
+};
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxl/libxl_remus_device.c b/tools/libxl/libxl_remus_device.c
index cd71242..5ef11b9 100644
--- a/tools/libxl/libxl_remus_device.c
+++ b/tools/libxl/libxl_remus_device.c
@@ -17,7 +17,9 @@
 
 #include "libxl_internal.h"
 
+extern const libxl__remus_device_ops remus_device_nic;
 static const libxl__remus_device_ops *dev_ops[] = {
+    &remus_device_nic,
 };
 
 /*----- checkpointing APIs -----*/
@@ -227,6 +229,7 @@ static void device_teardown_cb(libxl__egc *egc,
                                libxl__remus_device *dev,
                                int rc)
 {
+    int i;
     libxl__remus_device_state *const rds = dev->rds;
     libxl__remus_state *rs = CONTAINER_OF(rds, *rs, dev_state);
 
@@ -236,6 +239,13 @@ static void device_teardown_cb(libxl__egc *egc,
     rds->num_set_up--;
 
     if (rds->num_set_up == 0) {
+        /* clean nic */
+        for (i = 0; i < rds->num_nics; i++)
+            libxl_device_nic_dispose(&rds->nics[i]);
+        free(rds->nics);
+        rds->nics = NULL;
+        rds->num_nics = 0;
+
         destroy_device_ops(rs);
         rs->callback(egc, rs, rs->saved_rc);
     }
@@ -243,7 +253,7 @@ static void device_teardown_cb(libxl__egc *egc,
 
 /* remus device setup and teardown */
 
-static __attribute__((unused)) void libxl__remus_device_init(libxl__egc *egc,
+static void libxl__remus_device_init(libxl__egc *egc,
                                      libxl__remus_device_state *rds,
                                      libxl__remus_device_kind kind,
                                      void *libxl_dev)
@@ -314,6 +324,7 @@ static void destroy_device_ops(libxl__remus_state *rs)
 
 void libxl__remus_device_setup(libxl__egc *egc, libxl__remus_state *rs)
 {
+    int i;
     STATE_AO_GC(rs->ao);
 
     /* Convenience aliases */
@@ -332,7 +343,9 @@ void libxl__remus_device_setup(libxl__egc *egc, libxl__remus_state *rs)
     rds->num_nics = 0;
     rds->num_disks = 0;
 
-    /* TBD: Remus setup - i.e. attach qdisc, enable disk buffering, etc */
+    /* TBD: Remus setup - i.e. enable disk buffering, etc */
+    if (rs->netbufscript)
+        rds->nics = libxl_device_nic_list(CTX, rs->domid, &rds->num_nics);
 
     if (rds->num_nics == 0 && rds->num_disks == 0)
         goto out;
@@ -340,6 +353,10 @@ void libxl__remus_device_setup(libxl__egc *egc, libxl__remus_state *rs)
     GCNEW_ARRAY(rds->dev, rds->num_nics + rds->num_disks);
 
     /* TBD: CALL libxl__remus_device_init to init remus devices */
+    for (i = 0; i < rds->num_nics; i++) {
+        libxl__remus_device_init(egc, rds,
+                                 LIBXL__REMUS_DEVICE_NIC, &rds->nics[i]);
+    }
 
     return;
 
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index e56567f..e85a636 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -575,6 +575,8 @@ libxl_domain_remus_info = Struct("domain_remus_info",[
     ("interval",     integer),
     ("blackhole",    bool),
     ("compression",  bool),
+    ("netbuf",       bool),
+    ("netbufscript", string),
     ])
 
 libxl_event_type = Enumeration("event_type", [
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.