|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 06/13 V6] remus: implement the API to setup network buffering
From: Shriram Rajagopalan <rshriram@xxxxxxxxx>
The following steps are taken during setup:
a) call the hotplug script for each vif to setup its network buffer
b) establish a dedicated remus context containing libnl-related
state (netlink sockets, qdisc caches, etc.)
c) obtain handles to the plug qdiscs installed on the IFB devices
chosen by the hotplug scripts.
Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx>
Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
Reviewed-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
---
docs/misc/xenstore-paths.markdown | 4 +
tools/libxl/Makefile | 2 +
tools/libxl/libxl_dom.c | 7 +-
tools/libxl/libxl_internal.h | 11 +
tools/libxl/libxl_netbuffer.c | 419 ++++++++++++++++++++++++++++++++++++++
tools/libxl/libxl_nonetbuffer.c | 6 +
tools/libxl/libxl_remus.c | 35 ++++
7 files changed, 479 insertions(+), 5 deletions(-)
create mode 100644 tools/libxl/libxl_remus.c
diff --git a/docs/misc/xenstore-paths.markdown
b/docs/misc/xenstore-paths.markdown
index 70ab7f4..7a0d2c9 100644
--- a/docs/misc/xenstore-paths.markdown
+++ b/docs/misc/xenstore-paths.markdown
@@ -385,6 +385,10 @@ The guest's virtual time offset from UTC in seconds.
The device model version for a domain.
+#### /libxl/$DOMID/remus/netbuf/$DEVID/ifb = STRING [n,INTERNAL]
+
+IFB device used by Remus to buffer network output from the associated vif.
+
[BLKIF]:
http://xenbits.xen.org/docs/unstable/hypercall/include,public,io,blkif.h.html
[FBIF]:
http://xenbits.xen.org/docs/unstable/hypercall/include,public,io,fbif.h.html
[HVMPARAMS]:
http://xenbits.xen.org/docs/unstable/hypercall/include,public,hvm,params.h.html
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 84a467c..218f55e 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -52,6 +52,8 @@ else
LIBXL_OBJS-y += libxl_nonetbuffer.o
endif
+LIBXL_OBJS-y += libxl_remus.o
+
LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o
LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 8d63f90..e3e9f6f 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -753,9 +753,6 @@ int libxl__toolstack_restore(uint32_t domid, const uint8_t
*buf,
/*==================== Domain suspend (save) ====================*/
-static void domain_suspend_done(libxl__egc *egc,
- libxl__domain_suspend_state *dss, int rc);
-
/*----- complicated callback, called by xc_domain_save -----*/
/*
@@ -1508,8 +1505,8 @@ static void save_device_model_datacopier_done(libxl__egc
*egc,
dss->save_dm_callback(egc, dss, our_rc);
}
-static void domain_suspend_done(libxl__egc *egc,
- libxl__domain_suspend_state *dss, int rc)
+void domain_suspend_done(libxl__egc *egc,
+ libxl__domain_suspend_state *dss, int rc)
{
STATE_AO_GC(dss->ao);
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 2f64382..0430307 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2313,6 +2313,17 @@ typedef struct libxl__remus_state {
_hidden int libxl__netbuffer_enabled(libxl__gc *gc);
+_hidden void domain_suspend_done(libxl__egc *egc,
+ libxl__domain_suspend_state *dss,
+ int rc);
+
+_hidden void libxl__remus_setup_done(libxl__egc *egc,
+ libxl__domain_suspend_state *dss,
+ int rc);
+
+_hidden void libxl__remus_netbuf_setup(libxl__egc *egc,
+ libxl__domain_suspend_state *dss);
+
struct libxl__domain_suspend_state {
/* set by caller of libxl__domain_suspend */
libxl__ao *ao;
diff --git a/tools/libxl/libxl_netbuffer.c b/tools/libxl/libxl_netbuffer.c
index 8e23d75..0be876c 100644
--- a/tools/libxl/libxl_netbuffer.c
+++ b/tools/libxl/libxl_netbuffer.c
@@ -17,11 +17,430 @@
#include "libxl_internal.h"
+#include <netlink/cache.h>
+#include <netlink/socket.h>
+#include <netlink/attr.h>
+#include <netlink/route/link.h>
+#include <netlink/route/route.h>
+#include <netlink/route/qdisc.h>
+#include <netlink/route/qdisc/plug.h>
+
+typedef struct libxl__remus_netbuf_state { /* per-domain state used to set up Remus network buffering */
+ struct rtnl_qdisc **netbuf_qdisc_list; /* qdisc handles, one slot per IFB device; filled by init_qdiscs() */
+ struct nl_sock *nlsock; /* netlink socket, connected to NETLINK_ROUTE by init_qdiscs() */
+ struct nl_cache *qdisc_cache; /* qdisc cache obtained through rtnl_qdisc_alloc_cache() */
+ const char **vif_list; /* guest vif names, as returned by get_guest_vif_list() */
+ const char **ifb_list; /* IFB device names, read from xenstore after each setup script run */
+ uint32_t num_netbufs; /* number of entries in the arrays above */
+ uint32_t unused; /* NOTE(review): not referenced by the code visible here - confirm purpose */
+} libxl__remus_netbuf_state;
+
int libxl__netbuffer_enabled(libxl__gc *gc)
{
return 1; /* always reports network buffering as available; the libxl_nonetbuffer.c variant returns 0 */
}
+/*
+ * Return the backend interface name of a guest NIC.
+ *
+ * If the device has a "vifname" node in xenstore, that name is used
+ * instead of the default name produced by libxl__device_nic_devname()
+ * (the vifX.Y format).
+ *
+ * Returns a gc-allocated string, or NULL if xenstore could not be read
+ * (or if the default name could not be generated).
+ */
+static const char *get_vifname(libxl__gc *gc, uint32_t domid,
+                               libxl_device_nic *nic)
+{
+    const char *vifname = NULL;
+    const char *path;
+    int rc;
+
+    path = libxl__sprintf(gc, "%s/backend/vif/%u/%d/vifname",
+                          libxl__xs_get_dompath(gc, 0), domid, nic->devid);
+    rc = libxl__xs_read_checked(gc, XBT_NULL, path, &vifname);
+    if (rc)
+        return NULL;
+
+    /*
+     * libxl__xs_read_checked() treats a missing node as success and
+     * leaves the result NULL, so "no explicit vifname" has to be detected
+     * through the result pointer rather than through rc.
+     */
+    if (!vifname)
+        vifname = libxl__device_nic_devname(gc, domid,
+                                            nic->devid,
+                                            nic->nictype);
+
+    return vifname;
+}
+
+/*
+ * Build the list of backend vif names belonging to domain domid.
+ *
+ * On return *num_vifs is:
+ *    > 0  number of entries in the returned (gc-allocated) array
+ *      0  libxl_device_nic_list() returned no NICs; NULL is returned
+ *     -1  failure (a vif is served by a driver domain, or a vif name
+ *         could not be determined); NULL is returned
+ *
+ * NOTE(review): a NULL result from libxl_device_nic_list() is reported as
+ * "no vifs"; whether it can also mean an error should be confirmed.
+ */
+static const char **get_guest_vif_list(libxl__gc *gc, uint32_t domid,
+                                       int *num_vifs)
+{
+    libxl_device_nic *nics = NULL;
+    int nb = 0, i;
+    const char **vif_list = NULL;
+
+    *num_vifs = 0;
+    nics = libxl_device_nic_list(CTX, domid, &nb);
+    if (!nics)
+        return NULL;
+
+    /* Ensure that none of the vifs are backed by driver domains */
+    for (i = 0; i < nb; i++) {
+        if (nics[i].backend_domid != LIBXL_TOOLSTACK_DOMID) {
+            const char *name = get_vifname(gc, domid, &nics[i]);
+
+            LOG(ERROR, "vif %s has driver domain (%u) as its backend. "
+                "Network buffering is not supported with driver domains",
+                name ? name : "(unknown)", nics[i].backend_domid);
+            *num_vifs = -1;
+            goto out;
+        }
+    }
+
+    GCNEW_ARRAY(vif_list, nb);
+    for (i = 0; i < nb; ++i) {
+        vif_list[i] = get_vifname(gc, domid, &nics[i]);
+        if (!vif_list[i]) {
+            LOG(ERROR, "cannot determine the name of vif with devid %d",
+                nics[i].devid);
+            vif_list = NULL;
+            /* Report a failure; 0 would be read as "nothing to buffer". */
+            *num_vifs = -1;
+            goto out;
+        }
+    }
+    *num_vifs = nb;
+
+ out:
+    for (i = 0; i < nb; i++)
+        libxl_device_nic_dispose(&nics[i]);
+    free(nics);
+    return vif_list;
+}
+
+/*
+ * Release the libnl resources held in netbuf_state: the qdisc handles,
+ * the qdisc cache and the netlink socket.
+ *
+ * Every resource is checked before it is released and its pointer is
+ * cleared afterwards, so the function may be called on a partially
+ * initialised state (e.g. from the error path of init_qdiscs()) and more
+ * than once.
+ */
+static void free_qdiscs(libxl__remus_netbuf_state *netbuf_state)
+{
+    uint32_t i;
+
+    /* Drop the qdisc handles; the list is filled front to back, so the
+     * first empty slot marks the end of the valid entries. */
+    if (netbuf_state->netbuf_qdisc_list) {
+        for (i = 0; i < netbuf_state->num_netbufs; i++) {
+            struct rtnl_qdisc *qdisc = netbuf_state->netbuf_qdisc_list[i];
+
+            if (!qdisc)
+                break;
+
+            nl_object_put((struct nl_object *)qdisc);
+            netbuf_state->netbuf_qdisc_list[i] = NULL;
+        }
+    }
+
+    /* free qdisc cache */
+    if (netbuf_state->qdisc_cache) {
+        nl_cache_clear(netbuf_state->qdisc_cache);
+        nl_cache_free(netbuf_state->qdisc_cache);
+        netbuf_state->qdisc_cache = NULL;
+    }
+
+    /* close and free nlsock */
+    if (netbuf_state->nlsock) {
+        nl_close(netbuf_state->nlsock);
+        nl_socket_free(netbuf_state->nlsock);
+        netbuf_state->nlsock = NULL;
+    }
+}
+
+/*
+ * Obtain a netlink handle on the root qdisc of every IFB device listed
+ * in remus_state->netbuf_state->ifb_list and store the handles in
+ * netbuf_qdisc_list.
+ *
+ * Returns 0 on success. On failure everything acquired so far is
+ * released again and ERROR_FAIL is returned.
+ */
+static int init_qdiscs(libxl__gc *gc,
+                       libxl__remus_state *remus_state)
+{
+    int i, ret, ifindex;
+    struct rtnl_link *ifb = NULL;
+    struct rtnl_qdisc *qdisc = NULL;
+
+    /* Convenience aliases */
+    libxl__remus_netbuf_state * const netbuf_state = remus_state->netbuf_state;
+    const int num_netbufs = netbuf_state->num_netbufs;
+    const char ** const ifb_list = netbuf_state->ifb_list;
+
+    /* Now that we have brought up IFB devices with plug qdisc for
+     * each vif, lets get a netlink handle on the plug qdisc for use
+     * during checkpointing.
+     */
+    netbuf_state->nlsock = nl_socket_alloc();
+    if (!netbuf_state->nlsock) {
+        LOG(ERROR, "cannot allocate nl socket");
+        return ERROR_FAIL;
+    }
+
+    ret = nl_connect(netbuf_state->nlsock, NETLINK_ROUTE);
+    if (ret) {
+        LOG(ERROR, "failed to open netlink socket: %s",
+            nl_geterror(ret));
+        goto out_free_sock;
+    }
+
+    /* get list of all qdiscs installed on network devs. */
+    ret = rtnl_qdisc_alloc_cache(netbuf_state->nlsock,
+                                 &netbuf_state->qdisc_cache);
+    if (ret) {
+        LOG(ERROR, "failed to allocate qdisc cache: %s",
+            nl_geterror(ret));
+        goto out_close_sock;
+    }
+
+    /* list of handles to plug qdiscs */
+    GCNEW_ARRAY(netbuf_state->netbuf_qdisc_list, num_netbufs);
+
+    for (i = 0; i < num_netbufs; ++i) {
+
+        /* get a handle to the IFB interface */
+        ifb = NULL;
+        ret = rtnl_link_get_kernel(netbuf_state->nlsock, 0,
+                                   ifb_list[i], &ifb);
+        if (ret) {
+            LOG(ERROR, "cannot obtain handle for %s: %s", ifb_list[i],
+                nl_geterror(ret));
+            goto out;
+        }
+
+        ifindex = rtnl_link_get_ifindex(ifb);
+        if (!ifindex) {
+            LOG(ERROR, "interface %s has no index", ifb_list[i]);
+            goto out;
+        }
+
+        /* Get a reference to the root qdisc installed on the IFB, by
+         * querying the qdisc list we obtained earlier. The netbufscript
+         * sets up the plug qdisc as the root qdisc, so we don't have to
+         * search the entire qdisc tree on the IFB dev.
+         *
+         * The reference is either dropped right here (when the qdisc is
+         * not a plug qdisc) or stored in netbuf_qdisc_list, from where
+         * free_qdiscs() drops it with nl_object_put().
+         */
+        qdisc = rtnl_qdisc_get_by_parent(netbuf_state->qdisc_cache, ifindex,
+                                         TC_H_ROOT);
+        if (!qdisc) {
+            LOG(ERROR, "Cannot get qdisc handle from ifb %s", ifb_list[i]);
+            goto out;
+        }
+
+        const char *tc_kind = rtnl_tc_get_kind(TC_CAST(qdisc));
+        /* Sanity check: Ensure that the root qdisc is a plug qdisc. */
+        if (!tc_kind || strcmp(tc_kind, "plug")) {
+            nl_object_put((struct nl_object *)qdisc);
+            LOG(ERROR, "plug qdisc is not installed on %s", ifb_list[i]);
+            goto out;
+        }
+        netbuf_state->netbuf_qdisc_list[i] = qdisc;
+
+        rtnl_link_put(ifb);
+        ifb = NULL;
+    }
+
+    return 0;
+
+ out:
+    /* Socket, cache and qdisc list all exist at this point. */
+    if (ifb)
+        rtnl_link_put(ifb);
+    free_qdiscs(netbuf_state);
+    return ERROR_FAIL;
+
+    /* Early failures: only undo what has actually been set up, and do
+     * not leave a stale socket pointer behind. */
+ out_close_sock:
+    nl_close(netbuf_state->nlsock);
+ out_free_sock:
+    nl_socket_free(netbuf_state->nlsock);
+    netbuf_state->nlsock = NULL;
+    return ERROR_FAIL;
+}
+
+/*
+ * Timer callback armed by exec_netbuf_script(): the hotplug script did
+ * not finish in time, so it is killed with SIGKILL.
+ *
+ * Only the kill is performed here; nothing else is cleaned up or
+ * reported by this function.
+ */
+static void netbuf_setup_timeout_cb(libxl__egc *egc,
+                                    libxl__ev_time *ev,
+                                    const struct timeval *requested_abs)
+{
+    libxl__remus_state *remus_state = CONTAINER_OF(ev, *remus_state, timeout);
+
+    /* Convenience aliases */
+    const int devid = remus_state->dev_id;
+    libxl__remus_netbuf_state *const netbuf_state = remus_state->netbuf_state;
+    const char *const vif = netbuf_state->vif_list[devid];
+
+    STATE_AO_GC(remus_state->dss->ao);
+
+    libxl__ev_time_deregister(gc, &remus_state->timeout);
+    assert(libxl__ev_child_inuse(&remus_state->child));
+
+    LOG(DEBUG, "killing hotplug script %s (on vif %s) because of timeout",
+        remus_state->netbufscript, vif);
+
+    if (kill(remus_state->child.pid, SIGKILL)) {
+        /* the cast has to match the %ld conversion */
+        LOGEV(ERROR, errno, "unable to kill hotplug script %s [%ld]",
+              remus_state->netbufscript,
+              (long)remus_state->child.pid);
+    }
+}
+
+/*
+ * Fork and run the netbuf hotplug script for the vif selected by
+ * remus_state->dev_id.
+ *
+ * the script needs the following env & args
+ * $vifname
+ * $XENBUS_PATH (/libxl/<domid>/remus/netbuf/<devid>/)
+ * $IFB (for teardown)
+ * setup/teardown as command line arg.
+ * In return, the script writes the name of IFB device (during setup) to be
+ * used for output buffering into XENBUS_PATH/ifb
+ *
+ * op is the command line argument ("setup" or "teardown"); death is the
+ * callback registered for the termination of the script. A timeout
+ * (netbuf_setup_timeout_cb) is registered before the fork.
+ *
+ * Returns 0 once the child has been forked, ERROR_FAIL if the timeout
+ * could not be registered or the fork failed.
+ */
+static int exec_netbuf_script(libxl__gc *gc, libxl__remus_state *remus_state,
+                              char *op, libxl__ev_child_callback *death)
+{
+    int arraysize = 7, nr = 0;
+    char **env = NULL, **args = NULL;
+    pid_t pid;
+
+    /* Convenience aliases */
+    libxl__ev_child *const child = &remus_state->child;
+    libxl__ev_time *const timeout = &remus_state->timeout;
+    char *const script = libxl__strdup(gc, remus_state->netbufscript);
+    const uint32_t domid = remus_state->dss->domid;
+    const int devid = remus_state->dev_id;
+    libxl__remus_netbuf_state *const netbuf_state = remus_state->netbuf_state;
+    const char *const vif = netbuf_state->vif_list[devid];
+
+    GCNEW_ARRAY(env, arraysize);
+    env[nr++] = "vifname";
+    env[nr++] = libxl__strdup(gc, vif);
+    env[nr++] = "XENBUS_PATH";
+    env[nr++] = GCSPRINTF("%s/remus/netbuf/%d",
+                          libxl__xs_libxl_path(gc, domid), devid);
+    if (!strcmp(op, "teardown")) {
+        /* The IFB name is only passed to the script on teardown. */
+        env[nr++] = "IFB";
+        env[nr++] = libxl__strdup(gc, netbuf_state->ifb_list[devid]);
+    }
+    env[nr++] = NULL;
+    assert(nr <= arraysize);
+
+    arraysize = 3; nr = 0;
+    GCNEW_ARRAY(args, arraysize);
+    args[nr++] = script;
+    args[nr++] = op;
+    args[nr++] = NULL;
+    assert(nr == arraysize);
+
+    /* Set hotplug timeout */
+    if (libxl__ev_time_register_rel(gc, timeout,
+                                    netbuf_setup_timeout_cb,
+                                    LIBXL_HOTPLUG_TIMEOUT * 1000)) {
+        LOG(ERROR, "unable to register timeout for "
+            "netbuf setup script %s on vif %s", script, vif);
+        return ERROR_FAIL;
+    }
+
+    LOG(DEBUG, "Calling netbuf script: %s %s on vif %s",
+        script, op, vif);
+
+    /* Fork and exec netbuf script */
+    pid = libxl__ev_child_fork(gc, child, death);
+    if (pid == -1) {
+        LOG(ERROR, "unable to fork netbuf script %s", script);
+        /*
+         * No child exists, so the timeout must not stay armed: its
+         * callback asserts that the child is in use and tries to kill it.
+         */
+        libxl__ev_time_deregister(gc, timeout);
+        return ERROR_FAIL;
+    }
+
+    if (!pid) {
+        /* child: Launch netbuf script */
+        libxl__exec(gc, -1, -1, -1, args[0], args, env);
+        /* notreached */
+        abort();
+    }
+
+    return 0;
+}
+
+/*
+ * Child-death callback for the "setup" run of the netbuf script.
+ *
+ * Checks the outcome of the script for the current device (hotplug-error
+ * node, exit status, ifb node), records the IFB name and then either
+ * starts the script for the next device or, after the last one, calls
+ * init_qdiscs(). Any failure, as well as final completion, is reported
+ * through libxl__remus_setup_done().
+ */
+static void netbuf_setup_script_cb(libxl__egc *egc,
+                                   libxl__ev_child *child,
+                                   pid_t pid, int status)
+{
+    libxl__remus_state *remus_state = CONTAINER_OF(child, *remus_state, child);
+    const char *xs_base, *script_err = NULL;
+    int rc = ERROR_FAIL;
+
+    /* Convenience aliases */
+    libxl__remus_netbuf_state *const netbuf_state = remus_state->netbuf_state;
+    const int devid = remus_state->dev_id;
+    const uint32_t domid = remus_state->dss->domid;
+    const char **const ifb_slot = &netbuf_state->ifb_list[devid];
+    const char *const vif = netbuf_state->vif_list[devid];
+
+    STATE_AO_GC(remus_state->dss->ao);
+
+    /* The script has exited, so its timeout is no longer needed. */
+    libxl__ev_time_deregister(gc, &remus_state->timeout);
+
+    xs_base = GCSPRINTF("%s/remus/netbuf/%d",
+                        libxl__xs_libxl_path(gc, domid), devid);
+
+    /* An error message left by the script takes precedence over its
+     * exit status. */
+    rc = libxl__xs_read_checked(gc, XBT_NULL,
+                                GCSPRINTF("%s/hotplug-error", xs_base),
+                                &script_err);
+    if (rc)
+        goto out;
+
+    if (script_err) {
+        LOG(ERROR, "netbuf script %s setup failed for vif %s: %s",
+            remus_state->netbufscript, vif, script_err);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    if (status) {
+        libxl_report_child_exitstatus(CTX, LIBXL__LOG_ERROR,
+                                      remus_state->netbufscript,
+                                      pid, status);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    /* Pick up the IFB device name written by the script. */
+    rc = libxl__xs_read_checked(gc, XBT_NULL,
+                                GCSPRINTF("%s/remus/netbuf/%d/ifb",
+                                          libxl__xs_libxl_path(gc, domid),
+                                          devid),
+                                ifb_slot);
+    if (rc)
+        goto out;
+
+    if (!(*ifb_slot)) {
+        LOG(ERROR, "Cannot get ifb dev name for domain %u dev %s",
+            domid, vif);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    LOG(DEBUG, "%s will buffer packets from vif %s", *ifb_slot, vif);
+
+    remus_state->dev_id++;
+    if (remus_state->dev_id >= netbuf_state->num_netbufs) {
+        /* That was the last vif: grab the qdisc handles. */
+        rc = init_qdiscs(gc, remus_state);
+        goto out;
+    }
+
+    /* More vifs to go; this callback runs again when the script exits. */
+    rc = exec_netbuf_script(gc, remus_state,
+                            "setup", netbuf_setup_script_cb);
+    if (!rc)
+        return;
+
+ out:
+    libxl__remus_setup_done(egc, remus_state->dss, rc);
+}
+
+/*
+ * Entry point for setting up network buffering for a domain.
+ *
+ * Collects the vifs of dss->domid and starts the netbufscript for the
+ * first one, to set up the IFB device & plug qdisc; the remaining vifs
+ * and the lookup of the plug qdisc handles are driven from
+ * netbuf_setup_script_cb(). Network output buffering is controlled via
+ * these qdiscs.
+ *
+ * If there is nothing to do, or an error occurs before the first script
+ * is running, libxl__remus_setup_done() is called directly.
+ */
+void libxl__remus_netbuf_setup(libxl__egc *egc,
+                               libxl__domain_suspend_state *dss)
+{
+    libxl__remus_netbuf_state *nbs = NULL;
+    int nr_vifs = 0;
+    int rc;
+
+    /* Convenience aliases */
+    libxl__remus_state *const remus_state = dss->remus_state;
+    const uint32_t domid = dss->domid;
+
+    STATE_AO_GC(dss->ao);
+
+    GCNEW(nbs);
+    nbs->vif_list = get_guest_vif_list(gc, domid, &nr_vifs);
+
+    if (nr_vifs <= 0) {
+        /* zero vifs: nothing to buffer, success; negative: failure */
+        rc = nr_vifs ? ERROR_FAIL : 0;
+        goto out;
+    }
+
+    GCNEW_ARRAY(nbs->ifb_list, nr_vifs);
+    nbs->num_netbufs = nr_vifs;
+    remus_state->netbuf_state = nbs;
+    remus_state->dev_id = 0;
+
+    if (!exec_netbuf_script(gc, remus_state, "setup",
+                            netbuf_setup_script_cb))
+        return;
+
+    rc = ERROR_FAIL;
+
+ out:
+    libxl__remus_setup_done(egc, dss, rc);
+}
+
/*
* Local variables:
* mode: C
diff --git a/tools/libxl/libxl_nonetbuffer.c b/tools/libxl/libxl_nonetbuffer.c
index 6aa4bf1..acfa534 100644
--- a/tools/libxl/libxl_nonetbuffer.c
+++ b/tools/libxl/libxl_nonetbuffer.c
@@ -22,6 +22,12 @@ int libxl__netbuffer_enabled(libxl__gc *gc)
return 0;
}
+/* Remus network buffer related stubs.
+ *
+ * Empty variant of libxl__remus_netbuf_setup() for this file, whose
+ * libxl__netbuffer_enabled() returns 0.
+ * NOTE(review): this stub never calls libxl__remus_setup_done();
+ * presumably callers check libxl__netbuffer_enabled() before calling
+ * it - confirm.
+ */
+void libxl__remus_netbuf_setup(libxl__egc *egc,
+ libxl__domain_suspend_state *dss)
+{
+}
+
/*
* Local variables:
* mode: C
diff --git a/tools/libxl/libxl_remus.c b/tools/libxl/libxl_remus.c
new file mode 100644
index 0000000..b3342b3
--- /dev/null
+++ b/tools/libxl/libxl_remus.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2014
+ * Author Shriram Rajagopalan <rshriram@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+/*----- remus setup/teardown code -----*/
+
+/*
+ * Called when Remus setup has finished with result rc.
+ *
+ * On failure (rc != 0) the error is logged and handed to
+ * domain_suspend_done(); on success libxl__domain_suspend() is started.
+ */
+void libxl__remus_setup_done(libxl__egc *egc,
+                             libxl__domain_suspend_state *dss,
+                             int rc)
+{
+    STATE_AO_GC(dss->ao);
+
+    if (rc) {
+        LOG(ERROR, "Remus: failed to setup network buffering"
+            " for guest with domid %u", dss->domid);
+        domain_suspend_done(egc, dss, rc);
+        return;
+    }
+
+    libxl__domain_suspend(egc, dss);
+}
--
1.8.4.2
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |