[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 06/13 V6] remus: implement the API to setup network buffering
From: Shriram Rajagopalan <rshriram@xxxxxxxxx> The following steps are taken during setup: a) call the hotplug script for each vif to set up its network buffer b) establish a dedicated remus context containing libnl-related state (netlink sockets, qdisc caches, etc.) c) obtain handles to plug qdiscs installed on the IFB devices chosen by the hotplug scripts. Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx> Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx> Reviewed-by: Wen Congyang <wency@xxxxxxxxxxxxxx> --- docs/misc/xenstore-paths.markdown | 4 + tools/libxl/Makefile | 2 + tools/libxl/libxl_dom.c | 7 +- tools/libxl/libxl_internal.h | 11 + tools/libxl/libxl_netbuffer.c | 419 ++++++++++++++++++++++++++++++++++++++ tools/libxl/libxl_nonetbuffer.c | 6 + tools/libxl/libxl_remus.c | 35 ++++ 7 files changed, 479 insertions(+), 5 deletions(-) create mode 100644 tools/libxl/libxl_remus.c diff --git a/docs/misc/xenstore-paths.markdown b/docs/misc/xenstore-paths.markdown index 70ab7f4..7a0d2c9 100644 --- a/docs/misc/xenstore-paths.markdown +++ b/docs/misc/xenstore-paths.markdown @@ -385,6 +385,10 @@ The guest's virtual time offset from UTC in seconds. The device model version for a domain. +#### /libxl/$DOMID/remus/netbuf/$DEVID/ifb = STRING [n,INTERNAL] + +IFB device used by Remus to buffer network output from the associated vif.
+ [BLKIF]: http://xenbits.xen.org/docs/unstable/hypercall/include,public,io,blkif.h.html [FBIF]: http://xenbits.xen.org/docs/unstable/hypercall/include,public,io,fbif.h.html [HVMPARAMS]: http://xenbits.xen.org/docs/unstable/hypercall/include,public,hvm,params.h.html diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile index 84a467c..218f55e 100644 --- a/tools/libxl/Makefile +++ b/tools/libxl/Makefile @@ -52,6 +52,8 @@ else LIBXL_OBJS-y += libxl_nonetbuffer.o endif +LIBXL_OBJS-y += libxl_remus.o + LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c index 8d63f90..e3e9f6f 100644 --- a/tools/libxl/libxl_dom.c +++ b/tools/libxl/libxl_dom.c @@ -753,9 +753,6 @@ int libxl__toolstack_restore(uint32_t domid, const uint8_t *buf, /*==================== Domain suspend (save) ====================*/ -static void domain_suspend_done(libxl__egc *egc, - libxl__domain_suspend_state *dss, int rc); - /*----- complicated callback, called by xc_domain_save -----*/ /* @@ -1508,8 +1505,8 @@ static void save_device_model_datacopier_done(libxl__egc *egc, dss->save_dm_callback(egc, dss, our_rc); } -static void domain_suspend_done(libxl__egc *egc, - libxl__domain_suspend_state *dss, int rc) +void domain_suspend_done(libxl__egc *egc, + libxl__domain_suspend_state *dss, int rc) { STATE_AO_GC(dss->ao); diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 2f64382..0430307 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -2313,6 +2313,17 @@ typedef struct libxl__remus_state { _hidden int libxl__netbuffer_enabled(libxl__gc *gc); +_hidden void domain_suspend_done(libxl__egc *egc, + libxl__domain_suspend_state *dss, + int rc); + +_hidden void libxl__remus_setup_done(libxl__egc *egc, + libxl__domain_suspend_state *dss, + int rc); + +_hidden void libxl__remus_netbuf_setup(libxl__egc *egc, + libxl__domain_suspend_state 
*dss); + struct libxl__domain_suspend_state { /* set by caller of libxl__domain_suspend */ libxl__ao *ao; diff --git a/tools/libxl/libxl_netbuffer.c b/tools/libxl/libxl_netbuffer.c index 8e23d75..0be876c 100644 --- a/tools/libxl/libxl_netbuffer.c +++ b/tools/libxl/libxl_netbuffer.c @@ -17,11 +17,430 @@ #include "libxl_internal.h" +#include <netlink/cache.h> +#include <netlink/socket.h> +#include <netlink/attr.h> +#include <netlink/route/link.h> +#include <netlink/route/route.h> +#include <netlink/route/qdisc.h> +#include <netlink/route/qdisc/plug.h> + +typedef struct libxl__remus_netbuf_state { + struct rtnl_qdisc **netbuf_qdisc_list; + struct nl_sock *nlsock; + struct nl_cache *qdisc_cache; + const char **vif_list; + const char **ifb_list; + uint32_t num_netbufs; + uint32_t unused; +} libxl__remus_netbuf_state; + int libxl__netbuffer_enabled(libxl__gc *gc) { return 1; } +/* If the device has a vifname, then use that instead of + * the vifX.Y format. + */ +static const char *get_vifname(libxl__gc *gc, uint32_t domid, + libxl_device_nic *nic) +{ + const char *vifname = NULL; + const char *path; + int rc; + + path = libxl__sprintf(gc, "%s/backend/vif/%d/%d/vifname", + libxl__xs_get_dompath(gc, 0), domid, nic->devid); + rc = libxl__xs_read_checked(gc, XBT_NULL, path, &vifname); + if (rc < 0) { + /* use the default name */ + vifname = libxl__device_nic_devname(gc, domid, + nic->devid, + nic->nictype); + } + + return vifname; +} + +static const char **get_guest_vif_list(libxl__gc *gc, uint32_t domid, + int *num_vifs) +{ + libxl_device_nic *nics = NULL; + int nb, i = 0; + const char **vif_list = NULL; + + *num_vifs = 0; + nics = libxl_device_nic_list(CTX, domid, &nb); + if (!nics) + return NULL; + + /* Ensure that none of the vifs are backed by driver domains */ + for (i = 0; i < nb; i++) { + if (nics[i].backend_domid != LIBXL_TOOLSTACK_DOMID) { + LOG(ERROR, "vif %s has driver domain (%u) as its backend. 
" + "Network buffering is not supported with driver domains", + get_vifname(gc, domid, &nics[i]), nics[i].backend_domid); + *num_vifs = -1; + goto out; + } + } + + GCNEW_ARRAY(vif_list, nb); + for (i = 0; i < nb; ++i) { + vif_list[i] = get_vifname(gc, domid, &nics[i]); + if (!vif_list[i]) { + vif_list = NULL; + goto out; + } + } + *num_vifs = nb; + + out: + for (i = 0; i < nb; i++) + libxl_device_nic_dispose(&nics[i]); + free(nics); + return vif_list; +} + +static void free_qdiscs(libxl__remus_netbuf_state *netbuf_state) +{ + int i; + struct rtnl_qdisc *qdisc = NULL; + + /* free qdiscs */ + for (i = 0; i < netbuf_state->num_netbufs; i++) { + qdisc = netbuf_state->netbuf_qdisc_list[i]; + if (!qdisc) + break; + + nl_object_put((struct nl_object *)qdisc); + } + + /* free qdisc cache */ + nl_cache_clear(netbuf_state->qdisc_cache); + nl_cache_free(netbuf_state->qdisc_cache); + + /* close nlsock */ + nl_close(netbuf_state->nlsock); + + /* free nlsock */ + nl_socket_free(netbuf_state->nlsock); +} + +static int init_qdiscs(libxl__gc *gc, + libxl__remus_state *remus_state) +{ + int i, ret, ifindex; + struct rtnl_link *ifb = NULL; + struct rtnl_qdisc *qdisc = NULL; + + /* Convenience aliases */ + libxl__remus_netbuf_state * const netbuf_state = remus_state->netbuf_state; + const int num_netbufs = netbuf_state->num_netbufs; + const char ** const ifb_list = netbuf_state->ifb_list; + + /* Now that we have brought up IFB devices with plug qdisc for + * each vif, lets get a netlink handle on the plug qdisc for use + * during checkpointing. + */ + netbuf_state->nlsock = nl_socket_alloc(); + if (!netbuf_state->nlsock) { + LOG(ERROR, "cannot allocate nl socket"); + goto out; + } + + ret = nl_connect(netbuf_state->nlsock, NETLINK_ROUTE); + if (ret) { + LOG(ERROR, "failed to open netlink socket: %s", + nl_geterror(ret)); + goto out; + } + + /* get list of all qdiscs installed on network devs. 
*/ + ret = rtnl_qdisc_alloc_cache(netbuf_state->nlsock, + &netbuf_state->qdisc_cache); + if (ret) { + LOG(ERROR, "failed to allocate qdisc cache: %s", + nl_geterror(ret)); + goto out; + } + + /* list of handles to plug qdiscs */ + GCNEW_ARRAY(netbuf_state->netbuf_qdisc_list, num_netbufs); + + for (i = 0; i < num_netbufs; ++i) { + + /* get a handle to the IFB interface */ + ifb = NULL; + ret = rtnl_link_get_kernel(netbuf_state->nlsock, 0, + ifb_list[i], &ifb); + if (ret) { + LOG(ERROR, "cannot obtain handle for %s: %s", ifb_list[i], + nl_geterror(ret)); + goto out; + } + + ifindex = rtnl_link_get_ifindex(ifb); + if (!ifindex) { + LOG(ERROR, "interface %s has no index", ifb_list[i]); + goto out; + } + + /* Get a reference to the root qdisc installed on the IFB, by + * querying the qdisc list we obtained earlier. The netbufscript + * sets up the plug qdisc as the root qdisc, so we don't have to + * search the entire qdisc tree on the IFB dev. + + * There is no need to explicitly free this qdisc as its just a + * reference from the qdisc cache we allocated earlier. + */ + qdisc = rtnl_qdisc_get_by_parent(netbuf_state->qdisc_cache, ifindex, + TC_H_ROOT); + + if (qdisc) { + const char *tc_kind = rtnl_tc_get_kind(TC_CAST(qdisc)); + /* Sanity check: Ensure that the root qdisc is a plug qdisc. 
*/ + if (!tc_kind || strcmp(tc_kind, "plug")) { + nl_object_put((struct nl_object *)qdisc); + LOG(ERROR, "plug qdisc is not installed on %s", ifb_list[i]); + goto out; + } + netbuf_state->netbuf_qdisc_list[i] = qdisc; + } else { + LOG(ERROR, "Cannot get qdisc handle from ifb %s", ifb_list[i]); + goto out; + } + rtnl_link_put(ifb); + } + + return 0; + + out: + if (ifb) + rtnl_link_put(ifb); + free_qdiscs(netbuf_state); + return ERROR_FAIL; +} + +static void netbuf_setup_timeout_cb(libxl__egc *egc, + libxl__ev_time *ev, + const struct timeval *requested_abs) +{ + libxl__remus_state *remus_state = CONTAINER_OF(ev, *remus_state, timeout); + + /* Convenience aliases */ + const int devid = remus_state->dev_id; + libxl__remus_netbuf_state *const netbuf_state = remus_state->netbuf_state; + const char *const vif = netbuf_state->vif_list[devid]; + + STATE_AO_GC(remus_state->dss->ao); + + libxl__ev_time_deregister(gc, &remus_state->timeout); + assert(libxl__ev_child_inuse(&remus_state->child)); + + LOG(DEBUG, "killing hotplug script %s (on vif %s) because of timeout", + remus_state->netbufscript, vif); + + if (kill(remus_state->child.pid, SIGKILL)) { + LOGEV(ERROR, errno, "unable to kill hotplug script %s [%ld]", + remus_state->netbufscript, + (unsigned long)remus_state->child.pid); + } + + return; +} + +/* the script needs the following env & args + * $vifname + * $XENBUS_PATH (/libxl/<domid>/remus/netbuf/<devid>/) + * $IFB (for teardown) + * setup/teardown as command line arg. 
+ * In return, the script writes the name of IFB device (during setup) to be + * used for output buffering into XENBUS_PATH/ifb + */ +static int exec_netbuf_script(libxl__gc *gc, libxl__remus_state *remus_state, + char *op, libxl__ev_child_callback *death) +{ + int arraysize = 7, nr = 0; + char **env = NULL, **args = NULL; + pid_t pid; + + /* Convenience aliases */ + libxl__ev_child *const child = &remus_state->child; + libxl__ev_time *const timeout = &remus_state->timeout; + char *const script = libxl__strdup(gc, remus_state->netbufscript); + const uint32_t domid = remus_state->dss->domid; + const int devid = remus_state->dev_id; + libxl__remus_netbuf_state *const netbuf_state = remus_state->netbuf_state; + const char *const vif = netbuf_state->vif_list[devid]; + const char *const ifb = netbuf_state->ifb_list[devid]; + + GCNEW_ARRAY(env, arraysize); + env[nr++] = "vifname"; + env[nr++] = libxl__strdup(gc, vif); + env[nr++] = "XENBUS_PATH"; + env[nr++] = GCSPRINTF("%s/remus/netbuf/%d", + libxl__xs_libxl_path(gc, domid), devid); + if (!strcmp(op, "teardown")) { + env[nr++] = "IFB"; + env[nr++] = libxl__strdup(gc, ifb); + } + env[nr++] = NULL; + assert(nr <= arraysize); + + arraysize = 3; nr = 0; + GCNEW_ARRAY(args, arraysize); + args[nr++] = script; + args[nr++] = op; + args[nr++] = NULL; + assert(nr == arraysize); + + /* Set hotplug timeout */ + if (libxl__ev_time_register_rel(gc, timeout, + netbuf_setup_timeout_cb, + LIBXL_HOTPLUG_TIMEOUT * 1000)) { + LOG(ERROR, "unable to register timeout for " + "netbuf setup script %s on vif %s", script, vif); + return ERROR_FAIL; + } + + LOG(DEBUG, "Calling netbuf script: %s %s on vif %s", + script, op, vif); + + /* Fork and exec netbuf script */ + pid = libxl__ev_child_fork(gc, child, death); + if (pid == -1) { + LOG(ERROR, "unable to fork netbuf script %s", script); + return ERROR_FAIL; + } + + if (!pid) { + /* child: Launch netbuf script */ + libxl__exec(gc, -1, -1, -1, args[0], args, env); + /* notreached */ + abort(); + } 
+ + return 0; +} + +static void netbuf_setup_script_cb(libxl__egc *egc, + libxl__ev_child *child, + pid_t pid, int status) +{ + libxl__remus_state *remus_state = CONTAINER_OF(child, *remus_state, child); + const char *out_path_base, *hotplug_error = NULL; + int rc = ERROR_FAIL; + + /* Convenience aliases */ + const uint32_t domid = remus_state->dss->domid; + const int devid = remus_state->dev_id; + libxl__remus_netbuf_state *const netbuf_state = remus_state->netbuf_state; + const char *const vif = netbuf_state->vif_list[devid]; + const char **const ifb = &netbuf_state->ifb_list[devid]; + + STATE_AO_GC(remus_state->dss->ao); + + libxl__ev_time_deregister(gc, &remus_state->timeout); + + out_path_base = GCSPRINTF("%s/remus/netbuf/%d", + libxl__xs_libxl_path(gc, domid), devid); + + rc = libxl__xs_read_checked(gc, XBT_NULL, + GCSPRINTF("%s/hotplug-error", out_path_base), + &hotplug_error); + if (rc) + goto out; + + if (hotplug_error) { + LOG(ERROR, "netbuf script %s setup failed for vif %s: %s", + remus_state->netbufscript, + netbuf_state->vif_list[devid], hotplug_error); + rc = ERROR_FAIL; + goto out; + } + + if (status) { + libxl_report_child_exitstatus(CTX, LIBXL__LOG_ERROR, + remus_state->netbufscript, + pid, status); + rc = ERROR_FAIL; + goto out; + } + + rc = libxl__xs_read_checked(gc, XBT_NULL, + GCSPRINTF("%s/remus/netbuf/%d/ifb", + libxl__xs_libxl_path(gc, domid), + devid), + ifb); + if (rc) + goto out; + + if (!(*ifb)) { + LOG(ERROR, "Cannot get ifb dev name for domain %u dev %s", + domid, vif); + rc = ERROR_FAIL; + goto out; + } + + LOG(DEBUG, "%s will buffer packets from vif %s", *ifb, vif); + remus_state->dev_id++; + if (remus_state->dev_id < netbuf_state->num_netbufs) { + rc = exec_netbuf_script(gc, remus_state, + "setup", netbuf_setup_script_cb); + if (rc) + goto out; + + return; + } + + rc = init_qdiscs(gc, remus_state); + out: + libxl__remus_setup_done(egc, remus_state->dss, rc); +} + +/* Scan through the list of vifs belonging to domid and + * invoke 
the netbufscript to setup the IFB device & plug qdisc + * for each vif. Then scan through the list of IFB devices to obtain + * a handle on the plug qdisc installed on these IFB devices. + * Network output buffering is controlled via these qdiscs. + */ +void libxl__remus_netbuf_setup(libxl__egc *egc, + libxl__domain_suspend_state *dss) +{ + libxl__remus_netbuf_state *netbuf_state = NULL; + int num_netbufs = 0; + int rc = ERROR_FAIL; + + /* Convenience aliases */ + const uint32_t domid = dss->domid; + libxl__remus_state *const remus_state = dss->remus_state; + + STATE_AO_GC(dss->ao); + + GCNEW(netbuf_state); + netbuf_state->vif_list = get_guest_vif_list(gc, domid, &num_netbufs); + if (!num_netbufs) { + rc = 0; + goto out; + } + + if (num_netbufs < 0) goto out; + + GCNEW_ARRAY(netbuf_state->ifb_list, num_netbufs); + netbuf_state->num_netbufs = num_netbufs; + remus_state->netbuf_state = netbuf_state; + remus_state->dev_id = 0; + if (exec_netbuf_script(gc, remus_state, "setup", + netbuf_setup_script_cb)) + goto out; + return; + + out: + libxl__remus_setup_done(egc, dss, rc); +} + /* * Local variables: * mode: C diff --git a/tools/libxl/libxl_nonetbuffer.c b/tools/libxl/libxl_nonetbuffer.c index 6aa4bf1..acfa534 100644 --- a/tools/libxl/libxl_nonetbuffer.c +++ b/tools/libxl/libxl_nonetbuffer.c @@ -22,6 +22,12 @@ int libxl__netbuffer_enabled(libxl__gc *gc) return 0; } +/* Remus network buffer related stubs */ +void libxl__remus_netbuf_setup(libxl__egc *egc, + libxl__domain_suspend_state *dss) +{ +} + /* * Local variables: * mode: C diff --git a/tools/libxl/libxl_remus.c b/tools/libxl/libxl_remus.c new file mode 100644 index 0000000..b3342b3 --- /dev/null +++ b/tools/libxl/libxl_remus.c @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2014 + * Author Shriram Rajagopalan <rshriram@xxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; 
version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#include "libxl_osdeps.h" /* must come before any other headers */ + +#include "libxl_internal.h" + +/*----- remus setup/teardown code -----*/ + +void libxl__remus_setup_done(libxl__egc *egc, + libxl__domain_suspend_state *dss, + int rc) +{ + STATE_AO_GC(dss->ao); + if (!rc) { + libxl__domain_suspend(egc, dss); + return; + } + + LOG(ERROR, "Remus: failed to setup network buffering" + " for guest with domid %u", dss->domid); + domain_suspend_done(egc, dss, rc); +} -- 1.8.4.2 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |