[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 06/13 V6] remus: implement the API to setup network buffering



At 01/27/2014 06:30 AM, Shriram Rajagopalan Wrote:
> On Tue, Jan 21, 2014 at 1:05 AM, Lai Jiangshan <laijs@xxxxxxxxxxxxxx> wrote:
> 
>> From: Shriram Rajagopalan <rshriram@xxxxxxxxx>
>>
>> The following steps are taken during setup:
>>  a) call the hotplug script for each vif to setup its network buffer
>>
>>  b) establish a dedicated remus context containing libnl related
>>     state (netlink sockets, qdisc caches, etc.,)
>>
>>  c) Obtain handles to plug qdiscs installed on the IFB devices
>>     chosen by the hotplug scripts.
>>
>> Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx>
>> Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
>> Reviewed-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
>> ---
>>  docs/misc/xenstore-paths.markdown |   4 +
>>  tools/libxl/Makefile              |   2 +
>>  tools/libxl/libxl_dom.c           |   7 +-
>>  tools/libxl/libxl_internal.h      |  11 +
>>  tools/libxl/libxl_netbuffer.c     | 419
>> ++++++++++++++++++++++++++++++++++++++
>>  tools/libxl/libxl_nonetbuffer.c   |   6 +
>>  tools/libxl/libxl_remus.c         |  35 ++++
>>  7 files changed, 479 insertions(+), 5 deletions(-)
>>  create mode 100644 tools/libxl/libxl_remus.c
>>
>> diff --git a/docs/misc/xenstore-paths.markdown
>> b/docs/misc/xenstore-paths.markdown
>> index 70ab7f4..7a0d2c9 100644
>> --- a/docs/misc/xenstore-paths.markdown
>> +++ b/docs/misc/xenstore-paths.markdown
>> @@ -385,6 +385,10 @@ The guest's virtual time offset from UTC in seconds.
>>
>>  The device model version for a domain.
>>
>> +#### /libxl/$DOMID/remus/netbuf/$DEVID/ifb = STRING [n,INTERNAL]
>> +
>> +IFB device used by Remus to buffer network output from the associated vif.
>> +
>>  [BLKIF]:
>> http://xenbits.xen.org/docs/unstable/hypercall/include,public,io,blkif.h.html
>>  [FBIF]:
>> http://xenbits.xen.org/docs/unstable/hypercall/include,public,io,fbif.h.html
>>  [HVMPARAMS]:
>> http://xenbits.xen.org/docs/unstable/hypercall/include,public,hvm,params.h.html
>> diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
>> index 84a467c..218f55e 100644
>> --- a/tools/libxl/Makefile
>> +++ b/tools/libxl/Makefile
>> @@ -52,6 +52,8 @@ else
>>  LIBXL_OBJS-y += libxl_nonetbuffer.o
>>  endif
>>
>> +LIBXL_OBJS-y += libxl_remus.o
>> +
>>  LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o
>>  LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
>>
>> diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
>> index 8d63f90..e3e9f6f 100644
>> --- a/tools/libxl/libxl_dom.c
>> +++ b/tools/libxl/libxl_dom.c
>> @@ -753,9 +753,6 @@ int libxl__toolstack_restore(uint32_t domid, const
>> uint8_t *buf,
>>
>>  /*==================== Domain suspend (save) ====================*/
>>
>> -static void domain_suspend_done(libxl__egc *egc,
>> -                        libxl__domain_suspend_state *dss, int rc);
>> -
>>  /*----- complicated callback, called by xc_domain_save -----*/
>>
>>  /*
>> @@ -1508,8 +1505,8 @@ static void
>> save_device_model_datacopier_done(libxl__egc *egc,
>>      dss->save_dm_callback(egc, dss, our_rc);
>>  }
>>
>> -static void domain_suspend_done(libxl__egc *egc,
>> -                        libxl__domain_suspend_state *dss, int rc)
>> +void domain_suspend_done(libxl__egc *egc,
>> +                         libxl__domain_suspend_state *dss, int rc)
>>  {
>>      STATE_AO_GC(dss->ao);
>>
>> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
>> index 2f64382..0430307 100644
>> --- a/tools/libxl/libxl_internal.h
>> +++ b/tools/libxl/libxl_internal.h
>> @@ -2313,6 +2313,17 @@ typedef struct libxl__remus_state {
>>
>>  _hidden int libxl__netbuffer_enabled(libxl__gc *gc);
>>
>> +_hidden void domain_suspend_done(libxl__egc *egc,
>> +                                 libxl__domain_suspend_state *dss,
>> +                                 int rc);
>> +
>> +_hidden void libxl__remus_setup_done(libxl__egc *egc,
>> +                                     libxl__domain_suspend_state *dss,
>> +                                     int rc);
>> +
>> +_hidden void libxl__remus_netbuf_setup(libxl__egc *egc,
>> +                                       libxl__domain_suspend_state *dss);
>> +
>>  struct libxl__domain_suspend_state {
>>      /* set by caller of libxl__domain_suspend */
>>      libxl__ao *ao;
>> diff --git a/tools/libxl/libxl_netbuffer.c b/tools/libxl/libxl_netbuffer.c
>> index 8e23d75..0be876c 100644
>> --- a/tools/libxl/libxl_netbuffer.c
>> +++ b/tools/libxl/libxl_netbuffer.c
>> @@ -17,11 +17,430 @@
>>
>>  #include "libxl_internal.h"
>>
>> +#include <netlink/cache.h>
>> +#include <netlink/socket.h>
>> +#include <netlink/attr.h>
>> +#include <netlink/route/link.h>
>> +#include <netlink/route/route.h>
>> +#include <netlink/route/qdisc.h>
>> +#include <netlink/route/qdisc/plug.h>
>> +
>> +typedef struct libxl__remus_netbuf_state {
>> +    struct rtnl_qdisc **netbuf_qdisc_list;
>> +    struct nl_sock *nlsock;
>> +    struct nl_cache *qdisc_cache;
>> +    const char **vif_list;
>> +    const char **ifb_list;
>> +    uint32_t num_netbufs;
>> +    uint32_t unused;
>> +} libxl__remus_netbuf_state;
>> +
>>  int libxl__netbuffer_enabled(libxl__gc *gc)
>>  {
>>      return 1;
>>  }
>>
>> +/* If the device has a vifname, then use that instead of
>> + * the vifX.Y format.
>> + */
>> +static const char *get_vifname(libxl__gc *gc, uint32_t domid,
>> +                               libxl_device_nic *nic)
>> +{
>> +    const char *vifname = NULL;
>> +    const char *path;
>> +    int rc;
>> +
>> +    path = libxl__sprintf(gc, "%s/backend/vif/%d/%d/vifname",
>> +                          libxl__xs_get_dompath(gc, 0), domid,
>> nic->devid);
>> +    rc = libxl__xs_read_checked(gc, XBT_NULL, path, &vifname);
>> +    if (rc < 0) {
>> +        /* use the default name */
>> +        vifname = libxl__device_nic_devname(gc, domid,
>> +                                            nic->devid,
>> +                                            nic->nictype);
>> +    }
>> +
>> +    return vifname;
>>
> 
> IanJ's feedback last time was that the error code rc needs to be checked.
> If its a failure, then return NULL to the caller. If its a ENOENT, then use
> the
> default name.

OK. This should be
if (!rc && !vifname) {
    /* use the default name */
    ....
}

> 
> +}
>> +
>> +static const char **get_guest_vif_list(libxl__gc *gc, uint32_t domid,
>> +                                       int *num_vifs)
>> +{
>> +    libxl_device_nic *nics = NULL;
>> +    int nb, i = 0;
>> +    const char **vif_list = NULL;
>> +
>> +    *num_vifs = 0;
>> +    nics = libxl_device_nic_list(CTX, domid, &nb);
>> +    if (!nics)
>> +        return NULL;
>> +
>> +    /* Ensure that none of the vifs are backed by driver domains */
>> +    for (i = 0; i < nb; i++) {
>> +        if (nics[i].backend_domid != LIBXL_TOOLSTACK_DOMID) {
>> +            LOG(ERROR, "vif %s has driver domain (%u) as its backend. "
>> +                "Network buffering is not supported with driver domains",
>> +                get_vifname(gc, domid, &nics[i]), nics[i].backend_domid);
>>
> 
> And if the previous feedback were to be incorporated, then get_vifname's
> return
> value should be assigned to a variable and checked before printing or using
> it for
> other purposes.

OK

> 
> 
>> +            *num_vifs = -1;
>> +            goto out;
>> +        }
>> +    }
>> +
>> +    GCNEW_ARRAY(vif_list, nb);
>> +    for (i = 0; i < nb; ++i) {
>> +        vif_list[i] = get_vifname(gc, domid, &nics[i]);
>> +        if (!vif_list[i]) {
>> +            vif_list = NULL;
>> +            goto out;
>> +        }
>> +    }
>> +    *num_vifs = nb;
>> +
>> + out:
>> +    for (i = 0; i < nb; i++)
>> +        libxl_device_nic_dispose(&nics[i]);
>> +    free(nics);
>> +    return vif_list;
>> +}
>> +
>> +static void free_qdiscs(libxl__remus_netbuf_state *netbuf_state)
>> +{
>> +    int i;
>> +    struct rtnl_qdisc *qdisc = NULL;
>> +
>> +    /* free qdiscs */
>> +    for (i = 0; i < netbuf_state->num_netbufs; i++) {
>> +        qdisc = netbuf_state->netbuf_qdisc_list[i];
>> +        if (!qdisc)
>> +            break;
>> +
>> +        nl_object_put((struct nl_object *)qdisc);
>> +    }
>> +
>> +    /* free qdisc cache */
>> +    nl_cache_clear(netbuf_state->qdisc_cache);
>> +    nl_cache_free(netbuf_state->qdisc_cache);
>> +
>> +    /* close nlsock */
>> +    nl_close(netbuf_state->nlsock);
>> +
>> +    /* free nlsock */
>> +    nl_socket_free(netbuf_state->nlsock);
>> +}
>> +
>>
> 
> This code (free_qdiscs) is new. Have you tested it?
> While the control flow looks pretty sane, libnl has been evolving
> a bit ever since the 3.* series.
> 
> If init_qdisc fails, it calls free_qdisc(). If any other setup stage after
> network buffering fails, it would invoke the teardown code, which also
> calls free_qdisc(). This may end up in a segfault.
> 
> I suggest adding a simple check to see if nlsock/qdisc_cache are NULL
> before attempting to execute the rest of the function.  And after you free
> the
> qdisc_cache & nlsock, set them to NULL.

Yes, free_qdisc() may be called twice. Will fix it in the next version.

> 
> 
>> +static int init_qdiscs(libxl__gc *gc,
>> +                       libxl__remus_state *remus_state)
>> +{
>> +    int i, ret, ifindex;
>> +    struct rtnl_link *ifb = NULL;
>> +    struct rtnl_qdisc *qdisc = NULL;
>> +
>> +    /* Convenience aliases */
>> +    libxl__remus_netbuf_state * const netbuf_state =
>> remus_state->netbuf_state;
>> +    const int num_netbufs = netbuf_state->num_netbufs;
>> +    const char ** const ifb_list = netbuf_state->ifb_list;
>> +
>> +    /* Now that we have brought up IFB devices with plug qdisc for
>> +     * each vif, lets get a netlink handle on the plug qdisc for use
>> +     * during checkpointing.
>> +     */
>> +    netbuf_state->nlsock = nl_socket_alloc();
>> +    if (!netbuf_state->nlsock) {
>> +        LOG(ERROR, "cannot allocate nl socket");
>> +        goto out;
>> +    }
>> +
>> +    ret = nl_connect(netbuf_state->nlsock, NETLINK_ROUTE);
>> +    if (ret) {
>> +        LOG(ERROR, "failed to open netlink socket: %s",
>> +            nl_geterror(ret));
>> +        goto out;
>> +    }
>> +
>> +    /* get list of all qdiscs installed on network devs. */
>> +    ret = rtnl_qdisc_alloc_cache(netbuf_state->nlsock,
>> +                                 &netbuf_state->qdisc_cache);
>> +    if (ret) {
>> +        LOG(ERROR, "failed to allocate qdisc cache: %s",
>> +            nl_geterror(ret));
>> +        goto out;
>> +    }
>> +
>> +    /* list of handles to plug qdiscs */
>> +    GCNEW_ARRAY(netbuf_state->netbuf_qdisc_list, num_netbufs);
>> +
>> +    for (i = 0; i < num_netbufs; ++i) {
>> +
>> +        /* get a handle to the IFB interface */
>> +        ifb = NULL;
>> +        ret = rtnl_link_get_kernel(netbuf_state->nlsock, 0,
>> +                                   ifb_list[i], &ifb);
>> +        if (ret) {
>> +            LOG(ERROR, "cannot obtain handle for %s: %s", ifb_list[i],
>> +                nl_geterror(ret));
>> +            goto out;
>> +        }
>> +
>> +        ifindex = rtnl_link_get_ifindex(ifb);
>> +        if (!ifindex) {
>> +            LOG(ERROR, "interface %s has no index", ifb_list[i]);
>> +            goto out;
>> +        }
>> +
>> +        /* Get a reference to the root qdisc installed on the IFB, by
>> +         * querying the qdisc list we obtained earlier. The netbufscript
>> +         * sets up the plug qdisc as the root qdisc, so we don't have to
>> +         * search the entire qdisc tree on the IFB dev.
>> +
>> +         * There is no need to explicitly free this qdisc as its just a
>> +         * reference from the qdisc cache we allocated earlier.
>> +         */
>> +        qdisc = rtnl_qdisc_get_by_parent(netbuf_state->qdisc_cache,
>> ifindex,
>> +                                         TC_H_ROOT);
>> +
>> +        if (qdisc) {
>> +            const char *tc_kind = rtnl_tc_get_kind(TC_CAST(qdisc));
>> +            /* Sanity check: Ensure that the root qdisc is a plug qdisc.
>> */
>> +            if (!tc_kind || strcmp(tc_kind, "plug")) {
>> +                nl_object_put((struct nl_object *)qdisc);
>> +                LOG(ERROR, "plug qdisc is not installed on %s",
>> ifb_list[i]);
>> +                goto out;
>> +            }
>> +            netbuf_state->netbuf_qdisc_list[i] = qdisc;
>> +        } else {
>> +            LOG(ERROR, "Cannot get qdisc handle from ifb %s",
>> ifb_list[i]);
>> +            goto out;
>> +        }
>> +        rtnl_link_put(ifb);
>> +    }
>> +
>> +    return 0;
>> +
>> + out:
>> +    if (ifb)
>> +        rtnl_link_put(ifb);
>> +    free_qdiscs(netbuf_state);
>> +    return ERROR_FAIL;
>> +}
>> +
>> +static void netbuf_setup_timeout_cb(libxl__egc *egc,
>> +                                    libxl__ev_time *ev,
>> +                                    const struct timeval *requested_abs)
>> +{
>> +    libxl__remus_state *remus_state = CONTAINER_OF(ev, *remus_state,
>> timeout);
>> +
>> +    /* Convenience aliases */
>> +    const int devid = remus_state->dev_id;
>> +    libxl__remus_netbuf_state *const netbuf_state =
>> remus_state->netbuf_state;
>> +    const char *const vif = netbuf_state->vif_list[devid];
>> +
>> +    STATE_AO_GC(remus_state->dss->ao);
>> +
>> +    libxl__ev_time_deregister(gc, &remus_state->timeout);
>> +    assert(libxl__ev_child_inuse(&remus_state->child));
>> +
>> +    LOG(DEBUG, "killing hotplug script %s (on vif %s) because of timeout",
>> +        remus_state->netbufscript, vif);
>> +
>> +    if (kill(remus_state->child.pid, SIGKILL)) {
>> +        LOGEV(ERROR, errno, "unable to kill hotplug script %s [%ld]",
>> +              remus_state->netbufscript,
>> +              (unsigned long)remus_state->child.pid);
>> +    }
>> +
>> +    return;
>> +}
>> +
>> +/* the script needs the following env & args
>> + * $vifname
>> + * $XENBUS_PATH (/libxl/<domid>/remus/netbuf/<devid>/)
>> + * $IFB (for teardown)
>> + * setup/teardown as command line arg.
>> + * In return, the script writes the name of IFB device (during setup) to
>> be
>> + * used for output buffering into XENBUS_PATH/ifb
>> + */
>> +static int exec_netbuf_script(libxl__gc *gc, libxl__remus_state
>> *remus_state,
>> +                              char *op, libxl__ev_child_callback *death)
>> +{
>> +    int arraysize = 7, nr = 0;
>> +    char **env = NULL, **args = NULL;
>> +    pid_t pid;
>> +
>> +    /* Convenience aliases */
>> +    libxl__ev_child *const child = &remus_state->child;
>> +    libxl__ev_time *const timeout = &remus_state->timeout;
>> +    char *const script = libxl__strdup(gc, remus_state->netbufscript);
>> +    const uint32_t domid = remus_state->dss->domid;
>> +    const int devid = remus_state->dev_id;
>> +    libxl__remus_netbuf_state *const netbuf_state =
>> remus_state->netbuf_state;
>> +    const char *const vif = netbuf_state->vif_list[devid];
>> +    const char *const ifb = netbuf_state->ifb_list[devid];
>> +
>>
> 
> Please set arraysize to 7 here, instead of the beginning of the function.
> Its more readable that way.

OK.

Thanks
Wen Congyang

> 
> +    GCNEW_ARRAY(env, arraysize);
>> +    env[nr++] = "vifname";
>> +    env[nr++] = libxl__strdup(gc, vif);
>> +    env[nr++] = "XENBUS_PATH";
>> +    env[nr++] = GCSPRINTF("%s/remus/netbuf/%d",
>> +                          libxl__xs_libxl_path(gc, domid), devid);
>> +    if (!strcmp(op, "teardown")) {
>> +        env[nr++] = "IFB";
>> +        env[nr++] = libxl__strdup(gc, ifb);
>> +    }
>> +    env[nr++] = NULL;
>> +    assert(nr <= arraysize);
>> +
>> +    arraysize = 3; nr = 0;
>> +    GCNEW_ARRAY(args, arraysize);
>> +    args[nr++] = script;
>> +    args[nr++] = op;
>> +    args[nr++] = NULL;
>> +    assert(nr == arraysize);
>> +
>> +    /* Set hotplug timeout */
>> +    if (libxl__ev_time_register_rel(gc, timeout,
>> +                                    netbuf_setup_timeout_cb,
>> +                                    LIBXL_HOTPLUG_TIMEOUT * 1000)) {
>> +        LOG(ERROR, "unable to register timeout for "
>> +            "netbuf setup script %s on vif %s", script, vif);
>> +        return ERROR_FAIL;
>> +    }
>> +
>> +    LOG(DEBUG, "Calling netbuf script: %s %s on vif %s",
>> +        script, op, vif);
>> +
>> +    /* Fork and exec netbuf script */
>> +    pid = libxl__ev_child_fork(gc, child, death);
>> +    if (pid == -1) {
>> +        LOG(ERROR, "unable to fork netbuf script %s", script);
>> +        return ERROR_FAIL;
>> +    }
>> +
>> +    if (!pid) {
>> +        /* child: Launch netbuf script */
>> +        libxl__exec(gc, -1, -1, -1, args[0], args, env);
>> +        /* notreached */
>> +        abort();
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>
>>
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel
> 


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.