[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 3 of 6 RESEND v2] blktap3/sring: extract requests from the ring, grouping of VBDs



This patch introduces the functions that extract requests from the shared ring.
When a VBD is created, the file descriptor of the event channel is added to the
set of the file descriptors watched by the main select(). When a notification
arrives, the callback associated with it extracts requests from the ring.
Parsing of the requests and passing them to the tapdisk standard request queue
is introduced by another patch in this series.

Signed-off-by: Thanos Makatos <thanos.makatos@xxxxxxxxxx>

---
Changed since v1:
  * Clarified use of grouping of event channels.

diff --git a/tools/blktap3/drivers/sring/td-ctx.c 
b/tools/blktap3/drivers/sring/td-ctx.c
new file mode 100644
--- /dev/null
+++ b/tools/blktap3/drivers/sring/td-ctx.c
@@ -0,0 +1,443 @@
+/*
+ * Copyright (C) 2012      Citrix Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
+ * USA.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <syslog.h>
+
+#include "tapdisk-server.h"
+
+#include "td-ctx.h"
+
+struct tqh_td_xenio_ctx  _td_xenio_ctxs
+    = TAILQ_HEAD_INITIALIZER(_td_xenio_ctxs);
+
+/**
+ * Closes a context: unregisters the ring event, releases the event-channel
+ * and grant-table handles, and unlinks the context from the global list.
+ *
+ * TODO releases a pool?
+ */
+static void
+tapdisk_xenio_ctx_close(struct td_xenio_ctx * const ctx)
+{
+    assert(ctx);
+
+    if (ctx->ring_event >= 0) {
+        tapdisk_server_unregister_event(ctx->ring_event);
+        ctx->ring_event = -1;
+    }
+
+    if (ctx->xce_handle) {
+        xc_evtchn_close(ctx->xce_handle);
+        ctx->xce_handle = NULL;
+    }
+
+    if (ctx->xcg_handle) {
+        /*
+         * BUG FIX: xcg_handle is a grant-table handle (xc_gnttab *), so it
+         * must be released with xc_gnttab_close(), not xc_evtchn_close().
+         */
+        xc_gnttab_close(ctx->xcg_handle);
+        ctx->xcg_handle = NULL;
+    }
+
+    TAILQ_REMOVE(&_td_xenio_ctxs, ctx, entry);
+
+    /* TODO when do we free it? */
+}
+
+/*
+ * Returns the block interface whose event channel has a pending event, or
+ * NULL if the pending port cannot be retrieved, unmasking the port fails, or
+ * no block interface in this context uses that port.
+ *
+ * XXX only called by tapdisk_xenio_ctx_ring_event
+ */
+static inline struct td_xenblkif *
+xenio_pending_blkif(struct td_xenio_ctx * const ctx)
+{
+    evtchn_port_or_error_t port;
+    struct td_xenblkif *blkif;
+    int err;
+
+    assert(ctx);
+
+    /*
+     * Get the local port for which there is a pending event.
+     */
+    port = xc_evtchn_pending(ctx->xce_handle);
+    if (port == -1) {
+        /* TODO log error */
+        return NULL;
+    }
+
+    /*
+     * Find the block interface with that local port.
+     */
+    tapdisk_xenio_ctx_find_blkif(ctx, blkif,
+            blkif->port == port);
+    if (blkif) {
+        /* Re-allow notifications on this port before the ring is drained. */
+        err = xc_evtchn_unmask(ctx->xce_handle, port);
+        if (err) {
+            /* TODO log error */
+            return NULL;
+        }
+    }
+    /*
+     * TODO Is it possible to have a pending event channel but no block
+     * interface associated with it?
+     */
+
+    return blkif;
+}
+
+/**
+ * Copies a guest request (@src, in 32- or 64-bit layout) into the native
+ * request @dst field by field, capping the number of copied segments at
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST.  The read barrier ensures the header
+ * fields (notably nr_segments) are read from the shared ring before the
+ * segment array is copied.
+ */
+#define blkif_get_req(dst, src)                 \
+{                                               \
+    int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;  \
+    dst->operation = src->operation;            \
+    dst->nr_segments = src->nr_segments;        \
+    dst->handle = src->handle;                  \
+    dst->id = src->id;                          \
+    dst->sector_number = src->sector_number;    \
+    xen_rmb();                                  \
+    if (n > dst->nr_segments)                   \
+        n = dst->nr_segments;                   \
+    for (i = 0; i < n; i++)                     \
+        dst->seg[i] = src->seg[i];              \
+}
+
+/**
+ * Utility function that retrieves a request using @idx as the ring index,
+ * copying it to the @dst in a H/W independent way.
+ *
+ * @param blkif the block interface
+ * @param dst address that receives the request
+ * @param idx the index of the request in the ring
+ */
+static inline void
+xenio_blkif_get_request(struct td_xenblkif * const blkif,
+        blkif_request_t *const dst, const RING_IDX idx)
+{
+    blkif_back_rings_t * rings;
+
+    assert(blkif);
+    assert(dst);
+
+    rings = &blkif->rings;
+
+    switch (blkif->proto) {
+        case BLKIF_PROTOCOL_NATIVE:
+            {
+                /* Same layout as ours: a plain structure copy suffices. */
+                blkif_request_t *src;
+                src = RING_GET_REQUEST(&rings->native, idx);
+                memcpy(dst, src, sizeof(blkif_request_t));
+                break;
+            }
+
+        case BLKIF_PROTOCOL_X86_32:
+            {
+                /* 32-bit guest layout: copy field by field. */
+                blkif_x86_32_request_t *src;
+                src = RING_GET_REQUEST(&rings->x86_32, idx);
+                blkif_get_req(dst, src);
+                break;
+            }
+
+        case BLKIF_PROTOCOL_X86_64:
+            {
+                /* 64-bit guest layout: copy field by field. */
+                blkif_x86_64_request_t *src;
+                src = RING_GET_REQUEST(&rings->x86_64, idx);
+                blkif_get_req(dst, src);
+                break;
+            }
+
+        default:
+            /*
+             * TODO log error
+             * Unknown protocol: a programming error, so abort.
+             */
+            assert(0);
+    }
+}
+
+/**
+ * Retrieves at most @count request descriptors from the ring, copying them to
+ * @reqs.
+ *
+ * @param blkif the block interface
+ * @param reqs array of pointers where each element points to sufficient memory
+ * space that receives each request descriptor
+ * @param count retrieve at most that many request descriptors
+ * @returns the number of retrieved request descriptors
+ *
+ *  XXX only called by xenio_blkif_get_requests
+ */
+static inline int
+__xenio_blkif_get_requests(struct td_xenblkif * const blkif,
+        blkif_request_t *reqs[], const unsigned int count)
+{
+    blkif_common_back_ring_t * ring;
+    RING_IDX rp, rc;
+    unsigned int n;
+
+    assert(blkif);
+    assert(reqs);
+
+    if (!count)
+        return 0;
+
+    ring = &blkif->rings.common;
+
+    rp = ring->sring->req_prod;
+    xen_rmb(); /* make sure req_prod is read before any request bodies */
+
+    /*
+     * BUG FIX: the bound check must happen before reqs[n] is dereferenced and
+     * n is incremented.  The previous code read reqs[count] (one past the end
+     * of the array) and returned count + 1 whenever the ring held more than
+     * @count requests.
+     */
+    for (rc = ring->req_cons, n = 0; rc != rp && n < count; rc++, n++)
+        xenio_blkif_get_request(blkif, reqs[n], rc);
+
+    /* rc now points at the first unconsumed request (if any). */
+    ring->req_cons = rc;
+
+    return n;
+}
+
+/**
+ * Retrieves at most @count request descriptors.
+ *
+ * @param blkif the block interface
+ * @param reqs array of pointers where each pointer points to sufficient
+ * memory to hold a request descriptor
+ * @param count maximum number of request descriptors to retrieve
+ * @param final if non-zero, use RING_FINAL_CHECK_FOR_REQUESTS, which
+ * re-enables notifications before it stops reading
+ * @returns the number of request descriptors retrieved
+ *
+ * TODO change name
+ */
+static inline int
+xenio_blkif_get_requests(struct td_xenblkif * const blkif,
+        blkif_request_t *reqs[], const int count, const int final)
+{
+    blkif_common_back_ring_t * ring;
+    int n = 0;
+    int work = 0;
+
+    assert(blkif);
+    assert(reqs);
+    assert(count > 0);
+
+    ring = &blkif->rings.common;
+
+    do {
+        /* Check for unconsumed requests; the "final" variant also re-enables
+         * notifications so a late request still kicks the event channel. */
+        if (final)
+            RING_FINAL_CHECK_FOR_REQUESTS(ring, work);
+        else
+            work = RING_HAS_UNCONSUMED_REQUESTS(ring);
+
+        if (!work)
+            break;
+
+        /* Stop once the caller's buffer space is exhausted. */
+        if (n >= count)
+            break;
+
+        n += __xenio_blkif_get_requests(blkif, reqs + n, count - n);
+    } while (1);
+
+    return n;
+}
+
+/**
+ * Callback executed when there is a request descriptor in the ring. Copies as
+ * many request descriptors as possible (limited by local buffer space) to the
+ * td_blkif's local request buffer and queues them to the tapdisk queue.
+ */
+static inline void
+tapdisk_xenio_ctx_ring_event(event_id_t id __attribute__((unused)),
+        char mode __attribute__((unused)), void *private)
+{
+    struct td_xenio_ctx *ctx = private;
+    struct td_xenblkif *blkif = NULL;
+    int n_reqs;
+    int final = 0;
+    int start;
+    blkif_request_t **reqs;
+
+    assert(ctx);
+
+    blkif = xenio_pending_blkif(ctx);
+    if (!blkif) {
+        /* TODO log error */
+        return;
+    }
+
+    /* Remember how many buffer slots were free on entry so we can compute
+     * how many requests this invocation actually copied. */
+    start = blkif->n_reqs_free;
+    blkif->stats.kicks.in++;
+
+    /*
+     * In each iteration, copy as many request descriptors from the shared ring
+     * that can fit in the td_blkif's buffer.
+     */
+    do {
+        /* First unused slot in the free-pointer array; slots are consumed
+         * from the front as n_reqs_free shrinks
+         * (assumes reqs_free[0 .. ring_size - n_reqs_free - 1] are in use —
+         * TODO confirm against td-blkif.c). */
+        reqs = &blkif->reqs_free[blkif->ring_size - blkif->n_reqs_free];
+
+        assert(reqs);
+
+        n_reqs = xenio_blkif_get_requests(blkif, reqs, blkif->n_reqs_free,
+                final);
+        assert(n_reqs >= 0);
+        if (!n_reqs)
+            break;
+
+        blkif->n_reqs_free -= n_reqs;
+        /* From the second pass on, re-enable notifications before stopping
+         * so a request arriving after the last check still kicks us. */
+        final = 1;
+
+    } while (1);
+
+    /* Total requests copied by this invocation. */
+    n_reqs = start - blkif->n_reqs_free;
+    if (!n_reqs)
+        /* TODO If there are no requests to be copied, why was there a
+         * notification in the first place?
+         */
+        return;
+    blkif->stats.reqs.in += n_reqs;
+    /* The slots consumed by this invocation start where the free region
+     * started on entry. */
+    reqs = &blkif->reqs_free[blkif->ring_size - start];
+    tapdisk_xenblkif_queue_requests(blkif, reqs, n_reqs);
+}
+
+/* NB. may be NULL, but then the image must be bouncing I/O */
+#define TD_XENBLKIF_DEFAULT_POOL "td-xenio-default"
+
+/**
+ * Opens a context on the specified pool.
+ *
+ * @param pool the pool, it can either be NULL or a non-zero length string
+ * @returns 0 in success, an errno value otherwise
+ *
+ * TODO The pool is ignored, we always open the default pool.
+ */
+static inline int
+tapdisk_xenio_ctx_open(const char *pool)
+{
+    struct td_xenio_ctx *ctx;
+    int fd, err;
+
+    /* zero-length pool names are not allowed */
+    if (pool && !strlen(pool))
+        return EINVAL;
+
+    ctx = calloc(1, sizeof(*ctx));
+    if (!ctx) {
+        err = errno;
+        syslog(LOG_ERR, "cannot allocate memory");
+        /*
+         * BUG FIX: must not go through the common failure path with a NULL
+         * context: tapdisk_xenio_ctx_close() asserts on and dereferences ctx
+         * (TAILQ_REMOVE), which would crash.
+         */
+        return err;
+    }
+
+    ctx->ring_event = -1; /* TODO is there a special value? */
+    ctx->pool = TD_XENBLKIF_DEFAULT_POOL;
+    TAILQ_INIT(&ctx->blkifs);
+    /* Link the context now so the failure path below can unlink it. */
+    TAILQ_INSERT_HEAD(&_td_xenio_ctxs, ctx, entry);
+
+    ctx->xce_handle = xc_evtchn_open(NULL, 0);
+    if (ctx->xce_handle == NULL) {
+        err = errno;
+        syslog(LOG_ERR, "failed to open the event channel driver: %s\n",
+                strerror(err));
+        goto fail;
+    }
+
+    ctx->xcg_handle = xc_gnttab_open(NULL, 0);
+    if (ctx->xcg_handle == NULL) {
+        err = errno;
+        syslog(LOG_ERR, "failed to open the grant table driver: %s\n",
+                strerror(err));
+        goto fail;
+    }
+
+    fd = xc_evtchn_fd(ctx->xce_handle);
+    if (fd < 0) {
+        err = errno;
+        syslog(LOG_ERR, "failed to get the event channel file descriptor: %s\n",
+                strerror(err));
+        goto fail;
+    }
+
+    ctx->ring_event = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
+        fd, 0, tapdisk_xenio_ctx_ring_event, ctx);
+    if (ctx->ring_event < 0) {
+        err = -ctx->ring_event;
+        syslog(LOG_ERR, "failed to register event: %s\n", strerror(err));
+        goto fail;
+    }
+
+    return 0;
+
+fail:
+    tapdisk_xenio_ctx_close(ctx);
+    return err;
+}
+
+
+/**
+ * Tells whether @ctx belongs to @pool.
+ *
+ * If @pool is not specified and a default pool is set, @ctx is compared
+ * against the default pool. Note that NULL is a valid pool name value.
+ */
+static inline int
+__td_xenio_ctx_match(struct td_xenio_ctx * ctx, const char *pool)
+{
+    if (unlikely(!pool)) {
+        /* NULL != TD_XENBLKIF_DEFAULT_POOL is a compile-time constant; the
+         * else branch only applies when no default pool name is defined. */
+        if (NULL != TD_XENBLKIF_DEFAULT_POOL)
+            return !strcmp(ctx->pool, TD_XENBLKIF_DEFAULT_POOL);
+        else
+            return !ctx->pool;
+    }
+
+    return !strcmp(ctx->pool, pool);
+}
+
+/**
+ * Searches the global context list for a context for which _cond is true;
+ * _ctx is set to NULL if no context satisfies it.
+ *
+ * NOTE(review): this hunk was damaged by mail-client line wrapping (the
+ * continuation backslashes were pushed onto separate lines); reconstructed
+ * here with the line continuations restored.
+ */
+#define tapdisk_xenio_find_ctx(_ctx, _cond)            \
+       do {                                            \
+               int found = 0;                          \
+               tapdisk_xenio_for_each_ctx(_ctx) {      \
+                       if (_cond) {                    \
+                               found = 1;              \
+                               break;                  \
+                       }                               \
+               }                                       \
+               if (!found)                             \
+                       _ctx = NULL;                    \
+       } while (0)
+
+/**
+ * Retrieves the context for @pool, opening a new one if none exists yet.
+ *
+ * @param pool the pool name (NULL selects the default pool)
+ * @param _ctx receives the context on success
+ * @returns 0 on success, an error code otherwise
+ */
+int
+tapdisk_xenio_ctx_get(const char *pool, struct td_xenio_ctx ** _ctx)
+{
+    struct td_xenio_ctx *ctx = NULL;
+    int err;
+
+    tapdisk_xenio_find_ctx(ctx, __td_xenio_ctx_match(ctx, pool));
+    if (ctx) {
+        *_ctx = ctx;
+        return 0;
+    }
+
+    err = tapdisk_xenio_ctx_open(pool);
+    if (err)
+        return err;
+
+    /*
+     * BUG FIX: the previous do/while looped for ever when a successful open
+     * still failed to match @pool: tapdisk_xenio_ctx_open() always creates
+     * the default pool, so a non-default @pool would trigger open after open.
+     * Retry the lookup exactly once and fail cleanly if it still misses.
+     */
+    tapdisk_xenio_find_ctx(ctx, __td_xenio_ctx_match(ctx, pool));
+    if (ctx) {
+        *_ctx = ctx;
+        return 0;
+    }
+
+    return ESRCH;
+}
+
+/**
+ * Releases the context, but only once no block interface still uses it.
+ */
+void
+tapdisk_xenio_ctx_put(struct td_xenio_ctx * ctx)
+{
+    if (!TAILQ_EMPTY(&ctx->blkifs))
+        return;
+
+    tapdisk_xenio_ctx_close(ctx);
+}
diff --git a/tools/blktap3/drivers/sring/td-ctx.h 
b/tools/blktap3/drivers/sring/td-ctx.h
new file mode 100644
--- /dev/null
+++ b/tools/blktap3/drivers/sring/td-ctx.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2012      Citrix Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
+ * USA.
+ */
+
+#ifndef __TD_CTX_H__
+#define __TD_CTX_H__
+
+#include "td-blkif.h"
+#include <xenctrl.h>
+#include "scheduler.h"
+
+/* List-head type for the per-context list of block interfaces. */
+TAILQ_HEAD(tqh_td_xenblkif, td_xenblkif);
+
+/**
+ * A VBD context: groups two or more VBDs of the same tapdisk.
+ *
+ * TODO The purpose of this struct is dubious: it allows one or more VBDs
+ * belonging to the same tapdisk process to share the same handle to the event
+ * channel driver. This means that when an event triggers on some event
+ * channel, the notification is delivered via the file descriptor of the
+ * handle, and we then need to look up exactly which event channel triggered
+ * it. This functionality is a trade-off between reducing the total number of
+ * open handles to the event channel driver and speeding up the data path.
+ * Also, it effectively allows more event channels to be polled by select. The
+ * bottom line is that if we use one VBD per tapdisk this functionality is
+ * unnecessary.
+ */
+struct td_xenio_ctx {
+    char *pool; /* TODO rename to pool_name */
+
+    /**
+     * Handle to the grant table driver.
+     */
+    xc_gnttab *xcg_handle;
+
+    /**
+     * Handle to the event channel driver.
+     */
+    xc_evtchn *xce_handle;
+
+    /**
+     * Return value of tapdisk_server_register_event, we use this to tell
+     * whether the context is registered (-1 means not registered).
+     */
+    event_id_t ring_event;
+
+    /**
+     * block interfaces in this pool
+     */
+    struct tqh_td_xenblkif blkifs;
+
+    /**
+     * for linked lists
+     */
+    TAILQ_ENTRY(td_xenio_ctx) entry;
+};
+
+/**
+ * Retrieves the context corresponding to the specified pool name, creating it
+ * if it doesn't already exist.
+ *
+ * @param pool pool name (NULL selects the default pool)
+ * @param _ctx receives the context on success
+ * @returns 0 on success
+ */
+int
+tapdisk_xenio_ctx_get(const char *pool, struct td_xenio_ctx ** _ctx);
+
+/**
+ * Releases the pool, only if there is no block interface using it.
+ */
+void
+tapdisk_xenio_ctx_put(struct td_xenio_ctx * ctx);
+
+/**
+ * List of contexts (defined in td-ctx.c).
+ */
+extern TAILQ_HEAD(tqh_td_xenio_ctx, td_xenio_ctx) _td_xenio_ctxs;
+
+/**
+ * For each block interface of this context...
+ */
+#define tapdisk_xenio_for_each_blkif(_blkif, _ctx)     \
+       TAILQ_FOREACH(_blkif, &(_ctx)->blkifs, entry)
+
+/**
+ * Searches this context for the block interface for which the condition is
+ * true; _blkif is set to NULL if no interface satisfies it.
+ *
+ * NOTE(review): this hunk was damaged by mail-client line wrapping (the
+ * continuation backslashes were pushed onto separate lines); reconstructed
+ * here with the line continuations restored.
+ */
+#define tapdisk_xenio_ctx_find_blkif(_ctx, _blkif, _cond)      \
+       do {                                                    \
+               int found = 0;                                  \
+               tapdisk_xenio_for_each_blkif(_blkif, _ctx) {    \
+                       if (_cond) {                            \
+                               found = 1;                      \
+                               break;                          \
+                       }                                       \
+               }                                               \
+               if (!found)                                     \
+                       _blkif = NULL;                          \
+       } while (0)
+
+#endif /* __TD_CTX_H__ */

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.