[Xen-devel] [PATCH 1/5] blktap: Manage segment buffers in mempools.



 - Replaces the request free list with a (mempooled) slab.

 - Replaces request buckets with a mempool. No buckets, because
   we're doing full s/g at page granularity anyway, so we can gfp()
   independent pages everywhere. Allocations are 1-11 page-sized
   segments per request.

 - Adds support for multiple page pools.

 - Adds pools to sysfs. Linked as a 'pools' kset to blktap-control.

 - Makes the per-tap pool selectable. Attribute 'pool' on the tap device.

 - Makes pools online-resizable. Attributes free/size on the pool
   kobj (see the mempool API sketch below).
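
A minimal sketch of the generic mempool API these page pools build on:
a pre-allocated reserve of independently gfp()'d pages, non-sleeping
allocation from it, and mempool_resize() as the call behind the
writable 'size' attribute. The demo_* names below are illustrative
only, not part of this patch:

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/mempool.h>

/* Each pool element is one independently allocated page. */
static void *demo_page_alloc(gfp_t gfp_mask, void *pool_data)
{
        return alloc_page(gfp_mask);
}

static void demo_page_free(void *element, void *pool_data)
{
        __free_page(element);
}

static int demo_page_pool(void)
{
        mempool_t *pages;
        struct page *pg;

        /* Reserve 22 pages up front, e.g. two full 11-segment requests. */
        pages = mempool_create(22, demo_page_alloc, demo_page_free, NULL);
        if (!pages)
                return -ENOMEM;

        /* Non-sleeping allocation: falls back to the reserve and may
         * return NULL, but never blocks. */
        pg = mempool_alloc(pages, GFP_NOWAIT);
        if (pg)
                mempool_free(pg, pages);

        /* Grow the reserve at runtime; writing the pool's 'size'
         * attribute boils down to a call like this. */
        if (mempool_resize(pages, 44, GFP_KERNEL))
                printk(KERN_WARNING "demo: pool resize failed\n");

        mempool_destroy(pages);
        return 0;
}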

Signed-off-by: Daniel Stodden <daniel.stodden@xxxxxxxxxx>
---
 drivers/xen/blktap/blktap.h  |   35 ++-
 drivers/xen/blktap/control.c |   80 ++++++--
 drivers/xen/blktap/device.c  |    2 +-
 drivers/xen/blktap/request.c |  509 +++++++++++++++++++++++++-----------------
 drivers/xen/blktap/ring.c    |   10 +-
 drivers/xen/blktap/sysfs.c   |   36 +++
 6 files changed, 433 insertions(+), 239 deletions(-)

diff --git a/drivers/xen/blktap/blktap.h b/drivers/xen/blktap/blktap.h
index a29b509..ad79c15 100644
--- a/drivers/xen/blktap/blktap.h
+++ b/drivers/xen/blktap/blktap.h
@@ -121,17 +121,19 @@ struct blktap_statistics {
 };
 
 struct blktap_request {
+       struct blktap                 *tap;
        struct request                *rq;
        uint16_t                       usr_idx;
 
        uint8_t                        status;
        atomic_t                       pendcnt;
-       uint8_t                        nr_pages;
        unsigned short                 operation;
 
        struct timeval                 time;
        struct grant_handle_pair       handles[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-       struct list_head               free_list;
+
+       struct page                   *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       int                            nr_pages;
 };
 
 struct blktap {
@@ -140,6 +142,7 @@ struct blktap {
 
        struct blktap_ring             ring;
        struct blktap_device           device;
+       struct blktap_page_pool       *pool;
 
        int                            pending_cnt;
        struct blktap_request         *pending_requests[MAX_PENDING_REQS];
@@ -152,6 +155,13 @@ struct blktap {
        struct blktap_statistics       stats;
 };
 
+struct blktap_page_pool {
+       struct mempool_s              *bufs;
+       spinlock_t                     lock;
+       struct kobject                 kobj;
+       wait_queue_head_t              wait;
+};
+
 extern struct mutex blktap_lock;
 extern struct blktap **blktaps;
 extern int blktap_max_minor;
@@ -165,7 +175,6 @@ size_t blktap_ring_debug(struct blktap *, char *, size_t);
 int blktap_ring_create(struct blktap *);
 int blktap_ring_destroy(struct blktap *);
 void blktap_ring_kick_user(struct blktap *);
-void blktap_ring_kick_all(void);
 
 int blktap_sysfs_init(void);
 void blktap_sysfs_exit(void);
@@ -181,19 +190,23 @@ void blktap_device_destroy_sync(struct blktap *);
 int blktap_device_run_queue(struct blktap *);
 void blktap_device_end_request(struct blktap *, struct blktap_request *, int);
 
-int blktap_request_pool_init(void);
-void blktap_request_pool_free(void);
-int blktap_request_pool_grow(void);
-int blktap_request_pool_shrink(void);
-struct blktap_request *blktap_request_allocate(struct blktap *);
+int blktap_page_pool_init(struct kobject *);
+void blktap_page_pool_exit(void);
+struct blktap_page_pool *blktap_page_pool_get(const char *);
+
+size_t blktap_request_debug(struct blktap *, char *, size_t);
+struct blktap_request *blktap_request_alloc(struct blktap *);
+int blktap_request_get_pages(struct blktap *, struct blktap_request *, int);
 void blktap_request_free(struct blktap *, struct blktap_request *);
-struct page *request_to_page(struct blktap_request *, int);
+void blktap_request_bounce(struct blktap *, struct blktap_request *, int, int);
 
 static inline unsigned long
 request_to_kaddr(struct blktap_request *req, int seg)
 {
-       unsigned long pfn = page_to_pfn(request_to_page(req, seg));
-       return (unsigned long)pfn_to_kaddr(pfn);
+       return (unsigned long)page_address(req->pages[seg]);
 }
 
+#define request_to_page(_request, _seg) ((_request)->pages[_seg])
+
+
 #endif
diff --git a/drivers/xen/blktap/control.c b/drivers/xen/blktap/control.c
index ef54fa1..8652e07 100644
--- a/drivers/xen/blktap/control.c
+++ b/drivers/xen/blktap/control.c
@@ -1,7 +1,7 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/miscdevice.h>
-
+#include <linux/device.h>
 #include <asm/uaccess.h>
 
 #include "blktap.h"
@@ -10,6 +10,7 @@ DEFINE_MUTEX(blktap_lock);
 
 struct blktap **blktaps;
 int blktap_max_minor;
+static struct blktap_page_pool *default_pool;
 
 static struct blktap *
 blktap_control_get_minor(void)
@@ -83,6 +84,9 @@ blktap_control_create_tap(void)
        if (!tap)
                return NULL;
 
+       kobject_get(&default_pool->kobj);
+       tap->pool = default_pool;
+
        err = blktap_ring_create(tap);
        if (err)
                goto fail_tap;
@@ -110,6 +114,8 @@ blktap_control_destroy_tap(struct blktap *tap)
        if (err)
                return err;
 
+       kobject_put(&tap->pool->kobj);
+
        blktap_sysfs_destroy(tap);
 
        blktap_control_put_minor(tap);
@@ -166,12 +172,43 @@ static struct file_operations blktap_control_file_operations = {
        .ioctl    = blktap_control_ioctl,
 };
 
-static struct miscdevice blktap_misc = {
+static struct miscdevice blktap_control = {
        .minor    = MISC_DYNAMIC_MINOR,
        .name     = "blktap-control",
        .fops     = &blktap_control_file_operations,
 };
 
+static struct device *control_device;
+
+static ssize_t
+blktap_control_show_default_pool(struct device *device,
+                                struct device_attribute *attr,
+                                char *buf)
+{
+       return sprintf(buf, "%s", kobject_name(&default_pool->kobj));
+}
+
+static ssize_t
+blktap_control_store_default_pool(struct device *device,
+                                 struct device_attribute *attr,
+                                 const char *buf, size_t size)
+{
+       struct blktap_page_pool *pool, *tmp = default_pool;
+
+       pool = blktap_page_pool_get(buf);
+       if (IS_ERR(pool))
+               return PTR_ERR(pool);
+
+       default_pool = pool;
+       kobject_put(&tmp->kobj);
+
+       return size;
+}
+
+static DEVICE_ATTR(default_pool, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH,
+                  blktap_control_show_default_pool,
+                  blktap_control_store_default_pool);
+
 size_t
 blktap_control_debug(struct blktap *tap, char *buf, size_t size)
 {
@@ -190,12 +227,11 @@ blktap_control_init(void)
 {
        int err;
 
-       err = misc_register(&blktap_misc);
-       if (err) {
-               blktap_misc.minor = MISC_DYNAMIC_MINOR;
-               BTERR("misc_register failed for control device");
+       err = misc_register(&blktap_control);
+       if (err)
                return err;
-       }
+
+       control_device = blktap_control.this_device;
 
        blktap_max_minor = min(64, MAX_BLKTAP_DEVICE);
        blktaps = kzalloc(blktap_max_minor * sizeof(blktaps[0]), GFP_KERNEL);
@@ -204,20 +240,39 @@ blktap_control_init(void)
                return -ENOMEM;
        }
 
+       err = blktap_page_pool_init(&control_device->kobj);
+       if (err)
+               return err;
+
+       default_pool = blktap_page_pool_get("default");
+       if (IS_ERR(default_pool))
+               return PTR_ERR(default_pool);
+
+       err = device_create_file(control_device, &dev_attr_default_pool);
+       if (err)
+               return err;
+
        return 0;
 }
 
 static void
 blktap_control_exit(void)
 {
+       if (default_pool) {
+               kobject_put(&default_pool->kobj);
+               default_pool = NULL;
+       }
+
+       blktap_page_pool_exit();
+
        if (blktaps) {
                kfree(blktaps);
                blktaps = NULL;
        }
 
-       if (blktap_misc.minor != MISC_DYNAMIC_MINOR) {
-               misc_deregister(&blktap_misc);
-               blktap_misc.minor = MISC_DYNAMIC_MINOR;
+       if (control_device) {
+               misc_deregister(&blktap_control);
+               control_device = NULL;
        }
 }
 
@@ -228,7 +283,6 @@ blktap_exit(void)
        blktap_ring_exit();
        blktap_sysfs_exit();
        blktap_device_exit();
-       blktap_request_pool_free();
 }
 
 static int __init
@@ -239,10 +293,6 @@ blktap_init(void)
        if (!xen_pv_domain())
                return -ENODEV;
 
-       err = blktap_request_pool_init();
-       if (err)
-               return err;
-
        err = blktap_device_init();
        if (err)
                goto fail;
diff --git a/drivers/xen/blktap/device.c b/drivers/xen/blktap/device.c
index 3acb8fa..ed95548 100644
--- a/drivers/xen/blktap/device.c
+++ b/drivers/xen/blktap/device.c
@@ -605,7 +605,7 @@ blktap_device_run_queue(struct blktap *tap)
                        break;
                }
 
-               request = blktap_request_allocate(tap);
+               request = blktap_request_alloc(tap);
                if (!request) {
                        tap->stats.st_oo_req++;
                        goto wait;
diff --git a/drivers/xen/blktap/request.c b/drivers/xen/blktap/request.c
index eee7100..ca12442 100644
--- a/drivers/xen/blktap/request.c
+++ b/drivers/xen/blktap/request.c
@@ -1,297 +1,400 @@
+#include <linux/mempool.h>
 #include <linux/spinlock.h>
-#include <xen/balloon.h>
+#include <linux/mutex.h>
 #include <linux/sched.h>
+#include <linux/device.h>
+#include <xen/balloon.h>
 
 #include "blktap.h"
 
-#define MAX_BUCKETS                      8
-#define BUCKET_SIZE                      MAX_PENDING_REQS
+/* max pages per shared pool. just to prevent accidental dos. */
+#define POOL_MAX_PAGES           (256*BLKIF_MAX_SEGMENTS_PER_REQUEST)
 
-#define BLKTAP_POOL_CLOSING              1
+/* default page pool size. when considering shrinking a shared pool,
+ * note that paused tapdisks may grab a whole lot of pages for a long
+ * time. */
+#define POOL_DEFAULT_PAGES       (2 * MMAP_PAGES)
 
-struct blktap_request_bucket;
+/* max number of pages allocatable per request. */
+#define POOL_MAX_REQUEST_PAGES   BLKIF_MAX_SEGMENTS_PER_REQUEST
 
-struct blktap_request_handle {
-       int                              slot;
-       uint8_t                          inuse;
-       struct blktap_request            request;
-       struct blktap_request_bucket    *bucket;
-};
+/* min request structs per pool. These grow dynamically. */
+#define POOL_MIN_REQS            BLK_RING_SIZE
 
-struct blktap_request_bucket {
-       atomic_t                         reqs_in_use;
-       struct blktap_request_handle     handles[BUCKET_SIZE];
-       struct page                    **foreign_pages;
-};
+static struct kset *pool_set;
 
-struct blktap_request_pool {
-       spinlock_t                       lock;
-       uint8_t                          status;
-       struct list_head                 free_list;
-       atomic_t                         reqs_in_use;
-       wait_queue_head_t                wait_queue;
-       struct blktap_request_bucket    *buckets[MAX_BUCKETS];
-};
+#define kobj_to_pool(_kobj) \
+       container_of(_kobj, struct blktap_page_pool, kobj)
 
-static struct blktap_request_pool pool;
-
-static inline struct blktap_request_handle *
-blktap_request_to_handle(struct blktap_request *req)
-{
-       return container_of(req, struct blktap_request_handle, request);
-}
+static struct kmem_cache *request_cache;
+static mempool_t *request_pool;
 
 static void
-blktap_request_pool_init_request(struct blktap_request *request)
+__page_pool_wake(struct blktap_page_pool *pool)
 {
-       int i;
-
-       request->usr_idx  = -1;
-       request->nr_pages = 0;
-       request->status   = BLKTAP_REQUEST_FREE;
-       INIT_LIST_HEAD(&request->free_list);
-       for (i = 0; i < ARRAY_SIZE(request->handles); i++) {
-               request->handles[i].user   = INVALID_GRANT_HANDLE;
-               request->handles[i].kernel = INVALID_GRANT_HANDLE;
-       }
+       mempool_t *mem = pool->bufs;
+
+       /*
+        * NB. Slightly wasteful to always wait for a full segment
+        * set, but this ensures the next disk makes progress.
+        * Presently, the repeated request struct alloc/release
+        * cycles would otherwise keep everyone spinning.
+        */
+
+       if (mem->curr_nr >= POOL_MAX_REQUEST_PAGES)
+               wake_up(&pool->wait);
 }
 
-static int
-blktap_request_pool_allocate_bucket(void)
+int
+blktap_request_get_pages(struct blktap *tap,
+                        struct blktap_request *request, int nr_pages)
 {
-       int i, idx;
-       unsigned long flags;
-       struct blktap_request *request;
-       struct blktap_request_handle *handle;
-       struct blktap_request_bucket *bucket;
+       struct blktap_page_pool *pool = tap->pool;
+       mempool_t *mem = pool->bufs;
+       struct page *page;
 
-       bucket = kzalloc(sizeof(struct blktap_request_bucket), GFP_KERNEL);
-       if (!bucket)
-               goto fail;
+       BUG_ON(request->nr_pages != 0);
+       BUG_ON(nr_pages > POOL_MAX_REQUEST_PAGES);
 
-       bucket->foreign_pages = alloc_empty_pages_and_pagevec(MMAP_PAGES);
-       if (!bucket->foreign_pages)
-               goto fail;
+       if (mem->curr_nr < nr_pages)
+               return -ENOMEM;
 
-       spin_lock_irqsave(&pool.lock, flags);
+       /* NB. avoid thundering herds of tapdisks colliding. */
+       spin_lock(&pool->lock);
 
-       idx = -1;
-       for (i = 0; i < MAX_BUCKETS; i++) {
-               if (!pool.buckets[i]) {
-                       idx = i;
-                       pool.buckets[idx] = bucket;
-                       break;
-               }
+       if (mem->curr_nr < nr_pages) {
+               spin_unlock(&pool->lock);
+               return -ENOMEM;
        }
 
-       if (idx == -1) {
-               spin_unlock_irqrestore(&pool.lock, flags);
-               goto fail;
+       while (request->nr_pages < nr_pages) {
+               page = mempool_alloc(mem, GFP_NOWAIT);
+               BUG_ON(!page);
+               request->pages[request->nr_pages++] = page;
        }
 
-       for (i = 0; i < BUCKET_SIZE; i++) {
-               handle  = bucket->handles + i;
-               request = &handle->request;
+       spin_unlock(&pool->lock);
 
-               handle->slot   = i;
-               handle->inuse  = 0;
-               handle->bucket = bucket;
+       return 0;
+}
+
+static void
+blktap_request_put_pages(struct blktap *tap,
+                        struct blktap_request *request)
+{
+       struct blktap_page_pool *pool = tap->pool;
+       struct page *page;
 
-               blktap_request_pool_init_request(request);
-               list_add_tail(&request->free_list, &pool.free_list);
+       while (request->nr_pages) {
+               page = request->pages[--request->nr_pages];
+               mempool_free(page, pool->bufs);
        }
+}
 
-       spin_unlock_irqrestore(&pool.lock, flags);
+size_t
+blktap_request_debug(struct blktap *tap, char *buf, size_t size)
+{
+       struct blktap_page_pool *pool = tap->pool;
+       mempool_t *mem = pool->bufs;
+       char *s = buf, *end = buf + size;
 
-       return 0;
+       s += snprintf(buf, end - s,
+                     "pool:%s pages:%d free:%d\n",
+                     kobject_name(&pool->kobj),
+                     mem->min_nr, mem->curr_nr);
 
-fail:
-       if (bucket && bucket->foreign_pages)
-               free_empty_pages_and_pagevec(bucket->foreign_pages, MMAP_PAGES);
-       kfree(bucket);
-       return -ENOMEM;
+       return s - buf;
 }
 
-static void
-blktap_request_pool_free_bucket(struct blktap_request_bucket *bucket)
+struct blktap_request*
+blktap_request_alloc(struct blktap *tap)
 {
-       if (!bucket)
-               return;
+       struct blktap_request *request;
 
-       BTDBG("freeing bucket %p\n", bucket);
+       request = mempool_alloc(request_pool, GFP_NOWAIT);
+       if (request)
+               request->tap = tap;
 
-       free_empty_pages_and_pagevec(bucket->foreign_pages, MMAP_PAGES);
-       kfree(bucket);
+       return request;
 }
 
-struct page *
-request_to_page(struct blktap_request *req, int seg)
+void
+blktap_request_free(struct blktap *tap,
+                   struct blktap_request *request)
 {
-       struct blktap_request_handle *handle = blktap_request_to_handle(req);
-       int idx = handle->slot * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
-       return handle->bucket->foreign_pages[idx];
+       blktap_request_put_pages(tap, request);
+
+       mempool_free(request, request_pool);
+
+       __page_pool_wake(tap->pool);
 }
 
-int
-blktap_request_pool_shrink(void)
+static void
+blktap_request_ctor(void *obj)
+{
+       struct blktap_request *request = obj;
+
+       memset(request, 0, sizeof(*request));
+       sg_init_table(request->sg_table, ARRAY_SIZE(request->sg_table));
+}
+
+static int
+blktap_page_pool_resize(struct blktap_page_pool *pool, int target)
 {
-       int i, err;
-       unsigned long flags;
-       struct blktap_request_bucket *bucket;
+       mempool_t *bufs = pool->bufs;
+       int err;
+
+       /* NB. mempool asserts min_nr >= 1 */
+       target = max(1, target);
+
+       err = mempool_resize(bufs, target, GFP_KERNEL);
+       if (err)
+               return err;
 
-       err = -EAGAIN;
+       __page_pool_wake(pool);
 
-       spin_lock_irqsave(&pool.lock, flags);
+       return 0;
+}
 
-       /* always keep at least one bucket */
-       for (i = 1; i < MAX_BUCKETS; i++) {
-               bucket = pool.buckets[i];
-               if (!bucket)
-                       continue;
+struct pool_attribute {
+       struct attribute attr;
 
-               if (atomic_read(&bucket->reqs_in_use))
-                       continue;
+       ssize_t (*show)(struct blktap_page_pool *pool,
+                       char *buf);
 
-               blktap_request_pool_free_bucket(bucket);
-               pool.buckets[i] = NULL;
-               err = 0;
-               break;
-       }
+       ssize_t (*store)(struct blktap_page_pool *pool,
+                        const char *buf, size_t count);
+};
 
-       spin_unlock_irqrestore(&pool.lock, flags);
+#define kattr_to_pool_attr(_kattr) \
+       container_of(_kattr, struct pool_attribute, attr)
 
-       return err;
+static ssize_t
+blktap_page_pool_show_size(struct blktap_page_pool *pool,
+                          char *buf)
+{
+       mempool_t *mem = pool->bufs;
+       return sprintf(buf, "%d", mem->min_nr);
 }
 
-int
-blktap_request_pool_grow(void)
+static ssize_t
+blktap_page_pool_store_size(struct blktap_page_pool *pool,
+                           const char *buf, size_t size)
 {
-       return blktap_request_pool_allocate_bucket();
+       int target;
+
+       /*
+        * NB. target fixup to avoid undesired results. less than a
+        * full segment set can wedge the disk. much more than a
+        * couple times the physical queue depth is rarely useful.
+        */
+
+       target = simple_strtoul(buf, NULL, 0);
+       target = max(POOL_MAX_REQUEST_PAGES, target);
+       target = min(target, POOL_MAX_PAGES);
+
+       return blktap_page_pool_resize(pool, target) ? : size;
 }
 
-struct blktap_request *
-blktap_request_allocate(struct blktap *tap)
+static struct pool_attribute blktap_page_pool_attr_size =
+       __ATTR(size, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH,
+              blktap_page_pool_show_size,
+              blktap_page_pool_store_size);
+
+static ssize_t
+blktap_page_pool_show_free(struct blktap_page_pool *pool,
+                          char *buf)
 {
-       int i;
-       uint16_t usr_idx;
-       unsigned long flags;
-       struct blktap_request *request;
+       mempool_t *mem = pool->bufs;
+       return sprintf(buf, "%d", mem->curr_nr);
+}
 
-       usr_idx = -1;
-       request = NULL;
+static struct pool_attribute blktap_page_pool_attr_free =
+       __ATTR(free, S_IRUSR|S_IRGRP|S_IROTH,
+              blktap_page_pool_show_free,
+              NULL);
 
-       spin_lock_irqsave(&pool.lock, flags);
+static struct attribute *blktap_page_pool_attrs[] = {
+       &blktap_page_pool_attr_size.attr,
+       &blktap_page_pool_attr_free.attr,
+       NULL,
+};
 
-       if (pool.status == BLKTAP_POOL_CLOSING)
-               goto out;
+static inline struct kobject*
+__blktap_kset_find_obj(struct kset *kset, const char *name)
+{
+       struct kobject *k;
+       struct kobject *ret = NULL;
 
-       for (i = 0; i < ARRAY_SIZE(tap->pending_requests); i++)
-               if (!tap->pending_requests[i]) {
-                       usr_idx = i;
+       spin_lock(&kset->list_lock);
+       list_for_each_entry(k, &kset->list, entry) {
+               if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
+                       ret = kobject_get(k);
                        break;
                }
-
-       if (usr_idx == (uint16_t)-1)
-               goto out;
-
-       if (!list_empty(&pool.free_list)) {
-               request = list_entry(pool.free_list.next,
-                                    struct blktap_request, free_list);
-               list_del(&request->free_list);
        }
+       spin_unlock(&kset->list_lock);
+       return ret;
+}
 
-       if (request) {
-               struct blktap_request_handle *handle;
+static ssize_t
+blktap_page_pool_show_attr(struct kobject *kobj, struct attribute *kattr,
+                          char *buf)
+{
+       struct blktap_page_pool *pool = kobj_to_pool(kobj);
+       struct pool_attribute *attr = kattr_to_pool_attr(kattr);
 
-               atomic_inc(&pool.reqs_in_use);
+       if (attr->show)
+               return attr->show(pool, buf);
 
-               handle = blktap_request_to_handle(request);
-               atomic_inc(&handle->bucket->reqs_in_use);
-               handle->inuse = 1;
+       return -EIO;
+}
 
-               request->usr_idx = usr_idx;
+static ssize_t
+blktap_page_pool_store_attr(struct kobject *kobj, struct attribute *kattr,
+                           const char *buf, size_t size)
+{
+       struct blktap_page_pool *pool = kobj_to_pool(kobj);
+       struct pool_attribute *attr = kattr_to_pool_attr(kattr);
 
-               tap->pending_requests[usr_idx] = request;
-               tap->pending_cnt++;
-       }
+       if (attr->store)
+               return attr->store(pool, buf, size);
 
-out:
-       spin_unlock_irqrestore(&pool.lock, flags);
-       return request;
+       return -EIO;
 }
 
-void
-blktap_request_free(struct blktap *tap, struct blktap_request *request)
+static struct sysfs_ops blktap_page_pool_sysfs_ops = {
+       .show           = blktap_page_pool_show_attr,
+       .store          = blktap_page_pool_store_attr,
+};
+
+static void
+blktap_page_pool_release(struct kobject *kobj)
 {
-       int free;
-       unsigned long flags;
-       struct blktap_request_handle *handle;
+       struct blktap_page_pool *pool = kobj_to_pool(kobj);
+       mempool_destroy(pool->bufs);
+       kfree(pool);
+}
 
-       BUG_ON(request->usr_idx >= ARRAY_SIZE(tap->pending_requests));
-       handle = blktap_request_to_handle(request);
+struct kobj_type blktap_page_pool_ktype = {
+       .release       = blktap_page_pool_release,
+       .sysfs_ops     = &blktap_page_pool_sysfs_ops,
+       .default_attrs = blktap_page_pool_attrs,
+};
+
+static void*
+__mempool_page_alloc(gfp_t gfp_mask, void *pool_data)
+{
+       struct page *page;
 
-       spin_lock_irqsave(&pool.lock, flags);
+       if (!(gfp_mask & __GFP_WAIT))
+               return NULL;
 
-       handle->inuse = 0;
-       tap->pending_requests[request->usr_idx] = NULL;
-       blktap_request_pool_init_request(request);
-       list_add(&request->free_list, &pool.free_list);
-       atomic_dec(&handle->bucket->reqs_in_use);
-       free = atomic_dec_and_test(&pool.reqs_in_use);
-       tap->pending_cnt--;
+       page = alloc_page(gfp_mask);
+       if (page)
+               SetPageReserved(page);
 
-       spin_unlock_irqrestore(&pool.lock, flags);
+       return page;
+}
 
-       if (free)
-               wake_up(&pool.wait_queue);
+static void
+__mempool_page_free(void *element, void *pool_data)
+{
+       struct page *page = element;
 
-       blktap_ring_kick_all();
+       ClearPageReserved(page);
+       put_page(page);
 }
 
-void
-blktap_request_pool_free(void)
+static struct kobject*
+blktap_page_pool_create(const char *name, int nr_pages)
 {
-       int i;
-       unsigned long flags;
+       struct blktap_page_pool *pool;
+       int err;
 
-       spin_lock_irqsave(&pool.lock, flags);
+       pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+       if (!pool)
+               goto fail;
 
-       pool.status = BLKTAP_POOL_CLOSING;
-       while (atomic_read(&pool.reqs_in_use)) {
-               spin_unlock_irqrestore(&pool.lock, flags);
-               wait_event(pool.wait_queue, !atomic_read(&pool.reqs_in_use));
-               spin_lock_irqsave(&pool.lock, flags);
-       }
+       spin_lock_init(&pool->lock);
+       init_waitqueue_head(&pool->wait);
 
-       for (i = 0; i < MAX_BUCKETS; i++) {
-               blktap_request_pool_free_bucket(pool.buckets[i]);
-               pool.buckets[i] = NULL;
-       }
+       pool->bufs = mempool_create(nr_pages,
+                                   __mempool_page_alloc, __mempool_page_free,
+                                   pool);
+       if (!pool->bufs)
+               goto fail_pool;
+
+       kobject_init(&pool->kobj, &blktap_page_pool_ktype);
+       pool->kobj.kset = pool_set;
+       err = kobject_add(&pool->kobj, &pool_set->kobj, "%s", name);
+       if (err)
+               goto fail_bufs;
+
+       return &pool->kobj;
 
-       spin_unlock_irqrestore(&pool.lock, flags);
+       kobject_del(&pool->kobj);
+fail_bufs:
+       mempool_destroy(pool->bufs);
+fail_pool:
+       kfree(pool);
+fail:
+       return NULL;
 }
 
-int __init
-blktap_request_pool_init(void)
+struct blktap_page_pool*
+blktap_page_pool_get(const char *name)
 {
-       int i, err;
+       struct kobject *kobj;
+
+       kobj = __blktap_kset_find_obj(pool_set, name);
+       if (!kobj)
+               kobj = blktap_page_pool_create(name,
+                                              POOL_DEFAULT_PAGES);
+       if (!kobj)
+               return ERR_PTR(-ENOMEM);
 
-       memset(&pool, 0, sizeof(pool));
+       return kobj_to_pool(kobj);
+}
+
+int __init
+blktap_page_pool_init(struct kobject *parent)
+{
+       request_cache =
+               kmem_cache_create("blktap-request",
+                                 sizeof(struct blktap_request), 0,
+                                 0, blktap_request_ctor);
+       if (!request_cache)
+               return -ENOMEM;
+
+       request_pool =
+               mempool_create_slab_pool(POOL_MIN_REQS, request_cache);
+       if (!request_pool)
+               return -ENOMEM;
+
+       pool_set = kset_create_and_add("pools", NULL, parent);
+       if (!pool_set)
+               return -ENOMEM;
 
-       spin_lock_init(&pool.lock);
-       INIT_LIST_HEAD(&pool.free_list);
-       atomic_set(&pool.reqs_in_use, 0);
-       init_waitqueue_head(&pool.wait_queue);
+       return 0;
+}
 
-       for (i = 0; i < 2; i++) {
-               err = blktap_request_pool_allocate_bucket();
-               if (err)
-                       goto fail;
+void
+blktap_page_pool_exit(void)
+{
+       if (pool_set) {
+               BUG_ON(!list_empty(&pool_set->list));
+               kset_unregister(pool_set);
+               pool_set = NULL;
        }
 
-       return 0;
+       if (request_pool) {
+               mempool_destroy(request_pool);
+               request_pool = NULL;
+       }
 
-fail:
-       blktap_request_pool_free();
-       return err;
+       if (request_cache) {
+               kmem_cache_destroy(request_cache);
+               request_cache = NULL;
+       }
 }
diff --git a/drivers/xen/blktap/ring.c b/drivers/xen/blktap/ring.c
index 057e97f..a72a1b3 100644
--- a/drivers/xen/blktap/ring.c
+++ b/drivers/xen/blktap/ring.c
@@ -17,8 +17,6 @@
 int blktap_ring_major;
 static struct cdev blktap_ring_cdev;
 
-static DECLARE_WAIT_QUEUE_HEAD(blktap_poll_wait);
-
 static inline struct blktap *
 vma_to_blktap(struct vm_area_struct *vma)
 {
@@ -409,7 +407,7 @@ static unsigned int blktap_ring_poll(struct file *filp, poll_table *wait)
        struct blktap_ring *ring = &tap->ring;
        int work = 0;
 
-       poll_wait(filp, &blktap_poll_wait, wait);
+       poll_wait(filp, &tap->pool->wait, wait);
        poll_wait(filp, &ring->poll_wait, wait);
 
        down_read(&current->mm->mmap_sem);
@@ -440,12 +438,6 @@ blktap_ring_kick_user(struct blktap *tap)
        wake_up(&tap->ring.poll_wait);
 }
 
-void
-blktap_ring_kick_all(void)
-{
-       wake_up(&blktap_poll_wait);
-}
-
 int
 blktap_ring_destroy(struct blktap *tap)
 {
diff --git a/drivers/xen/blktap/sysfs.c b/drivers/xen/blktap/sysfs.c
index e573549..7bbfea8 100644
--- a/drivers/xen/blktap/sysfs.c
+++ b/drivers/xen/blktap/sysfs.c
@@ -104,6 +104,8 @@ blktap_sysfs_debug_device(struct device *dev, struct device_attribute *attr, cha
 
        s += blktap_control_debug(tap, s, end - s);
 
+       s += blktap_request_debug(tap, s, end - s);
+
        s += blktap_device_debug(tap, s, end - s);
 
        s += blktap_ring_debug(tap, s, end - s);
@@ -129,6 +131,38 @@ blktap_sysfs_show_task(struct device *dev, struct device_attribute *attr, char *
 }
 static DEVICE_ATTR(task, S_IRUGO, blktap_sysfs_show_task, NULL);
 
+static ssize_t
+blktap_sysfs_show_pool(struct device *dev,
+                      struct device_attribute *attr,
+                      char *buf)
+{
+       struct blktap *tap = dev_get_drvdata(dev);
+       return sprintf(buf, "%s", kobject_name(&tap->pool->kobj));
+}
+
+static ssize_t
+blktap_sysfs_store_pool(struct device *dev,
+                       struct device_attribute *attr,
+                       const char *buf, size_t size)
+{
+       struct blktap *tap = dev_get_drvdata(dev);
+       struct blktap_page_pool *pool, *tmp = tap->pool;
+
+       if (tap->device.gd)
+               return -EBUSY;
+
+       pool = blktap_page_pool_get(buf);
+       if (IS_ERR(pool))
+               return PTR_ERR(pool);
+
+       tap->pool = pool;
+       kobject_put(&tmp->kobj);
+
+       return size;
+}
+DEVICE_ATTR(pool, S_IRUSR|S_IWUSR,
+           blktap_sysfs_show_pool, blktap_sysfs_store_pool);
+
 int
 blktap_sysfs_create(struct blktap *tap)
 {
@@ -151,6 +185,8 @@ blktap_sysfs_create(struct blktap *tap)
        if (!err)
                err = device_create_file(dev, &dev_attr_task);
        if (!err)
+               err = device_create_file(dev, &dev_attr_pool);
+       if (!err)
                ring->dev = dev;
        else
                device_unregister(dev);
-- 
1.7.0.4

