[Xen-devel] [patch] CFQ for xen domains
Hi folks,

New version of the patch, adapted to apply cleanly against the latest
unstable tree. Debug output is runtime-switchable now; otherwise there
are no major changes from the last version. I'm running my xen machine
all day with that patch without any issues showing up, including
working save/restore. Any chance to get it merged?

Full list of changes:

 * One thread per blkif. The I/O scheduler can do a better job that
   way, and you can use ionice on the blkback threads to adjust the
   block I/O priorities for the domain. (A distilled sketch of the
   thread/wait-queue pattern follows after the patch.)
 * Various stuff has been moved from global variables into blkif_t.
 * The scary allocation ring for pending_req's is gone and has been
   replaced by a free list (see the second sketch after the patch).
 * Made dispatch_rw_block_io() reentrant.
 * General Linux coding style cleanup, at least for the code I've
   touched.
 * The number of outstanding requests is runtime-configurable now.
 * Made the ia64 #ifdefs smaller and dropped one. It should still work
   on ia64 in theory, but it would be great if the ia64 folks could
   have a look ...
 * Re-added the xen_init() call which got lost by mistake (pointed out
   by the ia64 guys).
 * Runtime-switchable stats and debug output.

cheers,
  Gerd

diff -r 6a666940fa04 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Sun Nov 20 09:19:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Mon Nov 21 11:21:46 2005
@@ -12,6 +12,8 @@
  */
 
 #include <linux/spinlock.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
 #include <asm-xen/balloon.h>
 #include <asm/hypervisor.h>
 #include "common.h"
@@ -21,26 +23,26 @@
  * pulled from a communication ring are quite likely to end up being part of
  * the same scatter/gather request at the disc.
  * 
- * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW **
+ * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
+ * 
  * This will increase the chances of being able to write whole tracks.
  * 64 should be enough to keep us competitive with Linux.
  */
-#define MAX_PENDING_REQS 64
-#define BATCH_PER_DOMAIN 16
-
-static unsigned long mmap_vstart;
-#define MMAP_PAGES \
-	(MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
-#ifdef __ia64__
-static void *pending_vaddrs[MMAP_PAGES];
-#define MMAP_VADDR(_idx, _i) \
-	(unsigned long)(pending_vaddrs[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
-#else
-#define MMAP_VADDR(_req,_seg) \
-	(mmap_vstart + \
-	 ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \
-	 ((_seg) * PAGE_SIZE))
-#endif
+static int blkif_reqs = 64;
+static int mmap_pages;
+
+static int __init set_blkif_reqs(char *str)
+{
+	get_option(&str, &blkif_reqs);
+	return 1;
+}
+__setup("blkif_reqs=", set_blkif_reqs);
+
+/* runtime-switchable, check /sys/module/blkback/parameters/ ;) */
+static unsigned int log_stats = 0;
+static unsigned int debug_lvl = 0;
+module_param(log_stats, int, 0644);
+module_param(debug_lvl, int, 0644);
 
 /*
  * Each outstanding request that we've passed to the lower device layers has a
@@ -55,43 +57,38 @@
 	atomic_t pendcnt;
 	unsigned short operation;
 	int status;
+	struct list_head free_list;
 } pending_req_t;
 
-/*
- * We can't allocate pending_req's in order, since they may complete out of
- * order. We therefore maintain an allocation ring. This ring also indicates
- * when enough work has been passed down -- at that point the allocation ring
- * will be empty.
- */
-static pending_req_t pending_reqs[MAX_PENDING_REQS];
-static unsigned char pending_ring[MAX_PENDING_REQS];
-static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
-/* NB. We use a different index type to differentiate from shared blk rings. */
-typedef unsigned int PEND_RING_IDX;
-#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
-static PEND_RING_IDX pending_prod, pending_cons;
-#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
-
-static request_queue_t *plugged_queue;
-static inline void flush_plugged_queue(void)
-{
-	request_queue_t *q = plugged_queue;
-	if (q != NULL) {
-		if ( q->unplug_fn != NULL )
-			q->unplug_fn(q);
-		blk_put_queue(q);
-		plugged_queue = NULL;
-	}
-}
+static pending_req_t *pending_reqs;
+static struct list_head pending_free;
+static spinlock_t pending_free_lock = SPIN_LOCK_UNLOCKED;
+static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
+
+#define BLKBACK_INVALID_HANDLE (0xFFFF)
+
+static unsigned long mmap_vstart;
+static unsigned long *pending_vaddrs;
+static u16 *pending_grant_handles;
+
+static inline int vaddr_pagenr(pending_req_t *req, int seg)
+{
+	return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
+}
+
+static inline unsigned long vaddr(pending_req_t *req, int seg)
+{
+	return pending_vaddrs[vaddr_pagenr(req, seg)];
+}
+
+#define pending_handle(_req, _seg) \
+	(pending_grant_handles[vaddr_pagenr(_req, _seg)])
+
 
 /* When using grant tables to map a frame for device access then the
  * handle returned must be used to unmap the frame. This is needed to
  * drop the ref count on the frame.
  */
-static u16 pending_grant_handles[MMAP_PAGES];
-#define pending_handle(_idx, _i) \
-	(pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
-#define BLKBACK_INVALID_HANDLE (0xFFFF)
 
 #ifdef CONFIG_XEN_BLKDEV_TAP_BE
 /*
@@ -105,26 +102,79 @@
 static inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
 #endif
 
-static int do_block_io_op(blkif_t *blkif, int max_to_do);
-static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
+static int do_block_io_op(blkif_t *blkif);
+static void dispatch_rw_block_io(blkif_t *blkif,
+				 blkif_request_t *req,
+				 pending_req_t *pending_req);
 static void make_response(blkif_t *blkif, unsigned long id,
			   unsigned short op, int st);
 
-static void fast_flush_area(int idx, int nr_pages)
+/******************************************************************
+ * misc small helpers
+ */
+static pending_req_t* alloc_req(void)
+{
+	pending_req_t *req = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pending_free_lock, flags);
+	if (!list_empty(&pending_free)) {
+		req = list_entry(pending_free.next, pending_req_t, free_list);
+		list_del(&req->free_list);
+	}
+	spin_unlock_irqrestore(&pending_free_lock, flags);
+	return req;
+}
+
+static void free_req(pending_req_t *req)
+{
+	unsigned long flags;
+	int was_empty;
+
+	spin_lock_irqsave(&pending_free_lock, flags);
+	was_empty = list_empty(&pending_free);
+	list_add(&req->free_list, &pending_free);
+	spin_unlock_irqrestore(&pending_free_lock, flags);
+	if (was_empty)
+		wake_up(&pending_free_wq);
+}
+
+static void unplug_queue(blkif_t *blkif)
+{
+	if (NULL == blkif->plug)
+		return;
+	if (blkif->plug->unplug_fn)
+		blkif->plug->unplug_fn(blkif->plug);
+	blk_put_queue(blkif->plug);
+	blkif->plug = NULL;
+}
+
+static void plug_queue(blkif_t *blkif, struct bio *bio)
+{
+	request_queue_t *q = bdev_get_queue(bio->bi_bdev);
+
+	if (q == blkif->plug)
+		return;
+	unplug_queue(blkif);
+	blk_get_queue(q);
+	blkif->plug = q;
+}
+
+static void fast_flush_area(pending_req_t *req)
 {
 	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	unsigned int i, invcount = 0;
 	u16 handle;
 	int ret;
 
-	for (i = 0; i < nr_pages; i++) {
-		handle = pending_handle(idx, i);
+	for (i = 0; i < req->nr_pages; i++) {
+		handle = pending_handle(req, i);
 		if (handle == BLKBACK_INVALID_HANDLE)
 			continue;
-		unmap[invcount].host_addr = MMAP_VADDR(idx, i);
+		unmap[invcount].host_addr = vaddr(req, i);
 		unmap[invcount].dev_bus_addr = 0;
 		unmap[invcount].handle = handle;
-		pending_handle(idx, i) = BLKBACK_INVALID_HANDLE;
+		pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
 		invcount++;
 	}
 
@@ -133,109 +183,79 @@
 	BUG_ON(ret);
 }
 
-
-/******************************************************************
- * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
- */
-
-static struct list_head blkio_schedule_list;
-static spinlock_t blkio_schedule_list_lock;
-
-static int __on_blkdev_list(blkif_t *blkif)
-{
-	return blkif->blkdev_list.next != NULL;
-}
-
-static void remove_from_blkdev_list(blkif_t *blkif)
-{
-	unsigned long flags;
-
-	if (!__on_blkdev_list(blkif))
-		return;
-
-	spin_lock_irqsave(&blkio_schedule_list_lock, flags);
-	if (__on_blkdev_list(blkif)) {
-		list_del(&blkif->blkdev_list);
-		blkif->blkdev_list.next = NULL;
-		blkif_put(blkif);
-	}
-	spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
-}
-
-static void add_to_blkdev_list_tail(blkif_t *blkif)
-{
-	unsigned long flags;
-
-	if (__on_blkdev_list(blkif))
-		return;
-
-	spin_lock_irqsave(&blkio_schedule_list_lock, flags);
-	if (!__on_blkdev_list(blkif) && (blkif->status == CONNECTED)) {
-		list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
-		blkif_get(blkif);
-	}
-	spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
-}
-
-
 /******************************************************************
  * SCHEDULER FUNCTIONS
  */
 
-static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
-
-static int blkio_schedule(void *arg)
-{
-	DECLARE_WAITQUEUE(wq, current);
-
-	blkif_t *blkif;
-	struct list_head *ent;
-
-	daemonize("xenblkd");
-
+static void print_stats(blkif_t *blkif)
+{
+	printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d\n",
	       current->comm, blkif->st_oo_req,
	       blkif->st_rd_req, blkif->st_wr_req);
+	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
+	blkif->st_rd_req = 0;
+	blkif->st_wr_req = 0;
+	blkif->st_oo_req = 0;
+}
+
+int blkif_schedule(void *arg)
+{
+	blkif_t *blkif = arg;
+
+	blkif_get(blkif);
+	if (debug_lvl)
+		printk(KERN_DEBUG "%s: started\n", current->comm);
 	for (;;) {
-		/* Wait for work to do. */
-		add_wait_queue(&blkio_schedule_wait, &wq);
-		set_current_state(TASK_INTERRUPTIBLE);
-		if ( (NR_PENDING_REQS == MAX_PENDING_REQS) ||
-		     list_empty(&blkio_schedule_list) )
-			schedule();
-		__set_current_state(TASK_RUNNING);
-		remove_wait_queue(&blkio_schedule_wait, &wq);
-
-		/* Queue up a batch of requests. */
-		while ((NR_PENDING_REQS < MAX_PENDING_REQS) &&
-		       !list_empty(&blkio_schedule_list)) {
-			ent = blkio_schedule_list.next;
-			blkif = list_entry(ent, blkif_t, blkdev_list);
-			blkif_get(blkif);
-			remove_from_blkdev_list(blkif);
-			if (do_block_io_op(blkif, BATCH_PER_DOMAIN))
-				add_to_blkdev_list_tail(blkif);
-			blkif_put(blkif);
-		}
-
-		/* Push the batch through to disc. */
-		flush_plugged_queue();
-	}
-}
-
-static void maybe_trigger_blkio_schedule(void)
-{
-	/*
-	 * Needed so that two processes, which together make the following
-	 * predicate true, don't both read stale values and evaluate the
-	 * predicate incorrectly. Incredibly unlikely to stall the scheduler
-	 * on x86, but...
-	 */
-	smp_mb();
-
-	if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
-	    !list_empty(&blkio_schedule_list))
-		wake_up(&blkio_schedule_wait);
-}
-
-
+		if (kthread_should_stop()) {
+			/* asked to quit? */
+			if (!atomic_read(&blkif->io_pending))
+				break;
+			if (debug_lvl)
+				printk(KERN_DEBUG "%s: I/O pending, delaying exit\n",
				       current->comm);
+		}
+
+		if (!atomic_read(&blkif->io_pending)) {
+			/* Wait for work to do. */
+			wait_event_interruptible(blkif->wq,
						 atomic_read(&blkif->io_pending) ||
						 kthread_should_stop());
+		} else if (list_empty(&pending_free)) {
+			/* Wait for pending_req becoming available. */
+			wait_event_interruptible(pending_free_wq,
						 !list_empty(&pending_free));
+		}
+
+		if (blkif->status != CONNECTED) {
+			/* make sure we are connected */
+			if (debug_lvl)
+				printk(KERN_DEBUG "%s: not connected (%d pending)\n",
				       current->comm, atomic_read(&blkif->io_pending));
+			wait_event_interruptible(blkif->wq,
						 blkif->status != CONNECTED ||
						 kthread_should_stop());
+			continue;
+		}
+
+		/* Schedule I/O */
+		atomic_set(&blkif->io_pending, 0);
+		if (do_block_io_op(blkif))
+			atomic_inc(&blkif->io_pending);
+		unplug_queue(blkif);
+
+		if (log_stats && time_after(jiffies, blkif->st_print))
+			print_stats(blkif);
+	}
+
+	/* bye folks, and thanks for all the fish ;) */
+	if (log_stats)
+		print_stats(blkif);
+	if (debug_lvl)
+		printk(KERN_DEBUG "%s: exiting\n", current->comm);
+	blkif->xenblkd = NULL;
+	blkif_put(blkif);
+	return 0;
+}
 
 /******************************************************************
  * COMPLETION CALLBACK -- Called as bh->b_end_io()
@@ -243,8 +263,6 @@
 
 static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
 {
-	unsigned long flags;
-
 	/* An error fails the entire request. */
 	if (!uptodate) {
 		DPRINTK("Buffer not up-to-date at end of operation\n");
@@ -252,15 +270,11 @@
 	}
 
 	if (atomic_dec_and_test(&pending_req->pendcnt)) {
-		int pending_idx = pending_req - pending_reqs;
-		fast_flush_area(pending_idx, pending_req->nr_pages);
+		fast_flush_area(pending_req);
 		make_response(pending_req->blkif, pending_req->id,
			      pending_req->operation, pending_req->status);
 		blkif_put(pending_req->blkif);
-		spin_lock_irqsave(&pend_prod_lock, flags);
-		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
-		spin_unlock_irqrestore(&pend_prod_lock, flags);
-		maybe_trigger_blkio_schedule();
+		free_req(pending_req);
 	}
 }
 
@@ -281,8 +295,9 @@
 
 irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
 {
 	blkif_t *blkif = dev_id;
-	add_to_blkdev_list_tail(blkif);
-	maybe_trigger_blkio_schedule();
+
+	atomic_inc(&blkif->io_pending);
+	wake_up(&blkif->wq);
 	return IRQ_HANDLED;
 }
 
@@ -292,10 +307,11 @@
  * DOWNWARD CALLS -- These interface with the block-device layer proper.
  */
 
-static int do_block_io_op(blkif_t *blkif, int max_to_do)
+static int do_block_io_op(blkif_t *blkif)
 {
 	blkif_back_ring_t *blk_ring = &blkif->blk_ring;
 	blkif_request_t *req;
+	pending_req_t *pending_req;
 	RING_IDX i, rp;
 	int more_to_do = 0;
 
@@ -305,24 +321,30 @@
 	for (i = blk_ring->req_cons;
	     (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
	     i++) {
-		if ((max_to_do-- == 0) ||
-		    (NR_PENDING_REQS == MAX_PENDING_REQS)) {
+
+		pending_req = alloc_req();
+		if (NULL == pending_req) {
+			blkif->st_oo_req++;
 			more_to_do = 1;
 			break;
 		}
-
+
 		req = RING_GET_REQUEST(blk_ring, i);
 		switch (req->operation) {
 		case BLKIF_OP_READ:
+			blkif->st_rd_req++;
+			dispatch_rw_block_io(blkif, req, pending_req);
+			break;
 		case BLKIF_OP_WRITE:
-			dispatch_rw_block_io(blkif, req);
+			blkif->st_wr_req++;
+			dispatch_rw_block_io(blkif, req, pending_req);
 			break;
-
 		default:
 			DPRINTK("error: unknown block io operation [%d]\n",
				req->operation);
 			make_response(blkif, req->id, req->operation,
				      BLKIF_RSP_ERROR);
+			free_req(pending_req);
 			break;
 		}
 	}
@@ -331,13 +353,13 @@
 	return more_to_do;
 }
 
-static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
+static void dispatch_rw_block_io(blkif_t *blkif,
+				 blkif_request_t *req,
+				 pending_req_t *pending_req)
 {
 	extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
 	int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
 	unsigned long fas = 0;
-	int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
-	pending_req_t *pending_req;
 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	struct phys_req preq;
 	struct {
@@ -345,31 +367,35 @@
 	} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	unsigned int nseg;
 	struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	int nbio = 0;
-	request_queue_t *q;
-	int ret, errors = 0;
+	int ret, i, nbio = 0;
 
 	/* Check that number of segments is sane. */
 	nseg = req->nr_segments;
 	if (unlikely(nseg == 0) ||
	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
 		DPRINTK("Bad number of segments in request (%d)\n", nseg);
-		goto bad_descriptor;
+		goto fail_response;
 	}
 
 	preq.dev = req->handle;
 	preq.sector_number = req->sector_number;
 	preq.nr_sects = 0;
 
+	pending_req->blkif = blkif;
+	pending_req->id = req->id;
+	pending_req->operation = operation;
+	pending_req->status = BLKIF_RSP_OKAY;
+	pending_req->nr_pages = nseg;
+
 	for (i = 0; i < nseg; i++) {
 		fas = req->frame_and_sects[i];
 		seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;

 		if (seg[i].nsec <= 0)
-			goto bad_descriptor;
+			goto fail_response;
 		preq.nr_sects += seg[i].nsec;

-		map[i].host_addr = MMAP_VADDR(pending_idx, i);
+		map[i].host_addr = vaddr(pending_req, i);
 		map[i].dom = blkif->domid;
 		map[i].ref = blkif_gref_from_fas(fas);
 		map[i].flags = GNTMAP_host_map;
@@ -381,27 +407,23 @@
 	BUG_ON(ret);
 
 	for (i = 0; i < nseg; i++) {
-		if (likely(map[i].handle >= 0)) {
-			pending_handle(pending_idx, i) = map[i].handle;
+		if (unlikely(map[i].handle < 0)) {
+			DPRINTK("invalid buffer -- could not remap it\n");
+			goto fail_flush;
+		}
+
+		pending_handle(pending_req, i) = map[i].handle;
 #ifdef __ia64__
-			MMAP_VADDR(pending_idx,i) = gnttab_map_vaddr(map[i]);
+		pending_vaddrs[vaddr_pagenr(pending_req, i)] =
			gnttab_map_vaddr(map[i]);
#else
-			set_phys_to_machine(__pa(MMAP_VADDR(
-				pending_idx, i)) >> PAGE_SHIFT,
-				FOREIGN_FRAME(map[i].dev_bus_addr>>PAGE_SHIFT));
+		set_phys_to_machine(__pa(vaddr(
			pending_req, i)) >> PAGE_SHIFT,
			FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
#endif
-			fas = req->frame_and_sects[i];
-			seg[i].buf = map[i].dev_bus_addr |
-				(blkif_first_sect(fas) << 9);
-		} else {
-			errors++;
-		}
-	}
-
-	if (errors) {
-		DPRINTK("invalid buffer -- could not remap it\n");
-		fast_flush_area(pending_idx, nseg);
-		goto bad_descriptor;
+		fas = req->frame_and_sects[i];
+		seg[i].buf = map[i].dev_bus_addr |
			(blkif_first_sect(fas) << 9);
 	}
 
 	if (vbd_translate(&preq, blkif, operation) != 0) {
 		DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
@@ -409,37 +431,25 @@
			operation == READ ? "read" : "write",
			preq.sector_number,
			preq.sector_number + preq.nr_sects, preq.dev);
-		goto bad_descriptor;
-	}
-
-	pending_req = &pending_reqs[pending_idx];
-	pending_req->blkif = blkif;
-	pending_req->id = req->id;
-	pending_req->operation = operation;
-	pending_req->status = BLKIF_RSP_OKAY;
-	pending_req->nr_pages = nseg;
+		goto fail_flush;
+	}
 
 	for (i = 0; i < nseg; i++) {
 		if (((int)preq.sector_number|(int)seg[i].nsec) &
		    ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) {
 			DPRINTK("Misaligned I/O request from domain %d",
				blkif->domid);
-			goto cleanup_and_fail;
+			goto fail_put_bio;
 		}
 
 		while ((bio == NULL) ||
		       (bio_add_page(bio,
				     virt_to_page(vaddr(pending_req, i)),
				     seg[i].nsec << 9,
				     seg[i].buf & ~PAGE_MASK) == 0)) {
 			bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i);
-			if (unlikely(bio == NULL)) {
-			cleanup_and_fail:
-				for (i = 0; i < (nbio-1); i++)
-					bio_put(biolist[i]);
-				fast_flush_area(pending_idx, nseg);
-				goto bad_descriptor;
-			}
+			if (unlikely(bio == NULL))
+				goto fail_put_bio;
 
 			bio->bi_bdev = preq.bdev;
 			bio->bi_private = pending_req;
@@ -450,14 +460,8 @@
 		preq.sector_number += seg[i].nsec;
 	}
 
-	if ((q = bdev_get_queue(bio->bi_bdev)) != plugged_queue) {
-		flush_plugged_queue();
-		blk_get_queue(q);
-		plugged_queue = q;
-	}
-
+	plug_queue(blkif, bio);
 	atomic_set(&pending_req->pendcnt, nbio);
-	pending_cons++;
 	blkif_get(blkif);
 
 	for (i = 0; i < nbio; i++)
@@ -465,8 +469,14 @@
 
 	return;
 
- bad_descriptor:
+ fail_put_bio:
+	for (i = 0; i < (nbio-1); i++)
+		bio_put(biolist[i]);
+ fail_flush:
+	fast_flush_area(pending_req);
+ fail_response:
 	make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+	free_req(pending_req);
 }
 
@@ -498,56 +508,50 @@
 	notify_remote_via_irq(blkif->irq);
 }
 
-void blkif_deschedule(blkif_t *blkif)
-{
-	remove_from_blkdev_list(blkif);
-}
-
 static int __init blkif_init(void)
 {
+	struct page *page;
 	int i;
-	struct page *page;
-	int ret;
-
-	for (i = 0; i < MMAP_PAGES; i++)
-		pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
 
 	if (xen_init() < 0)
 		return -ENODEV;
 
+	mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	pending_reqs = kmalloc(sizeof(pending_reqs[0]) *
			       blkif_reqs, GFP_KERNEL);
+	pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
					mmap_pages, GFP_KERNEL);
+	pending_vaddrs = kmalloc(sizeof(pending_vaddrs[0]) *
				 mmap_pages, GFP_KERNEL);
+	if (!pending_reqs || !pending_grant_handles || !pending_vaddrs) {
+		printk("%s: out of memory\n", __FUNCTION__);
+		return -1;
+	}
+
 	blkif_interface_init();
-
+
 #ifdef __ia64__
-	{
-	extern unsigned long alloc_empty_foreign_map_page_range(unsigned long pages);
-	int i;
-
-	mmap_vstart = alloc_empty_foreign_map_page_range(MMAP_PAGES);
-	printk("Allocated mmap_vstart: 0x%lx\n", mmap_vstart);
-	for(i = 0; i < MMAP_PAGES; i++)
-		pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
-	BUG_ON(mmap_vstart == NULL);
-	}
-#else
-	page = balloon_alloc_empty_page_range(MMAP_PAGES);
+	mmap_vstart = (unsigned long)alloc_empty_foreign_map_page_range(mmap_pages);
+#else /* ! ia64 */
+	page = balloon_alloc_empty_page_range(mmap_pages);
 	BUG_ON(page == NULL);
 	mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
 #endif
-
-	pending_cons = 0;
-	pending_prod = MAX_PENDING_REQS;
+	printk("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n",
	       __FUNCTION__, blkif_reqs, mmap_pages, mmap_vstart);
+	BUG_ON(mmap_vstart == 0);
+	for (i = 0; i < mmap_pages; i++)
+		pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
+
+	memset(pending_grant_handles, BLKBACK_INVALID_HANDLE, mmap_pages);
 	memset(pending_reqs, 0, sizeof(pending_reqs));
-	for (i = 0; i < MAX_PENDING_REQS; i++)
-		pending_ring[i] = i;
+	INIT_LIST_HEAD(&pending_free);
+
+	for (i = 0; i < blkif_reqs; i++)
+		list_add_tail(&pending_reqs[i].free_list, &pending_free);
 
-	spin_lock_init(&blkio_schedule_list_lock);
-	INIT_LIST_HEAD(&blkio_schedule_list);
-
-	ret = kernel_thread(blkio_schedule, NULL, CLONE_FS | CLONE_FILES);
-	BUG_ON(ret < 0);
-
 	blkif_xenbus_init();
-
 	return 0;
 }

diff -r 6a666940fa04 linux-2.6-xen-sparse/drivers/xen/blkback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h	Sun Nov 20 09:19:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h	Mon Nov 21 11:21:46 2005
@@ -56,9 +56,19 @@
 	/* Is this a blktap frontend */
 	unsigned int is_blktap;
 #endif
-	struct list_head blkdev_list;
 	spinlock_t blk_ring_lock;
 	atomic_t refcnt;
+
+	wait_queue_head_t wq;
+	struct task_struct *xenblkd;
+	atomic_t io_pending;
+	request_queue_t *plug;
+
+	/* statistics */
+	unsigned long st_print;
+	int st_rd_req;
+	int st_wr_req;
+	int st_oo_req;
 
 	struct work_struct free_work;
 
@@ -97,11 +107,10 @@
 
 void blkif_interface_init(void);
 
-void blkif_deschedule(blkif_t *blkif);
-
 void blkif_xenbus_init(void);
 
 irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
+int blkif_schedule(void *arg);
 
 #endif /* __BLKIF__BACKEND__COMMON_H__ */

diff -r 6a666940fa04 linux-2.6-xen-sparse/drivers/xen/blkback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c	Sun Nov 20 09:19:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c	Mon Nov 21 11:21:46 2005
@@ -24,6 +24,8 @@
 	blkif->status = DISCONNECTED;
 	spin_lock_init(&blkif->blk_ring_lock);
 	atomic_set(&blkif->refcnt, 1);
+	init_waitqueue_head(&blkif->wq);
+	blkif->st_print = jiffies;
 
 	return blkif;
 }
@@ -113,6 +115,7 @@
 	blkif->irq = bind_evtchn_to_irqhandler(
		blkif->evtchn, blkif_be_int, 0, "blkif-backend", blkif);
 	blkif->status = CONNECTED;
+	wake_up(&blkif->wq);
 
 	return 0;
 }

diff -r 6a666940fa04 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c	Sun Nov 20 09:19:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c	Mon Nov 21 11:21:46 2005
@@ -20,6 +20,7 @@
 
 #include <stdarg.h>
 #include <linux/module.h>
+#include <linux/kthread.h>
 #include <asm-xen/xenbus.h>
 #include "common.h"
 
@@ -61,6 +62,8 @@
 		be->backend_watch.node = NULL;
 	}
 	if (be->blkif) {
+		if (be->blkif->xenblkd)
+			kthread_stop(be->blkif->xenblkd);
 		blkif_put(be->blkif);
 		be->blkif = NULL;
 	}
@@ -175,6 +178,16 @@
 		be->pdev = 0L;
 		xenbus_dev_fatal(dev, err, "creating vbd structure");
+		return;
+	}
+
+	be->blkif->xenblkd = kthread_run(blkif_schedule, be->blkif,
					 "xenblkd %d/%04lx",
					 be->blkif->domid, be->pdev);
+	if (IS_ERR(be->blkif->xenblkd)) {
+		err = PTR_ERR(be->blkif->xenblkd);
+		be->blkif->xenblkd = NULL;
+		xenbus_dev_error(dev, err, "start xenblkd");
 		return;
 	}
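
For readers who want the gist without walking through the whole diff: the
structural core of the patch is replacing the single global "xenblkd"
scheduler loop with one kernel thread per blkif, woken from the interrupt
handler and stopped during xenbus teardown. Below is a minimal,
self-contained sketch of that kthread/wait-queue pattern. It is
illustrative only: the names (mydev, mydev_kick, mydev_thread, ...) are
made up and do not appear in the patch, and the header list assumes a
reasonably current kernel (older 2.6 trees used <asm/atomic.h> instead of
<linux/atomic.h>).

/* Illustrative sketch only -- not part of the patch above. */
#include <linux/atomic.h>	/* assumption: recent kernel headers */
#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/wait.h>

struct mydev {				/* hypothetical per-backend state */
	wait_queue_head_t wq;		/* the worker thread sleeps here */
	atomic_t io_pending;		/* work posted by the irq handler */
	struct task_struct *thread;
};

static int mydev_thread(void *arg)
{
	struct mydev *dev = arg;

	while (!kthread_should_stop()) {
		/* Sleep until work is posted or we are asked to stop. */
		wait_event_interruptible(dev->wq,
					 atomic_read(&dev->io_pending) ||
					 kthread_should_stop());

		/* Claim the posted work and process it. */
		atomic_set(&dev->io_pending, 0);
		/* ... pull requests off the ring and submit them ... */
	}
	return 0;
}

/* Called from the interrupt handler: post work, wake the thread. */
static void mydev_kick(struct mydev *dev)
{
	atomic_inc(&dev->io_pending);
	wake_up(&dev->wq);
}

static int mydev_start(struct mydev *dev)
{
	init_waitqueue_head(&dev->wq);
	atomic_set(&dev->io_pending, 0);
	dev->thread = kthread_run(mydev_thread, dev, "mydevd");
	return IS_ERR(dev->thread) ? PTR_ERR(dev->thread) : 0;
}

static void mydev_stop(struct mydev *dev)
{
	kthread_stop(dev->thread);	/* wakes the thread, waits for exit */
}

Because each backend gets its own thread this way, ionice can target the
xenblkd thread of a single domain, which is what makes the per-domain
block I/O priorities mentioned in the changelog possible.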
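
The other change worth distilling is the free list that replaces the old
allocation ring for pending_req's. The sketch below mirrors what the
patch's alloc_req()/free_req() helpers do, reduced to the bare pattern:
descriptors sit on a spinlock-protected list, and a wait queue lets the
consumer sleep until one is returned. The my_* names are again purely
illustrative and not taken from the patch.

/* Illustrative sketch only -- a distilled alloc_req()/free_req(). */
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

struct my_req {
	struct list_head free_list;
	/* ... per-request state ... */
};

static LIST_HEAD(my_free);
static DEFINE_SPINLOCK(my_free_lock);
static DECLARE_WAIT_QUEUE_HEAD(my_free_wq);

static struct my_req *my_alloc_req(void)
{
	struct my_req *req = NULL;
	unsigned long flags;

	spin_lock_irqsave(&my_free_lock, flags);
	if (!list_empty(&my_free)) {
		req = list_entry(my_free.next, struct my_req, free_list);
		list_del(&req->free_list);
	}
	spin_unlock_irqrestore(&my_free_lock, flags);
	return req;		/* NULL: all descriptors in flight, back off */
}

static void my_free_req(struct my_req *req)
{
	unsigned long flags;
	int was_empty;

	spin_lock_irqsave(&my_free_lock, flags);
	was_empty = list_empty(&my_free);
	list_add(&req->free_list, &my_free);
	spin_unlock_irqrestore(&my_free_lock, flags);

	/* Wake waiters only on the empty -> non-empty transition. */
	if (was_empty)
		wake_up(&my_free_wq);
}

A consumer that finds the list empty can wait with
wait_event_interruptible(my_free_wq, !list_empty(&my_free)) and retry,
which is how blkif_schedule() in the patch throttles itself when all
pending_req's are in flight.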