[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 1/2 - resent] allocate tapfds for blktap
This patch (requested by Andrew) makes blktap allocate the tapfds descriptors only when they are requested. Currently all of them are allocated at bootup, even when they will never be used. [This patch was tested before the 3.0.3 branch, but has only been compile-tested after the branch] -- Steve Signed-off-by: Steven Rostedt <srostedt@xxxxxxxxxx> diff -r 0dc4ae151be2 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Thu Oct 05 09:30:07 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Fri Oct 20 13:11:05 2006 -0400 @@ -9,6 +9,9 @@ * Based on the blkback driver code. * * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield + * + * Clean ups and fix ups: + * Copyright (c) 2006, Steven Rostedt - Red Hat, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 @@ -51,7 +54,7 @@ #include <asm/tlbflush.h> #include <linux/devfs_fs_kernel.h> -#define MAX_TAP_DEV 100 /*the maximum number of tapdisk ring devices */ +#define MAX_TAP_DEV 256 /*the maximum number of tapdisk ring devices */ #define MAX_DEV_NAME 100 /*the max tapdisk ring device name e.g. blktap0 */ @@ -104,6 +107,12 @@ static int mmap_pages = MMAP_PAGES; * have a bunch of pages reserved for shared * memory rings. */ + +/*Data struct handed back to userspace for tapdisk device to VBD mapping*/ +typedef struct domid_translate { + unsigned short domid; + unsigned short busid; +} domid_translate_t ; /*Data struct associated with each of the tapdisk devices*/ typedef struct tap_blkif { @@ -123,17 +132,11 @@ typedef struct tap_blkif { unsigned long *idx_map; /*Record the user ring id to kern [req id, idx] tuple */ blkif_t *blkif; /*Associate blkif with tapdev */ - int sysfs_set; /*Set if it has a class device. */ + struct domid_translate trans; /*Translation from domid to bus. 
*/ } tap_blkif_t; -/*Data struct handed back to userspace for tapdisk device to VBD mapping*/ -typedef struct domid_translate { - unsigned short domid; - unsigned short busid; -} domid_translate_t ; - -static domid_translate_t translate_domid[MAX_TAP_DEV]; -static tap_blkif_t *tapfds[MAX_TAP_DEV]; +static struct tap_blkif *tapfds[MAX_TAP_DEV]; +static int blktap_next_minor; static int __init set_blkif_reqs(char *str) { @@ -320,7 +323,7 @@ struct vm_operations_struct blktap_vm_op */ /*Function Declarations*/ -static int get_next_free_dev(void); +static tap_blkif_t *get_next_free_dev(void); static int blktap_open(struct inode *inode, struct file *filp); static int blktap_release(struct inode *inode, struct file *filp); static int blktap_mmap(struct file *filp, struct vm_area_struct *vma); @@ -338,51 +341,94 @@ static struct file_operations blktap_fop }; -static int get_next_free_dev(void) +static tap_blkif_t *get_next_free_dev(void) { tap_blkif_t *info; - int i = 0, ret = -1; - unsigned long flags; - - spin_lock_irqsave(&pending_free_lock, flags); - - while (i < MAX_TAP_DEV) { + int minor; + + /* + * This is called only from the ioctl, which + * means we should always have interrupts enabled. + */ + BUG_ON(irqs_disabled()); + + spin_lock_irq(&pending_free_lock); + + for (minor = 1; minor < blktap_next_minor; minor++) { + info = tapfds[minor]; + /* we could have failed a previous attempt. */ + if (!info || + ((info->dev_inuse == 0) && + (info->dev_pending == 0)) ) { + info->dev_pending = 1; + goto found; + } + } + info = NULL; + minor = -1; + + /* + * We didn't find free device. If we can still allocate + * more, then we grab the next device minor that is + * available. This is done while we are still under + * the protection of the pending_free_lock. 
+ */ + if (blktap_next_minor < MAX_TAP_DEV) + minor = blktap_next_minor++; +found: + spin_unlock_irq(&pending_free_lock); + + if (!info && minor > 0) { + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (unlikely(!info)) { + /* + * If we failed here, try to put back + * the next minor number. But if one + * was just taken, then we just lose this + * minor. We can try to allocate this + * minor again later. + */ + spin_lock_irq(&pending_free_lock); + if (blktap_next_minor == minor+1) + blktap_next_minor--; + spin_unlock_irq(&pending_free_lock); + goto out; + } + + info->minor = minor; + /* + * Make sure that we have a minor before others can + * see us. + */ + wmb(); + tapfds[minor] = info; + + class_device_create(xen_class, NULL, + MKDEV(blktap_major, minor), NULL, + "blktap%d", minor); + devfs_mk_cdev(MKDEV(blktap_major, minor), + S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", minor); + } + +out: + return info; +} + +int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif) +{ + tap_blkif_t *info; + int i; + + for (i = 0; i < blktap_next_minor; i++) { info = tapfds[i]; - if ( (tapfds[i] != NULL) && (info->dev_inuse == 0) - && (info->dev_pending == 0) ) { - info->dev_pending = 1; - ret = i; - goto done; - } - i++; - } - -done: - spin_unlock_irqrestore(&pending_free_lock, flags); - - /* - * We are protected by having the dev_pending set. 
- */ - if (!tapfds[i]->sysfs_set && xen_class) { - class_device_create(xen_class, NULL, - MKDEV(blktap_major, ret), NULL, - "blktap%d", ret); - tapfds[i]->sysfs_set = 1; - } - return ret; -} - -int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif) -{ - int i; - - for (i = 0; i < MAX_TAP_DEV; i++) - if ( (translate_domid[i].domid == domid) - && (translate_domid[i].busid == xenbus_id) ) { - tapfds[i]->blkif = blkif; - tapfds[i]->status = RUNNING; + if ( info && + (info->trans.domid == domid) && + (info->trans.busid == xenbus_id) ) { + info->blkif = blkif; + info->status = RUNNING; return i; } + } return -1; } @@ -392,12 +438,16 @@ void signal_tapdisk(int idx) struct task_struct *ptask; info = tapfds[idx]; - if ( (idx > 0) && (idx < MAX_TAP_DEV) && (info->pid > 0) ) { + if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) + return; + + if (info->pid > 0) { ptask = find_task_by_pid(info->pid); if (ptask) info->status = CLEANSHUTDOWN; } info->blkif = NULL; + return; } @@ -408,14 +458,15 @@ static int blktap_open(struct inode *ino tap_blkif_t *info; int i; - if (tapfds[idx] == NULL) { + info = tapfds[idx]; + + if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) { WPRINTK("Unable to open device /dev/xen/blktap%d\n", - idx); - return -ENOMEM; - } + idx); + return -ENODEV; + } + DPRINTK("Opening device /dev/xen/blktap%d\n",idx); - - info = tapfds[idx]; /*Only one process can access device at a time*/ if (test_and_set_bit(0, &info->dev_inuse)) @@ -617,33 +668,31 @@ static int blktap_ioctl(struct inode *in { uint64_t val = (uint64_t)arg; domid_translate_t *tr = (domid_translate_t *)&val; - int newdev; DPRINTK("NEWINTF Req for domid %d and bus id %d\n", tr->domid, tr->busid); - newdev = get_next_free_dev(); - if (newdev < 1) { + info = get_next_free_dev(); + if (!info) { WPRINTK("Error initialising /dev/xen/blktap - " "No more devices\n"); return -1; } - translate_domid[newdev].domid = tr->domid; - translate_domid[newdev].busid = tr->busid; - return newdev; + info->trans.domid = 
tr->domid; + info->trans.busid = tr->busid; + return info->minor; } case BLKTAP_IOCTL_FREEINTF: { unsigned long dev = arg; unsigned long flags; - /* Looking at another device */ - info = NULL; - - if ( (dev > 0) && (dev < MAX_TAP_DEV) ) - info = tapfds[dev]; + info = tapfds[dev]; + + if ((dev > MAX_TAP_DEV) || !info) + return 0; /* should this be an error? */ spin_lock_irqsave(&pending_free_lock, flags); - if ( (info != NULL) && (info->dev_pending) ) + if (info->dev_pending) info->dev_pending = 0; spin_unlock_irqrestore(&pending_free_lock, flags); @@ -653,16 +702,12 @@ static int blktap_ioctl(struct inode *in { unsigned long dev = arg; - /* Looking at another device */ - info = NULL; - - if ( (dev > 0) && (dev < MAX_TAP_DEV) ) - info = tapfds[dev]; - - if (info != NULL) - return info->minor; - else - return -1; + info = tapfds[dev]; + + if (!dev || (dev > MAX_TAP_DEV) || !info) + return -EINVAL; + + return info->minor; } case BLKTAP_IOCTL_MAJOR: return blktap_major; @@ -702,13 +747,13 @@ void blktap_kick_user(int idx) { tap_blkif_t *info; - if (idx == 0) + info = tapfds[idx]; + + /* Don't kick control device minor==0 */ + if ((idx <= 0) || (idx > MAX_TAP_DEV) || !info) return; - - info = tapfds[idx]; - - if (info != NULL) - wake_up_interruptible(&info->wait); + + wake_up_interruptible(&info->wait); return; } @@ -868,8 +913,8 @@ static void free_req(pending_req_t *req) wake_up(&pending_free_wq); } -static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, int - tapidx) +static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, + int tapidx) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; unsigned int i, invcount = 0; @@ -877,13 +922,16 @@ static void fast_flush_area(pending_req_ uint64_t ptep; int ret, mmap_idx; unsigned long kvaddr, uvaddr; - - tap_blkif_t *info = tapfds[tapidx]; - - if (info == NULL) { + tap_blkif_t *info; + + + info = tapfds[tapidx]; + + if ((tapidx < 0) || (tapidx > MAX_TAP_DEV) || !info) { 
WPRINTK("fast_flush: Couldn't get info!\n"); return; } + mmap_idx = req->mem_idx; for (i = 0; i < req->nr_pages; i++) { @@ -1088,7 +1136,7 @@ static int do_block_io_op(blkif_t *blkif rmb(); /* Ensure we see queued requests up to 'rp'. */ /*Check blkif has corresponding UE ring*/ - if (blkif->dev_num == -1) { + if (blkif->dev_num < 0) { /*oops*/ if (print_dbug) { WPRINTK("Corresponding UE " @@ -1099,7 +1147,8 @@ static int do_block_io_op(blkif_t *blkif } info = tapfds[blkif->dev_num]; - if (info == NULL || !info->dev_inuse) { + + if (blkif->dev_num > MAX_TAP_DEV || !info || !info->dev_inuse) { if (print_dbug) { WPRINTK("Can't get UE info!\n"); print_dbug = 0; @@ -1167,16 +1216,24 @@ static void dispatch_rw_block_io(blkif_t struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; unsigned int nseg; int ret, i; - tap_blkif_t *info = tapfds[blkif->dev_num]; + tap_blkif_t *info; uint64_t sector; blkif_request_t *target; int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx); - int usr_idx = GET_NEXT_REQ(info->idx_map); + int usr_idx; uint16_t mmap_idx = pending_req->mem_idx; + info = tapfds[blkif->dev_num]; + + if (blkif->dev_num < 0 || blkif->dev_num > MAX_TAP_DEV || !info) + goto fail_response; + + usr_idx = GET_NEXT_REQ(info->idx_map); + /*Check we have space on user ring - should never fail*/ - if(usr_idx == INVALID_REQ) goto fail_flush; + if (usr_idx == INVALID_REQ) + goto fail_flush; /* Check that number of segments is sane. 
*/ nseg = req->nr_segments; @@ -1390,9 +1447,6 @@ static int __init blkif_init(void) tap_blkif_xenbus_init(); - /*Create the blktap devices, but do not map memory or waitqueue*/ - for(i = 0; i < MAX_TAP_DEV; i++) translate_domid[i].domid = 0xFFFF; - /* Dynamically allocate a major for this device */ ret = register_chrdev(0, "blktap", &blktap_fops); blktap_dir = devfs_mk_dir(NULL, "xen", 0, NULL); @@ -1404,24 +1458,22 @@ static int __init blkif_init(void) blktap_major = ret; - for(i = 0; i < MAX_TAP_DEV; i++ ) { - info = tapfds[i] = kzalloc(sizeof(tap_blkif_t),GFP_KERNEL); - if(tapfds[i] == NULL) - return -ENOMEM; - info->minor = i; - info->pid = 0; - info->blkif = NULL; - - ret = devfs_mk_cdev(MKDEV(blktap_major, i), - S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i); - - if(ret != 0) - return -ENOMEM; - info->dev_pending = info->dev_inuse = 0; - - DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i); - } - + info = kzalloc(sizeof(tap_blkif_t),GFP_KERNEL); + if (!info) + return -ENOMEM; + + blktap_next_minor++; + + ret = devfs_mk_cdev(MKDEV(blktap_major, i), + S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i); + + if(ret != 0) + return -ENOMEM; + + DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i); + + tapfds[0] = info; + /* Make sure the xen class exists */ if (!setup_xen_class()) { /* @@ -1434,7 +1486,6 @@ static int __init blkif_init(void) class_device_create(xen_class, NULL, MKDEV(blktap_major, 0), NULL, "blktap0"); - tapfds[0]->sysfs_set = 1; } else { /* this is bad, but not fatal */ WPRINTK("blktap: sysfs xen_class not created\n"); _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |