[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] Switch block device setup/teardown over to xenbus/xenstore.
# HG changeset patch # User cl349@xxxxxxxxxxxxxxxxxxxx # Node ID 7570087c2552e221af20c27e4a9e143636dce236 # Parent e355ae38c83b1d83cc618a105854fb29b5145275 Switch block device setup/teardown over to xenbus/xenstore. Remove device probing support over the device channel. g/c all the control message code. Simplify and cleanup internal function calling code (explicit arguments instead of passing structs around). Also includes: Changed "readonly" to "read-only". "backend" is still not hyphenated, but that seems to be fairly common usage. Block backends use xenbus_dev_error, making debugging nicer. The block front end is told explicitly what domid to use to talk to the backend. There's speculation that fake domid-like tokens may be given for cluster purposes in future, so deriving from the backend dir name is not guaranteed to work. Backends still derive front-end ids, using a xenbus helper (xenbus_uuid_to_domid). Signed-off-by: Rusty Russell <rusty@xxxxxxxxxxxxxxx> Signed-off-by: Christian Limpach <Christian.Limpach@xxxxxxxxxxxx> diff -r e355ae38c83b -r 7570087c2552 linux-2.6-xen-sparse/arch/xen/kernel/reboot.c --- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Fri Aug 19 02:41:16 2005 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Fri Aug 19 10:46:21 2005 @@ -65,13 +65,6 @@ /* Hmmm... a cleaner interface to suspend/resume blkdevs would be nice. 
*/ /* XXX SMH: yes it would :-( */ -#ifdef CONFIG_XEN_BLKDEV_FRONTEND - extern void blkdev_suspend(void); - extern void blkdev_resume(void); -#else -#define blkdev_suspend() do{}while(0) -#define blkdev_resume() do{}while(0) -#endif #ifdef CONFIG_XEN_NETDEV_FRONTEND extern void netif_suspend(void); @@ -119,8 +112,6 @@ netif_suspend(); - blkdev_suspend(); - time_suspend(); #ifdef CONFIG_SMP @@ -175,8 +166,6 @@ #endif time_resume(); - - blkdev_resume(); netif_resume(); diff -r e355ae38c83b -r 7570087c2552 linux-2.6-xen-sparse/drivers/xen/blkback/Makefile --- a/linux-2.6-xen-sparse/drivers/xen/blkback/Makefile Fri Aug 19 02:41:16 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/Makefile Fri Aug 19 10:46:21 2005 @@ -1,2 +1,2 @@ -obj-y := blkback.o control.o interface.o vbd.o +obj-y := blkback.o xenbus.o interface.o vbd.o diff -r e355ae38c83b -r 7570087c2552 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Fri Aug 19 02:41:16 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Fri Aug 19 10:46:21 2005 @@ -104,7 +104,6 @@ #endif static int do_block_io_op(blkif_t *blkif, int max_to_do); -static void dispatch_probe(blkif_t *blkif, blkif_request_t *req); static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req); static void make_response(blkif_t *blkif, unsigned long id, unsigned short op, int st); @@ -349,10 +348,6 @@ dispatch_rw_block_io(blkif, req); break; - case BLKIF_OP_PROBE: - dispatch_probe(blkif, req); - break; - default: DPRINTK("error: unknown block io operation [%d]\n", req->operation); @@ -363,66 +358,6 @@ blk_ring->req_cons = i; return more_to_do; -} - -static void dispatch_probe(blkif_t *blkif, blkif_request_t *req) -{ - int rsp = BLKIF_RSP_ERROR; - int pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; - - /* We expect one buffer only. */ - if ( unlikely(req->nr_segments != 1) ) - goto out; - - /* Make sure the buffer is page-sized. 
*/ - if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) || - (blkif_last_sect(req->frame_and_sects[0]) != ((PAGE_SIZE/512)-1)) ) - goto out; - -#ifdef CONFIG_XEN_BLKDEV_GRANT - { - struct gnttab_map_grant_ref map; - - map.host_addr = MMAP_VADDR(pending_idx, 0); - map.flags = GNTMAP_host_map; - map.ref = blkif_gref_from_fas(req->frame_and_sects[0]); - map.dom = blkif->domid; - - if ( unlikely(HYPERVISOR_grant_table_op( - GNTTABOP_map_grant_ref, &map, 1))) - BUG(); - - if ( map.handle < 0 ) - goto out; - - pending_handle(pending_idx, 0) = map.handle; - } -#else /* else CONFIG_XEN_BLKDEV_GRANT */ - -#ifdef CONFIG_XEN_BLKDEV_TAP_BE - /* Grab the real frontend out of the probe message. */ - if (req->frame_and_sects[1] == BLKTAP_COOKIE) - blkif->is_blktap = 1; -#endif - - - if ( HYPERVISOR_update_va_mapping_otherdomain( - MMAP_VADDR(pending_idx, 0), - pfn_pte_ma(req->frame_and_sects[0] >> PAGE_SHIFT, PAGE_KERNEL), -#ifdef CONFIG_XEN_BLKDEV_TAP_BE - 0, (blkif->is_blktap ? ID_TO_DOM(req->id) : blkif->domid) ) ) -#else - 0, blkif->domid) ) -#endif - goto out; -#endif /* endif CONFIG_XEN_BLKDEV_GRANT */ - - rsp = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0), - PAGE_SIZE / sizeof(vdisk_t)); - - out: - fast_flush_area(pending_idx, 1); - make_response(blkif, req->id, req->operation, rsp); } static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req) @@ -460,7 +395,7 @@ goto bad_descriptor; } - preq.dev = req->device; + preq.dev = req->handle; preq.sector_number = req->sector_number; preq.nr_sects = 0; @@ -730,8 +665,8 @@ 0, SLAB_HWCACHE_ALIGN, NULL, NULL); #endif - blkif_ctrlif_init(); - + blkif_xenbus_init(); + #ifdef CONFIG_XEN_BLKDEV_GRANT memset( pending_grant_handles, BLKBACK_INVALID_HANDLE, MMAP_PAGES ); printk(KERN_ALERT "Blkif backend is using grant tables.\n"); diff -r e355ae38c83b -r 7570087c2552 linux-2.6-xen-sparse/drivers/xen/blkback/common.h --- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Fri Aug 19 02:41:16 2005 +++ 
b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Fri Aug 19 10:46:21 2005 @@ -13,7 +13,6 @@ #include <asm/io.h> #include <asm/setup.h> #include <asm/pgalloc.h> -#include <asm-xen/ctrl_if.h> #include <asm-xen/evtchn.h> #include <asm-xen/hypervisor.h> #include <asm-xen/xen-public/io/blkif.h> @@ -47,6 +46,7 @@ /* Physical parameters of the comms window. */ unsigned long shmem_frame; unsigned int evtchn; + unsigned int remote_evtchn; /* Comms information. */ blkif_back_ring_t blk_ring; /* VBDs attached to this interface. */ @@ -81,17 +81,29 @@ void blkif_connect(blkif_be_connect_t *connect); int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id); void blkif_disconnect_complete(blkif_t *blkif); -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle); +blkif_t *blkif_find(domid_t domid); +void free_blkif(blkif_t *blkif); +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); + #define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) #define blkif_put(_b) \ do { \ if ( atomic_dec_and_test(&(_b)->refcnt) ) \ - blkif_disconnect_complete(_b); \ + free_blkif(_b); \ } while (0) -void vbd_create(blkif_be_vbd_create_t *create); +struct vbd; +void vbd_free(blkif_t *blkif, struct vbd *vbd); + +/* Creates inactive vbd. 
*/ +struct vbd *vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, blkif_pdev_t pdevice, int readonly); +int vbd_is_active(struct vbd *vbd); +void vbd_activate(blkif_t *blkif, struct vbd *vbd); + +unsigned long vbd_size(struct vbd *vbd); +unsigned int vbd_info(struct vbd *vbd); +unsigned long vbd_secsize(struct vbd *vbd); void vbd_destroy(blkif_be_vbd_destroy_t *delete); -int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds); void destroy_all_vbds(blkif_t *blkif); struct phys_req { @@ -104,9 +116,10 @@ int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); void blkif_interface_init(void); -void blkif_ctrlif_init(void); void blkif_deschedule(blkif_t *blkif); + +void blkif_xenbus_init(void); irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); diff -r e355ae38c83b -r 7570087c2552 linux-2.6-xen-sparse/drivers/xen/blkback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Fri Aug 19 02:41:16 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Fri Aug 19 10:46:21 2005 @@ -7,24 +7,135 @@ */ #include "common.h" +#include <asm-xen/ctrl_if.h> +#include <asm-xen/evtchn.h> #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) #define VMALLOC_VMADDR(x) ((unsigned long)(x)) #endif #define BLKIF_HASHSZ 1024 -#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1)) +#define BLKIF_HASH(_d) (((int)(_d))&(BLKIF_HASHSZ-1)) static kmem_cache_t *blkif_cachep; static blkif_t *blkif_hash[BLKIF_HASHSZ]; -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) -{ - blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; - while ( (blkif != NULL) && - ((blkif->domid != domid) || (blkif->handle != handle)) ) +blkif_t *blkif_find(domid_t domid) +{ + blkif_t *blkif = blkif_hash[BLKIF_HASH(domid)]; + + while (blkif) { + if (blkif->domid == domid) { + blkif_get(blkif); + return blkif; + } blkif = blkif->hash_next; + } + + blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); + if (!blkif) + return 
ERR_PTR(-ENOMEM); + + memset(blkif, 0, sizeof(*blkif)); + blkif->domid = domid; + blkif->status = DISCONNECTED; + spin_lock_init(&blkif->vbd_lock); + spin_lock_init(&blkif->blk_ring_lock); + atomic_set(&blkif->refcnt, 1); + + blkif->hash_next = blkif_hash[BLKIF_HASH(domid)]; + blkif_hash[BLKIF_HASH(domid)] = blkif; return blkif; +} + +#ifndef CONFIG_XEN_BLKDEV_GRANT +static int map_frontend_page(blkif_t *blkif, unsigned long localaddr, + unsigned long shared_page) +{ + return direct_remap_area_pages(&init_mm, localaddr, + shared_page<<PAGE_SHIFT, PAGE_SIZE, + __pgprot(_KERNPG_TABLE), blkif->domid); +} + +static void unmap_frontend_page(blkif_t *blkif) +{ +} +#else +static int map_frontend_page(blkif_t *blkif, unsigned long localaddr, + unsigned long shared_page) +{ + struct gnttab_map_grant_ref op; + op.host_addr = localaddr; + op.flags = GNTMAP_host_map; + op.ref = shared_page; + op.dom = blkif->domid; + + BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) ); + + if (op.handle < 0) { + DPRINTK(" Grant table operation failure !\n"); + return op.handle; + } + + blkif->shmem_ref = shared_page; + blkif->shmem_handle = op.handle; + blkif->shmem_vaddr = localaddr; + return 0; +} + +static void unmap_frontend_page(blkif_t *blkif) +{ + struct gnttab_unmap_grant_ref op; + + op.host_addr = blkif->shmem_vaddr; + op.handle = blkif->shmem_handle; + op.dev_bus_addr = 0; + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); +} +#endif /* CONFIG_XEN_BLKDEV_GRANT */ + +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) +{ + struct vm_struct *vma; + blkif_sring_t *sring; + evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain }; + int err; + + BUG_ON(blkif->remote_evtchn); + + if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL ) + return -ENOMEM; + + err = map_frontend_page(blkif, VMALLOC_VMADDR(vma->addr), shared_page); + if (err) { + vfree(vma->addr); + return err; + } + + op.u.bind_interdomain.dom1 = DOMID_SELF; + 
op.u.bind_interdomain.dom2 = blkif->domid; + op.u.bind_interdomain.port1 = 0; + op.u.bind_interdomain.port2 = evtchn; + err = HYPERVISOR_event_channel_op(&op); + if (err) { + unmap_frontend_page(blkif); + vfree(vma->addr); + return err; + } + + blkif->evtchn = op.u.bind_interdomain.port1; + blkif->remote_evtchn = evtchn; + + sring = (blkif_sring_t *)vma->addr; + SHARED_RING_INIT(sring); + BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE); + + bind_evtchn_to_irqhandler(blkif->evtchn, blkif_be_int, 0, "blkif-backend", + blkif); + blkif->status = CONNECTED; + blkif->shmem_frame = shared_page; + + return 0; } static void __blkif_disconnect_complete(void *arg) @@ -32,21 +143,13 @@ blkif_t *blkif = (blkif_t *)arg; ctrl_msg_t cmsg; blkif_be_disconnect_t disc; -#ifdef CONFIG_XEN_BLKDEV_GRANT - struct gnttab_unmap_grant_ref op; -#endif /* * These can't be done in blkif_disconnect() because at that point there * may be outstanding requests at the disc whose asynchronous responses * must still be notified to the remote driver. */ -#ifdef CONFIG_XEN_BLKDEV_GRANT - op.host_addr = blkif->shmem_vaddr; - op.handle = blkif->shmem_handle; - op.dev_bus_addr = 0; - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); -#endif + unmap_frontend_page(blkif); vfree(blkif->blk_ring.sring); /* Construct the deferred response message. 
*/ @@ -81,200 +184,33 @@ schedule_work(&blkif->work); } -void blkif_create(blkif_be_create_t *create) -{ - domid_t domid = create->domid; - unsigned int handle = create->blkif_handle; - blkif_t **pblkif, *blkif; - - if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL ) +void free_blkif(blkif_t *blkif) +{ + blkif_t **pblkif; + evtchn_op_t op = { .cmd = EVTCHNOP_close }; + + op.u.close.port = blkif->evtchn; + op.u.close.dom = DOMID_SELF; + HYPERVISOR_event_channel_op(&op); + op.u.close.port = blkif->remote_evtchn; + op.u.close.dom = blkif->domid; + HYPERVISOR_event_channel_op(&op); + + if (blkif->evtchn) + unbind_evtchn_from_irqhandler(blkif->evtchn, blkif); + + if (blkif->blk_ring.sring) + vfree(blkif->blk_ring.sring); + + pblkif = &blkif_hash[BLKIF_HASH(blkif->domid)]; + while ( *pblkif != blkif ) { - DPRINTK("Could not create blkif: out of memory\n"); - create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; - return; - } - - memset(blkif, 0, sizeof(*blkif)); - blkif->domid = domid; - blkif->handle = handle; - blkif->status = DISCONNECTED; - spin_lock_init(&blkif->vbd_lock); - spin_lock_init(&blkif->blk_ring_lock); - atomic_set(&blkif->refcnt, 0); - - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; - while ( *pblkif != NULL ) - { - if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) - { - DPRINTK("Could not create blkif: already exists\n"); - create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS; - kmem_cache_free(blkif_cachep, blkif); - return; - } + BUG_ON(!*pblkif); pblkif = &(*pblkif)->hash_next; } - - blkif->hash_next = *pblkif; - *pblkif = blkif; - - DPRINTK("Successfully created blkif\n"); - create->status = BLKIF_BE_STATUS_OKAY; -} - -void blkif_destroy(blkif_be_destroy_t *destroy) -{ - domid_t domid = destroy->domid; - unsigned int handle = destroy->blkif_handle; - blkif_t **pblkif, *blkif; - - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; - while ( (blkif = *pblkif) != NULL ) - { - if ( (blkif->domid == domid) && (blkif->handle == 
handle) ) - { - if ( blkif->status != DISCONNECTED ) - goto still_connected; - goto destroy; - } - pblkif = &blkif->hash_next; - } - - destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; - return; - - still_connected: - destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; - return; - - destroy: *pblkif = blkif->hash_next; destroy_all_vbds(blkif); kmem_cache_free(blkif_cachep, blkif); - destroy->status = BLKIF_BE_STATUS_OKAY; -} - -void blkif_connect(blkif_be_connect_t *connect) -{ - domid_t domid = connect->domid; - unsigned int handle = connect->blkif_handle; - unsigned int evtchn = connect->evtchn; - unsigned long shmem_frame = connect->shmem_frame; - struct vm_struct *vma; -#ifdef CONFIG_XEN_BLKDEV_GRANT - int ref = connect->shmem_ref; -#else - pgprot_t prot; - int error; -#endif - blkif_t *blkif; - blkif_sring_t *sring; - - blkif = blkif_find_by_handle(domid, handle); - if ( unlikely(blkif == NULL) ) - { - DPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n", - connect->domid, connect->blkif_handle); - connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; - return; - } - - if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL ) - { - connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; - return; - } - -#ifndef CONFIG_XEN_BLKDEV_GRANT - prot = __pgprot(_KERNPG_TABLE); - error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr), - shmem_frame<<PAGE_SHIFT, PAGE_SIZE, - prot, domid); - if ( error != 0 ) - { - if ( error == -ENOMEM ) - connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; - else if ( error == -EFAULT ) - connect->status = BLKIF_BE_STATUS_MAPPING_ERROR; - else - connect->status = BLKIF_BE_STATUS_ERROR; - vfree(vma->addr); - return; - } -#else - { /* Map: Use the Grant table reference */ - struct gnttab_map_grant_ref op; - op.host_addr = VMALLOC_VMADDR(vma->addr); - op.flags = GNTMAP_host_map; - op.ref = ref; - op.dom = domid; - - BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) ); - - handle = op.handle; - - if 
(op.handle < 0) { - DPRINTK(" Grant table operation failure !\n"); - connect->status = BLKIF_BE_STATUS_MAPPING_ERROR; - vfree(vma->addr); - return; - } - - blkif->shmem_ref = ref; - blkif->shmem_handle = handle; - blkif->shmem_vaddr = VMALLOC_VMADDR(vma->addr); - } -#endif - - if ( blkif->status != DISCONNECTED ) - { - connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; - vfree(vma->addr); - return; - } - sring = (blkif_sring_t *)vma->addr; - SHARED_RING_INIT(sring); - BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE); - - blkif->evtchn = evtchn; - blkif->shmem_frame = shmem_frame; - blkif->status = CONNECTED; - blkif_get(blkif); - - bind_evtchn_to_irqhandler( - blkif->evtchn, blkif_be_int, 0, "blkif-backend", blkif); - - connect->status = BLKIF_BE_STATUS_OKAY; -} - -int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id) -{ - domid_t domid = disconnect->domid; - unsigned int handle = disconnect->blkif_handle; - blkif_t *blkif; - - blkif = blkif_find_by_handle(domid, handle); - if ( unlikely(blkif == NULL) ) - { - DPRINTK("blkif_disconnect attempted for non-existent blkif" - " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle); - disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; - return 1; /* Caller will send response error message. */ - } - - if ( blkif->status == CONNECTED ) - { - blkif->status = DISCONNECTING; - blkif->disconnect_rspid = rsp_id; - wmb(); /* Let other CPUs see the status change. */ - unbind_evtchn_from_irqhandler(blkif->evtchn, blkif); - blkif_deschedule(blkif); - blkif_put(blkif); - return 0; /* Caller should not send response message. 
*/ - } - - disconnect->status = BLKIF_BE_STATUS_OKAY; - return 1; } void __init blkif_interface_init(void) diff -r e355ae38c83b -r 7570087c2552 linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c Fri Aug 19 02:41:16 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c Fri Aug 19 10:46:21 2005 @@ -11,13 +11,16 @@ */ #include "common.h" +#include <asm-xen/xenbus.h> struct vbd { - blkif_vdev_t vdevice; /* what the domain refers to this vbd as */ + blkif_vdev_t handle; /* what the domain refers to this vbd as */ unsigned char readonly; /* Non-zero -> read-only */ unsigned char type; /* VDISK_xxx */ blkif_pdev_t pdevice; /* phys device that this vbd maps to */ struct block_device *bdev; + + int active; rb_node_t rb; /* for linking into R-B tree lookup struct */ }; @@ -33,140 +36,128 @@ #define bdev_hardsect_size(_b) 512 #endif -void vbd_create(blkif_be_vbd_create_t *create) +unsigned long vbd_size(struct vbd *vbd) +{ + return vbd_sz(vbd); +} + +unsigned int vbd_info(struct vbd *vbd) +{ + return vbd->type | (vbd->readonly?VDISK_READONLY:0); +} + +unsigned long vbd_secsize(struct vbd *vbd) +{ + return bdev_hardsect_size(vbd->bdev); +} + +int vbd_is_active(struct vbd *vbd) +{ + return vbd->active; +} + +struct vbd *vbd_create(blkif_t *blkif, blkif_vdev_t handle, + blkif_pdev_t pdevice, int readonly) { struct vbd *vbd; + + if ( unlikely((vbd = kmalloc(sizeof(struct vbd), GFP_KERNEL)) == NULL) ) + { + DPRINTK("vbd_create: out of memory\n"); + return ERR_PTR(-ENOMEM); + } + + vbd->handle = handle; + vbd->readonly = readonly; + vbd->type = 0; + vbd->active = 0; + + vbd->pdevice = pdevice; + + /* FIXME: Who frees vbd on failure? --RR */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) + vbd->bdev = open_by_devnum( + vbd_map_devnum(vbd->pdevice), + vbd->readonly ? 
FMODE_READ : FMODE_WRITE); + if ( IS_ERR(vbd->bdev) ) + { + DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice); + return ERR_PTR(-ENOENT); + } + + if ( (vbd->bdev->bd_disk == NULL) ) + { + DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice); + bdev_put(vbd->bdev); + return ERR_PTR(-ENOENT); + } + + if ( vbd->bdev->bd_disk->flags & GENHD_FL_CD ) + vbd->type |= VDISK_CDROM; + if ( vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE ) + vbd->type |= VDISK_REMOVABLE; + +#else + if ( (blk_size[MAJOR(vbd->pdevice)] == NULL) || (vbd_sz(vbd) == 0) ) + { + DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice); + return ERR_PTR(-ENOENT); + } +#endif + + DPRINTK("Successful creation of handle=%04x (dom=%u)\n", + handle, blkif->domid); + return vbd; +} + +void vbd_activate(blkif_t *blkif, struct vbd *vbd) +{ rb_node_t **rb_p, *rb_parent = NULL; - blkif_t *blkif; - blkif_vdev_t vdevice = create->vdevice; - - blkif = blkif_find_by_handle(create->domid, create->blkif_handle); - if ( unlikely(blkif == NULL) ) - { - DPRINTK("vbd_create attempted for non-existent blkif (%u,%u)\n", - create->domid, create->blkif_handle); - create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; - return; - } - + struct vbd *i; + BUG_ON(vbd_is_active(vbd)); + + /* Find where to put it. */ rb_p = &blkif->vbd_rb.rb_node; while ( *rb_p != NULL ) { rb_parent = *rb_p; - vbd = rb_entry(rb_parent, struct vbd, rb); - if ( vdevice < vbd->vdevice ) + i = rb_entry(rb_parent, struct vbd, rb); + if ( vbd->handle < i->handle ) { rb_p = &rb_parent->rb_left; } - else if ( vdevice > vbd->vdevice ) + else if ( vbd->handle > i->handle ) { rb_p = &rb_parent->rb_right; } else { - DPRINTK("vbd_create attempted for already existing vbd\n"); - create->status = BLKIF_BE_STATUS_VBD_EXISTS; - return; + /* We never create two of same vbd, so not possible. 
*/ + BUG(); } } - if ( unlikely((vbd = kmalloc(sizeof(struct vbd), GFP_KERNEL)) == NULL) ) - { - DPRINTK("vbd_create: out of memory\n"); - create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; - return; - } - - vbd->vdevice = vdevice; - vbd->readonly = create->readonly; - vbd->type = 0; - - /* Mask to 16-bit for compatibility with old tools */ - vbd->pdevice = create->pdevice & 0xffff; - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) - vbd->bdev = open_by_devnum( - vbd_map_devnum(vbd->pdevice), - vbd->readonly ? FMODE_READ : FMODE_WRITE); - if ( IS_ERR(vbd->bdev) ) - { - DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice); - create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND; - return; - } - - if ( (vbd->bdev->bd_disk == NULL) ) - { - DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice); - create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND; - bdev_put(vbd->bdev); - return; - } - - if ( vbd->bdev->bd_disk->flags & GENHD_FL_CD ) - vbd->type |= VDISK_CDROM; - if ( vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE ) - vbd->type |= VDISK_REMOVABLE; - -#else - if ( (blk_size[MAJOR(vbd->pdevice)] == NULL) || (vbd_sz(vbd) == 0) ) - { - DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice); - create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND; - return; - } -#endif + /* Now we're active. 
*/ + vbd->active = 1; + blkif_get(blkif); spin_lock(&blkif->vbd_lock); rb_link_node(&vbd->rb, rb_parent, rb_p); rb_insert_color(&vbd->rb, &blkif->vbd_rb); spin_unlock(&blkif->vbd_lock); - - DPRINTK("Successful creation of vdev=%04x (dom=%u)\n", - vdevice, create->domid); - create->status = BLKIF_BE_STATUS_OKAY; -} - - -void vbd_destroy(blkif_be_vbd_destroy_t *destroy) -{ - blkif_t *blkif; - struct vbd *vbd; - rb_node_t *rb; - blkif_vdev_t vdevice = destroy->vdevice; - - blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle); - if ( unlikely(blkif == NULL) ) - { - DPRINTK("vbd_destroy attempted for non-existent blkif (%u,%u)\n", - destroy->domid, destroy->blkif_handle); - destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; - return; - } - - rb = blkif->vbd_rb.rb_node; - while ( rb != NULL ) - { - vbd = rb_entry(rb, struct vbd, rb); - if ( vdevice < vbd->vdevice ) - rb = rb->rb_left; - else if ( vdevice > vbd->vdevice ) - rb = rb->rb_right; - else - goto found; - } - - destroy->status = BLKIF_BE_STATUS_VBD_NOT_FOUND; - return; - - found: - spin_lock(&blkif->vbd_lock); - rb_erase(rb, &blkif->vbd_rb); - spin_unlock(&blkif->vbd_lock); +} + +void vbd_free(blkif_t *blkif, struct vbd *vbd) +{ + if (vbd_is_active(vbd)) { + spin_lock(&blkif->vbd_lock); + rb_erase(&vbd->rb, &blkif->vbd_rb); + spin_unlock(&blkif->vbd_lock); + blkif_put(blkif); + } bdev_put(vbd->bdev); kfree(vbd); } - void destroy_all_vbds(blkif_t *blkif) { @@ -183,73 +174,11 @@ bdev_put(vbd->bdev); kfree(vbd); spin_lock(&blkif->vbd_lock); + blkif_put(blkif); } spin_unlock(&blkif->vbd_lock); } - - -static void vbd_probe_single( - blkif_t *blkif, vdisk_t *vbd_info, struct vbd *vbd) -{ - vbd_info->device = vbd->vdevice; - vbd_info->info = vbd->type | (vbd->readonly ? 
VDISK_READONLY : 0); - vbd_info->capacity = vbd_sz(vbd); - vbd_info->sector_size = bdev_hardsect_size(vbd->bdev); -} - - -int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds) -{ - int rc = 0, nr_vbds = 0; - rb_node_t *rb; - - spin_lock(&blkif->vbd_lock); - - if ( (rb = blkif->vbd_rb.rb_node) == NULL ) - goto out; - - new_subtree: - /* STEP 1. Find least node (it'll be left-most). */ - while ( rb->rb_left != NULL ) - rb = rb->rb_left; - - for ( ; ; ) - { - /* STEP 2. Dealt with left subtree. Now process current node. */ - vbd_probe_single(blkif, &vbd_info[nr_vbds], - rb_entry(rb, struct vbd, rb)); - if ( ++nr_vbds == max_vbds ) - goto out; - - /* STEP 3. Process right subtree, if any. */ - if ( rb->rb_right != NULL ) - { - rb = rb->rb_right; - goto new_subtree; - } - - /* STEP 4. Done both subtrees. Head back through ancesstors. */ - for ( ; ; ) - { - /* We're done when we get back to the root node. */ - if ( rb->rb_parent == NULL ) - goto out; - /* If we are left of parent, then parent is next to process. */ - if ( rb->rb_parent->rb_left == rb ) - break; - /* If we are right of parent, then we climb to grandparent. */ - rb = rb->rb_parent; - } - - rb = rb->rb_parent; - } - - out: - spin_unlock(&blkif->vbd_lock); - return (rc == 0) ? 
nr_vbds : rc; -} - int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation) { @@ -264,9 +193,9 @@ while ( rb != NULL ) { vbd = rb_entry(rb, struct vbd, rb); - if ( req->dev < vbd->vdevice ) + if ( req->dev < vbd->handle ) rb = rb->rb_left; - else if ( req->dev > vbd->vdevice ) + else if ( req->dev > vbd->handle ) rb = rb->rb_right; else goto found; diff -r e355ae38c83b -r 7570087c2552 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Aug 19 02:41:16 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Aug 19 10:46:21 2005 @@ -53,8 +53,8 @@ #include <linux/sched.h> #include <linux/interrupt.h> #include <scsi/scsi.h> -#include <asm-xen/ctrl_if.h> #include <asm-xen/evtchn.h> +#include <asm-xen/xenbus.h> #ifdef CONFIG_XEN_BLKDEV_GRANT #include <asm-xen/xen-public/grant_table.h> #include <asm-xen/gnttab.h> @@ -65,22 +65,14 @@ /* Control whether runtime update of vbds is enabled. */ #define ENABLE_VBD_UPDATE 1 -#if ENABLE_VBD_UPDATE -static void vbd_update(void); -#else -static void vbd_update(void){}; -#endif - #define BLKIF_STATE_CLOSED 0 #define BLKIF_STATE_DISCONNECTED 1 #define BLKIF_STATE_CONNECTED 2 -static int blkif_handle = 0; static unsigned int blkif_state = BLKIF_STATE_CLOSED; static unsigned int blkif_evtchn = 0; - -static int blkif_control_rsp_valid; -static blkif_response_t blkif_control_rsp; +static unsigned int blkif_vbds = 0; +static unsigned int blkif_vbds_connected = 0; static blkif_front_ring_t blk_ring; @@ -105,7 +97,7 @@ static void kick_pending_request_queues(void); -int __init xlblk_init(void); +static int __init xlblk_init(void); static void blkif_completion(struct blk_shadow *s); @@ -179,19 +171,6 @@ module_init(xlblk_init); -#if ENABLE_VBD_UPDATE -static void update_vbds_task(void *unused) -{ - xlvbd_update_vbds(); -} - -static void vbd_update(void) -{ - static DECLARE_WORK(update_tq, update_vbds_task, NULL); - schedule_work(&update_tq); -} -#endif 
/* ENABLE_VBD_UPDATE */ - static struct xlbd_disk_info *head_waiting = NULL; static void kick_pending_request_queues(void) { @@ -221,16 +200,7 @@ int blkif_release(struct inode *inode, struct file *filep) { - struct gendisk *gd = inode->i_bdev->bd_disk; - struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data; - - /* - * When usage drops to zero it may allow more VBD updates to occur. - * Update of usage count is protected by a per-device semaphore. - */ - if ( --di->mi->usage == 0 ) - vbd_update(); - + /* FIXME: This is where we can actually free up majors, etc. --RR */ return 0; } @@ -301,7 +271,7 @@ ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; ring_req->sector_number = (blkif_sector_t)req->sector; - ring_req->device = di->xd_device; + ring_req->handle = di->handle; ring_req->nr_segments = 0; rq_for_each_bio(bio, req) @@ -446,10 +416,6 @@ end_that_request_last(req); break; - case BLKIF_OP_PROBE: - memcpy(&blkif_control_rsp, bret, sizeof(*bret)); - blkif_control_rsp_valid = 1; - break; default: BUG(); } @@ -483,28 +449,6 @@ #define blkif_io_lock io_request_lock /*============================================================================*/ -#if ENABLE_VBD_UPDATE - -/* - * blkif_update_int/update-vbds_task - handle VBD update events. - * Schedule a task for keventd to run, which will update the VBDs and perform - * the corresponding updates to our view of VBD state. - */ -static void update_vbds_task(void *unused) -{ - xlvbd_update_vbds(); -} - -static void vbd_update(void) -{ - static struct tq_struct update_tq; - update_tq.routine = update_vbds_task; - schedule_task(&update_tq); -} - -#endif /* ENABLE_VBD_UPDATE */ -/*============================================================================*/ - static void kick_pending_request_queues(void) { /* We kick pending request queues if the ring is reasonably empty. 
*/ @@ -757,7 +701,8 @@ char * buffer, unsigned long sector_number, unsigned short nr_sectors, - kdev_t device) + kdev_t device, + blkif_vdev_t handle) { unsigned long buffer_ma = virt_to_bus(buffer); unsigned long xid; @@ -871,7 +816,7 @@ req->id = xid; req->operation = operation; req->sector_number = (blkif_sector_t)sector_number; - req->device = device; + req->handle = handle; req->nr_segments = 1; #ifdef CONFIG_XEN_BLKDEV_GRANT /* install a grant reference. */ @@ -1047,108 +992,10 @@ /***************************** COMMON CODE *******************************/ -#ifdef CONFIG_XEN_BLKDEV_GRANT -void blkif_control_probe_send(blkif_request_t *req, blkif_response_t *rsp, - unsigned long address) -{ - int ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); - ASSERT( ref != -ENOSPC ); - - gnttab_grant_foreign_access_ref( ref, rdomid, address >> PAGE_SHIFT, 0 ); - - req->frame_and_sects[0] = blkif_fas_from_gref(ref, 0, (PAGE_SIZE/512)-1); - - blkif_control_send(req, rsp); -} -#endif - -void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp) -{ - unsigned long flags, id; - blkif_request_t *req_d; - - retry: - while ( RING_FULL(&blk_ring) ) - { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - } - - spin_lock_irqsave(&blkif_io_lock, flags); - if ( RING_FULL(&blk_ring) ) - { - spin_unlock_irqrestore(&blkif_io_lock, flags); - goto retry; - } - - DISABLE_SCATTERGATHER(); - req_d = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt); - *req_d = *req; - - id = GET_ID_FROM_FREELIST(); - req_d->id = id; - blk_shadow[id].request = (unsigned long)req; - - pickle_request(&blk_shadow[id], req); - - blk_ring.req_prod_pvt++; - flush_requests(); - - spin_unlock_irqrestore(&blkif_io_lock, flags); - - while ( !blkif_control_rsp_valid ) - { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - } - - memcpy(rsp, &blkif_control_rsp, sizeof(*rsp)); - blkif_control_rsp_valid = 0; -} - - -/* Send a driver status notification to the domain 
controller. */ -static void send_driver_status(int ok) -{ - ctrl_msg_t cmsg = { - .type = CMSG_BLKIF_FE, - .subtype = CMSG_BLKIF_FE_DRIVER_STATUS, - .length = sizeof(blkif_fe_driver_status_t), - }; - blkif_fe_driver_status_t *msg = (void*)cmsg.msg; - - msg->status = (ok ? BLKIF_DRIVER_STATUS_UP : BLKIF_DRIVER_STATUS_DOWN); - - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); -} - -/* Tell the controller to bring up the interface. */ -static void blkif_send_interface_connect(void) -{ - ctrl_msg_t cmsg = { - .type = CMSG_BLKIF_FE, - .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT, - .length = sizeof(blkif_fe_interface_connect_t), - }; - blkif_fe_interface_connect_t *msg = (void*)cmsg.msg; - - msg->handle = 0; - msg->shmem_frame = (virt_to_machine(blk_ring.sring) >> PAGE_SHIFT); - -#ifdef CONFIG_XEN_BLKDEV_GRANT - msg->shmem_ref = gnttab_claim_grant_reference( &gref_head, gref_terminal ); - ASSERT( msg->shmem_ref != -ENOSPC ); - gnttab_grant_foreign_access_ref ( msg->shmem_ref , rdomid, msg->shmem_frame, 0 ); -#endif - - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); -} - static void blkif_free(void) { /* Prevent new requests being issued until we fix things up. */ spin_lock_irq(&blkif_io_lock); - recovery = 1; blkif_state = BLKIF_STATE_DISCONNECTED; spin_unlock_irq(&blkif_io_lock); @@ -1160,31 +1007,6 @@ } unbind_evtchn_from_irqhandler(blkif_evtchn, NULL); blkif_evtchn = 0; -} - -static void blkif_close(void) -{ -} - -/* Move from CLOSED to DISCONNECTED state. 
*/ -static void blkif_disconnect(void) -{ - blkif_sring_t *sring; - - if ( blk_ring.sring != NULL ) - free_page((unsigned long)blk_ring.sring); - - sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL); - SHARED_RING_INIT(sring); - FRONT_RING_INIT(&blk_ring, sring, PAGE_SIZE); - blkif_state = BLKIF_STATE_DISCONNECTED; - blkif_send_interface_connect(); -} - -static void blkif_reset(void) -{ - blkif_free(); - blkif_disconnect(); } static void blkif_recover(void) @@ -1257,11 +1079,14 @@ blkif_state = BLKIF_STATE_CONNECTED; } -static void blkif_connect(blkif_fe_interface_status_t *status) +static void blkif_connect(u16 evtchn, domid_t domid) { int err = 0; - blkif_evtchn = status->evtchn; + blkif_evtchn = evtchn; +#ifdef CONFIG_XEN_BLKDEV_GRANT + rdomid = domid; +#endif err = bind_evtchn_to_irqhandler( blkif_evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL); @@ -1270,142 +1095,310 @@ WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err); return; } - - if ( recovery ) - { - blkif_recover(); - } - else - { - /* Transition to connected in case we need to do - * a partition probe on a whole disk. */ - blkif_state = BLKIF_STATE_CONNECTED; - - /* Probe for discs attached to the interface. */ - xlvbd_init(); - } - - /* Kick pending requests. 
*/ - spin_lock_irq(&blkif_io_lock); - kick_pending_request_queues(); - spin_unlock_irq(&blkif_io_lock); -} - -static void unexpected(blkif_fe_interface_status_t *status) -{ - DPRINTK(" Unexpected blkif status %u in state %u\n", - status->status, blkif_state); -} - -static void blkif_status(blkif_fe_interface_status_t *status) -{ +} + + +static struct xenbus_device_id blkfront_ids[] = { + { "vbd" }, + { "" } +}; + +struct blkfront_info +{ + /* We watch the backend */ + struct xenbus_watch watch; + int vdevice; + u16 handle; + int connected; + struct xenbus_device *dev; + char *backend; +}; + +static void watch_for_status(struct xenbus_watch *watch, const char *node) +{ + struct blkfront_info *info; + unsigned int binfo; + unsigned long sectors, sector_size; + int err; + + info = container_of(watch, struct blkfront_info, watch); + node += strlen(watch->node); + + /* FIXME: clean up when error on the other end. */ + if (info->connected) + return; + + err = xenbus_gather(watch->node, + "sectors", "%lu", &sectors, + "info", "%u", &binfo, + "sector-size", "%lu", &sector_size, + NULL); + + if (err) + xenbus_dev_error(info->dev, err, "reading backend fields"); + else { + xlvbd_add(sectors, info->vdevice, info->handle, binfo, + sector_size); + info->connected = 1; + + /* First to connect? blkif is now connected. */ + if (blkif_vbds_connected++ == 0) + blkif_state = BLKIF_STATE_CONNECTED; + + xenbus_dev_ok(info->dev); + + /* Kick pending requests. 
*/ + spin_lock_irq(&blkif_io_lock); + kick_pending_request_queues(); + spin_unlock_irq(&blkif_io_lock); + } +} + +static int setup_blkring(struct xenbus_device *dev, unsigned int backend_id) +{ + blkif_sring_t *sring; + evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound }; + int err; + + sring = (void *)__get_free_page(GFP_KERNEL); + if (!sring) { + xenbus_dev_error(dev, -ENOMEM, "allocating shared ring"); + return -ENOMEM; + } + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&blk_ring, sring, PAGE_SIZE); + + op.u.alloc_unbound.dom = backend_id; + err = HYPERVISOR_event_channel_op(&op); + if (err) { + free_page((unsigned long)blk_ring.sring); + blk_ring.sring = 0; + xenbus_dev_error(dev, err, "allocating event channel"); + return err; + } + blkif_connect(op.u.alloc_unbound.port, backend_id); + return 0; +} + +/* Common code used when first setting up, and when resuming. */ +static int talk_to_backend(struct xenbus_device *dev, + struct blkfront_info *info) +{ + char *backend; + const char *message; + int err, backend_id; + + backend = xenbus_read(dev->nodename, "backend", NULL); + if (IS_ERR(backend)) { + err = PTR_ERR(backend); + xenbus_dev_error(dev, err, "reading %s/backend", + dev->nodename); + goto out; + } + + /* FIXME: This driver can't handle backends on different + * domains. Check and fail gracefully. */ + err = xenbus_scanf(dev->nodename, "backend-id", "%i", &backend_id); + if (err < 0) { + xenbus_dev_error(dev, err, "reading %s/backend-id", + dev->nodename); + goto free_backend; + } + + /* First device? We create shared ring, alloc event channel. 
*/ + if (blkif_vbds == 0) { + err = setup_blkring(dev, backend_id); + if (err) + goto free_backend; + } + + err = xenbus_transaction_start(dev->nodename); + if (err) { + xenbus_dev_error(dev, err, "starting transaction"); + goto destroy_blkring; + } + #ifdef CONFIG_XEN_BLKDEV_GRANT - rdomid = status->domid; /* need to set rdomid early */ -#endif - - if ( status->handle != blkif_handle ) - { - WPRINTK(" Invalid blkif: handle=%u\n", status->handle); - unexpected(status); - return; - } - - switch ( status->status ) - { - case BLKIF_INTERFACE_STATUS_CLOSED: - switch ( blkif_state ) - { - case BLKIF_STATE_CLOSED: - unexpected(status); - break; - case BLKIF_STATE_DISCONNECTED: - case BLKIF_STATE_CONNECTED: - unexpected(status); - blkif_close(); - break; - } - break; - - case BLKIF_INTERFACE_STATUS_DISCONNECTED: - switch ( blkif_state ) - { - case BLKIF_STATE_CLOSED: - blkif_disconnect(); - break; - case BLKIF_STATE_DISCONNECTED: - case BLKIF_STATE_CONNECTED: - /* unexpected(status); */ /* occurs during suspend/resume */ - blkif_reset(); - break; - } - break; - - case BLKIF_INTERFACE_STATUS_CONNECTED: - switch ( blkif_state ) - { - case BLKIF_STATE_CLOSED: - unexpected(status); - blkif_disconnect(); - blkif_connect(status); - break; - case BLKIF_STATE_DISCONNECTED: - blkif_connect(status); - break; - case BLKIF_STATE_CONNECTED: - unexpected(status); - blkif_connect(status); - break; - } - break; - - case BLKIF_INTERFACE_STATUS_CHANGED: - switch ( blkif_state ) - { - case BLKIF_STATE_CLOSED: - case BLKIF_STATE_DISCONNECTED: - unexpected(status); - break; - case BLKIF_STATE_CONNECTED: - vbd_update(); - break; - } - break; - - default: - WPRINTK(" Invalid blkif status: %d\n", status->status); - break; - } -} - - -static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) -{ - switch ( msg->subtype ) - { - case CMSG_BLKIF_FE_INTERFACE_STATUS: - blkif_status((blkif_fe_interface_status_t *) - &msg->msg[0]); - break; - default: - msg->length = 0; - break; - } - - 
ctrl_if_send_response(msg); -} - -int wait_for_blkif(void) + { + int shmem_ref; + shmem_ref = gnttab_claim_grant_reference(&gref_head, + gref_terminal); + ASSERT(shmem_ref != -ENOSPC); + gnttab_grant_foreign_access_ref(shmem_ref, + backend_id, + virt_to_machine(blk_ring.sring) + >> PAGE_SHIFT, 0); + err = xenbus_printf(dev->nodename, "grant-id","%u", shmem_ref); + if (err) { + message = "writing grant-id"; + goto abort_transaction; + } + } +#else + err = xenbus_printf(dev->nodename, "shared-frame", "%lu", + virt_to_machine(blk_ring.sring) >> PAGE_SHIFT); + if (err) { + message = "writing shared-frame"; + goto abort_transaction; + } +#endif + err = xenbus_printf(dev->nodename, + "event-channel", "%u", blkif_evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + + info->watch.node = info->backend = backend; + info->watch.callback = watch_for_status; + + err = register_xenbus_watch(&info->watch); + if (err) { + message = "registering watch on backend"; + goto abort_transaction; + } + + err = xenbus_transaction_end(0); + if (err) { + xenbus_dev_error(dev, err, "completing transaction"); + goto destroy_blkring; + } + return 0; + +abort_transaction: + xenbus_transaction_end(1); + /* Have to do this *outside* transaction. */ + xenbus_dev_error(dev, err, "%s", message); +destroy_blkring: + if (blkif_vbds == 0) + blkif_free(); +free_backend: + kfree(backend); +out: + printk("%s:%u = %i\n", __FILE__, __LINE__, err); + return err; +} + +/* Setup supplies the backend dir, virtual device. + + We place an event channel and shared frame entries. + We watch backend to wait if it's ok. */ +static int blkfront_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err; + struct blkfront_info *info; + int vdevice; + + /* FIXME: Use dynamic device id if this is not set. 
*/ + err = xenbus_scanf(dev->nodename, "virtual-device", "%i", &vdevice); + if (err < 0) { + xenbus_dev_error(dev, err, "reading virtual-device"); + return err; + } + + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + xenbus_dev_error(dev, err, "allocating info structure"); + return err; + } + info->dev = dev; + info->vdevice = vdevice; + info->connected = 0; + /* Front end dir is a number, which is used as the id. */ + info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0); + dev->data = info; + + err = talk_to_backend(dev, info); + if (err) { + kfree(info); + return err; + } + + /* Call once in case entries already there. */ + watch_for_status(&info->watch, info->watch.node); + blkif_vbds++; + return 0; +} + +static int blkfront_remove(struct xenbus_device *dev) +{ + struct blkfront_info *info = dev->data; + + printk("blkfront_remove %s\n", dev->dev.bus_id); + if (info->backend) + unregister_xenbus_watch(&info->watch); + + if (info->connected) { + xlvbd_del(info->handle); + blkif_vbds_connected--; + } + kfree(info->backend); + kfree(info); + + if (--blkif_vbds == 0) + blkif_free(); + + printk("blkfront_remove done\n"); + return 0; +} + +static int blkfront_suspend(struct xenbus_device *dev) +{ + struct blkfront_info *info = dev->data; + + unregister_xenbus_watch(&info->watch); + kfree(info->backend); + info->backend = NULL; + + if (--blkif_vbds == 0) { + recovery = 1; + blkif_free(); + } + + return 0; +} + +static int blkfront_resume(struct xenbus_device *dev) +{ + struct blkfront_info *info = dev->data; + int err; + + /* FIXME: Check geometry hasn't changed here... 
*/ + err = talk_to_backend(dev, info); + if (!err) { + if (blkif_vbds++ == 0) + blkif_recover(); + } + return err; +} + +static struct xenbus_driver blkfront = { + .name = __stringify(KBUILD_MODNAME), + .owner = THIS_MODULE, + .ids = blkfront_ids, + .probe = blkfront_probe, + .remove = blkfront_remove, + .resume = blkfront_resume, + .suspend = blkfront_suspend, +}; + +static void __init init_blk_xenbus(void) +{ + xenbus_register_device(&blkfront); +} + +static int wait_for_blkif(void) { int err = 0; int i; - send_driver_status(1); /* * We should read 'nr_interfaces' from response message and wait * for notifications before proceeding. For now we assume that we * will be notified of exactly one interface. */ - for ( i=0; (blkif_state != BLKIF_STATE_CONNECTED) && (i < 10*HZ); i++ ) + for ( i=0; blkif_state != BLKIF_STATE_CONNECTED && (i < 10*HZ); i++ ) { set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1); @@ -1419,7 +1412,7 @@ return err; } -int __init xlblk_init(void) +static int __init xlblk_init(void) { int i; @@ -1443,27 +1436,11 @@ blk_shadow[i].req.id = i+1; blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; - (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx, - CALLBACK_IN_BLOCKING_CONTEXT); + init_blk_xenbus(); wait_for_blkif(); return 0; -} - -void blkdev_suspend(void) -{ -} - -void blkdev_resume(void) -{ -#ifdef CONFIG_XEN_BLKDEV_GRANT - int i, j; - for ( i = 0; i < BLK_RING_SIZE; i++ ) - for ( j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++ ) - blk_shadow[i].req.frame_and_sects[j] |= GRANTREF_INVALID; -#endif - send_driver_status(1); } static void blkif_completion(struct blk_shadow *s) diff -r e355ae38c83b -r 7570087c2552 linux-2.6-xen-sparse/drivers/xen/blkfront/block.h --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Fri Aug 19 02:41:16 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Fri Aug 19 10:46:21 2005 @@ -100,6 +100,7 @@ struct xlbd_disk_info { int xd_device; + blkif_vdev_t handle; struct xlbd_major_info *mi; 
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) struct xlbd_disk_info *next_waiting; @@ -119,17 +120,10 @@ unsigned command, unsigned long argument); extern int blkif_check(dev_t dev); extern int blkif_revalidate(dev_t dev); -extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp); -#ifdef CONFIG_XEN_BLKDEV_GRANT -extern void blkif_control_probe_send( - blkif_request_t *req, blkif_response_t *rsp, unsigned long address); -#endif extern void do_blkif_request (request_queue_t *rq); -extern void xlvbd_update_vbds(void); - /* Virtual block-device subsystem. */ -extern int xlvbd_init(void); -extern void xlvbd_cleanup(void); - +int xlvbd_add(blkif_sector_t capacity, int device, blkif_vdev_t handle, + u16 info, u16 sector_size); +void xlvbd_del(blkif_vdev_t handle); #endif /* __XEN_DRIVERS_BLOCK_H__ */ diff -r e355ae38c83b -r 7570087c2552 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Fri Aug 19 02:41:16 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Fri Aug 19 10:46:21 2005 @@ -46,8 +46,9 @@ struct lvdisk { blkif_sector_t capacity; /* 0: Size in terms of 512-byte sectors. */ - blkif_vdev_t device; /* 8: Device number (opaque 16 bit value). */ - u16 info; + blkif_vdev_t handle; /* 8: Device number (opaque 16 bit value). */ + u16 info; + dev_t dev; struct list_head list; }; @@ -85,7 +86,7 @@ /* Information about our VBDs. 
*/ #define MAX_VBDS 64 -struct list_head vbds_list; +static LIST_HEAD(vbds_list); #define MAJOR_XEN(dev) ((dev)>>8) #define MINOR_XEN(dev) ((dev) & 0xff) @@ -116,49 +117,6 @@ { list_del(&disk->list); kfree(disk); -} - -static vdisk_t *xlvbd_probe(int *ret) -{ - blkif_response_t rsp; - blkif_request_t req; - vdisk_t *disk_info = NULL; - unsigned long buf; - int nr; - - buf = __get_free_page(GFP_KERNEL); - if ((void *)buf == NULL) - goto out; - - memset(&req, 0, sizeof(req)); - req.operation = BLKIF_OP_PROBE; - req.nr_segments = 1; -#ifdef CONFIG_XEN_BLKDEV_GRANT - blkif_control_probe_send(&req, &rsp, - (unsigned long)(virt_to_machine(buf))); -#else - req.frame_and_sects[0] = blkif_fas(virt_to_machine(buf), 0, (PAGE_SIZE/512)-1); - - blkif_control_send(&req, &rsp); -#endif - if ( rsp.status <= 0 ) { - WPRINTK("Could not probe disks (%d)\n", rsp.status); - goto out; - } - nr = rsp.status; - if ( nr > MAX_VBDS ) - nr = MAX_VBDS; - - disk_info = kmalloc(nr * sizeof(vdisk_t), GFP_KERNEL); - if (disk_info != NULL) - memcpy(disk_info, (void *) buf, nr * sizeof(vdisk_t)); - - if (ret != NULL) - *ret = nr; - -out: - free_page(buf); - return disk_info; } static struct xlbd_major_info *xlbd_alloc_major_info( @@ -189,6 +147,7 @@ break; } + printk("Registering block device major %i\n", ptr->major); if (register_blkdev(ptr->major, ptr->type->devname)) { WPRINTK("can't get major %d with name %s\n", ptr->major, ptr->type->devname); @@ -231,7 +190,7 @@ xlbd_alloc_major_info(major, minor, index)); } -static int xlvbd_init_blk_queue(struct gendisk *gd, vdisk_t *disk) +static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) { request_queue_t *rq; @@ -242,7 +201,7 @@ elevator_init(rq, "noop"); /* Hard sector size and max sectors impersonate the equiv. hardware. */ - blk_queue_hardsect_size(rq, disk->sector_size); + blk_queue_hardsect_size(rq, sector_size); blk_queue_max_sectors(rq, 512); /* Each segment in a request is up to an aligned page in size. 
*/ @@ -261,8 +220,9 @@ return 0; } -struct gendisk *xlvbd_alloc_gendisk( - struct xlbd_major_info *mi, int minor, vdisk_t *disk) +static struct gendisk *xlvbd_alloc_gendisk( + struct xlbd_major_info *mi, int minor, blkif_sector_t capacity, + int device, blkif_vdev_t handle, u16 info, u16 sector_size) { struct gendisk *gd; struct xlbd_disk_info *di; @@ -273,7 +233,8 @@ return NULL; memset(di, 0, sizeof(*di)); di->mi = mi; - di->xd_device = disk->device; + di->xd_device = device; + di->handle = handle; if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0) nr_minors = 1 << mi->type->partn_shift; @@ -296,22 +257,22 @@ gd->first_minor = minor; gd->fops = &xlvbd_block_fops; gd->private_data = di; - set_capacity(gd, disk->capacity); - - if (xlvbd_init_blk_queue(gd, disk)) { + set_capacity(gd, capacity); + + if (xlvbd_init_blk_queue(gd, sector_size)) { del_gendisk(gd); goto out; } di->rq = gd->queue; - if (disk->info & VDISK_READONLY) + if (info & VDISK_READONLY) set_disk_ro(gd, 1); - if (disk->info & VDISK_REMOVABLE) + if (info & VDISK_REMOVABLE) gd->flags |= GENHD_FL_REMOVABLE; - if (disk->info & VDISK_CDROM) + if (info & VDISK_CDROM) gd->flags |= GENHD_FL_CD; add_disk(gd); @@ -323,38 +284,36 @@ return NULL; } -static int xlvbd_device_add(struct list_head *list, vdisk_t *disk) +int xlvbd_add(blkif_sector_t capacity, int device, blkif_vdev_t handle, + u16 info, u16 sector_size) { struct lvdisk *new; - int minor; - dev_t device; struct block_device *bd; struct gendisk *gd; struct xlbd_major_info *mi; - mi = xlbd_get_major_info(disk->device); + mi = xlbd_get_major_info(device); if (mi == NULL) return -EPERM; new = xlvbd_device_alloc(); if (new == NULL) - return -1; - new->capacity = disk->capacity; - new->device = disk->device; - new->info = disk->info; - - minor = MINOR_XEN(disk->device); - device = MKDEV(mi->major, minor); - - bd = bdget(device); + return -ENOMEM; + new->capacity = capacity; + new->info = info; + new->handle = handle; + new->dev = 
MKDEV(MAJOR_XEN(device), MINOR_XEN(device)); + + bd = bdget(new->dev); if (bd == NULL) goto out; - gd = xlvbd_alloc_gendisk(mi, minor, disk); + gd = xlvbd_alloc_gendisk(mi, MINOR_XEN(device), capacity, device, handle, + info, sector_size); if (gd == NULL) goto out_bd; - list_add(&new->list, list); + list_add(&new->list, &vbds_list); out_bd: bdput(bd); out: @@ -363,27 +322,26 @@ static int xlvbd_device_del(struct lvdisk *disk) { - dev_t device; struct block_device *bd; struct gendisk *gd; struct xlbd_disk_info *di; int ret = 0, unused; request_queue_t *rq; - device = MKDEV(MAJOR_XEN(disk->device), MINOR_XEN(disk->device)); - - bd = bdget(device); + bd = bdget(disk->dev); if (bd == NULL) return -1; - gd = get_gendisk(device, &unused); + gd = get_gendisk(disk->dev, &unused); di = gd->private_data; +#if 0 /* This is wrong: hda and hdb share same major, for example. */ if (di->mi->usage != 0) { - WPRINTK("disk removal failed: used [dev=%x]\n", device); + WPRINTK("disk removal failed: used [dev=%x]\n", disk->dev); ret = -1; goto out; } +#endif rq = gd->queue; del_gendisk(gd); @@ -391,110 +349,19 @@ blk_cleanup_queue(rq); xlvbd_device_free(disk); -out: bdput(bd); return ret; } -static int xlvbd_device_update(struct lvdisk *ldisk, vdisk_t *disk) -{ - dev_t device; - struct block_device *bd; - struct gendisk *gd; - int unused; - - if ((ldisk->capacity == disk->capacity) && (ldisk->info == disk->info)) - return 0; - - device = MKDEV(MAJOR_XEN(ldisk->device), MINOR_XEN(ldisk->device)); - - bd = bdget(device); - if (bd == NULL) - return -1; - - gd = get_gendisk(device, &unused); - set_capacity(gd, disk->capacity); - ldisk->capacity = disk->capacity; - - bdput(bd); - - return 0; -} - -void xlvbd_refresh(void) -{ - vdisk_t *newdisks; - struct list_head *tmp, *tmp2; - struct lvdisk *disk; - int i, nr; - - newdisks = xlvbd_probe(&nr); - if (newdisks == NULL) { - WPRINTK("failed to probe\n"); - return; - } - - i = 0; - list_for_each_safe(tmp, tmp2, &vbds_list) { - disk = 
list_entry(tmp, struct lvdisk, list); - - for (i = 0; i < nr; i++) { - if ( !newdisks[i].device ) - continue; - if ( disk->device == newdisks[i].device ) { - xlvbd_device_update(disk, &newdisks[i]); - newdisks[i].device = 0; - break; - } - } - if (i == nr) { - xlvbd_device_del(disk); - newdisks[i].device = 0; - } - } - for (i = 0; i < nr; i++) - if ( newdisks[i].device ) - xlvbd_device_add(&vbds_list, &newdisks[i]); - kfree(newdisks); -} - -/* - * xlvbd_update_vbds - reprobes the VBD status and performs updates driver - * state. The VBDs need to be updated in this way when the domain is - * initialised and also each time we receive an XLBLK_UPDATE event. - */ -void xlvbd_update_vbds(void) -{ - xlvbd_refresh(); -} - -/* - * Set up all the linux device goop for the virtual block devices - * (vbd's) that we know about. Note that although from the backend - * driver's p.o.v. VBDs are addressed simply an opaque 16-bit device - * number, the domain creation tools conventionally allocate these - * numbers to correspond to those used by 'real' linux -- this is just - * for convenience as it means e.g. that the same /etc/fstab can be - * used when booting with or without Xen. 
- */ -int xlvbd_init(void) -{ - int i, nr; - vdisk_t *disks; - - INIT_LIST_HEAD(&vbds_list); - - memset(major_info, 0, sizeof(major_info)); - - disks = xlvbd_probe(&nr); - if (disks == NULL) { - WPRINTK("failed to probe\n"); - return -1; - } - - for (i = 0; i < nr; i++) - xlvbd_device_add(&vbds_list, &disks[i]); - - kfree(disks); - return 0; -} +void xlvbd_del(blkif_vdev_t handle) +{ + struct lvdisk *i; + + list_for_each_entry(i, &vbds_list, list) { + if (i->handle == handle) { + xlvbd_device_del(i); + return; + } + } + BUG(); +} diff -r e355ae38c83b -r 7570087c2552 xen/include/public/io/blkif.h --- a/xen/include/public/io/blkif.h Fri Aug 19 02:41:16 2005 +++ b/xen/include/public/io/blkif.h Fri Aug 19 10:46:21 2005 @@ -18,7 +18,6 @@ #define BLKIF_OP_READ 0 #define BLKIF_OP_WRITE 1 -#define BLKIF_OP_PROBE 2 /* NB. Ring size must be small enough for sizeof(blkif_ring_t) <= PAGE_SIZE. */ #define BLKIF_RING_SIZE 64 @@ -33,7 +32,7 @@ typedef struct blkif_request { u8 operation; /* BLKIF_OP_??? */ u8 nr_segments; /* number of segments */ - blkif_vdev_t device; /* only for read/write requests */ + blkif_vdev_t handle; /* only for read/write requests */ unsigned long id; /* private guest value, echoed in resp */ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ /* @f_a_s[4:0]=last_sect ; @f_a_s[9:5]=first_sect */ @@ -71,31 +70,8 @@ DEFINE_RING_TYPES(blkif, blkif_request_t, blkif_response_t); -/* - * BLKIF_OP_PROBE: - * The request format for a probe request is constrained as follows: - * @operation == BLKIF_OP_PROBE - * @nr_segments == size of probe buffer in pages - * @device == unused (zero) - * @id == any value (echoed in response message) - * @sector_num == unused (zero) - * @frame_and_sects == list of page-sized buffers. - * (i.e., @first_sect == 0, @last_sect == 7). - * - * The response is a list of vdisk_t elements copied into the out-of-band - * probe buffer. On success the response status field contains the number - * of vdisk_t elements. 
- */ - #define VDISK_CDROM 0x1 #define VDISK_REMOVABLE 0x2 #define VDISK_READONLY 0x4 -typedef struct vdisk { - blkif_sector_t capacity; /* Size in terms of 512-byte sectors. */ - blkif_vdev_t device; /* Device number (opaque 16 bit value). */ - u16 info; /* Device type and flags (VDISK_*). */ - u16 sector_size; /* Minimum alignment for requests. */ -} vdisk_t; /* 16 bytes */ - #endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ diff -r e355ae38c83b -r 7570087c2552 linux-2.6-xen-sparse/drivers/xen/blkback/control.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/control.c Fri Aug 19 02:41:16 2005 +++ /dev/null Fri Aug 19 10:46:21 2005 @@ -1,61 +0,0 @@ -/****************************************************************************** - * arch/xen/drivers/blkif/backend/control.c - * - * Routines for interfacing with the control plane. - * - * Copyright (c) 2004, Keir Fraser - */ - -#include "common.h" - -static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) -{ - DPRINTK("Received blkif backend message, subtype=%d\n", msg->subtype); - - switch ( msg->subtype ) - { - case CMSG_BLKIF_BE_CREATE: - blkif_create((blkif_be_create_t *)&msg->msg[0]); - break; - case CMSG_BLKIF_BE_DESTROY: - blkif_destroy((blkif_be_destroy_t *)&msg->msg[0]); - break; - case CMSG_BLKIF_BE_CONNECT: - blkif_connect((blkif_be_connect_t *)&msg->msg[0]); - break; - case CMSG_BLKIF_BE_DISCONNECT: - if ( !blkif_disconnect((blkif_be_disconnect_t *)&msg->msg[0],msg->id) ) - return; /* Sending the response is deferred until later. 
*/ - break; - case CMSG_BLKIF_BE_VBD_CREATE: - vbd_create((blkif_be_vbd_create_t *)&msg->msg[0]); - break; - case CMSG_BLKIF_BE_VBD_DESTROY: - vbd_destroy((blkif_be_vbd_destroy_t *)&msg->msg[0]); - break; - default: - DPRINTK("Parse error while reading message subtype %d, len %d\n", - msg->subtype, msg->length); - msg->length = 0; - break; - } - - ctrl_if_send_response(msg); -} - -void blkif_ctrlif_init(void) -{ - ctrl_msg_t cmsg; - blkif_be_driver_status_t st; - - (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx, - CALLBACK_IN_BLOCKING_CONTEXT); - - /* Send a driver-UP notification to the domain controller. */ - cmsg.type = CMSG_BLKIF_BE; - cmsg.subtype = CMSG_BLKIF_BE_DRIVER_STATUS; - cmsg.length = sizeof(blkif_be_driver_status_t); - st.status = BLKIF_DRIVER_STATUS_UP; - memcpy(cmsg.msg, &st, sizeof(st)); - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); -} _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |