[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] Add grant table support to block tap.
# HG changeset patch # User akw27@xxxxxxxxxxxxxxxxxxxxxx # Node ID eaf498f1ffdef1d63ef9df03f1d8ea749227d183 # Parent 0237746ecf92423a1b948836902f857c4cc3ddd3 Add grant table support to block tap. This patch adds grant table support to the block tap. The AIO support introduced in patch 9f0eff879d8913a824280cf67658a530c80e8424 still works -- The tap code maps a granted page twice, once in kernel and once in user. The kernel page is patched into the p2m table and pages added to the user vm_area are mapped to the appropriate underlying struct pages using the VM_FOREIGN hooks in get_user_pages(). Comparing block IO from dom0 to the existing block backend, and to the tap managing the same partition as the BE from user space with AIO, I get the following performance: Version 1.03 ------Sequential Output------ --Sequential Input- --Random- -Per Chr- --Block-- -Rewrite- -Per Chr- --Block-- --Seeks-- Machine Size K/sec %CP K/sec %CP K/sec %CP K/sec %CP K/sec %CP /sec %CP xen0 2G 31198 95 56818 8 20967 2 28415 77 59595 4 264.9 0 xenU-blkbe2cpuGT 2G 31157 96 54026 10 25585 4 30664 90 64919 7 292.7 0 xenU-blktp2cpuGT 2G 32313 97 54217 8 20950 3 28117 87 65924 4 191.8 0 Signed-off-by: andrew.warfield@xxxxxxxxxxxx diff -r 0237746ecf92 -r eaf498f1ffde linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Tue Aug 16 07:07:11 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Tue Aug 16 10:12:18 2005 @@ -23,6 +23,9 @@ blkif_be_driver_status_t be_st; printk(KERN_INFO "Initialising Xen block tap device\n"); +#ifdef CONFIG_XEN_BLKDEV_GRANT + printk(KERN_INFO "Block tap is using grant tables.\n"); +#endif DPRINTK(" tap - Backend connection init:\n"); diff -r 0237746ecf92 -r eaf498f1ffde linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h Tue Aug 16 07:07:11 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h Tue Aug 16 10:12:18 2005 @@ -85,6 +85,11 @@ spinlock_t blk_ring_lock; atomic_t refcnt; struct work_struct work; +#ifdef CONFIG_XEN_BLKDEV_GRANT + u16 shmem_handle; + memory_t shmem_vaddr; + grant_ref_t shmem_ref; +#endif } blkif_t; blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle); diff -r 0237746ecf92 -r eaf498f1ffde linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c Tue Aug 16 07:07:11 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c Tue Aug 16 10:12:18 2005 @@ -9,6 +9,7 @@ */ #include "blktap.h" +#include <asm-xen/evtchn.h> static char *blkif_state_name[] = { [BLKIF_STATE_CLOSED] = "closed", @@ -48,12 +49,21 @@ blkif_t *blkif = (blkif_t *)arg; ctrl_msg_t cmsg; blkif_be_disconnect_t disc; +#ifdef CONFIG_XEN_BLKDEV_GRANT + struct gnttab_unmap_grant_ref op; +#endif /* * These can't be done in blkif_disconnect() because at that point there * may be outstanding requests at the disc whose asynchronous responses * must still be notified to the remote driver. */ +#ifdef CONFIG_XEN_BLKDEV_GRANT + op.host_addr = blkif->shmem_vaddr; + op.handle = blkif->shmem_handle; + op.dev_bus_addr = 0; + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); +#endif vfree(blkif->blk_ring.sring); /* Construct the deferred response message. */ @@ -177,8 +187,12 @@ unsigned int evtchn = connect->evtchn; unsigned long shmem_frame = connect->shmem_frame; struct vm_struct *vma; +#ifdef CONFIG_XEN_BLKDEV_GRANT + int ref = connect->shmem_ref; +#else pgprot_t prot; int error; +#endif blkif_t *blkif; blkif_sring_t *sring; @@ -199,24 +213,46 @@ return; } - prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED); +#ifndef CONFIG_XEN_BLKDEV_GRANT + prot = __pgprot(_KERNPG_TABLE); error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr), shmem_frame<<PAGE_SHIFT, PAGE_SIZE, prot, domid); if ( error != 0 ) { - WPRINTK("BE_CONNECT: error! (%d)\n", error); if ( error == -ENOMEM ) connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; - else if ( error == -EFAULT ) { + else if ( error == -EFAULT ) connect->status = BLKIF_BE_STATUS_MAPPING_ERROR; - WPRINTK("BE_CONNECT: MAPPING error!\n"); - } else connect->status = BLKIF_BE_STATUS_ERROR; vfree(vma->addr); return; } +#else + { /* Map: Use the Grant table reference */ + struct gnttab_map_grant_ref op; + op.host_addr = VMALLOC_VMADDR(vma->addr); + op.flags = GNTMAP_host_map; + op.ref = ref; + op.dom = domid; + + BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) ); + + handle = op.handle; + + if (op.handle < 0) { + DPRINTK(" Grant table operation failure !\n"); + connect->status = BLKIF_BE_STATUS_MAPPING_ERROR; + vfree(vma->addr); + return; + } + + blkif->shmem_ref = ref; + blkif->shmem_handle = handle; + blkif->shmem_vaddr = VMALLOC_VMADDR(vma->addr); + } +#endif if ( blkif->status != DISCONNECTED ) { diff -r 0237746ecf92 -r eaf498f1ffde linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c Tue Aug 16 07:07:11 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c Tue Aug 16 10:12:18 2005 @@ -21,6 +21,9 @@ #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm-xen/xen-public/io/blkif.h> /* for control ring. */ +#ifdef CONFIG_XEN_BLKDEV_GRANT +#include <asm-xen/xen-public/grant_table.h> +#endif #include "blktap.h" @@ -42,6 +45,7 @@ /* local prototypes */ static int blktap_read_fe_ring(void); static int blktap_read_be_ring(void); + /* -------[ mmap region ]--------------------------------------------- */ /* @@ -73,7 +77,28 @@ ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \ ((_seg) * PAGE_SIZE)) - +/* -------[ grant handles ]------------------------------------------- */ + +#ifdef CONFIG_XEN_BLKDEV_GRANT +/* When using grant tables to map a frame for device access then the + * handle returned must be used to unmap the frame. This is needed to + * drop the ref count on the frame. + */ +struct grant_handle_pair +{ + u16 kernel; + u16 user; +}; +static struct grant_handle_pair pending_grant_handles[MMAP_PAGES]; +#define pending_handle(_idx, _i) \ + (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)]) +#define BLKTAP_INVALID_HANDLE(_g) \ + (((_g->kernel) == 0xFFFF) && ((_g->user) == 0xFFFF)) +#define BLKTAP_INVALIDATE_HANDLE(_g) do { \ + (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \ + } while(0) + +#endif /* -------[ blktap vm ops ]------------------------------------------- */ @@ -348,9 +373,43 @@ /*-----[ Data to/from user space ]----------------------------------------*/ - static void fast_flush_area(int idx, int nr_pages) { +#ifdef CONFIG_XEN_BLKDEV_GRANT + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; + unsigned int i, op = 0; + struct grant_handle_pair *handle; + unsigned long ptep; + + for (i=0; i<nr_pages; i++) + { + handle = &pending_handle(idx, i); + if (!BLKTAP_INVALID_HANDLE(handle)) + { + + unmap[op].host_addr = MMAP_VADDR(mmap_vstart, idx, i); + unmap[op].dev_bus_addr = 0; + unmap[op].handle = handle->kernel; + op++; + + if (create_lookup_pte_addr(blktap_vma->vm_mm, + MMAP_VADDR(user_vstart, idx, i), + &ptep) !=0) { + DPRINTK("Couldn't get a pte addr!\n"); + return; + } + unmap[op].host_addr = ptep; + unmap[op].dev_bus_addr = 0; + unmap[op].handle = handle->user; + op++; + + BLKTAP_INVALIDATE_HANDLE(handle); + } + } + if ( unlikely(HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, op))) + BUG(); +#else multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int i; @@ -363,21 +422,22 @@ mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) ) BUG(); -} - - -extern int __direct_remap_area_pages(struct mm_struct *mm, - unsigned long address, - unsigned long size, - mmu_update_t *v); +#endif +} + int blktap_write_fe_ring(blkif_request_t *req) { blkif_request_t *target; - int i; + int i, ret = 0; +#ifdef CONFIG_XEN_BLKDEV_GRANT + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; + int op; +#else unsigned long remap_prot; multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST+1]; mmu_update_t mmu[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +#endif /* * This is called to pass a request from the real frontend domain's @@ -394,18 +454,109 @@ return 0; } - remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW; flush_cache_all(); /* a noop on intel... */ target = RING_GET_REQUEST(&blktap_ufe_ring, blktap_ufe_ring.req_prod_pvt); memcpy(target, req, sizeof(*req)); /* Map the foreign pages directly in to the application */ +#ifdef CONFIG_XEN_BLKDEV_GRANT + op = 0; + for (i=0; i<target->nr_segments; i++) { + + unsigned long uvaddr; + unsigned long kvaddr; + unsigned long ptep; + + uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i); + kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i); + + /* Map the remote page to kernel. */ + map[op].host_addr = kvaddr; + map[op].dom = ID_TO_DOM(req->id); + map[op].ref = blkif_gref_from_fas(target->frame_and_sects[i]); + map[op].flags = GNTMAP_host_map; + /* This needs a bit more thought in terms of interposition: + * If we want to be able to modify pages during write using + * grant table mappings, the guest will either need to allow + * it, or we'll need to incur a copy. */ + if (req->operation == BLKIF_OP_WRITE) + map[op].flags |= GNTMAP_readonly; + op++; + + /* Now map it to user. */ + ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep); + if (ret) + { + DPRINTK("Couldn't get a pte addr!\n"); + goto fail; + } + + map[op].host_addr = ptep; + map[op].dom = ID_TO_DOM(req->id); + map[op].ref = blkif_gref_from_fas(target->frame_and_sects[i]); + map[op].flags = GNTMAP_host_map | GNTMAP_application_map + | GNTMAP_contains_pte; + /* Above interposition comment applies here as well. */ + if (req->operation == BLKIF_OP_WRITE) + map[op].flags |= GNTMAP_readonly; + op++; + } + + if ( unlikely(HYPERVISOR_grant_table_op( + GNTTABOP_map_grant_ref, map, op))) + BUG(); + + op = 0; + for (i=0; i<(target->nr_segments*2); i+=2) { + unsigned long uvaddr; + unsigned long kvaddr; + unsigned long offset; + int cancel = 0; + + uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i/2); + kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i/2); + + if ( unlikely(map[i].handle < 0) ) { + DPRINTK("Error on kernel grant mapping (%d)\n", map[i].handle); + ret = map[i].handle; + cancel = 1; + } + + if ( unlikely(map[i+1].handle < 0) ) { + DPRINTK("Error on user grant mapping (%d)\n", map[i+1].handle); + ret = map[i+1].handle; + cancel = 1; + } + + if (cancel) + goto fail; + + /* Set the necessary mappings in p2m and in the VM_FOREIGN + * vm_area_struct to allow user vaddr -> struct page lookups + * to work. This is needed for direct IO to foreign pages. */ + phys_to_machine_mapping[__pa(kvaddr)>>PAGE_SHIFT] = + FOREIGN_FRAME(map[i].dev_bus_addr); + + offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT; + ((struct page **)blktap_vma->vm_private_data)[offset] = + pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); + + /* Save handles for unmapping later. */ + pending_handle(ID_TO_IDX(req->id), i/2).kernel = map[i].handle; + pending_handle(ID_TO_IDX(req->id), i/2).user = map[i+1].handle; + } + +#else + + remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW; + for (i=0; i<target->nr_segments; i++) { unsigned long buf; unsigned long uvaddr; unsigned long kvaddr; unsigned long offset; + unsigned long ptep; buf = target->frame_and_sects[i] & PAGE_MASK; uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i); @@ -421,10 +572,14 @@ phys_to_machine_mapping[__pa(kvaddr)>>PAGE_SHIFT] = FOREIGN_FRAME(buf >> PAGE_SHIFT); - __direct_remap_area_pages(blktap_vma->vm_mm, - uvaddr, - PAGE_SIZE, - &mmu[i]); + ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep); + if (ret) + { + DPRINTK("error getting pte\n"); + goto fail; + } + + mmu[i].ptr = ptep; mmu[i].val = (target->frame_and_sects[i] & PAGE_MASK) | pgprot_val(blktap_vma->vm_page_prot); @@ -448,16 +603,17 @@ if ( unlikely(mcl[i].result != 0) ) { DPRINTK("invalid buffer -- could not remap it\n"); - fast_flush_area(ID_TO_IDX(req->id), target->nr_segments); - return -1; + ret = mcl[i].result; + goto fail; } } if ( unlikely(mcl[i].result != 0) ) { DPRINTK("direct remapping of pages to /dev/blktap failed.\n"); - return -1; - } - + ret = mcl[i].result; + goto fail; + } +#endif /* CONFIG_XEN_BLKDEV_GRANT */ /* Mark mapped pages as reserved: */ for ( i = 0; i < target->nr_segments; i++ ) @@ -472,6 +628,10 @@ blktap_ufe_ring.req_prod_pvt++; return 0; + + fail: + fast_flush_area(ID_TO_IDX(req->id), target->nr_segments); + return ret; } int blktap_write_be_ring(blkif_response_t *rsp) @@ -538,11 +698,10 @@ map[offset] = NULL; } - + fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages); zap_page_range(blktap_vma, MMAP_VADDR(user_vstart, ID_TO_IDX(resp_s->id), 0), ar->nr_pages << PAGE_SHIFT, NULL); - fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages); write_resp_to_fe_ring(blkif, resp_s); blktap_ufe_ring.rsp_cons = i + 1; kick_fe_domain(blkif); @@ -616,10 +775,16 @@ int blktap_init(void) { - int err; + int err, i, j; if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 ) BUG(); + +#ifdef CONFIG_XEN_BLKDEV_GRANT + for (i=0; i<MAX_PENDING_REQS ; i++) + for (j=0; j<BLKIF_MAX_SEGMENTS_PER_REQUEST; j++) + BLKTAP_INVALIDATE_HANDLE(&pending_handle(i, j)); +#endif err = misc_register(&blktap_miscdev); if ( err != 0 ) _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |