[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] Merge
# HG changeset patch # User kaf24@xxxxxxxxxxxxxxxxxxxx # Node ID 8004acaa668454c75c4d02d634b2af3a84f6f8c1 # Parent 43f424818d6ef3d3c877774b03e39fe47c8c094a # Parent 9f0eff879d8913a824280cf67658a530c80e8424 Merge diff -r 43f424818d6e -r 8004acaa6684 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Thu Aug 4 16:53:11 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Thu Aug 4 16:53:30 2005 @@ -137,7 +137,7 @@ blkif_control_probe_send(&req, &rsp, (unsigned long)(virt_to_machine(buf))); #else - req.frame_and_sects[0] = blkif_fas(virt_to_machine(buf), 0, ((PAGE_SIZE/512)-1); + req.frame_and_sects[0] = blkif_fas(virt_to_machine(buf), 0, (PAGE_SIZE/512)-1); blkif_control_send(&req, &rsp); #endif diff -r 43f424818d6e -r 8004acaa6684 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h Thu Aug 4 16:53:11 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h Thu Aug 4 16:53:30 2005 @@ -103,8 +103,6 @@ blkif_t *blkif; unsigned long id; int nr_pages; - unsigned long mach_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - unsigned long virt_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int next_free; } active_req_t; @@ -172,32 +170,7 @@ /* -------[ Mappings to User VMA ]------------------------------------ */ -#define MAX_PENDING_REQS 64 #define BATCH_PER_DOMAIN 16 -extern struct vm_area_struct *blktap_vma; - -/* The following are from blkback.c and should probably be put in a - * header and included from there. - * The mmap area described here is where attached data pages eill be mapped. - */ - -extern unsigned long mmap_vstart; -#define MMAP_PAGES_PER_REQUEST \ - (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) -#define MMAP_PAGES \ - (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST) -#define MMAP_VADDR(_req,_seg) \ - (mmap_vstart + \ - ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \ - ((_seg) * PAGE_SIZE)) - -/* immediately before the mmap area, we have a bunch of pages reserved - * for shared memory rings. - */ - -#define RING_PAGES 3 /* Ctrl, Front, and Back */ -extern unsigned long rings_vstart; - /* -------[ Here be globals ]----------------------------------------- */ extern unsigned long blktap_mode; diff -r 43f424818d6e -r 8004acaa6684 linux-2.6-xen-sparse/drivers/xen/blktap/blktap_datapath.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_datapath.c Thu Aug 4 16:53:11 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_datapath.c Thu Aug 4 16:53:30 2005 @@ -280,8 +280,6 @@ int more_to_do = 0; int notify_be = 0, notify_user = 0; - if (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) return 1; - /* lock both rings */ spin_lock_irqsave(&blkif_io_lock, flags); diff -r 43f424818d6e -r 8004acaa6684 linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c Thu Aug 4 16:53:11 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c Thu Aug 4 16:53:30 2005 @@ -19,6 +19,7 @@ #include <linux/gfp.h> #include <linux/poll.h> #include <asm/pgalloc.h> +#include <asm/tlbflush.h> #include <asm-xen/xen-public/io/blkif.h> /* for control ring. */ #include "blktap.h" @@ -32,11 +33,6 @@ /* for poll: */ static wait_queue_head_t blktap_wait; - -/* Where things are inside the device mapping. */ -struct vm_area_struct *blktap_vma = NULL; -unsigned long mmap_vstart; -unsigned long rings_vstart; /* Rings up to user space. */ static blkif_front_ring_t blktap_ufe_ring; @@ -47,6 +43,39 @@ static int blktap_read_fe_ring(void); static int blktap_read_be_ring(void); +/* -------[ mmap region ]--------------------------------------------- */ +/* + * We use a big chunk of address space to map in-flight requests into, + * and export this region up to user-space. See the comments in blkback + * about this -- the two must be kept in sync if the tap is used as a + * passthrough. + */ + +#define MAX_PENDING_REQS 64 + +/* immediately before the mmap area, we have a bunch of pages reserved + * for shared memory rings. + */ +#define RING_PAGES 3 /* Ctrl, Front, and Back */ + +/* Where things are inside the device mapping. */ +struct vm_area_struct *blktap_vma = NULL; +unsigned long mmap_vstart; /* Kernel pages for mapping in data. */ +unsigned long rings_vstart; /* start of mmaped vma */ +unsigned long user_vstart; /* start of user mappings */ + +#define MMAP_PAGES_PER_REQUEST \ + (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) +#define MMAP_PAGES \ + (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST) +#define MMAP_VADDR(_start, _req,_seg) \ + ( _start + \ + ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \ + ((_seg) * PAGE_SIZE)) + + + + /* -------[ blktap vm ops ]------------------------------------------- */ static struct page *blktap_nopage(struct vm_area_struct *vma, @@ -76,8 +105,6 @@ if ( test_and_set_bit(0, &blktap_dev_inuse) ) return -EBUSY; - - printk(KERN_ALERT "blktap open.\n"); /* Allocate the ctrl ring. */ csring = (ctrl_sring_t *)get_zeroed_page(GFP_KERNEL); @@ -128,7 +155,7 @@ blktap_dev_inuse = 0; blktap_ring_ok = 0; - printk(KERN_ALERT "blktap closed.\n"); + DPRINTK(KERN_ALERT "blktap closed.\n"); /* Free the ring page. */ ClearPageReserved(virt_to_page(blktap_uctrl_ring.sring)); @@ -140,7 +167,7 @@ ClearPageReserved(virt_to_page(blktap_ube_ring.sring)); free_page((unsigned long) blktap_ube_ring.sring); - /* Clear any active mappings. */ + /* Clear any active mappings and free foreign map table */ if (blktap_vma != NULL) { zap_page_range(blktap_vma, blktap_vma->vm_start, blktap_vma->vm_end - blktap_vma->vm_start, NULL); @@ -151,21 +178,36 @@ } /* Note on mmap: - * remap_pfn_range sets VM_IO on vma->vm_flags. In trying to make libaio - * work to do direct page access from userspace, this ended up being a - * problem. The bigger issue seems to be that there is no way to map - * a foreign page in to user space and have the virtual address of that - * page map sanely down to a mfn. - * Removing the VM_IO flag results in a loop in get_user_pages, as - * pfn_valid() always fails on a foreign page. + * We need to map pages to user space in a way that will allow the block + * subsystem set up direct IO to them. This couldn't be done before, because + * there isn't really a sane way to make a user virtual address down to a + * physical address when the page belongs to another domain. + * + * My first approach was to map the page in to kernel memory, add an entry + * for it in the physical frame list (using alloc_lomem_region as in blkback) + * and then attempt to map that page up to user space. This is disallowed + * by xen though, which realizes that we don't really own the machine frame + * underlying the physical page. + * + * The new approach is to provide explicit support for this in xen linux. + * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages + * mapped from other vms. vma->vm_private_data is set up as a mapping + * from pages to actual page structs. There is a new clause in get_user_pages + * that does the right thing for this sort of mapping. + * + * blktap_mmap sets up this mapping. Most of the real work is done in + * blktap_write_fe_ring below. */ static int blktap_mmap(struct file *filp, struct vm_area_struct *vma) { int size; - - printk(KERN_ALERT "blktap mmap (%lx, %lx)\n", + struct page **map; + int i; + + DPRINTK(KERN_ALERT "blktap mmap (%lx, %lx)\n", vma->vm_start, vma->vm_end); + vma->vm_flags |= VM_RESERVED; vma->vm_ops = &blktap_vm_ops; size = vma->vm_end - vma->vm_start; @@ -177,10 +219,10 @@ } size >>= PAGE_SHIFT; - printk(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1); + DPRINTK(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1); rings_vstart = vma->vm_start; - mmap_vstart = rings_vstart + (RING_PAGES << PAGE_SHIFT); + user_vstart = rings_vstart + (RING_PAGES << PAGE_SHIFT); /* Map the ring pages to the start of the region and reserve it. */ @@ -190,29 +232,44 @@ DPRINTK("Mapping ctrl_ring page %lx.\n", __pa(blktap_uctrl_ring.sring)); if (remap_pfn_range(vma, vma->vm_start, __pa(blktap_uctrl_ring.sring) >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot)) { - WPRINTK("ctrl_ring: remap_pfn_range failure!\n"); - } + PAGE_SIZE, vma->vm_page_prot)) + goto fail; DPRINTK("Mapping be_ring page %lx.\n", __pa(blktap_ube_ring.sring)); if (remap_pfn_range(vma, vma->vm_start + PAGE_SIZE, __pa(blktap_ube_ring.sring) >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot)) { - WPRINTK("be_ring: remap_pfn_range failure!\n"); - } + PAGE_SIZE, vma->vm_page_prot)) + goto fail; DPRINTK("Mapping fe_ring page %lx.\n", __pa(blktap_ufe_ring.sring)); if (remap_pfn_range(vma, vma->vm_start + ( 2 * PAGE_SIZE ), __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot)) { - WPRINTK("fe_ring: remap_pfn_range failure!\n"); - } - + PAGE_SIZE, vma->vm_page_prot)) + goto fail; + + /* Mark this VM as containing foreign pages, and set up mappings. */ + map = kmalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) + * sizeof(struct page_struct*), + GFP_KERNEL); + if (map == NULL) goto fail; + + for (i=0; i<((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++) + map[i] = NULL; + + vma->vm_private_data = map; + vma->vm_flags |= VM_FOREIGN; + blktap_vma = vma; blktap_ring_ok = 1; return 0; + fail: + /* Clear any active mappings. */ + zap_page_range(vma, vma->vm_start, + vma->vm_end - vma->vm_start, NULL); + + return -ENOMEM; } static int blktap_ioctl(struct inode *inode, struct file *filp, @@ -263,6 +320,8 @@ RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring) || RING_HAS_UNPUSHED_RESPONSES(&blktap_ube_ring) ) { + flush_tlb_all(); + RING_PUSH_REQUESTS(&blktap_uctrl_ring); RING_PUSH_REQUESTS(&blktap_ufe_ring); RING_PUSH_RESPONSES(&blktap_ube_ring); @@ -290,10 +349,35 @@ /*-----[ Data to/from user space ]----------------------------------------*/ +static void fast_flush_area(int idx, int nr_pages) +{ + multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int i; + + for ( i = 0; i < nr_pages; i++ ) + { + MULTI_update_va_mapping(mcl+i, MMAP_VADDR(mmap_vstart, idx, i), + __pte(0), 0); + } + + mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; + if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) ) + BUG(); +} + + +extern int __direct_remap_area_pages(struct mm_struct *mm, + unsigned long address, + unsigned long size, + mmu_update_t *v); + int blktap_write_fe_ring(blkif_request_t *req) { blkif_request_t *target; - int error, i; + int i; + unsigned long remap_prot; + multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST+1]; + mmu_update_t mmu[BLKIF_MAX_SEGMENTS_PER_REQUEST]; /* * This is called to pass a request from the real frontend domain's @@ -310,26 +394,81 @@ return 0; } - target = RING_GET_REQUEST(&blktap_ufe_ring, - blktap_ufe_ring.req_prod_pvt); + remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW; + flush_cache_all(); /* a noop on intel... */ + + target = RING_GET_REQUEST(&blktap_ufe_ring, blktap_ufe_ring.req_prod_pvt); memcpy(target, req, sizeof(*req)); - /* Attempt to map the foreign pages directly in to the application */ + /* Map the foreign pages directly in to the application */ for (i=0; i<target->nr_segments; i++) { - - error = direct_remap_area_pages(blktap_vma->vm_mm, - MMAP_VADDR(ID_TO_IDX(req->id), i), - target->frame_and_sects[i] & PAGE_MASK, - PAGE_SIZE, - blktap_vma->vm_page_prot, - ID_TO_DOM(req->id)); - if ( error != 0 ) { - printk(KERN_INFO "remapping attached page failed! (%d)\n", error); - /* the request is now dropped on the floor. */ - return 0; + unsigned long buf; + unsigned long uvaddr; + unsigned long kvaddr; + unsigned long offset; + + buf = target->frame_and_sects[i] & PAGE_MASK; + uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i); + kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i); + + MULTI_update_va_mapping_otherdomain( + mcl+i, + kvaddr, + pfn_pte_ma(buf >> PAGE_SHIFT, __pgprot(remap_prot)), + 0, + ID_TO_DOM(req->id)); + + phys_to_machine_mapping[__pa(kvaddr)>>PAGE_SHIFT] = + FOREIGN_FRAME(buf >> PAGE_SHIFT); + + __direct_remap_area_pages(blktap_vma->vm_mm, + uvaddr, + PAGE_SIZE, + &mmu[i]); + mmu[i].val = (target->frame_and_sects[i] & PAGE_MASK) + | pgprot_val(blktap_vma->vm_page_prot); + + offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT; + ((struct page **)blktap_vma->vm_private_data)[offset] = + pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); + } + + /* Add the mmu_update call. */ + mcl[i].op = __HYPERVISOR_mmu_update; + mcl[i].args[0] = (unsigned long)mmu; + mcl[i].args[1] = target->nr_segments; + mcl[i].args[2] = 0; + mcl[i].args[3] = ID_TO_DOM(req->id); + + BUG_ON(HYPERVISOR_multicall(mcl, target->nr_segments+1) != 0); + + /* Make sure it all worked. */ + for ( i = 0; i < target->nr_segments; i++ ) + { + if ( unlikely(mcl[i].result != 0) ) + { + DPRINTK("invalid buffer -- could not remap it\n"); + fast_flush_area(ID_TO_IDX(req->id), target->nr_segments); + return -1; } } - + if ( unlikely(mcl[i].result != 0) ) + { + DPRINTK("direct remapping of pages to /dev/blktap failed.\n"); + return -1; + } + + + /* Mark mapped pages as reserved: */ + for ( i = 0; i < target->nr_segments; i++ ) + { + unsigned long kvaddr; + + kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i); + SetPageReserved(pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT)); + } + + blktap_ufe_ring.req_prod_pvt++; return 0; @@ -366,7 +505,7 @@ { /* This is called to read responses from the UFE ring. */ - RING_IDX i, rp; + RING_IDX i, j, rp; blkif_response_t *resp_s; blkif_t *blkif; active_req_t *ar; @@ -387,8 +526,23 @@ DPRINTK("resp->fe_ring\n"); ar = lookup_active_req(ID_TO_IDX(resp_s->id)); blkif = ar->blkif; - zap_page_range(blktap_vma, MMAP_VADDR(ID_TO_IDX(resp_s->id), 0), + for (j = 0; j < ar->nr_pages; j++) { + unsigned long vaddr; + struct page **map = blktap_vma->vm_private_data; + int offset; + + vaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(resp_s->id), j); + offset = (vaddr - blktap_vma->vm_start) >> PAGE_SHIFT; + + ClearPageReserved(virt_to_page(vaddr)); + map[offset] = NULL; + } + + + zap_page_range(blktap_vma, + MMAP_VADDR(user_vstart, ID_TO_IDX(resp_s->id), 0), ar->nr_pages << PAGE_SHIFT, NULL); + fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages); write_resp_to_fe_ring(blkif, resp_s); blktap_ufe_ring.rsp_cons = i + 1; kick_fe_domain(blkif); @@ -464,6 +618,9 @@ { int err; + if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 ) + BUG(); + err = misc_register(&blktap_miscdev); if ( err != 0 ) { diff -r 43f424818d6e -r 8004acaa6684 tools/blktap/blktaplib.c --- a/tools/blktap/blktaplib.c Thu Aug 4 16:53:11 2005 +++ b/tools/blktap/blktaplib.c Thu Aug 4 16:53:30 2005 @@ -34,7 +34,7 @@ #else #define DPRINTF(_f, _a...) ((void)0) #endif -#define DEBUG_RING_IDXS 0 +#define DEBUG_RING_IDXS 1 #define POLLRDNORM 0x040 diff -r 43f424818d6e -r 8004acaa6684 xen/include/public/io/blkif.h --- a/xen/include/public/io/blkif.h Thu Aug 4 16:53:11 2005 +++ b/xen/include/public/io/blkif.h Thu Aug 4 16:53:30 2005 @@ -47,7 +47,7 @@ unsigned long frame_and_sects[BLKIF_MAX_SEGMENTS_PER_REQUEST]; } blkif_request_t; -#define blkif_fas(_addr, _fs, _ls) ((addr)|((_fs)<<5)|(_ls)) +#define blkif_fas(_addr, _fs, _ls) ((_addr)|((_fs)<<5)|(_ls)) #define blkif_first_sect(_fas) (((_fas)>>5)&31) #define blkif_last_sect(_fas) ((_fas)&31) diff -r 43f424818d6e -r 8004acaa6684 tools/blktap/parallax/Makefile --- /dev/null Thu Aug 4 16:53:11 2005 +++ b/tools/blktap/parallax/Makefile Thu Aug 4 16:53:30 2005 @@ -0,0 +1,64 @@ +XEN_ROOT = ../../.. +include $(XEN_ROOT)/tools/Rules.mk + +PARALLAX_INSTALL_DIR = /usr/sbin + +INSTALL = install +INSTALL_PROG = $(INSTALL) -m0755 +INSTALL_DIR = $(INSTALL) -d -m0755 + +INCLUDES += -I.. -I/usr/include -I $(XEN_LIBXC) + +LDFLAGS = -L.. -lpthread -lz -lblktap + +#PLX_SRCS := +PLX_SRCS := vdi.c +PLX_SRCS += radix.c +PLX_SRCS += snaplog.c +PLX_SRCS += blockstore.c +PLX_SRCS += block-async.c +PLX_SRCS += requests-async.c +VDI_SRCS := $(PLX_SRCS) +PLX_SRCS += parallax.c + +#VDI_TOOLS := +VDI_TOOLS := vdi_create +VDI_TOOLS += vdi_list +VDI_TOOLS += vdi_snap +VDI_TOOLS += vdi_snap_list +VDI_TOOLS += vdi_snap_delete +VDI_TOOLS += vdi_fill +VDI_TOOLS += vdi_tree +VDI_TOOLS += vdi_validate + +CFLAGS += -Wall +CFLAGS += -Werror +CFLAGS += -Wno-unused +#CFLAGS += -O3 +CFLAGS += -g3 +CFLAGS += -fno-strict-aliasing +CFLAGS += $(INCLUDES) +CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE +# Get gcc to generate the dependencies for us. +CFLAGS += -Wp,-MD,.$(@F).d +DEPS = .*.d + +OBJS = $(patsubst %.c,%.o,$(SRCS)) +IBINS = parallax $(VDI_TOOLS) + +all: $(VDI_TOOLS) parallax blockstored + +install: all + $(INSTALL_PROG) $(IBINS) $(DESTDIR)$(PARALLAX_INSTALL_DIR) + +clean: + rm -rf *.o *~ $(DEPS) xen TAGS $(VDI_TOOLS) parallax vdi_unittest + +parallax: $(PLX_SRCS) + $(CC) $(CFLAGS) -o parallax -L.. $(LDFLAGS) $(PLX_SRCS) + +${VDI_TOOLS}: %: %.c $(VDI_SRCS) + $(CC) $(CFLAGS) -g3 -o $@ $@.c $(LDFLAGS) $(VDI_SRCS) + +.PHONY: TAGS clean install rpm +-include $(DEPS) \ No newline at end of file _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |