[Xen-devel] [PATCH] [xen, xencomm] various xencomm fixes (was Re: [XenPPC] Re: [Xen-ia64-devel] [PATCH 1/2] remove xencomm page size limit (xen side))
On Fri, Aug 03, 2007 at 09:43:16AM -0500, Hollis Blanchard wrote:
> On Fri, 2007-08-03 at 11:12 +0900, Isaku Yamahata wrote:
> > On Thu, Aug 02, 2007 at 10:00:46AM -0500, Hollis Blanchard wrote:
> > > On Thu, 2007-08-02 at 11:44 +0900, Isaku Yamahata wrote:
> > > >
> > > > > But we can issue sequential p2m hcalls with different offsets, so we
> > > > > do.
> > > > > So what exactly is the new problem?
> > > >
> > > > The limit is about 64GB for xen/ia64 because xen/ia64's default page
> > > > size is 16KB.
> > > > The issue I'm seeing is the populate physmap hypercall failure
> > > > at domain creation. It's essentially the same issue you described
> > > > above. I want to remove the limitation with these patches.
> > >
> > > Can't you simply issue repeated populate_physmap hypercalls,
> > > incrementing extent_start?
> >
> > Yes, it's possible. However, my choice was to generalize xencomm
> > because
> > - the inline xencomm code already supports page boundary crossing, and
> > - IMHO it doesn't unnecessarily complicate the existing code.
>
> Well, your patch was largish...

I see your point. Unfortunately, after I consolidated the Xen-side
xencomm.c and revised the patch, it became even larger. If you don't
like the multi-page address array support, I can drop it from the
attached patch and fall back to issuing repeated populate_physmap
hypercalls. But the various argument checks are necessary with or
without multi-page support, so dropping only the multi-page support
doesn't make much sense.
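
(For reference, the 64GB figure works out as follows: with 16KB pages, a
single-page descriptor holds (16384 - 8) / 8 = 2047 address entries, each
naming one 16KB page of the GPFN array; each of those pages holds
16384 / 8 = 2048 GPFN entries, so one call covers at most roughly
2047 * 2048 * 16KB ~= 64GB.)

For concreteness, the fallback Hollis suggests would look roughly like the
sketch below on the caller side. It is untested; populate_physmap_chunked()
and CHUNK_EXTENTS are placeholder names of mine, the reservation fields are
the standard ones from xen/include/public/memory.h, and the arch's
hypercall header would be needed for HYPERVISOR_memory_op():

/* Untested sketch: split one large populate_physmap request into chunks
 * whose GPFN array stays well within a single-page xencomm descriptor. */
#define CHUNK_EXTENTS 2048UL  /* 2048 gpfns = one 16KB page of the array */

static long populate_physmap_chunked(domid_t domid, xen_pfn_t *gpfn_array,
                                     unsigned long nr_extents)
{
    unsigned long done = 0;

    while (done < nr_extents) {
        struct xen_memory_reservation reservation = {
            .extent_order = 0,
            .domid        = domid,
        };
        long rc;

        reservation.nr_extents = nr_extents - done;
        if (reservation.nr_extents > CHUNK_EXTENTS)
            reservation.nr_extents = CHUNK_EXTENTS;
        /* same array, advanced past the extents already populated */
        set_xen_guest_handle(reservation.extent_start, gpfn_array + done);

        rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
        if (rc < 0)
            return rc;  /* hard failure */
        if (rc == 0)
            break;      /* no progress; avoid looping forever */
        done += rc;     /* rc is the number of extents populated */
    }

    return done;
}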

# HG changeset patch
# User yamahata@xxxxxxxxxxxxx
# Date 1186464727 -32400
# Node ID 41fde67aa85ac54b24191eb3db573998b098c41d
# Parent  8b6af0333d531d1fd63b1ce02c2df51b0d4f45f5
Various xencomm fixes.

This patch fixes the following issues.
- Xencomm should check that struct xencomm_desc itself (8 bytes) doesn't
  cross a page boundary. Otherwise a hostile guest kernel could pass a
  pointer placed so that the descriptor straddles a page boundary, causing
  xencomm to access an unrelated page.
- Xencomm should check whether the struct xencomm_desc::address[] array
  crosses a page boundary. Otherwise xencomm may access unrelated pages.
- Xencomm should get_page()/put_page() around the address conversion from
  paddr to maddr, because Xen supports SMP and the balloon driver.
  Otherwise another vcpu may free the page at the same time.
- The current implementation doesn't allow the struct
  xencomm_desc::address[] array to span more than a single page. On IA64
  this causes creation of domains larger than 64GB to fail. This patch
  generalizes xencomm to allow a multi-page xencomm_desc::address[] array.

PATCHNAME: various_fix_xencomm

Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
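
For reviewers, this is the descriptor layout the new checks guard, as in
xen/include/public/xencomm.h (reproduced here from memory for convenience;
the tree copy is authoritative):

/* Shared-memory descriptor used by xencomm.  The 8-byte header is why
 * xencomm_desc_cross_page_boundary() below rejects any descriptor paddr
 * whose page offset is greater than PAGE_SIZE - 8. */
struct xencomm_desc {
    uint32_t magic;      /* XENCOMM_MAGIC (0x58434F4D, "XCOM") */
    uint32_t nr_addrs;   /* number of entries in address[] */
    uint64_t address[0]; /* guest-physical addresses of the data pages;
                          * XENCOMM_INVALID marks a consumed entry */
};

Concretely, with xen/ia64's 16KB pages, a descriptor placed at page offset
16380 would have magic at the end of one page and nr_addrs at the start of
the next, so reading the header through a single paddr_to_maddr()
translation could touch an unrelated machine page. Offsets above
16384 - 8 = 16376 are therefore rejected, and xencomm_get_address()
re-translates whenever the address[] cursor steps onto a new page.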

diff -r 8b6af0333d53 -r 41fde67aa85a xen/common/xencomm.c
--- a/xen/common/xencomm.c	Tue Aug 07 17:25:23 2007 +0900
+++ b/xen/common/xencomm.c	Tue Aug 07 14:32:07 2007 +0900
@@ -17,6 +17,7 @@
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
  *          Tristan Gingold <tristan.gingold@xxxxxxxx>
+ *          Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
  */
 
 #include <xen/config.h>
@@ -34,6 +35,82 @@ static int xencomm_debug = 1; /* extreme
 #define xencomm_debug 0
 #endif
 
+/* get_page() to prevent another vcpu from freeing the page */
+static int
+xencomm_paddr_to_maddr(unsigned long paddr, unsigned long *maddr,
+                       struct page_info **page)
+{
+    *maddr = paddr_to_maddr(paddr);
+    if (*maddr == 0)
+        return -EFAULT;
+
+    *page = virt_to_page(*maddr);
+    if (get_page(*page, current->domain) == 0) {
+        if (page_get_owner(*page) != current->domain)
+            /* This page might be a page granted by another domain */
+            panic_domain(NULL, "copy_from_guest from foreign domain\n");
+
+        /* Try again. */
+        return -EAGAIN;
+    }
+
+    return 0;
+}
+
+/* check whether struct desc crosses a page boundary */
+static int
+xencomm_desc_cross_page_boundary(unsigned long paddr)
+{
+    unsigned long offset = paddr & ~PAGE_MASK;
+    if (offset > PAGE_SIZE - sizeof(struct xencomm_desc))
+        return 1;
+    return 0;
+}
+
+static int
+xencomm_get_address(const void *handle, struct xencomm_desc *desc, int i,
+                    unsigned long **address, struct page_info **page)
+{
+    if (i == 0)
+        *address = &desc->address[0];
+    else
+        (*address)++;
+
+    /* When crossing a page boundary, the machine address must be recalculated. */
+    if (((unsigned long)*address & ~PAGE_MASK) == 0) {
+        unsigned long paddr = (unsigned long)
+            &(((const struct xencomm_desc*)handle)->address[i]);
+        put_page(*page);
+        return xencomm_paddr_to_maddr(paddr, (unsigned long*)address, page);
+    }
+    return 0;
+}
+
+static int
+xencomm_copy_chunk_from(unsigned long to, unsigned long paddr,
+                        unsigned int len)
+{
+    unsigned long maddr;
+    struct page_info *page;
+
+    while (1) {
+        int res;
+        res = xencomm_paddr_to_maddr(paddr, &maddr, &page);
+        if (res != 0) {
+            if (res == -EAGAIN)
+                continue; /* Try again. */
+            return res;
+        }
+        if (xencomm_debug)
+            printk("%lx[%d] -> %lx\n", maddr, len, to);
+
+        memcpy((void *)to, (void *)maddr, len);
+        put_page(page);
+        return 0;
+    }
+    /* NOTREACHED */
+}
+
 static unsigned long
 xencomm_inline_from_guest(void *to, const void *from, unsigned int n,
                           unsigned int skip)
@@ -44,17 +121,17 @@ xencomm_inline_from_guest(void *to, cons
 
     while (n > 0) {
         unsigned int chunksz;
-        unsigned long src_maddr;
         unsigned int bytes;
+        int res;
 
         chunksz = PAGE_SIZE - (src_paddr % PAGE_SIZE);
         bytes = min(chunksz, n);
 
-        src_maddr = paddr_to_maddr(src_paddr);
-        if (xencomm_debug)
-            printk("%lx[%d] -> %lx\n", src_maddr, bytes, (unsigned long)to);
-        memcpy(to, (void *)src_maddr, bytes);
+        res = xencomm_copy_chunk_from((unsigned long)to, src_paddr, bytes);
+        if (res != 0)
+            return n;
+
         src_paddr += bytes;
         to += bytes;
         n -= bytes;
@@ -81,6 +158,8 @@ xencomm_copy_from_guest(void *to, const
                         unsigned int skip)
 {
     struct xencomm_desc *desc;
+    struct page_info *page;
+    unsigned long *address;
     unsigned int from_pos = 0;
     unsigned int to_pos = 0;
     unsigned int i = 0;
@@ -88,24 +167,30 @@ xencomm_copy_from_guest(void *to, const
     if (xencomm_is_inline(from))
        return xencomm_inline_from_guest(to, from, n, skip);
 
+    if (xencomm_desc_cross_page_boundary((unsigned long)from))
+        return n;
     /* first we need to access the descriptor */
-    desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)from);
-    if (desc == NULL)
+    if (xencomm_paddr_to_maddr((unsigned long)from,
+                               (unsigned long*)&desc, &page))
         return n;
 
     if (desc->magic != XENCOMM_MAGIC) {
         printk("%s: error: %p magic was 0x%x\n", __func__, desc, desc->magic);
+        put_page(page);
         return n;
     }
 
     /* iterate through the descriptor, copying up to a page at a time */
     while ((to_pos < n) && (i < desc->nr_addrs)) {
-        unsigned long src_paddr = desc->address[i];
+        unsigned long src_paddr;
         unsigned int pgoffset;
         unsigned int chunksz;
         unsigned int chunk_skip;
 
+        if (xencomm_get_address(from, desc, i, &address, &page))
+            return n - to_pos;
+        src_paddr = *address;
         if (src_paddr == XENCOMM_INVALID) {
             i++;
             continue;
@@ -120,17 +205,16 @@ xencomm_copy_from_guest(void *to, const
         skip -= chunk_skip;
 
         if (skip == 0 && chunksz > 0) {
-            unsigned long src_maddr;
-            unsigned long dest = (unsigned long)to + to_pos;
             unsigned int bytes = min(chunksz, n - to_pos);
-
-            src_maddr = paddr_to_maddr(src_paddr + chunk_skip);
-            if (src_maddr == 0)
+            int res;
+
+            res = xencomm_copy_chunk_from((unsigned long)to + to_pos,
+                                          src_paddr + chunk_skip, bytes);
+            if (res != 0) {
+                put_page(page);
                 return n - to_pos;
-
-            if (xencomm_debug)
-                printk("%lx[%d] -> %lx\n", src_maddr, bytes, dest);
-            memcpy((void *)dest, (void *)src_maddr, bytes);
+            }
+
             from_pos += bytes;
             to_pos += bytes;
         }
@@ -138,7 +222,33 @@ xencomm_copy_from_guest(void *to, const
         i++;
     }
 
+    put_page(page);
     return n - to_pos;
+}
+
+static int
+xencomm_copy_chunk_to(unsigned long paddr, unsigned long from,
+                      unsigned int len)
+{
+    unsigned long maddr;
+    struct page_info *page;
+
+    while (1) {
+        int res;
+        res = xencomm_paddr_to_maddr(paddr, &maddr, &page);
+        if (res != 0) {
+            if (res == -EAGAIN)
+                continue; /* Try again. */
+            return res;
+        }
+        if (xencomm_debug)
+            printk("%lx[%d] -> %lx\n", from, len, maddr);
+
+        memcpy((void *)maddr, (void *)from, len);
+        put_page(page);
+        return 0;
+    }
+    /* NOTREACHED */
 }
 
 static unsigned long
@@ -151,17 +261,16 @@ xencomm_inline_to_guest(void *to, const
 
     while (n > 0) {
         unsigned int chunksz;
-        unsigned long dest_maddr;
         unsigned int bytes;
+        int res;
 
         chunksz = PAGE_SIZE - (dest_paddr % PAGE_SIZE);
         bytes = min(chunksz, n);
-
-        dest_maddr = paddr_to_maddr(dest_paddr);
-        if (xencomm_debug)
-            printk("%lx[%d] -> %lx\n", (unsigned long)from, bytes, dest_maddr);
-        memcpy((void *)dest_maddr, (void *)from, bytes);
+        res = xencomm_copy_chunk_to(dest_paddr, (unsigned long)from, bytes);
+        if (res != 0)
+            return n;
+
         dest_paddr += bytes;
         from += bytes;
         n -= bytes;
@@ -188,6 +297,8 @@ xencomm_copy_to_guest(void *to, const vo
                       unsigned int skip)
 {
     struct xencomm_desc *desc;
+    struct page_info *page;
+    unsigned long *address;
     unsigned int from_pos = 0;
     unsigned int to_pos = 0;
     unsigned int i = 0;
@@ -195,23 +306,29 @@ xencomm_copy_to_guest(void *to, const vo
     if (xencomm_is_inline(to))
         return xencomm_inline_to_guest(to, from, n, skip);
 
+    if (xencomm_desc_cross_page_boundary((unsigned long)to))
+        return n;
     /* first we need to access the descriptor */
-    desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)to);
-    if (desc == NULL)
+    if (xencomm_paddr_to_maddr((unsigned long)to,
+                               (unsigned long*)&desc, &page))
         return n;
 
     if (desc->magic != XENCOMM_MAGIC) {
         printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic);
+        put_page(page);
         return n;
     }
 
     /* iterate through the descriptor, copying up to a page at a time */
     while ((from_pos < n) && (i < desc->nr_addrs)) {
-        unsigned long dest_paddr = desc->address[i];
+        unsigned long dest_paddr;
         unsigned int pgoffset;
         unsigned int chunksz;
         unsigned int chunk_skip;
 
+        if (xencomm_get_address(to, desc, i, &address, &page))
+            return n - from_pos;
+        dest_paddr = *address;
         if (dest_paddr == XENCOMM_INVALID) {
             i++;
             continue;
@@ -226,17 +343,16 @@ xencomm_copy_to_guest(void *to, const vo
         skip -= chunk_skip;
 
         if (skip == 0 && chunksz > 0) {
-            unsigned long dest_maddr;
-            unsigned long source = (unsigned long)from + from_pos;
             unsigned int bytes = min(chunksz, n - from_pos);
-
-            dest_maddr = paddr_to_maddr(dest_paddr + chunk_skip);
-            if (dest_maddr == 0)
-                return -1;
-
-            if (xencomm_debug)
-                printk("%lx[%d] -> %lx\n", source, bytes, dest_maddr);
-            memcpy((void *)dest_maddr, (void *)source, bytes);
+            int res;
+
+            res = xencomm_copy_chunk_to(dest_paddr + chunk_skip,
+                                        (unsigned long)from + from_pos, bytes);
+            if (res != 0) {
+                put_page(page);
+                return n - from_pos;
+            }
+
             from_pos += bytes;
             to_pos += bytes;
         }
@@ -244,6 +360,7 @@ xencomm_copy_to_guest(void *to, const vo
         i++;
     }
 
+    put_page(page);
     return n - from_pos;
 }
@@ -258,27 +375,40 @@ int xencomm_add_offset(void **handle, un
 int xencomm_add_offset(void **handle, unsigned int bytes)
 {
     struct xencomm_desc *desc;
+    struct page_info *page;
+    unsigned long *address;
     int i = 0;
 
     if (xencomm_is_inline(*handle))
         return xencomm_inline_add_offset(handle, bytes);
 
+    if (xencomm_desc_cross_page_boundary(*(unsigned long*)handle))
+        return -EINVAL;
     /* first we need to access the descriptor */
-    desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)*handle);
-    if (desc == NULL)
-        return -1;
+    if (xencomm_paddr_to_maddr(*(unsigned long*)handle,
+                               (unsigned long*)&desc, &page))
+        return -EFAULT;
 
     if (desc->magic != XENCOMM_MAGIC) {
         printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic);
-        return -1;
+        put_page(page);
+        return -EINVAL;
     }
 
     /* iterate through the descriptor incrementing addresses */
     while ((bytes > 0) && (i < desc->nr_addrs)) {
-        unsigned long dest_paddr = desc->address[i];
+        unsigned long dest_paddr;
         unsigned int pgoffset;
         unsigned int chunksz;
         unsigned int chunk_skip;
+
+        if (xencomm_get_address(*handle, desc, i, &address, &page))
+            return -EFAULT;
+        dest_paddr = *address;
+        if (dest_paddr == XENCOMM_INVALID) {
+            i++;
+            continue;
+        }
 
         pgoffset = dest_paddr % PAGE_SIZE;
         chunksz = PAGE_SIZE - pgoffset;
@@ -286,31 +416,45 @@ int xencomm_add_offset(void **handle, un
         chunk_skip = min(chunksz, bytes);
         if (chunk_skip == chunksz) {
             /* exhausted this page */
-            desc->address[i] = XENCOMM_INVALID;
+            *address = XENCOMM_INVALID;
         } else {
-            desc->address[i] += chunk_skip;
+            *address += chunk_skip;
         }
         bytes -= chunk_skip;
-    }
+
+        i++;
+    }
+
+    put_page(page);
     return 0;
 }
 
 int xencomm_handle_is_null(void *handle)
 {
     struct xencomm_desc *desc;
+    struct page_info *page;
+    unsigned long *address;
     int i;
 
     if (xencomm_is_inline(handle))
         return xencomm_inline_addr(handle) == 0;
 
-    desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)handle);
-    if (desc == NULL)
+    if (xencomm_desc_cross_page_boundary((unsigned long)handle))
+        return 1; /* EINVAL */
+    if (xencomm_paddr_to_maddr((unsigned long)handle,
+                               (unsigned long*)&desc, &page))
         return 1;
 
-    for (i = 0; i < desc->nr_addrs; i++)
-        if (desc->address[i] != XENCOMM_INVALID)
+    for (i = 0; i < desc->nr_addrs; i++) {
+        if (xencomm_get_address(handle, desc, i, &address, &page))
+            return 1; /* EFAULT */
+
+        if (*address != XENCOMM_INVALID) {
+            put_page(page);
             return 0;
-
+        }
+    }
+
+    put_page(page);
     return 1;
 }

-- 
yamahata

Attachment: 15698_41fde67aa85a_various_fix_xencomm.patch

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel