[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
On 12/14/2010 06:55 AM, Daniel De Graaf wrote: > This allows a userspace application to allocate a shared page for > implementing inter-domain communication or device drivers. These > shared pages can be mapped using the gntdev device or by the kernel > in another domain. > > Signed-off-by: Daniel De Graaf <dgdegra@xxxxxxxxxxxxx> > --- > drivers/xen/Kconfig | 7 + > drivers/xen/Makefile | 2 + > drivers/xen/gntalloc.c | 456 > ++++++++++++++++++++++++++++++++++++++++++++++++ > include/xen/gntalloc.h | 68 +++++++ > 4 files changed, 533 insertions(+), 0 deletions(-) > create mode 100644 drivers/xen/gntalloc.c > create mode 100644 include/xen/gntalloc.h > > diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig > index fa9982e..8398cb0 100644 > --- a/drivers/xen/Kconfig > +++ b/drivers/xen/Kconfig > @@ -180,6 +180,13 @@ config XEN_GNTDEV > help > Allows userspace processes use grants. > > +config XEN_GRANT_DEV_ALLOC > + tristate "User-space grant reference allocator driver" > + depends on XEN > + help > + Allows userspace processes to create pages with access granted > + to other domains. 
> + > config XEN_S3 > def_bool y > depends on XEN_DOM0 && ACPI > diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile > index ef1ea63..9814c1d 100644 > --- a/drivers/xen/Makefile > +++ b/drivers/xen/Makefile > @@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM) += xencomm.o > obj-$(CONFIG_XEN_BALLOON) += balloon.o > obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o > obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o > +obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o > obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/ > obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ > obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/ > @@ -25,3 +26,4 @@ obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o > > xen-evtchn-y := evtchn.o > xen-gntdev-y := gntdev.o > +xen-gntalloc-y := gntalloc.o > diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c > new file mode 100644 > index 0000000..f26adfd > --- /dev/null > +++ b/drivers/xen/gntalloc.c > @@ -0,0 +1,456 @@ > +/****************************************************************************** > + * gntalloc.c > + * > + * Device for creating grant references (in user-space) that may be shared > + * with other domains. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > + */ > + > +/* > + * This driver exists to allow userspace programs in Linux to allocate kernel > + * memory that will later be shared with another domain. Without this > device, > + * Linux userspace programs cannot create grant references. > + * > + * How this stuff works: > + * X -> granting a page to Y > + * Y -> mapping the grant from X > + * > + * 1. 
X uses the gntalloc device to allocate a page of kernel memory, P. > + * 2. X creates an entry in the grant table that says domid(Y) can > + * access P. > + * 3. X gives the grant reference identifier, GREF, to Y. > + * 4. A program in Y uses the gntdev device to map the page (owned by X > + * and identified by GREF) into domain(Y) and then into the address > + * space of the program. Behind the scenes, this requires a > + * hypercall in which Xen modifies the host CPU page tables to > + * perform the sharing -- that's where the actual cross-domain mapping > + * occurs. Presumably Y could be any grant-page user, not specifically gntdev? So you could use this to implement a frontend in userspace? > + * 5. A program in X mmap()s a segment of the gntalloc device that > + * corresponds to the shared page. > + * 6. The two userspace programs can now communicate over the shared page. > + * > + * > + * NOTE TO USERSPACE LIBRARIES: > + * The grant allocation and mmap()ing are, naturally, two separate > + * operations. You set up the sharing by calling the create ioctl() and > + * then the mmap(). You must tear down the sharing in the reverse order > + * (munmap() and then the destroy ioctl()). > + * > + * WARNING: Since Xen does not allow a guest to forcibly end the use of a > grant > + * reference, this device can be used to consume kernel memory by leaving > grant > + * references mapped by another domain when an application exits. Therefore, > + * there is a global limit on the number of pages that can be allocated. When > + * all references to the page are unmapped, it will be freed during the next > + * grant operation. 
> + */ > + > +#include <asm/atomic.h> > +#include <linux/module.h> > +#include <linux/miscdevice.h> > +#include <linux/kernel.h> > +#include <linux/init.h> > +#include <linux/slab.h> > +#include <linux/fs.h> > +#include <linux/device.h> > +#include <linux/mm.h> > +#include <asm/uaccess.h> > +#include <linux/types.h> > +#include <linux/list.h> > + > +#include <xen/xen.h> > +#include <xen/page.h> > +#include <xen/grant_table.h> > +#include <xen/gntalloc.h> > + > +static int debug = 0; > +module_param(debug, int, 0644); > + > +static int limit = 1024; > +module_param(limit, int, 0644); > + > +static LIST_HEAD(gref_list); > +static DEFINE_SPINLOCK(gref_lock); > +static int gref_size = 0; > + > +/* Metadata on a grant reference. */ > +struct gntalloc_gref { > + struct list_head next_all; /* list entry gref_list */ > + struct list_head next_file; /* list entry file->list, if open */ > + domid_t foreign_domid; /* The ID of the domain to share with. */ > + grant_ref_t gref_id; /* The grant reference number. */ > + unsigned int users; /* Use count - when zero, waiting on Xen */ > + struct page* page; /* The shared page. 
*/ > +}; > + > +struct gntalloc_file_private_data { > + struct list_head list; > +}; > + > +static void __del_gref(struct gntalloc_gref *gref); > + > +static void do_cleanup(void) > +{ > + struct gntalloc_gref *gref, *n; > + list_for_each_entry_safe(gref, n, &gref_list, next_all) { > + if (!gref->users) > + __del_gref(gref); > + } > +} > + > + > +static int add_gref(domid_t foreign_domid, uint32_t readonly, > + struct gntalloc_file_private_data *priv) > +{ > + int rc; > + struct gntalloc_gref *gref; > + > + rc = -ENOMEM; > + spin_lock(&gref_lock); > + do_cleanup(); > + if (gref_size >= limit) { > + spin_unlock(&gref_lock); > + rc = -ENOSPC; > + goto out; > + } > + gref_size++; > + spin_unlock(&gref_lock); > + > + gref = kzalloc(sizeof(*gref), GFP_KERNEL); > + if (!gref) > + goto out; > + > + gref->foreign_domid = foreign_domid; > + gref->users = 1; > + > + /* Allocate the page to share. */ > + gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO); Could this be GFP_HIGHUSER? > + if (!gref->page) > + goto out_nopage; > + > + /* Grant foreign access to the page. */ > + gref->gref_id = gnttab_grant_foreign_access(foreign_domid, > + pfn_to_mfn(page_to_pfn(gref->page)), readonly); > + if (gref->gref_id < 0) { > + printk(KERN_ERR "%s: failed to grant foreign access for mfn " > + "%lu to domain %u\n", __func__, > + pfn_to_mfn(page_to_pfn(gref->page)), foreign_domid); > + rc = -EFAULT; > + goto out_no_foreign_gref; > + } > + > + /* Add to gref lists. 
*/ > + spin_lock(&gref_lock); > + list_add_tail(&gref->next_all, &gref_list); > + list_add_tail(&gref->next_file, &priv->list); > + spin_unlock(&gref_lock); > + > + return gref->gref_id; > + > +out_no_foreign_gref: > + __free_page(gref->page); > +out_nopage: > + kfree(gref); > +out: > + return rc; > +} > + > +static void __del_gref(struct gntalloc_gref *gref) > +{ > + if (gnttab_query_foreign_access(gref->gref_id)) > + return; > + > + if (!gnttab_end_foreign_access_ref(gref->gref_id, 0)) > + return; > + > + gref_size--; > + list_del(&gref->next_all); > + > + __free_page(gref->page); > + kfree(gref); > +} > + > +static struct gntalloc_gref* find_gref(struct gntalloc_file_private_data > *priv, > + grant_ref_t gref_id) > +{ > + struct gntalloc_gref *gref; > + list_for_each_entry(gref, &priv->list, next_file) { > + if (gref->gref_id == gref_id) > + return gref; > + } > + return NULL; > +} > + > +/* > + * ------------------------------------- > + * File operations. > + * ------------------------------------- > + */ > +static int gntalloc_open(struct inode *inode, struct file *filp) > +{ > + struct gntalloc_file_private_data *priv; > + > + try_module_get(THIS_MODULE); > + > + priv = kzalloc(sizeof(*priv), GFP_KERNEL); > + if (!priv) > + goto out_nomem; > + INIT_LIST_HEAD(&priv->list); > + > + filp->private_data = priv; > + > + if (debug) > + printk("%s: priv %p\n", __FUNCTION__, priv); > + > + return 0; > + > +out_nomem: > + return -ENOMEM; > +} > + > +static int gntalloc_release(struct inode *inode, struct file *filp) > +{ > + struct gntalloc_file_private_data *priv = filp->private_data; > + struct gntalloc_gref *gref; > + > + if (debug) > + printk("%s: priv %p\n", __FUNCTION__, priv); > + > + spin_lock(&gref_lock); Presumably this is unnecessary because there can be no other users if you're tearing down the list and destroying priv. 
> + while (!list_empty(&priv->list)) { > + gref = list_entry(priv->list.next, > + struct gntalloc_gref, next_file); > + list_del(&gref->next_file); > + gref->users--; > + if (gref->users == 0) > + __del_gref(gref); > + } > + kfree(priv); > + spin_unlock(&gref_lock); > + > + module_put(THIS_MODULE); > + > + return 0; > +} > + > +static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv, > + void __user *arg) > +{ > + int rc = 0; > + struct ioctl_gntalloc_alloc_gref op; > + > + if (debug) > + printk("%s: priv %p\n", __FUNCTION__, priv); > + > + if (copy_from_user(&op, arg, sizeof(op))) { > + rc = -EFAULT; > + goto alloc_grant_out; > + } > + rc = add_gref(op.foreign_domid, op.readonly, priv); > + if (rc < 0) > + goto alloc_grant_out; > + > + op.gref_id = rc; > + op.page_idx = rc; Hm, see below. > + > + rc = 0; > + > + if (copy_to_user((void __user *)arg, &op, sizeof(op))) { > + rc = -EFAULT; > + goto alloc_grant_out; > + } > + > +alloc_grant_out: > + return rc; > +} > + > +static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv, > + void __user *arg) > +{ > + int rc = 0; > + struct ioctl_gntalloc_dealloc_gref op; > + struct gntalloc_gref *gref; > + > + if (debug) > + printk("%s: priv %p\n", __FUNCTION__, priv); > + > + if (copy_from_user(&op, arg, sizeof(op))) { > + rc = -EFAULT; > + goto dealloc_grant_out; > + } > + > + spin_lock(&gref_lock); > + gref = find_gref(priv, op.gref_id); > + if (gref) { > + list_del(&gref->next_file); > + gref->users--; > + rc = 0; > + } else { > + rc = -EINVAL; > + } > + > + do_cleanup(); > + spin_unlock(&gref_lock); > +dealloc_grant_out: > + return rc; > +} > + > +static long gntalloc_ioctl(struct file *filp, unsigned int cmd, > + unsigned long arg) > +{ > + struct gntalloc_file_private_data *priv = filp->private_data; > + > + switch (cmd) { > + case IOCTL_GNTALLOC_ALLOC_GREF: > + return gntalloc_ioctl_alloc(priv, (void __user*)arg); > + > + case IOCTL_GNTALLOC_DEALLOC_GREF: > + return 
gntalloc_ioctl_dealloc(priv, (void __user*)arg); > + > + default: > + return -ENOIOCTLCMD; > + } > + > + return 0; > +} > + > +static int gntalloc_vma_fault(struct vm_area_struct *vma, struct vm_fault > *vmf) > +{ > + struct gntalloc_gref *gref = vma->vm_private_data; > + if (!gref) > + return VM_FAULT_SIGBUS; > + > + vmf->page = gref->page; > + get_page(vmf->page); > + > + return 0; > +}; > + > +static void gntalloc_vma_close(struct vm_area_struct *vma) > +{ > + struct gntalloc_gref *gref = vma->vm_private_data; > + if (!gref) > + return; > + > + spin_lock(&gref_lock); > + gref->users--; > + if (gref->users == 0) > + __del_gref(gref); > + spin_unlock(&gref_lock); > +} > + > +static struct vm_operations_struct gntalloc_vmops = { > + .fault = gntalloc_vma_fault, > + .close = gntalloc_vma_close, > +}; > + > +static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) > +{ > + struct gntalloc_file_private_data *priv = filp->private_data; > + struct gntalloc_gref *gref; > + > + if (debug) > + printk("%s: priv %p, page %lu\n", __func__, > + priv, vma->vm_pgoff); > + > + /* > + * There is a 1-to-1 correspondence of grant references to shared > + * pages, so it only makes sense to map exactly one page per > + * call to mmap(). > + */ Single-page mmap makes sense if the only possible use-cases are for single-page mappings, but if you're talking about framebuffers and the like it seems like a very awkward way to use mmap. It would be cleaner from an API perspective to have a user-mode defined flat address space indexed by pgoff which maps to an array of grefs, so you can sensibly do a multi-page mapping. It would also allow you to hide the grefs from usermode entirely. Then it's just up to usermode to choose suitable file offsets for itself. 
> + if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) != 1) { > + printk(KERN_ERR "%s: Only one page can be memory-mapped " > + "per grant reference.\n", __func__); > + return -EINVAL; > + } > + > + if (!(vma->vm_flags & VM_SHARED)) { > + printk(KERN_ERR "%s: Mapping must be shared.\n", > + __func__); > + return -EINVAL; > + } > + > + spin_lock(&gref_lock); > + gref = find_gref(priv, vma->vm_pgoff); > + if (gref == NULL) { > + spin_unlock(&gref_lock); > + printk(KERN_ERR "%s: Could not find a grant reference with " > + "page index %lu.\n", __func__, vma->vm_pgoff); > + return -ENOENT; > + } > + gref->users++; > + spin_unlock(&gref_lock); > + > + vma->vm_private_data = gref; > + > + /* This flag prevents Bad PTE errors when the memory is unmapped. */ > + vma->vm_flags |= VM_RESERVED; > + vma->vm_flags |= VM_DONTCOPY; > + vma->vm_flags |= VM_IO; If you set VM_PFNMAP then you don't need to deal with faults. > + > + vma->vm_ops = &gntalloc_vmops; > + > + return 0; > +} > + > +static const struct file_operations gntalloc_fops = { > + .owner = THIS_MODULE, > + .open = gntalloc_open, > + .release = gntalloc_release, > + .unlocked_ioctl = gntalloc_ioctl, > + .mmap = gntalloc_mmap > +}; > + > +/* > + * ------------------------------------- > + * Module creation/destruction. 
> + * ------------------------------------- > + */ > +static struct miscdevice gntalloc_miscdev = { > + .minor = MISC_DYNAMIC_MINOR, > + .name = "xen/gntalloc", > + .fops = &gntalloc_fops, > +}; > + > +static int __init gntalloc_init(void) > +{ > + int err; > + > + if (!xen_domain()) { > + if (debug) > + printk(KERN_ERR "gntalloc: You must be running Xen\n"); > + return -ENODEV; > + } > + > + err = misc_register(&gntalloc_miscdev); > + if (err != 0) { > + printk(KERN_ERR "Could not register misc gntalloc device\n"); > + return err; > + } > + > + if (debug) > + printk(KERN_INFO "Created grant allocation device at %d,%d\n", > + MISC_MAJOR, gntalloc_miscdev.minor); > + > + return 0; > +} > + > +static void __exit gntalloc_exit(void) > +{ > + misc_deregister(&gntalloc_miscdev); > +} > + > +module_init(gntalloc_init); > +module_exit(gntalloc_exit); > + > +MODULE_LICENSE("GPL"); > +MODULE_AUTHOR("Carter Weatherly <carter.weatherly@xxxxxxxxxx>, " > + "Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>"); > +MODULE_DESCRIPTION("User-space grant reference allocator driver"); > diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h > new file mode 100644 > index 0000000..76b70d7 > --- /dev/null > +++ b/include/xen/gntalloc.h > @@ -0,0 +1,68 @@ > +/****************************************************************************** > + * gntalloc.h > + * > + * Interface to /dev/xen/gntalloc. 
> + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License version 2 > + * as published by the Free Software Foundation; or, when distributed > + * separately from the Linux kernel or incorporated into other > + * software packages, subject to the following license: > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this source file (the "Software"), to deal in the Software without > + * restriction, including without limitation the rights to use, copy, modify, > + * merge, publish, distribute, sublicense, and/or sell copies of the > Software, > + * and to permit persons to whom the Software is furnished to do so, subject > to > + * the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > THE > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > + * IN THE SOFTWARE. > + */ > + > +#ifndef __LINUX_PUBLIC_GNTALLOC_H__ > +#define __LINUX_PUBLIC_GNTALLOC_H__ > + > +/* > + * Allocates a new page and creates a new grant reference. > + * > + * N.B. The page_idx is really the address >> PAGE_SHIFT, meaning it's the > + * page number and not an actual address. It must be shifted again prior > + * to feeding it to mmap() (i.e. page_idx << PAGE_SHIFT). 
> + */ > +#define IOCTL_GNTALLOC_ALLOC_GREF \ > +_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntalloc_alloc_gref)) > +struct ioctl_gntalloc_alloc_gref { > + /* IN parameters */ > + /* The ID of the domain creating the grant reference. */ > + domid_t owner_domid; > + /* The ID of the domain to be given access to the grant. */ > + domid_t foreign_domid; > + /* The type of access given to domid. */ > + uint32_t readonly; > + /* OUT parameters */ > + /* The grant reference of the newly created grant. */ > + grant_ref_t gref_id; > + /* The page index (page number, NOT address) for grant mmap(). */ > + uint32_t page_idx; > +}; > + > +/* > + * Deallocates the grant reference, freeing the associated page. > + */ > +#define IOCTL_GNTALLOC_DEALLOC_GREF \ > +_IOC(_IOC_NONE, 'G', 2, sizeof(struct ioctl_gntalloc_dealloc_gref)) > +struct ioctl_gntalloc_dealloc_gref { > + /* IN parameter */ > + /* The grant reference to deallocate. */ > + grant_ref_t gref_id; > +}; > +#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */ J _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |