[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 6/6] xen-gntdev: Introduce HVM version of gntdev
HVM does not allow direct PTE modification, so guest pages must be
allocated and used for grant mappings. If Xen does not provide an
auto-translated physmap, the existing direct PTE modification is more
efficient.

Signed-off-by: Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
---
 drivers/xen/Makefile     |    2 +
 drivers/xen/gntdev-hvm.c |  744 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/gntdev.c     |    3 +
 3 files changed, 749 insertions(+), 0 deletions(-)
 create mode 100644 drivers/xen/gntdev-hvm.c

diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 9814c1d..ab0e6eb 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)		+= balloon.o
 obj-$(CONFIG_XEN_DEV_EVTCHN)		+= xen-evtchn.o
 obj-$(CONFIG_XEN_GNTDEV)		+= xen-gntdev.o
+obj-$(CONFIG_XEN_GNTDEV)		+= xen-gntdev-hvm.o
 obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
 obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= pciback/
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
@@ -26,4 +27,5 @@ obj-$(CONFIG_XEN_PLATFORM_PCI)	+= platform-pci.o
 
 xen-evtchn-y			:= evtchn.o
 xen-gntdev-y			:= gntdev.o
+xen-gntdev-hvm-y		:= gntdev-hvm.o
 xen-gntalloc-y			:= gntalloc.o
diff --git a/drivers/xen/gntdev-hvm.c b/drivers/xen/gntdev-hvm.c
new file mode 100644
index 0000000..331d5af
--- /dev/null
+++ b/drivers/xen/gntdev-hvm.c
@@ -0,0 +1,744 @@
+/******************************************************************************
+ * gntdev-hvm.c
+ *
+ * Device for accessing (in user-space) pages that have been granted by other
+ * domains.  This variant serves auto-translated (HVM) guests: each grant is
+ * mapped over a freshly allocated guest page, which the VMA fault handler
+ * then hands to user-space, instead of modifying PTEs directly.
+ *
+ * Copyright (c) 2006-2007, D G Murray.
+ *           (c) 2009 Gerd Hoffmann <kraxel@xxxxxxxxxx>
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+
+#include <xen/xen.h>
+#include <xen/grant_table.h>
+#include <xen/gntdev.h>
+#include <xen/interface/memory.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/page.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Derek G. Murray <Derek.Murray@xxxxxxxxxxxx>, "
+	      "Gerd Hoffmann <kraxel@xxxxxxxxxx>");
+MODULE_DESCRIPTION("User-space granted page access driver");
+
+/* Non-zero enables verbose printk() tracing. */
+static int debug;
+module_param(debug, int, 0644);
+/* Maximum number of grant pages that may be mapped at any one time. */
+static int limit = 1024*1024;
+module_param(limit, int, 0644);
+/* Pages currently charged against 'limit'. */
+static atomic_t pages_mapped = ATOMIC_INIT(0);
+
+/* Per-open-file state. */
+struct gntdev_priv {
+	struct list_head maps;	/* grant_maps, in ascending index order */
+	struct mutex lock;	/* protects maps; held while mapping grants */
+};
+
+/*
+ * One page of a grant_map.  'target' names the grant until it has been
+ * mapped successfully; after that the same storage holds 'handle'.
+ */
+struct granted_page {
+	struct page *page;
+	union {
+		struct ioctl_gntdev_grant_ref target;
+		grant_handle_t handle;
+	};
+};
+
+/* The grants set up by one IOCTL_GNTDEV_MAP_GRANT_REF call. */
+struct grant_map {
+	struct list_head next;
+	int index;			/* page offset within the device file */
+	int count;			/* number of entries in pages[] */
+	atomic_t users;			/* list reference plus one per VMA */
+	unsigned int is_mapped:1;	/* gntdev_do_map() issued the grants */
+	unsigned int is_ro:1;		/* grants use GNTMAP_readonly */
+	struct granted_page pages[];
+};
+
+static struct vm_operations_struct gntdev_vmops;
+
+/* ------------------------------------------------------------------ */
+
+/*
+ * Dump every map of 'priv' to the kernel log; 'text' is appended to the
+ * entry whose index equals 'text_index'.
+ */
+static void gntdev_print_maps(struct gntdev_priv *priv,
+			      char *text, int text_index)
+{
+	struct grant_map *map;
+
+	printk("%s: maps list (priv %p)\n", __FUNCTION__, priv);
+	list_for_each_entry(map, &priv->maps, next) {
+		printk(" %p: %2d+%2d, r%c, %s %d,%d %s\n", map,
+		       map->index, map->count, map->is_ro ? 'o' : 'w',
+		       map->is_mapped ? "use,hnd" : "dom,ref",
+		       map->is_mapped ? atomic_read(&map->users)
+				      : map->pages[0].target.domid,
+		       map->is_mapped ? map->pages[0].handle
+				      : map->pages[0].target.ref,
+		       map->index == text_index && text ? text : "");
+	}
+}
+
+/*
+ * Allocate a map for 'count' grants and copy the grant descriptions into
+ * it.  The new map carries one reference and index 0.  Returns NULL if the
+ * count is unusable or the allocation fails.
+ */
+static struct grant_map *gntdev_alloc_map(int count,
+		struct ioctl_gntdev_grant_ref *grants)
+{
+	struct grant_map *add;
+	int i;
+
+	/* Refuse counts whose allocation size would not fit in a size_t. */
+	if (count <= 0 ||
+	    count > (ULONG_MAX - sizeof(*add)) / sizeof(add->pages[0]))
+		return NULL;
+
+	add = kzalloc(sizeof(*add) + sizeof(add->pages[0]) * count, GFP_KERNEL);
+	if (!add)
+		return NULL;
+
+	atomic_set(&add->users, 1);
+	add->count = count;
+
+	for (i = 0; i < count; i++)
+		add->pages[i].target = grants[i];
+
+	return add;
+}
+
+/*
+ * Link 'add' into priv->maps.  The list is walked in order and 'add' is
+ * given the first index range that ends before the next map starts, or the
+ * range following the last map.
+ */
+static void gntdev_add_map(struct gntdev_priv *priv, struct grant_map *add)
+{
+	struct grant_map *map;
+
+	mutex_lock(&priv->lock);
+
+	list_for_each_entry(map, &priv->maps, next) {
+		if (add->index + add->count < map->index) {
+			list_add_tail(&add->next, &map->next);
+			goto done;
+		}
+		add->index = map->index + map->count;
+	}
+	list_add_tail(&add->next, &priv->maps);
+
+done:
+	if (debug)
+		gntdev_print_maps(priv, "[new]", add->index);
+
+	mutex_unlock(&priv->lock);
+}
+
+/* Unlink 'map' from its list.  Caller holds priv->lock. */
+static void __gntdev_del_map(struct gntdev_priv *priv, struct grant_map *map)
+{
+	list_del(&map->next);
+}
+
+/* Unlink 'map' from priv->maps, taking priv->lock around the removal. */
+static void gntdev_del_map(struct gntdev_priv *priv, struct grant_map *map)
+{
+	mutex_lock(&priv->lock);
+	__gntdev_del_map(priv, map);
+	mutex_unlock(&priv->lock);
+}
+
+/*
+ * Return the map that starts at 'index' and spans exactly 'count' pages, or
+ * NULL if there is none.  Caller holds priv->lock.
+ */
+static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
+		int index, int count)
+{
+	struct grant_map *map;
+
+	list_for_each_entry(map, &priv->maps, next) {
+		if (map->index != index)
+			continue;
+		if (map->count != count)
+			continue;
+		return map;
+	}
+	return NULL;
+}
+
+/* Address of 'page' as handed to gnttab_set_map_op()/gnttab_set_unmap_op(). */
+static phys_addr_t gntdev_page_addr(struct page *page)
+{
+	return (phys_addr_t)(unsigned long)pfn_to_kaddr(page_to_pfn(page));
+}
+
+/*
+ * Unmap all mapped grants of 'map' with one hypercall.  'unmap_ops' is
+ * scratch space provided by the caller, at least map->count entries long.
+ */
+static void gntdev_unmap_fast(struct grant_map *map,
+		struct gnttab_unmap_grant_ref *unmap_ops)
+{
+	int err, flags, i, unmap_size = 0;
+
+	flags = GNTMAP_host_map;
+	if (map->is_ro)
+		flags |= GNTMAP_readonly;
+
+	for (i = 0; i < map->count; i++) {
+		/* Grants that failed to map have no page and no handle. */
+		if (!map->pages[i].page)
+			continue;
+		gnttab_set_unmap_op(&unmap_ops[unmap_size],
+				    gntdev_page_addr(map->pages[i].page),
+				    flags, map->pages[i].handle);
+		unmap_size++;
+	}
+
+	err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+					unmap_ops, unmap_size);
+	WARN_ON(err);
+
+	for (i = 0; i < unmap_size; i++)
+		WARN_ON(unmap_ops[i].status);
+}
+
+/*
+ * Unmap the grants of 'map' one hypercall at a time.  Used when no memory
+ * is available for the batch array that gntdev_unmap_fast() needs.
+ */
+static void gntdev_unmap_slow(struct grant_map *map)
+{
+	int err, flags, i;
+	struct gnttab_unmap_grant_ref unmap_op;
+
+	flags = GNTMAP_host_map;
+	if (map->is_ro)
+		flags |= GNTMAP_readonly;
+
+	for (i = 0; i < map->count; i++) {
+		if (!map->pages[i].page)
+			continue;
+
+		gnttab_set_unmap_op(&unmap_op,
+				    gntdev_page_addr(map->pages[i].page),
+				    flags, map->pages[i].handle);
+		err = HYPERVISOR_grant_table_op(
+			GNTTABOP_unmap_grant_ref, &unmap_op, 1);
+		WARN_ON(err);
+		WARN_ON(unmap_op.status);
+	}
+}
+
+/*
+ * Drop one reference to 'map' (NULL is allowed).  The last reference unmaps
+ * the grants, releases their share of 'limit', and frees the pages and the
+ * map itself.  May sleep, because it allocates with GFP_TEMPORARY.
+ */
+static void gntdev_put_map(struct grant_map *map)
+{
+	struct gnttab_unmap_grant_ref *unmap_ops;
+	int i;
+
+	if (!map)
+		return;
+	if (!atomic_dec_and_test(&map->users))
+		return;
+	if (debug)
+		printk("%s: unmap %p (%d pages)\n", __FUNCTION__, map, map->count);
+	if (map->is_mapped) {
+		unmap_ops = kcalloc(map->count, sizeof(unmap_ops[0]),
+				    GFP_TEMPORARY);
+		if (likely(unmap_ops)) {
+			gntdev_unmap_fast(map, unmap_ops);
+			kfree(unmap_ops);
+		} else {
+			gntdev_unmap_slow(map);
+		}
+
+		atomic_sub(map->count, &pages_mapped);
+	}
+	for (i = 0; i < map->count; i++)
+		if (map->pages[i].page)
+			__free_page(map->pages[i].page);
+	kfree(map);
+}
+
+/* open(): allocate the per-file state with an empty map list. */
+static int gntdev_open(struct inode *inode, struct file *flip)
+{
+	struct gntdev_priv *priv;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&priv->maps);
+	mutex_init(&priv->lock);
+
+	flip->private_data = priv;
+	if (debug)
+		printk("%s: priv %p\n", __FUNCTION__, priv);
+
+	return 0;
+}
+
+/* release(): drop the list's reference on every map, then free the state. */
+static int gntdev_release(struct inode *inode, struct file *flip)
+{
+	struct gntdev_priv *priv = flip->private_data;
+	struct grant_map *map;
+
+	if (debug) {
+		printk("%s: priv %p\n", __FUNCTION__, priv);
+		gntdev_print_maps(priv, NULL, 0);
+	}
+
+	mutex_lock(&priv->lock);
+	while (!list_empty(&priv->maps)) {
+		map = list_entry(priv->maps.next, struct grant_map, next);
+		list_del(&map->next);
+		gntdev_put_map(map);
+	}
+	mutex_unlock(&priv->lock);
+
+	kfree(priv);
+	return 0;
+}
+
+/*
+ * Allocate a backing page for every grant in 'map' and map the grants onto
+ * those pages.  May sleep.
+ *
+ * Returns 0 if every grant was mapped.  If the page limit would be exceeded,
+ * an allocation fails or the hypercall fails, all pages are released and the
+ * map stays unmapped, so the call can be repeated.  If the hypercall ran but
+ * some grants were refused, -EINVAL is returned, the map is marked mapped
+ * and the refused entries are left with a NULL page.
+ */
+static int gntdev_do_map(struct grant_map *map)
+{
+	int err, flags, i;
+	struct page *page;
+	struct gnttab_map_grant_ref *map_ops;
+
+	flags = GNTMAP_host_map;
+	if (map->is_ro)
+		flags |= GNTMAP_readonly;
+
+	err = -ENOMEM;
+
+	if (unlikely(atomic_add_return(map->count, &pages_mapped) > limit)) {
+		if (debug)
+			printk("%s: maps full\n", __FUNCTION__);
+		goto out;
+	}
+
+	map_ops = kcalloc(map->count, sizeof(map_ops[0]), GFP_TEMPORARY);
+	if (!map_ops)
+		goto out;
+
+	for (i = 0; i < map->count; i++) {
+		/*
+		 * No __GFP_HIGHMEM: the page is addressed via pfn_to_kaddr(),
+		 * which is only meaningful for lowmem pages.
+		 */
+		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (unlikely(!page))
+			goto out_free;
+		map->pages[i].page = page;
+		gnttab_set_map_op(&map_ops[i], gntdev_page_addr(page), flags,
+				  map->pages[i].target.ref,
+				  map->pages[i].target.domid);
+	}
+	err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+					map_ops, map->count);
+	if (WARN_ON(err))
+		goto out_free;
+
+	if (debug && map->count)
+		printk("%s: mapped first at gfn=%lx mfn=%lx\n", __func__,
+		       page_to_pfn(map->pages[0].page),
+		       pfn_to_mfn(page_to_pfn(map->pages[0].page)));
+
+	map->is_mapped = 1;
+
+	for (i = 0; i < map->count; i++) {
+		if (map_ops[i].status) {
+			if (debug)
+				printk("%s: failed map at page %d: stat=%d\n",
+				       __FUNCTION__, i, map_ops[i].status);
+			__free_page(map->pages[i].page);
+			map->pages[i].page = NULL;
+			err = -EINVAL;
+		} else {
+			map->pages[i].handle = map_ops[i].handle;
+		}
+	}
+
+out_free:
+	kfree(map_ops);
+out:
+	if (!map->is_mapped) {
+		/* Undo the partial setup so a later mmap() starts afresh. */
+		for (i = 0; i < map->count; i++) {
+			if (map->pages[i].page) {
+				__free_page(map->pages[i].page);
+				map->pages[i].page = NULL;
+			}
+		}
+		atomic_sub(map->count, &pages_mapped);
+	}
+	return err;
+}
+
+/*
+ * IOCTL_GNTDEV_MAP_GRANT_REF: record the grants described by user-space and
+ * report the file offset at which they can later be mmap()ed.  No grant is
+ * mapped here; that happens in gntdev_mmap().
+ */
+static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
+		struct ioctl_gntdev_map_grant_ref __user *u)
+{
+	struct ioctl_gntdev_map_grant_ref op;
+	struct grant_map *map;
+	struct ioctl_gntdev_grant_ref *grants;
+	int err;
+
+	if (copy_from_user(&op, u, sizeof(op)) != 0)
+		return -EFAULT;
+	if (debug)
+		printk("%s: priv %p, add %d\n", __FUNCTION__, priv,
+		       op.count);
+	if (unlikely(op.count <= 0))
+		return -EINVAL;
+	/*
+	 * A map with more pages than 'limit' could never be mmap()ed, and
+	 * the bound guarantees that the count fits grant_map.count.
+	 */
+	if (unlikely(limit < 0 || op.count > (unsigned int)limit))
+		return -EINVAL;
+
+	err = -ENOMEM;
+	grants = kcalloc(op.count, sizeof(grants[0]), GFP_TEMPORARY);
+	if (!grants)
+		goto out_fail;
+
+	err = -EFAULT;
+	if (copy_from_user(grants, u->refs, sizeof(grants[0]) * op.count))
+		goto out_free;
+
+	err = -ENOMEM;
+	map = gntdev_alloc_map(op.count, grants);
+	if (!map)
+		goto out_free;
+
+	gntdev_add_map(priv, map);
+
+	op.index = (u64)map->index << PAGE_SHIFT;
+
+	err = -EFAULT;
+	if (copy_to_user(u, &op, sizeof(op)) != 0)
+		goto out_remove;
+
+	err = 0;
+
+out_free:
+	kfree(grants);
+out_fail:
+	return err;
+
+out_remove:
+	gntdev_del_map(priv, map);
+	gntdev_put_map(map);
+	goto out_free;
+}
+
+/*
+ * IOCTL_GNTDEV_UNMAP_GRANT_REF: remove the map identified by (index, count)
+ * from the file's list and drop the list's reference.  The grants are only
+ * unmapped once no VMA refers to the map any more.
+ */
+static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
+		struct ioctl_gntdev_unmap_grant_ref __user *u)
+{
+	struct ioctl_gntdev_unmap_grant_ref op;
+	struct grant_map *map;
+	int err = 0;
+
+	if (copy_from_user(&op, u, sizeof(op)) != 0)
+		return -EFAULT;
+
+	mutex_lock(&priv->lock);
+	map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
+	if (map)
+		__gntdev_del_map(priv, map);
+	else
+		err = -EINVAL;
+	mutex_unlock(&priv->lock);
+
+	if (debug)
+		printk("%s: priv %p, del %d+%d = %p\n", __FUNCTION__, priv,
+		       (int)op.index, (int)op.count, map);
+
+	gntdev_put_map(map);
+	return err;
+}
+
+/*
+ * IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: report the file offset and page count
+ * of the map behind the gntdev VMA that contains op.vaddr.
+ */
+static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
+		struct ioctl_gntdev_get_offset_for_vaddr __user *u)
+{
+	struct ioctl_gntdev_get_offset_for_vaddr op;
+	struct vm_area_struct *vma;
+	struct grant_map *map;
+	int err = -EINVAL;
+
+	if (copy_from_user(&op, u, sizeof(op)) != 0)
+		return -EFAULT;
+	if (debug)
+		printk("%s: priv %p, offset for vaddr %lx\n", __FUNCTION__, priv,
+		       (unsigned long)op.vaddr);
+
+	/* The VMA may only be inspected while mmap_sem is held. */
+	down_read(&current->mm->mmap_sem);
+	/* find_vma() may return a VMA that starts above the address. */
+	vma = find_vma(current->mm, op.vaddr);
+	if (!vma || vma->vm_start > op.vaddr)
+		goto out_unlock;
+
+	map = vma->vm_private_data;
+	if (vma->vm_ops != &gntdev_vmops || !map)
+		goto out_unlock;
+
+	op.offset = (u64)map->index << PAGE_SHIFT;
+	op.count = map->count;
+	err = 0;
+
+out_unlock:
+	up_read(&current->mm->mmap_sem);
+	if (err)
+		return err;
+
+	if (copy_to_user(u, &op, sizeof(op)) != 0)
+		return -EFAULT;
+	return 0;
+}
+
+/* ioctl() entry point: dispatch 'cmd' to its handler. */
+static long gntdev_ioctl(struct file *flip,
+			 unsigned int cmd, unsigned long arg)
+{
+	struct gntdev_priv *priv = flip->private_data;
+	void __user *ptr = (void __user *)arg;
+
+	switch (cmd) {
+	case IOCTL_GNTDEV_MAP_GRANT_REF:
+		return gntdev_ioctl_map_grant_ref(priv, ptr);
+
+	case IOCTL_GNTDEV_UNMAP_GRANT_REF:
+		return gntdev_ioctl_unmap_grant_ref(priv, ptr);
+
+	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
+		return gntdev_ioctl_get_offset_for_vaddr(priv, ptr);
+
+	default:
+		if (debug)
+			printk("%s: priv %p, unknown cmd %x\n",
+			       __FUNCTION__, priv, cmd);
+		return -ENOIOCTLCMD;
+	}
+}
+
+/*
+ * Fault handler: hand out the backing page for the faulting offset.  The
+ * offset is taken relative to the map rather than to the VMA so that it
+ * stays correct if the VMA has been split.
+ */
+static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct grant_map *map = vma->vm_private_data;
+	pgoff_t pgoff = vmf->pgoff;
+	struct page *page = NULL;
+
+	if (map && map->is_mapped) {
+		pgoff -= map->index;
+		/* pgoff is unsigned: an offset below the map wraps and fails */
+		if (pgoff < (pgoff_t)map->count)
+			page = map->pages[pgoff].page;
+	}
+
+	/* Out of range, or a grant that could not be mapped. */
+	if (!page) {
+		if (debug)
+			printk("%s: vaddr %p, pgoff %lu (shouldn't happen)\n",
+			       __FUNCTION__, vmf->virtual_address, pgoff);
+		return VM_FAULT_SIGBUS;
+	}
+
+	get_page(page);
+	vmf->page = page;
+	return 0;
+}
+
+/* A VMA was duplicated (fork or split): it needs a map reference too. */
+static void gntdev_vma_open(struct vm_area_struct *vma)
+{
+	struct grant_map *map = vma->vm_private_data;
+
+	atomic_inc(&map->users);
+}
+
+/* A VMA went away: drop its reference on the map. */
+static void gntdev_vma_close(struct vm_area_struct *vma)
+{
+	struct grant_map *map = vma->vm_private_data;
+
+	gntdev_put_map(map);
+}
+
+static struct vm_operations_struct gntdev_vmops = {
+	.open = gntdev_vma_open,
+	.fault = gntdev_vma_fault,
+	.close = gntdev_vma_close,
+};
+
+/*
+ * mmap(): attach the VMA to the map that matches its offset and length
+ * exactly, mapping the grants on first use.  The protection of the first
+ * mmap() decides whether the grants are mapped read-only.
+ */
+static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
+{
+	struct gntdev_priv *priv = flip->private_data;
+	int index = vma->vm_pgoff;
+	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	struct grant_map *map;
+	int err = -EINVAL;
+
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	mutex_lock(&priv->lock);
+	map = gntdev_find_map_index(priv, index, count);
+
+	if (debug)
+		printk("%s: map %d+%d at %lx (priv %p, map %p)\n", __func__,
+		       index, count, vma->vm_start, priv, map);
+
+	if (!map)
+		goto unlock_out;
+
+	if (!map->is_mapped) {
+		map->is_ro = !(vma->vm_flags & VM_WRITE);
+		err = gntdev_do_map(map);
+		if (err)
+			goto unlock_out;
+	}
+
+	if ((vma->vm_flags & VM_WRITE) && map->is_ro)
+		goto unlock_out;
+
+	err = 0;
+	vma->vm_ops = &gntdev_vmops;
+
+	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND;
+	vma->vm_flags |= VM_IO;
+	/* Read-only grants must not become writable through mprotect(). */
+	if (map->is_ro)
+		vma->vm_flags &= ~VM_MAYWRITE;
+
+	vma->vm_private_data = map;
+
+	atomic_inc(&map->users);
+
+unlock_out:
+	mutex_unlock(&priv->lock);
+	return err;
+}
+
+static const struct file_operations gntdev_fops = {
+	.owner = THIS_MODULE,
+	.open = gntdev_open,
+	.release = gntdev_release,
+	.mmap = gntdev_mmap,
+	.unlocked_ioctl = gntdev_ioctl
+};
+
+static struct miscdevice gntdev_miscdev = {
+	.minor        = MISC_DYNAMIC_MINOR,
+	.name         = "xen/gntdev",
+	.fops         = &gntdev_fops,
+};
+
+/* ------------------------------------------------------------------ */
+
+/*
+ * Register the device only in Xen guests with an auto-translated physmap;
+ * gntdev.c registers the same device name for all other Xen guests.
+ */
+static int __init gntdev_init(void)
+{
+	int err;
+
+	if (!xen_domain())
+		return -ENODEV;
+
+	if (!xen_feature(XENFEAT_auto_translated_physmap))
+		return -ENODEV;
+
+	err = misc_register(&gntdev_miscdev);
+	if (err != 0) {
+		printk(KERN_ERR "Could not register gntdev device\n");
+		return err;
+	}
+	return 0;
+}
+
+static void __exit gntdev_exit(void)
+{
+	misc_deregister(&gntdev_miscdev);
+}
+
+module_init(gntdev_init);
+module_exit(gntdev_exit);
+
+/* ------------------------------------------------------------------ */
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index a73f07c..f6b98c0 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -626,6 +626,9 @@ static int __init gntdev_init(void)
 	if (!xen_domain())
 		return -ENODEV;
 
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return -ENODEV;
+
 	err = misc_register(&gntdev_miscdev);
 	if (err != 0) {
 		printk(KERN_ERR "Could not register gntdev device\n");
-- 
1.7.2.3

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |