[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 4/9] /dev/xenshare for accessing/mapping shared pages from userspace



To manipulate shared pages from userspace, we use a simple device.
Userspace can gain access to a share by handle, mmap it, place a
watch on a trigger, fire triggers, and do scatter-gather transfers.

FIXME: Should use vm_insert_page these days.

diff -r 125c7cd65739 linux-2.6-xen-sparse/drivers/xen/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/Makefile Thu Jun  1 23:24:05 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Fri Jun  2 09:25:42 2006
@@ -8,6 +8,7 @@
 obj-y  += balloon/
 obj-y  += privcmd/
 obj-y  += xenbus/
+obj-y  += xenshare.o
 
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)       += blkback/
 obj-$(CONFIG_XEN_NETDEV_BACKEND)       += netback/
diff -r 125c7cd65739 linux-2.6-xen-sparse/drivers/xen/xenshare.c
--- /dev/null   Thu Jun  1 23:24:05 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/xenshare.c       Fri Jun  2 09:25:42 2006
@@ -0,0 +1,365 @@
+/* Userspace interface for accessing share regions.
+ *
+ * Copyright 2006 Rusty Russell <rusty@xxxxxxxxxxxxxxx> IBM Corporation
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#define DEBUG
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/ioctl.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/device.h>
+#include <asm/hypervisor.h>
+#include <xen/interface/share.h>
+#include <xen/public/xenshare.h>
+#include <xen/evtchn.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/share.h>
+
+/* FIXME: %s/pr_debug(/pr_debug(/g*/
+/* Per-open-file state, allocated by get_share() and hung off
+ * file->private_data. */
+struct share_info {
+       /* The share this file is attached to (from xen_share_get). */
+       struct xen_share *share;
+
+       /* Set to 1 locally when the sg buffer is allocated or when
+        * registration fails; cleared just before the buffer is handed
+        * to xen_sg_register(), which also receives its address.
+        * read() sleeps until it is non-zero. */
+       int out_sg_used;
+       /* Number of pages requested by the mmap that created out_sg. */
+       unsigned int out_sg_pages;
+       /* First page of the scatter-gather buffer, NULL until mmapped. */
+       struct page *out_sg;
+
+       /* Trigger they placed watch on (-1 == none) */
+       int watch_number;
+       /* Starts at 1; its address is handed to xen_share_watch() and
+        * read() treats a value <= 0 as "the watch fired". */
+       int watch_result;
+
+       /* Callback hooked onto the share; wakes up 'waiters'. */
+       struct xen_share_handler handler;
+       /* Tasks blocked in read(). */
+       wait_queue_head_t waiters;
+};
+
+/* FIXME: Should we register handlers as required? */
+/* Share callback installed by get_share(): recover the owning share_info
+ * from the embedded handler and wake every task sleeping in read(). */
+static void share_io_handler(struct xen_share_handler *handler)
+{
+       struct share_info *owner =
+               container_of(handler, struct share_info, handler);
+
+       pr_debug("xenshare: interrupt!\n");
+       wake_up_all(&owner->waiters);
+}
+
+/* IOCTL_XENSHARE_GET_SHARE: attach this open file to a share.
+ *
+ * udata points to a struct xenshare_get_share in userspace.  On success
+ * the per-file share_info is stored in file->private_data and the peer id
+ * of the share is returned.  Returns -EFAULT if the argument cannot be
+ * copied, -EBUSY if the file already has a share, -ENOMEM on allocation
+ * failure, or the error encoded by xen_share_get().
+ */
+static int get_share(struct file *file, void __user *udata)
+{
+       struct xenshare_get_share share;
+       struct share_info *info;
+       int err;
+
+       if (copy_from_user(&share, udata, sizeof(share)) != 0)
+               return -EFAULT;
+
+       if (file->private_data)
+               return -EBUSY;
+
+       info = kmalloc(sizeof(*info), GFP_KERNEL);
+       if (!info)
+               return -ENOMEM;
+
+       /* kmalloc() does not zero memory, so every field that is tested
+        * before the sg buffer exists must be set here: read() checks
+        * out_sg_used even if userspace never mmapped the buffer. */
+       info->out_sg = NULL;
+       info->out_sg_used = 0;
+       info->out_sg_pages = 0;
+       info->watch_number = -1;
+       info->watch_result = 1;
+       init_waitqueue_head(&info->waiters);
+
+       info->share = xen_share_get(share.share_ref, share.num_pages);
+       if (IS_ERR(info->share)) {
+               err = PTR_ERR(info->share);
+               pr_debug("xenshare: get_share returned %i\n", err);
+               goto free_info;
+       }
+       info->handler.handler = share_io_handler;
+       xen_share_add_handler(info->share, &info->handler);
+       file->private_data = info;
+       return info->share->peerid;
+
+free_info:
+       kfree(info);
+       return err;
+}
+
+/* Describe the first 'len' bytes of the file's sg buffer as an array of
+ * xen_sg entries, one per page (the last one may be partial).
+ *
+ * sg must have room for XEN_SG_MAX entries.  Returns the number of
+ * entries filled in, -EINVAL if no buffer has been created yet (no mmap
+ * at offset 0), or -ENOSPC if 'len' exceeds XEN_SG_MAX pages or the size
+ * of the buffer.
+ */
+static int pages_to_sg(struct share_info *info,
+                      struct xen_sg sg[],
+                      unsigned long len)
+{
+       unsigned int i;
+
+       /* Without a buffer there are no pages to describe. */
+       if (!info->out_sg)
+               return -EINVAL;
+
+       if (len > PAGE_SIZE * XEN_SG_MAX)
+               return -ENOSPC;
+
+       /* Never describe pages beyond what the mmap asked us to allocate:
+        * those do not belong to this buffer. */
+       if (len > (unsigned long)info->out_sg_pages * PAGE_SIZE)
+               return -ENOSPC;
+
+       /* Register this length of our buffer as sg. */
+       for (i = 0; i < len/PAGE_SIZE; i++) {
+               sg[i].addr = page_to_pfn(info->out_sg + i) << PAGE_SHIFT;
+               sg[i].len = PAGE_SIZE;
+       }
+       /* Trailing partial page, if any. */
+       if (len % PAGE_SIZE) {
+               sg[i].addr = page_to_pfn(info->out_sg + i) << PAGE_SHIFT;
+               sg[i].len = len % PAGE_SIZE;
+               i++;
+       }
+       return i;
+}
+
+/* IOCTL_XENSHARE_SG_SEND: transfer the first 'len' bytes of the sg buffer
+ * out on the given queue.  udata points to a struct xenshare_sg.
+ * Returns -EFAULT on a bad user pointer, -EINVAL if no share has been
+ * obtained, a pages_to_sg() error, or whatever xen_sg_xfer() returns. */
+static int send_sg(struct file *file, void __user *udata)
+{
+       struct share_info *info = file->private_data;
+       struct xenshare_sg req;
+       struct xen_sg sg[XEN_SG_MAX];
+       int nr;
+
+       if (copy_from_user(&req, udata, sizeof(req)))
+               return -EFAULT;
+       if (!info)
+               return -EINVAL;
+
+       nr = pages_to_sg(info, sg, req.len);
+       if (nr < 0)
+               return nr;
+
+       return xen_sg_xfer(info->share, req.queue, XEN_SG_OUT, nr, sg);
+}
+
+/* IOCTL_XENSHARE_SG_REGISTER: register the first 'len' bytes of the sg
+ * buffer as an incoming sg on the given queue.  udata points to a
+ * struct xenshare_sg.  out_sg_used is cleared before registering and its
+ * address is passed to xen_sg_register(); if registration fails it is
+ * set back to 1.  Returns the result of xen_sg_register(), or -EFAULT /
+ * -EINVAL / a pages_to_sg() error. */
+static int register_sg(struct file *file, void __user *udata)
+{
+       struct xen_sg sg[XEN_SG_MAX];
+       struct xenshare_sg req;
+       struct share_info *info = file->private_data;
+       int nr, ret;
+
+       if (copy_from_user(&req, udata, sizeof(req)))
+               return -EFAULT;
+       if (!info)
+               return -EINVAL;
+
+       nr = pages_to_sg(info, sg, req.len);
+       if (nr < 0)
+               return nr;
+
+       info->out_sg_used = 0;
+       ret = xen_sg_register(info->share, XEN_SG_IN, req.queue,
+                             &info->out_sg_used, nr, sg);
+       pr_debug("xenshare: Registered sg: %i\n", ret);
+       if (ret != 0)
+               info->out_sg_used = 1;
+
+       return ret;
+}
+
+/* IOCTL_XENSHARE_WATCH: place a watch on the given trigger number.
+ *
+ * Only one watch per file is supported.  Returns -EINVAL if no share has
+ * been obtained or the trigger number cannot be stored as a non-negative
+ * int, -EBUSY if a watch is already placed, otherwise the result of
+ * xen_share_watch().
+ */
+static int watch(struct file *file, unsigned long trigger)
+{
+       struct share_info *info = file->private_data;
+       int number = trigger;
+       int err;
+
+       if (!info)
+               return -EINVAL;
+
+       pr_debug("xenshare: watch %li\n", trigger);
+
+       /* watch_number is an int where -1 means "no watch", and read()
+        * reports a fired watch as -watch_number.  A trigger that does
+        * not survive the conversion would be mistaken for "none" or
+        * reported wrongly, so refuse it. */
+       if (number < 0 || (unsigned long)number != trigger)
+               return -EINVAL;
+
+       if (info->watch_number != -1)
+               return -EBUSY;
+
+       info->watch_number = number;
+       err = xen_share_watch(info->share, trigger, &info->watch_result);
+       if (err)
+               info->watch_number = -1;
+       pr_debug("xenshare: watch returned %i\n", err);
+       return err;
+}
+
+/* IOCTL_XENSHARE_TRIGGER: fire the given trigger on the file's share.
+ * Returns -EINVAL if no share has been obtained, otherwise the result of
+ * xen_share_trigger(). */
+static int trigger(struct file *file, unsigned long watch_number)
+{
+       struct share_info *info = file->private_data;
+       int ret = -EINVAL;
+
+       if (info) {
+               pr_debug("xenshare: trigger %li\n", watch_number);
+               ret = xen_share_trigger(info->share, watch_number);
+       }
+       return ret;
+}
+
+/* ioctl entry point: dispatch each IOCTL_XENSHARE_* command to its
+ * handler.  'data' is a userspace pointer for the GET_SHARE/SG commands
+ * and a plain number for WATCH/TRIGGER.  Unknown commands get -ENOTTY. */
+static int xenshare_ioctl(struct inode *inode, struct file *file,
+                         unsigned int cmd, unsigned long data)
+{
+       void __user *uarg = (void __user *)data;
+       int ret;
+
+       switch (cmd) {
+       case IOCTL_XENSHARE_GET_SHARE:
+               ret = get_share(file, uarg);
+               break;
+       case IOCTL_XENSHARE_SG_SEND:
+               ret = send_sg(file, uarg);
+               break;
+       case IOCTL_XENSHARE_SG_REGISTER:
+               ret = register_sg(file, uarg);
+               break;
+       case IOCTL_XENSHARE_WATCH:
+               ret = watch(file, data);
+               break;
+       case IOCTL_XENSHARE_TRIGGER:
+               ret = trigger(file, data);
+               break;
+       default:
+               ret = -ENOTTY;
+               break;
+       }
+       return ret;
+}
+
+/* In 2.6.12, this is how you map a kernel page.  Later, use vm_insert_page.
+ *
+ * Fault handler for the sg buffer mapping: vm_private_data holds the
+ * first page of the buffer and the faulting address selects the page
+ * within it.  Returns NOPAGE_SIGBUS for an address outside the vma.
+ */
+static struct page *share_nopage(struct vm_area_struct *vma,
+                                unsigned long vaddr, int *type)
+{
+       struct page *page;
+
+       /* vm_end is the first address past the mapping, so it is already
+        * out of range. */
+       if (vaddr < vma->vm_start || vaddr >= vma->vm_end)
+               return NOPAGE_SIGBUS;
+
+       page = (struct page *)vma->vm_private_data
+               + (vaddr - vma->vm_start) / PAGE_SIZE;
+
+       /* The caller installs this page in the page tables and drops a
+        * reference when it is unmapped, so hand over a reference of our
+        * own; otherwise the count underflows on munmap. */
+       get_page(page);
+       if (type)
+               *type = VM_FAULT_MINOR;
+       return page;
+}
+
+/* Pages of the sg buffer are faulted in on demand by share_nopage(). */
+static struct vm_operations_struct xenshare_vm_ops =
+{
+       .nopage   = share_nopage,
+};
+
+/* Set the vma up so that faults are served from the contiguous run of
+ * pages starting at 'page'. */
+static void map_pages(struct vm_area_struct *vma, struct page *page)
+{
+       vma->vm_ops = &xenshare_vm_ops;
+       vma->vm_flags |= VM_DONTEXPAND | VM_RESERVED;
+       vma->vm_flags &= ~VM_IO; /* using shared anonymous pages */
+       vma->vm_private_data = page;
+}
+
+/* mmap at offset 0: allocate the sg buffer on first use and map it.
+ *
+ * The first mmap fixes the buffer size (at most XEN_SG_MAX pages); later
+ * mmaps may map the same buffer but not more pages than it has.
+ * Returns 0, -ENOSPC if too many pages are requested, or -ENOMEM.
+ */
+static int create_and_map_sg(struct share_info *info,
+                            struct vm_area_struct *vma)
+{
+       unsigned long pages = (vma->vm_end - vma->vm_start) / PAGE_SIZE;
+
+       if (!info->out_sg) {
+               if (pages > XEN_SG_MAX)
+                       return -ENOSPC;
+               /* fls(pages-1) is the smallest order covering 'pages'.
+                * __GFP_ZERO: the buffer is exposed to userspace, so it
+                * must not carry stale kernel data.
+                * __GFP_COMP: share_nopage() takes a reference on
+                * individual pages, which is only safe on a higher-order
+                * allocation if it is a compound page. */
+               info->out_sg = alloc_pages(GFP_KERNEL | __GFP_ZERO |
+                                          __GFP_COMP, fls(pages-1));
+               if (!info->out_sg) {
+                       printk("Could not allocate %i pages\n",
+                              1<<fls(pages-1));
+                       return -ENOMEM;
+               }
+               info->out_sg_pages = pages;
+               /* We set this to 0 when registered with hypervisor. */
+               info->out_sg_used = 1;
+       }
+
+       /* Can't map more than we have. */
+       if (pages > info->out_sg_pages)
+               return -ENOSPC;
+
+       map_pages(vma, info->out_sg);
+       return 0;
+}
+
+/* mmap entry point.  The page offset selects what is mapped: 0 is the
+ * scatter-gather buffer, XENSHARE_MAP_SHARE_PAGE is the share itself.
+ * Any other offset, or calling before a share has been obtained, yields
+ * -EINVAL. */
+static int xenshare_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       struct share_info *info = file->private_data;
+
+       if (!info) {
+               pr_debug("mmap before get_share file %p\n", file);
+               return -EINVAL;
+       }
+
+       switch (vma->vm_pgoff) {
+       case 0:
+               return create_and_map_sg(info, vma);
+       case XENSHARE_MAP_SHARE_PAGE:
+               return xen_share_map(info->share, vma);
+       default:
+               pr_debug("Unknown mmap offset %li\n", vma->vm_pgoff);
+               return -EINVAL;
+       }
+}
+
+/* Read means wait for sg to be used / watch to be fired.
+ *
+ * 'size' must be exactly sizeof(int).  Sleeps (interruptibly) until
+ * out_sg_used is non-zero or watch_result is zero or negative, then
+ * copies one int to userspace: minus the watch number if the watch
+ * fired (watch_result is re-armed to 1), otherwise out_sg_used.
+ * Returns 'size' on success, -EINVAL for a bad size or missing share,
+ * -EFAULT on a bad user pointer, or the wait's error if interrupted.
+ */
+static ssize_t read(struct file *file, char __user *ubuf, size_t size,
+                   loff_t *off)
+{
+       struct share_info *info = file->private_data;
+       int value;
+       int err;
+
+       if (!info)
+               return -EINVAL;
+       if (size != sizeof(value))
+               return -EINVAL;
+
+       /* Wake on the same condition that is tested below: a negative
+        * watch_result counts as fired too, not only zero. */
+       err = wait_event_interruptible(info->waiters,
+                                      info->out_sg_used ||
+                                      info->watch_result <= 0);
+       if (err)
+               return err;
+
+       /* 0 or negative indicates the watch fired. */
+       if (info->watch_result <= 0) {
+               value = -info->watch_number;
+               info->watch_result = 1;
+               pr_debug("Watch number %i\n", info->watch_number);
+       } else {
+               value = info->out_sg_used;
+               pr_debug("sg_used %i\n", value);
+       }
+
+       if (copy_to_user(ubuf, &value, sizeof(value)) != 0)
+               return -EFAULT;
+       return size;
+}
+
+/* Last close of the file: undo everything get_share() and later calls
+ * set up -- unregister the sg buffer, remove the watch, unhook the
+ * handler, drop the share and free the per-file state.
+ *
+ * NOTE(review): the pages behind out_sg come from alloc_pages() and no
+ * matching free is visible in this file -- looks like a leak; confirm. */
+static int xenshare_release(struct inode *inode, struct file *file)
+{
+       struct share_info *info = file->private_data;
+
+       /* No private_data means we were opened and closed without ever
+        * obtaining a share, so there is nothing to undo. */
+       if (info) {
+               /* Unregister sg before drop */
+               if (info->out_sg)
+                       xen_sg_unregister(info->share,
+                                         page_to_pfn(info->out_sg)
+                                         << PAGE_SHIFT);
+
+               if (info->watch_number != -1)
+                       xen_share_unwatch(info->share, info->watch_number);
+
+               xen_share_remove_handler(info->share, &info->handler);
+               xen_share_put(info->share);
+               kfree(info);
+               file->private_data = NULL;
+       }
+
+       return 0;
+}
+
+/* File operations of the xenshare character device.  .owner lets the
+ * VFS hold a module reference for as long as the device is open. */
+static struct file_operations xenshare_file_ops = {
+       .owner = THIS_MODULE,
+       .ioctl = xenshare_ioctl,
+       .mmap = xenshare_mmap,
+       .read = read,
+       .release = xenshare_release,
+};
+
+/* Module init: register the "xenshare" character device with a dynamic
+ * major and create its class device under the "xen" class.
+ * Returns 0 on success; on any failure everything registered so far is
+ * undone and the error code is returned. */
+static int __init init(void)
+{
+       struct class *xen_class;
+       struct class_device *cdev;
+       int major;
+       int err;
+
+       /* With major 0 the return value is the allocated major number. */
+       major = register_chrdev(0, "xenshare", &xenshare_file_ops);
+       if (major < 0)
+               return major;
+
+       xen_class = class_create(THIS_MODULE, "xen");
+       if (IS_ERR(xen_class)) {
+               err = PTR_ERR(xen_class);
+               goto unregister;
+       }
+
+       /* FIXME: save struct class_device * */
+       cdev = class_device_create(xen_class, NULL, MKDEV(major, 0), NULL,
+                                  "xenshare");
+       if (IS_ERR(cdev)) {
+               err = PTR_ERR(cdev);
+               goto destroy_class;
+       }
+       return 0;
+
+destroy_class:
+       class_destroy(xen_class);
+unregister:
+       unregister_chrdev(major, "xenshare");
+       return err;
+}
+module_init(init);
+MODULE_LICENSE("GPL");
diff -r 125c7cd65739 linux-2.6-xen-sparse/include/xen/public/xenshare.h
--- /dev/null   Thu Jun  1 23:24:05 2006
+++ b/linux-2.6-xen-sparse/include/xen/public/xenshare.h        Fri Jun  2 09:25:42 2006
@@ -0,0 +1,77 @@
+/******************************************************************************
+ * xenshare.h
+ * 
+ * Interface to /dev/xenshare.
+ * 
+ * Copyright 2006 Rusty Russell <rusty@xxxxxxxxxxxxxxx> IBM Corporation
+ * 
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#ifndef __LINUX_XENSHARE_H__
+#define __LINUX_XENSHARE_H__
+
+/* Device is used as follows:
+ * (1) IOCTL_XENSHARE_GET_SHARE is called to get the share.  Then you can
+ *     mmap at page offset XENSHARE_MAP_SHARE_PAGE to access it.
+ * (2) mmap at offset 0 creates the scatter-gather buffer.
+ * (3) IOCTL_XENSHARE_SG_REGISTER registers a length of that buffer with
+ *     the share.
+ * (4) Reading 4 bytes from the fd blocks until the sg is used or a watch
+ *     fires.  For a used sg the 4-byte length is returned; for a fired
+ *     watch the value is minus the watch number.
+ * (5) IOCTL_XENSHARE_SG_SEND is called to send an sg.
+ */
+
+#define XENSHARE_MAP_SHARE_PAGE 0x1000
+
+/* Argument of IOCTL_XENSHARE_GET_SHARE. */
+struct xenshare_get_share {
+       /* Reference identifying the share to attach to. */
+       unsigned long share_ref;
+       /* Number of pages in the share. */
+       unsigned int num_pages;
+};
+
+/* Argument of IOCTL_XENSHARE_SG_SEND and IOCTL_XENSHARE_SG_REGISTER. */
+struct xenshare_sg {
+       /* Number of bytes of the sg buffer (mmapped at offset 0) to use. */
+       unsigned long len;
+       /* Queue to send on / register with. */
+       uint32_t queue;
+};
+
+/* Argument: pointer to struct xenshare_get_share.
+ * After this, you can mmap XENSHARE_MAP_SHARE_PAGE to access the share.
+ * Returns peerid. */
+#define IOCTL_XENSHARE_GET_SHARE                       \
+       _IOR('P', 100, struct xenshare_get_share)
+
+/* Argument: pointer to struct xenshare_sg.
+ * Transfers the xenshare_sg. */
+#define IOCTL_XENSHARE_SG_SEND                                 \
+       _IOR('P', 101, struct xenshare_sg)
+
+/* Argument: pointer to struct xenshare_sg.
+ * Registers the xenshare_sg. */
+#define IOCTL_XENSHARE_SG_REGISTER                             \
+       _IOR('P', 102, struct xenshare_sg)
+
+/* Argument: the trigger number itself (not a pointer).
+ * Registers a watch (currently only 1): read returns -triggernum */
+#define IOCTL_XENSHARE_WATCH                           \
+       _IO('P', 103)
+
+/* Argument: the trigger number itself (not a pointer).
+ * Triggers a watch: returns the same as the hypercall */
+#define IOCTL_XENSHARE_TRIGGER                         \
+       _IO('P', 104)
+
+#endif /* __LINUX_XENSHARE_H__ */

-- 
 ccontrol: http://ccontrol.ozlabs.org


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.