[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 1/3] net: introduce new socket support: xensock



These sockets will be used by the xen-sock frontend/backend
drivers. Those drivers allow applications to connect via xensock
sockets (in this case dom0/domD sockets can be used by the
server application and domU sockets can be used by the
client application).

Signed-off-by: Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx>
---
 drivers/net/Kconfig                 |   4 +
 drivers/net/Makefile                |   2 +
 drivers/net/xensock/Makefile        |   5 +
 drivers/net/xensock/xensock-dev.c   | 269 +++++++++++++
 drivers/net/xensock/xensock-proto.c | 767 ++++++++++++++++++++++++++++++++++++
 include/linux/socket.h              |   4 +-
 include/net/af_xensock.h            |  46 +++
 include/net/xensock.h               | 130 ++++++
 net/core/sock.c                     |   9 +-
 9 files changed, 1232 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/xensock/Makefile
 create mode 100644 drivers/net/xensock/xensock-dev.c
 create mode 100644 drivers/net/xensock/xensock-proto.c
 create mode 100644 include/net/af_xensock.h
 create mode 100644 include/net/xensock.h

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 89402c3..420981a 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -348,6 +348,10 @@ config XEN_NETDEV_BACKEND
          compile this driver as a module, chose M here: the module
          will be called xen-netback.
 
+config XEN_SOCKDEV_PROTO
+       bool
+       default n
+
 config VMXNET3
        tristate "VMware VMXNET3 ethernet driver"
        depends on PCI && INET
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 3fef8a8..43bf910 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -61,6 +61,8 @@ obj-$(CONFIG_VMXNET3) += vmxnet3/
 obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
 obj-$(CONFIG_XEN_NETDEV_BACKEND) += xen-netback/
 
+obj-$(CONFIG_XEN_SOCKDEV_PROTO) += xensock/
+
 obj-$(CONFIG_USB_CATC)          += usb/
 obj-$(CONFIG_USB_KAWETH)        += usb/
 obj-$(CONFIG_USB_PEGASUS)       += usb/
diff --git a/drivers/net/xensock/Makefile b/drivers/net/xensock/Makefile
new file mode 100644
index 0000000..d70db09
--- /dev/null
+++ b/drivers/net/xensock/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the xensock driver
+#
+
+obj-$(CONFIG_XEN_SOCKDEV_PROTO) := xensock-proto.o xensock-dev.o
diff --git a/drivers/net/xensock/xensock-dev.c b/drivers/net/xensock/xensock-dev.c
new file mode 100644
index 0000000..6da8f34
--- /dev/null
+++ b/drivers/net/xensock/xensock-dev.c
@@ -0,0 +1,269 @@
+/*
+ * Xen socket dev driver.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+#include <net/af_xensock.h>
+
+/* List of registered xensock devices, linked through xen_sock_dev.list. */
+LIST_HEAD(xensock_list);
+/* Guards xensock_list; always taken with the _bh variants in this file. */
+DEFINE_RWLOCK(xensock_list_lock);
+/* Tasks sleeping in xensock_dev_wait() until a device becomes available. */
+DECLARE_WAIT_QUEUE_HEAD(xensock_accept_wait);
+
+/* Take the per-device spinlock (dev->lock). */
+static inline void xensock_lock_dev(struct xen_sock_dev *dev)
+{
+       spin_lock(&dev->lock);
+}
+
+/* Drop the per-device spinlock taken by xensock_lock_dev(). */
+static inline void xensock_unlock_dev(struct xen_sock_dev *dev)
+{
+       spin_unlock(&dev->lock);
+}
+
+/* alloc_xen_sock_dev - allocate a zeroed, SOCKDEV_ALIGN-aligned device
+ * @sizeof_priv: size of the driver private area placed after the device
+ * @name: device name; must fit into dev->name including the terminator
+ *
+ * The distance between the kzalloc()ed buffer and the aligned device is
+ * stored in dev->padded so that free_xen_sock_dev() can recover the
+ * original pointer. The device starts with carrier off and queue stopped.
+ *
+ * Returns the device or NULL when the allocation fails.
+ */
+struct xen_sock_dev *alloc_xen_sock_dev(int sizeof_priv, const char *name)
+{
+       struct xen_sock_dev *dev;
+       char *base;
+       int total;
+
+       BUG_ON(strlen(name) >= sizeof(dev->name));
+
+       total = sizeof(struct xen_sock_dev);
+       if (sizeof_priv) {
+               /* private area starts on an aligned boundary */
+               total = ALIGN(sizeof(struct xen_sock_dev), SOCKDEV_ALIGN);
+               total += sizeof_priv;
+       }
+
+       /* slack so the whole construct can be aligned after allocation */
+       total += SOCKDEV_ALIGN - 1;
+
+       base = kzalloc(total, GFP_KERNEL);
+       if (!base) {
+               pr_err("alloc_xen_sock_dev: Unable to allocate device\n");
+               return NULL;
+       }
+
+       dev = (struct xen_sock_dev *)PTR_ALIGN(base, SOCKDEV_ALIGN);
+       dev->padded = (char *)dev - base;
+
+       strcpy(dev->name, name);
+       spin_lock_init(&dev->lock);
+       sockif_carrier_off(dev);
+       sockif_stop_queue(dev);
+
+       return dev;
+}
+EXPORT_SYMBOL(alloc_xen_sock_dev);
+
+/* free_xen_sock_dev - release a device from alloc_xen_sock_dev()
+ * @dev: device to free, may be NULL
+ *
+ * dev->padded is subtracted to get back the pointer kzalloc() returned.
+ */
+void free_xen_sock_dev(struct xen_sock_dev *dev)
+{
+       if (dev)
+               kfree((char *)dev - dev->padded);
+}
+EXPORT_SYMBOL(free_xen_sock_dev);
+
+
+/* xensock_register_dev - add @dev to the global device list
+ *
+ * Appends the device under xensock_list_lock and wakes a task sleeping in
+ * xensock_dev_wait() so it can pick the new device up. Always returns 0.
+ */
+int xensock_register_dev(struct xen_sock_dev *dev)
+{
+       write_lock_bh(&xensock_list_lock);
+       list_add_tail(&dev->list, &xensock_list);
+       write_unlock_bh(&xensock_list_lock);
+       wake_up_interruptible(&xensock_accept_wait);
+
+       return 0;
+}
+EXPORT_SYMBOL(xensock_register_dev);
+
+static void __xensock_dev_unlink_sk(struct xen_sock_dev *dev);
+
+/* xensock_unregister_dev - detach @dev from its socket and from the list
+ *
+ * Lock order is xensock_list_lock first, then the device lock.
+ */
+void  xensock_unregister_dev(struct xen_sock_dev *dev)
+{
+       write_lock_bh(&xensock_list_lock);
+       xensock_lock_dev(dev);
+       __xensock_dev_unlink_sk(dev);
+       list_del(&dev->list);
+       xensock_unlock_dev(dev);
+       write_unlock_bh(&xensock_list_lock);
+}
+EXPORT_SYMBOL(xensock_unregister_dev);
+
+/* Bind @sk and @dev to each other. A reference on @sk is taken for the
+ * device; __xensock_dev_unlink_sk() drops it. The only caller in this file
+ * holds the device lock. Always returns 0.
+ */
+static int __xensock_dev_link_sk(struct xen_sock_dev *dev, struct sock *sk)
+{
+       sock_hold(sk);
+       dev->sk = sk;
+       xen_sk(sk)->dev = dev;
+
+       return 0;
+}
+
+/* Undo __xensock_dev_link_sk(): break the dev <-> sock binding, flag the
+ * socket with ENOTCONN, notify sleepers through sk_state_change() and drop
+ * the reference the device held. Does nothing when no socket is linked.
+ * All callers in this file hold the device lock.
+ */
+static void __xensock_dev_unlink_sk(struct xen_sock_dev *dev)
+{
+       struct sock *sk = dev->sk;
+
+       if (!sk)
+               return;
+
+       /* A sock created for accept() is initialised without a struct
+        * socket and is linked to the device before sock_graft() runs, so
+        * sk_socket may still be NULL here.
+        */
+       if (sk->sk_socket)
+               sk->sk_socket->state = SS_UNCONNECTED;
+
+       xen_sk(sk)->dev = NULL;
+       dev->sk = NULL;
+       sk->sk_err = ENOTCONN;
+       sk->sk_state_change(sk);
+       sock_put(sk);
+}
+
+/* xensock_dev_unlink_sk - detach the socket linked to @dev, if any
+ *
+ * Takes xensock_list_lock and the device lock, then wakes a task sleeping
+ * in xensock_dev_wait() because the device is free again.
+ */
+void xensock_dev_unlink_sk(struct xen_sock_dev *dev)
+{
+       write_lock_bh(&xensock_list_lock);
+       xensock_lock_dev(dev);
+       __xensock_dev_unlink_sk(dev);
+       xensock_unlock_dev(dev);
+       write_unlock_bh(&xensock_list_lock);
+       wake_up_interruptible(&xensock_accept_wait);
+}
+
+/* xensock_unlink_all_dev_sk - detach the socket of every registered device
+ *
+ * The devices themselves stay on xensock_list.
+ */
+void xensock_unlink_all_dev_sk(void)
+{
+       struct xen_sock_dev *ldev;
+
+       write_lock_bh(&xensock_list_lock);
+       list_for_each_entry(ldev, &xensock_list, list) {
+               xensock_lock_dev(ldev);
+               __xensock_dev_unlink_sk(ldev);
+               xensock_unlock_dev(ldev);
+       }
+       write_unlock_bh(&xensock_list_lock);
+}
+
+/* xensock_dev_wait - link @nsk to the first device that has no socket
+ * @sk: socket whose lock the caller holds and whose sk_rcvtimeo bounds the wait
+ * @nsk: socket to bind to the device (may be the same as @sk)
+ *
+ * Scans xensock_list for a device with dev->sk == NULL and links @nsk to it.
+ * When none is free the task sleeps on xensock_accept_wait; the lock of @sk
+ * is dropped while sleeping and re-taken afterwards.
+ *
+ * Returns 0 only when a device was linked, -ERESTARTSYS on a pending signal,
+ * -EAGAIN when the timeout is exhausted and -EINVAL when the wait ended
+ * because @sk was shut down for receive.
+ */
+int xensock_dev_wait(struct sock *sk, struct sock *nsk)
+{
+       int rc = 0;
+       long timeout = sk->sk_rcvtimeo;
+       struct xen_sock_dev *ldev;
+       bool dev_found = false;
+
+       DECLARE_WAITQUEUE(wait, current);
+
+       add_wait_queue_exclusive(&xensock_accept_wait, &wait);
+       for (;;) {
+               __set_current_state(TASK_INTERRUPTIBLE);
+               if (sk->sk_shutdown & RCV_SHUTDOWN)
+                       break;
+               rc = -ERESTARTSYS;
+               if (signal_pending(current))
+                       break;
+               rc = -EAGAIN;
+               if (!timeout)
+                       break;
+               rc = 0;
+               read_lock_bh(&xensock_list_lock);
+               list_for_each_entry(ldev, &xensock_list, list) {
+                       xensock_lock_dev(ldev);
+                       if (ldev->sk == NULL) {
+                               __xensock_dev_link_sk(ldev, nsk);
+                               xensock_unlock_dev(ldev);
+                               dev_found = true;
+                               break;
+                       }
+                       xensock_unlock_dev(ldev);
+               }
+               read_unlock_bh(&xensock_list_lock);
+               if (dev_found)
+                       break;
+
+               release_sock(sk);
+               timeout = schedule_timeout(timeout);
+               lock_sock(sk);
+       }
+       __set_current_state(TASK_RUNNING);
+       remove_wait_queue(&xensock_accept_wait, &wait);
+
+       /* The RCV_SHUTDOWN exit leaves rc at 0 although nothing was linked;
+        * callers treat 0 as "connected", so report a failure instead.
+        */
+       if (!rc && !dev_found)
+               rc = -EINVAL;
+
+       return rc;
+}
+
+/* xensock_dev_send - hand a xensock frame to the device for transmission
+ * @skb: socket buffer with the xensock frame in its data section
+ * @dev: device whose start_xmit() callback transmits the frame
+ *
+ * Returns 0 on success. On failure the skb is freed here and a negative
+ * errno is returned: -EMSGSIZE when the frame exceeds XENSOCK_MTU, -EIO when
+ * the device has no start_xmit(), -ENOBUFS when its queue is stopped, or
+ * whatever start_xmit() reported. Callers must not touch the skb afterwards.
+ */
+int xensock_dev_send(struct sk_buff *skb, struct xen_sock_dev *dev)
+{
+       int rc;
+
+       if (unlikely(skb->len > XENSOCK_MTU))
+               rc = -EMSGSIZE;
+       else if (unlikely(!dev->start_xmit))
+               rc = -EIO;
+       else if (sockif_queue_stopped(dev))
+               rc = -ENOBUFS;
+       else
+               rc = dev->start_xmit(skb, dev);
+
+       if (rc)
+               kfree_skb(skb);
+
+       return rc;
+}
+
+/* xensock_dev_queue_rx_skb - deliver a received frame to the linked socket
+ * @skb: received frame
+ * @dev: device the frame arrived on
+ *
+ * Queues @skb on the receive queue of the socket linked to @dev and signals
+ * data-ready unless the socket is dead.
+ *
+ * Returns 0 on success or -ENETDOWN when no socket is linked; in the latter
+ * case the skb is not consumed and stays with the caller.
+ */
+int xensock_dev_queue_rx_skb(struct sk_buff *skb, struct xen_sock_dev *dev)
+{
+       int rc = -ENETDOWN;
+       struct sock *sk;
+       unsigned int len;
+
+       xensock_lock_dev(dev);
+       /* dev->sk must be sampled under the device lock: the unlink path
+        * clears it and drops the socket reference under the same lock.
+        */
+       sk = dev->sk;
+       if (sk == NULL)
+               goto out;
+
+       rc = 0;
+       /* Once queued, a reader may dequeue and free the skb at any time,
+        * so remember its length before handing it over.
+        */
+       len = skb->len;
+       skb_queue_tail(&sk->sk_receive_queue, skb);
+       if (!sock_flag(sk, SOCK_DEAD))
+               sk->sk_data_ready(sk, len);
+out:
+       xensock_unlock_dev(dev);
+       return rc;
+}
diff --git a/drivers/net/xensock/xensock-proto.c b/drivers/net/xensock/xensock-proto.c
new file mode 100644
index 0000000..d05e5d5
--- /dev/null
+++ b/drivers/net/xensock/xensock-proto.c
@@ -0,0 +1,767 @@
+/*
+ * Xen socket protocol driver.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+#include <linux/highmem.h>
+
+#include <net/tcp_states.h>
+#include <net/af_xensock.h>
+
+#define XENSOCK_DISCONNECT             BIT(0)
+
+/* Per-skb receive state kept in skb->cb (see xensock_proto_cb()). */
+struct xensock_skb_cb {
+       /* non-zero once the leading flags byte has been consumed */
+       unsigned int flags_checked;
+};
+
+/* Locate the protocol's private state inside skb->cb.
+ *
+ * The first 'int' of skb->cb belongs to the xensock frontend and backend
+ * drivers; the protocol keeps its struct xensock_skb_cb (extra state for
+ * xensock_prot_recvmsg()) directly behind it.
+ */
+static inline struct xensock_skb_cb *xensock_proto_cb(struct sk_buff *skb)
+{
+       int *cb_words = (int *)skb->cb;
+
+       BUILD_BUG_ON(sizeof(skb->cb) <= (sizeof(int) +
+                                        sizeof(struct xensock_skb_cb)));
+
+       /* skip the driver-owned leading 'int' */
+       return (struct xensock_skb_cb *)(cb_words + 1);
+}
+
+/* struct proto .init hook; nothing to set up, always succeeds. */
+static int xensock_prot_init(struct sock *sk)
+{
+       return 0;
+}
+
+/* Protocol descriptor; obj_size makes sk_alloc() allocate a struct xen_sock. */
+static struct proto xensock_proto __read_mostly = {
+       .name           = "XENSOCK",
+       .owner          = THIS_MODULE,
+       .obj_size       = sizeof(struct xen_sock),
+       .init           = xensock_prot_init,
+};
+
+/* xen_allocate_sock - allocate a PF_XENSOCK sock not yet tied to a socket
+ * @net: network namespace of the new sock
+ *
+ * The sock is initialised without a struct socket and without a device.
+ * Returns the sock or NULL when the allocation fails.
+ */
+struct sock *xen_allocate_sock(struct net *net)
+{
+       struct sock *sk = sk_alloc(net, PF_XENSOCK, GFP_KERNEL,
+                                  &xensock_proto);
+
+       if (sk) {
+               sock_init_data(NULL, sk);
+               xen_sk(sk)->dev = NULL;
+       }
+
+       return sk;
+}
+
+/* Create the sock for an accepted connection, inheriting type, priority,
+ * protocol, buffer sizes, backlog handler and flags from the listener @osk.
+ * The new sock starts in TCP_ESTABLISHED.
+ *
+ * Returns NULL when @osk is not SOCK_RAW or the allocation fails.
+ */
+static struct sock *xensock_make_new(struct sock *osk)
+{
+       struct sock *nsk;
+
+       if (osk->sk_type != SOCK_RAW)
+               return NULL;
+
+       nsk = xen_allocate_sock(sock_net(osk));
+       if (!nsk)
+               return NULL;
+
+       nsk->sk_type        = osk->sk_type;
+       nsk->sk_priority    = osk->sk_priority;
+       nsk->sk_protocol    = osk->sk_protocol;
+       nsk->sk_rcvbuf      = osk->sk_rcvbuf;
+       nsk->sk_sndbuf      = osk->sk_sndbuf;
+       nsk->sk_state       = TCP_ESTABLISHED;
+       nsk->sk_backlog_rcv = osk->sk_backlog_rcv;
+       sock_copy_flags(nsk, osk);
+
+       return nsk;
+}
+
+/* .connect handler: bind the socket to a free xensock device.
+ *
+ * @uaddr, @addr_len and @flags are ignored. Returns 0 when the socket is
+ * already connected or a device was linked, otherwise the error reported
+ * by xensock_dev_wait().
+ */
+static int xensock_prot_connect(struct socket *sock, struct sockaddr *uaddr,
+                               int addr_len, int flags)
+{
+       struct sock *sk = sock->sk;
+       int err;
+
+       if (sock->state == SS_CONNECTED)
+               return 0;
+
+       lock_sock(sk);
+       err = xensock_dev_wait(sk, sk);
+       if (err == 0)
+               sock->state = SS_CONNECTED;
+       release_sock(sk);
+
+       return err;
+}
+
+/* .accept handler: create a new sock, bind it to a free xensock device and
+ * graft it onto @newsock.
+ *
+ * Returns 0 on success, -EINVAL when @sock has no sock or is not listening,
+ * -EOPNOTSUPP when it is not SOCK_RAW, -ENOMEM when the new sock cannot be
+ * created, or the error reported by xensock_dev_wait().
+ */
+static int xensock_prot_accept(struct socket *sock, struct socket *newsock,
+                              int flags)
+{
+       struct sock *nsk, *sk = sock->sk;
+       int rc = -EINVAL;
+
+       if (!sk)
+               goto out;
+
+       rc = -EOPNOTSUPP;
+       if (sk->sk_type != SOCK_RAW)
+               goto out;
+
+       lock_sock(sk);
+       rc = -EINVAL;
+       if (sk->sk_state != TCP_LISTEN)
+               goto err_release_sk;
+
+       rc = -ENOMEM;
+       nsk = xensock_make_new(sk);
+       if (!nsk)
+               goto err_release_sk;
+
+       /* may sleep; links nsk to a device on success */
+       rc = xensock_dev_wait(sk, nsk);
+       if (rc)
+               goto err_remove_nsk;
+
+       sock_graft(nsk, newsock);
+
+       /* Now attach up the new socket */
+       /* NOTE(review): nothing in this file increments sk_ack_backlog,
+        * so this decrement looks unbalanced - confirm.
+        */
+       sk->sk_ack_backlog--;
+       newsock->state = SS_CONNECTED;
+       rc = 0;
+err_release_sk:
+       release_sock(sk);
+out:
+       return rc;
+
+       /* reached only via goto: drop the never-linked new sock */
+err_remove_nsk:
+       sock_orphan(nsk);
+       sock_put(nsk);
+       goto err_release_sk;
+}
+
+/* .listen handler: move an unbound socket into TCP_LISTEN.
+ *
+ * A socket that already has a device attached (such as one created by the
+ * .accept callback) cannot listen. Returns 0 on success and -EOPNOTSUPP
+ * when the socket has a device or is already listening.
+ */
+static int xensock_prot_listen(struct socket *sock, int backlog)
+{
+       struct sock *sk = sock->sk;
+       int rc = -EOPNOTSUPP;
+
+       lock_sock(sk);
+       if (!xen_sk(sk)->dev && sk->sk_state != TCP_LISTEN) {
+               sk->sk_max_ack_backlog = backlog;
+               sk->sk_state           = TCP_LISTEN;
+               rc = 0;
+       }
+       release_sock(sk);
+
+       return rc;
+}
+
+/* .release handler: tear the socket down.
+ *
+ * When a device is attached, a one-byte XENSOCK_DISCONNECT frame is sent to
+ * the peer on a best-effort basis and the device is unlinked. Pending
+ * receive buffers are dropped and the sock is orphaned and released.
+ * Always returns 0.
+ */
+static int xensock_prot_release(struct socket *sock)
+{
+       struct sock *sk = sock->sk;
+       struct xen_sock_dev *dev;
+       struct sk_buff *skb;
+       unsigned char xflag;
+       int err;
+
+       if (!sk)
+               return 0;
+
+       lock_sock(sk);
+       dev = xen_sk(sk)->dev;
+
+       if (dev) {
+               /* the allocation may sleep, so do it without the sock lock */
+               release_sock(sk);
+               skb = sock_alloc_send_skb(sk, 1, 0, &err);
+               lock_sock(sk);
+               if (skb) {
+                       xflag = XENSOCK_DISCONNECT;
+                       memcpy(skb_put(skb, 1), &xflag, 1);
+                       skb->dev = NULL;
+                       skb->sk  = sk;
+
+                       /* xensock_dev_send() frees the skb itself on every
+                        * failure path, so it must not be freed again here.
+                        * The notification is best effort; errors are
+                        * ignored.
+                        */
+                       xensock_dev_send(skb, dev);
+               }
+
+               xensock_dev_unlink_sk(dev);
+       }
+
+       /* Flush the recv buffs */
+       while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL)
+               __kfree_skb(skb);
+
+       sock_orphan(sk);
+       sock->sk = NULL;
+
+       release_sock(sk);
+       sock_put(sk);
+
+       return 0;
+}
+
+/* .ioctl handler: only SIOCGSTAMP (timestamp of the last received packet)
+ * is supported; every other command yields -ENOIOCTLCMD.
+ */
+int xensock_prot_ioctl(struct socket *sock, unsigned int cmd,
+                      unsigned long arg)
+{
+       struct sock *sk = sock->sk;
+
+       switch (cmd) {
+       case SIOCGSTAMP:
+               return sock_get_timestamp(sk, (struct timeval __user *)arg);
+
+       default:
+               return -ENOIOCTLCMD;
+       }
+}
+
+/* .sendmsg handler: split the user data into frames and transmit them.
+ *
+ * Every frame consists of one leading flags byte (0 for data) followed by
+ * payload, and is at most XENSOCK_MTU bytes long. Up to PAGE_SIZE bytes go
+ * into the linear part of the skb, the remainder into page fragments.
+ *
+ * Returns the number of payload bytes sent, @size without sending anything
+ * when the peer has already closed the connection (SS_DISCONNECTING), or a
+ * negative errno (-EOPNOTSUPP for MSG_OOB, -ENOTCONN without a device or
+ * when not connected, a pending socket error, or an allocation/copy/transmit
+ * error).
+ */
+static int xensock_prot_sendmsg(struct kiocb *iocb, struct socket *sock,
+                               struct msghdr *msg, size_t size)
+{
+       struct sock *sk = sock->sk;
+       struct xen_sock_dev *dev;
+       struct sk_buff *skb;
+       int err;
+       unsigned char xflag;
+       size_t sent = 0;
+       unsigned int header_len, data_len;
+       unsigned int chunk;
+
+       if (msg->msg_flags & MSG_OOB)
+               return -EOPNOTSUPP;
+
+       lock_sock(sk);
+
+       err = sock_error(sk);
+       if (err)
+               goto out;
+
+       err = -ENOTCONN;
+
+       dev = xen_sk(sk)->dev;
+       if (!dev)
+               goto out;
+
+       /* Another side has closed connection */
+       if (sock->state == SS_DISCONNECTING) {
+               err = size;
+               goto out;
+       }
+
+       if (sock->state != SS_CONNECTED)
+               goto out;
+
+       if (size == 0) {
+               err = 0;
+               goto out;
+       }
+
+       do {
+               /* payload plus the leading flags byte */
+               chunk = size + 1;
+
+               if (chunk > XENSOCK_MTU)
+                       chunk = XENSOCK_MTU;
+
+               if (chunk <= PAGE_SIZE) {
+                       header_len = chunk;
+                       data_len = 0;
+               } else {
+                       header_len = PAGE_SIZE;
+                       data_len = chunk - PAGE_SIZE;
+               }
+
+               /* the allocation may sleep, so do it without the sock lock */
+               release_sock(sk);
+               skb = sock_alloc_send_pskb(sk, header_len, data_len,
+                                          msg->msg_flags & MSG_DONTWAIT,
+                                          &err);
+               lock_sock(sk);
+               if (!skb)
+                       goto out;
+
+               xflag = 0;
+               memcpy(skb_tail_pointer(skb), &xflag, 1);
+
+               skb->data_len = data_len;
+               skb->len = chunk;
+
+               /* payload goes behind the flags byte, hence offset 1 */
+               err = skb_copy_datagram_from_iovec(skb, 1, msg->msg_iov,
+                                                  sent, chunk - 1);
+               if (err < 0)
+                       goto free_skb;
+
+               /* move pointers in the skb */
+               skb->tail += header_len;
+
+               err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
+               if (err < 0)
+                       goto free_skb;
+
+               skb->dev = NULL;
+               skb->sk  = sk;
+
+               err = xensock_dev_send(skb, dev);
+
+               /* xensock_dev_send() has already freed the skb on failure;
+                * going through free_skb would free it a second time.
+                */
+               if (err)
+                       goto out;
+
+               /* First byte of the SKB data is the xensock flags */
+               chunk--;
+
+               sent += chunk;
+               size -= chunk;
+       } while (size);
+
+       err = sent;
+out:
+       release_sock(sk);
+       return err;
+
+       /* only for skbs that were never handed to xensock_dev_send() */
+free_skb:
+       kfree_skb(skb);
+       goto out;
+}
+
+/* Sleep until the receive queue of @sk is non-empty, the socket has an
+ * error or is shut down for receive, a signal is pending or @timeo runs
+ * out. The sock lock is dropped while sleeping and re-taken before
+ * returning, so the caller must hold it.
+ *
+ * Returns the remaining timeout.
+ */
+static long xen_sock_data_wait(struct sock *sk, long timeo)
+{
+       DECLARE_WAITQUEUE(wait, current);
+
+       add_wait_queue(sk_sleep(sk), &wait);
+       for (;;) {
+               set_current_state(TASK_INTERRUPTIBLE);
+
+               if (!skb_queue_empty(&sk->sk_receive_queue))
+                       break;
+
+               if (sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN))
+                       break;
+
+               if (signal_pending(current) || !timeo)
+                       break;
+
+               set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+               release_sock(sk);
+               timeo = schedule_timeout(timeo);
+               lock_sock(sk);
+               clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+       }
+
+       __set_current_state(TASK_RUNNING);
+       remove_wait_queue(sk_sleep(sk), &wait);
+       return timeo;
+}
+
+/* Remove the leading xensock flags byte from @skb and return it.
+ *
+ * The byte is taken from the first place that holds data: the linear head,
+ * then the skbs on the frag list, then the page fragments. The length
+ * accounting of @skb is reduced by one. Returns 0 when no byte is found.
+ */
+static unsigned char get_skb_xflags(struct sk_buff *skb)
+{
+       unsigned char ret;
+       int i;
+       int nr_frags;
+       int skb_len = skb_headlen(skb);
+
+       if (1 <= skb_len) {
+               /* flags byte sits in the linear part */
+               ret = skb->data[0];
+               __skb_pull(skb, 1);
+               return ret;
+       } else {
+               struct sk_buff *frag1;
+
+               /* linear part is empty: try the frag list first */
+               skb_walk_frags(skb, frag1) {
+                       if (frag1->len) {
+                               skb->len -= 1;
+                               skb->data_len -= 1;
+                               ret = frag1->data[0];
+                               __skb_pull(frag1, 1);
+                               return ret;
+                       }
+               }
+
+               /* then the page fragments */
+               nr_frags = skb_shinfo(skb)->nr_frags;
+               for (i = 0; i < nr_frags; i++) {
+                       skb_frag_t *frag2;
+
+                       frag2 = &skb_shinfo(skb)->frags[i];
+                       if (skb_frag_size(frag2)) {
+                               struct page *page = skb_frag_page(frag2);
+                               u8  *vaddr;
+
+                               vaddr = kmap(page);
+                               ret = vaddr[frag2->page_offset];
+                               kunmap(page);
+                               skb->len -= 1;
+                               skb->data_len -= 1;
+                               skb_frag_size_sub(frag2, 1);
+                               frag2->page_offset += 1;
+                               return ret;
+                       }
+               }
+       }
+       return 0;
+}
+
+/* .recvmsg handler: copy received data to user space as a byte stream.
+ *
+ * Frames are taken from sk_receive_queue. The first byte of each frame is
+ * the xensock flags byte; it is stripped once per skb (tracked through
+ * flags_checked in skb->cb). A frame carrying XENSOCK_DISCONNECT unlinks
+ * the device, drops all queued data and makes the call return 0. A
+ * partially consumed skb is put back at the head of the queue.
+ *
+ * Returns the number of bytes copied, 0 on disconnect, or a negative errno
+ * (-EOPNOTSUPP for MSG_OOB, -ENOTCONN without a device, -EFAULT on a
+ * malformed frame or a failed copy, -EAGAIN when no data arrives in time,
+ * or a pending socket/signal error).
+ */
+static int xensock_prot_recvmsg(struct kiocb *iocb, struct socket *sock,
+                               struct msghdr *msg, size_t size, int flags)
+{
+       struct sock *sk = sock->sk;
+       int err = 0;
+       size_t target, copied = 0;
+       long timeo;
+       int i;
+       struct sk_buff *skb;
+       int nr_frags;
+       unsigned char xflags;
+       struct xensock_skb_cb *scb;
+       struct xen_sock_dev *dev = xen_sk(sk)->dev;
+
+       if (flags & MSG_OOB)
+               return -EOPNOTSUPP;
+
+       msg->msg_namelen = 0;
+
+       lock_sock(sk);
+
+       if (!dev) {
+               copied = -ENOTCONN;
+               goto out;
+       }
+
+       if (sock->state == SS_DISCONNECTING)
+               goto out_disconnecting_state;
+
+       target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
+       timeo  = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+       do {
+               int chunk;
+
+               skb = skb_dequeue(&sk->sk_receive_queue);
+               if (!skb) {
+                       if (copied >= target)
+                               break;
+
+                       err = sock_error(sk);
+                       if (err)
+                               break;
+                       if (sk->sk_shutdown & RCV_SHUTDOWN)
+                               break;
+
+                       err = -EAGAIN;
+                       if (!timeo)
+                               break;
+
+                       timeo = xen_sock_data_wait(sk, timeo);
+
+                       if (signal_pending(current)) {
+                               err = sock_intr_errno(timeo);
+                               goto out;
+                       }
+                       continue;
+               }
+
+               scb = xensock_proto_cb(skb);
+
+               if (!scb->flags_checked) {
+                       /* First byte of the SKB data is the xensock flags */
+                       if (skb->len < 1) {
+                               /* malformed frame: it is already off the
+                                * queue, so free it instead of leaking it
+                                */
+                               kfree_skb(skb);
+                               copied = -EFAULT;
+                               break;
+                       }
+
+                       scb->flags_checked = 1;
+
+                       xflags = get_skb_xflags(skb);
+                       if (xflags & XENSOCK_DISCONNECT) {
+                               /* NOTE(review): the unlink below sets the
+                                * state of sk->sk_socket to SS_UNCONNECTED,
+                                * so this SS_DISCONNECTING value does not
+                                * appear to survive - confirm the intent.
+                                */
+                               sock->state = SS_DISCONNECTING;
+                               xensock_dev_unlink_sk(dev);
+                               /* the control frame is off the queue and
+                                * would not be caught by the flush below
+                                */
+                               kfree_skb(skb);
+                               goto out_disconnecting_state;
+                       }
+               }
+
+               chunk = min_t(unsigned int, skb->len, size);
+               if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, chunk)) {
+                       skb_queue_head(&sk->sk_receive_queue, skb);
+                       if (!copied)
+                               copied = -EFAULT;
+                       break;
+               }
+               copied += chunk;
+               size   -= chunk;
+
+               sock_recv_ts_and_drops(msg, sk, skb);
+
+               if (!(flags & MSG_PEEK)) {
+                       int skb_len = skb_headlen(skb);
+
+                       /* Consume 'chunk' bytes: linear head first, then
+                        * the frag list, then the page fragments.
+                        */
+                       if (chunk <= skb_len) {
+                               __skb_pull(skb, chunk);
+                       } else {
+                               struct sk_buff *frag1;
+
+                               __skb_pull(skb, skb_len);
+                               chunk -= skb_len;
+
+                               skb_walk_frags(skb, frag1) {
+                                       if (chunk <= frag1->len) {
+                                               /* Pulling partial data */
+                                               skb->len -= chunk;
+                                               skb->data_len -= chunk;
+                                               __skb_pull(frag1, chunk);
+                                               break;
+                                       } else if (frag1->len) {
+                                               /* Pulling all frag data */
+                                               chunk -= frag1->len;
+                                               skb->len -= frag1->len;
+                                               skb->data_len -= frag1->len;
+                                               __skb_pull(frag1, frag1->len);
+                                       }
+                               }
+
+                               nr_frags = skb_shinfo(skb)->nr_frags;
+                               for (i = 0; i < nr_frags; i++) {
+                                       skb_frag_t *frag2;
+                                       unsigned int frag_size;
+
+                                       frag2 = &skb_shinfo(skb)->frags[i];
+                                       frag_size = skb_frag_size(frag2);
+
+                                       if (chunk <= frag_size) {
+                                               /* Pulling partial data */
+                                               skb->len -= chunk;
+                                               skb->data_len -= chunk;
+                                               skb_frag_size_sub(frag2, chunk);
+                                               frag2->page_offset += chunk;
+                                               break;
+                                       } else if (frag_size) {
+                                               /* Pulling all frag data */
+                                               chunk -= frag_size;
+                                               skb->len -= frag_size;
+                                               skb->data_len -= frag_size;
+                                               skb_frag_size_set(frag2, 0);
+                                       }
+                               }
+                       }
+
+                       /* data left over: keep it for the next call */
+                       if (skb->len) {
+                               skb_queue_head(&sk->sk_receive_queue, skb);
+                               break;
+                       }
+                       kfree_skb(skb);
+
+               } else {
+                       /* put message back and return */
+                       skb_queue_head(&sk->sk_receive_queue, skb);
+                       break;
+               }
+       } while (size);
+out:
+       release_sock(sk);
+       return copied ? : err;
+
+out_disconnecting_state:
+       /* Flush the recv buffs */
+       while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL)
+               __kfree_skb(skb);
+
+       err = 0;
+       copied = 0;
+       goto out;
+}
+
+/* Socket operations for the server flavour: listen/accept are implemented,
+ * connect is not.
+ */
+static const struct proto_ops xen_sock_server_ops = {
+       .family        = PF_XENSOCK,
+       .release       = xensock_prot_release,
+       .bind          = sock_no_bind,
+       .connect       = sock_no_connect,
+       .socketpair    = sock_no_socketpair,
+       .accept        = xensock_prot_accept,
+       .getname       = sock_no_getname,
+       .poll          = sock_no_poll,
+       .ioctl         = xensock_prot_ioctl,
+       .listen        = xensock_prot_listen,
+       .shutdown      = sock_no_shutdown,
+       .setsockopt    = sock_no_setsockopt,
+       .getsockopt    = sock_no_getsockopt,
+       .sendmsg       = xensock_prot_sendmsg,
+       .recvmsg       = xensock_prot_recvmsg,
+       .mmap          = sock_no_mmap,
+       .sendpage      = sock_no_sendpage,
+};
+
+/* Socket operations for the client flavour: connect is implemented,
+ * listen/accept are not.
+ */
+static const struct proto_ops xen_sock_client_ops = {
+       .family        = PF_XENSOCK,
+       .release       = xensock_prot_release,
+       .bind          = sock_no_bind,
+       .connect       = xensock_prot_connect,
+       .socketpair    = sock_no_socketpair,
+       .accept        = sock_no_accept,
+       .getname       = sock_no_getname,
+       .poll          = sock_no_poll,
+       .ioctl         = xensock_prot_ioctl,
+       .listen        = sock_no_listen,
+       .shutdown      = sock_no_shutdown,
+       .setsockopt    = sock_no_setsockopt,
+       .getsockopt    = sock_no_getsockopt,
+       .sendmsg       = xensock_prot_sendmsg,
+       .recvmsg       = xensock_prot_recvmsg,
+       .mmap          = sock_no_mmap,
+       .sendpage      = sock_no_sendpage,
+};
+
+/* sk_destruct callback; intentionally empty. */
+static void xensock_destruct(struct sock *sk)
+{
+}
+
+/* Common part of socket creation for both flavours.
+ * @xen_proto_ops: operations table to install on @sock
+ *
+ * Only SOCK_RAW with protocol 0 is accepted. @kern is unused.
+ *
+ * Returns 0 on success, -ESOCKTNOSUPPORT or -EPROTONOSUPPORT for an
+ * unsupported type or protocol, -ENOMEM when the sock cannot be allocated,
+ * or the error from the protocol's init hook.
+ */
+static int xensock_create(struct net *net, struct socket *sock, int proto,
+                         int kern, const struct proto_ops *xen_proto_ops)
+{
+       struct sock *sk;
+       int rc = 0;
+
+       if (sock->type != SOCK_RAW)
+               return -ESOCKTNOSUPPORT;
+       if (proto)
+               return -EPROTONOSUPPORT;
+
+       sk = xen_allocate_sock(net);
+       if (sk == NULL)
+               return -ENOMEM;
+
+       sock->state = SS_UNCONNECTED;
+       sock->ops = xen_proto_ops;
+       /* re-initialise, this time tying the sock to its struct socket */
+       sock_init_data(sock, sk);
+
+       sk->sk_destruct = xensock_destruct;
+       sk->sk_protocol = proto;
+
+       xen_sk(sk)->dev = NULL;
+
+       if (sk->sk_prot->init) {
+               rc = sk->sk_prot->init(sk);
+               if (rc) {
+                       /* release sk on errors */
+                       sock_orphan(sk);
+                       sock_put(sk);
+               }
+       }
+
+       return rc;
+}
+
+/* net_proto_family .create hook installing the server operations. */
+static int xensock_server_create(struct net *net, struct socket *sock,
+                                int proto, int kern)
+{
+       return xensock_create(net, sock, proto, kern, &xen_sock_server_ops);
+}
+
+/* net_proto_family .create hook installing the client operations. */
+static int xensock_client_create(struct net *net, struct socket *sock,
+                                int proto, int kern)
+{
+       return xensock_create(net, sock, proto, kern, &xen_sock_client_ops);
+}
+
+/* PF_XENSOCK family registration used by xensock_proto_server_init(). */
+static const struct net_proto_family xensock_server_family_ops = {
+       .family = PF_XENSOCK,
+       .create = xensock_server_create,
+       .owner  = THIS_MODULE,
+};
+
+/* PF_XENSOCK family registration used by xensock_proto_client_init(). */
+static const struct net_proto_family xensock_client_family_ops = {
+       .family = PF_XENSOCK,
+       .create = xensock_client_create,
+       .owner  = THIS_MODULE,
+};
+
+/* Register the protocol and the PF_XENSOCK family.
+ * @is_server_ops: true selects the server family ops, false the client ones
+ *
+ * The protocol registration is rolled back when registering the family
+ * fails. Returns 0 on success or the error of the failing registration.
+ */
+static int xensock_proto_init(bool is_server_ops)
+{
+       const struct net_proto_family *family;
+       int ret;
+
+       ret = proto_register(&xensock_proto, 0);
+       if (ret) {
+               pr_err("proto_register failed: %d\n", ret);
+               return ret;
+       }
+
+       family = is_server_ops ? &xensock_server_family_ops :
+                                &xensock_client_family_ops;
+
+       ret = sock_register(family);
+       if (!ret)
+               return 0;
+
+       pr_err("sock_register failed: %d\n", ret);
+       proto_unregister(&xensock_proto);
+       return ret;
+}
+
+/* Register the xensock protocol with the server socket operations. */
+int xensock_proto_server_init(void)
+{
+       return xensock_proto_init(true);
+}
+EXPORT_SYMBOL(xensock_proto_server_init);
+
+/* Register the xensock protocol with the client socket operations. */
+int xensock_proto_client_init(void)
+{
+       return xensock_proto_init(false);
+}
+EXPORT_SYMBOL(xensock_proto_client_init);
+
+/* Undo xensock_proto_init(): detach all sockets from their devices, then
+ * unregister the family and the protocol.
+ */
+void xensock_proto_cleanup(void)
+{
+       xensock_unlink_all_dev_sk();
+       sock_unregister(PF_XENSOCK);
+       proto_unregister(&xensock_proto);
+}
+EXPORT_SYMBOL(xensock_proto_cleanup);
+
+MODULE_DESCRIPTION("xensock protocol");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_NETPROTO(AF_XENSOCK);
diff --git a/include/linux/socket.h b/include/linux/socket.h
index ec538fc..79ffa55 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -180,7 +180,8 @@ struct ucred {
 #define AF_ALG         38      /* Algorithm sockets            */
 #define AF_NFC         39      /* NFC sockets                  */
 #define AF_VSOCK       40      /* vSockets                     */
-#define AF_MAX         41      /* For now.. */
+#define AF_XENSOCK     41      /* xensock sockets              */
+#define AF_MAX         42      /* For now.. */
 
 /* Protocol families, same as address families. */
 #define PF_UNSPEC      AF_UNSPEC
@@ -225,6 +226,7 @@ struct ucred {
 #define PF_ALG         AF_ALG
 #define PF_NFC         AF_NFC
 #define PF_VSOCK       AF_VSOCK
+#define PF_XENSOCK     AF_XENSOCK
 #define PF_MAX         AF_MAX
 
 /* Maximum queue length specifiable by listen.  */
diff --git a/include/net/af_xensock.h b/include/net/af_xensock.h
new file mode 100644
index 0000000..48df5ce
--- /dev/null
+++ b/include/net/af_xensock.h
@@ -0,0 +1,46 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __AF_XENSOCK_H__
+#define __AF_XENSOCK_H__
+
+#include <net/xensock.h>
+
+struct xen_sock {
+       struct sock sk;           /* first member: shares the struct's address */
+       struct xen_sock_dev *dev; /* assumed: device this socket is linked to */
+};
+
+static inline struct xen_sock *xen_sk(const struct sock *sk)
+{
+       return container_of(sk, struct xen_sock, sk); /* sk is at offset 0 */
+}
+
+int xensock_proto_server_init(void); /* 0, or the proto/sock_register error */
+int xensock_proto_client_init(void); /* 0, or the proto/sock_register error */
+void xensock_proto_cleanup(void);    /* unregisters the family and the proto */
+
+#endif /* __AF_XENSOCK_H__ */
diff --git a/include/net/xensock.h b/include/net/xensock.h
new file mode 100644
index 0000000..2e5949b
--- /dev/null
+++ b/include/net/xensock.h
@@ -0,0 +1,130 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __XENSOCK_H__
+#define __XENSOCK_H__
+
+#include <linux/bitops.h>
+#include <linux/if.h>
+#include <net/sock.h>
+
+#define                XENSOCK_MTU             65535
+
+#define                SOCKDEV_ALIGN           32 /* see xensock_dev_priv() */
+
+#define                __SOCK_STATE_NOCARRIER          0 /* bit index in ->state */
+#define                __SOCK_STATE_QUEUESTOPPED       1 /* bit index in ->state */
+
+struct xen_sock_dev {
+       char name[IFNAMSIZ];   /* device name, at most IFNAMSIZ bytes */
+       struct sock *sk;       /* assumed: socket linked to this device */
+       spinlock_t lock;       /* sock_dev operations lock */
+       unsigned long state;   /* bit flags indexed by __SOCK_STATE_* */
+       unsigned short padded; /* Padding added by alloc_xen_sock_dev() */
+
+       int (*start_xmit)(struct sk_buff *skb, struct xen_sock_dev *dev);
+       unsigned long tx_queue_len; /* NOTE(review): unit not visible here */
+       struct list_head list;      /* list linkage; head not visible here */
+};
+
+/*
+ *     xensock_dev_priv - access sock device private data
+ *     @dev: xen_sock device
+ * Returns @dev advanced by sizeof(struct xen_sock_dev) rounded up to
+ * SOCKDEV_ALIGN; nothing here checks that a private area was allocated.
+ */
+static inline void *xensock_dev_priv(const struct xen_sock_dev *dev)
+{
+       return (char *)dev + ALIGN(sizeof(struct xen_sock_dev), SOCKDEV_ALIGN);
+}
+
+/*
+ *     sockif_carrier_ok - test if carrier present
+ *     @dev: xensock device
+ *
+ * Returns true while __SOCK_STATE_NOCARRIER is clear in @dev->state.
+ */
+static inline bool sockif_carrier_ok(const struct xen_sock_dev *dev)
+{
+       return !test_bit(__SOCK_STATE_NOCARRIER, &dev->state);
+}
+
+/*
+ *     sockif_carrier_on - set carrier (clears __SOCK_STATE_NOCARRIER)
+ *     @dev: xensock device
+ */
+static inline void sockif_carrier_on(struct xen_sock_dev *dev)
+{
+       clear_bit(__SOCK_STATE_NOCARRIER, &dev->state);
+}
+
+/*
+ *     sockif_carrier_off - clear carrier (sets __SOCK_STATE_NOCARRIER)
+ *     @dev: xensock device
+ */
+static inline void sockif_carrier_off(struct xen_sock_dev *dev)
+{
+       set_bit(__SOCK_STATE_NOCARRIER, &dev->state);
+}
+
+/*
+ *     sockif_queue_stopped - test if tx queue is stopped
+ *     @dev: xensock device (reads __SOCK_STATE_QUEUESTOPPED in ->state)
+ */
+static inline bool sockif_queue_stopped(const struct xen_sock_dev *dev)
+{
+       return test_bit(__SOCK_STATE_QUEUESTOPPED, &dev->state);
+}
+
+/*
+ *     sockif_wake_queue - wake tx queue (only clears the stopped bit)
+ *     @dev: xensock device
+ */
+static inline void sockif_wake_queue(struct xen_sock_dev *dev)
+{
+       clear_bit(__SOCK_STATE_QUEUESTOPPED, &dev->state);
+}
+
+/*
+ *     sockif_stop_queue - stop tx queue (only sets the stopped bit)
+ *     @dev: xensock device
+ */
+static inline void sockif_stop_queue(struct xen_sock_dev *dev)
+{
+       set_bit(__SOCK_STATE_QUEUESTOPPED, &dev->state);
+}
+
+struct xen_sock_dev *alloc_xen_sock_dev(int sizeof_priv, const char *name);
+void free_xen_sock_dev(struct xen_sock_dev *dev);
+int xensock_register_dev(struct xen_sock_dev *dev);
+void xensock_unregister_dev(struct xen_sock_dev *dev);
+void xensock_dev_unlink_sk(struct xen_sock_dev *dev);
+void xensock_unlink_all_dev_sk(void);
+int xensock_dev_wait(struct sock *sk, struct sock *nsk);
+int xensock_dev_send(struct sk_buff *skb, struct xen_sock_dev *dev);
+int xensock_dev_queue_rx_skb(struct sk_buff *skb, struct xen_sock_dev *dev);
+
+#endif /* __XENSOCK_H__ */
diff --git a/net/core/sock.c b/net/core/sock.c
index 026e01f..a57f264 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -262,7 +262,8 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
   "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
   "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
-  "sk_lock-AF_NFC"   , "sk_lock-AF_VSOCK"    , "sk_lock-AF_MAX"
+  "sk_lock-AF_NFC"   , "sk_lock-AF_VSOCK"    , "sk_lock-AF_XENSOCK"  ,
+  "sk_lock-AF_MAX"
 };
 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
@@ -278,7 +279,8 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
   "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
   "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
-  "slock-AF_NFC"   , "slock-AF_VSOCK"    ,"slock-AF_MAX"
+  "slock-AF_NFC"   , "slock-AF_VSOCK"    , "slock-AF_XENSOCK"  ,
+  "slock-AF_MAX"
 };
 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
   "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
@@ -294,7 +296,8 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
   "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
   "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
   "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
-  "clock-AF_NFC"   , "clock-AF_VSOCK"    , "clock-AF_MAX"
+  "clock-AF_NFC"   , "clock-AF_VSOCK"    , "clock-AF_XENSOCK"  ,
+  "clock-AF_MAX"
 };
 
 /*
-- 
1.8.2.rc2


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.