[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 1/3] net: introduce new socket support: xensock
These sockets will be used by the xen-sock frontend/backend drivers. Those drivers will allow applications to connect via xensock sockets (in this case, dom0/domD sockets can be used for the server application and domU sockets can be used for the client application). Signed-off-by: Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx> --- drivers/net/Kconfig | 4 + drivers/net/Makefile | 2 + drivers/net/xensock/Makefile | 5 + drivers/net/xensock/xensock-dev.c | 269 +++++++++++++ drivers/net/xensock/xensock-proto.c | 767 ++++++++++++++++++++++++++++++++++++ include/linux/socket.h | 4 +- include/net/af_xensock.h | 46 +++ include/net/xensock.h | 130 ++++++ net/core/sock.c | 9 +- 9 files changed, 1232 insertions(+), 4 deletions(-) create mode 100644 drivers/net/xensock/Makefile create mode 100644 drivers/net/xensock/xensock-dev.c create mode 100644 drivers/net/xensock/xensock-proto.c create mode 100644 include/net/af_xensock.h create mode 100644 include/net/xensock.h diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 89402c3..420981a 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -348,6 +348,10 @@ config XEN_NETDEV_BACKEND compile this driver as a module, chose M here: the module will be called xen-netback. 
+config XEN_SOCKDEV_PROTO + bool + default n + config VMXNET3 tristate "VMware VMXNET3 ethernet driver" depends on PCI && INET diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 3fef8a8..43bf910 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -61,6 +61,8 @@ obj-$(CONFIG_VMXNET3) += vmxnet3/ obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o obj-$(CONFIG_XEN_NETDEV_BACKEND) += xen-netback/ +obj-$(CONFIG_XEN_SOCKDEV_PROTO) += xensock/ + obj-$(CONFIG_USB_CATC) += usb/ obj-$(CONFIG_USB_KAWETH) += usb/ obj-$(CONFIG_USB_PEGASUS) += usb/ diff --git a/drivers/net/xensock/Makefile b/drivers/net/xensock/Makefile new file mode 100644 index 0000000..d70db09 --- /dev/null +++ b/drivers/net/xensock/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the xensock driver +# + +obj-$(CONFIG_XEN_SOCKDEV_PROTO) := xensock-proto.o xensock-dev.o diff --git a/drivers/net/xensock/xensock-dev.c b/drivers/net/xensock/xensock-dev.c new file mode 100644 index 0000000..6da8f34 --- /dev/null +++ b/drivers/net/xensock/xensock-dev.c @@ -0,0 +1,269 @@ +/* + * Xen socket dev driver. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/socket.h> +#include <linux/skbuff.h> +#include <linux/sched.h> +#include <linux/spinlock.h> + +#include <net/af_xensock.h> + +LIST_HEAD(xensock_list); +DEFINE_RWLOCK(xensock_list_lock); +DECLARE_WAIT_QUEUE_HEAD(xensock_accept_wait); + +static inline void xensock_lock_dev(struct xen_sock_dev *dev) +{ + spin_lock(&dev->lock); +} + +static inline void xensock_unlock_dev(struct xen_sock_dev *dev) +{ + spin_unlock(&dev->lock); +} + +struct xen_sock_dev *alloc_xen_sock_dev(int sizeof_priv, const char *name) +{ + struct xen_sock_dev *dev; + int alloc_size; + struct xen_sock_dev *p; + + BUG_ON(strlen(name) >= sizeof(dev->name)); + + alloc_size = sizeof(struct xen_sock_dev); + + if (sizeof_priv) { + alloc_size = ALIGN(sizeof(struct xen_sock_dev), SOCKDEV_ALIGN); + alloc_size += sizeof_priv; + } + + /* ensure 32-byte alignment of whole construct */ + alloc_size += SOCKDEV_ALIGN - 1; + + p = kzalloc(alloc_size, GFP_KERNEL); + if (!p) { + pr_err("alloc_xen_sock_dev: Unable to allocate device\n"); + return NULL; + } + + dev = PTR_ALIGN(p, SOCKDEV_ALIGN); + dev->padded = (char *)dev - (char *)p; + + strcpy(dev->name, name); + spin_lock_init(&dev->lock); + sockif_carrier_off(dev); + sockif_stop_queue(dev); + + return dev; +} +EXPORT_SYMBOL(alloc_xen_sock_dev); + +void free_xen_sock_dev(struct xen_sock_dev *dev) +{ + if (!dev) + return; + + kfree((char *)dev - dev->padded); +} 
+EXPORT_SYMBOL(free_xen_sock_dev); + + +int xensock_register_dev(struct xen_sock_dev *dev) +{ + write_lock_bh(&xensock_list_lock); + list_add_tail(&dev->list, &xensock_list); + write_unlock_bh(&xensock_list_lock); + wake_up_interruptible(&xensock_accept_wait); + + return 0; +} +EXPORT_SYMBOL(xensock_register_dev); + +static void __xensock_dev_unlink_sk(struct xen_sock_dev *dev); + +void xensock_unregister_dev(struct xen_sock_dev *dev) +{ + write_lock_bh(&xensock_list_lock); + xensock_lock_dev(dev); + __xensock_dev_unlink_sk(dev); + list_del(&dev->list); + xensock_unlock_dev(dev); + write_unlock_bh(&xensock_list_lock); +} +EXPORT_SYMBOL(xensock_unregister_dev); + +static int __xensock_dev_link_sk(struct xen_sock_dev *dev, struct sock *sk) +{ + sock_hold(sk); + dev->sk = sk; + xen_sk(sk)->dev = dev; + + return 0; +} + +static void __xensock_dev_unlink_sk(struct xen_sock_dev *dev) +{ + struct sock *sk = dev->sk; + struct socket *sock; + + if (sk) { + sock = sk->sk_socket; + sock->state = SS_UNCONNECTED; + + xen_sk(sk)->dev = NULL; + dev->sk = NULL; + sk->sk_err = ENOTCONN; + sk->sk_state_change(sk); + sock_put(sk); + } +} + +void xensock_dev_unlink_sk(struct xen_sock_dev *dev) +{ + write_lock_bh(&xensock_list_lock); + xensock_lock_dev(dev); + __xensock_dev_unlink_sk(dev); + xensock_unlock_dev(dev); + write_unlock_bh(&xensock_list_lock); + wake_up_interruptible(&xensock_accept_wait); +} + +void xensock_unlink_all_dev_sk(void) +{ + struct xen_sock_dev *ldev; + + write_lock_bh(&xensock_list_lock); + list_for_each_entry(ldev, &xensock_list, list) { + xensock_lock_dev(ldev); + __xensock_dev_unlink_sk(ldev); + xensock_unlock_dev(ldev); + } + write_unlock_bh(&xensock_list_lock); +} + +int xensock_dev_wait(struct sock *sk, struct sock *nsk) +{ + int rc = 0; + long timeout = sk->sk_rcvtimeo; + struct xen_sock_dev *ldev; + bool dev_found; + + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue_exclusive(&xensock_accept_wait, &wait); + for (;;) { + 
__set_current_state(TASK_INTERRUPTIBLE); + if (sk->sk_shutdown & RCV_SHUTDOWN) + break; + rc = -ERESTARTSYS; + if (signal_pending(current)) + break; + rc = -EAGAIN; + if (!timeout) + break; + rc = 0; + dev_found = false; + read_lock_bh(&xensock_list_lock); + list_for_each_entry(ldev, &xensock_list, list) { + xensock_lock_dev(ldev); + if (ldev->sk == NULL) { + __xensock_dev_link_sk(ldev, nsk); + xensock_unlock_dev(ldev); + dev_found = true; + break; + } + xensock_unlock_dev(ldev); + } + read_unlock_bh(&xensock_list_lock); + if (dev_found) + break; + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + } + __set_current_state(TASK_RUNNING); + remove_wait_queue(&xensock_accept_wait, &wait); + + return rc; +} + +/* xensock_dev_send - transmit a xensock frame + * @skb: pointer to socket buffer with xensock frame in data section + */ +int xensock_dev_send(struct sk_buff *skb, struct xen_sock_dev *dev) +{ + int err = -EINVAL; + + /* Make sure the xensock frame can pass the selected xensock device */ + if (unlikely(skb->len > XENSOCK_MTU)) { + err = -EMSGSIZE; + goto inval_skb; + } + + if (unlikely(!dev->start_xmit)) { + err = -EIO; + goto inval_skb; + } + + if (sockif_queue_stopped(dev)) { + err = -ENOBUFS; + goto inval_skb; + } + + err = dev->start_xmit(skb, dev); + if (err) + goto inval_skb; + + return 0; + +inval_skb: + kfree_skb(skb); + return err; +} + +int xensock_dev_queue_rx_skb(struct sk_buff *skb, struct xen_sock_dev *dev) +{ + int rc = -ENETDOWN; + struct sock *sk = dev->sk; + + xensock_lock_dev(dev); + if (sk == NULL) + goto out; + + rc = 0; + skb_queue_tail(&sk->sk_receive_queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, skb->len); +out: + xensock_unlock_dev(dev); + return rc; +} diff --git a/drivers/net/xensock/xensock-proto.c b/drivers/net/xensock/xensock-proto.c new file mode 100644 index 0000000..d05e5d5 --- /dev/null +++ b/drivers/net/xensock/xensock-proto.c @@ -0,0 +1,767 @@ +/* + * Xen socket protocol 
driver. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/socket.h> +#include <linux/skbuff.h> +#include <linux/if_arp.h> +#include <linux/highmem.h> + +#include <net/tcp_states.h> +#include <net/af_xensock.h> + +#define XENSOCK_DISCONNECT BIT(0) + +struct xensock_skb_cb { + unsigned int flags_checked; +}; + +/* Return pointer to store the extra msg flags for xensock_proto_recvmsg(). + * We use the space of one unsigned int beyond the 'int' size + * in skb->cb. 
Xensock frontend and backend drivers are using this + * first 'int' part of the skb->cb. + */ +static inline struct xensock_skb_cb *xensock_proto_cb(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(skb->cb) <= (sizeof(int) + + sizeof(struct xensock_skb_cb))); + + /* return pointer after 'int' size */ + return (struct xensock_skb_cb *)(&((int *)skb->cb)[1]); +} + +static int xensock_prot_init(struct sock *sk) +{ + return 0; +} + +static struct proto xensock_proto __read_mostly = { + .name = "XENSOCK", + .owner = THIS_MODULE, + .obj_size = sizeof(struct xen_sock), + .init = xensock_prot_init, +}; + +struct sock *xen_allocate_sock(struct net *net) +{ + struct xen_sock *xsk; + struct sock *sk; + + sk = sk_alloc(net, PF_XENSOCK, GFP_KERNEL, &xensock_proto); + + if (!sk) + goto out; + + sock_init_data(NULL, sk); + + xsk = xen_sk(sk); + xsk->dev = NULL; +out: + return sk; +} + +static struct sock *xensock_make_new(struct sock *osk) +{ + struct sock *sk = NULL; + + if (osk->sk_type != SOCK_RAW) + goto out; + + sk = xen_allocate_sock(sock_net(osk)); + if (sk == NULL) + goto out; + + sk->sk_type = osk->sk_type; + sk->sk_priority = osk->sk_priority; + sk->sk_protocol = osk->sk_protocol; + sk->sk_rcvbuf = osk->sk_rcvbuf; + sk->sk_sndbuf = osk->sk_sndbuf; + sk->sk_state = TCP_ESTABLISHED; + sk->sk_backlog_rcv = osk->sk_backlog_rcv; + sock_copy_flags(sk, osk); + +out: + return sk; +} + +static int xensock_prot_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + struct sock *sk; + int rc; + + if (sock->state == SS_CONNECTED) + return 0; + + sk = sock->sk; + + lock_sock(sk); + rc = xensock_dev_wait(sk, sk); + + if (!rc) + sock->state = SS_CONNECTED; + + release_sock(sk); + + return rc; +} + +static int xensock_prot_accept(struct socket *sock, struct socket *newsock, + int flags) +{ + struct sock *nsk, *sk = sock->sk; + int rc = -EINVAL; + + if (!sk) + goto out; + + rc = -EOPNOTSUPP; + if (sk->sk_type != SOCK_RAW) + goto out; + + lock_sock(sk); + rc = 
-EINVAL; + if (sk->sk_state != TCP_LISTEN) + goto err_release_sk; + + rc = -ENOMEM; + nsk = xensock_make_new(sk); + if (!nsk) + goto err_release_sk; + + rc = xensock_dev_wait(sk, nsk); + if (rc) + goto err_remove_nsk; + + sock_graft(nsk, newsock); + + /* Now attach up the new socket */ + sk->sk_ack_backlog--; + newsock->state = SS_CONNECTED; + rc = 0; +err_release_sk: + release_sock(sk); +out: + return rc; + +err_remove_nsk: + sock_orphan(nsk); + sock_put(nsk); + goto err_release_sk; +} + +static int xensock_prot_listen(struct socket *sock, int backlog) +{ + struct sock *sk = sock->sk; + int rc = -EOPNOTSUPP; + + lock_sock(sk); + /* All created sockets (in .accept callback) will have a non + * NULL pointer and listen operation for them is prohibited. + */ + if (xen_sk(sk)->dev) + goto out; + + if (sk->sk_state != TCP_LISTEN) { + sk->sk_max_ack_backlog = backlog; + sk->sk_state = TCP_LISTEN; + rc = 0; + } +out: + release_sock(sk); + + return rc; +} + +static int xensock_prot_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct xen_sock_dev *dev; + struct sk_buff *skb; + unsigned char xflag; + int err; + + if (!sk) + return 0; + + lock_sock(sk); + dev = xen_sk(sk)->dev; + + if (dev) { + release_sock(sk); + skb = sock_alloc_send_skb(sk, 1, 0, &err); + lock_sock(sk); + if (!skb) + goto skip_send_no_con; + + xflag = XENSOCK_DISCONNECT; + memcpy(skb_put(skb, 1), &xflag, 1); + skb->dev = NULL; + skb->sk = sk; + + err = xensock_dev_send(skb, dev); + + if (err) + kfree_skb(skb); + +skip_send_no_con: + xensock_dev_unlink_sk(dev); + } + + /* Flush the recv buffs */ + while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) + __kfree_skb(skb); + + sock_orphan(sk); + sock->sk = NULL; + + release_sock(sk); + sock_put(sk); + + return 0; +} + +int xensock_prot_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct sock *sk = sock->sk; + + switch (cmd) { + case SIOCGSTAMP: + return sock_get_timestamp(sk, (struct timeval __user *)arg); + + 
default: + return -ENOIOCTLCMD; + } +} + +static int xensock_prot_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size) +{ + struct sock *sk = sock->sk; + struct xen_sock_dev *dev; + struct sk_buff *skb; + int err; + unsigned char xflag; + size_t sent = 0; + unsigned int header_len, data_len; + unsigned int chunk; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + lock_sock(sk); + + err = sock_error(sk); + if (err) + goto out; + + err = -ENOTCONN; + + dev = xen_sk(sk)->dev; + if (!dev) + goto out; + + /* Another side has closed connection */ + if (sock->state == SS_DISCONNECTING) { + err = size; + goto out; + } + + if (sock->state != SS_CONNECTED) + goto out; + + if (size == 0) { + err = 0; + goto out; + } + + do { + chunk = size + 1; + + if (chunk > XENSOCK_MTU) + chunk = XENSOCK_MTU; + + if (chunk <= PAGE_SIZE) { + header_len = chunk; + data_len = 0; + } else { + header_len = PAGE_SIZE; + data_len = chunk - PAGE_SIZE; + } + + release_sock(sk); + skb = sock_alloc_send_pskb(sk, header_len, data_len, + msg->msg_flags & MSG_DONTWAIT, + &err); + lock_sock(sk); + if (!skb) + goto out; + + xflag = 0; + memcpy(skb_tail_pointer(skb), &xflag, 1); + + skb->data_len = data_len; + skb->len = chunk; + + err = skb_copy_datagram_from_iovec(skb, 1, msg->msg_iov, + sent, chunk - 1); + if (err < 0) + goto free_skb; + + /* move pointers in the skb */ + skb->tail += header_len; + + err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + if (err < 0) + goto free_skb; + + skb->dev = NULL; + skb->sk = sk; + + err = xensock_dev_send(skb, dev); + + if (err) + goto free_skb; + + /* First byte of the SKB data is the xensock flags */ + chunk--; + + sent += chunk; + size -= chunk; + } while (size); + + err = sent; +out: + release_sock(sk); + return err; + +free_skb: + kfree_skb(skb); + goto out; +} + +static long xen_sock_data_wait(struct sock *sk, long timeo) +{ + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue(sk_sleep(sk), &wait); + for (;;) { 
+ set_current_state(TASK_INTERRUPTIBLE); + + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; + + if (sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN)) + break; + + if (signal_pending(current) || !timeo) + break; + + set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + } + + __set_current_state(TASK_RUNNING); + remove_wait_queue(sk_sleep(sk), &wait); + return timeo; +} + +static unsigned char get_skb_xflags(struct sk_buff *skb) +{ + unsigned char ret; + int i; + int nr_frags; + int skb_len = skb_headlen(skb); + + if (1 <= skb_len) { + ret = skb->data[0]; + __skb_pull(skb, 1); + return ret; + } else { + struct sk_buff *frag1; + + skb_walk_frags(skb, frag1) { + if (frag1->len) { + skb->len -= 1; + skb->data_len -= 1; + ret = frag1->data[0]; + __skb_pull(frag1, 1); + return ret; + } + } + + nr_frags = skb_shinfo(skb)->nr_frags; + for (i = 0; i < nr_frags; i++) { + skb_frag_t *frag2; + + frag2 = &skb_shinfo(skb)->frags[i]; + if (skb_frag_size(frag2)) { + struct page *page = skb_frag_page(frag2); + u8 *vaddr; + + vaddr = kmap(page); + ret = vaddr[frag2->page_offset]; + kunmap(page); + skb->len -= 1; + skb->data_len -= 1; + skb_frag_size_sub(frag2, 1); + frag2->page_offset += 1; + return ret; + } + } + } + return 0; +} + +static int xensock_prot_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size, int flags) +{ + struct sock *sk = sock->sk; + int err = 0; + size_t target, copied = 0; + long timeo; + int i; + struct sk_buff *skb; + int nr_frags; + unsigned char xflags; + struct xensock_skb_cb *scb; + struct xen_sock_dev *dev = xen_sk(sk)->dev; + + if (flags & MSG_OOB) + return -EOPNOTSUPP; + + msg->msg_namelen = 0; + + lock_sock(sk); + + if (!dev) { + copied = -ENOTCONN; + goto out; + } + + if (sock->state == SS_DISCONNECTING) + goto out_disconnecting_state; + + target = sock_rcvlowat(sk, flags & MSG_WAITALL, 
size); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + + do { + int chunk; + + skb = skb_dequeue(&sk->sk_receive_queue); + if (!skb) { + if (copied >= target) + break; + + err = sock_error(sk); + if (err) + break; + if (sk->sk_shutdown & RCV_SHUTDOWN) + break; + + err = -EAGAIN; + if (!timeo) + break; + + timeo = xen_sock_data_wait(sk, timeo); + + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + goto out; + } + continue; + } + + scb = xensock_proto_cb(skb); + + if (!scb->flags_checked) { + /* First byte of the SKB data is the xensock flags */ + if (skb->len < 1) { + copied = -EFAULT; + break; + } + + scb->flags_checked = 1; + + xflags = get_skb_xflags(skb); + if (xflags & XENSOCK_DISCONNECT) { + sock->state = SS_DISCONNECTING; + xensock_dev_unlink_sk(dev); + goto out_disconnecting_state; + } + } + + chunk = min_t(unsigned int, skb->len, size); + if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, chunk)) { + skb_queue_head(&sk->sk_receive_queue, skb); + if (!copied) + copied = -EFAULT; + break; + } + copied += chunk; + size -= chunk; + + sock_recv_ts_and_drops(msg, sk, skb); + + if (!(flags & MSG_PEEK)) { + int skb_len = skb_headlen(skb); + + if (chunk <= skb_len) { + __skb_pull(skb, chunk); + } else { + struct sk_buff *frag1; + + __skb_pull(skb, skb_len); + chunk -= skb_len; + + skb_walk_frags(skb, frag1) { + if (chunk <= frag1->len) { + /* Pulling partial data */ + skb->len -= chunk; + skb->data_len -= chunk; + __skb_pull(frag1, chunk); + break; + } else if (frag1->len) { + /* Pulling all frag data */ + chunk -= frag1->len; + skb->len -= frag1->len; + skb->data_len -= frag1->len; + __skb_pull(frag1, frag1->len); + } + } + + nr_frags = skb_shinfo(skb)->nr_frags; + for (i = 0; i < nr_frags; i++) { + skb_frag_t *frag2; + unsigned int frag_size; + + frag2 = &skb_shinfo(skb)->frags[i]; + frag_size = skb_frag_size(frag2); + + if (chunk <= frag_size) { + /* Pulling partial data */ + skb->len -= chunk; + skb->data_len -= chunk; + 
skb_frag_size_sub(frag2, chunk); + frag2->page_offset += chunk; + break; + } else if (frag_size) { + /* Pulling all frag data */ + chunk -= frag_size; + skb->len -= frag_size; + skb->data_len -= frag_size; + skb_frag_size_set(frag2, 0); + } + } + } + + if (skb->len) { + skb_queue_head(&sk->sk_receive_queue, skb); + break; + } + kfree_skb(skb); + + } else { + /* put message back and return */ + skb_queue_head(&sk->sk_receive_queue, skb); + break; + } + } while (size); +out: + release_sock(sk); + return copied ? : err; + +out_disconnecting_state: + /* Flush the recv buffs */ + while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) + __kfree_skb(skb); + + err = 0; + copied = 0; + goto out; +} + +static const struct proto_ops xen_sock_server_ops = { + .family = PF_XENSOCK, + .release = xensock_prot_release, + .bind = sock_no_bind, + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .accept = xensock_prot_accept, + .getname = sock_no_getname, + .poll = sock_no_poll, + .ioctl = xensock_prot_ioctl, + .listen = xensock_prot_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = xensock_prot_sendmsg, + .recvmsg = xensock_prot_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static const struct proto_ops xen_sock_client_ops = { + .family = PF_XENSOCK, + .release = xensock_prot_release, + .bind = sock_no_bind, + .connect = xensock_prot_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = sock_no_poll, + .ioctl = xensock_prot_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = xensock_prot_sendmsg, + .recvmsg = xensock_prot_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static void xensock_destruct(struct sock *sk) +{ +} + +static int xensock_create(struct net *net, struct socket 
*sock, int proto, + int kern, const struct proto_ops *xen_proto_ops) +{ + struct sock *sk; + int err = 0; + + if (sock->type != SOCK_RAW) + return -ESOCKTNOSUPPORT; + if (proto != 0) + return -EPROTONOSUPPORT; + + sk = xen_allocate_sock(net); + if (!sk) + return -ENOMEM; + + sock->state = SS_UNCONNECTED; + sock->ops = xen_proto_ops; + sock_init_data(sock, sk); + + sk->sk_destruct = xensock_destruct; + sk->sk_protocol = proto; + + xen_sk(sk)->dev = NULL; + + if (sk->sk_prot->init) + err = sk->sk_prot->init(sk); + + if (err) { + /* release sk on errors */ + sock_orphan(sk); + sock_put(sk); + } + + return err; +} + +static int xensock_server_create(struct net *net, struct socket *sock, + int proto, int kern) +{ + return xensock_create(net, sock, proto, kern, &xen_sock_server_ops); +} + +static int xensock_client_create(struct net *net, struct socket *sock, + int proto, int kern) +{ + return xensock_create(net, sock, proto, kern, &xen_sock_client_ops); +} + +static const struct net_proto_family xensock_server_family_ops = { + .family = PF_XENSOCK, + .create = xensock_server_create, + .owner = THIS_MODULE, +}; + +static const struct net_proto_family xensock_client_family_ops = { + .family = PF_XENSOCK, + .create = xensock_client_create, + .owner = THIS_MODULE, +}; + +static int xensock_proto_init(bool is_server_ops) +{ + int ret; + + ret = proto_register(&xensock_proto, 0); + if (ret) { + pr_err("proto_register failed: %d\n", ret); + return ret; + } + + if (is_server_ops) + ret = sock_register(&xensock_server_family_ops); + else + ret = sock_register(&xensock_client_family_ops); + + if (ret) { + pr_err("sock_register failed: %d\n", ret); + goto proto_unreg; + } + + return 0; + +proto_unreg: + proto_unregister(&xensock_proto); + return ret; +} + +int xensock_proto_server_init(void) +{ + return xensock_proto_init(true); +} +EXPORT_SYMBOL(xensock_proto_server_init); + +int xensock_proto_client_init(void) +{ + return xensock_proto_init(false); +} 
+EXPORT_SYMBOL(xensock_proto_client_init); + +void xensock_proto_cleanup(void) +{ + xensock_unlink_all_dev_sk(); + sock_unregister(PF_XENSOCK); + proto_unregister(&xensock_proto); +} +EXPORT_SYMBOL(xensock_proto_cleanup); + +MODULE_DESCRIPTION("xensock protocol"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_NETPROTO(AF_XENSOCK); diff --git a/include/linux/socket.h b/include/linux/socket.h index ec538fc..79ffa55 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -180,7 +180,8 @@ struct ucred { #define AF_ALG 38 /* Algorithm sockets */ #define AF_NFC 39 /* NFC sockets */ #define AF_VSOCK 40 /* vSockets */ -#define AF_MAX 41 /* For now.. */ +#define AF_XENSOCK 41 /* xensock sockets */ +#define AF_MAX 42 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -225,6 +226,7 @@ struct ucred { #define PF_ALG AF_ALG #define PF_NFC AF_NFC #define PF_VSOCK AF_VSOCK +#define PF_XENSOCK AF_XENSOCK #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. 
*/ diff --git a/include/net/af_xensock.h b/include/net/af_xensock.h new file mode 100644 index 0000000..48df5ce --- /dev/null +++ b/include/net/af_xensock.h @@ -0,0 +1,46 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef __AF_XENSOCK_H__ +#define __AF_XENSOCK_H__ + +#include <net/xensock.h> + +struct xen_sock { + struct sock sk; + struct xen_sock_dev *dev; +}; + +static inline struct xen_sock *xen_sk(const struct sock *sk) +{ + return (struct xen_sock *)sk; +} + +int xensock_proto_server_init(void); +int xensock_proto_client_init(void); +void xensock_proto_cleanup(void); + +#endif /* __AF_XENSOCK_H__ */ diff --git a/include/net/xensock.h b/include/net/xensock.h new file mode 100644 index 0000000..2e5949b --- /dev/null +++ b/include/net/xensock.h @@ -0,0 +1,130 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef __XENSOCK_H__ +#define __XENSOCK_H__ + +#include <linux/bitops.h> +#include <linux/if.h> +#include <net/sock.h> + +#define XENSOCK_MTU 65535 + +#define SOCKDEV_ALIGN 32 + +#define __SOCK_STATE_NOCARRIER 0 +#define __SOCK_STATE_QUEUESTOPPED 1 + +struct xen_sock_dev { + char name[IFNAMSIZ]; + struct sock *sk; + spinlock_t lock; /* sock_dev operations lock */ + unsigned long state; + unsigned short padded; /* Padding added by alloc_xen_sock_dev() */ + + int (*start_xmit)(struct sk_buff *skb, struct xen_sock_dev *dev); + unsigned long tx_queue_len; + struct list_head list; +}; + +/* + * xensock_dev_priv - access sock device private data + * @dev: xen_sock device + * + * Get xen_sock device private data + */ +static inline void *xensock_dev_priv(const struct xen_sock_dev *dev) +{ + return (char *)dev + ALIGN(sizeof(struct xen_sock_dev), SOCKDEV_ALIGN); +} + +/* + * sockif_carrier_ok - test if carrier present + * @dev: xensock device + * + * Check if carrier is present on device + */ +static inline bool sockif_carrier_ok(const struct xen_sock_dev *dev) +{ + return !test_bit(__SOCK_STATE_NOCARRIER, &dev->state); +} + +/* + * sockif_carrier_on - set carrier + * @dev: xensock device + */ +static inline void sockif_carrier_on(struct xen_sock_dev *dev) +{ + clear_bit(__SOCK_STATE_NOCARRIER, &dev->state); +} + +/* + * sockif_carrier_off - clear carrier + * @dev: xensock device + */ +static inline void sockif_carrier_off(struct xen_sock_dev *dev) +{ + set_bit(__SOCK_STATE_NOCARRIER, &dev->state); +} + +/* + * sockif_queue_stopped - test if tx queue is stopped + * @dev: xensock device + */ +static inline bool sockif_queue_stopped(const struct xen_sock_dev *dev) +{ + return test_bit(__SOCK_STATE_QUEUESTOPPED, &dev->state); +} + +/* + * sockif_wake_queue - wake tx queue + * @dev: xensock device + */ +static inline void sockif_wake_queue(struct xen_sock_dev *dev) +{ + clear_bit(__SOCK_STATE_QUEUESTOPPED, &dev->state); +} + +/* + * sockif_stop_queue - stop tx queue + 
* @dev: xensock device + */ +static inline void sockif_stop_queue(struct xen_sock_dev *dev) +{ + set_bit(__SOCK_STATE_QUEUESTOPPED, &dev->state); +} + +struct xen_sock_dev *alloc_xen_sock_dev(int sizeof_priv, const char *name); +void free_xen_sock_dev(struct xen_sock_dev *dev); +int xensock_register_dev(struct xen_sock_dev *dev); +void xensock_unregister_dev(struct xen_sock_dev *dev); +void xensock_dev_unlink_sk(struct xen_sock_dev *dev); +void xensock_unlink_all_dev_sk(void); +int xensock_dev_wait(struct sock *sk, struct sock *nsk); +int xensock_dev_send(struct sk_buff *skb, struct xen_sock_dev *dev); +int xensock_dev_queue_rx_skb(struct sk_buff *skb, struct xen_sock_dev *dev); + +#endif /* __XENSOCK_H__ */ diff --git a/net/core/sock.c b/net/core/sock.c index 026e01f..a57f264 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -262,7 +262,8 @@ static const char *const af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , - "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX" + "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_XENSOCK" , + "sk_lock-AF_MAX" }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , @@ -278,7 +279,8 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , - "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX" + "slock-AF_NFC" , "slock-AF_VSOCK" , "slock-AF_XENSOCK" , + "slock-AF_MAX" }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , @@ -294,7 +296,8 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_TIPC" , 
"clock-AF_BLUETOOTH", "clock-AF_IUCV" , "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , - "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX" + "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_XENSOCK" , + "clock-AF_MAX" }; /* -- 1.8.2.rc2 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |