[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 3/3] xen-sockfront: add support of the xen-sockfront driver



This driver allows client applications running in a domU domain
to use xensock sockets.

Signed-off-by: Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx>
---
 drivers/net/Kconfig         |   17 +-
 drivers/net/Makefile        |    1 +
 drivers/net/xen-sockfront.c | 1364 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 1381 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/xen-sockfront.c

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index e643f42..887fe4b 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -348,6 +348,21 @@ config XEN_NETDEV_BACKEND
          compile this driver as a module, chose M here: the module
          will be called xen-netback.
 
+config XEN_SOCKDEV_FRONTEND
+       tristate "Xen frontend socket device"
+       depends on XEN_DOMU
+       select XEN_SOCKDEV_PROTO
+       help
+         This driver provides support for Xen socket devices exported
+         by a Xen socket driver domain (often domain 0).
+
+         The corresponding Linux backend driver is enabled by the
+         XEN_SOCKDEV_BACKEND option.
+
+         If you are compiling a kernel for use as a Xen guest, you
+         should say Y here. To compile this driver as a module, choose
+         M here: the module will be called xen-sockfront.
+
 config XEN_SOCKDEV_BACKEND
        tristate "Xen backend socket device"
        depends on XEN_BACKEND
@@ -369,7 +384,7 @@ config XEN_SOCKDEV_BACKEND
 config XEN_SOCKDEV_PROTO
        bool
        default n
-       depends on XEN_SOCKDEV_BACKEND
+       depends on XEN_SOCKDEV_BACKEND || XEN_SOCKDEV_FRONTEND
 
 config VMXNET3
        tristate "VMware VMXNET3 ethernet driver"
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 96c6c97..8c3eee8 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -62,6 +62,7 @@ obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
 obj-$(CONFIG_XEN_NETDEV_BACKEND) += xen-netback/
 
 obj-$(CONFIG_XEN_SOCKDEV_PROTO) += xensock/
+obj-$(CONFIG_XEN_SOCKDEV_FRONTEND) += xen-sockfront.o
 obj-$(CONFIG_XEN_SOCKDEV_BACKEND) += xen-sockback/
 
 obj-$(CONFIG_USB_CATC)          += usb/
diff --git a/drivers/net/xen-sockfront.c b/drivers/net/xen-sockfront.c
new file mode 100644
index 0000000..2aa65e4
--- /dev/null
+++ b/drivers/net/xen-sockfront.c
@@ -0,0 +1,1364 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/moduleparam.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+
+#include <net/xensock.h>
+#include <net/af_xensock.h>
+
+#include <asm/xen/page.h>
+#include <xen/xen.h>
+#include <xen/xenbus.h>
+#include <xen/events.h>
+#include <xen/page.h>
+#include <xen/grant_table.h>
+
+#include <xen/interface/io/sockif.h>
+#include <xen/interface/memory.h>
+#include <xen/interface/grant_table.h>
+
+/* Per-skb private data stored in skb->cb (see SOCKFRONT_SKB_CB()). */
+struct sockfront_cb {
+       /* Target head length for a received skb: xensock_poll() sets it
+        * to the response size capped at RX_COPY_THRESHOLD, and
+        * handle_incoming_queue() passes (pull_to - skb_headlen()) to
+        * __pskb_pull_tail().
+        */
+       int pull_to;
+};
+
+#define SOCKFRONT_SKB_CB(skb)  ((struct sockfront_cb *)((skb)->cb))
+
+#define RX_COPY_THRESHOLD 256
+
+#define GRANT_INVALID_REF      0
+
+#define SOCK_TX_RING_SIZE __CONST_RING_SIZE(xen_sockif_tx, PAGE_SIZE)
+#define SOCK_RX_RING_SIZE __CONST_RING_SIZE(xen_sockif_rx, PAGE_SIZE)
+#define TX_MAX_TARGET min_t(int, SOCK_TX_RING_SIZE, 256)
+
+/* Per-device private state of the frontend, obtained from a
+ * struct xen_sock_dev with xensock_dev_priv(). It carries the TX and RX
+ * front rings plus the bookkeeping (skbs and grant references) for every
+ * buffer currently posted on them.
+ */
+struct sockfront_info {
+       struct list_head list;
+       /* Device this state belongs to; read back by xensock_poll(). */
+       struct xen_sock_dev *sockdev;
+       /* irq used with notify_remote_via_irq() to kick the other end. */
+       unsigned int irq;
+
+       /* Tasklet that runs RX processing; scheduled from the interrupt
+        * handler, the refill timer and xensock_poll() itself.
+        */
+       struct tasklet_struct rx_poll;
+
+       unsigned int evtchn;
+       /* xenbus device; supplies otherend_id for grants and the struct
+        * device used for dev_warn()/dev_info().
+        */
+       struct xenbus_device *xbdev;
+
+       spinlock_t   tx_lock; /* transmitter lock */
+       struct xen_sockif_tx_front_ring tx;
+       int tx_ring_ref;
+
+       /* {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
+        * are linked from tx_skb_freelist through skb_entry.link.
+        *
+        *  NB. Freelist index entries are always going to be less than
+        *  PAGE_OFFSET, whereas pointers to skbs will always be equal or
+        *  greater than PAGE_OFFSET: we use this property to distinguish
+        *  them.
+        */
+       union skb_entry {
+               struct sk_buff *skb;
+               unsigned long link;
+       } tx_skbs[SOCK_TX_RING_SIZE];
+       /* Head of the pool that TX grant references are claimed from. */
+       grant_ref_t gref_tx_head;
+       /* Grant reference in use for each TX slot id, or GRANT_INVALID_REF. */
+       grant_ref_t grant_tx_ref[SOCK_TX_RING_SIZE];
+       /* Index of the first free entry in tx_skbs[]. */
+       unsigned tx_skb_freelist;
+
+       spinlock_t   rx_lock ____cacheline_aligned_in_smp;
+       struct xen_sockif_rx_front_ring rx;
+       int rx_ring_ref;
+
+       /* Receive-ring batched refills. */
+#define RX_MIN_TARGET 8
+#define RX_DFL_MIN_TARGET 64
+#define RX_MAX_TARGET min_t(int, SOCK_RX_RING_SIZE, 256)
+       /* rx_target is the current fill level aimed for; it is doubled
+        * (up to rx_max_target) in xensock_alloc_rx_buffers() and
+        * decremented (down to rx_min_target) in xensock_poll().
+        */
+       unsigned rx_min_target, rx_max_target, rx_target;
+       /* Buffers allocated but not yet posted on the RX ring. */
+       struct sk_buff_head rx_batch;
+
+       /* Re-schedules rx_poll after an allocation failure. */
+       struct timer_list rx_refill_timer;
+
+       /* skb and grant reference posted in each RX ring slot. */
+       struct sk_buff *rx_skbs[SOCK_RX_RING_SIZE];
+       grant_ref_t gref_rx_head;
+       grant_ref_t grant_rx_ref[SOCK_RX_RING_SIZE];
+
+       /* Scratch arrays; rx_mcl and rx_mmu are referenced only by
+        * xensock_release_rx_bufs() in the code visible here.
+        */
+       unsigned long rx_pfn_array[SOCK_RX_RING_SIZE];
+       struct multicall_entry rx_mcl[SOCK_RX_RING_SIZE+1];
+       struct mmu_update rx_mmu[SOCK_RX_RING_SIZE];
+};
+
+/* Local copy of one RX response: xensock_poll() memcpy()s the ring entry
+ * into it before passing it to xensock_get_responses().
+ */
+struct sockfront_rx_info {
+       struct xen_sockif_rx_response rx;
+};
+
+/* Turn the tx_skbs[] entry @list into a freelist link to index @id. */
+static void skb_entry_set_link(union skb_entry *list, unsigned short id)
+{
+       unsigned long next = id;
+
+       list->link = next;
+}
+
+/* Return non-zero when @list holds a freelist index rather than an skb
+ * pointer. Indexes are always below PAGE_OFFSET, skb pointers never are.
+ */
+static int skb_entry_is_link(const union skb_entry *list)
+{
+       unsigned long raw;
+
+       /* The union trick only works if both members have the same size. */
+       BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link));
+
+       raw = (unsigned long)list->skb;
+       return raw < PAGE_OFFSET;
+}
+
+/* Helpers for acquiring and freeing slots in tx_skbs[] */
+
+/* Push slot @id onto the freelist rooted at *@head: the slot is made to
+ * link to the previous head and then becomes the new head.
+ */
+static void add_id_to_freelist(unsigned *head, union skb_entry *list,
+                              unsigned short id)
+{
+       unsigned short old_head = *head;
+
+       skb_entry_set_link(list + id, old_head);
+       *head = id;
+}
+
+/* Pop the first free slot off the freelist rooted at *@head and return
+ * its index; the head moves on to whatever that slot linked to.
+ */
+static unsigned short get_id_from_freelist(unsigned *head,
+                                          union skb_entry *list)
+{
+       const unsigned int first = *head;
+       const union skb_entry *slot = &list[first];
+
+       *head = slot->link;
+       return first;
+}
+
+/* Map a free-running RX ring index onto a slot in the per-slot arrays. */
+static int xensock_rxidx(RING_IDX idx)
+{
+       const RING_IDX mask = SOCK_RX_RING_SIZE - 1;
+
+       return idx & mask;
+}
+
+/* Detach and return the skb recorded for RX ring index @ri; the slot in
+ * np->rx_skbs[] is left NULL.
+ */
+static struct sk_buff *xensock_get_rx_skb(struct sockfront_info *np,
+                                         RING_IDX ri)
+{
+       struct sk_buff **entry = &np->rx_skbs[xensock_rxidx(ri)];
+       struct sk_buff *taken = *entry;
+
+       *entry = NULL;
+       return taken;
+}
+
+/* Detach and return the grant reference recorded for RX ring index @ri;
+ * the slot in np->grant_rx_ref[] is reset to GRANT_INVALID_REF.
+ */
+static grant_ref_t xensock_get_rx_ref(struct sockfront_info *np,
+                                     RING_IDX ri)
+{
+       grant_ref_t *entry = &np->grant_rx_ref[xensock_rxidx(ri)];
+       grant_ref_t taken = *entry;
+
+       *entry = GRANT_INVALID_REF;
+       return taken;
+}
+
+/* Timer callback: @data is interpreted as the struct xen_sock_dev
+ * pointer; all it does is schedule the RX tasklet of that device.
+ */
+static void rx_refill_timeout(unsigned long data)
+{
+       struct sockfront_info *np;
+
+       np = xensock_dev_priv((struct xen_sock_dev *)data);
+       tasklet_schedule(&np->rx_poll);
+}
+
+/* Return non-zero while the number of TX requests still in flight leaves
+ * room for one more worst-case skb (MAX_SKB_FRAGS + 2 slots) below
+ * TX_MAX_TARGET.
+ */
+static int sockfront_tx_slot_available(struct sockfront_info *np)
+{
+       RING_IDX in_flight = np->tx.req_prod_pvt - np->tx.rsp_cons;
+
+       return in_flight < (TX_MAX_TARGET - MAX_SKB_FRAGS - 2);
+}
+
+/* Wake the TX queue of @dev if it is stopped and slots are available. */
+static void xensock_maybe_wake_tx(struct xen_sock_dev *dev)
+{
+       struct sockfront_info *np = xensock_dev_priv(dev);
+
+       /* Common case: the queue is running, nothing to do. */
+       if (likely(!sockif_queue_stopped(dev)))
+               return;
+
+       if (sockfront_tx_slot_available(np))
+               sockif_wake_queue(dev);
+}
+
+/* Refill the RX ring with freshly allocated, granted receive buffers.
+ *
+ * Each buffer is an skb with one page attached as frags[0]. Buffers are
+ * first collected on np->rx_batch and are only posted on the ring once
+ * the batch reaches half of rx_target, or when an allocation fails while
+ * buffers are already batched. If nothing could be allocated at all, the
+ * refill timer is armed to retry after HZ/10 jiffies.
+ *
+ * Does nothing while the carrier is down. The caller visible in this
+ * file (xensock_poll()) invokes it with np->rx_lock held.
+ */
+static void xensock_alloc_rx_buffers(struct xen_sock_dev *dev)
+{
+       unsigned short id;
+       struct sockfront_info *np = xensock_dev_priv(dev);
+       struct sk_buff *skb;
+       struct page *page;
+       int i, batch_target, notify;
+       RING_IDX req_prod = np->rx.req_prod_pvt;
+       grant_ref_t ref;
+       unsigned long pfn;
+       struct xen_sockif_rx_request *req;
+
+       if (unlikely(!sockif_carrier_ok(dev)))
+               return;
+
+       /* Allocate skbuffs greedily, even though we batch updates to the
+        * receive ring. This creates a less bursty demand on the memory
+        * allocator, so should reduce the chance of failed allocation requests
+        * both for ourself and for other kernel subsystems.
+        */
+       batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
+       for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
+               skb = alloc_skb(RX_COPY_THRESHOLD + NET_IP_ALIGN + NET_SKB_PAD,
+                               GFP_ATOMIC | __GFP_NOWARN);
+               if (unlikely(!skb))
+                       goto no_skb;
+
+               /* Align ip header to a 16 bytes boundary */
+               skb_reserve(skb, NET_IP_ALIGN);
+
+               page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+               if (!page) {
+                       kfree_skb(skb);
+no_skb:
+                       /* Any skbuffs queued for refill? Force them out. */
+                       if (i != 0)
+                               goto refill;
+                       /* Could not allocate any skbuffs. Try again later. */
+                       mod_timer(&np->rx_refill_timer,
+                                 jiffies + (HZ/10));
+                       break;
+               }
+
+               __skb_fill_page_desc(skb, 0, page, 0, 0);
+               skb_shinfo(skb)->nr_frags = 1;
+               __skb_queue_tail(&np->rx_batch, skb);
+       }
+
+       /* Is the batch large enough to be worthwhile? */
+       if (i < (np->rx_target/2)) {
+               /* Not yet, but still publish requests that were queued
+                * privately and have not been pushed to the shared ring.
+                */
+               if (req_prod > np->rx.sring->req_prod)
+                       goto push;
+               return;
+       }
+
+       /* Adjust our fill target if we risked running out of buffers. */
+       if ((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) {
+               np->rx_target *= 2;
+               if (np->rx_target > np->rx_max_target)
+                       np->rx_target = np->rx_max_target;
+       }
+
+ refill:
+       for (i = 0; ; i++) {
+               skb = __skb_dequeue(&np->rx_batch);
+               if (skb == NULL)
+                       break;
+
+               id = xensock_rxidx(req_prod + i);
+
+               BUG_ON(np->rx_skbs[id]);
+               np->rx_skbs[id] = skb;
+
+               ref = gnttab_claim_grant_reference(&np->gref_rx_head);
+               /* Failure is reported as a negative int. Test the full
+                * width: a (signed short) cast would misfire on valid
+                * references >= 0x8000.
+                */
+               BUG_ON((int)ref < 0);
+               np->grant_rx_ref[id] = ref;
+
+               pfn = page_to_pfn(skb_frag_page(&skb_shinfo(skb)->frags[0]));
+
+               req = RING_GET_REQUEST(&np->rx, req_prod + i);
+               gnttab_grant_foreign_access_ref(ref,
+                                               np->xbdev->otherend_id,
+                                               pfn_to_mfn(pfn),
+                                               0);
+
+               req->id = id;
+               req->gref = ref;
+       }
+
+       wmb();          /* barrier so the backend sees the requests */
+
+       np->rx.req_prod_pvt = req_prod + i;
+ push:
+       RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
+       if (notify)
+               notify_remote_via_irq(np->irq);
+}
+
+/* Reclaim TX slots for which the other end has produced a response.
+ *
+ * For every response between tx.rsp_cons and the shared rsp_prod the
+ * grant is ended and released, the slot id is returned to the tx_skbs
+ * freelist and the skb reference stored in that slot is dropped.
+ * Afterwards rsp_event is re-armed and the TX queue is woken if slots are
+ * available again.
+ *
+ * BUG()s if the carrier is down or if a grant is still reported in use.
+ * Both callers visible in this file hold np->tx_lock.
+ */
+static void xensock_tx_buf_gc(struct xen_sock_dev *dev)
+{
+       RING_IDX cons, prod;
+       unsigned short id;
+       struct sockfront_info *np = xensock_dev_priv(dev);
+       struct sk_buff *skb;
+
+       BUG_ON(!sockif_carrier_ok(dev));
+
+       do {
+               prod = np->tx.sring->rsp_prod;
+               rmb(); /* Ensure we see responses up to 'prod'. */
+
+               for (cons = np->tx.rsp_cons; cons != prod; cons++) {
+                       struct xen_sockif_tx_response *txrsp;
+
+                       txrsp = RING_GET_RESPONSE(&np->tx, cons);
+
+                       /* NOTE(review): id comes straight from the shared
+                        * ring and indexes tx_skbs[]/grant_tx_ref[]
+                        * without a range check.
+                        */
+                       id  = txrsp->id;
+                       skb = np->tx_skbs[id].skb;
+                       if (unlikely(gnttab_query_foreign_access(
+                               np->grant_tx_ref[id]) != 0)) {
+                               pr_alert("%s: warning -- grant still in use "
+                                        "by backend domain.\n", __func__);
+                               BUG();
+                       }
+                       gnttab_end_foreign_access_ref(
+                               np->grant_tx_ref[id], GNTMAP_readonly);
+                       gnttab_release_grant_reference(
+                               &np->gref_tx_head, np->grant_tx_ref[id]);
+                       np->grant_tx_ref[id] = GRANT_INVALID_REF;
+                       add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs,
+                                          id);
+                       dev_kfree_skb_irq(skb);
+               }
+
+               np->tx.rsp_cons = prod;
+
+               /* Set a new event, then check for race with update of tx_cons.
+                * Note that it is essential to schedule a callback, no matter
+                * how few buffers are pending. Even if there is space in the
+                * transmit ring, higher layers may be blocked because too much
+                * data is outstanding: in such cases notification from Xen is
+                * likely to be the only kick that we'll get.
+                */
+               np->tx.sring->rsp_event =
+                       prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
+               mb();           /* update shared area */
+       } while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
+
+       xensock_maybe_wake_tx(dev);
+}
+
+/* Queue the additional TX requests needed for @skb after its first one.
+ *
+ * @tx is the request already set up by the caller for the start of the
+ * linear area. One further request is emitted for every extra page the
+ * linear area touches and for every page-sized piece of each fragment.
+ * Every request but the last gets XEN_SOCKTXF_more_data, and each extra
+ * slot takes its own reference on @skb (skb_get()), which
+ * xensock_tx_buf_gc() drops when the slot completes.
+ *
+ * Updates np->tx.req_prod_pvt; the caller visible in this file holds
+ * np->tx_lock.
+ */
+static void xensock_make_frags(struct sk_buff *skb, struct xen_sock_dev *dev,
+                              struct xen_sockif_tx_request *tx)
+{
+       struct sockfront_info *np = xensock_dev_priv(dev);
+       char *data = skb->data;
+       unsigned long mfn;
+       RING_IDX prod = np->tx.req_prod_pvt;
+       int frags = skb_shinfo(skb)->nr_frags;
+       unsigned int offset = offset_in_page(data);
+       unsigned int len = skb_headlen(skb);
+       unsigned int id;
+       grant_ref_t ref;
+       int i;
+
+       /* While the header overlaps a page boundary (including being
+        * larger than a page), split it into page-sized chunks.
+        */
+       while (len > PAGE_SIZE - offset) {
+               tx->size = PAGE_SIZE - offset;
+               tx->flags |= XEN_SOCKTXF_more_data;
+               len -= tx->size;
+               data += tx->size;
+               offset = 0;
+
+               id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
+               np->tx_skbs[id].skb = skb_get(skb);
+               tx = RING_GET_REQUEST(&np->tx, prod++);
+               tx->id = id;
+               ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+               /* Failure is a negative int; a (signed short) cast would
+                * misfire on valid references >= 0x8000.
+                */
+               BUG_ON((int)ref < 0);
+
+               mfn = virt_to_mfn(data);
+               gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
+                                               mfn, GNTMAP_readonly);
+
+               tx->gref = ref;
+               np->grant_tx_ref[id] = ref;
+               tx->offset = offset;
+               tx->size = len;
+               tx->flags = 0;
+       }
+
+       /* Grant backend access to each skb fragment page. */
+       for (i = 0; i < frags; i++) {
+               skb_frag_t *frag = skb_shinfo(skb)->frags + i;
+               struct page *page = skb_frag_page(frag);
+
+               len = skb_frag_size(frag);
+               offset = frag->page_offset;
+
+               /* Data must not cross a page boundary. */
+               BUG_ON(len + offset > PAGE_SIZE<<compound_order(page));
+
+               /* Skip unused frames from start of page */
+               page += offset >> PAGE_SHIFT;
+               offset &= ~PAGE_MASK;
+
+               while (len > 0) {
+                       unsigned long bytes;
+
+                       BUG_ON(offset >= PAGE_SIZE);
+
+                       bytes = PAGE_SIZE - offset;
+                       if (bytes > len)
+                               bytes = len;
+
+                       /* The previous request is no longer the last one. */
+                       tx->flags |= XEN_SOCKTXF_more_data;
+
+                       id = get_id_from_freelist(&np->tx_skb_freelist,
+                                                 np->tx_skbs);
+                       np->tx_skbs[id].skb = skb_get(skb);
+                       tx = RING_GET_REQUEST(&np->tx, prod++);
+                       tx->id = id;
+                       ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+                       /* Same full-width failure test as above. */
+                       BUG_ON((int)ref < 0);
+
+                       mfn = pfn_to_mfn(page_to_pfn(page));
+                       gnttab_grant_foreign_access_ref(ref,
+                                                       np->xbdev->otherend_id,
+                                                       mfn, GNTMAP_readonly);
+
+                       tx->gref = ref;
+                       np->grant_tx_ref[id] = ref;
+                       tx->offset = offset;
+                       tx->size = bytes;
+                       tx->flags = 0;
+
+                       offset += bytes;
+                       len -= bytes;
+
+                       /* Next frame */
+                       if (offset == PAGE_SIZE && len) {
+                               BUG_ON(!PageCompound(page));
+                               page++;
+                               offset = 0;
+                       }
+               }
+       }
+
+       np->tx.req_prod_pvt = prod;
+}
+
+/* Return the number of ring slots the page fragments of @skb occupy.
+ * A fragment may live in a compound page, so it can need several slots:
+ * one per PAGE_SIZE frame touched, counted from its offset inside the
+ * first frame it uses.
+ */
+static int xensock_count_skb_frag_slots(struct sk_buff *skb)
+{
+       const struct skb_shared_info *shinfo = skb_shinfo(skb);
+       int slots = 0;
+       int n;
+
+       for (n = 0; n < shinfo->nr_frags; n++) {
+               const skb_frag_t *frag = &shinfo->frags[n];
+               /* Whole unused frames at the start do not need a slot. */
+               unsigned long in_page = frag->page_offset & ~PAGE_MASK;
+               unsigned long bytes = skb_frag_size(frag);
+
+               slots += PFN_UP(in_page + bytes);
+       }
+
+       return slots;
+}
+
+/* Transmit @skb on @dev.
+ *
+ * The first request covers the start of the linear area;
+ * xensock_make_frags() adds the remaining ones. The size of the first
+ * request is then overwritten with the total skb length before the
+ * requests are pushed and the other end is notified if required.
+ * Completed slots are reclaimed straight away and the queue is stopped
+ * when no room is left for another worst-case skb.
+ *
+ * The skb is dropped (freed) when it would need more than
+ * MAX_SKB_FRAGS + 1 slots or when the carrier is down.
+ *
+ * Always returns 0. Takes np->tx_lock with interrupts disabled.
+ */
+static int xensock_start_xmit(struct sk_buff *skb, struct xen_sock_dev *dev)
+{
+       unsigned short id;
+       struct sockfront_info *np = xensock_dev_priv(dev);
+       struct xen_sockif_tx_request *tx;
+       char *data = skb->data;
+       RING_IDX i;
+       grant_ref_t ref;
+       unsigned long mfn;
+       int notify;
+       int slots;
+       unsigned int offset = offset_in_page(data);
+       unsigned int len = skb_headlen(skb);
+       unsigned long flags;
+
+       slots = DIV_ROUND_UP(offset + len, PAGE_SIZE) +
+               xensock_count_skb_frag_slots(skb);
+       if (unlikely(slots > MAX_SKB_FRAGS + 1)) {
+               pr_err_ratelimited("xensock: skb rides the rocket: %d slots\n",
+                                  slots);
+               goto drop;
+       }
+
+       spin_lock_irqsave(&np->tx_lock, flags);
+
+       if (unlikely(!sockif_carrier_ok(dev))) {
+               spin_unlock_irqrestore(&np->tx_lock, flags);
+               goto drop;
+       }
+
+       i = np->tx.req_prod_pvt;
+
+       id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
+       np->tx_skbs[id].skb = skb;
+
+       tx = RING_GET_REQUEST(&np->tx, i);
+
+       tx->id   = id;
+       ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+       /* Failure is reported as a negative int. Test the full width: a
+        * (signed short) cast would misfire on valid references >= 0x8000.
+        */
+       BUG_ON((int)ref < 0);
+       mfn = virt_to_mfn(data);
+       gnttab_grant_foreign_access_ref(
+               ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
+       tx->gref = ref;
+       np->grant_tx_ref[id] = ref;
+       tx->offset = offset;
+       tx->size = len;
+
+       tx->flags = 0;
+
+       np->tx.req_prod_pvt = i + 1;
+
+       xensock_make_frags(skb, dev, tx);
+       /* The first request carries the length of the whole skb. */
+       tx->size = skb->len;
+
+       RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
+       if (notify)
+               notify_remote_via_irq(np->irq);
+
+       /* Note: It is not safe to access skb after xensock_tx_buf_gc()! */
+       xensock_tx_buf_gc(dev);
+
+       if (!sockfront_tx_slot_available(np))
+               sockif_stop_queue(dev);
+
+       spin_unlock_irqrestore(&np->tx_lock, flags);
+
+       return 0;
+
+ drop:
+       dev_kfree_skb(skb);
+       return 0;
+}
+
+/* Re-post an RX buffer that cannot be consumed: @skb and its still-valid
+ * grant @ref are recorded in the slot for the current private producer
+ * index, a request for them is written to the ring and the private
+ * producer index is advanced. The slot must be empty (BUG otherwise).
+ */
+static void xensock_move_rx_slot(struct sockfront_info *np,
+                                struct sk_buff *skb, grant_ref_t ref)
+{
+       int new = xensock_rxidx(np->rx.req_prod_pvt);
+       struct xen_sockif_rx_request *req;
+
+       BUG_ON(np->rx_skbs[new]);
+       np->rx_skbs[new] = skb;
+       np->grant_rx_ref[new] = ref;
+
+       req = RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt);
+       req->id = new;
+       req->gref = ref;
+       np->rx.req_prod_pvt++;
+}
+
+/* Collect the receive buffers that make up one packet, starting at
+ * np->rx.rsp_cons.
+ *
+ * @rinfo holds a copy of the first response. While a response carries
+ * XEN_SOCKRXF_more_data the next ring response (below @rp) is examined
+ * as well. For every valid response the grant is ended and released and
+ * the skb is appended to @list. A response with a bad size/offset has
+ * its buffer re-posted via xensock_move_rx_slot().
+ *
+ * Returns 0 on success or a negative errno (-EINVAL, -ENOENT, -E2BIG).
+ * On error np->rx.rsp_cons is advanced past the responses examined; on
+ * success it is left untouched for the caller to update.
+ */
+static int xensock_get_responses(struct sockfront_info *np,
+                                struct sockfront_rx_info *rinfo, RING_IDX rp,
+                                struct sk_buff_head *list)
+{
+       struct xen_sockif_rx_response *rx = &rinfo->rx;
+       struct device *dev = &np->xbdev->dev;
+       RING_IDX cons = np->rx.rsp_cons;
+       struct sk_buff *skb = xensock_get_rx_skb(np, cons);
+       grant_ref_t ref = xensock_get_rx_ref(np, cons);
+       /* One extra slot is tolerated when the first response fits in the
+        * copy threshold.
+        */
+       int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
+       int frags = 1;
+       int err = 0;
+       unsigned long ret;
+
+       for (;;) {
+               if (unlikely(rx->status < 0 ||
+                            rx->offset + rx->status > PAGE_SIZE)) {
+                       dev_warn_ratelimited(dev, "rx->offset: %x, size: %u\n",
+                                            rx->offset, rx->status);
+                       xensock_move_rx_slot(np, skb, ref);
+                       err = -EINVAL;
+                       goto next;
+               }
+
+               /* This definitely indicates a bug, either in this driver or in
+                * the backend driver. In future this should flag the bad
+                * situation to the system controller to reboot the backend.
+                */
+               if (ref == GRANT_INVALID_REF) {
+                       dev_warn_ratelimited(dev, "Bad rx response id %d.\n",
+                                            rx->id);
+                       err = -EINVAL;
+                       goto next;
+               }
+
+               ret = gnttab_end_foreign_access_ref(ref, 0);
+               BUG_ON(!ret);
+
+               gnttab_release_grant_reference(&np->gref_rx_head, ref);
+
+               __skb_queue_tail(list, skb);
+
+next:
+               if (!(rx->flags & XEN_SOCKRXF_more_data))
+                       break;
+
+               /* More data was announced but no further response exists. */
+               if (cons + frags == rp) {
+                       dev_warn_ratelimited(dev, "Need more frags\n");
+                       err = -ENOENT;
+                       break;
+               }
+
+               rx = RING_GET_RESPONSE(&np->rx, cons + frags);
+               skb = xensock_get_rx_skb(np, cons + frags);
+               ref = xensock_get_rx_ref(np, cons + frags);
+               frags++;
+       }
+
+       if (unlikely(frags > max)) {
+               dev_warn_ratelimited(dev, "Too many frags\n");
+               err = -E2BIG;
+       }
+
+       if (unlikely(err))
+               np->rx.rsp_cons = cons + frags;
+
+       return err;
+}
+
+/* Move the page of every skb on @list into @skb as an additional
+ * fragment.
+ *
+ * The n-th skb on @list belongs to the response n entries after
+ * np->rx.rsp_cons; that response supplies the fragment's offset and
+ * size. Each donor skb is freed once its page has been handed over (its
+ * nr_frags is cleared first, so the free leaves the page alone).
+ *
+ * Returns the ring index of the last response consumed.
+ */
+static RING_IDX xensock_fill_frags(struct sockfront_info *np,
+                                  struct sk_buff *skb,
+                                  struct sk_buff_head *list)
+{
+       struct skb_shared_info *head_info = skb_shinfo(skb);
+       RING_IDX idx = np->rx.rsp_cons;
+       int slot = head_info->nr_frags;
+       struct sk_buff *donor;
+
+       for (;;) {
+               struct xen_sockif_rx_response *rsp;
+               struct skb_shared_info *donor_info;
+
+               donor = __skb_dequeue(list);
+               if (!donor)
+                       break;
+
+               idx++;
+               rsp = RING_GET_RESPONSE(&np->rx, idx);
+               donor_info = skb_shinfo(donor);
+
+               __skb_fill_page_desc(skb, slot,
+                                    skb_frag_page(&donor_info->frags[0]),
+                                    rsp->offset, rsp->status);
+               skb->data_len += rsp->status;
+               slot++;
+
+               donor_info->nr_frags = 0;
+               kfree_skb(donor);
+       }
+
+       head_info->nr_frags = slot;
+       return idx;
+}
+
+/* Drain @rxq: bring the head of each skb to its recorded pull_to length
+ * and hand the skb to xensock_dev_queue_rx_skb().
+ *
+ * Returns the number of packets dropped, which is always 0 here.
+ */
+static int handle_incoming_queue(struct xen_sock_dev *dev,
+                                struct sk_buff_head *rxq)
+{
+       struct sk_buff *skb;
+
+       for (;;) {
+               skb = __skb_dequeue(rxq);
+               if (skb == NULL)
+                       break;
+
+               __pskb_pull_tail(skb, SOCKFRONT_SKB_CB(skb)->pull_to -
+                                     skb_headlen(skb));
+
+               /* Pass it up. */
+               xensock_dev_queue_rx_skb(skb, dev);
+       }
+
+       return 0;
+}
+
+/* RX tasklet body; @data is the struct sockfront_info pointer.
+ *
+ * Under np->rx_lock it consumes up to 'budget' packets from the RX ring:
+ * the buffers of each packet are gathered with xensock_get_responses(),
+ * merged into the first skb with xensock_fill_frags() and queued on a
+ * local list that handle_incoming_queue() passes up. Buffers belonging
+ * to bad packets are purged. Afterwards the fill target is lowered if
+ * appropriate and the ring is refilled.
+ *
+ * The tasklet re-schedules itself when the budget was used up or when
+ * RING_FINAL_CHECK_FOR_RESPONSES() reports more responses.
+ */
+static void xensock_poll(long unsigned int data)
+{
+       struct sockfront_info *np = (struct sockfront_info *)data;
+       struct xen_sock_dev *dev = np->sockdev;
+       struct sk_buff *skb;
+       struct sockfront_rx_info rinfo;
+       struct xen_sockif_rx_response *rx = &rinfo.rx;
+       RING_IDX i, rp;
+       int work_done;
+       struct sk_buff_head rxq;
+       struct sk_buff_head errq;
+       struct sk_buff_head tmpq;
+       unsigned long flags;
+       int err;
+       int budget = 16;
+       /* Stays 1 (reschedule) unless the final check below clears it. */
+       int more_to_do = 1;
+
+       spin_lock(&np->rx_lock);
+
+       skb_queue_head_init(&rxq);
+       skb_queue_head_init(&errq);
+       skb_queue_head_init(&tmpq);
+
+       rp = np->rx.sring->rsp_prod;
+       rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+       i = np->rx.rsp_cons;
+       work_done = 0;
+       while ((i != rp) && (work_done < budget)) {
+               /* Work on a private copy of the first response. */
+               memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
+
+               err = xensock_get_responses(np, &rinfo, rp, &tmpq);
+
+               if (unlikely(err)) {
+                       /* Bad packet: set its buffers aside for freeing and
+                        * resume at the consumer index that
+                        * xensock_get_responses() advanced.
+                        */
+                       while ((skb = __skb_dequeue(&tmpq)))
+                               __skb_queue_tail(&errq, skb);
+                       i = np->rx.rsp_cons;
+                       continue;
+               }
+
+               skb = __skb_dequeue(&tmpq);
+
+               SOCKFRONT_SKB_CB(skb)->pull_to = rx->status;
+               if (SOCKFRONT_SKB_CB(skb)->pull_to > RX_COPY_THRESHOLD)
+                       SOCKFRONT_SKB_CB(skb)->pull_to = RX_COPY_THRESHOLD;
+
+               skb_shinfo(skb)->frags[0].page_offset = rx->offset;
+               skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status);
+               skb->data_len = rx->status;
+
+               /* Returns the index of the last response of this packet. */
+               i = xensock_fill_frags(np, skb, &tmpq);
+
+               /* Truesize is the actual allocation size, even if the
+                * allocation is only partially used.
+                */
+               skb->truesize += PAGE_SIZE * skb_shinfo(skb)->nr_frags;
+               skb->len += skb->data_len;
+
+               __skb_queue_tail(&rxq, skb);
+
+               np->rx.rsp_cons = ++i;
+               work_done++;
+       }
+
+       __skb_queue_purge(&errq);
+
+       work_done -= handle_incoming_queue(dev, &rxq);
+
+       /* If we get a callback with very few responses, reduce fill target. */
+       /* NB. Note exponential increase, linear decrease. */
+       if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
+            ((3*np->rx_target) / 4)) &&
+           (--np->rx_target < np->rx_min_target))
+               np->rx_target = np->rx_min_target;
+
+       xensock_alloc_rx_buffers(dev);
+
+       if (work_done < budget) {
+               local_irq_save(flags);
+
+               RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
+
+               local_irq_restore(flags);
+       }
+
+       spin_unlock(&np->rx_lock);
+       if (more_to_do)
+               tasklet_schedule(&np->rx_poll);
+}
+
+/* Give back every TX slot that still holds an skb: end and release its
+ * grant, put the slot on the freelist and drop the skb reference.
+ */
+static void xensock_release_tx_bufs(struct sockfront_info *np)
+{
+       int slot;
+
+       for (slot = 0; slot < SOCK_TX_RING_SIZE; slot++) {
+               union skb_entry *entry = &np->tx_skbs[slot];
+               struct sk_buff *pending;
+               grant_ref_t ref;
+
+               /* Only entries holding an skb pointer need work; freelist
+                * references are left alone.
+                */
+               if (!skb_entry_is_link(entry)) {
+                       pending = entry->skb;
+                       ref = np->grant_tx_ref[slot];
+
+                       gnttab_end_foreign_access_ref(ref, GNTMAP_readonly);
+                       gnttab_release_grant_reference(&np->gref_tx_head,
+                                                      ref);
+                       np->grant_tx_ref[slot] = GRANT_INVALID_REF;
+                       add_id_to_freelist(&np->tx_skb_freelist,
+                                          np->tx_skbs, slot);
+                       dev_kfree_skb_irq(pending);
+               }
+       }
+}
+
+/* Intended to release the buffers still posted on the RX ring.
+ *
+ * NOTE: as written, the function only logs a "fix me" warning and
+ * returns; everything after the early return is unreachable, so no RX
+ * buffer or grant reference is released here. The unreachable code uses
+ * the grant *transfer* API (gnttab_end_foreign_transfer_ref()), whereas
+ * the RX path in this file grants access with
+ * gnttab_grant_foreign_access_ref().
+ */
+static void xensock_release_rx_bufs(struct sockfront_info *np)
+{
+       struct mmu_update      *mmu = np->rx_mmu;
+       struct multicall_entry *mcl = np->rx_mcl;
+       struct sk_buff_head free_list;
+       struct sk_buff *skb;
+       unsigned long mfn;
+       int xfer = 0, noxfer = 0, unused = 0;
+       int id, ref;
+
+       dev_warn(&np->xbdev->dev, "%s: fix me for copying receiver.\n",
+                __func__);
+       return;
+
+       /* Unreachable from here on (see the note above). */
+       skb_queue_head_init(&free_list);
+
+       spin_lock_bh(&np->rx_lock);
+
+       for (id = 0; id < SOCK_RX_RING_SIZE; id++) {
+               ref = np->grant_rx_ref[id];
+               if (ref == GRANT_INVALID_REF) {
+                       unused++;
+                       continue;
+               }
+
+               skb = np->rx_skbs[id];
+               mfn = gnttab_end_foreign_transfer_ref(ref);
+               gnttab_release_grant_reference(&np->gref_rx_head, ref);
+               np->grant_rx_ref[id] = GRANT_INVALID_REF;
+
+               if (0 == mfn) {
+                       skb_shinfo(skb)->nr_frags = 0;
+                       dev_kfree_skb(skb);
+                       noxfer++;
+                       continue;
+               }
+
+               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+                       /* Remap the page. */
+                       const struct page *page =
+                               skb_frag_page(&skb_shinfo(skb)->frags[0]);
+                       unsigned long pfn = page_to_pfn(page);
+                       void *vaddr = page_address(page);
+
+                       MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
+                                               mfn_pte(mfn, PAGE_KERNEL),
+                                               0);
+                       mcl++;
+                       mmu->ptr = ((u64)mfn << PAGE_SHIFT)
+                               | MMU_MACHPHYS_UPDATE;
+                       mmu->val = pfn;
+                       mmu++;
+
+                       set_phys_to_machine(pfn, mfn);
+               }
+               __skb_queue_tail(&free_list, skb);
+               xfer++;
+       }
+
+       dev_info(&np->xbdev->dev, "%s: %d xfer, %d noxfer, %d unused\n",
+                __func__, xfer, noxfer, unused);
+
+       if (xfer) {
+               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+                       /* Do all the remapping work and M2P updates. */
+                       MULTI_mmu_update(mcl, np->rx_mmu, mmu - np->rx_mmu,
+                                        NULL, DOMID_SELF);
+                       mcl++;
+                       HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl);
+               }
+       }
+
+       __skb_queue_purge(&free_list);
+
+       spin_unlock_bh(&np->rx_lock);
+}
+
+/*
+ * Undo the per-device buffer setup: call the tx and rx buffer release
+ * helpers, then hand both grant-reference pools back to the grant table.
+ */
+static void xensock_uninit(struct xen_sock_dev *dev)
+{
+       struct sockfront_info *info = xensock_dev_priv(dev);
+
+       xensock_release_tx_bufs(info);
+       xensock_release_rx_bufs(info);
+
+       gnttab_free_grant_references(info->gref_tx_head);
+       gnttab_free_grant_references(info->gref_rx_head);
+}
+
+/*
+ * Interrupt handler for the device's event channel.
+ *
+ * @irq:    interrupt number (unused here).
+ * @dev_id: the struct xen_sock_dev the handler was registered for.
+ *
+ * While the carrier is up, runs tx garbage collection and schedules the
+ * rx tasklet when the rx ring holds unconsumed responses.  Always
+ * returns IRQ_HANDLED.
+ */
+static irqreturn_t xensock_interrupt(int irq, void *dev_id)
+{
+       struct xen_sock_dev *sockdev = dev_id;
+       struct sockfront_info *info = xensock_dev_priv(sockdev);
+       unsigned long irqflags;
+
+       spin_lock_irqsave(&info->tx_lock, irqflags);
+
+       if (unlikely(!sockif_carrier_ok(sockdev)))
+               goto unlock;
+
+       xensock_tx_buf_gc(sockdev);
+
+       /* Under tx_lock: protects access to rx shared-ring indexes. */
+       if (RING_HAS_UNCONSUMED_RESPONSES(&info->rx))
+               tasklet_schedule(&info->rx_poll);
+
+ unlock:
+       spin_unlock_irqrestore(&info->tx_lock, irqflags);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * Allocate a "vsock" xen_sock_dev for @dev and initialise its private
+ * sockfront_info: locks, rx batch queue and targets, refill timer, the
+ * tx/rx slot tables, the grant-reference pools and the rx tasklet.
+ *
+ * Returns the new device with its carrier switched off, or
+ * ERR_PTR(-ENOMEM) when the device or a grant-reference pool cannot be
+ * allocated.
+ */
+static struct xen_sock_dev *xensock_create_dev(struct xenbus_device *dev)
+{
+       struct xen_sock_dev *sockdev;
+       struct sockfront_info *np;
+       int slot, err;
+
+       sockdev = alloc_xen_sock_dev(sizeof(*np), "vsock");
+       if (!sockdev)
+               return ERR_PTR(-ENOMEM);
+
+       np = xensock_dev_priv(sockdev);
+       np->xbdev = dev;
+
+       spin_lock_init(&np->tx_lock);
+       spin_lock_init(&np->rx_lock);
+
+       skb_queue_head_init(&np->rx_batch);
+       np->rx_min_target = RX_DFL_MIN_TARGET;
+       np->rx_max_target = RX_MAX_TARGET;
+       np->rx_target     = RX_DFL_MIN_TARGET;
+
+       init_timer(&np->rx_refill_timer);
+       np->rx_refill_timer.function = rx_refill_timeout;
+       np->rx_refill_timer.data = (unsigned long)sockdev;
+
+       /* Chain every tx slot into the free list, starting at slot 0. */
+       np->tx_skb_freelist = 0;
+       for (slot = 0; slot < SOCK_TX_RING_SIZE; slot++) {
+               np->grant_tx_ref[slot] = GRANT_INVALID_REF;
+               skb_entry_set_link(&np->tx_skbs[slot], slot + 1);
+       }
+
+       /* No rx slot holds a buffer or a grant yet. */
+       for (slot = 0; slot < SOCK_RX_RING_SIZE; slot++) {
+               np->grant_rx_ref[slot] = GRANT_INVALID_REF;
+               np->rx_skbs[slot] = NULL;
+       }
+
+       /* Both failures below are reported as out-of-memory. */
+       err = -ENOMEM;
+
+       /* A grant for every tx ring slot */
+       if (gnttab_alloc_grant_references(TX_MAX_TARGET,
+                                         &np->gref_tx_head) < 0) {
+               pr_alert("#### sockfront can't alloc tx grant refs\n");
+               goto exit;
+       }
+
+       /* A grant for every rx ring slot */
+       if (gnttab_alloc_grant_references(RX_MAX_TARGET,
+                                         &np->gref_rx_head) < 0) {
+               pr_alert("#### sockfront can't alloc rx grant refs\n");
+               goto exit_free_tx;
+       }
+
+       tasklet_init(&np->rx_poll, xensock_poll, (unsigned long)np);
+
+       sockdev->start_xmit = xensock_start_xmit;
+
+       np->sockdev = sockdev;
+
+       sockif_carrier_off(sockdev);
+
+       return sockdev;
+
+ exit_free_tx:
+       gnttab_free_grant_references(np->gref_tx_head);
+ exit:
+       free_xen_sock_dev(sockdev);
+       return ERR_PTR(err);
+}
+
+/*
+ * End foreign access to @page through grant @ref.  Does nothing when
+ * @ref is GRANT_INVALID_REF.
+ */
+static void xensock_end_access(int ref, void *page)
+{
+       if (ref == GRANT_INVALID_REF)
+               return;
+
+       /* This frees the page as a side-effect */
+       gnttab_end_foreign_access(ref, 0, (unsigned long)page);
+}
+
+/*
+ * Tear down the connection to the backend: switch the carrier off,
+ * release the event-channel interrupt and end access to both shared
+ * rings.  Afterwards the ring references are GRANT_INVALID_REF, the
+ * ring pointers are NULL and irq/evtchn are 0.
+ */
+static void xensock_disconnect_backend(struct sockfront_info *info)
+{
+       /* Stop old i/f to prevent errors whilst we rebuild the state. */
+       spin_lock_bh(&info->rx_lock);
+       spin_lock_irq(&info->tx_lock);
+       sockif_carrier_off(info->sockdev);
+       spin_unlock_irq(&info->tx_lock);
+       spin_unlock_bh(&info->rx_lock);
+
+       /*
+        * The handler is registered in setup_sockfront() with
+        * info->sockdev as its dev_id; the same cookie has to be passed
+        * here or the handler is never found and released.
+        */
+       if (info->irq)
+               unbind_from_irqhandler(info->irq, info->sockdev);
+       info->evtchn = 0;
+       info->irq = 0;
+
+       /* End access and free the pages */
+       xensock_end_access(info->tx_ring_ref, info->tx.sring);
+       xensock_end_access(info->rx_ring_ref, info->rx.sring);
+
+       info->tx_ring_ref = GRANT_INVALID_REF;
+       info->rx_ring_ref = GRANT_INVALID_REF;
+       info->tx.sring = NULL;
+       info->rx.sring = NULL;
+}
+
+/*
+ * Allocate and grant the tx and rx shared ring pages, allocate an event
+ * channel and bind it to xensock_interrupt().
+ *
+ * Returns 0 on success or a negative errno.  On failure everything set
+ * up by this call is released again and @info is left with invalid ring
+ * references, NULL ring pointers and no irq, so the caller has nothing
+ * to clean up.
+ */
+static int setup_sockfront(struct xenbus_device *dev,
+                          struct sockfront_info *info)
+{
+       struct xen_sockif_tx_sring *txs = NULL;
+       struct xen_sockif_rx_sring *rxs = NULL;
+       int err;
+
+       info->tx_ring_ref = GRANT_INVALID_REF;
+       info->rx_ring_ref = GRANT_INVALID_REF;
+       info->rx.sring = NULL;
+       info->tx.sring = NULL;
+       info->irq = 0;
+
+       txs = (struct xen_sockif_tx_sring *)get_zeroed_page(GFP_NOIO |
+                                                            __GFP_HIGH);
+       if (!txs) {
+               err = -ENOMEM;
+               xenbus_dev_fatal(dev, err, "allocating tx ring page");
+               goto fail;
+       }
+       SHARED_RING_INIT(txs);
+       FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
+
+       err = xenbus_grant_ring(dev, virt_to_mfn(txs));
+       if (err < 0)
+               goto fail;
+       info->tx_ring_ref = err;
+
+       rxs = (struct xen_sockif_rx_sring *)get_zeroed_page(GFP_NOIO |
+                                                            __GFP_HIGH);
+       if (!rxs) {
+               err = -ENOMEM;
+               xenbus_dev_fatal(dev, err, "allocating rx ring page");
+               goto fail;
+       }
+       SHARED_RING_INIT(rxs);
+       FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
+
+       err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
+       if (err < 0)
+               goto fail;
+       info->rx_ring_ref = err;
+
+       err = xenbus_alloc_evtchn(dev, &info->evtchn);
+       if (err)
+               goto fail;
+
+       err = bind_evtchn_to_irqhandler(info->evtchn, xensock_interrupt,
+                                       0, "sockif", info->sockdev);
+       if (err < 0)
+               goto fail_evtchn;
+       info->irq = err;
+       return 0;
+
+ fail_evtchn:
+       xenbus_free_evtchn(dev, info->evtchn);
+       info->evtchn = 0;
+ fail:
+       /*
+        * A ring that was already granted is released through
+        * xensock_end_access(), which also frees its page; a page that
+        * was allocated but never granted is freed directly.
+        */
+       if (info->rx_ring_ref != GRANT_INVALID_REF)
+               xensock_end_access(info->rx_ring_ref, rxs);
+       else if (rxs)
+               free_page((unsigned long)rxs);
+
+       if (info->tx_ring_ref != GRANT_INVALID_REF)
+               xensock_end_access(info->tx_ring_ref, txs);
+       else if (txs)
+               free_page((unsigned long)txs);
+
+       /* Do not leave dangling ring state behind for later teardown. */
+       info->tx_ring_ref = GRANT_INVALID_REF;
+       info->rx_ring_ref = GRANT_INVALID_REF;
+       info->tx.sring = NULL;
+       info->rx.sring = NULL;
+       return err;
+}
+
+/*
+ * Common code used when first setting up, and when resuming.
+ *
+ * Sets up the rings and event channel, then publishes tx-ring-ref,
+ * rx-ring-ref, event-channel, request-rx-copy and feature-rx-notify
+ * under the device's xenstore node in one transaction (restarted on
+ * -EAGAIN) and switches the device to XenbusStateInitialised.
+ *
+ * Returns 0 on success or a negative errno; once the rings exist, a
+ * failure disconnects from the backend again before returning.
+ */
+static int talk_to_sockback(struct xenbus_device *dev,
+                           struct sockfront_info *info)
+{
+       struct xenbus_transaction xbt;
+       const char *message;
+       int err;
+
+       /* Create shared ring, alloc event channel. */
+       err = setup_sockfront(dev, info);
+       if (err)
+               goto out;
+
+again:
+       err = xenbus_transaction_start(&xbt);
+       if (err) {
+               xenbus_dev_fatal(dev, err, "starting transaction");
+               goto destroy_ring;
+       }
+
+       /* "message" names the write in progress for the abort path. */
+       message = "writing tx ring-ref";
+       err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref", "%u",
+                           info->tx_ring_ref);
+       if (err)
+               goto abort_transaction;
+
+       message = "writing rx ring-ref";
+       err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref", "%u",
+                           info->rx_ring_ref);
+       if (err)
+               goto abort_transaction;
+
+       message = "writing event-channel";
+       err = xenbus_printf(xbt, dev->nodename,
+                           "event-channel", "%u", info->evtchn);
+       if (err)
+               goto abort_transaction;
+
+       message = "writing request-rx-copy";
+       err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
+                           1);
+       if (err)
+               goto abort_transaction;
+
+       message = "writing feature-rx-notify";
+       err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
+       if (err)
+               goto abort_transaction;
+
+       err = xenbus_transaction_end(xbt, 0);
+       if (err == -EAGAIN)
+               goto again;
+       if (err) {
+               xenbus_dev_fatal(dev, err, "completing transaction");
+               goto destroy_ring;
+       }
+
+       xenbus_switch_state(dev, XenbusStateInitialised);
+       return 0;
+
+ abort_transaction:
+       xenbus_transaction_end(xbt, 1);
+       xenbus_dev_fatal(dev, err, "%s", message);
+ destroy_ring:
+       xensock_disconnect_backend(info);
+ out:
+       return err;
+}
+
+/**
+ * Reconnect to the backend after a suspend/resume or a backend driver
+ * restart.  The sockif connection state is torn down and negotiated
+ * again, while the device-layer structures stay as they are so the rest
+ * of the kernel does not notice.
+ *
+ * Returns the result of talk_to_sockback().
+ */
+static int sockfront_resume(struct xenbus_device *dev)
+{
+       struct sockfront_info *np = dev_get_drvdata(&dev->dev);
+       int ret;
+
+       dev_dbg(&dev->dev, "%s\n", dev->nodename);
+
+       xensock_disconnect_backend(np);
+       ret = talk_to_sockback(np->xbdev, np);
+
+       return ret;
+}
+
+/**
+ * Entry point to this code when a new device is created.  Allocate the basic
+ * structures and the ring buffers for communication with the backend, and
+ * inform the backend of the appropriate details for those.
+ *
+ * Returns 0 on success or a negative errno.  On failure every step that
+ * already succeeded is undone: the device is unregistered again (if it
+ * was registered), the grant-reference pools allocated by
+ * xensock_create_dev() are returned and the device is freed.
+ */
+static int sockfront_probe(struct xenbus_device *dev,
+                          const struct xenbus_device_id *id)
+{
+       int err;
+       struct xen_sock_dev *sockdev;
+       struct sockfront_info *info;
+
+       sockdev = xensock_create_dev(dev);
+       if (IS_ERR(sockdev)) {
+               err = PTR_ERR(sockdev);
+               xenbus_dev_fatal(dev, err, "creating sockdev");
+               return err;
+       }
+
+       info = xensock_dev_priv(sockdev);
+       dev_set_drvdata(&dev->dev, info);
+
+       err = xensock_register_dev(info->sockdev);
+       if (err) {
+               pr_warn("%s: xensock_register_dev err=%d\n",
+                       __func__, err);
+               goto fail_free;
+       }
+
+       err = talk_to_sockback(info->xbdev, info);
+       if (err)
+               goto fail_unregister;
+
+       return 0;
+
+ fail_unregister:
+       xensock_unregister_dev(info->sockdev);
+ fail_free:
+       /* free_xen_sock_dev() is always paired with an explicit release
+        * of the grant pools elsewhere in this file; do the same here.
+        */
+       gnttab_free_grant_references(info->gref_tx_head);
+       gnttab_free_grant_references(info->gref_rx_head);
+       dev_set_drvdata(&dev->dev, NULL);
+       free_xen_sock_dev(sockdev);
+       return err;
+}
+
+/*
+ * Bring the data path up once the backend is connected.
+ *
+ * Requires the backend to advertise "feature-rx-copy"; otherwise
+ * returns -ENODEV.  With both locks held it releases the tx buffers,
+ * compacts the rx buffers still held to the start of the rx ring and
+ * re-grants them to the backend, then switches the carrier on, notifies
+ * the backend and refills the rx ring.  Returns 0 on success.
+ */
+static int xensock_connect(struct xen_sock_dev *dev)
+{
+       struct sockfront_info *np = xensock_dev_priv(dev);
+       unsigned int feature_rx_copy;
+       int src, dst, nread;
+
+       nread = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+                            "feature-rx-copy", "%u", &feature_rx_copy);
+       /* A missing or unparsable key counts as "not supported". */
+       if (nread != 1 || !feature_rx_copy) {
+               dev_info(&np->xbdev->dev,
+                        "backend does not support copying receive path\n");
+               return -ENODEV;
+       }
+
+       spin_lock_bh(&np->rx_lock);
+       spin_lock_irq(&np->tx_lock);
+
+       /* Step 1: Discard all pending TX packet fragments. */
+       xensock_release_tx_bufs(np);
+
+       /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
+       dst = 0;
+       for (src = 0; src < SOCK_RX_RING_SIZE; src++) {
+               struct xen_sockif_rx_request *req;
+               const struct page *page;
+               struct sk_buff *skb;
+               grant_ref_t ref;
+
+               if (np->rx_skbs[src] == NULL)
+                       continue;
+
+               /*
+                * dst never runs ahead of src, so take the entry out of
+                * slot src before storing it into slot dst.
+                */
+               skb = xensock_get_rx_skb(np, src);
+               np->rx_skbs[dst] = skb;
+               ref = xensock_get_rx_ref(np, src);
+               np->grant_rx_ref[dst] = ref;
+
+               page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
+               gnttab_grant_foreign_access_ref(
+                       ref, np->xbdev->otherend_id,
+                       pfn_to_mfn(page_to_pfn(page)),
+                       0);
+
+               req = RING_GET_REQUEST(&np->rx, dst);
+               req->gref = ref;
+               req->id   = dst;
+
+               dst++;
+       }
+
+       np->rx.req_prod_pvt = dst;
+
+       /* Step 3: All public and private state should now be sane.  Get
+        * ready to start sending and receiving packets and give the driver
+        * domain a kick because we've probably just requeued some
+        * packets.
+        */
+       sockif_carrier_on(np->sockdev);
+       notify_remote_via_irq(np->irq);
+       xensock_tx_buf_gc(dev);
+       xensock_alloc_rx_buffers(dev);
+
+       spin_unlock_irq(&np->tx_lock);
+       spin_unlock_bh(&np->rx_lock);
+
+       return 0;
+}
+
+/**
+ * Callback received when the backend's state changes.
+ *
+ * Only two backend states trigger an action: on Connected the data path
+ * is brought up and, if that succeeds, the frontend switches to
+ * Connected as well; on Closing the frontend is closed.  All other
+ * states are ignored.
+ */
+static void sockback_changed(struct xenbus_device *dev,
+                            enum xenbus_state backend_state)
+{
+       struct sockfront_info *np = dev_get_drvdata(&dev->dev);
+       struct xen_sock_dev *sockdev = np->sockdev;
+
+       dev_dbg(&dev->dev, "%s: backend changed to state %s\n",
+               __func__, xenbus_strstate(backend_state));
+
+       switch (backend_state) {
+       case XenbusStateInitialising:
+       case XenbusStateInitWait:
+       case XenbusStateInitialised:
+       case XenbusStateReconfiguring:
+       case XenbusStateReconfigured:
+       case XenbusStateUnknown:
+       case XenbusStateClosed:
+               break;
+
+       case XenbusStateConnected:
+               /* Stay in the current state if the data path cannot start. */
+               if (xensock_connect(sockdev) != 0)
+                       break;
+               xenbus_switch_state(dev, XenbusStateConnected);
+               break;
+
+       case XenbusStateClosing:
+               xenbus_frontend_closed(dev);
+               break;
+       }
+}
+
+/* Xenbus device types handled by this driver; the empty entry ends the list. */
+static const struct xenbus_device_id sockfront_ids[] = {
+       { "vsock" },
+       { "" }
+};
+
+
+/*
+ * Xenbus remove callback: disconnect from the backend, unregister the
+ * socket device, stop the rx tasklet and refill timer, release buffers
+ * and grant references, and free the device.  Always returns 0.
+ */
+static int sockfront_remove(struct xenbus_device *dev)
+{
+       struct sockfront_info *info = dev_get_drvdata(&dev->dev);
+
+       dev_dbg(&dev->dev, "%s\n", dev->nodename);
+
+       xensock_disconnect_backend(info);
+
+       xensock_unregister_dev(info->sockdev);
+
+       /*
+        * Do not disable the tasklet before killing it: tasklet_kill()
+        * waits for a scheduled tasklet to run, and a disabled tasklet
+        * never runs, so the wait could never finish.
+        */
+       tasklet_kill(&info->rx_poll);
+       xensock_uninit(info->sockdev);
+
+       del_timer_sync(&info->rx_refill_timer);
+
+       free_xen_sock_dev(info->sockdev);
+
+       return 0;
+}
+
+/*
+ * Xenbus driver definition for "sockfront": wires up the probe, remove
+ * and resume callbacks and the backend state-change handler.
+ */
+static DEFINE_XENBUS_DRIVER(sockfront, ,
+       .probe = sockfront_probe,
+       .remove = sockfront_remove,
+       .resume = sockfront_resume,
+       .otherend_changed = sockback_changed,
+);
+
+/*
+ * Module init: register the xenbus frontend driver and then the
+ * xensock client protocol.  Returns -ENODEV when not running as a Xen
+ * guest other than the initial domain, otherwise the result of the
+ * registrations; the frontend is unregistered again if the protocol
+ * init fails.
+ */
+static int __init sockif_init(void)
+{
+       int err;
+
+       /*FIXME: xen_pv_domain() should be here, but ARM hardcoded to hvm*/
+       /* Nothing to do outside Xen, or if running in dom0. */
+       if (!xen_domain() || xen_initial_domain())
+               return -ENODEV;
+
+       pr_info("Initialising Xen socket driver.\n");
+
+       err = xenbus_register_frontend(&sockfront_driver);
+       if (err < 0)
+               return err;
+
+       err = xensock_proto_client_init();
+       if (err)
+               xenbus_unregister_driver(&sockfront_driver);
+
+       return err;
+}
+module_init(sockif_init);
+
+
+/*
+ * Module exit: undo sockif_init() in reverse order, cleaning up the
+ * xensock protocol first and unregistering the xenbus driver last.
+ */
+static void __exit sockif_exit(void)
+{
+       xensock_proto_cleanup();
+       xenbus_unregister_driver(&sockfront_driver);
+}
+module_exit(sockif_exit);
+
+MODULE_DESCRIPTION("Xensock device frontend");
+MODULE_LICENSE("GPL");
+/*
+ * Xenbus frontends are autoloaded via a "xen:<devicetype>" modalias, and
+ * the device type this driver binds to is "vsock".  The previous alias is
+ * kept so anything relying on it keeps working.
+ */
+MODULE_ALIAS("xen:vsock");
+MODULE_ALIAS("xen:xensock");
-- 
1.8.2.rc2


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.