
[Xen-changelog] [linux-2.6.18-xen] Solarflare: PV frontend accelerator and front/back common util driver.



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1203330633 0
# Node ID 651fc2abdd5d32bed0bc88bcf3684e8126238fe4
# Parent  e4dd072db2595c420bb21d9e835416f4fd543526
Solarflare: PV frontend accelerator and front/back common util driver.
Signed-off-by: Kieran Mansley <kmansley@xxxxxxxxxxxxxx>
---
 drivers/xen/Kconfig                           |   10 
 drivers/xen/Makefile                          |    2 
 drivers/xen/sfc_netfront/Makefile             |   11 
 drivers/xen/sfc_netfront/accel.h              |  477 ++++++++++
 drivers/xen/sfc_netfront/accel_bufs.c         |  393 ++++++++
 drivers/xen/sfc_netfront/accel_bufs.h         |  181 +++
 drivers/xen/sfc_netfront/accel_debugfs.c      |  234 +++++
 drivers/xen/sfc_netfront/accel_msg.c          |  566 ++++++++++++
 drivers/xen/sfc_netfront/accel_netfront.c     |  318 ++++++
 drivers/xen/sfc_netfront/accel_ssr.c          |  308 ++++++
 drivers/xen/sfc_netfront/accel_ssr.h          |   88 +
 drivers/xen/sfc_netfront/accel_tso.c          |  512 +++++++++++
 drivers/xen/sfc_netfront/accel_tso.h          |   57 +
 drivers/xen/sfc_netfront/accel_vi.c           | 1194 ++++++++++++++++++++++++++
 drivers/xen/sfc_netfront/accel_xenbus.c       |  776 ++++++++++++++++
 drivers/xen/sfc_netfront/ef_vi_falcon.h       |  172 +++
 drivers/xen/sfc_netfront/ef_vi_falcon_core.h  | 1075 +++++++++++++++++++++++
 drivers/xen/sfc_netfront/ef_vi_falcon_desc.h  |   43 
 drivers/xen/sfc_netfront/ef_vi_falcon_event.h |  123 ++
 drivers/xen/sfc_netfront/ef_vi_internal.h     |  256 +++++
 drivers/xen/sfc_netfront/etherfabric/ef_vi.h  |  665 ++++++++++++++
 drivers/xen/sfc_netfront/falcon_event.c       |  346 +++++++
 drivers/xen/sfc_netfront/falcon_vi.c          |  465 ++++++++++
 drivers/xen/sfc_netfront/pt_tx.c              |   91 +
 drivers/xen/sfc_netfront/sysdep.h             |  184 ++++
 drivers/xen/sfc_netfront/vi_init.c            |  183 +++
 drivers/xen/sfc_netutil/Makefile              |   10 
 drivers/xen/sfc_netutil/accel_cuckoo_hash.c   |  651 ++++++++++++++
 drivers/xen/sfc_netutil/accel_cuckoo_hash.h   |  227 ++++
 drivers/xen/sfc_netutil/accel_msg_iface.c     |  301 ++++++
 drivers/xen/sfc_netutil/accel_msg_iface.h     |  414 +++++++++
 drivers/xen/sfc_netutil/accel_shared_fifo.h   |  127 ++
 drivers/xen/sfc_netutil/accel_util.c          |  333 +++++++
 drivers/xen/sfc_netutil/accel_util.h          |  127 ++
 34 files changed, 10920 insertions(+)

diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/Kconfig
--- a/drivers/xen/Kconfig       Mon Feb 18 10:29:29 2008 +0000
+++ b/drivers/xen/Kconfig       Mon Feb 18 10:30:33 2008 +0000
@@ -78,6 +78,10 @@ config XEN_NETDEV_PIPELINED_TRANSMITTER
          like reassembling packets to perform firewall filtering; or if you
          are unsure; or if you experience network hangs when this option is
          enabled; then you must say N here.
+
+config XEN_NETDEV_ACCEL_SFC_UTIL
+        tristate
+        default n
 
 config XEN_NETDEV_LOOPBACK
        tristate "Network-device loopback driver"
@@ -182,6 +186,12 @@ config XEN_GRANT_DEV
          Device for accessing (in user-space) pages that have been granted
          by other domains.
 
+config XEN_NETDEV_ACCEL_SFC_FRONTEND
+       tristate "Network-device frontend driver acceleration for Solarflare NICs"
+       depends on XEN_NETDEV_FRONTEND
+        select XEN_NETDEV_ACCEL_SFC_UTIL
+       default m
+
 config XEN_FRAMEBUFFER
        tristate "Framebuffer-device frontend driver"
        depends on FB
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/Makefile
--- a/drivers/xen/Makefile      Mon Feb 18 10:29:29 2008 +0000
+++ b/drivers/xen/Makefile      Mon Feb 18 10:30:33 2008 +0000
@@ -18,3 +18,5 @@ obj-$(CONFIG_XEN_KEYBOARD)            += fbfront/
 obj-$(CONFIG_XEN_KEYBOARD)             += fbfront/
 obj-$(CONFIG_XEN_PRIVCMD)      += privcmd/
 obj-$(CONFIG_XEN_GRANT_DEV)    += gntdev/
+obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_UTIL)                += sfc_netutil/
+obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_FRONTEND)    += sfc_netfront/
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/Makefile Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,11 @@
+EXTRA_CFLAGS += -Idrivers/xen/sfc_netutil -Idrivers/xen/netfront
+EXTRA_CFLAGS += -D__ci_driver__
+EXTRA_CFLAGS += -Werror
+
+ifdef GCOV
+EXTRA_CFLAGS += -fprofile-arcs -ftest-coverage -DEFX_GCOV
+endif
+
+obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_FRONTEND)    := sfc_netfront.o
+
+sfc_netfront-objs := accel_msg.o accel_bufs.o accel_netfront.o accel_vi.o accel_xenbus.o accel_tso.o accel_ssr.o accel_debugfs.o falcon_event.o falcon_vi.o pt_tx.o vi_init.o
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/accel.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/accel.h  Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,477 @@
+/****************************************************************************
+ * Solarflare driver for Xen network acceleration
+ *
+ * Copyright 2006-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+#ifndef NETFRONT_ACCEL_H
+#define NETFRONT_ACCEL_H
+
+#include "accel_msg_iface.h"
+#include "accel_cuckoo_hash.h"
+#include "accel_bufs.h"
+
+#include "etherfabric/ef_vi.h"
+
+#include <xen/xenbus.h>
+#include <xen/evtchn.h>
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+
+enum netfront_accel_post_status {
+       NETFRONT_ACCEL_STATUS_GOOD,
+       NETFRONT_ACCEL_STATUS_BUSY,
+       NETFRONT_ACCEL_STATUS_CANT
+};
+
+#define NETFRONT_ACCEL_STATS 1
+#if NETFRONT_ACCEL_STATS
+#define NETFRONT_ACCEL_STATS_OP(x) x
+#else
+#define NETFRONT_ACCEL_STATS_OP(x)
+#endif
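
The NETFRONT_ACCEL_STATS_OP() wrapper lets statistics updates compile away entirely when NETFRONT_ACCEL_STATS is 0; for example, the message handling code further down in this patch bumps a counter with

	NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_schedule_count++);

which expands to the plain increment when statistics are enabled and to nothing otherwise.
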
+
+
+enum netfront_accel_msg_state {
+       NETFRONT_ACCEL_MSG_NONE = 0,
+       NETFRONT_ACCEL_MSG_HELLO = 1,
+       NETFRONT_ACCEL_MSG_HW = 2
+};
+
+
+typedef struct {
+       u32 in_progress;
+       u32 total_len;
+       struct sk_buff *skb;
+} netfront_accel_jumbo_state;
+
+
+struct netfront_accel_ssr_state {
+       /** List of tracked connections. */
+       struct list_head conns;
+
+       /** Free efx_ssr_conn instances. */
+       struct list_head free_conns;
+};
+
+
+struct netfront_accel_netdev_stats {
+       /* Fastpath stats. */
+       u32 fastpath_rx_pkts;
+       u32 fastpath_rx_bytes;
+       u32 fastpath_rx_errors;
+       u32 fastpath_tx_pkts; 
+       u32 fastpath_tx_bytes;
+       u32 fastpath_tx_errors;
+};
+
+
+struct netfront_accel_netdev_dbfs {
+       struct dentry *fastpath_rx_pkts;
+       struct dentry *fastpath_rx_bytes;
+       struct dentry *fastpath_rx_errors;
+       struct dentry *fastpath_tx_pkts; 
+       struct dentry *fastpath_tx_bytes;
+       struct dentry *fastpath_tx_errors;
+};
+
+
+struct netfront_accel_stats {
+       /** Fast path events */
+       u64 fastpath_tx_busy;
+
+       /** TX DMA queue status */
+       u64 fastpath_tx_completions;
+
+       /** The number of events processed. */
+       u64 event_count;
+
+       /** Number of frame trunc events seen on fastpath */
+       u64 fastpath_frm_trunc;
+
+       /** Number of no rx descriptor trunc events seen on fastpath */
+       u64 rx_no_desc_trunc;
+
+       /** The number of misc bad events (e.g. RX_DISCARD) processed. */
+       u64 bad_event_count;
+
+       /** Number of events dealt with in poll loop */
+       u32 events_per_poll_max;
+       u32 events_per_poll_tx_max;
+       u32 events_per_poll_rx_max;
+
+       /** Largest number of concurrently outstanding tx descriptors */
+       u32 fastpath_tx_pending_max;
+
+       /** The number of events since the last interrupt. */
+       u32 event_count_since_irq;
+
+       /** The max number of events between interrupts. */
+       u32 events_per_irq_max;
+
+       /** The number of interrupts. */
+       u64 irq_count;
+
+       /** The number of useless interrupts. */
+       u64 useless_irq_count;
+
+       /** The number of polls scheduled. */
+       u64 poll_schedule_count;
+
+       /** The number of polls called. */
+       u64 poll_call_count;
+
+       /** The number of rechecks. */
+       u64 poll_reschedule_count;
+
+       /** Number of times we've called netif_stop_queue/netif_wake_queue */
+       u64 queue_stops;
+       u64 queue_wakes;
+
+       /** SSR stats */
+       u64 ssr_bursts;
+       u64 ssr_drop_stream;
+       u64 ssr_misorder;
+       u64 ssr_slow_start;
+       u64 ssr_merges;
+       u64 ssr_too_many;
+       u64 ssr_new_stream;
+};
+
+
+struct netfront_accel_dbfs {
+       struct dentry *fastpath_tx_busy;
+       struct dentry *fastpath_tx_completions;
+       struct dentry *fastpath_tx_pending_max;
+       struct dentry *fastpath_frm_trunc;
+       struct dentry *rx_no_desc_trunc;
+       struct dentry *event_count;
+       struct dentry *bad_event_count;
+       struct dentry *events_per_poll_max;
+       struct dentry *events_per_poll_rx_max;
+       struct dentry *events_per_poll_tx_max;
+       struct dentry *event_count_since_irq;
+       struct dentry *events_per_irq_max;
+       struct dentry *irq_count;
+       struct dentry *useless_irq_count;
+       struct dentry *poll_schedule_count;
+       struct dentry *poll_call_count;
+       struct dentry *poll_reschedule_count;
+       struct dentry *queue_stops;
+       struct dentry *queue_wakes;
+       struct dentry *ssr_bursts;
+       struct dentry *ssr_drop_stream;
+       struct dentry *ssr_misorder;
+       struct dentry *ssr_slow_start;
+       struct dentry *ssr_merges;
+       struct dentry *ssr_too_many;
+       struct dentry *ssr_new_stream;
+};
+
+
+typedef struct netfront_accel_vnic {
+       struct netfront_accel_vnic *next;
+       
+       struct mutex vnic_mutex;
+
+       spinlock_t tx_lock;
+
+       struct netfront_accel_bufpages bufpages;
+       struct netfront_accel_bufinfo *rx_bufs;
+       struct netfront_accel_bufinfo *tx_bufs;
+       
+       /** Hardware & VI state */
+       ef_vi vi;
+
+       ef_vi_state *vi_state;
+
+       ef_eventq_state evq_state;
+
+       void *evq_mapping;
+
+       /** Hardware-dependent state */
+       union {
+               struct {
+                       /** Falcon A or B */
+                       enum net_accel_hw_type type; 
+                       u32 *evq_rptr;
+                       u32 *doorbell;
+                       void *evq_rptr_mapping;
+                       void *doorbell_mapping;
+                       void *txdmaq_mapping;
+                       void *rxdmaq_mapping;
+               } falcon;
+       } hw;
+  
+       /** RX DMA queue status */
+       u32 rx_dma_level;
+
+       /** Number of RX descriptors waiting to be pushed to the card. */
+       u32 rx_dma_batched;
+#define NETFRONT_ACCEL_RX_DESC_BATCH 16
+
+       /**
+        * Hash table of remote mac addresses to decide whether to try
+        * fast path
+        */
+       cuckoo_hash_table fastpath_table;
+       spinlock_t table_lock;
+
+       /** The local MAC address of the virtual interface we're accelerating */
+       u8 mac[ETH_ALEN];
+
+       int rx_pkt_stride;
+       int rx_skb_stride;
+
+       /**
+        * Keep track of fragments of jumbo packets as events are
+        * delivered by NIC 
+        */
+       netfront_accel_jumbo_state jumbo_state;
+
+       struct net_device *net_dev;
+
+       /** These two gate the enabling of fast path operations */
+       int frontend_ready;
+       int backend_netdev_up;
+
+       int irq_enabled;
+       spinlock_t irq_enabled_lock;
+
+       int tx_enabled;
+
+       int poll_enabled;
+
+       /** A spare slot for a TX packet.  This is treated as an extension
+        * of the DMA queue. */
+       struct sk_buff *tx_skb;
+
+       /** Keep track of fragments of SSR packets */
+       struct netfront_accel_ssr_state ssr_state;
+
+       struct xenbus_device *dev;
+
+       /** Event channel for messages */
+       int msg_channel;
+       int msg_channel_irq;
+
+       /** Event channel for network interrupts. */
+       int net_channel;
+       int net_channel_irq;
+
+       struct net_accel_shared_page *shared_page;
+
+       grant_ref_t ctrl_page_gnt;
+       grant_ref_t msg_page_gnt;
+
+       /** Message Qs, 1 each way. */
+       sh_msg_fifo2 to_dom0;
+       sh_msg_fifo2 from_dom0;
+
+       enum netfront_accel_msg_state msg_state;
+
+       /** Watch on accelstate */
+       struct xenbus_watch backend_accel_watch;
+       /** Watch on frontend's MAC address */
+       struct xenbus_watch mac_address_watch;
+
+       /** Work to process received irq/msg */
+       struct work_struct msg_from_bend;
+
+       /** Wait queue for changes in accelstate. */
+       wait_queue_head_t state_wait_queue;
+
+       /** The current accelstate of this driver. */
+       XenbusState frontend_state;
+
+       /** The most recent accelstate seen by the xenbus watch. */
+       XenbusState backend_state;
+
+       /** Non-zero if we should reject requests to connect. */
+       int removing;
+
+       /** Non-zero if the domU shared state has been initialised. */
+       int domU_state_is_setup;
+
+       /** Non-zero if the dom0 shared state has been initialised. */
+       int dom0_state_is_setup;
+
+       /* Those statistics that are added to the netdev stats */
+       struct netfront_accel_netdev_stats netdev_stats;
+       struct netfront_accel_netdev_stats stats_last_read;
+#ifdef CONFIG_DEBUG_FS
+       struct netfront_accel_netdev_dbfs netdev_dbfs;
+#endif
+
+       /* These statistics are internal and optional */
+#if NETFRONT_ACCEL_STATS
+       struct netfront_accel_stats stats;
+#ifdef CONFIG_DEBUG_FS
+       struct netfront_accel_dbfs dbfs;
+#endif
+#endif
+
+       /** Debugfs dir for this interface */
+       struct dentry *dbfs_dir;
+} netfront_accel_vnic;
+
+
+/* Module parameters */
+extern unsigned max_pages;
+extern unsigned buffer_split;
+
+extern const char *frontend_name;
+extern struct netfront_accel_hooks accel_hooks;
+extern struct workqueue_struct *netfront_accel_workqueue;
+
+
+extern
+void netfront_accel_vi_ctor(netfront_accel_vnic *vnic);
+
+extern
+int netfront_accel_vi_init(netfront_accel_vnic *vnic, 
+                          struct net_accel_msg_hw *hw_msg);
+
+extern
+void netfront_accel_vi_dtor(netfront_accel_vnic *vnic);
+
+
+/**
+ * Add new buffers which have been registered with the NIC.
+ *
+ * @v   vnic     The vnic instance to process the response.
+ *
+ * The buffers contained in the message are added to the buffer pool.
+ */
+extern
+void netfront_accel_vi_add_bufs(netfront_accel_vnic *vnic, int is_rx);
+
+/**
+ * Put a packet on the tx DMA queue.
+ *
+ * @v  vnic     The vnic instance to accept the packet.
+ * @v  skb      A sk_buff to send.
+ *
+ * Attempt to send a packet.  On success, the skb is owned by the DMA
+ * queue and will be released when the completion event arrives.
+ */
+extern enum netfront_accel_post_status
+netfront_accel_vi_tx_post(netfront_accel_vnic *vnic,
+                         struct sk_buff *skb);
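
A rough sketch of how a caller might use this, with the status codes defined at the top of this header (the helper name is hypothetical, not part of the driver):

/* Hypothetical caller: try the fast path, report whether the skb was taken. */
static int example_try_fastpath_tx(netfront_accel_vnic *vnic, struct sk_buff *skb)
{
	switch (netfront_accel_vi_tx_post(vnic, skb)) {
	case NETFRONT_ACCEL_STATUS_GOOD:
		return 1;	/* skb is now owned by the DMA queue */
	case NETFRONT_ACCEL_STATUS_BUSY:
		return 0;	/* queue full; caller keeps the skb and retries */
	case NETFRONT_ACCEL_STATUS_CANT:
	default:
		return 0;	/* cannot accelerate; fall back to the normal path */
	}
}
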
+
+
+/**
+ * Process events in response to an interrupt.
+ *
+ * @v   vnic       The vnic instance to poll.
+ * @v   rx_packets The maximum number of rx packets to process.
+ * @ret rx_done    The number of rx packets processed.
+ *
+ * The vnic will process events until there are no more events
+ * remaining or the specified number of rx packets has been processed.
+ * The split from the interrupt call is to allow Linux NAPI
+ * polling.
+ */
+extern
+int netfront_accel_vi_poll(netfront_accel_vnic *vnic, int rx_packets);
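
As a hedged illustration of that split, an old-style (2.6.18-era) NAPI poll handler could be built on it roughly as follows; example_netdev_poll() and example_get_vnic() are assumed names for this sketch only:

/* Illustrative NAPI poll wrapper (function name and lookup helper are assumptions). */
static int example_netdev_poll(struct net_device *net_dev, int *budget)
{
	netfront_accel_vnic *vnic = example_get_vnic(net_dev);
	int max = min(*budget, net_dev->quota);
	int rx_done = netfront_accel_vi_poll(vnic, max);

	*budget -= rx_done;
	net_dev->quota -= rx_done;

	if (rx_done < max) {
		/* No more work this pass: leave polling mode and let
		 * interrupts drive the next batch. */
		netif_rx_complete(net_dev);
		netfront_accel_enable_net_interrupts(vnic);
		return 0;
	}
	return 1;	/* more work may remain; poll again */
}
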
+
+
+/**
+ * Iterate over the fragments of a packet buffer.
+ *
+ * @v   skb      The packet buffer to examine.
+ * @v   idx      A variable name for the fragment index.
+ * @v   data     A variable name for the address of the fragment data.
+ * @v   length   A variable name for the fragment length.
+ * @v   code     A section of code to execute for each fragment.
+ *
+ * This macro iterates over the fragments in a packet buffer and
+ * executes the code for each of them.
+ */
+#define NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT(skb, frag_idx,        \
+                                                frag_data, frag_len,   \
+                                                code)                  \
+       do {                                                            \
+               int frag_idx;                                           \
+               void *frag_data;                                        \
+               unsigned int      frag_len;                             \
+                                                                       \
+               frag_data = skb->data;                                  \
+               frag_len = skb_headlen(skb);                            \
+               frag_idx = 0;                                           \
+               while (1) { /* For each fragment */                     \
+                       code;                                           \
+                       if (frag_idx >= skb_shinfo(skb)->nr_frags) {    \
+                               break;                                  \
+                       } else {                                        \
+                               skb_frag_t *fragment;                   \
+                               fragment = &skb_shinfo(skb)->frags[frag_idx]; \
+                               frag_len = fragment->size;              \
+                       frag_data = ((void*)page_address(fragment->page) \
+                                            + fragment->page_offset);  \
+                       };                                              \
+                       frag_idx++;                                     \
+               }                                                       \
+       } while(0)
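
For instance, summing the head and page-fragment lengths of an skb with this macro looks roughly like the sketch below (illustrative only; "idx", "data" and "len" become the per-fragment variables the macro declares):

/* Illustrative use of the fragment-iteration macro. */
static unsigned int example_skb_payload_bytes(struct sk_buff *skb)
{
	unsigned int total = 0;

	NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT
		(skb, idx, data, len,
		 {
			 (void)data;	/* fragment payload pointer, unused here */
			 total += len;
		 });

	return total;
}
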
+
+static inline
+void netfront_accel_disable_net_interrupts(netfront_accel_vnic *vnic)
+{
+       mask_evtchn(vnic->net_channel);
+}
+
+static inline
+void netfront_accel_enable_net_interrupts(netfront_accel_vnic *vnic)
+{
+       unmask_evtchn(vnic->net_channel);
+}
+
+void netfront_accel_msg_tx_fastpath(netfront_accel_vnic *vnic, const void *mac,
+                                   u32 ip, u16 port, u8 protocol);
+
+/* Process an IRQ received from back end driver */
+irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context, 
+                                                    struct pt_regs *unused);
+irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context, 
+                                                    struct pt_regs *unused);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
+extern void netfront_accel_msg_from_bend(struct work_struct *context);
+#else
+extern void netfront_accel_msg_from_bend(void *context);
+#endif
+
+extern void vnic_stop_fastpath(netfront_accel_vnic *vnic);
+
+extern int netfront_accel_probe(struct net_device *net_dev, 
+                               struct xenbus_device *dev);
+extern int netfront_accel_remove(struct xenbus_device *dev);
+extern void netfront_accel_set_closing(netfront_accel_vnic *vnic);
+
+extern int netfront_accel_vi_enable_interrupts(netfront_accel_vnic *vnic);
+
+extern void netfront_accel_debugfs_init(void);
+extern void netfront_accel_debugfs_fini(void);
+extern int netfront_accel_debugfs_create(netfront_accel_vnic *vnic);
+extern int netfront_accel_debugfs_remove(netfront_accel_vnic *vnic);
+
+#endif /* NETFRONT_ACCEL_H */
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/accel_bufs.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/accel_bufs.c     Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,393 @@
+/****************************************************************************
+ * Solarflare driver for Xen network acceleration
+ *
+ * Copyright 2006-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+#include <xen/gnttab.h>
+
+#include "accel_bufs.h"
+#include "accel_util.h"
+
+#include "accel.h"
+
+
+static int 
+netfront_accel_alloc_buf_desc_blocks(struct netfront_accel_bufinfo *manager,
+                                    int pages)
+{
+       manager->desc_blocks = 
+               kzalloc(sizeof(struct netfront_accel_pkt_desc *) * 
+                       NETFRONT_ACCEL_BUF_NUM_BLOCKS(pages), GFP_KERNEL);
+       if (manager->desc_blocks == NULL) {
+               return -ENOMEM;
+       }
+       
+       return 0;
+}
+
+static int 
+netfront_accel_alloc_buf_lists(struct netfront_accel_bufpages *bufpages,
+                              int pages)
+{
+       bufpages->page_list = kmalloc(pages * sizeof(void *), GFP_KERNEL);
+       if (bufpages->page_list == NULL) {
+               return -ENOMEM;
+       }
+
+       bufpages->grant_list = kzalloc(pages * sizeof(grant_ref_t), GFP_KERNEL);
+       if (bufpages->grant_list == NULL) {
+               kfree(bufpages->page_list);
+               bufpages->page_list = NULL;
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+
+int netfront_accel_alloc_buffer_mem(struct netfront_accel_bufpages *bufpages,
+                                   struct netfront_accel_bufinfo *rx_manager,
+                                   struct netfront_accel_bufinfo *tx_manager,
+                                   int pages)
+{
+       int n, rc;
+
+       if ((rc = netfront_accel_alloc_buf_desc_blocks
+            (rx_manager, pages - (pages / buffer_split))) < 0) {
+               goto rx_fail;
+       }
+
+       if ((rc = netfront_accel_alloc_buf_desc_blocks
+            (tx_manager, pages / buffer_split)) < 0) {
+               goto tx_fail;
+       }
+
+       if ((rc = netfront_accel_alloc_buf_lists(bufpages, pages)) < 0) {
+               goto lists_fail;
+       }
+
+       for (n = 0; n < pages; n++) {
+               void *tmp = (void*)__get_free_page(GFP_KERNEL);
+               if (tmp == NULL)
+                       break;
+
+               bufpages->page_list[n] = tmp;
+       }
+
+       if (n != pages) {
+               EPRINTK("%s: not enough pages: %d != %d\n", __FUNCTION__, n, 
+                       pages);
+               for (; n >= 0; n--)
+                       free_page((unsigned long)(bufpages->page_list[n]));
+               rc = -ENOMEM;
+               goto pages_fail;
+       }
+
+       bufpages->max_pages = pages;
+       bufpages->page_reqs = 0;
+
+       return 0;
+
+ pages_fail:
+       kfree(bufpages->page_list);
+       kfree(bufpages->grant_list);
+
+       bufpages->page_list = NULL;
+       bufpages->grant_list = NULL;
+ lists_fail:
+       kfree(tx_manager->desc_blocks);
+       tx_manager->desc_blocks = NULL;
+
+ tx_fail:
+       kfree(rx_manager->desc_blocks);
+       rx_manager->desc_blocks = NULL;
+ rx_fail:
+       return rc;
+}
+
+
+void netfront_accel_free_buffer_mem(struct netfront_accel_bufpages *bufpages,
+                                   struct netfront_accel_bufinfo *rx_manager,
+                                   struct netfront_accel_bufinfo *tx_manager)
+{
+       int i;
+
+       for (i = 0; i < bufpages->max_pages; i++) {
+               if (bufpages->grant_list[i] != 0)
+                       net_accel_ungrant_page(bufpages->grant_list[i]);
+               free_page((unsigned long)(bufpages->page_list[i]));
+       }
+
+       if (bufpages->max_pages) {
+               kfree(bufpages->page_list);
+               kfree(bufpages->grant_list);
+               kfree(rx_manager->desc_blocks);
+               kfree(tx_manager->desc_blocks);
+       }
+}
+
+
+/*
+ * Allocate memory for the buffer manager and create a lock.  If no
+ * lock is supplied, one is allocated internally.
+ */
+struct netfront_accel_bufinfo *netfront_accel_init_bufs(spinlock_t *lock)
+{
+       struct netfront_accel_bufinfo *res = kmalloc(sizeof(*res), GFP_KERNEL);
+       if (res != NULL) {
+               res->npages = res->nused = 0;
+               res->first_free = -1;
+
+               if (lock == NULL) {
+                       res->lock = kmalloc(sizeof(*res->lock), GFP_KERNEL);
+                       if (res->lock == NULL) {
+                               kfree(res);
+                               return NULL;
+                       }
+                       spin_lock_init(res->lock);
+                       res->internally_locked = 1;
+               } else {
+                       res->lock = lock;
+                       res->internally_locked = 0;
+               }
+               
+               res->desc_blocks = NULL;
+       }
+
+       return res;
+}
+
+
+void netfront_accel_fini_bufs(struct netfront_accel_bufinfo *bufs)
+{
+       if (bufs->internally_locked)
+               kfree(bufs->lock);
+       kfree(bufs);
+}
+
+
+int netfront_accel_buf_map_request(struct xenbus_device *dev,
+                                  struct netfront_accel_bufpages *bufpages,
+                                  struct net_accel_msg *msg, 
+                                  int pages, int offset)
+{
+       int i, mfn;
+       int err;
+
+       net_accel_msg_init(msg, NET_ACCEL_MSG_MAPBUF);
+
+       BUG_ON(pages > NET_ACCEL_MSG_MAX_PAGE_REQ);
+
+       msg->u.mapbufs.pages = pages;
+
+       for (i = 0; i < msg->u.mapbufs.pages; i++) {
+               /* 
+                * This can happen if we tried to send this message
+                * earlier but the queue was full.
+                */
+               if (bufpages->grant_list[offset+i] != 0) {
+                       msg->u.mapbufs.grants[i] = 
+                               bufpages->grant_list[offset+i];
+                       continue;
+               }
+
+               mfn = virt_to_mfn(bufpages->page_list[offset+i]);
+               VPRINTK("%s: Granting page %d, mfn %08x\n",
+                       __FUNCTION__, i, mfn);
+
+               bufpages->grant_list[offset+i] =
+                       net_accel_grant_page(dev, mfn, 0);
+               msg->u.mapbufs.grants[i] = bufpages->grant_list[offset+i];
+
+               if (msg->u.mapbufs.grants[i] < 0) {
+                       EPRINTK("%s: Failed to grant buffer: %d\n",
+                               __FUNCTION__, msg->u.mapbufs.grants[i]);
+                       err = -EIO;
+                       goto error;
+               }
+       }
+
+       /* This is interpreted on return as the offset in the page_list */
+       msg->u.mapbufs.reqid = offset;
+
+       return 0;
+
+error:
+       /* Ungrant all the pages we've successfully granted. */
+       for (i--; i >= 0; i--) {
+               net_accel_ungrant_page(bufpages->grant_list[offset+i]);
+               bufpages->grant_list[offset+i] = 0;
+       }
+       return err;
+}
+
+
+/* Process a response to a buffer request. */
+int netfront_accel_add_bufs(struct netfront_accel_bufpages *bufpages,
+                           struct netfront_accel_bufinfo *manager, 
+                           struct net_accel_msg *msg)
+{
+       int msg_pages, page_offset, i, newtot;
+       int old_block_count, new_block_count;
+       u32 msg_buf;
+       unsigned long flags;
+
+       VPRINTK("%s: manager %p msg %p\n", __FUNCTION__, manager, msg);
+
+       BUG_ON(msg->id != (NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY));
+
+       msg_pages = msg->u.mapbufs.pages;
+       msg_buf = msg->u.mapbufs.buf;
+       page_offset = msg->u.mapbufs.reqid;
+
+       spin_lock_irqsave(manager->lock, flags);
+       newtot = manager->npages + msg_pages;
+       old_block_count = 
+               (manager->npages + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK - 1) >>
+               NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT;
+       new_block_count = 
+               (newtot + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK - 1) >>
+               NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT;
+
+       for (i = old_block_count; i < new_block_count; i++) {
+               struct netfront_accel_pkt_desc *block;
+               if (manager->desc_blocks[i] != NULL) {
+                       VPRINTK("Not needed\n");
+                       continue;
+               }
+               block = kzalloc(NETFRONT_ACCEL_BUFS_PER_BLOCK * 
+                               sizeof(netfront_accel_pkt_desc), GFP_ATOMIC);
+               if (block == NULL) {
+                       spin_unlock_irqrestore(manager->lock, flags);
+                       return -ENOMEM;
+               }
+               manager->desc_blocks[i] = block;
+       }
+       for (i = manager->npages; i < newtot; i++) {
+               int k, j = i - manager->npages;
+               int block_num;
+               int block_idx;
+               struct netfront_accel_pkt_desc *pkt;
+
+               block_num = i >> NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT;
+               block_idx = (NETFRONT_ACCEL_BUFS_PER_PAGE*i)
+                       & (NETFRONT_ACCEL_BUFS_PER_BLOCK-1);
+
+               pkt = manager->desc_blocks[block_num] + block_idx;
+               
+               for (k = 0; k < NETFRONT_ACCEL_BUFS_PER_PAGE; k++) {
+                       BUG_ON(page_offset + j >= bufpages->max_pages);
+
+                       pkt[k].buf_id = NETFRONT_ACCEL_BUFS_PER_PAGE * i + k;
+                       pkt[k].pkt_kva = bufpages->page_list[page_offset + j] +
+                               (PAGE_SIZE/NETFRONT_ACCEL_BUFS_PER_PAGE) * k;
+                       pkt[k].pkt_buff_addr = msg_buf +
+                               (PAGE_SIZE/NETFRONT_ACCEL_BUFS_PER_PAGE) * 
+                               (NETFRONT_ACCEL_BUFS_PER_PAGE * j + k);
+                       pkt[k].next_free = manager->first_free;
+                       manager->first_free = pkt[k].buf_id;
+                       *(int*)(pkt[k].pkt_kva) = pkt[k].buf_id;
+
+                       VPRINTK("buf %d desc %p kva %p buffaddr %x\n",
+                               pkt[k].buf_id, &(pkt[k]), pkt[k].pkt_kva, 
+                               pkt[k].pkt_buff_addr);
+               }
+       }
+       manager->npages = newtot;
+       spin_unlock_irqrestore(manager->lock, flags);
+       VPRINTK("Added %d pages. Total is now %d\n", msg_pages,
+               manager->npages);
+       return 0;
+}
+
+
+netfront_accel_pkt_desc *
+netfront_accel_buf_find(struct netfront_accel_bufinfo *manager, u16 id)
+{
+       netfront_accel_pkt_desc *pkt;
+       int block_num = id >> NETFRONT_ACCEL_BUFS_PER_BLOCK_SHIFT;
+       int block_idx = id & (NETFRONT_ACCEL_BUFS_PER_BLOCK - 1);
+       BUG_ON(id >= manager->npages * NETFRONT_ACCEL_BUFS_PER_PAGE);
+       BUG_ON(block_idx >= NETFRONT_ACCEL_BUFS_PER_BLOCK);
+       pkt = manager->desc_blocks[block_num] + block_idx;
+       return pkt;
+}
+
+
+/* Allocate a buffer from the buffer manager */
+netfront_accel_pkt_desc *
+netfront_accel_buf_get(struct netfront_accel_bufinfo *manager)
+{
+       int bufno = -1;
+       netfront_accel_pkt_desc *buf = NULL;
+       unsigned long flags = 0;
+
+       /* Any spare? */
+       if (manager->first_free == -1)
+               return NULL;
+       /* Take lock */
+       if (manager->internally_locked)
+               spin_lock_irqsave(manager->lock, flags);
+       bufno = manager->first_free;
+       if (bufno != -1) {
+               buf = netfront_accel_buf_find(manager, bufno);
+               manager->first_free = buf->next_free;
+               manager->nused++;
+       }
+       /* Release lock */
+       if (manager->internally_locked)
+               spin_unlock_irqrestore(manager->lock, flags);
+
+       /* Tell the world */
+       VPRINTK("Allocated buffer %i, buffaddr %x\n", bufno,
+               buf->pkt_buff_addr);
+
+       return buf;
+}
+
+
+/* Release a buffer back to the buffer manager pool */
+int netfront_accel_buf_put(struct netfront_accel_bufinfo *manager, u16 id)
+{
+       netfront_accel_pkt_desc *buf = netfront_accel_buf_find(manager, id);
+       unsigned long flags = 0;
+       unsigned was_empty = 0;
+       int bufno = id;
+
+       VPRINTK("Freeing buffer %i\n", id);
+       BUG_ON(id == (u16)-1);
+
+       if (manager->internally_locked)
+               spin_lock_irqsave(manager->lock, flags);
+
+       if (manager->first_free == -1)
+               was_empty = 1;
+
+       buf->next_free = manager->first_free;
+       manager->first_free = bufno;
+       manager->nused--;
+
+       if (manager->internally_locked)
+               spin_unlock_irqrestore(manager->lock, flags);
+
+       return was_empty;
+}
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/accel_bufs.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/accel_bufs.h     Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,181 @@
+/****************************************************************************
+ * Solarflare driver for Xen network acceleration
+ *
+ * Copyright 2006-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+#ifndef NETFRONT_ACCEL_BUFS_H
+#define NETFRONT_ACCEL_BUFS_H
+
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <xen/xenbus.h>
+
+#include "accel_msg_iface.h"
+
+
+/*! Buffer descriptor structure */
+typedef struct netfront_accel_pkt_desc {
+       int buf_id;
+       u32 pkt_buff_addr;
+       void *pkt_kva;
+       /* This is the socket buffer currently married to this buffer */
+       struct sk_buff *skb;
+       int next_free;
+} netfront_accel_pkt_desc;
+
+
+#define NETFRONT_ACCEL_DEFAULT_BUF_PAGES (384)
+#define NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT (4)
+#define NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK             \
+       (1 << (NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT))
+#define NETFRONT_ACCEL_BUFS_PER_PAGE_SHIFT (1)
+#define NETFRONT_ACCEL_BUFS_PER_PAGE                   \
+       (1 << (NETFRONT_ACCEL_BUFS_PER_PAGE_SHIFT))
+#define NETFRONT_ACCEL_BUFS_PER_BLOCK_SHIFT            \
+       (NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT +     \
+        NETFRONT_ACCEL_BUFS_PER_PAGE_SHIFT)
+#define NETFRONT_ACCEL_BUFS_PER_BLOCK                  \
+       (1 << NETFRONT_ACCEL_BUFS_PER_BLOCK_SHIFT)
+#define NETFRONT_ACCEL_BUF_NUM_BLOCKS(max_pages)                       \
+       (((max_pages)+NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK-1) /           \
+        NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK)
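
Worked example with the defaults above: each descriptor block covers 1 << 4 = 16 pages, i.e. 16 * 2 = 32 buffers of PAGE_SIZE/2 bytes (2 KB with 4 KB pages), and the default allocation of NETFRONT_ACCEL_DEFAULT_BUF_PAGES = 384 pages therefore needs NETFRONT_ACCEL_BUF_NUM_BLOCKS(384) = 24 descriptor blocks.
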
+
+/*! Buffer management structure. */
+struct netfront_accel_bufinfo {
+       /* number added to this manager */
+       unsigned npages;
+       /* number currently used from this manager */
+       unsigned nused;
+
+       int first_free;
+
+       int internally_locked;
+       spinlock_t *lock;
+
+       /*
+        * array of pointers (length NETFRONT_ACCEL_BUF_NUM_BLOCKS) to
+        * pkt descs
+        */
+       struct netfront_accel_pkt_desc **desc_blocks; 
+};
+
+
+struct netfront_accel_bufpages {
+       /* length of lists of pages/grants */
+       int max_pages;
+       /* list of pages allocated for network buffers */
+       void **page_list;
+       /* list of grants for the above pages */
+       grant_ref_t *grant_list;
+       
+       /* number of page requests that have been made */
+       unsigned page_reqs;
+};
+
+
+/*! Allocate memory for the buffer manager, set up locks etc.
+ * Optionally takes a lock to use; if none is supplied it makes its own.
+ *
+ * \return pointer to netfront_accel_bufinfo structure that represents the
+ * buffer manager
+ */
+extern struct netfront_accel_bufinfo *
+netfront_accel_init_bufs(spinlock_t *lock);
+
+/*! Allocate memory for the buffers
+ */
+extern int
+netfront_accel_alloc_buffer_mem(struct netfront_accel_bufpages *bufpages,
+                               struct netfront_accel_bufinfo *rx_res,
+                               struct netfront_accel_bufinfo *tx_res,
+                               int pages);
+extern void
+netfront_accel_free_buffer_mem(struct netfront_accel_bufpages *bufpages,
+                              struct netfront_accel_bufinfo *rx_res,
+                              struct netfront_accel_bufinfo *tx_res);
+
+/*! Release memory for the buffer manager, buffers, etc.
+ *
+ * \param manager pointer to netfront_accel_bufinfo structure that
+ * represents the buffer manager
+ */
+extern void netfront_accel_fini_bufs(struct netfront_accel_bufinfo *manager);
+
+/*! Release a buffer.
+ *
+ * \param manager  The buffer manager which owns the buffer.
+ * \param id   The buffer identifier.
+ */
+extern int netfront_accel_buf_put(struct netfront_accel_bufinfo *manager, 
+                                 u16 id);
+
+/*! Get the packet descriptor associated with a buffer id.
+ *
+ * \param manager  The buffer manager which owns the buffer.
+ * \param id       The buffer identifier.
+ *
+ * The returned value is the packet descriptor for this buffer.
+ */
+extern netfront_accel_pkt_desc *
+netfront_accel_buf_find(struct netfront_accel_bufinfo *manager, u16 id);
+
+
+/*! Fill out a message request for some buffers to be mapped by the
+ * back end driver
+ * 
+ * \param manager The buffer manager 
+ * \param msg Pointer to an ef_msg to complete.
+ * \return 0 on success
+ */
+extern int 
+netfront_accel_buf_map_request(struct xenbus_device *dev,
+                              struct netfront_accel_bufpages *bufpages,
+                              struct net_accel_msg *msg, 
+                              int pages, int offset);
+
+/*! Process a response to a buffer request. 
+ * 
+ * Deal with a received message from the back end in response to our
+ * request for buffers
+ * 
+ * \param manager The buffer manager
+ * \param msg The received message from the back end describing new
+ * buffers
+ * \return 0 on success
+ */
+extern int 
+netfront_accel_add_bufs(struct netfront_accel_bufpages *bufpages,
+                       struct netfront_accel_bufinfo *manager,
+                       struct net_accel_msg *msg);
+
+
+/*! Allocate a buffer from the buffer manager 
+ *
+ * \param manager The buffer manager data structure
+ * \param id On exit, the id of the buffer allocated
+ * \return Pointer to buffer descriptor.
+ */
+struct netfront_accel_pkt_desc *
+netfront_accel_buf_get(struct netfront_accel_bufinfo *manager);
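
Putting the API together, a minimal hypothetical user would look roughly like the sketch below (error unwinding trimmed; in the real driver the free list is only populated once the backend's MAPBUF reply has been passed to netfront_accel_add_bufs()):

/* Hypothetical lifecycle sketch for the buffer-manager API. */
static int example_buf_lifecycle(struct netfront_accel_bufpages *bufpages, int pages)
{
	struct netfront_accel_bufinfo *rx, *tx;
	netfront_accel_pkt_desc *pkt;

	rx = netfront_accel_init_bufs(NULL);	/* manager makes its own lock */
	tx = netfront_accel_init_bufs(NULL);
	if (rx == NULL || tx == NULL)
		return -ENOMEM;

	if (netfront_accel_alloc_buffer_mem(bufpages, rx, tx, pages) < 0)
		return -ENOMEM;

	pkt = netfront_accel_buf_get(rx);	/* NULL until buffers are added */
	if (pkt != NULL)
		netfront_accel_buf_put(rx, pkt->buf_id);

	netfront_accel_free_buffer_mem(bufpages, rx, tx);
	netfront_accel_fini_bufs(rx);
	netfront_accel_fini_bufs(tx);
	return 0;
}
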
+
+#endif /* NETFRONT_ACCEL_BUFS_H */
+
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/accel_debugfs.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/accel_debugfs.c  Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,234 @@
+/****************************************************************************
+ * Solarflare driver for Xen network acceleration
+ *
+ * Copyright 2006-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+
+#include "accel.h"
+
+#if defined(CONFIG_DEBUG_FS)
+static struct dentry *sfc_debugfs_root = NULL;
+#endif
+
+
+/*
+ * Extend debugfs helper functions to have a u64 version
+ */
+static void debugfs_u64_set(void *data, u64 val)
+{
+  *(u64 *)data = val;
+}
+
+static u64 debugfs_u64_get(void *data)
+{
+  return *(u64 *)data;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_u64, debugfs_u64_get, debugfs_u64_set, "%llu\n");
+
+struct dentry *debugfs_create_u64(const char *name, mode_t mode,
+                                 struct dentry *parent, u64 *value)
+{
+  return debugfs_create_file(name, mode, parent, value, &fops_u64);
+}
+
+
+void netfront_accel_debugfs_init(void) 
+{
+#if defined(CONFIG_DEBUG_FS)
+       sfc_debugfs_root = debugfs_create_dir(frontend_name, NULL);
+#endif
+}
+
+
+void netfront_accel_debugfs_fini(void)
+{
+#if defined(CONFIG_DEBUG_FS)
+       if (sfc_debugfs_root)
+               debugfs_remove(sfc_debugfs_root);
+#endif
+}
+
+
+int netfront_accel_debugfs_create(netfront_accel_vnic *vnic)
+{
+#if defined(CONFIG_DEBUG_FS)
+       if (sfc_debugfs_root == NULL)
+               return -ENOENT;
+
+       vnic->dbfs_dir = debugfs_create_dir(vnic->net_dev->name, 
+                                           sfc_debugfs_root);
+       if (vnic->dbfs_dir == NULL)
+               return -ENOMEM;
+
+       vnic->netdev_dbfs.fastpath_rx_pkts = debugfs_create_u32
+               ("fastpath_rx_pkts", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->netdev_stats.fastpath_rx_pkts);
+       vnic->netdev_dbfs.fastpath_rx_bytes = debugfs_create_u32
+               ("fastpath_rx_bytes", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->netdev_stats.fastpath_rx_bytes);
+       vnic->netdev_dbfs.fastpath_rx_errors = debugfs_create_u32
+               ("fastpath_rx_errors", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->netdev_stats.fastpath_rx_errors);
+       vnic->netdev_dbfs.fastpath_tx_pkts = debugfs_create_u32
+               ("fastpath_tx_pkts", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->netdev_stats.fastpath_tx_pkts);
+       vnic->netdev_dbfs.fastpath_tx_bytes = debugfs_create_u32
+               ("fastpath_tx_bytes", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->netdev_stats.fastpath_tx_bytes);
+       vnic->netdev_dbfs.fastpath_tx_errors = debugfs_create_u32
+               ("fastpath_tx_errors", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->netdev_stats.fastpath_tx_errors);
+
+#if NETFRONT_ACCEL_STATS
+       vnic->dbfs.irq_count = debugfs_create_u64
+               ("irq_count", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.irq_count);
+       vnic->dbfs.useless_irq_count = debugfs_create_u64
+               ("useless_irq_count", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.useless_irq_count);
+       vnic->dbfs.poll_schedule_count = debugfs_create_u64
+               ("poll_schedule_count", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.poll_schedule_count);
+       vnic->dbfs.poll_call_count = debugfs_create_u64
+               ("poll_call_count", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.poll_call_count);
+       vnic->dbfs.poll_reschedule_count = debugfs_create_u64
+               ("poll_reschedule_count", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.poll_reschedule_count);
+       vnic->dbfs.queue_stops = debugfs_create_u64
+               ("queue_stops", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.queue_stops);
+       vnic->dbfs.queue_wakes = debugfs_create_u64
+               ("queue_wakes", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.queue_wakes);
+       vnic->dbfs.ssr_bursts = debugfs_create_u64
+               ("ssr_bursts", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.ssr_bursts);
+       vnic->dbfs.ssr_drop_stream = debugfs_create_u64
+               ("ssr_drop_stream", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.ssr_drop_stream);
+       vnic->dbfs.ssr_misorder = debugfs_create_u64
+               ("ssr_misorder", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.ssr_misorder);
+       vnic->dbfs.ssr_slow_start = debugfs_create_u64
+               ("ssr_slow_start", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.ssr_slow_start);
+       vnic->dbfs.ssr_merges = debugfs_create_u64
+               ("ssr_merges", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.ssr_merges);
+       vnic->dbfs.ssr_too_many = debugfs_create_u64
+               ("ssr_too_many", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.ssr_too_many);
+       vnic->dbfs.ssr_new_stream = debugfs_create_u64
+               ("ssr_new_stream", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.ssr_new_stream);
+
+       vnic->dbfs.fastpath_tx_busy = debugfs_create_u64
+               ("fastpath_tx_busy", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.fastpath_tx_busy);
+       vnic->dbfs.fastpath_tx_completions = debugfs_create_u64
+               ("fastpath_tx_completions", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.fastpath_tx_completions);
+       vnic->dbfs.fastpath_tx_pending_max = debugfs_create_u32
+               ("fastpath_tx_pending_max", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.fastpath_tx_pending_max);
+       vnic->dbfs.event_count = debugfs_create_u64
+               ("event_count", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.event_count);
+       vnic->dbfs.bad_event_count = debugfs_create_u64
+               ("bad_event_count", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.bad_event_count);
+       vnic->dbfs.event_count_since_irq = debugfs_create_u32
+               ("event_count_since_irq", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.event_count_since_irq);
+       vnic->dbfs.events_per_irq_max = debugfs_create_u32
+               ("events_per_irq_max", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.events_per_irq_max);
+       vnic->dbfs.fastpath_frm_trunc = debugfs_create_u64
+               ("fastpath_frm_trunc", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.fastpath_frm_trunc);
+       vnic->dbfs.rx_no_desc_trunc = debugfs_create_u64
+               ("rx_no_desc_trunc", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.rx_no_desc_trunc);
+       vnic->dbfs.events_per_poll_max = debugfs_create_u32
+               ("events_per_poll_max", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.events_per_poll_max);
+       vnic->dbfs.events_per_poll_rx_max = debugfs_create_u32
+               ("events_per_poll_rx_max", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.events_per_poll_rx_max);
+       vnic->dbfs.events_per_poll_tx_max = debugfs_create_u32
+               ("events_per_poll_tx_max", S_IRUSR | S_IRGRP | S_IROTH,
+                vnic->dbfs_dir, &vnic->stats.events_per_poll_tx_max);
+#endif
+#endif
+       return 0;
+}
+
+
+int netfront_accel_debugfs_remove(netfront_accel_vnic *vnic)
+{
+#if defined(CONFIG_DEBUG_FS)
+       if (vnic->dbfs_dir != NULL) {
+               debugfs_remove(vnic->netdev_dbfs.fastpath_rx_pkts);
+               debugfs_remove(vnic->netdev_dbfs.fastpath_rx_bytes);
+               debugfs_remove(vnic->netdev_dbfs.fastpath_rx_errors);
+               debugfs_remove(vnic->netdev_dbfs.fastpath_tx_pkts);
+               debugfs_remove(vnic->netdev_dbfs.fastpath_tx_bytes);
+               debugfs_remove(vnic->netdev_dbfs.fastpath_tx_errors);
+               
+#if NETFRONT_ACCEL_STATS
+               debugfs_remove(vnic->dbfs.irq_count);
+               debugfs_remove(vnic->dbfs.useless_irq_count);
+               debugfs_remove(vnic->dbfs.poll_schedule_count);
+               debugfs_remove(vnic->dbfs.poll_call_count);
+               debugfs_remove(vnic->dbfs.poll_reschedule_count);
+               debugfs_remove(vnic->dbfs.queue_stops);
+               debugfs_remove(vnic->dbfs.queue_wakes);
+               debugfs_remove(vnic->dbfs.ssr_bursts);
+               debugfs_remove(vnic->dbfs.ssr_drop_stream);
+               debugfs_remove(vnic->dbfs.ssr_misorder);
+               debugfs_remove(vnic->dbfs.ssr_slow_start);
+               debugfs_remove(vnic->dbfs.ssr_merges);
+               debugfs_remove(vnic->dbfs.ssr_too_many);
+               debugfs_remove(vnic->dbfs.ssr_new_stream);
+               
+               debugfs_remove(vnic->dbfs.fastpath_tx_busy);
+               debugfs_remove(vnic->dbfs.fastpath_tx_completions);
+               debugfs_remove(vnic->dbfs.fastpath_tx_pending_max);
+               debugfs_remove(vnic->dbfs.event_count);
+               debugfs_remove(vnic->dbfs.bad_event_count);
+               debugfs_remove(vnic->dbfs.event_count_since_irq);
+               debugfs_remove(vnic->dbfs.events_per_irq_max);
+               debugfs_remove(vnic->dbfs.fastpath_frm_trunc);
+               debugfs_remove(vnic->dbfs.rx_no_desc_trunc);
+               debugfs_remove(vnic->dbfs.events_per_poll_max);
+               debugfs_remove(vnic->dbfs.events_per_poll_rx_max);
+               debugfs_remove(vnic->dbfs.events_per_poll_tx_max);
+#endif
+               debugfs_remove(vnic->dbfs_dir);
+       }
+#endif
+       return 0;
+}
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/accel_msg.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/accel_msg.c      Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,566 @@
+/****************************************************************************
+ * Solarflare driver for Xen network acceleration
+ *
+ * Copyright 2006-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+#include <linux/stddef.h>
+#include <linux/errno.h>
+
+#include <xen/xenbus.h>
+
+#include "accel.h"
+#include "accel_msg_iface.h"
+#include "accel_util.h"
+#include "accel_bufs.h"
+
+#include "netfront.h" /* drivers/xen/netfront/netfront.h */
+
+static void vnic_start_interrupts(netfront_accel_vnic *vnic)
+{
+       unsigned long flags;
+       
+       /* Prime our interrupt */
+       spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
+       if (!netfront_accel_vi_enable_interrupts(vnic)) {
+               /* Cripes, that was quick, better pass it up */
+               netfront_accel_disable_net_interrupts(vnic);
+               vnic->irq_enabled = 0;
+               NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_schedule_count++);
+               netif_rx_schedule(vnic->net_dev);
+       } else {
+               /*
+                * Nothing yet, make sure we get interrupts through
+                * back end 
+                */
+               vnic->irq_enabled = 1;
+               netfront_accel_enable_net_interrupts(vnic);
+       }
+       spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
+}
+
+
+static void vnic_stop_interrupts(netfront_accel_vnic *vnic)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
+       netfront_accel_disable_net_interrupts(vnic);
+       vnic->irq_enabled = 0;
+       spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
+}
+
+
+static void vnic_start_fastpath(netfront_accel_vnic *vnic)
+{
+       struct net_device *net_dev = vnic->net_dev;
+       unsigned long flags;
+
+       DPRINTK("%s\n", __FUNCTION__);
+
+       spin_lock_irqsave(&vnic->tx_lock, flags);
+       vnic->tx_enabled = 1;
+       spin_unlock_irqrestore(&vnic->tx_lock, flags);
+       
+       netif_poll_disable(net_dev);
+       vnic->poll_enabled = 1;
+       netif_poll_enable(net_dev);
+       
+       vnic_start_interrupts(vnic);
+}
+
+
+void vnic_stop_fastpath(netfront_accel_vnic *vnic)
+{
+       struct net_device *net_dev = vnic->net_dev;
+       struct netfront_info *np = (struct netfront_info *)netdev_priv(net_dev);
+       unsigned long flags1, flags2;
+
+       DPRINTK("%s\n", __FUNCTION__);
+
+       vnic_stop_interrupts(vnic);
+       
+       spin_lock_irqsave(&vnic->tx_lock, flags1);
+       vnic->tx_enabled = 0;
+       spin_lock_irqsave(&np->tx_lock, flags2);
+       if (vnic->tx_skb != NULL) {
+               dev_kfree_skb_any(vnic->tx_skb);
+               vnic->tx_skb = NULL;
+               if (netfront_check_queue_ready(net_dev)) {
+                       netif_wake_queue(net_dev);
+                       NETFRONT_ACCEL_STATS_OP
+                               (vnic->stats.queue_wakes++);
+               }
+       }
+       spin_unlock_irqrestore(&np->tx_lock, flags2);
+       spin_unlock_irqrestore(&vnic->tx_lock, flags1);
+       
+       /* Must prevent polls and hold lock to modify poll_enabled */
+       netif_poll_disable(net_dev);
+       spin_lock_irqsave(&vnic->irq_enabled_lock, flags1);
+       vnic->poll_enabled = 0;
+       spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags1);
+       netif_poll_enable(net_dev);
+}
+
+
+static void netfront_accel_interface_up(netfront_accel_vnic *vnic)
+{
+
+       if (!vnic->backend_netdev_up) {
+               vnic->backend_netdev_up = 1;
+               
+               if (vnic->frontend_ready)
+                       vnic_start_fastpath(vnic);
+       }
+}
+
+
+static void netfront_accel_interface_down(netfront_accel_vnic *vnic)
+{
+
+       if (vnic->backend_netdev_up) {
+               vnic->backend_netdev_up = 0;
+               
+               if (vnic->frontend_ready)
+                       vnic_stop_fastpath(vnic);
+       }
+}
+
+
+static int vnic_add_bufs(netfront_accel_vnic *vnic, 
+                        struct net_accel_msg *msg)
+{
+       int rc, offset;
+       struct netfront_accel_bufinfo *bufinfo;
+  
+       BUG_ON(msg->u.mapbufs.pages > NET_ACCEL_MSG_MAX_PAGE_REQ);
+
+       offset = msg->u.mapbufs.reqid;
+
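+       /*
+        * Buffer pages below the RX/TX split point are RX buffers; the
+        * remaining max_pages / buffer_split pages are TX buffers
+        */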
+       if (offset < vnic->bufpages.max_pages - 
+           (vnic->bufpages.max_pages / buffer_split)) {
+               bufinfo = vnic->rx_bufs;
+       } else
+               bufinfo = vnic->tx_bufs;
+
+       /* Queue up some Rx buffers to start things off. */
+       if ((rc = netfront_accel_add_bufs(&vnic->bufpages, bufinfo, msg)) == 0) {
+               netfront_accel_vi_add_bufs(vnic, bufinfo == vnic->rx_bufs);
+
+               if (offset + msg->u.mapbufs.pages == vnic->bufpages.max_pages) {
+                       VPRINTK("%s: got all buffers back\n", __FUNCTION__);
+                       vnic->frontend_ready = 1;
+                       if (vnic->backend_netdev_up)
+                               vnic_start_fastpath(vnic);
+               } else {
+                       VPRINTK("%s: got buffers back %d %d\n", __FUNCTION__, 
+                               offset, msg->u.mapbufs.pages);
+               }
+       }
+
+       return rc;
+}
+
+
+/* The largest [o] such that (1u << o) <= n.  Requires n > 0. */
+
+inline unsigned log2_le(unsigned long n) {
+       unsigned order = 1;
+       while ((1ul << order) <= n) ++order;
+       return (order - 1);
+}
+
+static int vnic_send_buffer_requests(netfront_accel_vnic *vnic,
+                                    struct netfront_accel_bufpages *bufpages)
+{
+       int pages, offset, rc = 0, sent = 0;
+       struct net_accel_msg msg;
+
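+       /*
+        * Request the outstanding pages in chunks: each message asks for
+        * the largest power of two that still fits, capped at
+        * NET_ACCEL_MSG_MAX_PAGE_REQ, until all pages have been requested
+        * or the message queue fills
+        */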
+       while (bufpages->page_reqs < bufpages->max_pages) {
+               offset = bufpages->page_reqs;
+
+               pages = pow2(log2_le(bufpages->max_pages - 
+                                    bufpages->page_reqs));
+               pages = pages < NET_ACCEL_MSG_MAX_PAGE_REQ ? 
+                       pages : NET_ACCEL_MSG_MAX_PAGE_REQ;
+
+               BUG_ON(offset < 0);
+               BUG_ON(pages <= 0);
+
+               rc = netfront_accel_buf_map_request(vnic->dev, bufpages,
+                                                   &msg, pages, offset);
+               if (rc == 0) {
+                       rc = net_accel_msg_send(vnic->shared_page, 
+                                               &vnic->to_dom0, &msg);
+                       if (rc < 0) {
+                               VPRINTK("%s: queue full, stopping for now\n",
+                                       __FUNCTION__);
+                               break;
+                       }
+                       sent++;
+               } else {
+                       EPRINTK("%s: problem with grant, stopping for now\n",
+                               __FUNCTION__);
+                       break;
+               }
+
+               bufpages->page_reqs += pages;
+       }
+
+       if (sent)
+               net_accel_msg_notify(vnic->msg_channel_irq);
+
+       return rc;
+}
+
+
+/*
+ * In response to dom0 saying "my queue is full", we reply with this
+ * when it is no longer full
+ */
+inline void vnic_set_queue_not_full(netfront_accel_vnic *vnic)
+{
+
+       if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL_B,
+                            (unsigned long *)&vnic->shared_page->aflags))
+               notify_remote_via_irq(vnic->msg_channel_irq);
+       else
+               VPRINTK("queue not full bit already set, not signalling\n");
+}
+
+/*
+ * Notify dom0 that the queue we want to use is full; it should
+ * respond by setting MSG_AFLAGS_QUEUEUNOTFULL in due course
+ */
+inline void vnic_set_queue_full(netfront_accel_vnic *vnic)
+{
+
+       if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUFULL_B,
+                            (unsigned long *)&vnic->shared_page->aflags))
+               notify_remote_via_irq(vnic->msg_channel_irq);
+       else
+               VPRINTK("queue full bit already set, not signalling\n");
+}
+
+
+static int vnic_check_hello_version(unsigned version) 
+{
+       if (version > NET_ACCEL_MSG_VERSION) {
+               /* Newer protocol, we must refuse */
+               return -EPROTO;
+       }
+
+       if (version < NET_ACCEL_MSG_VERSION) {
+               /*
+                * We are newer, so have discretion to accept if we
+                * wish.  For now however, just reject
+                */
+               return -EPROTO;
+       }
+
+       BUG_ON(version != NET_ACCEL_MSG_VERSION);
+       return 0;
+}
+
+
+static int vnic_process_hello_msg(netfront_accel_vnic *vnic,
+                                 struct net_accel_msg *msg)
+{
+       int err = 0;
+       unsigned pages = max_pages;
+
+       if (vnic_check_hello_version(msg->u.hello.version) < 0) {
+               msg->id = NET_ACCEL_MSG_HELLO | NET_ACCEL_MSG_REPLY 
+                       | NET_ACCEL_MSG_ERROR;
+               msg->u.hello.version = NET_ACCEL_MSG_VERSION;
+       } else {
+               vnic->backend_netdev_up
+                       = vnic->shared_page->net_dev_up;
+               
+               msg->id = NET_ACCEL_MSG_HELLO | NET_ACCEL_MSG_REPLY;
+               msg->u.hello.version = NET_ACCEL_MSG_VERSION;
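+               /* Negotiate down to the smaller of our limit and dom0's */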
+               if (msg->u.hello.max_pages &&
+                   msg->u.hello.max_pages < pages)
+                       pages = msg->u.hello.max_pages;
+               msg->u.hello.max_pages = pages;
+               
+               /* Half of pages for rx, half for tx */ 
+               err = netfront_accel_alloc_buffer_mem(&vnic->bufpages,
+                                                     vnic->rx_bufs, 
+                                                     vnic->tx_bufs,
+                                                     pages);
+               if (err)
+                       msg->id |= NET_ACCEL_MSG_ERROR;         
+       }
+       
+       /* Send reply */
+       net_accel_msg_reply_notify(vnic->shared_page, vnic->msg_channel_irq,
+                                  &vnic->to_dom0, msg);
+       return err;
+}
+
+
+static int vnic_process_localmac_msg(netfront_accel_vnic *vnic,
+                                    struct net_accel_msg *msg)
+{
+       unsigned long flags;
+       cuckoo_hash_mac_key key;
+
+       if (msg->u.localmac.flags & NET_ACCEL_MSG_ADD) {
+               DPRINTK("MAC has moved, could be local: " MAC_FMT "\n",
+                       MAC_ARG(msg->u.localmac.mac));
+               key = cuckoo_mac_to_key(msg->u.localmac.mac);
+               spin_lock_irqsave(&vnic->table_lock, flags);
+               /* Try to remove it, not a big deal if not there */
+               cuckoo_hash_remove(&vnic->fastpath_table, 
+                                  (cuckoo_hash_key *)&key);
+               spin_unlock_irqrestore(&vnic->table_lock, flags);
+       }
+       
+       return 0;
+}
+
+
+static 
+int vnic_process_rx_msg(netfront_accel_vnic *vnic,
+                       struct net_accel_msg *msg)
+{
+       int err;
+
+       switch (msg->id) {
+       case NET_ACCEL_MSG_HELLO:
+               /* Hello, reply with Reply */
+               DPRINTK("got Hello, with version %.8x\n",
+                       msg->u.hello.version);
+               BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_NONE);
+               err = vnic_process_hello_msg(vnic, msg);
+               if (err == 0)
+                       vnic->msg_state = NETFRONT_ACCEL_MSG_HELLO;
+               break;
+       case NET_ACCEL_MSG_SETHW:
+               /* Hardware info message */
+               DPRINTK("got H/W info\n");
+               BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HELLO);
+               err = netfront_accel_vi_init(vnic, &msg->u.hw);
+               if (err == 0)
+                       vnic->msg_state = NETFRONT_ACCEL_MSG_HW;
+               break;
+       case NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY:
+               VPRINTK("Got mapped buffers back\n");
+               BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW);
+               err = vnic_add_bufs(vnic, msg);
+               break;
+       case NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY | NET_ACCEL_MSG_ERROR:
+               /* No buffers.  Can't use the fast path. */
+               EPRINTK("Got mapped buffers error.  Cannot accelerate.\n");
+               BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW);
+               err = -EIO;
+               break;
+       case NET_ACCEL_MSG_LOCALMAC:
+               /* Should be add; remove is not currently used */
+               EPRINTK_ON(!(msg->u.localmac.flags & NET_ACCEL_MSG_ADD));
+               BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW);
+               err = vnic_process_localmac_msg(vnic, msg);
+               break;
+       default:
+               EPRINTK("Huh? Message code is 0x%x\n", msg->id);
+               err = -EPROTO;
+               break;
+       }
+
+       return err;
+}
+
+
+/* Process an IRQ received from back end driver */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
+void netfront_accel_msg_from_bend(struct work_struct *context)
+#else
+void netfront_accel_msg_from_bend(void *context)
+#endif
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
+       netfront_accel_vnic *vnic = 
+               container_of(context, netfront_accel_vnic, msg_from_bend);
+#else
+       netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
+#endif
+       struct net_accel_msg msg;
+       int err, queue_was_full = 0;
+       
+       mutex_lock(&vnic->vnic_mutex);
+
+       /*
+        * This happens when the shared pages have been unmapped but
+        * the workqueue has yet to be flushed 
+        */
+       if (!vnic->dom0_state_is_setup) 
+               goto unlock_out;
+
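+       /*
+        * Deal with the flag-based (out-of-band) notifications first,
+        * re-reading aflags until all the bits of interest are clear
+        */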
+       while ((vnic->shared_page->aflags & NET_ACCEL_MSG_AFLAGS_TO_DOMU_MASK)
+              != 0) {
+               if (vnic->shared_page->aflags &
+                   NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL) {
+                       /* We've been told there may now be space. */
+                       clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL_B,
+                                 (unsigned long *)&vnic->shared_page->aflags);
+               }
+
+               if (vnic->shared_page->aflags &
+                   NET_ACCEL_MSG_AFLAGS_QUEUE0FULL) {
+                       /*
+                        * There will be space at the end of this
+                        * function if we can make any.
+                        */
+                       clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0FULL_B,
+                                 (unsigned long *)&vnic->shared_page->aflags);
+                       queue_was_full = 1;
+               }
+
+               if (vnic->shared_page->aflags &
+                   NET_ACCEL_MSG_AFLAGS_NETUPDOWN) {
+                       DPRINTK("%s: net interface change\n", __FUNCTION__);
+                       clear_bit(NET_ACCEL_MSG_AFLAGS_NETUPDOWN_B,
+                                 (unsigned long *)&vnic->shared_page->aflags);
+                       if (vnic->shared_page->net_dev_up)
+                               netfront_accel_interface_up(vnic);
+                       else
+                               netfront_accel_interface_down(vnic);
+               }
+       }
+
+       /* Pull msg out of shared memory */
+       while ((err = net_accel_msg_recv(vnic->shared_page, &vnic->from_dom0,
+                                        &msg)) == 0) {
+               err = vnic_process_rx_msg(vnic, &msg);
+               
+               if (err != 0)
+                       goto done;
+       }
+
+       /*
+        * Send any pending buffer map request messages that we can,
+        * and mark domU->dom0 as full if necessary.  
+        */
+       if (vnic->msg_state == NETFRONT_ACCEL_MSG_HW &&
+           vnic->bufpages.page_reqs < vnic->bufpages.max_pages) {
+               if (vnic_send_buffer_requests(vnic, &vnic->bufpages) == -ENOSPC)
+                       vnic_set_queue_full(vnic);
+       }
+
+       /* 
+        * If there are no messages then this is not an error.  It
+        * just means that we've finished processing the queue.
+        */
+       if (err == -ENOENT)
+               err = 0;
+ done:
+       /* We will now have made space in the dom0->domU queue if we can */
+       if (queue_was_full)
+               vnic_set_queue_not_full(vnic);
+
+       if (err != 0) {
+               EPRINTK("%s returned %d\n", __FUNCTION__, err);
+               netfront_accel_set_closing(vnic);
+       }
+
+ unlock_out:
+       mutex_unlock(&vnic->vnic_mutex);
+
+       return;
+}
+
+
+irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context, 
+                                                struct pt_regs *unused)
+{
+       netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
+       VPRINTK("irq %d from device %s\n", irq, vnic->dev->nodename);
+
+       queue_work(netfront_accel_workqueue, &vnic->msg_from_bend);
+
+       return IRQ_HANDLED;
+}
+
+/* Process an interrupt received from the NIC via backend */
+irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context, 
+                                                    struct pt_regs *unused)
+{
+       netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
+       struct net_device *net_dev = vnic->net_dev;
+       unsigned long flags;
+
+       VPRINTK("net irq %d from device %s\n", irq, vnic->dev->nodename);
+       
+       NETFRONT_ACCEL_STATS_OP(vnic->stats.irq_count++);
+
+       BUG_ON(net_dev==NULL);
+
+       spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
+       if (vnic->irq_enabled) {
+               netfront_accel_disable_net_interrupts(vnic);
+               vnic->irq_enabled = 0;
+               spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
+
+#if NETFRONT_ACCEL_STATS
+               vnic->stats.poll_schedule_count++;
+               if (vnic->stats.event_count_since_irq >
+                   vnic->stats.events_per_irq_max)
+                       vnic->stats.events_per_irq_max = 
+                               vnic->stats.event_count_since_irq;
+               vnic->stats.event_count_since_irq = 0;
+#endif
+               netif_rx_schedule(net_dev);
+       }
+       else {
+               spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
+               NETFRONT_ACCEL_STATS_OP(vnic->stats.useless_irq_count++);
+               DPRINTK("%s: irq when disabled\n", __FUNCTION__);
+       }
+       
+       return IRQ_HANDLED;
+}
+
+
+void netfront_accel_msg_tx_fastpath(netfront_accel_vnic *vnic, const void *mac,
+                                   u32 ip, u16 port, u8 protocol)
+{
+       unsigned long lock_state;
+       struct net_accel_msg *msg;
+
+       msg = net_accel_msg_start_send(vnic->shared_page, &vnic->to_dom0,
+                                      &lock_state);
+
+       if (msg == NULL)
+               return;
+
+       net_accel_msg_init(msg, NET_ACCEL_MSG_FASTPATH);
+       msg->u.fastpath.flags = NET_ACCEL_MSG_REMOVE;
+       memcpy(msg->u.fastpath.mac, mac, ETH_ALEN);
+
+       msg->u.fastpath.port = port;
+       msg->u.fastpath.ip = ip;
+       msg->u.fastpath.proto = protocol;
+
+       net_accel_msg_complete_send_notify(vnic->shared_page, &vnic->to_dom0, 
+                                          &lock_state, vnic->msg_channel_irq);
+}
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/accel_netfront.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/accel_netfront.c Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,318 @@
+/****************************************************************************
+ * Solarflare driver for Xen network acceleration
+ *
+ * Copyright 2006-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+
+/* drivers/xen/netfront/netfront.h */
+#include "netfront.h"
+
+#include "accel.h"
+#include "accel_bufs.h"
+#include "accel_util.h"
+#include "accel_msg_iface.h"
+#include "accel_ssr.h"
+ 
+#ifdef EFX_GCOV
+#include "gcov.h"
+#endif
+
+#define NETFRONT_ACCEL_VNIC_FROM_NETDEV(_nd)                           \
+       ((netfront_accel_vnic *)((struct netfront_info *)netdev_priv(_nd))->accel_priv)
+
+static int netfront_accel_netdev_start_xmit(struct sk_buff *skb,
+                                           struct net_device *net_dev)
+{
+       netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
+       struct netfront_info *np = 
+               (struct netfront_info *)netdev_priv(net_dev);
+       int handled, rc;
+       unsigned long flags1, flags2;
+
+       BUG_ON(vnic == NULL);
+
+       /* Take our tx lock and hold for the duration */
+       spin_lock_irqsave(&vnic->tx_lock, flags1);
+
+       if (!vnic->tx_enabled) {
+               rc = 0;
+               goto unlock_out;
+       }
+
+       handled = netfront_accel_vi_tx_post(vnic, skb);
+       if (handled == NETFRONT_ACCEL_STATUS_BUSY) {
+               BUG_ON(vnic->net_dev != net_dev);
+               DPRINTK("%s stopping queue\n", __FUNCTION__);
+
+               /* Netfront's lock protects tx_skb */
+               spin_lock_irqsave(&np->tx_lock, flags2);
+               BUG_ON(vnic->tx_skb != NULL);
+               vnic->tx_skb = skb;
+               netif_stop_queue(net_dev);
+               spin_unlock_irqrestore(&np->tx_lock, flags2);
+
+               NETFRONT_ACCEL_STATS_OP(vnic->stats.queue_stops++);
+       }
+
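+       /*
+        * Return zero when the fast path did not take the packet
+        * (acceleration disabled or unsupported skb); non-zero when it
+        * was sent or queued here
+        */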
+       if (handled == NETFRONT_ACCEL_STATUS_CANT)
+               rc = 0;
+       else
+               rc = 1;
+
+unlock_out:
+       spin_unlock_irqrestore(&vnic->tx_lock, flags1);
+
+       return rc;
+}
+
+
+static int netfront_accel_netdev_poll(struct net_device *net_dev, int *budget)
+{
+       netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
+       int rx_allowed = *budget, rx_done;
+       
+       BUG_ON(vnic == NULL);
+
+       /* Can check this without lock as modifier excludes polls */ 
+       if (!vnic->poll_enabled)
+               return 0;
+
+       rx_done = netfront_accel_vi_poll(vnic, rx_allowed);
+       *budget -= rx_done;
+       
+       NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_call_count++);
+
+       VPRINTK("%s: done %d allowed %d\n",
+               __FUNCTION__, rx_done, rx_allowed);
+
+       netfront_accel_ssr_end_of_burst(vnic, &vnic->ssr_state);
+
+       if (rx_done < rx_allowed) {
+                return 0; /* Done */
+       }
+       
+       NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_reschedule_count++);
+
+       return 1; /* More to do. */
+}
+
+
+/*
+ * Process request from netfront to start napi interrupt
+ * mode (i.e. enable interrupts as it has finished polling)
+ */
+static int netfront_accel_start_napi_interrupts(struct net_device *net_dev) 
+{
+       netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
+       unsigned long flags;
+
+       BUG_ON(vnic == NULL);
+       
+       /*
+        * Can check this without lock as writer excludes poll before
+        * modifying
+        */
+       if (!vnic->poll_enabled)
+               return 0;
+
+       if (!netfront_accel_vi_enable_interrupts(vnic)) {
+               /* 
+                * There was something there, tell caller we had
+                * something to do.
+                */
+               return 1;
+       }
+
+       spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
+       vnic->irq_enabled = 1;
+       netfront_accel_enable_net_interrupts(vnic);
+       spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
+
+       return 0;
+}
+
+
+/*
+ * Process request from netfront to stop napi interrupt
+ * mode (i.e. disable interrupts as it is starting to poll)
+ */
+static void netfront_accel_stop_napi_interrupts(struct net_device *net_dev) 
+{
+       netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
+       unsigned long flags;
+
+       BUG_ON(vnic == NULL);
+
+       spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
+
+       if (!vnic->poll_enabled) {
+               spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
+               return;
+       }
+
+       netfront_accel_disable_net_interrupts(vnic);
+       vnic->irq_enabled = 0;
+       spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
+}
+
+
+static int netfront_accel_check_ready(struct net_device *net_dev)
+{
+       netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
+
+       BUG_ON(vnic == NULL);
+
+       /* This is protected by netfront's lock */ 
+       return vnic->tx_skb == NULL;
+}
+
+
+static int netfront_accel_get_stats(struct net_device *net_dev,
+                                   struct net_device_stats *stats)
+{
+       netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
+       struct netfront_accel_netdev_stats now;
+
+       BUG_ON(vnic == NULL);
+
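+       /*
+        * Snapshot the fastpath counters and fold the delta since the
+        * last read into the interface statistics
+        */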
+       now.fastpath_rx_pkts   = vnic->netdev_stats.fastpath_rx_pkts;
+       now.fastpath_rx_bytes  = vnic->netdev_stats.fastpath_rx_bytes;
+       now.fastpath_rx_errors = vnic->netdev_stats.fastpath_rx_errors;
+       now.fastpath_tx_pkts   = vnic->netdev_stats.fastpath_tx_pkts;
+       now.fastpath_tx_bytes  = vnic->netdev_stats.fastpath_tx_bytes;
+       now.fastpath_tx_errors = vnic->netdev_stats.fastpath_tx_errors;
+       
+       stats->rx_packets += (now.fastpath_rx_pkts - 
+                             vnic->stats_last_read.fastpath_rx_pkts);
+       stats->rx_bytes   += (now.fastpath_rx_bytes -
+                             vnic->stats_last_read.fastpath_rx_bytes);
+       stats->rx_errors  += (now.fastpath_rx_errors - 
+                             vnic->stats_last_read.fastpath_rx_errors);
+       stats->tx_packets += (now.fastpath_tx_pkts - 
+                             vnic->stats_last_read.fastpath_tx_pkts);
+       stats->tx_bytes   += (now.fastpath_tx_bytes - 
+                             vnic->stats_last_read.fastpath_tx_bytes);
+       stats->tx_errors  += (now.fastpath_tx_errors - 
+                             vnic->stats_last_read.fastpath_tx_errors);
+       
+       vnic->stats_last_read = now;
+
+       return 0;
+}
+
+
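+/*
+ * Hooks registered with the core netfront driver: device probe/remove,
+ * transmit, NAPI poll and interrupt control, and statistics
+ */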
+struct netfront_accel_hooks accel_hooks = {
+       .new_device         = &netfront_accel_probe,
+       .remove         = &netfront_accel_remove,
+       .netdev_poll       = &netfront_accel_netdev_poll,
+       .start_xmit         = &netfront_accel_netdev_start_xmit,
+       .start_napi_irq = &netfront_accel_start_napi_interrupts,
+       .stop_napi_irq   = &netfront_accel_stop_napi_interrupts,
+       .check_ready       = &netfront_accel_check_ready,
+       .get_stats           = &netfront_accel_get_stats
+};
+
+
+unsigned max_pages = NETFRONT_ACCEL_DEFAULT_BUF_PAGES;
+module_param (max_pages, int, 0666);
+MODULE_PARM_DESC(max_pages, "Number of buffer pages to request");
+
+unsigned buffer_split = 2;
+module_param (buffer_split, int, 0666);
+MODULE_PARM_DESC(buffer_split, "Fraction of buffers to use for TX, rest for RX");
+
+
+const char *frontend_name = "sfc_netfront";
+
+struct workqueue_struct *netfront_accel_workqueue;
+
+static int __init netfront_accel_init(void)
+{
+       int rc;
+#ifdef EFX_GCOV        
+       gcov_provider_init(THIS_MODULE);
+#endif
+
+       /*
+        * If we're running on dom0, netfront hasn't initialised
+        * itself, so we need to keep away
+        */
+       if (is_initial_xendomain())
+               return 0;
+
+       if (!is_pow2(sizeof(struct net_accel_msg)))
+               EPRINTK("%s: bad structure size\n", __FUNCTION__);
+
+       netfront_accel_workqueue = create_workqueue(frontend_name);
+
+       netfront_accel_debugfs_init();
+
+       rc = netfront_accelerator_loaded(NETFRONT_ACCEL_VERSION,
+                                        frontend_name, &accel_hooks);
+
+       if (rc < 0) {
+               EPRINTK("Xen netfront accelerator version mismatch\n");
+               return -EINVAL;
+       }
+
+       if (rc > 0) {
+               /* 
+                * In future may want to add backwards compatibility
+                * and accept certain subsets of previous versions
+                */
+               EPRINTK("Xen netfront accelerator version mismatch\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+module_init(netfront_accel_init);
+
+static void __exit netfront_accel_exit(void)
+{
+       if (is_initial_xendomain())
+               return;
+
+       DPRINTK("%s: unhooking\n", __FUNCTION__);
+
+       /* Unhook from normal netfront */
+       netfront_accelerator_stop(frontend_name);
+
+       DPRINTK("%s: done\n", __FUNCTION__);
+
+       netfront_accel_debugfs_fini();
+
+       flush_workqueue(netfront_accel_workqueue);
+
+       destroy_workqueue(netfront_accel_workqueue);
+
+#ifdef EFX_GCOV
+       gcov_provider_fini(THIS_MODULE);
+#endif
+       return;
+}
+module_exit(netfront_accel_exit);
+
+MODULE_LICENSE("GPL");
+
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/accel_ssr.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/accel_ssr.c      Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,308 @@
+/****************************************************************************
+ * Solarflare driver for Xen network acceleration
+ *
+ * Copyright 2006-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/list.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+
+#include "accel.h"
+#include "accel_util.h"
+#include "accel_bufs.h"
+
+#include "accel_ssr.h"
+
+static inline int list_valid(struct list_head *lh) {
+       return(lh->next != NULL);
+}
+
+static void netfront_accel_ssr_deliver (struct netfront_accel_vnic *vnic,
+                                       struct netfront_accel_ssr_state *st,
+                                       struct netfront_accel_ssr_conn *c);
+
+/** Initialise a netfront_accel_ssr_state.
+ *
+ * @v st     The SSR state (per channel per port)
+ */
+void netfront_accel_ssr_init(struct netfront_accel_ssr_state *st) {
+       unsigned i;
+
+       INIT_LIST_HEAD(&st->conns);
+       INIT_LIST_HEAD(&st->free_conns);
+       for (i = 0; i < 8; ++i) {
+               struct netfront_accel_ssr_conn *c = 
+                       kmalloc(sizeof(*c), GFP_KERNEL);
+               if (c == NULL)  break;
+               c->n_in_order_pkts = 0;
+               c->skb = NULL;
+               list_add(&c->link, &st->free_conns);
+       }
+
+}
+
+
+/** Destructor for a netfront_accel_ssr_state.
+ *
+ * @v vnic   The vnic
+ * @v st     The SSR state (per channel per port)
+ */
+void netfront_accel_ssr_fini(netfront_accel_vnic *vnic, 
+                            struct netfront_accel_ssr_state *st) {
+       struct netfront_accel_ssr_conn *c;
+
+       /* Return cleanly if netfront_accel_ssr_init() was not previously called */
+       BUG_ON(list_valid(&st->conns) != list_valid(&st->free_conns));
+       if (! list_valid(&st->conns))
+               return;
+
+       while ( ! list_empty(&st->free_conns)) {
+               c = list_entry(st->free_conns.prev, 
+                              struct netfront_accel_ssr_conn, link);
+               list_del(&c->link);
+               BUG_ON(c->skb != NULL);
+               kfree(c);
+       }
+       while ( ! list_empty(&st->conns)) {
+               c = list_entry(st->conns.prev, 
+                              struct netfront_accel_ssr_conn, link);
+               list_del(&c->link);
+               if (c->skb)
+                       netfront_accel_ssr_deliver(vnic, st, c);
+               kfree(c);
+       }
+}
+
+
+/** Calculate the IP checksum and deliver to the OS
+ *
+ * @v vnic   The vnic
+ * @v st     The SSR state (per channel per port)
+ * @v c      The SSR connection state
+ */
+static void netfront_accel_ssr_deliver(netfront_accel_vnic *vnic,
+                                      struct netfront_accel_ssr_state *st,
+                                      struct netfront_accel_ssr_conn *c) {
+       BUG_ON(c->skb == NULL);
+
+       /*
+        * If we've chained packets together, recalculate the IP
+        * checksum.
+        */
+       if (skb_shinfo(c->skb)->frag_list) {
+               NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_bursts);
+               c->iph->check = 0;
+               c->iph->check = ip_fast_csum((unsigned char *) c->iph, 
+                                            c->iph->ihl);
+       }
+
+       VPRINTK("%s: %d\n", __FUNCTION__, c->skb->len);
+
+       netif_receive_skb(c->skb); 
+       c->skb = NULL;
+}
+
+
+/** Push held skbs down into network stack.
+ *
+ * @v st       SSR state
+ *
+ * Only called if we are tracking one or more connections.
+ */
+void __netfront_accel_ssr_end_of_burst(netfront_accel_vnic *vnic, 
+                                      struct netfront_accel_ssr_state *st) {
+       struct netfront_accel_ssr_conn *c;
+
+       BUG_ON(list_empty(&st->conns));
+
+       list_for_each_entry(c, &st->conns, link)
+               if (c->skb)
+                       netfront_accel_ssr_deliver(vnic, st, c);
+
+       /* Time-out connections that have received no traffic for 20ms. */
+       c = list_entry(st->conns.prev, struct netfront_accel_ssr_conn,
+                      link);
+       if (jiffies - c->last_pkt_jiffies > (HZ / 50 + 1)) {
+               NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_drop_stream);
+               list_del(&c->link);
+               list_add(&c->link, &st->free_conns);
+       }
+}
+
+
+/** Process SKB and decide whether to dispatch it to the stack now or
+ * later.
+ *
+ * @v st       SSR state
+ * @v skb      SKB to examine
+ * @ret rc     0 => deliver SKB to kernel now, otherwise the SKB belongs
+ *             to us.
+ */
+int netfront_accel_ssr_skb(struct netfront_accel_vnic *vnic,
+                          struct netfront_accel_ssr_state *st,
+                          struct sk_buff *skb) {
+       int data_length, dont_merge;
+       struct netfront_accel_ssr_conn *c;
+       struct iphdr *iph;
+       struct tcphdr *th;
+       unsigned th_seq;
+
+       BUG_ON(skb_shinfo(skb)->frag_list != NULL);
+       BUG_ON(skb->next != NULL);
+
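+       /*
+        * The scheme: track a small set of TCP connections; in-order data
+        * segments on a tracked connection are held and chained onto one
+        * skb's frag_list, and anything else is delivered to the stack
+        * immediately
+        */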
+       /* We're not interested if it isn't TCP over IPv4. */
+       iph = (struct iphdr *) skb->data;
+       if (skb->protocol != htons(ETH_P_IP) ||
+           iph->protocol != IPPROTO_TCP) {
+               return 0;
+       }
+
+       /* Ignore segments that fail csum or are fragmented. */
+       if (unlikely((skb->ip_summed - CHECKSUM_UNNECESSARY) |
+                    (iph->frag_off & htons(IP_MF | IP_OFFSET)))) {
+               return 0;
+       }
+
+       th = (struct tcphdr*)(skb->data + iph->ihl * 4);
+       data_length = ntohs(iph->tot_len) - iph->ihl * 4 - th->doff * 4;
+       th_seq = ntohl(th->seq);
+       dont_merge = (data_length == 0) | th->urg | th->syn | th->rst;
+
+       list_for_each_entry(c, &st->conns, link) {
+               if ((c->saddr  - iph->saddr) |
+                   (c->daddr  - iph->daddr) |
+                   (c->source - th->source) |
+                   (c->dest   - th->dest  ))
+                       continue;
+
+               /* Re-insert at head of list to reduce lookup time. */
+               list_del(&c->link);
+               list_add(&c->link, &st->conns);
+               c->last_pkt_jiffies = jiffies;
+
+               if (unlikely(th_seq - c->next_seq)) {
+                       /* Out-of-order, so start counting again. */
+                       if (c->skb)
+                               netfront_accel_ssr_deliver(vnic, st, c);
+                       c->n_in_order_pkts = 0;
+                       c->next_seq = th_seq + data_length;
+                       NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_misorder);
+                       return 0;
+               }
+               c->next_seq = th_seq + data_length;
+
+               if (++c->n_in_order_pkts < 300) {
+                       /* May be in slow-start, so don't merge. */
+                       NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_slow_start);
+                       return 0;
+               }
+
+               if (unlikely(dont_merge)) {
+                       if (c->skb)
+                               netfront_accel_ssr_deliver(vnic, st, c);
+                       return 0;
+               }
+
+               if (c->skb) {
+                       c->iph->tot_len = ntohs(c->iph->tot_len);
+                       c->iph->tot_len += data_length;
+                       c->iph->tot_len = htons(c->iph->tot_len);
+                       c->th->ack_seq = th->ack_seq;
+                       c->th->fin |= th->fin;
+                       c->th->psh |= th->psh;
+                       c->th->window = th->window;
+
+                       /* Remove the headers from this skb. */
+                       skb_pull(skb, skb->len - data_length);
+
+                       /*
+                        * Tack the new skb onto the head skb's frag_list.
+                        * This is exactly the format that fragmented IP
+                        * datagrams are reassembled into.
+                        */
+                       BUG_ON(skb->next != 0);
+                       if ( ! skb_shinfo(c->skb)->frag_list)
+                               skb_shinfo(c->skb)->frag_list = skb;
+                       else
+                               c->skb_tail->next = skb;
+                       c->skb_tail = skb;
+                       c->skb->len += skb->len;
+                       c->skb->data_len += skb->len;
+                       c->skb->truesize += skb->truesize;
+
+                       NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_merges);
+
+                       /*
+                        * If the next packet might push this super-packet
+                        * over the limit for an IP packet, deliver it now.
+                        * This is slightly conservative, but close enough.
+                        */
+                       if (c->skb->len + 
+                           (PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE)
+                           > 16384)
+                               netfront_accel_ssr_deliver(vnic, st, c);
+
+                       return 1;
+               }
+               else {
+                       c->iph = iph;
+                       c->th = th;
+                       c->skb = skb;
+                       return 1;
+               }
+       }
+
+       /* We're not yet tracking this connection. */
+
+       if (dont_merge) {
+               return 0;
+       }
+
+       if (list_empty(&st->free_conns)) {
+               c = list_entry(st->conns.prev, 
+                              struct netfront_accel_ssr_conn,
+                              link);
+               if (c->skb) {
+                       NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_too_many);
+                       return 0;
+               }
+       }
+       else {
+               c = list_entry(st->free_conns.next,
+                              struct netfront_accel_ssr_conn,
+                              link);
+       }
+       list_del(&c->link);
+       list_add(&c->link, &st->conns);
+       c->saddr = iph->saddr;
+       c->daddr = iph->daddr;
+       c->source = th->source;
+       c->dest = th->dest;
+       c->next_seq = th_seq + data_length;
+       c->n_in_order_pkts = 0;
+       BUG_ON(c->skb != NULL);
+       NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_new_stream);
+       return 0;
+}
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/accel_ssr.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/accel_ssr.h      Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,88 @@
+/****************************************************************************
+ * Solarflare driver for Xen network acceleration
+ *
+ * Copyright 2006-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+#ifndef NETFRONT_ACCEL_SSR_H
+#define NETFRONT_ACCEL_SSR_H
+
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/list.h>
+
+#include "accel.h"
+
+/** State for Soft Segment Reassembly (SSR). */
+
+struct netfront_accel_ssr_conn {
+       struct list_head link;
+
+       unsigned saddr, daddr;
+       unsigned short source, dest;
+
+       /** Number of in-order packets we've seen with payload. */
+       unsigned n_in_order_pkts;
+
+       /** Next in-order sequence number. */
+       unsigned next_seq;
+
+       /** Time we last saw a packet on this connection. */
+       unsigned long last_pkt_jiffies;
+
+       /** The SKB we are currently holding.  If NULL, then all following
+        * fields are undefined.
+        */
+       struct sk_buff *skb;
+
+       /** The tail of the frag_list of SKBs we're holding.  Only valid
+        * after at least one merge.
+        */
+       struct sk_buff *skb_tail;
+
+       /** The IP header of the skb we are holding. */
+       struct iphdr *iph;
+       
+       /** The TCP header of the skb we are holding. */
+       struct tcphdr *th;
+};
+
+extern void netfront_accel_ssr_init(struct netfront_accel_ssr_state *st);
+extern void netfront_accel_ssr_fini(netfront_accel_vnic *vnic,
+                                   struct netfront_accel_ssr_state *st);
+
+extern void
+__netfront_accel_ssr_end_of_burst(netfront_accel_vnic *vnic,
+                                 struct netfront_accel_ssr_state *st);
+
+extern int  netfront_accel_ssr_skb(netfront_accel_vnic *vnic,
+                                  struct netfront_accel_ssr_state *st,
+                                  struct sk_buff *skb);
+
+static inline void
+netfront_accel_ssr_end_of_burst (netfront_accel_vnic *vnic,
+                                struct netfront_accel_ssr_state *st) {
+       if ( ! list_empty(&st->conns) )
+               __netfront_accel_ssr_end_of_burst(vnic, st);
+}
+
+#endif /* NETFRONT_ACCEL_SSR_H */
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/accel_tso.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/accel_tso.c      Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,512 @@
+/****************************************************************************
+ * Solarflare driver for Xen network acceleration
+ *
+ * Copyright 2006-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+#include <linux/pci.h>
+#include <linux/tcp.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+
+#include "accel.h"
+#include "accel_util.h"
+
+#include "accel_tso.h"
+
+#define PTR_DIFF(p1, p2)  ((u8*)(p1) - (u8*)(p2))
+#define ETH_HDR_LEN(skb)  ((skb)->nh.raw - (skb)->data)
+#define SKB_TCP_OFF(skb)  PTR_DIFF ((skb)->h.th, (skb)->data)
+#define SKB_IP_OFF(skb)   PTR_DIFF ((skb)->nh.iph, (skb)->data)
+
+/*
+ * Set a maximum number of buffers in each output packet to make life
+ * a little simpler - if this is reached it will just move on to
+ * another packet 
+ */
+#define ACCEL_TSO_MAX_BUFFERS (6)
+
+/** TSO State.
+ *
+ * The state used during segmentation.  It is put into this data structure
+ * just to make it easy to pass into inline functions.
+ */
+struct netfront_accel_tso_state {
+       /** bytes of data we've yet to segment */
+       unsigned remaining_len;
+
+       /** current sequence number */
+       unsigned seqnum;
+
+       /** remaining space in current packet */
+       unsigned packet_space;
+
+       /** List of packets to be output, containing the buffers and
+        *  iovecs to describe each packet 
+        */
+       struct netfront_accel_tso_output_packet *output_packets;
+
+       /** Total number of buffers in output_packets */
+       unsigned buffers;
+
+       /** Total number of packets in output_packets */
+       unsigned packets;
+
+       /** Input Fragment Cursor.
+        *
+        * Where we are in the current fragment of the incoming SKB.  These
+        * values get updated in place when we split a fragment over
+        * multiple packets.
+        */
+       struct {
+               /** address of current position */
+               void *addr;
+               /** remaining length */   
+               unsigned int len;
+       } ifc; /*  == ifc Input Fragment Cursor */
+
+       /** Parameters.
+        *
+        * These values are set once at the start of the TSO send and do
+        * not get changed as the routine progresses.
+        */
+       struct {
+               /* the number of bytes of header */
+               unsigned int header_length;
+
+               /* The number of bytes to put in each outgoing segment. */
+               int full_packet_size;
+               
+               /* Current IP ID, host endian. */
+               unsigned ip_id;
+
+               /* Max size of each output packet payload */
+               int gso_size;
+       } p;
+};
+
+
+/**
+ * Verify that our various assumptions about sk_buffs and the conditions
+ * under which TSO will be attempted hold true.
+ *
+ * @v skb             The sk_buff to check.
+ */
+static inline void tso_check_safe(struct sk_buff *skb) {
+       EPRINTK_ON(skb->protocol != htons (ETH_P_IP));
+       EPRINTK_ON(((struct ethhdr*) skb->data)->h_proto != htons (ETH_P_IP));
+       EPRINTK_ON(skb->nh.iph->protocol != IPPROTO_TCP);
+       EPRINTK_ON((PTR_DIFF(skb->h.th, skb->data)
+                   + (skb->h.th->doff << 2u)) > skb_headlen(skb));
+}
+
+
+
+/** Parse the SKB header and initialise state. */
+static inline void tso_start(struct netfront_accel_tso_state *st, 
+                            struct sk_buff *skb) {
+
+       /*
+        * All ethernet/IP/TCP headers combined size is TCP header size
+        * plus offset of TCP header relative to start of packet.
+        */
+       st->p.header_length = ((skb->h.th->doff << 2u)
+                              + PTR_DIFF(skb->h.th, skb->data));
+       st->p.full_packet_size = (st->p.header_length
+                                 + skb_shinfo(skb)->gso_size);
+       st->p.gso_size = skb_shinfo(skb)->gso_size;
+
+       st->p.ip_id = ntohs(skb->nh.iph->id);
+       st->seqnum = ntohl(skb->h.th->seq);
+
+       EPRINTK_ON(skb->h.th->urg);
+       EPRINTK_ON(skb->h.th->syn);
+       EPRINTK_ON(skb->h.th->rst);
+
+       st->remaining_len = skb->len - st->p.header_length;
+
+       st->output_packets = NULL;
+       st->buffers = 0;
+       st->packets = 0;
+
+       VPRINTK("Starting new TSO: hl %d ps %d gso %d seq %x len %d\n",
+               st->p.header_length, st->p.full_packet_size, st->p.gso_size,
+               st->seqnum, skb->len);
+}
+
+/**
+ * Add another NIC mapped buffer onto an output packet  
+ */ 
+static inline int tso_start_new_buffer(netfront_accel_vnic *vnic,
+                                      struct netfront_accel_tso_state *st,
+                                      int first)
+{
+       struct netfront_accel_tso_buffer *tso_buf;
+       struct netfront_accel_pkt_desc *buf;
+
+       /* Get a mapped packet buffer */
+       buf = netfront_accel_buf_get(vnic->tx_bufs);
+       if (buf == NULL) {
+               DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
+               return -1;
+       }
+
+       /*
+        * Store a bit of meta-data at the end of the buffer: packet data
+        * fills the first NETFRONT_ACCEL_TSO_BUF_LENGTH bytes, followed by
+        * the output_packet descriptor and then this per-buffer descriptor
+        */
+       tso_buf = (struct netfront_accel_tso_buffer *)
+               (buf->pkt_kva + NETFRONT_ACCEL_TSO_BUF_LENGTH
+                + sizeof(struct netfront_accel_tso_output_packet));
+
+       tso_buf->buf = buf;
+
+       tso_buf->length = 0;
+       
+       if (first) {
+               struct netfront_accel_tso_output_packet *output_packet 
+                       = (struct netfront_accel_tso_output_packet *)
+                       (buf->pkt_kva + NETFRONT_ACCEL_TSO_BUF_LENGTH);
+               output_packet->next = st->output_packets;
+               st->output_packets = output_packet;
+               tso_buf->next = NULL;
+               st->output_packets->tso_bufs = tso_buf;
+               st->output_packets->tso_bufs_len = 1;
+       } else {
+               tso_buf->next = st->output_packets->tso_bufs;
+               st->output_packets->tso_bufs = tso_buf;
+               st->output_packets->tso_bufs_len ++;
+       }
+
+       BUG_ON(st->output_packets->tso_bufs_len > ACCEL_TSO_MAX_BUFFERS);
+       
+       st->buffers ++;
+
+       /*
+        * Store the skb context; it is NULL for now, and only the last
+        * buffer to be sent gets the real skb pointer later
+        */
+       tso_buf->buf->skb = NULL;
+
+       return 0;
+}
+
+
+/* Generate a new header, and prepare for the new packet.
+ *
+ * @v vnic           VNIC
+ * @v skb             Socket buffer
+ * @v st               TSO state
+ * @ret rc           0 on success, or -1 if failed to alloc header
+ */
+
+static inline 
+int tso_start_new_packet(netfront_accel_vnic *vnic,
+                        struct sk_buff *skb,
+                        struct netfront_accel_tso_state *st) 
+{
+       struct netfront_accel_tso_buffer *tso_buf;
+       struct iphdr *tsoh_iph;
+       struct tcphdr *tsoh_th;
+       unsigned ip_length;
+
+       if (tso_start_new_buffer(vnic, st, 1) < 0) {
+               NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
+               return -1;              
+       }
+
+       /* This has been set up by tso_start_new_buffer() */
+       tso_buf = st->output_packets->tso_bufs;
+
+       /* Copy in the header */
+       memcpy(tso_buf->buf->pkt_kva, skb->data, st->p.header_length);
+       tso_buf->length = st->p.header_length;
+
+       tsoh_th = (struct tcphdr*) 
+               (tso_buf->buf->pkt_kva + SKB_TCP_OFF(skb));
+       tsoh_iph = (struct iphdr*) 
+               (tso_buf->buf->pkt_kva + SKB_IP_OFF(skb));
+
+       /* Set to zero to encourage falcon to fill these in */
+       tsoh_th->check  = 0;
+       tsoh_iph->check = 0;
+
+       tsoh_th->seq = htonl(st->seqnum);
+       st->seqnum += st->p.gso_size;
+
+       if (st->remaining_len > st->p.gso_size) {
+               /* This packet will not finish the TSO burst. */
+               ip_length = st->p.full_packet_size - ETH_HDR_LEN(skb);
+               tsoh_th->fin = 0;
+               tsoh_th->psh = 0;
+       } else {
+               /* This packet will be the last in the TSO burst. */
+               ip_length = (st->p.header_length - ETH_HDR_LEN(skb)
+                            + st->remaining_len);
+               tsoh_th->fin = skb->h.th->fin;
+               tsoh_th->psh = skb->h.th->psh;
+       }
+
+       tsoh_iph->tot_len = htons(ip_length);
+
+       /* Linux leaves suitable gaps in the IP ID space for us to fill. */
+       tsoh_iph->id = st->p.ip_id++;
+       tsoh_iph->id = htons(tsoh_iph->id);
+
+       st->packet_space = st->p.gso_size; 
+
+       st->packets++;
+
+       return 0;
+}
+
+
+
+static inline void tso_get_fragment(struct netfront_accel_tso_state *st, 
+                                   int len, void *addr)
+{
+       st->ifc.len = len;
+       st->ifc.addr = addr;
+       return;
+}
+
+
+static inline void tso_unwind(netfront_accel_vnic *vnic, 
+                             struct netfront_accel_tso_state *st)
+{
+       struct netfront_accel_tso_buffer *tso_buf;
+       struct netfront_accel_tso_output_packet *output_packet;
+
+       DPRINTK("%s\n", __FUNCTION__);
+
+       while (st->output_packets != NULL) {
+               output_packet = st->output_packets;
+               st->output_packets = output_packet->next;
+               while (output_packet->tso_bufs != NULL) {
+                       tso_buf = output_packet->tso_bufs;
+                       output_packet->tso_bufs = tso_buf->next;
+
+                       st->buffers --;
+                       output_packet->tso_bufs_len --;
+
+                       netfront_accel_buf_put(vnic->tx_bufs, 
+                                              tso_buf->buf->buf_id);
+               }
+       }
+       BUG_ON(st->buffers != 0);
+}
+
+
+
+static inline
+void tso_fill_packet_with_fragment(netfront_accel_vnic *vnic,
+                                  struct netfront_accel_tso_state *st) 
+{
+       struct netfront_accel_tso_buffer *tso_buf;
+       int n, space;
+
+       BUG_ON(st->output_packets == NULL);
+       BUG_ON(st->output_packets->tso_bufs == NULL);
+
+       tso_buf = st->output_packets->tso_bufs;
+
+       if (st->ifc.len == 0)  return;
+       if (st->packet_space == 0)  return;
+       if (tso_buf->length == NETFRONT_ACCEL_TSO_BUF_LENGTH) return;
+
+       n = min(st->ifc.len, st->packet_space);
+
+       space = NETFRONT_ACCEL_TSO_BUF_LENGTH - tso_buf->length;
+       n = min(n, space);
+
+       st->packet_space -= n;
+       st->remaining_len -= n;
+       st->ifc.len -= n;
+
+       memcpy(tso_buf->buf->pkt_kva + tso_buf->length, st->ifc.addr, n);
+
+       tso_buf->length += n;
+
+       BUG_ON(tso_buf->length > NETFRONT_ACCEL_TSO_BUF_LENGTH);
+
+       st->ifc.addr += n;
+
+       return;
+}
+
+
+int netfront_accel_enqueue_skb_tso(netfront_accel_vnic *vnic,
+                                  struct sk_buff *skb)
+{
+       struct netfront_accel_tso_state state;
+       struct netfront_accel_tso_buffer *tso_buf = NULL;
+       struct netfront_accel_tso_output_packet *reversed_list = NULL;
+       struct netfront_accel_tso_output_packet *tmp_pkt;
+       ef_iovec iovecs[ACCEL_TSO_MAX_BUFFERS];
+       int frag_i, rc, dma_id;
+       skb_frag_t *f;
+
+       tso_check_safe(skb);
+
+       if (skb->ip_summed != CHECKSUM_HW)
+               EPRINTK("Trying to TSO send a packet without HW checksum\n");
+
+       tso_start(&state, skb);
+
+       /*
+        * Set up the first payload fragment.  If the skb header area
+        * contains exactly the headers and all payload is in the frag
+        * list, things are a little simpler
+        */
+       if (skb_headlen(skb) == state.p.header_length) {
+               /* Grab the first payload fragment. */
+               BUG_ON(skb_shinfo(skb)->nr_frags < 1);
+               frag_i = 0;
+               f = &skb_shinfo(skb)->frags[frag_i];
+               tso_get_fragment(&state, f->size, 
+                                page_address(f->page) + f->page_offset);
+       } else {
+               int hl = state.p.header_length;
+               tso_get_fragment(&state,  skb_headlen(skb) - hl, 
+                                skb->data + hl);
+               frag_i = -1;
+       }
+
+       if (tso_start_new_packet(vnic, skb, &state) < 0) {
+               DPRINTK("%s: out of first start-packet memory\n",
+                       __FUNCTION__);
+               goto unwind;
+       }
+
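+       /*
+        * Main segmentation loop: copy payload from the current input
+        * fragment into the current output buffer, moving on to the next
+        * input fragment, output buffer or output packet as each fills
+        * or empties
+        */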
+       while (1) {
+               tso_fill_packet_with_fragment(vnic, &state);
+               
+               /* Move onto the next fragment? */
+               if (state.ifc.len == 0) {
+                       if (++frag_i >= skb_shinfo(skb)->nr_frags)
+                               /* End of payload reached. */
+                               break;
+                       f = &skb_shinfo(skb)->frags[frag_i];
+                       tso_get_fragment(&state, f->size,
+                                        page_address(f->page) +
+                                        f->page_offset);
+               }
+
+               /* Start a new buffer? */
+               if ((state.output_packets->tso_bufs->length == 
+                    NETFRONT_ACCEL_TSO_BUF_LENGTH) &&
+                   tso_start_new_buffer(vnic, &state, 0)) {
+                       DPRINTK("%s: out of start-buffer memory\n",
+                               __FUNCTION__);
+                       goto unwind;
+               }
+
+               /* Start at new packet? */
+               if ((state.packet_space == 0 || 
+                    ((state.output_packets->tso_bufs_len >=
+                      ACCEL_TSO_MAX_BUFFERS) &&
+                     (state.output_packets->tso_bufs->length >= 
+                      NETFRONT_ACCEL_TSO_BUF_LENGTH))) &&
+                   tso_start_new_packet(vnic, skb, &state) < 0) {
+                       DPRINTK("%s: out of start-packet memory\n",
+                               __FUNCTION__);
+                       goto unwind;
+               }
+
+       }
+
+       /* Check for space */
+       if (ef_vi_transmit_space(&vnic->vi) < state.buffers) {
+               DPRINTK("%s: Not enough TX space (%d)\n",
+                       __FUNCTION__, state.buffers);
+               goto unwind;
+       }
+
+       /*
+        * Store the skb context in the most recent buffer (i.e. the
+        * last buffer that will be sent)
+        */
+       state.output_packets->tso_bufs->buf->skb = skb;
+
+       /* Reverse the list of packets as we construct it on a stack */
+       while (state.output_packets != NULL) {
+               tmp_pkt = state.output_packets;
+               state.output_packets = tmp_pkt->next;
+               tmp_pkt->next = reversed_list;
+               reversed_list = tmp_pkt;
+       }
+
+       /* Pass off to hardware */
+       while (reversed_list != NULL) {
+               tmp_pkt = reversed_list;
+               reversed_list = tmp_pkt->next;
+
+               BUG_ON(tmp_pkt->tso_bufs_len > ACCEL_TSO_MAX_BUFFERS);
+               BUG_ON(tmp_pkt->tso_bufs_len == 0);
+
+               dma_id = tmp_pkt->tso_bufs->buf->buf_id;
+
+               /*
+                * Make an iovec of the buffers in the list, reversing
+                * the buffers as we go as they are constructed on a
+                * stack
+                */
+               tso_buf = tmp_pkt->tso_bufs;
+               for (frag_i = tmp_pkt->tso_bufs_len - 1;
+                    frag_i >= 0;
+                    frag_i--) {
+                       iovecs[frag_i].iov_base = tso_buf->buf->pkt_buff_addr;
+                       iovecs[frag_i].iov_len = tso_buf->length;
+                       tso_buf = tso_buf->next;
+               }
+
+               rc = ef_vi_transmitv(&vnic->vi, iovecs, tmp_pkt->tso_bufs_len,
+                                    dma_id);
+               /*
+                * We checked for space already, so it really should
+                * succeed
+                */
+               BUG_ON(rc != 0);
+       }
+
+       /* Track number of tx fastpath stats */
+       vnic->netdev_stats.fastpath_tx_bytes += skb->len;
+       vnic->netdev_stats.fastpath_tx_pkts += state.packets;
+#if NETFRONT_ACCEL_STATS
+       {
+               unsigned n;
+               n = vnic->netdev_stats.fastpath_tx_pkts -
+                       vnic->stats.fastpath_tx_completions;
+               if (n > vnic->stats.fastpath_tx_pending_max)
+                       vnic->stats.fastpath_tx_pending_max = n;
+       }
+#endif
+
+       return NETFRONT_ACCEL_STATUS_GOOD;
+ 
+ unwind:
+       tso_unwind(vnic, &state);
+
+       NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
+
+       return NETFRONT_ACCEL_STATUS_BUSY;
+}
+
+
+
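As an aside (illustration only, not additional patch code): the TSO path above pushes each new output packet, and each new buffer within a packet, onto the head of its list, so both lists end up in reverse order and are flipped before the iovecs are handed to ef_vi_transmitv().  A minimal, self-contained sketch of that head-insert-then-reverse pattern, using a hypothetical struct node:

    struct node {
            struct node *next;
            int payload;
    };

    /* Push at the head: constant-time insert, but yields reverse order. */
    static void push(struct node **head, struct node *n)
    {
            n->next = *head;
            *head = n;
    }

    /* Reverse in place so entries come back out in the order they were built. */
    static struct node *reverse(struct node *head)
    {
            struct node *reversed = NULL, *tmp;

            while (head != NULL) {
                    tmp = head;
                    head = tmp->next;
                    tmp->next = reversed;
                    reversed = tmp;
            }
            return reversed;
    }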
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/accel_tso.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/accel_tso.h      Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,57 @@
+/****************************************************************************
+ * Solarflare driver for Xen network acceleration
+ *
+ * Copyright 2006-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+#ifndef NETFRONT_ACCEL_TSO_H
+#define NETFRONT_ACCEL_TSO_H
+
+#include "accel_bufs.h"
+
+/* Track the buffers used in each output packet */
+struct netfront_accel_tso_buffer {
+       struct netfront_accel_tso_buffer *next;
+       struct netfront_accel_pkt_desc *buf;
+       unsigned length;
+};
+
+/* Track the output packets formed from each input packet */
+struct netfront_accel_tso_output_packet {
+       struct netfront_accel_tso_output_packet *next;
+       struct netfront_accel_tso_buffer *tso_bufs;
+       unsigned tso_bufs_len;
+};
+
+
+/*
+ * Max available space in a buffer for data once meta-data has taken
+ * its place 
+ */
+#define NETFRONT_ACCEL_TSO_BUF_LENGTH                                  \
+       ((PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE)                     \
+        - sizeof(struct netfront_accel_tso_buffer)                     \
+        - sizeof(struct netfront_accel_tso_output_packet))
+
+int netfront_accel_enqueue_skb_tso(netfront_accel_vnic *vnic,
+                                  struct sk_buff *skb);
+
+#endif /* NETFRONT_ACCEL_TSO_H */
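For orientation (illustration only, not additional patch code): NETFRONT_ACCEL_TSO_BUF_LENGTH above is each buffer's share of a page minus the two metadata structs stored at its tail, so the usable payload size depends on pointer width.  A standalone sketch of the same arithmetic, with PAGE_SIZE, NETFRONT_ACCEL_BUFS_PER_PAGE and the struct layouts all assumed here purely for illustration:

    #include <stdio.h>

    /* Assumed values and layouts for illustration only; the real ones
     * come from the kernel headers and accel_bufs.h. */
    #define PAGE_SIZE 4096
    #define NETFRONT_ACCEL_BUFS_PER_PAGE 2

    struct tso_buffer        { void *next; void *buf;  unsigned length; };
    struct tso_output_packet { void *next; void *bufs; unsigned bufs_len; };

    int main(void)
    {
            size_t share  = PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE;
            size_t usable = share - sizeof(struct tso_buffer)
                                  - sizeof(struct tso_output_packet);

            /* e.g. 2048 - 24 - 24 = 2000 bytes on a typical LP64 build */
            printf("usable TSO payload per buffer: %zu bytes\n", usable);
            return 0;
    }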
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/accel_vi.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/accel_vi.c       Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,1194 @@
+/****************************************************************************
+ * Solarflare driver for Xen network acceleration
+ *
+ * Copyright 2006-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+#include <asm/io.h>
+
+#include "accel.h"
+#include "accel_util.h"
+#include "accel_bufs.h"
+#include "accel_tso.h"
+#include "accel_ssr.h"
+#include "netfront.h"
+
+#include "etherfabric/ef_vi.h"
+
+/*
+ * Max available space in a buffer for data once meta-data has taken
+ * its place
+ */
+#define NETFRONT_ACCEL_TX_BUF_LENGTH                                   \
+       ((PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE)                     \
+        - sizeof(struct netfront_accel_tso_buffer))
+
+#define ACCEL_TX_MAX_BUFFERS (6)
+#define ACCEL_VI_POLL_EVENTS (8)
+
+static
+int netfront_accel_vi_init_fini(netfront_accel_vnic *vnic, 
+                               struct net_accel_msg_hw *hw_msg)
+{
+       struct ef_vi_nic_type nic_type;
+       struct net_accel_hw_falcon_b *hw_info;
+       void *io_kva, *evq_base, *rx_dma_kva, *tx_dma_kva, *doorbell_kva;
+       u32 *evq_gnts;
+       u32 evq_order;
+       int vi_state_size;
+       u8 vi_data[VI_MAPPINGS_SIZE];
+
+       if (hw_msg == NULL)
+               goto fini;
+
+       /* And create the local macs table lock */
+       spin_lock_init(&vnic->table_lock);
+       
+       /* Create fastpath table, initial size 8, key length 8 */
+       if (cuckoo_hash_init(&vnic->fastpath_table, 3, 8)) {
+               EPRINTK("failed to allocate fastpath table\n");
+               goto fail_cuckoo;
+       }
+
+       vnic->hw.falcon.type = hw_msg->type;
+
+       switch (hw_msg->type) {
+       case NET_ACCEL_MSG_HWTYPE_FALCON_A:
+               hw_info = &hw_msg->resources.falcon_a.common;
+               /* Need the extra rptr register page on A1 */
+               io_kva = net_accel_map_iomem_page
+                       (vnic->dev, hw_msg->resources.falcon_a.evq_rptr_gnt,
+                        &vnic->hw.falcon.evq_rptr_mapping);
+               if (io_kva == NULL) {
+                       EPRINTK("%s: evq_rptr permission failed\n", __FUNCTION__);
+                       goto evq_rptr_fail;
+               }
+
+               vnic->hw.falcon.evq_rptr = io_kva + 
+                       (hw_info->evq_rptr & (PAGE_SIZE - 1));
+               break;
+       case NET_ACCEL_MSG_HWTYPE_FALCON_B:
+               hw_info = &hw_msg->resources.falcon_b;
+               break;
+       default:
+               goto bad_type;
+       }
+
+       /**** Event Queue ****/
+
+       /* Map the event queue pages */
+       evq_gnts = hw_info->evq_mem_gnts;
+       evq_order = hw_info->evq_order;
+
+       EPRINTK_ON(hw_info->evq_offs != 0);
+
+       DPRINTK("Will map evq %d pages\n", 1 << evq_order);
+
+       evq_base =
+               net_accel_map_grants_contig(vnic->dev, evq_gnts, 1 << evq_order,
+                                           &vnic->evq_mapping);
+       if (evq_base == NULL) {
+               EPRINTK("%s: evq_base failed\n", __FUNCTION__);
+               goto evq_fail;
+       }
+
+       /**** Doorbells ****/
+       /* Set up the doorbell mappings. */
+       doorbell_kva = 
+               net_accel_map_iomem_page(vnic->dev, hw_info->doorbell_gnt,
+                                        &vnic->hw.falcon.doorbell_mapping);
+       if (doorbell_kva == NULL) {
+               EPRINTK("%s: doorbell permission failed\n", __FUNCTION__);
+               goto doorbell_fail;
+       }
+       vnic->hw.falcon.doorbell = doorbell_kva;
+
+       /* On Falcon_B we get the rptr from the doorbell page */
+       if (hw_msg->type == NET_ACCEL_MSG_HWTYPE_FALCON_B) {
+               vnic->hw.falcon.evq_rptr = 
+                       (u32 *)((char *)vnic->hw.falcon.doorbell 
+                               + hw_info->evq_rptr);
+       }
+
+       /**** DMA Queue ****/
+
+       /* Set up the DMA Queues from the message. */
+       tx_dma_kva = net_accel_map_grants_contig
+               (vnic->dev, &(hw_info->txdmaq_gnt), 1, 
+                &vnic->hw.falcon.txdmaq_mapping);
+       if (tx_dma_kva == NULL) {
+               EPRINTK("%s: TX dma failed\n", __FUNCTION__);
+               goto tx_dma_fail;
+       }
+
+       rx_dma_kva = net_accel_map_grants_contig
+               (vnic->dev, &(hw_info->rxdmaq_gnt), 1, 
+                &vnic->hw.falcon.rxdmaq_mapping);
+       if (rx_dma_kva == NULL) {
+               EPRINTK("%s: RX dma failed\n", __FUNCTION__);
+               goto rx_dma_fail;
+       }
+
+       /* Full confession */
+       DPRINTK("Mapped H/W"
+               "  Tx DMAQ grant %x -> %p\n"
+               "  Rx DMAQ grant %x -> %p\n"
+               "  EVQ grant %x -> %p\n",
+               hw_info->txdmaq_gnt, tx_dma_kva,
+               hw_info->rxdmaq_gnt, rx_dma_kva,
+               evq_gnts[0], evq_base
+               );
+
+       memset(vi_data, 0, sizeof(vi_data));
+       
+       /* TODO BUG11305: convert efhw_arch to ef_vi_arch
+        * e.g.
+        * arch = ef_vi_arch_from_efhw_arch(hw_info->nic_arch);
+        * assert(arch >= 0);
+        * nic_type.arch = arch;
+        */
+       nic_type.arch = (unsigned char)hw_info->nic_arch;
+       nic_type.variant = (char)hw_info->nic_variant;
+       nic_type.revision = (unsigned char)hw_info->nic_revision;
+       
+       ef_vi_init_mapping_evq(vi_data, nic_type, hw_info->instance, 
+                              1 << (evq_order + PAGE_SHIFT), evq_base, 
+                              (void *)0xdeadbeef);
+
+       ef_vi_init_mapping_vi(vi_data, nic_type, hw_info->rx_capacity, 
+                             hw_info->tx_capacity, hw_info->instance, 
+                             doorbell_kva, rx_dma_kva, tx_dma_kva, 0);
+
+       vi_state_size = ef_vi_calc_state_bytes(hw_info->rx_capacity,
+                                              hw_info->tx_capacity);
+       vnic->vi_state = (ef_vi_state *)kmalloc(vi_state_size, GFP_KERNEL);
+       if (vnic->vi_state == NULL) {
+               EPRINTK("%s: kmalloc for VI state failed\n", __FUNCTION__);
+               goto vi_state_fail;
+       }
+       ef_vi_init(&vnic->vi, vi_data, vnic->vi_state, &vnic->evq_state, 0);
+
+       ef_eventq_state_init(&vnic->vi);
+
+       ef_vi_state_init(&vnic->vi);
+
+       return 0;
+
+fini:
+       kfree(vnic->vi_state);
+       vnic->vi_state = NULL;
+vi_state_fail:
+       net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.rxdmaq_mapping);
+rx_dma_fail:
+       net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.txdmaq_mapping);
+tx_dma_fail:
+       net_accel_unmap_iomem_page(vnic->dev, vnic->hw.falcon.doorbell_mapping);
+       vnic->hw.falcon.doorbell = NULL;
+doorbell_fail:
+       net_accel_unmap_grants_contig(vnic->dev, vnic->evq_mapping);
+evq_fail:
+       if (vnic->hw.falcon.type == NET_ACCEL_MSG_HWTYPE_FALCON_A)
+               net_accel_unmap_iomem_page(vnic->dev, 
+                                          vnic->hw.falcon.evq_rptr_mapping);
+       vnic->hw.falcon.evq_rptr = NULL;
+evq_rptr_fail:
+bad_type:
+       cuckoo_hash_destroy(&vnic->fastpath_table);
+fail_cuckoo:
+       return -EIO;
+}
+
+
+void netfront_accel_vi_ctor(netfront_accel_vnic *vnic)
+{
+       /* Just mark the VI as uninitialised. */
+       vnic->vi_state = NULL;
+}
+
+
+int netfront_accel_vi_init(netfront_accel_vnic *vnic, struct net_accel_msg_hw *hw_msg)
+{
+       BUG_ON(hw_msg == NULL);
+       return netfront_accel_vi_init_fini(vnic, hw_msg);
+}
+
+
+void netfront_accel_vi_dtor(netfront_accel_vnic *vnic)
+{
+       if (vnic->vi_state != NULL)
+               netfront_accel_vi_init_fini(vnic, NULL);
+}
+
+
+static
+void netfront_accel_vi_post_rx(netfront_accel_vnic *vnic, u16 id,
+                              netfront_accel_pkt_desc *buf)
+{
+
+       int idx = vnic->rx_dma_batched;
+
+#if 0
+       VPRINTK("Posting buffer %d (0x%08x) for rx at index %d, space is %d\n",
+               id, buf->pkt_buff_addr, idx, ef_vi_receive_space(&vnic->vi));
+#endif
+       /* Set up a virtual buffer descriptor */
+       ef_vi_receive_init(&vnic->vi, buf->pkt_buff_addr, id,
+                          /*rx_bytes=max*/0);
+
+       idx++;
+
+       vnic->rx_dma_level++;
+       
+       /* 
+        * Only push the descriptor to the card if we've reached the
+        * batch size.  Otherwise, the descriptors can sit around for
+        * a while.  There will be plenty available.
+        */
+       if (idx >= NETFRONT_ACCEL_RX_DESC_BATCH ||
+           vnic->rx_dma_level < NETFRONT_ACCEL_RX_DESC_BATCH) {
+#if 0
+               VPRINTK("Flushing %d rx descriptors.\n", idx);
+#endif
+
+               /* Push buffer to hardware */
+               ef_vi_receive_push(&vnic->vi);
+               
+               idx = 0;
+       }
+       
+       vnic->rx_dma_batched = idx;
+}
+
+
+inline
+void netfront_accel_vi_post_rx_or_free(netfront_accel_vnic *vnic, u16 id,
+                                      netfront_accel_pkt_desc *buf)
+{
+
+       VPRINTK("%s: %d\n", __FUNCTION__, id);
+
+       if (ef_vi_receive_space(&vnic->vi) <= vnic->rx_dma_batched) {
+               VPRINTK("RX space is full\n");
+               netfront_accel_buf_put(vnic->rx_bufs, id);
+               return;
+       }
+
+       VPRINTK("Completed buffer %d is reposted\n", id);
+       netfront_accel_vi_post_rx(vnic, id, buf);
+       
+       /*
+        * Let's see if there's any more to be pushed out to the NIC
+        * while we're here
+        */
+       while (ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) {
+               /* Try to allocate a buffer. */
+               buf = netfront_accel_buf_get(vnic->rx_bufs);
+               if (buf == NULL)
+                       break;
+               
+               /* Add it to the rx dma queue. */
+               netfront_accel_vi_post_rx(vnic, buf->buf_id, buf);      
+       }
+}
+
+
+void netfront_accel_vi_add_bufs(netfront_accel_vnic *vnic, int is_rx)
+{
+
+       while (is_rx && 
+              ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) {
+               netfront_accel_pkt_desc *buf;
+               
+               VPRINTK("%s: %d\n", __FUNCTION__, vnic->rx_dma_level);
+               
+               /* Try to allocate a buffer. */
+               buf = netfront_accel_buf_get(vnic->rx_bufs);
+
+               if (buf == NULL)
+                       break;
+               
+               /* Add it to the rx dma queue. */
+               netfront_accel_vi_post_rx(vnic, buf->buf_id, buf);
+       }
+
+       VPRINTK("%s: done\n", __FUNCTION__);
+}
+
+
+struct netfront_accel_multi_state {
+       unsigned remaining_len;
+
+       unsigned buffers;
+
+       struct netfront_accel_tso_buffer *output_buffers;
+
+       /* Where we are in the current fragment of the SKB. */
+       struct {
+               /* address of current position */
+               void *addr;
+               /* remaining length */    
+               unsigned int len;
+       } ifc; /*  == Input Fragment Cursor */
+};
+
+
+static inline void multi_post_start(struct netfront_accel_multi_state *st, 
+                                   struct sk_buff *skb)
+{
+       st->remaining_len = skb->len;
+       st->output_buffers = NULL;
+       st->buffers = 0;
+       st->ifc.len = skb_headlen(skb);
+       st->ifc.addr = skb->data;
+}
+
+static int multi_post_start_new_buffer(netfront_accel_vnic *vnic, 
+                                      struct netfront_accel_multi_state *st)
+{
+       struct netfront_accel_tso_buffer *tso_buf;
+       struct netfront_accel_pkt_desc *buf;
+
+       /* Get a mapped packet buffer */
+       buf = netfront_accel_buf_get(vnic->tx_bufs);
+       if (buf == NULL) {
+               DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
+               return -1;
+       }
+
+       /* Store a bit of meta-data at the end */
+       tso_buf = (struct netfront_accel_tso_buffer *)
+               (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
+
+       tso_buf->buf = buf;
+
+       tso_buf->length = 0;
+       
+       tso_buf->next = st->output_buffers;
+       st->output_buffers = tso_buf;
+       st->buffers++;
+
+       BUG_ON(st->buffers >= ACCEL_TX_MAX_BUFFERS);
+
+       /*
+        * Store the context, set to NULL, last packet buffer will get
+        * non-NULL later
+        */
+       tso_buf->buf->skb = NULL;
+       
+       return 0;
+}
+
+
+static void
+multi_post_fill_buffer_with_fragment(netfront_accel_vnic *vnic,
+                                    struct netfront_accel_multi_state *st)
+{
+       struct netfront_accel_tso_buffer *tso_buf;
+       unsigned n, space;
+
+       BUG_ON(st->output_buffers == NULL);
+       tso_buf = st->output_buffers;
+
+       if (st->ifc.len == 0) return;
+       if (tso_buf->length == NETFRONT_ACCEL_TX_BUF_LENGTH) return;
+
+       BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH);
+
+       space = NETFRONT_ACCEL_TX_BUF_LENGTH - tso_buf->length;
+       n = min(st->ifc.len, space);
+
+       memcpy(tso_buf->buf->pkt_kva + tso_buf->length, st->ifc.addr, n);
+
+       st->remaining_len -= n;
+       st->ifc.len -= n;
+       tso_buf->length += n;
+       st->ifc.addr += n;
+
+       BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH);
+
+       return;
+}
+
+
+static inline void multi_post_unwind(netfront_accel_vnic *vnic,
+                                    struct netfront_accel_multi_state *st)
+{
+       struct netfront_accel_tso_buffer *tso_buf;
+
+       DPRINTK("%s\n", __FUNCTION__);
+
+       while (st->output_buffers != NULL) {
+               tso_buf = st->output_buffers;
+               st->output_buffers = tso_buf->next;
+               st->buffers--;
+               netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id);
+       }
+       BUG_ON(st->buffers != 0);
+}
+
+
+static enum netfront_accel_post_status
+netfront_accel_enqueue_skb_multi(netfront_accel_vnic *vnic, struct sk_buff *skb)
+{
+       struct netfront_accel_tso_buffer *tso_buf;
+       struct netfront_accel_multi_state state;
+       ef_iovec iovecs[ACCEL_TX_MAX_BUFFERS];
+       skb_frag_t *f;
+       int frag_i, rc, dma_id;
+
+       multi_post_start(&state, skb);
+
+       frag_i = -1;
+
+       if (skb->ip_summed == CHECKSUM_HW) {
+               /* Set to zero to encourage falcon to work it out for us */
+               *(u16*)(skb->h.raw + skb->csum) = 0;
+       }
+
+       if (multi_post_start_new_buffer(vnic, &state)) {
+               DPRINTK("%s: out of buffers\n", __FUNCTION__);
+               goto unwind;
+       }
+
+       while (1) {
+               multi_post_fill_buffer_with_fragment(vnic, &state);
+
+               /* Move onto the next fragment? */
+               if (state.ifc.len == 0) {
+                       if (++frag_i >= skb_shinfo(skb)->nr_frags)
+                               /* End of payload reached. */
+                               break;
+                       f = &skb_shinfo(skb)->frags[frag_i];
+                       state.ifc.len = f->size;
+                       state.ifc.addr = page_address(f->page) + f->page_offset;
+               }
+
+               /* Start a new buffer? */
+               if ((state.output_buffers->length == 
+                    NETFRONT_ACCEL_TX_BUF_LENGTH) &&
+                   multi_post_start_new_buffer(vnic, &state)) {
+                       DPRINTK("%s: out of buffers\n", __FUNCTION__);
+                       goto unwind;
+               }
+       }
+
+       /* Check for space */
+       if (ef_vi_transmit_space(&vnic->vi) < state.buffers) {
+               DPRINTK("%s: Not enough TX space (%d)\n", __FUNCTION__, state.buffers);
+               goto unwind;
+       }
+
+       /* Store the skb in what will be the last buffer's context */
+       state.output_buffers->buf->skb = skb;
+       /* Remember dma_id of what will be the last buffer */ 
+       dma_id = state.output_buffers->buf->buf_id;
+
+       /*
+        * Make an iovec of the buffers in the list, reversing the
+        * buffers as we go as they are constructed on a stack
+        */
+       tso_buf = state.output_buffers;
+       for (frag_i = state.buffers-1; frag_i >= 0; frag_i--) {
+               iovecs[frag_i].iov_base = tso_buf->buf->pkt_buff_addr;
+               iovecs[frag_i].iov_len = tso_buf->length;
+               tso_buf = tso_buf->next;
+       }
+       
+       rc = ef_vi_transmitv(&vnic->vi, iovecs, state.buffers, dma_id);
+
+       /* Track number of tx fastpath stats */
+       vnic->netdev_stats.fastpath_tx_bytes += skb->len;
+       vnic->netdev_stats.fastpath_tx_pkts ++;
+#if NETFRONT_ACCEL_STATS
+       {
+               u32 n;
+               n = vnic->netdev_stats.fastpath_tx_pkts -
+                       (u32)vnic->stats.fastpath_tx_completions;
+               if (n > vnic->stats.fastpath_tx_pending_max)
+                       vnic->stats.fastpath_tx_pending_max = n;
+       }
+#endif
+       return NETFRONT_ACCEL_STATUS_GOOD;
+
+unwind:
+       multi_post_unwind(vnic, &state);
+
+       NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
+
+       return NETFRONT_ACCEL_STATUS_BUSY;
+}
+
+
+static enum netfront_accel_post_status 
+netfront_accel_enqueue_skb_single(netfront_accel_vnic *vnic, struct sk_buff *skb)
+{
+       struct netfront_accel_tso_buffer *tso_buf;
+       struct netfront_accel_pkt_desc *buf;
+       u8 *kva;
+       int rc;
+
+       if (ef_vi_transmit_space(&vnic->vi) < 1) {
+               DPRINTK("%s: No TX space\n", __FUNCTION__);
+               NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
+               return NETFRONT_ACCEL_STATUS_BUSY;
+       }
+
+       buf = netfront_accel_buf_get(vnic->tx_bufs);
+       if (buf == NULL) {
+               DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
+               NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
+               return NETFRONT_ACCEL_STATUS_BUSY;
+       }
+
+       /* Track number of tx fastpath stats */
+       vnic->netdev_stats.fastpath_tx_pkts++;
+       vnic->netdev_stats.fastpath_tx_bytes += skb->len;
+
+#if NETFRONT_ACCEL_STATS
+       {
+               u32 n;
+               n = vnic->netdev_stats.fastpath_tx_pkts - 
+                       (u32)vnic->stats.fastpath_tx_completions;
+               if (n > vnic->stats.fastpath_tx_pending_max)
+                       vnic->stats.fastpath_tx_pending_max = n;
+       }
+#endif
+       
+       /* Store the context */
+       buf->skb = skb;
+       
+       kva = buf->pkt_kva;
+
+       if (skb->ip_summed == CHECKSUM_HW) {
+               /* Set to zero to encourage falcon to work it out for us */
+               *(u16*)(skb->h.raw + skb->csum) = 0;
+       }
+       NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT
+               (skb, idx, frag_data, frag_len, {
+                       /* Copy in payload */
+                       VPRINTK("*** Copying %d bytes to %p\n", frag_len, kva);
+                       memcpy(kva, frag_data, frag_len);
+                       kva += frag_len;
+               });
+
+       VPRINTK("%s: id %d pkt %p kva %p buff_addr 0x%08x\n", __FUNCTION__,
+               buf->buf_id, buf, buf->pkt_kva, buf->pkt_buff_addr);
+
+
+       /* Set up the TSO meta-data for a single buffer/packet */
+       tso_buf = (struct netfront_accel_tso_buffer *)
+               (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
+       tso_buf->next = NULL;
+       tso_buf->buf = buf;
+       tso_buf->length = skb->len;
+
+       rc = ef_vi_transmit(&vnic->vi, buf->pkt_buff_addr, skb->len,
+                           buf->buf_id);
+       /* We checked for space already, so it really should succeed */
+       BUG_ON(rc != 0);
+
+       return NETFRONT_ACCEL_STATUS_GOOD;
+}
+
+
+enum netfront_accel_post_status 
+netfront_accel_vi_tx_post(netfront_accel_vnic *vnic, struct sk_buff *skb)
+{
+       struct ethhdr *pkt_eth_hdr;
+       struct iphdr *pkt_ipv4_hdr;
+       int value, try_fastpath;
+
+       /*
+        * This assumes that the data field points to the dest mac
+        * address.
+        */
+       cuckoo_hash_mac_key key = cuckoo_mac_to_key(skb->data);
+
+       /*
+        * NB very important that all things that could return "CANT"
+        * are tested before things that return "BUSY", as if it
+        * returns "BUSY" it is assumed that it won't return "CANT"
+        * next time it is tried
+        */
+
+       /*
+        * Do a fastpath send if fast path table lookup returns true.
+        * We do this without the table lock and so may get the wrong
+        * answer, but current opinion is that's not a big problem 
+        */
+       try_fastpath = cuckoo_hash_lookup(&vnic->fastpath_table, 
+                                         (cuckoo_hash_key *)(&key), &value);
+
+       if (!try_fastpath) {
+               VPRINTK("try fast path false for mac: " MAC_FMT "\n",
+                       MAC_ARG(skb->data));
+               
+               return NETFRONT_ACCEL_STATUS_CANT;
+       }
+
+       /* Check to see if the packet can be sent. */
+       if (skb_headlen(skb) < sizeof(*pkt_eth_hdr) + sizeof(*pkt_ipv4_hdr)) {
+               EPRINTK("%s: Packet header is too small\n", __FUNCTION__);
+               return NETFRONT_ACCEL_STATUS_CANT;
+       }
+
+       pkt_eth_hdr  = (void*)skb->data;
+       pkt_ipv4_hdr = (void*)(pkt_eth_hdr+1);
+
+       if (be16_to_cpu(pkt_eth_hdr->h_proto) != ETH_P_IP) {
+               DPRINTK("%s: Packet is not IPV4 (ether_type=0x%04x)\n", __FUNCTION__,
+                       be16_to_cpu(pkt_eth_hdr->h_proto));
+               return NETFRONT_ACCEL_STATUS_CANT;
+       }
+       
+       if (pkt_ipv4_hdr->protocol != IPPROTO_TCP &&
+           pkt_ipv4_hdr->protocol != IPPROTO_UDP) {
+               DPRINTK("%s: Packet is not TCP/UDP (ip_protocol=0x%02x)\n",
+                       __FUNCTION__, pkt_ipv4_hdr->protocol);
+               return NETFRONT_ACCEL_STATUS_CANT;
+       }
+       
+       VPRINTK("%s: %d bytes, gso %d\n", __FUNCTION__, skb->len, 
+               skb_shinfo(skb)->gso_size);
+       
+       if (skb_shinfo(skb)->gso_size) {
+               return netfront_accel_enqueue_skb_tso(vnic, skb);
+       }
+
+       if (skb->len <= NETFRONT_ACCEL_TX_BUF_LENGTH) {
+               return netfront_accel_enqueue_skb_single(vnic, skb);
+       }
+
+       return netfront_accel_enqueue_skb_multi(vnic, skb);
+}
+
+
+/*
+ * Copy the data to required end destination. NB. len is the total new
+ * length of the socket buffer, not the amount of data to copy
+ */
+inline
+int ef_vnic_copy_to_skb(netfront_accel_vnic *vnic, struct sk_buff *skb, 
+                       struct netfront_accel_pkt_desc *buf, int len)
+{
+       int i, extra = len - skb->len;
+       char c;
+       int pkt_stride = vnic->rx_pkt_stride;
+       int skb_stride = vnic->rx_skb_stride;
+       char *skb_start;
+       
+       /*
+        * This pulls stuff into the cache - have seen performance
+        * benefit in this, but disabled by default
+        */
+       skb_start = skb->data;
+       if (pkt_stride) {
+               for (i = 0; i < len; i += pkt_stride) {
+                       c += ((volatile char*)(buf->pkt_kva))[i];
+               }
+       }
+       if (skb_stride) {
+               for (i = skb->len; i < len ; i += skb_stride) {
+                       c += ((volatile char*)(skb_start))[i];
+               }
+       }
+
+       if (skb_tailroom(skb) >= extra) {
+               memcpy(skb_put(skb, extra), buf->pkt_kva, extra);
+               return 0;
+       }
+
+       return -ENOSPC;
+}
+
+
+static void discard_jumbo_state(netfront_accel_vnic *vnic) 
+{
+
+       if (vnic->jumbo_state.skb != NULL) {
+               dev_kfree_skb_any(vnic->jumbo_state.skb);
+
+               vnic->jumbo_state.skb = NULL;
+       }
+       vnic->jumbo_state.in_progress = 0;
+}
+
+
+static void  netfront_accel_vi_rx_complete(netfront_accel_vnic *vnic,
+                                          struct sk_buff *skb)
+{
+       cuckoo_hash_mac_key key;
+       unsigned long flags;
+       int value;
+       struct net_device *net_dev;
+
+
+       key = cuckoo_mac_to_key(skb->data + ETH_ALEN);
+
+       /*
+        * If this is a MAC address that we want to do fast path TX
+        * to, and we don't already, add it to the fastpath table.
+        * The initial lookup is done without the table lock and so
+        * may get the wrong answer, but current opinion is that's not
+        * a big problem
+        */
+       if (is_valid_ether_addr(skb->data + ETH_ALEN) &&
+           !cuckoo_hash_lookup(&vnic->fastpath_table, (cuckoo_hash_key *)&key,
+                               &value)) {
+               spin_lock_irqsave(&vnic->table_lock, flags);
+                  
+               cuckoo_hash_add_check(&vnic->fastpath_table,
+                                     (cuckoo_hash_key *)&key,
+                                     1, 1);
+               
+               spin_unlock_irqrestore(&vnic->table_lock, flags);
+       }
+
+       if (compare_ether_addr(skb->data, vnic->mac)) {
+               struct iphdr *ip = (struct iphdr *)(skb->data + ETH_HLEN);
+               u16 port;
+
+               DPRINTK("%s: saw wrong MAC address " MAC_FMT "\n", 
+                       __FUNCTION__, MAC_ARG(skb->data));
+
+               if (ip->protocol == IPPROTO_TCP) {
+                       struct tcphdr *tcp = (struct tcphdr *)
+                               ((char *)ip + 4 * ip->ihl);
+                       port = tcp->dest;
+               } else {
+                       struct udphdr *udp = (struct udphdr *)
+                               ((char *)ip + 4 * ip->ihl);
+                       EPRINTK_ON(ip->protocol != IPPROTO_UDP);
+                       port = udp->dest;
+               }
+
+               netfront_accel_msg_tx_fastpath(vnic, skb->data,
+                                              ip->daddr, port,
+                                              ip->protocol);
+       }
+
+       net_dev = vnic->net_dev;
+       skb->dev = net_dev;
+       skb->protocol = eth_type_trans(skb, net_dev);
+       /* CHECKSUM_UNNECESSARY as hardware has done it already */
+       skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+       if (!netfront_accel_ssr_skb(vnic, &vnic->ssr_state, skb))
+               netif_receive_skb(skb);
+}
+
+
+static int netfront_accel_vi_poll_process_rx(netfront_accel_vnic *vnic, 
+                                            ef_event *ev)
+{
+       struct netfront_accel_bufinfo *bufinfo = vnic->rx_bufs;
+       struct netfront_accel_pkt_desc *buf = NULL;
+       struct sk_buff *skb;
+       int id, len, sop = 0, cont = 0;
+
+       VPRINTK("Rx event.\n");
+       /*
+        * Complete the receive operation, and get the request id of
+        * the buffer
+        */
+       id = ef_vi_receive_done(&vnic->vi, ev);
+
+       if (id < 0 || id >= bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE) {
+               EPRINTK("Rx packet %d is invalid\n", id);
+               /* Carry on round the loop if more events */
+               goto bad_packet;
+       }
+       /* Get our buffer descriptor */
+       buf = netfront_accel_buf_find(bufinfo, id);
+
+       len = EF_EVENT_RX_BYTES(*ev);
+
+       /* An RX buffer has been removed from the DMA ring. */
+       vnic->rx_dma_level--;
+
+       if (EF_EVENT_TYPE(*ev) == EF_EVENT_TYPE_RX) {
+               sop = EF_EVENT_RX_SOP(*ev);
+               cont = EF_EVENT_RX_CONT(*ev);
+
+               skb = vnic->jumbo_state.skb;
+
+               VPRINTK("Rx packet %d: %d bytes so far; sop %d; cont %d\n", 
+                       id, len, sop, cont);
+
+               if (sop) {
+                       if (!vnic->jumbo_state.in_progress) {
+                               vnic->jumbo_state.in_progress = 1;
+                               BUG_ON(vnic->jumbo_state.skb != NULL);
+                       } else {
+                               /*
+                                * This fragment shows a missing tail in 
+                                * previous one, but is itself possibly OK
+                                */
+                               DPRINTK("sop and in_progress => no tail\n");
+
+                               /* Release the socket buffer we already had */
+                               discard_jumbo_state(vnic);
+
+                               /* Now start processing this fragment */
+                               vnic->jumbo_state.in_progress = 1;
+                               skb = NULL;
+                       }
+               } else if (!vnic->jumbo_state.in_progress) {
+                       DPRINTK("!sop and !in_progress => missing head\n");
+                       goto missing_head;
+               }
+
+               if (!cont) {
+                       /* Update state for next time */
+                       vnic->jumbo_state.in_progress = 0;
+                       vnic->jumbo_state.skb = NULL;
+               } else if (!vnic->jumbo_state.in_progress) {
+                       DPRINTK("cont and !in_progress => missing head\n");
+                       goto missing_head;
+               }
+
+               if (skb == NULL) {
+                       BUG_ON(!sop);
+
+                       if (!cont)
+                               skb = alloc_skb(len+NET_IP_ALIGN, GFP_ATOMIC);
+                       else
+                               skb = alloc_skb(vnic->net_dev->mtu+NET_IP_ALIGN,
+                                               GFP_ATOMIC);
+
+                       if (skb == NULL) {
+                               DPRINTK("%s: Couldn't get an rx skb.\n",
+                                       __FUNCTION__);
+                               netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
+                               /*
+                                * Dropping this fragment means we
+                                * should discard the rest too
+                                */
+                               discard_jumbo_state(vnic);
+
+                               /* Carry on round the loop if more events */
+                               return 0;
+                       }
+
+               }
+               
+               /* Copy the data to required end destination */
+               if (ef_vnic_copy_to_skb(vnic, skb, buf, len) != 0) {
+                       /*
+                        * No space in the skb - suggests > MTU packet
+                        * received
+                        */
+                       EPRINTK("%s: Rx packet too large (%d)\n",
+                               __FUNCTION__, len);
+                       netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
+                       discard_jumbo_state(vnic);
+                       return 0;
+               }
+               
+               /* Put the buffer back in the DMA queue. */
+               netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
+
+               if (cont) {
+                       vnic->jumbo_state.skb = skb;
+
+                       return 0;
+               } else {
+                       /* Track number of rx fastpath packets */
+                       vnic->netdev_stats.fastpath_rx_pkts++;
+                       vnic->netdev_stats.fastpath_rx_bytes += len;
+
+                       netfront_accel_vi_rx_complete(vnic, skb);
+
+                       return 1;
+               }
+       } else {
+               BUG_ON(EF_EVENT_TYPE(*ev) != EF_EVENT_TYPE_RX_DISCARD);
+
+               if (EF_EVENT_RX_DISCARD_TYPE(*ev) 
+                   == EF_EVENT_RX_DISCARD_TRUNC) {
+                       DPRINTK("%s: " EF_EVENT_FMT 
+                               " buffer %d FRM_TRUNC q_id %d\n",
+                               __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
+                               EF_EVENT_RX_DISCARD_Q_ID(*ev) );
+                       NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_frm_trunc);
+               } else if (EF_EVENT_RX_DISCARD_TYPE(*ev) 
+                         == EF_EVENT_RX_DISCARD_OTHER) {
+                       DPRINTK("%s: " EF_EVENT_FMT 
+                               " buffer %d RX_DISCARD_OTHER q_id %d\n",
+                               __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
+                               EF_EVENT_RX_DISCARD_Q_ID(*ev) );
+                       /*
+                        * Probably tail of packet for which error has
+                        * already been logged, so don't count in
+                        * stats
+                        */
+               } else {
+                       EPRINTK("%s: " EF_EVENT_FMT 
+                               " buffer %d rx discard type %d q_id %d\n",
+                               __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
+                               EF_EVENT_RX_DISCARD_TYPE(*ev), 
+                               EF_EVENT_RX_DISCARD_Q_ID(*ev) );
+                       NETFRONT_ACCEL_STATS_OP(++vnic->stats.bad_event_count);
+               }
+       }
+
+       /* discard type drops through here */
+
+bad_packet:
+       /* Release the socket buffer we already had */
+       discard_jumbo_state(vnic);
+
+missing_head:
+       BUG_ON(vnic->jumbo_state.in_progress != 0);
+       BUG_ON(vnic->jumbo_state.skb != NULL);
+
+       if (id >= 0 && id < bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE)
+               /* Put the buffer back in the DMA queue. */
+               netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
+
+       vnic->netdev_stats.fastpath_rx_errors++;
+
+       DPRINTK("%s experienced bad packet/missing fragment error: %d \n",
+               __FUNCTION__, ev->rx.flags);
+
+       return 0;
+}
+
+
+static void netfront_accel_vi_not_busy(netfront_accel_vnic *vnic)
+{
+       struct netfront_info *np = ((struct netfront_info *)
+                                   netdev_priv(vnic->net_dev));
+       struct sk_buff *skb;
+       int handled;
+       unsigned long flags;
+       
+       /*
+        * TODO if we could safely check tx_skb == NULL and return
+        * early without taking the lock, that would obviously help
+        * performance
+        */
+
+       /* Take the netfront lock which protects tx_skb. */
+       spin_lock_irqsave(&np->tx_lock, flags);
+       if (vnic->tx_skb != NULL) {
+               DPRINTK("%s trying to send spare buffer\n", __FUNCTION__);
+               
+               skb = vnic->tx_skb;
+               vnic->tx_skb = NULL;
+
+               spin_unlock_irqrestore(&np->tx_lock, flags);
+
+               handled = netfront_accel_vi_tx_post(vnic, skb);
+               
+               spin_lock_irqsave(&np->tx_lock, flags);
+
+               if (handled != NETFRONT_ACCEL_STATUS_BUSY) {
+                       DPRINTK("%s restarting tx\n", __FUNCTION__);
+                       if (netfront_check_queue_ready(vnic->net_dev)) {
+                               netif_wake_queue(vnic->net_dev);
+                               NETFRONT_ACCEL_STATS_OP
+                                       (vnic->stats.queue_wakes++);
+                       }
+               } else {
+                       vnic->tx_skb = skb;
+               }
+               
+               /*
+                * Should never get a CANT, as it checks that before
+                * deciding it was BUSY first time round 
+                */
+               BUG_ON(handled == NETFRONT_ACCEL_STATUS_CANT);
+       }
+       spin_unlock_irqrestore(&np->tx_lock, flags);
+}
+
+
+static void netfront_accel_vi_tx_complete(netfront_accel_vnic *vnic, 
+                                         struct netfront_accel_tso_buffer *tso_buf,
+                                         int is_last)
+{
+       struct netfront_accel_tso_buffer *next;
+
+       /* 
+        * We get a single completion for every call to
+        * ef_vi_transmitv so handle any other buffers which are part
+        * of the same packet 
+        */
+       while (tso_buf != NULL) {
+               if (tso_buf->buf->skb != NULL) {
+                       dev_kfree_skb_any(tso_buf->buf->skb);
+                       tso_buf->buf->skb = NULL;
+               }
+
+               next = tso_buf->next;
+
+               netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id);
+
+               tso_buf = next;
+       }
+
+       /*
+        * If this was the last one in the batch, we try and send any
+        * pending tx_skb. There should now be buffers and
+        * descriptors
+        */
+       if (is_last)
+               netfront_accel_vi_not_busy(vnic);
+}
+
+
+static void netfront_accel_vi_poll_process_tx(netfront_accel_vnic *vnic,
+                                             ef_event *ev)
+{
+       struct netfront_accel_pkt_desc *buf;
+       struct netfront_accel_tso_buffer *tso_buf;
+       ef_request_id ids[EF_VI_TRANSMIT_BATCH];
+       int i, n_ids;
+       unsigned long flags;
+
+       /* Get the request ids for this tx completion event. */
+       n_ids = ef_vi_transmit_unbundle(&vnic->vi, ev, ids);
+
+       /* Take the tx buffer spin lock and hold for the duration */
+       spin_lock_irqsave(&vnic->tx_lock, flags);
+
+       for (i = 0; i < n_ids; ++i) {
+               VPRINTK("Tx packet %d complete\n", ids[i]);
+               buf = netfront_accel_buf_find(vnic->tx_bufs, ids[i]);
+               NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_completions++);
+
+               tso_buf = (struct netfront_accel_tso_buffer *)
+                       (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
+               BUG_ON(tso_buf->buf != buf);
+
+               netfront_accel_vi_tx_complete(vnic, tso_buf, i == (n_ids-1));
+       }
+
+       spin_unlock_irqrestore(&vnic->tx_lock, flags);
+}
+
+
+int netfront_accel_vi_poll(netfront_accel_vnic *vnic, int rx_packets)
+{
+       ef_event ev[ACCEL_VI_POLL_EVENTS];
+       int rx_remain = rx_packets, rc, events, i;
+#if NETFRONT_ACCEL_STATS
+       int n_evs_polled = 0, rx_evs_polled = 0, tx_evs_polled = 0;
+#endif
+       BUG_ON(rx_packets <= 0);
+
+       events = ef_eventq_poll(&vnic->vi, ev, 
+                               min(rx_remain, ACCEL_VI_POLL_EVENTS));
+       i = 0;
+       NETFRONT_ACCEL_STATS_OP(n_evs_polled += events);
+
+       VPRINTK("%s: %d events\n", __FUNCTION__, events);
+
+       /* Loop over each event */
+       while (events) {
+               VPRINTK("%s: Event "EF_EVENT_FMT", index %lu\n", __FUNCTION__, 
+                       EF_EVENT_PRI_ARG(ev[i]),        
+                       (unsigned long)(vnic->vi.evq_state->evq_ptr));
+
+               if ((EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX) ||
+                   (EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX_DISCARD)) {
+                       rc = netfront_accel_vi_poll_process_rx(vnic, &ev[i]);
+                       rx_remain -= rc;
+                       BUG_ON(rx_remain < 0);
+                       NETFRONT_ACCEL_STATS_OP(rx_evs_polled++);
+               } else if (EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_TX) {
+                       netfront_accel_vi_poll_process_tx(vnic, &ev[i]);
+                       NETFRONT_ACCEL_STATS_OP(tx_evs_polled++);
+               } else if (EF_EVENT_TYPE(ev[i]) == 
+                          EF_EVENT_TYPE_RX_NO_DESC_TRUNC) {
+                       DPRINTK("%s: RX_NO_DESC_TRUNC " EF_EVENT_FMT "\n",
+                               __FUNCTION__, EF_EVENT_PRI_ARG(ev[i]));
+                       discard_jumbo_state(vnic);
+                       NETFRONT_ACCEL_STATS_OP(vnic->stats.rx_no_desc_trunc++);
+               } else {
+                       EPRINTK("Unexpected event " EF_EVENT_FMT "\n", 
+                               EF_EVENT_PRI_ARG(ev[i]));
+                       NETFRONT_ACCEL_STATS_OP(vnic->stats.bad_event_count++);
+               }
+
+               i++;
+
+               /* Carry on round the loop if more events and more space */
+               if (i == events) {
+                       if (rx_remain == 0)
+                               break;
+
+                       events = ef_eventq_poll(&vnic->vi, ev, 
+                                               min(rx_remain, 
+                                                   ACCEL_VI_POLL_EVENTS));
+                       i = 0;
+                       NETFRONT_ACCEL_STATS_OP(n_evs_polled += events);
+               }
+       }
+       
+#if NETFRONT_ACCEL_STATS
+       vnic->stats.event_count += n_evs_polled;
+       vnic->stats.event_count_since_irq += n_evs_polled;
+       if (n_evs_polled > vnic->stats.events_per_poll_max)
+               vnic->stats.events_per_poll_max = n_evs_polled;
+       if (rx_evs_polled > vnic->stats.events_per_poll_rx_max)
+               vnic->stats.events_per_poll_rx_max = rx_evs_polled;
+       if (tx_evs_polled > vnic->stats.events_per_poll_tx_max)
+               vnic->stats.events_per_poll_tx_max = tx_evs_polled;
+#endif
+
+       return rx_packets - rx_remain;
+}
+
+
+int netfront_accel_vi_enable_interrupts(netfront_accel_vnic *vnic)
+{
+       u32 sw_evq_ptr;
+
+       VPRINTK("%s: checking for event on %p\n", __FUNCTION__, &vnic->vi.evq_state);
+
+       BUG_ON(vnic == NULL);
+       BUG_ON(vnic->vi.evq_state == NULL);
+
+       /* Do a quick check for an event. */
+       if (ef_eventq_has_event(&vnic->vi)) {
+               VPRINTK("%s: found event\n",  __FUNCTION__);
+               return 0;
+       }
+
+       VPRINTK("evq_ptr=0x%08x  evq_mask=0x%08x\n",
+               vnic->evq_state.evq_ptr, vnic->vi.evq_mask);
+  
+       /* Request a wakeup from the hardware. */
+       sw_evq_ptr = vnic->evq_state.evq_ptr & vnic->vi.evq_mask;
+
+       BUG_ON(vnic->hw.falcon.evq_rptr == NULL);
+
+       VPRINTK("Requesting wakeup at 0x%08x, rptr %p\n", sw_evq_ptr,
+               vnic->hw.falcon.evq_rptr);
+       *(volatile u32 *)(vnic->hw.falcon.evq_rptr) = (sw_evq_ptr >> 3);
+
+       return 1;
+}
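One detail worth noting in netfront_accel_vi_enable_interrupts() above (illustration only, not additional patch code): the software event-queue pointer is a free-running byte offset, so it is masked down to an offset within the ring before being written back, and the shift by 3 presumably converts that byte offset into an event index, the Falcon event descriptors being 8 bytes each.  A standalone sketch of the arithmetic, with the ring size assumed purely for illustration:

    #include <stdio.h>

    int main(void)
    {
            /* Assume a ring of 1024 8-byte events: an 8 KB, power-of-two ring. */
            unsigned evq_bytes = 1024 * 8;
            unsigned evq_mask  = evq_bytes - 1;
            unsigned evq_ptr   = 0x12345678;         /* free-running byte offset */

            unsigned ring_off  = evq_ptr & evq_mask; /* byte offset within the ring */
            unsigned rptr      = ring_off >> 3;      /* value written to the rptr register */

            printf("byte offset 0x%04x -> rptr %u\n", ring_off, rptr);
            return 0;
    }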
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/accel_xenbus.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/accel_xenbus.c   Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,776 @@
+/****************************************************************************
+ * Solarflare driver for Xen network acceleration
+ *
+ * Copyright 2006-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+#include <linux/stddef.h>
+#include <linux/errno.h>
+
+#include <xen/xenbus.h>
+#include <xen/evtchn.h>
+#include <xen/gnttab.h>
+
+#include "accel.h"
+#include "accel_util.h"
+#include "accel_msg_iface.h"
+#include "accel_bufs.h"
+#include "accel_ssr.h"
+/* drivers/xen/netfront/netfront.h */
+#include "netfront.h"
+
+void netfront_accel_set_closing(netfront_accel_vnic *vnic) 
+{
+
+       vnic->frontend_state = XenbusStateClosing;
+       net_accel_update_state(vnic->dev, XenbusStateClosing);
+}
+       
+
+static void mac_address_change(struct xenbus_watch *watch,
+                              const char **vec, unsigned int len)
+{
+       netfront_accel_vnic *vnic;
+       struct xenbus_device *dev;
+       int rc;
+
+       DPRINTK("%s\n", __FUNCTION__);
+       
+       vnic = container_of(watch, netfront_accel_vnic, 
+                               mac_address_watch);
+       dev = vnic->dev;
+
+       rc = net_accel_xen_net_read_mac(dev, vnic->mac);
+
+       if (rc != 0)
+               EPRINTK("%s: failed to read mac (%d)\n", __FUNCTION__, rc);
+}
+
+
+static int setup_mac_address_watch(struct xenbus_device *dev,
+                                  netfront_accel_vnic *vnic)
+{
+       int err;
+
+       DPRINTK("Setting watch on %s/%s\n", dev->nodename, "mac");
+
+       err = xenbus_watch_path2(dev, dev->nodename, "mac", 
+                                &vnic->mac_address_watch, 
+                                mac_address_change);
+       if (err) {
+               EPRINTK("%s: Failed to register xenbus watch: %d\n",
+                       __FUNCTION__, err);
+               goto fail;
+       }
+
+       return 0;
+ fail:
+       vnic->mac_address_watch.node = NULL;
+       return err;
+}
+
+
+/* Grant access to some pages and publish through xenbus */
+static int make_named_grant(struct xenbus_device *dev, void *page, 
+                           const char *name, grant_ref_t *gnt_ref)
+{
+       struct xenbus_transaction tr;
+       int err;
+       grant_ref_t gnt;
+
+       gnt = net_accel_grant_page(dev, virt_to_mfn(page), 0);
+       if (gnt < 0)
+               return gnt;
+
+       do {
+               err = xenbus_transaction_start(&tr);
+               if (err != 0) {
+                       EPRINTK("%s: transaction start failed %d\n",
+                               __FUNCTION__, err);
+                       return err;
+               }
+               err = xenbus_printf(tr, dev->nodename, name, "%d", gnt);
+               if (err != 0) {
+                       EPRINTK("%s: xenbus_printf failed %d\n", __FUNCTION__,
+                               err);
+                       xenbus_transaction_end(tr, 1);
+                       return err;
+               }
+               err = xenbus_transaction_end(tr, 0);
+       } while (err == -EAGAIN);
+       
+       if (err != 0) {
+               EPRINTK("%s: transaction end failed %d\n", __FUNCTION__, err);
+               return err;
+       }
+       
+       *gnt_ref = gnt;
+
+       return 0;
+}
+
+
+static int remove_named_grant(struct xenbus_device *dev,
+                             const char *name, grant_ref_t gnt_ref)
+{
+       struct xenbus_transaction tr;
+       int err;
+
+       net_accel_ungrant_page(gnt_ref);
+
+       do {
+               err = xenbus_transaction_start(&tr);
+               if (err != 0) {
+                       EPRINTK("%s: transaction start failed %d\n",
+                               __FUNCTION__, err);
+                       return err;
+               }
+               err = xenbus_rm(tr, dev->nodename, name);
+               if (err != 0) {
+                       EPRINTK("%s: xenbus_rm failed %d\n", __FUNCTION__,
+                               err);
+                       xenbus_transaction_end(tr, 1);
+                       return err;
+               }
+               err = xenbus_transaction_end(tr, 0);
+       } while (err == -EAGAIN);
+       
+       if (err != 0) {
+               EPRINTK("%s: transaction end failed %d\n", __FUNCTION__, err);
+               return err;
+       }
+
+       return 0;
+}
+
+
+static 
+netfront_accel_vnic *netfront_accel_vnic_ctor(struct net_device *net_dev,
+                                             struct xenbus_device *dev)
+{
+       struct netfront_info *np =
+               (struct netfront_info *)netdev_priv(net_dev);
+       netfront_accel_vnic *vnic;
+       int err;
+
+       /*
+        * A bug in earlier versions of Xen accel plugin system meant
+        * you could be probed twice for the same device on suspend
+        * cancel.  Be tolerant of that.
+        */ 
+       if (np->accel_priv != NULL)
+               return ERR_PTR(-EALREADY);
+
+       /* Alloc mem for state */
+       vnic = kzalloc(sizeof(netfront_accel_vnic), GFP_KERNEL);
+       if (vnic == NULL) {
+               EPRINTK("%s: no memory for vnic state\n", __FUNCTION__);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       spin_lock_init(&vnic->tx_lock);
+
+       mutex_init(&vnic->vnic_mutex);
+       mutex_lock(&vnic->vnic_mutex);
+
+       /* Store so state can be retrieved from device */
+       BUG_ON(np->accel_priv != NULL);
+       np->accel_priv = vnic;
+       vnic->dev = dev;
+       vnic->net_dev = net_dev;
+       spin_lock_init(&vnic->irq_enabled_lock);
+       netfront_accel_ssr_init(&vnic->ssr_state);
+
+       init_waitqueue_head(&vnic->state_wait_queue);
+       vnic->backend_state = XenbusStateUnknown;
+       vnic->frontend_state = XenbusStateClosed;
+       vnic->removing = 0;
+       vnic->domU_state_is_setup = 0;
+       vnic->dom0_state_is_setup = 0;
+       vnic->poll_enabled = 0;
+       vnic->tx_enabled = 0;
+       vnic->tx_skb = NULL;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
+       INIT_WORK(&vnic->msg_from_bend, netfront_accel_msg_from_bend);
+#else
+       INIT_WORK(&vnic->msg_from_bend, netfront_accel_msg_from_bend, vnic);
+#endif
+
+       netfront_accel_debugfs_create(vnic);
+
+       mutex_unlock(&vnic->vnic_mutex);
+
+       err = net_accel_xen_net_read_mac(dev, vnic->mac);
+       if (err) 
+               goto fail_mac;
+
+       /* Setup a watch on the frontend's MAC address */
+       err = setup_mac_address_watch(dev, vnic);
+       if (err)
+               goto fail_mac;
+
+       return vnic;
+
+fail_mac:
+
+       mutex_lock(&vnic->vnic_mutex);
+
+       netfront_accel_debugfs_remove(vnic);
+
+       netfront_accel_ssr_fini(vnic, &vnic->ssr_state);
+
+       EPRINTK_ON(vnic->tx_skb != NULL);
+
+       vnic->frontend_state = XenbusStateUnknown;
+       net_accel_update_state(dev, XenbusStateUnknown);
+
+       mutex_unlock(&vnic->vnic_mutex);
+
+       np->accel_priv = NULL;
+       kfree(vnic);
+
+       return ERR_PTR(err);
+}
+
+
+static void netfront_accel_vnic_dtor(netfront_accel_vnic *vnic)
+{
+       struct net_device *net_dev = vnic->net_dev;
+       struct netfront_info *np = 
+               (struct netfront_info *)netdev_priv(net_dev);
+
+       /*
+        * Now we don't hold the lock any more it is safe to remove
+        * this watch and synchronise with the completion of
+        * watches
+        */
+       DPRINTK("%s: unregistering xenbus mac watch\n", __FUNCTION__);
+       unregister_xenbus_watch(&vnic->mac_address_watch);
+       kfree(vnic->mac_address_watch.node);
+
+       flush_workqueue(netfront_accel_workqueue);
+
+       mutex_lock(&vnic->vnic_mutex);
+
+       netfront_accel_debugfs_remove(vnic);
+
+       netfront_accel_ssr_fini(vnic, &vnic->ssr_state);
+
+       EPRINTK_ON(vnic->tx_skb != NULL);
+
+       vnic->frontend_state = XenbusStateUnknown;
+       net_accel_update_state(vnic->dev, XenbusStateUnknown);
+
+       mutex_unlock(&vnic->vnic_mutex);
+
+       np->accel_priv = NULL;
+       kfree(vnic);
+}
+
+
+static int vnic_setup_domU_shared_state(struct xenbus_device *dev,
+                                       netfront_accel_vnic *vnic)
+{
+       struct xenbus_transaction tr;
+       int err;
+       int msgs_per_queue;
+
+
+       DPRINTK("Setting up domU shared state.\n");
+
+       msgs_per_queue = (PAGE_SIZE/2) / sizeof(struct net_accel_msg);
+
+       /* Allocate buffer state */
+       vnic->tx_bufs = netfront_accel_init_bufs(&vnic->tx_lock);
+       if (vnic->tx_bufs == NULL) {
+               err = -ENOMEM;
+               EPRINTK("%s: Failed to allocate tx buffers\n", __FUNCTION__);
+               goto fail_tx_bufs;
+       }
+
+       vnic->rx_bufs = netfront_accel_init_bufs(NULL);
+       if (vnic->rx_bufs == NULL) {
+               err = -ENOMEM;
+               EPRINTK("%s: Failed to allocate rx buffers\n", __FUNCTION__);
+               goto fail_rx_bufs;
+       }
+
+       /* 
+        * This allocates two pages, one for the shared page and one
+        * for the message queue.
+        */
+       vnic->shared_page = (struct net_accel_shared_page *)
+               __get_free_pages(GFP_KERNEL, 1);
+       if (vnic->shared_page == NULL) {
+               EPRINTK("%s: no memory for shared pages\n", __FUNCTION__);
+               err = -ENOMEM;
+               goto fail_shared_page;
+       }
+
+       net_accel_msg_init_queue
+               (&vnic->from_dom0, &vnic->shared_page->queue0, 
+                (struct net_accel_msg *)((u8*)vnic->shared_page + PAGE_SIZE),
+                msgs_per_queue);
+
+       net_accel_msg_init_queue
+               (&vnic->to_dom0, &vnic->shared_page->queue1,
+                (struct net_accel_msg *)((u8*)vnic->shared_page +
+                                         (3 * PAGE_SIZE / 2)),
+                msgs_per_queue);
+       
+       vnic->msg_state = NETFRONT_ACCEL_MSG_NONE;
+
+       err = make_named_grant(dev, vnic->shared_page, "accel-ctrl-page",
+                              &vnic->ctrl_page_gnt);
+       if (err) {
+               EPRINTK("couldn't make ctrl-page named grant\n");
+               goto fail_ctrl_page_grant;
+       }
+
+       err = make_named_grant(dev, (u8*)vnic->shared_page + PAGE_SIZE,
+                              "accel-msg-page", &vnic->msg_page_gnt);
+       if (err) {
+               EPRINTK("couldn't make msg-page named grant\n");
+               goto fail_msg_page_grant;
+       }
+
+       /* Create xenbus msg event channel */
+       err = bind_listening_port_to_irqhandler
+               (dev->otherend_id, netfront_accel_msg_channel_irq_from_bend,
+                SA_SAMPLE_RANDOM, "vnicctrl", vnic);
+       if (err < 0) {
+               EPRINTK("Couldn't bind msg event channel\n");
+               goto fail_msg_irq;
+       }
+       vnic->msg_channel_irq = err;
+       vnic->msg_channel = irq_to_evtchn_port(vnic->msg_channel_irq);
+       
+       /* Create xenbus net event channel */
+       err = bind_listening_port_to_irqhandler
+               (dev->otherend_id, netfront_accel_net_channel_irq_from_bend,
+                SA_SAMPLE_RANDOM, "vnicfront", vnic);
+       if (err < 0) {
+               EPRINTK("Couldn't bind net event channel\n");
+               goto fail_net_irq;
+       }
+       vnic->net_channel_irq = err;
+       vnic->net_channel = irq_to_evtchn_port(vnic->net_channel_irq);
+       /* Want to ensure we don't get interrupts before we're ready */
+       netfront_accel_disable_net_interrupts(vnic);
+
+       DPRINTK("otherend %d has msg ch %u (%u) and net ch %u (%u)\n",
+               dev->otherend_id, vnic->msg_channel, vnic->msg_channel_irq, 
+               vnic->net_channel, vnic->net_channel_irq);
+
+       do {
+               err = xenbus_transaction_start(&tr);
+               if (err != 0) {
+                       EPRINTK("%s: Transaction start failed %d\n",
+                               __FUNCTION__, err);
+                       goto fail_transaction;
+               }
+
+               err = xenbus_printf(tr, dev->nodename, "accel-msg-channel",
+                                   "%u", vnic->msg_channel);
+               if (err != 0) {
+                       EPRINTK("%s: event channel xenbus write failed %d\n",
+                               __FUNCTION__, err);
+                       xenbus_transaction_end(tr, 1);
+                       goto fail_transaction;
+               }
+
+               err = xenbus_printf(tr, dev->nodename, "accel-net-channel",
+                                   "%u", vnic->net_channel);
+               if (err != 0) {
+                       EPRINTK("%s: net channel xenbus write failed %d\n",
+                               __FUNCTION__, err);
+                       xenbus_transaction_end(tr, 1);
+                       goto fail_transaction;
+               }
+
+               err = xenbus_transaction_end(tr, 0);
+       } while (err == -EAGAIN);
+
+       if (err != 0) {
+               EPRINTK("%s: Transaction end failed %d\n", __FUNCTION__, err);
+               goto fail_transaction;
+       }
+
+       DPRINTK("Completed setting up domU shared state\n");
+
+       return 0;
+
+fail_transaction:
+
+       unbind_from_irqhandler(vnic->net_channel_irq, vnic);
+fail_net_irq:
+
+       unbind_from_irqhandler(vnic->msg_channel_irq, vnic);
+fail_msg_irq:
+
+       remove_named_grant(dev, "accel-msg-page", vnic->msg_page_gnt);
+fail_msg_page_grant:
+
+       remove_named_grant(dev, "accel-ctrl-page", vnic->ctrl_page_gnt);
+fail_ctrl_page_grant:
+
+       free_pages((unsigned long)vnic->shared_page, 1);
+       vnic->shared_page = NULL;
+fail_shared_page:
+
+       netfront_accel_fini_bufs(vnic->rx_bufs);
+fail_rx_bufs:
+
+       netfront_accel_fini_bufs(vnic->tx_bufs);
+fail_tx_bufs:
+
+       /* Undo the memory allocation created when we got the HELLO */
+       netfront_accel_free_buffer_mem(&vnic->bufpages,
+                                      vnic->rx_bufs,
+                                      vnic->tx_bufs);
+
+       DPRINTK("Failed to setup domU shared state with code %d\n", err);
+
+       return err;
+}
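
For orientation, here is a rough sketch (illustrative only, not part of the changeset) of the two-page region that vnic_setup_domU_shared_state() allocates and grants to the backend, assuming 4K pages:

    /*
     * shared_page + 0               struct net_accel_shared_page
     *                               (queue0/queue1 control structures)
     *                               -- granted as "accel-ctrl-page"
     * shared_page + PAGE_SIZE       from_dom0 message ring
     *                               (msgs_per_queue entries)
     *                               -- granted as "accel-msg-page"
     * shared_page + 3*PAGE_SIZE/2   to_dom0 message ring
     *                               (msgs_per_queue entries)
     *
     * where msgs_per_queue = (PAGE_SIZE/2) / sizeof(struct net_accel_msg)
     */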
+
+
+static void vnic_remove_domU_shared_state(struct xenbus_device *dev, 
+                                         netfront_accel_vnic *vnic)
+{
+       struct xenbus_transaction tr;
+       
+       /*
+        * Don't remove any watches because we currently hold the
+        * mutex and the watches take the mutex.
+        */
+
+       DPRINTK("%s: removing event channel irq handlers %d %d\n",
+               __FUNCTION__, vnic->net_channel_irq, vnic->msg_channel_irq);
+       do {
+               if (xenbus_transaction_start(&tr) != 0)
+                       break;
+               xenbus_rm(tr, dev->nodename, "accel-msg-channel");
+               xenbus_rm(tr, dev->nodename, "accel-net-channel");
+       } while (xenbus_transaction_end(tr, 0) == -EAGAIN);
+
+       unbind_from_irqhandler(vnic->net_channel_irq, vnic);
+       unbind_from_irqhandler(vnic->msg_channel_irq, vnic);
+
+       /* ungrant pages for msg channel */
+       remove_named_grant(dev, "accel-ctrl-page", vnic->ctrl_page_gnt);
+       remove_named_grant(dev, "accel-msg-page", vnic->msg_page_gnt);
+       free_pages((unsigned long)vnic->shared_page, 1);
+       vnic->shared_page = NULL;
+
+       /* ungrant pages for buffers, and free buffer memory */
+       netfront_accel_free_buffer_mem(&vnic->bufpages,
+                                      vnic->rx_bufs,
+                                      vnic->tx_bufs);
+       netfront_accel_fini_bufs(vnic->rx_bufs);
+       netfront_accel_fini_bufs(vnic->tx_bufs);
+}
+
+
+static void vnic_setup_dom0_shared_state(struct xenbus_device *dev,
+                                       netfront_accel_vnic *vnic)
+{
+       DPRINTK("Setting up dom0 shared state\n");
+
+       netfront_accel_vi_ctor(vnic);
+
+       /*
+        * Message processing will be enabled when this function
+        * returns, but we might have missed an interrupt.  Schedule a
+        * check just in case.
+        */
+       queue_work(netfront_accel_workqueue, &vnic->msg_from_bend);
+}
+
+
+static void vnic_remove_dom0_shared_state(struct xenbus_device *dev,
+                                         netfront_accel_vnic *vnic)
+{
+       DPRINTK("Removing dom0 shared state\n");
+
+       vnic_stop_fastpath(vnic);
+
+       netfront_accel_vi_dtor(vnic);
+}
+
+
+/*************************************************************************/
+
+/*
+ * The following code handles accelstate changes between the frontend
+ * and the backend.  In response to transitions, it calls the following
+ * functions in matching pairs:
+ *
+ *   vnic_setup_domU_shared_state
+ *   vnic_remove_domU_shared_state
+ *
+ *   vnic_setup_dom0_shared_state
+ *   vnic_remove_dom0_shared_state
+ *
+ * Valid state transitions for DomU are as follows:
+ *
+ * Closed->Init       on probe or in response to Init from dom0
+ *
+ * Init->Connected    in response to Init from dom0
+ * Init->Closing      on error providing dom0 is in Init
+ * Init->Closed       on remove or in response to Closing from dom0
+ *
+ * Connected->Closing on error/remove
+ * Connected->Closed  in response to Closing from dom0
+ *
+ * Closing->Closed    in response to Closing from dom0
+ *
+ */
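
As a concrete example of the above: on a normal startup dom0 moves to Init, the frontend runs vnic_setup_domU_shared_state() and reports Connected; when dom0 subsequently reports Connected, the frontend runs vnic_setup_dom0_shared_state(). On teardown the matching remove functions are called in reverse order as dom0 moves through Closing and then Closed.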
+
+
+/* Function to deal with Xenbus accel state change in backend */
+static void netfront_accel_backend_accel_changed(netfront_accel_vnic *vnic,
+                                                XenbusState backend_state)
+{
+       struct xenbus_device *dev = vnic->dev;
+       XenbusState frontend_state;
+       int state;
+
+       DPRINTK("%s: changing from %s to %s. nodename %s, otherend %s\n",
+               __FUNCTION__, xenbus_strstate(vnic->backend_state),
+               xenbus_strstate(backend_state), dev->nodename, dev->otherend);
+
+       /*
+        * Ignore duplicate state changes.  This can happen if the
+        * backend changes state twice in quick succession and the
+        * first watch fires in the frontend after the second
+        * transition has completed.
+        */
+       if (vnic->backend_state == backend_state)
+               return;
+
+       vnic->backend_state = backend_state;
+       frontend_state = vnic->frontend_state;
+
+       switch (backend_state) {
+       case XenbusStateInitialising:
+               /*
+                * It's possible for us to miss the closed state from
+                * dom0, so do the work here.
+                */
+               if (vnic->domU_state_is_setup) {
+                       vnic_remove_domU_shared_state(dev, vnic);
+                       vnic->domU_state_is_setup = 0;
+               }
+
+               if (frontend_state != XenbusStateInitialising) {
+                       /* Make sure the backend doesn't go away. */
+                       frontend_state = XenbusStateInitialising;
+                       net_accel_update_state(dev, frontend_state);
+                       xenbus_scanf(XBT_NIL, dev->otherend, "accelstate",
+                                    "%d", &state);
+                       backend_state = (XenbusState)state;
+                       if (backend_state != XenbusStateInitialising)
+                               break;
+               }
+
+               /* Start the new connection. */
+               if (!vnic->removing) {
+                       BUG_ON(vnic->domU_state_is_setup);
+                       if (vnic_setup_domU_shared_state(dev, vnic) == 0) {
+                               vnic->domU_state_is_setup = 1;
+                               frontend_state = XenbusStateConnected;
+                       } else
+                               frontend_state = XenbusStateClosing;
+               }
+               break;
+       case XenbusStateConnected:
+               if (vnic->domU_state_is_setup &&
+                   !vnic->dom0_state_is_setup) {
+                       vnic_setup_dom0_shared_state(dev, vnic);
+                       vnic->dom0_state_is_setup = 1;
+               }
+               break;
+       default:
+       case XenbusStateClosing:
+               if (vnic->dom0_state_is_setup) {
+                       vnic_remove_dom0_shared_state(dev, vnic);
+                       vnic->dom0_state_is_setup = 0;
+               }
+               frontend_state = XenbusStateClosed;
+               break;
+       case XenbusStateUnknown:
+       case XenbusStateClosed:
+               if (vnic->domU_state_is_setup) {
+                       vnic_remove_domU_shared_state(dev, vnic);
+                       vnic->domU_state_is_setup = 0;
+               }
+               break;
+       }
+
+       if (frontend_state != vnic->frontend_state) {
+               DPRINTK("Switching from state %s (%d) to %s (%d)\n",
+                       xenbus_strstate(vnic->frontend_state),
+                       vnic->frontend_state,
+                       xenbus_strstate(frontend_state), frontend_state);
+               vnic->frontend_state = frontend_state;
+               net_accel_update_state(dev, frontend_state);
+       }
+
+       wake_up(&vnic->state_wait_queue);
+}
+
+
+static void backend_accel_state_change(struct xenbus_watch *watch,
+                                      const char **vec, unsigned int len)
+{
+       int state;
+       netfront_accel_vnic *vnic;
+       struct xenbus_device *dev;
+
+       DPRINTK("%s\n", __FUNCTION__);
+
+       vnic = container_of(watch, struct netfront_accel_vnic,
+                               backend_accel_watch);
+
+       mutex_lock(&vnic->vnic_mutex);
+
+       dev = vnic->dev;
+
+       state = (int)XenbusStateUnknown;
+       xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d", &state);
+       netfront_accel_backend_accel_changed(vnic, state);
+
+       mutex_unlock(&vnic->vnic_mutex);
+}
+
+
+static int setup_dom0_accel_watch(struct xenbus_device *dev,
+                                 netfront_accel_vnic *vnic)
+{
+       int err;
+
+       DPRINTK("Setting watch on %s/%s\n", dev->otherend, "accelstate");
+
+       err = xenbus_watch_path2(dev, dev->otherend, "accelstate", 
+                                &vnic->backend_accel_watch, 
+                                backend_accel_state_change);
+       if (err) {
+               EPRINTK("%s: Failed to register xenbus watch: %d\n",
+                       __FUNCTION__, err);
+               goto fail;
+       }
+       return 0;
+ fail:
+       vnic->backend_accel_watch.node = NULL;
+       return err;
+}
+
+
+int netfront_accel_probe(struct net_device *net_dev, struct xenbus_device *dev)
+{
+       netfront_accel_vnic *vnic;
+       int err;
+
+       DPRINTK("Probe passed device %s\n", dev->nodename);
+
+       vnic = netfront_accel_vnic_ctor(net_dev, dev);
+       if (IS_ERR(vnic))
+               return PTR_ERR(vnic);
+
+       /*
+        * Setup a watch on the backend accel state.  This sets things
+        * going.
+        */
+       err = setup_dom0_accel_watch(dev, vnic);
+       if (err) {
+               netfront_accel_vnic_dtor(vnic);
+               EPRINTK("%s: probe failed with code %d\n", __FUNCTION__, err);
+               return err;
+       }
+
+       /*
+        * Indicate to the other end that we're ready to start unless
+        * the watch has already fired.
+        */
+       mutex_lock(&vnic->vnic_mutex);
+       VPRINTK("setup success, updating accelstate\n");
+       if (vnic->frontend_state == XenbusStateClosed) {
+               vnic->frontend_state = XenbusStateInitialising;
+               net_accel_update_state(dev, XenbusStateInitialising);
+       }
+       mutex_unlock(&vnic->vnic_mutex);
+
+       DPRINTK("Probe done device %s\n", dev->nodename);
+
+       return 0;
+}
+
+
+int netfront_accel_remove(struct xenbus_device *dev)
+{
+       struct netfront_info *np =
+               (struct netfront_info *)dev->dev.driver_data;
+       netfront_accel_vnic *vnic = (netfront_accel_vnic *)np->accel_priv;
+
+       DPRINTK("%s %s\n", __FUNCTION__, dev->nodename);
+
+       BUG_ON(vnic == NULL);
+
+       mutex_lock(&vnic->vnic_mutex);
+
+       /* Reject any attempts to connect. */
+       vnic->removing = 1;
+
+       /* Close any existing connection. */
+       if (vnic->frontend_state == XenbusStateConnected) {
+               vnic->frontend_state = XenbusStateClosing;
+               net_accel_update_state(dev, XenbusStateClosing);
+       }
+
+       mutex_unlock(&vnic->vnic_mutex);
+
+       DPRINTK("%s waiting for release of %s\n", __FUNCTION__, dev->nodename);
+
+       /*
+        * Wait for the xenbus watch to release the shared resources.
+        * This indicates that dom0 has made the transition
+        * Closing->Closed or that dom0 was in Closed or Init and no
+        * resources were mapped.
+        */
+       wait_event(vnic->state_wait_queue,
+                  !vnic->domU_state_is_setup);
+
+       /*
+        * Now that we no longer need this watch, it is safe to remove
+        * it (and so synchronise with any outstanding callback
+        * completing).
+        */
+       DPRINTK("%s: unregistering xenbus accel watch\n",
+               __FUNCTION__);
+       unregister_xenbus_watch(&vnic->backend_accel_watch);
+       kfree(vnic->backend_accel_watch.node);
+
+       netfront_accel_vnic_dtor(vnic);
+
+       DPRINTK("%s done %s\n", __FUNCTION__, dev->nodename);
+
+       return 0;
+}
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/ef_vi_falcon.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/ef_vi_falcon.h   Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,172 @@
+/****************************************************************************
+ * Copyright 2002-2005: Level 5 Networks Inc.
+ * Copyright 2005-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications
+ *  <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *  <onload-dev@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+/*
+ * \author  slp
+ *  \brief  Falcon specific definitions
+ *   \date  2004/08
+ */
+
+#ifndef __EF_VI_FALCON_H__
+#define __EF_VI_FALCON_H__    
+
+#define EFHW_4K                0x00001000u
+#define EFHW_8K                0x00002000u
+
+/* include the autogenerated register definitions */
+
+#include "ef_vi_falcon_core.h"
+#include "ef_vi_falcon_desc.h"
+#include "ef_vi_falcon_event.h"
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Helpers to turn bit shifts into dword shifts and check that the bit fields
+ * haven't overflowed the dword etc.  The aim is to preserve consistency with
+ * the autogenerated headers - once stable we could hard code these.
+ *
+ *---------------------------------------------------------------------------*/
+
+/* mask constructors */
+#define __FALCON_MASK(WIDTH,T)  ((((T)1) << (WIDTH)) - 1)
+#define __EFVI_MASK32(WIDTH)  __FALCON_MASK((WIDTH),uint32_t)
+#define __EFVI_MASK64(WIDTH)  __FALCON_MASK((WIDTH),uint64_t)
+
+#define __EFVI_FALCON_MASKFIELD32(LBN, WIDTH)   ((uint32_t)  \
+                             (__EFVI_MASK32(WIDTH) << (LBN)))
+
+/* constructors for fields which span the first and second dwords */
+#define __LW(LBN) (32 - LBN)
+#define LOW(v, LBN, WIDTH)   ((uint32_t)  \
+                               (((v) & __EFVI_MASK64(__LW((LBN)))) << (LBN)))
+#define HIGH(v, LBN, WIDTH)  ((uint32_t)(((v) >> __LW((LBN))) & \
+                                       __EFVI_MASK64((WIDTH - __LW((LBN))))))
+/* constructors for fields within the second dword */
+#define __DW2(LBN)       ((LBN) - 32)
+
+/* constructors for fields which span the second and third dwords */
+#define __LW2(LBN) (64 - LBN)
+#define LOW2(v, LBN, WIDTH) ((uint32_t) \
+                       (((v) & __EFVI_MASK64(__LW2((LBN)))) << ((LBN) - 32)))
+#define HIGH2(v, LBN, WIDTH)  ((uint32_t) \
+             (((v) >> __LW2((LBN))) & __EFVI_MASK64((WIDTH - __LW2((LBN))))))
+
+/* constructors for fields within the third dword */
+#define __DW3(LBN)       ((LBN) - 64)
+
+                               
+/* constructors for fields which span the third and fourth dwords */
+#define __LW3(LBN) (96 - LBN)
+#define LOW3(v, LBN, WIDTH)   ((uint32_t)    \
+              (((v) & __EFVI_MASK64(__LW3((LBN)))) << ((LBN) - 64)))
+#define HIGH3(v, LBN, WIDTH)  ((uint32_t)    \
+             (((v) >> __LW3((LBN))) & __EFVI_MASK64((WIDTH - __LW3((LBN))))))
+
+/* constructors for fields within the fourth dword */
+#define __DW4(LBN)       ((LBN) - 96)
+
+/* checks that the autogenerated headers are consistent with our model */
+#define WIDTHCHCK(a, b) ef_assert((a) == (b))
+#define RANGECHCK(v, WIDTH) \
+                ef_assert(((uint64_t)(v) & ~(__EFVI_MASK64((WIDTH)))) == 0)
+
+/* fields within the first dword */
+#define DWCHCK(LBN, WIDTH) ef_assert(((LBN) >= 0) &&(((LBN)+(WIDTH)) <= 32))
+
+/* fields which span the first and second dwords */
+#define LWCHK(LBN, WIDTH)  ef_assert(WIDTH >= __LW(LBN))
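
As an illustrative example (a hypothetical field, not one of the register definitions below): for a field with LBN 28 and WIDTH 8, __LW(28) is 4, so the low four bits of the value end up at the top of the first dword and the remaining four bits at the bottom of the second:

    /* hypothetical field spanning dwords 0 and 1: LBN = 28, WIDTH = 8 */
    uint32_t dword0 = LOW(v, 28, 8);   /* expands to (v & 0xf) << 28 */
    uint32_t dword1 = HIGH(v, 28, 8);  /* expands to (v >> 4) & 0xf  */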
+
+/*----------------------------------------------------------------------------
+ *
+ * Buffer virtual addresses (4K buffers) 
+ *
+ *---------------------------------------------------------------------------*/
+
+/* Form a buffer virtual address from buffer ID and offset.  If the offset
+** is larger than the buffer size, then the buffer indexed will be
+** calculated appropriately.  It is the responsibility of the caller to
+** ensure that they have valid buffers programmed at that address.
+*/
+#define EFVI_FALCON_VADDR_4K_S         (12)         
+#define EFVI_FALCON_VADDR_M       0xfffff              /* post shift mask  */
+
+
+#define EFVI_FALCON_BUFFER_4K_ADDR(id,off)      \
+  (((id) << EFVI_FALCON_VADDR_4K_S) + (off))
+
+#define EFVI_FALCON_BUFFER_4K_PAGE(vaddr)                       \
+  (((vaddr) >> EFVI_FALCON_VADDR_4K_S) & EFVI_FALCON_VADDR_M)
+
+#define EFVI_FALCON_BUFFER_4K_OFF(vaddr)                \
+  ((vaddr) & __EFVI_MASK32(EFVI_FALCON_VADDR_4K_S))
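
For example (illustrative values only), buffer ID 5 at offset 0x123 decomposes as:

    EFVI_FALCON_BUFFER_4K_ADDR(5, 0x123)   /* == (5 << 12) + 0x123 == 0x5123 */
    EFVI_FALCON_BUFFER_4K_PAGE(0x5123)     /* == 5     */
    EFVI_FALCON_BUFFER_4K_OFF(0x5123)      /* == 0x123 */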
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Masks
+ *
+ *---------------------------------------------------------------------------*/
+
+#define EFVI_FALCON_CLOCK_ASIC_HZ    (125000)
+#define EFVI_FALCON_CLOCK_FPGA_HZ    (62500)
+#define EFVI_FALCON_CLOCK_HZ         EFVI_FALCON_CLOCK_ASIC_HZ
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Timers
+ *
+ *---------------------------------------------------------------------------*/
+
+/* Event-Queue Timer granularity - measured in us.
+   Given by: 4096 * 3 cycles * clock period */
+
+#define EFVI_FALCON_EVQTIMER_PERIOD_US   ((4096 * 3 * 1000) / EFVI_FALCON_CLOCK_HZ)
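
(With the ASIC value of EFVI_FALCON_CLOCK_HZ defined above, 125000, this evaluates to (4096 * 3 * 1000) / 125000 = 98, i.e. roughly 98us per timer count; the FPGA clock value gives about twice that.)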
+
+/* mode bits */
+#define EFVI_FALCON_TIMER_MODE_DIS     0     /* disabled */
+#define EFVI_FALCON_TIMER_MODE_RUN     1     /* started counting right away */
+#define EFVI_FALCON_TIMER_MODE_HOLD    2     /* trigger mode (user queues) */
+
+#define EFVI_FALCON_EVQTIMER_HOLD     (EFVI_FALCON_TIMER_MODE_HOLD << TIMER_MODE_LBN)
+#define EFVI_FALCON_EVQTIMER_RUN      (EFVI_FALCON_TIMER_MODE_RUN  << TIMER_MODE_LBN)
+#define EFVI_FALCON_EVQTIMER_DISABLE  (EFVI_FALCON_TIMER_MODE_DIS  << TIMER_MODE_LBN)
+
+
+/* ---- efhw_event_t helpers --- */
+
+#define EFVI_FALCON_EVENT_CODE(evp) \
+       ((evp)->u64 & EFVI_FALCON_EVENT_CODE_MASK)
+
+#define EFVI_FALCON_EVENT_SW_DATA_MASK    0x0000ffff
+
+#define __EFVI_FALCON_OPEN_MASK(WIDTH)  ((((uint64_t)1) << (WIDTH)) - 1)
+
+#define EFVI_FALCON_EVENT_CODE_MASK \
+           (__EFVI_FALCON_OPEN_MASK(EV_CODE_WIDTH) << EV_CODE_LBN)
+
+
+#endif  /* __EF_VI_FALCON_H__ */
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/ef_vi_falcon_core.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/ef_vi_falcon_core.h      Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,1075 @@
+
+#define  EFVI_FALCON_EXTENDED_P_BAR 1
+
+//////////////---- Bus Interface Unit Registers C Header ----//////////////
+#define IOM_IND_ADR_REG_OFST 0x0 // IO-mapped indirect access address register
+  #define IOM_AUTO_ADR_INC_EN_LBN 16
+  #define IOM_AUTO_ADR_INC_EN_WIDTH 1
+  #define IOM_IND_ADR_LBN 0
+  #define IOM_IND_ADR_WIDTH 16
+#define IOM_IND_DAT_REG_OFST 0x4 // IO-mapped indirect access data register
+  #define IOM_IND_DAT_LBN 0
+  #define IOM_IND_DAT_WIDTH 32
+#define ADR_REGION_REG_KER_OFST 0x0 // Address region register
+#define ADR_REGION_REG_OFST 0x0 // Address region register
+  #define ADR_REGION3_LBN 96
+  #define ADR_REGION3_WIDTH 18
+  #define ADR_REGION2_LBN 64
+  #define ADR_REGION2_WIDTH 18
+  #define ADR_REGION1_LBN 32
+  #define ADR_REGION1_WIDTH 18
+  #define ADR_REGION0_LBN 0
+  #define ADR_REGION0_WIDTH 18
+#define INT_EN_REG_KER_OFST 0x10 // Kernel driver Interrupt enable register
+  #define KER_INT_CHAR_LBN 4
+  #define KER_INT_CHAR_WIDTH 1
+  #define KER_INT_KER_LBN 3
+  #define KER_INT_KER_WIDTH 1
+  #define ILL_ADR_ERR_INT_EN_KER_LBN 2
+  #define ILL_ADR_ERR_INT_EN_KER_WIDTH 1
+  #define SRM_PERR_INT_EN_KER_LBN 1
+  #define SRM_PERR_INT_EN_KER_WIDTH 1
+  #define DRV_INT_EN_KER_LBN 0
+  #define DRV_INT_EN_KER_WIDTH 1
+#define INT_EN_REG_CHAR_OFST 0x20 // Char Driver interrupt enable register
+  #define CHAR_INT_CHAR_LBN 4
+  #define CHAR_INT_CHAR_WIDTH 1
+  #define CHAR_INT_KER_LBN 3
+  #define CHAR_INT_KER_WIDTH 1
+  #define ILL_ADR_ERR_INT_EN_CHAR_LBN 2
+  #define ILL_ADR_ERR_INT_EN_CHAR_WIDTH 1
+  #define SRM_PERR_INT_EN_CHAR_LBN 1
+  #define SRM_PERR_INT_EN_CHAR_WIDTH 1
+  #define DRV_INT_EN_CHAR_LBN 0
+  #define DRV_INT_EN_CHAR_WIDTH 1
+#define INT_ADR_REG_KER_OFST 0x30 // Interrupt host address for Kernel driver
+  #define INT_ADR_KER_LBN 0
+  #define INT_ADR_KER_WIDTH 64
+  #define DRV_INT_KER_LBN 32
+  #define DRV_INT_KER_WIDTH 1
+  #define EV_FF_HALF_INT_KER_LBN 3
+  #define EV_FF_HALF_INT_KER_WIDTH 1
+  #define EV_FF_FULL_INT_KER_LBN 2
+  #define EV_FF_FULL_INT_KER_WIDTH 1
+  #define ILL_ADR_ERR_INT_KER_LBN 1
+  #define ILL_ADR_ERR_INT_KER_WIDTH 1
+  #define SRAM_PERR_INT_KER_LBN 0
+  #define SRAM_PERR_INT_KER_WIDTH 1
+#define INT_ADR_REG_CHAR_OFST 0x40 // Interrupt host address for Char driver
+  #define INT_ADR_CHAR_LBN 0
+  #define INT_ADR_CHAR_WIDTH 64
+  #define DRV_INT_CHAR_LBN 32
+  #define DRV_INT_CHAR_WIDTH 1
+  #define EV_FF_HALF_INT_CHAR_LBN 3
+  #define EV_FF_HALF_INT_CHAR_WIDTH 1
+  #define EV_FF_FULL_INT_CHAR_LBN 2
+  #define EV_FF_FULL_INT_CHAR_WIDTH 1
+  #define ILL_ADR_ERR_INT_CHAR_LBN 1
+  #define ILL_ADR_ERR_INT_CHAR_WIDTH 1
+  #define SRAM_PERR_INT_CHAR_LBN 0
+  #define SRAM_PERR_INT_CHAR_WIDTH 1
+#define INT_ISR0_B0_OFST 0x90 // B0 only
+#define INT_ISR1_B0_OFST 0xA0
+#define INT_ACK_REG_KER_A1_OFST 0x50 // Kernel interrupt acknowledge register
+  #define RESERVED_LBN 0
+  #define RESERVED_WIDTH 32
+#define INT_ACK_REG_CHAR_A1_OFST 0x60 // CHAR interrupt acknowledge register
+  #define RESERVED_LBN 0
+  #define RESERVED_WIDTH 32
+//////////////---- Global CSR Registers C Header ----//////////////
+#define STRAP_REG_KER_OFST 0x200 // ASIC strap status register
+#define STRAP_REG_OFST 0x200 // ASIC strap status register
+  #define ONCHIP_SRAM_LBN 16
+  #define ONCHIP_SRAM_WIDTH 0
+  #define STRAP_ISCSI_EN_LBN 3
+  #define STRAP_ISCSI_EN_WIDTH 1
+  #define STRAP_PINS_LBN 0
+  #define STRAP_PINS_WIDTH 3
+#define GPIO_CTL_REG_KER_OFST 0x210 // GPIO control register
+#define GPIO_CTL_REG_OFST 0x210 // GPIO control register
+  #define GPIO_OEN_LBN 24
+  #define GPIO_OEN_WIDTH 4
+  #define GPIO_OUT_LBN 16
+  #define GPIO_OUT_WIDTH 4
+  #define GPIO_IN_LBN 8
+  #define GPIO_IN_WIDTH 4
+  #define GPIO_PWRUP_VALUE_LBN 0
+  #define GPIO_PWRUP_VALUE_WIDTH 4
+#define GLB_CTL_REG_KER_OFST 0x220 // Global control register
+#define GLB_CTL_REG_OFST 0x220 // Global control register
+  #define SWRST_LBN 0
+  #define SWRST_WIDTH 1
+#define FATAL_INTR_REG_KER_OFST 0x230 // Fatal interrupt register for Kernel
+  #define PCI_BUSERR_INT_KER_EN_LBN 43
+  #define PCI_BUSERR_INT_KER_EN_WIDTH 1
+  #define SRAM_OOB_INT_KER_EN_LBN 42
+  #define SRAM_OOB_INT_KER_EN_WIDTH 1
+  #define BUFID_OOB_INT_KER_EN_LBN 41
+  #define BUFID_OOB_INT_KER_EN_WIDTH 1
+  #define MEM_PERR_INT_KER_EN_LBN 40
+  #define MEM_PERR_INT_KER_EN_WIDTH 1
+  #define RBUF_OWN_INT_KER_EN_LBN 39
+  #define RBUF_OWN_INT_KER_EN_WIDTH 1
+  #define TBUF_OWN_INT_KER_EN_LBN 38
+  #define TBUF_OWN_INT_KER_EN_WIDTH 1
+  #define RDESCQ_OWN_INT_KER_EN_LBN 37
+  #define RDESCQ_OWN_INT_KER_EN_WIDTH 1
+  #define TDESCQ_OWN_INT_KER_EN_LBN 36
+  #define TDESCQ_OWN_INT_KER_EN_WIDTH 1
+  #define EVQ_OWN_INT_KER_EN_LBN 35
+  #define EVQ_OWN_INT_KER_EN_WIDTH 1
+  #define EVFF_OFLO_INT_KER_EN_LBN 34
+  #define EVFF_OFLO_INT_KER_EN_WIDTH 1
+  #define ILL_ADR_INT_KER_EN_LBN 33
+  #define ILL_ADR_INT_KER_EN_WIDTH 1
+  #define SRM_PERR_INT_KER_EN_LBN 32
+  #define SRM_PERR_INT_KER_EN_WIDTH 1
+  #define PCI_BUSERR_INT_KER_LBN 11
+  #define PCI_BUSERR_INT_KER_WIDTH 1
+  #define SRAM_OOB_INT_KER_LBN 10
+  #define SRAM_OOB_INT_KER_WIDTH 1
+  #define BUFID_OOB_INT_KER_LBN 9
+  #define BUFID_OOB_INT_KER_WIDTH 1
+  #define MEM_PERR_INT_KER_LBN 8
+  #define MEM_PERR_INT_KER_WIDTH 1
+  #define RBUF_OWN_INT_KER_LBN 7
+  #define RBUF_OWN_INT_KER_WIDTH 1
+  #define TBUF_OWN_INT_KER_LBN 6
+  #define TBUF_OWN_INT_KER_WIDTH 1
+  #define RDESCQ_OWN_INT_KER_LBN 5
+  #define RDESCQ_OWN_INT_KER_WIDTH 1
+  #define TDESCQ_OWN_INT_KER_LBN 4
+  #define TDESCQ_OWN_INT_KER_WIDTH 1
+  #define EVQ_OWN_INT_KER_LBN 3
+  #define EVQ_OWN_INT_KER_WIDTH 1
+  #define EVFF_OFLO_INT_KER_LBN 2
+  #define EVFF_OFLO_INT_KER_WIDTH 1
+  #define ILL_ADR_INT_KER_LBN 1
+  #define ILL_ADR_INT_KER_WIDTH 1
+  #define SRM_PERR_INT_KER_LBN 0
+  #define SRM_PERR_INT_KER_WIDTH 1
+#define FATAL_INTR_REG_OFST 0x240 // Fatal interrupt register for Char
+  #define PCI_BUSERR_INT_CHAR_EN_LBN 43
+  #define PCI_BUSERR_INT_CHAR_EN_WIDTH 1
+  #define SRAM_OOB_INT_CHAR_EN_LBN 42
+  #define SRAM_OOB_INT_CHAR_EN_WIDTH 1
+  #define BUFID_OOB_INT_CHAR_EN_LBN 41
+  #define BUFID_OOB_INT_CHAR_EN_WIDTH 1
+  #define MEM_PERR_INT_CHAR_EN_LBN 40
+  #define MEM_PERR_INT_CHAR_EN_WIDTH 1
+  #define RBUF_OWN_INT_CHAR_EN_LBN 39
+  #define RBUF_OWN_INT_CHAR_EN_WIDTH 1
+  #define TBUF_OWN_INT_CHAR_EN_LBN 38
+  #define TBUF_OWN_INT_CHAR_EN_WIDTH 1
+  #define RDESCQ_OWN_INT_CHAR_EN_LBN 37
+  #define RDESCQ_OWN_INT_CHAR_EN_WIDTH 1
+  #define TDESCQ_OWN_INT_CHAR_EN_LBN 36
+  #define TDESCQ_OWN_INT_CHAR_EN_WIDTH 1
+  #define EVQ_OWN_INT_CHAR_EN_LBN 35
+  #define EVQ_OWN_INT_CHAR_EN_WIDTH 1
+  #define EVFF_OFLO_INT_CHAR_EN_LBN 34
+  #define EVFF_OFLO_INT_CHAR_EN_WIDTH 1
+  #define ILL_ADR_INT_CHAR_EN_LBN 33
+  #define ILL_ADR_INT_CHAR_EN_WIDTH 1
+  #define SRM_PERR_INT_CHAR_EN_LBN 32
+  #define SRM_PERR_INT_CHAR_EN_WIDTH 1
+  #define FATAL_INTR_REG_EN_BITS    0xffffffffffffffffULL
+  #define PCI_BUSERR_INT_CHAR_LBN 11
+  #define PCI_BUSERR_INT_CHAR_WIDTH 1
+  #define SRAM_OOB_INT_CHAR_LBN 10
+  #define SRAM_OOB_INT_CHAR_WIDTH 1
+  #define BUFID_OOB_INT_CHAR_LBN 9
+  #define BUFID_OOB_INT_CHAR_WIDTH 1
+  #define MEM_PERR_INT_CHAR_LBN 8
+  #define MEM_PERR_INT_CHAR_WIDTH 1
+  #define RBUF_OWN_INT_CHAR_LBN 7
+  #define RBUF_OWN_INT_CHAR_WIDTH 1
+  #define TBUF_OWN_INT_CHAR_LBN 6
+  #define TBUF_OWN_INT_CHAR_WIDTH 1
+  #define RDESCQ_OWN_INT_CHAR_LBN 5
+  #define RDESCQ_OWN_INT_CHAR_WIDTH 1
+  #define TDESCQ_OWN_INT_CHAR_LBN 4
+  #define TDESCQ_OWN_INT_CHAR_WIDTH 1
+  #define EVQ_OWN_INT_CHAR_LBN 3
+  #define EVQ_OWN_INT_CHAR_WIDTH 1
+  #define EVFF_OFLO_INT_CHAR_LBN 2
+  #define EVFF_OFLO_INT_CHAR_WIDTH 1
+  #define ILL_ADR_INT_CHAR_LBN 1
+  #define ILL_ADR_INT_CHAR_WIDTH 1
+  #define SRM_PERR_INT_CHAR_LBN 0
+  #define SRM_PERR_INT_CHAR_WIDTH 1
+#define DP_CTRL_REG_OFST 0x250 // Datapath control register
+  #define FLS_EVQ_ID_LBN 0
+  #define FLS_EVQ_ID_WIDTH 12
+#define MEM_STAT_REG_KER_OFST 0x260 // Memory status register
+#define MEM_STAT_REG_OFST 0x260 // Memory status register
+  #define MEM_PERR_VEC_LBN 53
+  #define MEM_PERR_VEC_WIDTH 38
+  #define MBIST_CORR_LBN 38
+  #define MBIST_CORR_WIDTH 15
+  #define MBIST_ERR_LBN 0
+  #define MBIST_ERR_WIDTH 38
+#define DEBUG_REG_KER_OFST 0x270 // Debug register
+#define DEBUG_REG_OFST 0x270 // Debug register
+  #define DEBUG_BLK_SEL2_LBN 47
+  #define DEBUG_BLK_SEL2_WIDTH 3
+  #define DEBUG_BLK_SEL1_LBN 44
+  #define DEBUG_BLK_SEL1_WIDTH 3
+  #define DEBUG_BLK_SEL0_LBN 41
+  #define DEBUG_BLK_SEL0_WIDTH 3
+  #define MISC_DEBUG_ADDR_LBN 36
+  #define MISC_DEBUG_ADDR_WIDTH 5
+  #define SERDES_DEBUG_ADDR_LBN 31
+  #define SERDES_DEBUG_ADDR_WIDTH 5
+  #define EM_DEBUG_ADDR_LBN 26
+  #define EM_DEBUG_ADDR_WIDTH 5
+  #define SR_DEBUG_ADDR_LBN 21
+  #define SR_DEBUG_ADDR_WIDTH 5
+  #define EV_DEBUG_ADDR_LBN 16
+  #define EV_DEBUG_ADDR_WIDTH 5
+  #define RX_DEBUG_ADDR_LBN 11
+  #define RX_DEBUG_ADDR_WIDTH 5
+  #define TX_DEBUG_ADDR_LBN 6
+  #define TX_DEBUG_ADDR_WIDTH 5
+  #define BIU_DEBUG_ADDR_LBN 1
+  #define BIU_DEBUG_ADDR_WIDTH 5
+  #define DEBUG_EN_LBN 0
+  #define DEBUG_EN_WIDTH 1
+#define DRIVER_REG0_KER_OFST 0x280 // Driver scratch register 0
+#define DRIVER_REG0_OFST 0x280 // Driver scratch register 0
+  #define DRIVER_DW0_LBN 0
+  #define DRIVER_DW0_WIDTH 32
+#define DRIVER_REG1_KER_OFST 0x290 // Driver scratch register 1
+#define DRIVER_REG1_OFST 0x290 // Driver scratch register 1
+  #define DRIVER_DW1_LBN 0
+  #define DRIVER_DW1_WIDTH 32
+#define DRIVER_REG2_KER_OFST 0x2A0 // Driver scratch register 2
+#define DRIVER_REG2_OFST 0x2A0 // Driver scratch register 2
+  #define DRIVER_DW2_LBN 0
+  #define DRIVER_DW2_WIDTH 32
+#define DRIVER_REG3_KER_OFST 0x2B0 // Driver scratch register 3
+#define DRIVER_REG3_OFST 0x2B0 // Driver scratch register 3
+  #define DRIVER_DW3_LBN 0
+  #define DRIVER_DW3_WIDTH 32
+#define DRIVER_REG4_KER_OFST 0x2C0 // Driver scratch register 4
+#define DRIVER_REG4_OFST 0x2C0 // Driver scratch register 4
+  #define DRIVER_DW4_LBN 0
+  #define DRIVER_DW4_WIDTH 32
+#define DRIVER_REG5_KER_OFST 0x2D0 // Driver scratch register 5
+#define DRIVER_REG5_OFST 0x2D0 // Driver scratch register 5
+  #define DRIVER_DW5_LBN 0
+  #define DRIVER_DW5_WIDTH 32
+#define DRIVER_REG6_KER_OFST 0x2E0 // Driver scratch register 6
+#define DRIVER_REG6_OFST 0x2E0 // Driver scratch register 6
+  #define DRIVER_DW6_LBN 0
+  #define DRIVER_DW6_WIDTH 32
+#define DRIVER_REG7_KER_OFST 0x2F0 // Driver scratch register 7
+#define DRIVER_REG7_OFST 0x2F0 // Driver scratch register 7
+  #define DRIVER_DW7_LBN 0
+  #define DRIVER_DW7_WIDTH 32
+#define ALTERA_BUILD_REG_OFST 0x300 // Altera build register
+#define ALTERA_BUILD_REG_OFST 0x300 // Altera build register
+  #define ALTERA_BUILD_VER_LBN 0
+  #define ALTERA_BUILD_VER_WIDTH 32
+
+/* so called CSR spare register
+    - contains separate parity enable bits for the various internal memory blocks */
+#define MEM_PARITY_ERR_EN_REG_KER 0x310 
+#define MEM_PARITY_ALL_BLOCKS_EN_LBN 64
+#define MEM_PARITY_ALL_BLOCKS_EN_WIDTH 38
+#define MEM_PARITY_TX_DATA_EN_LBN   72
+#define MEM_PARITY_TX_DATA_EN_WIDTH 2
+
+//////////////---- Event & Timer Module Registers C Header ----//////////////
+
+#if EFVI_FALCON_EXTENDED_P_BAR
+#define EVQ_RPTR_REG_KER_OFST 0x11B00 // Event queue read pointer register
+#else
+#define EVQ_RPTR_REG_KER_OFST 0x1B00 // Event queue read pointer register
+#endif
+
+#define EVQ_RPTR_REG_OFST 0xFA0000 // Event queue read pointer register array.
+  #define EVQ_RPTR_LBN 0
+  #define EVQ_RPTR_WIDTH 15
+
+#if EFVI_FALCON_EXTENDED_P_BAR
+#define EVQ_PTR_TBL_KER_OFST 0x11A00 // Event queue pointer table for kernel access
+#else
+#define EVQ_PTR_TBL_KER_OFST 0x1A00 // Event queue pointer table for kernel access
+#endif
+
+#define EVQ_PTR_TBL_CHAR_OFST 0xF60000 // Event queue pointer table for char direct access
+  #define EVQ_WKUP_OR_INT_EN_LBN 39
+  #define EVQ_WKUP_OR_INT_EN_WIDTH 1
+  #define EVQ_NXT_WPTR_LBN 24
+  #define EVQ_NXT_WPTR_WIDTH 15
+  #define EVQ_EN_LBN 23
+  #define EVQ_EN_WIDTH 1
+  #define EVQ_SIZE_LBN 20
+  #define EVQ_SIZE_WIDTH 3
+  #define EVQ_BUF_BASE_ID_LBN 0
+  #define EVQ_BUF_BASE_ID_WIDTH 20
+#define TIMER_CMD_REG_KER_OFST 0x420 // Timer table for kernel access. Page-mapped
+#define TIMER_CMD_REG_PAGE4_OFST 0x8420 // Timer table for user-level access. Page-mapped. For lowest 1K queues.
+#define TIMER_CMD_REG_PAGE123K_OFST 0x1000420 // Timer table for user-level access. Page-mapped. For upper 3K queues.
+#define TIMER_TBL_OFST 0xF70000 // Timer table for char driver direct access
+  #define TIMER_MODE_LBN 12
+  #define TIMER_MODE_WIDTH 2
+  #define TIMER_VAL_LBN 0
+  #define TIMER_VAL_WIDTH 12
+  #define TIMER_MODE_INT_HLDOFF 2
+  #define EVQ_BUF_SIZE_LBN 0
+  #define EVQ_BUF_SIZE_WIDTH 1
+#define DRV_EV_REG_KER_OFST 0x440 // Driver generated event register
+#define DRV_EV_REG_OFST 0x440 // Driver generated event register
+  #define DRV_EV_QID_LBN 64
+  #define DRV_EV_QID_WIDTH 12
+  #define DRV_EV_DATA_LBN 0
+  #define DRV_EV_DATA_WIDTH 64
+#define EVQ_CTL_REG_KER_OFST 0x450 // Event queue control register
+#define EVQ_CTL_REG_OFST 0x450 // Event queue control register
+  #define RX_EVQ_WAKEUP_MASK_B0_LBN 15
+  #define RX_EVQ_WAKEUP_MASK_B0_WIDTH 6
+  #define EVQ_OWNERR_CTL_LBN 14
+  #define EVQ_OWNERR_CTL_WIDTH 1
+  #define EVQ_FIFO_AF_TH_LBN 8
+  #define EVQ_FIFO_AF_TH_WIDTH 6
+  #define EVQ_FIFO_NOTAF_TH_LBN 0
+  #define EVQ_FIFO_NOTAF_TH_WIDTH 6
+//////////////---- SRAM Module Registers C Header ----//////////////
+#define BUF_TBL_CFG_REG_KER_OFST 0x600 // Buffer table configuration register
+#define BUF_TBL_CFG_REG_OFST 0x600 // Buffer table configuration register
+  #define BUF_TBL_MODE_LBN 3
+  #define BUF_TBL_MODE_WIDTH 1
+#define SRM_RX_DC_CFG_REG_KER_OFST 0x610 // SRAM receive descriptor cache configuration register
+#define SRM_RX_DC_CFG_REG_OFST 0x610 // SRAM receive descriptor cache configuration register
+  #define SRM_RX_DC_BASE_ADR_LBN 0
+  #define SRM_RX_DC_BASE_ADR_WIDTH 21
+#define SRM_TX_DC_CFG_REG_KER_OFST 0x620 // SRAM transmit descriptor cache configuration register
+#define SRM_TX_DC_CFG_REG_OFST 0x620 // SRAM transmit descriptor cache configuration register
+  #define SRM_TX_DC_BASE_ADR_LBN 0
+  #define SRM_TX_DC_BASE_ADR_WIDTH 21
+#define SRM_CFG_REG_KER_OFST 0x630 // SRAM configuration register
+#define SRM_CFG_REG_OFST 0x630 // SRAM configuration register
+  #define SRAM_OOB_ADR_INTEN_LBN 5
+  #define SRAM_OOB_ADR_INTEN_WIDTH 1
+  #define SRAM_OOB_BUF_INTEN_LBN 4
+  #define SRAM_OOB_BUF_INTEN_WIDTH 1
+  #define SRAM_BT_INIT_EN_LBN 3
+  #define SRAM_BT_INIT_EN_WIDTH 1
+  #define SRM_NUM_BANK_LBN 2
+  #define SRM_NUM_BANK_WIDTH 1
+  #define SRM_BANK_SIZE_LBN 0
+  #define SRM_BANK_SIZE_WIDTH 2
+#define BUF_TBL_UPD_REG_KER_OFST 0x650 // Buffer table update register
+#define BUF_TBL_UPD_REG_OFST 0x650 // Buffer table update register
+  #define BUF_UPD_CMD_LBN 63
+  #define BUF_UPD_CMD_WIDTH 1
+  #define BUF_CLR_CMD_LBN 62
+  #define BUF_CLR_CMD_WIDTH 1
+  #define BUF_CLR_END_ID_LBN 32
+  #define BUF_CLR_END_ID_WIDTH 20
+  #define BUF_CLR_START_ID_LBN 0
+  #define BUF_CLR_START_ID_WIDTH 20
+#define SRM_UPD_EVQ_REG_KER_OFST 0x660 // Buffer table update register
+#define SRM_UPD_EVQ_REG_OFST 0x660 // Buffer table update register
+  #define SRM_UPD_EVQ_ID_LBN 0
+  #define SRM_UPD_EVQ_ID_WIDTH 12
+#define SRAM_PARITY_REG_KER_OFST 0x670 // SRAM parity register.
+#define SRAM_PARITY_REG_OFST 0x670 // SRAM parity register.
+  #define FORCE_SRAM_PERR_LBN 0
+  #define FORCE_SRAM_PERR_WIDTH 1
+
+#if EFVI_FALCON_EXTENDED_P_BAR
+#define BUF_HALF_TBL_KER_OFST 0x18000 // Buffer table in half buffer table mode direct access by kernel driver
+#else
+#define BUF_HALF_TBL_KER_OFST 0x8000 // Buffer table in half buffer table mode direct access by kernel driver
+#endif
+
+
+#define BUF_HALF_TBL_OFST 0x800000 // Buffer table in half buffer table mode direct access by char driver
+  #define BUF_ADR_HBUF_ODD_LBN 44
+  #define BUF_ADR_HBUF_ODD_WIDTH 20
+  #define BUF_OWNER_ID_HBUF_ODD_LBN 32
+  #define BUF_OWNER_ID_HBUF_ODD_WIDTH 12
+  #define BUF_ADR_HBUF_EVEN_LBN 12
+  #define BUF_ADR_HBUF_EVEN_WIDTH 20
+  #define BUF_OWNER_ID_HBUF_EVEN_LBN 0
+  #define BUF_OWNER_ID_HBUF_EVEN_WIDTH 12
+
+
+#if EFVI_FALCON_EXTENDED_P_BAR
+#define BUF_FULL_TBL_KER_OFST 0x18000 // Buffer table in full buffer table mode direct access by kernel driver
+#else
+#define BUF_FULL_TBL_KER_OFST 0x8000 // Buffer table in full buffer table mode direct access by kernel driver
+#endif
+
+
+
+
+#define BUF_FULL_TBL_OFST 0x800000 // Buffer table in full buffer table mode direct access by char driver
+  #define IP_DAT_BUF_SIZE_LBN 50
+  #define IP_DAT_BUF_SIZE_WIDTH 1
+  #define BUF_ADR_REGION_LBN 48
+  #define BUF_ADR_REGION_WIDTH 2
+  #define BUF_ADR_FBUF_LBN 14
+  #define BUF_ADR_FBUF_WIDTH 34
+  #define BUF_OWNER_ID_FBUF_LBN 0
+  #define BUF_OWNER_ID_FBUF_WIDTH 14
+#define SRM_DBG_REG_OFST 0x3000000 // SRAM debug access
+  #define SRM_DBG_LBN 0
+  #define SRM_DBG_WIDTH 64
+//////////////---- RX Datapath Registers C Header ----//////////////
+
+#define RX_CFG_REG_KER_OFST 0x800 // Receive configuration register
+#define RX_CFG_REG_OFST 0x800 // Receive configuration register
+
+#if !defined(FALCON_64K_RXFIFO) && !defined(FALCON_PRE_02020029)
+# if !defined(FALCON_128K_RXFIFO)
+#  define FALCON_128K_RXFIFO
+# endif
+#endif
+
+#if defined(FALCON_128K_RXFIFO)
+
+/* new for B0 */
+  #define RX_TOEP_TCP_SUPPRESS_B0_LBN 48
+  #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1
+  #define RX_INGR_EN_B0_LBN 47
+  #define RX_INGR_EN_B0_WIDTH 1
+  #define RX_TOEP_IPV4_B0_LBN 46
+  #define RX_TOEP_IPV4_B0_WIDTH 1
+  #define RX_HASH_ALG_B0_LBN 45
+  #define RX_HASH_ALG_B0_WIDTH 1
+  #define RX_HASH_INSERT_HDR_B0_LBN 44
+  #define RX_HASH_INSERT_HDR_B0_WIDTH 1
+/* moved for B0 */
+  #define RX_DESC_PUSH_EN_B0_LBN 43
+  #define RX_DESC_PUSH_EN_B0_WIDTH 1
+  #define RX_RDW_PATCH_EN_LBN 42 /* Non head of line blocking */
+  #define RX_RDW_PATCH_EN_WIDTH 1
+  #define RX_PCI_BURST_SIZE_B0_LBN 39
+  #define RX_PCI_BURST_SIZE_B0_WIDTH 3
+  #define RX_OWNERR_CTL_B0_LBN 38
+  #define RX_OWNERR_CTL_B0_WIDTH 1
+  #define RX_XON_TX_TH_B0_LBN 33 
+  #define RX_XON_TX_TH_B0_WIDTH 5
+  #define RX_XOFF_TX_TH_B0_LBN 28 
+  #define RX_XOFF_TX_TH_B0_WIDTH 5
+  #define RX_USR_BUF_SIZE_B0_LBN 19
+  #define RX_USR_BUF_SIZE_B0_WIDTH 9
+  #define RX_XON_MAC_TH_B0_LBN 10
+  #define RX_XON_MAC_TH_B0_WIDTH 9
+  #define RX_XOFF_MAC_TH_B0_LBN 1
+  #define RX_XOFF_MAC_TH_B0_WIDTH 9
+  #define RX_XOFF_MAC_EN_B0_LBN 0
+  #define RX_XOFF_MAC_EN_B0_WIDTH 1
+
+#elif !defined(FALCON_PRE_02020029)
+/* new for B0 */
+  #define RX_TOEP_TCP_SUPPRESS_B0_LBN 46
+  #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1
+  #define RX_INGR_EN_B0_LBN 45
+  #define RX_INGR_EN_B0_WIDTH 1
+  #define RX_TOEP_IPV4_B0_LBN 44
+  #define RX_TOEP_IPV4_B0_WIDTH 1
+  #define RX_HASH_ALG_B0_LBN 43
+  #define RX_HASH_ALG_B0_WIDTH 41
+  #define RX_HASH_INSERT_HDR_B0_LBN 42
+  #define RX_HASH_INSERT_HDR_B0_WIDTH 1
+/* moved for B0 */
+  #define RX_DESC_PUSH_EN_B0_LBN 41
+  #define RX_DESC_PUSH_EN_B0_WIDTH 1
+  #define RX_PCI_BURST_SIZE_B0_LBN 37
+  #define RX_PCI_BURST_SIZE_B0_WIDTH 3
+  #define RX_OWNERR_CTL_B0_LBN 36
+  #define RX_OWNERR_CTL_B0_WIDTH 1
+  #define RX_XON_TX_TH_B0_LBN 31
+  #define RX_XON_TX_TH_B0_WIDTH 5
+  #define RX_XOFF_TX_TH_B0_LBN 26
+  #define RX_XOFF_TX_TH_B0_WIDTH 5
+  #define RX_USR_BUF_SIZE_B0_LBN 17
+  #define RX_USR_BUF_SIZE_B0_WIDTH 9
+  #define RX_XON_MAC_TH_B0_LBN 9
+  #define RX_XON_MAC_TH_B0_WIDTH 8
+  #define RX_XOFF_MAC_TH_B0_LBN 1
+  #define RX_XOFF_MAC_TH_B0_WIDTH 8
+  #define RX_XOFF_MAC_EN_B0_LBN 0
+  #define RX_XOFF_MAC_EN_B0_WIDTH 1
+
+#else
+/* new for B0 */
+  #define RX_TOEP_TCP_SUPPRESS_B0_LBN 44
+  #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1
+  #define RX_INGR_EN_B0_LBN 43
+  #define RX_INGR_EN_B0_WIDTH 1
+  #define RX_TOEP_IPV4_B0_LBN 42
+  #define RX_TOEP_IPV4_B0_WIDTH 1
+  #define RX_HASH_ALG_B0_LBN 41
+  #define RX_HASH_ALG_B0_WIDTH 41
+  #define RX_HASH_INSERT_HDR_B0_LBN 40
+  #define RX_HASH_INSERT_HDR_B0_WIDTH 1
+/* moved for B0 */
+  #define RX_DESC_PUSH_EN_B0_LBN 35
+  #define RX_DESC_PUSH_EN_B0_WIDTH 1
+  #define RX_PCI_BURST_SIZE_B0_LBN 35
+  #define RX_PCI_BURST_SIZE_B0_WIDTH 2
+  #define RX_OWNERR_CTL_B0_LBN 34
+  #define RX_OWNERR_CTL_B0_WIDTH 1
+  #define RX_XON_TX_TH_B0_LBN 29
+  #define RX_XON_TX_TH_B0_WIDTH 5
+  #define RX_XOFF_TX_TH_B0_LBN 24
+  #define RX_XOFF_TX_TH_B0_WIDTH 5
+  #define RX_USR_BUF_SIZE_B0_LBN 15
+  #define RX_USR_BUF_SIZE_B0_WIDTH 9
+  #define RX_XON_MAC_TH_B0_LBN 8
+  #define RX_XON_MAC_TH_B0_WIDTH 7
+  #define RX_XOFF_MAC_TH_B0_LBN 1
+  #define RX_XOFF_MAC_TH_B0_WIDTH 7
+  #define RX_XOFF_MAC_EN_B0_LBN 0
+  #define RX_XOFF_MAC_EN_B0_WIDTH 1
+
+#endif
+
+/* A0/A1 */
+  #define RX_PUSH_EN_A1_LBN 35
+  #define RX_PUSH_EN_A1_WIDTH 1
+  #define RX_PCI_BURST_SIZE_A1_LBN 31
+  #define RX_PCI_BURST_SIZE_A1_WIDTH 3
+  #define RX_OWNERR_CTL_A1_LBN 30
+  #define RX_OWNERR_CTL_A1_WIDTH 1
+  #define RX_XON_TX_TH_A1_LBN 25
+  #define RX_XON_TX_TH_A1_WIDTH 5
+  #define RX_XOFF_TX_TH_A1_LBN 20
+  #define RX_XOFF_TX_TH_A1_WIDTH 5
+  #define RX_USR_BUF_SIZE_A1_LBN 11
+  #define RX_USR_BUF_SIZE_A1_WIDTH 9
+  #define RX_XON_MAC_TH_A1_LBN 6
+  #define RX_XON_MAC_TH_A1_WIDTH 5
+  #define RX_XOFF_MAC_TH_A1_LBN 1
+  #define RX_XOFF_MAC_TH_A1_WIDTH 5
+  #define RX_XOFF_MAC_EN_A1_LBN 0
+  #define RX_XOFF_MAC_EN_A1_WIDTH 1
+
+#define RX_FILTER_CTL_REG_OFST 0x810 // Receive filter control registers
+  #define SCATTER_ENBL_NO_MATCH_Q_B0_LBN 40
+  #define SCATTER_ENBL_NO_MATCH_Q_B0_WIDTH 1
+  #define UDP_FULL_SRCH_LIMIT_LBN 32
+  #define UDP_FULL_SRCH_LIMIT_WIDTH 8
+  #define NUM_KER_LBN 24
+  #define NUM_KER_WIDTH 2
+  #define UDP_WILD_SRCH_LIMIT_LBN 16
+  #define UDP_WILD_SRCH_LIMIT_WIDTH 8
+  #define TCP_WILD_SRCH_LIMIT_LBN 8
+  #define TCP_WILD_SRCH_LIMIT_WIDTH 8
+  #define TCP_FULL_SRCH_LIMIT_LBN 0
+  #define TCP_FULL_SRCH_LIMIT_WIDTH 8
+#define RX_FLUSH_DESCQ_REG_KER_OFST 0x820 // Receive flush descriptor queue register
+#define RX_FLUSH_DESCQ_REG_OFST 0x820 // Receive flush descriptor queue register
+  #define RX_FLUSH_DESCQ_CMD_LBN 24
+  #define RX_FLUSH_DESCQ_CMD_WIDTH 1
+  #define RX_FLUSH_EVQ_ID_LBN 12
+  #define RX_FLUSH_EVQ_ID_WIDTH 12
+  #define RX_FLUSH_DESCQ_LBN 0
+  #define RX_FLUSH_DESCQ_WIDTH 12
+#define RX_DESC_UPD_REG_KER_OFST 0x830 // Kernel receive descriptor update register. Page-mapped
+#define RX_DESC_UPD_REG_PAGE4_OFST 0x8830 // Char & user receive descriptor update register. Page-mapped. For lowest 1K queues.
+#define RX_DESC_UPD_REG_PAGE123K_OFST 0x1000830 // Char & user receive descriptor update register. Page-mapped. For upper 3K queues.
+  #define RX_DESC_WPTR_LBN 96
+  #define RX_DESC_WPTR_WIDTH 12
+  #define RX_DESC_PUSH_CMD_LBN 95
+  #define RX_DESC_PUSH_CMD_WIDTH 1
+  #define RX_DESC_LBN 0
+  #define RX_DESC_WIDTH 64
+  #define RX_KER_DESC_LBN 0
+  #define RX_KER_DESC_WIDTH 64
+  #define RX_USR_DESC_LBN 0
+  #define RX_USR_DESC_WIDTH 32
+#define RX_DC_CFG_REG_KER_OFST 0x840 // Receive descriptor cache configuration register
+#define RX_DC_CFG_REG_OFST 0x840 // Receive descriptor cache configuration register
+  #define RX_DC_SIZE_LBN 0
+  #define RX_DC_SIZE_WIDTH 2
+#define RX_DC_PF_WM_REG_KER_OFST 0x850 // Receive descriptor cache pre-fetch watermark register
+#define RX_DC_PF_WM_REG_OFST 0x850 // Receive descriptor cache pre-fetch watermark register
+  #define RX_DC_PF_LWM_LO_LBN 0
+  #define RX_DC_PF_LWM_LO_WIDTH 6
+
+#define RX_RSS_TKEY_B0_OFST 0x860 // RSS Toeplitz hash key (B0 only)
+
+#define RX_NODESC_DROP_REG 0x880
+  #define RX_NODESC_DROP_CNT_LBN 0
+  #define RX_NODESC_DROP_CNT_WIDTH 16
+
+#define XM_TX_CFG_REG_OFST 0x1230
+  #define XM_AUTO_PAD_LBN 5
+  #define XM_AUTO_PAD_WIDTH 1
+
+#define RX_FILTER_TBL0_OFST 0xF00000 // Receive filter table - even entries
+  #define RSS_EN_0_B0_LBN 110
+  #define RSS_EN_0_B0_WIDTH 1
+  #define SCATTER_EN_0_B0_LBN 109
+  #define SCATTER_EN_0_B0_WIDTH 1
+  #define TCP_UDP_0_LBN 108
+  #define TCP_UDP_0_WIDTH 1
+  #define RXQ_ID_0_LBN 96
+  #define RXQ_ID_0_WIDTH 12
+  #define DEST_IP_0_LBN 64
+  #define DEST_IP_0_WIDTH 32
+  #define DEST_PORT_TCP_0_LBN 48
+  #define DEST_PORT_TCP_0_WIDTH 16
+  #define SRC_IP_0_LBN 16
+  #define SRC_IP_0_WIDTH 32
+  #define SRC_TCP_DEST_UDP_0_LBN 0
+  #define SRC_TCP_DEST_UDP_0_WIDTH 16
+#define RX_FILTER_TBL1_OFST 0xF00010 // Receive filter table - odd entries
+  #define RSS_EN_1_B0_LBN 110
+  #define RSS_EN_1_B0_WIDTH 1
+  #define SCATTER_EN_1_B0_LBN 109
+  #define SCATTER_EN_1_B0_WIDTH 1
+  #define TCP_UDP_1_LBN 108
+  #define TCP_UDP_1_WIDTH 1
+  #define RXQ_ID_1_LBN 96
+  #define RXQ_ID_1_WIDTH 12
+  #define DEST_IP_1_LBN 64
+  #define DEST_IP_1_WIDTH 32
+  #define DEST_PORT_TCP_1_LBN 48
+  #define DEST_PORT_TCP_1_WIDTH 16
+  #define SRC_IP_1_LBN 16
+  #define SRC_IP_1_WIDTH 32
+  #define SRC_TCP_DEST_UDP_1_LBN 0
+  #define SRC_TCP_DEST_UDP_1_WIDTH 16
+
+#if EFVI_FALCON_EXTENDED_P_BAR
+#define RX_DESC_PTR_TBL_KER_OFST 0x11800 // Receive descriptor pointer kernel access
+#else
+#define RX_DESC_PTR_TBL_KER_OFST 0x1800 // Receive descriptor pointer kernel access
+#endif
+
+
+#define RX_DESC_PTR_TBL_OFST 0xF40000 // Receive descriptor pointer table
+  #define RX_ISCSI_DDIG_EN_LBN 88
+  #define RX_ISCSI_DDIG_EN_WIDTH 1
+  #define RX_ISCSI_HDIG_EN_LBN 87
+  #define RX_ISCSI_HDIG_EN_WIDTH 1
+  #define RX_DESC_PREF_ACT_LBN 86
+  #define RX_DESC_PREF_ACT_WIDTH 1
+  #define RX_DC_HW_RPTR_LBN 80
+  #define RX_DC_HW_RPTR_WIDTH 6
+  #define RX_DESCQ_HW_RPTR_LBN 68
+  #define RX_DESCQ_HW_RPTR_WIDTH 12
+  #define RX_DESCQ_SW_WPTR_LBN 56
+  #define RX_DESCQ_SW_WPTR_WIDTH 12
+  #define RX_DESCQ_BUF_BASE_ID_LBN 36
+  #define RX_DESCQ_BUF_BASE_ID_WIDTH 20
+  #define RX_DESCQ_EVQ_ID_LBN 24
+  #define RX_DESCQ_EVQ_ID_WIDTH 12
+  #define RX_DESCQ_OWNER_ID_LBN 10
+  #define RX_DESCQ_OWNER_ID_WIDTH 14
+  #define RX_DESCQ_LABEL_LBN 5
+  #define RX_DESCQ_LABEL_WIDTH 5
+  #define RX_DESCQ_SIZE_LBN 3
+  #define RX_DESCQ_SIZE_WIDTH 2
+  #define RX_DESCQ_TYPE_LBN 2
+  #define RX_DESCQ_TYPE_WIDTH 1
+  #define RX_DESCQ_JUMBO_LBN 1
+  #define RX_DESCQ_JUMBO_WIDTH 1
+  #define RX_DESCQ_EN_LBN 0
+  #define RX_DESCQ_EN_WIDTH 1
+
+
+#define RX_RSS_INDIR_TBL_B0_OFST 0xFB0000 // RSS indirection table (B0 only)
+  #define RX_RSS_INDIR_ENT_B0_LBN 0
+  #define RX_RSS_INDIR_ENT_B0_WIDTH 6
+
+//////////////---- TX Datapath Registers C Header ----//////////////
+#define TX_FLUSH_DESCQ_REG_KER_OFST 0xA00 // Transmit flush descriptor queue register
+#define TX_FLUSH_DESCQ_REG_OFST 0xA00 // Transmit flush descriptor queue register
+  #define TX_FLUSH_DESCQ_CMD_LBN 12
+  #define TX_FLUSH_DESCQ_CMD_WIDTH 1
+  #define TX_FLUSH_DESCQ_LBN 0
+  #define TX_FLUSH_DESCQ_WIDTH 12
+#define TX_DESC_UPD_REG_KER_OFST 0xA10 // Kernel transmit descriptor update register. Page-mapped
+#define TX_DESC_UPD_REG_PAGE4_OFST 0x8A10 // Char & user transmit descriptor update register. Page-mapped
+#define TX_DESC_UPD_REG_PAGE123K_OFST 0x1000A10 // Char & user transmit descriptor update register. Page-mapped
+  #define TX_DESC_WPTR_LBN 96
+  #define TX_DESC_WPTR_WIDTH 12
+  #define TX_DESC_PUSH_CMD_LBN 95
+  #define TX_DESC_PUSH_CMD_WIDTH 1
+  #define TX_DESC_LBN 0
+  #define TX_DESC_WIDTH 95
+  #define TX_KER_DESC_LBN 0
+  #define TX_KER_DESC_WIDTH 64
+  #define TX_USR_DESC_LBN 0
+  #define TX_USR_DESC_WIDTH 64
+#define TX_DC_CFG_REG_KER_OFST 0xA20 // Transmit descriptor cache configuration register
+#define TX_DC_CFG_REG_OFST 0xA20 // Transmit descriptor cache configuration register
+  #define TX_DC_SIZE_LBN 0
+  #define TX_DC_SIZE_WIDTH 2
+
+#if EFVI_FALCON_EXTENDED_P_BAR
+#define TX_DESC_PTR_TBL_KER_OFST 0x11900 // Transmit descriptor pointer.
+#else
+#define TX_DESC_PTR_TBL_KER_OFST 0x1900 // Transmit descriptor pointer.
+#endif
+
+
+#define TX_DESC_PTR_TBL_OFST 0xF50000 // Transmit descriptor pointer
+  #define TX_NON_IP_DROP_DIS_B0_LBN 91
+  #define TX_NON_IP_DROP_DIS_B0_WIDTH 1
+  #define TX_IP_CHKSM_DIS_B0_LBN 90
+  #define TX_IP_CHKSM_DIS_B0_WIDTH 1
+  #define TX_TCP_CHKSM_DIS_B0_LBN 89
+  #define TX_TCP_CHKSM_DIS_B0_WIDTH 1
+  #define TX_DESCQ_EN_LBN 88
+  #define TX_DESCQ_EN_WIDTH 1
+  #define TX_ISCSI_DDIG_EN_LBN 87
+  #define TX_ISCSI_DDIG_EN_WIDTH 1
+  #define TX_ISCSI_HDIG_EN_LBN 86
+  #define TX_ISCSI_HDIG_EN_WIDTH 1
+  #define TX_DC_HW_RPTR_LBN 80
+  #define TX_DC_HW_RPTR_WIDTH 6
+  #define TX_DESCQ_HW_RPTR_LBN 68
+  #define TX_DESCQ_HW_RPTR_WIDTH 12
+  #define TX_DESCQ_SW_WPTR_LBN 56
+  #define TX_DESCQ_SW_WPTR_WIDTH 12
+  #define TX_DESCQ_BUF_BASE_ID_LBN 36
+  #define TX_DESCQ_BUF_BASE_ID_WIDTH 20
+  #define TX_DESCQ_EVQ_ID_LBN 24
+  #define TX_DESCQ_EVQ_ID_WIDTH 12
+  #define TX_DESCQ_OWNER_ID_LBN 10
+  #define TX_DESCQ_OWNER_ID_WIDTH 14
+  #define TX_DESCQ_LABEL_LBN 5
+  #define TX_DESCQ_LABEL_WIDTH 5
+  #define TX_DESCQ_SIZE_LBN 3
+  #define TX_DESCQ_SIZE_WIDTH 2
+  #define TX_DESCQ_TYPE_LBN 1
+  #define TX_DESCQ_TYPE_WIDTH 2
+  #define TX_DESCQ_FLUSH_LBN 0
+  #define TX_DESCQ_FLUSH_WIDTH 1
+#define TX_CFG_REG_KER_OFST 0xA50 // Transmit configuration register
+#define TX_CFG_REG_OFST 0xA50 // Transmit configuration register
+  #define TX_IP_ID_P1_OFS_LBN 32
+  #define TX_IP_ID_P1_OFS_WIDTH 15
+  #define TX_IP_ID_P0_OFS_LBN 16
+  #define TX_IP_ID_P0_OFS_WIDTH 15
+  #define TX_TURBO_EN_LBN 3
+  #define TX_TURBO_EN_WIDTH 1 
+  #define TX_OWNERR_CTL_LBN 2
+  #define TX_OWNERR_CTL_WIDTH 2
+  #define TX_NON_IP_DROP_DIS_LBN 1
+  #define TX_NON_IP_DROP_DIS_WIDTH 1
+  #define TX_IP_ID_REP_EN_LBN 0
+  #define TX_IP_ID_REP_EN_WIDTH 1
+#define TX_RESERVED_REG_KER_OFST 0xA80 // Transmit configuration register
+#define TX_RESERVED_REG_OFST 0xA80 // Transmit configuration register
+  #define TX_CSR_PUSH_EN_LBN 89
+  #define TX_CSR_PUSH_EN_WIDTH 1
+  #define TX_RX_SPACER_LBN 64
+  #define TX_RX_SPACER_WIDTH 8
+  #define TX_SW_EV_EN_LBN 59
+  #define TX_SW_EV_EN_WIDTH 1
+  #define TX_RX_SPACER_EN_LBN 57
+  #define TX_RX_SPACER_EN_WIDTH 1
+  #define TX_CSR_PREF_WD_TMR_LBN 24
+  #define TX_CSR_PREF_WD_TMR_WIDTH 16
+  #define TX_CSR_ONLY1TAG_LBN 21
+  #define TX_CSR_ONLY1TAG_WIDTH 1
+  #define TX_PREF_THRESHOLD_LBN 19
+  #define TX_PREF_THRESHOLD_WIDTH 2
+  #define TX_ONE_PKT_PER_Q_LBN 18
+  #define TX_ONE_PKT_PER_Q_WIDTH 1
+  #define TX_DIS_NON_IP_EV_LBN 17
+  #define TX_DIS_NON_IP_EV_WIDTH 1
+  #define TX_DMA_SPACER_LBN 8
+  #define TX_DMA_SPACER_WIDTH 8
+  #define TX_FLUSH_MIN_LEN_EN_B0_LBN 7
+  #define TX_FLUSH_MIN_LEN_EN_B0_WIDTH 1
+  #define TX_TCP_DIS_A1_LBN 7
+  #define TX_TCP_DIS_A1_WIDTH 1
+  #define TX_IP_DIS_A1_LBN 6
+  #define TX_IP_DIS_A1_WIDTH 1
+  #define TX_MAX_CPL_LBN 2
+  #define TX_MAX_CPL_WIDTH 2
+  #define TX_MAX_PREF_LBN 0
+  #define TX_MAX_PREF_WIDTH 2
+#define TX_VLAN_REG_OFST 0xAE0 // Transmit VLAN tag register
+  #define TX_VLAN_EN_LBN 127
+  #define TX_VLAN_EN_WIDTH 1
+  #define TX_VLAN7_PORT1_EN_LBN 125
+  #define TX_VLAN7_PORT1_EN_WIDTH 1
+  #define TX_VLAN7_PORT0_EN_LBN 124
+  #define TX_VLAN7_PORT0_EN_WIDTH 1
+  #define TX_VLAN7_LBN 112
+  #define TX_VLAN7_WIDTH 12
+  #define TX_VLAN6_PORT1_EN_LBN 109
+  #define TX_VLAN6_PORT1_EN_WIDTH 1
+  #define TX_VLAN6_PORT0_EN_LBN 108
+  #define TX_VLAN6_PORT0_EN_WIDTH 1
+  #define TX_VLAN6_LBN 96
+  #define TX_VLAN6_WIDTH 12
+  #define TX_VLAN5_PORT1_EN_LBN 93
+  #define TX_VLAN5_PORT1_EN_WIDTH 1
+  #define TX_VLAN5_PORT0_EN_LBN 92
+  #define TX_VLAN5_PORT0_EN_WIDTH 1
+  #define TX_VLAN5_LBN 80
+  #define TX_VLAN5_WIDTH 12
+  #define TX_VLAN4_PORT1_EN_LBN 77
+  #define TX_VLAN4_PORT1_EN_WIDTH 1
+  #define TX_VLAN4_PORT0_EN_LBN 76
+  #define TX_VLAN4_PORT0_EN_WIDTH 1
+  #define TX_VLAN4_LBN 64
+  #define TX_VLAN4_WIDTH 12
+  #define TX_VLAN3_PORT1_EN_LBN 61
+  #define TX_VLAN3_PORT1_EN_WIDTH 1
+  #define TX_VLAN3_PORT0_EN_LBN 60
+  #define TX_VLAN3_PORT0_EN_WIDTH 1
+  #define TX_VLAN3_LBN 48
+  #define TX_VLAN3_WIDTH 12
+  #define TX_VLAN2_PORT1_EN_LBN 45
+  #define TX_VLAN2_PORT1_EN_WIDTH 1
+  #define TX_VLAN2_PORT0_EN_LBN 44
+  #define TX_VLAN2_PORT0_EN_WIDTH 1
+  #define TX_VLAN2_LBN 32
+  #define TX_VLAN2_WIDTH 12
+  #define TX_VLAN1_PORT1_EN_LBN 29
+  #define TX_VLAN1_PORT1_EN_WIDTH 1
+  #define TX_VLAN1_PORT0_EN_LBN 28
+  #define TX_VLAN1_PORT0_EN_WIDTH 1
+  #define TX_VLAN1_LBN 16
+  #define TX_VLAN1_WIDTH 12
+  #define TX_VLAN0_PORT1_EN_LBN 13
+  #define TX_VLAN0_PORT1_EN_WIDTH 1
+  #define TX_VLAN0_PORT0_EN_LBN 12
+  #define TX_VLAN0_PORT0_EN_WIDTH 1
+  #define TX_VLAN0_LBN 0
+  #define TX_VLAN0_WIDTH 12
+#define TX_FIL_CTL_REG_OFST 0xAF0 // Transmit filter control register
+  #define TX_MADR1_FIL_EN_LBN 65
+  #define TX_MADR1_FIL_EN_WIDTH 1
+  #define TX_MADR0_FIL_EN_LBN 64
+  #define TX_MADR0_FIL_EN_WIDTH 1
+  #define TX_IPFIL31_PORT1_EN_LBN 63
+  #define TX_IPFIL31_PORT1_EN_WIDTH 1
+  #define TX_IPFIL31_PORT0_EN_LBN 62
+  #define TX_IPFIL31_PORT0_EN_WIDTH 1
+  #define TX_IPFIL30_PORT1_EN_LBN 61
+  #define TX_IPFIL30_PORT1_EN_WIDTH 1
+  #define TX_IPFIL30_PORT0_EN_LBN 60
+  #define TX_IPFIL30_PORT0_EN_WIDTH 1
+  #define TX_IPFIL29_PORT1_EN_LBN 59
+  #define TX_IPFIL29_PORT1_EN_WIDTH 1
+  #define TX_IPFIL29_PORT0_EN_LBN 58
+  #define TX_IPFIL29_PORT0_EN_WIDTH 1
+  #define TX_IPFIL28_PORT1_EN_LBN 57
+  #define TX_IPFIL28_PORT1_EN_WIDTH 1
+  #define TX_IPFIL28_PORT0_EN_LBN 56
+  #define TX_IPFIL28_PORT0_EN_WIDTH 1
+  #define TX_IPFIL27_PORT1_EN_LBN 55
+  #define TX_IPFIL27_PORT1_EN_WIDTH 1
+  #define TX_IPFIL27_PORT0_EN_LBN 54
+  #define TX_IPFIL27_PORT0_EN_WIDTH 1
+  #define TX_IPFIL26_PORT1_EN_LBN 53
+  #define TX_IPFIL26_PORT1_EN_WIDTH 1
+  #define TX_IPFIL26_PORT0_EN_LBN 52
+  #define TX_IPFIL26_PORT0_EN_WIDTH 1
+  #define TX_IPFIL25_PORT1_EN_LBN 51
+  #define TX_IPFIL25_PORT1_EN_WIDTH 1
+  #define TX_IPFIL25_PORT0_EN_LBN 50
+  #define TX_IPFIL25_PORT0_EN_WIDTH 1
+  #define TX_IPFIL24_PORT1_EN_LBN 49
+  #define TX_IPFIL24_PORT1_EN_WIDTH 1
+  #define TX_IPFIL24_PORT0_EN_LBN 48
+  #define TX_IPFIL24_PORT0_EN_WIDTH 1
+  #define TX_IPFIL23_PORT1_EN_LBN 47
+  #define TX_IPFIL23_PORT1_EN_WIDTH 1
+  #define TX_IPFIL23_PORT0_EN_LBN 46
+  #define TX_IPFIL23_PORT0_EN_WIDTH 1
+  #define TX_IPFIL22_PORT1_EN_LBN 45
+  #define TX_IPFIL22_PORT1_EN_WIDTH 1
+  #define TX_IPFIL22_PORT0_EN_LBN 44
+  #define TX_IPFIL22_PORT0_EN_WIDTH 1
+  #define TX_IPFIL21_PORT1_EN_LBN 43
+  #define TX_IPFIL21_PORT1_EN_WIDTH 1
+  #define TX_IPFIL21_PORT0_EN_LBN 42
+  #define TX_IPFIL21_PORT0_EN_WIDTH 1
+  #define TX_IPFIL20_PORT1_EN_LBN 41
+  #define TX_IPFIL20_PORT1_EN_WIDTH 1
+  #define TX_IPFIL20_PORT0_EN_LBN 40
+  #define TX_IPFIL20_PORT0_EN_WIDTH 1
+  #define TX_IPFIL19_PORT1_EN_LBN 39
+  #define TX_IPFIL19_PORT1_EN_WIDTH 1
+  #define TX_IPFIL19_PORT0_EN_LBN 38
+  #define TX_IPFIL19_PORT0_EN_WIDTH 1
+  #define TX_IPFIL18_PORT1_EN_LBN 37
+  #define TX_IPFIL18_PORT1_EN_WIDTH 1
+  #define TX_IPFIL18_PORT0_EN_LBN 36
+  #define TX_IPFIL18_PORT0_EN_WIDTH 1
+  #define TX_IPFIL17_PORT1_EN_LBN 35
+  #define TX_IPFIL17_PORT1_EN_WIDTH 1
+  #define TX_IPFIL17_PORT0_EN_LBN 34
+  #define TX_IPFIL17_PORT0_EN_WIDTH 1
+  #define TX_IPFIL16_PORT1_EN_LBN 33
+  #define TX_IPFIL16_PORT1_EN_WIDTH 1
+  #define TX_IPFIL16_PORT0_EN_LBN 32
+  #define TX_IPFIL16_PORT0_EN_WIDTH 1
+  #define TX_IPFIL15_PORT1_EN_LBN 31
+  #define TX_IPFIL15_PORT1_EN_WIDTH 1
+  #define TX_IPFIL15_PORT0_EN_LBN 30
+  #define TX_IPFIL15_PORT0_EN_WIDTH 1
+  #define TX_IPFIL14_PORT1_EN_LBN 29
+  #define TX_IPFIL14_PORT1_EN_WIDTH 1
+  #define TX_IPFIL14_PORT0_EN_LBN 28
+  #define TX_IPFIL14_PORT0_EN_WIDTH 1
+  #define TX_IPFIL13_PORT1_EN_LBN 27
+  #define TX_IPFIL13_PORT1_EN_WIDTH 1
+  #define TX_IPFIL13_PORT0_EN_LBN 26
+  #define TX_IPFIL13_PORT0_EN_WIDTH 1
+  #define TX_IPFIL12_PORT1_EN_LBN 25
+  #define TX_IPFIL12_PORT1_EN_WIDTH 1
+  #define TX_IPFIL12_PORT0_EN_LBN 24
+  #define TX_IPFIL12_PORT0_EN_WIDTH 1
+  #define TX_IPFIL11_PORT1_EN_LBN 23
+  #define TX_IPFIL11_PORT1_EN_WIDTH 1
+  #define TX_IPFIL11_PORT0_EN_LBN 22
+  #define TX_IPFIL11_PORT0_EN_WIDTH 1
+  #define TX_IPFIL10_PORT1_EN_LBN 21
+  #define TX_IPFIL10_PORT1_EN_WIDTH 1
+  #define TX_IPFIL10_PORT0_EN_LBN 20
+  #define TX_IPFIL10_PORT0_EN_WIDTH 1
+  #define TX_IPFIL9_PORT1_EN_LBN 19
+  #define TX_IPFIL9_PORT1_EN_WIDTH 1
+  #define TX_IPFIL9_PORT0_EN_LBN 18
+  #define TX_IPFIL9_PORT0_EN_WIDTH 1
+  #define TX_IPFIL8_PORT1_EN_LBN 17
+  #define TX_IPFIL8_PORT1_EN_WIDTH 1
+  #define TX_IPFIL8_PORT0_EN_LBN 16
+  #define TX_IPFIL8_PORT0_EN_WIDTH 1
+  #define TX_IPFIL7_PORT1_EN_LBN 15
+  #define TX_IPFIL7_PORT1_EN_WIDTH 1
+  #define TX_IPFIL7_PORT0_EN_LBN 14
+  #define TX_IPFIL7_PORT0_EN_WIDTH 1
+  #define TX_IPFIL6_PORT1_EN_LBN 13
+  #define TX_IPFIL6_PORT1_EN_WIDTH 1
+  #define TX_IPFIL6_PORT0_EN_LBN 12
+  #define TX_IPFIL6_PORT0_EN_WIDTH 1
+  #define TX_IPFIL5_PORT1_EN_LBN 11
+  #define TX_IPFIL5_PORT1_EN_WIDTH 1
+  #define TX_IPFIL5_PORT0_EN_LBN 10
+  #define TX_IPFIL5_PORT0_EN_WIDTH 1
+  #define TX_IPFIL4_PORT1_EN_LBN 9
+  #define TX_IPFIL4_PORT1_EN_WIDTH 1
+  #define TX_IPFIL4_PORT0_EN_LBN 8
+  #define TX_IPFIL4_PORT0_EN_WIDTH 1
+  #define TX_IPFIL3_PORT1_EN_LBN 7
+  #define TX_IPFIL3_PORT1_EN_WIDTH 1
+  #define TX_IPFIL3_PORT0_EN_LBN 6
+  #define TX_IPFIL3_PORT0_EN_WIDTH 1
+  #define TX_IPFIL2_PORT1_EN_LBN 5
+  #define TX_IPFIL2_PORT1_EN_WIDTH 1
+  #define TX_IPFIL2_PORT0_EN_LBN 4
+  #define TX_IPFIL2_PORT0_EN_WIDTH 1
+  #define TX_IPFIL1_PORT1_EN_LBN 3
+  #define TX_IPFIL1_PORT1_EN_WIDTH 1
+  #define TX_IPFIL1_PORT0_EN_LBN 2
+  #define TX_IPFIL1_PORT0_EN_WIDTH 1
+  #define TX_IPFIL0_PORT1_EN_LBN 1
+  #define TX_IPFIL0_PORT1_EN_WIDTH 1
+  #define TX_IPFIL0_PORT0_EN_LBN 0
+  #define TX_IPFIL0_PORT0_EN_WIDTH 1
+#define TX_IPFIL_TBL_OFST 0xB00 // Transmit IP source address filter table
+  #define TX_IPFIL_MASK_LBN 32
+  #define TX_IPFIL_MASK_WIDTH 32
+  #define TX_IP_SRC_ADR_LBN 0
+  #define TX_IP_SRC_ADR_WIDTH 32
+#define TX_PACE_REG_A1_OFST 0xF80000 // Transmit pace control register
+#define TX_PACE_REG_B0_OFST 0xA90    // Transmit pace control register
+  #define TX_PACE_SB_AF_LBN 19
+  #define TX_PACE_SB_AF_WIDTH 10
+  #define TX_PACE_SB_NOTAF_LBN 9
+  #define TX_PACE_SB_NOTAF_WIDTH 10
+  #define TX_PACE_FB_BASE_LBN 5
+  #define TX_PACE_FB_BASE_WIDTH 4
+  #define TX_PACE_BIN_TH_LBN 0
+  #define TX_PACE_BIN_TH_WIDTH 5
+#define TX_PACE_TBL_A1_OFST 0xF80040 // Transmit pacing table
+#define TX_PACE_TBL_FIRST_QUEUE_A1 4
+#define TX_PACE_TBL_B0_OFST 0xF80000 // Transmit pacing table
+#define TX_PACE_TBL_FIRST_QUEUE_B0 0
+  #define TX_PACE_LBN 0
+  #define TX_PACE_WIDTH 5
+
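/*
 * Illustrative sketch (not part of this changeset): every register field
 * above is described by an _LBN (least-significant bit number) and _WIDTH
 * pair.  The helpers below show how such pairs are typically consumed;
 * the function names are assumptions, and uint64_t is assumed to come
 * from <linux/types.h> included earlier in the build.
 */
static inline uint64_t falcon_field_get(uint64_t reg, unsigned lbn,
                                        unsigned width)
{
	/* Mask off the field after shifting it down to bit 0. */
	return (reg >> lbn) & (width >= 64 ? ~0ULL : (1ULL << width) - 1);
}

/* Example: extract the queue-size encoding from a TX descriptor
 * pointer table entry value. */
static inline unsigned falcon_tx_descq_size(uint64_t entry)
{
	return (unsigned)falcon_field_get(entry, TX_DESCQ_SIZE_LBN,
	                                  TX_DESCQ_SIZE_WIDTH);
}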
+//////////////---- EE/Flash Registers C Header ----//////////////
+#define EE_SPI_HCMD_REG_KER_OFST 0x100 // SPI host command register
+#define EE_SPI_HCMD_REG_OFST 0x100 // SPI host command register
+  #define EE_SPI_HCMD_CMD_EN_LBN 31
+  #define EE_SPI_HCMD_CMD_EN_WIDTH 1
+  #define EE_WR_TIMER_ACTIVE_LBN 28
+  #define EE_WR_TIMER_ACTIVE_WIDTH 1
+  #define EE_SPI_HCMD_SF_SEL_LBN 24
+  #define EE_SPI_HCMD_SF_SEL_WIDTH 1
+  #define EE_SPI_HCMD_DABCNT_LBN 16
+  #define EE_SPI_HCMD_DABCNT_WIDTH 5
+  #define EE_SPI_HCMD_READ_LBN 15
+  #define EE_SPI_HCMD_READ_WIDTH 1
+  #define EE_SPI_HCMD_DUBCNT_LBN 12
+  #define EE_SPI_HCMD_DUBCNT_WIDTH 2
+  #define EE_SPI_HCMD_ADBCNT_LBN 8
+  #define EE_SPI_HCMD_ADBCNT_WIDTH 2
+  #define EE_SPI_HCMD_ENC_LBN 0
+  #define EE_SPI_HCMD_ENC_WIDTH 8
+#define EE_SPI_HADR_REG_KER_OFST 0X110 // SPI host address register
+#define EE_SPI_HADR_REG_OFST 0X110 // SPI host address register
+  #define EE_SPI_HADR_DUBYTE_LBN 24
+  #define EE_SPI_HADR_DUBYTE_WIDTH 8
+  #define EE_SPI_HADR_ADR_LBN 0
+  #define EE_SPI_HADR_ADR_WIDTH 24
+#define EE_SPI_HDATA_REG_KER_OFST 0x120 // SPI host data register
+#define EE_SPI_HDATA_REG_OFST 0x120 // SPI host data register
+  #define EE_SPI_HDATA3_LBN 96
+  #define EE_SPI_HDATA3_WIDTH 32
+  #define EE_SPI_HDATA2_LBN 64
+  #define EE_SPI_HDATA2_WIDTH 32
+  #define EE_SPI_HDATA1_LBN 32
+  #define EE_SPI_HDATA1_WIDTH 32
+  #define EE_SPI_HDATA0_LBN 0
+  #define EE_SPI_HDATA0_WIDTH 32
+#define EE_BASE_PAGE_REG_KER_OFST 0x130 // Expansion ROM base mirror register
+#define EE_BASE_PAGE_REG_OFST 0x130 // Expansion ROM base mirror register
+  #define EE_EXP_ROM_WINDOW_BASE_LBN 16
+  #define EE_EXP_ROM_WINDOW_BASE_WIDTH 13
+  #define EE_EXPROM_MASK_LBN 0
+  #define EE_EXPROM_MASK_WIDTH 13
+#define EE_VPD_CFG0_REG_KER_OFST 0X140 // SPI/VPD configuration register
+#define EE_VPD_CFG0_REG_OFST 0X140 // SPI/VPD configuration register
+  #define EE_SF_FASTRD_EN_LBN 127
+  #define EE_SF_FASTRD_EN_WIDTH 1
+  #define EE_SF_CLOCK_DIV_LBN 120
+  #define EE_SF_CLOCK_DIV_WIDTH 7
+  #define EE_VPD_WIP_POLL_LBN 119
+  #define EE_VPD_WIP_POLL_WIDTH 1
+  #define EE_VPDW_LENGTH_LBN 80
+  #define EE_VPDW_LENGTH_WIDTH 15
+  #define EE_VPDW_BASE_LBN 64
+  #define EE_VPDW_BASE_WIDTH 15
+  #define EE_VPD_WR_CMD_EN_LBN 56
+  #define EE_VPD_WR_CMD_EN_WIDTH 8
+  #define EE_VPD_BASE_LBN 32
+  #define EE_VPD_BASE_WIDTH 24
+  #define EE_VPD_LENGTH_LBN 16
+  #define EE_VPD_LENGTH_WIDTH 13
+  #define EE_VPD_AD_SIZE_LBN 8
+  #define EE_VPD_AD_SIZE_WIDTH 5
+  #define EE_VPD_ACCESS_ON_LBN 5
+  #define EE_VPD_ACCESS_ON_WIDTH 1
+#define EE_VPD_SW_CNTL_REG_KER_OFST 0X150 // VPD access SW control register
+#define EE_VPD_SW_CNTL_REG_OFST 0X150 // VPD access SW control register
+  #define EE_VPD_CYCLE_PENDING_LBN 31
+  #define EE_VPD_CYCLE_PENDING_WIDTH 1
+  #define EE_VPD_CYC_WRITE_LBN 28
+  #define EE_VPD_CYC_WRITE_WIDTH 1
+  #define EE_VPD_CYC_ADR_LBN 0
+  #define EE_VPD_CYC_ADR_WIDTH 15
+#define EE_VPD_SW_DATA_REG_KER_OFST 0x160 // VPD access SW data register
+#define EE_VPD_SW_DATA_REG_OFST 0x160 // VPD access SW data register
+  #define EE_VPD_CYC_DAT_LBN 0
+  #define EE_VPD_CYC_DAT_WIDTH 32
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/ef_vi_falcon_desc.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/ef_vi_falcon_desc.h      Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,43 @@
+//////////////---- Descriptors C Headers ----//////////////
+// Receive Kernel IP Descriptor
+  #define RX_KER_BUF_SIZE_LBN 48
+  #define RX_KER_BUF_SIZE_WIDTH 14
+  #define RX_KER_BUF_REGION_LBN 46
+  #define RX_KER_BUF_REGION_WIDTH 2
+      #define RX_KER_BUF_REGION0_DECODE 0
+      #define RX_KER_BUF_REGION1_DECODE 1
+      #define RX_KER_BUF_REGION2_DECODE 2
+      #define RX_KER_BUF_REGION3_DECODE 3
+  #define RX_KER_BUF_ADR_LBN 0
+  #define RX_KER_BUF_ADR_WIDTH 46
+// Receive User IP Descriptor
+  #define RX_USR_2BYTE_OFS_LBN 20
+  #define RX_USR_2BYTE_OFS_WIDTH 12
+  #define RX_USR_BUF_ID_LBN 0
+  #define RX_USR_BUF_ID_WIDTH 20
+// Transmit Kernel IP Descriptor
+  #define TX_KER_PORT_LBN 63
+  #define TX_KER_PORT_WIDTH 1
+  #define TX_KER_CONT_LBN 62
+  #define TX_KER_CONT_WIDTH 1
+  #define TX_KER_BYTE_CNT_LBN 48
+  #define TX_KER_BYTE_CNT_WIDTH 14
+  #define TX_KER_BUF_REGION_LBN 46
+  #define TX_KER_BUF_REGION_WIDTH 2
+      #define TX_KER_BUF_REGION0_DECODE 0
+      #define TX_KER_BUF_REGION1_DECODE 1
+      #define TX_KER_BUF_REGION2_DECODE 2
+      #define TX_KER_BUF_REGION3_DECODE 3
+  #define TX_KER_BUF_ADR_LBN 0
+  #define TX_KER_BUF_ADR_WIDTH 46
+// Transmit User IP Descriptor
+  #define TX_USR_PORT_LBN 47
+  #define TX_USR_PORT_WIDTH 1
+  #define TX_USR_CONT_LBN 46
+  #define TX_USR_CONT_WIDTH 1
+  #define TX_USR_BYTE_CNT_LBN 33
+  #define TX_USR_BYTE_CNT_WIDTH 13
+  #define TX_USR_BUF_ID_LBN 13
+  #define TX_USR_BUF_ID_WIDTH 20
+  #define TX_USR_BYTE_OFS_LBN 0
+  #define TX_USR_BYTE_OFS_WIDTH 13
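/*
 * Illustrative sketch (not part of this changeset): packing a transmit
 * kernel descriptor from the bit offsets above.  The function name and
 * the choice of a raw uint64_t are assumptions for illustration only.
 */
static inline uint64_t falcon_make_tx_ker_desc(uint64_t buf_addr,
                                               unsigned byte_cnt, int cont)
{
	uint64_t desc = 0;

	/* DMA address in bits [45:0], byte count in [61:48], continuation
	 * flag in bit 62; port and buffer-region bits left at zero. */
	desc |= (buf_addr & ((1ULL << TX_KER_BUF_ADR_WIDTH) - 1))
		<< TX_KER_BUF_ADR_LBN;
	desc |= ((uint64_t)(byte_cnt & ((1u << TX_KER_BYTE_CNT_WIDTH) - 1)))
		<< TX_KER_BYTE_CNT_LBN;
	desc |= ((uint64_t)(cont ? 1 : 0)) << TX_KER_CONT_LBN;
	return desc;
}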
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/ef_vi_falcon_event.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/ef_vi_falcon_event.h     Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,123 @@
+//////////////---- Events Format C Header ----//////////////
+//////////////---- Event entry ----//////////////
+  #define EV_CODE_LBN 60
+  #define EV_CODE_WIDTH 4
+      #define RX_IP_EV_DECODE 0
+      #define TX_IP_EV_DECODE 2
+      #define DRIVER_EV_DECODE 5
+      #define GLOBAL_EV_DECODE 6
+      #define DRV_GEN_EV_DECODE 7
+  #define EV_DATA_LBN 0
+  #define EV_DATA_WIDTH 60
+//////////////---- Receive IP events for both Kernel & User event queues ----//////////////
+  #define RX_EV_PKT_OK_LBN 56
+  #define RX_EV_PKT_OK_WIDTH 1
+  #define RX_EV_BUF_OWNER_ID_ERR_LBN 54
+  #define RX_EV_BUF_OWNER_ID_ERR_WIDTH 1
+  #define RX_EV_IP_HDR_CHKSUM_ERR_LBN 52
+  #define RX_EV_IP_HDR_CHKSUM_ERR_WIDTH 1
+  #define RX_EV_TCP_UDP_CHKSUM_ERR_LBN 51
+  #define RX_EV_TCP_UDP_CHKSUM_ERR_WIDTH 1
+  #define RX_EV_ETH_CRC_ERR_LBN 50
+  #define RX_EV_ETH_CRC_ERR_WIDTH 1
+  #define RX_EV_FRM_TRUNC_LBN 49
+  #define RX_EV_FRM_TRUNC_WIDTH 1
+  #define RX_EV_DRIB_NIB_LBN 48
+  #define RX_EV_DRIB_NIB_WIDTH 1
+  #define RX_EV_TOBE_DISC_LBN 47
+  #define RX_EV_TOBE_DISC_WIDTH 1
+  #define RX_EV_PKT_TYPE_LBN 44
+  #define RX_EV_PKT_TYPE_WIDTH 3
+      #define RX_EV_PKT_TYPE_ETH_DECODE 0
+      #define RX_EV_PKT_TYPE_LLC_DECODE 1
+      #define RX_EV_PKT_TYPE_JUMBO_DECODE 2
+      #define RX_EV_PKT_TYPE_VLAN_DECODE 3
+      #define RX_EV_PKT_TYPE_VLAN_LLC_DECODE 4
+      #define RX_EV_PKT_TYPE_VLAN_JUMBO_DECODE 5
+  #define RX_EV_HDR_TYPE_LBN 42
+  #define RX_EV_HDR_TYPE_WIDTH 2
+      #define RX_EV_HDR_TYPE_TCP_IPV4_DECODE 0
+      #define RX_EV_HDR_TYPE_UDP_IPV4_DECODE 1
+      #define RX_EV_HDR_TYPE_OTHER_IP_DECODE 2
+      #define RX_EV_HDR_TYPE_NON_IP_DECODE 3
+  #define RX_EV_DESC_Q_EMPTY_LBN 41
+  #define RX_EV_DESC_Q_EMPTY_WIDTH 1
+  #define RX_EV_MCAST_HASH_MATCH_LBN 40
+  #define RX_EV_MCAST_HASH_MATCH_WIDTH 1
+  #define RX_EV_MCAST_PKT_LBN 39
+  #define RX_EV_MCAST_PKT_WIDTH 1
+  #define RX_EV_Q_LABEL_LBN 32
+  #define RX_EV_Q_LABEL_WIDTH 5
+  #define RX_JUMBO_CONT_LBN 31
+  #define RX_JUMBO_CONT_WIDTH 1
+  #define RX_SOP_LBN 15
+  #define RX_SOP_WIDTH 1
+  #define RX_PORT_LBN 30
+  #define RX_PORT_WIDTH 1
+  #define RX_EV_BYTE_CNT_LBN 16
+  #define RX_EV_BYTE_CNT_WIDTH 14
+  #define RX_iSCSI_PKT_OK_LBN 14
+  #define RX_iSCSI_PKT_OK_WIDTH 1
+  #define RX_ISCSI_DDIG_ERR_LBN 13
+  #define RX_ISCSI_DDIG_ERR_WIDTH 1
+  #define RX_ISCSI_HDIG_ERR_LBN 12
+  #define RX_ISCSI_HDIG_ERR_WIDTH 1
+  #define RX_EV_DESC_PTR_LBN 0
+  #define RX_EV_DESC_PTR_WIDTH 12
+//////////////---- Transmit IP events for both Kernel & User event queues ----//////////////
+  #define TX_EV_PKT_ERR_LBN 38
+  #define TX_EV_PKT_ERR_WIDTH 1
+  #define TX_EV_PKT_TOO_BIG_LBN 37
+  #define TX_EV_PKT_TOO_BIG_WIDTH 1
+  #define TX_EV_Q_LABEL_LBN 32
+  #define TX_EV_Q_LABEL_WIDTH 5
+  #define TX_EV_PORT_LBN 16
+  #define TX_EV_PORT_WIDTH 1
+  #define TX_EV_WQ_FF_FULL_LBN 15
+  #define TX_EV_WQ_FF_FULL_WIDTH 1
+  #define TX_EV_BUF_OWNER_ID_ERR_LBN 14
+  #define TX_EV_BUF_OWNER_ID_ERR_WIDTH 1
+  #define TX_EV_COMP_LBN 12
+  #define TX_EV_COMP_WIDTH 1
+  #define TX_EV_DESC_PTR_LBN 0
+  #define TX_EV_DESC_PTR_WIDTH 12
+//////////////---- Char or Kernel driver events ----//////////////
+  #define DRIVER_EV_SUB_CODE_LBN 56
+  #define DRIVER_EV_SUB_CODE_WIDTH 4
+      #define TX_DESCQ_FLS_DONE_EV_DECODE 0x0
+      #define RX_DESCQ_FLS_DONE_EV_DECODE 0x1
+      #define EVQ_INIT_DONE_EV_DECODE 0x2
+      #define EVQ_NOT_EN_EV_DECODE 0x3
+      #define RX_DESCQ_FLSFF_OVFL_EV_DECODE 0x4
+      #define SRM_UPD_DONE_EV_DECODE 0x5
+      #define WAKE_UP_EV_DECODE 0x6
+      #define TX_PKT_NON_TCP_UDP_DECODE 0x9
+      #define TIMER_EV_DECODE 0xA
+      #define RX_DSC_ERROR_EV_DECODE 0xE
+  #define DRIVER_EV_TX_DESCQ_ID_LBN 0
+  #define DRIVER_EV_TX_DESCQ_ID_WIDTH 12
+  #define DRIVER_EV_RX_DESCQ_ID_LBN 0
+  #define DRIVER_EV_RX_DESCQ_ID_WIDTH 12
+  #define DRIVER_EV_EVQ_ID_LBN 0
+  #define DRIVER_EV_EVQ_ID_WIDTH 12
+  #define DRIVER_TMR_ID_LBN 0
+  #define DRIVER_TMR_ID_WIDTH 12
+  #define DRIVER_EV_SRM_UPD_LBN 0
+  #define DRIVER_EV_SRM_UPD_WIDTH 2
+      #define SRM_CLR_EV_DECODE 0
+      #define SRM_UPD_EV_DECODE 1
+      #define SRM_ILLCLR_EV_DECODE 2
+//////////////---- Global events. Sent to both event queue 0 and 4. ----//////////////
+  #define XFP_PHY_INTR_LBN 10
+  #define XFP_PHY_INTR_WIDTH 1
+  #define XG_PHY_INTR_LBN 9
+  #define XG_PHY_INTR_WIDTH 1
+  #define G_PHY1_INTR_LBN 8
+  #define G_PHY1_INTR_WIDTH 1
+  #define G_PHY0_INTR_LBN 7
+  #define G_PHY0_INTR_WIDTH 1
+//////////////---- Driver generated events ----//////////////
+  #define DRV_GEN_EV_CODE_LBN 60
+  #define DRV_GEN_EV_CODE_WIDTH 4
+  #define DRV_GEN_EV_DATA_LBN 0
+  #define DRV_GEN_EV_DATA_WIDTH 60
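/*
 * Illustrative sketch (not part of this changeset): the event code lives
 * in the top bits of each 64-bit event, so dispatch starts by extracting
 * EV_CODE.  Handler bodies are omitted; names here are placeholders.
 */
static inline unsigned falcon_ev_code(uint64_t ev)
{
	return (unsigned)((ev >> EV_CODE_LBN) & ((1u << EV_CODE_WIDTH) - 1));
}

static inline void falcon_dispatch_event(uint64_t ev)
{
	switch (falcon_ev_code(ev)) {
	case RX_IP_EV_DECODE:   /* receive completion          */ break;
	case TX_IP_EV_DECODE:   /* transmit completion         */ break;
	case DRIVER_EV_DECODE:  /* flush/wakeup/timer etc.     */ break;
	case GLOBAL_EV_DECODE:  /* PHY interrupt summary       */ break;
	case DRV_GEN_EV_DECODE: /* software-generated event    */ break;
	default:                /* unknown code                */ break;
	}
}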
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/ef_vi_internal.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/ef_vi_internal.h Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,256 @@
+/****************************************************************************
+ * Copyright 2002-2005: Level 5 Networks Inc.
+ * Copyright 2005-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications
+ *  <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *  <onload-dev@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+/*
+ * \author  djr
+ *  \brief  Really-and-truly-honestly internal stuff for libef.
+ *   \date  2004/06/13
+ */
+
+/*! \cidoxg_include_ci_ul */
+#ifndef __CI_EF_VI_INTERNAL_H__
+#define __CI_EF_VI_INTERNAL_H__
+
+
+/* These flags share space with enum ef_vi_flags. */
+#define EF_VI_BUG5692_WORKAROUND  0x10000
+
+
+/* ***********************************************************************
+ * COMPILATION CONTROL FLAGS (see ef_vi.h for "workaround" controls)
+ */
+
+#define EF_VI_DO_MAGIC_CHECKS 1
+
+
+/**********************************************************************
+ * Headers
+ */
+
+#include <etherfabric/ef_vi.h>
+#include "sysdep.h"
+#include "ef_vi_falcon.h"
+
+
+/**********************************************************************
+ * Debugging.
+ */
+
+#ifndef NDEBUG
+
+# define _ef_assert(exp, file, line) BUG_ON(!(exp));
+
+# define _ef_assert2(exp, x, y, file, line)  do {      \
+               if (unlikely(!(exp)))           \
+                       BUG();                          \
+       } while (0)
+
+#else
+
+# define _ef_assert(exp, file, line)
+# define _ef_assert2(e, x, y, file, line)
+
+#endif
+
+#define ef_assert(a)          do{ _ef_assert((a),__FILE__,__LINE__); } while(0)
+#define ef_assert_equal(a,b)  _ef_assert2((a)==(b),(a),(b),__FILE__,__LINE__)
+#define ef_assert_eq          ef_assert_equal
+#define ef_assert_lt(a,b)     _ef_assert2((a)<(b),(a),(b),__FILE__,__LINE__)
+#define ef_assert_le(a,b)     _ef_assert2((a)<=(b),(a),(b),__FILE__,__LINE__)
+#define ef_assert_nequal(a,b) _ef_assert2((a)!=(b),(a),(b),__FILE__,__LINE__)
+#define ef_assert_ne          ef_assert_nequal
+#define ef_assert_ge(a,b)     _ef_assert2((a)>=(b),(a),(b),__FILE__,__LINE__)
+#define ef_assert_gt(a,b)     _ef_assert2((a)>(b),(a),(b),__FILE__,__LINE__)
+
+/**********************************************************************
+ * Debug checks. ******************************************************
+ **********************************************************************/
+
+#ifdef NDEBUG
+# define EF_VI_MAGIC_SET(p, type)
+# define EF_VI_CHECK_VI(p)
+# define EF_VI_CHECK_EVENT_Q(p)
+# define EF_VI_CHECK_IOBUFSET(p)
+# define EF_VI_CHECK_FILTER(p)
+# define EF_VI_CHECK_SHMBUF(p)
+# define EF_VI_CHECK_PT_EP(p)
+#else
+# define EF_VI                    0x3
+# define EF_EPLOCK                0x6
+# define EF_IOBUFSET              0x9
+# define EF_FILTER                0xa
+# define EF_SHMBUF                0x11
+
+# define EF_VI_MAGIC(p, type)                          \
+       (((unsigned)(type) << 28) |                     \
+        (((unsigned)(intptr_t)(p)) & 0x0fffffffu))
+
+# if !EF_VI_DO_MAGIC_CHECKS
+#  define EF_VI_MAGIC_SET(p, type)
+#  define EF_VI_MAGIC_CHECK(p, type)
+# else
+#  define EF_VI_MAGIC_SET(p, type)                     \
+       do {                                            \
+               (p)->magic = EF_VI_MAGIC((p), (type));  \
+       } while (0)
+
+# define EF_VI_MAGIC_OKAY(p, type)                      \
+       ((p)->magic == EF_VI_MAGIC((p), (type)))
+
+# define EF_VI_MAGIC_CHECK(p, type)                     \
+       ef_assert(EF_VI_MAGIC_OKAY((p), (type)))
+
+#endif /* EF_VI_DO_MAGIC_CHECKS */
+
+# define EF_VI_CHECK_VI(p)                     \
+       ef_assert(p);                           \
+       EF_VI_MAGIC_CHECK((p), EF_VI);
+
+# define EF_VI_CHECK_EVENT_Q(p)                        \
+       ef_assert(p);                           \
+       EF_VI_MAGIC_CHECK((p), EF_VI);          \
+       ef_assert((p)->evq_base);               \
+       ef_assert((p)->evq_mask);
+
+# define EF_VI_CHECK_PT_EP(p)                  \
+       ef_assert(p);                           \
+       EF_VI_MAGIC_CHECK((p), EF_VI);          \
+       ef_assert((p)->ep_state);
+
+# define EF_VI_CHECK_IOBUFSET(p)               \
+       ef_assert(p);                           \
+       EF_VI_MAGIC_CHECK((p), EF_IOBUFSET)
+
+# define EF_VI_CHECK_FILTER(p)                 \
+       ef_assert(p);                           \
+       EF_VI_MAGIC_CHECK((p), EF_FILTER);
+
+# define EF_VI_CHECK_SHMBUF(p)                 \
+       ef_assert(p);                           \
+       EF_VI_MAGIC_CHECK((p), EF_SHMBUF);
+
+#endif
+
+#ifndef NDEBUG
+# define EF_DRIVER_MAGIC 0x00f00ba4
+# define EF_ASSERT_THIS_DRIVER_VALID(driver)                           \
+       do{ ef_assert(driver);                                          \
+               EF_VI_MAGIC_CHECK((driver), EF_DRIVER_MAGIC);           \
+               ef_assert((driver)->init);               }while(0)
+
+# define EF_ASSERT_DRIVER_VALID() EF_ASSERT_THIS_DRIVER_VALID(&ci_driver)
+#else
+# define EF_ASSERT_THIS_DRIVER_VALID(driver)
+# define EF_ASSERT_DRIVER_VALID()
+#endif
+
+
+/* *************************************
+ * Power of 2 FIFO
+ */
+
+#define EF_VI_FIFO2_M(f, x)  ((x) & ((f)->fifo_mask))
+#define ef_vi_fifo2_valid(f) ((f) && (f)->fifo && (f)->fifo_mask > 0 &&      \
+                             (f)->fifo_rd_i <= (f)->fifo_mask       && \
+                             (f)->fifo_wr_i <= (f)->fifo_mask       && \
+                             EF_VI_IS_POW2((f)->fifo_mask+1u))
+
+#define ef_vi_fifo2_init(f, cap)                       \
+       do{ ef_assert(EF_VI_IS_POW2((cap) + 1));        \
+               (f)->fifo_rd_i = (f)->fifo_wr_i = 0u;   \
+               (f)->fifo_mask = (cap);                 \
+       }while(0)
+
+#define ef_vi_fifo2_is_empty(f) ((f)->fifo_rd_i == (f)->fifo_wr_i)
+#define ef_vi_fifo2_capacity(f) ((f)->fifo_mask)
+#define ef_vi_fifo2_buf_size(f) ((f)->fifo_mask + 1u)
+#define ef_vi_fifo2_end(f)      ((f)->fifo + ef_vi_fifo2_buf_size(f))
+#define ef_vi_fifo2_peek(f)     ((f)->fifo[(f)->fifo_rd_i])
+#define ef_vi_fifo2_poke(f)     ((f)->fifo[(f)->fifo_wr_i])
+#define ef_vi_fifo2_num(f)   EF_VI_FIFO2_M((f),(f)->fifo_wr_i-(f)->fifo_rd_i)
+
+#define ef_vi_fifo2_wr_prev(f)                                         \
+       do{ (f)->fifo_wr_i = EF_VI_FIFO2_M((f), (f)->fifo_wr_i - 1u); }while(0)
+#define ef_vi_fifo2_wr_next(f)                                         \
+       do{ (f)->fifo_wr_i = EF_VI_FIFO2_M((f), (f)->fifo_wr_i + 1u); }while(0)
+#define ef_vi_fifo2_rd_adv(f, n)                                       \
+       do{ (f)->fifo_rd_i = EF_VI_FIFO2_M((f), (f)->fifo_rd_i + (n)); }while(0)
+#define ef_vi_fifo2_rd_prev(f)                                         \
+       do{ (f)->fifo_rd_i = EF_VI_FIFO2_M((f), (f)->fifo_rd_i - 1u); }while(0)
+#define ef_vi_fifo2_rd_next(f)                                         \
+       do{ (f)->fifo_rd_i = EF_VI_FIFO2_M((f), (f)->fifo_rd_i + 1u); }while(0)
+
+#define ef_vi_fifo2_put(f, v)                                          \
+       do{ ef_vi_fifo2_poke(f) = (v); ef_vi_fifo2_wr_next(f); }while(0)
+#define ef_vi_fifo2_get(f, pv)                                         \
+       do{ *(pv) = ef_vi_fifo2_peek(f); ef_vi_fifo2_rd_next(f); }while(0)
+
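/*
 * Illustrative usage of the power-of-2 FIFO macros above (not part of this
 * changeset).  The struct layout is an assumption that merely matches the
 * field names the macros expect: fifo, fifo_mask, fifo_rd_i, fifo_wr_i.
 * Capacity must be one less than a power of two.
 */
struct demo_fifo {
	int      *fifo;       /* storage with fifo_mask + 1 entries  */
	unsigned  fifo_mask;  /* capacity == buffer size - 1         */
	unsigned  fifo_rd_i;  /* read index                          */
	unsigned  fifo_wr_i;  /* write index                         */
};

static inline int demo_fifo_roundtrip(struct demo_fifo *f, int *storage16)
{
	int v;

	f->fifo = storage16;            /* 16-entry backing buffer         */
	f->fifo_mask = 15u;             /* as ef_vi_fifo2_init() would set */
	f->fifo_rd_i = f->fifo_wr_i = 0u;
	ef_vi_fifo2_put(f, 42);         /* write one entry                 */
	ef_vi_fifo2_get(f, &v);         /* read it back                    */
	return v;                       /* 42                              */
}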
+
+/* *********************************************************************
+ * Eventq handling
+ */
+
+typedef union {
+       uint64_t    u64;
+       struct {
+               uint32_t  a;
+               uint32_t  b;
+       } opaque;
+} ef_vi_event;
+
+
+#define EF_VI_EVENT_OFFSET(q, i)                                       \
+       (((q)->evq_state->evq_ptr - (i) * sizeof(ef_vi_event)) & (q)->evq_mask)
+
+#define EF_VI_EVENT_PTR(q, i)                                           \
+       ((ef_vi_event*) ((q)->evq_base + EF_VI_EVENT_OFFSET((q), (i))))
+
+/* *********************************************************************
+ * Miscellaneous goodies
+ */
+#ifdef NDEBUG
+# define EF_VI_DEBUG(x)
+#else
+# define EF_VI_DEBUG(x)            x
+#endif
+
+#define EF_VI_ROUND_UP(i, align)   (((i)+(align)-1u) & ~((align)-1u))
+#define EF_VI_ALIGN_FWD(p, align)  (((p)+(align)-1u) & ~((align)-1u))
+#define EF_VI_ALIGN_BACK(p, align) ((p) & ~((align)-1u))
+#define EF_VI_PTR_ALIGN_BACK(p, align)                                 \
+       ((char*)EF_VI_ALIGN_BACK(((intptr_t)(p)), ((intptr_t)(align))))
+#define EF_VI_IS_POW2(x)           ((x) && ! ((x) & ((x) - 1)))
+
+
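/* Worked values for the alignment helpers above (illustrative only, not
 * part of this changeset): EF_VI_ROUND_UP(10u, 8u) == 16u,
 * EF_VI_ALIGN_BACK(10u, 8u) == 8u, EF_VI_IS_POW2(64u) is true and
 * EF_VI_IS_POW2(0u) is false.  The alignment argument must itself be a
 * power of two. */
static inline int ef_vi_align_examples_hold(void)
{
	return EF_VI_ROUND_UP(10u, 8u) == 16u &&
	       EF_VI_ALIGN_BACK(10u, 8u) == 8u &&
	       EF_VI_IS_POW2(64u) && !EF_VI_IS_POW2(0u);
}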
+/* ******************************************************************** 
+ */
+
+extern void falcon_vi_init(ef_vi*, void* vvis ) EF_VI_HF;
+extern void ef_eventq_state_init(ef_vi* evq) EF_VI_HF;
+extern void __ef_init(void) EF_VI_HF;
+
+
+#endif  /* __CI_EF_VI_INTERNAL_H__ */
+
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/etherfabric/ef_vi.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/etherfabric/ef_vi.h      Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,665 @@
+/****************************************************************************
+ * Copyright 2002-2005: Level 5 Networks Inc.
+ * Copyright 2005-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications
+ *  <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *  <onload-dev@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+/*
+ *  \brief  Virtual Interface
+ *   \date  2007/05/16
+ */
+
+#ifndef __EFAB_EF_VI_H__
+#define __EFAB_EF_VI_H__
+
+
+/**********************************************************************
+ * Primitive types ****************************************************
+ **********************************************************************/
+
+/* We standardise on the types from stdint.h and synthesise these types
+ * for compilers/platforms that don't provide them */
+
+#  include <linux/types.h>
+# define EF_VI_ALIGN(x) __attribute__ ((aligned (x)))
+# define ef_vi_inline static inline
+
+
+
+/**********************************************************************
+ * Types **************************************************************
+ **********************************************************************/
+
+typedef uint32_t                ef_eventq_ptr;
+
+typedef uint64_t                ef_addr;
+typedef char*                   ef_vi_ioaddr_t;
+
+/**********************************************************************
+ * ef_event ***********************************************************
+ **********************************************************************/
+
+/*! \i_ef_vi A DMA request identifier.
+**
+** This is an integer token specified by the transport and associated
+** with a DMA request.  It is returned to the VI user with DMA completion
+** events.  It is typically used to identify the buffer associated with
+** the transfer.
+*/
+typedef int                    ef_request_id;
+
+typedef union {
+       uint64_t  u64[1];
+       uint32_t  u32[2];
+} ef_vi_qword;
+
+typedef ef_vi_qword             ef_hw_event;
+
+#define EF_REQUEST_ID_BITS      16u
+#define EF_REQUEST_ID_MASK      ((1u << EF_REQUEST_ID_BITS) - 1u)
+
+/*! \i_ef_event An [ef_event] is a token that identifies something that
+** has happened.  Examples include packets received, packets transmitted
+** and errors.
+*/
+typedef union {
+       struct {
+               ef_hw_event    ev;
+               unsigned       type       :16;
+       } generic;
+       struct {
+               ef_hw_event    ev;
+               unsigned       type       :16;
+               /*ef_request_id  request_id :EF_REQUEST_ID_BITS;*/
+               unsigned       q_id       :16;
+               unsigned       len        :16;
+               unsigned       flags      :16;
+       } rx;
+       struct {  /* This *must* have same layout as [rx]. */
+               ef_hw_event    ev;
+               unsigned       type       :16;
+               /*ef_request_id  request_id :EF_REQUEST_ID_BITS;*/
+               unsigned       q_id       :16;
+               unsigned       len        :16;
+               unsigned       flags      :16;
+               unsigned       subtype    :16;
+       } rx_discard;
+       struct {
+               ef_hw_event    ev;
+               unsigned       type       :16;
+               /*ef_request_id  request_id :EF_REQUEST_ID_BITS;*/
+               unsigned       q_id       :16;
+       } tx;
+       struct {
+               ef_hw_event    ev;
+               unsigned       type       :16;
+               /*ef_request_id  request_id :EF_REQUEST_ID_BITS;*/
+               unsigned       q_id       :16;
+               unsigned       subtype    :16;
+       } tx_error;
+       struct {
+               ef_hw_event    ev;
+               unsigned       type       :16;
+               unsigned       q_id       :16;
+       } rx_no_desc_trunc;
+       struct {
+               ef_hw_event    ev;
+               unsigned       type       :16;
+               unsigned       data;
+       } sw;
+} ef_event;
+
+
+#define EF_EVENT_TYPE(e)        ((e).generic.type)
+enum {
+       /** Good data was received. */
+       EF_EVENT_TYPE_RX,
+       /** Packets have been sent. */
+       EF_EVENT_TYPE_TX,
+       /** Data received and buffer consumed, but something is wrong. */
+       EF_EVENT_TYPE_RX_DISCARD,
+       /** Transmit of packet failed. */
+       EF_EVENT_TYPE_TX_ERROR,
+       /** Received packet was truncated due to lack of descriptors. */
+       EF_EVENT_TYPE_RX_NO_DESC_TRUNC,
+       /** Software generated event. */
+       EF_EVENT_TYPE_SW,
+       /** Event queue overflow. */
+       EF_EVENT_TYPE_OFLOW,
+};
+
+#define EF_EVENT_RX_BYTES(e)    ((e).rx.len)
+#define EF_EVENT_RX_Q_ID(e)     ((e).rx.q_id)
+#define EF_EVENT_RX_CONT(e)     ((e).rx.flags & EF_EVENT_FLAG_CONT)
+#define EF_EVENT_RX_SOP(e)      ((e).rx.flags & EF_EVENT_FLAG_SOP)
+#define EF_EVENT_RX_ISCSI_OKAY(e) ((e).rx.flags & EF_EVENT_FLAG_ISCSI_OK)
+#define EF_EVENT_FLAG_SOP       0x1
+#define EF_EVENT_FLAG_CONT      0x2
+#define EF_EVENT_FLAG_ISCSI_OK  0x4
+
+#define EF_EVENT_TX_Q_ID(e)     ((e).tx.q_id)
+
+#define EF_EVENT_RX_DISCARD_Q_ID(e)  ((e).rx_discard.q_id)
+#define EF_EVENT_RX_DISCARD_LEN(e)   ((e).rx_discard.len)
+#define EF_EVENT_RX_DISCARD_TYPE(e)  ((e).rx_discard.subtype)
+enum {
+       EF_EVENT_RX_DISCARD_CSUM_BAD,
+       EF_EVENT_RX_DISCARD_CRC_BAD,
+       EF_EVENT_RX_DISCARD_TRUNC,
+       EF_EVENT_RX_DISCARD_RIGHTS,
+       EF_EVENT_RX_DISCARD_OTHER,
+};
+
+#define EF_EVENT_TX_ERROR_Q_ID(e)    ((e).tx_error.q_id)
+#define EF_EVENT_TX_ERROR_TYPE(e)    ((e).tx_error.subtype)
+enum {
+       EF_EVENT_TX_ERROR_RIGHTS,
+       EF_EVENT_TX_ERROR_OFLOW,
+       EF_EVENT_TX_ERROR_2BIG,
+       EF_EVENT_TX_ERROR_BUS,
+};
+
+#define EF_EVENT_RX_NO_DESC_TRUNC_Q_ID(e)  ((e).rx_no_desc_trunc.q_id)
+
+#define EF_EVENT_SW_DATA_MASK   0xffff
+#define EF_EVENT_SW_DATA(e)     ((e).sw.data)
+
+#define EF_EVENT_FMT            "[ev:%x:%08x:%08x]"
+#define EF_EVENT_PRI_ARG(e)     (unsigned) (e).generic.type,    \
+               (unsigned) (e).generic.ev.u32[1],               \
+               (unsigned) (e).generic.ev.u32[0]
+
+#define EF_GET_HW_EV(e)         ((e).generic.ev)
+#define EF_GET_HW_EV_PTR(e)     (&(e).generic.ev)
+#define EF_GET_HW_EV_U64(e)     ((e).generic.ev.u64[0])
+
+
+/* ***************** */
+
+/*! Used by netif shared state. Must use types of explicit size. */
+typedef struct {
+       uint16_t              rx_last_desc_ptr;   /* for RX duplicates       */
+       uint8_t               bad_sop;            /* bad SOP detected        */
+       uint8_t               frag_num;           /* next fragment #, 0=>SOP */
+} ef_rx_dup_state_t;
+
+
+/* Max number of ports on any SF NIC. */
+#define EFAB_DMAQS_PER_EVQ_MAX 32
+
+typedef struct {
+       ef_eventq_ptr           evq_ptr;
+       int32_t               trashed;
+       ef_rx_dup_state_t     rx_dup_state[EFAB_DMAQS_PER_EVQ_MAX];
+} ef_eventq_state;
+
+
+/*! \i_ef_base [ef_iovec] is similar to the standard [struct iovec].  An
+** array of these is used to designate a scatter/gather list of I/O
+** buffers.
+*/
+typedef struct {
+       ef_addr                       iov_base EF_VI_ALIGN(8);
+       unsigned                      iov_len;
+} ef_iovec;
+
+/* Falcon constants */
+#define TX_EV_DESC_PTR_LBN 0
+
+/**********************************************************************
+ * ef_iobufset ********************************************************
+ **********************************************************************/
+
+/*! \i_ef_bufs An [ef_iobufset] is a collection of buffers to be used
+** with the NIC.
+*/
+typedef struct ef_iobufset {
+       unsigned                      magic;
+       unsigned                      bufs_mmap_bytes;
+       unsigned                      bufs_handle;
+       int                           bufs_ptr_off;
+       ef_addr                       bufs_addr;
+       unsigned                      bufs_size; /* size rounded to pow2 */
+       int                           bufs_num;
+       int                           faultonaccess;
+} ef_iobufset;
+
+
+/**********************************************************************
+ * ef_vi **************************************************************
+ **********************************************************************/
+
+enum ef_vi_flags {
+       EF_VI_RX_SCATTER        = 0x1,
+       EF_VI_ISCSI_RX_HDIG     = 0x2,
+       EF_VI_ISCSI_TX_HDIG     = 0x4,
+       EF_VI_ISCSI_RX_DDIG     = 0x8,
+       EF_VI_ISCSI_TX_DDIG     = 0x10,
+       EF_VI_TX_PHYS_ADDR      = 0x20,
+       EF_VI_RX_PHYS_ADDR      = 0x40,
+       EF_VI_TX_IP_CSUM_DIS    = 0x80,
+       EF_VI_TX_TCPUDP_CSUM_DIS= 0x100,
+       EF_VI_TX_TCPUDP_ONLY    = 0x200,
+       /* Flags in range 0xXXXX0000 are for internal use. */
+};
+
+typedef struct {
+       uint32_t  added;
+       uint32_t  removed;
+} ef_vi_txq_state;
+
+typedef struct {
+       uint32_t  added;
+       uint32_t  removed;
+} ef_vi_rxq_state;
+
+typedef struct {
+       uint32_t         mask;
+       void*            doorbell;
+       void*            descriptors;
+       uint16_t*        ids;
+       unsigned         misalign_mask;
+} ef_vi_txq;
+
+typedef struct {
+       uint32_t         mask;
+       void*            doorbell;
+       void*            descriptors;
+       uint16_t*        ids;
+} ef_vi_rxq;
+
+typedef struct {
+       ef_eventq_state  evq;
+       ef_vi_txq_state  txq;
+       ef_vi_rxq_state  rxq;
+       /* Followed by request id fifos. */
+} ef_vi_state;
+
+/*! \i_ef_vi  A virtual interface.
+**
+** An [ef_vi] represents a virtual interface on a specific NIC.  A
+** virtual interface is a collection of an event queue and two DMA queues
+** used to pass Ethernet frames between the transport implementation and
+** the network.
+*/
+typedef struct ef_vi {
+       unsigned                        magic;
+
+       unsigned                      vi_resource_id;
+       unsigned                      vi_resource_handle_hack;
+       unsigned                      vi_i;
+
+       char*                           vi_mem_mmap_ptr;
+       int                           vi_mem_mmap_bytes;
+       char*                           vi_io_mmap_ptr;
+       int                           vi_io_mmap_bytes;
+
+       ef_eventq_state*              evq_state;
+       char*                         evq_base;
+       unsigned                      evq_mask;
+       ef_vi_ioaddr_t                evq_timer_reg;
+
+       ef_vi_txq                     vi_txq;
+       ef_vi_rxq                     vi_rxq;
+       ef_vi_state*                  ep_state;
+       enum ef_vi_flags              vi_flags;
+} ef_vi;
+
+
+enum ef_vi_arch {
+       EF_VI_ARCH_FALCON,
+};
+
+
+struct ef_vi_nic_type {
+       unsigned char  arch;
+       char           variant;
+       unsigned char  revision;
+};
+
+
+/* This structure is opaque to the client & used to pass mapping data
+ * from the resource manager to the ef_vi lib. for ef_vi_init().
+ */
+struct vi_mappings {
+       uint32_t         signature;
+# define VI_MAPPING_VERSION   0x02  /*Byte: Increment me if struct altered*/
+# define VI_MAPPING_SIGNATURE (0xBA1150 + VI_MAPPING_VERSION)
+
+       struct ef_vi_nic_type nic_type;
+
+       int              vi_instance;
+
+       unsigned         evq_bytes;
+       char*            evq_base;
+       ef_vi_ioaddr_t   evq_timer_reg;
+
+       unsigned         rx_queue_capacity;
+       ef_vi_ioaddr_t   rx_dma_ef1;
+       char*            rx_dma_falcon;
+       ef_vi_ioaddr_t   rx_bell;
+
+       unsigned         tx_queue_capacity;
+       ef_vi_ioaddr_t   tx_dma_ef1;
+       char*            tx_dma_falcon;
+       ef_vi_ioaddr_t   tx_bell;
+};
+/* This is used by clients to allocate a suitably sized buffer for the 
+ * resource manager to fill & ef_vi_init() to use. */
+#define VI_MAPPINGS_SIZE (sizeof(struct vi_mappings))
+
+
+/**********************************************************************
+ * ef_config **********************************************************
+ **********************************************************************/
+
+struct ef_config_t {
+       int   log;                    /* debug logging level          */
+};
+
+extern struct ef_config_t  ef_config;
+
+
+/**********************************************************************
+ * ef_vi **************************************************************
+ **********************************************************************/
+
+/* Initialise [data_area] with information required to initialise an ef_vi.
+ * In the following, an unused param should be set to NULL. Note the case
+ * marked (*) of [iobuf_mmap] for falcon/driver; for normal driver this
+ * must be NULL.
+ *
+ * \param  data_area     [in,out] required, must ref at least VI_MAPPINGS_SIZE 
+ *                                bytes
+ * \param  evq_capacity  [in] number of events in event queue.  Specify 0 for
+ *                            no event queue.
+ * \param  rxq_capacity  [in] number of descriptors in RX DMA queue.  Specify
+ *                            0 for no RX queue.
+ * \param  txq_capacity  [in] number of descriptors in TX DMA queue.  Specify
+ *                            0 for no TX queue.
+ * \param  mmap_info     [in] mem-map info for resource
+ * \param  io_mmap       [in] ef1,    required
+ *                            falcon, required
+ * \param  iobuf_mmap    [in] ef1,    UL: unused
+ *                            falcon, UL: required
+ */
+extern void ef_vi_init_mapping_vi(void* data_area, struct ef_vi_nic_type,
+                                  unsigned rxq_capacity,
+                                  unsigned txq_capacity, int instance,
+                                  void* io_mmap, void* iobuf_mmap_rx,
+                                  void* iobuf_mmap_tx, enum ef_vi_flags);
+
+
+extern void ef_vi_init_mapping_evq(void* data_area, struct ef_vi_nic_type,
+                                   int instance, unsigned evq_bytes,
+                                   void* base, void* timer_reg);
+
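/*
 * Illustrative caller-side sketch (not part of this changeset): the
 * data_area handed to the helpers above is the opaque struct vi_mappings,
 * so callers size it with VI_MAPPINGS_SIZE.  The wrapper name and the -1
 * error return are assumptions for illustration.
 */
static inline int demo_describe_evq(struct ef_vi_nic_type nic_type,
                                    int instance, unsigned evq_bytes,
                                    void* evq_base, void* timer_reg,
                                    void* data_area, unsigned data_area_bytes)
{
	/* The buffer must reference at least VI_MAPPINGS_SIZE bytes. */
	if (data_area == NULL || data_area_bytes < VI_MAPPINGS_SIZE)
		return -1;
	ef_vi_init_mapping_evq(data_area, nic_type, instance,
	                       evq_bytes, evq_base, timer_reg);
	return 0;
}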
+ef_vi_inline unsigned ef_vi_resource_id(ef_vi* vi)
+{ 
+       return vi->vi_resource_id; 
+}
+
+ef_vi_inline enum ef_vi_flags ef_vi_flags(ef_vi* vi)
+{ 
+       return vi->vi_flags; 
+}
+
+
+/**********************************************************************
+ * Receive interface **************************************************
+ **********************************************************************/
+
+/*! \i_ef_vi Returns the amount of space in the RX descriptor ring.
+**
+** \return the amount of space in the queue.
+*/
+ef_vi_inline int ef_vi_receive_space(ef_vi* vi) 
+{
+       ef_vi_rxq_state* qs = &vi->ep_state->rxq;
+       return vi->vi_rxq.mask - (qs->added - qs->removed);
+}
+
+
+/*! \i_ef_vi Returns the fill level of the RX descriptor ring.
+**
+** \return the fill level of the queue.
+*/
+ef_vi_inline int ef_vi_receive_fill_level(ef_vi* vi) 
+{
+       ef_vi_rxq_state* qs = &vi->ep_state->rxq;
+       return qs->added - qs->removed;
+}
+
+
+ef_vi_inline int ef_vi_receive_capacity(ef_vi* vi)
+{ 
+       return vi->vi_rxq.mask;
+}
+
+/*! \i_ef_vi  Complete a receive operation.
+**
+** When a receive completion event is received, it should be passed to
+** this function.  The request-id for the buffer that the packet was
+** delivered to is returned.
+**
+** After this function returns, more space may be available in the
+** receive queue.
+*/
+extern ef_request_id ef_vi_receive_done(const ef_vi*, const ef_event*);
+
+/*! \i_ef_vi  Return request ID indicated by a receive event
+ */
+ef_vi_inline ef_request_id ef_vi_receive_request_id(const ef_vi* vi,
+                                                    const ef_event* ef_ev)
+{
+       const ef_vi_qword* ev = EF_GET_HW_EV_PTR(*ef_ev);
+       return ev->u32[0] & vi->vi_rxq.mask;
+}
+  
+
+/*! \i_ef_vi  Form a receive descriptor.
+**
+** If \c initial_rx_bytes is zero, use a reception size at least as large
+** as an MTU.
+*/
+extern int ef_vi_receive_init(ef_vi* vi, ef_addr addr, ef_request_id dma_id,
+                              int initial_rx_bytes);
+
+/*! \i_ef_vi  Submit initialised receive descriptors to the NIC. */
+extern void ef_vi_receive_push(ef_vi* vi);
+
+/*! \i_ef_vi  Post a buffer on the receive queue.
+**
+**   \return 0 on success, or -EAGAIN if the receive queue is full
+*/
+extern int ef_vi_receive_post(ef_vi*, ef_addr addr,
+                             ef_request_id dma_id);
+
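/*
 * Illustrative receive-refill sketch (not part of this changeset): post as
 * many buffers as there is ring space for with ef_vi_receive_init(), then
 * ring the doorbell once with ef_vi_receive_push().  Buffer addresses,
 * request ids and the function name are placeholders.
 */
static inline void demo_rx_refill(ef_vi* vi, const ef_addr* bufs, int n_bufs)
{
	int i, space = ef_vi_receive_space(vi);
	int n = n_bufs < space ? n_bufs : space;

	for (i = 0; i < n; ++i)
		ef_vi_receive_init(vi, bufs[i], (ef_request_id)i, 0);
	if (n > 0)
		ef_vi_receive_push(vi);
}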
+/**********************************************************************
+ * Transmit interface *************************************************
+ **********************************************************************/
+
+/*! \i_ef_vi Return the amount of space (in descriptors) in the transmit
+**           queue.
+**
+** \return the amount of space in the queue (in descriptors)
+*/
+ef_vi_inline int ef_vi_transmit_space(ef_vi* vi) 
+{
+       ef_vi_txq_state* qs = &vi->ep_state->txq;
+       return vi->vi_txq.mask - (qs->added - qs->removed);
+}
+
+
+/*! \i_ef_vi Returns the fill level of the TX descriptor ring.
+**
+** \return the fill level of the queue.
+*/
+ef_vi_inline int ef_vi_transmit_fill_level(ef_vi* vi)
+{
+       ef_vi_txq_state* qs = &vi->ep_state->txq;
+       return qs->added - qs->removed;
+}
+
+
+/*! \i_ef_vi Returns the total capacity of the TX descriptor ring.
+**
+** \return the capacity of the queue.
+*/
+ef_vi_inline int ef_vi_transmit_capacity(ef_vi* vi)
+{ 
+       return vi->vi_txq.mask;
+}
+
+
+/*! \i_ef_vi  Transmit a packet.
+**
+**   \param bytes must be greater than ETH_ZLEN.
+**   \return -EAGAIN if the transmit queue is full, or 0 on success
+*/
+extern int ef_vi_transmit(ef_vi*, ef_addr, int bytes, ef_request_id dma_id);
+
+/*! \i_ef_vi  Transmit a packet using a gather list.
+**
+**   \param iov_len must be greater than zero
+**   \param iov the first must be non-zero in length (but others need not)
+**
+**   \return -EAGAIN if the queue is full, or 0 on success
+*/
+extern int ef_vi_transmitv(ef_vi*, const ef_iovec* iov, int iov_len,
+                           ef_request_id dma_id);
+
+/*! \i_ef_vi  Initialise a DMA request.
+**
+** \return -EAGAIN if the queue is full, or 0 on success
+*/
+extern int ef_vi_transmit_init(ef_vi*, ef_addr, int bytes,
+                               ef_request_id dma_id);
+
+/*! \i_ef_vi  Initialise a DMA request.
+**
+** \return -EAGAIN if the queue is full, or 0 on success
+*/
+extern int ef_vi_transmitv_init(ef_vi*, const ef_iovec*, int iov_len,
+                                ef_request_id dma_id);
+
+/*! \i_ef_vi  Submit DMA requests to the NIC.
+**
+** The DMA requests must have been initialised using
+** ef_vi_transmit_init() or ef_vi_transmitv_init().
+*/
+extern void ef_vi_transmit_push(ef_vi*);
+
+
+/*! \i_ef_vi Maximum number of transmit completions per transmit event. */
+#define EF_VI_TRANSMIT_BATCH  64
+
+/*! \i_ef_vi Determine the set of [ef_request_id]s for each DMA request
+**           which has been completed by a given transmit completion
+**           event.
+**
+** \param ids must point to an array of length EF_VI_TRANSMIT_BATCH
+** \return the number of valid [ef_request_id]s (can be zero)
+*/
+extern int ef_vi_transmit_unbundle(ef_vi* ep, const ef_event*,
+                                   ef_request_id* ids);
+
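/*
 * Illustrative TX completion handling (not part of this changeset): one
 * transmit event can complete many DMA requests, so the completed request
 * ids are recovered in a batch.  What a caller does with each id (free or
 * recycle the buffer it names) is application-specific.
 */
static inline void demo_handle_tx_event(ef_vi* vi, const ef_event* ev)
{
	ef_request_id ids[EF_VI_TRANSMIT_BATCH];
	int i, n = ef_vi_transmit_unbundle(vi, ev, ids);

	for (i = 0; i < n; ++i)
		(void)ids[i];   /* release or recycle the buffer ids[i] names */
}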
+
+/*! \i_ef_event Returns true if ef_eventq_poll() will return event(s). */
+extern int ef_eventq_has_event(ef_vi* vi);
+
+/*! \i_ef_event Returns true if there are quite a few events in the event
+** queue.
+**
+** This looks ahead in the event queue, so has the property that it will
+** not ping-pong a cache-line when it is called concurrently with events
+** being delivered.
+*/
+extern int ef_eventq_has_many_events(ef_vi* evq, int look_ahead);
+
+/*! Type of function to handle unknown events arriving on the event queue.
+**  Return CI_TRUE iff the event has been handled.
+*/
+typedef int/*bool*/ ef_event_handler_fn(void* priv, ef_vi* evq, ef_event* ev);
+
+/*! Standard poll exception routine */
+extern int/*bool*/ ef_eventq_poll_exception(void* priv, ef_vi* evq,
+                                            ef_event* ev);
+
+/*! \i_ef_event  Retrieve events from the event queue, handle RX/TX events
+**  and pass any others to an exception handler function
+**
+**   \return The number of events retrieved.
+*/
+extern int ef_eventq_poll_evs(ef_vi* evq, ef_event* evs, int evs_len,
+                              ef_event_handler_fn *exception, void *expt_priv);
+
+/*! \i_ef_event  Retrieve events from the event queue.
+**
+**   \return The number of events retrieved.
+*/
+ef_vi_inline int ef_eventq_poll(ef_vi* evq, ef_event* evs, int evs_len)
+{
+       return ef_eventq_poll_evs(evq, evs, evs_len,
+                            &ef_eventq_poll_exception, (void*)0);
+}
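/*
 * Illustrative poll-and-dispatch loop (not part of this changeset):
 * ef_eventq_poll() fills an array of ef_event which the caller then
 * switches on by type.  The handling shown in comments is a placeholder.
 */
static inline void demo_poll_once(ef_vi* evq)
{
	ef_event evs[16];
	int i, n = ef_eventq_poll(evq, evs, 16);

	for (i = 0; i < n; ++i) {
		switch (EF_EVENT_TYPE(evs[i])) {
		case EF_EVENT_TYPE_RX:
			/* EF_EVENT_RX_Q_ID()/EF_EVENT_RX_BYTES() give the
			 * queue and length of the received data. */
			break;
		case EF_EVENT_TYPE_TX:
			/* Completed transmits; see the unbundle sketch
			 * following ef_vi_transmit_unbundle() above. */
			break;
		case EF_EVENT_TYPE_RX_DISCARD:
		case EF_EVENT_TYPE_TX_ERROR:
		default:
			/* Discards, errors and software events. */
			break;
		}
	}
}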
+
+/*! \i_ef_event Returns the capacity of an event queue. */
+ef_vi_inline int ef_eventq_capacity(ef_vi* vi) 
+{
+       return (vi->evq_mask + 1u) / sizeof(ef_hw_event);
+}
+
+/* Returns the instance ID of [vi] */
+ef_vi_inline unsigned ef_vi_instance(ef_vi* vi)
+{ return vi->vi_i; }
+
+
+/**********************************************************************
+ * Initialisation *****************************************************
+ **********************************************************************/
+
+/*! Return size of state buffer of an initialised VI. */
+extern int ef_vi_state_bytes(ef_vi*);
+
+/*! Return size of buffer needed for VI state given sizes of RX and TX
+** DMA queues.  Queue sizes must be legal sizes (power of 2), or 0 (no
+** queue).
+*/
+extern int ef_vi_calc_state_bytes(int rxq_size, int txq_size);
+
+/*! Initialise [ef_vi] from the provided resources. [vvis] must have been
+** created by ef_make_vi_data() & remains owned by the caller.
+*/
+extern void ef_vi_init(ef_vi*, void* vi_info, ef_vi_state* state,
+                       ef_eventq_state* evq_state, enum ef_vi_flags);
+
+extern void ef_vi_state_init(ef_vi*);
+extern void ef_eventq_state_init(ef_vi*);
+
+/*! Converts an efhw device arch to ef_vi_arch, or returns -1 if not
+** recognised.
+*/
+extern int  ef_vi_arch_from_efhw_arch(int efhw_arch);
+
+
+#endif /* __EFAB_EF_VI_H__ */
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/falcon_event.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/falcon_event.c   Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,346 @@
+/****************************************************************************
+ * Copyright 2002-2005: Level 5 Networks Inc.
+ * Copyright 2005-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications
+ *  <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *  <onload-dev@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+/*
+ * \author  djr
+ *  \brief  Routine to poll event queues.
+ *   \date  2003/03/04
+ */
+
+/*! \cidoxg_lib_ef */
+#include "ef_vi_internal.h"
+
+/* Be worried about this on byteswapped machines */
+/* Due to crazy chipsets, we see the event words being written in
+** arbitrary order (bug4539).  So test for presence of event must ensure
+** that both halves have changed from the null.
+*/
+# define EF_VI_IS_EVENT(evp)                                           \
+       ( (((evp)->opaque.a != (uint32_t)-1) &&                         \
+          ((evp)->opaque.b != (uint32_t)-1)) )
+
+
+#ifdef NDEBUG
+# define IS_DEBUG 0
+#else
+# define IS_DEBUG 1
+#endif
+
+
+/*! Check for RX events with inconsistent SOP/CONT
+**
+** Returns true if this event should be discarded
+*/
+ef_vi_inline int ef_eventq_is_rx_sop_cont_bad_efab(ef_vi* vi,
+                                                  const ef_vi_qword* ev)
+{
+       ef_rx_dup_state_t* rx_dup_state;
+       uint8_t* bad_sop;
+
+       unsigned label = QWORD_GET_U(RX_EV_Q_LABEL, *ev);
+       unsigned sop   = QWORD_TEST_BIT(RX_SOP, *ev);
+  
+       ef_assert(vi);
+       ef_assert_lt(label, EFAB_DMAQS_PER_EVQ_MAX);
+
+       rx_dup_state = &vi->evq_state->rx_dup_state[label];
+       bad_sop = &rx_dup_state->bad_sop;
+
+       if( ! ((vi->vi_flags & EF_VI_BUG5692_WORKAROUND) || IS_DEBUG) ) {
+               *bad_sop = (*bad_sop && !sop);
+       }
+       else {
+               unsigned cont  = QWORD_TEST_BIT(RX_JUMBO_CONT, *ev);
+               uint8_t *frag_num = &rx_dup_state->frag_num;
+
+               /* bad_sop should latch till the next sop */
+               *bad_sop = (*bad_sop && !sop) || ( !!sop != (*frag_num==0) );
+
+               /* we do not check the number of bytes relative to the
+                * fragment number and size of the user rx buffer here
+                * because we don't know the size of the user rx
+                * buffer - we probably should perform this check in
+                * the nearest code calling this though.
+                */
+               *frag_num = cont ? (*frag_num + 1) : 0;
+       }
+
+       return *bad_sop;
+}
+
+
+ef_vi_inline int falcon_rx_check_dup(ef_vi* evq, ef_event* ev_out,
+                                    const ef_vi_qword* ev)
+{
+       unsigned q_id = QWORD_GET_U(RX_EV_Q_LABEL, *ev);
+       unsigned desc_ptr = QWORD_GET_U(RX_EV_DESC_PTR, *ev);
+       ef_rx_dup_state_t* rx_dup_state = &evq->evq_state->rx_dup_state[q_id];
+
+       if(likely( desc_ptr != rx_dup_state->rx_last_desc_ptr )) {
+               rx_dup_state->rx_last_desc_ptr = desc_ptr;
+               return 0;
+       }
+
+       rx_dup_state->rx_last_desc_ptr = desc_ptr;
+       rx_dup_state->bad_sop = 1;
+#ifndef NDEBUG
+       rx_dup_state->frag_num = 0;
+#endif
+       BUG_ON(!QWORD_TEST_BIT(RX_EV_FRM_TRUNC, *ev));
+       BUG_ON( QWORD_TEST_BIT(RX_EV_PKT_OK, *ev));
+       BUG_ON(QWORD_GET_U(RX_EV_BYTE_CNT, *ev) != 0);
+       ev_out->rx_no_desc_trunc.type = EF_EVENT_TYPE_RX_NO_DESC_TRUNC;
+       ev_out->rx_no_desc_trunc.q_id = q_id;
+       return 1;
+}
+
+
+ef_vi_inline void falcon_rx_event(ef_event* ev_out, const ef_vi_qword* ev)
+{
+       if(likely( QWORD_TEST_BIT(RX_EV_PKT_OK, *ev) )) {
+               ev_out->rx.type = EF_EVENT_TYPE_RX;
+               ev_out->rx.q_id = QWORD_GET_U(RX_EV_Q_LABEL, *ev);
+               ev_out->rx.len  = QWORD_GET_U(RX_EV_BYTE_CNT, *ev);
+               if( QWORD_TEST_BIT(RX_SOP, *ev) )
+                       ev_out->rx.flags = EF_EVENT_FLAG_SOP;
+               else
+                       ev_out->rx.flags = 0;
+               if( QWORD_TEST_BIT(RX_JUMBO_CONT, *ev) )
+                       ev_out->rx.flags |= EF_EVENT_FLAG_CONT;
+               if( QWORD_TEST_BIT(RX_iSCSI_PKT_OK, *ev) )
+                       ev_out->rx.flags |= EF_EVENT_FLAG_ISCSI_OK;
+       }
+       else {
+               ev_out->rx_discard.type = EF_EVENT_TYPE_RX_DISCARD;
+               ev_out->rx_discard.q_id = QWORD_GET_U(RX_EV_Q_LABEL, *ev);
+               ev_out->rx_discard.len  = QWORD_GET_U(RX_EV_BYTE_CNT, *ev);
+#if 1  /* hack for ptloop compatibility: ?? TODO purge */
+               if( QWORD_TEST_BIT(RX_SOP, *ev) )
+                       ev_out->rx_discard.flags = EF_EVENT_FLAG_SOP;
+               else
+                       ev_out->rx_discard.flags = 0;
+               if( QWORD_TEST_BIT(RX_JUMBO_CONT, *ev) )
+                       ev_out->rx_discard.flags |= EF_EVENT_FLAG_CONT;
+               if( QWORD_TEST_BIT(RX_iSCSI_PKT_OK, *ev) )
+                       ev_out->rx_discard.flags |= EF_EVENT_FLAG_ISCSI_OK;
+#endif
+               /* Order matters here: more fundamental errors first. */
+               if( QWORD_TEST_BIT(RX_EV_BUF_OWNER_ID_ERR, *ev) )
+                       ev_out->rx_discard.subtype = 
+                               EF_EVENT_RX_DISCARD_RIGHTS;
+               else if( QWORD_TEST_BIT(RX_EV_FRM_TRUNC, *ev) )
+                       ev_out->rx_discard.subtype = 
+                               EF_EVENT_RX_DISCARD_TRUNC;
+               else if( QWORD_TEST_BIT(RX_EV_ETH_CRC_ERR, *ev) )
+                       ev_out->rx_discard.subtype = 
+                               EF_EVENT_RX_DISCARD_CRC_BAD;
+               else if( QWORD_TEST_BIT(RX_EV_IP_HDR_CHKSUM_ERR, *ev) )
+                       ev_out->rx_discard.subtype = 
+                               EF_EVENT_RX_DISCARD_CSUM_BAD;
+               else if( QWORD_TEST_BIT(RX_EV_TCP_UDP_CHKSUM_ERR, *ev) )
+                       ev_out->rx_discard.subtype = 
+                               EF_EVENT_RX_DISCARD_CSUM_BAD;
+               else
+                       ev_out->rx_discard.subtype = 
+                               EF_EVENT_RX_DISCARD_OTHER;
+       }
+}
+
+
+ef_vi_inline void falcon_tx_event(ef_event* ev_out, const ef_vi_qword* ev)
+{
+       /* Danger danger!  No matter what we ask for wrt batching, we
+       ** will get a batched event every 16 descriptors, and we also
+       ** get dma-queue-empty events.  i.e. Duplicates are expected.
+       **
+       ** In addition, if it's been requested in the descriptor, we
+       ** get an event per descriptor.  (We don't currently request
+       ** this).
+       */
+       if(likely( QWORD_TEST_BIT(TX_EV_COMP, *ev) )) {
+               ev_out->tx.type = EF_EVENT_TYPE_TX;
+               ev_out->tx.q_id = QWORD_GET_U(TX_EV_Q_LABEL, *ev);
+       }
+       else {
+               ev_out->tx_error.type = EF_EVENT_TYPE_TX_ERROR;
+               ev_out->tx_error.q_id = QWORD_GET_U(TX_EV_Q_LABEL, *ev);
+               if(likely( QWORD_TEST_BIT(TX_EV_BUF_OWNER_ID_ERR, *ev) ))
+                       ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_RIGHTS;
+               else if(likely( QWORD_TEST_BIT(TX_EV_WQ_FF_FULL, *ev) ))
+                       ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_OFLOW;
+               else if(likely( QWORD_TEST_BIT(TX_EV_PKT_TOO_BIG, *ev) ))
+                       ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_2BIG;
+               else if(likely( QWORD_TEST_BIT(TX_EV_PKT_ERR, *ev) ))
+                       ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_BUS;
+       }
+}
+
+
+static void mark_bad(ef_event* ev)
+{
+       ev->generic.ev.u64[0] &=~ ((uint64_t) 1u << RX_EV_PKT_OK_LBN);
+}
+
+
+int ef_eventq_poll_evs(ef_vi* evq, ef_event* evs, int evs_len,
+                      ef_event_handler_fn *exception, void *expt_priv)
+{
+       int evs_len_orig = evs_len;
+
+       EF_VI_CHECK_EVENT_Q(evq);
+       ef_assert(evs);
+       ef_assert_gt(evs_len, 0);
+
+       if(unlikely( EF_VI_IS_EVENT(EF_VI_EVENT_PTR(evq, 1)) ))
+               goto overflow;
+
+       do {
+               { /* Read the event out of the ring, then fiddle with
+                  * copied version.  Reason is that the ring is
+                  * likely to get pushed out of cache by another
+                  * event being delivered by hardware. */
+                       ef_vi_event* ev = EF_VI_EVENT_PTR(evq, 0);
+                       if( ! EF_VI_IS_EVENT(ev) )
+                               break;
+                       evs->generic.ev.u64[0] = cpu_to_le64 (ev->u64);
+                       evq->evq_state->evq_ptr += sizeof(ef_vi_event);
+                       ev->u64 = (uint64_t)(int64_t) -1;
+               }
+
+               /* Ugly: Exploit the fact that event code lies in top
+                * bits of event. */
+               ef_assert_ge(EV_CODE_LBN, 32u);
+               switch( evs->generic.ev.u32[1] >> (EV_CODE_LBN - 32u) ) {
+               case RX_IP_EV_DECODE:
+                       /* Look for duplicate desc_ptr: it signals
+                        * that a jumbo frame was truncated because we
+                        * ran out of descriptors. */
+                       if(unlikely( falcon_rx_check_dup
+                                          (evq, evs, &evs->generic.ev) )) {
+                               --evs_len;
+                               ++evs;
+                               break;
+                       }
+                       else {
+                               /* Cope with FalconA1 bugs where RX
+                                * gives inconsistent RX events.  Mark
+                                * events as bad until SOP becomes
+                                * consistent again.
+                                * ef_eventq_is_rx_sop_cont_bad() has
+                                * side effects - order is important.
+                                */
+                               if(unlikely
+                                  (ef_eventq_is_rx_sop_cont_bad_efab
+                                   (evq, &evs->generic.ev) )) {
+                                       mark_bad(evs);
+                               }
+                       }
+                       falcon_rx_event(evs, &evs->generic.ev);
+                       --evs_len;      
+                       ++evs;
+                       break;
+
+               case TX_IP_EV_DECODE:
+                       falcon_tx_event(evs, &evs->generic.ev);
+                       --evs_len;
+                       ++evs;
+                       break;
+
+               default:
+                       break;
+               }
+       } while( evs_len );
+
+       return evs_len_orig - evs_len;
+
+
+ overflow:
+       evs->generic.type = EF_EVENT_TYPE_OFLOW;
+       evs->generic.ev.u64[0] = (uint64_t)((int64_t)-1);
+       return 1;
+}
+
+
+int/*bool*/ ef_eventq_poll_exception(void* priv, ef_vi* evq, ef_event* ev)
+{
+       int /*bool*/ handled = 0;
+  
+       switch( ev->generic.ev.u32[1] >> (EV_CODE_LBN - 32u) ) {
+       case DRIVER_EV_DECODE:
+               if( QWORD_GET_U(DRIVER_EV_SUB_CODE, ev->generic.ev) ==
+                   EVQ_INIT_DONE_EV_DECODE )
+                       /* EVQ initialised event: ignore. */
+                       handled = 1;
+               break;
+       }
+       return handled;
+}
+
+
+void ef_eventq_iterate(ef_vi* vi,
+                      void (*fn)(void* arg, ef_vi*, int rel_pos,
+                                 int abs_pos, void* event),
+                      void* arg, int stop_at_end)
+{
+       int i, size_evs = (vi->evq_mask + 1) / sizeof(ef_vi_event);
+
+       for( i = 0; i < size_evs; ++i ) {
+               ef_vi_event* e = EF_VI_EVENT_PTR(vi, -i);
+               if( EF_VI_IS_EVENT(e) )
+                       fn(arg, vi, i, 
+                          EF_VI_EVENT_OFFSET(vi, -i) / sizeof(ef_vi_event),
+                          e);
+               else if( stop_at_end )
+                       break;
+       }
+}
+
+
+int ef_eventq_has_event(ef_vi* vi)
+{
+       return EF_VI_IS_EVENT(EF_VI_EVENT_PTR(vi, 0));
+}
+
+
+int ef_eventq_has_many_events(ef_vi* vi, int look_ahead)
+{
+       ef_assert_ge(look_ahead, 0);
+       return EF_VI_IS_EVENT(EF_VI_EVENT_PTR(vi, -look_ahead));
+}
+
+
+int ef_eventq_has_rx_event(ef_vi* vi)
+{
+       ef_vi_event* ev;
+       int i, n_evs = 0;
+
+       for( i = 0;  EF_VI_IS_EVENT(EF_VI_EVENT_PTR(vi, i)); --i ) {
+               ev = EF_VI_EVENT_PTR(vi, i);
+               if( EFVI_FALCON_EVENT_CODE(ev) == EF_EVENT_TYPE_RX )  n_evs++;
+       }
+       return n_evs;
+}
+
+/*! \cidoxg_end */
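
For context, a caller drains this event queue by polling in a loop and switching on the decoded type. The following is a minimal illustrative sketch only: example_poll is not part of the patch, and pairing ef_eventq_poll_evs with ef_eventq_poll_exception as the exception handler is assumed from their signatures.

    /* Illustrative only: drain up to 16 events and dispatch on type. */
    static void example_poll(ef_vi *evq)
    {
            ef_event evs[16];
            int i, n;

            n = ef_eventq_poll_evs(evq, evs, 16, ef_eventq_poll_exception, NULL);
            for (i = 0; i < n; i++) {
                    switch (EF_EVENT_TYPE(evs[i])) {
                    case EF_EVENT_TYPE_RX:
                            /* evs[i].rx.q_id, .len and .flags describe the frame */
                            break;
                    case EF_EVENT_TYPE_RX_DISCARD:
                            /* evs[i].rx_discard.subtype says why it was dropped */
                            break;
                    case EF_EVENT_TYPE_TX:
                            /* batched completion; see ef_vi_transmit_unbundle() */
                            break;
                    default:
                            break;
                    }
            }
    }
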
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/falcon_vi.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/falcon_vi.c      Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,465 @@
+/****************************************************************************
+ * Copyright 2002-2005: Level 5 Networks Inc.
+ * Copyright 2005-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications
+ *  <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *  <onload-dev@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+/*
+ * \author  djr, stg
+ *  \brief  Falcon-specific VI
+ *   \date  2006/11/30
+ */
+
+#include "ef_vi_internal.h"
+
+
+#define EFVI_FALCON_DMA_TX_FRAG                1
+
+
+/* TX descriptor for both physical and virtual packet transfers */
+typedef union {
+       uint32_t        dword[2];
+} ef_vi_falcon_dma_tx_buf_desc;
+typedef ef_vi_falcon_dma_tx_buf_desc ef_vi_falcon_dma_tx_phys_desc;
+
+
+/* RX descriptor for physical addressed transfers */
+typedef union {
+       uint32_t        dword[2];
+} ef_vi_falcon_dma_rx_phys_desc;
+
+
+/* RX descriptor for virtual packet transfers */
+typedef struct {
+       uint32_t        dword[1];
+} ef_vi_falcon_dma_rx_buf_desc;
+
+/* Buffer table index */
+typedef uint32_t               ef_vi_buffer_addr_t;
+
+ef_vi_inline int64_t dma_addr_to_u46(int64_t src_dma_addr)
+{
+       return (src_dma_addr & __FALCON_MASK(46, int64_t));
+}
+
+/*! Setup a physical address based descriptor with a specified length */
+ef_vi_inline void
+__falcon_dma_rx_calc_ip_phys(ef_vi_dma_addr_t dest_pa, 
+                            ef_vi_falcon_dma_rx_phys_desc *desc,
+                            int bytes)
+{
+       int region = 0;                 /* TODO fixme */
+       int64_t dest    = dma_addr_to_u46(dest_pa); /* lower 46 bits */
+
+       DWCHCK(__DW2(RX_KER_BUF_SIZE_LBN),  RX_KER_BUF_SIZE_WIDTH);
+       DWCHCK(__DW2(RX_KER_BUF_REGION_LBN),RX_KER_BUF_REGION_WIDTH);
+
+       LWCHK(RX_KER_BUF_ADR_LBN, RX_KER_BUF_ADR_WIDTH);
+
+       RANGECHCK(bytes,  RX_KER_BUF_SIZE_WIDTH);
+       RANGECHCK(region, RX_KER_BUF_REGION_WIDTH);
+
+       ef_assert(desc);
+
+       desc->dword[1] = ((bytes << __DW2(RX_KER_BUF_SIZE_LBN)) |
+                         (region << __DW2(RX_KER_BUF_REGION_LBN)) |
+                         (HIGH(dest,
+                               RX_KER_BUF_ADR_LBN, 
+                               RX_KER_BUF_ADR_WIDTH)));
+
+       desc->dword[0] = LOW(dest, 
+                            RX_KER_BUF_ADR_LBN, 
+                            RX_KER_BUF_ADR_WIDTH);
+}
+
+/*! Setup a virtual buffer descriptor for an IPMODE transfer */
+ef_vi_inline void
+__falcon_dma_tx_calc_ip_buf(unsigned buf_id, unsigned buf_ofs, unsigned bytes,
+                           int port, int frag, 
+                           ef_vi_falcon_dma_tx_buf_desc *desc)
+{
+       DWCHCK(__DW2(TX_USR_PORT_LBN), TX_USR_PORT_WIDTH);
+       DWCHCK(__DW2(TX_USR_CONT_LBN), TX_USR_CONT_WIDTH);
+       DWCHCK(__DW2(TX_USR_BYTE_CNT_LBN), TX_USR_BYTE_CNT_WIDTH);
+       LWCHK(RX_KER_BUF_ADR_LBN, RX_KER_BUF_ADR_WIDTH);
+       DWCHCK(TX_USR_BYTE_OFS_LBN, TX_USR_BYTE_OFS_WIDTH);
+
+       RANGECHCK(bytes,   TX_USR_BYTE_CNT_WIDTH);
+       RANGECHCK(port,    TX_USR_PORT_WIDTH);
+       RANGECHCK(frag,    TX_USR_CONT_WIDTH);
+       RANGECHCK(buf_id,  TX_USR_BUF_ID_WIDTH);
+       RANGECHCK(buf_ofs, TX_USR_BYTE_OFS_WIDTH);
+
+       ef_assert(desc);
+
+       desc->dword[1] = ((port   <<  __DW2(TX_USR_PORT_LBN))      | 
+                         (frag   <<  __DW2(TX_USR_CONT_LBN))      | 
+                         (bytes  <<  __DW2(TX_USR_BYTE_CNT_LBN))  |
+                         (HIGH(buf_id, 
+                               TX_USR_BUF_ID_LBN,
+                               TX_USR_BUF_ID_WIDTH)));
+
+       desc->dword[0] =  ((LOW(buf_id,
+                               TX_USR_BUF_ID_LBN,
+                               (TX_USR_BUF_ID_WIDTH))) |
+                          (buf_ofs << TX_USR_BYTE_OFS_LBN));
+}
+
+ef_vi_inline void
+falcon_dma_tx_calc_ip_buf_4k(unsigned buf_vaddr, unsigned bytes,
+                            int port, int frag, 
+                            ef_vi_falcon_dma_tx_buf_desc *desc)
+{
+       /* TODO FIXME [buf_vaddr] consists of the buffer index in the
+       ** high bits, and an offset in the low bits. Assumptions
+       ** permeate the code that these can be rolled into one 32bit
+       ** value, so this is currently preserved for Falcon. But we
+       ** should change to support 8K pages
+       */
+       unsigned buf_id =  EFVI_FALCON_BUFFER_4K_PAGE(buf_vaddr);
+       unsigned buf_ofs = EFVI_FALCON_BUFFER_4K_OFF(buf_vaddr);
+
+       __falcon_dma_tx_calc_ip_buf( buf_id, buf_ofs, bytes, port, frag, desc);
+}
+
+ef_vi_inline void
+falcon_dma_tx_calc_ip_buf(unsigned buf_vaddr, unsigned bytes, int port, 
+                         int frag, ef_vi_falcon_dma_tx_buf_desc *desc)
+{
+       falcon_dma_tx_calc_ip_buf_4k(buf_vaddr, bytes, port, frag, desc);
+}
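
To make the TODO above concrete: assuming EFVI_FALCON_BUFFER_4K_PAGE() and EFVI_FALCON_BUFFER_4K_OFF() split on a 4 KiB boundary, a buffer-table virtual address decomposes as in this illustrative example (the value is arbitrary).

    /* Illustrative only, assuming a 4 KiB split:
     *   buf_vaddr = 0x0001a123
     *   buf_id    = buf_vaddr >> 12    = 0x1a   (buffer table index)
     *   buf_ofs   = buf_vaddr & 0xfff  = 0x123  (offset within the buffer)
     */
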
+
+/*! Setup a virtual buffer based descriptor */
+ef_vi_inline void
+__falcon_dma_rx_calc_ip_buf(unsigned buf_id, unsigned buf_ofs, 
+                           ef_vi_falcon_dma_rx_buf_desc *desc)
+{ 
+       /* check alignment of buffer offset and pack */
+       ef_assert((buf_ofs & 0x1) == 0);
+
+       buf_ofs >>= 1;
+
+       DWCHCK(RX_USR_2BYTE_OFS_LBN, RX_USR_2BYTE_OFS_WIDTH);
+       DWCHCK(RX_USR_BUF_ID_LBN, RX_USR_BUF_ID_WIDTH);
+
+       RANGECHCK(buf_ofs, RX_USR_2BYTE_OFS_WIDTH);
+       RANGECHCK(buf_id,  RX_USR_BUF_ID_WIDTH);
+
+       ef_assert(desc);
+
+       desc->dword[0] = ((buf_ofs << RX_USR_2BYTE_OFS_LBN) | 
+                         (buf_id  << RX_USR_BUF_ID_LBN));
+}
+
+ef_vi_inline void
+falcon_dma_rx_calc_ip_buf_4k(unsigned buf_vaddr, 
+                            ef_vi_falcon_dma_rx_buf_desc *desc)
+{ 
+       /* TODO FIXME [buf_vaddr] consists of the buffer index in the
+       ** high bits, and an offset in the low bits. Assumptions
+       ** permeate the code that these can be rolled into one 32bit
+       ** value, so this is currently preserved for Falcon. But we
+       ** should change to support 8K pages
+       */
+       unsigned buf_id =  EFVI_FALCON_BUFFER_4K_PAGE(buf_vaddr);
+       unsigned buf_ofs = EFVI_FALCON_BUFFER_4K_OFF(buf_vaddr);
+
+       __falcon_dma_rx_calc_ip_buf(buf_id, buf_ofs, desc);
+}
+
+ef_vi_inline void
+falcon_dma_rx_calc_ip_buf(unsigned buf_vaddr, 
+                         ef_vi_falcon_dma_rx_buf_desc *desc)
+{ 
+       falcon_dma_rx_calc_ip_buf_4k(buf_vaddr, desc);
+}
+
+
+ef_vi_inline ef_vi_dma_addr_t ef_physaddr(ef_addr efaddr)
+{
+       return (ef_vi_dma_addr_t) efaddr;
+}
+
+
+/*! Convert between an ef_addr and a buffer table index
+**  Assert that this was not a physical address
+*/
+ef_vi_inline ef_vi_buffer_addr_t ef_bufaddr(ef_addr efaddr)
+{
+       ef_assert(efaddr < ((uint64_t)1 << 32) );
+
+       return (ef_vi_buffer_addr_t) efaddr;
+}
+
+
+/*! Setup a physical address based descriptor for an IPMODE transfer */
+ef_vi_inline void
+falcon_dma_tx_calc_ip_phys(ef_vi_dma_addr_t src_dma_addr, unsigned bytes, 
+                          int port, int frag,
+                          ef_vi_falcon_dma_tx_phys_desc *desc)
+{
+
+       int region = 0; /* FIXME */
+       int64_t src    = dma_addr_to_u46(src_dma_addr); /* lower 46 bits */
+
+       DWCHCK(__DW2(TX_KER_PORT_LBN),      TX_KER_PORT_WIDTH);
+       DWCHCK(__DW2(TX_KER_CONT_LBN),      TX_KER_CONT_WIDTH);
+       DWCHCK(__DW2(TX_KER_BYTE_CNT_LBN),  TX_KER_BYTE_CNT_WIDTH);
+       DWCHCK(__DW2(TX_KER_BUF_REGION_LBN),TX_KER_BUF_REGION_WIDTH);
+
+       LWCHK(TX_KER_BUF_ADR_LBN, TX_KER_BUF_ADR_WIDTH);
+
+       RANGECHCK(port,   TX_KER_PORT_WIDTH);
+       RANGECHCK(frag,   TX_KER_CONT_WIDTH);
+       RANGECHCK(bytes,  TX_KER_BYTE_CNT_WIDTH);
+       RANGECHCK(region, TX_KER_BUF_REGION_WIDTH);
+
+       desc->dword[1] = ((port   <<  __DW2(TX_KER_PORT_LBN))      | 
+                         (frag   <<  __DW2(TX_KER_CONT_LBN))      | 
+                         (bytes  <<  __DW2(TX_KER_BYTE_CNT_LBN))  | 
+                         (region << __DW2(TX_KER_BUF_REGION_LBN)) |
+                         (HIGH(src,
+                               TX_KER_BUF_ADR_LBN, 
+                               TX_KER_BUF_ADR_WIDTH)));
+
+       ef_assert_equal(TX_KER_BUF_ADR_LBN, 0);
+       desc->dword[0] = (uint32_t) src_dma_addr;
+}
+
+
+void falcon_vi_init(ef_vi* vi, void* vvis)
+{
+       struct vi_mappings *vm = (struct vi_mappings*)vvis;
+       uint16_t* ids;
+
+       ef_assert(vi);
+       ef_assert(vvis);
+       ef_assert_equal(vm->signature, VI_MAPPING_SIGNATURE);
+       ef_assert_equal(vm->nic_type.arch, EF_VI_ARCH_FALCON);
+
+       /* Initialise masks to zero, so that ef_vi_state_init() will
+       ** not do any harm when we don't have DMA queues. */
+       vi->vi_rxq.mask = vi->vi_txq.mask = 0;
+
+       /* Used for BUG5391_WORKAROUND. */
+       vi->vi_txq.misalign_mask = 0;
+
+       /* Initialise doorbell addresses to a distinctive small value
+       ** which will cause a segfault, to trap doorbell pushes to VIs
+       ** without DMA queues. */
+       vi->vi_rxq.doorbell = vi->vi_txq.doorbell = (ef_vi_ioaddr_t)0xdb;
+
+       ids = (uint16_t*) (vi->ep_state + 1);
+
+       if( vm->tx_queue_capacity ) {
+               vi->vi_txq.mask = vm->tx_queue_capacity - 1;
+               vi->vi_txq.doorbell = vm->tx_bell + 12;
+               vi->vi_txq.descriptors = vm->tx_dma_falcon;
+               vi->vi_txq.ids = ids;
+               ids += vi->vi_txq.mask + 1;
+               /* Check that the id fifo fits in the space allocated. */
+               ef_assert_le((char*) (vi->vi_txq.ids + vm->tx_queue_capacity),
+                            (char*) vi->ep_state
+                            + ef_vi_calc_state_bytes(vm->rx_queue_capacity,
+                                                     vm->tx_queue_capacity));
+       }
+       if( vm->rx_queue_capacity ) {
+               vi->vi_rxq.mask = vm->rx_queue_capacity - 1;
+               vi->vi_rxq.doorbell = vm->rx_bell + 12;
+               vi->vi_rxq.descriptors = vm->rx_dma_falcon;
+               vi->vi_rxq.ids = ids;
+               /* Check that the id fifo fits in the space allocated. */
+               ef_assert_le((char*) (vi->vi_rxq.ids + vm->rx_queue_capacity),
+                            (char*) vi->ep_state
+                            + ef_vi_calc_state_bytes(vm->rx_queue_capacity,
+                                                     vm->tx_queue_capacity));
+       }
+
+       if( vm->nic_type.variant == 'A' ) {
+               vi->vi_txq.misalign_mask = 15;    /* BUG5391_WORKAROUND */
+               vi->vi_flags |= EF_VI_BUG5692_WORKAROUND;
+       }
+}
+
+
+int ef_vi_transmitv_init(ef_vi* vi, const ef_iovec* iov, int iov_len,
+                        ef_request_id dma_id)
+{
+       ef_vi_txq* q = &vi->vi_txq;
+       ef_vi_txq_state* qs = &vi->ep_state->txq;
+       ef_vi_falcon_dma_tx_buf_desc* dp;
+       unsigned len, dma_len, di;
+       unsigned added_save = qs->added;
+       ef_addr dma_addr;
+       unsigned last_len = 0;
+
+       ef_assert(iov_len > 0);
+       ef_assert(iov);
+       ef_assert_equal((dma_id & EF_REQUEST_ID_MASK), dma_id);
+       ef_assert_nequal(dma_id, 0xffff);
+
+       dma_addr = iov->iov_base;
+       len = iov->iov_len;
+
+       if( vi->vi_flags & EF_VI_ISCSI_TX_DDIG ) {
+               /* Last 4 bytes of placeholder for digest must be
+                * removed for h/w */
+               ef_assert(len > 4);
+               last_len = iov[iov_len - 1].iov_len;
+               if( last_len <= 4 ) {
+                       ef_assert(iov_len > 1);
+                       --iov_len;
+                       last_len = iov[iov_len - 1].iov_len - (4 - last_len);
+               }
+               else {
+                       last_len = iov[iov_len - 1].iov_len - 4;
+               }
+               if( iov_len == 1 )
+                       len = last_len;
+       }
+
+       while( 1 ) {
+               if( qs->added - qs->removed >= q->mask ) {
+                       qs->added = added_save;
+                       return -EAGAIN;
+               }
+
+               dma_len = (~((unsigned) dma_addr) & 0xfff) + 1;
+               if( dma_len > len )  dma_len = len;
+               { /* BUG5391_WORKAROUND */
+                       unsigned misalign = 
+                               (unsigned) dma_addr & q->misalign_mask;
+                       if( misalign && dma_len + misalign > 512 )
+                               dma_len = 512 - misalign;
+               }
+
+               di = qs->added++ & q->mask;
+               dp = (ef_vi_falcon_dma_tx_buf_desc*) q->descriptors + di;
+               if( vi->vi_flags & EF_VI_TX_PHYS_ADDR )
+                       falcon_dma_tx_calc_ip_phys
+                               (ef_physaddr(dma_addr), dma_len, /*port*/ 0,
+                                (iov_len == 1 && dma_len == len) ? 0 :
+                                EFVI_FALCON_DMA_TX_FRAG, dp);
+               else
+                       falcon_dma_tx_calc_ip_buf
+                               (ef_bufaddr(dma_addr), dma_len, /*port*/ 0,
+                                (iov_len == 1 && dma_len == len) ? 0 :
+                                EFVI_FALCON_DMA_TX_FRAG, dp);
+
+               dma_addr += dma_len;
+               len -= dma_len;
+
+               if( len == 0 ) {
+                       if( --iov_len == 0 )  break;
+                       ++iov;
+                       dma_addr = iov->iov_base;
+                       len = iov->iov_len;
+                       if( (vi->vi_flags & EF_VI_ISCSI_TX_DDIG) &&
+                           (iov_len == 1) )
+                               len = last_len;
+               }
+       }
+
+       q->ids[di] = (uint16_t) dma_id;
+       return 0;
+}
+
+
+void ef_vi_transmit_push(ef_vi* vi)
+{
+       ef_vi_wiob();
+       writel((vi->ep_state->txq.added & vi->vi_txq.mask) <<
+               __DW4(TX_DESC_WPTR_LBN),
+               vi->vi_txq.doorbell);
+}
+
+
+/*! The value of initial_rx_bytes is used to set RX_KER_BUF_SIZE in an initial
+**  receive descriptor here if physical addressing is being used. A value of
+**  zero represents 16384 bytes.  This is okay, because caller must provide a
+**  buffer that is > MTU, and the MAC should filter anything bigger than that.
+*/
+int ef_vi_receive_init(ef_vi* vi, ef_addr addr, ef_request_id dma_id,
+                      int initial_rx_bytes)
+{
+       ef_vi_rxq* q = &vi->vi_rxq;
+       ef_vi_rxq_state* qs = &vi->ep_state->rxq;
+       unsigned di;
+
+       if( ef_vi_receive_space(vi) ) {
+               di = qs->added++ & q->mask;
+               ef_assert_equal(q->ids[di], 0xffff);
+               q->ids[di] = (uint16_t) dma_id;
+
+               if( ! (vi->vi_flags & EF_VI_RX_PHYS_ADDR) ) {
+                       ef_vi_falcon_dma_rx_buf_desc* dp;
+                       dp = (ef_vi_falcon_dma_rx_buf_desc*) 
+                               q->descriptors + di;
+                       falcon_dma_rx_calc_ip_buf(ef_bufaddr(addr), dp);
+               }
+               else {
+                       ef_vi_falcon_dma_rx_phys_desc* dp;
+                       dp = (ef_vi_falcon_dma_rx_phys_desc*) 
+                               q->descriptors + di;
+                       __falcon_dma_rx_calc_ip_phys(addr, dp,
+                                                    initial_rx_bytes);
+               }
+
+               return 0;
+       }
+
+       return -EAGAIN;
+}
+
+
+void ef_vi_receive_push(ef_vi* vi)
+{
+       ef_vi_wiob();
+       writel ((vi->ep_state->rxq.added & vi->vi_rxq.mask) <<
+               __DW4(RX_DESC_WPTR_LBN),
+               vi->vi_rxq.doorbell);
+}
+
+
+ef_request_id ef_vi_receive_done(const ef_vi* vi, const ef_event* ef_ev)
+{
+       const ef_vi_qword* ev = EF_GET_HW_EV_PTR(*ef_ev);
+       unsigned di = ev->u32[0] & vi->vi_rxq.mask;
+       ef_request_id rq_id;
+
+       ef_assert(EF_EVENT_TYPE(*ef_ev) == EF_EVENT_TYPE_RX ||
+                 EF_EVENT_TYPE(*ef_ev) == EF_EVENT_TYPE_RX_DISCARD);
+
+       /* Detect spurious / duplicate RX events.  We may need to modify this
+       ** code so that we are robust if they happen. */
+       ef_assert_equal(di, vi->ep_state->rxq.removed & vi->vi_rxq.mask);
+
+       /* We only support 1 port: so events should be in order. */
+       ef_assert(vi->vi_rxq.ids[di] != 0xffff);
+
+       rq_id = vi->vi_rxq.ids[di];
+       vi->vi_rxq.ids[di] = 0xffff;
+       ++vi->ep_state->rxq.removed;
+       return rq_id;
+}
+
+/*! \cidoxg_end */
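
For illustration, receive descriptors are posted in batches and the doorbell is rung once at the end. The sketch below shows that pattern; example_rx_refill, its buffer array and its dma_id scheme are placeholders, not part of the patch.

    /* Illustrative only: post as many RX buffers as the ring accepts,
     * then ring the doorbell once. */
    static void example_rx_refill(ef_vi *vi, const ef_addr *bufs, int n_bufs)
    {
            int i, posted = 0;

            for (i = 0; i < n_bufs; i++) {
                    if (ef_vi_receive_init(vi, bufs[i], i, 0) != 0)
                            break;          /* -EAGAIN: ring full */
                    posted++;
            }
            if (posted)
                    ef_vi_receive_push(vi);
    }
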
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/pt_tx.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/pt_tx.c  Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,91 @@
+/****************************************************************************
+ * Copyright 2002-2005: Level 5 Networks Inc.
+ * Copyright 2005-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications
+ *  <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *  <onload-dev@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+/*
+ * \author  djr
+ *  \brief  Packet-mode transmit interface.
+ *   \date  2003/04/02
+ */
+
+/*! \cidoxg_lib_ef */
+#include "ef_vi_internal.h"
+
+
+int ef_vi_transmit_init(ef_vi* vi, ef_addr base, int len, ef_request_id dma_id)
+{
+       ef_iovec iov = { base, len };
+       return ef_vi_transmitv_init(vi, &iov, 1, dma_id);
+}
+
+
+int ef_vi_transmit(ef_vi* vi, ef_addr base, int len, ef_request_id dma_id)
+{
+       ef_iovec iov = { base, len };
+       int rc = ef_vi_transmitv_init(vi, &iov, 1, dma_id);
+       if( rc == 0 )  ef_vi_transmit_push(vi);
+       return rc;
+}
+
+
+int ef_vi_transmitv(ef_vi* vi, const ef_iovec* iov, int iov_len,
+                    ef_request_id dma_id)
+{
+       int rc = ef_vi_transmitv_init(vi, iov, iov_len, dma_id);
+       if( rc == 0 )  ef_vi_transmit_push(vi);
+       return rc;
+}
+
+
+int ef_vi_transmit_unbundle(ef_vi* vi, const ef_event* __ev,
+                           ef_request_id* ids)
+{
+       ef_request_id* ids_in = ids;
+       ef_vi_txq* q = &vi->vi_txq;
+       ef_vi_txq_state* qs = &vi->ep_state->txq;
+       const ef_vi_qword* ev = EF_GET_HW_EV_PTR(*__ev);
+       unsigned i, stop = (ev->u32[0] + 1) & q->mask;
+
+       ef_assert(EF_EVENT_TYPE(*__ev) == EF_EVENT_TYPE_TX ||
+                 EF_EVENT_TYPE(*__ev) == EF_EVENT_TYPE_TX_ERROR);
+
+       /* Shouldn't be batching more than 64 descriptors, and should not go
+       ** backwards. */
+       ef_assert_le((((ev->u32[0] + 1) - qs->removed) & q->mask), 64);
+       /* Should not complete more than we've posted. */
+       ef_assert_le((((ev->u32[0] + 1) - qs->removed) & q->mask),
+                    qs->added - qs->removed);
+
+       for( i = qs->removed & q->mask; i != stop; i = ++qs->removed & q->mask )
+               if( q->ids[i] != 0xffff ) {
+                       *ids++ = q->ids[i];
+                       q->ids[i] = 0xffff;
+               }
+
+       ef_assert_le(ids - ids_in, EF_VI_TRANSMIT_BATCH);
+
+       return (int) (ids - ids_in);
+}
+
+/*! \cidoxg_end */
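
Since a single TX event acknowledges a batch of descriptors, completion handling recovers the request ids in bulk. A minimal sketch of the intended usage follows; example_tx_complete is illustrative, and EF_VI_TRANSMIT_BATCH bounds the batch as asserted above.

    /* Illustrative only: recover the request ids acknowledged by one
     * TX event and release the associated buffers. */
    static void example_tx_complete(ef_vi *vi, const ef_event *ev)
    {
            ef_request_id ids[EF_VI_TRANSMIT_BATCH];
            int i, n;

            n = ef_vi_transmit_unbundle(vi, ev, ids);
            for (i = 0; i < n; i++) {
                    /* ids[i] was the dma_id passed to ef_vi_transmit*();
                     * free or recycle that buffer here. */
            }
    }
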
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/sysdep.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/sysdep.h Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,184 @@
+/****************************************************************************
+ * Copyright 2002-2005: Level 5 Networks Inc.
+ * Copyright 2005-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications
+ *  <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *  <onload-dev@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+/*
+ * \author  stg
+ *  \brief  System dependent support for ef vi lib
+ *   \date  2007/05/10
+ */
+
+/*! \cidoxg_include_ci_ul */
+#ifndef __CI_CIUL_SYSDEP_LINUX_H__
+#define __CI_CIUL_SYSDEP_LINUX_H__
+
+/**********************************************************************
+ * Kernel version compatibility
+ */
+
+#if defined(__GNUC__)
+
+/* Linux kernel doesn't have stdint.h or [u]intptr_t. */
+# if !defined(LINUX_VERSION_CODE)
+#  include <linux/version.h>
+# endif
+# include <asm/io.h>
+
+/* In Linux 2.6.24, linux/types.h has uintptr_t */
+# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
+#  if BITS_PER_LONG == 32
+   typedef __u32         uintptr_t;
+#  else
+   typedef __u64         uintptr_t;
+#  endif
+# endif
+
+/* But even 2.6.24 doesn't define intptr_t */
+# if BITS_PER_LONG == 32
+   typedef __s32         intptr_t;
+# else
+   typedef __s64         intptr_t;
+# endif
+
+# if defined(__ia64__)
+#  define EF_VI_PRIx64  "lx"
+# else
+#  define EF_VI_PRIx64  "llx"
+# endif
+
+# define EF_VI_HF __attribute__((visibility("hidden")))
+# define EF_VI_HV __attribute__((visibility("hidden")))
+
+# if defined(__i386__) || defined(__x86_64__)  /* GCC x86/x64 */
+   typedef unsigned long long ef_vi_dma_addr_t; 
+#  if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
+#   define ef_vi_wiob()  __asm__ __volatile__ ("sfence")
+#  else
+#   define ef_vi_wiob()  __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF8")
+#  endif
+
+# endif
+#endif
+
+#ifdef EFX_NOT_UPSTREAM
+
+/* Stuff for architectures/compilers not officially supported */
+
+#if !defined(__GNUC__)
+# if defined(__PPC__)  /* GCC, PPC */
+   typedef unsigned long     ef_vi_dma_addr_t;
+#  define ef_vi_wiob()  wmb()
+
+#  ifdef __powerpc64__
+#   ifdef CONFIG_SMP
+#    define CI_SMP_SYNC        "\n   eieio     \n"         /* memory cache sync */
+#    define CI_SMP_ISYNC       "\n   isync     \n"         /* instr cache sync */
+#   else
+#    define CI_SMP_SYNC
+#    define CI_SMP_ISYNC
+#   endif
+#  else         /* for ppc32 systems */
+#   ifdef CONFIG_SMP
+#    define CI_SMP_SYNC        "\n   eieio     \n"
+#    define CI_SMP_ISYNC       "\n   sync      \n"
+#   else
+#    define CI_SMP_SYNC
+#    define CI_SMP_ISYNC
+#   endif
+#  endif
+
+# elif defined(__ia64__)  /* GCC, IA64 */
+   typedef unsigned long     ef_vi_dma_addr_t;
+#  define ef_vi_wiob()  __asm__ __volatile__("mf.a": : :"memory")
+
+# else
+#  error Unknown processor - GNU C
+# endif
+
+#elif defined(__PGI)
+# error PGI not supported 
+
+#elif defined(__INTEL_COMPILER)
+
+/* Intel compilers v7 claim to be very gcc compatible. */
+# if __INTEL_COMPILER >= 700
+#  if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ > 91)
+#   define EF_VI_LIKELY(t)    __builtin_expect((t), 1)
+#   define EF_VI_UNLIKELY(t)  __builtin_expect((t), 0)
+#  endif
+
+#  if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
+#   define ef_vi_wiob()  __asm__ __volatile__ ("sfence")
+#  else
+#   define ef_vi_wiob()  __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF8")
+#  endif
+
+# else
+#  error Old Intel compiler not supported.
+# endif
+
+#else
+# error Unknown compiler.
+#endif
+
+#endif
+
+
+# include <linux/errno.h>
+
+
+/**********************************************************************
+ * Extracting bit fields.
+ */
+
+#define _QWORD_GET_LOW(f, v)                                    \
+  (((v).u32[0] >> (f##_LBN)) & ((1u << f##_WIDTH) - 1u))
+#define _QWORD_GET_HIGH(f, v)                                           \
+  (((v).u32[1] >> (f##_LBN - 32u)) & ((1u << f##_WIDTH) - 1u))
+#define _QWORD_GET_ANY(f, v)                                            \
+  (((v).u64[0] >> f##_LBN) & (((uint64_t) 1u << f##_WIDTH) - 1u))
+
+#define QWORD_GET(f, v)                                                     \
+  ((f##_LBN + f##_WIDTH) <= 32u                                             \
+   ? _QWORD_GET_LOW(f, (v))                                                 \
+   : ((f##_LBN >= 32u) ? _QWORD_GET_HIGH(f, (v)) : _QWORD_GET_ANY(f, (v))))
+
+#define QWORD_GET_U(f, v)  ((unsigned) QWORD_GET(f, (v)))
+
+#define _QWORD_TEST_BIT_LOW(f, v)   ((v).u32[0] & (1u << (f##_LBN)))
+#define _QWORD_TEST_BIT_HIGH(f, v)  ((v).u32[1] & (1u << (f##_LBN - 32u)))
+
+#define QWORD_TEST_BIT(f, v)                                                  \
+  (f##_LBN < 32 ? _QWORD_TEST_BIT_LOW(f, (v)) : _QWORD_TEST_BIT_HIGH(f, (v)))
+
+
+
+
+#ifndef DECLSPEC_NORETURN
+/* normally defined on Windows to expand to a declaration that the
+   function will not return */
+# define DECLSPEC_NORETURN
+#endif
+
+#endif  /* __CI_CIUL_SYSDEP_LINUX_H__ */
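
As a worked example of the extraction macros above (FOO is a hypothetical field, not a hardware definition): with FOO_LBN 36 and FOO_WIDTH 4 the field lies wholly above bit 32, so QWORD_GET selects _QWORD_GET_HIGH.

    /* Hypothetical field, purely to illustrate the macro arithmetic. */
    #define FOO_LBN    36
    #define FOO_WIDTH  4
    /*
     * With v.u32[1] == 0xa5:
     *   QWORD_GET_U(FOO, v)  == (0xa5 >> (36 - 32)) & ((1u << 4) - 1u) == 0xa
     *   QWORD_TEST_BIT(FOO, v) tests v.u32[1] & (1u << (36 - 32)), i.e. bit 4,
     *   which is clear for this value.
     */
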
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netfront/vi_init.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netfront/vi_init.c        Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,183 @@
+/****************************************************************************
+ * Copyright 2002-2005: Level 5 Networks Inc.
+ * Copyright 2005-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications
+ *  <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *  <onload-dev@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+/*
+ * \author  djr
+ *  \brief  Initialisation of VIs.
+ *   \date  2007/06/08
+ */
+
+#include "ef_vi_internal.h"
+
+#define EF_VI_STATE_BYTES(rxq_sz, txq_sz)                      \
+       (sizeof(ef_vi_state) + (rxq_sz) * sizeof(uint16_t)      \
+        + (txq_sz) * sizeof(uint16_t))
+
+int ef_vi_calc_state_bytes(int rxq_sz, int txq_sz)
+{
+       ef_assert(rxq_sz == 0 || EF_VI_IS_POW2(rxq_sz));
+       ef_assert(txq_sz == 0 || EF_VI_IS_POW2(txq_sz));
+
+       return EF_VI_STATE_BYTES(rxq_sz, txq_sz);
+}
+
+
+int ef_vi_state_bytes(ef_vi* vi)
+{
+       int rxq_sz = 0, txq_sz = 0;
+       if( ef_vi_receive_capacity(vi) )
+               rxq_sz = ef_vi_receive_capacity(vi) + 1;
+       if( ef_vi_transmit_capacity(vi) )
+               txq_sz = ef_vi_transmit_capacity(vi) + 1;
+
+       ef_assert(rxq_sz == 0 || EF_VI_IS_POW2(rxq_sz));
+       ef_assert(txq_sz == 0 || EF_VI_IS_POW2(txq_sz));
+
+       return EF_VI_STATE_BYTES(rxq_sz, txq_sz);
+}
+
+
+void ef_eventq_state_init(ef_vi* evq)
+{
+       int j;
+
+       for (j = 0; j<EFAB_DMAQS_PER_EVQ_MAX; j++) {
+               ef_rx_dup_state_t *rx_dup_state =
+                       &evq->evq_state->rx_dup_state[j];
+               rx_dup_state->bad_sop = 0;
+               rx_dup_state->rx_last_desc_ptr = -1;
+               rx_dup_state->frag_num = 0;
+       }
+
+       evq->evq_state->evq_ptr = 0;
+}
+
+
+void ef_vi_state_init(ef_vi* vi)
+{
+       ef_vi_state* state = vi->ep_state;
+       unsigned i;
+
+       state->txq.added = state->txq.removed = 0;
+       state->rxq.added = state->rxq.removed = 0;
+
+       if( vi->vi_rxq.mask )
+               for( i = 0; i <= vi->vi_rxq.mask; ++i )
+                       vi->vi_rxq.ids[i] = (uint16_t) -1;
+       if( vi->vi_txq.mask )
+               for( i = 0; i <= vi->vi_txq.mask; ++i )
+                       vi->vi_txq.ids[i] = (uint16_t) -1;
+}
+
+
+void ef_vi_init_mapping_evq(void* data_area, struct ef_vi_nic_type nic_type,
+                            int instance, unsigned evq_bytes, void* base,
+                            void* timer_reg)
+{
+       struct vi_mappings* vm = (struct vi_mappings*) data_area;
+
+       vm->signature = VI_MAPPING_SIGNATURE;
+       vm->vi_instance = instance;
+       vm->nic_type = nic_type;
+       vm->evq_bytes = evq_bytes;
+       vm->evq_base = base;
+       vm->evq_timer_reg = timer_reg;
+}
+
+
+void ef_vi_init(ef_vi* vi, void* vvis, ef_vi_state* state,
+                ef_eventq_state* evq_state, enum ef_vi_flags vi_flags)
+{
+       struct vi_mappings* vm = (struct vi_mappings*) vvis;
+
+       vi->vi_i = vm->vi_instance;
+       vi->ep_state = state;
+       vi->vi_flags = vi_flags;
+
+       switch( vm->nic_type.arch ) {
+       case EF_VI_ARCH_FALCON:
+               falcon_vi_init(vi, vvis);
+               break;
+       default:
+               /* ?? TODO: We should return an error code. */
+               ef_assert(0);
+               break;
+       }
+
+       if( vm->evq_bytes ) {
+               vi->evq_state = evq_state;
+               vi->evq_mask = vm->evq_bytes - 1u;
+               vi->evq_base = vm->evq_base;
+               vi->evq_timer_reg = vm->evq_timer_reg;
+       }
+
+       EF_VI_MAGIC_SET(vi, EF_VI);
+}
+
+
+/* Initialise [data_area] with information required to initialise an ef_vi.
+ * In the following, an unused param should be set to NULL. Note the case
+ * marked (*) of [iobuf_mmap] for falcon/driver; for the normal driver this
+ * must be NULL.
+ *
+ * \param  data_area     [in,out] required, must ref at least VI_MAPPING_SIZE 
+ *                                bytes
+ * \param  io_mmap       [in] ef1,    required
+ *                            falcon, required
+ * \param  iobuf_mmap    [in] ef1,    unused
+ *                            falcon, required
+ */
+void ef_vi_init_mapping_vi(void* data_area, struct ef_vi_nic_type nic_type,
+                           unsigned rxq_capacity, unsigned txq_capacity,
+                           int instance, void* io_mmap,
+                           void* iobuf_mmap_rx, void* iobuf_mmap_tx,
+                           enum ef_vi_flags vi_flags)
+{
+       struct vi_mappings* vm = (struct vi_mappings*) data_area;
+       int rx_desc_bytes, rxq_bytes;
+
+       ef_assert(rxq_capacity > 0 || txq_capacity > 0);
+       ef_assert(vm);
+       ef_assert(io_mmap);
+       ef_assert(iobuf_mmap_rx || iobuf_mmap_tx);
+
+       vm->signature = VI_MAPPING_SIGNATURE;
+       vm->vi_instance = instance;
+       vm->nic_type = nic_type;
+
+       rx_desc_bytes = (vi_flags & EF_VI_RX_PHYS_ADDR) ? 8 : 4;
+       rxq_bytes = rxq_capacity * rx_desc_bytes;
+       rxq_bytes = (rxq_bytes + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+
+       if( iobuf_mmap_rx == iobuf_mmap_tx )
+               iobuf_mmap_tx = (char*) iobuf_mmap_rx + rxq_bytes;
+
+       vm->rx_queue_capacity = rxq_capacity;
+       vm->rx_dma_falcon = iobuf_mmap_rx;
+       vm->rx_bell       = (char*) io_mmap + (RX_DESC_UPD_REG_KER_OFST & 4095);
+       vm->tx_queue_capacity = txq_capacity;
+       vm->tx_dma_falcon = iobuf_mmap_tx;
+       vm->tx_bell       = (char*) io_mmap + (TX_DESC_UPD_REG_KER_OFST & 4095);
+}
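
Putting the pieces in this file together, a frontend sizes and allocates the per-VI state, describes the event queue and DMA queues in the shared data area, and only then builds the ef_vi. The sketch below is illustrative: example_vi_setup and its fixed 512-entry queues are assumptions, and it relies on the driver's internal headers plus <linux/slab.h> for kmalloc.

    /* Illustrative only: bring up a VI with 512-entry RX/TX rings. */
    static int example_vi_setup(ef_vi *vi, struct ef_vi_nic_type nic_type,
                                void *data_area, void *io_mmap,
                                void *iobuf_rx, void *iobuf_tx,
                                void *evq_base, void *evq_timer_reg,
                                unsigned evq_bytes, enum ef_vi_flags flags)
    {
            ef_vi_state *state = kmalloc(ef_vi_calc_state_bytes(512, 512),
                                         GFP_KERNEL);
            ef_eventq_state *evq_state = kmalloc(sizeof(*evq_state), GFP_KERNEL);

            if (state == NULL || evq_state == NULL) {
                    kfree(state);
                    kfree(evq_state);
                    return -ENOMEM;
            }

            /* Describe the event queue and the DMA queues in [data_area]... */
            ef_vi_init_mapping_evq(data_area, nic_type, /*instance*/ 0,
                                   evq_bytes, evq_base, evq_timer_reg);
            ef_vi_init_mapping_vi(data_area, nic_type, /*rxq*/ 512, /*txq*/ 512,
                                  /*instance*/ 0, io_mmap, iobuf_rx, iobuf_tx,
                                  flags);

            /* ...then build the ef_vi and reset its software state. */
            ef_vi_init(vi, data_area, state, evq_state, flags);
            ef_vi_state_init(vi);
            ef_eventq_state_init(vi);
            return 0;
    }
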
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netutil/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netutil/Makefile  Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,10 @@
+EXTRA_CFLAGS += -Werror
+
+ifdef GGOV
+EXTRA_CFLAGS += -fprofile-arcs -ftest-coverage -DEFX_GCOV
+endif
+
+obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_UTIL) := sfc_netutil.o
+
+sfc_netutil-objs := accel_cuckoo_hash.o accel_msg_iface.o accel_util.o 
+
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netutil/accel_cuckoo_hash.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netutil/accel_cuckoo_hash.c       Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,651 @@
+/****************************************************************************
+ * Solarflare driver for Xen network acceleration
+ *
+ * Copyright 2006-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+#include <linux/types.h> /* needed for linux/random.h */
+#include <linux/random.h>
+
+#include "accel_cuckoo_hash.h"
+#include "accel_util.h"
+
+static inline int cuckoo_hash_key_compare(cuckoo_hash_table *hashtab,
+                                         cuckoo_hash_key *key1, 
+                                         cuckoo_hash_key *key2)
+{
+       return !memcmp(key1, key2, hashtab->key_length);
+}
+
+
+static inline void cuckoo_hash_key_set(cuckoo_hash_key *key1, 
+                                      cuckoo_hash_key *key2)
+{
+       *key1 = *key2;
+}
+
+
+/*
+ * Sets hash function parameters.  Chooses "a" to be odd, 0 < a < 2^w
+ * where w is the length of the key
+ */
+static void set_hash_parameters(cuckoo_hash_table *hashtab)
+{
+ again:
+       hashtab->a0 = hashtab->a1 = 0;
+
+       /* Make sure random */
+       get_random_bytes(&hashtab->a0, hashtab->key_length);
+       get_random_bytes(&hashtab->a1, hashtab->key_length);
+
+       /* Make sure odd */
+       hashtab->a0 |= 1;
+       hashtab->a1 |= 1;
+
+       /* Being different is good */
+       if (hashtab->a0 != hashtab->a1)
+               return;
+                      
+       goto again;
+}
+
+int cuckoo_hash_init(cuckoo_hash_table *hashtab, unsigned length_bits,
+                    unsigned key_length)
+{
+       char *table_mem;
+       unsigned length = 1 << length_bits;
+
+       BUG_ON(length_bits >= sizeof(unsigned) * 8);
+       BUG_ON(key_length > sizeof(cuckoo_hash_key));
+
+       table_mem = kmalloc(sizeof(cuckoo_hash_entry) * 2 * length, GFP_KERNEL);
+
+       if (table_mem == NULL)
+               return -ENOMEM;
+
+       hashtab->length = length;
+       hashtab->length_bits = length_bits;
+       hashtab->key_length = key_length;
+       hashtab->entries = 0;
+
+       hashtab->table0 = (cuckoo_hash_entry *)table_mem;
+       hashtab->table1 = (cuckoo_hash_entry *)
+               (table_mem + length * sizeof(cuckoo_hash_entry));
+
+       set_hash_parameters(hashtab);
+
+       /* Zero the table */
+       memset(hashtab->table0, 0, length * 2 * sizeof(cuckoo_hash_entry));
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(cuckoo_hash_init);
+
+void cuckoo_hash_destroy(cuckoo_hash_table *hashtab)
+{
+       if (hashtab->table0 != NULL)
+               kfree(hashtab->table0);
+}
+
+EXPORT_SYMBOL_GPL(cuckoo_hash_destroy);
+
+/* 
+ * This computes a full cuckoo_hash's worth of hash bits; not all will
+ * necessarily be used, but the hash function throws away any that
+ * aren't.
+ */ 
+static inline void cuckoo_compute_hash_helper(cuckoo_hash_table *hashtab,
+                                             cuckoo_hash_key *a,
+                                             cuckoo_hash_key *x,
+                                             cuckoo_hash *result) 
+{
+       u64 multiply_result = 0, a_temp, x_temp;
+       u32 carry = 0;
+       u32 *a_words;
+       u32 *x_words;
+       int i;
+
+       /*
+        * As the mod and div operations in the function effectively
+        * reduce and shift the bits of the product down to just the
+        * third word, we need only compute that and return it as a
+        * result.
+        *
+        * Do enough long multiplication to get the word we need
+        */
+
+       /* This assumes things about the sizes of the key and hash */
+       BUG_ON(hashtab->key_length % sizeof(u32) != 0);
+       BUG_ON(sizeof(cuckoo_hash) != sizeof(u32));
+
+       a_words = (u32 *)a;
+       x_words = (u32 *)x;
+
+       for (i = 0; i < hashtab->key_length / sizeof(u32); i++) {
+               a_temp = a_words[i];
+               x_temp = x_words[i];
+               
+               multiply_result = (a_temp * x_temp) + carry;
+               carry = (multiply_result >> 32) & 0xffffffff;
+       }
+       
+       *result = multiply_result & 0xffffffff;
+}
+
+
+/*
+ * Want to implement (ax mod 2^w) div 2^(w-q) for odd a, 0 < a < 2^w;
+ * w is the length of the key, q is the length of the hash, I think.
+ * See http://www.it-c.dk/people/pagh/papers/cuckoo-jour.pdf 
+ */
+static cuckoo_hash cuckoo_compute_hash(cuckoo_hash_table *hashtab, 
+                                      cuckoo_hash_key *key, 
+                                      cuckoo_hash_key *a)
+{
+       unsigned q = hashtab->length_bits;
+       unsigned shift = 32 - q;
+       unsigned mask = ((1 << q) - 1) << shift;
+       cuckoo_hash hash;
+
+       cuckoo_compute_hash_helper(hashtab, a, key, &hash);
+
+       /* 
+        * Take the top few bits to get the right length for this
+        * hash table 
+        */
+       hash = (hash & mask) >> shift;
+
+       BUG_ON(hash >= hashtab->length);
+
+       return hash;
+}
+
+
+static int cuckoo_hash_lookup0(cuckoo_hash_table *hashtab,
+                              cuckoo_hash_key *key,
+                              cuckoo_hash_value *value)
+{
+       cuckoo_hash hash = cuckoo_compute_hash(hashtab, key, &hashtab->a0);
+
+       if ((hashtab->table0[hash].state == CUCKOO_HASH_STATE_OCCUPIED)
+           && cuckoo_hash_key_compare(hashtab, &(hashtab->table0[hash].key),
+                                      key)) {
+               *value = hashtab->table0[hash].value;
+               return 1;
+       }
+
+       return 0;
+}
+
+static int cuckoo_hash_lookup1(cuckoo_hash_table *hashtab,
+                              cuckoo_hash_key *key,
+                              cuckoo_hash_value *value)
+{
+       cuckoo_hash hash = cuckoo_compute_hash(hashtab, key, &hashtab->a1);
+
+       if ((hashtab->table1[hash].state == CUCKOO_HASH_STATE_OCCUPIED)
+           && cuckoo_hash_key_compare(hashtab, &(hashtab->table1[hash].key),
+                                      key)) {
+               *value = hashtab->table1[hash].value;
+               return 1;
+       }
+
+       return 0;
+}
+
+
+int cuckoo_hash_lookup(cuckoo_hash_table *hashtab, cuckoo_hash_key *key,
+                      cuckoo_hash_value *value)
+{
+       return cuckoo_hash_lookup0(hashtab, key, value)
+               || cuckoo_hash_lookup1(hashtab, key, value);
+}
+EXPORT_SYMBOL_GPL(cuckoo_hash_lookup);
+
+
+/* Transfer any active entries from "old_table" into hashtab */
+static int cuckoo_hash_transfer_entries(cuckoo_hash_table *hashtab,
+                                       cuckoo_hash_entry *old_table,
+                                       unsigned capacity)
+{
+       int i, rc;
+       cuckoo_hash_entry *entry;
+
+       hashtab->entries = 0;
+
+       for (i = 0; i < capacity; i++) {
+               entry = &old_table[i];
+               if (entry->state == CUCKOO_HASH_STATE_OCCUPIED) {
+                       rc = cuckoo_hash_add(hashtab, &(entry->key), 
+                                            entry->value, 0);
+                       if (rc != 0) {
+                               return rc;
+                       }
+               }
+       }
+  
+       return 0;
+}
+
+
+int cuckoo_hash_rehash(cuckoo_hash_table *hashtab)
+{
+       cuckoo_hash_entry *new_table;
+       cuckoo_hash_table old_hashtab;
+       int resize = 0, rc, rehash_count;
+
+       /*
+        * Store old tables so we can access the existing values and
+        * copy across
+        */
+       memcpy(&old_hashtab, hashtab, sizeof(cuckoo_hash_table));
+
+       /* resize if hashtable is more than half full */
+       if (old_hashtab.entries > old_hashtab.length &&
+           old_hashtab.length_bits < 32)
+               resize = 1;
+
+ resize:
+       if (resize) {
+               new_table = kmalloc(sizeof(cuckoo_hash_entry) * 4 * hashtab->length,
+                                   GFP_ATOMIC);
+               if (new_table == NULL) {
+                       rc = -ENOMEM;
+                       goto err;
+               }
+
+               hashtab->length = 2 * hashtab->length;
+               hashtab->length_bits++;
+       } else {
+               new_table = kmalloc(sizeof(cuckoo_hash_entry) * 2 * hashtab->length,
+                                   GFP_ATOMIC);
+               if (new_table == NULL) {
+                       rc = -ENOMEM;
+                       goto err;
+               }
+       }
+    
+       /*
+        * Point hashtab to new memory region so we can try to
+        * construct new table
+        */
+       hashtab->table0 = new_table;
+       hashtab->table1 = (cuckoo_hash_entry *)
+               ((char *)new_table + hashtab->length * sizeof(cuckoo_hash_entry));
+  
+       rehash_count = 0;
+
+ again:
+       /* Zero the new tables */
+       memset(new_table, 0, hashtab->length * 2 * sizeof(cuckoo_hash_entry));
+
+       /* Choose new parameters for the hash functions */
+       set_hash_parameters(hashtab);
+
+       /*
+        * Multiply old_table_length by 2 as the length refers to each
+        * table, and there are two of them.  This assumes that they
+        * are arranged sequentially in memory, so assert it 
+        */
+       BUG_ON(((char *)old_hashtab.table1) != 
+              ((char *)old_hashtab.table0 + old_hashtab.length
+               * sizeof(cuckoo_hash_entry)));
+       rc = cuckoo_hash_transfer_entries(hashtab, old_hashtab.table0, 
+                                         old_hashtab.length * 2);
+       if (rc < 0) {
+               /* Problem */
+               if (rc == -ENOSPC) {
+                       ++rehash_count;
+                       if (rehash_count < CUCKOO_HASH_MAX_LOOP) {
+                               /*
+                                * Wanted to rehash, but rather than
+                                * recurse we can just do it here
+                                */
+                               goto again;
+                       } else {
+                               /*
+                                * Didn't manage to rehash, so let's
+                                * go up a size (if we haven't already
+                                * and there's space)
+                                */
+                               if (!resize && hashtab->length_bits < 32) {
+                                       resize = 1;
+                                       kfree(new_table);
+                                       goto resize;
+                               }
+                               else
+                                       goto err;
+                       }
+               }
+               else
+                       goto err;
+       }
+
+       /* Success, I think.  Free up the old table */
+       kfree(old_hashtab.table0);
+  
+       /* We should have put all the entries from old table in the new one */
+       BUG_ON(hashtab->entries != old_hashtab.entries);
+
+       return 0;
+ err:
+       EPRINTK("%s: Rehash failed, giving up\n", __FUNCTION__);
+       /* Some other error, give up, at least restore table to how it was */
+       memcpy(hashtab, &old_hashtab, sizeof(cuckoo_hash_table));
+       if (new_table)
+               kfree(new_table);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(cuckoo_hash_rehash);
+
+
+static int 
+cuckoo_hash_insert_or_displace(cuckoo_hash_entry *table, unsigned hash,
+                              cuckoo_hash_key *key, 
+                              cuckoo_hash_value value,
+                              cuckoo_hash_key *displaced_key, 
+                              cuckoo_hash_value *displaced_value)
+{
+       if (table[hash].state == CUCKOO_HASH_STATE_VACANT) {
+               cuckoo_hash_key_set(&(table[hash].key), key);
+               table[hash].value = value;
+               table[hash].state = CUCKOO_HASH_STATE_OCCUPIED;
+
+               return 1;
+       } else {
+               cuckoo_hash_key_set(displaced_key, &(table[hash].key));
+               *displaced_value = table[hash].value;
+               cuckoo_hash_key_set(&(table[hash].key), key);
+               table[hash].value = value;
+
+               return 0;
+       }
+}
+
+
+int cuckoo_hash_add(cuckoo_hash_table *hashtab, cuckoo_hash_key *key,
+                    cuckoo_hash_value value, int can_rehash)
+{
+       cuckoo_hash hash0, hash1;
+       int i, rc;
+       cuckoo_hash_key key1, key2;
+
+       cuckoo_hash_key_set(&key1, key);
+
+ again:
+       i = 0;
+       do {
+               hash0 = cuckoo_compute_hash(hashtab, &key1, &hashtab->a0);
+               if (cuckoo_hash_insert_or_displace(hashtab->table0, hash0, 
+                                                  &key1, value, &key2,
+                                                  &value)) {
+                       /* Success */
+                       hashtab->entries++;
+                       return 0;
+               }
+       
+               hash1 = cuckoo_compute_hash(hashtab, &key2, &hashtab->a1);
+               if (cuckoo_hash_insert_or_displace(hashtab->table1, hash1,
+                                                  &key2, value, &key1,
+                                                  &value)) {
+                       /* Success */
+                       hashtab->entries++;
+                       return 0;
+               }
+       } while (++i < CUCKOO_HASH_MAX_LOOP);
+
+       if (can_rehash) {
+               if ((rc = cuckoo_hash_rehash(hashtab)) < 0) {
+                       /*
+                        * Give up - this will drop whichever
+                        * key/value pair we have currently displaced
+                        * on the floor
+                        */
+                       return rc;
+               }
+               goto again;
+       }
+  
+       EPRINTK("%s: failed hash add\n", __FUNCTION__);
+       /*
+        * Couldn't do it - bad as we've now removed some random thing
+        * from the table, and will just drop it on the floor.  Better
+        * would be to somehow revert the table to the state it was in
+        * at the start
+        */
+       return -ENOSPC;
+}
+EXPORT_SYMBOL_GPL(cuckoo_hash_add);
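
Taken together, typical use of this table is to initialise it once, then add, look up and remove entries under the caller's own locking, letting cuckoo_hash_add rehash when its displacement loop fails. The sketch below is illustrative only; example_cuckoo_usage is not part of the patch and it assumes cuckoo_hash_value is an integer type, as the lookup helpers above suggest.

    /* Illustrative only; assumes a 4-byte key. */
    static void example_cuckoo_usage(void)
    {
            cuckoo_hash_table tab;
            cuckoo_hash_key key;
            cuckoo_hash_value value;

            if (cuckoo_hash_init(&tab, /*length_bits*/ 4, /*key_length*/ 4) != 0)
                    return;

            memset(&key, 0, sizeof(key));
            *(u32 *)&key = 0x12345678;

            if (cuckoo_hash_add(&tab, &key, /*value*/ 42, /*can_rehash*/ 1) == 0 &&
                cuckoo_hash_lookup(&tab, &key, &value)) {
                    /* value is now 42 */
            }

            cuckoo_hash_remove(&tab, &key);
            cuckoo_hash_destroy(&tab);
    }
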
+
+
+int cuckoo_hash_add_check(cuckoo_hash_table *hashtab,
+                         cuckoo_hash_key *key, cuckoo_hash_value value,
+                         int can_rehash)
+{
+       cuckoo_hash_value stored_value;
+
+       if (cuckoo_hash_lookup(hashtab, key, &stored_value))
+               return -EBUSY;
+
+       return cuckoo_hash_add(hashtab, key, value, can_rehash);
+}
+EXPORT_SYMBOL_GPL(cuckoo_hash_add_check);
+
+
+int cuckoo_hash_remove(cuckoo_hash_table *hashtab, cuckoo_hash_key *key)
+{
+       cuckoo_hash hash;
+
+       hash = cuckoo_compute_hash(hashtab, key, &hashtab->a0);
+       if ((hashtab->table0[hash].state == CUCKOO_HASH_STATE_OCCUPIED) &&
+           cuckoo_hash_key_compare(hashtab, &(hashtab->table0[hash].key),
+                                   key)) {
+               hashtab->table0[hash].state = CUCKOO_HASH_STATE_VACANT;
+               hashtab->entries--;
+               return 0;
+       }
+  
+       hash = cuckoo_compute_hash(hashtab, key, &hashtab->a1);
+       if ((hashtab->table1[hash].state == CUCKOO_HASH_STATE_OCCUPIED) &&
+           cuckoo_hash_key_compare(hashtab, &(hashtab->table1[hash].key),
+                                   key)) {
+               hashtab->table1[hash].state = CUCKOO_HASH_STATE_VACANT;
+               hashtab->entries--;
+               return 0;
+       }
+ 
+       return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(cuckoo_hash_remove);
+
+
+int cuckoo_hash_update(cuckoo_hash_table *hashtab, cuckoo_hash_key *key,
+                      cuckoo_hash_value value)
+{
+       cuckoo_hash hash;
+
+       hash = cuckoo_compute_hash(hashtab, key, &hashtab->a0);
+       if ((hashtab->table0[hash].state == CUCKOO_HASH_STATE_OCCUPIED) &&
+           cuckoo_hash_key_compare(hashtab, &(hashtab->table0[hash].key),
+                                   key)) {
+               hashtab->table0[hash].value = value;
+               return 0;
+       }
+
+       hash = cuckoo_compute_hash(hashtab, key, &hashtab->a1);
+       if ((hashtab->table1[hash].state == CUCKOO_HASH_STATE_OCCUPIED) &&
+           cuckoo_hash_key_compare(hashtab, &(hashtab->table1[hash].key),
+                                   key)) {
+               hashtab->table1[hash].value = value;
+               return 0;
+       }
+
+       return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(cuckoo_hash_update);
+
+
+void cuckoo_hash_iterate_reset(cuckoo_hash_table *hashtab)
+{
+       hashtab->iterate_index = 0;
+}
+EXPORT_SYMBOL_GPL(cuckoo_hash_iterate_reset);
+
+
+int cuckoo_hash_iterate(cuckoo_hash_table *hashtab,
+                       cuckoo_hash_key *key, cuckoo_hash_value *value)
+{
+       unsigned index;
+
+       while (hashtab->iterate_index < hashtab->length) {
+               index = hashtab->iterate_index;
+               ++hashtab->iterate_index;
+               if (hashtab->table0[index].state == CUCKOO_HASH_STATE_OCCUPIED) {
+                       *key = hashtab->table0[index].key;
+                       *value = hashtab->table0[index].value;
+                       return 0;
+               }
+       }
+
+       while (hashtab->iterate_index >= hashtab->length &&
+              hashtab->iterate_index < hashtab->length * 2) {
+               index = hashtab->iterate_index - hashtab->length;
+               ++hashtab->iterate_index;               
+               if (hashtab->table1[index].state == CUCKOO_HASH_STATE_OCCUPIED) {
+                       *key = hashtab->table1[index].key;
+                       *value = hashtab->table1[index].value;
+                       return 0;
+               }
+       }
+
+       return -ENOSPC;
+}
+EXPORT_SYMBOL_GPL(cuckoo_hash_iterate);
+
+
+#if 0
+void cuckoo_hash_valid(cuckoo_hash_table *hashtab)
+{
+       int i, entry_count = 0;
+
+       for (i=0; i < hashtab->length; i++) {
+               EPRINTK_ON(hashtab->table0[i].state != CUCKOO_HASH_STATE_VACANT &&
+                          hashtab->table0[i].state != CUCKOO_HASH_STATE_OCCUPIED);
+               if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED)
+                       entry_count++;
+               EPRINTK_ON(hashtab->table1[i].state != CUCKOO_HASH_STATE_VACANT &&
+                          hashtab->table1[i].state != CUCKOO_HASH_STATE_OCCUPIED);
+               if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED)
+                       entry_count++;  
+       }
+       
+       if (entry_count != hashtab->entries) {
+               EPRINTK("%s: bad count\n", __FUNCTION__);
+               cuckoo_hash_dump(hashtab);
+               return;
+       }
+
+       for (i=0; i< hashtab->length; i++) {
+               if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED)
+                       if (i != cuckoo_compute_hash(hashtab, 
+                                                    &hashtab->table0[i].key, 
+                                                    &hashtab->a0)) {
+                               EPRINTK("%s: Bad key table 0 index %d\n",
+                                       __FUNCTION__, i);
+                               cuckoo_hash_dump(hashtab);
+                               return;
+                       }
+               if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED)
+                       if (i != cuckoo_compute_hash(hashtab, 
+                                                    &hashtab->table1[i].key, 
+                                                    &hashtab->a1)) {
+                               EPRINTK("%s: Bad key table 1 index %d\n",
+                                       __FUNCTION__, i);
+                               cuckoo_hash_dump(hashtab);
+                               return;
+                       }
+       }
+
+}
+EXPORT_SYMBOL_GPL(cuckoo_hash_valid);
+
+
+void cuckoo_hash_dump(cuckoo_hash_table *hashtab)
+{
+       int i, entry_count;
+
+       entry_count = 0;
+       for (i=0; i < hashtab->length; i++) {
+               EPRINTK_ON(hashtab->table0[i].state != CUCKOO_HASH_STATE_VACANT &&
+                          hashtab->table0[i].state != CUCKOO_HASH_STATE_OCCUPIED);
+               if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED)
+                       entry_count++;
+               EPRINTK_ON(hashtab->table1[i].state != CUCKOO_HASH_STATE_VACANT &&
+                          hashtab->table1[i].state != CUCKOO_HASH_STATE_OCCUPIED);
+               if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED)
+                       entry_count++;  
+       }
+
+       EPRINTK("======================\n");
+       EPRINTK("Cuckoo hash table dump\n");
+       EPRINTK("======================\n");
+       EPRINTK("length: %d; length_bits: %d; key_length: %d\n", hashtab->length,
+               hashtab->length_bits, hashtab->key_length);
+       EPRINTK("Recorded entries: %d\n", hashtab->entries);
+       EPRINTK("Counted entries: %d\n", entry_count);
+       EPRINTK("a0: %llx; a1: %llx\n", hashtab->a0, hashtab->a1);
+       EPRINTK("-----------------------------------------\n");
+       EPRINTK("Index  Occupied  Key  Value Index0 Index1\n");
+       EPRINTK("-----------------------------------------\n");         
+       for (i=0; i< hashtab->length; i++) {
+               if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED)
+               EPRINTK("%d %d %llx %d %d %d\n", i,
+                       hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED,
+                       hashtab->table0[i].key, hashtab->table0[i].value,
+                       cuckoo_compute_hash(hashtab, &hashtab->table0[i].key, 
+                                           &hashtab->a0),
+                       cuckoo_compute_hash(hashtab, &hashtab->table0[i].key, 
+                                           &hashtab->a1));
+               else
+               EPRINTK("%d %d - - - -\n", i,
+                       hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED);
+                       
+       }
+       EPRINTK("-----------------------------------------\n");
+       EPRINTK("Index  Occupied  Key  Value Index0 Index1\n");
+       EPRINTK("-----------------------------------------\n");
+       for (i=0; i< hashtab->length; i++) {
+               if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED)
+               EPRINTK("%d %d %llx %d %d %d\n", i,
+                       hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED,
+                       hashtab->table1[i].key, hashtab->table1[i].value,
+                       cuckoo_compute_hash(hashtab, &hashtab->table1[i].key, 
+                                           &hashtab->a0),
+                       cuckoo_compute_hash(hashtab, &hashtab->table1[i].key, 
+                                           &hashtab->a1));
+               else
+               EPRINTK("%d %d - - - -\n", i,
+                       hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED);
+       } 
+       EPRINTK("======================\n");
+}
+EXPORT_SYMBOL_GPL(cuckoo_hash_dump);
+#endif
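
The displacement loop in cuckoo_hash_add() above is easier to follow in a
self-contained form. Below is a minimal userspace sketch of the same pattern,
assuming made-up table sizes, hash constants and slot types rather than the
driver's cuckoo_compute_hash()/cuckoo_hash_insert_or_displace() machinery;
the rehash step is replaced by a plain failure return.

    /* Toy illustration only: stand-in hashes and table layout, not the
     * driver's code.  Each entry has one candidate slot per sub-table;
     * inserting into an occupied slot displaces the occupant, which then
     * tries its slot in the other table, and so on. */
    #include <stdio.h>
    #include <stdint.h>

    #define LEN 8                  /* slots per sub-table (power of two) */
    #define MAX_LOOP LEN           /* displacement budget before giving up */

    struct slot { int used; uint64_t key; int value; };
    static struct slot t0[LEN], t1[LEN];

    /* arbitrary 3-bit hash functions for the 8-slot toy tables */
    static unsigned h0(uint64_t k) { return (unsigned)((k * 0x9E3779B97F4A7C15ULL) >> 61); }
    static unsigned h1(uint64_t k) { return (unsigned)((k * 0xC2B2AE3D27D4EB4FULL) >> 61); }

    /* Return 0 on success, -1 when MAX_LOOP kicks fail (the point at which
     * the real cuckoo_hash_add() would rehash if can_rehash is set). */
    static int toy_add(uint64_t key, int value)
    {
            int i;

            for (i = 0; i < MAX_LOOP; i++) {
                    struct slot in = { 1, key, value }, out;

                    out = t0[h0(key)];           /* take whatever was there */
                    t0[h0(key)] = in;
                    if (!out.used)
                            return 0;            /* slot was vacant: done */

                    in = out;                    /* displaced entry moves...  */
                    out = t1[h1(in.key)];        /* ...to its other table     */
                    t1[h1(in.key)] = in;
                    if (!out.used)
                            return 0;

                    key = out.key;               /* still displaced: keep kicking */
                    value = out.value;
            }
            return -1;
    }

    int main(void)
    {
            uint64_t k;

            for (k = 1; k <= 6; k++)
                    printf("add %llu: %s\n", (unsigned long long)k,
                           toy_add(k, (int)k) ? "would rehash" : "ok");
            return 0;
    }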
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netutil/accel_cuckoo_hash.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netutil/accel_cuckoo_hash.h       Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,227 @@
+/****************************************************************************
+ * Solarflare driver for Xen network acceleration
+ *
+ * Copyright 2006-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+/*
+ * A cuckoo hash table consists of two sub tables.  Each entry can
+ * hash to a position in each table.  If, on entry, its position is
+ * found to be occupied, the existing element is moved to its other
+ * location.  This recurses until success or a loop is found.  If a
+ * loop is found the table is rehashed.
+ *
+ *  See http://www.it-c.dk/people/pagh/papers/cuckoo-jour.pdf
+ */
+
+#ifndef NET_ACCEL_CUCKOO_HASH_H
+#define NET_ACCEL_CUCKOO_HASH_H
+
+/*! Type used for hash table keys of ip pairs */
+typedef struct {
+       u32 local_ip;
+       //u32 remote_ip;
+       u16 local_port;
+       //u16 remote_port;
+       /* Technically only 1 bit, but use 16 to make key a round
+          number size */
+       u16 proto;
+} cuckoo_hash_ip_key;
+
+/*! Type used for hash table keys of mac addresses */
+typedef u64 cuckoo_hash_mac_key;
+
+/*! This type is designed to be large enough to hold all supported key
+ *  sizes to avoid having to malloc storage for them.
+ */
+typedef u64 cuckoo_hash_key;
+
+/*! Type used for the values stored in the hash table */
+typedef int cuckoo_hash_value;
+
+/*! Type used for the hash used to index the table */
+typedef u32 cuckoo_hash;
+
+/*! How long to spend displacing values when adding before giving up
+ *  and rehashing */
+#define CUCKOO_HASH_MAX_LOOP (hashtab->length)
+
+/*! State of hash table entry */
+typedef enum {
+       CUCKOO_HASH_STATE_VACANT = 0,
+       CUCKOO_HASH_STATE_OCCUPIED 
+} cuckoo_hash_state;
+
+/*! An entry in the hash table */
+typedef struct {
+       cuckoo_hash_state state;
+       cuckoo_hash_key key;
+       cuckoo_hash_value value;
+} cuckoo_hash_entry;
+
+/*! A cuckoo hash table */
+typedef struct {
+       /*! The length of each table (NB. there are two tables of this
+        *  length) */
+       unsigned length; 
+       /*! The length of each table in bits */
+       unsigned length_bits;
+       /*! The length of the key in bytes */ 
+       unsigned key_length; 
+       /*! The number of entries currently stored in the table */
+       unsigned entries;
+       /*! Index into table used by cuckoo_hash_iterate */
+       unsigned iterate_index; 
+
+       /* parameter of hash functions */
+       /*! The "a" parameter of the first hash function */
+       cuckoo_hash_key a0; 
+       /*! The "a" parameter of the second hash function */
+       cuckoo_hash_key a1; 
+
+       /*! The first table */
+       cuckoo_hash_entry *table0; 
+       /*! The second table */
+       cuckoo_hash_entry *table1; 
+} cuckoo_hash_table;
+
+/*! Initialise the cuckoo hash table
+ *
+ * \param hashtab A pointer to an uninitialised hash table structure
+ * \param length_bits The number of elements in each table equals
+ * 2**length_bits
+ * \param key_length The length of the key in bytes
+ *
+ * \return 0 on success, -ENOMEM if it couldn't allocate the tables
+ */
+extern
+int cuckoo_hash_init(cuckoo_hash_table *hashtab, unsigned length_bits,
+                    unsigned key_length);
+
+
+/*! Destroy a hash table
+ *
+ * \param hashtab A hash table that has previously been passed to a
+ * successful call of cuckoo_hash_init()
+ */
+extern
+void cuckoo_hash_destroy(cuckoo_hash_table *hashtab);
+
+
+/*! Lookup an entry in the hash table 
+ *
+ * \param hashtab The hash table in which to look.
+ * \param key Pointer to a mac address to use as the key
+ * \param value On exit set to the value stored if key was present
+ *
+ * \return 0 if not present in the table, non-zero if it is (and value
+ * is set accordingly)
+ */
+extern
+int cuckoo_hash_lookup(cuckoo_hash_table *hashtab,
+                      cuckoo_hash_key *key,
+                      cuckoo_hash_value *value);
+
+/*! Add an entry to the hash table.  Key must not be a duplicate of
+ * anything already in the table.  If this is a risk, see
+ * cuckoo_hash_add_check
+ *
+ * \param hashtab The hash table to add the entry to
+ * \param key Pointer to a mac address to use as a key
+ * \param value The value to store 
+ * \param can_rehash Flag to allow the add function to rehash the
+ * table if necessary
+ *
+ * \return 0 on success, non-zero on failure.  -ENOSPC means it just
+ * couldn't find anywhere to put it - this is bad and probably means
+ * an entry has been dropped on the floor (but the entry you just
+ * tried to add may now be included)
+ */
+extern
+int cuckoo_hash_add(cuckoo_hash_table *hashtab,
+                   cuckoo_hash_key *key, 
+                   cuckoo_hash_value value,
+                   int can_rehash);
+
+/*! Same as cuckoo_hash_add but first checks to ensure entry is not
+ * already there
+ * \return -EBUSY if already there
+ */
+
+extern
+int cuckoo_hash_add_check(cuckoo_hash_table *hashtab,
+                         cuckoo_hash_key *key, 
+                         cuckoo_hash_value value,
+                         int can_rehash);
+/*! Remove an entry from the table 
+ *
+ * \param hashtab The hash table to remove the entry from
+ * \param key The key that was used to previously add the entry
+ *
+ * \return 0 on success, -EINVAL if the entry couldn't be found 
+ */
+extern
+int cuckoo_hash_remove(cuckoo_hash_table *hashtab, cuckoo_hash_key *key);
+
+
+/*! Helper for those using mac addresses to convert to a key for the
+ *  hash table
+ */
+static inline cuckoo_hash_mac_key cuckoo_mac_to_key(const u8 *mac)
+{
+       return (cuckoo_hash_mac_key)(mac[0])
+               | (cuckoo_hash_mac_key)(mac[1]) << 8
+               | (cuckoo_hash_mac_key)(mac[2]) << 16
+               | (cuckoo_hash_mac_key)(mac[3]) << 24
+               | (cuckoo_hash_mac_key)(mac[4]) << 32
+               | (cuckoo_hash_mac_key)(mac[5]) << 40;
+}
+
+
+/*! Update an entry already in the hash table to take a new value 
+ *
+ * \param hashtab The hash table to add the entry to
+ * \param key Pointer to a mac address to use as a key
+ * \param value The value to store 
+ *
+ * \return 0 on success, non-zero on failure. 
+ */
+int cuckoo_hash_update(cuckoo_hash_table *hashtab, cuckoo_hash_key *key,
+                      cuckoo_hash_value value);
+
+
+/*! Go through the hash table and return all used entries (one per call)
+ *
+ * \param hashtab The hash table to iterate over 
+ * \param key Pointer to a key to take the returned key
+ * \param value Pointer to a value to take the returned value
+ *
+ * \return 0 on success (key, value set), non-zero on failure.
+ */
+int cuckoo_hash_iterate(cuckoo_hash_table *hashtab,
+                       cuckoo_hash_key *key, cuckoo_hash_value *value);
+void cuckoo_hash_iterate_reset(cuckoo_hash_table *hashtab);
+
+/* debug, not compiled by default */
+void cuckoo_hash_valid(cuckoo_hash_table *hashtab);
+void cuckoo_hash_dump(cuckoo_hash_table *hashtab);
+
+#endif /* NET_ACCEL_CUCKOO_HASH_H */
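
For a concrete picture of the interface declared above, the fragment below
sketches a hypothetical caller that maps MAC addresses to small integer ids.
The names example_*, mac_table and the 2^7 table size are invented for
illustration; only functions declared in this header are used, and the cast
from cuckoo_hash_mac_key to cuckoo_hash_key relies on both being the u64
typedefs defined above.

    #include <linux/types.h>
    #include <linux/errno.h>
    #include "accel_cuckoo_hash.h"

    static cuckoo_hash_table mac_table;

    static int example_table_setup(void)
    {
            /* 2^7 = 128 slots per sub-table, MAC-address-sized keys */
            return cuckoo_hash_init(&mac_table, 7, sizeof(cuckoo_hash_mac_key));
    }

    static int example_add_mac(const u8 *mac, cuckoo_hash_value id)
    {
            cuckoo_hash_mac_key k = cuckoo_mac_to_key(mac);

            /* Reject duplicates; allow a rehash if the displacement loop fails. */
            return cuckoo_hash_add_check(&mac_table, (cuckoo_hash_key *)&k, id, 1);
    }

    static int example_lookup_mac(const u8 *mac, cuckoo_hash_value *id)
    {
            cuckoo_hash_mac_key k = cuckoo_mac_to_key(mac);

            /* cuckoo_hash_lookup() returns non-zero when the key is present */
            return cuckoo_hash_lookup(&mac_table, (cuckoo_hash_key *)&k, id) ? 0 : -ENOENT;
    }

    static void example_table_teardown(void)
    {
            cuckoo_hash_destroy(&mac_table);
    }

Note that the implementation above takes no locks of its own, so serialising
concurrent callers is left to the user of the table.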
diff -r e4dd072db259 -r 651fc2abdd5d drivers/xen/sfc_netutil/accel_msg_iface.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/sfc_netutil/accel_msg_iface.c Mon Feb 18 10:30:33 2008 +0000
@@ -0,0 +1,301 @@
+/****************************************************************************
+ * Solarflare driver for Xen network acceleration
+ *
+ * Copyright 2006-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications <linux-xen-drivers@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ ****************************************************************************
+ */
+
+#include <xen/evtchn.h>
+
+#include "accel_util.h"
+#include "accel_msg_iface.h"
+
+#define NET_ACCEL_MSG_Q_SIZE (1024)
+#define NET_ACCEL_MSG_Q_MASK (NET_ACCEL_MSG_Q_SIZE - 1)
+
+#ifdef NDEBUG
+#define NET_ACCEL_CHECK_MAGIC(_p, _errval)
+#define NET_ACCEL_SHOW_QUEUE(_t, _q, _id)
+#else
+#define NET_ACCEL_CHECK_MAGIC(_p, _errval)                             \
+       if (_p->magic != NET_ACCEL_MSG_MAGIC) {                         \
+               printk(KERN_ERR "%s: passed invalid shared page %p!\n", \
+                      __FUNCTION__, _p);                               \
+               return _errval;                                         \
+       }
+#define NET_ACCEL_SHOW_QUEUE(_t, _q, _id)                              \
+       printk(_t ": queue %d write %x read %x base %x limit %x\n",     \
+              _id, _q->write, _q->read, _q->base, _q->limit);
+#endif
+
+/*
+ * We've been passed at least 2 pages. 1 control page and 1 or more
+ * data pages.
+ */
+int net_accel_msg_init_page(void *mem, int len, int up)
+{
+       struct net_accel_shared_page *shared_page = 
+               (struct net_accel_shared_page*)mem;
+
+       if ((unsigned long)shared_page & NET_ACCEL_MSG_Q_MASK)
+               return -EINVAL;
+
+       shared_page->magic = NET_ACCEL_MSG_MAGIC;
+
+       shared_page->aflags = 0;
+
+       shared_page->net_dev_up = up;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(net_accel_msg_init_page);
+
+
+void net_accel_msg_init_queue(sh_msg_fifo2 *queue,
+                             struct net_accel_msg_queue *indices,
+                             struct net_accel_msg *base, int size)
+{
+       queue->fifo = base;
+       spin_lock_init(&queue->lock);
+       sh_fifo2_init(queue, size-1, &indices->read, &indices->write);
+}
+EXPORT_SYMBOL_GPL(net_accel_msg_init_queue);
+
+
+static inline int _net_accel_msg_send(struct net_accel_shared_page *sp,
+                                     sh_msg_fifo2 *queue,
+                                     struct net_accel_msg *msg,
+                                     int is_reply)
+{
+       int rc = 0;
+       NET_ACCEL_CHECK_MAGIC(sp, -EINVAL);
+       rmb();
+       if (is_reply) {
+               EPRINTK_ON(sh_fifo2_is_full(queue));
+               sh_fifo2_put(queue, *msg);
+       } else {
+               if (sh_fifo2_not_half_full(queue)) {
+                       sh_fifo2_put(queue, *msg);
+               } else {
+                       rc = -ENOSPC;
+               }
+       }
+       wmb();
+       return rc;
+}
+
+/* Notify after a batch of messages have been sent */
+void net_accel_msg_notify(int irq)
+{
+       notify_remote_via_irq(irq);
+}
+EXPORT_SYMBOL_GPL(net_accel_msg_notify);
+
+/* 
+ * Send a message on the specified FIFO. Returns 0 on success, -errno
+ * on failure. The message in msg is copied to the current slot of the
+ * FIFO.
+ */
+int net_accel_msg_send(struct net_accel_shared_page *sp, sh_msg_fifo2 *q, 
+                      struct net_accel_msg *msg)
+{
+       unsigned long flags;
+       int rc;
+       net_accel_msg_lock_queue(q, &flags);
+       rc = _net_accel_msg_send(sp, q, msg, 0);
+       net_accel_msg_unlock_queue(q, &flags);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(net_accel_msg_send);
+
+
+/* As net_accel_msg_send but also posts a notification to the far end. */
+int net_accel_msg_send_notify(struct net_accel_shared_page *sp, int irq, 
+                             sh_msg_fifo2 *q, struct net_accel_msg *msg)
+{
+       unsigned long flags;
+       int rc;
+       net_accel_msg_lock_queue(q, &flags);
+       rc = _net_accel_msg_send(sp, q, msg, 0);
+       net_accel_msg_unlock_queue(q, &flags);
+       if (rc >= 0)
+               notify_remote_via_irq(irq);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(net_accel_msg_send_notify);
+
+
+int net_accel_msg_reply(struct net_accel_shared_page *sp, sh_msg_fifo2 *q, 
+                      struct net_accel_msg *msg)
+{
+       unsigned long flags;
+       int rc;
+       net_accel_msg_lock_queue(q, &flags);
+       rc = _net_accel_msg_send(sp, q, msg, 1);
+       net_accel_msg_unlock_queue(q, &flags);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(net_accel_msg_reply);
+
+
+/* As net_accel_msg_send but also posts a notification to the far end. */
+int net_accel_msg_reply_notify(struct net_accel_shared_page *sp, int irq, 
+                             sh_msg_fifo2 *q, struct net_accel_msg *msg)
+{
+       unsigned long flags;
+       int rc;
+       net_accel_msg_lock_queue(q, &flags);
+       rc = _net_accel_msg_send(sp, q, msg, 1);
+       net_accel_msg_unlock_queue(q, &flags);
+       if (rc >= 0)
+               notify_remote_via_irq(irq);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(net_accel_msg_reply_notify);
+
+
+/*
+ * Look at a received message, if any, so a decision can be made about
+ * whether to read it now or not.  Cookie is a bit of debug which is
+ * set here and checked when passed to net_accel_msg_recv_next()
+ */
+int net_accel_msg_peek(struct net_accel_shared_page *sp, 
+                      sh_msg_fifo2 *queue, 
+                      struct net_accel_msg *msg, int *cookie)
+{
+       unsigned long flags;
+       int rc = 0;
+       NET_ACCEL_CHECK_MAGIC(sp, -EINVAL);
+       net_accel_msg_lock_queue(queue, &flags);
+       rmb();
+       if (sh_fifo2_is_empty(queue)) {
+               rc = -ENOENT;
+       } else {
+               *msg = sh_fifo2_peek(queue);
+               *cookie = *(queue->fifo_rd_i);
+       }
+       net_accel_msg_unlock_queue(queue, &flags);
+       return rc;
+}

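The send half of the message interface above supports two patterns: a single
message queued and kicked in one call (net_accel_msg_send_notify()), or a
batch queued with net_accel_msg_send() followed by a single
net_accel_msg_notify(). The sketch below shows both, assuming the shared
page, FIFO and event-channel irq were already established by the xenbus
handshake elsewhere in the driver; struct net_accel_msg is only zeroed here
because its fields are defined in accel_msg_iface.h rather than in this hunk,
and the example_* helpers are hypothetical.

    #include <linux/errno.h>
    #include <linux/string.h>
    #include "accel_msg_iface.h"

    /* Hypothetical helper: queue one message and kick the far end. */
    static int example_send_one(struct net_accel_shared_page *sp,
                                sh_msg_fifo2 *queue, int msg_irq)
    {
            struct net_accel_msg msg;

            memset(&msg, 0, sizeof(msg));   /* real callers fill in the message */

            /* Fails with -ENOSPC once the FIFO is at least half full; the
             * caller is expected to back off and retry rather than spin. */
            return net_accel_msg_send_notify(sp, msg_irq, queue, &msg);
    }

    /* Hypothetical helper: queue a small batch, then notify once. */
    static int example_send_batch(struct net_accel_shared_page *sp,
                                  sh_msg_fifo2 *queue, int msg_irq,
                                  struct net_accel_msg *msgs, int count)
    {
            int i, rc = 0;

            for (i = 0; i < count; i++) {
                    rc = net_accel_msg_send(sp, queue, &msgs[i]);
                    if (rc < 0)
                            break;          /* e.g. -ENOSPC: stop queuing */
            }
            if (i > 0)
                    net_accel_msg_notify(msg_irq);  /* one kick for the batch */
            return rc;
    }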
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 

