
[Xen-changelog] [xen-unstable] [NET] back: Add SG support



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID 50db8c95e65d6bf08ba0107399654439ee05b8fc
# Parent  1dab198509a913b84a57eb764cabe77f96bba86b
[NET] back: Add SG support

This patch adds scatter-gather (SG) support to the backend.  It also
advertises the feature through xenbus (the "feature-sg" key) so that
the frontend can detect it and submit SG requests only when they are
supported.
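
For context, a frontend would probe this key before enabling SG.  A
minimal sketch of the detection side, assuming the xenbus_scanf()
helper and the null-transaction constant (written XBT_NIL here) of
this tree; the frontend code itself is not part of this patch, and an
absent key simply means an old backend without SG:

    int feature_sg = 0;

    /* "feature-sg" is written under the backend's xenstore directory. */
    if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
                     "%d", &feature_sg) != 1)
            feature_sg = 0;     /* key absent: backend predates SG */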

SG support is required to handle skbs larger than one page.  That in
turn is needed for either jumbo MTUs or TSO, one of which is required
to bring local networking performance up to an acceptable level.
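
Concretely, with the NETTXF_more_data flag introduced below, a
multi-page packet travels as a chain of tx requests: the first slot
carries the total packet size and sets NETTXF_more_data, and each
following slot describes one more page, its size being deducted from
the running total (netbk_count_requests() below walks and validates
exactly this).  A sketch with purely illustrative values (G0..G2 are
hypothetical grant references) for a 6000-byte packet whose head
occupies 1000 bytes:

    enum { G0 = 1, G1 = 2, G2 = 3 };    /* hypothetical grant refs */

    /* Illustrative only: 6000-byte packet, PAGE_SIZE = 4096. */
    struct netif_tx_request chain[3] = {
            /* head slot: size holds the whole packet length */
            { .gref = G0, .offset = 256, .size = 6000,
              .flags = NETTXF_more_data },
            { .gref = G1, .offset = 0, .size = 4096,
              .flags = NETTXF_more_data },
            /* last slot: no more_data; 6000 - 4096 - 904 = 1000
             * bytes remain as the head slot's own payload */
            { .gref = G2, .offset = 0, .size = 904, .flags = 0 },
    };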

Signed-off-by: Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx>
---
 linux-2.6-xen-sparse/drivers/xen/netback/netback.c |  281 ++++++++++++++++-----
 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c  |   26 +
 xen/include/public/io/netif.h                      |    4 
 xen/include/public/io/ring.h                       |   10 
 4 files changed, 264 insertions(+), 57 deletions(-)

diff -r 1dab198509a9 -r 50db8c95e65d linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Mon Jun 05 15:18:13 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Mon Jun 05 16:13:47 2006 +0100
@@ -490,6 +490,178 @@ inline static void net_tx_action_dealloc
        }
 }
 
+static void netbk_tx_err(netif_t *netif, RING_IDX end)
+{
+       RING_IDX cons = netif->tx.req_cons;
+
+       do {
+               netif_tx_request_t *txp = RING_GET_REQUEST(&netif->tx, cons);
+               make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
+       } while (++cons < end);
+       netif->tx.req_cons = cons;
+       netif_schedule_work(netif);
+       netif_put(netif);
+}
+
+static int netbk_count_requests(netif_t *netif, netif_tx_request_t *txp,
+                               int work_to_do)
+{
+       netif_tx_request_t *first = txp;
+       RING_IDX cons = netif->tx.req_cons;
+       int frags = 1;
+
+       while (txp->flags & NETTXF_more_data) {
+               if (frags >= work_to_do) {
+                       DPRINTK("Need more frags\n");
+                       return -frags;
+               }
+
+               txp = RING_GET_REQUEST(&netif->tx, cons + frags);
+               if (txp->size > first->size) {
+                       DPRINTK("Frags galore\n");
+                       return -frags;
+               }
+
+               first->size -= txp->size;
+               frags++;
+
+               if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
+                       DPRINTK("txp->offset: %x, size: %u\n",
+                               txp->offset, txp->size);
+                       return -frags;
+               }
+       }
+
+       return frags;
+}
+
+static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
+                                                 struct sk_buff *skb,
+                                                 gnttab_map_grant_ref_t *mop)
+{
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+       skb_frag_t *frags = shinfo->frags;
+       netif_tx_request_t *txp;
+       unsigned long pending_idx = *((u16 *)skb->data);
+       int nr_frags = shinfo->nr_frags;
+       RING_IDX cons = netif->tx.req_cons + 1;
+       int i;
+
+       if ((unsigned long)shinfo->frags[0].page == pending_idx) {
+               frags++;
+               nr_frags--;
+       }
+
+       for (i = 0; i < nr_frags; i++) {
+               txp = RING_GET_REQUEST(&netif->tx, cons + i);
+               pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
+
+               gnttab_set_map_op(mop++, MMAP_VADDR(pending_idx),
+                                 GNTMAP_host_map | GNTMAP_readonly,
+                                 txp->gref, netif->domid);
+
+               memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
+               netif_get(netif);
+               pending_tx_info[pending_idx].netif = netif;
+               frags[i].page = (void *)pending_idx;
+       }
+
+       return mop;
+}
+
+static int netbk_tx_check_mop(struct sk_buff *skb,
+                              gnttab_map_grant_ref_t **mopp)
+{
+       gnttab_map_grant_ref_t *mop = *mopp;
+       int pending_idx = *((u16 *)skb->data);
+       netif_t *netif = pending_tx_info[pending_idx].netif;
+       netif_tx_request_t *txp;
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+       int nr_frags = shinfo->nr_frags;
+       int start;
+       int err;
+       int i;
+
+       err = mop->status;
+       if (unlikely(err)) {
+               txp = &pending_tx_info[pending_idx].req;
+               make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
+               pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+               netif_put(netif);
+       } else {
+               set_phys_to_machine(
+                       __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
+                       FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
+               grant_tx_handle[pending_idx] = mop->handle;
+       }
+
+       start = 0;
+       if ((unsigned long)shinfo->frags[0].page == pending_idx)
+               start++;
+
+       for (i = start; i < nr_frags; i++) {
+               int newerr;
+               int j;
+
+               pending_idx = (unsigned long)shinfo->frags[i].page;
+
+               newerr = (++mop)->status;
+               if (likely(!newerr)) {
+                       set_phys_to_machine(
+                               __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
+                               FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
+                       grant_tx_handle[pending_idx] = mop->handle;
+
+                       if (unlikely(err))
+                               netif_idx_release(pending_idx);
+                       continue;
+               }
+
+               txp = &pending_tx_info[pending_idx].req;
+               make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
+               pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+               netif_put(netif);
+
+               if (err)
+                       continue;
+
+               pending_idx = *((u16 *)skb->data);
+               netif_idx_release(pending_idx);
+
+               for (j = start; j < i; j++) {
+                       pending_idx = (unsigned long)shinfo->frags[j].page;
+                       netif_idx_release(pending_idx);
+               }
+               err |= newerr;
+       }
+
+       *mopp = mop + 1;
+       return err;
+}
+
+static void netbk_fill_frags(struct sk_buff *skb)
+{
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+       int nr_frags = shinfo->nr_frags;
+       int i;
+
+       for (i = 0; i < nr_frags; i++) {
+               skb_frag_t *frag = shinfo->frags + i;
+               netif_tx_request_t *txp;
+               unsigned long pending_idx;
+
+               pending_idx = (unsigned long)frag->page;
+               txp = &pending_tx_info[pending_idx].req;
+               frag->page = virt_to_page(MMAP_VADDR(pending_idx));
+               frag->size = txp->size;
+               frag->page_offset = txp->offset;
+
+               skb->len += txp->size;
+               skb->data_len += txp->size;
+               skb->truesize += txp->size;
+       }
+}
+
 /* Called after netfront has transmitted */
 static void net_tx_action(unsigned long unused)
 {
@@ -507,7 +679,7 @@ static void net_tx_action(unsigned long 
                net_tx_action_dealloc();
 
        mop = tx_map_ops;
-       while ((NR_PENDING_REQS < MAX_PENDING_REQS) &&
+       while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
                !list_empty(&net_schedule_list)) {
                /* Get a netif from the list with work to do. */
                ent = net_schedule_list.next;
@@ -555,38 +727,44 @@ static void net_tx_action(unsigned long 
                }
                netif->remaining_credit -= txreq.size;
 
-               netif->tx.req_cons++;
-
-               netif_schedule_work(netif);
-
-               if (unlikely(txreq.size < ETH_HLEN) || 
-                   unlikely(txreq.size > ETH_FRAME_LEN)) {
+               ret = netbk_count_requests(netif, &txreq, work_to_do);
+               if (unlikely(ret < 0)) {
+                       netbk_tx_err(netif, i - ret);
+                       continue;
+               }
+               i += ret;
+
+               if (unlikely(ret > MAX_SKB_FRAGS + 1)) {
+                       DPRINTK("Too many frags\n");
+                       netbk_tx_err(netif, i);
+                       continue;
+               }
+
+               if (unlikely(txreq.size < ETH_HLEN)) {
                        DPRINTK("Bad packet size: %d\n", txreq.size);
-                       make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-                       netif_put(netif);
+                       netbk_tx_err(netif, i);
                        continue; 
                }
 
                /* No crossing a page as the payload mustn't fragment. */
-               if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) {
+               if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
                        DPRINTK("txreq.offset: %x, size: %u, end: %lu\n", 
                                txreq.offset, txreq.size, 
                                (txreq.offset &~PAGE_MASK) + txreq.size);
-                       make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-                       netif_put(netif);
+                       netbk_tx_err(netif, i);
                        continue;
                }
 
                pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
 
-               data_len = (txreq.size > PKT_PROT_LEN) ?
+               data_len = (txreq.size > PKT_PROT_LEN &&
+                           ret < MAX_SKB_FRAGS + 1) ?
                        PKT_PROT_LEN : txreq.size;
 
                skb = alloc_skb(data_len+16, GFP_ATOMIC);
                if (unlikely(skb == NULL)) {
                        DPRINTK("Can't allocate a skb in start_xmit.\n");
-                       make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-                       netif_put(netif);
+                       netbk_tx_err(netif, i);
                        break;
                }
 
@@ -603,9 +781,23 @@ static void net_tx_action(unsigned long 
                pending_tx_info[pending_idx].netif = netif;
                *((u16 *)skb->data) = pending_idx;
 
+               __skb_put(skb, data_len);
+
+               skb_shinfo(skb)->nr_frags = ret - 1;
+               if (data_len < txreq.size) {
+                       skb_shinfo(skb)->nr_frags++;
+                       skb_shinfo(skb)->frags[0].page =
+                               (void *)(unsigned long)pending_idx;
+               }
+
                __skb_queue_tail(&tx_queue, skb);
 
                pending_cons++;
+
+               mop = netbk_get_requests(netif, skb, mop);
+
+               netif->tx.req_cons = i;
+               netif_schedule_work(netif);
 
                if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
                        break;
@@ -620,75 +812,56 @@ static void net_tx_action(unsigned long 
 
        mop = tx_map_ops;
        while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
+               netif_tx_request_t *txp;
+
                pending_idx = *((u16 *)skb->data);
                netif       = pending_tx_info[pending_idx].netif;
-               memcpy(&txreq, &pending_tx_info[pending_idx].req,
-                      sizeof(txreq));
+               txp         = &pending_tx_info[pending_idx].req;
 
                /* Check the remap error code. */
-               if (unlikely(mop->status)) {
+               if (unlikely(netbk_tx_check_mop(skb, &mop))) {
                        printk(KERN_ALERT "#### netback grant fails\n");
-                       make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-                       netif_put(netif);
+                       skb_shinfo(skb)->nr_frags = 0;
                        kfree_skb(skb);
-                       mop++;
-                       pending_ring[MASK_PEND_IDX(pending_prod++)] =
-                               pending_idx;
                        continue;
                }
-               set_phys_to_machine(
-                       __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
-                       FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
-               grant_tx_handle[pending_idx] = mop->handle;
-
-               data_len = (txreq.size > PKT_PROT_LEN) ?
-                       PKT_PROT_LEN : txreq.size;
-
-               __skb_put(skb, data_len);
+
+               data_len = skb->len;
                memcpy(skb->data, 
-                      (void *)(MMAP_VADDR(pending_idx)|txreq.offset),
+                      (void *)(MMAP_VADDR(pending_idx)|txp->offset),
                       data_len);
-               if (data_len < txreq.size) {
+               if (data_len < txp->size) {
                        /* Append the packet payload as a fragment. */
-                       skb_shinfo(skb)->frags[0].page        = 
-                               virt_to_page(MMAP_VADDR(pending_idx));
-                       skb_shinfo(skb)->frags[0].size        =
-                               txreq.size - data_len;
-                       skb_shinfo(skb)->frags[0].page_offset = 
-                               txreq.offset + data_len;
-                       skb_shinfo(skb)->nr_frags = 1;
+                       txp->offset += data_len;
+                       txp->size -= data_len;
                } else {
                        /* Schedule a response immediately. */
                        netif_idx_release(pending_idx);
                }
-
-               skb->data_len  = txreq.size - data_len;
-               skb->len      += skb->data_len;
-               skb->truesize += skb->data_len;
-
-               skb->dev      = netif->dev;
-               skb->protocol = eth_type_trans(skb, skb->dev);
 
                /*
                 * Old frontends do not assert data_validated but we
                 * can infer it from csum_blank so test both flags.
                 */
-               if (txreq.flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
+               if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
                        skb->proto_data_valid = 1;
                } else {
                        skb->ip_summed = CHECKSUM_NONE;
                        skb->proto_data_valid = 0;
                }
-               skb->proto_csum_blank = !!(txreq.flags & NETTXF_csum_blank);
-
-               netif->stats.rx_bytes += txreq.size;
+               skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
+
+               netbk_fill_frags(skb);
+
+               skb->dev      = netif->dev;
+               skb->protocol = eth_type_trans(skb, skb->dev);
+
+               netif->stats.rx_bytes += skb->len;
                netif->stats.rx_packets++;
 
                netif_rx(skb);
                netif->dev->last_rx = jiffies;
-
-               mop++;
        }
 }
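
A note on the reworked admission check in net_tx_action() above: one
SG packet can now consume up to MAX_SKB_FRAGS + 1 pending slots (the
head slot plus its fragments), so the loop only starts a packet when
at least that many slots are free.  The arithmetic, restated on its
own (names illustrative, not the kernel macros):

    /* Free slots = MAX_PENDING_REQS - NR_PENDING_REQS.  Requiring
     * NR_PENDING_REQS + MAX_SKB_FRAGS < MAX_PENDING_REQS makes the
     * free count strictly greater than MAX_SKB_FRAGS, i.e. at least
     * MAX_SKB_FRAGS + 1, the longest request chain accepted. */
    #define CAN_START_PACKET(nr_pending, max_pending, max_frags) \
            ((nr_pending) + (max_frags) < (max_pending))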
 
diff -r 1dab198509a9 -r 50db8c95e65d linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Mon Jun 05 15:18:13 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Mon Jun 05 16:13:47 2006 +0100
@@ -69,6 +69,8 @@ static int netback_probe(struct xenbus_d
 static int netback_probe(struct xenbus_device *dev,
                         const struct xenbus_device_id *id)
 {
+       const char *message;
+       xenbus_transaction_t xbt;
        int err;
        struct backend_info *be = kzalloc(sizeof(struct backend_info),
                                          GFP_KERNEL);
@@ -86,6 +88,27 @@ static int netback_probe(struct xenbus_d
        if (err)
                goto fail;
 
+       do {
+               err = xenbus_transaction_start(&xbt);
+               if (err) {
+                       xenbus_dev_fatal(dev, err, "starting transaction");
+                       goto fail;
+               }
+
+               err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
+               if (err) {
+                       message = "writing feature-sg";
+                       goto abort_transaction;
+               }
+
+               err = xenbus_transaction_end(xbt, 0);
+       } while (err == -EAGAIN);
+
+       if (err) {
+               xenbus_dev_fatal(dev, err, "completing transaction");
+               goto fail;
+       }
+
        err = xenbus_switch_state(dev, XenbusStateInitWait);
        if (err) {
                goto fail;
@@ -93,6 +116,9 @@ static int netback_probe(struct xenbus_d
 
        return 0;
 
+abort_transaction:
+       xenbus_transaction_end(xbt, 1);
+       xenbus_dev_fatal(dev, err, "%s", message);
 fail:
        DPRINTK("failed");
        netback_remove(dev);
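
Design note on the hunk above: the feature-sg write is wrapped in a
xenstore transaction, and xenbus_transaction_end() returns -EAGAIN
when the transaction raced with a concurrent store update, hence the
do/while retry loop.  Reduced to its skeleton (error reporting
elided; a sketch, not the exact code):

    do {
            err = xenbus_transaction_start(&xbt);
            if (err)
                    break;                          /* fatal */
            err = xenbus_printf(xbt, dev->nodename,
                                "feature-sg", "%d", 1);
            if (err) {
                    xenbus_transaction_end(xbt, 1); /* abort */
                    break;
            }
            err = xenbus_transaction_end(xbt, 0);   /* commit */
    } while (err == -EAGAIN);
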
diff -r 1dab198509a9 -r 50db8c95e65d xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h     Mon Jun 05 15:18:13 2006 +0100
+++ b/xen/include/public/io/netif.h     Mon Jun 05 16:13:47 2006 +0100
@@ -26,6 +26,10 @@
 /* Packet data has been validated against protocol checksum. */
 #define _NETTXF_data_validated (1)
 #define  NETTXF_data_validated (1U<<_NETTXF_data_validated)
+
+/* Packet continues in the next request descriptor. */
+#define _NETTXF_more_data      (2)
+#define  NETTXF_more_data      (1U<<_NETTXF_more_data)
 
 struct netif_tx_request {
     grant_ref_t gref;      /* Reference to buffer page */
diff -r 1dab198509a9 -r 50db8c95e65d xen/include/public/io/ring.h
--- a/xen/include/public/io/ring.h      Mon Jun 05 15:18:13 2006 +0100
+++ b/xen/include/public/io/ring.h      Mon Jun 05 16:13:47 2006 +0100
@@ -159,11 +159,15 @@ typedef struct __name##_back_ring __name
 
 /* Test if there are outstanding messages to be processed on a ring. */
 #define RING_HAS_UNCONSUMED_RESPONSES(_r)                               \
-    ((_r)->rsp_cons != (_r)->sring->rsp_prod)
+    ((_r)->sring->rsp_prod - (_r)->rsp_cons)
 
 #define RING_HAS_UNCONSUMED_REQUESTS(_r)                                \
-    (((_r)->req_cons != (_r)->sring->req_prod) &&                       \
-     (((_r)->req_cons - (_r)->rsp_prod_pvt) != RING_SIZE(_r)))
+    ({                                                                 \
+       unsigned int req = (_r)->sring->req_prod - (_r)->req_cons;      \
+       unsigned int rsp = RING_SIZE(_r) -                              \
+                          ((_r)->req_cons - (_r)->rsp_prod_pvt);       \
+       req < rsp ? req : rsp;                                          \
+    })
 
 /* Direct access to individual ring elements, by index. */
 #define RING_GET_REQUEST(_r, _idx)                                      \
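
The ring.h change above turns both macros from booleans into counts;
in particular, RING_HAS_UNCONSUMED_REQUESTS() now reports how many
requests may safely be consumed, clamped to the response slots still
available, which gives net_tx_action() a meaningful work_to_do bound
for netbk_count_requests().  A standalone model with worked numbers
(illustrative names, not the kernel macros):

    #include <stdio.h>

    /* Free-running ring indices wrap via unsigned arithmetic, as in
     * ring.h. */
    static unsigned int unconsumed_requests(unsigned int req_prod,
                                            unsigned int req_cons,
                                            unsigned int rsp_prod_pvt,
                                            unsigned int ring_size)
    {
            unsigned int req = req_prod - req_cons;
            unsigned int rsp = ring_size - (req_cons - rsp_prod_pvt);
            return req < rsp ? req : rsp;       /* min(req, rsp) */
    }

    int main(void)
    {
            /* 8-slot ring, 4 requests posted but unread, 2 responses
             * still owed: min(4, 8 - 2) = 4. */
            printf("%u\n", unconsumed_requests(10, 6, 4, 8));
            return 0;
    }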
