[Xen-changelog] [xen-unstable] [NET] back: Add SG support
# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID 50db8c95e65d6bf08ba0107399654439ee05b8fc
# Parent  1dab198509a913b84a57eb764cabe77f96bba86b
[NET] back: Add SG support

This patch adds scatter-and-gather support to the backend.  It also
advertises this fact through xenbus so that the frontend can detect this
and send through SG requests only if it is supported.

SG support is required to support skb's larger than one page.  This in
turn is needed for either jumbo MTU or TSO.  One of these is required to
bring local networking performance up to a level that is acceptable.

Signed-off-by: Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx>
---
 linux-2.6-xen-sparse/drivers/xen/netback/netback.c |  281 ++++++++++++++++-----
 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c  |   26 +
 xen/include/public/io/netif.h                      |    4 
 xen/include/public/io/ring.h                       |   10 
 4 files changed, 264 insertions(+), 57 deletions(-)

diff -r 1dab198509a9 -r 50db8c95e65d linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c    Mon Jun 05 15:18:13 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c    Mon Jun 05 16:13:47 2006 +0100
@@ -490,6 +490,178 @@ inline static void net_tx_action_dealloc
         }
 }
 
+static void netbk_tx_err(netif_t *netif, RING_IDX end)
+{
+        RING_IDX cons = netif->tx.req_cons;
+
+        do {
+                netif_tx_request_t *txp = RING_GET_REQUEST(&netif->tx, cons);
+                make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
+        } while (++cons < end);
+        netif->tx.req_cons = cons;
+        netif_schedule_work(netif);
+        netif_put(netif);
+}
+
+static int netbk_count_requests(netif_t *netif, netif_tx_request_t *txp,
+                                int work_to_do)
+{
+        netif_tx_request_t *first = txp;
+        RING_IDX cons = netif->tx.req_cons;
+        int frags = 1;
+
+        while (txp->flags & NETTXF_more_data) {
+                if (frags >= work_to_do) {
+                        DPRINTK("Need more frags\n");
+                        return -frags;
+                }
+
+                txp = RING_GET_REQUEST(&netif->tx, cons + frags);
+                if (txp->size > first->size) {
+                        DPRINTK("Frags galore\n");
+                        return -frags;
+                }
+
+                first->size -= txp->size;
+                frags++;
+
+                if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
+                        DPRINTK("txp->offset: %x, size: %u\n",
+                                txp->offset, txp->size);
+                        return -frags;
+                }
+        }
+
+        return frags;
+}
+
+static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
+                                                  struct sk_buff *skb,
+                                                  gnttab_map_grant_ref_t *mop)
+{
+        struct skb_shared_info *shinfo = skb_shinfo(skb);
+        skb_frag_t *frags = shinfo->frags;
+        netif_tx_request_t *txp;
+        unsigned long pending_idx = *((u16 *)skb->data);
+        int nr_frags = shinfo->nr_frags;
+        RING_IDX cons = netif->tx.req_cons + 1;
+        int i;
+
+        if ((unsigned long)shinfo->frags[0].page == pending_idx) {
+                frags++;
+                nr_frags--;
+        }
+
+        for (i = 0; i < nr_frags; i++) {
+                txp = RING_GET_REQUEST(&netif->tx, cons + i);
+                pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
+
+                gnttab_set_map_op(mop++, MMAP_VADDR(pending_idx),
+                                  GNTMAP_host_map | GNTMAP_readonly,
+                                  txp->gref, netif->domid);
+
+                memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
+                netif_get(netif);
+                pending_tx_info[pending_idx].netif = netif;
+                frags[i].page = (void *)pending_idx;
+        }
+
+        return mop;
+}
+
+static int netbk_tx_check_mop(struct sk_buff *skb,
+                              gnttab_map_grant_ref_t **mopp)
+{
+        gnttab_map_grant_ref_t *mop = *mopp;
+        int pending_idx = *((u16 *)skb->data);
+        netif_t *netif = pending_tx_info[pending_idx].netif;
+        netif_tx_request_t *txp;
+        struct skb_shared_info *shinfo = skb_shinfo(skb);
+        int nr_frags = shinfo->nr_frags;
+        int start;
+        int err;
+        int i;
+
+        err = mop->status;
+        if (unlikely(err)) {
+                txp = &pending_tx_info[pending_idx].req;
+                make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
+                pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+                netif_put(netif);
+        } else {
+                set_phys_to_machine(
+                        __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
+                        FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
+                grant_tx_handle[pending_idx] = mop->handle;
+        }
+
+        start = 0;
+        if ((unsigned long)shinfo->frags[0].page == pending_idx)
+                start++;
+
+        for (i = start; i < nr_frags; i++) {
+                int newerr;
+                int j;
+
+                pending_idx = (unsigned long)shinfo->frags[i].page;
+
+                newerr = (++mop)->status;
+                if (likely(!newerr)) {
+                        set_phys_to_machine(
+                                __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
+                                FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
+                        grant_tx_handle[pending_idx] = mop->handle;
+
+                        if (unlikely(err))
+                                netif_idx_release(pending_idx);
+                        continue;
+                }
+
+                txp = &pending_tx_info[pending_idx].req;
+                make_tx_response(netif, txp->id, NETIF_RSP_ERROR);
+                pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+                netif_put(netif);
+
+                if (err)
+                        continue;
+
+                pending_idx = *((u16 *)skb->data);
+                netif_idx_release(pending_idx);
+
+                for (j = start; j < i; j++) {
+                        pending_idx = (unsigned long)shinfo->frags[i].page;
+                        netif_idx_release(pending_idx);
+                }
+
+                err |= newerr;
+        }
+
+        *mopp = mop + 1;
+        return err;
+}
+
+static void netbk_fill_frags(struct sk_buff *skb)
+{
+        struct skb_shared_info *shinfo = skb_shinfo(skb);
+        int nr_frags = shinfo->nr_frags;
+        int i;
+
+        for (i = 0; i < nr_frags; i++) {
+                skb_frag_t *frag = shinfo->frags + i;
+                netif_tx_request_t *txp;
+                unsigned long pending_idx;
+
+                pending_idx = (unsigned long)frag->page;
+                txp = &pending_tx_info[pending_idx].req;
+                frag->page = virt_to_page(MMAP_VADDR(pending_idx));
+                frag->size = txp->size;
+                frag->page_offset = txp->offset;
+
+                skb->len += txp->size;
+                skb->data_len += txp->size;
+                skb->truesize += txp->size;
+        }
+}
+
 /* Called after netfront has transmitted */
 static void net_tx_action(unsigned long unused)
 {
@@ -507,7 +679,7 @@ static void net_tx_action(unsigned long 
         net_tx_action_dealloc();
 
         mop = tx_map_ops;
-        while ((NR_PENDING_REQS < MAX_PENDING_REQS) &&
+        while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
                 !list_empty(&net_schedule_list)) {
                 /* Get a netif from the list with work to do. */
                 ent = net_schedule_list.next;
@@ -555,38 +727,44 @@ static void net_tx_action(unsigned long 
                 }
                 netif->remaining_credit -= txreq.size;
 
-                netif->tx.req_cons++;
-
-                netif_schedule_work(netif);
-
-                if (unlikely(txreq.size < ETH_HLEN) ||
-                    unlikely(txreq.size > ETH_FRAME_LEN)) {
+                ret = netbk_count_requests(netif, &txreq, work_to_do);
+                if (unlikely(ret < 0)) {
+                        netbk_tx_err(netif, i - ret);
+                        continue;
+                }
+                i += ret;
+
+                if (unlikely(ret > MAX_SKB_FRAGS + 1)) {
+                        DPRINTK("Too many frags\n");
+                        netbk_tx_err(netif, i);
+                        continue;
+                }
+
+                if (unlikely(txreq.size < ETH_HLEN)) {
                         DPRINTK("Bad packet size: %d\n", txreq.size);
-                        make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-                        netif_put(netif);
+                        netbk_tx_err(netif, i);
                         continue;
                 }
 
                 /* No crossing a page as the payload mustn't fragment. */
-                if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) {
+                if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
                         DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
                                 txreq.offset, txreq.size,
                                 (txreq.offset &~PAGE_MASK) + txreq.size);
-                        make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-                        netif_put(netif);
+                        netbk_tx_err(netif, i);
                         continue;
                 }
 
                 pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
 
-                data_len = (txreq.size > PKT_PROT_LEN) ?
+                data_len = (txreq.size > PKT_PROT_LEN &&
+                            ret < MAX_SKB_FRAGS + 1) ?
                         PKT_PROT_LEN : txreq.size;
 
                 skb = alloc_skb(data_len+16, GFP_ATOMIC);
                 if (unlikely(skb == NULL)) {
                         DPRINTK("Can't allocate a skb in start_xmit.\n");
-                        make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-                        netif_put(netif);
+                        netbk_tx_err(netif, i);
                         break;
                 }
 
@@ -603,9 +781,23 @@ static void net_tx_action(unsigned long 
                 pending_tx_info[pending_idx].netif = netif;
                 *((u16 *)skb->data) = pending_idx;
 
+                __skb_put(skb, data_len);
+
+                skb_shinfo(skb)->nr_frags = ret - 1;
+                if (data_len < txreq.size) {
+                        skb_shinfo(skb)->nr_frags++;
+                        skb_shinfo(skb)->frags[0].page =
+                                (void *)(unsigned long)pending_idx;
+                }
+
                 __skb_queue_tail(&tx_queue, skb);
 
                 pending_cons++;
+
+                mop = netbk_get_requests(netif, skb, mop);
+
+                netif->tx.req_cons = i;
+                netif_schedule_work(netif);
 
                 if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
                         break;
@@ -620,75 +812,56 @@ static void net_tx_action(unsigned long 
         mop = tx_map_ops;
         while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
+                netif_tx_request_t *txp;
+
                 pending_idx = *((u16 *)skb->data);
                 netif       = pending_tx_info[pending_idx].netif;
-                memcpy(&txreq, &pending_tx_info[pending_idx].req,
-                       sizeof(txreq));
+                txp = &pending_tx_info[pending_idx].req;
 
                 /* Check the remap error code. */
-                if (unlikely(mop->status)) {
+                if (unlikely(netbk_tx_check_mop(skb, &mop))) {
                         printk(KERN_ALERT "#### netback grant fails\n");
-                        make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-                        netif_put(netif);
+                        skb_shinfo(skb)->nr_frags = 0;
                         kfree_skb(skb);
-                        mop++;
-                        pending_ring[MASK_PEND_IDX(pending_prod++)] =
-                                pending_idx;
                         continue;
                 }
 
-                set_phys_to_machine(
-                        __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
-                        FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
-                grant_tx_handle[pending_idx] = mop->handle;
-
-                data_len = (txreq.size > PKT_PROT_LEN) ?
-                        PKT_PROT_LEN : txreq.size;
-
-                __skb_put(skb, data_len);
+
+                data_len = skb->len;
                 memcpy(skb->data,
-                       (void *)(MMAP_VADDR(pending_idx)|txreq.offset),
+                       (void *)(MMAP_VADDR(pending_idx)|txp->offset),
                        data_len);
-                if (data_len < txreq.size) {
+                if (data_len < txp->size) {
                         /* Append the packet payload as a fragment. */
-                        skb_shinfo(skb)->frags[0].page        =
-                                virt_to_page(MMAP_VADDR(pending_idx));
-                        skb_shinfo(skb)->frags[0].size        =
-                                txreq.size - data_len;
-                        skb_shinfo(skb)->frags[0].page_offset =
-                                txreq.offset + data_len;
-                        skb_shinfo(skb)->nr_frags = 1;
+                        txp->offset += data_len;
+                        txp->size -= data_len;
                 } else {
                         /* Schedule a response immediately. */
                         netif_idx_release(pending_idx);
                 }
-
-                skb->data_len  = txreq.size - data_len;
-                skb->len      += skb->data_len;
-                skb->truesize += skb->data_len;
-
-                skb->dev      = netif->dev;
-                skb->protocol = eth_type_trans(skb, skb->dev);
 
                 /*
                  * Old frontends do not assert data_validated but we
                  * can infer it from csum_blank so test both flags.
                  */
-                if (txreq.flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
+                if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
                         skb->ip_summed = CHECKSUM_UNNECESSARY;
                         skb->proto_data_valid = 1;
                 } else {
                         skb->ip_summed = CHECKSUM_NONE;
                         skb->proto_data_valid = 0;
                 }
-                skb->proto_csum_blank = !!(txreq.flags & NETTXF_csum_blank);
-
-                netif->stats.rx_bytes += txreq.size;
+                skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
+
+                netbk_fill_frags(skb);
+
+                skb->dev      = netif->dev;
+                skb->protocol = eth_type_trans(skb, skb->dev);
+
+                netif->stats.rx_bytes += skb->len;
                 netif->stats.rx_packets++;
 
                 netif_rx(skb);
                 netif->dev->last_rx = jiffies;
-
-                mop++;
         }
 }
 
diff -r 1dab198509a9 -r 50db8c95e65d linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c    Mon Jun 05 15:18:13 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c    Mon Jun 05 16:13:47 2006 +0100
@@ -69,6 +69,8 @@ static int netback_probe(struct xenbus_d
 static int netback_probe(struct xenbus_device *dev,
                          const struct xenbus_device_id *id)
 {
+        const char *message;
+        xenbus_transaction_t xbt;
         int err;
         struct backend_info *be = kzalloc(sizeof(struct backend_info),
                                           GFP_KERNEL);
@@ -86,6 +88,27 @@ static int netback_probe(struct xenbus_d
         if (err)
                 goto fail;
 
+        do {
+                err = xenbus_transaction_start(&xbt);
+                if (err) {
+                        xenbus_dev_fatal(dev, err, "starting transaction");
+                        goto fail;
+                }
+
+                err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
+                if (err) {
+                        message = "writing feature-sg";
+                        goto abort_transaction;
+                }
+
+                err = xenbus_transaction_end(xbt, 0);
+        } while (err == -EAGAIN);
+
+        if (err) {
+                xenbus_dev_fatal(dev, err, "completing transaction");
+                goto fail;
+        }
+
         err = xenbus_switch_state(dev, XenbusStateInitWait);
         if (err) {
                 goto fail;
@@ -93,6 +116,9 @@ static int netback_probe(struct xenbus_d
 
         return 0;
 
+abort_transaction:
+        xenbus_transaction_end(xbt, 1);
+        xenbus_dev_fatal(dev, err, "%s", message);
 fail:
         DPRINTK("failed");
         netback_remove(dev);

diff -r 1dab198509a9 -r 50db8c95e65d xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h    Mon Jun 05 15:18:13 2006 +0100
+++ b/xen/include/public/io/netif.h    Mon Jun 05 16:13:47 2006 +0100
@@ -26,6 +26,10 @@
 /* Packet data has been validated against protocol checksum. */
 #define _NETTXF_data_validated (1)
 #define  NETTXF_data_validated (1U<<_NETTXF_data_validated)
+
+/* Packet continues in the request. */
+#define _NETTXF_more_data      (2)
+#define  NETTXF_more_data      (1U<<_NETTXF_more_data)
 
 struct netif_tx_request {
     grant_ref_t gref;      /* Reference to buffer page */

diff -r 1dab198509a9 -r 50db8c95e65d xen/include/public/io/ring.h
--- a/xen/include/public/io/ring.h    Mon Jun 05 15:18:13 2006 +0100
+++ b/xen/include/public/io/ring.h    Mon Jun 05 16:13:47 2006 +0100
@@ -159,11 +159,15 @@ typedef struct __name##_back_ring __name
 
 /* Test if there are outstanding messages to be processed on a ring. */
 #define RING_HAS_UNCONSUMED_RESPONSES(_r)                               \
-    ((_r)->rsp_cons != (_r)->sring->rsp_prod)
+    ((_r)->sring->rsp_prod - (_r)->rsp_cons)
 
 #define RING_HAS_UNCONSUMED_REQUESTS(_r)                                \
-    (((_r)->req_cons != (_r)->sring->req_prod) &&                       \
-     (((_r)->req_cons - (_r)->rsp_prod_pvt) != RING_SIZE(_r)))
+    ({                                                                  \
+        unsigned int req = (_r)->sring->req_prod - (_r)->req_cons;      \
+        unsigned int rsp = RING_SIZE(_r) -                              \
+            ((_r)->req_cons - (_r)->rsp_prod_pvt);                      \
+        req < rsp ? req : rsp;                                          \
+    })
 
 /* Direct access to individual ring elements, by index. */
 #define RING_GET_REQUEST(_r, _idx)                                      \
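A note on the ring.h hunk, which is the least self-explanatory part of the patch: RING_HAS_UNCONSUMED_RESPONSES and RING_HAS_UNCONSUMED_REQUESTS now evaluate to counts rather than plain booleans. For requests, the count is the number of entries the frontend has published but the backend has not yet consumed, capped by the number of ring slots still free for responses; in code outside the hunks shown here that value reaches netbk_count_requests as work_to_do, which is what lets an over-long fragment chain be refused when it claims more requests than are actually on the ring. Because a non-zero count is still true in a boolean context, callers that merely test the macros keep working. The following standalone sketch of the arithmetic is illustrative only; it is not code from the tree, and the helper name, ring size and index values are invented:

/*
 * Sketch of the counted RING_HAS_UNCONSUMED_REQUESTS semantics: requests
 * published but not yet consumed, capped by free response slots.  Ring
 * indexes are free-running counters, hence the unsigned subtraction.
 */
#include <stdio.h>

#define RING_SIZE 256u

static unsigned int unconsumed_requests(unsigned int req_prod,
                                        unsigned int req_cons,
                                        unsigned int rsp_prod_pvt)
{
        /* Requests the frontend has published that we have not consumed. */
        unsigned int req = req_prod - req_cons;
        /* Consumed-but-unanswered requests still own ring slots; only this
         * many further requests can safely be consumed. */
        unsigned int rsp = RING_SIZE - (req_cons - rsp_prod_pvt);

        return req < rsp ? req : rsp;
}

int main(void)
{
        /* Plenty of response space: all five published requests count. */
        printf("%u\n", unconsumed_requests(105, 100, 100));   /* prints 5 */

        /* Thirteen requests published but only three response slots left,
         * so the usable work is capped at three. */
        printf("%u\n", unconsumed_requests(1010, 997, 744));  /* prints 3 */

        return 0;
}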