[Xen-changelog] [xen-unstable] [NET] back: Transmit SG packets if supported
# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Node ID 9f29252c23b6166b6327d65a32f16e85504636d1
# Parent 5f5a2f282032766717a5cbf4be96361b43a52bcd
[NET] back: Transmit SG packets if supported

This patch adds scatter-and-gather transmission support to the backend.
This allows the MTU to be raised right now and the potential for TSO in
future.

Signed-off-by: Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx>
---
 linux-2.6-xen-sparse/drivers/xen/netback/common.h    |    6
 linux-2.6-xen-sparse/drivers/xen/netback/interface.c |   25 +
 linux-2.6-xen-sparse/drivers/xen/netback/netback.c   |  447 ++++++++++++-------
 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c    |    7
 xen/include/public/io/netif.h                        |    4
 5 files changed, 341 insertions(+), 148 deletions(-)

diff -r 5f5a2f282032 -r 9f29252c23b6 linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h	Mon Jul 31 17:35:43 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h	Mon Jul 31 17:42:13 2006 +0100
@@ -129,4 +129,10 @@ static inline int netbk_can_queue(struct
 	return netif->can_queue;
 }
 
+static inline int netbk_can_sg(struct net_device *dev)
+{
+	netif_t *netif = netdev_priv(dev);
+	return netif->features & NETIF_F_SG;
+}
+
 #endif /* __NETIF__BACKEND__COMMON_H__ */
diff -r 5f5a2f282032 -r 9f29252c23b6 linux-2.6-xen-sparse/drivers/xen/netback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c	Mon Jul 31 17:35:43 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c	Mon Jul 31 17:42:13 2006 +0100
@@ -62,10 +62,34 @@ static int net_close(struct net_device *
 	return 0;
 }
 
+static int netbk_change_mtu(struct net_device *dev, int mtu)
+{
+	int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
+
+	if (mtu > max)
+		return -EINVAL;
+	dev->mtu = mtu;
+	return 0;
+}
+
+static int netbk_set_sg(struct net_device *dev, u32 data)
+{
+	if (data) {
+		netif_t *netif = netdev_priv(dev);
+
+		if (!(netif->features & NETIF_F_SG))
+			return -ENOSYS;
+	}
+
+	return ethtool_op_set_sg(dev, data);
+}
+
 static struct ethtool_ops network_ethtool_ops =
 {
 	.get_tx_csum = ethtool_op_get_tx_csum,
 	.set_tx_csum = ethtool_op_set_tx_csum,
+	.get_sg = ethtool_op_get_sg,
+	.set_sg = netbk_set_sg,
 	.get_link = ethtool_op_get_link,
 };
 
@@ -101,6 +125,7 @@ netif_t *netif_alloc(domid_t domid, unsi
 	dev->get_stats       = netif_be_get_stats;
 	dev->open            = net_open;
 	dev->stop            = net_close;
+	dev->change_mtu      = netbk_change_mtu;
 	dev->features        = NETIF_F_IP_CSUM;
 
 	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
diff -r 5f5a2f282032 -r 9f29252c23b6 linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Mon Jul 31 17:35:43 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Mon Jul 31 17:42:13 2006 +0100
@@ -40,6 +40,11 @@
 
 /*#define NETBE_DEBUG_INTERRUPT*/
 
+struct netbk_rx_meta {
+	skb_frag_t frag;
+	int id;
+};
+
 static void netif_idx_release(u16 pending_idx);
 static void netif_page_release(struct page *page);
 static void make_tx_response(netif_t *netif,
@@ -100,21 +105,27 @@ static unsigned long mfn_list[MAX_MFN_AL
 static unsigned long mfn_list[MAX_MFN_ALLOC];
 static unsigned int alloc_index = 0;
 
-static unsigned long alloc_mfn(void)
-{
-	unsigned long mfn = 0;
+static inline unsigned long alloc_mfn(void)
+{
+	return mfn_list[--alloc_index];
+}
+
+static int check_mfn(int nr)
+{
 	struct xen_memory_reservation reservation = {
-		.nr_extents   = MAX_MFN_ALLOC,
 		.extent_order = 0,
 		.domid        = DOMID_SELF
 	};
-	set_xen_guest_handle(reservation.extent_start, mfn_list);
-	if ( unlikely(alloc_index == 0) )
-		alloc_index = HYPERVISOR_memory_op(
-			XENMEM_increase_reservation, &reservation);
-	if ( alloc_index != 0 )
-		mfn = mfn_list[--alloc_index];
-	return mfn;
+
+	if (likely(alloc_index >= nr))
+		return 0;
+
+	set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
+	reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
+	alloc_index += HYPERVISOR_memory_op(XENMEM_increase_reservation,
+					    &reservation);
+
+	return alloc_index >= nr ? 0 : -ENOMEM;
 }
 
 static inline void maybe_schedule_tx_action(void)
@@ -136,12 +147,87 @@ static inline int is_xen_skb(struct sk_b
 	return (cp == skbuff_cachep);
 }
 
+static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+{
+	struct skb_shared_info *ninfo;
+	struct sk_buff *nskb;
+	unsigned long offset;
+	int ret;
+	int len;
+	int headlen;
+
+	nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC);
+	if (unlikely(!nskb))
+		goto err;
+
+	skb_reserve(nskb, 16);
+	headlen = nskb->end - nskb->data;
+	if (headlen > skb_headlen(skb))
+		headlen = skb_headlen(skb);
+	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
+	BUG_ON(ret);
+
+	ninfo = skb_shinfo(nskb);
+	ninfo->gso_size = skb_shinfo(skb)->gso_size;
+	ninfo->gso_type = skb_shinfo(skb)->gso_type;
+
+	offset = headlen;
+	len = skb->len - headlen;
+
+	nskb->len = skb->len;
+	nskb->data_len = len;
+	nskb->truesize += len;
+
+	while (len) {
+		struct page *page;
+		int copy;
+		int zero;
+
+		if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
+			dump_stack();
+			goto err_free;
+		}
+
+		copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
+		zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
+
+		page = alloc_page(GFP_ATOMIC | zero);
+		if (unlikely(!page))
+			goto err_free;
+
+		ret = skb_copy_bits(skb, offset, page_address(page), copy);
+		BUG_ON(ret);
+
+		ninfo->frags[ninfo->nr_frags].page = page;
+		ninfo->frags[ninfo->nr_frags].page_offset = 0;
+		ninfo->frags[ninfo->nr_frags].size = copy;
+		ninfo->nr_frags++;
+
+		offset += copy;
+		len -= copy;
+	}
+
+	offset = nskb->data - skb->data;
+
+	nskb->h.raw = skb->h.raw + offset;
+	nskb->nh.raw = skb->nh.raw + offset;
+	nskb->mac.raw = skb->mac.raw + offset;
+
+	return nskb;
+
+ err_free:
+	kfree_skb(nskb);
+ err:
+	return NULL;
+}
+
 static inline int netbk_queue_full(netif_t *netif)
 {
 	RING_IDX peek = netif->rx_req_cons_peek;
 
-	return ((netif->rx.sring->req_prod - peek) <= 0) ||
-	       ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) <= 0);
+	return ((netif->rx.sring->req_prod - peek) <= MAX_SKB_FRAGS) ||
+	       ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) <=
+	        MAX_SKB_FRAGS);
 }
 
 int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -165,20 +251,12 @@ int netif_be_start_xmit(struct sk_buff *
 	 * We do not copy the packet unless:
 	 *  1. The data is shared; or
 	 *  2. The data is not allocated from our special cache.
-	 * NB. We also couldn't cope with fragmented packets, but we won't get
-	 *     any because we not advertise the NETIF_F_SG feature.
+	 *  3. The data is fragmented.
 	 */
-	if (skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb)) {
-		int hlen = skb->data - skb->head;
-		int ret;
-		struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len);
+	if (skb_cloned(skb) || skb_is_nonlinear(skb) || !is_xen_skb(skb)) {
+		struct sk_buff *nskb = netbk_copy_skb(skb);
 		if ( unlikely(nskb == NULL) )
 			goto drop;
-		skb_reserve(nskb, hlen);
-		__skb_put(nskb, skb->len);
-		ret = skb_copy_bits(skb, -hlen, nskb->data - hlen,
-				    skb->len + hlen);
-		BUG_ON(ret);
 		/* Copy only the header fields we use in this driver. */
 		nskb->dev = skb->dev;
 		nskb->ip_summed = skb->ip_summed;
@@ -187,11 +265,12 @@
 		skb = nskb;
 	}
 
-	netif->rx_req_cons_peek++;
+	netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1;
 	netif_get(netif);
 
 	if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
-		netif->rx.sring->req_event = netif->rx_req_cons_peek + 1;
+		netif->rx.sring->req_event = netif->rx_req_cons_peek +
+			MAX_SKB_FRAGS + 1;
 		mb(); /* request notification /then/ check & stop the queue */
 		if (netbk_queue_full(netif))
 			netif_stop_queue(dev);
@@ -227,116 +306,80 @@ int xen_network_done(void)
 }
 #endif
 
-static void net_rx_action(unsigned long unused)
-{
-	netif_t *netif = NULL;
-	s8 status;
-	u16 size, id, irq, flags;
-	multicall_entry_t *mcl;
-	mmu_update_t *mmu;
-	gnttab_transfer_t *gop;
-	unsigned long vdata, old_mfn, new_mfn;
-	struct sk_buff_head rxq;
-	struct sk_buff *skb;
-	int notify_nr = 0;
-	int ret;
+static u16 netbk_gop_frag(netif_t *netif, struct page *page, int count, int i)
+{
+	multicall_entry_t *mcl = rx_mcl + count;
+	mmu_update_t *mmu = rx_mmu + count;
+	gnttab_transfer_t *gop = grant_rx_op + count;
+	netif_rx_request_t *req;
+	unsigned long old_mfn, new_mfn;
+
+	old_mfn = virt_to_mfn(page_address(page));
+
+	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+		new_mfn = alloc_mfn();
+
+		/*
+		 * Set the new P2M table entry before reassigning
+		 * the old data page. Heed the comment in
+		 * pgtable-2level.h:pte_page(). :-)
+		 */
+		set_phys_to_machine(page_to_pfn(page), new_mfn);
+
+		MULTI_update_va_mapping(mcl, (unsigned long)page_address(page),
+					pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);
+
+		mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
+			MMU_MACHPHYS_UPDATE;
+		mmu->val = page_to_pfn(page);
+	}
+
+	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
+	gop->mfn = old_mfn;
+	gop->domid = netif->domid;
+	gop->ref = req->gref;
+	return req->id;
+}
+
+static void netbk_gop_skb(struct sk_buff *skb, struct netbk_rx_meta *meta,
+			  int count)
+{
+	netif_t *netif = netdev_priv(skb->dev);
+	int nr_frags = skb_shinfo(skb)->nr_frags;
+	int i;
+
+	for (i = 0; i < nr_frags; i++) {
+		meta[++count].frag = skb_shinfo(skb)->frags[i];
+		meta[count].id = netbk_gop_frag(netif, meta[count].frag.page,
+						count, i + 1);
+	}
+
 	/*
-	 * Putting hundreds of bytes on the stack is considered rude.
-	 * Static works because a tasklet can only be on one CPU at any time.
+	 * This must occur at the end to ensure that we don't trash
+	 * skb_shinfo until we're done.
 	 */
-	static u16 notify_list[NET_RX_RING_SIZE];
-
-	skb_queue_head_init(&rxq);
-
-	mcl = rx_mcl;
-	mmu = rx_mmu;
-	gop = grant_rx_op;
-
-	while ((skb = skb_dequeue(&rx_queue)) != NULL) {
-		netif = netdev_priv(skb->dev);
-		vdata = (unsigned long)skb->data;
-		old_mfn = virt_to_mfn(vdata);
-
-		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-			/* Memory squeeze? Back off for an arbitrary while. */
-			if ((new_mfn = alloc_mfn()) == 0) {
-				if ( net_ratelimit() )
-					WPRINTK("Memory squeeze in netback "
-						"driver.\n");
-				mod_timer(&net_timer, jiffies + HZ);
-				skb_queue_head(&rx_queue, skb);
-				break;
-			}
-			/*
-			 * Set the new P2M table entry before reassigning
-			 * the old data page. Heed the comment in
-			 * pgtable-2level.h:pte_page(). :-)
-			 */
-			set_phys_to_machine(
-				__pa(skb->data) >> PAGE_SHIFT,
-				new_mfn);
-
-			MULTI_update_va_mapping(mcl, vdata,
-						pfn_pte_ma(new_mfn,
-							   PAGE_KERNEL), 0);
-			mcl++;
-
-			mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
-				MMU_MACHPHYS_UPDATE;
-			mmu->val = __pa(vdata) >> PAGE_SHIFT;
-			mmu++;
-		}
-
-		gop->mfn = old_mfn;
-		gop->domid = netif->domid;
-		gop->ref = RING_GET_REQUEST(
-			&netif->rx, netif->rx.req_cons)->gref;
-		netif->rx.req_cons++;
-		gop++;
-
-		__skb_queue_tail(&rxq, skb);
-
-		/* Filled the batch queue? */
-		if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op))
-			break;
-	}
-
-	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-		if (mcl == rx_mcl)
-			return;
-
-		mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
-
-		if (mmu - rx_mmu) {
-			mcl->op = __HYPERVISOR_mmu_update;
-			mcl->args[0] = (unsigned long)rx_mmu;
-			mcl->args[1] = mmu - rx_mmu;
-			mcl->args[2] = 0;
-			mcl->args[3] = DOMID_SELF;
-			mcl++;
-		}
-
-		ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
-		BUG_ON(ret != 0);
-	}
-
-	ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op,
-					gop - grant_rx_op);
-	BUG_ON(ret != 0);
-
-	mcl = rx_mcl;
-	gop = grant_rx_op;
-	while ((skb = __skb_dequeue(&rxq)) != NULL) {
-		netif = netdev_priv(skb->dev);
-		size = skb->tail - skb->data;
-
-		atomic_set(&(skb_shinfo(skb)->dataref), 1);
-		skb_shinfo(skb)->nr_frags = 0;
-		skb_shinfo(skb)->frag_list = NULL;
-
-		netif->stats.tx_bytes += size;
-		netif->stats.tx_packets++;
-
+	meta[count - nr_frags].id = netbk_gop_frag(netif,
+						   virt_to_page(skb->data),
+						   count - nr_frags, 0);
+	netif->rx.req_cons += nr_frags + 1;
+}
+
+static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
+{
+	int i;
+
+	for (i = 0; i < nr_frags; i++)
+		put_page(meta[i].frag.page);
+}
+
+static int netbk_check_gop(int nr_frags, domid_t domid, int count)
+{
+	multicall_entry_t *mcl = rx_mcl + count;
+	gnttab_transfer_t *gop = grant_rx_op + count;
+	int status = NETIF_RSP_OKAY;
+	int i;
+
+	for (i = 0; i <= nr_frags; i++) {
 		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
 			/* The update_va_mapping() must not fail. */
 			BUG_ON(mcl->result != 0);
@@ -344,10 +387,9 @@ static void net_rx_action(unsigned long
 		}
 
 		/* Check the reassignment error code. */
-		status = NETIF_RSP_OKAY;
 		if (gop->status != 0) {
 			DPRINTK("Bad status %d from grant transfer to DOM%u\n",
-				gop->status, netif->domid);
+				gop->status, domid);
 			/*
 			 * Page no longer belongs to us unless GNTST_bad_page,
 			 * but that should be a fatal error anyway.
@@ -355,17 +397,128 @@ static void net_rx_action(unsigned long
 			BUG_ON(gop->status == GNTST_bad_page);
 			status = NETIF_RSP_ERROR;
 		}
-		irq = netif->irq;
-		id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id;
-		flags = 0;
+		gop++;
+	}
+
+	return status;
+}
+
+static void netbk_add_frag_responses(netif_t *netif, int status,
+				     struct netbk_rx_meta *meta, int nr_frags)
+{
+	int i;
+
+	for (i = 0; i < nr_frags; i++) {
+		int id = meta[i].id;
+		int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
+
+		make_rx_response(netif, id, status, meta[i].frag.page_offset,
+				 meta[i].frag.size, flags);
+	}
+}
+
+static void net_rx_action(unsigned long unused)
+{
+	netif_t *netif = NULL;
+	s8 status;
+	u16 id, irq, flags;
+	multicall_entry_t *mcl;
+	struct sk_buff_head rxq;
+	struct sk_buff *skb;
+	int notify_nr = 0;
+	int ret;
+	int nr_frags;
+	int count;
+
+	/*
+	 * Putting hundreds of bytes on the stack is considered rude.
+	 * Static works because a tasklet can only be on one CPU at any time.
+	 */
+	static u16 notify_list[NET_RX_RING_SIZE];
+	static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
+
+	skb_queue_head_init(&rxq);
+
+	count = 0;
+
+	while ((skb = skb_dequeue(&rx_queue)) != NULL) {
+		nr_frags = skb_shinfo(skb)->nr_frags;
+		*(int *)skb->cb = nr_frags;
+
+		if (!xen_feature(XENFEAT_auto_translated_physmap) &&
+		    check_mfn(nr_frags + 1)) {
+			/* Memory squeeze? Back off for an arbitrary while. */
+			if ( net_ratelimit() )
+				WPRINTK("Memory squeeze in netback "
+					"driver.\n");
+			mod_timer(&net_timer, jiffies + HZ);
+			skb_queue_head(&rx_queue, skb);
+			break;
+		}
+
+		netbk_gop_skb(skb, meta, count);
+
+		count += nr_frags + 1;
+
+		__skb_queue_tail(&rxq, skb);
+
+		/* Filled the batch queue? */
+		if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
+			break;
+	}
+
+	if (!count)
+		return;
+
+	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+		mcl = rx_mcl + count;
+
+		mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+
+		mcl->op = __HYPERVISOR_mmu_update;
+		mcl->args[0] = (unsigned long)rx_mmu;
+		mcl->args[1] = count;
+		mcl->args[2] = 0;
+		mcl->args[3] = DOMID_SELF;
+
+		ret = HYPERVISOR_multicall(rx_mcl, count + 1);
+		BUG_ON(ret != 0);
+	}
+
+	ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, count);
+	BUG_ON(ret != 0);
+
+	count = 0;
+	while ((skb = __skb_dequeue(&rxq)) != NULL) {
+		nr_frags = *(int *)skb->cb;
+
+		atomic_set(&(skb_shinfo(skb)->dataref), 1);
+		skb_shinfo(skb)->nr_frags = 0;
+		skb_shinfo(skb)->frag_list = NULL;
+
+		netif = netdev_priv(skb->dev);
+		netif->stats.tx_bytes += skb->len;
+		netif->stats.tx_packets++;
+
+		netbk_free_pages(nr_frags, meta + count + 1);
+		status = netbk_check_gop(nr_frags, netif->domid, count);
+
+		id = meta[count].id;
+		flags = nr_frags ? NETRXF_more_data : 0;
+
 		if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
 			flags |= NETRXF_csum_blank | NETRXF_data_validated;
 		else if (skb->proto_data_valid) /* remote but checksummed? */
 			flags |= NETRXF_data_validated;
-		if (make_rx_response(netif, id, status,
-				     (unsigned long)skb->data & ~PAGE_MASK,
-				     size, flags) &&
-		    (rx_notify[irq] == 0)) {
+
+		make_rx_response(netif, id, status, offset_in_page(skb->data),
+				 skb_headlen(skb), flags);
+		netbk_add_frag_responses(netif, status, meta + count + 1,
+					 nr_frags);
+
+		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
+		irq = netif->irq;
+		if (ret && !rx_notify[irq]) {
 			rx_notify[irq] = 1;
 			notify_list[notify_nr++] = irq;
 		}
@@ -376,7 +529,7 @@ static void net_rx_action(unsigned long
 
 		netif_put(netif);
 		dev_kfree_skb(skb);
-		gop++;
+		count += nr_frags + 1;
 	}
 
 	while (notify_nr != 0) {
@@ -1046,7 +1199,6 @@ static int make_rx_response(netif_t *net
 {
 	RING_IDX i = netif->rx.rsp_prod_pvt;
 	netif_rx_response_t *resp;
-	int notify;
 
 	resp = RING_GET_RESPONSE(&netif->rx, i);
 	resp->offset = offset;
@@ -1057,9 +1209,8 @@ static int make_rx_response(netif_t *net
 		resp->status = (s16)st;
 
 	netif->rx.rsp_prod_pvt = ++i;
-	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify);
-
-	return notify;
+
+	return 0;
 }
 
 #ifdef NETBE_DEBUG_INTERRUPT
diff -r 5f5a2f282032 -r 9f29252c23b6 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Mon Jul 31 17:35:43 2006 +0100
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Mon Jul 31 17:42:13 2006 +0100
@@ -377,6 +377,13 @@ static int connect_rings(struct backend_
 	/* Must be non-zero for pfifo_fast to work. */
 	be->netif->dev->tx_queue_len = 1;
 
+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
+		val = 0;
+	if (val) {
+		be->netif->features |= NETIF_F_SG;
+		be->netif->dev->features |= NETIF_F_SG;
+	}
+
 	/* Map the shared frame, irq etc. */
 	err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
 	if (err) {
diff -r 5f5a2f282032 -r 9f29252c23b6 xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h	Mon Jul 31 17:35:43 2006 +0100
+++ b/xen/include/public/io/netif.h	Mon Jul 31 17:42:13 2006 +0100
@@ -124,6 +124,10 @@ typedef struct netif_rx_request netif_rx
 #define _NETRXF_csum_blank     (1)
 #define  NETRXF_csum_blank     (1U<<_NETRXF_csum_blank)
 
+/* Packet continues in the next request descriptor. */
+#define _NETRXF_more_data      (2)
+#define  NETRXF_more_data      (1U<<_NETRXF_more_data)
+
 struct netif_rx_response {
     uint16_t id;
     uint16_t offset;       /* Offset in page of start of received packet  */
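The protocol-visible part of this change is the new NETRXF_more_data flag in
xen/include/public/io/netif.h: for a fragmented skb the backend now consumes
nr_frags + 1 rx requests and emits a matching chain of responses, with the
head response carrying skb_headlen() bytes and every response except the last
flagged NETRXF_more_data. The sketch below is illustrative only and is not
taken from this changeset: gather_packet(), copy_fragment() and struct
pkt_state are made-up names, while the ring macros and the netif.h
definitions are the real ones. It shows how a frontend-style consumer might
walk such a chain to reassemble one packet.

/*
 * Illustrative sketch only; not part of the changeset above.
 */
#include <xen/interface/io/netif.h>

struct pkt_state {
	unsigned int len;	/* bytes gathered so far */
	int status;		/* NETIF_RSP_OKAY or first error seen */
};

/*
 * Consume responses starting at 'cons' until one without NETRXF_more_data
 * ends the packet.  copy_fragment() is a caller-supplied (hypothetical)
 * helper that maps the response id back to the page the frontend granted
 * and copies or links 'len' bytes at 'offset' into the skb being built.
 * Returns the updated consumer index.
 */
static RING_IDX gather_packet(struct netif_rx_front_ring *ring, RING_IDX cons,
			      struct pkt_state *pkt,
			      void (*copy_fragment)(uint16_t id,
						    uint16_t offset,
						    uint16_t len))
{
	netif_rx_response_t *rsp;

	pkt->len = 0;
	pkt->status = NETIF_RSP_OKAY;

	do {
		rsp = RING_GET_RESPONSE(ring, cons);
		cons++;

		if (rsp->status < 0) {
			/* Remember the first error but keep walking the
			 * chain so the ring stays in sync. */
			if (pkt->status == NETIF_RSP_OKAY)
				pkt->status = rsp->status;
			continue;
		}

		/* A non-negative status is the fragment length. */
		copy_fragment(rsp->id, rsp->offset, rsp->status);
		pkt->len += rsp->status;
	} while (rsp->flags & NETRXF_more_data);

	return cons;
}

A frontend that wants these multi-fragment packets must also advertise
feature-sg in its xenstore directory; that is the key the xenbus.c hunk above
reads from dev->otherend before setting NETIF_F_SG on the backend interface.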