[Xen-changelog] [xen-unstable] [NET]: Added GSO support
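Note for readers of the diff below: the user-visible change is that skb_shared_info's tso_size/tso_segs/ufo_size fields become gso_size/gso_segs/gso_type, and drivers now test gso_type against SKB_GSO_TCPV4/SKB_GSO_UDPV4 instead of checking separate TSO/UFO fields. A minimal sketch, assuming the post-patch field names, of how a driver transmit routine would read these fields (the function name is hypothetical and this is not part of the changeset; compare the e1000 and s2io hunks further down):

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Sketch only -- not part of this changeset; example_hard_start_xmit is hypothetical. */
static int example_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	unsigned int mss = skb_shinfo(skb)->gso_size;	/* formerly tso_size */

	if (mss && skb_shinfo(skb)->gso_type == SKB_GSO_TCPV4) {
		/* program hardware TCP segmentation offload with this MSS */
	} else if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) {
		/* UFO path: gso_segs is not always filled in here (see the field comment) */
	}
	return NETDEV_TX_OK;	/* hand the frame to the hardware here */
}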
# HG changeset patch # User kaf24@xxxxxxxxxxxxxxxxxxxx # Node ID 6e7027a2abcad69ecb5d2d350feaac1848b3847f # Parent b217e03e1db5bcde98fb15ca9d5df303ea067bc4 [NET]: Added GSO support Imported GSO patch. Signed-off-by: Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx> --- linux-2.6-xen-sparse/include/linux/skbuff.h | 42 linux-2.6-xen-sparse/net/core/dev.c | 238 ++- linux-2.6-xen-sparse/net/core/skbuff.c | 144 +- patches/linux-2.6.16.13/net-gso.patch | 1974 ++++++++++++++++++++++++++++ 4 files changed, 2299 insertions(+), 99 deletions(-) diff -r b217e03e1db5 -r 6e7027a2abca linux-2.6-xen-sparse/include/linux/skbuff.h --- a/linux-2.6-xen-sparse/include/linux/skbuff.h Wed Jun 28 10:32:43 2006 +0100 +++ b/linux-2.6-xen-sparse/include/linux/skbuff.h Wed Jun 28 12:03:01 2006 +0100 @@ -134,9 +134,10 @@ struct skb_shared_info { struct skb_shared_info { atomic_t dataref; unsigned short nr_frags; - unsigned short tso_size; - unsigned short tso_segs; - unsigned short ufo_size; + unsigned short gso_size; + /* Warning: this field is not always filled in (UFO)! */ + unsigned short gso_segs; + unsigned short gso_type; unsigned int ip6_frag_id; struct sk_buff *frag_list; skb_frag_t frags[MAX_SKB_FRAGS]; @@ -166,6 +167,14 @@ enum { SKB_FCLONE_UNAVAILABLE, SKB_FCLONE_ORIG, SKB_FCLONE_CLONE, +}; + +enum { + SKB_GSO_TCPV4 = 1 << 0, + SKB_GSO_UDPV4 = 1 << 1, + + /* This indicates the skb is from an untrusted source. */ + SKB_GSO_DODGY = 1 << 2, }; /** @@ -1157,18 +1166,34 @@ static inline int skb_can_coalesce(struc return 0; } +static inline int __skb_linearize(struct sk_buff *skb) +{ + return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM; +} + /** * skb_linearize - convert paged skb to linear one * @skb: buffer to linarize - * @gfp: allocation mode * * If there is no free memory -ENOMEM is returned, otherwise zero * is returned and the old skb data released. */ -extern int __skb_linearize(struct sk_buff *skb, gfp_t gfp); -static inline int skb_linearize(struct sk_buff *skb, gfp_t gfp) -{ - return __skb_linearize(skb, gfp); +static inline int skb_linearize(struct sk_buff *skb) +{ + return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0; +} + +/** + * skb_linearize_cow - make sure skb is linear and writable + * @skb: buffer to process + * + * If there is no free memory -ENOMEM is returned, otherwise zero + * is returned and the old skb data released. + */ +static inline int skb_linearize_cow(struct sk_buff *skb) +{ + return skb_is_nonlinear(skb) || skb_cloned(skb) ? + __skb_linearize(skb) : 0; } /** @@ -1263,6 +1288,7 @@ extern void skb_split(struct sk_b struct sk_buff *skb1, const u32 len); extern void skb_release_data(struct sk_buff *skb); +extern struct sk_buff *skb_segment(struct sk_buff *skb, int features); static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) diff -r b217e03e1db5 -r 6e7027a2abca linux-2.6-xen-sparse/net/core/dev.c --- a/linux-2.6-xen-sparse/net/core/dev.c Wed Jun 28 10:32:43 2006 +0100 +++ b/linux-2.6-xen-sparse/net/core/dev.c Wed Jun 28 12:03:01 2006 +0100 @@ -115,6 +115,7 @@ #include <net/iw_handler.h> #endif /* CONFIG_NET_RADIO */ #include <asm/current.h> +#include <linux/err.h> #ifdef CONFIG_XEN #include <net/ip.h> @@ -1038,7 +1039,7 @@ static inline void net_timestamp(struct * taps currently in use. 
*/ -void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) +static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { struct packet_type *ptype; @@ -1112,6 +1113,45 @@ out: return ret; } +/** + * skb_gso_segment - Perform segmentation on skb. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + * + * This function segments the given skb and returns a list of segments. + * + * It may return NULL if the skb requires no segmentation. This is + * only possible when GSO is used for verifying header integrity. + */ +struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) +{ + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); + struct packet_type *ptype; + int type = skb->protocol; + + BUG_ON(skb_shinfo(skb)->frag_list); + BUG_ON(skb->ip_summed != CHECKSUM_HW); + + skb->mac.raw = skb->data; + skb->mac_len = skb->nh.raw - skb->data; + __skb_pull(skb, skb->mac_len); + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { + if (ptype->type == type && !ptype->dev && ptype->gso_segment) { + segs = ptype->gso_segment(skb, features); + break; + } + } + rcu_read_unlock(); + + __skb_push(skb, skb->data - skb->mac.raw); + + return segs; +} + +EXPORT_SYMBOL(skb_gso_segment); + /* Take action when hardware reception checksum errors are detected. */ #ifdef CONFIG_BUG void netdev_rx_csum_fault(struct net_device *dev) @@ -1148,75 +1188,108 @@ static inline int illegal_highdma(struct #define illegal_highdma(dev, skb) (0) #endif -/* Keep head the same: replace data */ -int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask) -{ - unsigned int size; - u8 *data; - long offset; - struct skb_shared_info *ninfo; - int headerlen = skb->data - skb->head; - int expand = (skb->tail + skb->data_len) - skb->end; - - if (skb_shared(skb)) - BUG(); - - if (expand <= 0) - expand = 0; - - size = skb->end - skb->head + expand; - size = SKB_DATA_ALIGN(size); - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); - if (!data) - return -ENOMEM; - - /* Copy entire thing */ - if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len)) - BUG(); - - /* Set up shinfo */ - ninfo = (struct skb_shared_info*)(data + size); - atomic_set(&ninfo->dataref, 1); - ninfo->tso_size = skb_shinfo(skb)->tso_size; - ninfo->tso_segs = skb_shinfo(skb)->tso_segs; - ninfo->nr_frags = 0; - ninfo->frag_list = NULL; - - /* Offset between the two in bytes */ - offset = data - skb->head; - - /* Free old data. */ - skb_release_data(skb); - - skb->head = data; - skb->end = data + size; - - /* Set up new pointers */ - skb->h.raw += offset; - skb->nh.raw += offset; - skb->mac.raw += offset; - skb->tail += offset; - skb->data += offset; - - /* We are no longer a clone, even if we were. */ - skb->cloned = 0; - - skb->tail += skb->data_len; - skb->data_len = 0; +struct dev_gso_cb { + void (*destructor)(struct sk_buff *skb); +}; + +#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) + +static void dev_gso_skb_destructor(struct sk_buff *skb) +{ + struct dev_gso_cb *cb; + + do { + struct sk_buff *nskb = skb->next; + + skb->next = nskb->next; + nskb->next = NULL; + kfree_skb(nskb); + } while (skb->next); + + cb = DEV_GSO_CB(skb); + if (cb->destructor) + cb->destructor(skb); +} + +/** + * dev_gso_segment - Perform emulated hardware segmentation on skb. + * @skb: buffer to segment + * + * This function segments the given skb and stores the list of segments + * in skb->next. 
+ */ +static int dev_gso_segment(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct sk_buff *segs; + int features = dev->features & ~(illegal_highdma(dev, skb) ? + NETIF_F_SG : 0); + + segs = skb_gso_segment(skb, features); + + /* Verifying header integrity only. */ + if (!segs) + return 0; + + if (unlikely(IS_ERR(segs))) + return PTR_ERR(segs); + + skb->next = segs; + DEV_GSO_CB(skb)->destructor = skb->destructor; + skb->destructor = dev_gso_skb_destructor; + + return 0; +} + +int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + if (likely(!skb->next)) { + if (netdev_nit) + dev_queue_xmit_nit(skb, dev); + + if (netif_needs_gso(dev, skb)) { + if (unlikely(dev_gso_segment(skb))) + goto out_kfree_skb; + if (skb->next) + goto gso; + } + + return dev->hard_start_xmit(skb, dev); + } + +gso: + do { + struct sk_buff *nskb = skb->next; + int rc; + + skb->next = nskb->next; + nskb->next = NULL; + rc = dev->hard_start_xmit(nskb, dev); + if (unlikely(rc)) { + nskb->next = skb->next; + skb->next = nskb; + return rc; + } + if (unlikely(netif_queue_stopped(dev) && skb->next)) + return NETDEV_TX_BUSY; + } while (skb->next); + + skb->destructor = DEV_GSO_CB(skb)->destructor; + +out_kfree_skb: + kfree_skb(skb); return 0; } #define HARD_TX_LOCK(dev, cpu) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ - spin_lock(&dev->xmit_lock); \ - dev->xmit_lock_owner = cpu; \ + netif_tx_lock(dev); \ } \ } #define HARD_TX_UNLOCK(dev) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ - dev->xmit_lock_owner = -1; \ - spin_unlock(&dev->xmit_lock); \ + netif_tx_unlock(dev); \ } \ } @@ -1289,9 +1362,19 @@ int dev_queue_xmit(struct sk_buff *skb) struct Qdisc *q; int rc = -ENOMEM; + /* If a checksum-deferred packet is forwarded to a device that needs a + * checksum, correct the pointers and force checksumming. + */ + if (skb_checksum_setup(skb)) + goto out_kfree_skb; + + /* GSO will handle the following emulations directly. */ + if (netif_needs_gso(dev, skb)) + goto gso; + if (skb_shinfo(skb)->frag_list && !(dev->features & NETIF_F_FRAGLIST) && - __skb_linearize(skb, GFP_ATOMIC)) + __skb_linearize(skb)) goto out_kfree_skb; /* Fragmented skb is linearized if device does not support SG, @@ -1300,31 +1383,26 @@ int dev_queue_xmit(struct sk_buff *skb) */ if (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && - __skb_linearize(skb, GFP_ATOMIC)) + __skb_linearize(skb)) goto out_kfree_skb; - /* If a checksum-deferred packet is forwarded to a device that needs a - * checksum, correct the pointers and force checksumming. - */ - if(skb_checksum_setup(skb)) - goto out_kfree_skb; - /* If packet is not checksummed and device does not support * checksumming for this protocol, complete checksumming here. */ if (skb->ip_summed == CHECKSUM_HW && - (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) && + (!(dev->features & NETIF_F_GEN_CSUM) && (!(dev->features & NETIF_F_IP_CSUM) || skb->protocol != htons(ETH_P_IP)))) if (skb_checksum_help(skb, 0)) goto out_kfree_skb; +gso: spin_lock_prefetch(&dev->queue_lock); /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ - local_bh_disable(); + rcu_read_lock_bh(); /* Updates of qdisc are serialized by queue_lock. * The struct Qdisc which is pointed to by qdisc is now a @@ -1358,8 +1436,8 @@ int dev_queue_xmit(struct sk_buff *skb) /* The device has no queue. Common case for software devices: loopback, all the sorts of tunnels... 
- Really, it is unlikely that xmit_lock protection is necessary here. - (f.e. loopback and IP tunnels are clean ignoring statistics + Really, it is unlikely that netif_tx_lock protection is necessary + here. (f.e. loopback and IP tunnels are clean ignoring statistics counters.) However, it is possible, that they rely on protection made by us here. @@ -1375,11 +1453,8 @@ int dev_queue_xmit(struct sk_buff *skb) HARD_TX_LOCK(dev, cpu); if (!netif_queue_stopped(dev)) { - if (netdev_nit) - dev_queue_xmit_nit(skb, dev); - rc = 0; - if (!dev->hard_start_xmit(skb, dev)) { + if (!dev_hard_start_xmit(skb, dev)) { HARD_TX_UNLOCK(dev); goto out; } @@ -1398,13 +1473,13 @@ int dev_queue_xmit(struct sk_buff *skb) } rc = -ENETDOWN; - local_bh_enable(); + rcu_read_unlock_bh(); out_kfree_skb: kfree_skb(skb); return rc; out: - local_bh_enable(); + rcu_read_unlock_bh(); return rc; } @@ -2732,7 +2807,7 @@ int register_netdevice(struct net_device BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); spin_lock_init(&dev->queue_lock); - spin_lock_init(&dev->xmit_lock); + spin_lock_init(&dev->_xmit_lock); dev->xmit_lock_owner = -1; #ifdef CONFIG_NET_CLS_ACT spin_lock_init(&dev->ingress_lock); @@ -2776,9 +2851,7 @@ int register_netdevice(struct net_device /* Fix illegal SG+CSUM combinations. */ if ((dev->features & NETIF_F_SG) && - !(dev->features & (NETIF_F_IP_CSUM | - NETIF_F_NO_CSUM | - NETIF_F_HW_CSUM))) { + !(dev->features & NETIF_F_ALL_CSUM)) { printk("%s: Dropping NETIF_F_SG since no checksum feature.\n", dev->name); dev->features &= ~NETIF_F_SG; @@ -3330,7 +3403,6 @@ EXPORT_SYMBOL(__dev_get_by_index); EXPORT_SYMBOL(__dev_get_by_index); EXPORT_SYMBOL(__dev_get_by_name); EXPORT_SYMBOL(__dev_remove_pack); -EXPORT_SYMBOL(__skb_linearize); EXPORT_SYMBOL(dev_valid_name); EXPORT_SYMBOL(dev_add_pack); EXPORT_SYMBOL(dev_alloc_name); diff -r b217e03e1db5 -r 6e7027a2abca linux-2.6-xen-sparse/net/core/skbuff.c --- a/linux-2.6-xen-sparse/net/core/skbuff.c Wed Jun 28 10:32:43 2006 +0100 +++ b/linux-2.6-xen-sparse/net/core/skbuff.c Wed Jun 28 12:03:01 2006 +0100 @@ -165,9 +165,9 @@ struct sk_buff *__alloc_skb(unsigned int shinfo = skb_shinfo(skb); atomic_set(&shinfo->dataref, 1); shinfo->nr_frags = 0; - shinfo->tso_size = 0; - shinfo->tso_segs = 0; - shinfo->ufo_size = 0; + shinfo->gso_size = 0; + shinfo->gso_segs = 0; + shinfo->gso_type = 0; shinfo->ip6_frag_id = 0; shinfo->frag_list = NULL; @@ -237,9 +237,9 @@ struct sk_buff *alloc_skb_from_cache(kme shinfo = skb_shinfo(skb); atomic_set(&shinfo->dataref, 1); shinfo->nr_frags = 0; - shinfo->tso_size = 0; - shinfo->tso_segs = 0; - shinfo->ufo_size = 0; + shinfo->gso_size = 0; + shinfo->gso_segs = 0; + shinfo->gso_type = 0; shinfo->ip6_frag_id = 0; shinfo->frag_list = NULL; @@ -524,8 +524,9 @@ static void copy_skb_header(struct sk_bu new->tc_index = old->tc_index; #endif atomic_set(&new->users, 1); - skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size; - skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs; + skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; + skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; + skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; } /** @@ -1799,6 +1800,133 @@ int skb_append_datato_frags(struct sock return 0; } + +/** + * skb_segment - Perform protocol segmentation on skb. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + * + * This function performs segmentation on the given skb. It returns + * the segment at the given position. 
It returns NULL if there are + * no more segments to generate, or when an error is encountered. + */ +struct sk_buff *skb_segment(struct sk_buff *skb, int features) +{ + struct sk_buff *segs = NULL; + struct sk_buff *tail = NULL; + unsigned int mss = skb_shinfo(skb)->gso_size; + unsigned int doffset = skb->data - skb->mac.raw; + unsigned int offset = doffset; + unsigned int headroom; + unsigned int len; + int sg = features & NETIF_F_SG; + int nfrags = skb_shinfo(skb)->nr_frags; + int err = -ENOMEM; + int i = 0; + int pos; + + __skb_push(skb, doffset); + headroom = skb_headroom(skb); + pos = skb_headlen(skb); + + do { + struct sk_buff *nskb; + skb_frag_t *frag; + int hsize, nsize; + int k; + int size; + + len = skb->len - offset; + if (len > mss) + len = mss; + + hsize = skb_headlen(skb) - offset; + if (hsize < 0) + hsize = 0; + nsize = hsize + doffset; + if (nsize > len + doffset || !sg) + nsize = len + doffset; + + nskb = alloc_skb(nsize + headroom, GFP_ATOMIC); + if (unlikely(!nskb)) + goto err; + + if (segs) + tail->next = nskb; + else + segs = nskb; + tail = nskb; + + nskb->dev = skb->dev; + nskb->priority = skb->priority; + nskb->protocol = skb->protocol; + nskb->dst = dst_clone(skb->dst); + memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); + nskb->pkt_type = skb->pkt_type; + nskb->mac_len = skb->mac_len; + + skb_reserve(nskb, headroom); + nskb->mac.raw = nskb->data; + nskb->nh.raw = nskb->data + skb->mac_len; + nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw); + memcpy(skb_put(nskb, doffset), skb->data, doffset); + + if (!sg) { + nskb->csum = skb_copy_and_csum_bits(skb, offset, + skb_put(nskb, len), + len, 0); + continue; + } + + frag = skb_shinfo(nskb)->frags; + k = 0; + + nskb->ip_summed = CHECKSUM_HW; + nskb->csum = skb->csum; + memcpy(skb_put(nskb, hsize), skb->data + offset, hsize); + + while (pos < offset + len) { + BUG_ON(i >= nfrags); + + *frag = skb_shinfo(skb)->frags[i]; + get_page(frag->page); + size = frag->size; + + if (pos < offset) { + frag->page_offset += offset - pos; + frag->size -= offset - pos; + } + + k++; + + if (pos + size <= offset + len) { + i++; + pos += size; + } else { + frag->size -= pos + size - (offset + len); + break; + } + + frag++; + } + + skb_shinfo(nskb)->nr_frags = k; + nskb->data_len = len - hsize; + nskb->len += nskb->data_len; + nskb->truesize += nskb->data_len; + } while ((offset += len) < skb->len); + + return segs; + +err: + while ((skb = segs)) { + segs = skb->next; + kfree(skb); + } + return ERR_PTR(err); +} + +EXPORT_SYMBOL_GPL(skb_segment); void __init skb_init(void) { diff -r b217e03e1db5 -r 6e7027a2abca patches/linux-2.6.16.13/net-gso.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.13/net-gso.patch Wed Jun 28 12:03:01 2006 +0100 @@ -0,0 +1,2907 @@ +diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt +index 3c0a5ba..847cedb 100644 +--- a/Documentation/networking/netdevices.txt ++++ b/Documentation/networking/netdevices.txt +@@ -42,9 +42,9 @@ dev->get_stats: + Context: nominally process, but don't sleep inside an rwlock + + dev->hard_start_xmit: +- Synchronization: dev->xmit_lock spinlock. ++ Synchronization: netif_tx_lock spinlock. + When the driver sets NETIF_F_LLTX in dev->features this will be +- called without holding xmit_lock. In this case the driver ++ called without holding netif_tx_lock. In this case the driver + has to lock by itself when needed. It is recommended to use a try lock + for this and return -1 when the spin lock fails. 
+ The locking there should also properly protect against +@@ -62,12 +62,12 @@ dev->hard_start_xmit: + Only valid when NETIF_F_LLTX is set. + + dev->tx_timeout: +- Synchronization: dev->xmit_lock spinlock. ++ Synchronization: netif_tx_lock spinlock. + Context: BHs disabled + Notes: netif_queue_stopped() is guaranteed true + + dev->set_multicast_list: +- Synchronization: dev->xmit_lock spinlock. ++ Synchronization: netif_tx_lock spinlock. + Context: BHs disabled + + dev->poll: +diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c +index 4be9769..2e7cac7 100644 +--- a/drivers/block/aoe/aoenet.c ++++ b/drivers/block/aoe/aoenet.c +@@ -95,9 +95,8 @@ mac_addr(char addr[6]) + static struct sk_buff * + skb_check(struct sk_buff *skb) + { +- if (skb_is_nonlinear(skb)) + if ((skb = skb_share_check(skb, GFP_ATOMIC))) +- if (skb_linearize(skb, GFP_ATOMIC) < 0) { ++ if (skb_linearize(skb)) { + dev_kfree_skb(skb); + return NULL; + } +diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +index a2408d7..c90e620 100644 +--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c ++++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +@@ -821,7 +821,8 @@ void ipoib_mcast_restart_task(void *dev_ + + ipoib_mcast_stop_thread(dev, 0); + +- spin_lock_irqsave(&dev->xmit_lock, flags); ++ local_irq_save(flags); ++ netif_tx_lock(dev); + spin_lock(&priv->lock); + + /* +@@ -896,7 +897,8 @@ void ipoib_mcast_restart_task(void *dev_ + } + + spin_unlock(&priv->lock); +- spin_unlock_irqrestore(&dev->xmit_lock, flags); ++ netif_tx_unlock(dev); ++ local_irq_restore(flags); + + /* We have to cancel outside of the spinlock */ + list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { +diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c +index 6711eb6..8d2351f 100644 +--- a/drivers/media/dvb/dvb-core/dvb_net.c ++++ b/drivers/media/dvb/dvb-core/dvb_net.c +@@ -1052,7 +1052,7 @@ static void wq_set_multicast_list (void + + dvb_net_feed_stop(dev); + priv->rx_mode = RX_MODE_UNI; +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + + if (dev->flags & IFF_PROMISC) { + dprintk("%s: promiscuous mode\n", dev->name); +@@ -1077,7 +1077,7 @@ static void wq_set_multicast_list (void + } + } + +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + dvb_net_feed_start(dev); + } + +diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c +index dd41049..6615583 100644 +--- a/drivers/net/8139cp.c ++++ b/drivers/net/8139cp.c +@@ -794,7 +794,7 @@ #endif + entry = cp->tx_head; + eor = (entry == (CP_TX_RING_SIZE - 1)) ? RingEnd : 0; + if (dev->features & NETIF_F_TSO) +- mss = skb_shinfo(skb)->tso_size; ++ mss = skb_shinfo(skb)->gso_size; + + if (skb_shinfo(skb)->nr_frags == 0) { + struct cp_desc *txd = &cp->tx_ring[entry]; +diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c +index a24200d..b5e39a1 100644 +--- a/drivers/net/bnx2.c ++++ b/drivers/net/bnx2.c +@@ -1593,7 +1593,7 @@ bnx2_tx_int(struct bnx2 *bp) + skb = tx_buf->skb; + #ifdef BCM_TSO + /* partial BD completions possible with TSO packets */ +- if (skb_shinfo(skb)->tso_size) { ++ if (skb_shinfo(skb)->gso_size) { + u16 last_idx, last_ring_idx; + + last_idx = sw_cons + +@@ -1948,7 +1948,7 @@ bnx2_poll(struct net_device *dev, int *b + return 1; + } + +-/* Called with rtnl_lock from vlan functions and also dev->xmit_lock ++/* Called with rtnl_lock from vlan functions and also netif_tx_lock + * from set_multicast. 
+ */ + static void +@@ -4403,7 +4403,7 @@ bnx2_vlan_rx_kill_vid(struct net_device + } + #endif + +-/* Called with dev->xmit_lock. ++/* Called with netif_tx_lock. + * hard_start_xmit is pseudo-lockless - a lock is only required when + * the tx queue is full. This way, we get the benefit of lockless + * operations most of the time without the complexities to handle +@@ -4441,7 +4441,7 @@ bnx2_start_xmit(struct sk_buff *skb, str + (TX_BD_FLAGS_VLAN_TAG | (vlan_tx_tag_get(skb) << 16)); + } + #ifdef BCM_TSO +- if ((mss = skb_shinfo(skb)->tso_size) && ++ if ((mss = skb_shinfo(skb)->gso_size) && + (skb->len > (bp->dev->mtu + ETH_HLEN))) { + u32 tcp_opt_len, ip_tcp_len; + +diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c +index bcf9f17..e970921 100644 +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -1145,8 +1145,7 @@ int bond_sethwaddr(struct net_device *bo + } + + #define BOND_INTERSECT_FEATURES \ +- (NETIF_F_SG|NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM|\ +- NETIF_F_TSO|NETIF_F_UFO) ++ (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_TSO | NETIF_F_UFO) + + /* + * Compute the common dev->feature set available to all slaves. Some +@@ -1164,9 +1163,7 @@ static int bond_compute_features(struct + features &= (slave->dev->features & BOND_INTERSECT_FEATURES); + + if ((features & NETIF_F_SG) && +- !(features & (NETIF_F_IP_CSUM | +- NETIF_F_NO_CSUM | +- NETIF_F_HW_CSUM))) ++ !(features & NETIF_F_ALL_CSUM)) + features &= ~NETIF_F_SG; + + /* +@@ -4147,7 +4144,7 @@ static int bond_init(struct net_device * + */ + bond_dev->features |= NETIF_F_VLAN_CHALLENGED; + +- /* don't acquire bond device's xmit_lock when ++ /* don't acquire bond device's netif_tx_lock when + * transmitting */ + bond_dev->features |= NETIF_F_LLTX; + +diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c +index 30ff8ea..7b7d360 100644 +--- a/drivers/net/chelsio/sge.c ++++ b/drivers/net/chelsio/sge.c +@@ -1419,7 +1419,7 @@ int t1_start_xmit(struct sk_buff *skb, s + struct cpl_tx_pkt *cpl; + + #ifdef NETIF_F_TSO +- if (skb_shinfo(skb)->tso_size) { ++ if (skb_shinfo(skb)->gso_size) { + int eth_type; + struct cpl_tx_pkt_lso *hdr; + +@@ -1434,7 +1434,7 @@ #ifdef NETIF_F_TSO + hdr->ip_hdr_words = skb->nh.iph->ihl; + hdr->tcp_hdr_words = skb->h.th->doff; + hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type, +- skb_shinfo(skb)->tso_size)); ++ skb_shinfo(skb)->gso_size)); + hdr->len = htonl(skb->len - sizeof(*hdr)); + cpl = (struct cpl_tx_pkt *)hdr; + sge->stats.tx_lso_pkts++; +diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c +index fa29402..681d284 100644 +--- a/drivers/net/e1000/e1000_main.c ++++ b/drivers/net/e1000/e1000_main.c +@@ -2526,7 +2526,7 @@ #ifdef NETIF_F_TSO + uint8_t ipcss, ipcso, tucss, tucso, hdr_len; + int err; + +- if (skb_shinfo(skb)->tso_size) { ++ if (skb_shinfo(skb)->gso_size) { + if (skb_header_cloned(skb)) { + err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + if (err) +@@ -2534,7 +2534,7 @@ #ifdef NETIF_F_TSO + } + + hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); +- mss = skb_shinfo(skb)->tso_size; ++ mss = skb_shinfo(skb)->gso_size; + if (skb->protocol == ntohs(ETH_P_IP)) { + skb->nh.iph->tot_len = 0; + skb->nh.iph->check = 0; +@@ -2651,7 +2651,7 @@ #ifdef NETIF_F_TSO + * tso gets written back prematurely before the data is fully + * DMAd to the controller */ + if (!skb->data_len && tx_ring->last_tx_tso && +- !skb_shinfo(skb)->tso_size) { ++ !skb_shinfo(skb)->gso_size) { + tx_ring->last_tx_tso = 0; + size 
-= 4; + } +@@ -2893,7 +2893,7 @@ #endif + } + + #ifdef NETIF_F_TSO +- mss = skb_shinfo(skb)->tso_size; ++ mss = skb_shinfo(skb)->gso_size; + /* The controller does a simple calculation to + * make sure there is enough room in the FIFO before + * initiating the DMA for each buffer. The calc is: +@@ -2935,7 +2935,7 @@ #endif + #ifdef NETIF_F_TSO + /* Controller Erratum workaround */ + if (!skb->data_len && tx_ring->last_tx_tso && +- !skb_shinfo(skb)->tso_size) ++ !skb_shinfo(skb)->gso_size) + count++; + #endif + +diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c +index 3682ec6..c35f16e 100644 +--- a/drivers/net/forcedeth.c ++++ b/drivers/net/forcedeth.c +@@ -482,9 +482,9 @@ #define LPA_1000HALF 0x0400 + * critical parts: + * - rx is (pseudo-) lockless: it relies on the single-threading provided + * by the arch code for interrupts. +- * - tx setup is lockless: it relies on dev->xmit_lock. Actual submission ++ * - tx setup is lockless: it relies on netif_tx_lock. Actual submission + * needs dev->priv->lock :-( +- * - set_multicast_list: preparation lockless, relies on dev->xmit_lock. ++ * - set_multicast_list: preparation lockless, relies on netif_tx_lock. + */ + + /* in dev: base, irq */ +@@ -1016,7 +1016,7 @@ static void drain_ring(struct net_device + + /* + * nv_start_xmit: dev->hard_start_xmit function +- * Called with dev->xmit_lock held. ++ * Called with netif_tx_lock held. + */ + static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev) + { +@@ -1105,8 +1105,8 @@ static int nv_start_xmit(struct sk_buff + np->tx_skbuff[nr] = skb; + + #ifdef NETIF_F_TSO +- if (skb_shinfo(skb)->tso_size) +- tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->tso_size << NV_TX2_TSO_SHIFT); ++ if (skb_shinfo(skb)->gso_size) ++ tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT); + else + #endif + tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0); +@@ -1203,7 +1203,7 @@ static void nv_tx_done(struct net_device + + /* + * nv_tx_timeout: dev->tx_timeout function +- * Called with dev->xmit_lock held. ++ * Called with netif_tx_lock held. + */ + static void nv_tx_timeout(struct net_device *dev) + { +@@ -1524,7 +1524,7 @@ static int nv_change_mtu(struct net_devi + * Changing the MTU is a rare event, it shouldn't matter. + */ + disable_irq(dev->irq); +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + spin_lock(&np->lock); + /* stop engines */ + nv_stop_rx(dev); +@@ -1559,7 +1559,7 @@ static int nv_change_mtu(struct net_devi + nv_start_rx(dev); + nv_start_tx(dev); + spin_unlock(&np->lock); +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + enable_irq(dev->irq); + } + return 0; +@@ -1594,7 +1594,7 @@ static int nv_set_mac_address(struct net + memcpy(dev->dev_addr, macaddr->sa_data, ETH_ALEN); + + if (netif_running(dev)) { +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + spin_lock_irq(&np->lock); + + /* stop rx engine */ +@@ -1606,7 +1606,7 @@ static int nv_set_mac_address(struct net + /* restart rx engine */ + nv_start_rx(dev); + spin_unlock_irq(&np->lock); +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + } else { + nv_copy_mac_to_hw(dev); + } +@@ -1615,7 +1615,7 @@ static int nv_set_mac_address(struct net + + /* + * nv_set_multicast: dev->set_multicast function +- * Called with dev->xmit_lock held. ++ * Called with netif_tx_lock held. 
+ */ + static void nv_set_multicast(struct net_device *dev) + { +diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c +index 102c1f0..d12605f 100644 +--- a/drivers/net/hamradio/6pack.c ++++ b/drivers/net/hamradio/6pack.c +@@ -308,9 +308,9 @@ static int sp_set_mac_address(struct net + { + struct sockaddr_ax25 *sa = addr; + +- spin_lock_irq(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN); +- spin_unlock_irq(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + + return 0; + } +@@ -767,9 +767,9 @@ static int sixpack_ioctl(struct tty_stru + break; + } + +- spin_lock_irq(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + memcpy(dev->dev_addr, &addr, AX25_ADDR_LEN); +- spin_unlock_irq(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + + err = 0; + break; +diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c +index dc5e9d5..5c66f5a 100644 +--- a/drivers/net/hamradio/mkiss.c ++++ b/drivers/net/hamradio/mkiss.c +@@ -357,9 +357,9 @@ static int ax_set_mac_address(struct net + { + struct sockaddr_ax25 *sa = addr; + +- spin_lock_irq(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN); +- spin_unlock_irq(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + + return 0; + } +@@ -886,9 +886,9 @@ static int mkiss_ioctl(struct tty_struct + break; + } + +- spin_lock_irq(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + memcpy(dev->dev_addr, addr, AX25_ADDR_LEN); +- spin_unlock_irq(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + + err = 0; + break; +diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c +index 31fb2d7..2e222ef 100644 +--- a/drivers/net/ifb.c ++++ b/drivers/net/ifb.c +@@ -76,13 +76,13 @@ static void ri_tasklet(unsigned long dev + dp->st_task_enter++; + if ((skb = skb_peek(&dp->tq)) == NULL) { + dp->st_txq_refl_try++; +- if (spin_trylock(&_dev->xmit_lock)) { ++ if (netif_tx_trylock(_dev)) { + dp->st_rxq_enter++; + while ((skb = skb_dequeue(&dp->rq)) != NULL) { + skb_queue_tail(&dp->tq, skb); + dp->st_rx2tx_tran++; + } +- spin_unlock(&_dev->xmit_lock); ++ netif_tx_unlock(_dev); + } else { + /* reschedule */ + dp->st_rxq_notenter++; +@@ -110,7 +110,7 @@ static void ri_tasklet(unsigned long dev + } + } + +- if (spin_trylock(&_dev->xmit_lock)) { ++ if (netif_tx_trylock(_dev)) { + dp->st_rxq_check++; + if ((skb = skb_peek(&dp->rq)) == NULL) { + dp->tasklet_pending = 0; +@@ -118,10 +118,10 @@ static void ri_tasklet(unsigned long dev + netif_wake_queue(_dev); + } else { + dp->st_rxq_rsch++; +- spin_unlock(&_dev->xmit_lock); ++ netif_tx_unlock(_dev); + goto resched; + } +- spin_unlock(&_dev->xmit_lock); ++ netif_tx_unlock(_dev); + } else { + resched: + dp->tasklet_pending = 1; +diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c +index a9f49f0..339d4a7 100644 +--- a/drivers/net/irda/vlsi_ir.c ++++ b/drivers/net/irda/vlsi_ir.c +@@ -959,7 +959,7 @@ static int vlsi_hard_start_xmit(struct s + || (now.tv_sec==ready.tv_sec && now.tv_usec>=ready.tv_usec)) + break; + udelay(100); +- /* must not sleep here - we are called under xmit_lock! */ ++ /* must not sleep here - called under netif_tx_lock! 
*/ + } + } + +diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c +index f9f77e4..bdab369 100644 +--- a/drivers/net/ixgb/ixgb_main.c ++++ b/drivers/net/ixgb/ixgb_main.c +@@ -1163,7 +1163,7 @@ #ifdef NETIF_F_TSO + uint16_t ipcse, tucse, mss; + int err; + +- if(likely(skb_shinfo(skb)->tso_size)) { ++ if(likely(skb_shinfo(skb)->gso_size)) { + if (skb_header_cloned(skb)) { + err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + if (err) +@@ -1171,7 +1171,7 @@ #ifdef NETIF_F_TSO + } + + hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); +- mss = skb_shinfo(skb)->tso_size; ++ mss = skb_shinfo(skb)->gso_size; + skb->nh.iph->tot_len = 0; + skb->nh.iph->check = 0; + skb->h.th->check = ~csum_tcpudp_magic(skb->nh.iph->saddr, +diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c +index 690a1aa..9bcaa80 100644 +--- a/drivers/net/loopback.c ++++ b/drivers/net/loopback.c +@@ -74,7 +74,7 @@ static void emulate_large_send_offload(s + struct iphdr *iph = skb->nh.iph; + struct tcphdr *th = (struct tcphdr*)(skb->nh.raw + (iph->ihl * 4)); + unsigned int doffset = (iph->ihl + th->doff) * 4; +- unsigned int mtu = skb_shinfo(skb)->tso_size + doffset; ++ unsigned int mtu = skb_shinfo(skb)->gso_size + doffset; + unsigned int offset = 0; + u32 seq = ntohl(th->seq); + u16 id = ntohs(iph->id); +@@ -139,7 +139,7 @@ #ifndef LOOPBACK_MUST_CHECKSUM + #endif + + #ifdef LOOPBACK_TSO +- if (skb_shinfo(skb)->tso_size) { ++ if (skb_shinfo(skb)->gso_size) { + BUG_ON(skb->protocol != htons(ETH_P_IP)); + BUG_ON(skb->nh.iph->protocol != IPPROTO_TCP); + +diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c +index c0998ef..0fac9d5 100644 +--- a/drivers/net/mv643xx_eth.c ++++ b/drivers/net/mv643xx_eth.c +@@ -1107,7 +1107,7 @@ static int mv643xx_eth_start_xmit(struct + + #ifdef MV643XX_CHECKSUM_OFFLOAD_TX + if (has_tiny_unaligned_frags(skb)) { +- if ((skb_linearize(skb, GFP_ATOMIC) != 0)) { ++ if (__skb_linearize(skb)) { + stats->tx_dropped++; + printk(KERN_DEBUG "%s: failed to linearize tiny " + "unaligned fragment\n", dev->name); +diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c +index 9d6d254..c9ed624 100644 +--- a/drivers/net/natsemi.c ++++ b/drivers/net/natsemi.c +@@ -323,12 +323,12 @@ performance critical codepaths: + The rx process only runs in the interrupt handler. Access from outside + the interrupt handler is only permitted after disable_irq(). + +-The rx process usually runs under the dev->xmit_lock. If np->intr_tx_reap ++The rx process usually runs under the netif_tx_lock. If np->intr_tx_reap + is set, then access is permitted under spin_lock_irq(&np->lock). + + Thus configuration functions that want to access everything must call + disable_irq(dev->irq); +- spin_lock_bh(dev->xmit_lock); ++ netif_tx_lock_bh(dev); + spin_lock_irq(&np->lock); + + IV. 
Notes +diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c +index 8cc0d0b..e53b313 100644 +--- a/drivers/net/r8169.c ++++ b/drivers/net/r8169.c +@@ -2171,7 +2171,7 @@ static int rtl8169_xmit_frags(struct rtl + static inline u32 rtl8169_tso_csum(struct sk_buff *skb, struct net_device *dev) + { + if (dev->features & NETIF_F_TSO) { +- u32 mss = skb_shinfo(skb)->tso_size; ++ u32 mss = skb_shinfo(skb)->gso_size; + + if (mss) + return LargeSend | ((mss & MSSMask) << MSSShift); +diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c +index b7f00d6..439f45f 100644 +--- a/drivers/net/s2io.c ++++ b/drivers/net/s2io.c +@@ -3522,8 +3522,8 @@ #endif + txdp->Control_1 = 0; + txdp->Control_2 = 0; + #ifdef NETIF_F_TSO +- mss = skb_shinfo(skb)->tso_size; +- if (mss) { ++ mss = skb_shinfo(skb)->gso_size; ++ if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV4) { + txdp->Control_1 |= TXD_TCP_LSO_EN; + txdp->Control_1 |= TXD_TCP_LSO_MSS(mss); + } +@@ -3543,10 +3543,10 @@ #endif + } + + frg_len = skb->len - skb->data_len; +- if (skb_shinfo(skb)->ufo_size) { ++ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) { + int ufo_size; + +- ufo_size = skb_shinfo(skb)->ufo_size; ++ ufo_size = skb_shinfo(skb)->gso_size; + ufo_size &= ~7; + txdp->Control_1 |= TXD_UFO_EN; + txdp->Control_1 |= TXD_UFO_MSS(ufo_size); +@@ -3572,7 +3572,7 @@ #endif + txdp->Host_Control = (unsigned long) skb; + txdp->Control_1 |= TXD_BUFFER0_SIZE(frg_len); + +- if (skb_shinfo(skb)->ufo_size) ++ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) + txdp->Control_1 |= TXD_UFO_EN; + + frg_cnt = skb_shinfo(skb)->nr_frags; +@@ -3587,12 +3587,12 @@ #endif + (sp->pdev, frag->page, frag->page_offset, + frag->size, PCI_DMA_TODEVICE); + txdp->Control_1 = TXD_BUFFER0_SIZE(frag->size); +- if (skb_shinfo(skb)->ufo_size) ++ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) + txdp->Control_1 |= TXD_UFO_EN; + } + txdp->Control_1 |= TXD_GATHER_CODE_LAST; + +- if (skb_shinfo(skb)->ufo_size) ++ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) + frg_cnt++; /* as Txd0 was used for inband header */ + + tx_fifo = mac_control->tx_FIFO_start[queue]; +@@ -3606,7 +3606,7 @@ #ifdef NETIF_F_TSO + if (mss) + val64 |= TX_FIFO_SPECIAL_FUNC; + #endif +- if (skb_shinfo(skb)->ufo_size) ++ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) + val64 |= TX_FIFO_SPECIAL_FUNC; + writeq(val64, &tx_fifo->List_Control); + +diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c +index 0618cd5..2a55eb3 100644 +--- a/drivers/net/sky2.c ++++ b/drivers/net/sky2.c +@@ -1125,7 +1125,7 @@ static unsigned tx_le_req(const struct s + count = sizeof(dma_addr_t) / sizeof(u32); + count += skb_shinfo(skb)->nr_frags * count; + +- if (skb_shinfo(skb)->tso_size) ++ if (skb_shinfo(skb)->gso_size) + ++count; + + if (skb->ip_summed == CHECKSUM_HW) +@@ -1197,7 +1197,7 @@ static int sky2_xmit_frame(struct sk_buf + } + + /* Check for TCP Segmentation Offload */ +- mss = skb_shinfo(skb)->tso_size; ++ mss = skb_shinfo(skb)->gso_size; + if (mss != 0) { + /* just drop the packet if non-linear expansion fails */ + if (skb_header_cloned(skb) && +diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c +index caf4102..fc9164a 100644 +--- a/drivers/net/tg3.c ++++ b/drivers/net/tg3.c +@@ -3664,7 +3664,7 @@ static int tg3_start_xmit(struct sk_buff + #if TG3_TSO_SUPPORT != 0 + mss = 0; + if (skb->len > (tp->dev->mtu + ETH_HLEN) && +- (mss = skb_shinfo(skb)->tso_size) != 0) { ++ (mss = skb_shinfo(skb)->gso_size) != 0) { + int tcp_opt_len, ip_tcp_len; + + if (skb_header_cloned(skb) && +diff --git a/drivers/net/tulip/winbond-840.c 
b/drivers/net/tulip/winbond-840.c +index 5b1af39..11de5af 100644 +--- a/drivers/net/tulip/winbond-840.c ++++ b/drivers/net/tulip/winbond-840.c +@@ -1605,11 +1605,11 @@ #ifdef CONFIG_PM + * - get_stats: + * spin_lock_irq(np->lock), doesn't touch hw if not present + * - hard_start_xmit: +- * netif_stop_queue + spin_unlock_wait(&dev->xmit_lock); ++ * synchronize_irq + netif_tx_disable; + * - tx_timeout: +- * netif_device_detach + spin_unlock_wait(&dev->xmit_lock); ++ * netif_device_detach + netif_tx_disable; + * - set_multicast_list +- * netif_device_detach + spin_unlock_wait(&dev->xmit_lock); ++ * netif_device_detach + netif_tx_disable; + * - interrupt handler + * doesn't touch hw if not present, synchronize_irq waits for + * running instances of the interrupt handler. +@@ -1635,11 +1635,10 @@ static int w840_suspend (struct pci_dev + netif_device_detach(dev); + update_csr6(dev, 0); + iowrite32(0, ioaddr + IntrEnable); +- netif_stop_queue(dev); + spin_unlock_irq(&np->lock); + +- spin_unlock_wait(&dev->xmit_lock); + synchronize_irq(dev->irq); ++ netif_tx_disable(dev); + + np->stats.rx_missed_errors += ioread32(ioaddr + RxMissed) & 0xffff; + +diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c +index 4c76cb7..30c48c9 100644 +--- a/drivers/net/typhoon.c ++++ b/drivers/net/typhoon.c +@@ -340,7 +340,7 @@ #define typhoon_synchronize_irq(x) synch + #endif + + #if defined(NETIF_F_TSO) +-#define skb_tso_size(x) (skb_shinfo(x)->tso_size) ++#define skb_tso_size(x) (skb_shinfo(x)->gso_size) + #define TSO_NUM_DESCRIPTORS 2 + #define TSO_OFFLOAD_ON TYPHOON_OFFLOAD_TCP_SEGMENT + #else +diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c +index ed1f837..2eb6b5f 100644 +--- a/drivers/net/via-velocity.c ++++ b/drivers/net/via-velocity.c +@@ -1899,6 +1899,13 @@ static int velocity_xmit(struct sk_buff + + int pktlen = skb->len; + ++#ifdef VELOCITY_ZERO_COPY_SUPPORT ++ if (skb_shinfo(skb)->nr_frags > 6 && __skb_linearize(skb)) { ++ kfree_skb(skb); ++ return 0; ++ } ++#endif ++ + spin_lock_irqsave(&vptr->lock, flags); + + index = vptr->td_curr[qnum]; +@@ -1914,8 +1921,6 @@ static int velocity_xmit(struct sk_buff + */ + if (pktlen < ETH_ZLEN) { + /* Cannot occur until ZC support */ +- if(skb_linearize(skb, GFP_ATOMIC)) +- return 0; + pktlen = ETH_ZLEN; + memcpy(tdinfo->buf, skb->data, skb->len); + memset(tdinfo->buf + skb->len, 0, ETH_ZLEN - skb->len); +@@ -1933,7 +1938,6 @@ #ifdef VELOCITY_ZERO_COPY_SUPPORT + int nfrags = skb_shinfo(skb)->nr_frags; + tdinfo->skb = skb; + if (nfrags > 6) { +- skb_linearize(skb, GFP_ATOMIC); + memcpy(tdinfo->buf, skb->data, skb->len); + tdinfo->skb_dma[0] = tdinfo->buf_dma; + td_ptr->tdesc0.pktsize = +diff --git a/drivers/net/wireless/orinoco.c b/drivers/net/wireless/orinoco.c +index 6fd0bf7..75237c1 100644 +--- a/drivers/net/wireless/orinoco.c ++++ b/drivers/net/wireless/orinoco.c +@@ -1835,7 +1835,9 @@ static int __orinoco_program_rids(struct + /* Set promiscuity / multicast*/ + priv->promiscuous = 0; + priv->mc_count = 0; +- __orinoco_set_multicast_list(dev); /* FIXME: what about the xmit_lock */ ++ ++ /* FIXME: what about netif_tx_lock */ ++ __orinoco_set_multicast_list(dev); + + return 0; + } +diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c +index 82cb4af..57cec40 100644 +--- a/drivers/s390/net/qeth_eddp.c ++++ b/drivers/s390/net/qeth_eddp.c +@@ -421,7 +421,7 @@ #endif /* CONFIG_QETH_VLAN */ + } + tcph = eddp->skb->h.th; + while (eddp->skb_offset < eddp->skb->len) { +- data_len = min((int)skb_shinfo(eddp->skb)->tso_size, ++ 
data_len = min((int)skb_shinfo(eddp->skb)->gso_size, + (int)(eddp->skb->len - eddp->skb_offset)); + /* prepare qdio hdr */ + if (eddp->qh.hdr.l2.id == QETH_HEADER_TYPE_LAYER2){ +@@ -516,20 +516,20 @@ qeth_eddp_calc_num_pages(struct qeth_edd + + QETH_DBF_TEXT(trace, 5, "eddpcanp"); + /* can we put multiple skbs in one page? */ +- skbs_per_page = PAGE_SIZE / (skb_shinfo(skb)->tso_size + hdr_len); ++ skbs_per_page = PAGE_SIZE / (skb_shinfo(skb)->gso_size + hdr_len); + if (skbs_per_page > 1){ +- ctx->num_pages = (skb_shinfo(skb)->tso_segs + 1) / ++ ctx->num_pages = (skb_shinfo(skb)->gso_segs + 1) / + skbs_per_page + 1; + ctx->elements_per_skb = 1; + } else { + /* no -> how many elements per skb? */ +- ctx->elements_per_skb = (skb_shinfo(skb)->tso_size + hdr_len + ++ ctx->elements_per_skb = (skb_shinfo(skb)->gso_size + hdr_len + + PAGE_SIZE) >> PAGE_SHIFT; + ctx->num_pages = ctx->elements_per_skb * +- (skb_shinfo(skb)->tso_segs + 1); ++ (skb_shinfo(skb)->gso_segs + 1); + } + ctx->num_elements = ctx->elements_per_skb * +- (skb_shinfo(skb)->tso_segs + 1); ++ (skb_shinfo(skb)->gso_segs + 1); + } + + static inline struct qeth_eddp_context * +diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c +index dba7f7f..d9cc997 100644 +--- a/drivers/s390/net/qeth_main.c ++++ b/drivers/s390/net/qeth_main.c +@@ -4454,7 +4454,7 @@ qeth_send_packet(struct qeth_card *card, + queue = card->qdio.out_qs + [qeth_get_priority_queue(card, skb, ipv, cast_type)]; + +- if (skb_shinfo(skb)->tso_size) ++ if (skb_shinfo(skb)->gso_size) + large_send = card->options.large_send; + + /*are we able to do TSO ? If so ,prepare and send it from here */ +@@ -4501,7 +4501,7 @@ qeth_send_packet(struct qeth_card *card, + card->stats.tx_packets++; + card->stats.tx_bytes += skb->len; + #ifdef CONFIG_QETH_PERF_STATS +- if (skb_shinfo(skb)->tso_size && ++ if (skb_shinfo(skb)->gso_size && + !(large_send == QETH_LARGE_SEND_NO)) { + card->perf_stats.large_send_bytes += skb->len; + card->perf_stats.large_send_cnt++; +diff --git a/drivers/s390/net/qeth_tso.h b/drivers/s390/net/qeth_tso.h +index 1286dde..89cbf34 100644 +--- a/drivers/s390/net/qeth_tso.h ++++ b/drivers/s390/net/qeth_tso.h +@@ -51,7 +51,7 @@ qeth_tso_fill_header(struct qeth_card *c + hdr->ext.hdr_version = 1; + hdr->ext.hdr_len = 28; + /*insert non-fix values */ +- hdr->ext.mss = skb_shinfo(skb)->tso_size; ++ hdr->ext.mss = skb_shinfo(skb)->gso_size; + hdr->ext.dg_hdr_len = (__u16)(iph->ihl*4 + tcph->doff*4); + hdr->ext.payload_len = (__u16)(skb->len - hdr->ext.dg_hdr_len - + sizeof(struct qeth_hdr_tso)); +diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h +index 93535f0..9269df7 100644 +--- a/include/linux/ethtool.h ++++ b/include/linux/ethtool.h +@@ -408,6 +408,8 @@ #define ETHTOOL_STSO 0x0000001f /* Set + #define ETHTOOL_GPERMADDR 0x00000020 /* Get permanent hardware address */ + #define ETHTOOL_GUFO 0x00000021 /* Get UFO enable (ethtool_value) */ + #define ETHTOOL_SUFO 0x00000022 /* Set UFO enable (ethtool_value) */ ++#define ETHTOOL_GGSO 0x00000023 /* Get GSO enable (ethtool_value) */ ++#define ETHTOOL_SGSO 0x00000024 /* Set GSO enable (ethtool_value) */ + + /* compatibility with older code */ + #define SPARC_ETH_GSET ETHTOOL_GSET +diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h +index 7fda03d..47b0965 100644 +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -230,7 +230,8 @@ enum netdev_state_t + __LINK_STATE_SCHED, + __LINK_STATE_NOCARRIER, + __LINK_STATE_RX_SCHED, +- __LINK_STATE_LINKWATCH_PENDING ++ 
__LINK_STATE_LINKWATCH_PENDING, ++ __LINK_STATE_QDISC_RUNNING, + }; + + +@@ -306,9 +307,17 @@ #define NETIF_F_HW_VLAN_TX 128 /* Transm + #define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */ + #define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ + #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ +-#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */ ++#define NETIF_F_GSO 2048 /* Enable software GSO. */ + #define NETIF_F_LLTX 4096 /* LockLess TX */ +-#define NETIF_F_UFO 8192 /* Can offload UDP Large Send*/ ++ ++ /* Segmentation offload features */ ++#define NETIF_F_GSO_SHIFT 16 ++#define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT) ++#define NETIF_F_UFO (SKB_GSO_UDPV4 << NETIF_F_GSO_SHIFT) ++#define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT) ++ ++#define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) ++#define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM) + + struct net_device *next_sched; + +@@ -394,6 +403,9 @@ #define NETIF_F_UFO 8192 + struct list_head qdisc_list; + unsigned long tx_queue_len; /* Max frames per queue allowed */ + ++ /* Partially transmitted GSO packet. */ ++ struct sk_buff *gso_skb; ++ + /* ingress path synchronizer */ + spinlock_t ingress_lock; + struct Qdisc *qdisc_ingress; +@@ -402,7 +414,7 @@ #define NETIF_F_UFO 8192 + * One part is mostly used on xmit path (device) + */ + /* hard_start_xmit synchronizer */ +- spinlock_t xmit_lock ____cacheline_aligned_in_smp; ++ spinlock_t _xmit_lock ____cacheline_aligned_in_smp; + /* cpu id of processor entered to hard_start_xmit or -1, + if nobody entered there. + */ +@@ -527,6 +539,8 @@ struct packet_type { + struct net_device *, + struct packet_type *, + struct net_device *); ++ struct sk_buff *(*gso_segment)(struct sk_buff *skb, ++ int features); + void *af_packet_priv; + struct list_head list; + }; +@@ -693,7 +707,8 @@ extern int dev_change_name(struct net_d + extern int dev_set_mtu(struct net_device *, int); + extern int dev_set_mac_address(struct net_device *, + struct sockaddr *); +-extern void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); ++extern int dev_hard_start_xmit(struct sk_buff *skb, ++ struct net_device *dev); + + extern void dev_init(void); + +@@ -900,11 +915,43 @@ static inline void __netif_rx_complete(s + clear_bit(__LINK_STATE_RX_SCHED, &dev->state); + } + ++static inline void netif_tx_lock(struct net_device *dev) ++{ ++ spin_lock(&dev->_xmit_lock); ++ dev->xmit_lock_owner = smp_processor_id(); ++} ++ ++static inline void netif_tx_lock_bh(struct net_device *dev) ++{ ++ spin_lock_bh(&dev->_xmit_lock); ++ dev->xmit_lock_owner = smp_processor_id(); ++} ++ ++static inline int netif_tx_trylock(struct net_device *dev) ++{ ++ int err = spin_trylock(&dev->_xmit_lock); ++ if (!err) ++ dev->xmit_lock_owner = smp_processor_id(); ++ return err; ++} ++ ++static inline void netif_tx_unlock(struct net_device *dev) ++{ ++ dev->xmit_lock_owner = -1; ++ spin_unlock(&dev->_xmit_lock); ++} ++ ++static inline void netif_tx_unlock_bh(struct net_device *dev) ++{ ++ dev->xmit_lock_owner = -1; ++ spin_unlock_bh(&dev->_xmit_lock); ++} ++ + static inline void netif_tx_disable(struct net_device *dev) + { +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + netif_stop_queue(dev); +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + } + + /* These functions live elsewhere (drivers/net/net_init.c, but related) */ +@@ -932,6 +979,7 @@ extern int netdev_max_backlog; + extern int weight_p; + extern int 
netdev_set_master(struct net_device *dev, struct net_device *master); + extern int skb_checksum_help(struct sk_buff *skb, int inward); ++extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features); + #ifdef CONFIG_BUG + extern void netdev_rx_csum_fault(struct net_device *dev); + #else +@@ -951,6 +999,18 @@ #endif + + extern void linkwatch_run_queue(void); + ++static inline int skb_gso_ok(struct sk_buff *skb, int features) ++{ ++ int feature = skb_shinfo(skb)->gso_size ? ++ skb_shinfo(skb)->gso_type << NETIF_F_GSO_SHIFT : 0; ++ return (features & feature) == feature; ++} ++ ++static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) ++{ ++ return !skb_gso_ok(skb, dev->features); ++} ++ + #endif /* __KERNEL__ */ + + #endif /* _LINUX_DEV_H */ +diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h +index ad7cc22..b19d45d 100644 +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -134,9 +134,10 @@ struct skb_frag_struct { + struct skb_shared_info { + atomic_t dataref; + unsigned short nr_frags; +- unsigned short tso_size; +- unsigned short tso_segs; +- unsigned short ufo_size; ++ unsigned short gso_size; ++ /* Warning: this field is not always filled in (UFO)! */ ++ unsigned short gso_segs; ++ unsigned short gso_type; + unsigned int ip6_frag_id; + struct sk_buff *frag_list; + skb_frag_t frags[MAX_SKB_FRAGS]; +@@ -168,6 +169,14 @@ enum { + SKB_FCLONE_CLONE, + }; + ++enum { ++ SKB_GSO_TCPV4 = 1 << 0, ++ SKB_GSO_UDPV4 = 1 << 1, ++ ++ /* This indicates the skb is from an untrusted source. */ ++ SKB_GSO_DODGY = 1 << 2, ++}; ++ + /** + * struct sk_buff - socket buffer + * @next: Next buffer in list +@@ -1148,18 +1157,34 @@ static inline int skb_can_coalesce(struc + return 0; + } + ++static inline int __skb_linearize(struct sk_buff *skb) ++{ ++ return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM; ++} ++ + /** + * skb_linearize - convert paged skb to linear one + * @skb: buffer to linarize +- * @gfp: allocation mode + * + * If there is no free memory -ENOMEM is returned, otherwise zero + * is returned and the old skb data released. + */ +-extern int __skb_linearize(struct sk_buff *skb, gfp_t gfp); +-static inline int skb_linearize(struct sk_buff *skb, gfp_t gfp) ++static inline int skb_linearize(struct sk_buff *skb) ++{ ++ return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0; ++} ++ ++/** ++ * skb_linearize_cow - make sure skb is linear and writable ++ * @skb: buffer to process ++ * ++ * If there is no free memory -ENOMEM is returned, otherwise zero ++ * is returned and the old skb data released. ++ */ ++static inline int skb_linearize_cow(struct sk_buff *skb) + { +- return __skb_linearize(skb, gfp); ++ return skb_is_nonlinear(skb) || skb_cloned(skb) ? 
++ __skb_linearize(skb) : 0; + } + + /** +@@ -1254,6 +1279,7 @@ extern void skb_split(struct sk_b + struct sk_buff *skb1, const u32 len); + + extern void skb_release_data(struct sk_buff *skb); ++extern struct sk_buff *skb_segment(struct sk_buff *skb, int features); + + static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *buffer) +diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h +index b94d1ad..75b5b93 100644 +--- a/include/net/pkt_sched.h ++++ b/include/net/pkt_sched.h +@@ -218,12 +218,13 @@ extern struct qdisc_rate_table *qdisc_ge + struct rtattr *tab); + extern void qdisc_put_rtab(struct qdisc_rate_table *tab); + +-extern int qdisc_restart(struct net_device *dev); ++extern void __qdisc_run(struct net_device *dev); + + static inline void qdisc_run(struct net_device *dev) + { +- while (!netif_queue_stopped(dev) && qdisc_restart(dev) < 0) +- /* NOTHING */; ++ if (!netif_queue_stopped(dev) && ++ !test_and_set_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) ++ __qdisc_run(dev); + } + + extern int tc_classify(struct sk_buff *skb, struct tcf_proto *tp, +diff --git a/include/net/protocol.h b/include/net/protocol.h +index 6dc5970..0d2dcdb 100644 +--- a/include/net/protocol.h ++++ b/include/net/protocol.h +@@ -37,6 +37,8 @@ #define MAX_INET_PROTOS 256 /* Must be + struct net_protocol { + int (*handler)(struct sk_buff *skb); + void (*err_handler)(struct sk_buff *skb, u32 info); ++ struct sk_buff *(*gso_segment)(struct sk_buff *skb, ++ int features); + int no_policy; + }; + +diff --git a/include/net/sock.h b/include/net/sock.h +index f63d0d5..a8e8d21 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -1064,9 +1064,13 @@ static inline void sk_setup_caps(struct + { + __sk_dst_set(sk, dst); + sk->sk_route_caps = dst->dev->features; ++ if (sk->sk_route_caps & NETIF_F_GSO) ++ sk->sk_route_caps |= NETIF_F_TSO; + if (sk->sk_route_caps & NETIF_F_TSO) { + if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len) + sk->sk_route_caps &= ~NETIF_F_TSO; ++ else ++ sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; + } + } + +diff --git a/include/net/tcp.h b/include/net/tcp.h +index 77f21c6..70e1d5f 100644 +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -552,13 +552,13 @@ #include <net/tcp_ecn.h> + */ + static inline int tcp_skb_pcount(const struct sk_buff *skb) + { +- return skb_shinfo(skb)->tso_segs; ++ return skb_shinfo(skb)->gso_segs; + } + + /* This is valid iff tcp_skb_pcount() > 1. 
*/ + static inline int tcp_skb_mss(const struct sk_buff *skb) + { +- return skb_shinfo(skb)->tso_size; ++ return skb_shinfo(skb)->gso_size; + } + + static inline void tcp_dec_pcount_approx(__u32 *count, +@@ -1063,6 +1063,8 @@ extern struct request_sock_ops tcp_reque + + extern int tcp_v4_destroy_sock(struct sock *sk); + ++extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features); ++ + #ifdef CONFIG_PROC_FS + extern int tcp4_proc_init(void); + extern void tcp4_proc_exit(void); +diff --git a/net/atm/clip.c b/net/atm/clip.c +index 1842a4e..6dc21a7 100644 +--- a/net/atm/clip.c ++++ b/net/atm/clip.c +@@ -101,7 +101,7 @@ static void unlink_clip_vcc(struct clip_ + printk(KERN_CRIT "!clip_vcc->entry (clip_vcc %p)\n",clip_vcc); + return; + } +- spin_lock_bh(&entry->neigh->dev->xmit_lock); /* block clip_start_xmit() */ ++ netif_tx_lock_bh(entry->neigh->dev); /* block clip_start_xmit() */ + entry->neigh->used = jiffies; + for (walk = &entry->vccs; *walk; walk = &(*walk)->next) + if (*walk == clip_vcc) { +@@ -125,7 +125,7 @@ static void unlink_clip_vcc(struct clip_ + printk(KERN_CRIT "ATMARP: unlink_clip_vcc failed (entry %p, vcc " + "0x%p)\n",entry,clip_vcc); + out: +- spin_unlock_bh(&entry->neigh->dev->xmit_lock); ++ netif_tx_unlock_bh(entry->neigh->dev); + } + + /* The neighbour entry n->lock is held. */ +diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c +index 0b33a7b..180e79b 100644 +--- a/net/bridge/br_device.c ++++ b/net/bridge/br_device.c +@@ -146,9 +146,9 @@ static int br_set_tx_csum(struct net_dev + struct net_bridge *br = netdev_priv(dev); + + if (data) +- br->feature_mask |= NETIF_F_IP_CSUM; ++ br->feature_mask |= NETIF_F_NO_CSUM; + else +- br->feature_mask &= ~NETIF_F_IP_CSUM; ++ br->feature_mask &= ~NETIF_F_ALL_CSUM; + + br_features_recompute(br); + return 0; +@@ -185,6 +185,6 @@ void br_dev_setup(struct net_device *dev + dev->set_mac_address = br_set_mac_address; + dev->priv_flags = IFF_EBRIDGE; + +- dev->features = NETIF_F_SG | NETIF_F_FRAGLIST +- | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_IP_CSUM; ++ dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | ++ NETIF_F_TSO | NETIF_F_NO_CSUM | NETIF_F_GSO_ROBUST; + } +diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c +index 2d24fb4..00b1128 100644 +--- a/net/bridge/br_forward.c ++++ b/net/bridge/br_forward.c +@@ -32,7 +32,7 @@ static inline int should_deliver(const s + int br_dev_queue_push_xmit(struct sk_buff *skb) + { + /* drop mtu oversized packets except tso */ +- if (skb->len > skb->dev->mtu && !skb_shinfo(skb)->tso_size) ++ if (skb->len > skb->dev->mtu && !skb_shinfo(skb)->gso_size) + kfree_skb(skb); + else { + #ifdef CONFIG_BRIDGE_NETFILTER +diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c +index f36b35e..0617146 100644 +--- a/net/bridge/br_if.c ++++ b/net/bridge/br_if.c +@@ -385,17 +385,28 @@ void br_features_recompute(struct net_br + struct net_bridge_port *p; + unsigned long features, checksum; + +- features = br->feature_mask &~ NETIF_F_IP_CSUM; +- checksum = br->feature_mask & NETIF_F_IP_CSUM; ++ checksum = br->feature_mask & NETIF_F_ALL_CSUM ? 
NETIF_F_NO_CSUM : 0; ++ features = br->feature_mask & ~NETIF_F_ALL_CSUM; + + list_for_each_entry(p, &br->port_list, list) { +- if (!(p->dev->features +- & (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM))) ++ unsigned long feature = p->dev->features; ++ ++ if (checksum & NETIF_F_NO_CSUM && !(feature & NETIF_F_NO_CSUM)) ++ checksum ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM; ++ if (checksum & NETIF_F_HW_CSUM && !(feature & NETIF_F_HW_CSUM)) ++ checksum ^= NETIF_F_HW_CSUM | NETIF_F_IP_CSUM; ++ if (!(feature & NETIF_F_IP_CSUM)) + checksum = 0; +- features &= p->dev->features; ++ ++ if (feature & NETIF_F_GSO) ++ feature |= NETIF_F_TSO; ++ feature |= NETIF_F_GSO; ++ ++ features &= feature; + } + +- br->dev->features = features | checksum | NETIF_F_LLTX; ++ br->dev->features = features | checksum | NETIF_F_LLTX | ++ NETIF_F_GSO_ROBUST; + } + + /* called with RTNL */ +diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c +index 9e27373..588207f 100644 +--- a/net/bridge/br_netfilter.c ++++ b/net/bridge/br_netfilter.c +@@ -743,7 +743,7 @@ static int br_nf_dev_queue_xmit(struct s + { + if (skb->protocol == htons(ETH_P_IP) && + skb->len > skb->dev->mtu && +- !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) ++ !skb_shinfo(skb)->gso_size) + return ip_fragment(skb, br_dev_queue_push_xmit); + else + return br_dev_queue_push_xmit(skb); +diff --git a/net/core/dev.c b/net/core/dev.c +index 12a214c..32e1056 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -115,6 +115,7 @@ #include <linux/wireless.h> /* Note : w + #include <net/iw_handler.h> + #endif /* CONFIG_NET_RADIO */ + #include <asm/current.h> ++#include <linux/err.h> + + /* + * The list of packet types we will receive (as opposed to discard) +@@ -1032,7 +1033,7 @@ static inline void net_timestamp(struct + * taps currently in use. + */ + +-void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) ++static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) + { + struct packet_type *ptype; + +@@ -1106,6 +1107,45 @@ out: + return ret; + } + ++/** ++ * skb_gso_segment - Perform segmentation on skb. ++ * @skb: buffer to segment ++ * @features: features for the output path (see dev->features) ++ * ++ * This function segments the given skb and returns a list of segments. ++ * ++ * It may return NULL if the skb requires no segmentation. This is ++ * only possible when GSO is used for verifying header integrity. ++ */ ++struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) ++{ ++ struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); ++ struct packet_type *ptype; ++ int type = skb->protocol; ++ ++ BUG_ON(skb_shinfo(skb)->frag_list); ++ BUG_ON(skb->ip_summed != CHECKSUM_HW); ++ ++ skb->mac.raw = skb->data; ++ skb->mac_len = skb->nh.raw - skb->data; ++ __skb_pull(skb, skb->mac_len); ++ ++ rcu_read_lock(); ++ list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { ++ if (ptype->type == type && !ptype->dev && ptype->gso_segment) { ++ segs = ptype->gso_segment(skb, features); ++ break; ++ } ++ } ++ rcu_read_unlock(); ++ ++ __skb_push(skb, skb->data - skb->mac.raw); ++ ++ return segs; ++} ++ ++EXPORT_SYMBOL(skb_gso_segment); ++ + /* Take action when hardware reception checksum errors are detected. 
*/ + #ifdef CONFIG_BUG + void netdev_rx_csum_fault(struct net_device *dev) +@@ -1142,75 +1182,108 @@ #else + #define illegal_highdma(dev, skb) (0) + #endif + +-/* Keep head the same: replace data */ +-int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask) +-{ +- unsigned int size; +- u8 *data; +- long offset; +- struct skb_shared_info *ninfo; +- int headerlen = skb->data - skb->head; +- int expand = (skb->tail + skb->data_len) - skb->end; +- +- if (skb_shared(skb)) +- BUG(); +- +- if (expand <= 0) +- expand = 0; +- +- size = skb->end - skb->head + expand; +- size = SKB_DATA_ALIGN(size); +- data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); +- if (!data) +- return -ENOMEM; +- +- /* Copy entire thing */ +- if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len)) +- BUG(); +- +- /* Set up shinfo */ +- ninfo = (struct skb_shared_info*)(data + size); +- atomic_set(&ninfo->dataref, 1); +- ninfo->tso_size = skb_shinfo(skb)->tso_size; +- ninfo->tso_segs = skb_shinfo(skb)->tso_segs; +- ninfo->nr_frags = 0; +- ninfo->frag_list = NULL; +- +- /* Offset between the two in bytes */ +- offset = data - skb->head; +- +- /* Free old data. */ +- skb_release_data(skb); +- +- skb->head = data; +- skb->end = data + size; +- +- /* Set up new pointers */ +- skb->h.raw += offset; +- skb->nh.raw += offset; +- skb->mac.raw += offset; +- skb->tail += offset; +- skb->data += offset; +- +- /* We are no longer a clone, even if we were. */ +- skb->cloned = 0; +- +- skb->tail += skb->data_len; +- skb->data_len = 0; ++struct dev_gso_cb { ++ void (*destructor)(struct sk_buff *skb); ++}; ++ ++#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) ++ ++static void dev_gso_skb_destructor(struct sk_buff *skb) ++{ ++ struct dev_gso_cb *cb; ++ ++ do { ++ struct sk_buff *nskb = skb->next; ++ ++ skb->next = nskb->next; ++ nskb->next = NULL; ++ kfree_skb(nskb); ++ } while (skb->next); ++ ++ cb = DEV_GSO_CB(skb); ++ if (cb->destructor) ++ cb->destructor(skb); ++} ++ ++/** ++ * dev_gso_segment - Perform emulated hardware segmentation on skb. ++ * @skb: buffer to segment ++ * ++ * This function segments the given skb and stores the list of segments ++ * in skb->next. ++ */ ++static int dev_gso_segment(struct sk_buff *skb) ++{ ++ struct net_device *dev = skb->dev; ++ struct sk_buff *segs; ++ int features = dev->features & ~(illegal_highdma(dev, skb) ? ++ NETIF_F_SG : 0); ++ ++ segs = skb_gso_segment(skb, features); ++ ++ /* Verifying header integrity only. 
*/ ++ if (!segs) ++ return 0; ++ ++ if (unlikely(IS_ERR(segs))) ++ return PTR_ERR(segs); ++ ++ skb->next = segs; ++ DEV_GSO_CB(skb)->destructor = skb->destructor; ++ skb->destructor = dev_gso_skb_destructor; ++ ++ return 0; ++} ++ ++int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ if (likely(!skb->next)) { ++ if (netdev_nit) ++ dev_queue_xmit_nit(skb, dev); ++ ++ if (netif_needs_gso(dev, skb)) { ++ if (unlikely(dev_gso_segment(skb))) ++ goto out_kfree_skb; ++ if (skb->next) ++ goto gso; ++ } ++ ++ return dev->hard_start_xmit(skb, dev); ++ } ++ ++gso: ++ do { ++ struct sk_buff *nskb = skb->next; ++ int rc; ++ ++ skb->next = nskb->next; ++ nskb->next = NULL; ++ rc = dev->hard_start_xmit(nskb, dev); ++ if (unlikely(rc)) { ++ nskb->next = skb->next; ++ skb->next = nskb; ++ return rc; ++ } ++ if (unlikely(netif_queue_stopped(dev) && skb->next)) ++ return NETDEV_TX_BUSY; ++ } while (skb->next); ++ ++ skb->destructor = DEV_GSO_CB(skb)->destructor; ++ ++out_kfree_skb: ++ kfree_skb(skb); + return 0; + } + + #define HARD_TX_LOCK(dev, cpu) { \ + if ((dev->features & NETIF_F_LLTX) == 0) { \ +- spin_lock(&dev->xmit_lock); \ +- dev->xmit_lock_owner = cpu; \ ++ netif_tx_lock(dev); \ + } \ + } + + #define HARD_TX_UNLOCK(dev) { \ + if ((dev->features & NETIF_F_LLTX) == 0) { \ +- dev->xmit_lock_owner = -1; \ +- spin_unlock(&dev->xmit_lock); \ ++ netif_tx_unlock(dev); \ + } \ + } + +@@ -1246,9 +1319,13 @@ int dev_queue_xmit(struct sk_buff *skb) + struct Qdisc *q; + int rc = -ENOMEM; + ++ /* GSO will handle the following emulations directly. */ ++ if (netif_needs_gso(dev, skb)) ++ goto gso; ++ + if (skb_shinfo(skb)->frag_list && + !(dev->features & NETIF_F_FRAGLIST) && +- __skb_linearize(skb, GFP_ATOMIC)) ++ __skb_linearize(skb)) + goto out_kfree_skb; + + /* Fragmented skb is linearized if device does not support SG, +@@ -1257,25 +1334,26 @@ int dev_queue_xmit(struct sk_buff *skb) + */ + if (skb_shinfo(skb)->nr_frags && + (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && +- __skb_linearize(skb, GFP_ATOMIC)) ++ __skb_linearize(skb)) + goto out_kfree_skb; + + /* If packet is not checksummed and device does not support + * checksumming for this protocol, complete checksumming here. + */ + if (skb->ip_summed == CHECKSUM_HW && +- (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) && ++ (!(dev->features & NETIF_F_GEN_CSUM) && + (!(dev->features & NETIF_F_IP_CSUM) || + skb->protocol != htons(ETH_P_IP)))) + if (skb_checksum_help(skb, 0)) + goto out_kfree_skb; + ++gso: + spin_lock_prefetch(&dev->queue_lock); + + /* Disable soft irqs for various locks below. Also + * stops preemption for RCU. + */ +- local_bh_disable(); ++ rcu_read_lock_bh(); + + /* Updates of qdisc are serialized by queue_lock. + * The struct Qdisc which is pointed to by qdisc is now a +@@ -1309,8 +1387,8 @@ #endif + /* The device has no queue. Common case for software devices: + loopback, all the sorts of tunnels... + +- Really, it is unlikely that xmit_lock protection is necessary here. +- (f.e. loopback and IP tunnels are clean ignoring statistics ++ Really, it is unlikely that netif_tx_lock protection is necessary ++ here. (f.e. loopback and IP tunnels are clean ignoring statistics + counters.) + However, it is possible, that they rely on protection + made by us here. 
+@@ -1326,11 +1404,8 @@ #endif + HARD_TX_LOCK(dev, cpu); + + if (!netif_queue_stopped(dev)) { +- if (netdev_nit) +- dev_queue_xmit_nit(skb, dev); +- + rc = 0; +- if (!dev->hard_start_xmit(skb, dev)) { ++ if (!dev_hard_start_xmit(skb, dev)) { + HARD_TX_UNLOCK(dev); + goto out; + } +@@ -1349,13 +1424,13 @@ #endif + } + + rc = -ENETDOWN; +- local_bh_enable(); ++ rcu_read_unlock_bh(); + + out_kfree_skb: + kfree_skb(skb); + return rc; + out: +- local_bh_enable(); ++ rcu_read_unlock_bh(); + return rc; + } + +@@ -2670,7 +2745,7 @@ int register_netdevice(struct net_device + BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); + + spin_lock_init(&dev->queue_lock); +- spin_lock_init(&dev->xmit_lock); ++ spin_lock_init(&dev->_xmit_lock); + dev->xmit_lock_owner = -1; + #ifdef CONFIG_NET_CLS_ACT + spin_lock_init(&dev->ingress_lock); +@@ -2714,9 +2789,7 @@ #endif + + /* Fix illegal SG+CSUM combinations. */ + if ((dev->features & NETIF_F_SG) && +- !(dev->features & (NETIF_F_IP_CSUM | +- NETIF_F_NO_CSUM | +- NETIF_F_HW_CSUM))) { ++ !(dev->features & NETIF_F_ALL_CSUM)) { + printk("%s: Dropping NETIF_F_SG since no checksum feature.\n", + dev->name); + dev->features &= ~NETIF_F_SG; +@@ -3268,7 +3341,6 @@ subsys_initcall(net_dev_init); + EXPORT_SYMBOL(__dev_get_by_index); + EXPORT_SYMBOL(__dev_get_by_name); + EXPORT_SYMBOL(__dev_remove_pack); +-EXPORT_SYMBOL(__skb_linearize); + EXPORT_SYMBOL(dev_valid_name); + EXPORT_SYMBOL(dev_add_pack); + EXPORT_SYMBOL(dev_alloc_name); +diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c +index 05d6085..c57d887 100644 +--- a/net/core/dev_mcast.c ++++ b/net/core/dev_mcast.c +@@ -62,7 +62,7 @@ #include <net/arp.h> + * Device mc lists are changed by bh at least if IPv6 is enabled, + * so that it must be bh protected. + * +- * We block accesses to device mc filters with dev->xmit_lock. ++ * We block accesses to device mc filters with netif_tx_lock. 
+ */ + + /* +@@ -93,9 +93,9 @@ static void __dev_mc_upload(struct net_d + + void dev_mc_upload(struct net_device *dev) + { +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + __dev_mc_upload(dev); +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + } + + /* +@@ -107,7 +107,7 @@ int dev_mc_delete(struct net_device *dev + int err = 0; + struct dev_mc_list *dmi, **dmip; + +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + + for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) { + /* +@@ -139,13 +139,13 @@ int dev_mc_delete(struct net_device *dev + */ + __dev_mc_upload(dev); + +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + return 0; + } + } + err = -ENOENT; + done: +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + return err; + } + +@@ -160,7 +160,7 @@ int dev_mc_add(struct net_device *dev, v + + dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC); + +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) { + if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 && + dmi->dmi_addrlen == alen) { +@@ -176,7 +176,7 @@ int dev_mc_add(struct net_device *dev, v + } + + if ((dmi = dmi1) == NULL) { +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + return -ENOMEM; + } + memcpy(dmi->dmi_addr, addr, alen); +@@ -189,11 +189,11 @@ int dev_mc_add(struct net_device *dev, v + + __dev_mc_upload(dev); + +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + return 0; + + done: +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + kfree(dmi1); + return err; + } +@@ -204,7 +204,7 @@ done: + + void dev_mc_discard(struct net_device *dev) + { +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + + while (dev->mc_list != NULL) { + struct dev_mc_list *tmp = dev->mc_list; +@@ -215,7 +215,7 @@ void dev_mc_discard(struct net_device *d + } + dev->mc_count = 0; + +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + } + + #ifdef CONFIG_PROC_FS +@@ -250,7 +250,7 @@ static int dev_mc_seq_show(struct seq_fi + struct dev_mc_list *m; + struct net_device *dev = v; + +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + for (m = dev->mc_list; m; m = m->next) { + int i; + +@@ -262,7 +262,7 @@ static int dev_mc_seq_show(struct seq_fi + + seq_putc(seq, '\n'); + } +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + return 0; + } + +diff --git a/net/core/ethtool.c b/net/core/ethtool.c +index e6f7610..27ce168 100644 +--- a/net/core/ethtool.c ++++ b/net/core/ethtool.c +@@ -30,7 +30,7 @@ u32 ethtool_op_get_link(struct net_devic + + u32 ethtool_op_get_tx_csum(struct net_device *dev) + { +- return (dev->features & (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM)) != 0; ++ return (dev->features & NETIF_F_ALL_CSUM) != 0; + } + + int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) +@@ -551,9 +551,7 @@ static int ethtool_set_sg(struct net_dev + return -EFAULT; + + if (edata.data && +- !(dev->features & (NETIF_F_IP_CSUM | +- NETIF_F_NO_CSUM | +- NETIF_F_HW_CSUM))) ++ !(dev->features & NETIF_F_ALL_CSUM)) + return -EINVAL; + + return __ethtool_set_sg(dev, edata.data); +@@ -591,7 +589,7 @@ static int ethtool_set_tso(struct net_de + + static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr) + { +- struct ethtool_value edata = { ETHTOOL_GTSO }; ++ struct ethtool_value edata = { ETHTOOL_GUFO }; + + if (!dev->ethtool_ops->get_ufo) + return -EOPNOTSUPP; +@@ -600,6 +598,7 @@ static int ethtool_get_ufo(struct net_de + return 
-EFAULT; + return 0; + } ++ + static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) + { + struct ethtool_value edata; +@@ -615,6 +614,29 @@ static int ethtool_set_ufo(struct net_de + return dev->ethtool_ops->set_ufo(dev, edata.data); + } + ++static int ethtool_get_gso(struct net_device *dev, char __user *useraddr) ++{ ++ struct ethtool_value edata = { ETHTOOL_GGSO }; ++ ++ edata.data = dev->features & NETIF_F_GSO; ++ if (copy_to_user(useraddr, &edata, sizeof(edata))) ++ return -EFAULT; ++ return 0; ++} ++ ++static int ethtool_set_gso(struct net_device *dev, char __user *useraddr) ++{ ++ struct ethtool_value edata; ++ ++ if (copy_from_user(&edata, useraddr, sizeof(edata))) ++ return -EFAULT; ++ if (edata.data) ++ dev->features |= NETIF_F_GSO; ++ else ++ dev->features &= ~NETIF_F_GSO; ++ return 0; ++} ++ + static int ethtool_self_test(struct net_device *dev, char __user *useraddr) + { + struct ethtool_test test; +@@ -906,6 +928,12 @@ int dev_ethtool(struct ifreq *ifr) + case ETHTOOL_SUFO: + rc = ethtool_set_ufo(dev, useraddr); + break; ++ case ETHTOOL_GGSO: ++ rc = ethtool_get_gso(dev, useraddr); ++ break; ++ case ETHTOOL_SGSO: ++ rc = ethtool_set_gso(dev, useraddr); ++ break; + default: + rc = -EOPNOTSUPP; + } +diff --git a/net/core/netpoll.c b/net/core/netpoll.c +index ea51f8d..ec28d3b 100644 +--- a/net/core/netpoll.c ++++ b/net/core/netpoll.c +@@ -273,24 +273,21 @@ static void netpoll_send_skb(struct netp + + do { + npinfo->tries--; +- spin_lock(&np->dev->xmit_lock); +- np->dev->xmit_lock_owner = smp_processor_id(); ++ netif_tx_lock(np->dev); + + /* + * network drivers do not expect to be called if the queue is + * stopped. + */ + if (netif_queue_stopped(np->dev)) { +- np->dev->xmit_lock_owner = -1; +- spin_unlock(&np->dev->xmit_lock); ++ netif_tx_unlock(np->dev); + netpoll_poll(np); + udelay(50); + continue; + } + + status = np->dev->hard_start_xmit(skb, np->dev); +- np->dev->xmit_lock_owner = -1; +- spin_unlock(&np->dev->xmit_lock); ++ netif_tx_unlock(np->dev); + + /* success */ + if(!status) { +diff --git a/net/core/pktgen.c b/net/core/pktgen.c +index da16f8f..2380347 100644 +--- a/net/core/pktgen.c ++++ b/net/core/pktgen.c +@@ -2582,7 +2582,7 @@ static __inline__ void pktgen_xmit(struc + } + } + +- spin_lock_bh(&odev->xmit_lock); ++ netif_tx_lock_bh(odev); + if (!netif_queue_stopped(odev)) { + + atomic_inc(&(pkt_dev->skb->users)); +@@ -2627,7 +2627,7 @@ retry_now: + pkt_dev->next_tx_ns = 0; + } + +- spin_unlock_bh(&odev->xmit_lock); ++ netif_tx_unlock_bh(odev); + + /* If pkt_dev->count is zero, then run forever */ + if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) { +diff --git a/net/core/skbuff.c b/net/core/skbuff.c +index 2144952..46f56af 100644 +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -164,9 +164,9 @@ struct sk_buff *__alloc_skb(unsigned int + shinfo = skb_shinfo(skb); + atomic_set(&shinfo->dataref, 1); + shinfo->nr_frags = 0; +- shinfo->tso_size = 0; +- shinfo->tso_segs = 0; +- shinfo->ufo_size = 0; ++ shinfo->gso_size = 0; ++ shinfo->gso_segs = 0; ++ shinfo->gso_type = 0; + shinfo->ip6_frag_id = 0; + shinfo->frag_list = NULL; + +@@ -230,8 +230,9 @@ struct sk_buff *alloc_skb_from_cache(kme + + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; +- skb_shinfo(skb)->tso_size = 0; +- skb_shinfo(skb)->tso_segs = 0; ++ skb_shinfo(skb)->gso_size = 0; ++ skb_shinfo(skb)->gso_segs = 0; ++ skb_shinfo(skb)->gso_type = 0; + skb_shinfo(skb)->frag_list = NULL; + out: + return skb; +@@ -501,8 +502,9 @@ #endif + 
new->tc_index = old->tc_index; + #endif + atomic_set(&new->users, 1); +- skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size; +- skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs; ++ skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; ++ skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; ++ skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; + } + + /** +@@ -1777,6 +1779,133 @@ int skb_append_datato_frags(struct sock + return 0; + } + ++/** ++ * skb_segment - Perform protocol segmentation on skb. ++ * @skb: buffer to segment ++ * @features: features for the output path (see dev->features) ++ * ++ * This function performs segmentation on the given skb. It returns ++ * the segment at the given position. It returns NULL if there are ++ * no more segments to generate, or when an error is encountered. ++ */ ++struct sk_buff *skb_segment(struct sk_buff *skb, int features) ++{ ++ struct sk_buff *segs = NULL; ++ struct sk_buff *tail = NULL; ++ unsigned int mss = skb_shinfo(skb)->gso_size; ++ unsigned int doffset = skb->data - skb->mac.raw; ++ unsigned int offset = doffset; ++ unsigned int headroom; ++ unsigned int len; ++ int sg = features & NETIF_F_SG; ++ int nfrags = skb_shinfo(skb)->nr_frags; ++ int err = -ENOMEM; ++ int i = 0; ++ int pos; ++ ++ __skb_push(skb, doffset); ++ headroom = skb_headroom(skb); ++ pos = skb_headlen(skb); ++ ++ do { ++ struct sk_buff *nskb; ++ skb_frag_t *frag; ++ int hsize, nsize; ++ int k; ++ int size; ++ ++ len = skb->len - offset; ++ if (len > mss) ++ len = mss; ++ ++ hsize = skb_headlen(skb) - offset; ++ if (hsize < 0) ++ hsize = 0; ++ nsize = hsize + doffset; ++ if (nsize > len + doffset || !sg) ++ nsize = len + doffset; ++ ++ nskb = alloc_skb(nsize + headroom, GFP_ATOMIC); ++ if (unlikely(!nskb)) ++ goto err; ++ ++ if (segs) ++ tail->next = nskb; ++ else ++ segs = nskb; ++ tail = nskb; ++ ++ nskb->dev = skb->dev; ++ nskb->priority = skb->priority; ++ nskb->protocol = skb->protocol; ++ nskb->dst = dst_clone(skb->dst); ++ memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); ++ nskb->pkt_type = skb->pkt_type; ++ nskb->mac_len = skb->mac_len; ++ ++ skb_reserve(nskb, headroom); ++ nskb->mac.raw = nskb->data; ++ nskb->nh.raw = nskb->data + skb->mac_len; ++ nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw); ++ memcpy(skb_put(nskb, doffset), skb->data, doffset); ++ ++ if (!sg) { ++ nskb->csum = skb_copy_and_csum_bits(skb, offset, ++ skb_put(nskb, len), ++ len, 0); ++ continue; ++ } ++ ++ frag = skb_shinfo(nskb)->frags; ++ k = 0; ++ ++ nskb->ip_summed = CHECKSUM_HW; ++ nskb->csum = skb->csum; ++ memcpy(skb_put(nskb, hsize), skb->data + offset, hsize); ++ ++ while (pos < offset + len) { ++ BUG_ON(i >= nfrags); ++ ++ *frag = skb_shinfo(skb)->frags[i]; ++ get_page(frag->page); ++ size = frag->size; ++ ++ if (pos < offset) { ++ frag->page_offset += offset - pos; ++ frag->size -= offset - pos; ++ } ++ ++ k++; ++ ++ if (pos + size <= offset + len) { ++ i++; ++ pos += size; ++ } else { ++ frag->size -= pos + size - (offset + len); ++ break; ++ } ++ ++ frag++; ++ } ++ ++ skb_shinfo(nskb)->nr_frags = k; ++ nskb->data_len = len - hsize; ++ nskb->len += nskb->data_len; ++ nskb->truesize += nskb->data_len; ++ } while ((offset += len) < skb->len); ++ ++ return segs; ++ ++err: ++ while ((skb = segs)) { ++ segs = skb->next; ++ kfree(skb); ++ } ++ return ERR_PTR(err); ++} ++ ++EXPORT_SYMBOL_GPL(skb_segment); ++ + void __init skb_init(void) + { + skbuff_head_cache = kmem_cache_create("skbuff_head_cache", +diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c +index 
44bda85..2e3323a 100644 +--- a/net/decnet/dn_nsp_in.c ++++ b/net/decnet/dn_nsp_in.c +@@ -801,8 +801,7 @@ got_it: + * We linearize everything except data segments here. + */ + if (cb->nsp_flags & ~0x60) { +- if (unlikely(skb_is_nonlinear(skb)) && +- skb_linearize(skb, GFP_ATOMIC) != 0) ++ if (unlikely(skb_linearize(skb))) + goto free_out; + } + +diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c +index 3407f19..a0a25e0 100644 +--- a/net/decnet/dn_route.c ++++ b/net/decnet/dn_route.c +@@ -629,8 +629,7 @@ int dn_route_rcv(struct sk_buff *skb, st + padlen); + + if (flags & DN_RT_PKT_CNTL) { +- if (unlikely(skb_is_nonlinear(skb)) && +- skb_linearize(skb, GFP_ATOMIC) != 0) ++ if (unlikely(skb_linearize(skb))) + goto dump_it; + + switch(flags & DN_RT_CNTL_MSK) { +diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c +index 97c276f..5ba719e 100644 +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -68,6 +68,7 @@ + */ + + #include <linux/config.h> ++#include <linux/err.h> + #include <linux/errno.h> + #include <linux/types.h> + #include <linux/socket.h> +@@ -1084,6 +1085,54 @@ int inet_sk_rebuild_header(struct sock * + + EXPORT_SYMBOL(inet_sk_rebuild_header); + ++static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) ++{ ++ struct sk_buff *segs = ERR_PTR(-EINVAL); ++ struct iphdr *iph; ++ struct net_protocol *ops; ++ int proto; ++ int ihl; ++ int id; ++ ++ if (!pskb_may_pull(skb, sizeof(*iph))) ++ goto out; ++ ++ iph = skb->nh.iph; ++ ihl = iph->ihl * 4; ++ if (ihl < sizeof(*iph)) ++ goto out; ++ ++ if (!pskb_may_pull(skb, ihl)) ++ goto out; ++ ++ skb->h.raw = __skb_pull(skb, ihl); ++ iph = skb->nh.iph; ++ id = ntohs(iph->id); ++ proto = iph->protocol & (MAX_INET_PROTOS - 1); ++ segs = ERR_PTR(-EPROTONOSUPPORT); ++ ++ rcu_read_lock(); ++ ops = rcu_dereference(inet_protos[proto]); ++ if (ops && ops->gso_segment) ++ segs = ops->gso_segment(skb, features); ++ rcu_read_unlock(); ++ ++ if (!segs || unlikely(IS_ERR(segs))) ++ goto out; ++ ++ skb = segs; ++ do { ++ iph = skb->nh.iph; ++ iph->id = htons(id++); ++ iph->tot_len = htons(skb->len - skb->mac_len); ++ iph->check = 0; ++ iph->check = ip_fast_csum(skb->nh.raw, iph->ihl); ++ } while ((skb = skb->next)); ++ ++out: ++ return segs; ++} ++ + #ifdef CONFIG_IP_MULTICAST + static struct net_protocol igmp_protocol = { + .handler = igmp_rcv, +@@ -1093,6 +1142,7 @@ #endif + static struct net_protocol tcp_protocol = { + .handler = tcp_v4_rcv, + .err_handler = tcp_v4_err, ++ .gso_segment = tcp_tso_segment, + .no_policy = 1, + }; + +@@ -1138,6 +1188,7 @@ static int ipv4_proc_init(void); + static struct packet_type ip_packet_type = { + .type = __constant_htons(ETH_P_IP), + .func = ip_rcv, ++ .gso_segment = inet_gso_segment, + }; + + static int __init inet_init(void) +diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c +index 8dcba38..19c3c73 100644 +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -210,8 +210,7 @@ #if defined(CONFIG_NETFILTER) && defined + return dst_output(skb); + } + #endif +- if (skb->len > dst_mtu(skb->dst) && +- !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) ++ if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size) + return ip_fragment(skb, ip_finish_output2); + else + return ip_finish_output2(skb); +@@ -362,7 +361,7 @@ packet_routed: + } + + ip_select_ident_more(iph, &rt->u.dst, sk, +- (skb_shinfo(skb)->tso_segs ?: 1) - 1); ++ (skb_shinfo(skb)->gso_segs ?: 1) - 1); + + /* Add an IP checksum. 
*/ + ip_send_check(iph); +@@ -743,7 +742,8 @@ static inline int ip_ufo_append_data(str + (length - transhdrlen)); + if (!err) { + /* specify the length of each IP datagram fragment*/ +- skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen); ++ skb_shinfo(skb)->gso_size = mtu - fragheaderlen; ++ skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4; + __skb_queue_tail(&sk->sk_write_queue, skb); + + return 0; +@@ -839,7 +839,7 @@ int ip_append_data(struct sock *sk, + */ + if (transhdrlen && + length + fragheaderlen <= mtu && +- rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) && ++ rt->u.dst.dev->features & NETIF_F_ALL_CSUM && + !exthdrlen) + csummode = CHECKSUM_HW; + +@@ -1086,14 +1086,16 @@ ssize_t ip_append_page(struct sock *sk, + + inet->cork.length += size; + if ((sk->sk_protocol == IPPROTO_UDP) && +- (rt->u.dst.dev->features & NETIF_F_UFO)) +- skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen); ++ (rt->u.dst.dev->features & NETIF_F_UFO)) { ++ skb_shinfo(skb)->gso_size = mtu - fragheaderlen; ++ skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4; ++ } + + + while (size > 0) { + int i; + +- if (skb_shinfo(skb)->ufo_size) ++ if (skb_shinfo(skb)->gso_size) + len = size; + else { + +diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c +index d64e2ec..7494823 100644 +--- a/net/ipv4/ipcomp.c ++++ b/net/ipv4/ipcomp.c +@@ -84,7 +84,7 @@ static int ipcomp_input(struct xfrm_stat + struct xfrm_decap_state *decap, struct sk_buff *skb) + { + u8 nexthdr; +- int err = 0; ++ int err = -ENOMEM; + struct iphdr *iph; + union { + struct iphdr iph; +@@ -92,11 +92,8 @@ static int ipcomp_input(struct xfrm_stat + } tmp_iph; + + +- if ((skb_is_nonlinear(skb) || skb_cloned(skb)) && +- skb_linearize(skb, GFP_ATOMIC) != 0) { +- err = -ENOMEM; ++ if (skb_linearize_cow(skb)) + goto out; +- } + + skb->ip_summed = CHECKSUM_NONE; + +@@ -171,10 +168,8 @@ static int ipcomp_output(struct xfrm_sta + goto out_ok; + } + +- if ((skb_is_nonlinear(skb) || skb_cloned(skb)) && +- skb_linearize(skb, GFP_ATOMIC) != 0) { ++ if (skb_linearize_cow(skb)) + goto out_ok; +- } + + err = ipcomp_compress(x, skb); + iph = skb->nh.iph; +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 00aa80e..84130c9 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -257,6 +257,7 @@ #include <linux/smp_lock.h> + #include <linux/fs.h> + #include <linux/random.h> + #include <linux/bootmem.h> ++#include <linux/err.h> + + #include <net/icmp.h> + #include <net/tcp.h> +@@ -570,7 +571,7 @@ new_segment: + skb->ip_summed = CHECKSUM_HW; + tp->write_seq += copy; + TCP_SKB_CB(skb)->end_seq += copy; +- skb_shinfo(skb)->tso_segs = 0; ++ skb_shinfo(skb)->gso_segs = 0; + + if (!copied) + TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; +@@ -621,14 +622,10 @@ ssize_t tcp_sendpage(struct socket *sock + ssize_t res; + struct sock *sk = sock->sk; + +-#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) +- + if (!(sk->sk_route_caps & NETIF_F_SG) || +- !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS)) ++ !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) + return sock_no_sendpage(sock, page, offset, size, flags); + +-#undef TCP_ZC_CSUM_FLAGS +- + lock_sock(sk); + TCP_CHECK_TIMER(sk); + res = do_tcp_sendpages(sk, &page, offset, size, flags); +@@ -725,9 +722,7 @@ new_segment: + /* + * Check whether we can use HW checksum. 
+ */ +- if (sk->sk_route_caps & +- (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | +- NETIF_F_HW_CSUM)) ++ if (sk->sk_route_caps & NETIF_F_ALL_CSUM) + skb->ip_summed = CHECKSUM_HW; + + skb_entail(sk, tp, skb); +@@ -823,7 +818,7 @@ new_segment: + + tp->write_seq += copy; + TCP_SKB_CB(skb)->end_seq += copy; +- skb_shinfo(skb)->tso_segs = 0; ++ skb_shinfo(skb)->gso_segs = 0; + + from += copy; + copied += copy; +@@ -2026,6 +2021,71 @@ int tcp_getsockopt(struct sock *sk, int + } + + ++struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) ++{ ++ struct sk_buff *segs = ERR_PTR(-EINVAL); ++ struct tcphdr *th; ++ unsigned thlen; ++ unsigned int seq; ++ unsigned int delta; ++ unsigned int oldlen; ++ unsigned int len; ++ ++ if (!pskb_may_pull(skb, sizeof(*th))) ++ goto out; ++ ++ th = skb->h.th; ++ thlen = th->doff * 4; ++ if (thlen < sizeof(*th)) ++ goto out; ++ ++ if (!pskb_may_pull(skb, thlen)) ++ goto out; ++ ++ segs = NULL; ++ if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) ++ goto out; ++ ++ oldlen = (u16)~skb->len; ++ __skb_pull(skb, thlen); ++ ++ segs = skb_segment(skb, features); ++ if (IS_ERR(segs)) ++ goto out; ++ ++ len = skb_shinfo(skb)->gso_size; ++ delta = htonl(oldlen + (thlen + len)); ++ ++ skb = segs; ++ th = skb->h.th; ++ seq = ntohl(th->seq); ++ ++ do { ++ th->fin = th->psh = 0; ++ ++ th->check = ~csum_fold(th->check + delta); ++ if (skb->ip_summed != CHECKSUM_HW) ++ th->check = csum_fold(csum_partial(skb->h.raw, thlen, ++ skb->csum)); ++ ++ seq += len; ++ skb = skb->next; ++ th = skb->h.th; ++ ++ th->seq = htonl(seq); ++ th->cwr = 0; ++ } while (skb->next); ++ ++ delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len); ++ th->check = ~csum_fold(th->check + delta); ++ if (skb->ip_summed != CHECKSUM_HW) ++ th->check = csum_fold(csum_partial(skb->h.raw, thlen, ++ skb->csum)); ++ ++out: ++ return segs; ++} ++ + extern void __skb_cb_too_small_for_tcp(int, int); + extern struct tcp_congestion_ops tcp_reno; + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index e9a54ae..defe77a 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -1072,7 +1072,7 @@ tcp_sacktag_write_queue(struct sock *sk, + else + pkt_len = (end_seq - + TCP_SKB_CB(skb)->seq); +- if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->tso_size)) ++ if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size)) + break; + pcount = tcp_skb_pcount(skb); + } +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index 310f2e6..ee01f69 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -497,15 +497,17 @@ static void tcp_set_skb_tso_segs(struct + /* Avoid the costly divide in the normal + * non-TSO case. 
+ */ +- skb_shinfo(skb)->tso_segs = 1; +- skb_shinfo(skb)->tso_size = 0; ++ skb_shinfo(skb)->gso_segs = 1; ++ skb_shinfo(skb)->gso_size = 0; ++ skb_shinfo(skb)->gso_type = 0; + } else { + unsigned int factor; + + factor = skb->len + (mss_now - 1); + factor /= mss_now; +- skb_shinfo(skb)->tso_segs = factor; +- skb_shinfo(skb)->tso_size = mss_now; ++ skb_shinfo(skb)->gso_segs = factor; ++ skb_shinfo(skb)->gso_size = mss_now; ++ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + } + } + +@@ -850,7 +852,7 @@ static int tcp_init_tso_segs(struct sock + + if (!tso_segs || + (tso_segs > 1 && +- skb_shinfo(skb)->tso_size != mss_now)) { ++ tcp_skb_mss(skb) != mss_now)) { + tcp_set_skb_tso_segs(sk, skb, mss_now); + tso_segs = tcp_skb_pcount(skb); + } +@@ -1510,8 +1512,9 @@ int tcp_retransmit_skb(struct sock *sk, + tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { + if (!pskb_trim(skb, 0)) { + TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1; +- skb_shinfo(skb)->tso_segs = 1; +- skb_shinfo(skb)->tso_size = 0; ++ skb_shinfo(skb)->gso_segs = 1; ++ skb_shinfo(skb)->gso_size = 0; ++ skb_shinfo(skb)->gso_type = 0; + skb->ip_summed = CHECKSUM_NONE; + skb->csum = 0; + } +@@ -1716,8 +1719,9 @@ void tcp_send_fin(struct sock *sk) + skb->csum = 0; + TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); + TCP_SKB_CB(skb)->sacked = 0; +- skb_shinfo(skb)->tso_segs = 1; +- skb_shinfo(skb)->tso_size = 0; ++ skb_shinfo(skb)->gso_segs = 1; ++ skb_shinfo(skb)->gso_size = 0; ++ skb_shinfo(skb)->gso_type = 0; + + /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ + TCP_SKB_CB(skb)->seq = tp->write_seq; +@@ -1749,8 +1753,9 @@ void tcp_send_active_reset(struct sock * + skb->csum = 0; + TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST); + TCP_SKB_CB(skb)->sacked = 0; +- skb_shinfo(skb)->tso_segs = 1; +- skb_shinfo(skb)->tso_size = 0; ++ skb_shinfo(skb)->gso_segs = 1; ++ skb_shinfo(skb)->gso_size = 0; ++ skb_shinfo(skb)->gso_type = 0; + + /* Send it off. */ + TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp); +@@ -1833,8 +1838,9 @@ struct sk_buff * tcp_make_synack(struct + TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn; + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; + TCP_SKB_CB(skb)->sacked = 0; +- skb_shinfo(skb)->tso_segs = 1; +- skb_shinfo(skb)->tso_size = 0; ++ skb_shinfo(skb)->gso_segs = 1; ++ skb_shinfo(skb)->gso_size = 0; ++ skb_shinfo(skb)->gso_type = 0; + th->seq = htonl(TCP_SKB_CB(skb)->seq); + th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); + if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ +@@ -1937,8 +1943,9 @@ int tcp_connect(struct sock *sk) + TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN; + TCP_ECN_send_syn(sk, tp, buff); + TCP_SKB_CB(buff)->sacked = 0; +- skb_shinfo(buff)->tso_segs = 1; +- skb_shinfo(buff)->tso_size = 0; ++ skb_shinfo(buff)->gso_segs = 1; ++ skb_shinfo(buff)->gso_size = 0; ++ skb_shinfo(buff)->gso_type = 0; + buff->csum = 0; + TCP_SKB_CB(buff)->seq = tp->write_seq++; + TCP_SKB_CB(buff)->end_seq = tp->write_seq; +@@ -2042,8 +2049,9 @@ void tcp_send_ack(struct sock *sk) + buff->csum = 0; + TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK; + TCP_SKB_CB(buff)->sacked = 0; +- skb_shinfo(buff)->tso_segs = 1; +- skb_shinfo(buff)->tso_size = 0; ++ skb_shinfo(buff)->gso_segs = 1; ++ skb_shinfo(buff)->gso_size = 0; ++ skb_shinfo(buff)->gso_type = 0; + + /* Send it off, this clears delayed acks for us. 
*/ + TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp); +@@ -2078,8 +2086,9 @@ static int tcp_xmit_probe_skb(struct soc + skb->csum = 0; + TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; + TCP_SKB_CB(skb)->sacked = urgent; +- skb_shinfo(skb)->tso_segs = 1; +- skb_shinfo(skb)->tso_size = 0; ++ skb_shinfo(skb)->gso_segs = 1; ++ skb_shinfo(skb)->gso_size = 0; ++ skb_shinfo(skb)->gso_type = 0; + + /* Use a previous sequence. This should cause the other + * end to send an ack. Don't queue or clone SKB, just +diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c +index 32ad229..737c1db 100644 +--- a/net/ipv4/xfrm4_output.c ++++ b/net/ipv4/xfrm4_output.c +@@ -9,6 +9,8 @@ + */ + + #include <linux/compiler.h> ++#include <linux/if_ether.h> ++#include <linux/kernel.h> + #include <linux/skbuff.h> + #include <linux/spinlock.h> + #include <linux/netfilter_ipv4.h> +@@ -152,16 +154,10 @@ error_nolock: + goto out_exit; + } + +-static int xfrm4_output_finish(struct sk_buff *skb) ++static int xfrm4_output_finish2(struct sk_buff *skb) + { + int err; + +-#ifdef CONFIG_NETFILTER +- if (!skb->dst->xfrm) { +- IPCB(skb)->flags |= IPSKB_REROUTED; +- return dst_output(skb); +- } +-#endif + while (likely((err = xfrm4_output_one(skb)) == 0)) { + nf_reset(skb); + +@@ -174,7 +170,7 @@ #endif + return dst_output(skb); + + err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL, +- skb->dst->dev, xfrm4_output_finish); ++ skb->dst->dev, xfrm4_output_finish2); + if (unlikely(err != 1)) + break; + } +@@ -182,6 +178,48 @@ #endif + return err; + } + ++static int xfrm4_output_finish(struct sk_buff *skb) ++{ ++ struct sk_buff *segs; ++ ++#ifdef CONFIG_NETFILTER ++ if (!skb->dst->xfrm) { ++ IPCB(skb)->flags |= IPSKB_REROUTED; ++ return dst_output(skb); ++ } ++#endif ++ ++ if (!skb_shinfo(skb)->gso_size) ++ return xfrm4_output_finish2(skb); ++ ++ skb->protocol = htons(ETH_P_IP); ++ segs = skb_gso_segment(skb, 0); ++ kfree_skb(skb); ++ if (unlikely(IS_ERR(segs))) ++ return PTR_ERR(segs); ++ ++ do { ++ struct sk_buff *nskb = segs->next; ++ int err; ++ ++ segs->next = NULL; ++ err = xfrm4_output_finish2(segs); ++ ++ if (unlikely(err)) { ++ while ((segs = nskb)) { ++ nskb = segs->next; ++ segs->next = NULL; ++ kfree_skb(segs); ++ } ++ return err; ++ } ++ ++ segs = nskb; ++ } while (segs); ++ ++ return 0; ++} ++ + int xfrm4_output(struct sk_buff *skb) + { + return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, +diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c +index 5bf70b1..cf5d17e 100644 +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -147,7 +147,7 @@ static int ip6_output2(struct sk_buff *s + + int ip6_output(struct sk_buff *skb) + { +- if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->ufo_size) || ++ if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size) || + dst_allfrag(skb->dst)) + return ip6_fragment(skb, ip6_output2); + else +@@ -829,8 +829,9 @@ static inline int ip6_ufo_append_data(st + struct frag_hdr fhdr; + + /* specify the length of each IP datagram fragment*/ +- skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen) - +- sizeof(struct frag_hdr); ++ skb_shinfo(skb)->gso_size = mtu - fragheaderlen - ++ sizeof(struct frag_hdr); ++ skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4; + ipv6_select_ident(skb, &fhdr); + skb_shinfo(skb)->ip6_frag_id = fhdr.identification; + __skb_queue_tail(&sk->sk_write_queue, skb); +diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c +index d511a88..ef56d5d 100644 +--- a/net/ipv6/ipcomp6.c ++++ b/net/ipv6/ipcomp6.c +@@ 
-64,7 +64,7 @@ static LIST_HEAD(ipcomp6_tfms_list); + + static int ipcomp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb) + { +- int err = 0; ++ int err = -ENOMEM; + u8 nexthdr = 0; + int hdr_len = skb->h.raw - skb->nh.raw; + unsigned char *tmp_hdr = NULL; +@@ -75,11 +75,8 @@ static int ipcomp6_input(struct xfrm_sta + struct crypto_tfm *tfm; + int cpu; + +- if ((skb_is_nonlinear(skb) || skb_cloned(skb)) && +- skb_linearize(skb, GFP_ATOMIC) != 0) { +- err = -ENOMEM; ++ if (skb_linearize_cow(skb)) + goto out; +- } + + skb->ip_summed = CHECKSUM_NONE; + +@@ -158,10 +155,8 @@ static int ipcomp6_output(struct xfrm_st + goto out_ok; + } + +- if ((skb_is_nonlinear(skb) || skb_cloned(skb)) && +- skb_linearize(skb, GFP_ATOMIC) != 0) { ++ if (skb_linearize_cow(skb)) + goto out_ok; +- } + + /* compression */ + plen = skb->len - hdr_len; +diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c +index 8024217..39bdeec 100644 +--- a/net/ipv6/xfrm6_output.c ++++ b/net/ipv6/xfrm6_output.c +@@ -151,7 +151,7 @@ error_nolock: + goto out_exit; + } + +-static int xfrm6_output_finish(struct sk_buff *skb) ++static int xfrm6_output_finish2(struct sk_buff *skb) + { + int err; + +@@ -167,7 +167,7 @@ static int xfrm6_output_finish(struct sk + return dst_output(skb); + + err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL, +- skb->dst->dev, xfrm6_output_finish); ++ skb->dst->dev, xfrm6_output_finish2); + if (unlikely(err != 1)) + break; + } +@@ -175,6 +175,41 @@ static int xfrm6_output_finish(struct sk + return err; + } + ++static int xfrm6_output_finish(struct sk_buff *skb) ++{ ++ struct sk_buff *segs; ++ ++ if (!skb_shinfo(skb)->gso_size) ++ return xfrm6_output_finish2(skb); ++ ++ skb->protocol = htons(ETH_P_IP); ++ segs = skb_gso_segment(skb, 0); ++ kfree_skb(skb); ++ if (unlikely(IS_ERR(segs))) ++ return PTR_ERR(segs); ++ ++ do { ++ struct sk_buff *nskb = segs->next; ++ int err; ++ ++ segs->next = NULL; ++ err = xfrm6_output_finish2(segs); ++ ++ if (unlikely(err)) { ++ while ((segs = nskb)) { ++ nskb = segs->next; ++ segs->next = NULL; ++ kfree_skb(segs); ++ } ++ return err; ++ } ++ ++ segs = nskb; ++ } while (segs); ++ ++ return 0; ++} ++ + int xfrm6_output(struct sk_buff *skb) + { + return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev, +diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c +index 99ceb91..28c9efd 100644 +--- a/net/sched/sch_generic.c ++++ b/net/sched/sch_generic.c +@@ -72,9 +72,9 @@ void qdisc_unlock_tree(struct net_device + dev->queue_lock serializes queue accesses for this device + AND dev->qdisc pointer itself. + +- dev->xmit_lock serializes accesses to device driver. ++ netif_tx_lock serializes accesses to device driver. + +- dev->queue_lock and dev->xmit_lock are mutually exclusive, ++ dev->queue_lock and netif_tx_lock are mutually exclusive, + if one is grabbed, another must be free. + */ + +@@ -90,14 +90,17 @@ void qdisc_unlock_tree(struct net_device + NOTE: Called under dev->queue_lock with locally disabled BH. + */ + +-int qdisc_restart(struct net_device *dev) ++static inline int qdisc_restart(struct net_device *dev) + { + struct Qdisc *q = dev->qdisc; + struct sk_buff *skb; + + /* Dequeue packet */ +- if ((skb = q->dequeue(q)) != NULL) { ++ if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) { + unsigned nolock = (dev->features & NETIF_F_LLTX); ++ ++ dev->gso_skb = NULL; ++ + /* + * When the driver has LLTX set it does its own locking + * in start_xmit. 
No need to add additional overhead by +@@ -108,7 +111,7 @@ int qdisc_restart(struct net_device *dev + * will be requeued. + */ + if (!nolock) { +- if (!spin_trylock(&dev->xmit_lock)) { ++ if (!netif_tx_trylock(dev)) { + collision: + /* So, someone grabbed the driver. */ + +@@ -126,8 +129,6 @@ int qdisc_restart(struct net_device *dev + __get_cpu_var(netdev_rx_stat).cpu_collision++; + goto requeue; + } +- /* Remember that the driver is grabbed by us. */ +- dev->xmit_lock_owner = smp_processor_id(); + } + + { +@@ -136,14 +137,11 @@ int qdisc_restart(struct net_device *dev + + if (!netif_queue_stopped(dev)) { + int ret; +- if (netdev_nit) +- dev_queue_xmit_nit(skb, dev); + +- ret = dev->hard_start_xmit(skb, dev); ++ ret = dev_hard_start_xmit(skb, dev); + if (ret == NETDEV_TX_OK) { + if (!nolock) { +- dev->xmit_lock_owner = -1; +- spin_unlock(&dev->xmit_lock); ++ netif_tx_unlock(dev); + } + spin_lock(&dev->queue_lock); + return -1; +@@ -157,8 +155,7 @@ int qdisc_restart(struct net_device *dev + /* NETDEV_TX_BUSY - we need to requeue */ + /* Release the driver */ + if (!nolock) { +- dev->xmit_lock_owner = -1; +- spin_unlock(&dev->xmit_lock); ++ netif_tx_unlock(dev); + } + spin_lock(&dev->queue_lock); + q = dev->qdisc; +@@ -175,7 +172,10 @@ int qdisc_restart(struct net_device *dev + */ + + requeue: +- q->ops->requeue(skb, q); ++ if (skb->next) ++ dev->gso_skb = skb; ++ else ++ q->ops->requeue(skb, q); + netif_schedule(dev); + return 1; + } +@@ -183,11 +183,23 @@ requeue: + return q->q.qlen; + } + ++void __qdisc_run(struct net_device *dev) ++{ ++ if (unlikely(dev->qdisc == &noop_qdisc)) ++ goto out; ++ ++ while (qdisc_restart(dev) < 0 && !netif_queue_stopped(dev)) ++ /* NOTHING */; ++ ++out: ++ clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); ++} ++ + static void dev_watchdog(unsigned long arg) + { + struct net_device *dev = (struct net_device *)arg; + +- spin_lock(&dev->xmit_lock); ++ netif_tx_lock(dev); + if (dev->qdisc != &noop_qdisc) { + if (netif_device_present(dev) && + netif_running(dev) && +@@ -201,7 +213,7 @@ static void dev_watchdog(unsigned long a + dev_hold(dev); + } + } +- spin_unlock(&dev->xmit_lock); ++ netif_tx_unlock(dev); + + dev_put(dev); + } +@@ -225,17 +237,17 @@ void __netdev_watchdog_up(struct net_dev + + static void dev_watchdog_up(struct net_device *dev) + { +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + __netdev_watchdog_up(dev); +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + } + + static void dev_watchdog_down(struct net_device *dev) + { +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + if (del_timer(&dev->watchdog_timer)) + __dev_put(dev); +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + } + + void netif_carrier_on(struct net_device *dev) +@@ -577,10 +589,17 @@ void dev_deactivate(struct net_device *d + + dev_watchdog_down(dev); + +- while (test_bit(__LINK_STATE_SCHED, &dev->state)) ++ /* Wait for outstanding dev_queue_xmit calls. */ ++ synchronize_rcu(); ++ ++ /* Wait for outstanding qdisc_run calls. 
*/ ++ while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) + yield(); + +- spin_unlock_wait(&dev->xmit_lock); ++ if (dev->gso_skb) { ++ kfree_skb(dev->gso_skb); ++ dev->gso_skb = NULL; ++ } + } + + void dev_init_scheduler(struct net_device *dev) +@@ -622,6 +641,5 @@ EXPORT_SYMBOL(qdisc_create_dflt); + EXPORT_SYMBOL(qdisc_alloc); + EXPORT_SYMBOL(qdisc_destroy); + EXPORT_SYMBOL(qdisc_reset); +-EXPORT_SYMBOL(qdisc_restart); + EXPORT_SYMBOL(qdisc_lock_tree); + EXPORT_SYMBOL(qdisc_unlock_tree); +diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c +index 79b8ef3..4c16ad5 100644 +--- a/net/sched/sch_teql.c ++++ b/net/sched/sch_teql.c +@@ -302,20 +302,17 @@ restart: + + switch (teql_resolve(skb, skb_res, slave)) { + case 0: +- if (spin_trylock(&slave->xmit_lock)) { +- slave->xmit_lock_owner = smp_processor_id(); ++ if (netif_tx_trylock(slave)) { + if (!netif_queue_stopped(slave) && + slave->hard_start_xmit(skb, slave) == 0) { +- slave->xmit_lock_owner = -1; +- spin_unlock(&slave->xmit_lock); ++ netif_tx_unlock(slave); + master->slaves = NEXT_SLAVE(q); + netif_wake_queue(dev); + master->stats.tx_packets++; + master->stats.tx_bytes += len; + return 0; + } +- slave->xmit_lock_owner = -1; +- spin_unlock(&slave->xmit_lock); ++ netif_tx_unlock(slave); + } + if (netif_queue_stopped(dev)) + busy = 1; _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
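As an illustration only (not part of the patch above), the sketch below shows how driver code might look against the API this patch leaves behind: the renamed gso_size/gso_segs fields in skb_shared_info and the netif_tx_lock_bh/netif_tx_unlock_bh wrappers that replace direct use of dev->xmit_lock. The "foo" device and every foo_hw_* helper are hypothetical placeholders, not real kernel symbols.

        /* Sketch only: assumes the post-patch 2.6.16 networking API. */
        #include <linux/netdevice.h>
        #include <linux/skbuff.h>

        /* Hypothetical hardware helpers, declared so the sketch is
         * self-contained; a real driver would implement these. */
        extern void foo_hw_setup_tso(struct net_device *dev, unsigned int mss);
        extern void foo_hw_queue_skb(struct net_device *dev, struct sk_buff *skb);
        extern void foo_hw_reset_tx_ring(struct net_device *dev);

        /* Transmit path: a TSO-capable driver now reads gso_size
         * (formerly tso_size); gso_size == 0 means no segmentation
         * is requested for this skb. */
        static int foo_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
        {
                unsigned int mss = skb_shinfo(skb)->gso_size;

                if (mss)
                        foo_hw_setup_tso(dev, mss);
                foo_hw_queue_skb(dev, skb);
                return NETDEV_TX_OK;
        }

        /* Driver-private reset/recovery paths that used to grab
         * dev->xmit_lock directly now go through the wrappers. */
        static void foo_tx_timeout_task(struct net_device *dev)
        {
                netif_tx_lock_bh(dev);
                foo_hw_reset_tx_ring(dev);
                netif_tx_unlock_bh(dev);
                netif_wake_queue(dev);
        }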