[Xen-changelog] [xen-unstable] [LINUX] Update to Linux 2.6.16.32
# HG changeset patch
# User Ian Campbell <ian.campbell@xxxxxxxxxxxxx>
# Node ID aaaa249e6f3b7b955605746909eb1a09b8b61061
# Parent  447ac06f74d36f148584c9196641868c319e7ae2
[LINUX] Update to Linux 2.6.16.32

Updated patches/linux-2.6.16.32/net-gso-0-base.patch due to changes in
net/core/dev.c and net/core/skbuff.c, and updated the following files
in the sparse tree:

 include/linux/skbuff.h
 mm/memory.c
 mm/mmap.c

Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxxxxx>
---
 patches/linux-2.6.16.31/blktap-aio-16_03_06.patch | 271 -
 patches/linux-2.6.16.31/device_bind.patch | 14
 patches/linux-2.6.16.31/fix-hz-suspend.patch | 25
 patches/linux-2.6.16.31/fix-ide-cd-pio-mode.patch | 13
 patches/linux-2.6.16.31/i386-mach-io-check-nmi.patch | 35
 patches/linux-2.6.16.31/ipv6-no-autoconf.patch | 18
 patches/linux-2.6.16.31/net-csum.patch | 57
 patches/linux-2.6.16.31/net-gso-0-base.patch | 2441 ----
 patches/linux-2.6.16.31/net-gso-1-check-dodgy.patch | 22
 patches/linux-2.6.16.31/net-gso-2-checksum-fix.patch | 400 -
 patches/linux-2.6.16.31/net-gso-3-fix-errorcheck.patch | 13
 patches/linux-2.6.16.31/net-gso-4-kill-warnon.patch | 16
 patches/linux-2.6.16.31/net-gso-5-rcv-mss.patch | 11
 patches/linux-2.6.16.31/pci-mmconfig-fix-from-2.6.17.patch | 252 -
 patches/linux-2.6.16.31/pmd-shared.patch | 100
 patches/linux-2.6.16.31/rcu_needs_cpu.patch | 33
 patches/linux-2.6.16.31/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch | 26
 patches/linux-2.6.16.31/series | 25
 patches/linux-2.6.16.31/smp-alts.patch | 540 --
 patches/linux-2.6.16.31/tpm_plugin_2.6.17.patch | 1380 -----
 patches/linux-2.6.16.31/x86-elfnote-as-preprocessor-macro.patch | 27
 patches/linux-2.6.16.31/x86-increase-interrupt-vector-range.patch | 73
 patches/linux-2.6.16.31/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch | 138
 patches/linux-2.6.16.31/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch | 72
 patches/linux-2.6.16.31/xen-hotplug.patch | 10
 patches/linux-2.6.16.31/xenoprof-generic.patch | 615 --
 buildconfigs/mk.linux-2.6-xen | 2
 linux-2.6-xen-sparse/include/linux/skbuff.h | 24
 linux-2.6-xen-sparse/mm/memory.c | 1
 linux-2.6-xen-sparse/mm/mmap.c | 17
 linux-2.6-xen-sparse/net/core/skbuff.c | 125
 patches/linux-2.6.16.32/blktap-aio-16_03_06.patch | 161
 patches/linux-2.6.16.32/device_bind.patch | 9
 patches/linux-2.6.16.32/fix-hz-suspend.patch | 9
 patches/linux-2.6.16.32/fix-ide-cd-pio-mode.patch | 13
 patches/linux-2.6.16.32/i386-mach-io-check-nmi.patch | 30
 patches/linux-2.6.16.32/ipv6-no-autoconf.patch | 16
 patches/linux-2.6.16.32/net-csum.patch | 40
 patches/linux-2.6.16.32/net-gso-0-base.patch | 1970 ++++
 patches/linux-2.6.16.32/net-gso-1-check-dodgy.patch | 16
 patches/linux-2.6.16.32/net-gso-2-checksum-fix.patch | 311 +
 patches/linux-2.6.16.32/net-gso-3-fix-errorcheck.patch | 13
 patches/linux-2.6.16.32/net-gso-4-kill-warnon.patch | 26
 patches/linux-2.6.16.32/net-gso-5-rcv-mss.patch | 11
 patches/linux-2.6.16.32/pci-mmconfig-fix-from-2.6.17.patch | 143
 patches/linux-2.6.16.32/pmd-shared.patch | 57
 patches/linux-2.6.16.32/rcu_needs_cpu.patch | 18
 patches/linux-2.6.16.32/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch | 26
 patches/linux-2.6.16.32/series | 25
 patches/linux-2.6.16.32/smp-alts.patch | 330 +
 patches/linux-2.6.16.32/tpm_plugin_2.6.17.patch | 703 ++
 patches/linux-2.6.16.32/x86-elfnote-as-preprocessor-macro.patch | 25
 patches/linux-2.6.16.32/x86-increase-interrupt-vector-range.patch | 73
 patches/linux-2.6.16.32/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch | 39
 patches/linux-2.6.16.32/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch | 63
 patches/linux-2.6.16.32/xen-hotplug.patch | 10
 patches/linux-2.6.16.32/xenoprof-generic.patch | 294 +
 57 files changed, 4544 insertions(+), 6683 deletions(-)
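The most visible interface change pulled in from 2.6.16.32 is the reworked
skb trim family shown in the skbuff diffs below: ___pskb_trim() loses its
"realloc" argument and now reallocates a cloned header itself, __skb_trim()
refuses non-linear skbs outright, and __pskb_trim() dispatches between the
two. A minimal sketch of how a caller is expected to use the new split,
assuming the 2.6.16 sparse-tree definitions (shorten_packet() is a
hypothetical helper for illustration, not code from this changeset):

#include <linux/skbuff.h>

/* Hypothetical caller, for illustration only. */
static int shorten_packet(struct sk_buff *skb, unsigned int new_len)
{
	if (new_len >= skb->len)
		return 0;		/* nothing to trim */

	if (!skb->data_len) {
		/* Purely linear skb: __skb_trim() just adjusts len/tail. */
		__skb_trim(skb, new_len);
		return 0;
	}

	/*
	 * Paged or fragmented skb: pskb_trim() routes to ___pskb_trim(),
	 * which now copes with cloned headers on its own -- the old
	 * "realloc" flag is gone -- so it may return -ENOMEM.
	 */
	return pskb_trim(skb, new_len);
}
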
diff -r 447ac06f74d3 -r aaaa249e6f3b buildconfigs/mk.linux-2.6-xen
--- a/buildconfigs/mk.linux-2.6-xen	Mon Nov 27 13:50:02 2006 +0000
+++ b/buildconfigs/mk.linux-2.6-xen	Mon Nov 27 13:50:02 2006 +0000
@@ -1,5 +1,5 @@ LINUX_SERIES = 2.6
 LINUX_SERIES = 2.6
-LINUX_VER = 2.6.16.31
+LINUX_VER = 2.6.16.32

 EXTRAVERSION ?= xen
diff -r 447ac06f74d3 -r aaaa249e6f3b linux-2.6-xen-sparse/include/linux/skbuff.h
--- a/linux-2.6-xen-sparse/include/linux/skbuff.h	Mon Nov 27 13:50:02 2006 +0000
+++ b/linux-2.6-xen-sparse/include/linux/skbuff.h	Mon Nov 27 13:50:02 2006 +0000
@@ -974,15 +974,16 @@ static inline void skb_reserve(struct sk
 #define NET_IP_ALIGN	2
 #endif

-extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
+extern int ___pskb_trim(struct sk_buff *skb, unsigned int len);

 static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
 {
-	if (!skb->data_len) {
-		skb->len = len;
-		skb->tail = skb->data + len;
-	} else
-		___pskb_trim(skb, len, 0);
+	if (unlikely(skb->data_len)) {
+		WARN_ON(1);
+		return;
+	}
+	skb->len = len;
+	skb->tail = skb->data + len;
 }

 /**
@@ -992,6 +993,7 @@ static inline void __skb_trim(struct sk_
  *
  *	Cut the length of a buffer down by removing data from the tail. If
  *	the buffer is already under the length specified it is not modified.
+ *	The skb must be linear.
  */
 static inline void skb_trim(struct sk_buff *skb, unsigned int len)
 {
@@ -1002,12 +1004,10 @@ static inline void skb_trim(struct sk_bu

 static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
 {
-	if (!skb->data_len) {
-		skb->len = len;
-		skb->tail = skb->data+len;
-		return 0;
-	}
-	return ___pskb_trim(skb, len, 1);
+	if (skb->data_len)
+		return ___pskb_trim(skb, len);
+	__skb_trim(skb, len);
+	return 0;
 }

 static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
diff -r 447ac06f74d3 -r aaaa249e6f3b linux-2.6-xen-sparse/mm/memory.c
--- a/linux-2.6-xen-sparse/mm/memory.c	Mon Nov 27 13:50:02 2006 +0000
+++ b/linux-2.6-xen-sparse/mm/memory.c	Mon Nov 27 13:50:02 2006 +0000
@@ -1535,6 +1535,7 @@ static inline void cow_user_page(struct
 		if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
 			memset(kaddr, 0, PAGE_SIZE);
 		kunmap_atomic(kaddr, KM_USER0);
+		flush_dcache_page(dst);
 		return;
 	}
diff -r 447ac06f74d3 -r aaaa249e6f3b linux-2.6-xen-sparse/mm/mmap.c
--- a/linux-2.6-xen-sparse/mm/mmap.c	Mon Nov 27 13:50:02 2006 +0000
+++ b/linux-2.6-xen-sparse/mm/mmap.c	Mon Nov 27 13:50:02 2006 +0000
@@ -29,6 +29,10 @@
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/tlb.h>
+
+#ifndef arch_mmap_check
+#define arch_mmap_check(addr, len, flags) (0)
+#endif

 static void unmap_region(struct mm_struct *mm,
 		struct vm_area_struct *vma, struct vm_area_struct *prev,
@@ -906,6 +910,10 @@ unsigned long do_mmap_pgoff(struct file
 	if (!len)
 		return -EINVAL;

+	error = arch_mmap_check(addr, len, flags);
+	if (error)
+		return error;
+
 	/* Careful about overflows..
*/ len = PAGE_ALIGN(len); if (!len || len > TASK_SIZE) @@ -1846,6 +1854,7 @@ unsigned long do_brk(unsigned long addr, unsigned long flags; struct rb_node ** rb_link, * rb_parent; pgoff_t pgoff = addr >> PAGE_SHIFT; + int error; len = PAGE_ALIGN(len); if (!len) @@ -1853,6 +1862,12 @@ unsigned long do_brk(unsigned long addr, if ((addr + len) > TASK_SIZE || (addr + len) < addr) return -EINVAL; + + flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; + + error = arch_mmap_check(addr, len, flags); + if (error) + return error; /* * mlock MCL_FUTURE? @@ -1893,8 +1908,6 @@ unsigned long do_brk(unsigned long addr, if (security_vm_enough_memory(len >> PAGE_SHIFT)) return -ENOMEM; - - flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; /* Can we just expand an old private anonymous mapping? */ if (vma_merge(mm, prev, addr, addr + len, flags, diff -r 447ac06f74d3 -r aaaa249e6f3b linux-2.6-xen-sparse/net/core/skbuff.c --- a/linux-2.6-xen-sparse/net/core/skbuff.c Mon Nov 27 13:50:02 2006 +0000 +++ b/linux-2.6-xen-sparse/net/core/skbuff.c Mon Nov 27 13:50:02 2006 +0000 @@ -261,17 +261,22 @@ nodata: } -static void skb_drop_fraglist(struct sk_buff *skb) -{ - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - skb_shinfo(skb)->frag_list = NULL; +static void skb_drop_list(struct sk_buff **listp) +{ + struct sk_buff *list = *listp; + + *listp = NULL; do { struct sk_buff *this = list; list = list->next; kfree_skb(this); } while (list); +} + +static inline void skb_drop_fraglist(struct sk_buff *skb) +{ + skb_drop_list(&skb_shinfo(skb)->frag_list); } static void skb_clone_fraglist(struct sk_buff *skb) @@ -604,6 +609,7 @@ struct sk_buff *pskb_copy(struct sk_buff n->csum = skb->csum; n->ip_summed = skb->ip_summed; + n->truesize += skb->data_len; n->data_len = skb->data_len; n->len = skb->len; @@ -798,49 +804,86 @@ struct sk_buff *skb_pad(struct sk_buff * return nskb; } -/* Trims skb to length len. It can change skb pointers, if "realloc" is 1. - * If realloc==0 and trimming is impossible without change of data, - * it is BUG(). - */ - -int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc) -{ +/* Trims skb to length len. It can change skb pointers. 
+ */ + +int ___pskb_trim(struct sk_buff *skb, unsigned int len) +{ + struct sk_buff **fragp; + struct sk_buff *frag; int offset = skb_headlen(skb); int nfrags = skb_shinfo(skb)->nr_frags; int i; - - for (i = 0; i < nfrags; i++) { + int err; + + if (skb_cloned(skb) && + unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))) + return err; + + i = 0; + if (offset >= len) + goto drop_pages; + + for (; i < nfrags; i++) { int end = offset + skb_shinfo(skb)->frags[i].size; - if (end > len) { - if (skb_cloned(skb)) { - BUG_ON(!realloc); - if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return -ENOMEM; - } - if (len <= offset) { - put_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->nr_frags--; - } else { - skb_shinfo(skb)->frags[i].size = len - offset; - } - } - offset = end; - } - - if (offset < len) { + + if (end < len) { + offset = end; + continue; + } + + skb_shinfo(skb)->frags[i++].size = len - offset; + +drop_pages: + skb_shinfo(skb)->nr_frags = i; + + for (; i < nfrags; i++) + put_page(skb_shinfo(skb)->frags[i].page); + + if (skb_shinfo(skb)->frag_list) + skb_drop_fraglist(skb); + goto done; + } + + for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp); + fragp = &frag->next) { + int end = offset + frag->len; + + if (skb_shared(frag)) { + struct sk_buff *nfrag; + + nfrag = skb_clone(frag, GFP_ATOMIC); + if (unlikely(!nfrag)) + return -ENOMEM; + + nfrag->next = frag->next; + kfree_skb(frag); + frag = nfrag; + *fragp = frag; + } + + if (end < len) { + offset = end; + continue; + } + + if (end > len && + unlikely((err = pskb_trim(frag, len - offset)))) + return err; + + if (frag->next) + skb_drop_list(&frag->next); + break; + } + +done: + if (len > skb_headlen(skb)) { skb->data_len -= skb->len - len; skb->len = len; } else { - if (len <= skb_headlen(skb)) { - skb->len = len; - skb->data_len = 0; - skb->tail = skb->data + len; - if (skb_shinfo(skb)->frag_list && !skb_cloned(skb)) - skb_drop_fraglist(skb); - } else { - skb->data_len -= skb->len - len; - skb->len = len; - } + skb->len = len; + skb->data_len = 0; + skb->tail = skb->data + len; } return 0; diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/blktap-aio-16_03_06.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/blktap-aio-16_03_06.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,294 @@ +diff -pruN ../orig-linux-2.6.16.29/fs/aio.c ./fs/aio.c +--- ../orig-linux-2.6.16.29/fs/aio.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./fs/aio.c 2006-09-19 13:58:49.000000000 +0100 +@@ -34,6 +34,11 @@ + #include <asm/uaccess.h> + #include <asm/mmu_context.h> + ++#ifdef CONFIG_EPOLL ++#include <linux/poll.h> ++#include <linux/eventpoll.h> ++#endif ++ + #if DEBUG > 1 + #define dprintk printk + #else +@@ -1016,6 +1021,10 @@ put_rq: + if (waitqueue_active(&ctx->wait)) + wake_up(&ctx->wait); + ++#ifdef CONFIG_EPOLL ++ if (ctx->file && waitqueue_active(&ctx->poll_wait)) ++ wake_up(&ctx->poll_wait); ++#endif + if (ret) + put_ioctx(ctx); + +@@ -1025,6 +1034,8 @@ put_rq: + /* aio_read_evt + * Pull an event off of the ioctx's event ring. Returns the number of + * events fetched (0 or 1 ;-) ++ * If ent parameter is 0, just returns the number of events that would ++ * be fetched. + * FIXME: make this use cmpxchg. + * TODO: make the ringbuffer user mmap()able (requires FIXME). 
+ */ +@@ -1047,13 +1058,18 @@ static int aio_read_evt(struct kioctx *i + + head = ring->head % info->nr; + if (head != ring->tail) { +- struct io_event *evp = aio_ring_event(info, head, KM_USER1); +- *ent = *evp; +- head = (head + 1) % info->nr; +- smp_mb(); /* finish reading the event before updatng the head */ +- ring->head = head; +- ret = 1; +- put_aio_ring_event(evp, KM_USER1); ++ if (ent) { /* event requested */ ++ struct io_event *evp = ++ aio_ring_event(info, head, KM_USER1); ++ *ent = *evp; ++ head = (head + 1) % info->nr; ++ /* finish reading the event before updatng the head */ ++ smp_mb(); ++ ring->head = head; ++ ret = 1; ++ put_aio_ring_event(evp, KM_USER1); ++ } else /* only need to know availability */ ++ ret = 1; + } + spin_unlock(&info->ring_lock); + +@@ -1236,9 +1252,78 @@ static void io_destroy(struct kioctx *io + + aio_cancel_all(ioctx); + wait_for_all_aios(ioctx); ++#ifdef CONFIG_EPOLL ++ /* forget the poll file, but it's up to the user to close it */ ++ if (ioctx->file) { ++ ioctx->file->private_data = 0; ++ ioctx->file = 0; ++ } ++#endif + put_ioctx(ioctx); /* once for the lookup */ + } + ++#ifdef CONFIG_EPOLL ++ ++static int aio_queue_fd_close(struct inode *inode, struct file *file) ++{ ++ struct kioctx *ioctx = file->private_data; ++ if (ioctx) { ++ file->private_data = 0; ++ spin_lock_irq(&ioctx->ctx_lock); ++ ioctx->file = 0; ++ spin_unlock_irq(&ioctx->ctx_lock); ++ } ++ return 0; ++} ++ ++static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait) ++{ unsigned int pollflags = 0; ++ struct kioctx *ioctx = file->private_data; ++ ++ if (ioctx) { ++ ++ spin_lock_irq(&ioctx->ctx_lock); ++ /* Insert inside our poll wait queue */ ++ poll_wait(file, &ioctx->poll_wait, wait); ++ ++ /* Check our condition */ ++ if (aio_read_evt(ioctx, 0)) ++ pollflags = POLLIN | POLLRDNORM; ++ spin_unlock_irq(&ioctx->ctx_lock); ++ } ++ ++ return pollflags; ++} ++ ++static struct file_operations aioq_fops = { ++ .release = aio_queue_fd_close, ++ .poll = aio_queue_fd_poll ++}; ++ ++/* make_aio_fd: ++ * Create a file descriptor that can be used to poll the event queue. ++ * Based and piggybacked on the excellent epoll code. ++ */ ++ ++static int make_aio_fd(struct kioctx *ioctx) ++{ ++ int error, fd; ++ struct inode *inode; ++ struct file *file; ++ ++ error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops); ++ if (error) ++ return error; ++ ++ /* associate the file with the IO context */ ++ file->private_data = ioctx; ++ ioctx->file = file; ++ init_waitqueue_head(&ioctx->poll_wait); ++ return fd; ++} ++#endif ++ ++ + /* sys_io_setup: + * Create an aio_context capable of receiving at least nr_events. + * ctxp must not point to an aio_context that already exists, and +@@ -1251,18 +1336,30 @@ static void io_destroy(struct kioctx *io + * resources are available. May fail with -EFAULT if an invalid + * pointer is passed for ctxp. Will fail with -ENOSYS if not + * implemented. ++ * ++ * To request a selectable fd, the user context has to be initialized ++ * to 1, instead of 0, and the return value is the fd. ++ * This keeps the system call compatible, since a non-zero value ++ * was not allowed so far. 
+ */ + asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp) + { + struct kioctx *ioctx = NULL; + unsigned long ctx; + long ret; ++ int make_fd = 0; + + ret = get_user(ctx, ctxp); + if (unlikely(ret)) + goto out; + + ret = -EINVAL; ++#ifdef CONFIG_EPOLL ++ if (ctx == 1) { ++ make_fd = 1; ++ ctx = 0; ++ } ++#endif + if (unlikely(ctx || nr_events == 0)) { + pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n", + ctx, nr_events); +@@ -1273,8 +1370,12 @@ asmlinkage long sys_io_setup(unsigned nr + ret = PTR_ERR(ioctx); + if (!IS_ERR(ioctx)) { + ret = put_user(ioctx->user_id, ctxp); +- if (!ret) +- return 0; ++#ifdef CONFIG_EPOLL ++ if (make_fd && ret >= 0) ++ ret = make_aio_fd(ioctx); ++#endif ++ if (ret >= 0) ++ return ret; + + get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */ + io_destroy(ioctx); +diff -pruN ../orig-linux-2.6.16.29/fs/eventpoll.c ./fs/eventpoll.c +--- ../orig-linux-2.6.16.29/fs/eventpoll.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./fs/eventpoll.c 2006-09-19 13:58:49.000000000 +0100 +@@ -235,8 +235,6 @@ struct ep_pqueue { + + static void ep_poll_safewake_init(struct poll_safewake *psw); + static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq); +-static int ep_getfd(int *efd, struct inode **einode, struct file **efile, +- struct eventpoll *ep); + static int ep_alloc(struct eventpoll **pep); + static void ep_free(struct eventpoll *ep); + static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd); +@@ -266,7 +264,7 @@ static int ep_events_transfer(struct eve + static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, + int maxevents, long timeout); + static int eventpollfs_delete_dentry(struct dentry *dentry); +-static struct inode *ep_eventpoll_inode(void); ++static struct inode *ep_eventpoll_inode(struct file_operations *fops); + static struct super_block *eventpollfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data); +@@ -525,7 +523,7 @@ asmlinkage long sys_epoll_create(int siz + * Creates all the items needed to setup an eventpoll file. That is, + * a file structure, and inode and a free file descriptor. + */ +- error = ep_getfd(&fd, &inode, &file, ep); ++ error = ep_getfd(&fd, &inode, &file, ep, &eventpoll_fops); + if (error) + goto eexit_2; + +@@ -710,8 +708,8 @@ eexit_1: + /* + * Creates the file descriptor to be used by the epoll interface. 
+ */ +-static int ep_getfd(int *efd, struct inode **einode, struct file **efile, +- struct eventpoll *ep) ++int ep_getfd(int *efd, struct inode **einode, struct file **efile, ++ struct eventpoll *ep, struct file_operations *fops) + { + struct qstr this; + char name[32]; +@@ -727,7 +725,7 @@ static int ep_getfd(int *efd, struct ino + goto eexit_1; + + /* Allocates an inode from the eventpoll file system */ +- inode = ep_eventpoll_inode(); ++ inode = ep_eventpoll_inode(fops); + error = PTR_ERR(inode); + if (IS_ERR(inode)) + goto eexit_2; +@@ -758,7 +756,7 @@ static int ep_getfd(int *efd, struct ino + + file->f_pos = 0; + file->f_flags = O_RDONLY; +- file->f_op = &eventpoll_fops; ++ file->f_op = fops; + file->f_mode = FMODE_READ; + file->f_version = 0; + file->private_data = ep; +@@ -1574,7 +1572,7 @@ static int eventpollfs_delete_dentry(str + } + + +-static struct inode *ep_eventpoll_inode(void) ++static struct inode *ep_eventpoll_inode(struct file_operations *fops) + { + int error = -ENOMEM; + struct inode *inode = new_inode(eventpoll_mnt->mnt_sb); +@@ -1582,7 +1580,7 @@ static struct inode *ep_eventpoll_inode( + if (!inode) + goto eexit_1; + +- inode->i_fop = &eventpoll_fops; ++ inode->i_fop = fops; + + /* + * Mark the inode dirty from the very beginning, +diff -pruN ../orig-linux-2.6.16.29/include/linux/aio.h ./include/linux/aio.h +--- ../orig-linux-2.6.16.29/include/linux/aio.h 2006-09-12 19:02:10.000000000 +0100 ++++ ./include/linux/aio.h 2006-09-19 13:58:49.000000000 +0100 +@@ -191,6 +191,11 @@ struct kioctx { + struct aio_ring_info ring_info; + + struct work_struct wq; ++#ifdef CONFIG_EPOLL ++ // poll integration ++ wait_queue_head_t poll_wait; ++ struct file *file; ++#endif + }; + + /* prototypes */ +diff -pruN ../orig-linux-2.6.16.29/include/linux/eventpoll.h ./include/linux/eventpoll.h +--- ../orig-linux-2.6.16.29/include/linux/eventpoll.h 2006-09-12 19:02:10.000000000 +0100 ++++ ./include/linux/eventpoll.h 2006-09-19 13:58:49.000000000 +0100 +@@ -86,6 +86,12 @@ static inline void eventpoll_release(str + } + + ++/* ++ * called by aio code to create fd that can poll the aio event queueQ ++ */ ++struct eventpoll; ++int ep_getfd(int *efd, struct inode **einode, struct file **efile, ++ struct eventpoll *ep, struct file_operations *fops); + #else + + static inline void eventpoll_init_file(struct file *file) {} diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/device_bind.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/device_bind.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,15 @@ +diff -pruN ../orig-linux-2.6.16.29/drivers/base/bus.c ./drivers/base/bus.c +--- ../orig-linux-2.6.16.29/drivers/base/bus.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./drivers/base/bus.c 2006-09-19 13:58:54.000000000 +0100 +@@ -188,6 +188,11 @@ static ssize_t driver_bind(struct device + up(&dev->sem); + if (dev->parent) + up(&dev->parent->sem); ++ ++ if (err > 0) /* success */ ++ err = count; ++ else if (err == 0) /* driver didn't accept device */ ++ err = -ENODEV; + } + put_device(dev); + put_bus(bus); diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/fix-hz-suspend.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/fix-hz-suspend.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,26 @@ +diff -pruN ../orig-linux-2.6.16.29/kernel/timer.c ./kernel/timer.c +--- ../orig-linux-2.6.16.29/kernel/timer.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./kernel/timer.c 2006-09-19 13:58:58.000000000 +0100 +@@ -555,6 +555,22 @@ found: + } + 
spin_unlock(&base->t_base.lock); + ++ /* ++ * It can happen that other CPUs service timer IRQs and increment ++ * jiffies, but we have not yet got a local timer tick to process ++ * the timer wheels. In that case, the expiry time can be before ++ * jiffies, but since the high-resolution timer here is relative to ++ * jiffies, the default expression when high-resolution timers are ++ * not active, ++ * ++ * time_before(MAX_JIFFY_OFFSET + jiffies, expires) ++ * ++ * would falsely evaluate to true. If that is the case, just ++ * return jiffies so that we can immediately fire the local timer ++ */ ++ if (time_before(expires, jiffies)) ++ return jiffies; ++ + if (time_before(hr_expires, expires)) + return hr_expires; + diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/fix-ide-cd-pio-mode.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/fix-ide-cd-pio-mode.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,18 @@ +diff -pruN ../orig-linux-2.6.16.29/drivers/ide/ide-lib.c ./drivers/ide/ide-lib.c +--- ../orig-linux-2.6.16.29/drivers/ide/ide-lib.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./drivers/ide/ide-lib.c 2006-09-19 13:59:03.000000000 +0100 +@@ -410,10 +410,10 @@ void ide_toggle_bounce(ide_drive_t *driv + { + u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */ + +- if (!PCI_DMA_BUS_IS_PHYS) { +- addr = BLK_BOUNCE_ANY; +- } else if (on && drive->media == ide_disk) { +- if (HWIF(drive)->pci_dev) ++ if (on && drive->media == ide_disk) { ++ if (!PCI_DMA_BUS_IS_PHYS) ++ addr = BLK_BOUNCE_ANY; ++ else if (HWIF(drive)->pci_dev) + addr = HWIF(drive)->pci_dev->dma_mask; + } + diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/i386-mach-io-check-nmi.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/i386-mach-io-check-nmi.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,45 @@ +diff -pruN ../orig-linux-2.6.16.29/arch/i386/kernel/traps.c ./arch/i386/kernel/traps.c +--- ../orig-linux-2.6.16.29/arch/i386/kernel/traps.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./arch/i386/kernel/traps.c 2006-09-19 13:59:06.000000000 +0100 +@@ -567,18 +567,11 @@ static void mem_parity_error(unsigned ch + + static void io_check_error(unsigned char reason, struct pt_regs * regs) + { +- unsigned long i; +- + printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); + show_registers(regs); + + /* Re-enable the IOCK line, wait for a few seconds */ +- reason = (reason & 0xf) | 8; +- outb(reason, 0x61); +- i = 2000; +- while (--i) udelay(1000); +- reason &= ~8; +- outb(reason, 0x61); ++ clear_io_check_error(reason); + } + + static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) +diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/mach-default/mach_traps.h ./include/asm-i386/mach-default/mach_traps.h +--- ../orig-linux-2.6.16.29/include/asm-i386/mach-default/mach_traps.h 2006-09-12 19:02:10.000000000 +0100 ++++ ./include/asm-i386/mach-default/mach_traps.h 2006-09-19 13:59:06.000000000 +0100 +@@ -15,6 +15,18 @@ static inline void clear_mem_error(unsig + outb(reason, 0x61); + } + ++static inline void clear_io_check_error(unsigned char reason) ++{ ++ unsigned long i; ++ ++ reason = (reason & 0xf) | 8; ++ outb(reason, 0x61); ++ i = 2000; ++ while (--i) udelay(1000); ++ reason &= ~8; ++ outb(reason, 0x61); ++} ++ + static inline unsigned char get_nmi_reason(void) + { + return inb(0x61); diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/ipv6-no-autoconf.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/patches/linux-2.6.16.32/ipv6-no-autoconf.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,19 @@ +diff -pruN ../orig-linux-2.6.16.29/net/ipv6/addrconf.c ./net/ipv6/addrconf.c +--- ../orig-linux-2.6.16.29/net/ipv6/addrconf.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./net/ipv6/addrconf.c 2006-09-19 13:59:11.000000000 +0100 +@@ -2471,6 +2471,7 @@ static void addrconf_dad_start(struct in + spin_lock_bh(&ifp->lock); + + if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || ++ !(dev->flags&IFF_MULTICAST) || + !(ifp->flags&IFA_F_TENTATIVE)) { + ifp->flags &= ~IFA_F_TENTATIVE; + spin_unlock_bh(&ifp->lock); +@@ -2555,6 +2556,7 @@ static void addrconf_dad_completed(struc + if (ifp->idev->cnf.forwarding == 0 && + ifp->idev->cnf.rtr_solicits > 0 && + (dev->flags&IFF_LOOPBACK) == 0 && ++ (dev->flags & IFF_MULTICAST) && + (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { + struct in6_addr all_routers; + diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/net-csum.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/net-csum.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,63 @@ +diff -pruN ../orig-linux-2.6.16.29/net/ipv4/netfilter/ip_nat_proto_tcp.c ./net/ipv4/netfilter/ip_nat_proto_tcp.c +--- ../orig-linux-2.6.16.29/net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-09-19 13:59:15.000000000 +0100 +@@ -129,7 +129,12 @@ tcp_manip_pkt(struct sk_buff **pskb, + if (hdrsize < sizeof(*hdr)) + return 1; + +- hdr->check = ip_nat_cheat_check(~oldip, newip, ++#ifdef CONFIG_XEN ++ if ((*pskb)->proto_csum_blank) ++ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); ++ else ++#endif ++ hdr->check = ip_nat_cheat_check(~oldip, newip, + ip_nat_cheat_check(oldport ^ 0xFFFF, + newport, + hdr->check)); +diff -pruN ../orig-linux-2.6.16.29/net/ipv4/netfilter/ip_nat_proto_udp.c ./net/ipv4/netfilter/ip_nat_proto_udp.c +--- ../orig-linux-2.6.16.29/net/ipv4/netfilter/ip_nat_proto_udp.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./net/ipv4/netfilter/ip_nat_proto_udp.c 2006-09-19 13:59:15.000000000 +0100 +@@ -113,11 +113,17 @@ udp_manip_pkt(struct sk_buff **pskb, + newport = tuple->dst.u.udp.port; + portptr = &hdr->dest; + } +- if (hdr->check) /* 0 is a special case meaning no checksum */ +- hdr->check = ip_nat_cheat_check(~oldip, newip, ++ if (hdr->check) { /* 0 is a special case meaning no checksum */ ++#ifdef CONFIG_XEN ++ if ((*pskb)->proto_csum_blank) ++ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); ++ else ++#endif ++ hdr->check = ip_nat_cheat_check(~oldip, newip, + ip_nat_cheat_check(*portptr ^ 0xFFFF, + newport, + hdr->check)); ++ } + *portptr = newport; + return 1; + } +diff -pruN ../orig-linux-2.6.16.29/net/ipv4/xfrm4_output.c ./net/ipv4/xfrm4_output.c +--- ../orig-linux-2.6.16.29/net/ipv4/xfrm4_output.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./net/ipv4/xfrm4_output.c 2006-09-19 13:59:15.000000000 +0100 +@@ -17,6 +17,8 @@ + #include <net/xfrm.h> + #include <net/icmp.h> + ++extern int skb_checksum_setup(struct sk_buff *skb); ++ + /* Add encapsulation header. 
+ * + * In transport mode, the IP header will be moved forward to make space +@@ -103,6 +105,10 @@ static int xfrm4_output_one(struct sk_bu + struct xfrm_state *x = dst->xfrm; + int err; + ++ err = skb_checksum_setup(skb); ++ if (err) ++ goto error_nolock; ++ + if (skb->ip_summed == CHECKSUM_HW) { + err = skb_checksum_help(skb, 0); + if (err) diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/net-gso-0-base.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/net-gso-0-base.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,2898 @@ +Index: tmp-xxx/Documentation/networking/netdevices.txt +=================================================================== +--- tmp-xxx.orig/Documentation/networking/netdevices.txt 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/Documentation/networking/netdevices.txt 2006-11-27 10:52:42.000000000 +0000 +@@ -42,9 +42,9 @@ + Context: nominally process, but don't sleep inside an rwlock + + dev->hard_start_xmit: +- Synchronization: dev->xmit_lock spinlock. ++ Synchronization: netif_tx_lock spinlock. + When the driver sets NETIF_F_LLTX in dev->features this will be +- called without holding xmit_lock. In this case the driver ++ called without holding netif_tx_lock. In this case the driver + has to lock by itself when needed. It is recommended to use a try lock + for this and return -1 when the spin lock fails. + The locking there should also properly protect against +@@ -62,12 +62,12 @@ + Only valid when NETIF_F_LLTX is set. + + dev->tx_timeout: +- Synchronization: dev->xmit_lock spinlock. ++ Synchronization: netif_tx_lock spinlock. + Context: BHs disabled + Notes: netif_queue_stopped() is guaranteed true + + dev->set_multicast_list: +- Synchronization: dev->xmit_lock spinlock. ++ Synchronization: netif_tx_lock spinlock. 
+ Context: BHs disabled + + dev->poll: +Index: tmp-xxx/drivers/block/aoe/aoenet.c +=================================================================== +--- tmp-xxx.orig/drivers/block/aoe/aoenet.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/block/aoe/aoenet.c 2006-11-27 10:52:42.000000000 +0000 +@@ -95,9 +95,8 @@ + static struct sk_buff * + skb_check(struct sk_buff *skb) + { +- if (skb_is_nonlinear(skb)) + if ((skb = skb_share_check(skb, GFP_ATOMIC))) +- if (skb_linearize(skb, GFP_ATOMIC) < 0) { ++ if (skb_linearize(skb)) { + dev_kfree_skb(skb); + return NULL; + } +Index: tmp-xxx/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +=================================================================== +--- tmp-xxx.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2006-11-27 10:52:42.000000000 +0000 +@@ -821,7 +821,8 @@ + + ipoib_mcast_stop_thread(dev, 0); + +- spin_lock_irqsave(&dev->xmit_lock, flags); ++ local_irq_save(flags); ++ netif_tx_lock(dev); + spin_lock(&priv->lock); + + /* +@@ -896,7 +897,8 @@ + } + + spin_unlock(&priv->lock); +- spin_unlock_irqrestore(&dev->xmit_lock, flags); ++ netif_tx_unlock(dev); ++ local_irq_restore(flags); + + /* We have to cancel outside of the spinlock */ + list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { +Index: tmp-xxx/drivers/media/dvb/dvb-core/dvb_net.c +=================================================================== +--- tmp-xxx.orig/drivers/media/dvb/dvb-core/dvb_net.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/media/dvb/dvb-core/dvb_net.c 2006-11-27 10:52:42.000000000 +0000 +@@ -1053,7 +1053,7 @@ + + dvb_net_feed_stop(dev); + priv->rx_mode = RX_MODE_UNI; +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + + if (dev->flags & IFF_PROMISC) { + dprintk("%s: promiscuous mode\n", dev->name); +@@ -1078,7 +1078,7 @@ + } + } + +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + dvb_net_feed_start(dev); + } + +Index: tmp-xxx/drivers/net/8139cp.c +=================================================================== +--- tmp-xxx.orig/drivers/net/8139cp.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/8139cp.c 2006-11-27 10:52:42.000000000 +0000 +@@ -794,7 +794,7 @@ + entry = cp->tx_head; + eor = (entry == (CP_TX_RING_SIZE - 1)) ? RingEnd : 0; + if (dev->features & NETIF_F_TSO) +- mss = skb_shinfo(skb)->tso_size; ++ mss = skb_shinfo(skb)->gso_size; + + if (skb_shinfo(skb)->nr_frags == 0) { + struct cp_desc *txd = &cp->tx_ring[entry]; +Index: tmp-xxx/drivers/net/bnx2.c +=================================================================== +--- tmp-xxx.orig/drivers/net/bnx2.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/bnx2.c 2006-11-27 10:52:42.000000000 +0000 +@@ -1593,7 +1593,7 @@ + skb = tx_buf->skb; + #ifdef BCM_TSO + /* partial BD completions possible with TSO packets */ +- if (skb_shinfo(skb)->tso_size) { ++ if (skb_shinfo(skb)->gso_size) { + u16 last_idx, last_ring_idx; + + last_idx = sw_cons + +@@ -1948,7 +1948,7 @@ + return 1; + } + +-/* Called with rtnl_lock from vlan functions and also dev->xmit_lock ++/* Called with rtnl_lock from vlan functions and also netif_tx_lock + * from set_multicast. + */ + static void +@@ -4403,7 +4403,7 @@ + } + #endif + +-/* Called with dev->xmit_lock. ++/* Called with netif_tx_lock. + * hard_start_xmit is pseudo-lockless - a lock is only required when + * the tx queue is full. 
This way, we get the benefit of lockless + * operations most of the time without the complexities to handle +@@ -4441,7 +4441,7 @@ + (TX_BD_FLAGS_VLAN_TAG | (vlan_tx_tag_get(skb) << 16)); + } + #ifdef BCM_TSO +- if ((mss = skb_shinfo(skb)->tso_size) && ++ if ((mss = skb_shinfo(skb)->gso_size) && + (skb->len > (bp->dev->mtu + ETH_HLEN))) { + u32 tcp_opt_len, ip_tcp_len; + +Index: tmp-xxx/drivers/net/bonding/bond_main.c +=================================================================== +--- tmp-xxx.orig/drivers/net/bonding/bond_main.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/bonding/bond_main.c 2006-11-27 10:52:42.000000000 +0000 +@@ -1145,8 +1145,7 @@ + } + + #define BOND_INTERSECT_FEATURES \ +- (NETIF_F_SG|NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM|\ +- NETIF_F_TSO|NETIF_F_UFO) ++ (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_TSO | NETIF_F_UFO) + + /* + * Compute the common dev->feature set available to all slaves. Some +@@ -1164,9 +1163,7 @@ + features &= (slave->dev->features & BOND_INTERSECT_FEATURES); + + if ((features & NETIF_F_SG) && +- !(features & (NETIF_F_IP_CSUM | +- NETIF_F_NO_CSUM | +- NETIF_F_HW_CSUM))) ++ !(features & NETIF_F_ALL_CSUM)) + features &= ~NETIF_F_SG; + + /* +@@ -4147,7 +4144,7 @@ + */ + bond_dev->features |= NETIF_F_VLAN_CHALLENGED; + +- /* don't acquire bond device's xmit_lock when ++ /* don't acquire bond device's netif_tx_lock when + * transmitting */ + bond_dev->features |= NETIF_F_LLTX; + +Index: tmp-xxx/drivers/net/chelsio/sge.c +=================================================================== +--- tmp-xxx.orig/drivers/net/chelsio/sge.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/chelsio/sge.c 2006-11-27 10:52:42.000000000 +0000 +@@ -1419,7 +1419,7 @@ + struct cpl_tx_pkt *cpl; + + #ifdef NETIF_F_TSO +- if (skb_shinfo(skb)->tso_size) { ++ if (skb_shinfo(skb)->gso_size) { + int eth_type; + struct cpl_tx_pkt_lso *hdr; + +@@ -1434,7 +1434,7 @@ + hdr->ip_hdr_words = skb->nh.iph->ihl; + hdr->tcp_hdr_words = skb->h.th->doff; + hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type, +- skb_shinfo(skb)->tso_size)); ++ skb_shinfo(skb)->gso_size)); + hdr->len = htonl(skb->len - sizeof(*hdr)); + cpl = (struct cpl_tx_pkt *)hdr; + sge->stats.tx_lso_pkts++; +Index: tmp-xxx/drivers/net/e1000/e1000_main.c +=================================================================== +--- tmp-xxx.orig/drivers/net/e1000/e1000_main.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/e1000/e1000_main.c 2006-11-27 10:52:42.000000000 +0000 +@@ -2526,7 +2526,7 @@ + uint8_t ipcss, ipcso, tucss, tucso, hdr_len; + int err; + +- if (skb_shinfo(skb)->tso_size) { ++ if (skb_shinfo(skb)->gso_size) { + if (skb_header_cloned(skb)) { + err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + if (err) +@@ -2534,7 +2534,7 @@ + } + + hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); +- mss = skb_shinfo(skb)->tso_size; ++ mss = skb_shinfo(skb)->gso_size; + if (skb->protocol == ntohs(ETH_P_IP)) { + skb->nh.iph->tot_len = 0; + skb->nh.iph->check = 0; +@@ -2651,7 +2651,7 @@ + * tso gets written back prematurely before the data is fully + * DMAd to the controller */ + if (!skb->data_len && tx_ring->last_tx_tso && +- !skb_shinfo(skb)->tso_size) { ++ !skb_shinfo(skb)->gso_size) { + tx_ring->last_tx_tso = 0; + size -= 4; + } +@@ -2893,7 +2893,7 @@ + } + + #ifdef NETIF_F_TSO +- mss = skb_shinfo(skb)->tso_size; ++ mss = skb_shinfo(skb)->gso_size; + /* The controller does a simple calculation to + * make sure there is enough room in the FIFO before + * 
initiating the DMA for each buffer. The calc is: +@@ -2935,7 +2935,7 @@ + #ifdef NETIF_F_TSO + /* Controller Erratum workaround */ + if (!skb->data_len && tx_ring->last_tx_tso && +- !skb_shinfo(skb)->tso_size) ++ !skb_shinfo(skb)->gso_size) + count++; + #endif + +Index: tmp-xxx/drivers/net/forcedeth.c +=================================================================== +--- tmp-xxx.orig/drivers/net/forcedeth.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/forcedeth.c 2006-11-27 10:52:42.000000000 +0000 +@@ -482,9 +482,9 @@ + * critical parts: + * - rx is (pseudo-) lockless: it relies on the single-threading provided + * by the arch code for interrupts. +- * - tx setup is lockless: it relies on dev->xmit_lock. Actual submission ++ * - tx setup is lockless: it relies on netif_tx_lock. Actual submission + * needs dev->priv->lock :-( +- * - set_multicast_list: preparation lockless, relies on dev->xmit_lock. ++ * - set_multicast_list: preparation lockless, relies on netif_tx_lock. + */ + + /* in dev: base, irq */ +@@ -1016,7 +1016,7 @@ + + /* + * nv_start_xmit: dev->hard_start_xmit function +- * Called with dev->xmit_lock held. ++ * Called with netif_tx_lock held. + */ + static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev) + { +@@ -1105,8 +1105,8 @@ + np->tx_skbuff[nr] = skb; + + #ifdef NETIF_F_TSO +- if (skb_shinfo(skb)->tso_size) +- tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->tso_size << NV_TX2_TSO_SHIFT); ++ if (skb_shinfo(skb)->gso_size) ++ tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT); + else + #endif + tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0); +@@ -1203,7 +1203,7 @@ + + /* + * nv_tx_timeout: dev->tx_timeout function +- * Called with dev->xmit_lock held. ++ * Called with netif_tx_lock held. + */ + static void nv_tx_timeout(struct net_device *dev) + { +@@ -1524,7 +1524,7 @@ + * Changing the MTU is a rare event, it shouldn't matter. + */ + disable_irq(dev->irq); +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + spin_lock(&np->lock); + /* stop engines */ + nv_stop_rx(dev); +@@ -1559,7 +1559,7 @@ + nv_start_rx(dev); + nv_start_tx(dev); + spin_unlock(&np->lock); +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + enable_irq(dev->irq); + } + return 0; +@@ -1594,7 +1594,7 @@ + memcpy(dev->dev_addr, macaddr->sa_data, ETH_ALEN); + + if (netif_running(dev)) { +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + spin_lock_irq(&np->lock); + + /* stop rx engine */ +@@ -1606,7 +1606,7 @@ + /* restart rx engine */ + nv_start_rx(dev); + spin_unlock_irq(&np->lock); +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + } else { + nv_copy_mac_to_hw(dev); + } +@@ -1615,7 +1615,7 @@ + + /* + * nv_set_multicast: dev->set_multicast function +- * Called with dev->xmit_lock held. ++ * Called with netif_tx_lock held. 
+ */ + static void nv_set_multicast(struct net_device *dev) + { +Index: tmp-xxx/drivers/net/hamradio/6pack.c +=================================================================== +--- tmp-xxx.orig/drivers/net/hamradio/6pack.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/hamradio/6pack.c 2006-11-27 10:52:42.000000000 +0000 +@@ -308,9 +308,9 @@ + { + struct sockaddr_ax25 *sa = addr; + +- spin_lock_irq(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN); +- spin_unlock_irq(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + + return 0; + } +@@ -767,9 +767,9 @@ + break; + } + +- spin_lock_irq(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + memcpy(dev->dev_addr, &addr, AX25_ADDR_LEN); +- spin_unlock_irq(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + + err = 0; + break; +Index: tmp-xxx/drivers/net/hamradio/mkiss.c +=================================================================== +--- tmp-xxx.orig/drivers/net/hamradio/mkiss.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/hamradio/mkiss.c 2006-11-27 10:52:42.000000000 +0000 +@@ -357,9 +357,9 @@ + { + struct sockaddr_ax25 *sa = addr; + +- spin_lock_irq(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN); +- spin_unlock_irq(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + + return 0; + } +@@ -886,9 +886,9 @@ + break; + } + +- spin_lock_irq(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + memcpy(dev->dev_addr, addr, AX25_ADDR_LEN); +- spin_unlock_irq(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + + err = 0; + break; +Index: tmp-xxx/drivers/net/ifb.c +=================================================================== +--- tmp-xxx.orig/drivers/net/ifb.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/ifb.c 2006-11-27 10:52:42.000000000 +0000 +@@ -76,13 +76,13 @@ + dp->st_task_enter++; + if ((skb = skb_peek(&dp->tq)) == NULL) { + dp->st_txq_refl_try++; +- if (spin_trylock(&_dev->xmit_lock)) { ++ if (netif_tx_trylock(_dev)) { + dp->st_rxq_enter++; + while ((skb = skb_dequeue(&dp->rq)) != NULL) { + skb_queue_tail(&dp->tq, skb); + dp->st_rx2tx_tran++; + } +- spin_unlock(&_dev->xmit_lock); ++ netif_tx_unlock(_dev); + } else { + /* reschedule */ + dp->st_rxq_notenter++; +@@ -110,7 +110,7 @@ + } + } + +- if (spin_trylock(&_dev->xmit_lock)) { ++ if (netif_tx_trylock(_dev)) { + dp->st_rxq_check++; + if ((skb = skb_peek(&dp->rq)) == NULL) { + dp->tasklet_pending = 0; +@@ -118,10 +118,10 @@ + netif_wake_queue(_dev); + } else { + dp->st_rxq_rsch++; +- spin_unlock(&_dev->xmit_lock); ++ netif_tx_unlock(_dev); + goto resched; + } +- spin_unlock(&_dev->xmit_lock); ++ netif_tx_unlock(_dev); + } else { + resched: + dp->tasklet_pending = 1; +Index: tmp-xxx/drivers/net/irda/vlsi_ir.c +=================================================================== +--- tmp-xxx.orig/drivers/net/irda/vlsi_ir.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/irda/vlsi_ir.c 2006-11-27 10:52:42.000000000 +0000 +@@ -959,7 +959,7 @@ + || (now.tv_sec==ready.tv_sec && now.tv_usec>=ready.tv_usec)) + break; + udelay(100); +- /* must not sleep here - we are called under xmit_lock! */ ++ /* must not sleep here - called under netif_tx_lock! 
*/ + } + } + +Index: tmp-xxx/drivers/net/ixgb/ixgb_main.c +=================================================================== +--- tmp-xxx.orig/drivers/net/ixgb/ixgb_main.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/ixgb/ixgb_main.c 2006-11-27 10:52:42.000000000 +0000 +@@ -1163,7 +1163,7 @@ + uint16_t ipcse, tucse, mss; + int err; + +- if(likely(skb_shinfo(skb)->tso_size)) { ++ if(likely(skb_shinfo(skb)->gso_size)) { + if (skb_header_cloned(skb)) { + err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + if (err) +@@ -1171,7 +1171,7 @@ + } + + hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); +- mss = skb_shinfo(skb)->tso_size; ++ mss = skb_shinfo(skb)->gso_size; + skb->nh.iph->tot_len = 0; + skb->nh.iph->check = 0; + skb->h.th->check = ~csum_tcpudp_magic(skb->nh.iph->saddr, +Index: tmp-xxx/drivers/net/loopback.c +=================================================================== +--- tmp-xxx.orig/drivers/net/loopback.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/loopback.c 2006-11-27 10:52:42.000000000 +0000 +@@ -74,7 +74,7 @@ + struct iphdr *iph = skb->nh.iph; + struct tcphdr *th = (struct tcphdr*)(skb->nh.raw + (iph->ihl * 4)); + unsigned int doffset = (iph->ihl + th->doff) * 4; +- unsigned int mtu = skb_shinfo(skb)->tso_size + doffset; ++ unsigned int mtu = skb_shinfo(skb)->gso_size + doffset; + unsigned int offset = 0; + u32 seq = ntohl(th->seq); + u16 id = ntohs(iph->id); +@@ -139,7 +139,7 @@ + #endif + + #ifdef LOOPBACK_TSO +- if (skb_shinfo(skb)->tso_size) { ++ if (skb_shinfo(skb)->gso_size) { + BUG_ON(skb->protocol != htons(ETH_P_IP)); + BUG_ON(skb->nh.iph->protocol != IPPROTO_TCP); + +Index: tmp-xxx/drivers/net/mv643xx_eth.c +=================================================================== +--- tmp-xxx.orig/drivers/net/mv643xx_eth.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/mv643xx_eth.c 2006-11-27 10:52:42.000000000 +0000 +@@ -1107,7 +1107,7 @@ + + #ifdef MV643XX_CHECKSUM_OFFLOAD_TX + if (has_tiny_unaligned_frags(skb)) { +- if ((skb_linearize(skb, GFP_ATOMIC) != 0)) { ++ if (__skb_linearize(skb)) { + stats->tx_dropped++; + printk(KERN_DEBUG "%s: failed to linearize tiny " + "unaligned fragment\n", dev->name); +Index: tmp-xxx/drivers/net/natsemi.c +=================================================================== +--- tmp-xxx.orig/drivers/net/natsemi.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/natsemi.c 2006-11-27 10:52:42.000000000 +0000 +@@ -323,12 +323,12 @@ + The rx process only runs in the interrupt handler. Access from outside + the interrupt handler is only permitted after disable_irq(). + +-The rx process usually runs under the dev->xmit_lock. If np->intr_tx_reap ++The rx process usually runs under the netif_tx_lock. If np->intr_tx_reap + is set, then access is permitted under spin_lock_irq(&np->lock). + + Thus configuration functions that want to access everything must call + disable_irq(dev->irq); +- spin_lock_bh(dev->xmit_lock); ++ netif_tx_lock_bh(dev); + spin_lock_irq(&np->lock); + + IV. 
Notes +Index: tmp-xxx/drivers/net/r8169.c +=================================================================== +--- tmp-xxx.orig/drivers/net/r8169.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/r8169.c 2006-11-27 10:52:42.000000000 +0000 +@@ -2171,7 +2171,7 @@ + static inline u32 rtl8169_tso_csum(struct sk_buff *skb, struct net_device *dev) + { + if (dev->features & NETIF_F_TSO) { +- u32 mss = skb_shinfo(skb)->tso_size; ++ u32 mss = skb_shinfo(skb)->gso_size; + + if (mss) + return LargeSend | ((mss & MSSMask) << MSSShift); +Index: tmp-xxx/drivers/net/s2io.c +=================================================================== +--- tmp-xxx.orig/drivers/net/s2io.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/s2io.c 2006-11-27 10:52:42.000000000 +0000 +@@ -3522,8 +3522,8 @@ + txdp->Control_1 = 0; + txdp->Control_2 = 0; + #ifdef NETIF_F_TSO +- mss = skb_shinfo(skb)->tso_size; +- if (mss) { ++ mss = skb_shinfo(skb)->gso_size; ++ if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV4) { + txdp->Control_1 |= TXD_TCP_LSO_EN; + txdp->Control_1 |= TXD_TCP_LSO_MSS(mss); + } +@@ -3543,10 +3543,10 @@ + } + + frg_len = skb->len - skb->data_len; +- if (skb_shinfo(skb)->ufo_size) { ++ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) { + int ufo_size; + +- ufo_size = skb_shinfo(skb)->ufo_size; ++ ufo_size = skb_shinfo(skb)->gso_size; + ufo_size &= ~7; + txdp->Control_1 |= TXD_UFO_EN; + txdp->Control_1 |= TXD_UFO_MSS(ufo_size); +@@ -3572,7 +3572,7 @@ + txdp->Host_Control = (unsigned long) skb; + txdp->Control_1 |= TXD_BUFFER0_SIZE(frg_len); + +- if (skb_shinfo(skb)->ufo_size) ++ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) + txdp->Control_1 |= TXD_UFO_EN; + + frg_cnt = skb_shinfo(skb)->nr_frags; +@@ -3587,12 +3587,12 @@ + (sp->pdev, frag->page, frag->page_offset, + frag->size, PCI_DMA_TODEVICE); + txdp->Control_1 = TXD_BUFFER0_SIZE(frag->size); +- if (skb_shinfo(skb)->ufo_size) ++ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) + txdp->Control_1 |= TXD_UFO_EN; + } + txdp->Control_1 |= TXD_GATHER_CODE_LAST; + +- if (skb_shinfo(skb)->ufo_size) ++ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) + frg_cnt++; /* as Txd0 was used for inband header */ + + tx_fifo = mac_control->tx_FIFO_start[queue]; +@@ -3606,7 +3606,7 @@ + if (mss) + val64 |= TX_FIFO_SPECIAL_FUNC; + #endif +- if (skb_shinfo(skb)->ufo_size) ++ if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) + val64 |= TX_FIFO_SPECIAL_FUNC; + writeq(val64, &tx_fifo->List_Control); + +Index: tmp-xxx/drivers/net/sky2.c +=================================================================== +--- tmp-xxx.orig/drivers/net/sky2.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/sky2.c 2006-11-27 10:52:42.000000000 +0000 +@@ -1141,7 +1141,7 @@ + count = sizeof(dma_addr_t) / sizeof(u32); + count += skb_shinfo(skb)->nr_frags * count; + +- if (skb_shinfo(skb)->tso_size) ++ if (skb_shinfo(skb)->gso_size) + ++count; + + if (skb->ip_summed == CHECKSUM_HW) +@@ -1213,7 +1213,7 @@ + } + + /* Check for TCP Segmentation Offload */ +- mss = skb_shinfo(skb)->tso_size; ++ mss = skb_shinfo(skb)->gso_size; + if (mss != 0) { + /* just drop the packet if non-linear expansion fails */ + if (skb_header_cloned(skb) && +Index: tmp-xxx/drivers/net/tg3.c +=================================================================== +--- tmp-xxx.orig/drivers/net/tg3.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/tg3.c 2006-11-27 10:52:42.000000000 +0000 +@@ -3664,7 +3664,7 @@ + #if TG3_TSO_SUPPORT != 0 + mss = 0; + if (skb->len > (tp->dev->mtu + ETH_HLEN) 
&& +- (mss = skb_shinfo(skb)->tso_size) != 0) { ++ (mss = skb_shinfo(skb)->gso_size) != 0) { + int tcp_opt_len, ip_tcp_len; + + if (skb_header_cloned(skb) && +Index: tmp-xxx/drivers/net/tulip/winbond-840.c +=================================================================== +--- tmp-xxx.orig/drivers/net/tulip/winbond-840.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/tulip/winbond-840.c 2006-11-27 10:52:42.000000000 +0000 +@@ -1605,11 +1605,11 @@ + * - get_stats: + * spin_lock_irq(np->lock), doesn't touch hw if not present + * - hard_start_xmit: +- * netif_stop_queue + spin_unlock_wait(&dev->xmit_lock); ++ * synchronize_irq + netif_tx_disable; + * - tx_timeout: +- * netif_device_detach + spin_unlock_wait(&dev->xmit_lock); ++ * netif_device_detach + netif_tx_disable; + * - set_multicast_list +- * netif_device_detach + spin_unlock_wait(&dev->xmit_lock); ++ * netif_device_detach + netif_tx_disable; + * - interrupt handler + * doesn't touch hw if not present, synchronize_irq waits for + * running instances of the interrupt handler. +@@ -1635,11 +1635,10 @@ + netif_device_detach(dev); + update_csr6(dev, 0); + iowrite32(0, ioaddr + IntrEnable); +- netif_stop_queue(dev); + spin_unlock_irq(&np->lock); + +- spin_unlock_wait(&dev->xmit_lock); + synchronize_irq(dev->irq); ++ netif_tx_disable(dev); + + np->stats.rx_missed_errors += ioread32(ioaddr + RxMissed) & 0xffff; + +Index: tmp-xxx/drivers/net/typhoon.c +=================================================================== +--- tmp-xxx.orig/drivers/net/typhoon.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/typhoon.c 2006-11-27 10:52:42.000000000 +0000 +@@ -340,7 +340,7 @@ + #endif + + #if defined(NETIF_F_TSO) +-#define skb_tso_size(x) (skb_shinfo(x)->tso_size) ++#define skb_tso_size(x) (skb_shinfo(x)->gso_size) + #define TSO_NUM_DESCRIPTORS 2 + #define TSO_OFFLOAD_ON TYPHOON_OFFLOAD_TCP_SEGMENT + #else +Index: tmp-xxx/drivers/net/via-velocity.c +=================================================================== +--- tmp-xxx.orig/drivers/net/via-velocity.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/via-velocity.c 2006-11-27 10:52:42.000000000 +0000 +@@ -1905,6 +1905,13 @@ + + int pktlen = skb->len; + ++#ifdef VELOCITY_ZERO_COPY_SUPPORT ++ if (skb_shinfo(skb)->nr_frags > 6 && __skb_linearize(skb)) { ++ kfree_skb(skb); ++ return 0; ++ } ++#endif ++ + spin_lock_irqsave(&vptr->lock, flags); + + index = vptr->td_curr[qnum]; +@@ -1920,8 +1927,6 @@ + */ + if (pktlen < ETH_ZLEN) { + /* Cannot occur until ZC support */ +- if(skb_linearize(skb, GFP_ATOMIC)) +- return 0; + pktlen = ETH_ZLEN; + memcpy(tdinfo->buf, skb->data, skb->len); + memset(tdinfo->buf + skb->len, 0, ETH_ZLEN - skb->len); +@@ -1939,7 +1944,6 @@ + int nfrags = skb_shinfo(skb)->nr_frags; + tdinfo->skb = skb; + if (nfrags > 6) { +- skb_linearize(skb, GFP_ATOMIC); + memcpy(tdinfo->buf, skb->data, skb->len); + tdinfo->skb_dma[0] = tdinfo->buf_dma; + td_ptr->tdesc0.pktsize = +Index: tmp-xxx/drivers/net/wireless/orinoco.c +=================================================================== +--- tmp-xxx.orig/drivers/net/wireless/orinoco.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/net/wireless/orinoco.c 2006-11-27 10:52:42.000000000 +0000 +@@ -1835,7 +1835,9 @@ + /* Set promiscuity / multicast*/ + priv->promiscuous = 0; + priv->mc_count = 0; +- __orinoco_set_multicast_list(dev); /* FIXME: what about the xmit_lock */ ++ ++ /* FIXME: what about netif_tx_lock */ ++ __orinoco_set_multicast_list(dev); + + return 0; + } +Index: 
tmp-xxx/drivers/s390/net/qeth_eddp.c +=================================================================== +--- tmp-xxx.orig/drivers/s390/net/qeth_eddp.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/s390/net/qeth_eddp.c 2006-11-27 10:52:42.000000000 +0000 +@@ -421,7 +421,7 @@ + } + tcph = eddp->skb->h.th; + while (eddp->skb_offset < eddp->skb->len) { +- data_len = min((int)skb_shinfo(eddp->skb)->tso_size, ++ data_len = min((int)skb_shinfo(eddp->skb)->gso_size, + (int)(eddp->skb->len - eddp->skb_offset)); + /* prepare qdio hdr */ + if (eddp->qh.hdr.l2.id == QETH_HEADER_TYPE_LAYER2){ +@@ -516,20 +516,20 @@ + + QETH_DBF_TEXT(trace, 5, "eddpcanp"); + /* can we put multiple skbs in one page? */ +- skbs_per_page = PAGE_SIZE / (skb_shinfo(skb)->tso_size + hdr_len); ++ skbs_per_page = PAGE_SIZE / (skb_shinfo(skb)->gso_size + hdr_len); + if (skbs_per_page > 1){ +- ctx->num_pages = (skb_shinfo(skb)->tso_segs + 1) / ++ ctx->num_pages = (skb_shinfo(skb)->gso_segs + 1) / + skbs_per_page + 1; + ctx->elements_per_skb = 1; + } else { + /* no -> how many elements per skb? */ +- ctx->elements_per_skb = (skb_shinfo(skb)->tso_size + hdr_len + ++ ctx->elements_per_skb = (skb_shinfo(skb)->gso_size + hdr_len + + PAGE_SIZE) >> PAGE_SHIFT; + ctx->num_pages = ctx->elements_per_skb * +- (skb_shinfo(skb)->tso_segs + 1); ++ (skb_shinfo(skb)->gso_segs + 1); + } + ctx->num_elements = ctx->elements_per_skb * +- (skb_shinfo(skb)->tso_segs + 1); ++ (skb_shinfo(skb)->gso_segs + 1); + } + + static inline struct qeth_eddp_context * +Index: tmp-xxx/drivers/s390/net/qeth_main.c +=================================================================== +--- tmp-xxx.orig/drivers/s390/net/qeth_main.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/s390/net/qeth_main.c 2006-11-27 10:52:42.000000000 +0000 +@@ -4454,7 +4454,7 @@ + queue = card->qdio.out_qs + [qeth_get_priority_queue(card, skb, ipv, cast_type)]; + +- if (skb_shinfo(skb)->tso_size) ++ if (skb_shinfo(skb)->gso_size) + large_send = card->options.large_send; + + /*are we able to do TSO ? 
If so ,prepare and send it from here */ +@@ -4501,7 +4501,7 @@ + card->stats.tx_packets++; + card->stats.tx_bytes += skb->len; + #ifdef CONFIG_QETH_PERF_STATS +- if (skb_shinfo(skb)->tso_size && ++ if (skb_shinfo(skb)->gso_size && + !(large_send == QETH_LARGE_SEND_NO)) { + card->perf_stats.large_send_bytes += skb->len; + card->perf_stats.large_send_cnt++; +Index: tmp-xxx/drivers/s390/net/qeth_tso.h +=================================================================== +--- tmp-xxx.orig/drivers/s390/net/qeth_tso.h 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/drivers/s390/net/qeth_tso.h 2006-11-27 10:52:42.000000000 +0000 +@@ -51,7 +51,7 @@ + hdr->ext.hdr_version = 1; + hdr->ext.hdr_len = 28; + /*insert non-fix values */ +- hdr->ext.mss = skb_shinfo(skb)->tso_size; ++ hdr->ext.mss = skb_shinfo(skb)->gso_size; + hdr->ext.dg_hdr_len = (__u16)(iph->ihl*4 + tcph->doff*4); + hdr->ext.payload_len = (__u16)(skb->len - hdr->ext.dg_hdr_len - + sizeof(struct qeth_hdr_tso)); +Index: tmp-xxx/include/linux/ethtool.h +=================================================================== +--- tmp-xxx.orig/include/linux/ethtool.h 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/include/linux/ethtool.h 2006-11-27 10:52:42.000000000 +0000 +@@ -408,6 +408,8 @@ + #define ETHTOOL_GPERMADDR 0x00000020 /* Get permanent hardware address */ + #define ETHTOOL_GUFO 0x00000021 /* Get UFO enable (ethtool_value) */ + #define ETHTOOL_SUFO 0x00000022 /* Set UFO enable (ethtool_value) */ ++#define ETHTOOL_GGSO 0x00000023 /* Get GSO enable (ethtool_value) */ ++#define ETHTOOL_SGSO 0x00000024 /* Set GSO enable (ethtool_value) */ + + /* compatibility with older code */ + #define SPARC_ETH_GSET ETHTOOL_GSET +Index: tmp-xxx/include/linux/netdevice.h +=================================================================== +--- tmp-xxx.orig/include/linux/netdevice.h 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/include/linux/netdevice.h 2006-11-27 10:52:42.000000000 +0000 +@@ -230,7 +230,8 @@ + __LINK_STATE_SCHED, + __LINK_STATE_NOCARRIER, + __LINK_STATE_RX_SCHED, +- __LINK_STATE_LINKWATCH_PENDING ++ __LINK_STATE_LINKWATCH_PENDING, ++ __LINK_STATE_QDISC_RUNNING, + }; + + +@@ -306,9 +307,17 @@ + #define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */ + #define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ + #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ +-#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */ ++#define NETIF_F_GSO 2048 /* Enable software GSO. */ + #define NETIF_F_LLTX 4096 /* LockLess TX */ +-#define NETIF_F_UFO 8192 /* Can offload UDP Large Send*/ ++ ++ /* Segmentation offload features */ ++#define NETIF_F_GSO_SHIFT 16 ++#define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT) ++#define NETIF_F_UFO (SKB_GSO_UDPV4 << NETIF_F_GSO_SHIFT) ++#define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT) ++ ++#define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) ++#define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM) + + struct net_device *next_sched; + +@@ -394,6 +403,9 @@ + struct list_head qdisc_list; + unsigned long tx_queue_len; /* Max frames per queue allowed */ + ++ /* Partially transmitted GSO packet. 
*/ ++ struct sk_buff *gso_skb; ++ + /* ingress path synchronizer */ + spinlock_t ingress_lock; + struct Qdisc *qdisc_ingress; +@@ -402,7 +414,7 @@ + * One part is mostly used on xmit path (device) + */ + /* hard_start_xmit synchronizer */ +- spinlock_t xmit_lock ____cacheline_aligned_in_smp; ++ spinlock_t _xmit_lock ____cacheline_aligned_in_smp; + /* cpu id of processor entered to hard_start_xmit or -1, + if nobody entered there. + */ +@@ -527,6 +539,8 @@ + struct net_device *, + struct packet_type *, + struct net_device *); ++ struct sk_buff *(*gso_segment)(struct sk_buff *skb, ++ int features); + void *af_packet_priv; + struct list_head list; + }; +@@ -693,7 +707,8 @@ + extern int dev_set_mtu(struct net_device *, int); + extern int dev_set_mac_address(struct net_device *, + struct sockaddr *); +-extern void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); ++extern int dev_hard_start_xmit(struct sk_buff *skb, ++ struct net_device *dev); + + extern void dev_init(void); + +@@ -900,11 +915,43 @@ + clear_bit(__LINK_STATE_RX_SCHED, &dev->state); + } + ++static inline void netif_tx_lock(struct net_device *dev) ++{ ++ spin_lock(&dev->_xmit_lock); ++ dev->xmit_lock_owner = smp_processor_id(); ++} ++ ++static inline void netif_tx_lock_bh(struct net_device *dev) ++{ ++ spin_lock_bh(&dev->_xmit_lock); ++ dev->xmit_lock_owner = smp_processor_id(); ++} ++ ++static inline int netif_tx_trylock(struct net_device *dev) ++{ ++ int err = spin_trylock(&dev->_xmit_lock); ++ if (!err) ++ dev->xmit_lock_owner = smp_processor_id(); ++ return err; ++} ++ ++static inline void netif_tx_unlock(struct net_device *dev) ++{ ++ dev->xmit_lock_owner = -1; ++ spin_unlock(&dev->_xmit_lock); ++} ++ ++static inline void netif_tx_unlock_bh(struct net_device *dev) ++{ ++ dev->xmit_lock_owner = -1; ++ spin_unlock_bh(&dev->_xmit_lock); ++} ++ + static inline void netif_tx_disable(struct net_device *dev) + { +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + netif_stop_queue(dev); +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + } + + /* These functions live elsewhere (drivers/net/net_init.c, but related) */ +@@ -932,6 +979,7 @@ + extern int weight_p; + extern int netdev_set_master(struct net_device *dev, struct net_device *master); + extern int skb_checksum_help(struct sk_buff *skb, int inward); ++extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features); + #ifdef CONFIG_BUG + extern void netdev_rx_csum_fault(struct net_device *dev); + #else +@@ -951,6 +999,18 @@ + + extern void linkwatch_run_queue(void); + ++static inline int skb_gso_ok(struct sk_buff *skb, int features) ++{ ++ int feature = skb_shinfo(skb)->gso_size ? ++ skb_shinfo(skb)->gso_type << NETIF_F_GSO_SHIFT : 0; ++ return (features & feature) == feature; ++} ++ ++static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) ++{ ++ return !skb_gso_ok(skb, dev->features); ++} ++ + #endif /* __KERNEL__ */ + + #endif /* _LINUX_DEV_H */ +Index: tmp-xxx/include/linux/skbuff.h +=================================================================== +--- tmp-xxx.orig/include/linux/skbuff.h 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/include/linux/skbuff.h 2006-11-27 10:52:42.000000000 +0000 +@@ -134,9 +134,10 @@ + struct skb_shared_info { + atomic_t dataref; + unsigned short nr_frags; +- unsigned short tso_size; +- unsigned short tso_segs; +- unsigned short ufo_size; ++ unsigned short gso_size; ++ /* Warning: this field is not always filled in (UFO)! 
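
For reference, the netdevice.h hunk above maps each SKB_GSO_* type bit into the
NETIF_F_* feature space by shifting it left by NETIF_F_GSO_SHIFT, so the
skb_gso_ok()/netif_needs_gso() pair reduces to plain mask arithmetic. A minimal
user-space sketch of that check follows; the constants are copied from the
hunks above, and the real skb_gso_ok() additionally treats skbs with
gso_size == 0 as always OK (omitted here for brevity):

	#include <stdio.h>

	/* Constants copied from the skbuff.h / netdevice.h hunks above. */
	#define SKB_GSO_TCPV4      (1 << 0)
	#define SKB_GSO_UDPV4      (1 << 1)
	#define SKB_GSO_DODGY      (1 << 2)
	#define NETIF_F_GSO_SHIFT  16
	#define NETIF_F_TSO        (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
	#define NETIF_F_UFO        (SKB_GSO_UDPV4 << NETIF_F_GSO_SHIFT)
	#define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)

	/* Same test as skb_gso_ok(): every feature the skb demands must be set. */
	static int gso_ok(int gso_type, int dev_features)
	{
		int needed = gso_type << NETIF_F_GSO_SHIFT;
		return (dev_features & needed) == needed;
	}

	int main(void)
	{
		/* Trusted TCPv4 skb on a TSO-capable device: hardware takes it. */
		printf("%d\n", gso_ok(SKB_GSO_TCPV4, NETIF_F_TSO));                 /* 1 */
		/* The same skb marked dodgy: software segmentation is forced
		 * unless the device also advertises NETIF_F_GSO_ROBUST. */
		printf("%d\n", gso_ok(SKB_GSO_TCPV4 | SKB_GSO_DODGY, NETIF_F_TSO)); /* 0 */
		return 0;
	}
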
*/ ++ unsigned short gso_segs; ++ unsigned short gso_type; + unsigned int ip6_frag_id; + struct sk_buff *frag_list; + skb_frag_t frags[MAX_SKB_FRAGS]; +@@ -168,6 +169,14 @@ + SKB_FCLONE_CLONE, + }; + ++enum { ++ SKB_GSO_TCPV4 = 1 << 0, ++ SKB_GSO_UDPV4 = 1 << 1, ++ ++ /* This indicates the skb is from an untrusted source. */ ++ SKB_GSO_DODGY = 1 << 2, ++}; ++ + /** + * struct sk_buff - socket buffer + * @next: Next buffer in list +@@ -1148,18 +1157,34 @@ + return 0; + } + ++static inline int __skb_linearize(struct sk_buff *skb) ++{ ++ return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM; ++} ++ + /** + * skb_linearize - convert paged skb to linear one + * @skb: buffer to linarize +- * @gfp: allocation mode + * + * If there is no free memory -ENOMEM is returned, otherwise zero + * is returned and the old skb data released. + */ +-extern int __skb_linearize(struct sk_buff *skb, gfp_t gfp); +-static inline int skb_linearize(struct sk_buff *skb, gfp_t gfp) ++static inline int skb_linearize(struct sk_buff *skb) ++{ ++ return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0; ++} ++ ++/** ++ * skb_linearize_cow - make sure skb is linear and writable ++ * @skb: buffer to process ++ * ++ * If there is no free memory -ENOMEM is returned, otherwise zero ++ * is returned and the old skb data released. ++ */ ++static inline int skb_linearize_cow(struct sk_buff *skb) + { +- return __skb_linearize(skb, gfp); ++ return skb_is_nonlinear(skb) || skb_cloned(skb) ? ++ __skb_linearize(skb) : 0; + } + + /** +@@ -1254,6 +1279,7 @@ + struct sk_buff *skb1, const u32 len); + + extern void skb_release_data(struct sk_buff *skb); ++extern struct sk_buff *skb_segment(struct sk_buff *skb, int features); + + static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *buffer) +Index: tmp-xxx/include/net/pkt_sched.h +=================================================================== +--- tmp-xxx.orig/include/net/pkt_sched.h 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/include/net/pkt_sched.h 2006-11-27 10:52:42.000000000 +0000 +@@ -218,12 +218,13 @@ + struct rtattr *tab); + extern void qdisc_put_rtab(struct qdisc_rate_table *tab); + +-extern int qdisc_restart(struct net_device *dev); ++extern void __qdisc_run(struct net_device *dev); + + static inline void qdisc_run(struct net_device *dev) + { +- while (!netif_queue_stopped(dev) && qdisc_restart(dev) < 0) +- /* NOTHING */; ++ if (!netif_queue_stopped(dev) && ++ !test_and_set_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) ++ __qdisc_run(dev); + } + + extern int tc_classify(struct sk_buff *skb, struct tcf_proto *tp, +Index: tmp-xxx/include/net/protocol.h +=================================================================== +--- tmp-xxx.orig/include/net/protocol.h 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/include/net/protocol.h 2006-11-27 10:52:42.000000000 +0000 +@@ -37,6 +37,8 @@ + struct net_protocol { + int (*handler)(struct sk_buff *skb); + void (*err_handler)(struct sk_buff *skb, u32 info); ++ struct sk_buff *(*gso_segment)(struct sk_buff *skb, ++ int features); + int no_policy; + }; + +Index: tmp-xxx/include/net/sock.h +=================================================================== +--- tmp-xxx.orig/include/net/sock.h 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/include/net/sock.h 2006-11-27 10:52:42.000000000 +0000 +@@ -1064,9 +1064,13 @@ + { + __sk_dst_set(sk, dst); + sk->sk_route_caps = dst->dev->features; ++ if (sk->sk_route_caps & NETIF_F_GSO) ++ sk->sk_route_caps |= NETIF_F_TSO; + if (sk->sk_route_caps & 
NETIF_F_TSO) { + if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len) + sk->sk_route_caps &= ~NETIF_F_TSO; ++ else ++ sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; + } + } + +Index: tmp-xxx/include/net/tcp.h +=================================================================== +--- tmp-xxx.orig/include/net/tcp.h 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/include/net/tcp.h 2006-11-27 10:52:42.000000000 +0000 +@@ -552,13 +552,13 @@ + */ + static inline int tcp_skb_pcount(const struct sk_buff *skb) + { +- return skb_shinfo(skb)->tso_segs; ++ return skb_shinfo(skb)->gso_segs; + } + + /* This is valid iff tcp_skb_pcount() > 1. */ + static inline int tcp_skb_mss(const struct sk_buff *skb) + { +- return skb_shinfo(skb)->tso_size; ++ return skb_shinfo(skb)->gso_size; + } + + static inline void tcp_dec_pcount_approx(__u32 *count, +@@ -1063,6 +1063,8 @@ + + extern int tcp_v4_destroy_sock(struct sock *sk); + ++extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features); ++ + #ifdef CONFIG_PROC_FS + extern int tcp4_proc_init(void); + extern void tcp4_proc_exit(void); +Index: tmp-xxx/net/atm/clip.c +=================================================================== +--- tmp-xxx.orig/net/atm/clip.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/atm/clip.c 2006-11-27 10:52:42.000000000 +0000 +@@ -101,7 +101,7 @@ + printk(KERN_CRIT "!clip_vcc->entry (clip_vcc %p)\n",clip_vcc); + return; + } +- spin_lock_bh(&entry->neigh->dev->xmit_lock); /* block clip_start_xmit() */ ++ netif_tx_lock_bh(entry->neigh->dev); /* block clip_start_xmit() */ + entry->neigh->used = jiffies; + for (walk = &entry->vccs; *walk; walk = &(*walk)->next) + if (*walk == clip_vcc) { +@@ -125,7 +125,7 @@ + printk(KERN_CRIT "ATMARP: unlink_clip_vcc failed (entry %p, vcc " + "0x%p)\n",entry,clip_vcc); + out: +- spin_unlock_bh(&entry->neigh->dev->xmit_lock); ++ netif_tx_unlock_bh(entry->neigh->dev); + } + + /* The neighbour entry n->lock is held. 
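
The netif_tx_lock*() helpers introduced above bundle the renamed _xmit_lock
with the xmit_lock_owner bookkeeping, so call sites such as clip.c stop
touching the spinlock directly. A rough user-space analogue of the
owner-tracking pattern, using pthreads in place of kernel spinlocks and
smp_processor_id(); illustrative only, the kernel primitives differ:

	#include <pthread.h>

	struct fake_dev {
		pthread_mutex_t xmit_lock;
		pthread_t       xmit_lock_owner;
		int             owner_valid;     /* stands in for the kernel's -1 */
	};

	/* Take the lock first, then record ourselves as owner, matching the
	 * order used by netif_tx_lock() in the hunk above. */
	static void tx_lock(struct fake_dev *d)
	{
		pthread_mutex_lock(&d->xmit_lock);
		d->xmit_lock_owner = pthread_self();
		d->owner_valid = 1;
	}

	/* Clear the owner *before* dropping the lock, as netif_tx_unlock() does. */
	static void tx_unlock(struct fake_dev *d)
	{
		d->owner_valid = 0;
		pthread_mutex_unlock(&d->xmit_lock);
	}

	/* Try-lock variant; note pthread_mutex_trylock() returns 0 on success,
	 * the opposite convention of spin_trylock(), and the owner should only
	 * be recorded on the success path. */
	static int tx_trylock(struct fake_dev *d)
	{
		if (pthread_mutex_trylock(&d->xmit_lock) != 0)
			return 0;               /* contended */
		d->xmit_lock_owner = pthread_self();
		d->owner_valid = 1;
		return 1;
	}
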
*/ +Index: tmp-xxx/net/bridge/br_device.c +=================================================================== +--- tmp-xxx.orig/net/bridge/br_device.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/bridge/br_device.c 2006-11-27 10:52:42.000000000 +0000 +@@ -146,9 +146,9 @@ + struct net_bridge *br = netdev_priv(dev); + + if (data) +- br->feature_mask |= NETIF_F_IP_CSUM; ++ br->feature_mask |= NETIF_F_NO_CSUM; + else +- br->feature_mask &= ~NETIF_F_IP_CSUM; ++ br->feature_mask &= ~NETIF_F_ALL_CSUM; + + br_features_recompute(br); + return 0; +@@ -185,6 +185,6 @@ + dev->set_mac_address = br_set_mac_address; + dev->priv_flags = IFF_EBRIDGE; + +- dev->features = NETIF_F_SG | NETIF_F_FRAGLIST +- | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_IP_CSUM; ++ dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | ++ NETIF_F_TSO | NETIF_F_NO_CSUM | NETIF_F_GSO_ROBUST; + } +Index: tmp-xxx/net/bridge/br_forward.c +=================================================================== +--- tmp-xxx.orig/net/bridge/br_forward.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/bridge/br_forward.c 2006-11-27 10:52:42.000000000 +0000 +@@ -32,7 +32,7 @@ + int br_dev_queue_push_xmit(struct sk_buff *skb) + { + /* drop mtu oversized packets except tso */ +- if (skb->len > skb->dev->mtu && !skb_shinfo(skb)->tso_size) ++ if (skb->len > skb->dev->mtu && !skb_shinfo(skb)->gso_size) + kfree_skb(skb); + else { + #ifdef CONFIG_BRIDGE_NETFILTER +Index: tmp-xxx/net/bridge/br_if.c +=================================================================== +--- tmp-xxx.orig/net/bridge/br_if.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/bridge/br_if.c 2006-11-27 10:52:42.000000000 +0000 +@@ -385,17 +385,28 @@ + struct net_bridge_port *p; + unsigned long features, checksum; + +- features = br->feature_mask &~ NETIF_F_IP_CSUM; +- checksum = br->feature_mask & NETIF_F_IP_CSUM; ++ checksum = br->feature_mask & NETIF_F_ALL_CSUM ? 
NETIF_F_NO_CSUM : 0; ++ features = br->feature_mask & ~NETIF_F_ALL_CSUM; + + list_for_each_entry(p, &br->port_list, list) { +- if (!(p->dev->features +- & (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM))) ++ unsigned long feature = p->dev->features; ++ ++ if (checksum & NETIF_F_NO_CSUM && !(feature & NETIF_F_NO_CSUM)) ++ checksum ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM; ++ if (checksum & NETIF_F_HW_CSUM && !(feature & NETIF_F_HW_CSUM)) ++ checksum ^= NETIF_F_HW_CSUM | NETIF_F_IP_CSUM; ++ if (!(feature & NETIF_F_IP_CSUM)) + checksum = 0; +- features &= p->dev->features; ++ ++ if (feature & NETIF_F_GSO) ++ feature |= NETIF_F_TSO; ++ feature |= NETIF_F_GSO; ++ ++ features &= feature; + } + +- br->dev->features = features | checksum | NETIF_F_LLTX; ++ br->dev->features = features | checksum | NETIF_F_LLTX | ++ NETIF_F_GSO_ROBUST; + } + + /* called with RTNL */ +Index: tmp-xxx/net/bridge/br_netfilter.c +=================================================================== +--- tmp-xxx.orig/net/bridge/br_netfilter.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/bridge/br_netfilter.c 2006-11-27 10:52:42.000000000 +0000 +@@ -743,7 +743,7 @@ + { + if (skb->protocol == htons(ETH_P_IP) && + skb->len > skb->dev->mtu && +- !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) ++ !skb_shinfo(skb)->gso_size) + return ip_fragment(skb, br_dev_queue_push_xmit); + else + return br_dev_queue_push_xmit(skb); +Index: tmp-xxx/net/core/dev.c +=================================================================== +--- tmp-xxx.orig/net/core/dev.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/core/dev.c 2006-11-27 10:57:31.000000000 +0000 +@@ -115,6 +115,7 @@ + #include <net/iw_handler.h> + #endif /* CONFIG_NET_RADIO */ + #include <asm/current.h> ++#include <linux/err.h> + + /* + * The list of packet types we will receive (as opposed to discard) +@@ -1032,7 +1033,7 @@ + * taps currently in use. + */ + +-void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) ++static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) + { + struct packet_type *ptype; + +@@ -1106,6 +1107,45 @@ + return ret; + } + ++/** ++ * skb_gso_segment - Perform segmentation on skb. ++ * @skb: buffer to segment ++ * @features: features for the output path (see dev->features) ++ * ++ * This function segments the given skb and returns a list of segments. ++ * ++ * It may return NULL if the skb requires no segmentation. This is ++ * only possible when GSO is used for verifying header integrity. ++ */ ++struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) ++{ ++ struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); ++ struct packet_type *ptype; ++ int type = skb->protocol; ++ ++ BUG_ON(skb_shinfo(skb)->frag_list); ++ BUG_ON(skb->ip_summed != CHECKSUM_HW); ++ ++ skb->mac.raw = skb->data; ++ skb->mac_len = skb->nh.raw - skb->data; ++ __skb_pull(skb, skb->mac_len); ++ ++ rcu_read_lock(); ++ list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { ++ if (ptype->type == type && !ptype->dev && ptype->gso_segment) { ++ segs = ptype->gso_segment(skb, features); ++ break; ++ } ++ } ++ rcu_read_unlock(); ++ ++ __skb_push(skb, skb->data - skb->mac.raw); ++ ++ return segs; ++} ++ ++EXPORT_SYMBOL(skb_gso_segment); ++ + /* Take action when hardware reception checksum errors are detected. 
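
skb_gso_segment() above has a three-way return contract: an ERR_PTR on
failure, NULL when GSO was only being used to verify header integrity, or a
singly linked list of segments chained through skb->next. A kernel-style
sketch of a consumer; xmit_one() is a hypothetical stand-in that consumes its
skb, not a function from this patch:

	static int xmit_maybe_segmented(struct sk_buff *skb, struct net_device *dev)
	{
		struct sk_buff *segs = skb_gso_segment(skb, dev->features);

		if (IS_ERR(segs))
			return PTR_ERR(segs);      /* no handler, bad header, -ENOMEM */
		if (!segs)
			return xmit_one(skb, dev); /* headers checked; send original */

		kfree_skb(skb);                    /* the list replaces the original */
		while (segs) {
			struct sk_buff *nskb = segs;
			int rc;

			segs = segs->next;
			nskb->next = NULL;
			rc = xmit_one(nskb, dev);
			if (rc) {
				while (segs) {     /* drop the unsent remainder */
					nskb = segs;
					segs = segs->next;
					kfree_skb(nskb);
				}
				return rc;
			}
		}
		return 0;
	}

The xfrm4_output_finish() hunk later in this patch follows exactly this shape.
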
*/ + #ifdef CONFIG_BUG + void netdev_rx_csum_fault(struct net_device *dev) +@@ -1142,76 +1182,107 @@ + #define illegal_highdma(dev, skb) (0) + #endif + +-/* Keep head the same: replace data */ +-int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask) ++struct dev_gso_cb { ++ void (*destructor)(struct sk_buff *skb); ++}; ++ ++#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) ++ ++static void dev_gso_skb_destructor(struct sk_buff *skb) + { +- unsigned int size; +- u8 *data; +- long offset; +- struct skb_shared_info *ninfo; +- int headerlen = skb->data - skb->head; +- int expand = (skb->tail + skb->data_len) - skb->end; +- +- if (skb_shared(skb)) +- BUG(); +- +- if (expand <= 0) +- expand = 0; +- +- size = skb->end - skb->head + expand; +- size = SKB_DATA_ALIGN(size); +- data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); +- if (!data) +- return -ENOMEM; +- +- /* Copy entire thing */ +- if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len)) +- BUG(); +- +- /* Set up shinfo */ +- ninfo = (struct skb_shared_info*)(data + size); +- atomic_set(&ninfo->dataref, 1); +- ninfo->tso_size = skb_shinfo(skb)->tso_size; +- ninfo->tso_segs = skb_shinfo(skb)->tso_segs; +- ninfo->ufo_size = skb_shinfo(skb)->ufo_size; +- ninfo->nr_frags = 0; +- ninfo->frag_list = NULL; +- +- /* Offset between the two in bytes */ +- offset = data - skb->head; +- +- /* Free old data. */ +- skb_release_data(skb); +- +- skb->head = data; +- skb->end = data + size; +- +- /* Set up new pointers */ +- skb->h.raw += offset; +- skb->nh.raw += offset; +- skb->mac.raw += offset; +- skb->tail += offset; +- skb->data += offset; ++ struct dev_gso_cb *cb; + +- /* We are no longer a clone, even if we were. */ +- skb->cloned = 0; ++ do { ++ struct sk_buff *nskb = skb->next; + +- skb->tail += skb->data_len; +- skb->data_len = 0; ++ skb->next = nskb->next; ++ nskb->next = NULL; ++ kfree_skb(nskb); ++ } while (skb->next); ++ ++ cb = DEV_GSO_CB(skb); ++ if (cb->destructor) ++ cb->destructor(skb); ++} ++ ++/** ++ * dev_gso_segment - Perform emulated hardware segmentation on skb. ++ * @skb: buffer to segment ++ * ++ * This function segments the given skb and stores the list of segments ++ * in skb->next. ++ */ ++static int dev_gso_segment(struct sk_buff *skb) ++{ ++ struct net_device *dev = skb->dev; ++ struct sk_buff *segs; ++ int features = dev->features & ~(illegal_highdma(dev, skb) ? ++ NETIF_F_SG : 0); ++ ++ segs = skb_gso_segment(skb, features); ++ ++ /* Verifying header integrity only. 
*/ ++ if (!segs) ++ return 0; ++ ++ if (unlikely(IS_ERR(segs))) ++ return PTR_ERR(segs); ++ ++ skb->next = segs; ++ DEV_GSO_CB(skb)->destructor = skb->destructor; ++ skb->destructor = dev_gso_skb_destructor; ++ return 0; ++} ++ ++int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ if (likely(!skb->next)) { ++ if (netdev_nit) ++ dev_queue_xmit_nit(skb, dev); ++ ++ if (netif_needs_gso(dev, skb)) { ++ if (unlikely(dev_gso_segment(skb))) ++ goto out_kfree_skb; ++ if (skb->next) ++ goto gso; ++ } ++ ++ return dev->hard_start_xmit(skb, dev); ++ } ++ ++gso: ++ do { ++ struct sk_buff *nskb = skb->next; ++ int rc; ++ ++ skb->next = nskb->next; ++ nskb->next = NULL; ++ rc = dev->hard_start_xmit(nskb, dev); ++ if (unlikely(rc)) { ++ nskb->next = skb->next; ++ skb->next = nskb; ++ return rc; ++ } ++ if (unlikely(netif_queue_stopped(dev) && skb->next)) ++ return NETDEV_TX_BUSY; ++ } while (skb->next); ++ ++ skb->destructor = DEV_GSO_CB(skb)->destructor; ++ ++out_kfree_skb: ++ kfree_skb(skb); + return 0; + } + + #define HARD_TX_LOCK(dev, cpu) { \ + if ((dev->features & NETIF_F_LLTX) == 0) { \ +- spin_lock(&dev->xmit_lock); \ +- dev->xmit_lock_owner = cpu; \ ++ netif_tx_lock(dev); \ + } \ + } + + #define HARD_TX_UNLOCK(dev) { \ + if ((dev->features & NETIF_F_LLTX) == 0) { \ +- dev->xmit_lock_owner = -1; \ +- spin_unlock(&dev->xmit_lock); \ ++ netif_tx_unlock(dev); \ + } \ + } + +@@ -1247,9 +1318,13 @@ + struct Qdisc *q; + int rc = -ENOMEM; + ++ /* GSO will handle the following emulations directly. */ ++ if (netif_needs_gso(dev, skb)) ++ goto gso; ++ + if (skb_shinfo(skb)->frag_list && + !(dev->features & NETIF_F_FRAGLIST) && +- __skb_linearize(skb, GFP_ATOMIC)) ++ __skb_linearize(skb)) + goto out_kfree_skb; + + /* Fragmented skb is linearized if device does not support SG, +@@ -1258,25 +1333,26 @@ + */ + if (skb_shinfo(skb)->nr_frags && + (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && +- __skb_linearize(skb, GFP_ATOMIC)) ++ __skb_linearize(skb)) + goto out_kfree_skb; + + /* If packet is not checksummed and device does not support + * checksumming for this protocol, complete checksumming here. + */ + if (skb->ip_summed == CHECKSUM_HW && +- (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) && ++ (!(dev->features & NETIF_F_GEN_CSUM) && + (!(dev->features & NETIF_F_IP_CSUM) || + skb->protocol != htons(ETH_P_IP)))) + if (skb_checksum_help(skb, 0)) + goto out_kfree_skb; + ++gso: + spin_lock_prefetch(&dev->queue_lock); + + /* Disable soft irqs for various locks below. Also + * stops preemption for RCU. + */ +- local_bh_disable(); ++ rcu_read_lock_bh(); + + /* Updates of qdisc are serialized by queue_lock. + * The struct Qdisc which is pointed to by qdisc is now a +@@ -1310,8 +1386,8 @@ + /* The device has no queue. Common case for software devices: + loopback, all the sorts of tunnels... + +- Really, it is unlikely that xmit_lock protection is necessary here. +- (f.e. loopback and IP tunnels are clean ignoring statistics ++ Really, it is unlikely that netif_tx_lock protection is necessary ++ here. (f.e. loopback and IP tunnels are clean ignoring statistics + counters.) + However, it is possible, that they rely on protection + made by us here. 
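
dev_gso_segment() above parks the segment list on skb->next and hides the
original destructor in the skb->cb scratch area, so a partially sent list can
be requeued as one unit and still freed correctly. A sketch of that overlay
idiom; attach_gso_list() is hypothetical, while the struct and macro are
copied from the hunk above:

	struct dev_gso_cb {
		void (*destructor)(struct sk_buff *skb);
	};

	#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)

	static void attach_gso_list(struct sk_buff *skb, struct sk_buff *segs)
	{
		skb->next = segs;                          /* unsent segments */
		DEV_GSO_CB(skb)->destructor = skb->destructor;
		skb->destructor = dev_gso_skb_destructor;  /* frees list, then chains */
	}
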
+@@ -1327,11 +1403,8 @@ + HARD_TX_LOCK(dev, cpu); + + if (!netif_queue_stopped(dev)) { +- if (netdev_nit) +- dev_queue_xmit_nit(skb, dev); +- + rc = 0; +- if (!dev->hard_start_xmit(skb, dev)) { ++ if (!dev_hard_start_xmit(skb, dev)) { + HARD_TX_UNLOCK(dev); + goto out; + } +@@ -1350,13 +1423,13 @@ + } + + rc = -ENETDOWN; +- local_bh_enable(); ++ rcu_read_unlock_bh(); + + out_kfree_skb: + kfree_skb(skb); + return rc; + out: +- local_bh_enable(); ++ rcu_read_unlock_bh(); + return rc; + } + +@@ -2671,7 +2744,7 @@ + BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); + + spin_lock_init(&dev->queue_lock); +- spin_lock_init(&dev->xmit_lock); ++ spin_lock_init(&dev->_xmit_lock); + dev->xmit_lock_owner = -1; + #ifdef CONFIG_NET_CLS_ACT + spin_lock_init(&dev->ingress_lock); +@@ -2715,9 +2788,7 @@ + + /* Fix illegal SG+CSUM combinations. */ + if ((dev->features & NETIF_F_SG) && +- !(dev->features & (NETIF_F_IP_CSUM | +- NETIF_F_NO_CSUM | +- NETIF_F_HW_CSUM))) { ++ !(dev->features & NETIF_F_ALL_CSUM)) { + printk("%s: Dropping NETIF_F_SG since no checksum feature.\n", + dev->name); + dev->features &= ~NETIF_F_SG; +@@ -3269,7 +3340,6 @@ + EXPORT_SYMBOL(__dev_get_by_index); + EXPORT_SYMBOL(__dev_get_by_name); + EXPORT_SYMBOL(__dev_remove_pack); +-EXPORT_SYMBOL(__skb_linearize); + EXPORT_SYMBOL(dev_valid_name); + EXPORT_SYMBOL(dev_add_pack); + EXPORT_SYMBOL(dev_alloc_name); +Index: tmp-xxx/net/core/dev_mcast.c +=================================================================== +--- tmp-xxx.orig/net/core/dev_mcast.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/core/dev_mcast.c 2006-11-27 10:52:42.000000000 +0000 +@@ -62,7 +62,7 @@ + * Device mc lists are changed by bh at least if IPv6 is enabled, + * so that it must be bh protected. + * +- * We block accesses to device mc filters with dev->xmit_lock. ++ * We block accesses to device mc filters with netif_tx_lock. 
+ */ + + /* +@@ -93,9 +93,9 @@ + + void dev_mc_upload(struct net_device *dev) + { +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + __dev_mc_upload(dev); +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + } + + /* +@@ -107,7 +107,7 @@ + int err = 0; + struct dev_mc_list *dmi, **dmip; + +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + + for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) { + /* +@@ -139,13 +139,13 @@ + */ + __dev_mc_upload(dev); + +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + return 0; + } + } + err = -ENOENT; + done: +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + return err; + } + +@@ -160,7 +160,7 @@ + + dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC); + +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) { + if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 && + dmi->dmi_addrlen == alen) { +@@ -176,7 +176,7 @@ + } + + if ((dmi = dmi1) == NULL) { +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + return -ENOMEM; + } + memcpy(dmi->dmi_addr, addr, alen); +@@ -189,11 +189,11 @@ + + __dev_mc_upload(dev); + +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + return 0; + + done: +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + kfree(dmi1); + return err; + } +@@ -204,7 +204,7 @@ + + void dev_mc_discard(struct net_device *dev) + { +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + + while (dev->mc_list != NULL) { + struct dev_mc_list *tmp = dev->mc_list; +@@ -215,7 +215,7 @@ + } + dev->mc_count = 0; + +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + } + + #ifdef CONFIG_PROC_FS +@@ -250,7 +250,7 @@ + struct dev_mc_list *m; + struct net_device *dev = v; + +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + for (m = dev->mc_list; m; m = m->next) { + int i; + +@@ -262,7 +262,7 @@ + + seq_putc(seq, '\n'); + } +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + return 0; + } + +Index: tmp-xxx/net/core/ethtool.c +=================================================================== +--- tmp-xxx.orig/net/core/ethtool.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/core/ethtool.c 2006-11-27 10:52:42.000000000 +0000 +@@ -30,7 +30,7 @@ + + u32 ethtool_op_get_tx_csum(struct net_device *dev) + { +- return (dev->features & (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM)) != 0; ++ return (dev->features & NETIF_F_ALL_CSUM) != 0; + } + + int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) +@@ -551,9 +551,7 @@ + return -EFAULT; + + if (edata.data && +- !(dev->features & (NETIF_F_IP_CSUM | +- NETIF_F_NO_CSUM | +- NETIF_F_HW_CSUM))) ++ !(dev->features & NETIF_F_ALL_CSUM)) + return -EINVAL; + + return __ethtool_set_sg(dev, edata.data); +@@ -561,7 +559,7 @@ + + static int ethtool_get_tso(struct net_device *dev, char __user *useraddr) + { +- struct ethtool_value edata = { ETHTOOL_GTSO }; ++ struct ethtool_value edata = { ETHTOOL_GUFO }; + + if (!dev->ethtool_ops->get_tso) + return -EOPNOTSUPP; +@@ -616,6 +614,29 @@ + return dev->ethtool_ops->set_ufo(dev, edata.data); + } + ++static int ethtool_get_gso(struct net_device *dev, char __user *useraddr) ++{ ++ struct ethtool_value edata = { ETHTOOL_GGSO }; ++ ++ edata.data = dev->features & NETIF_F_GSO; ++ if (copy_to_user(useraddr, &edata, sizeof(edata))) ++ return -EFAULT; ++ return 0; ++} ++ ++static int ethtool_set_gso(struct net_device *dev, char __user *useraddr) ++{ ++ struct ethtool_value edata; 
++ ++ if (copy_from_user(&edata, useraddr, sizeof(edata))) ++ return -EFAULT; ++ if (edata.data) ++ dev->features |= NETIF_F_GSO; ++ else ++ dev->features &= ~NETIF_F_GSO; ++ return 0; ++} ++ + static int ethtool_self_test(struct net_device *dev, char __user *useraddr) + { + struct ethtool_test test; +@@ -907,6 +928,12 @@ + case ETHTOOL_SUFO: + rc = ethtool_set_ufo(dev, useraddr); + break; ++ case ETHTOOL_GGSO: ++ rc = ethtool_get_gso(dev, useraddr); ++ break; ++ case ETHTOOL_SGSO: ++ rc = ethtool_set_gso(dev, useraddr); ++ break; + default: + rc = -EOPNOTSUPP; + } +Index: tmp-xxx/net/core/netpoll.c +=================================================================== +--- tmp-xxx.orig/net/core/netpoll.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/core/netpoll.c 2006-11-27 10:52:42.000000000 +0000 +@@ -273,24 +273,21 @@ + + do { + npinfo->tries--; +- spin_lock(&np->dev->xmit_lock); +- np->dev->xmit_lock_owner = smp_processor_id(); ++ netif_tx_lock(np->dev); + + /* + * network drivers do not expect to be called if the queue is + * stopped. + */ + if (netif_queue_stopped(np->dev)) { +- np->dev->xmit_lock_owner = -1; +- spin_unlock(&np->dev->xmit_lock); ++ netif_tx_unlock(np->dev); + netpoll_poll(np); + udelay(50); + continue; + } + + status = np->dev->hard_start_xmit(skb, np->dev); +- np->dev->xmit_lock_owner = -1; +- spin_unlock(&np->dev->xmit_lock); ++ netif_tx_unlock(np->dev); + + /* success */ + if(!status) { +Index: tmp-xxx/net/core/pktgen.c +=================================================================== +--- tmp-xxx.orig/net/core/pktgen.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/core/pktgen.c 2006-11-27 10:52:42.000000000 +0000 +@@ -2586,7 +2586,7 @@ + } + } + +- spin_lock_bh(&odev->xmit_lock); ++ netif_tx_lock_bh(odev); + if (!netif_queue_stopped(odev)) { + + atomic_inc(&(pkt_dev->skb->users)); +@@ -2631,7 +2631,7 @@ + pkt_dev->next_tx_ns = 0; + } + +- spin_unlock_bh(&odev->xmit_lock); ++ netif_tx_unlock_bh(odev); + + /* If pkt_dev->count is zero, then run forever */ + if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) { +Index: tmp-xxx/net/core/skbuff.c +=================================================================== +--- tmp-xxx.orig/net/core/skbuff.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/core/skbuff.c 2006-11-27 10:58:31.000000000 +0000 +@@ -164,9 +164,9 @@ + shinfo = skb_shinfo(skb); + atomic_set(&shinfo->dataref, 1); + shinfo->nr_frags = 0; +- shinfo->tso_size = 0; +- shinfo->tso_segs = 0; +- shinfo->ufo_size = 0; ++ shinfo->gso_size = 0; ++ shinfo->gso_segs = 0; ++ shinfo->gso_type = 0; + shinfo->ip6_frag_id = 0; + shinfo->frag_list = NULL; + +@@ -230,9 +230,9 @@ + + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; +- skb_shinfo(skb)->tso_size = 0; +- skb_shinfo(skb)->tso_segs = 0; +- skb_shinfo(skb)->ufo_size = 0; ++ skb_shinfo(skb)->gso_size = 0; ++ skb_shinfo(skb)->gso_segs = 0; ++ skb_shinfo(skb)->gso_type = 0; + skb_shinfo(skb)->frag_list = NULL; + out: + return skb; +@@ -507,9 +507,9 @@ + new->tc_index = old->tc_index; + #endif + atomic_set(&new->users, 1); +- skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size; +- skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs; +- skb_shinfo(new)->ufo_size = skb_shinfo(old)->ufo_size; ++ skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; ++ skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; ++ skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; + } + + /** +@@ -1822,6 +1822,133 @@ + return 0; + } + ++/** ++ * skb_segment - Perform 
protocol segmentation on skb. ++ * @skb: buffer to segment ++ * @features: features for the output path (see dev->features) ++ * ++ * This function performs segmentation on the given skb. It returns ++ * the segment at the given position. It returns NULL if there are ++ * no more segments to generate, or when an error is encountered. ++ */ ++struct sk_buff *skb_segment(struct sk_buff *skb, int features) ++{ ++ struct sk_buff *segs = NULL; ++ struct sk_buff *tail = NULL; ++ unsigned int mss = skb_shinfo(skb)->gso_size; ++ unsigned int doffset = skb->data - skb->mac.raw; ++ unsigned int offset = doffset; ++ unsigned int headroom; ++ unsigned int len; ++ int sg = features & NETIF_F_SG; ++ int nfrags = skb_shinfo(skb)->nr_frags; ++ int err = -ENOMEM; ++ int i = 0; ++ int pos; ++ ++ __skb_push(skb, doffset); ++ headroom = skb_headroom(skb); ++ pos = skb_headlen(skb); ++ ++ do { ++ struct sk_buff *nskb; ++ skb_frag_t *frag; ++ int hsize, nsize; ++ int k; ++ int size; ++ ++ len = skb->len - offset; ++ if (len > mss) ++ len = mss; ++ ++ hsize = skb_headlen(skb) - offset; ++ if (hsize < 0) ++ hsize = 0; ++ nsize = hsize + doffset; ++ if (nsize > len + doffset || !sg) ++ nsize = len + doffset; ++ ++ nskb = alloc_skb(nsize + headroom, GFP_ATOMIC); ++ if (unlikely(!nskb)) ++ goto err; ++ ++ if (segs) ++ tail->next = nskb; ++ else ++ segs = nskb; ++ tail = nskb; ++ ++ nskb->dev = skb->dev; ++ nskb->priority = skb->priority; ++ nskb->protocol = skb->protocol; ++ nskb->dst = dst_clone(skb->dst); ++ memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); ++ nskb->pkt_type = skb->pkt_type; ++ nskb->mac_len = skb->mac_len; ++ ++ skb_reserve(nskb, headroom); ++ nskb->mac.raw = nskb->data; ++ nskb->nh.raw = nskb->data + skb->mac_len; ++ nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw); ++ memcpy(skb_put(nskb, doffset), skb->data, doffset); ++ ++ if (!sg) { ++ nskb->csum = skb_copy_and_csum_bits(skb, offset, ++ skb_put(nskb, len), ++ len, 0); ++ continue; ++ } ++ ++ frag = skb_shinfo(nskb)->frags; ++ k = 0; ++ ++ nskb->ip_summed = CHECKSUM_HW; ++ nskb->csum = skb->csum; ++ memcpy(skb_put(nskb, hsize), skb->data + offset, hsize); ++ ++ while (pos < offset + len) { ++ BUG_ON(i >= nfrags); ++ ++ *frag = skb_shinfo(skb)->frags[i]; ++ get_page(frag->page); ++ size = frag->size; ++ ++ if (pos < offset) { ++ frag->page_offset += offset - pos; ++ frag->size -= offset - pos; ++ } ++ ++ k++; ++ ++ if (pos + size <= offset + len) { ++ i++; ++ pos += size; ++ } else { ++ frag->size -= pos + size - (offset + len); ++ break; ++ } ++ ++ frag++; ++ } ++ ++ skb_shinfo(nskb)->nr_frags = k; ++ nskb->data_len = len - hsize; ++ nskb->len += nskb->data_len; ++ nskb->truesize += nskb->data_len; ++ } while ((offset += len) < skb->len); ++ ++ return segs; ++ ++err: ++ while ((skb = segs)) { ++ segs = skb->next; ++ kfree(skb); ++ } ++ return ERR_PTR(err); ++} ++ ++EXPORT_SYMBOL_GPL(skb_segment); ++ + void __init skb_init(void) + { + skbuff_head_cache = kmem_cache_create("skbuff_head_cache", +Index: tmp-xxx/net/decnet/dn_nsp_in.c +=================================================================== +--- tmp-xxx.orig/net/decnet/dn_nsp_in.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/decnet/dn_nsp_in.c 2006-11-27 10:52:42.000000000 +0000 +@@ -801,8 +801,7 @@ + * We linearize everything except data segments here. 
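
skb_segment() above emits ceil(payload/mss) segments, each carrying a copy of
the doffset header bytes plus at most gso_size bytes of payload; with
NETIF_F_SG the payload pages are shared via get_page() on the original frags,
otherwise the data is copied. A quick standalone check of the per-segment
arithmetic, with hypothetical sizes:

	#include <stdio.h>

	int main(void)
	{
		unsigned int payload = 4000;   /* skb->len - doffset */
		unsigned int mss     = 1460;   /* skb_shinfo(skb)->gso_size */
		unsigned int off, n = 0;

		for (off = 0; off < payload; off += mss) {
			unsigned int len = payload - off < mss ? payload - off : mss;
			printf("segment %u: %u payload bytes\n", n++, len);
		}
		printf("total: %u segments\n", n);   /* 1460 + 1460 + 1080 -> 3 */
		return 0;
	}
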
+ */ + if (cb->nsp_flags & ~0x60) { +- if (unlikely(skb_is_nonlinear(skb)) && +- skb_linearize(skb, GFP_ATOMIC) != 0) ++ if (unlikely(skb_linearize(skb))) + goto free_out; + } + +Index: tmp-xxx/net/decnet/dn_route.c +=================================================================== +--- tmp-xxx.orig/net/decnet/dn_route.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/decnet/dn_route.c 2006-11-27 10:52:42.000000000 +0000 +@@ -629,8 +629,7 @@ + padlen); + + if (flags & DN_RT_PKT_CNTL) { +- if (unlikely(skb_is_nonlinear(skb)) && +- skb_linearize(skb, GFP_ATOMIC) != 0) ++ if (unlikely(skb_linearize(skb))) + goto dump_it; + + switch(flags & DN_RT_CNTL_MSK) { +Index: tmp-xxx/net/ipv4/af_inet.c +=================================================================== +--- tmp-xxx.orig/net/ipv4/af_inet.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/ipv4/af_inet.c 2006-11-27 10:52:42.000000000 +0000 +@@ -68,6 +68,7 @@ + */ + + #include <linux/config.h> ++#include <linux/err.h> + #include <linux/errno.h> + #include <linux/types.h> + #include <linux/socket.h> +@@ -1084,6 +1085,54 @@ + + EXPORT_SYMBOL(inet_sk_rebuild_header); + ++static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) ++{ ++ struct sk_buff *segs = ERR_PTR(-EINVAL); ++ struct iphdr *iph; ++ struct net_protocol *ops; ++ int proto; ++ int ihl; ++ int id; ++ ++ if (!pskb_may_pull(skb, sizeof(*iph))) ++ goto out; ++ ++ iph = skb->nh.iph; ++ ihl = iph->ihl * 4; ++ if (ihl < sizeof(*iph)) ++ goto out; ++ ++ if (!pskb_may_pull(skb, ihl)) ++ goto out; ++ ++ skb->h.raw = __skb_pull(skb, ihl); ++ iph = skb->nh.iph; ++ id = ntohs(iph->id); ++ proto = iph->protocol & (MAX_INET_PROTOS - 1); ++ segs = ERR_PTR(-EPROTONOSUPPORT); ++ ++ rcu_read_lock(); ++ ops = rcu_dereference(inet_protos[proto]); ++ if (ops && ops->gso_segment) ++ segs = ops->gso_segment(skb, features); ++ rcu_read_unlock(); ++ ++ if (!segs || unlikely(IS_ERR(segs))) ++ goto out; ++ ++ skb = segs; ++ do { ++ iph = skb->nh.iph; ++ iph->id = htons(id++); ++ iph->tot_len = htons(skb->len - skb->mac_len); ++ iph->check = 0; ++ iph->check = ip_fast_csum(skb->nh.raw, iph->ihl); ++ } while ((skb = skb->next)); ++ ++out: ++ return segs; ++} ++ + #ifdef CONFIG_IP_MULTICAST + static struct net_protocol igmp_protocol = { + .handler = igmp_rcv, +@@ -1093,6 +1142,7 @@ + static struct net_protocol tcp_protocol = { + .handler = tcp_v4_rcv, + .err_handler = tcp_v4_err, ++ .gso_segment = tcp_tso_segment, + .no_policy = 1, + }; + +@@ -1138,6 +1188,7 @@ + static struct packet_type ip_packet_type = { + .type = __constant_htons(ETH_P_IP), + .func = ip_rcv, ++ .gso_segment = inet_gso_segment, + }; + + static int __init inet_init(void) +Index: tmp-xxx/net/ipv4/ip_output.c +=================================================================== +--- tmp-xxx.orig/net/ipv4/ip_output.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/ipv4/ip_output.c 2006-11-27 10:52:42.000000000 +0000 +@@ -210,8 +210,7 @@ + return dst_output(skb); + } + #endif +- if (skb->len > dst_mtu(skb->dst) && +- !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) ++ if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size) + return ip_fragment(skb, ip_finish_output2); + else + return ip_finish_output2(skb); +@@ -362,7 +361,7 @@ + } + + ip_select_ident_more(iph, &rt->u.dst, sk, +- (skb_shinfo(skb)->tso_segs ?: 1) - 1); ++ (skb_shinfo(skb)->gso_segs ?: 1) - 1); + + /* Add an IP checksum. 
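
inet_gso_segment() above stamps consecutive IP IDs onto the segments,
recomputes tot_len per segment and refreshes the header checksum with
ip_fast_csum(). Because htons() truncates the incremented int, the ID
sequence wraps at 16 bits exactly like the on-the-wire field; a tiny
demonstration of the wrap:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		int id = 65534;                /* ntohs(iph->id) of the super-packet */
		unsigned int seg;

		for (seg = 0; seg < 3; seg++)
			printf("segment %u: id %u\n", seg,
			       (unsigned)(uint16_t)(id + seg));
		/* -> 65534, 65535, 0 */
		return 0;
	}
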
*/ + ip_send_check(iph); +@@ -743,7 +742,8 @@ + (length - transhdrlen)); + if (!err) { + /* specify the length of each IP datagram fragment*/ +- skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen); ++ skb_shinfo(skb)->gso_size = mtu - fragheaderlen; ++ skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4; + __skb_queue_tail(&sk->sk_write_queue, skb); + + return 0; +@@ -839,7 +839,7 @@ + */ + if (transhdrlen && + length + fragheaderlen <= mtu && +- rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) && ++ rt->u.dst.dev->features & NETIF_F_ALL_CSUM && + !exthdrlen) + csummode = CHECKSUM_HW; + +@@ -1086,14 +1086,16 @@ + + inet->cork.length += size; + if ((sk->sk_protocol == IPPROTO_UDP) && +- (rt->u.dst.dev->features & NETIF_F_UFO)) +- skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen); ++ (rt->u.dst.dev->features & NETIF_F_UFO)) { ++ skb_shinfo(skb)->gso_size = mtu - fragheaderlen; ++ skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4; ++ } + + + while (size > 0) { + int i; + +- if (skb_shinfo(skb)->ufo_size) ++ if (skb_shinfo(skb)->gso_size) + len = size; + else { + +Index: tmp-xxx/net/ipv4/ipcomp.c +=================================================================== +--- tmp-xxx.orig/net/ipv4/ipcomp.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/ipv4/ipcomp.c 2006-11-27 10:52:42.000000000 +0000 +@@ -84,7 +84,7 @@ + struct xfrm_decap_state *decap, struct sk_buff *skb) + { + u8 nexthdr; +- int err = 0; ++ int err = -ENOMEM; + struct iphdr *iph; + union { + struct iphdr iph; +@@ -92,11 +92,8 @@ + } tmp_iph; + + +- if ((skb_is_nonlinear(skb) || skb_cloned(skb)) && +- skb_linearize(skb, GFP_ATOMIC) != 0) { +- err = -ENOMEM; ++ if (skb_linearize_cow(skb)) + goto out; +- } + + skb->ip_summed = CHECKSUM_NONE; + +@@ -171,10 +168,8 @@ + goto out_ok; + } + +- if ((skb_is_nonlinear(skb) || skb_cloned(skb)) && +- skb_linearize(skb, GFP_ATOMIC) != 0) { ++ if (skb_linearize_cow(skb)) + goto out_ok; +- } + + err = ipcomp_compress(x, skb); + iph = skb->nh.iph; +Index: tmp-xxx/net/ipv4/tcp.c +=================================================================== +--- tmp-xxx.orig/net/ipv4/tcp.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/ipv4/tcp.c 2006-11-27 10:52:42.000000000 +0000 +@@ -257,6 +257,7 @@ + #include <linux/fs.h> + #include <linux/random.h> + #include <linux/bootmem.h> ++#include <linux/err.h> + + #include <net/icmp.h> + #include <net/tcp.h> +@@ -570,7 +571,7 @@ + skb->ip_summed = CHECKSUM_HW; + tp->write_seq += copy; + TCP_SKB_CB(skb)->end_seq += copy; +- skb_shinfo(skb)->tso_segs = 0; ++ skb_shinfo(skb)->gso_segs = 0; + + if (!copied) + TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; +@@ -621,14 +622,10 @@ + ssize_t res; + struct sock *sk = sock->sk; + +-#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) +- + if (!(sk->sk_route_caps & NETIF_F_SG) || +- !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS)) ++ !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) + return sock_no_sendpage(sock, page, offset, size, flags); + +-#undef TCP_ZC_CSUM_FLAGS +- + lock_sock(sk); + TCP_CHECK_TIMER(sk); + res = do_tcp_sendpages(sk, &page, offset, size, flags); +@@ -725,9 +722,7 @@ + /* + * Check whether we can use HW checksum. 
+ */ +- if (sk->sk_route_caps & +- (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | +- NETIF_F_HW_CSUM)) ++ if (sk->sk_route_caps & NETIF_F_ALL_CSUM) + skb->ip_summed = CHECKSUM_HW; + + skb_entail(sk, tp, skb); +@@ -823,7 +818,7 @@ + + tp->write_seq += copy; + TCP_SKB_CB(skb)->end_seq += copy; +- skb_shinfo(skb)->tso_segs = 0; ++ skb_shinfo(skb)->gso_segs = 0; + + from += copy; + copied += copy; +@@ -2026,6 +2021,71 @@ + } + + ++struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) ++{ ++ struct sk_buff *segs = ERR_PTR(-EINVAL); ++ struct tcphdr *th; ++ unsigned thlen; ++ unsigned int seq; ++ unsigned int delta; ++ unsigned int oldlen; ++ unsigned int len; ++ ++ if (!pskb_may_pull(skb, sizeof(*th))) ++ goto out; ++ ++ th = skb->h.th; ++ thlen = th->doff * 4; ++ if (thlen < sizeof(*th)) ++ goto out; ++ ++ if (!pskb_may_pull(skb, thlen)) ++ goto out; ++ ++ segs = NULL; ++ if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) ++ goto out; ++ ++ oldlen = (u16)~skb->len; ++ __skb_pull(skb, thlen); ++ ++ segs = skb_segment(skb, features); ++ if (IS_ERR(segs)) ++ goto out; ++ ++ len = skb_shinfo(skb)->gso_size; ++ delta = htonl(oldlen + (thlen + len)); ++ ++ skb = segs; ++ th = skb->h.th; ++ seq = ntohl(th->seq); ++ ++ do { ++ th->fin = th->psh = 0; ++ ++ th->check = ~csum_fold(th->check + delta); ++ if (skb->ip_summed != CHECKSUM_HW) ++ th->check = csum_fold(csum_partial(skb->h.raw, thlen, ++ skb->csum)); ++ ++ seq += len; ++ skb = skb->next; ++ th = skb->h.th; ++ ++ th->seq = htonl(seq); ++ th->cwr = 0; ++ } while (skb->next); ++ ++ delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len); ++ th->check = ~csum_fold(th->check + delta); ++ if (skb->ip_summed != CHECKSUM_HW) ++ th->check = csum_fold(csum_partial(skb->h.raw, thlen, ++ skb->csum)); ++ ++out: ++ return segs; ++} ++ + extern void __skb_cb_too_small_for_tcp(int, int); + extern struct tcp_congestion_ops tcp_reno; + +Index: tmp-xxx/net/ipv4/tcp_input.c +=================================================================== +--- tmp-xxx.orig/net/ipv4/tcp_input.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/ipv4/tcp_input.c 2006-11-27 10:52:42.000000000 +0000 +@@ -1072,7 +1072,7 @@ + else + pkt_len = (end_seq - + TCP_SKB_CB(skb)->seq); +- if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->tso_size)) ++ if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size)) + break; + pcount = tcp_skb_pcount(skb); + } +Index: tmp-xxx/net/ipv4/tcp_output.c +=================================================================== +--- tmp-xxx.orig/net/ipv4/tcp_output.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/ipv4/tcp_output.c 2006-11-27 10:52:42.000000000 +0000 +@@ -497,15 +497,17 @@ + /* Avoid the costly divide in the normal + * non-TSO case. 
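
The checksum fix-up in tcp_tso_segment() above is an RFC 1624 style
incremental update: oldlen holds the 16-bit one's complement of the original
TCP length, so adding oldlen + (thlen + len) to the existing pseudo-header sum
swaps the old length field for the new one without touching the payload. A
standalone check of that identity on a toy three-word "header":

	#include <stdio.h>
	#include <stdint.h>

	/* Fold a 32-bit accumulator into 16 bits with end-around carry, like
	 * csum_fold() without the final inversion. */
	static uint16_t fold(uint32_t sum)
	{
		while (sum >> 16)
			sum = (sum & 0xffff) + (sum >> 16);
		return (uint16_t)sum;
	}

	int main(void)
	{
		uint16_t words[3] = { 0x1234, 0xabcd, 4380 };  /* length field last */
		uint32_t sum = 0;
		unsigned int i;

		for (i = 0; i < 3; i++)
			sum += words[i];
		printf("old sum:       0x%04x\n", fold(sum));

		/* Incremental update: add ~old_value + new_value, then refold. */
		sum = fold(sum) + (uint16_t)~4380 + 1480;
		printf("updated sum:   0x%04x\n", fold(sum));

		words[2] = 1480;                           /* recompute from scratch */
		for (sum = 0, i = 0; i < 3; i++)
			sum += words[i];
		printf("recomputed:    0x%04x\n", fold(sum));  /* matches the update */
		return 0;
	}
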
+ */ +- skb_shinfo(skb)->tso_segs = 1; +- skb_shinfo(skb)->tso_size = 0; ++ skb_shinfo(skb)->gso_segs = 1; ++ skb_shinfo(skb)->gso_size = 0; ++ skb_shinfo(skb)->gso_type = 0; + } else { + unsigned int factor; + + factor = skb->len + (mss_now - 1); + factor /= mss_now; +- skb_shinfo(skb)->tso_segs = factor; +- skb_shinfo(skb)->tso_size = mss_now; ++ skb_shinfo(skb)->gso_segs = factor; ++ skb_shinfo(skb)->gso_size = mss_now; ++ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + } + } + +@@ -850,7 +852,7 @@ + + if (!tso_segs || + (tso_segs > 1 && +- skb_shinfo(skb)->tso_size != mss_now)) { ++ tcp_skb_mss(skb) != mss_now)) { + tcp_set_skb_tso_segs(sk, skb, mss_now); + tso_segs = tcp_skb_pcount(skb); + } +@@ -1510,8 +1512,9 @@ + tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { + if (!pskb_trim(skb, 0)) { + TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1; +- skb_shinfo(skb)->tso_segs = 1; +- skb_shinfo(skb)->tso_size = 0; ++ skb_shinfo(skb)->gso_segs = 1; ++ skb_shinfo(skb)->gso_size = 0; ++ skb_shinfo(skb)->gso_type = 0; + skb->ip_summed = CHECKSUM_NONE; + skb->csum = 0; + } +@@ -1716,8 +1719,9 @@ + skb->csum = 0; + TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); + TCP_SKB_CB(skb)->sacked = 0; +- skb_shinfo(skb)->tso_segs = 1; +- skb_shinfo(skb)->tso_size = 0; ++ skb_shinfo(skb)->gso_segs = 1; ++ skb_shinfo(skb)->gso_size = 0; ++ skb_shinfo(skb)->gso_type = 0; + + /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ + TCP_SKB_CB(skb)->seq = tp->write_seq; +@@ -1749,8 +1753,9 @@ + skb->csum = 0; + TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST); + TCP_SKB_CB(skb)->sacked = 0; +- skb_shinfo(skb)->tso_segs = 1; +- skb_shinfo(skb)->tso_size = 0; ++ skb_shinfo(skb)->gso_segs = 1; ++ skb_shinfo(skb)->gso_size = 0; ++ skb_shinfo(skb)->gso_type = 0; + + /* Send it off. */ + TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp); +@@ -1833,8 +1838,9 @@ + TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn; + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; + TCP_SKB_CB(skb)->sacked = 0; +- skb_shinfo(skb)->tso_segs = 1; +- skb_shinfo(skb)->tso_size = 0; ++ skb_shinfo(skb)->gso_segs = 1; ++ skb_shinfo(skb)->gso_size = 0; ++ skb_shinfo(skb)->gso_type = 0; + th->seq = htonl(TCP_SKB_CB(skb)->seq); + th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); + if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ +@@ -1937,8 +1943,9 @@ + TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN; + TCP_ECN_send_syn(sk, tp, buff); + TCP_SKB_CB(buff)->sacked = 0; +- skb_shinfo(buff)->tso_segs = 1; +- skb_shinfo(buff)->tso_size = 0; ++ skb_shinfo(buff)->gso_segs = 1; ++ skb_shinfo(buff)->gso_size = 0; ++ skb_shinfo(buff)->gso_type = 0; + buff->csum = 0; + TCP_SKB_CB(buff)->seq = tp->write_seq++; + TCP_SKB_CB(buff)->end_seq = tp->write_seq; +@@ -2042,8 +2049,9 @@ + buff->csum = 0; + TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK; + TCP_SKB_CB(buff)->sacked = 0; +- skb_shinfo(buff)->tso_segs = 1; +- skb_shinfo(buff)->tso_size = 0; ++ skb_shinfo(buff)->gso_segs = 1; ++ skb_shinfo(buff)->gso_size = 0; ++ skb_shinfo(buff)->gso_type = 0; + + /* Send it off, this clears delayed acks for us. */ + TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp); +@@ -2078,8 +2086,9 @@ + skb->csum = 0; + TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; + TCP_SKB_CB(skb)->sacked = urgent; +- skb_shinfo(skb)->tso_segs = 1; +- skb_shinfo(skb)->tso_size = 0; ++ skb_shinfo(skb)->gso_segs = 1; ++ skb_shinfo(skb)->gso_size = 0; ++ skb_shinfo(skb)->gso_type = 0; + + /* Use a previous sequence. 
This should cause the other + * end to send an ack. Don't queue or clone SKB, just +Index: tmp-xxx/net/ipv4/xfrm4_output.c +=================================================================== +--- tmp-xxx.orig/net/ipv4/xfrm4_output.c 2006-11-27 10:52:32.000000000 +0000 ++++ tmp-xxx/net/ipv4/xfrm4_output.c 2006-11-27 10:52:42.000000000 +0000 +@@ -9,6 +9,8 @@ + */ + + #include <linux/compiler.h> ++#include <linux/if_ether.h> ++#include <linux/kernel.h> + #include <linux/skbuff.h> + #include <linux/spinlock.h> + #include <linux/netfilter_ipv4.h> +@@ -158,16 +160,10 @@ + goto out_exit; + } + +-static int xfrm4_output_finish(struct sk_buff *skb) ++static int xfrm4_output_finish2(struct sk_buff *skb) + { + int err; + +-#ifdef CONFIG_NETFILTER +- if (!skb->dst->xfrm) { +- IPCB(skb)->flags |= IPSKB_REROUTED; +- return dst_output(skb); +- } +-#endif + while (likely((err = xfrm4_output_one(skb)) == 0)) { + nf_reset(skb); + +@@ -180,7 +176,7 @@ + return dst_output(skb); + + err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL, +- skb->dst->dev, xfrm4_output_finish); ++ skb->dst->dev, xfrm4_output_finish2); + if (unlikely(err != 1)) + break; + } +@@ -188,6 +184,48 @@ + return err; + } + ++static int xfrm4_output_finish(struct sk_buff *skb) ++{ ++ struct sk_buff *segs; ++ ++#ifdef CONFIG_NETFILTER ++ if (!skb->dst->xfrm) { ++ IPCB(skb)->flags |= IPSKB_REROUTED; ++ return dst_output(skb); ++ } ++#endif ++ ++ if (!skb_shinfo(skb)->gso_size) ++ return xfrm4_output_finish2(skb); ++ ++ skb->protocol = htons(ETH_P_IP); ++ segs = skb_gso_segment(skb, 0); ++ kfree_skb(skb); ++ if (unlikely(IS_ERR(segs))) ++ return PTR_ERR(segs); ++ ++ do { ++ struct sk_buff *nskb = segs->next; ++ int err; ++ ++ segs->next = NULL; ++ err = xfrm4_output_finish2(segs); ++ ++ if (unlikely(err)) { ++ while ((segs = nskb)) { ++ nskb = segs->next; ++ segs->next = NULL; ++ kfree_skb(segs); ++ } ++ return err; ++ } ++ ++ segs = nskb; ++ } while (segs); ++ ++ return 0; ++} ++ + int xfrm4_output(struct sk_buff *skb) + { + return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, +Index: tmp-xxx/net/ipv6/ip6_output.c +=================================================================== +--- tmp-xxx.orig/net/ipv6/ip6_output.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/ipv6/ip6_output.c 2006-11-27 10:52:42.000000000 +0000 +@@ -147,7 +147,7 @@ + + int ip6_output(struct sk_buff *skb) + { +- if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->ufo_size) || ++ if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size) || + dst_allfrag(skb->dst)) + return ip6_fragment(skb, ip6_output2); + else +@@ -829,8 +829,9 @@ + struct frag_hdr fhdr; + + /* specify the length of each IP datagram fragment*/ +- skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen) - +- sizeof(struct frag_hdr); ++ skb_shinfo(skb)->gso_size = mtu - fragheaderlen - ++ sizeof(struct frag_hdr); ++ skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4; + ipv6_select_ident(skb, &fhdr); + skb_shinfo(skb)->ip6_frag_id = fhdr.identification; + __skb_queue_tail(&sk->sk_write_queue, skb); +Index: tmp-xxx/net/ipv6/ipcomp6.c +=================================================================== +--- tmp-xxx.orig/net/ipv6/ipcomp6.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/ipv6/ipcomp6.c 2006-11-27 10:52:42.000000000 +0000 +@@ -64,7 +64,7 @@ + + static int ipcomp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb) + { +- int err = 0; ++ int err = -ENOMEM; + u8 nexthdr = 0; + int hdr_len = skb->h.raw - skb->nh.raw; + unsigned char 
*tmp_hdr = NULL; +@@ -75,11 +75,8 @@ + struct crypto_tfm *tfm; + int cpu; + +- if ((skb_is_nonlinear(skb) || skb_cloned(skb)) && +- skb_linearize(skb, GFP_ATOMIC) != 0) { +- err = -ENOMEM; ++ if (skb_linearize_cow(skb)) + goto out; +- } + + skb->ip_summed = CHECKSUM_NONE; + +@@ -158,10 +155,8 @@ + goto out_ok; + } + +- if ((skb_is_nonlinear(skb) || skb_cloned(skb)) && +- skb_linearize(skb, GFP_ATOMIC) != 0) { ++ if (skb_linearize_cow(skb)) + goto out_ok; +- } + + /* compression */ + plen = skb->len - hdr_len; +Index: tmp-xxx/net/ipv6/xfrm6_output.c +=================================================================== +--- tmp-xxx.orig/net/ipv6/xfrm6_output.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/ipv6/xfrm6_output.c 2006-11-27 10:52:42.000000000 +0000 +@@ -151,7 +151,7 @@ + goto out_exit; + } + +-static int xfrm6_output_finish(struct sk_buff *skb) ++static int xfrm6_output_finish2(struct sk_buff *skb) + { + int err; + +@@ -167,7 +167,7 @@ + return dst_output(skb); + + err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL, +- skb->dst->dev, xfrm6_output_finish); ++ skb->dst->dev, xfrm6_output_finish2); + if (unlikely(err != 1)) + break; + } +@@ -175,6 +175,41 @@ + return err; + } + ++static int xfrm6_output_finish(struct sk_buff *skb) ++{ ++ struct sk_buff *segs; ++ ++ if (!skb_shinfo(skb)->gso_size) ++ return xfrm6_output_finish2(skb); ++ ++ skb->protocol = htons(ETH_P_IP); ++ segs = skb_gso_segment(skb, 0); ++ kfree_skb(skb); ++ if (unlikely(IS_ERR(segs))) ++ return PTR_ERR(segs); ++ ++ do { ++ struct sk_buff *nskb = segs->next; ++ int err; ++ ++ segs->next = NULL; ++ err = xfrm6_output_finish2(segs); ++ ++ if (unlikely(err)) { ++ while ((segs = nskb)) { ++ nskb = segs->next; ++ segs->next = NULL; ++ kfree_skb(segs); ++ } ++ return err; ++ } ++ ++ segs = nskb; ++ } while (segs); ++ ++ return 0; ++} ++ + int xfrm6_output(struct sk_buff *skb) + { + return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev, +Index: tmp-xxx/net/sched/sch_generic.c +=================================================================== +--- tmp-xxx.orig/net/sched/sch_generic.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/sched/sch_generic.c 2006-11-27 10:52:42.000000000 +0000 +@@ -72,9 +72,9 @@ + dev->queue_lock serializes queue accesses for this device + AND dev->qdisc pointer itself. + +- dev->xmit_lock serializes accesses to device driver. ++ netif_tx_lock serializes accesses to device driver. + +- dev->queue_lock and dev->xmit_lock are mutually exclusive, ++ dev->queue_lock and netif_tx_lock are mutually exclusive, + if one is grabbed, another must be free. + */ + +@@ -90,14 +90,17 @@ + NOTE: Called under dev->queue_lock with locally disabled BH. + */ + +-int qdisc_restart(struct net_device *dev) ++static inline int qdisc_restart(struct net_device *dev) + { + struct Qdisc *q = dev->qdisc; + struct sk_buff *skb; + + /* Dequeue packet */ +- if ((skb = q->dequeue(q)) != NULL) { ++ if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) { + unsigned nolock = (dev->features & NETIF_F_LLTX); ++ ++ dev->gso_skb = NULL; ++ + /* + * When the driver has LLTX set it does its own locking + * in start_xmit. No need to add additional overhead by +@@ -108,7 +111,7 @@ + * will be requeued. + */ + if (!nolock) { +- if (!spin_trylock(&dev->xmit_lock)) { ++ if (!netif_tx_trylock(dev)) { + collision: + /* So, someone grabbed the driver. */ + +@@ -126,8 +129,6 @@ + __get_cpu_var(netdev_rx_stat).cpu_collision++; + goto requeue; + } +- /* Remember that the driver is grabbed by us. 
*/ +- dev->xmit_lock_owner = smp_processor_id(); + } + + { +@@ -136,14 +137,11 @@ + + if (!netif_queue_stopped(dev)) { + int ret; +- if (netdev_nit) +- dev_queue_xmit_nit(skb, dev); + +- ret = dev->hard_start_xmit(skb, dev); ++ ret = dev_hard_start_xmit(skb, dev); + if (ret == NETDEV_TX_OK) { + if (!nolock) { +- dev->xmit_lock_owner = -1; +- spin_unlock(&dev->xmit_lock); ++ netif_tx_unlock(dev); + } + spin_lock(&dev->queue_lock); + return -1; +@@ -157,8 +155,7 @@ + /* NETDEV_TX_BUSY - we need to requeue */ + /* Release the driver */ + if (!nolock) { +- dev->xmit_lock_owner = -1; +- spin_unlock(&dev->xmit_lock); ++ netif_tx_unlock(dev); + } + spin_lock(&dev->queue_lock); + q = dev->qdisc; +@@ -175,7 +172,10 @@ + */ + + requeue: +- q->ops->requeue(skb, q); ++ if (skb->next) ++ dev->gso_skb = skb; ++ else ++ q->ops->requeue(skb, q); + netif_schedule(dev); + return 1; + } +@@ -183,11 +183,23 @@ + return q->q.qlen; + } + ++void __qdisc_run(struct net_device *dev) ++{ ++ if (unlikely(dev->qdisc == &noop_qdisc)) ++ goto out; ++ ++ while (qdisc_restart(dev) < 0 && !netif_queue_stopped(dev)) ++ /* NOTHING */; ++ ++out: ++ clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); ++} ++ + static void dev_watchdog(unsigned long arg) + { + struct net_device *dev = (struct net_device *)arg; + +- spin_lock(&dev->xmit_lock); ++ netif_tx_lock(dev); + if (dev->qdisc != &noop_qdisc) { + if (netif_device_present(dev) && + netif_running(dev) && +@@ -201,7 +213,7 @@ + dev_hold(dev); + } + } +- spin_unlock(&dev->xmit_lock); ++ netif_tx_unlock(dev); + + dev_put(dev); + } +@@ -225,17 +237,17 @@ + + static void dev_watchdog_up(struct net_device *dev) + { +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + __netdev_watchdog_up(dev); +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + } + + static void dev_watchdog_down(struct net_device *dev) + { +- spin_lock_bh(&dev->xmit_lock); ++ netif_tx_lock_bh(dev); + if (del_timer(&dev->watchdog_timer)) + __dev_put(dev); +- spin_unlock_bh(&dev->xmit_lock); ++ netif_tx_unlock_bh(dev); + } + + void netif_carrier_on(struct net_device *dev) +@@ -577,10 +589,17 @@ + + dev_watchdog_down(dev); + +- while (test_bit(__LINK_STATE_SCHED, &dev->state)) ++ /* Wait for outstanding dev_queue_xmit calls. */ ++ synchronize_rcu(); ++ ++ /* Wait for outstanding qdisc_run calls. 
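
qdisc_run() in the pkt_sched.h hunk earlier and __qdisc_run() above form a
single-runner guard: test_and_set_bit() on __LINK_STATE_QDISC_RUNNING lets
exactly one CPU drain the queue while concurrent callers return immediately.
A user-space analogue with C11 atomics; the real code also bails out early
when the queue is stopped or the qdisc is noop_qdisc:

	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_flag qdisc_running = ATOMIC_FLAG_INIT;

	static void qdisc_run_demo(void)
	{
		if (atomic_flag_test_and_set(&qdisc_running))
			return;                    /* another caller is the runner */

		/* __qdisc_run() would loop over qdisc_restart() here. */
		puts("draining queue");

		atomic_flag_clear(&qdisc_running); /* QDISC_RUNNING bit cleared */
	}

	int main(void)
	{
		qdisc_run_demo();
		qdisc_run_demo();   /* runs again: the flag was cleared above */
		return 0;
	}
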
*/ ++ while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) + yield(); + +- spin_unlock_wait(&dev->xmit_lock); ++ if (dev->gso_skb) { ++ kfree_skb(dev->gso_skb); ++ dev->gso_skb = NULL; ++ } + } + + void dev_init_scheduler(struct net_device *dev) +@@ -622,6 +641,5 @@ + EXPORT_SYMBOL(qdisc_alloc); + EXPORT_SYMBOL(qdisc_destroy); + EXPORT_SYMBOL(qdisc_reset); +-EXPORT_SYMBOL(qdisc_restart); + EXPORT_SYMBOL(qdisc_lock_tree); + EXPORT_SYMBOL(qdisc_unlock_tree); +Index: tmp-xxx/net/sched/sch_teql.c +=================================================================== +--- tmp-xxx.orig/net/sched/sch_teql.c 2006-11-15 10:38:39.000000000 +0000 ++++ tmp-xxx/net/sched/sch_teql.c 2006-11-27 10:52:42.000000000 +0000 +@@ -302,20 +302,17 @@ + + switch (teql_resolve(skb, skb_res, slave)) { + case 0: +- if (spin_trylock(&slave->xmit_lock)) { +- slave->xmit_lock_owner = smp_processor_id(); ++ if (netif_tx_trylock(slave)) { + if (!netif_queue_stopped(slave) && + slave->hard_start_xmit(skb, slave) == 0) { +- slave->xmit_lock_owner = -1; +- spin_unlock(&slave->xmit_lock); ++ netif_tx_unlock(slave); + master->slaves = NEXT_SLAVE(q); + netif_wake_queue(dev); + master->stats.tx_packets++; + master->stats.tx_bytes += len; + return 0; + } +- slave->xmit_lock_owner = -1; +- spin_unlock(&slave->xmit_lock); ++ netif_tx_unlock(slave); + } + if (netif_queue_stopped(dev)) + busy = 1; diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/net-gso-1-check-dodgy.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/net-gso-1-check-dodgy.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,27 @@ +diff -pruN ../orig-linux-2.6.16.29/net/ipv4/tcp.c ./net/ipv4/tcp.c +--- ../orig-linux-2.6.16.29/net/ipv4/tcp.c 2006-09-19 13:59:20.000000000 +0100 ++++ ./net/ipv4/tcp.c 2006-09-19 13:59:42.000000000 +0100 +@@ -2042,13 +2042,19 @@ struct sk_buff *tcp_tso_segment(struct s + if (!pskb_may_pull(skb, thlen)) + goto out; + +- segs = NULL; +- if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) +- goto out; +- + oldlen = (u16)~skb->len; + __skb_pull(skb, thlen); + ++ if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { ++ /* Packet is from an untrusted source, reset gso_segs. 
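
The check-dodgy fix above recomputes gso_segs rather than trusting a value
supplied by an untrusted source such as a guest frontend; since this runs
after __skb_pull(skb, thlen), skb->len is pure payload and the count is a
plain ceiling division. For example, assuming a hypothetical 2921-byte
payload:

	#include <stdio.h>

	int main(void)
	{
		unsigned int len = 2921, mss = 1460;

		printf("gso_segs = %u\n", (len + mss - 1) / mss);   /* -> 3 */
		return 0;
	}
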
*/ ++ int mss = skb_shinfo(skb)->gso_size; ++ ++ skb_shinfo(skb)->gso_segs = (skb->len + mss - 1) / mss; ++ ++ segs = NULL; ++ goto out; ++ } ++ + segs = skb_segment(skb, features); + if (IS_ERR(segs)) + goto out; diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/net-gso-2-checksum-fix.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/net-gso-2-checksum-fix.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,451 @@ +diff -pruN ../orig-linux-2.6.16.29/drivers/net/bnx2.c ./drivers/net/bnx2.c +--- ../orig-linux-2.6.16.29/drivers/net/bnx2.c 2006-09-19 13:59:20.000000000 +0100 ++++ ./drivers/net/bnx2.c 2006-09-19 13:59:46.000000000 +0100 +@@ -1593,7 +1593,7 @@ bnx2_tx_int(struct bnx2 *bp) + skb = tx_buf->skb; + #ifdef BCM_TSO + /* partial BD completions possible with TSO packets */ +- if (skb_shinfo(skb)->gso_size) { ++ if (skb_is_gso(skb)) { + u16 last_idx, last_ring_idx; + + last_idx = sw_cons + +diff -pruN ../orig-linux-2.6.16.29/drivers/net/chelsio/sge.c ./drivers/net/chelsio/sge.c +--- ../orig-linux-2.6.16.29/drivers/net/chelsio/sge.c 2006-09-19 13:59:20.000000000 +0100 ++++ ./drivers/net/chelsio/sge.c 2006-09-19 13:59:46.000000000 +0100 +@@ -1419,7 +1419,7 @@ int t1_start_xmit(struct sk_buff *skb, s + struct cpl_tx_pkt *cpl; + + #ifdef NETIF_F_TSO +- if (skb_shinfo(skb)->gso_size) { ++ if (skb_is_gso(skb)) { + int eth_type; + struct cpl_tx_pkt_lso *hdr; + +diff -pruN ../orig-linux-2.6.16.29/drivers/net/e1000/e1000_main.c ./drivers/net/e1000/e1000_main.c +--- ../orig-linux-2.6.16.29/drivers/net/e1000/e1000_main.c 2006-09-19 13:59:20.000000000 +0100 ++++ ./drivers/net/e1000/e1000_main.c 2006-09-19 13:59:46.000000000 +0100 +@@ -2526,7 +2526,7 @@ e1000_tso(struct e1000_adapter *adapter, + uint8_t ipcss, ipcso, tucss, tucso, hdr_len; + int err; + +- if (skb_shinfo(skb)->gso_size) { ++ if (skb_is_gso(skb)) { + if (skb_header_cloned(skb)) { + err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + if (err) +@@ -2651,7 +2651,7 @@ e1000_tx_map(struct e1000_adapter *adapt + * tso gets written back prematurely before the data is fully + * DMAd to the controller */ + if (!skb->data_len && tx_ring->last_tx_tso && +- !skb_shinfo(skb)->gso_size) { ++ !skb_is_gso(skb)) { + tx_ring->last_tx_tso = 0; + size -= 4; + } +@@ -2934,8 +2934,7 @@ e1000_xmit_frame(struct sk_buff *skb, st + + #ifdef NETIF_F_TSO + /* Controller Erratum workaround */ +- if (!skb->data_len && tx_ring->last_tx_tso && +- !skb_shinfo(skb)->gso_size) ++ if (!skb->data_len && tx_ring->last_tx_tso && !skb_is_gso(skb)) + count++; + #endif + +diff -pruN ../orig-linux-2.6.16.29/drivers/net/forcedeth.c ./drivers/net/forcedeth.c +--- ../orig-linux-2.6.16.29/drivers/net/forcedeth.c 2006-09-19 13:59:20.000000000 +0100 ++++ ./drivers/net/forcedeth.c 2006-09-19 13:59:46.000000000 +0100 +@@ -1105,7 +1105,7 @@ static int nv_start_xmit(struct sk_buff + np->tx_skbuff[nr] = skb; + + #ifdef NETIF_F_TSO +- if (skb_shinfo(skb)->gso_size) ++ if (skb_is_gso(skb)) + tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT); + else + #endif +diff -pruN ../orig-linux-2.6.16.29/drivers/net/ixgb/ixgb_main.c ./drivers/net/ixgb/ixgb_main.c +--- ../orig-linux-2.6.16.29/drivers/net/ixgb/ixgb_main.c 2006-09-19 13:59:20.000000000 +0100 ++++ ./drivers/net/ixgb/ixgb_main.c 2006-09-19 13:59:46.000000000 +0100 +@@ -1163,7 +1163,7 @@ ixgb_tso(struct ixgb_adapter *adapter, s + uint16_t ipcse, tucse, mss; + int err; + +- if(likely(skb_shinfo(skb)->gso_size)) { ++ if (likely(skb_is_gso(skb))) { + if (skb_header_cloned(skb)) { + 
err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + if (err) +diff -pruN ../orig-linux-2.6.16.29/drivers/net/loopback.c ./drivers/net/loopback.c +--- ../orig-linux-2.6.16.29/drivers/net/loopback.c 2006-09-19 13:59:20.000000000 +0100 ++++ ./drivers/net/loopback.c 2006-09-19 13:59:46.000000000 +0100 +@@ -139,7 +139,7 @@ static int loopback_xmit(struct sk_buff + #endif + + #ifdef LOOPBACK_TSO +- if (skb_shinfo(skb)->gso_size) { ++ if (skb_is_gso(skb)) { + BUG_ON(skb->protocol != htons(ETH_P_IP)); + BUG_ON(skb->nh.iph->protocol != IPPROTO_TCP); + +diff -pruN ../orig-linux-2.6.16.29/drivers/net/sky2.c ./drivers/net/sky2.c +--- ../orig-linux-2.6.16.29/drivers/net/sky2.c 2006-09-19 13:59:20.000000000 +0100 ++++ ./drivers/net/sky2.c 2006-09-19 13:59:46.000000000 +0100 +@@ -1125,7 +1125,7 @@ static unsigned tx_le_req(const struct s + count = sizeof(dma_addr_t) / sizeof(u32); + count += skb_shinfo(skb)->nr_frags * count; + +- if (skb_shinfo(skb)->gso_size) ++ if (skb_is_gso(skb)) + ++count; + + if (skb->ip_summed == CHECKSUM_HW) +diff -pruN ../orig-linux-2.6.16.29/drivers/net/typhoon.c ./drivers/net/typhoon.c +--- ../orig-linux-2.6.16.29/drivers/net/typhoon.c 2006-09-19 13:59:20.000000000 +0100 ++++ ./drivers/net/typhoon.c 2006-09-19 13:59:46.000000000 +0100 +@@ -805,7 +805,7 @@ typhoon_start_tx(struct sk_buff *skb, st + * If problems develop with TSO, check this first. + */ + numDesc = skb_shinfo(skb)->nr_frags + 1; +- if(skb_tso_size(skb)) ++ if (skb_is_gso(skb)) + numDesc++; + + /* When checking for free space in the ring, we need to also +@@ -845,7 +845,7 @@ typhoon_start_tx(struct sk_buff *skb, st + TYPHOON_TX_PF_VLAN_TAG_SHIFT); + } + +- if(skb_tso_size(skb)) { ++ if (skb_is_gso(skb)) { + first_txd->processFlags |= TYPHOON_TX_PF_TCP_SEGMENT; + first_txd->numDesc++; + +diff -pruN ../orig-linux-2.6.16.29/drivers/s390/net/qeth_main.c ./drivers/s390/net/qeth_main.c +--- ../orig-linux-2.6.16.29/drivers/s390/net/qeth_main.c 2006-09-19 13:59:20.000000000 +0100 ++++ ./drivers/s390/net/qeth_main.c 2006-09-19 13:59:46.000000000 +0100 +@@ -4454,7 +4454,7 @@ qeth_send_packet(struct qeth_card *card, + queue = card->qdio.out_qs + [qeth_get_priority_queue(card, skb, ipv, cast_type)]; + +- if (skb_shinfo(skb)->gso_size) ++ if (skb_is_gso(skb)) + large_send = card->options.large_send; + + /*are we able to do TSO ? If so ,prepare and send it from here */ +@@ -4501,8 +4501,7 @@ qeth_send_packet(struct qeth_card *card, + card->stats.tx_packets++; + card->stats.tx_bytes += skb->len; + #ifdef CONFIG_QETH_PERF_STATS +- if (skb_shinfo(skb)->gso_size && +- !(large_send == QETH_LARGE_SEND_NO)) { ++ if (skb_is_gso(skb) && !(large_send == QETH_LARGE_SEND_NO)) { + card->perf_stats.large_send_bytes += skb->len; + card->perf_stats.large_send_cnt++; + } +diff -pruN ../orig-linux-2.6.16.29/include/linux/netdevice.h ./include/linux/netdevice.h +--- ../orig-linux-2.6.16.29/include/linux/netdevice.h 2006-09-19 13:59:20.000000000 +0100 ++++ ./include/linux/netdevice.h 2006-09-19 13:59:46.000000000 +0100 +@@ -541,6 +541,7 @@ struct packet_type { + struct net_device *); + struct sk_buff *(*gso_segment)(struct sk_buff *skb, + int features); ++ int (*gso_send_check)(struct sk_buff *skb); + void *af_packet_priv; + struct list_head list; + }; +@@ -1001,14 +1002,15 @@ extern void linkwatch_run_queue(void); + + static inline int skb_gso_ok(struct sk_buff *skb, int features) + { +- int feature = skb_shinfo(skb)->gso_size ? 
+- skb_shinfo(skb)->gso_type << NETIF_F_GSO_SHIFT : 0; ++ int feature = skb_shinfo(skb)->gso_type << NETIF_F_GSO_SHIFT; + return (features & feature) == feature; + } + + static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) + { +- return !skb_gso_ok(skb, dev->features); ++ return skb_is_gso(skb) && ++ (!skb_gso_ok(skb, dev->features) || ++ unlikely(skb->ip_summed != CHECKSUM_HW)); + } + + #endif /* __KERNEL__ */ +diff -pruN ../orig-linux-2.6.16.29/include/linux/skbuff.h ./include/linux/skbuff.h +--- ../orig-linux-2.6.16.29/include/linux/skbuff.h 2006-09-19 13:59:20.000000000 +0100 ++++ ./include/linux/skbuff.h 2006-09-19 13:59:46.000000000 +0100 +@@ -1403,5 +1403,10 @@ static inline void nf_bridge_get(struct + static inline void nf_reset(struct sk_buff *skb) {} + #endif /* CONFIG_NETFILTER */ + ++static inline int skb_is_gso(const struct sk_buff *skb) ++{ ++ return skb_shinfo(skb)->gso_size; ++} ++ + #endif /* __KERNEL__ */ + #endif /* _LINUX_SKBUFF_H */ +diff -pruN ../orig-linux-2.6.16.29/include/net/protocol.h ./include/net/protocol.h +--- ../orig-linux-2.6.16.29/include/net/protocol.h 2006-09-19 13:59:20.000000000 +0100 ++++ ./include/net/protocol.h 2006-09-19 13:59:46.000000000 +0100 +@@ -37,6 +37,7 @@ + struct net_protocol { + int (*handler)(struct sk_buff *skb); + void (*err_handler)(struct sk_buff *skb, u32 info); ++ int (*gso_send_check)(struct sk_buff *skb); + struct sk_buff *(*gso_segment)(struct sk_buff *skb, + int features); + int no_policy; +diff -pruN ../orig-linux-2.6.16.29/include/net/tcp.h ./include/net/tcp.h +--- ../orig-linux-2.6.16.29/include/net/tcp.h 2006-09-19 13:59:20.000000000 +0100 ++++ ./include/net/tcp.h 2006-09-19 13:59:46.000000000 +0100 +@@ -1063,6 +1063,7 @@ extern struct request_sock_ops tcp_reque + + extern int tcp_v4_destroy_sock(struct sock *sk); + ++extern int tcp_v4_gso_send_check(struct sk_buff *skb); + extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features); + + #ifdef CONFIG_PROC_FS +diff -pruN ../orig-linux-2.6.16.29/net/bridge/br_forward.c ./net/bridge/br_forward.c +--- ../orig-linux-2.6.16.29/net/bridge/br_forward.c 2006-09-19 13:59:20.000000000 +0100 ++++ ./net/bridge/br_forward.c 2006-09-19 13:59:46.000000000 +0100 +@@ -32,7 +32,7 @@ static inline int should_deliver(const s + int br_dev_queue_push_xmit(struct sk_buff *skb) + { + /* drop mtu oversized packets except tso */ +- if (skb->len > skb->dev->mtu && !skb_shinfo(skb)->gso_size) ++ if (skb->len > skb->dev->mtu && !skb_is_gso(skb)) + kfree_skb(skb); + else { + #ifdef CONFIG_BRIDGE_NETFILTER +diff -pruN ../orig-linux-2.6.16.29/net/bridge/br_netfilter.c ./net/bridge/br_netfilter.c +--- ../orig-linux-2.6.16.29/net/bridge/br_netfilter.c 2006-09-19 13:59:20.000000000 +0100 ++++ ./net/bridge/br_netfilter.c 2006-09-19 13:59:46.000000000 +0100 +@@ -743,7 +743,7 @@ static int br_nf_dev_queue_xmit(struct s + { + if (skb->protocol == htons(ETH_P_IP) && + skb->len > skb->dev->mtu && +- !skb_shinfo(skb)->gso_size) ++ !skb_is_gso(skb)) + return ip_fragment(skb, br_dev_queue_push_xmit); + else + return br_dev_queue_push_xmit(skb); +diff -pruN ../orig-linux-2.6.16.29/net/core/dev.c ./net/core/dev.c +--- ../orig-linux-2.6.16.29/net/core/dev.c 2006-09-19 13:59:20.000000000 +0100 ++++ ./net/core/dev.c 2006-09-19 13:59:46.000000000 +0100 +@@ -1083,9 +1083,17 @@ int skb_checksum_help(struct sk_buff *sk + unsigned int csum; + int ret = 0, offset = skb->h.raw - skb->data; + +- if (inward) { +- skb->ip_summed = CHECKSUM_NONE; +- goto out; ++ if (inward) ++ goto 
out_set_summed; ++ ++ if (unlikely(skb_shinfo(skb)->gso_size)) { ++ static int warned; ++ ++ WARN_ON(!warned); ++ warned = 1; ++ ++ /* Let GSO fix up the checksum. */ ++ goto out_set_summed; + } + + if (skb_cloned(skb)) { +@@ -1102,6 +1110,8 @@ int skb_checksum_help(struct sk_buff *sk + BUG_ON(skb->csum + 2 > offset); + + *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum); ++ ++out_set_summed: + skb->ip_summed = CHECKSUM_NONE; + out: + return ret; +@@ -1122,17 +1132,35 @@ struct sk_buff *skb_gso_segment(struct s + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); + struct packet_type *ptype; + int type = skb->protocol; ++ int err; + + BUG_ON(skb_shinfo(skb)->frag_list); +- BUG_ON(skb->ip_summed != CHECKSUM_HW); + + skb->mac.raw = skb->data; + skb->mac_len = skb->nh.raw - skb->data; + __skb_pull(skb, skb->mac_len); + ++ if (unlikely(skb->ip_summed != CHECKSUM_HW)) { ++ static int warned; ++ ++ WARN_ON(!warned); ++ warned = 1; ++ ++ if (skb_header_cloned(skb) && ++ (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) ++ return ERR_PTR(err); ++ } ++ + rcu_read_lock(); + list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { + if (ptype->type == type && !ptype->dev && ptype->gso_segment) { ++ if (unlikely(skb->ip_summed != CHECKSUM_HW)) { ++ err = ptype->gso_send_check(skb); ++ segs = ERR_PTR(err); ++ if (err || skb_gso_ok(skb, features)) ++ break; ++ __skb_push(skb, skb->data - skb->nh.raw); ++ } + segs = ptype->gso_segment(skb, features); + break; + } +diff -pruN ../orig-linux-2.6.16.29/net/ipv4/af_inet.c ./net/ipv4/af_inet.c +--- ../orig-linux-2.6.16.29/net/ipv4/af_inet.c 2006-09-19 13:59:20.000000000 +0100 ++++ ./net/ipv4/af_inet.c 2006-09-19 13:59:46.000000000 +0100 +@@ -1085,6 +1085,40 @@ int inet_sk_rebuild_header(struct sock * + + EXPORT_SYMBOL(inet_sk_rebuild_header); + ++static int inet_gso_send_check(struct sk_buff *skb) ++{ ++ struct iphdr *iph; ++ struct net_protocol *ops; ++ int proto; ++ int ihl; ++ int err = -EINVAL; ++ ++ if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) ++ goto out; ++ ++ iph = skb->nh.iph; ++ ihl = iph->ihl * 4; ++ if (ihl < sizeof(*iph)) ++ goto out; ++ ++ if (unlikely(!pskb_may_pull(skb, ihl))) ++ goto out; ++ ++ skb->h.raw = __skb_pull(skb, ihl); ++ iph = skb->nh.iph; ++ proto = iph->protocol & (MAX_INET_PROTOS - 1); ++ err = -EPROTONOSUPPORT; ++ ++ rcu_read_lock(); ++ ops = rcu_dereference(inet_protos[proto]); ++ if (likely(ops && ops->gso_send_check)) ++ err = ops->gso_send_check(skb); ++ rcu_read_unlock(); ++ ++out: ++ return err; ++} ++ + static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) + { + struct sk_buff *segs = ERR_PTR(-EINVAL); +@@ -1142,6 +1176,7 @@ static struct net_protocol igmp_protocol + static struct net_protocol tcp_protocol = { + .handler = tcp_v4_rcv, + .err_handler = tcp_v4_err, ++ .gso_send_check = tcp_v4_gso_send_check, + .gso_segment = tcp_tso_segment, + .no_policy = 1, + }; +@@ -1188,6 +1223,7 @@ static int ipv4_proc_init(void); + static struct packet_type ip_packet_type = { + .type = __constant_htons(ETH_P_IP), + .func = ip_rcv, ++ .gso_send_check = inet_gso_send_check, + .gso_segment = inet_gso_segment, + }; + +diff -pruN ../orig-linux-2.6.16.29/net/ipv4/ip_output.c ./net/ipv4/ip_output.c +--- ../orig-linux-2.6.16.29/net/ipv4/ip_output.c 2006-09-19 13:59:20.000000000 +0100 ++++ ./net/ipv4/ip_output.c 2006-09-19 13:59:46.000000000 +0100 +@@ -210,7 +210,7 @@ static inline int ip_finish_output(struc + return dst_output(skb); + } + #endif +- if (skb->len > dst_mtu(skb->dst) && 
!skb_shinfo(skb)->gso_size) ++ if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) + return ip_fragment(skb, ip_finish_output2); + else + return ip_finish_output2(skb); +@@ -1095,7 +1095,7 @@ ssize_t ip_append_page(struct sock *sk, + while (size > 0) { + int i; + +- if (skb_shinfo(skb)->gso_size) ++ if (skb_is_gso(skb)) + len = size; + else { + +diff -pruN ../orig-linux-2.6.16.29/net/ipv4/tcp_ipv4.c ./net/ipv4/tcp_ipv4.c +--- ../orig-linux-2.6.16.29/net/ipv4/tcp_ipv4.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./net/ipv4/tcp_ipv4.c 2006-09-19 13:59:46.000000000 +0100 +@@ -495,6 +495,24 @@ void tcp_v4_send_check(struct sock *sk, + } + } + ++int tcp_v4_gso_send_check(struct sk_buff *skb) ++{ ++ struct iphdr *iph; ++ struct tcphdr *th; ++ ++ if (!pskb_may_pull(skb, sizeof(*th))) ++ return -EINVAL; ++ ++ iph = skb->nh.iph; ++ th = skb->h.th; ++ ++ th->check = 0; ++ th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0); ++ skb->csum = offsetof(struct tcphdr, check); ++ skb->ip_summed = CHECKSUM_HW; ++ return 0; ++} ++ + /* + * This routine will send an RST to the other tcp. + * +diff -pruN ../orig-linux-2.6.16.29/net/ipv4/xfrm4_output.c ./net/ipv4/xfrm4_output.c +--- ../orig-linux-2.6.16.29/net/ipv4/xfrm4_output.c 2006-09-19 13:59:20.000000000 +0100 ++++ ./net/ipv4/xfrm4_output.c 2006-09-19 13:59:46.000000000 +0100 +@@ -195,7 +195,7 @@ static int xfrm4_output_finish(struct sk + } + #endif + +- if (!skb_shinfo(skb)->gso_size) ++ if (!skb_is_gso(skb)) + return xfrm4_output_finish2(skb); + + skb->protocol = htons(ETH_P_IP); +diff -pruN ../orig-linux-2.6.16.29/net/ipv6/ip6_output.c ./net/ipv6/ip6_output.c +--- ../orig-linux-2.6.16.29/net/ipv6/ip6_output.c 2006-09-19 13:59:20.000000000 +0100 ++++ ./net/ipv6/ip6_output.c 2006-09-19 13:59:46.000000000 +0100 +@@ -147,7 +147,7 @@ static int ip6_output2(struct sk_buff *s + + int ip6_output(struct sk_buff *skb) + { +- if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size) || ++ if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) || + dst_allfrag(skb->dst)) + return ip6_fragment(skb, ip6_output2); + else +diff -pruN ../orig-linux-2.6.16.29/net/ipv6/xfrm6_output.c ./net/ipv6/xfrm6_output.c +--- ../orig-linux-2.6.16.29/net/ipv6/xfrm6_output.c 2006-09-19 13:59:20.000000000 +0100 ++++ ./net/ipv6/xfrm6_output.c 2006-09-19 13:59:46.000000000 +0100 +@@ -179,7 +179,7 @@ static int xfrm6_output_finish(struct sk + { + struct sk_buff *segs; + +- if (!skb_shinfo(skb)->gso_size) ++ if (!skb_is_gso(skb)) + return xfrm6_output_finish2(skb); + + skb->protocol = htons(ETH_P_IP); diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/net-gso-3-fix-errorcheck.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/net-gso-3-fix-errorcheck.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,17 @@ +diff -pruN ../orig-linux-2.6.16.29/include/linux/netdevice.h ./include/linux/netdevice.h +--- ../orig-linux-2.6.16.29/include/linux/netdevice.h 2006-09-19 13:59:46.000000000 +0100 ++++ ./include/linux/netdevice.h 2006-09-19 14:05:28.000000000 +0100 +@@ -930,10 +930,10 @@ static inline void netif_tx_lock_bh(stru + + static inline int netif_tx_trylock(struct net_device *dev) + { +- int err = spin_trylock(&dev->_xmit_lock); +- if (!err) ++ int ok = spin_trylock(&dev->_xmit_lock); ++ if (likely(ok)) + dev->xmit_lock_owner = smp_processor_id(); +- return err; ++ return ok; + } + + static inline void netif_tx_unlock(struct net_device *dev) diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/net-gso-4-kill-warnon.patch --- 
/dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/net-gso-4-kill-warnon.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,27 @@ +diff -pruN ../orig-linux-2.6.16.29/net/core/dev.c ./net/core/dev.c +--- ../orig-linux-2.6.16.29/net/core/dev.c 2006-09-19 13:59:46.000000000 +0100 ++++ ./net/core/dev.c 2006-09-19 14:05:32.000000000 +0100 +@@ -1087,11 +1087,6 @@ int skb_checksum_help(struct sk_buff *sk + goto out_set_summed; + + if (unlikely(skb_shinfo(skb)->gso_size)) { +- static int warned; +- +- WARN_ON(!warned); +- warned = 1; +- + /* Let GSO fix up the checksum. */ + goto out_set_summed; + } +@@ -1141,11 +1136,6 @@ struct sk_buff *skb_gso_segment(struct s + __skb_pull(skb, skb->mac_len); + + if (unlikely(skb->ip_summed != CHECKSUM_HW)) { +- static int warned; +- +- WARN_ON(!warned); +- warned = 1; +- + if (skb_header_cloned(skb) && + (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + return ERR_PTR(err); diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/net-gso-5-rcv-mss.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/net-gso-5-rcv-mss.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,13 @@ +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 104af5d..1fa1536 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -127,7 +127,7 @@ static void tcp_measure_rcv_mss(struct s + /* skb->len may jitter because of SACKs, even if peer + * sends good full-sized frames. + */ +- len = skb->len; ++ len = skb_shinfo(skb)->gso_size ?: skb->len; + if (len >= icsk->icsk_ack.rcv_mss) { + icsk->icsk_ack.rcv_mss = len; + } else { diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/pci-mmconfig-fix-from-2.6.17.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/pci-mmconfig-fix-from-2.6.17.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,292 @@ +diff -pruN ../orig-linux-2.6.16.29/arch/i386/pci/mmconfig.c ./arch/i386/pci/mmconfig.c +--- ../orig-linux-2.6.16.29/arch/i386/pci/mmconfig.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./arch/i386/pci/mmconfig.c 2006-09-21 09:35:27.000000000 +0100 +@@ -12,14 +12,22 @@ + #include <linux/pci.h> + #include <linux/init.h> + #include <linux/acpi.h> ++#include <asm/e820.h> + #include "pci.h" + ++/* aperture is up to 256MB but BIOS may reserve less */ ++#define MMCONFIG_APER_MIN (2 * 1024*1024) ++#define MMCONFIG_APER_MAX (256 * 1024*1024) ++ ++/* Assume systems with more busses have correct MCFG */ ++#define MAX_CHECK_BUS 16 ++ + #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) + + /* The base address of the last MMCONFIG device accessed */ + static u32 mmcfg_last_accessed_device; + +-static DECLARE_BITMAP(fallback_slots, 32); ++static DECLARE_BITMAP(fallback_slots, MAX_CHECK_BUS*32); + + /* + * Functions for accessing PCI configuration space with MMCONFIG accesses +@@ -29,8 +37,8 @@ static u32 get_base_addr(unsigned int se + int cfg_num = -1; + struct acpi_table_mcfg_config *cfg; + +- if (seg == 0 && bus == 0 && +- test_bit(PCI_SLOT(devfn), fallback_slots)) ++ if (seg == 0 && bus < MAX_CHECK_BUS && ++ test_bit(PCI_SLOT(devfn) + 32*bus, fallback_slots)) + return 0; + + while (1) { +@@ -74,8 +82,10 @@ static int pci_mmcfg_read(unsigned int s + unsigned long flags; + u32 base; + +- if (!value || (bus > 255) || (devfn > 255) || (reg > 4095)) ++ if ((bus > 255) || (devfn > 255) || (reg > 4095)) { ++ *value = -1; + return -EINVAL; ++ } + + base = get_base_addr(seg, bus, devfn); + if (!base) +@@ -146,30 +156,66 @@ static struct pci_raw_ops 
pci_mmcfg = { + Normally this can be expressed in the MCFG by not listing them + and assigning suitable _SEGs, but this isn't implemented in some BIOS. + Instead try to discover all devices on bus 0 that are unreachable using MM +- and fallback for them. +- We only do this for bus 0/seg 0 */ ++ and fallback for them. */ + static __init void unreachable_devices(void) + { +- int i; ++ int i, k; + unsigned long flags; + +- for (i = 0; i < 32; i++) { +- u32 val1; +- u32 addr; ++ for (k = 0; k < MAX_CHECK_BUS; k++) { ++ for (i = 0; i < 32; i++) { ++ u32 val1; ++ u32 addr; ++ ++ pci_conf1_read(0, k, PCI_DEVFN(i, 0), 0, 4, &val1); ++ if (val1 == 0xffffffff) ++ continue; ++ ++ /* Locking probably not needed, but safer */ ++ spin_lock_irqsave(&pci_config_lock, flags); ++ addr = get_base_addr(0, k, PCI_DEVFN(i, 0)); ++ if (addr != 0) ++ pci_exp_set_dev_base(addr, k, PCI_DEVFN(i, 0)); ++ if (addr == 0 || ++ readl((u32 __iomem *)mmcfg_virt_addr) != val1) { ++ set_bit(i, fallback_slots); ++ printk(KERN_NOTICE ++ "PCI: No mmconfig possible on %x:%x\n", k, i); ++ } ++ spin_unlock_irqrestore(&pci_config_lock, flags); ++ } ++ } ++} + +- pci_conf1_read(0, 0, PCI_DEVFN(i, 0), 0, 4, &val1); +- if (val1 == 0xffffffff) ++/* NB. Ripped from arch/i386/kernel/setup.c for this Xen bugfix patch. */ ++#ifdef CONFIG_XEN ++extern struct e820map machine_e820; ++#define e820 machine_e820 ++#endif ++static int __init ++e820_all_mapped(unsigned long s, unsigned long e, unsigned type) ++{ ++ u64 start = s; ++ u64 end = e; ++ int i; ++ for (i = 0; i < e820.nr_map; i++) { ++ struct e820entry *ei = &e820.map[i]; ++ if (type && ei->type != type) + continue; +- +- /* Locking probably not needed, but safer */ +- spin_lock_irqsave(&pci_config_lock, flags); +- addr = get_base_addr(0, 0, PCI_DEVFN(i, 0)); +- if (addr != 0) +- pci_exp_set_dev_base(addr, 0, PCI_DEVFN(i, 0)); +- if (addr == 0 || readl((u32 __iomem *)mmcfg_virt_addr) != val1) +- set_bit(i, fallback_slots); +- spin_unlock_irqrestore(&pci_config_lock, flags); ++ /* is the region (part) in overlap with the current region ?*/ ++ if (ei->addr >= end || ei->addr + ei->size <= start) ++ continue; ++ /* if the region is at the beginning of <start,end> we move ++ * start to the end of the region since it's ok until there ++ */ ++ if (ei->addr <= start) ++ start = ei->addr + ei->size; ++ /* if start is now at or beyond end, we're done, full ++ * coverage */ ++ if (start >= end) ++ return 1; /* we're done */ + } ++ return 0; + } + + static int __init pci_mmcfg_init(void) +@@ -183,6 +229,15 @@ static int __init pci_mmcfg_init(void) + (pci_mmcfg_config[0].base_address == 0)) + goto out; + ++ if (!e820_all_mapped(pci_mmcfg_config[0].base_address, ++ pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN, ++ E820_RESERVED)) { ++ printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n", ++ pci_mmcfg_config[0].base_address); ++ printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); ++ goto out; ++ } ++ + printk(KERN_INFO "PCI: Using MMCONFIG\n"); + raw_pci_ops = &pci_mmcfg; + pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; +diff -pruN ../orig-linux-2.6.16.29/arch/x86_64/pci/mmconfig.c ./arch/x86_64/pci/mmconfig.c +--- ../orig-linux-2.6.16.29/arch/x86_64/pci/mmconfig.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./arch/x86_64/pci/mmconfig.c 2006-09-21 09:35:40.000000000 +0100 +@@ -9,11 +9,19 @@ + #include <linux/init.h> + #include <linux/acpi.h> + #include <linux/bitmap.h> ++#include <asm/e820.h> ++ + #include "pci.h" + +-#define MMCONFIG_APER_SIZE (256*1024*1024) ++/* 
aperture is up to 256MB but BIOS may reserve less */ ++#define MMCONFIG_APER_MIN (2 * 1024*1024) ++#define MMCONFIG_APER_MAX (256 * 1024*1024) ++ ++/* Verify the first 16 busses. We assume that systems with more busses ++ get MCFG right. */ ++#define MAX_CHECK_BUS 16 + +-static DECLARE_BITMAP(fallback_slots, 32); ++static DECLARE_BITMAP(fallback_slots, 32*MAX_CHECK_BUS); + + /* Static virtual mapping of the MMCONFIG aperture */ + struct mmcfg_virt { +@@ -55,7 +63,8 @@ static char __iomem *get_virt(unsigned i + static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) + { + char __iomem *addr; +- if (seg == 0 && bus == 0 && test_bit(PCI_SLOT(devfn), &fallback_slots)) ++ if (seg == 0 && bus < MAX_CHECK_BUS && ++ test_bit(32*bus + PCI_SLOT(devfn), fallback_slots)) + return NULL; + addr = get_virt(seg, bus); + if (!addr) +@@ -69,8 +78,10 @@ static int pci_mmcfg_read(unsigned int s + char __iomem *addr; + + /* Why do we have this when nobody checks it. How about a BUG()!? -AK */ +- if (unlikely(!value || (bus > 255) || (devfn > 255) || (reg > 4095))) ++ if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) { ++ *value = -1; + return -EINVAL; ++ } + + addr = pci_dev_base(seg, bus, devfn); + if (!addr) +@@ -129,23 +140,56 @@ static struct pci_raw_ops pci_mmcfg = { + Normally this can be expressed in the MCFG by not listing them + and assigning suitable _SEGs, but this isn't implemented in some BIOS. + Instead try to discover all devices on bus 0 that are unreachable using MM +- and fallback for them. +- We only do this for bus 0/seg 0 */ ++ and fallback for them. */ + static __init void unreachable_devices(void) + { +- int i; +- for (i = 0; i < 32; i++) { +- u32 val1; +- char __iomem *addr; ++ int i, k; ++ /* Use the max bus number from ACPI here? */ ++ for (k = 0; k < MAX_CHECK_BUS; k++) { ++ for (i = 0; i < 32; i++) { ++ u32 val1; ++ char __iomem *addr; ++ ++ pci_conf1_read(0, k, PCI_DEVFN(i,0), 0, 4, &val1); ++ if (val1 == 0xffffffff) ++ continue; ++ addr = pci_dev_base(0, k, PCI_DEVFN(i, 0)); ++ if (addr == NULL|| readl(addr) != val1) { ++ set_bit(i + 32*k, fallback_slots); ++ printk(KERN_NOTICE ++ "PCI: No mmconfig possible on device %x:%x\n", ++ k, i); ++ } ++ } ++ } ++} + +- pci_conf1_read(0, 0, PCI_DEVFN(i,0), 0, 4, &val1); +- if (val1 == 0xffffffff) ++/* NB. Ripped from arch/x86_64/kernel/e820.c for this Xen bugfix patch. 
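This x86_64 copy of e820_all_mapped() (its body follows) is the same interval-coverage walk as the i386 version above: skip entries of the wrong type or with no overlap, advance start past any entry that covers it, and report success once start reaches end. A minimal standalone rendering of the algorithm; like the original it assumes the map has been sanitized into sorted, non-overlapping entries:

struct range {
	unsigned long addr, size;
	unsigned type;
};

/* Return 1 iff [start, end) is fully covered by entries of the given
 * type; mirrors the e820_all_mapped() walk in the patch. */
static int all_mapped(const struct range *map, int n,
                      unsigned long start, unsigned long end, unsigned type)
{
	int i;

	for (i = 0; i < n; i++) {
		if (type && map[i].type != type)
			continue;
		if (map[i].addr >= end || map[i].addr + map[i].size <= start)
			continue;	/* no overlap with [start, end) */
		if (map[i].addr <= start)
			start = map[i].addr + map[i].size;
		if (start >= end)
			return 1;	/* coverage is complete */
	}
	return 0;
}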
*/ ++#ifdef CONFIG_XEN ++extern struct e820map machine_e820; ++#define e820 machine_e820 ++#endif ++static int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type) ++{ ++ int i; ++ for (i = 0; i < e820.nr_map; i++) { ++ struct e820entry *ei = &e820.map[i]; ++ if (type && ei->type != type) + continue; +- addr = pci_dev_base(0, 0, PCI_DEVFN(i, 0)); +- if (addr == NULL|| readl(addr) != val1) { +- set_bit(i, &fallback_slots); +- } ++ /* is the region (part) in overlap with the current region ?*/ ++ if (ei->addr >= end || ei->addr + ei->size <= start) ++ continue; ++ ++ /* if the region is at the beginning of <start,end> we move ++ * start to the end of the region since it's ok until there ++ */ ++ if (ei->addr <= start) ++ start = ei->addr + ei->size; ++ /* if start is now at or beyond end, we're done, full coverage */ ++ if (start >= end) ++ return 1; /* we're done */ + } ++ return 0; + } + + static int __init pci_mmcfg_init(void) +@@ -161,6 +205,15 @@ static int __init pci_mmcfg_init(void) + (pci_mmcfg_config[0].base_address == 0)) + return 0; + ++ if (!e820_all_mapped(pci_mmcfg_config[0].base_address, ++ pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN, ++ E820_RESERVED)) { ++ printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n", ++ pci_mmcfg_config[0].base_address); ++ printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); ++ return 0; ++ } ++ + /* RED-PEN i386 doesn't do _nocache right now */ + pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL); + if (pci_mmcfg_virt == NULL) { +@@ -169,7 +222,8 @@ static int __init pci_mmcfg_init(void) + } + for (i = 0; i < pci_mmcfg_config_num; ++i) { + pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i]; +- pci_mmcfg_virt[i].virt = ioremap_nocache(pci_mmcfg_config[i].base_address, MMCONFIG_APER_SIZE); ++ pci_mmcfg_virt[i].virt = ioremap_nocache(pci_mmcfg_config[i].base_address, ++ MMCONFIG_APER_MAX); + if (!pci_mmcfg_virt[i].virt) { + printk("PCI: Cannot map mmconfig aperture for segment %d\n", + pci_mmcfg_config[i].pci_segment_group_number); diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/pmd-shared.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/pmd-shared.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,111 @@ +diff -pruN ../orig-linux-2.6.16.29/arch/i386/mm/pageattr.c ./arch/i386/mm/pageattr.c +--- ../orig-linux-2.6.16.29/arch/i386/mm/pageattr.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./arch/i386/mm/pageattr.c 2006-09-19 14:05:35.000000000 +0100 +@@ -78,7 +78,7 @@ static void set_pmd_pte(pte_t *kpte, uns + unsigned long flags; + + set_pte_atomic(kpte, pte); /* change init_mm */ +- if (PTRS_PER_PMD > 1) ++ if (HAVE_SHARED_KERNEL_PMD) + return; + + spin_lock_irqsave(&pgd_lock, flags); +diff -pruN ../orig-linux-2.6.16.29/arch/i386/mm/pgtable.c ./arch/i386/mm/pgtable.c +--- ../orig-linux-2.6.16.29/arch/i386/mm/pgtable.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./arch/i386/mm/pgtable.c 2006-09-19 14:05:35.000000000 +0100 +@@ -215,9 +215,10 @@ void pgd_ctor(void *pgd, kmem_cache_t *c + spin_lock_irqsave(&pgd_lock, flags); + } + +- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, +- swapper_pg_dir + USER_PTRS_PER_PGD, +- KERNEL_PGD_PTRS); ++ if (PTRS_PER_PMD == 1 || HAVE_SHARED_KERNEL_PMD) ++ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, ++ swapper_pg_dir + USER_PTRS_PER_PGD, ++ KERNEL_PGD_PTRS); + if (PTRS_PER_PMD > 1) + return; + +@@ -249,6 +250,30 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + goto out_oom; + set_pgd(&pgd[i], __pgd(1 + 
__pa(pmd))); + } ++ ++ if (!HAVE_SHARED_KERNEL_PMD) { ++ unsigned long flags; ++ ++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { ++ pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); ++ if (!pmd) ++ goto out_oom; ++ set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd))); ++ } ++ ++ spin_lock_irqsave(&pgd_lock, flags); ++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { ++ unsigned long v = (unsigned long)i << PGDIR_SHIFT; ++ pgd_t *kpgd = pgd_offset_k(v); ++ pud_t *kpud = pud_offset(kpgd, v); ++ pmd_t *kpmd = pmd_offset(kpud, v); ++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); ++ memcpy(pmd, kpmd, PAGE_SIZE); ++ } ++ pgd_list_add(pgd); ++ spin_unlock_irqrestore(&pgd_lock, flags); ++ } ++ + return pgd; + + out_oom: +@@ -263,9 +288,23 @@ void pgd_free(pgd_t *pgd) + int i; + + /* in the PAE case user pgd entries are overwritten before usage */ +- if (PTRS_PER_PMD > 1) +- for (i = 0; i < USER_PTRS_PER_PGD; ++i) +- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); ++ if (PTRS_PER_PMD > 1) { ++ for (i = 0; i < USER_PTRS_PER_PGD; ++i) { ++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); ++ kmem_cache_free(pmd_cache, pmd); ++ } ++ if (!HAVE_SHARED_KERNEL_PMD) { ++ unsigned long flags; ++ spin_lock_irqsave(&pgd_lock, flags); ++ pgd_list_del(pgd); ++ spin_unlock_irqrestore(&pgd_lock, flags); ++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { ++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); ++ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); ++ kmem_cache_free(pmd_cache, pmd); ++ } ++ } ++ } + /* in the non-PAE case, free_pgtables() clears user pgd entries */ + kmem_cache_free(pgd_cache, pgd); + } +diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/pgtable-2level-defs.h ./include/asm-i386/pgtable-2level-defs.h +--- ../orig-linux-2.6.16.29/include/asm-i386/pgtable-2level-defs.h 2006-09-12 19:02:10.000000000 +0100 ++++ ./include/asm-i386/pgtable-2level-defs.h 2006-09-19 14:05:35.000000000 +0100 +@@ -1,6 +1,8 @@ + #ifndef _I386_PGTABLE_2LEVEL_DEFS_H + #define _I386_PGTABLE_2LEVEL_DEFS_H + ++#define HAVE_SHARED_KERNEL_PMD 0 ++ + /* + * traditional i386 two-level paging structure: + */ +diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/pgtable-3level-defs.h ./include/asm-i386/pgtable-3level-defs.h +--- ../orig-linux-2.6.16.29/include/asm-i386/pgtable-3level-defs.h 2006-09-12 19:02:10.000000000 +0100 ++++ ./include/asm-i386/pgtable-3level-defs.h 2006-09-19 14:05:35.000000000 +0100 +@@ -1,6 +1,8 @@ + #ifndef _I386_PGTABLE_3LEVEL_DEFS_H + #define _I386_PGTABLE_3LEVEL_DEFS_H + ++#define HAVE_SHARED_KERNEL_PMD 1 ++ + /* + * PGDIR_SHIFT determines what a top-level page table entry can map + */ diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/rcu_needs_cpu.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/rcu_needs_cpu.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,35 @@ +diff -pruN ../orig-linux-2.6.16.29/include/linux/rcupdate.h ./include/linux/rcupdate.h +--- ../orig-linux-2.6.16.29/include/linux/rcupdate.h 2006-09-12 19:02:10.000000000 +0100 ++++ ./include/linux/rcupdate.h 2006-09-19 14:05:39.000000000 +0100 +@@ -134,6 +134,7 @@ static inline void rcu_bh_qsctr_inc(int + } + + extern int rcu_pending(int cpu); ++extern int rcu_needs_cpu(int cpu); + + /** + * rcu_read_lock - mark the beginning of an RCU read-side critical section. 
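The rcu_needs_cpu() predicate declared here (implementation in the kernel/rcupdate.c hunk below) lets a CPU that is about to go tickless ask whether RCU still depends on its scheduler tick: it returns nonzero while callbacks are queued locally or rcu_pending() is true. A purely illustrative call site; the real consumer is Xen's tickless-idle (stop-HZ) logic in the sparse tree, and this helper name is hypothetical:

/* Illustrative only: may this CPU stop its periodic tick? */
static int cpu_may_stop_tick(int cpu)
{
	if (rcu_needs_cpu(cpu))
		return 0;	/* RCU still needs ticks here */
	/* a real implementation would also check pending local
	 * timers and softirqs before stopping the tick */
	return 1;
}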
+diff -pruN ../orig-linux-2.6.16.29/kernel/rcupdate.c ./kernel/rcupdate.c +--- ../orig-linux-2.6.16.29/kernel/rcupdate.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./kernel/rcupdate.c 2006-09-19 14:05:39.000000000 +0100 +@@ -485,6 +485,20 @@ int rcu_pending(int cpu) + __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu)); + } + ++/* ++ * Check to see if any future RCU-related work will need to be done ++ * by the current CPU, even if none need be done immediately, returning ++ * 1 if so. This function is part of the RCU implementation; it is -not- ++ * an exported member of the RCU API. ++ */ ++int rcu_needs_cpu(int cpu) ++{ ++ struct rcu_data *rdp = &per_cpu(rcu_data, cpu); ++ struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu); ++ ++ return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu)); ++} ++ + void rcu_check_callbacks(int cpu, int user) + { + if (user || diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,30 @@ +diff -pruN ../orig-linux-2.6.16.29/arch/i386/kernel/entry.S ./arch/i386/kernel/entry.S +--- ../orig-linux-2.6.16.29/arch/i386/kernel/entry.S 2006-09-12 19:02:10.000000000 +0100 ++++ ./arch/i386/kernel/entry.S 2006-09-19 14:05:44.000000000 +0100 +@@ -177,7 +177,7 @@ need_resched: + + # sysenter call handler stub + ENTRY(sysenter_entry) +- movl TSS_sysenter_esp0(%esp),%esp ++ movl SYSENTER_stack_esp0(%esp),%esp + sysenter_past_esp: + sti + pushl $(__USER_DS) +@@ -492,7 +492,7 @@ device_not_available_emulate: + * that sets up the real kernel stack. Check here, since we can't + * allow the wrong stack to be used. + * +- * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have ++ * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have + * already pushed 3 words if it hits on the sysenter instruction: + * eflags, cs and eip. 
+ * +@@ -504,7 +504,7 @@ device_not_available_emulate: + cmpw $__KERNEL_CS,4(%esp); \ + jne ok; \ + label: \ +- movl TSS_sysenter_esp0+offset(%esp),%esp; \ ++ movl SYSENTER_stack_esp0+offset(%esp),%esp; \ + pushfl; \ + pushl $__KERNEL_CS; \ + pushl $sysenter_past_esp diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/series --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/series Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,25 @@ +blktap-aio-16_03_06.patch +device_bind.patch +fix-hz-suspend.patch +fix-ide-cd-pio-mode.patch +i386-mach-io-check-nmi.patch +ipv6-no-autoconf.patch +net-csum.patch +net-gso-0-base.patch +net-gso-1-check-dodgy.patch +net-gso-2-checksum-fix.patch +net-gso-3-fix-errorcheck.patch +net-gso-4-kill-warnon.patch +net-gso-5-rcv-mss.patch +pci-mmconfig-fix-from-2.6.17.patch +pmd-shared.patch +rcu_needs_cpu.patch +rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch +smp-alts.patch +tpm_plugin_2.6.17.patch +x86-increase-interrupt-vector-range.patch +xen-hotplug.patch +xenoprof-generic.patch +x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch +x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch +x86-elfnote-as-preprocessor-macro.patch diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/smp-alts.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/smp-alts.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,591 @@ +diff -pruN ../orig-linux-2.6.16.29/arch/i386/Kconfig ./arch/i386/Kconfig +--- ../orig-linux-2.6.16.29/arch/i386/Kconfig 2006-09-12 19:02:10.000000000 +0100 ++++ ./arch/i386/Kconfig 2006-09-19 14:05:48.000000000 +0100 +@@ -202,6 +202,19 @@ config SMP + + If you don't know what to do here, say N. + ++config SMP_ALTERNATIVES ++ bool "SMP alternatives support (EXPERIMENTAL)" ++ depends on SMP && EXPERIMENTAL ++ help ++ Try to reduce the overhead of running an SMP kernel on a uniprocessor ++ host slightly by replacing certain key instruction sequences ++ according to whether we currently have more than one CPU available. ++ This should provide a noticeable boost to performance when ++ running SMP kernels on UP machines, and have negligible impact ++ when running on an true SMP host. ++ ++ If unsure, say N. 
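Mechanically (see smpalts.c just below), every LOCK site is assembled as a patchable slot plus a descriptor in the __smp_alternatives section, and prepare_for_smp()/unprepare_for_smp() rewrite those slots as CPUs come and go. A simplified rendering of that patch loop, using the types smpalts.c defines and ignoring the feature-gated second SMP variant and the init-section check:

/* Rewrite one slot with its SMP or UP byte sequence and nop-pad
 * the remainder (0x90 is the x86 single-byte nop). */
static void apply_one(struct smp_alternative_record *r, int smp)
{
	struct smp_replacement_record *p = r->repl;
	const unsigned char *src = smp ? p->data
	                               : p->data + p->smp1_size + p->smp2_size;
	unsigned char len = smp ? p->smp1_size : p->up_size;

	memcpy(r->targ_start, src, len);
	memset((char *)r->targ_start + len, 0x90, p->targ_size - len);
}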
++ + config NR_CPUS + int "Maximum number of CPUs (2-255)" + range 2 255 +diff -pruN ../orig-linux-2.6.16.29/arch/i386/kernel/Makefile ./arch/i386/kernel/Makefile +--- ../orig-linux-2.6.16.29/arch/i386/kernel/Makefile 2006-09-12 19:02:10.000000000 +0100 ++++ ./arch/i386/kernel/Makefile 2006-09-19 14:05:48.000000000 +0100 +@@ -37,6 +37,7 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o + obj-$(CONFIG_DOUBLEFAULT) += doublefault.o + obj-$(CONFIG_VM86) += vm86.o + obj-$(CONFIG_EARLY_PRINTK) += early_printk.o ++obj-$(CONFIG_SMP_ALTERNATIVES) += smpalts.o + + EXTRA_AFLAGS := -traditional + +diff -pruN ../orig-linux-2.6.16.29/arch/i386/kernel/smpalts.c ./arch/i386/kernel/smpalts.c +--- ../orig-linux-2.6.16.29/arch/i386/kernel/smpalts.c 1970-01-01 01:00:00.000000000 +0100 ++++ ./arch/i386/kernel/smpalts.c 2006-09-19 14:05:48.000000000 +0100 +@@ -0,0 +1,85 @@ ++#include <linux/kernel.h> ++#include <asm/system.h> ++#include <asm/smp_alt.h> ++#include <asm/processor.h> ++#include <asm/string.h> ++ ++struct smp_replacement_record { ++ unsigned char targ_size; ++ unsigned char smp1_size; ++ unsigned char smp2_size; ++ unsigned char up_size; ++ unsigned char feature; ++ unsigned char data[0]; ++}; ++ ++struct smp_alternative_record { ++ void *targ_start; ++ struct smp_replacement_record *repl; ++}; ++ ++extern struct smp_alternative_record __start_smp_alternatives_table, ++ __stop_smp_alternatives_table; ++extern unsigned long __init_begin, __init_end; ++ ++void prepare_for_smp(void) ++{ ++ struct smp_alternative_record *r; ++ printk(KERN_INFO "Enabling SMP...\n"); ++ for (r = &__start_smp_alternatives_table; ++ r != &__stop_smp_alternatives_table; ++ r++) { ++ BUG_ON(r->repl->targ_size < r->repl->smp1_size); ++ BUG_ON(r->repl->targ_size < r->repl->smp2_size); ++ BUG_ON(r->repl->targ_size < r->repl->up_size); ++ if (system_state == SYSTEM_RUNNING && ++ r->targ_start >= (void *)&__init_begin && ++ r->targ_start < (void *)&__init_end) ++ continue; ++ if (r->repl->feature != (unsigned char)-1 && ++ boot_cpu_has(r->repl->feature)) { ++ memcpy(r->targ_start, ++ r->repl->data + r->repl->smp1_size, ++ r->repl->smp2_size); ++ memset(r->targ_start + r->repl->smp2_size, ++ 0x90, ++ r->repl->targ_size - r->repl->smp2_size); ++ } else { ++ memcpy(r->targ_start, ++ r->repl->data, ++ r->repl->smp1_size); ++ memset(r->targ_start + r->repl->smp1_size, ++ 0x90, ++ r->repl->targ_size - r->repl->smp1_size); ++ } ++ } ++ /* Paranoia */ ++ asm volatile ("jmp 1f\n1:"); ++ mb(); ++} ++ ++void unprepare_for_smp(void) ++{ ++ struct smp_alternative_record *r; ++ printk(KERN_INFO "Disabling SMP...\n"); ++ for (r = &__start_smp_alternatives_table; ++ r != &__stop_smp_alternatives_table; ++ r++) { ++ BUG_ON(r->repl->targ_size < r->repl->smp1_size); ++ BUG_ON(r->repl->targ_size < r->repl->smp2_size); ++ BUG_ON(r->repl->targ_size < r->repl->up_size); ++ if (system_state == SYSTEM_RUNNING && ++ r->targ_start >= (void *)&__init_begin && ++ r->targ_start < (void *)&__init_end) ++ continue; ++ memcpy(r->targ_start, ++ r->repl->data + r->repl->smp1_size + r->repl->smp2_size, ++ r->repl->up_size); ++ memset(r->targ_start + r->repl->up_size, ++ 0x90, ++ r->repl->targ_size - r->repl->up_size); ++ } ++ /* Paranoia */ ++ asm volatile ("jmp 1f\n1:"); ++ mb(); ++} +diff -pruN ../orig-linux-2.6.16.29/arch/i386/kernel/smpboot.c ./arch/i386/kernel/smpboot.c +--- ../orig-linux-2.6.16.29/arch/i386/kernel/smpboot.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./arch/i386/kernel/smpboot.c 2006-09-19 14:05:48.000000000 +0100 +@@ -1218,6 +1218,11 @@ static void 
__init smp_boot_cpus(unsigne + if (max_cpus <= cpucount+1) + continue; + ++#ifdef CONFIG_SMP_ALTERNATIVES ++ if (kicked == 1) ++ prepare_for_smp(); ++#endif ++ + if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu)) + printk("CPU #%d not responding - cannot use it.\n", + apicid); +@@ -1396,6 +1401,11 @@ int __devinit __cpu_up(unsigned int cpu) + return -EIO; + } + ++#ifdef CONFIG_SMP_ALTERNATIVES ++ if (num_online_cpus() == 1) ++ prepare_for_smp(); ++#endif ++ + local_irq_enable(); + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + /* Unleash the CPU! */ +diff -pruN ../orig-linux-2.6.16.29/arch/i386/kernel/vmlinux.lds.S ./arch/i386/kernel/vmlinux.lds.S +--- ../orig-linux-2.6.16.29/arch/i386/kernel/vmlinux.lds.S 2006-09-12 19:02:10.000000000 +0100 ++++ ./arch/i386/kernel/vmlinux.lds.S 2006-09-19 14:05:48.000000000 +0100 +@@ -34,6 +34,13 @@ SECTIONS + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } + __stop___ex_table = .; + ++ . = ALIGN(16); ++ __start_smp_alternatives_table = .; ++ __smp_alternatives : { *(__smp_alternatives) } ++ __stop_smp_alternatives_table = .; ++ ++ __smp_replacements : { *(__smp_replacements) } ++ + RODATA + + /* writeable */ +diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/atomic.h ./include/asm-i386/atomic.h +--- ../orig-linux-2.6.16.29/include/asm-i386/atomic.h 2006-09-12 19:02:10.000000000 +0100 ++++ ./include/asm-i386/atomic.h 2006-09-19 14:05:48.000000000 +0100 +@@ -4,18 +4,13 @@ + #include <linux/config.h> + #include <linux/compiler.h> + #include <asm/processor.h> ++#include <asm/smp_alt.h> + + /* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + */ + +-#ifdef CONFIG_SMP +-#define LOCK "lock ; " +-#else +-#define LOCK "" +-#endif +- + /* + * Make sure gcc doesn't try to be clever and move things around + * on us. We need to use _exactly_ the address the user gave us, +diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/bitops.h ./include/asm-i386/bitops.h +--- ../orig-linux-2.6.16.29/include/asm-i386/bitops.h 2006-09-12 19:02:10.000000000 +0100 ++++ ./include/asm-i386/bitops.h 2006-09-19 14:05:48.000000000 +0100 +@@ -7,6 +7,7 @@ + + #include <linux/config.h> + #include <linux/compiler.h> ++#include <asm/smp_alt.h> + + /* + * These have to be done with inline assembly: that way the bit-setting +@@ -16,12 +17,6 @@ + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). 
+ */ + +-#ifdef CONFIG_SMP +-#define LOCK_PREFIX "lock ; " +-#else +-#define LOCK_PREFIX "" +-#endif +- + #define ADDR (*(volatile long *) addr) + + /** +@@ -41,7 +36,7 @@ + */ + static inline void set_bit(int nr, volatile unsigned long * addr) + { +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btsl %1,%0" + :"+m" (ADDR) + :"Ir" (nr)); +@@ -76,7 +71,7 @@ static inline void __set_bit(int nr, vol + */ + static inline void clear_bit(int nr, volatile unsigned long * addr) + { +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btrl %1,%0" + :"+m" (ADDR) + :"Ir" (nr)); +@@ -121,7 +116,7 @@ static inline void __change_bit(int nr, + */ + static inline void change_bit(int nr, volatile unsigned long * addr) + { +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btcl %1,%0" + :"+m" (ADDR) + :"Ir" (nr)); +@@ -140,7 +135,7 @@ static inline int test_and_set_bit(int n + { + int oldbit; + +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btsl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (ADDR) + :"Ir" (nr) : "memory"); +@@ -180,7 +175,7 @@ static inline int test_and_clear_bit(int + { + int oldbit; + +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (ADDR) + :"Ir" (nr) : "memory"); +@@ -231,7 +226,7 @@ static inline int test_and_change_bit(in + { + int oldbit; + +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btcl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (ADDR) + :"Ir" (nr) : "memory"); +diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/futex.h ./include/asm-i386/futex.h +--- ../orig-linux-2.6.16.29/include/asm-i386/futex.h 2006-09-12 19:02:10.000000000 +0100 ++++ ./include/asm-i386/futex.h 2006-09-19 14:05:48.000000000 +0100 +@@ -28,7 +28,7 @@ + "1: movl %2, %0\n\ + movl %0, %3\n" \ + insn "\n" \ +-"2: " LOCK_PREFIX "cmpxchgl %3, %2\n\ ++"2: " LOCK "cmpxchgl %3, %2\n\ + jnz 1b\n\ + 3: .section .fixup,\"ax\"\n\ + 4: mov %5, %1\n\ +@@ -68,7 +68,7 @@ futex_atomic_op_inuser (int encoded_op, + #endif + switch (op) { + case FUTEX_OP_ADD: +- __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, ++ __futex_atomic_op1(LOCK "xaddl %0, %2", ret, + oldval, uaddr, oparg); + break; + case FUTEX_OP_OR: +diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/rwsem.h ./include/asm-i386/rwsem.h +--- ../orig-linux-2.6.16.29/include/asm-i386/rwsem.h 2006-09-12 19:02:10.000000000 +0100 ++++ ./include/asm-i386/rwsem.h 2006-09-19 14:05:48.000000000 +0100 +@@ -40,6 +40,7 @@ + + #include <linux/list.h> + #include <linux/spinlock.h> ++#include <asm/smp_alt.h> + + struct rwsem_waiter; + +@@ -99,7 +100,7 @@ static inline void __down_read(struct rw + { + __asm__ __volatile__( + "# beginning down_read\n\t" +-LOCK_PREFIX " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */ ++LOCK " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */ + " js 2f\n\t" /* jump if we weren't granted the lock */ + "1:\n\t" + LOCK_SECTION_START("") +@@ -130,7 +131,7 @@ static inline int __down_read_trylock(st + " movl %1,%2\n\t" + " addl %3,%2\n\t" + " jle 2f\n\t" +-LOCK_PREFIX " cmpxchgl %2,%0\n\t" ++LOCK " cmpxchgl %2,%0\n\t" + " jnz 1b\n\t" + "2:\n\t" + "# ending __down_read_trylock\n\t" +@@ -150,7 +151,7 @@ static inline void __down_write(struct r + tmp = RWSEM_ACTIVE_WRITE_BIAS; + __asm__ __volatile__( + "# beginning down_write\n\t" +-LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */ ++LOCK " xadd %%edx,(%%eax)\n\t" /* subtract 
0x0000ffff, returns the old value */ + " testl %%edx,%%edx\n\t" /* was the count 0 before? */ + " jnz 2f\n\t" /* jump if we weren't granted the lock */ + "1:\n\t" +@@ -188,7 +189,7 @@ static inline void __up_read(struct rw_s + __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; + __asm__ __volatile__( + "# beginning __up_read\n\t" +-LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */ ++LOCK " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */ + " js 2f\n\t" /* jump if the lock is being waited upon */ + "1:\n\t" + LOCK_SECTION_START("") +@@ -214,7 +215,7 @@ static inline void __up_write(struct rw_ + __asm__ __volatile__( + "# beginning __up_write\n\t" + " movl %2,%%edx\n\t" +-LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ ++LOCK " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ + " jnz 2f\n\t" /* jump if the lock is being waited upon */ + "1:\n\t" + LOCK_SECTION_START("") +@@ -239,7 +240,7 @@ static inline void __downgrade_write(str + { + __asm__ __volatile__( + "# beginning __downgrade_write\n\t" +-LOCK_PREFIX " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ ++LOCK " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ + " js 2f\n\t" /* jump if the lock is being waited upon */ + "1:\n\t" + LOCK_SECTION_START("") +@@ -263,7 +264,7 @@ LOCK_PREFIX " addl %2,(%%eax)\n\t" + static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) + { + __asm__ __volatile__( +-LOCK_PREFIX "addl %1,%0" ++LOCK "addl %1,%0" + : "=m"(sem->count) + : "ir"(delta), "m"(sem->count)); + } +@@ -276,7 +277,7 @@ static inline int rwsem_atomic_update(in + int tmp = delta; + + __asm__ __volatile__( +-LOCK_PREFIX "xadd %0,(%2)" ++LOCK "xadd %0,(%2)" + : "+r"(tmp), "=m"(sem->count) + : "r"(sem), "m"(sem->count) + : "memory"); +diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/smp_alt.h ./include/asm-i386/smp_alt.h +--- ../orig-linux-2.6.16.29/include/asm-i386/smp_alt.h 1970-01-01 01:00:00.000000000 +0100 ++++ ./include/asm-i386/smp_alt.h 2006-09-19 14:05:48.000000000 +0100 +@@ -0,0 +1,32 @@ ++#ifndef __ASM_SMP_ALT_H__ ++#define __ASM_SMP_ALT_H__ ++ ++#include <linux/config.h> ++ ++#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE) ++#define LOCK \ ++ "6677: nop\n" \ ++ ".section __smp_alternatives,\"a\"\n" \ ++ ".long 6677b\n" \ ++ ".long 6678f\n" \ ++ ".previous\n" \ ++ ".section __smp_replacements,\"a\"\n" \ ++ "6678: .byte 1\n" \ ++ ".byte 1\n" \ ++ ".byte 0\n" \ ++ ".byte 1\n" \ ++ ".byte -1\n" \ ++ "lock\n" \ ++ "nop\n" \ ++ ".previous\n" ++void prepare_for_smp(void); ++void unprepare_for_smp(void); ++#else ++#define LOCK "lock ; " ++#endif ++#else ++#define LOCK "" ++#endif ++ ++#endif /* __ASM_SMP_ALT_H__ */ +diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/spinlock.h ./include/asm-i386/spinlock.h +--- ../orig-linux-2.6.16.29/include/asm-i386/spinlock.h 2006-09-12 19:02:10.000000000 +0100 ++++ ./include/asm-i386/spinlock.h 2006-09-19 14:05:48.000000000 +0100 +@@ -6,6 +6,7 @@ + #include <asm/page.h> + #include <linux/config.h> + #include <linux/compiler.h> ++#include <asm/smp_alt.h> + + /* + * Your basic SMP spinlocks, allowing only a single CPU anywhere +@@ -23,7 +24,8 @@ + + #define __raw_spin_lock_string \ + "\n1:\t" \ +- "lock ; decb %0\n\t" \ ++ LOCK \ ++ "decb %0\n\t" \ + "jns 3f\n" \ + "2:\t" \ + "rep;nop\n\t" \ +@@ -34,7 +36,8 @@ + + #define __raw_spin_lock_string_flags \ + "\n1:\t" \ +- "lock ; decb %0\n\t" \ ++ LOCK \ ++ "decb %0\n\t" \ + "jns 
4f\n\t" \ + "2:\t" \ + "testl $0x200, %1\n\t" \ +@@ -65,10 +68,34 @@ static inline void __raw_spin_lock_flags + static inline int __raw_spin_trylock(raw_spinlock_t *lock) + { + char oldval; ++#ifdef CONFIG_SMP_ALTERNATIVES + __asm__ __volatile__( +- "xchgb %b0,%1" ++ "1:movb %1,%b0\n" ++ "movb $0,%1\n" ++ "2:" ++ ".section __smp_alternatives,\"a\"\n" ++ ".long 1b\n" ++ ".long 3f\n" ++ ".previous\n" ++ ".section __smp_replacements,\"a\"\n" ++ "3: .byte 2b - 1b\n" ++ ".byte 5f-4f\n" ++ ".byte 0\n" ++ ".byte 6f-5f\n" ++ ".byte -1\n" ++ "4: xchgb %b0,%1\n" ++ "5: movb %1,%b0\n" ++ "movb $0,%1\n" ++ "6:\n" ++ ".previous\n" + :"=q" (oldval), "=m" (lock->slock) + :"0" (0) : "memory"); ++#else ++ __asm__ __volatile__( ++ "xchgb %b0,%1\n" ++ :"=q" (oldval), "=m" (lock->slock) ++ :"0" (0) : "memory"); ++#endif + return oldval > 0; + } + +@@ -178,12 +205,12 @@ static inline int __raw_write_trylock(ra + + static inline void __raw_read_unlock(raw_rwlock_t *rw) + { +- asm volatile("lock ; incl %0" :"=m" (rw->lock) : : "memory"); ++ asm volatile(LOCK "incl %0" :"=m" (rw->lock) : : "memory"); + } + + static inline void __raw_write_unlock(raw_rwlock_t *rw) + { +- asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ", %0" ++ asm volatile(LOCK "addl $" RW_LOCK_BIAS_STR ", %0" + : "=m" (rw->lock) : : "memory"); + } + +diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/system.h ./include/asm-i386/system.h +--- ../orig-linux-2.6.16.29/include/asm-i386/system.h 2006-09-12 19:02:10.000000000 +0100 ++++ ./include/asm-i386/system.h 2006-09-19 14:05:48.000000000 +0100 +@@ -5,7 +5,7 @@ + #include <linux/kernel.h> + #include <asm/segment.h> + #include <asm/cpufeature.h> +-#include <linux/bitops.h> /* for LOCK_PREFIX */ ++#include <asm/smp_alt.h> + + #ifdef __KERNEL__ + +@@ -271,19 +271,19 @@ static inline unsigned long __cmpxchg(vo + unsigned long prev; + switch (size) { + case 1: +- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" ++ __asm__ __volatile__(LOCK "cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: +- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" ++ __asm__ __volatile__(LOCK "cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: +- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" ++ __asm__ __volatile__(LOCK "cmpxchgl %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); +@@ -336,7 +336,7 @@ static inline unsigned long long __cmpxc + unsigned long long new) + { + unsigned long long prev; +- __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3" ++ __asm__ __volatile__(LOCK "cmpxchg8b %3" + : "=A"(prev) + : "b"((unsigned long)new), + "c"((unsigned long)(new >> 32)), +@@ -503,11 +503,55 @@ struct alt_instr { + #endif + + #ifdef CONFIG_SMP ++#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE) ++#define smp_alt_mb(instr) \ ++__asm__ __volatile__("6667:\nnop\nnop\nnop\nnop\nnop\nnop\n6668:\n" \ ++ ".section __smp_alternatives,\"a\"\n" \ ++ ".long 6667b\n" \ ++ ".long 6673f\n" \ ++ ".previous\n" \ ++ ".section __smp_replacements,\"a\"\n" \ ++ "6673:.byte 6668b-6667b\n" \ ++ ".byte 6670f-6669f\n" \ ++ ".byte 6671f-6670f\n" \ ++ ".byte 0\n" \ ++ ".byte %c0\n" \ ++ "6669:lock;addl $0,0(%%esp)\n" \ ++ "6670:" instr "\n" \ ++ "6671:\n" \ ++ ".previous\n" \ ++ : \ ++ : "i" (X86_FEATURE_XMM2) \ ++ : "memory") ++#define smp_rmb() smp_alt_mb("lfence") ++#define smp_mb() smp_alt_mb("mfence") ++#define set_mb(var, value) do { \ ++unsigned long __set_mb_temp; \ ++__asm__ 
__volatile__("6667:movl %1, %0\n6668:\n" \ ++ ".section __smp_alternatives,\"a\"\n" \ ++ ".long 6667b\n" \ ++ ".long 6673f\n" \ ++ ".previous\n" \ ++ ".section __smp_replacements,\"a\"\n" \ ++ "6673: .byte 6668b-6667b\n" \ ++ ".byte 6670f-6669f\n" \ ++ ".byte 0\n" \ ++ ".byte 6671f-6670f\n" \ ++ ".byte -1\n" \ ++ "6669: xchg %1, %0\n" \ ++ "6670:movl %1, %0\n" \ ++ "6671:\n" \ ++ ".previous\n" \ ++ : "=m" (var), "=r" (__set_mb_temp) \ ++ : "1" (value) \ ++ : "memory"); } while (0) ++#else + #define smp_mb() mb() + #define smp_rmb() rmb() ++#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) ++#endif + #define smp_wmb() wmb() + #define smp_read_barrier_depends() read_barrier_depends() +-#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) + #else + #define smp_mb() barrier() + #define smp_rmb() barrier() diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/tpm_plugin_2.6.17.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/tpm_plugin_2.6.17.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,1545 @@ +diff -pruN ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_atmel.c ./drivers/char/tpm/tpm_atmel.c +--- ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_atmel.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./drivers/char/tpm/tpm_atmel.c 2006-09-19 14:05:52.000000000 +0100 +@@ -47,12 +47,12 @@ static int tpm_atml_recv(struct tpm_chip + return -EIO; + + for (i = 0; i < 6; i++) { +- status = ioread8(chip->vendor->iobase + 1); ++ status = ioread8(chip->vendor.iobase + 1); + if ((status & ATML_STATUS_DATA_AVAIL) == 0) { + dev_err(chip->dev, "error reading header\n"); + return -EIO; + } +- *buf++ = ioread8(chip->vendor->iobase); ++ *buf++ = ioread8(chip->vendor.iobase); + } + + /* size of the data received */ +@@ -63,7 +63,7 @@ static int tpm_atml_recv(struct tpm_chip + dev_err(chip->dev, + "Recv size(%d) less than available space\n", size); + for (; i < size; i++) { /* clear the waiting data anyway */ +- status = ioread8(chip->vendor->iobase + 1); ++ status = ioread8(chip->vendor.iobase + 1); + if ((status & ATML_STATUS_DATA_AVAIL) == 0) { + dev_err(chip->dev, "error reading data\n"); + return -EIO; +@@ -74,16 +74,16 @@ static int tpm_atml_recv(struct tpm_chip + + /* read all the data available */ + for (; i < size; i++) { +- status = ioread8(chip->vendor->iobase + 1); ++ status = ioread8(chip->vendor.iobase + 1); + if ((status & ATML_STATUS_DATA_AVAIL) == 0) { + dev_err(chip->dev, "error reading data\n"); + return -EIO; + } +- *buf++ = ioread8(chip->vendor->iobase); ++ *buf++ = ioread8(chip->vendor.iobase); + } + + /* make sure data available is gone */ +- status = ioread8(chip->vendor->iobase + 1); ++ status = ioread8(chip->vendor.iobase + 1); + + if (status & ATML_STATUS_DATA_AVAIL) { + dev_err(chip->dev, "data available is stuck\n"); +@@ -100,7 +100,7 @@ static int tpm_atml_send(struct tpm_chip + dev_dbg(chip->dev, "tpm_atml_send:\n"); + for (i = 0; i < count; i++) { + dev_dbg(chip->dev, "%d 0x%x(%d)\n", i, buf[i], buf[i]); +- iowrite8(buf[i], chip->vendor->iobase); ++ iowrite8(buf[i], chip->vendor.iobase); + } + + return count; +@@ -108,12 +108,12 @@ static int tpm_atml_send(struct tpm_chip + + static void tpm_atml_cancel(struct tpm_chip *chip) + { +- iowrite8(ATML_STATUS_ABORT, chip->vendor->iobase + 1); ++ iowrite8(ATML_STATUS_ABORT, chip->vendor.iobase + 1); + } + + static u8 tpm_atml_status(struct tpm_chip *chip) + { +- return ioread8(chip->vendor->iobase + 1); ++ return ioread8(chip->vendor.iobase + 1); + } + + static struct 
file_operations atmel_ops = { +@@ -140,7 +140,7 @@ static struct attribute* atmel_attrs[] = + + static struct attribute_group atmel_attr_grp = { .attrs = atmel_attrs }; + +-static struct tpm_vendor_specific tpm_atmel = { ++static const struct tpm_vendor_specific tpm_atmel = { + .recv = tpm_atml_recv, + .send = tpm_atml_send, + .cancel = tpm_atml_cancel, +@@ -159,10 +159,10 @@ static void atml_plat_remove(void) + struct tpm_chip *chip = dev_get_drvdata(&pdev->dev); + + if (chip) { +- if (chip->vendor->have_region) +- atmel_release_region(chip->vendor->base, +- chip->vendor->region_size); +- atmel_put_base_addr(chip->vendor); ++ if (chip->vendor.have_region) ++ atmel_release_region(chip->vendor.base, ++ chip->vendor.region_size); ++ atmel_put_base_addr(chip->vendor.iobase); + tpm_remove_hardware(chip->dev); + platform_device_unregister(pdev); + } +@@ -179,18 +179,22 @@ static struct device_driver atml_drv = { + static int __init init_atmel(void) + { + int rc = 0; ++ void __iomem *iobase = NULL; ++ int have_region, region_size; ++ unsigned long base; ++ struct tpm_chip *chip; + + driver_register(&atml_drv); + +- if ((tpm_atmel.iobase = atmel_get_base_addr(&tpm_atmel)) == NULL) { ++ if ((iobase = atmel_get_base_addr(&base, ®ion_size)) == NULL) { + rc = -ENODEV; + goto err_unreg_drv; + } + +- tpm_atmel.have_region = ++ have_region = + (atmel_request_region +- (tpm_atmel.base, tpm_atmel.region_size, +- "tpm_atmel0") == NULL) ? 0 : 1; ++ (tpm_atmel.base, region_size, "tpm_atmel0") == NULL) ? 0 : 1; ++ + + if (IS_ERR + (pdev = +@@ -199,17 +203,25 @@ static int __init init_atmel(void) + goto err_rel_reg; + } + +- if ((rc = tpm_register_hardware(&pdev->dev, &tpm_atmel)) < 0) ++ if (!(chip = tpm_register_hardware(&pdev->dev, &tpm_atmel))) { ++ rc = -ENODEV; + goto err_unreg_dev; ++ } ++ ++ chip->vendor.iobase = iobase; ++ chip->vendor.base = base; ++ chip->vendor.have_region = have_region; ++ chip->vendor.region_size = region_size; ++ + return 0; + + err_unreg_dev: + platform_device_unregister(pdev); + err_rel_reg: +- atmel_put_base_addr(&tpm_atmel); +- if (tpm_atmel.have_region) +- atmel_release_region(tpm_atmel.base, +- tpm_atmel.region_size); ++ atmel_put_base_addr(iobase); ++ if (have_region) ++ atmel_release_region(base, ++ region_size); + err_unreg_drv: + driver_unregister(&atml_drv); + return rc; +diff -pruN ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_atmel.h ./drivers/char/tpm/tpm_atmel.h +--- ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_atmel.h 2006-09-12 19:02:10.000000000 +0100 ++++ ./drivers/char/tpm/tpm_atmel.h 2006-09-19 14:05:52.000000000 +0100 +@@ -28,13 +28,12 @@ + #define atmel_request_region request_mem_region + #define atmel_release_region release_mem_region + +-static inline void atmel_put_base_addr(struct tpm_vendor_specific +- *vendor) ++static inline void atmel_put_base_addr(void __iomem *iobase) + { +- iounmap(vendor->iobase); ++ iounmap(iobase); + } + +-static void __iomem * atmel_get_base_addr(struct tpm_vendor_specific *vendor) ++static void __iomem * atmel_get_base_addr(unsigned long *base, int *region_size) + { + struct device_node *dn; + unsigned long address, size; +@@ -71,9 +70,9 @@ static void __iomem * atmel_get_base_add + else + size = reg[naddrc]; + +- vendor->base = address; +- vendor->region_size = size; +- return ioremap(vendor->base, vendor->region_size); ++ *base = address; ++ *region_size = size; ++ return ioremap(*base, *region_size); + } + #else + #define atmel_getb(chip, offset) inb(chip->vendor->base + offset) +@@ -106,14 +105,12 @@ static int 
atmel_verify_tpm11(void) + return 0; + } + +-static inline void atmel_put_base_addr(struct tpm_vendor_specific +- *vendor) ++static inline void atmel_put_base_addr(void __iomem *iobase) + { + } + + /* Determine where to talk to device */ +-static void __iomem * atmel_get_base_addr(struct tpm_vendor_specific +- *vendor) ++static void __iomem * atmel_get_base_addr(unsigned long *base, int *region_size) + { + int lo, hi; + +@@ -123,9 +120,9 @@ static void __iomem * atmel_get_base_add + lo = tpm_read_index(TPM_ADDR, TPM_ATMEL_BASE_ADDR_LO); + hi = tpm_read_index(TPM_ADDR, TPM_ATMEL_BASE_ADDR_HI); + +- vendor->base = (hi << 8) | lo; +- vendor->region_size = 2; ++ *base = (hi << 8) | lo; ++ *region_size = 2; + +- return ioport_map(vendor->base, vendor->region_size); ++ return ioport_map(*base, *region_size); + } + #endif +diff -pruN ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_bios.c ./drivers/char/tpm/tpm_bios.c +--- ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_bios.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./drivers/char/tpm/tpm_bios.c 2006-09-19 14:05:52.000000000 +0100 +@@ -29,6 +29,11 @@ + #define MAX_TEXT_EVENT 1000 /* Max event string length */ + #define ACPI_TCPA_SIG "TCPA" /* 0x41504354 /'TCPA' */ + ++enum bios_platform_class { ++ BIOS_CLIENT = 0x00, ++ BIOS_SERVER = 0x01, ++}; ++ + struct tpm_bios_log { + void *bios_event_log; + void *bios_event_log_end; +@@ -36,9 +41,18 @@ struct tpm_bios_log { + + struct acpi_tcpa { + struct acpi_table_header hdr; +- u16 reserved; +- u32 log_max_len __attribute__ ((packed)); +- u32 log_start_addr __attribute__ ((packed)); ++ u16 platform_class; ++ union { ++ struct client_hdr { ++ u32 log_max_len __attribute__ ((packed)); ++ u64 log_start_addr __attribute__ ((packed)); ++ } client; ++ struct server_hdr { ++ u16 reserved; ++ u64 log_max_len __attribute__ ((packed)); ++ u64 log_start_addr __attribute__ ((packed)); ++ } server; ++ }; + }; + + struct tcpa_event { +@@ -91,6 +105,12 @@ static const char* tcpa_event_type_strin + "Non-Host Info" + }; + ++struct tcpa_pc_event { ++ u32 event_id; ++ u32 event_size; ++ u8 event_data[0]; ++}; ++ + enum tcpa_pc_event_ids { + SMBIOS = 1, + BIS_CERT, +@@ -100,14 +120,15 @@ enum tcpa_pc_event_ids { + NVRAM, + OPTION_ROM_EXEC, + OPTION_ROM_CONFIG, +- OPTION_ROM_MICROCODE, ++ OPTION_ROM_MICROCODE = 10, + S_CRTM_VERSION, + S_CRTM_CONTENTS, + POST_CONTENTS, ++ HOST_TABLE_OF_DEVICES, + }; + + static const char* tcpa_pc_event_id_strings[] = { +- "" ++ "", + "SMBIOS", + "BIS Certificate", + "POST BIOS ", +@@ -116,10 +137,12 @@ static const char* tcpa_pc_event_id_stri + "NVRAM", + "Option ROM", + "Option ROM config", +- "Option ROM microcode", ++ "", ++ "Option ROM microcode ", + "S-CRTM Version", +- "S-CRTM Contents", +- "S-CRTM POST Contents", ++ "S-CRTM Contents ", ++ "POST Contents ", ++ "Table of Devices", + }; + + /* returns pointer to start of pos. 
entry of tcg log */ +@@ -191,7 +214,7 @@ static int get_event_name(char *dest, st + const char *name = ""; + char data[40] = ""; + int i, n_len = 0, d_len = 0; +- u32 event_id; ++ struct tcpa_pc_event *pc_event; + + switch(event->event_type) { + case PREBOOT: +@@ -220,31 +243,32 @@ static int get_event_name(char *dest, st + } + break; + case EVENT_TAG: +- event_id = be32_to_cpu(*((u32 *)event_entry)); ++ pc_event = (struct tcpa_pc_event *)event_entry; + + /* ToDo Row data -> Base64 */ + +- switch (event_id) { ++ switch (pc_event->event_id) { + case SMBIOS: + case BIS_CERT: + case CMOS: + case NVRAM: + case OPTION_ROM_EXEC: + case OPTION_ROM_CONFIG: +- case OPTION_ROM_MICROCODE: + case S_CRTM_VERSION: +- case S_CRTM_CONTENTS: +- case POST_CONTENTS: +- name = tcpa_pc_event_id_strings[event_id]; ++ name = tcpa_pc_event_id_strings[pc_event->event_id]; + n_len = strlen(name); + break; ++ /* hash data */ + case POST_BIOS_ROM: + case ESCD: +- name = tcpa_pc_event_id_strings[event_id]; ++ case OPTION_ROM_MICROCODE: ++ case S_CRTM_CONTENTS: ++ case POST_CONTENTS: ++ name = tcpa_pc_event_id_strings[pc_event->event_id]; + n_len = strlen(name); + for (i = 0; i < 20; i++) +- d_len += sprintf(data, "%02x", +- event_entry[8 + i]); ++ d_len += sprintf(&data[2*i], "%02x", ++ pc_event->event_data[i]); + break; + default: + break; +@@ -260,52 +284,13 @@ static int get_event_name(char *dest, st + + static int tpm_binary_bios_measurements_show(struct seq_file *m, void *v) + { ++ struct tcpa_event *event = v; ++ char *data = v; ++ int i; + +- char *eventname; +- char data[4]; +- u32 help; +- int i, len; +- struct tcpa_event *event = (struct tcpa_event *) v; +- unsigned char *event_entry = +- (unsigned char *) (v + sizeof(struct tcpa_event)); +- +- eventname = kmalloc(MAX_TEXT_EVENT, GFP_KERNEL); +- if (!eventname) { +- printk(KERN_ERR "%s: ERROR - No Memory for event name\n ", +- __func__); +- return -ENOMEM; +- } +- +- /* 1st: PCR used is in little-endian format (4 bytes) */ +- help = le32_to_cpu(event->pcr_index); +- memcpy(data, &help, 4); +- for (i = 0; i < 4; i++) +- seq_putc(m, data[i]); +- +- /* 2nd: SHA1 (20 bytes) */ +- for (i = 0; i < 20; i++) +- seq_putc(m, event->pcr_value[i]); +- +- /* 3rd: event type identifier (4 bytes) */ +- help = le32_to_cpu(event->event_type); +- memcpy(data, &help, 4); +- for (i = 0; i < 4; i++) ++ for (i = 0; i < sizeof(struct tcpa_event) + event->event_size; i++) + seq_putc(m, data[i]); + +- len = 0; +- +- len += get_event_name(eventname, event, event_entry); +- +- /* 4th: filename <= 255 + \'0' delimiter */ +- if (len > TCG_EVENT_NAME_LEN_MAX) +- len = TCG_EVENT_NAME_LEN_MAX; +- +- for (i = 0; i < len; i++) +- seq_putc(m, eventname[i]); +- +- /* 5th: delimiter */ +- seq_putc(m, '\0'); +- + return 0; + } + +@@ -353,6 +338,7 @@ static int tpm_ascii_bios_measurements_s + /* 4th: eventname <= max + \'0' delimiter */ + seq_printf(m, " %s\n", eventname); + ++ kfree(eventname); + return 0; + } + +@@ -376,6 +362,7 @@ static int read_log(struct tpm_bios_log + struct acpi_tcpa *buff; + acpi_status status; + struct acpi_table_header *virt; ++ u64 len, start; + + if (log->bios_event_log != NULL) { + printk(KERN_ERR +@@ -396,27 +383,37 @@ static int read_log(struct tpm_bios_log + return -EIO; + } + +- if (buff->log_max_len == 0) { ++ switch(buff->platform_class) { ++ case BIOS_SERVER: ++ len = buff->server.log_max_len; ++ start = buff->server.log_start_addr; ++ break; ++ case BIOS_CLIENT: ++ default: ++ len = buff->client.log_max_len; ++ start = buff->client.log_start_addr; ++ break; 
++ } ++ if (!len) { + printk(KERN_ERR "%s: ERROR - TCPA log area empty\n", __func__); + return -EIO; + } + + /* malloc EventLog space */ +- log->bios_event_log = kmalloc(buff->log_max_len, GFP_KERNEL); ++ log->bios_event_log = kmalloc(len, GFP_KERNEL); + if (!log->bios_event_log) { +- printk +- ("%s: ERROR - Not enough Memory for BIOS measurements\n", +- __func__); ++ printk("%s: ERROR - Not enough Memory for BIOS measurements\n", ++ __func__); + return -ENOMEM; + } + +- log->bios_event_log_end = log->bios_event_log + buff->log_max_len; ++ log->bios_event_log_end = log->bios_event_log + len; + +- acpi_os_map_memory(buff->log_start_addr, buff->log_max_len, (void *) &virt); ++ acpi_os_map_memory(start, len, (void *) &virt); + +- memcpy(log->bios_event_log, virt, buff->log_max_len); ++ memcpy(log->bios_event_log, virt, len); + +- acpi_os_unmap_memory(virt, buff->log_max_len); ++ acpi_os_unmap_memory(virt, len); + return 0; + } + +diff -pruN ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_infineon.c ./drivers/char/tpm/tpm_infineon.c +--- ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_infineon.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./drivers/char/tpm/tpm_infineon.c 2006-09-19 14:05:52.000000000 +0100 +@@ -15,6 +15,7 @@ + * License. + */ + ++#include <linux/init.h> + #include <linux/pnp.h> + #include "tpm.h" + +@@ -104,7 +105,7 @@ static int empty_fifo(struct tpm_chip *c + + if (clear_wrfifo) { + for (i = 0; i < 4096; i++) { +- status = inb(chip->vendor->base + WRFIFO); ++ status = inb(chip->vendor.base + WRFIFO); + if (status == 0xff) { + if (check == 5) + break; +@@ -124,8 +125,8 @@ static int empty_fifo(struct tpm_chip *c + */ + i = 0; + do { +- status = inb(chip->vendor->base + RDFIFO); +- status = inb(chip->vendor->base + STAT); ++ status = inb(chip->vendor.base + RDFIFO); ++ status = inb(chip->vendor.base + STAT); + i++; + if (i == TPM_MAX_TRIES) + return -EIO; +@@ -138,7 +139,7 @@ static int wait(struct tpm_chip *chip, i + int status; + int i; + for (i = 0; i < TPM_MAX_TRIES; i++) { +- status = inb(chip->vendor->base + STAT); ++ status = inb(chip->vendor.base + STAT); + /* check the status-register if wait_for_bit is set */ + if (status & 1 << wait_for_bit) + break; +@@ -157,7 +158,7 @@ static int wait(struct tpm_chip *chip, i + static void wait_and_send(struct tpm_chip *chip, u8 sendbyte) + { + wait(chip, STAT_XFE); +- outb(sendbyte, chip->vendor->base + WRFIFO); ++ outb(sendbyte, chip->vendor.base + WRFIFO); + } + + /* Note: WTX means Waiting-Time-Extension. 
Whenever the TPM needs more +@@ -204,7 +205,7 @@ recv_begin: + ret = wait(chip, STAT_RDA); + if (ret) + return -EIO; +- buf[i] = inb(chip->vendor->base + RDFIFO); ++ buf[i] = inb(chip->vendor.base + RDFIFO); + } + + if (buf[0] != TPM_VL_VER) { +@@ -219,7 +220,7 @@ recv_begin: + + for (i = 0; i < size; i++) { + wait(chip, STAT_RDA); +- buf[i] = inb(chip->vendor->base + RDFIFO); ++ buf[i] = inb(chip->vendor.base + RDFIFO); + } + + if ((size == 0x6D00) && (buf[1] == 0x80)) { +@@ -268,7 +269,7 @@ static int tpm_inf_send(struct tpm_chip + u8 count_high, count_low, count_4, count_3, count_2, count_1; + + /* Disabling Reset, LP and IRQC */ +- outb(RESET_LP_IRQC_DISABLE, chip->vendor->base + CMD); ++ outb(RESET_LP_IRQC_DISABLE, chip->vendor.base + CMD); + + ret = empty_fifo(chip, 1); + if (ret) { +@@ -319,7 +320,7 @@ static void tpm_inf_cancel(struct tpm_ch + + static u8 tpm_inf_status(struct tpm_chip *chip) + { +- return inb(chip->vendor->base + STAT); ++ return inb(chip->vendor.base + STAT); + } + + static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL); +@@ -346,7 +347,7 @@ static struct file_operations inf_ops = + .release = tpm_release, + }; + +-static struct tpm_vendor_specific tpm_inf = { ++static const struct tpm_vendor_specific tpm_inf = { + .recv = tpm_inf_recv, + .send = tpm_inf_send, + .cancel = tpm_inf_cancel, +@@ -375,6 +376,7 @@ static int __devinit tpm_inf_pnp_probe(s + int version[2]; + int productid[2]; + char chipname[20]; ++ struct tpm_chip *chip; + + /* read IO-ports through PnP */ + if (pnp_port_valid(dev, 0) && pnp_port_valid(dev, 1) && +@@ -395,14 +397,13 @@ static int __devinit tpm_inf_pnp_probe(s + goto err_last; + } + /* publish my base address and request region */ +- tpm_inf.base = TPM_INF_BASE; + if (request_region +- (tpm_inf.base, TPM_INF_PORT_LEN, "tpm_infineon0") == NULL) { ++ (TPM_INF_BASE, TPM_INF_PORT_LEN, "tpm_infineon0") == NULL) { + rc = -EINVAL; + goto err_last; + } +- if (request_region(TPM_INF_ADDR, TPM_INF_ADDR_LEN, +- "tpm_infineon0") == NULL) { ++ if (request_region ++ (TPM_INF_ADDR, TPM_INF_ADDR_LEN, "tpm_infineon0") == NULL) { + rc = -EINVAL; + goto err_last; + } +@@ -442,9 +443,9 @@ static int __devinit tpm_inf_pnp_probe(s + + /* configure TPM with IO-ports */ + outb(IOLIMH, TPM_INF_ADDR); +- outb(((tpm_inf.base >> 8) & 0xff), TPM_INF_DATA); ++ outb(((TPM_INF_BASE >> 8) & 0xff), TPM_INF_DATA); + outb(IOLIML, TPM_INF_ADDR); +- outb((tpm_inf.base & 0xff), TPM_INF_DATA); ++ outb((TPM_INF_BASE & 0xff), TPM_INF_DATA); + + /* control if IO-ports are set correctly */ + outb(IOLIMH, TPM_INF_ADDR); +@@ -452,10 +453,10 @@ static int __devinit tpm_inf_pnp_probe(s + outb(IOLIML, TPM_INF_ADDR); + iol = inb(TPM_INF_DATA); + +- if ((ioh << 8 | iol) != tpm_inf.base) { ++ if ((ioh << 8 | iol) != TPM_INF_BASE) { + dev_err(&dev->dev, +- "Could not set IO-ports to 0x%lx\n", +- tpm_inf.base); ++ "Could not set IO-ports to 0x%x\n", ++ TPM_INF_BASE); + rc = -EIO; + goto err_release_region; + } +@@ -466,15 +467,15 @@ static int __devinit tpm_inf_pnp_probe(s + outb(DISABLE_REGISTER_PAIR, TPM_INF_ADDR); + + /* disable RESET, LP and IRQC */ +- outb(RESET_LP_IRQC_DISABLE, tpm_inf.base + CMD); ++ outb(RESET_LP_IRQC_DISABLE, TPM_INF_BASE + CMD); + + /* Finally, we're done, print some infos */ + dev_info(&dev->dev, "TPM found: " + "config base 0x%x, " + "io base 0x%x, " +- "chip version %02x%02x, " +- "vendor id %x%x (Infineon), " +- "product id %02x%02x" ++ "chip version 0x%02x%02x, " ++ "vendor id 0x%x%x (Infineon), " ++ "product id 0x%02x%02x" + "%s\n", + TPM_INF_ADDR, + 
TPM_INF_BASE, +@@ -482,11 +483,10 @@ static int __devinit tpm_inf_pnp_probe(s + vendorid[0], vendorid[1], + productid[0], productid[1], chipname); + +- rc = tpm_register_hardware(&dev->dev, &tpm_inf); +- if (rc < 0) { +- rc = -ENODEV; ++ if (!(chip = tpm_register_hardware(&dev->dev, &tpm_inf))) { + goto err_release_region; + } ++ chip->vendor.base = TPM_INF_BASE; + return 0; + } else { + rc = -ENODEV; +@@ -494,7 +494,7 @@ static int __devinit tpm_inf_pnp_probe(s + } + + err_release_region: +- release_region(tpm_inf.base, TPM_INF_PORT_LEN); ++ release_region(TPM_INF_BASE, TPM_INF_PORT_LEN); + release_region(TPM_INF_ADDR, TPM_INF_ADDR_LEN); + + err_last: +@@ -506,7 +506,8 @@ static __devexit void tpm_inf_pnp_remove + struct tpm_chip *chip = pnp_get_drvdata(dev); + + if (chip) { +- release_region(chip->vendor->base, TPM_INF_PORT_LEN); ++ release_region(TPM_INF_BASE, TPM_INF_PORT_LEN); ++ release_region(TPM_INF_ADDR, TPM_INF_ADDR_LEN); + tpm_remove_hardware(chip->dev); + } + } +@@ -520,7 +521,7 @@ static struct pnp_driver tpm_inf_pnp = { + }, + .id_table = tpm_pnp_tbl, + .probe = tpm_inf_pnp_probe, +- .remove = tpm_inf_pnp_remove, ++ .remove = __devexit_p(tpm_inf_pnp_remove), + }; + + static int __init init_inf(void) +@@ -538,5 +539,5 @@ module_exit(cleanup_inf); + + MODULE_AUTHOR("Marcel Selhorst <selhorst@xxxxxxxxxxxxx>"); + MODULE_DESCRIPTION("Driver for Infineon TPM SLD 9630 TT 1.1 / SLB 9635 TT 1.2"); +-MODULE_VERSION("1.7"); ++MODULE_VERSION("1.8"); + MODULE_LICENSE("GPL"); +diff -pruN ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_nsc.c ./drivers/char/tpm/tpm_nsc.c +--- ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_nsc.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./drivers/char/tpm/tpm_nsc.c 2006-09-19 14:05:52.000000000 +0100 +@@ -71,7 +71,7 @@ static int wait_for_stat(struct tpm_chip + unsigned long stop; + + /* status immediately available check */ +- *data = inb(chip->vendor->base + NSC_STATUS); ++ *data = inb(chip->vendor.base + NSC_STATUS); + if ((*data & mask) == val) + return 0; + +@@ -79,7 +79,7 @@ static int wait_for_stat(struct tpm_chip + stop = jiffies + 10 * HZ; + do { + msleep(TPM_TIMEOUT); +- *data = inb(chip->vendor->base + 1); ++ *data = inb(chip->vendor.base + 1); + if ((*data & mask) == val) + return 0; + } +@@ -94,9 +94,9 @@ static int nsc_wait_for_ready(struct tpm + unsigned long stop; + + /* status immediately available check */ +- status = inb(chip->vendor->base + NSC_STATUS); ++ status = inb(chip->vendor.base + NSC_STATUS); + if (status & NSC_STATUS_OBF) +- status = inb(chip->vendor->base + NSC_DATA); ++ status = inb(chip->vendor.base + NSC_DATA); + if (status & NSC_STATUS_RDY) + return 0; + +@@ -104,9 +104,9 @@ static int nsc_wait_for_ready(struct tpm + stop = jiffies + 100; + do { + msleep(TPM_TIMEOUT); +- status = inb(chip->vendor->base + NSC_STATUS); ++ status = inb(chip->vendor.base + NSC_STATUS); + if (status & NSC_STATUS_OBF) +- status = inb(chip->vendor->base + NSC_DATA); ++ status = inb(chip->vendor.base + NSC_DATA); + if (status & NSC_STATUS_RDY) + return 0; + } +@@ -132,7 +132,7 @@ static int tpm_nsc_recv(struct tpm_chip + return -EIO; + } + if ((data = +- inb(chip->vendor->base + NSC_DATA)) != NSC_COMMAND_NORMAL) { ++ inb(chip->vendor.base + NSC_DATA)) != NSC_COMMAND_NORMAL) { + dev_err(chip->dev, "not in normal mode (0x%x)\n", + data); + return -EIO; +@@ -148,7 +148,7 @@ static int tpm_nsc_recv(struct tpm_chip + } + if (data & NSC_STATUS_F0) + break; +- *p = inb(chip->vendor->base + NSC_DATA); ++ *p = inb(chip->vendor.base + NSC_DATA); + } + + if ((data & 
NSC_STATUS_F0) == 0 && +@@ -156,7 +156,7 @@ static int tpm_nsc_recv(struct tpm_chip + dev_err(chip->dev, "F0 not set\n"); + return -EIO; + } +- if ((data = inb(chip->vendor->base + NSC_DATA)) != NSC_COMMAND_EOC) { ++ if ((data = inb(chip->vendor.base + NSC_DATA)) != NSC_COMMAND_EOC) { + dev_err(chip->dev, + "expected end of command(0x%x)\n", data); + return -EIO; +@@ -182,7 +182,7 @@ static int tpm_nsc_send(struct tpm_chip + * fix it. Not sure why this is needed, we followed the flow + * chart in the manual to the letter. + */ +- outb(NSC_COMMAND_CANCEL, chip->vendor->base + NSC_COMMAND); ++ outb(NSC_COMMAND_CANCEL, chip->vendor.base + NSC_COMMAND); + + if (nsc_wait_for_ready(chip) != 0) + return -EIO; +@@ -192,7 +192,7 @@ static int tpm_nsc_send(struct tpm_chip + return -EIO; + } + +- outb(NSC_COMMAND_NORMAL, chip->vendor->base + NSC_COMMAND); ++ outb(NSC_COMMAND_NORMAL, chip->vendor.base + NSC_COMMAND); + if (wait_for_stat(chip, NSC_STATUS_IBR, NSC_STATUS_IBR, &data) < 0) { + dev_err(chip->dev, "IBR timeout\n"); + return -EIO; +@@ -204,26 +204,26 @@ static int tpm_nsc_send(struct tpm_chip + "IBF timeout (while writing data)\n"); + return -EIO; + } +- outb(buf[i], chip->vendor->base + NSC_DATA); ++ outb(buf[i], chip->vendor.base + NSC_DATA); + } + + if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) { + dev_err(chip->dev, "IBF timeout\n"); + return -EIO; + } +- outb(NSC_COMMAND_EOC, chip->vendor->base + NSC_COMMAND); ++ outb(NSC_COMMAND_EOC, chip->vendor.base + NSC_COMMAND); + + return count; + } + + static void tpm_nsc_cancel(struct tpm_chip *chip) + { +- outb(NSC_COMMAND_CANCEL, chip->vendor->base + NSC_COMMAND); ++ outb(NSC_COMMAND_CANCEL, chip->vendor.base + NSC_COMMAND); + } + + static u8 tpm_nsc_status(struct tpm_chip *chip) + { +- return inb(chip->vendor->base + NSC_STATUS); ++ return inb(chip->vendor.base + NSC_STATUS); + } + + static struct file_operations nsc_ops = { +@@ -250,7 +250,7 @@ static struct attribute * nsc_attrs[] = + + static struct attribute_group nsc_attr_grp = { .attrs = nsc_attrs }; + +-static struct tpm_vendor_specific tpm_nsc = { ++static const struct tpm_vendor_specific tpm_nsc = { + .recv = tpm_nsc_recv, + .send = tpm_nsc_send, + .cancel = tpm_nsc_cancel, +@@ -268,7 +268,7 @@ static void __devexit tpm_nsc_remove(str + { + struct tpm_chip *chip = dev_get_drvdata(dev); + if ( chip ) { +- release_region(chip->vendor->base, 2); ++ release_region(chip->vendor.base, 2); + tpm_remove_hardware(chip->dev); + } + } +@@ -286,7 +286,8 @@ static int __init init_nsc(void) + int rc = 0; + int lo, hi; + int nscAddrBase = TPM_ADDR; +- ++ struct tpm_chip *chip; ++ unsigned long base; + + /* verify that it is a National part (SID) */ + if (tpm_read_index(TPM_ADDR, NSC_SID_INDEX) != 0xEF) { +@@ -300,7 +301,7 @@ static int __init init_nsc(void) + + hi = tpm_read_index(nscAddrBase, TPM_NSC_BASE0_HI); + lo = tpm_read_index(nscAddrBase, TPM_NSC_BASE0_LO); +- tpm_nsc.base = (hi<<8) | lo; ++ base = (hi<<8) | lo; + + /* enable the DPM module */ + tpm_write_index(nscAddrBase, NSC_LDC_INDEX, 0x01); +@@ -320,13 +321,15 @@ static int __init init_nsc(void) + if ((rc = platform_device_register(pdev)) < 0) + goto err_free_dev; + +- if (request_region(tpm_nsc.base, 2, "tpm_nsc0") == NULL ) { ++ if (request_region(base, 2, "tpm_nsc0") == NULL ) { + rc = -EBUSY; + goto err_unreg_dev; + } + +- if ((rc = tpm_register_hardware(&pdev->dev, &tpm_nsc)) < 0) ++ if (!(chip = tpm_register_hardware(&pdev->dev, &tpm_nsc))) { ++ rc = -ENODEV; + goto err_rel_reg; ++ } + + dev_dbg(&pdev->dev, "NSC TPM 
detected\n"); + dev_dbg(&pdev->dev, +@@ -361,10 +364,12 @@ static int __init init_nsc(void) + "NSC TPM revision %d\n", + tpm_read_index(nscAddrBase, 0x27) & 0x1F); + ++ chip->vendor.base = base; ++ + return 0; + + err_rel_reg: +- release_region(tpm_nsc.base, 2); ++ release_region(base, 2); + err_unreg_dev: + platform_device_unregister(pdev); + err_free_dev: +diff -pruN ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_tis.c ./drivers/char/tpm/tpm_tis.c +--- ../orig-linux-2.6.16.29/drivers/char/tpm/tpm_tis.c 1970-01-01 01:00:00.000000000 +0100 ++++ ./drivers/char/tpm/tpm_tis.c 2006-09-19 14:05:52.000000000 +0100 +@@ -0,0 +1,665 @@ ++/* ++ * Copyright (C) 2005, 2006 IBM Corporation ++ * ++ * Authors: ++ * Leendert van Doorn <leendert@xxxxxxxxxxxxxx> ++ * Kylene Hall <kjhall@xxxxxxxxxx> ++ * ++ * Device driver for TCG/TCPA TPM (trusted platform module). ++ * Specifications at www.trustedcomputinggroup.org ++ * ++ * This device driver implements the TPM interface as defined in ++ * the TCG TPM Interface Spec version 1.2, revision 1.0. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. ++ */ ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/moduleparam.h> ++#include <linux/pnp.h> ++#include <linux/interrupt.h> ++#include <linux/wait.h> ++#include "tpm.h" ++ ++#define TPM_HEADER_SIZE 10 ++ ++enum tis_access { ++ TPM_ACCESS_VALID = 0x80, ++ TPM_ACCESS_ACTIVE_LOCALITY = 0x20, ++ TPM_ACCESS_REQUEST_PENDING = 0x04, ++ TPM_ACCESS_REQUEST_USE = 0x02, ++}; ++ ++enum tis_status { ++ TPM_STS_VALID = 0x80, ++ TPM_STS_COMMAND_READY = 0x40, ++ TPM_STS_GO = 0x20, ++ TPM_STS_DATA_AVAIL = 0x10, ++ TPM_STS_DATA_EXPECT = 0x08, ++}; ++ ++enum tis_int_flags { ++ TPM_GLOBAL_INT_ENABLE = 0x80000000, ++ TPM_INTF_BURST_COUNT_STATIC = 0x100, ++ TPM_INTF_CMD_READY_INT = 0x080, ++ TPM_INTF_INT_EDGE_FALLING = 0x040, ++ TPM_INTF_INT_EDGE_RISING = 0x020, ++ TPM_INTF_INT_LEVEL_LOW = 0x010, ++ TPM_INTF_INT_LEVEL_HIGH = 0x008, ++ TPM_INTF_LOCALITY_CHANGE_INT = 0x004, ++ TPM_INTF_STS_VALID_INT = 0x002, ++ TPM_INTF_DATA_AVAIL_INT = 0x001, ++}; ++ ++enum tis_defaults { ++ TIS_MEM_BASE = 0xFED40000, ++ TIS_MEM_LEN = 0x5000, ++ TIS_SHORT_TIMEOUT = 750, /* ms */ ++ TIS_LONG_TIMEOUT = 2000, /* 2 sec */ ++}; ++ ++#define TPM_ACCESS(l) (0x0000 | ((l) << 12)) ++#define TPM_INT_ENABLE(l) (0x0008 | ((l) << 12)) ++#define TPM_INT_VECTOR(l) (0x000C | ((l) << 12)) ++#define TPM_INT_STATUS(l) (0x0010 | ((l) << 12)) ++#define TPM_INTF_CAPS(l) (0x0014 | ((l) << 12)) ++#define TPM_STS(l) (0x0018 | ((l) << 12)) ++#define TPM_DATA_FIFO(l) (0x0024 | ((l) << 12)) ++ ++#define TPM_DID_VID(l) (0x0F00 | ((l) << 12)) ++#define TPM_RID(l) (0x0F04 | ((l) << 12)) ++ ++static LIST_HEAD(tis_chips); ++static DEFINE_SPINLOCK(tis_lock); ++ ++static int check_locality(struct tpm_chip *chip, int l) ++{ ++ if ((ioread8(chip->vendor.iobase + TPM_ACCESS(l)) & ++ (TPM_ACCESS_ACTIVE_LOCALITY | TPM_ACCESS_VALID)) == ++ (TPM_ACCESS_ACTIVE_LOCALITY | TPM_ACCESS_VALID)) ++ return chip->vendor.locality = l; ++ ++ return -1; ++} ++ ++static void release_locality(struct tpm_chip *chip, int l, int force) ++{ ++ if (force || (ioread8(chip->vendor.iobase + TPM_ACCESS(l)) & ++ (TPM_ACCESS_REQUEST_PENDING | TPM_ACCESS_VALID)) == ++ (TPM_ACCESS_REQUEST_PENDING | TPM_ACCESS_VALID)) ++ iowrite8(TPM_ACCESS_ACTIVE_LOCALITY, ++ chip->vendor.iobase + TPM_ACCESS(l)); ++} ++ ++static int 
request_locality(struct tpm_chip *chip, int l) ++{ ++ unsigned long stop; ++ long rc; ++ ++ if (check_locality(chip, l) >= 0) ++ return l; ++ ++ iowrite8(TPM_ACCESS_REQUEST_USE, ++ chip->vendor.iobase + TPM_ACCESS(l)); ++ ++ if (chip->vendor.irq) { ++ rc = wait_event_interruptible_timeout(chip->vendor.int_queue, ++ (check_locality ++ (chip, l) >= 0), ++ chip->vendor.timeout_a); ++ if (rc > 0) ++ return l; ++ ++ } else { ++ /* wait for burstcount */ ++ stop = jiffies + chip->vendor.timeout_a; ++ do { ++ if (check_locality(chip, l) >= 0) ++ return l; ++ msleep(TPM_TIMEOUT); ++ } ++ while (time_before(jiffies, stop)); ++ } ++ return -1; ++} ++ ++static u8 tpm_tis_status(struct tpm_chip *chip) ++{ ++ return ioread8(chip->vendor.iobase + ++ TPM_STS(chip->vendor.locality)); ++} ++ ++static void tpm_tis_ready(struct tpm_chip *chip) ++{ ++ /* this causes the current command to be aborted */ ++ iowrite8(TPM_STS_COMMAND_READY, ++ chip->vendor.iobase + TPM_STS(chip->vendor.locality)); ++} ++ ++static int get_burstcount(struct tpm_chip *chip) ++{ ++ unsigned long stop; ++ int burstcnt; ++ ++ /* wait for burstcount */ ++ /* which timeout value, spec has 2 answers (c & d) */ ++ stop = jiffies + chip->vendor.timeout_d; ++ do { ++ burstcnt = ioread8(chip->vendor.iobase + ++ TPM_STS(chip->vendor.locality) + 1); ++ burstcnt += ioread8(chip->vendor.iobase + ++ TPM_STS(chip->vendor.locality) + ++ 2) << 8; ++ if (burstcnt) ++ return burstcnt; ++ msleep(TPM_TIMEOUT); ++ } while (time_before(jiffies, stop)); ++ return -EBUSY; ++} ++ ++static int wait_for_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout, ++ wait_queue_head_t *queue) ++{ ++ unsigned long stop; ++ long rc; ++ u8 status; ++ ++ /* check current status */ ++ status = tpm_tis_status(chip); ++ if ((status & mask) == mask) ++ return 0; ++ ++ if (chip->vendor.irq) { ++ rc = wait_event_interruptible_timeout(*queue, ++ ((tpm_tis_status ++ (chip) & mask) == ++ mask), timeout); ++ if (rc > 0) ++ return 0; ++ } else { ++ stop = jiffies + timeout; ++ do { ++ msleep(TPM_TIMEOUT); ++ status = tpm_tis_status(chip); ++ if ((status & mask) == mask) ++ return 0; ++ } while (time_before(jiffies, stop)); ++ } ++ return -ETIME; ++} ++ ++static int recv_data(struct tpm_chip *chip, u8 *buf, size_t count) ++{ ++ int size = 0, burstcnt; ++ while (size < count && ++ wait_for_stat(chip, ++ TPM_STS_DATA_AVAIL | TPM_STS_VALID, ++ chip->vendor.timeout_c, ++ &chip->vendor.read_queue) ++ == 0) { ++ burstcnt = get_burstcount(chip); ++ for (; burstcnt > 0 && size < count; burstcnt--) ++ buf[size++] = ioread8(chip->vendor.iobase + ++ TPM_DATA_FIFO(chip->vendor. 
++ locality)); ++ } ++ return size; ++} ++ ++static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count) ++{ ++ int size = 0; ++ int expected, status; ++ ++ if (count < TPM_HEADER_SIZE) { ++ size = -EIO; ++ goto out; ++ } ++ ++ /* read first 10 bytes, including tag, paramsize, and result */ ++ if ((size = ++ recv_data(chip, buf, TPM_HEADER_SIZE)) < TPM_HEADER_SIZE) { ++ dev_err(chip->dev, "Unable to read header\n"); ++ goto out; ++ } ++ ++ expected = be32_to_cpu(*(__be32 *) (buf + 2)); ++ if (expected > count) { ++ size = -EIO; ++ goto out; ++ } ++ ++ if ((size += ++ recv_data(chip, &buf[TPM_HEADER_SIZE], ++ expected - TPM_HEADER_SIZE)) < expected) { ++ dev_err(chip->dev, "Unable to read remainder of result\n"); ++ size = -ETIME; ++ goto out; ++ } ++ ++ wait_for_stat(chip, TPM_STS_VALID, chip->vendor.timeout_c, ++ &chip->vendor.int_queue); ++ status = tpm_tis_status(chip); ++ if (status & TPM_STS_DATA_AVAIL) { /* retry? */ ++ dev_err(chip->dev, "Error left over data\n"); ++ size = -EIO; ++ goto out; ++ } ++ ++out: ++ tpm_tis_ready(chip); ++ release_locality(chip, chip->vendor.locality, 0); ++ return size; ++} ++ ++/* ++ * If interrupts are used (signaled by an irq set in the vendor structure) ++ * tpm.c can skip polling for the data to be available as the interrupt is ++ * waited for here ++ */ ++static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) ++{ ++ int rc, status, burstcnt; ++ size_t count = 0; ++ u32 ordinal; ++ ++ if (request_locality(chip, 0) < 0) ++ return -EBUSY; ++ ++ status = tpm_tis_status(chip); ++ if ((status & TPM_STS_COMMAND_READY) == 0) { ++ tpm_tis_ready(chip); ++ if (wait_for_stat ++ (chip, TPM_STS_COMMAND_READY, chip->vendor.timeout_b, ++ &chip->vendor.int_queue) < 0) { ++ rc = -ETIME; ++ goto out_err; ++ } ++ } ++ ++ while (count < len - 1) { ++ burstcnt = get_burstcount(chip); ++ for (; burstcnt > 0 && count < len - 1; burstcnt--) { ++ iowrite8(buf[count], chip->vendor.iobase + ++ TPM_DATA_FIFO(chip->vendor.locality)); ++ count++; ++ } ++ ++ wait_for_stat(chip, TPM_STS_VALID, chip->vendor.timeout_c, ++ &chip->vendor.int_queue); ++ status = tpm_tis_status(chip); ++ if ((status & TPM_STS_DATA_EXPECT) == 0) { ++ rc = -EIO; ++ goto out_err; ++ } ++ } ++ ++ /* write last byte */ ++ iowrite8(buf[count], ++ chip->vendor.iobase + ++ TPM_DATA_FIFO(chip->vendor.locality)); ++ wait_for_stat(chip, TPM_STS_VALID, chip->vendor.timeout_c, ++ &chip->vendor.int_queue); ++ status = tpm_tis_status(chip); ++ if ((status & TPM_STS_DATA_EXPECT) != 0) { ++ rc = -EIO; ++ goto out_err; ++ } ++ ++ /* go and do it */ ++ iowrite8(TPM_STS_GO, ++ chip->vendor.iobase + TPM_STS(chip->vendor.locality)); ++ ++ if (chip->vendor.irq) { ++ ordinal = be32_to_cpu(*((__be32 *) (buf + 6))); ++ if (wait_for_stat ++ (chip, TPM_STS_DATA_AVAIL | TPM_STS_VALID, ++ tpm_calc_ordinal_duration(chip, ordinal), ++ &chip->vendor.read_queue) < 0) { ++ rc = -ETIME; ++ goto out_err; ++ } ++ } ++ return len; ++out_err: ++ tpm_tis_ready(chip); ++ release_locality(chip, chip->vendor.locality, 0); ++ return rc; ++} ++ ++static struct file_operations tis_ops = { ++ .owner = THIS_MODULE, ++ .llseek = no_llseek, ++ .open = tpm_open, ++ .read = tpm_read, ++ .write = tpm_write, ++ .release = tpm_release, ++}; ++ ++static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL); ++static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL); ++static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL); ++static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL); ++static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, 
NULL); ++static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, ++ NULL); ++static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL); ++static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); ++ ++static struct attribute *tis_attrs[] = { ++ &dev_attr_pubek.attr, ++ &dev_attr_pcrs.attr, ++ &dev_attr_enabled.attr, ++ &dev_attr_active.attr, ++ &dev_attr_owned.attr, ++ &dev_attr_temp_deactivated.attr, ++ &dev_attr_caps.attr, ++ &dev_attr_cancel.attr, NULL, ++}; ++ ++static struct attribute_group tis_attr_grp = { ++ .attrs = tis_attrs ++}; ++ ++static struct tpm_vendor_specific tpm_tis = { ++ .status = tpm_tis_status, ++ .recv = tpm_tis_recv, ++ .send = tpm_tis_send, ++ .cancel = tpm_tis_ready, ++ .req_complete_mask = TPM_STS_DATA_AVAIL | TPM_STS_VALID, ++ .req_complete_val = TPM_STS_DATA_AVAIL | TPM_STS_VALID, ++ .req_canceled = TPM_STS_COMMAND_READY, ++ .attr_group = &tis_attr_grp, ++ .miscdev = { ++ .fops = &tis_ops,}, ++}; ++ ++static irqreturn_t tis_int_probe(int irq, void *dev_id, struct pt_regs *regs) ++{ ++ struct tpm_chip *chip = (struct tpm_chip *) dev_id; ++ u32 interrupt; ++ ++ interrupt = ioread32(chip->vendor.iobase + ++ TPM_INT_STATUS(chip->vendor.locality)); ++ ++ if (interrupt == 0) ++ return IRQ_NONE; ++ ++ chip->vendor.irq = irq; ++ ++ /* Clear interrupts handled with TPM_EOI */ ++ iowrite32(interrupt, ++ chip->vendor.iobase + ++ TPM_INT_STATUS(chip->vendor.locality)); ++ return IRQ_HANDLED; ++} ++ ++static irqreturn_t tis_int_handler(int irq, void *dev_id, struct pt_regs *regs) ++{ ++ struct tpm_chip *chip = (struct tpm_chip *) dev_id; ++ u32 interrupt; ++ int i; ++ ++ interrupt = ioread32(chip->vendor.iobase + ++ TPM_INT_STATUS(chip->vendor.locality)); ++ ++ if (interrupt == 0) ++ return IRQ_NONE; ++ ++ if (interrupt & TPM_INTF_DATA_AVAIL_INT) ++ wake_up_interruptible(&chip->vendor.read_queue); ++ if (interrupt & TPM_INTF_LOCALITY_CHANGE_INT) ++ for (i = 0; i < 5; i++) ++ if (check_locality(chip, i) >= 0) ++ break; ++ if (interrupt & ++ (TPM_INTF_LOCALITY_CHANGE_INT | TPM_INTF_STS_VALID_INT | ++ TPM_INTF_CMD_READY_INT)) ++ wake_up_interruptible(&chip->vendor.int_queue); ++ ++ /* Clear interrupts handled with TPM_EOI */ ++ iowrite32(interrupt, ++ chip->vendor.iobase + ++ TPM_INT_STATUS(chip->vendor.locality)); ++ return IRQ_HANDLED; ++} ++ ++static int interrupts = 1; ++module_param(interrupts, bool, 0444); ++MODULE_PARM_DESC(interrupts, "Enable interrupts"); ++ ++static int __devinit tpm_tis_pnp_init(struct pnp_dev *pnp_dev, ++ const struct pnp_device_id *pnp_id) ++{ ++ u32 vendor, intfcaps, intmask; ++ int rc, i; ++ unsigned long start, len; ++ struct tpm_chip *chip; ++ ++ start = pnp_mem_start(pnp_dev, 0); ++ len = pnp_mem_len(pnp_dev, 0); ++ ++ if (!start) ++ start = TIS_MEM_BASE; ++ if (!len) ++ len = TIS_MEM_LEN; ++ ++ if (!(chip = tpm_register_hardware(&pnp_dev->dev, &tpm_tis))) ++ return -ENODEV; ++ ++ chip->vendor.iobase = ioremap(start, len); ++ if (!chip->vendor.iobase) { ++ rc = -EIO; ++ goto out_err; ++ } ++ ++ vendor = ioread32(chip->vendor.iobase + TPM_DID_VID(0)); ++ ++ /* Default timeouts */ ++ chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT); ++ chip->vendor.timeout_b = msecs_to_jiffies(TIS_LONG_TIMEOUT); ++ chip->vendor.timeout_c = msecs_to_jiffies(TIS_SHORT_TIMEOUT); ++ chip->vendor.timeout_d = msecs_to_jiffies(TIS_SHORT_TIMEOUT); ++ ++ dev_info(&pnp_dev->dev, ++ "1.2 TPM (device-id 0x%X, rev-id %d)\n", ++ vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0))); ++ ++ /* Figure out the capabilities */ ++ 
intfcaps = ++ ioread32(chip->vendor.iobase + ++ TPM_INTF_CAPS(chip->vendor.locality)); ++ dev_dbg(&pnp_dev->dev, "TPM interface capabilities (0x%x):\n", ++ intfcaps); ++ if (intfcaps & TPM_INTF_BURST_COUNT_STATIC) ++ dev_dbg(&pnp_dev->dev, "\tBurst Count Static\n"); ++ if (intfcaps & TPM_INTF_CMD_READY_INT) ++ dev_dbg(&pnp_dev->dev, "\tCommand Ready Int Support\n"); ++ if (intfcaps & TPM_INTF_INT_EDGE_FALLING) ++ dev_dbg(&pnp_dev->dev, "\tInterrupt Edge Falling\n"); ++ if (intfcaps & TPM_INTF_INT_EDGE_RISING) ++ dev_dbg(&pnp_dev->dev, "\tInterrupt Edge Rising\n"); ++ if (intfcaps & TPM_INTF_INT_LEVEL_LOW) ++ dev_dbg(&pnp_dev->dev, "\tInterrupt Level Low\n"); ++ if (intfcaps & TPM_INTF_INT_LEVEL_HIGH) ++ dev_dbg(&pnp_dev->dev, "\tInterrupt Level High\n"); ++ if (intfcaps & TPM_INTF_LOCALITY_CHANGE_INT) ++ dev_dbg(&pnp_dev->dev, "\tLocality Change Int Support\n"); ++ if (intfcaps & TPM_INTF_STS_VALID_INT) ++ dev_dbg(&pnp_dev->dev, "\tSts Valid Int Support\n"); ++ if (intfcaps & TPM_INTF_DATA_AVAIL_INT) ++ dev_dbg(&pnp_dev->dev, "\tData Avail Int Support\n"); ++ ++ if (request_locality(chip, 0) != 0) { ++ rc = -ENODEV; ++ goto out_err; ++ } ++ ++ /* INTERRUPT Setup */ ++ init_waitqueue_head(&chip->vendor.read_queue); ++ init_waitqueue_head(&chip->vendor.int_queue); ++ ++ intmask = ++ ioread32(chip->vendor.iobase + ++ TPM_INT_ENABLE(chip->vendor.locality)); ++ ++ intmask |= TPM_INTF_CMD_READY_INT ++ | TPM_INTF_LOCALITY_CHANGE_INT | TPM_INTF_DATA_AVAIL_INT ++ | TPM_INTF_STS_VALID_INT; ++ ++ iowrite32(intmask, ++ chip->vendor.iobase + ++ TPM_INT_ENABLE(chip->vendor.locality)); ++ if (interrupts) { ++ chip->vendor.irq = ++ ioread8(chip->vendor.iobase + ++ TPM_INT_VECTOR(chip->vendor.locality)); ++ ++ for (i = 3; i < 16 && chip->vendor.irq == 0; i++) { ++ iowrite8(i, chip->vendor.iobase + ++ TPM_INT_VECTOR(chip->vendor.locality)); ++ if (request_irq ++ (i, tis_int_probe, SA_SHIRQ, ++ chip->vendor.miscdev.name, chip) != 0) { ++ dev_info(chip->dev, ++ "Unable to request irq: %d for probe\n", ++ i); ++ continue; ++ } ++ ++ /* Clear all existing */ ++ iowrite32(ioread32 ++ (chip->vendor.iobase + ++ TPM_INT_STATUS(chip->vendor.locality)), ++ chip->vendor.iobase + ++ TPM_INT_STATUS(chip->vendor.locality)); ++ ++ /* Turn on */ ++ iowrite32(intmask | TPM_GLOBAL_INT_ENABLE, ++ chip->vendor.iobase + ++ TPM_INT_ENABLE(chip->vendor.locality)); ++ ++ /* Generate Interrupts */ ++ tpm_gen_interrupt(chip); ++ ++ /* Turn off */ ++ iowrite32(intmask, ++ chip->vendor.iobase + ++ TPM_INT_ENABLE(chip->vendor.locality)); ++ free_irq(i, chip); ++ } ++ } ++ if (chip->vendor.irq) { ++ iowrite8(chip->vendor.irq, ++ chip->vendor.iobase + ++ TPM_INT_VECTOR(chip->vendor.locality)); ++ if (request_irq ++ (chip->vendor.irq, tis_int_handler, SA_SHIRQ, ++ chip->vendor.miscdev.name, chip) != 0) { ++ dev_info(chip->dev, ++ "Unable to request irq: %d for use\n", ++ chip->vendor.irq); ++ chip->vendor.irq = 0; ++ } else { ++ /* Clear all existing */ ++ iowrite32(ioread32 ++ (chip->vendor.iobase + ++ TPM_INT_STATUS(chip->vendor.locality)), ++ chip->vendor.iobase + ++ TPM_INT_STATUS(chip->vendor.locality)); ++ ++ /* Turn on */ ++ iowrite32(intmask | TPM_GLOBAL_INT_ENABLE, ++ chip->vendor.iobase + ++ TPM_INT_ENABLE(chip->vendor.locality)); ++ } ++ } ++ ++ INIT_LIST_HEAD(&chip->vendor.list); ++ spin_lock(&tis_lock); ++ list_add(&chip->vendor.list, &tis_chips); ++ spin_unlock(&tis_lock); ++ ++ tpm_get_timeouts(chip); ++ tpm_continue_selftest(chip); ++ ++ return 0; ++out_err: ++ if (chip->vendor.iobase) ++ iounmap(chip->vendor.iobase); ++ 
tpm_remove_hardware(chip->dev); ++ return rc; ++} ++ ++static int tpm_tis_pnp_suspend(struct pnp_dev *dev, pm_message_t msg) ++{ ++ return tpm_pm_suspend(&dev->dev, msg); ++} ++ ++static int tpm_tis_pnp_resume(struct pnp_dev *dev) ++{ ++ return tpm_pm_resume(&dev->dev); ++} ++ ++static struct pnp_device_id tpm_pnp_tbl[] __devinitdata = { ++ {"PNP0C31", 0}, /* TPM */ ++ {"ATM1200", 0}, /* Atmel */ ++ {"IFX0102", 0}, /* Infineon */ ++ {"BCM0101", 0}, /* Broadcom */ ++ {"NSC1200", 0}, /* National */ ++ /* Add new here */ ++ {"", 0}, /* User Specified */ ++ {"", 0} /* Terminator */ ++}; ++ ++static struct pnp_driver tis_pnp_driver = { ++ .name = "tpm_tis", ++ .id_table = tpm_pnp_tbl, ++ .probe = tpm_tis_pnp_init, ++ .suspend = tpm_tis_pnp_suspend, ++ .resume = tpm_tis_pnp_resume, ++}; ++ ++#define TIS_HID_USR_IDX sizeof(tpm_pnp_tbl)/sizeof(struct pnp_device_id) -2 ++module_param_string(hid, tpm_pnp_tbl[TIS_HID_USR_IDX].id, ++ sizeof(tpm_pnp_tbl[TIS_HID_USR_IDX].id), 0444); ++MODULE_PARM_DESC(hid, "Set additional specific HID for this driver to probe"); ++ ++static int __init init_tis(void) ++{ ++ return pnp_register_driver(&tis_pnp_driver); ++} ++ ++static void __exit cleanup_tis(void) ++{ ++ struct tpm_vendor_specific *i, *j; ++ struct tpm_chip *chip; ++ spin_lock(&tis_lock); ++ list_for_each_entry_safe(i, j, &tis_chips, list) { ++ chip = to_tpm_chip(i); ++ iowrite32(~TPM_GLOBAL_INT_ENABLE & ++ ioread32(chip->vendor.iobase + ++ TPM_INT_ENABLE(chip->vendor. ++ locality)), ++ chip->vendor.iobase + ++ TPM_INT_ENABLE(chip->vendor.locality)); ++ release_locality(chip, chip->vendor.locality, 1); ++ if (chip->vendor.irq) ++ free_irq(chip->vendor.irq, chip); ++ iounmap(i->iobase); ++ list_del(&i->list); ++ tpm_remove_hardware(chip->dev); ++ } ++ spin_unlock(&tis_lock); ++ pnp_unregister_driver(&tis_pnp_driver); ++} ++ ++module_init(init_tis); ++module_exit(cleanup_tis); ++MODULE_AUTHOR("Leendert van Doorn (leendert@xxxxxxxxxxxxxx)"); ++MODULE_DESCRIPTION("TPM Driver"); ++MODULE_VERSION("2.0"); ++MODULE_LICENSE("GPL"); diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/x86-elfnote-as-preprocessor-macro.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/x86-elfnote-as-preprocessor-macro.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,43 @@ +diff -pruN ../orig-linux-2.6.16.29/include/linux/elfnote.h ./include/linux/elfnote.h +--- ../orig-linux-2.6.16.29/include/linux/elfnote.h 2006-09-19 14:06:10.000000000 +0100 ++++ ./include/linux/elfnote.h 2006-09-19 14:06:20.000000000 +0100 +@@ -31,22 +31,24 @@ + /* + * Generate a structure with the same shape as Elf{32,64}_Nhdr (which + * turn out to be the same size and shape), followed by the name and +- * desc data with appropriate padding. The 'desc' argument includes +- * the assembler pseudo op defining the type of the data: .asciz +- * "hello, world" ++ * desc data with appropriate padding. The 'desctype' argument is the ++ * assembler pseudo op defining the type of the data e.g. .asciz while ++ * 'descdata' is the data itself e.g. "hello, world". ++ * ++ * e.g. 
ELFNOTE(XYZCo, 42, .asciz, "forty-two") ++ * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef) + */ +-.macro ELFNOTE name type desc:vararg +-.pushsection ".note.\name" +- .align 4 +- .long 2f - 1f /* namesz */ +- .long 4f - 3f /* descsz */ +- .long \type +-1:.asciz "\name" +-2:.align 4 +-3:\desc +-4:.align 4 +-.popsection +-.endm ++#define ELFNOTE(name, type, desctype, descdata) \ ++.pushsection .note.name ; \ ++ .align 4 ; \ ++ .long 2f - 1f /* namesz */ ; \ ++ .long 4f - 3f /* descsz */ ; \ ++ .long type ; \ ++1:.asciz "name" ; \ ++2:.align 4 ; \ ++3:desctype descdata ; \ ++4:.align 4 ; \ ++.popsection ; + #else /* !__ASSEMBLER__ */ + #include <linux/elf.h> + /* diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/x86-increase-interrupt-vector-range.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/x86-increase-interrupt-vector-range.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,89 @@ +diff -pruN ../orig-linux-2.6.16.29/arch/i386/kernel/entry.S ./arch/i386/kernel/entry.S +--- ../orig-linux-2.6.16.29/arch/i386/kernel/entry.S 2006-09-19 14:05:44.000000000 +0100 ++++ ./arch/i386/kernel/entry.S 2006-09-19 14:05:56.000000000 +0100 +@@ -406,7 +406,7 @@ vector=0 + ENTRY(irq_entries_start) + .rept NR_IRQS + ALIGN +-1: pushl $vector-256 ++1: pushl $~(vector) + jmp common_interrupt + .data + .long 1b +@@ -423,7 +423,7 @@ common_interrupt: + + #define BUILD_INTERRUPT(name, nr) \ + ENTRY(name) \ +- pushl $nr-256; \ ++ pushl $~(nr); \ + SAVE_ALL \ + movl %esp,%eax; \ + call smp_/**/name; \ +diff -pruN ../orig-linux-2.6.16.29/arch/i386/kernel/irq.c ./arch/i386/kernel/irq.c +--- ../orig-linux-2.6.16.29/arch/i386/kernel/irq.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./arch/i386/kernel/irq.c 2006-09-19 14:05:56.000000000 +0100 +@@ -53,8 +53,8 @@ static union irq_ctx *softirq_ctx[NR_CPU + */ + fastcall unsigned int do_IRQ(struct pt_regs *regs) + { +- /* high bits used in ret_from_ code */ +- int irq = regs->orig_eax & 0xff; ++ /* high bit used in ret_from_ code */ ++ int irq = ~regs->orig_eax; + #ifdef CONFIG_4KSTACKS + union irq_ctx *curctx, *irqctx; + u32 *isp; +diff -pruN ../orig-linux-2.6.16.29/arch/x86_64/kernel/entry.S ./arch/x86_64/kernel/entry.S +--- ../orig-linux-2.6.16.29/arch/x86_64/kernel/entry.S 2006-09-12 19:02:10.000000000 +0100 ++++ ./arch/x86_64/kernel/entry.S 2006-09-19 14:05:56.000000000 +0100 +@@ -596,7 +596,7 @@ retint_kernel: + */ + .macro apicinterrupt num,func + INTR_FRAME +- pushq $\num-256 ++ pushq $~(\num) + CFI_ADJUST_CFA_OFFSET 8 + interrupt \func + jmp ret_from_intr +diff -pruN ../orig-linux-2.6.16.29/arch/x86_64/kernel/irq.c ./arch/x86_64/kernel/irq.c +--- ../orig-linux-2.6.16.29/arch/x86_64/kernel/irq.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./arch/x86_64/kernel/irq.c 2006-09-19 14:05:56.000000000 +0100 +@@ -96,8 +96,8 @@ skip: + */ + asmlinkage unsigned int do_IRQ(struct pt_regs *regs) + { +- /* high bits used in ret_from_ code */ +- unsigned irq = regs->orig_rax & 0xff; ++ /* high bit used in ret_from_ code */ ++ unsigned irq = ~regs->orig_rax; + + exit_idle(); + irq_enter(); +diff -pruN ../orig-linux-2.6.16.29/arch/x86_64/kernel/smp.c ./arch/x86_64/kernel/smp.c +--- ../orig-linux-2.6.16.29/arch/x86_64/kernel/smp.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./arch/x86_64/kernel/smp.c 2006-09-19 14:05:56.000000000 +0100 +@@ -135,10 +135,10 @@ asmlinkage void smp_invalidate_interrupt + + cpu = smp_processor_id(); + /* +- * orig_rax contains the interrupt vector - 256. ++ * orig_rax contains the negated interrupt vector. 
+ * Use that to determine where the sender put the data. + */ +- sender = regs->orig_rax + 256 - INVALIDATE_TLB_VECTOR_START; ++ sender = ~regs->orig_rax - INVALIDATE_TLB_VECTOR_START; + f = &per_cpu(flush_state, sender); + + if (!cpu_isset(cpu, f->flush_cpumask)) +diff -pruN ../orig-linux-2.6.16.29/include/asm-x86_64/hw_irq.h ./include/asm-x86_64/hw_irq.h +--- ../orig-linux-2.6.16.29/include/asm-x86_64/hw_irq.h 2006-09-12 19:02:10.000000000 +0100 ++++ ./include/asm-x86_64/hw_irq.h 2006-09-19 14:05:56.000000000 +0100 +@@ -127,7 +127,7 @@ asmlinkage void IRQ_NAME(nr); \ + __asm__( \ + "\n.p2align\n" \ + "IRQ" #nr "_interrupt:\n\t" \ +- "push $" #nr "-256 ; " \ ++ "push $~(" #nr ") ; " \ + "jmp common_interrupt"); + + #if defined(CONFIG_X86_IO_APIC) diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,143 @@ +diff -pruN ../orig-linux-2.6.16.29/arch/i386/kernel/vmlinux.lds.S ./arch/i386/kernel/vmlinux.lds.S +--- ../orig-linux-2.6.16.29/arch/i386/kernel/vmlinux.lds.S 2006-09-19 14:05:48.000000000 +0100 ++++ ./arch/i386/kernel/vmlinux.lds.S 2006-09-19 14:06:10.000000000 +0100 +@@ -12,6 +12,12 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386" + OUTPUT_ARCH(i386) + ENTRY(phys_startup_32) + jiffies = jiffies_64; ++ ++PHDRS { ++ text PT_LOAD FLAGS(5); /* R_E */ ++ data PT_LOAD FLAGS(7); /* RWE */ ++ note PT_NOTE FLAGS(4); /* R__ */ ++} + SECTIONS + { + . = __KERNEL_START; +@@ -25,7 +31,7 @@ SECTIONS + KPROBES_TEXT + *(.fixup) + *(.gnu.warning) +- } = 0x9090 ++ } :text = 0x9090 + + _etext = .; /* End of text section */ + +@@ -47,7 +53,7 @@ SECTIONS + .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ + *(.data) + CONSTRUCTORS +- } ++ } :data + + . = ALIGN(4096); + __nosave_begin = .; +@@ -154,4 +160,6 @@ SECTIONS + STABS_DEBUG + + DWARF_DEBUG ++ ++ NOTES + } +diff -pruN ../orig-linux-2.6.16.29/include/asm-generic/vmlinux.lds.h ./include/asm-generic/vmlinux.lds.h +--- ../orig-linux-2.6.16.29/include/asm-generic/vmlinux.lds.h 2006-09-12 19:02:10.000000000 +0100 ++++ ./include/asm-generic/vmlinux.lds.h 2006-09-19 14:06:10.000000000 +0100 +@@ -152,3 +152,6 @@ + .stab.index 0 : { *(.stab.index) } \ + .stab.indexstr 0 : { *(.stab.indexstr) } \ + .comment 0 : { *(.comment) } ++ ++#define NOTES \ ++ .notes : { *(.note.*) } :note +diff -pruN ../orig-linux-2.6.16.29/include/linux/elfnote.h ./include/linux/elfnote.h +--- ../orig-linux-2.6.16.29/include/linux/elfnote.h 1970-01-01 01:00:00.000000000 +0100 ++++ ./include/linux/elfnote.h 2006-09-19 14:06:10.000000000 +0100 +@@ -0,0 +1,88 @@ ++#ifndef _LINUX_ELFNOTE_H ++#define _LINUX_ELFNOTE_H ++/* ++ * Helper macros to generate ELF Note structures, which are put into a ++ * PT_NOTE segment of the final vmlinux image. These are useful for ++ * including name-value pairs of metadata into the kernel binary (or ++ * modules?) for use by external programs. ++ * ++ * Each note has three parts: a name, a type and a desc. The name is ++ * intended to distinguish the note's originator, so it would be a ++ * company, project, subsystem, etc; it must be in a suitable form for ++ * use in a section name. The type is an integer which is used to tag ++ * the data, and is considered to be within the "name" namespace (so ++ * "FooCo"'s type 42 is distinct from "BarProj"'s type 42). The ++ * "desc" field is the actual data. 
There are no constraints on the ++ * desc field's contents, though typically they're fairly small. ++ * ++ * All notes from a given NAME are put into a section named ++ * .note.NAME. When the kernel image is finally linked, all the notes ++ * are packed into a single .notes section, which is mapped into the ++ * PT_NOTE segment. Because notes for a given name are grouped into ++ * the same section, they'll all be adjacent the output file. ++ * ++ * This file defines macros for both C and assembler use. Their ++ * syntax is slightly different, but they're semantically similar. ++ * ++ * See the ELF specification for more detail about ELF notes. ++ */ ++ ++#ifdef __ASSEMBLER__ ++/* ++ * Generate a structure with the same shape as Elf{32,64}_Nhdr (which ++ * turn out to be the same size and shape), followed by the name and ++ * desc data with appropriate padding. The 'desc' argument includes ++ * the assembler pseudo op defining the type of the data: .asciz ++ * "hello, world" ++ */ ++.macro ELFNOTE name type desc:vararg ++.pushsection ".note.\name" ++ .align 4 ++ .long 2f - 1f /* namesz */ ++ .long 4f - 3f /* descsz */ ++ .long \type ++1:.asciz "\name" ++2:.align 4 ++3:\desc ++4:.align 4 ++.popsection ++.endm ++#else /* !__ASSEMBLER__ */ ++#include <linux/elf.h> ++/* ++ * Use an anonymous structure which matches the shape of ++ * Elf{32,64}_Nhdr, but includes the name and desc data. The size and ++ * type of name and desc depend on the macro arguments. "name" must ++ * be a literal string, and "desc" must be passed by value. You may ++ * only define one note per line, since __LINE__ is used to generate ++ * unique symbols. ++ */ ++#define _ELFNOTE_PASTE(a,b) a##b ++#define _ELFNOTE(size, name, unique, type, desc) \ ++ static const struct { \ ++ struct elf##size##_note _nhdr; \ ++ unsigned char _name[sizeof(name)] \ ++ __attribute__((aligned(sizeof(Elf##size##_Word)))); \ ++ typeof(desc) _desc \ ++ __attribute__((aligned(sizeof(Elf##size##_Word)))); \ ++ } _ELFNOTE_PASTE(_note_, unique) \ ++ __attribute_used__ \ ++ __attribute__((section(".note." name), \ ++ aligned(sizeof(Elf##size##_Word)), \ ++ unused)) = { \ ++ { \ ++ sizeof(name), \ ++ sizeof(desc), \ ++ type, \ ++ }, \ ++ name, \ ++ desc \ ++ } ++#define ELFNOTE(size, name, type, desc) \ ++ _ELFNOTE(size, name, __LINE__, type, desc) ++ ++#define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc) ++#define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc) ++#endif /* __ASSEMBLER__ */ ++ ++#endif /* _LINUX_ELFNOTE_H */ diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,84 @@ +diff -pruN ../orig-linux-2.6.16.29/arch/x86_64/kernel/vmlinux.lds.S ./arch/x86_64/kernel/vmlinux.lds.S +--- ../orig-linux-2.6.16.29/arch/x86_64/kernel/vmlinux.lds.S 2006-09-12 19:02:10.000000000 +0100 ++++ ./arch/x86_64/kernel/vmlinux.lds.S 2006-09-19 14:06:15.000000000 +0100 +@@ -14,6 +14,12 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86 + OUTPUT_ARCH(i386:x86-64) + ENTRY(phys_startup_64) + jiffies_64 = jiffies; ++PHDRS { ++ text PT_LOAD FLAGS(5); /* R_E */ ++ data PT_LOAD FLAGS(7); /* RWE */ ++ user PT_LOAD FLAGS(7); /* RWE */ ++ note PT_NOTE FLAGS(4); /* R__ */ ++} + SECTIONS + { + . 
= __START_KERNEL; +@@ -26,7 +32,7 @@ SECTIONS + KPROBES_TEXT + *(.fixup) + *(.gnu.warning) +- } = 0x9090 ++ } :text = 0x9090 + /* out-of-line lock text */ + .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) { *(.text.lock) } + +@@ -43,17 +49,10 @@ SECTIONS + .data : AT(ADDR(.data) - LOAD_OFFSET) { + *(.data) + CONSTRUCTORS +- } ++ } :data + + _edata = .; /* End of data section */ + +- __bss_start = .; /* BSS */ +- .bss : AT(ADDR(.bss) - LOAD_OFFSET) { +- *(.bss.page_aligned) +- *(.bss) +- } +- __bss_stop = .; +- + . = ALIGN(PAGE_SIZE); + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { +@@ -75,7 +74,7 @@ SECTIONS + #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) + + . = VSYSCALL_ADDR; +- .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } ++ .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } :user + __vsyscall_0 = VSYSCALL_VIRT_ADDR; + + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); +@@ -118,7 +117,7 @@ SECTIONS + . = ALIGN(8192); /* init_task */ + .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { + *(.data.init_task) +- } ++ } :data + + . = ALIGN(4096); + .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { +@@ -188,6 +187,14 @@ SECTIONS + . = ALIGN(4096); + __nosave_end = .; + ++ __bss_start = .; /* BSS */ ++ . = ALIGN(4096); ++ .bss : AT(ADDR(.bss) - LOAD_OFFSET) { ++ *(.bss.page_aligned) ++ *(.bss) ++ } ++ __bss_stop = .; ++ + _end = . ; + + /* Sections to be discarded */ +@@ -201,4 +208,6 @@ SECTIONS + STABS_DEBUG + + DWARF_DEBUG ++ ++ NOTES + } diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/xen-hotplug.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/xen-hotplug.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,12 @@ +diff -pruN ../orig-linux-2.6.16.29/fs/proc/proc_misc.c ./fs/proc/proc_misc.c +--- ../orig-linux-2.6.16.29/fs/proc/proc_misc.c 2006-09-12 19:02:10.000000000 +0100 ++++ ./fs/proc/proc_misc.c 2006-09-19 14:06:00.000000000 +0100 +@@ -433,7 +433,7 @@ static int show_stat(struct seq_file *p, + (unsigned long long)cputime64_to_clock_t(irq), + (unsigned long long)cputime64_to_clock_t(softirq), + (unsigned long long)cputime64_to_clock_t(steal)); +- for_each_online_cpu(i) { ++ for_each_cpu(i) { + + /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ + user = kstat_cpu(i).cpustat.user; diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.32/xenoprof-generic.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.32/xenoprof-generic.patch Mon Nov 27 13:50:02 2006 +0000 @@ -0,0 +1,662 @@ +diff -pruN ../orig-linux-2.6.16.29/drivers/oprofile/buffer_sync.c ./drivers/oprofile/buffer_sync.c +--- ../orig-linux-2.6.16.29/drivers/oprofile/buffer_sync.c 2006-11-06 14:46:52.000000000 -0800 ++++ ./drivers/oprofile/buffer_sync.c 2006-11-06 15:16:52.000000000 -0800 +@@ -6,6 +6,10 @@ + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> + * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. ++ * + * This is the core of the buffer management. Each + * CPU buffer is processed and entered into the + * global event buffer. Such processing is necessary +@@ -38,6 +42,7 @@ static cpumask_t marked_cpus = CPU_MASK_ + static DEFINE_SPINLOCK(task_mortuary); + static void process_task_mortuary(void); + ++static int cpu_current_domain[NR_CPUS]; + + /* Take ownership of the task struct and place it on the + * list for processing. 
Only after two full buffer syncs +@@ -146,6 +151,11 @@ static void end_sync(void) + int sync_start(void) + { + int err; ++ int i; ++ ++ for (i = 0; i < NR_CPUS; i++) { ++ cpu_current_domain[i] = COORDINATOR_DOMAIN; ++ } + + start_cpu_work(); + +@@ -275,15 +285,31 @@ static void add_cpu_switch(int i) + last_cookie = INVALID_COOKIE; + } + +-static void add_kernel_ctx_switch(unsigned int in_kernel) ++static void add_cpu_mode_switch(unsigned int cpu_mode) + { + add_event_entry(ESCAPE_CODE); +- if (in_kernel) +- add_event_entry(KERNEL_ENTER_SWITCH_CODE); +- else +- add_event_entry(KERNEL_EXIT_SWITCH_CODE); ++ switch (cpu_mode) { ++ case CPU_MODE_USER: ++ add_event_entry(USER_ENTER_SWITCH_CODE); ++ break; ++ case CPU_MODE_KERNEL: ++ add_event_entry(KERNEL_ENTER_SWITCH_CODE); ++ break; ++ case CPU_MODE_XEN: ++ add_event_entry(XEN_ENTER_SWITCH_CODE); ++ break; ++ default: ++ break; ++ } + } +- ++ ++static void add_domain_switch(unsigned long domain_id) ++{ ++ add_event_entry(ESCAPE_CODE); ++ add_event_entry(DOMAIN_SWITCH_CODE); ++ add_event_entry(domain_id); ++} ++ + static void + add_user_ctx_switch(struct task_struct const * task, unsigned long cookie) + { +@@ -348,9 +374,9 @@ static int add_us_sample(struct mm_struc + * for later lookup from userspace. + */ + static int +-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel) ++add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode) + { +- if (in_kernel) { ++ if (cpu_mode >= CPU_MODE_KERNEL) { + add_sample_entry(s->eip, s->event); + return 1; + } else if (mm) { +@@ -496,15 +522,21 @@ void sync_buffer(int cpu) + struct mm_struct *mm = NULL; + struct task_struct * new; + unsigned long cookie = 0; +- int in_kernel = 1; ++ int cpu_mode = 1; + unsigned int i; + sync_buffer_state state = sb_buffer_start; + unsigned long available; ++ int domain_switch = 0; + + down(&buffer_sem); + + add_cpu_switch(cpu); + ++ /* We need to assign the first samples in this CPU buffer to the ++ same domain that we were processing at the last sync_buffer */ ++ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) { ++ add_domain_switch(cpu_current_domain[cpu]); ++ } + /* Remember, only we can modify tail_pos */ + + available = get_slots(cpu_buf); +@@ -512,16 +544,18 @@ void sync_buffer(int cpu) + for (i = 0; i < available; ++i) { + struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos]; + +- if (is_code(s->eip)) { +- if (s->event <= CPU_IS_KERNEL) { +- /* kernel/userspace switch */ +- in_kernel = s->event; ++ if (is_code(s->eip) && !domain_switch) { ++ if (s->event <= CPU_MODE_XEN) { ++ /* xen/kernel/userspace switch */ ++ cpu_mode = s->event; + if (state == sb_buffer_start) + state = sb_sample_start; +- add_kernel_ctx_switch(s->event); ++ add_cpu_mode_switch(s->event); + } else if (s->event == CPU_TRACE_BEGIN) { + state = sb_bt_start; + add_trace_begin(); ++ } else if (s->event == CPU_DOMAIN_SWITCH) { ++ domain_switch = 1; + } else { + struct mm_struct * oldmm = mm; + +@@ -535,11 +569,21 @@ void sync_buffer(int cpu) + add_user_ctx_switch(new, cookie); + } + } else { +- if (state >= sb_bt_start && +- !add_sample(mm, s, in_kernel)) { +- if (state == sb_bt_start) { +- state = sb_bt_ignore; +- atomic_inc(&oprofile_stats.bt_lost_no_mapping); ++ if (domain_switch) { ++ cpu_current_domain[cpu] = s->eip; ++ add_domain_switch(s->eip); ++ domain_switch = 0; ++ } else { ++ if (cpu_current_domain[cpu] != ++ COORDINATOR_DOMAIN) { ++ add_sample_entry(s->eip, s->event); ++ } ++ else if (state >= sb_bt_start && ++ !add_sample(mm, s, cpu_mode)) { ++ if 
(state == sb_bt_start) { ++ state = sb_bt_ignore; ++ atomic_inc(&oprofile_stats.bt_lost_no_mapping); ++ } + } + } + } +@@ -548,6 +592,11 @@ void sync_buffer(int cpu) + } + release_mm(mm); + ++ /* We reset domain to COORDINATOR at each CPU switch */ ++ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) { ++ add_domain_switch(COORDINATOR_DOMAIN); ++ } ++ + mark_done(cpu); + + up(&buffer_sem); +diff -pruN ../orig-linux-2.6.16.29/drivers/oprofile/cpu_buffer.c ./drivers/oprofile/cpu_buffer.c +--- ../orig-linux-2.6.16.29/drivers/oprofile/cpu_buffer.c 2006-11-06 14:46:52.000000000 -0800 ++++ ./drivers/oprofile/cpu_buffer.c 2006-11-06 14:47:55.000000000 -0800 +@@ -6,6 +6,10 @@ + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> + * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. ++ * + * Each CPU has a local buffer that stores PC value/event + * pairs. We also log context switches when we notice them. + * Eventually each CPU's buffer is processed into the global +@@ -34,6 +38,8 @@ static void wq_sync_buffer(void *); + #define DEFAULT_TIMER_EXPIRE (HZ / 10) + static int work_enabled; + ++static int32_t current_domain = COORDINATOR_DOMAIN; ++ + void free_cpu_buffers(void) + { + int i; +@@ -58,7 +64,7 @@ int alloc_cpu_buffers(void) + goto fail; + + b->last_task = NULL; +- b->last_is_kernel = -1; ++ b->last_cpu_mode = -1; + b->tracing = 0; + b->buffer_size = buffer_size; + b->tail_pos = 0; +@@ -114,7 +120,7 @@ void cpu_buffer_reset(struct oprofile_cp + * collected will populate the buffer with proper + * values to initialize the buffer + */ +- cpu_buf->last_is_kernel = -1; ++ cpu_buf->last_cpu_mode = -1; + cpu_buf->last_task = NULL; + } + +@@ -164,13 +170,13 @@ add_code(struct oprofile_cpu_buffer * bu + * because of the head/tail separation of the writer and reader + * of the CPU buffer. + * +- * is_kernel is needed because on some architectures you cannot ++ * cpu_mode is needed because on some architectures you cannot + * tell if you are in kernel or user space simply by looking at +- * pc. We tag this in the buffer by generating kernel enter/exit +- * events whenever is_kernel changes ++ * pc. 
We tag this in the buffer by generating kernel/user (and xen) ++ * enter events whenever cpu_mode changes + */ + static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc, +- int is_kernel, unsigned long event) ++ int cpu_mode, unsigned long event) + { + struct task_struct * task; + +@@ -181,18 +187,18 @@ static int log_sample(struct oprofile_cp + return 0; + } + +- is_kernel = !!is_kernel; +- + task = current; + + /* notice a switch from user->kernel or vice versa */ +- if (cpu_buf->last_is_kernel != is_kernel) { +- cpu_buf->last_is_kernel = is_kernel; +- add_code(cpu_buf, is_kernel); ++ if (cpu_buf->last_cpu_mode != cpu_mode) { ++ cpu_buf->last_cpu_mode = cpu_mode; ++ add_code(cpu_buf, cpu_mode); + } +- ++ + /* notice a task switch */ +- if (cpu_buf->last_task != task) { ++ /* if not processing other domain samples */ ++ if ((cpu_buf->last_task != task) && ++ (current_domain == COORDINATOR_DOMAIN)) { + cpu_buf->last_task = task; + add_code(cpu_buf, (unsigned long)task); + } +@@ -269,6 +275,25 @@ void oprofile_add_trace(unsigned long pc + add_sample(cpu_buf, pc, 0); + } + ++int oprofile_add_domain_switch(int32_t domain_id) ++{ ++ struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()]; ++ ++ /* should have space for switching into and out of domain ++ (2 slots each) plus one sample and one cpu mode switch */ ++ if (((nr_available_slots(cpu_buf) < 6) && ++ (domain_id != COORDINATOR_DOMAIN)) || ++ (nr_available_slots(cpu_buf) < 2)) ++ return 0; ++ ++ add_code(cpu_buf, CPU_DOMAIN_SWITCH); ++ add_sample(cpu_buf, domain_id, 0); ++ ++ current_domain = domain_id; ++ ++ return 1; ++} ++ + /* + * This serves to avoid cpu buffer overflow, and makes sure + * the task mortuary progresses +diff -pruN ../orig-linux-2.6.16.29/drivers/oprofile/cpu_buffer.h ./drivers/oprofile/cpu_buffer.h +--- ../orig-linux-2.6.16.29/drivers/oprofile/cpu_buffer.h 2006-11-06 14:46:52.000000000 -0800 ++++ ./drivers/oprofile/cpu_buffer.h 2006-11-06 14:47:55.000000000 -0800 +@@ -36,7 +36,7 @@ struct oprofile_cpu_buffer { + volatile unsigned long tail_pos; + unsigned long buffer_size; + struct task_struct * last_task; +- int last_is_kernel; ++ int last_cpu_mode; + int tracing; + struct op_sample * buffer; + unsigned long sample_received; +@@ -51,7 +51,10 @@ extern struct oprofile_cpu_buffer cpu_bu + void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf); + + /* transient events for the CPU buffer -> event buffer */ +-#define CPU_IS_KERNEL 1 +-#define CPU_TRACE_BEGIN 2 ++#define CPU_MODE_USER 0 ++#define CPU_MODE_KERNEL 1 ++#define CPU_MODE_XEN 2 ++#define CPU_TRACE_BEGIN 3 ++#define CPU_DOMAIN_SWITCH 4 + + #endif /* OPROFILE_CPU_BUFFER_H */ +diff -pruN ../orig-linux-2.6.16.29/drivers/oprofile/event_buffer.h ./drivers/oprofile/event_buffer.h +--- ../orig-linux-2.6.16.29/drivers/oprofile/event_buffer.h 2006-11-06 14:46:52.000000000 -0800 ++++ ./drivers/oprofile/event_buffer.h 2006-11-06 14:47:55.000000000 -0800 +@@ -29,15 +29,20 @@ void wake_up_buffer_waiter(void); + #define CPU_SWITCH_CODE 2 + #define COOKIE_SWITCH_CODE 3 + #define KERNEL_ENTER_SWITCH_CODE 4 +-#define KERNEL_EXIT_SWITCH_CODE 5 ++#define USER_ENTER_SWITCH_CODE 5 + #define MODULE_LOADED_CODE 6 + #define CTX_TGID_CODE 7 + #define TRACE_BEGIN_CODE 8 + #define TRACE_END_CODE 9 ++#define XEN_ENTER_SWITCH_CODE 10 ++#define DOMAIN_SWITCH_CODE 11 + + #define INVALID_COOKIE ~0UL + #define NO_COOKIE 0UL + ++/* Constant used to refer to coordinator domain (Xen) */ ++#define COORDINATOR_DOMAIN -1 ++ + /* add data to the event buffer */ 
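/*
 * A minimal, illustrative sketch (not part of the changeset) of how a
 * consumer of the event buffer might decode the two new escape records
 * defined just above. The record layout (ESCAPE_CODE, then a
 * *_SWITCH_CODE word, then an optional payload word) follows the
 * sync_buffer() hunk earlier in this patch; the fetch_word() helper and
 * the static state variable are hypothetical.
 */
static long current_domain = COORDINATOR_DOMAIN;

static void decode_escape_record(unsigned long code,
				 unsigned long (*fetch_word)(void))
{
	switch (code) {
	case XEN_ENTER_SWITCH_CODE:
		/* samples that follow were taken while executing in Xen */
		break;
	case DOMAIN_SWITCH_CODE:
		/* the payload word is the id of the domain being profiled */
		current_domain = fetch_word();
		break;
	default:
		break;
	}
}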
+ void add_event_entry(unsigned long data); + +diff -pruN ../orig-linux-2.6.16.29/drivers/oprofile/oprof.c ./drivers/oprofile/oprof.c +--- ../orig-linux-2.6.16.29/drivers/oprofile/oprof.c 2006-11-06 14:46:52.000000000 -0800 ++++ ./drivers/oprofile/oprof.c 2006-11-06 14:47:55.000000000 -0800 +@@ -5,6 +5,10 @@ + * @remark Read the file COPYING + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> ++ * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. + */ + + #include <linux/kernel.h> +@@ -19,7 +23,7 @@ + #include "cpu_buffer.h" + #include "buffer_sync.h" + #include "oprofile_stats.h" +- ++ + struct oprofile_operations oprofile_ops; + + unsigned long oprofile_started; +@@ -33,6 +37,32 @@ static DECLARE_MUTEX(start_sem); + */ + static int timer = 0; + ++int oprofile_set_active(int active_domains[], unsigned int adomains) ++{ ++ int err; ++ ++ if (!oprofile_ops.set_active) ++ return -EINVAL; ++ ++ down(&start_sem); ++ err = oprofile_ops.set_active(active_domains, adomains); ++ up(&start_sem); ++ return err; ++} ++ ++int oprofile_set_passive(int passive_domains[], unsigned int pdomains) ++{ ++ int err; ++ ++ if (!oprofile_ops.set_passive) ++ return -EINVAL; ++ ++ down(&start_sem); ++ err = oprofile_ops.set_passive(passive_domains, pdomains); ++ up(&start_sem); ++ return err; ++} ++ + int oprofile_setup(void) + { + int err; +diff -pruN ../orig-linux-2.6.16.29/drivers/oprofile/oprof.h ./drivers/oprofile/oprof.h +--- ../orig-linux-2.6.16.29/drivers/oprofile/oprof.h 2006-11-06 14:46:52.000000000 -0800 ++++ ./drivers/oprofile/oprof.h 2006-11-06 14:47:55.000000000 -0800 +@@ -35,5 +35,8 @@ void oprofile_create_files(struct super_ + void oprofile_timer_init(struct oprofile_operations * ops); + + int oprofile_set_backtrace(unsigned long depth); ++ ++int oprofile_set_active(int active_domains[], unsigned int adomains); ++int oprofile_set_passive(int passive_domains[], unsigned int pdomains); + + #endif /* OPROF_H */ +diff -pruN ../orig-linux-2.6.16.29/drivers/oprofile/oprofile_files.c ./drivers/oprofile/oprofile_files.c +--- ../orig-linux-2.6.16.29/drivers/oprofile/oprofile_files.c 2006-11-06 14:46:52.000000000 -0800 ++++ ./drivers/oprofile/oprofile_files.c 2006-11-06 14:47:55.000000000 -0800 +@@ -5,15 +5,21 @@ + * @remark Read the file COPYING + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> ++ * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. 
+ */ + + #include <linux/fs.h> + #include <linux/oprofile.h> ++#include <asm/uaccess.h> ++#include <linux/ctype.h> + + #include "event_buffer.h" + #include "oprofile_stats.h" + #include "oprof.h" +- ++ + unsigned long fs_buffer_size = 131072; + unsigned long fs_cpu_buffer_size = 8192; + unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */ +@@ -117,11 +123,202 @@ static ssize_t dump_write(struct file * + static struct file_operations dump_fops = { + .write = dump_write, + }; +- ++ ++#define TMPBUFSIZE 512 ++ ++static unsigned int adomains = 0; ++static int active_domains[MAX_OPROF_DOMAINS + 1]; ++static DEFINE_MUTEX(adom_mutex); ++ ++static ssize_t adomain_write(struct file * file, char const __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char *tmpbuf; ++ char *startp, *endp; ++ int i; ++ unsigned long val; ++ ssize_t retval = count; ++ ++ if (*offset) ++ return -EINVAL; ++ if (count > TMPBUFSIZE - 1) ++ return -EINVAL; ++ ++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) ++ return -ENOMEM; ++ ++ if (copy_from_user(tmpbuf, buf, count)) { ++ kfree(tmpbuf); ++ return -EFAULT; ++ } ++ tmpbuf[count] = 0; ++ ++ mutex_lock(&adom_mutex); ++ ++ startp = tmpbuf; ++ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */ ++ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) { ++ val = simple_strtoul(startp, &endp, 0); ++ if (endp == startp) ++ break; ++ while (ispunct(*endp) || isspace(*endp)) ++ endp++; ++ active_domains[i] = val; ++ if (active_domains[i] != val) ++ /* Overflow, force error below */ ++ i = MAX_OPROF_DOMAINS + 1; ++ startp = endp; ++ } ++ /* Force error on trailing junk */ ++ adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i; ++ ++ kfree(tmpbuf); ++ ++ if (adomains > MAX_OPROF_DOMAINS ++ || oprofile_set_active(active_domains, adomains)) { ++ adomains = 0; ++ retval = -EINVAL; ++ } ++ ++ mutex_unlock(&adom_mutex); ++ return retval; ++} ++ ++static ssize_t adomain_read(struct file * file, char __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char * tmpbuf; ++ size_t len; ++ int i; ++ ssize_t retval; ++ ++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) ++ return -ENOMEM; ++ ++ mutex_lock(&adom_mutex); ++ ++ len = 0; ++ for (i = 0; i < adomains; i++) ++ len += snprintf(tmpbuf + len, ++ len < TMPBUFSIZE ? 
TMPBUFSIZE - len : 0, ++ "%u ", active_domains[i]); ++ WARN_ON(len > TMPBUFSIZE); ++ if (len != 0 && len <= TMPBUFSIZE) ++ tmpbuf[len-1] = '\n'; ++ ++ mutex_unlock(&adom_mutex); ++ ++ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len); ++ ++ kfree(tmpbuf); ++ return retval; ++} ++ ++ ++static struct file_operations active_domain_ops = { ++ .read = adomain_read, ++ .write = adomain_write, ++}; ++ ++static unsigned int pdomains = 0; ++static int passive_domains[MAX_OPROF_DOMAINS]; ++static DEFINE_MUTEX(pdom_mutex); ++ ++static ssize_t pdomain_write(struct file * file, char const __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char *tmpbuf; ++ char *startp, *endp; ++ int i; ++ unsigned long val; ++ ssize_t retval = count; ++ ++ if (*offset) ++ return -EINVAL; ++ if (count > TMPBUFSIZE - 1) ++ return -EINVAL; ++ ++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) ++ return -ENOMEM; ++ ++ if (copy_from_user(tmpbuf, buf, count)) { ++ kfree(tmpbuf); ++ return -EFAULT; ++ } ++ tmpbuf[count] = 0; ++ ++ mutex_lock(&pdom_mutex); ++ ++ startp = tmpbuf; ++ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */ ++ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) { ++ val = simple_strtoul(startp, &endp, 0); ++ if (endp == startp) ++ break; ++ while (ispunct(*endp) || isspace(*endp)) ++ endp++; ++ passive_domains[i] = val; ++ if (passive_domains[i] != val) ++ /* Overflow, force error below */ ++ i = MAX_OPROF_DOMAINS + 1; ++ startp = endp; ++ } ++ /* Force error on trailing junk */ ++ pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i; ++ ++ kfree(tmpbuf); ++ ++ if (pdomains > MAX_OPROF_DOMAINS ++ || oprofile_set_passive(passive_domains, pdomains)) { ++ pdomains = 0; ++ retval = -EINVAL; ++ } ++ ++ mutex_unlock(&pdom_mutex); ++ return retval; ++} ++ ++static ssize_t pdomain_read(struct file * file, char __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char * tmpbuf; ++ size_t len; ++ int i; ++ ssize_t retval; ++ ++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) ++ return -ENOMEM; ++ ++ mutex_lock(&pdom_mutex); ++ ++ len = 0; ++ for (i = 0; i < pdomains; i++) ++ len += snprintf(tmpbuf + len, ++ len < TMPBUFSIZE ? 
TMPBUFSIZE - len : 0, ++ "%u ", passive_domains[i]); ++ WARN_ON(len > TMPBUFSIZE); ++ if (len != 0 && len <= TMPBUFSIZE) ++ tmpbuf[len-1] = '\n'; ++ ++ mutex_unlock(&pdom_mutex); ++ ++ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len); ++ ++ kfree(tmpbuf); ++ return retval; ++} ++ ++static struct file_operations passive_domain_ops = { ++ .read = pdomain_read, ++ .write = pdomain_write, ++}; ++ + void oprofile_create_files(struct super_block * sb, struct dentry * root) + { + oprofilefs_create_file(sb, root, "enable", &enable_fops); + oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666); ++ oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops); ++ oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops); + oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops); + oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size); + oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed); +diff -pruN ../orig-linux-2.6.16.29/include/linux/oprofile.h ./include/linux/oprofile.h +--- ../orig-linux-2.6.16.29/include/linux/oprofile.h 2006-11-06 14:46:42.000000000 -0800 ++++ ./include/linux/oprofile.h 2006-11-06 14:47:55.000000000 -0800 +@@ -16,6 +16,8 @@ + #include <linux/types.h> + #include <linux/spinlock.h> + #include <asm/atomic.h> ++ ++#include <xen/interface/xenoprof.h> + + struct super_block; + struct dentry; +@@ -27,6 +29,11 @@ struct oprofile_operations { + /* create any necessary configuration files in the oprofile fs. + * Optional. */ + int (*create_files)(struct super_block * sb, struct dentry * root); ++ /* setup active domains with Xen */ ++ int (*set_active)(int *active_domains, unsigned int adomains); ++ /* setup passive domains with Xen */ ++ int (*set_passive)(int *passive_domains, unsigned int pdomains); ++ + /* Do any necessary interrupt setup. Optional. */ + int (*setup)(void); + /* Do any necessary interrupt shutdown. Optional. */ +@@ -68,6 +75,8 @@ void oprofile_add_pc(unsigned long pc, i + /* add a backtrace entry, to be called from the ->backtrace callback */ + void oprofile_add_trace(unsigned long eip); + ++/* add a domain switch entry */ ++int oprofile_add_domain_switch(int32_t domain_id); + + /** + * Create a file of the given name as a child of the given root, with diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.31/blktap-aio-16_03_06.patch --- a/patches/linux-2.6.16.31/blktap-aio-16_03_06.patch Mon Nov 27 13:50:02 2006 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,294 +0,0 @@ -diff -pruN ../orig-linux-2.6.16.29/fs/aio.c ./fs/aio.c ---- ../orig-linux-2.6.16.29/fs/aio.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./fs/aio.c 2006-09-19 13:58:49.000000000 +0100 -@@ -34,6 +34,11 @@ - #include <asm/uaccess.h> - #include <asm/mmu_context.h> - -+#ifdef CONFIG_EPOLL -+#include <linux/poll.h> -+#include <linux/eventpoll.h> -+#endif -+ - #if DEBUG > 1 - #define dprintk printk - #else -@@ -1016,6 +1021,10 @@ put_rq: - if (waitqueue_active(&ctx->wait)) - wake_up(&ctx->wait); - -+#ifdef CONFIG_EPOLL -+ if (ctx->file && waitqueue_active(&ctx->poll_wait)) -+ wake_up(&ctx->poll_wait); -+#endif - if (ret) - put_ioctx(ctx); - -@@ -1025,6 +1034,8 @@ put_rq: - /* aio_read_evt - * Pull an event off of the ioctx's event ring. Returns the number of - * events fetched (0 or 1 ;-) -+ * If ent parameter is 0, just returns the number of events that would -+ * be fetched. - * FIXME: make this use cmpxchg. - * TODO: make the ringbuffer user mmap()able (requires FIXME). 
- */ -@@ -1047,13 +1058,18 @@ static int aio_read_evt(struct kioctx *i - - head = ring->head % info->nr; - if (head != ring->tail) { -- struct io_event *evp = aio_ring_event(info, head, KM_USER1); -- *ent = *evp; -- head = (head + 1) % info->nr; -- smp_mb(); /* finish reading the event before updating the head */ -- ring->head = head; -- ret = 1; -- put_aio_ring_event(evp, KM_USER1); -+ if (ent) { /* event requested */ -+ struct io_event *evp = -+ aio_ring_event(info, head, KM_USER1); -+ *ent = *evp; -+ head = (head + 1) % info->nr; -+ /* finish reading the event before updating the head */ -+ smp_mb(); -+ ring->head = head; -+ ret = 1; -+ put_aio_ring_event(evp, KM_USER1); -+ } else /* only need to know availability */ -+ ret = 1; - } - spin_unlock(&info->ring_lock); - -@@ -1236,9 +1252,78 @@ static void io_destroy(struct kioctx *io - - aio_cancel_all(ioctx); - wait_for_all_aios(ioctx); -+#ifdef CONFIG_EPOLL -+ /* forget the poll file, but it's up to the user to close it */ -+ if (ioctx->file) { -+ ioctx->file->private_data = 0; -+ ioctx->file = 0; -+ } -+#endif - put_ioctx(ioctx); /* once for the lookup */ - } - -+#ifdef CONFIG_EPOLL -+ -+static int aio_queue_fd_close(struct inode *inode, struct file *file) -+{ -+ struct kioctx *ioctx = file->private_data; -+ if (ioctx) { -+ file->private_data = 0; -+ spin_lock_irq(&ioctx->ctx_lock); -+ ioctx->file = 0; -+ spin_unlock_irq(&ioctx->ctx_lock); -+ } -+ return 0; -+} -+ -+static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait) -+{ unsigned int pollflags = 0; -+ struct kioctx *ioctx = file->private_data; -+ -+ if (ioctx) { -+ -+ spin_lock_irq(&ioctx->ctx_lock); -+ /* Insert inside our poll wait queue */ -+ poll_wait(file, &ioctx->poll_wait, wait); -+ -+ /* Check our condition */ -+ if (aio_read_evt(ioctx, 0)) -+ pollflags = POLLIN | POLLRDNORM; -+ spin_unlock_irq(&ioctx->ctx_lock); -+ } -+ -+ return pollflags; -+} -+ -+static struct file_operations aioq_fops = { -+ .release = aio_queue_fd_close, -+ .poll = aio_queue_fd_poll -+}; -+ -+/* make_aio_fd: -+ * Create a file descriptor that can be used to poll the event queue. -+ * Based and piggybacked on the excellent epoll code. -+ */ -+ -+static int make_aio_fd(struct kioctx *ioctx) -+{ -+ int error, fd; -+ struct inode *inode; -+ struct file *file; -+ -+ error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops); -+ if (error) -+ return error; -+ -+ /* associate the file with the IO context */ -+ file->private_data = ioctx; -+ ioctx->file = file; -+ init_waitqueue_head(&ioctx->poll_wait); -+ return fd; -+} -+#endif -+ -+ - /* sys_io_setup: - * Create an aio_context capable of receiving at least nr_events. - * ctxp must not point to an aio_context that already exists, and -@@ -1251,18 +1336,30 @@ static void io_destroy(struct kioctx *io - * resources are available. May fail with -EFAULT if an invalid - * pointer is passed for ctxp. Will fail with -ENOSYS if not - * implemented. -+ * -+ * To request a selectable fd, the user context has to be initialized -+ * to 1, instead of 0, and the return value is the fd. -+ * This keeps the system call compatible, since a non-zero value -+ * was not allowed so far.
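/*
 * A minimal userspace sketch (not part of the changeset) of the fd
 * convention described in the comment above: the context word is primed
 * with 1, the syscall returns the pollable fd on success and stores the
 * real context handle back into the word. The returned fd can then be
 * handed to poll(), where POLLIN means completed events are waiting.
 * Error handling is elided and the wrapper name is hypothetical.
 */
#include <linux/aio_abi.h>
#include <sys/syscall.h>
#include <unistd.h>

static long io_setup_with_fd(unsigned nr_events, aio_context_t *ctx)
{
	*ctx = 1;	/* 1 instead of 0 requests the selectable fd */
	return syscall(SYS_io_setup, nr_events, ctx);	/* >= 0: the fd */
}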
- */ - asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp) - { - struct kioctx *ioctx = NULL; - unsigned long ctx; - long ret; -+ int make_fd = 0; - - ret = get_user(ctx, ctxp); - if (unlikely(ret)) - goto out; - - ret = -EINVAL; -+#ifdef CONFIG_EPOLL -+ if (ctx == 1) { -+ make_fd = 1; -+ ctx = 0; -+ } -+#endif - if (unlikely(ctx || nr_events == 0)) { - pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n", - ctx, nr_events); -@@ -1273,8 +1370,12 @@ asmlinkage long sys_io_setup(unsigned nr - ret = PTR_ERR(ioctx); - if (!IS_ERR(ioctx)) { - ret = put_user(ioctx->user_id, ctxp); -- if (!ret) -- return 0; -+#ifdef CONFIG_EPOLL -+ if (make_fd && ret >= 0) -+ ret = make_aio_fd(ioctx); -+#endif -+ if (ret >= 0) -+ return ret; - - get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */ - io_destroy(ioctx); -diff -pruN ../orig-linux-2.6.16.29/fs/eventpoll.c ./fs/eventpoll.c ---- ../orig-linux-2.6.16.29/fs/eventpoll.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./fs/eventpoll.c 2006-09-19 13:58:49.000000000 +0100 -@@ -235,8 +235,6 @@ struct ep_pqueue { - - static void ep_poll_safewake_init(struct poll_safewake *psw); - static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq); --static int ep_getfd(int *efd, struct inode **einode, struct file **efile, -- struct eventpoll *ep); - static int ep_alloc(struct eventpoll **pep); - static void ep_free(struct eventpoll *ep); - static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd); -@@ -266,7 +264,7 @@ static int ep_events_transfer(struct eve - static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, - int maxevents, long timeout); - static int eventpollfs_delete_dentry(struct dentry *dentry); --static struct inode *ep_eventpoll_inode(void); -+static struct inode *ep_eventpoll_inode(struct file_operations *fops); - static struct super_block *eventpollfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data); -@@ -525,7 +523,7 @@ asmlinkage long sys_epoll_create(int siz - * Creates all the items needed to setup an eventpoll file. That is, - * a file structure, and inode and a free file descriptor. - */ -- error = ep_getfd(&fd, &inode, &file, ep); -+ error = ep_getfd(&fd, &inode, &file, ep, &eventpoll_fops); - if (error) - goto eexit_2; - -@@ -710,8 +708,8 @@ eexit_1: - /* - * Creates the file descriptor to be used by the epoll interface. 
- */ --static int ep_getfd(int *efd, struct inode **einode, struct file **efile, -- struct eventpoll *ep) -+int ep_getfd(int *efd, struct inode **einode, struct file **efile, -+ struct eventpoll *ep, struct file_operations *fops) - { - struct qstr this; - char name[32]; -@@ -727,7 +725,7 @@ static int ep_getfd(int *efd, struct ino - goto eexit_1; - - /* Allocates an inode from the eventpoll file system */ -- inode = ep_eventpoll_inode(); -+ inode = ep_eventpoll_inode(fops); - error = PTR_ERR(inode); - if (IS_ERR(inode)) - goto eexit_2; -@@ -758,7 +756,7 @@ static int ep_getfd(int *efd, struct ino - - file->f_pos = 0; - file->f_flags = O_RDONLY; -- file->f_op = &eventpoll_fops; -+ file->f_op = fops; - file->f_mode = FMODE_READ; - file->f_version = 0; - file->private_data = ep; -@@ -1574,7 +1572,7 @@ static int eventpollfs_delete_dentry(str - } - - --static struct inode *ep_eventpoll_inode(void) -+static struct inode *ep_eventpoll_inode(struct file_operations *fops) - { - int error = -ENOMEM; - struct inode *inode = new_inode(eventpoll_mnt->mnt_sb); -@@ -1582,7 +1580,7 @@ static struct inode *ep_eventpoll_inode( - if (!inode) - goto eexit_1; - -- inode->i_fop = &eventpoll_fops; -+ inode->i_fop = fops; - - /* - * Mark the inode dirty from the very beginning, -diff -pruN ../orig-linux-2.6.16.29/include/linux/aio.h ./include/linux/aio.h ---- ../orig-linux-2.6.16.29/include/linux/aio.h 2006-09-12 19:02:10.000000000 +0100 -+++ ./include/linux/aio.h 2006-09-19 13:58:49.000000000 +0100 -@@ -191,6 +191,11 @@ struct kioctx { - struct aio_ring_info ring_info; - - struct work_struct wq; -+#ifdef CONFIG_EPOLL -+ // poll integration -+ wait_queue_head_t poll_wait; -+ struct file *file; -+#endif - }; - - /* prototypes */ -diff -pruN ../orig-linux-2.6.16.29/include/linux/eventpoll.h ./include/linux/eventpoll.h ---- ../orig-linux-2.6.16.29/include/linux/eventpoll.h 2006-09-12 19:02:10.000000000 +0100 -+++ ./include/linux/eventpoll.h 2006-09-19 13:58:49.000000000 +0100 -@@ -86,6 +86,12 @@ static inline void eventpoll_release(str - } - - -+/* -+ * called by aio code to create fd that can poll the aio event queue -+ */ -+struct eventpoll; -+int ep_getfd(int *efd, struct inode **einode, struct file **efile, -+ struct eventpoll *ep, struct file_operations *fops); - #else - - static inline void eventpoll_init_file(struct file *file) {} diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.31/device_bind.patch --- a/patches/linux-2.6.16.31/device_bind.patch Mon Nov 27 13:50:02 2006 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,15 +0,0 @@ -diff -pruN ../orig-linux-2.6.16.29/drivers/base/bus.c ./drivers/base/bus.c ---- ../orig-linux-2.6.16.29/drivers/base/bus.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./drivers/base/bus.c 2006-09-19 13:58:54.000000000 +0100 -@@ -188,6 +188,11 @@ static ssize_t driver_bind(struct device - up(&dev->sem); - if (dev->parent) - up(&dev->parent->sem); -+ -+ if (err > 0) /* success */ -+ err = count; -+ else if (err == 0) /* driver didn't accept device */ -+ err = -ENODEV; - } - put_device(dev); - put_bus(bus); diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.31/fix-hz-suspend.patch --- a/patches/linux-2.6.16.31/fix-hz-suspend.patch Mon Nov 27 13:50:02 2006 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -diff -pruN ../orig-linux-2.6.16.29/kernel/timer.c ./kernel/timer.c ---- ../orig-linux-2.6.16.29/kernel/timer.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./kernel/timer.c 2006-09-19 13:58:58.000000000 +0100 -@@ -555,6 +555,22 @@ found: - } -
spin_unlock(&base->t_base.lock); - -+ /* -+ * It can happen that other CPUs service timer IRQs and increment -+ * jiffies, but we have not yet got a local timer tick to process -+ * the timer wheels. In that case, the expiry time can be before -+ * jiffies, but since the high-resolution timer here is relative to -+ * jiffies, the default expression when high-resolution timers are -+ * not active, -+ * -+ * time_before(MAX_JIFFY_OFFSET + jiffies, expires) -+ * -+ * would falsely evaluate to true. If that is the case, just -+ * return jiffies so that we can immediately fire the local timer -+ */ -+ if (time_before(expires, jiffies)) -+ return jiffies; -+ - if (time_before(hr_expires, expires)) - return hr_expires; - diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.31/fix-ide-cd-pio-mode.patch --- a/patches/linux-2.6.16.31/fix-ide-cd-pio-mode.patch Mon Nov 27 13:50:02 2006 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,18 +0,0 @@ -diff -pruN ../orig-linux-2.6.16.29/drivers/ide/ide-lib.c ./drivers/ide/ide-lib.c ---- ../orig-linux-2.6.16.29/drivers/ide/ide-lib.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./drivers/ide/ide-lib.c 2006-09-19 13:59:03.000000000 +0100 -@@ -410,10 +410,10 @@ void ide_toggle_bounce(ide_drive_t *driv - { - u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */ - -- if (!PCI_DMA_BUS_IS_PHYS) { -- addr = BLK_BOUNCE_ANY; -- } else if (on && drive->media == ide_disk) { -- if (HWIF(drive)->pci_dev) -+ if (on && drive->media == ide_disk) { -+ if (!PCI_DMA_BUS_IS_PHYS) -+ addr = BLK_BOUNCE_ANY; -+ else if (HWIF(drive)->pci_dev) - addr = HWIF(drive)->pci_dev->dma_mask; - } - diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.31/i386-mach-io-check-nmi.patch --- a/patches/linux-2.6.16.31/i386-mach-io-check-nmi.patch Mon Nov 27 13:50:02 2006 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,45 +0,0 @@ -diff -pruN ../orig-linux-2.6.16.29/arch/i386/kernel/traps.c ./arch/i386/kernel/traps.c ---- ../orig-linux-2.6.16.29/arch/i386/kernel/traps.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./arch/i386/kernel/traps.c 2006-09-19 13:59:06.000000000 +0100 -@@ -567,18 +567,11 @@ static void mem_parity_error(unsigned ch - - static void io_check_error(unsigned char reason, struct pt_regs * regs) - { -- unsigned long i; -- - printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); - show_registers(regs); - - /* Re-enable the IOCK line, wait for a few seconds */ -- reason = (reason & 0xf) | 8; -- outb(reason, 0x61); -- i = 2000; -- while (--i) udelay(1000); -- reason &= ~8; -- outb(reason, 0x61); -+ clear_io_check_error(reason); - } - - static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) -diff -pruN ../orig-linux-2.6.16.29/include/asm-i386/mach-default/mach_traps.h ./include/asm-i386/mach-default/mach_traps.h ---- ../orig-linux-2.6.16.29/include/asm-i386/mach-default/mach_traps.h 2006-09-12 19:02:10.000000000 +0100 -+++ ./include/asm-i386/mach-default/mach_traps.h 2006-09-19 13:59:06.000000000 +0100 -@@ -15,6 +15,18 @@ static inline void clear_mem_error(unsig - outb(reason, 0x61); - } - -+static inline void clear_io_check_error(unsigned char reason) -+{ -+ unsigned long i; -+ -+ reason = (reason & 0xf) | 8; -+ outb(reason, 0x61); -+ i = 2000; -+ while (--i) udelay(1000); -+ reason &= ~8; -+ outb(reason, 0x61); -+} -+ - static inline unsigned char get_nmi_reason(void) - { - return inb(0x61); diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.31/ipv6-no-autoconf.patch --- a/patches/linux-2.6.16.31/ipv6-no-autoconf.patch Mon Nov 27 13:50:02 2006 
+0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,19 +0,0 @@ -diff -pruN ../orig-linux-2.6.16.29/net/ipv6/addrconf.c ./net/ipv6/addrconf.c ---- ../orig-linux-2.6.16.29/net/ipv6/addrconf.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./net/ipv6/addrconf.c 2006-09-19 13:59:11.000000000 +0100 -@@ -2471,6 +2471,7 @@ static void addrconf_dad_start(struct in - spin_lock_bh(&ifp->lock); - - if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || -+ !(dev->flags&IFF_MULTICAST) || - !(ifp->flags&IFA_F_TENTATIVE)) { - ifp->flags &= ~IFA_F_TENTATIVE; - spin_unlock_bh(&ifp->lock); -@@ -2555,6 +2556,7 @@ static void addrconf_dad_completed(struc - if (ifp->idev->cnf.forwarding == 0 && - ifp->idev->cnf.rtr_solicits > 0 && - (dev->flags&IFF_LOOPBACK) == 0 && -+ (dev->flags & IFF_MULTICAST) && - (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { - struct in6_addr all_routers; - diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.31/net-csum.patch --- a/patches/linux-2.6.16.31/net-csum.patch Mon Nov 27 13:50:02 2006 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ -diff -pruN ../orig-linux-2.6.16.29/net/ipv4/netfilter/ip_nat_proto_tcp.c ./net/ipv4/netfilter/ip_nat_proto_tcp.c ---- ../orig-linux-2.6.16.29/net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-09-19 13:59:15.000000000 +0100 -@@ -129,7 +129,12 @@ tcp_manip_pkt(struct sk_buff **pskb, - if (hdrsize < sizeof(*hdr)) - return 1; - -- hdr->check = ip_nat_cheat_check(~oldip, newip, -+#ifdef CONFIG_XEN -+ if ((*pskb)->proto_csum_blank) -+ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); -+ else -+#endif -+ hdr->check = ip_nat_cheat_check(~oldip, newip, - ip_nat_cheat_check(oldport ^ 0xFFFF, - newport, - hdr->check)); -diff -pruN ../orig-linux-2.6.16.29/net/ipv4/netfilter/ip_nat_proto_udp.c ./net/ipv4/netfilter/ip_nat_proto_udp.c ---- ../orig-linux-2.6.16.29/net/ipv4/netfilter/ip_nat_proto_udp.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./net/ipv4/netfilter/ip_nat_proto_udp.c 2006-09-19 13:59:15.000000000 +0100 -@@ -113,11 +113,17 @@ udp_manip_pkt(struct sk_buff **pskb, - newport = tuple->dst.u.udp.port; - portptr = &hdr->dest; - } -- if (hdr->check) /* 0 is a special case meaning no checksum */ -- hdr->check = ip_nat_cheat_check(~oldip, newip, -+ if (hdr->check) { /* 0 is a special case meaning no checksum */ -+#ifdef CONFIG_XEN -+ if ((*pskb)->proto_csum_blank) -+ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); -+ else -+#endif -+ hdr->check = ip_nat_cheat_check(~oldip, newip, - ip_nat_cheat_check(*portptr ^ 0xFFFF, - newport, - hdr->check)); -+ } - *portptr = newport; - return 1; - } -diff -pruN ../orig-linux-2.6.16.29/net/ipv4/xfrm4_output.c ./net/ipv4/xfrm4_output.c ---- ../orig-linux-2.6.16.29/net/ipv4/xfrm4_output.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./net/ipv4/xfrm4_output.c 2006-09-19 13:59:15.000000000 +0100 -@@ -17,6 +17,8 @@ - #include <net/xfrm.h> - #include <net/icmp.h> - -+extern int skb_checksum_setup(struct sk_buff *skb); -+ - /* Add encapsulation header. 
- * - * In transport mode, the IP header will be moved forward to make space -@@ -103,6 +105,10 @@ static int xfrm4_output_one(struct sk_bu - struct xfrm_state *x = dst->xfrm; - int err; - -+ err = skb_checksum_setup(skb); -+ if (err) -+ goto error_nolock; -+ - if (skb->ip_summed == CHECKSUM_HW) { - err = skb_checksum_help(skb, 0); - if (err) diff -r 447ac06f74d3 -r aaaa249e6f3b patches/linux-2.6.16.31/net-gso-0-base.patch --- a/patches/linux-2.6.16.31/net-gso-0-base.patch Mon Nov 27 13:50:02 2006 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2835 +0,0 @@ -diff -pruN ../orig-linux-2.6.16.29/Documentation/networking/netdevices.txt ./Documentation/networking/netdevices.txt ---- ../orig-linux-2.6.16.29/Documentation/networking/netdevices.txt 2006-09-12 19:02:10.000000000 +0100 -+++ ./Documentation/networking/netdevices.txt 2006-09-19 13:59:20.000000000 +0100 -@@ -42,9 +42,9 @@ dev->get_stats: - Context: nominally process, but don't sleep inside an rwlock - - dev->hard_start_xmit: -- Synchronization: dev->xmit_lock spinlock. -+ Synchronization: netif_tx_lock spinlock. - When the driver sets NETIF_F_LLTX in dev->features this will be -- called without holding xmit_lock. In this case the driver -+ called without holding netif_tx_lock. In this case the driver - has to lock by itself when needed. It is recommended to use a try lock - for this and return -1 when the spin lock fails. - The locking there should also properly protect against -@@ -62,12 +62,12 @@ dev->hard_start_xmit: - Only valid when NETIF_F_LLTX is set. - - dev->tx_timeout: -- Synchronization: dev->xmit_lock spinlock. -+ Synchronization: netif_tx_lock spinlock. - Context: BHs disabled - Notes: netif_queue_stopped() is guaranteed true - - dev->set_multicast_list: -- Synchronization: dev->xmit_lock spinlock. -+ Synchronization: netif_tx_lock spinlock. 
- Context: BHs disabled - - dev->poll: -diff -pruN ../orig-linux-2.6.16.29/drivers/block/aoe/aoenet.c ./drivers/block/aoe/aoenet.c ---- ../orig-linux-2.6.16.29/drivers/block/aoe/aoenet.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./drivers/block/aoe/aoenet.c 2006-09-19 13:59:20.000000000 +0100 -@@ -95,9 +95,8 @@ mac_addr(char addr[6]) - static struct sk_buff * - skb_check(struct sk_buff *skb) - { -- if (skb_is_nonlinear(skb)) - if ((skb = skb_share_check(skb, GFP_ATOMIC))) -- if (skb_linearize(skb, GFP_ATOMIC) < 0) { -+ if (skb_linearize(skb)) { - dev_kfree_skb(skb); - return NULL; - } -diff -pruN ../orig-linux-2.6.16.29/drivers/infiniband/ulp/ipoib/ipoib_multicast.c ./drivers/infiniband/ulp/ipoib/ipoib_multicast.c ---- ../orig-linux-2.6.16.29/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2006-09-19 13:59:20.000000000 +0100 -@@ -821,7 +821,8 @@ void ipoib_mcast_restart_task(void *dev_ - - ipoib_mcast_stop_thread(dev, 0); - -- spin_lock_irqsave(&dev->xmit_lock, flags); -+ local_irq_save(flags); -+ netif_tx_lock(dev); - spin_lock(&priv->lock); - - /* -@@ -896,7 +897,8 @@ void ipoib_mcast_restart_task(void *dev_ - } - - spin_unlock(&priv->lock); -- spin_unlock_irqrestore(&dev->xmit_lock, flags); -+ netif_tx_unlock(dev); -+ local_irq_restore(flags); - - /* We have to cancel outside of the spinlock */ - list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { -diff -pruN ../orig-linux-2.6.16.29/drivers/media/dvb/dvb-core/dvb_net.c ./drivers/media/dvb/dvb-core/dvb_net.c ---- ../orig-linux-2.6.16.29/drivers/media/dvb/dvb-core/dvb_net.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./drivers/media/dvb/dvb-core/dvb_net.c 2006-09-19 13:59:20.000000000 +0100 -@@ -1052,7 +1052,7 @@ static void wq_set_multicast_list (void - - dvb_net_feed_stop(dev); - priv->rx_mode = RX_MODE_UNI; -- spin_lock_bh(&dev->xmit_lock); -+ netif_tx_lock_bh(dev); - - if (dev->flags & IFF_PROMISC) { - dprintk("%s: promiscuous mode\n", dev->name); -@@ -1077,7 +1077,7 @@ static void wq_set_multicast_list (void - } - } - -- spin_unlock_bh(&dev->xmit_lock); -+ netif_tx_unlock_bh(dev); - dvb_net_feed_start(dev); - } - -diff -pruN ../orig-linux-2.6.16.29/drivers/net/8139cp.c ./drivers/net/8139cp.c ---- ../orig-linux-2.6.16.29/drivers/net/8139cp.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./drivers/net/8139cp.c 2006-09-19 13:59:20.000000000 +0100 -@@ -794,7 +794,7 @@ static int cp_start_xmit (struct sk_buff - entry = cp->tx_head; - eor = (entry == (CP_TX_RING_SIZE - 1)) ? RingEnd : 0; - if (dev->features & NETIF_F_TSO) -- mss = skb_shinfo(skb)->tso_size; -+ mss = skb_shinfo(skb)->gso_size; - - if (skb_shinfo(skb)->nr_frags == 0) { - struct cp_desc *txd = &cp->tx_ring[entry]; -diff -pruN ../orig-linux-2.6.16.29/drivers/net/bnx2.c ./drivers/net/bnx2.c ---- ../orig-linux-2.6.16.29/drivers/net/bnx2.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./drivers/net/bnx2.c 2006-09-19 13:59:20.000000000 +0100 -@@ -1593,7 +1593,7 @@ bnx2_tx_int(struct bnx2 *bp) - skb = tx_buf->skb; - #ifdef BCM_TSO - /* partial BD completions possible with TSO packets */ -- if (skb_shinfo(skb)->tso_size) { -+ if (skb_shinfo(skb)->gso_size) { - u16 last_idx, last_ring_idx; - - last_idx = sw_cons + -@@ -1948,7 +1948,7 @@ bnx2_poll(struct net_device *dev, int *b - return 1; - } - --/* Called with rtnl_lock from vlan functions and also dev->xmit_lock -+/* Called with rtnl_lock from vlan functions and also netif_tx_lock - * from set_multicast. 
- */ - static void -@@ -4403,7 +4403,7 @@ bnx2_vlan_rx_kill_vid(struct net_device - } - #endif - --/* Called with dev->xmit_lock. -+/* Called with netif_tx_lock. - * hard_start_xmit is pseudo-lockless - a lock is only required when - * the tx queue is full. This way, we get the benefit of lockless - * operations most of the time without the complexities to handle -@@ -4441,7 +4441,7 @@ bnx2_start_xmit(struct sk_buff *skb, str - (TX_BD_FLAGS_VLAN_TAG | (vlan_tx_tag_get(skb) << 16)); - } - #ifdef BCM_TSO -- if ((mss = skb_shinfo(skb)->tso_size) && -+ if ((mss = skb_shinfo(skb)->gso_size) && - (skb->len > (bp->dev->mtu + ETH_HLEN))) { - u32 tcp_opt_len, ip_tcp_len; - -diff -pruN ../orig-linux-2.6.16.29/drivers/net/bonding/bond_main.c ./drivers/net/bonding/bond_main.c ---- ../orig-linux-2.6.16.29/drivers/net/bonding/bond_main.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./drivers/net/bonding/bond_main.c 2006-09-19 13:59:20.000000000 +0100 -@@ -1145,8 +1145,7 @@ int bond_sethwaddr(struct net_device *bo - } - - #define BOND_INTERSECT_FEATURES \ -- (NETIF_F_SG|NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM|\ -- NETIF_F_TSO|NETIF_F_UFO) -+ (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_TSO | NETIF_F_UFO) - - /* - * Compute the common dev->feature set available to all slaves. Some -@@ -1164,9 +1163,7 @@ static int bond_compute_features(struct - features &= (slave->dev->features & BOND_INTERSECT_FEATURES); - - if ((features & NETIF_F_SG) && -- !(features & (NETIF_F_IP_CSUM | -- NETIF_F_NO_CSUM | -- NETIF_F_HW_CSUM))) -+ !(features & NETIF_F_ALL_CSUM)) - features &= ~NETIF_F_SG; - - /* -@@ -4147,7 +4144,7 @@ static int bond_init(struct net_device * - */ - bond_dev->features |= NETIF_F_VLAN_CHALLENGED; - -- /* don't acquire bond device's xmit_lock when -+ /* don't acquire bond device's netif_tx_lock when - * transmitting */ - bond_dev->features |= NETIF_F_LLTX; - -diff -pruN ../orig-linux-2.6.16.29/drivers/net/chelsio/sge.c ./drivers/net/chelsio/sge.c ---- ../orig-linux-2.6.16.29/drivers/net/chelsio/sge.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./drivers/net/chelsio/sge.c 2006-09-19 13:59:20.000000000 +0100 -@@ -1419,7 +1419,7 @@ int t1_start_xmit(struct sk_buff *skb, s - struct cpl_tx_pkt *cpl; - - #ifdef NETIF_F_TSO -- if (skb_shinfo(skb)->tso_size) { -+ if (skb_shinfo(skb)->gso_size) { - int eth_type; - struct cpl_tx_pkt_lso *hdr; - -@@ -1434,7 +1434,7 @@ int t1_start_xmit(struct sk_buff *skb, s - hdr->ip_hdr_words = skb->nh.iph->ihl; - hdr->tcp_hdr_words = skb->h.th->doff; - hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type, -- skb_shinfo(skb)->tso_size)); -+ skb_shinfo(skb)->gso_size)); - hdr->len = htonl(skb->len - sizeof(*hdr)); - cpl = (struct cpl_tx_pkt *)hdr; - sge->stats.tx_lso_pkts++; -diff -pruN ../orig-linux-2.6.16.29/drivers/net/e1000/e1000_main.c ./drivers/net/e1000/e1000_main.c ---- ../orig-linux-2.6.16.29/drivers/net/e1000/e1000_main.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./drivers/net/e1000/e1000_main.c 2006-09-19 13:59:20.000000000 +0100 -@@ -2526,7 +2526,7 @@ e1000_tso(struct e1000_adapter *adapter, - uint8_t ipcss, ipcso, tucss, tucso, hdr_len; - int err; - -- if (skb_shinfo(skb)->tso_size) { -+ if (skb_shinfo(skb)->gso_size) { - if (skb_header_cloned(skb)) { - err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - if (err) -@@ -2534,7 +2534,7 @@ e1000_tso(struct e1000_adapter *adapter, - } - - hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); -- mss = skb_shinfo(skb)->tso_size; -+ mss = skb_shinfo(skb)->gso_size; - if (skb->protocol == ntohs(ETH_P_IP)) { - 
skb->nh.iph->tot_len = 0; - skb->nh.iph->check = 0; -@@ -2651,7 +2651,7 @@ e1000_tx_map(struct e1000_adapter *adapt - * tso gets written back prematurely before the data is fully - * DMAd to the controller */ - if (!skb->data_len && tx_ring->last_tx_tso && -- !skb_shinfo(skb)->tso_size) { -+ !skb_shinfo(skb)->gso_size) { - tx_ring->last_tx_tso = 0; - size -= 4; - } -@@ -2893,7 +2893,7 @@ e1000_xmit_frame(struct sk_buff *skb, st - } - - #ifdef NETIF_F_TSO -- mss = skb_shinfo(skb)->tso_size; -+ mss = skb_shinfo(skb)->gso_size; - /* The controller does a simple calculation to - * make sure there is enough room in the FIFO before - * initiating the DMA for each buffer. The calc is: -@@ -2935,7 +2935,7 @@ e1000_xmit_frame(struct sk_buff *skb, st - #ifdef NETIF_F_TSO - /* Controller Erratum workaround */ - if (!skb->data_len && tx_ring->last_tx_tso && -- !skb_shinfo(skb)->tso_size) -+ !skb_shinfo(skb)->gso_size) - count++; - #endif - -diff -pruN ../orig-linux-2.6.16.29/drivers/net/forcedeth.c ./drivers/net/forcedeth.c ---- ../orig-linux-2.6.16.29/drivers/net/forcedeth.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./drivers/net/forcedeth.c 2006-09-19 13:59:20.000000000 +0100 -@@ -482,9 +482,9 @@ typedef union _ring_type { - * critical parts: - * - rx is (pseudo-) lockless: it relies on the single-threading provided - * by the arch code for interrupts. -- * - tx setup is lockless: it relies on dev->xmit_lock. Actual submission -+ * - tx setup is lockless: it relies on netif_tx_lock. Actual submission - * needs dev->priv->lock :-( -- * - set_multicast_list: preparation lockless, relies on dev->xmit_lock. -+ * - set_multicast_list: preparation lockless, relies on netif_tx_lock. - */ - - /* in dev: base, irq */ -@@ -1016,7 +1016,7 @@ static void drain_ring(struct net_device - - /* - * nv_start_xmit: dev->hard_start_xmit function -- * Called with dev->xmit_lock held. -+ * Called with netif_tx_lock held. - */ - static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev) - { -@@ -1105,8 +1105,8 @@ static int nv_start_xmit(struct sk_buff - np->tx_skbuff[nr] = skb; - - #ifdef NETIF_F_TSO -- if (skb_shinfo(skb)->tso_size) -- tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->tso_size << NV_TX2_TSO_SHIFT); -+ if (skb_shinfo(skb)->gso_size) -+ tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT); - else - #endif - tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0); -@@ -1203,7 +1203,7 @@ static void nv_tx_done(struct net_device - - /* - * nv_tx_timeout: dev->tx_timeout function -- * Called with dev->xmit_lock held. -+ * Called with netif_tx_lock held. - */ - static void nv_tx_timeout(struct net_device *dev) - { -@@ -1524,7 +1524,7 @@ static int nv_change_mtu(struct net_devi - * Changing the MTU is a rare event, it shouldn't matter. 
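/*
 * A minimal sketch (not part of the changeset) of the locking conversion
 * this patch series applies across the drivers above: configuration
 * paths that must not race with hard_start_xmit() now take the
 * netif_tx_lock helpers instead of grabbing dev->xmit_lock directly.
 * The function name and the elided body are placeholders.
 */
#include <linux/netdevice.h>

static void example_reconfigure(struct net_device *dev)
{
	netif_tx_lock_bh(dev);		/* was: spin_lock_bh(&dev->xmit_lock) */
	/* ... stop engines, rewrite rings, restart engines ... */
	netif_tx_unlock_bh(dev);	/* was: spin_unlock_bh(&dev->xmit_lock) */
}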
- */ - disable_irq(dev->irq); -- spin_lock_bh(&dev->xmit_lock); -+ netif_tx_lock_bh(dev); - spin_lock(&np->lock); - /* stop engines */ - nv_stop_rx(dev); -@@ -1559,7 +1559,7 @@ static int nv_change_mtu(struct net_devi - nv_start_rx(dev); - nv_start_tx(dev); - spin_unlock(&np->lock); -- spin_unlock_bh(&dev->xmit_lock); -+ netif_tx_unlock_bh(dev); - enable_irq(dev->irq); - } - return 0; -@@ -1594,7 +1594,7 @@ static int nv_set_mac_address(struct net - memcpy(dev->dev_addr, macaddr->sa_data, ETH_ALEN); - - if (netif_running(dev)) { -- spin_lock_bh(&dev->xmit_lock); -+ netif_tx_lock_bh(dev); - spin_lock_irq(&np->lock); - - /* stop rx engine */ -@@ -1606,7 +1606,7 @@ static int nv_set_mac_address(struct net - /* restart rx engine */ - nv_start_rx(dev); - spin_unlock_irq(&np->lock); -- spin_unlock_bh(&dev->xmit_lock); -+ netif_tx_unlock_bh(dev); - } else { - nv_copy_mac_to_hw(dev); - } -@@ -1615,7 +1615,7 @@ static int nv_set_mac_address(struct net - - /* - * nv_set_multicast: dev->set_multicast function -- * Called with dev->xmit_lock held. -+ * Called with netif_tx_lock held. - */ - static void nv_set_multicast(struct net_device *dev) - { -diff -pruN ../orig-linux-2.6.16.29/drivers/net/hamradio/6pack.c ./drivers/net/hamradio/6pack.c ---- ../orig-linux-2.6.16.29/drivers/net/hamradio/6pack.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./drivers/net/hamradio/6pack.c 2006-09-19 13:59:20.000000000 +0100 -@@ -308,9 +308,9 @@ static int sp_set_mac_address(struct net - { - struct sockaddr_ax25 *sa = addr; - -- spin_lock_irq(&dev->xmit_lock); -+ netif_tx_lock_bh(dev); - memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN); -- spin_unlock_irq(&dev->xmit_lock); -+ netif_tx_unlock_bh(dev); - - return 0; - } -@@ -767,9 +767,9 @@ static int sixpack_ioctl(struct tty_stru - break; - } - -- spin_lock_irq(&dev->xmit_lock); -+ netif_tx_lock_bh(dev); - memcpy(dev->dev_addr, &addr, AX25_ADDR_LEN); -- spin_unlock_irq(&dev->xmit_lock); -+ netif_tx_unlock_bh(dev); - - err = 0; - break; -diff -pruN ../orig-linux-2.6.16.29/drivers/net/hamradio/mkiss.c ./drivers/net/hamradio/mkiss.c ---- ../orig-linux-2.6.16.29/drivers/net/hamradio/mkiss.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./drivers/net/hamradio/mkiss.c 2006-09-19 13:59:20.000000000 +0100 -@@ -357,9 +357,9 @@ static int ax_set_mac_address(struct net - { - struct sockaddr_ax25 *sa = addr; - -- spin_lock_irq(&dev->xmit_lock); -+ netif_tx_lock_bh(dev); - memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN); -- spin_unlock_irq(&dev->xmit_lock); -+ netif_tx_unlock_bh(dev); - - return 0; - } -@@ -886,9 +886,9 @@ static int mkiss_ioctl(struct tty_struct - break; - } - -- spin_lock_irq(&dev->xmit_lock); -+ netif_tx_lock_bh(dev); - memcpy(dev->dev_addr, addr, AX25_ADDR_LEN); -- spin_unlock_irq(&dev->xmit_lock); -+ netif_tx_unlock_bh(dev); - - err = 0; - break; -diff -pruN ../orig-linux-2.6.16.29/drivers/net/ifb.c ./drivers/net/ifb.c ---- ../orig-linux-2.6.16.29/drivers/net/ifb.c 2006-09-12 19:02:10.000000000 +0100 -+++ ./drivers/net/ifb.c 2006-09-19 13:59:20.000000000 +0100 -@@ -76,13 +76,13 @@ static void ri_tasklet(unsigned long dev - dp->st_task_enter++; - if ((skb = skb_peek(&dp->tq)) == NULL) { - dp->st_txq_refl_try++; -- if (spin_trylock(&_dev->xmit_lock)) { -+ if (netif_tx_trylock(_dev)) { - dp->st_rxq_enter++; - while ((skb = skb_dequeue(&dp->rq)) != NULL) { - skb_queue_tail(&dp->tq, skb); - dp->st_rx2tx_tran++; - } -- spin_unlock(&_dev->xmit_lock); -+ netif_tx_unlock(_dev); - } else { - /* reschedule */ - dp->st_rxq_notenter++; -@@ -110,7 +110,7 @@ static void 
ri_tasklet(unsigned long dev
- 		}
- 	}
- 
--	if (spin_trylock(&_dev->xmit_lock)) {
-+	if (netif_tx_trylock(_dev)) {
- 		dp->st_rxq_check++;
- 		if ((skb = skb_peek(&dp->rq)) == NULL) {
- 			dp->tasklet_pending = 0;
-@@ -118,10 +118,10 @@ static void ri_tasklet(unsigned long dev
- 			netif_wake_queue(_dev);
- 	} else {
- 		dp->st_rxq_rsch++;
--		spin_unlock(&_dev->xmit_lock);
-+		netif_tx_unlock(_dev);
- 		goto resched;
- 	}
--	spin_unlock(&_dev->xmit_lock);
-+	netif_tx_unlock(_dev);
- 	} else {
- resched:
- 		dp->tasklet_pending = 1;
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/irda/vlsi_ir.c ./drivers/net/irda/vlsi_ir.c
---- ../orig-linux-2.6.16.29/drivers/net/irda/vlsi_ir.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/net/irda/vlsi_ir.c	2006-09-19 13:59:20.000000000 +0100
-@@ -959,7 +959,7 @@ static int vlsi_hard_start_xmit(struct s
- 			    || (now.tv_sec==ready.tv_sec && now.tv_usec>=ready.tv_usec))
- 				break;
- 			udelay(100);
--			/* must not sleep here - we are called under xmit_lock! */
-+			/* must not sleep here - called under netif_tx_lock! */
- 		}
- 	}
- 
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/ixgb/ixgb_main.c ./drivers/net/ixgb/ixgb_main.c
---- ../orig-linux-2.6.16.29/drivers/net/ixgb/ixgb_main.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/net/ixgb/ixgb_main.c	2006-09-19 13:59:20.000000000 +0100
-@@ -1163,7 +1163,7 @@ ixgb_tso(struct ixgb_adapter *adapter, s
- 	uint16_t ipcse, tucse, mss;
- 	int err;
- 
--	if(likely(skb_shinfo(skb)->tso_size)) {
-+	if(likely(skb_shinfo(skb)->gso_size)) {
- 		if (skb_header_cloned(skb)) {
- 			err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
- 			if (err)
-@@ -1171,7 +1171,7 @@ ixgb_tso(struct ixgb_adapter *adapter, s
- 		}
- 
- 		hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
--		mss = skb_shinfo(skb)->tso_size;
-+		mss = skb_shinfo(skb)->gso_size;
- 		skb->nh.iph->tot_len = 0;
- 		skb->nh.iph->check = 0;
- 		skb->h.th->check = ~csum_tcpudp_magic(skb->nh.iph->saddr,
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/loopback.c ./drivers/net/loopback.c
---- ../orig-linux-2.6.16.29/drivers/net/loopback.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/net/loopback.c	2006-09-19 13:59:20.000000000 +0100
-@@ -74,7 +74,7 @@ static void emulate_large_send_offload(s
- 	struct iphdr *iph = skb->nh.iph;
- 	struct tcphdr *th = (struct tcphdr*)(skb->nh.raw + (iph->ihl * 4));
- 	unsigned int doffset = (iph->ihl + th->doff) * 4;
--	unsigned int mtu = skb_shinfo(skb)->tso_size + doffset;
-+	unsigned int mtu = skb_shinfo(skb)->gso_size + doffset;
- 	unsigned int offset = 0;
- 	u32 seq = ntohl(th->seq);
- 	u16 id = ntohs(iph->id);
-@@ -139,7 +139,7 @@ static int loopback_xmit(struct sk_buff
- #endif
- 
- #ifdef LOOPBACK_TSO
--	if (skb_shinfo(skb)->tso_size) {
-+	if (skb_shinfo(skb)->gso_size) {
- 		BUG_ON(skb->protocol != htons(ETH_P_IP));
- 		BUG_ON(skb->nh.iph->protocol != IPPROTO_TCP);
- 
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/mv643xx_eth.c ./drivers/net/mv643xx_eth.c
---- ../orig-linux-2.6.16.29/drivers/net/mv643xx_eth.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/net/mv643xx_eth.c	2006-09-19 13:59:20.000000000 +0100
-@@ -1107,7 +1107,7 @@ static int mv643xx_eth_start_xmit(struct
- 
- #ifdef MV643XX_CHECKSUM_OFFLOAD_TX
- 	if (has_tiny_unaligned_frags(skb)) {
--		if ((skb_linearize(skb, GFP_ATOMIC) != 0)) {
-+		if (__skb_linearize(skb)) {
- 			stats->tx_dropped++;
- 			printk(KERN_DEBUG "%s: failed to linearize tiny "
- 					"unaligned fragment\n", dev->name);
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/natsemi.c ./drivers/net/natsemi.c
---- ../orig-linux-2.6.16.29/drivers/net/natsemi.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/net/natsemi.c	2006-09-19 13:59:20.000000000 +0100
-@@ -323,12 +323,12 @@ performance critical codepaths:
- The rx process only runs in the interrupt handler. Access from outside
- the interrupt handler is only permitted after disable_irq().
- 
--The rx process usually runs under the dev->xmit_lock. If np->intr_tx_reap
-+The rx process usually runs under the netif_tx_lock. If np->intr_tx_reap
- is set, then access is permitted under spin_lock_irq(&np->lock).
- 
- Thus configuration functions that want to access everything must call
- 	disable_irq(dev->irq);
--	spin_lock_bh(dev->xmit_lock);
-+	netif_tx_lock_bh(dev);
- 	spin_lock_irq(&np->lock);
- 
- IV. Notes
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/r8169.c ./drivers/net/r8169.c
---- ../orig-linux-2.6.16.29/drivers/net/r8169.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/net/r8169.c	2006-09-19 13:59:20.000000000 +0100
-@@ -2171,7 +2171,7 @@ static int rtl8169_xmit_frags(struct rtl
- static inline u32 rtl8169_tso_csum(struct sk_buff *skb, struct net_device *dev)
- {
- 	if (dev->features & NETIF_F_TSO) {
--		u32 mss = skb_shinfo(skb)->tso_size;
-+		u32 mss = skb_shinfo(skb)->gso_size;
- 
- 		if (mss)
- 			return LargeSend | ((mss & MSSMask) << MSSShift);
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/s2io.c ./drivers/net/s2io.c
---- ../orig-linux-2.6.16.29/drivers/net/s2io.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/net/s2io.c	2006-09-19 13:59:20.000000000 +0100
-@@ -3522,8 +3522,8 @@ static int s2io_xmit(struct sk_buff *skb
- 	txdp->Control_1 = 0;
- 	txdp->Control_2 = 0;
- #ifdef NETIF_F_TSO
--	mss = skb_shinfo(skb)->tso_size;
--	if (mss) {
-+	mss = skb_shinfo(skb)->gso_size;
-+	if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV4) {
- 		txdp->Control_1 |= TXD_TCP_LSO_EN;
- 		txdp->Control_1 |= TXD_TCP_LSO_MSS(mss);
- 	}
-@@ -3543,10 +3543,10 @@ static int s2io_xmit(struct sk_buff *skb
- 	}
- 
- 	frg_len = skb->len - skb->data_len;
--	if (skb_shinfo(skb)->ufo_size) {
-+	if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) {
- 		int ufo_size;
- 
--		ufo_size = skb_shinfo(skb)->ufo_size;
-+		ufo_size = skb_shinfo(skb)->gso_size;
- 		ufo_size &= ~7;
- 		txdp->Control_1 |= TXD_UFO_EN;
- 		txdp->Control_1 |= TXD_UFO_MSS(ufo_size);
-@@ -3572,7 +3572,7 @@ static int s2io_xmit(struct sk_buff *skb
- 	txdp->Host_Control = (unsigned long) skb;
- 	txdp->Control_1 |= TXD_BUFFER0_SIZE(frg_len);
- 
--	if (skb_shinfo(skb)->ufo_size)
-+	if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
- 		txdp->Control_1 |= TXD_UFO_EN;
- 
- 	frg_cnt = skb_shinfo(skb)->nr_frags;
-@@ -3587,12 +3587,12 @@ static int s2io_xmit(struct sk_buff *skb
- 		    (sp->pdev, frag->page, frag->page_offset,
- 		     frag->size, PCI_DMA_TODEVICE);
- 		txdp->Control_1 = TXD_BUFFER0_SIZE(frag->size);
--		if (skb_shinfo(skb)->ufo_size)
-+		if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
- 			txdp->Control_1 |= TXD_UFO_EN;
- 	}
- 	txdp->Control_1 |= TXD_GATHER_CODE_LAST;
- 
--	if (skb_shinfo(skb)->ufo_size)
-+	if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
- 		frg_cnt++; /* as Txd0 was used for inband header */
- 
- 	tx_fifo = mac_control->tx_FIFO_start[queue];
-@@ -3606,7 +3606,7 @@ static int s2io_xmit(struct sk_buff *skb
- 	if (mss)
- 		val64 |= TX_FIFO_SPECIAL_FUNC;
- #endif
--	if (skb_shinfo(skb)->ufo_size)
-+	if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
- 		val64 |= TX_FIFO_SPECIAL_FUNC;
- 	writeq(val64, &tx_fifo->List_Control);
- 
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/sky2.c ./drivers/net/sky2.c
---- ../orig-linux-2.6.16.29/drivers/net/sky2.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/net/sky2.c	2006-09-19 13:59:20.000000000 +0100
-@@ -1125,7 +1125,7 @@ static unsigned tx_le_req(const struct s
- 	count = sizeof(dma_addr_t) / sizeof(u32);
- 	count += skb_shinfo(skb)->nr_frags * count;
- 
--	if (skb_shinfo(skb)->tso_size)
-+	if (skb_shinfo(skb)->gso_size)
- 		++count;
- 
- 	if (skb->ip_summed == CHECKSUM_HW)
-@@ -1197,7 +1197,7 @@ static int sky2_xmit_frame(struct sk_buf
- 	}
- 
- 	/* Check for TCP Segmentation Offload */
--	mss = skb_shinfo(skb)->tso_size;
-+	mss = skb_shinfo(skb)->gso_size;
- 	if (mss != 0) {
- 		/* just drop the packet if non-linear expansion fails */
- 		if (skb_header_cloned(skb) &&
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/tg3.c ./drivers/net/tg3.c
---- ../orig-linux-2.6.16.29/drivers/net/tg3.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/net/tg3.c	2006-09-19 13:59:20.000000000 +0100
-@@ -3664,7 +3664,7 @@ static int tg3_start_xmit(struct sk_buff
- #if TG3_TSO_SUPPORT != 0
- 	mss = 0;
- 	if (skb->len > (tp->dev->mtu + ETH_HLEN) &&
--	    (mss = skb_shinfo(skb)->tso_size) != 0) {
-+	    (mss = skb_shinfo(skb)->gso_size) != 0) {
- 		int tcp_opt_len, ip_tcp_len;
- 
- 		if (skb_header_cloned(skb) &&
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/tulip/winbond-840.c ./drivers/net/tulip/winbond-840.c
---- ../orig-linux-2.6.16.29/drivers/net/tulip/winbond-840.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/net/tulip/winbond-840.c	2006-09-19 13:59:20.000000000 +0100
-@@ -1605,11 +1605,11 @@ static void __devexit w840_remove1 (stru
-  * - get_stats:
-  * 	spin_lock_irq(np->lock), doesn't touch hw if not present
-  * - hard_start_xmit:
-- * 	netif_stop_queue + spin_unlock_wait(&dev->xmit_lock);
-+ * 	synchronize_irq + netif_tx_disable;
-  * - tx_timeout:
-- * 	netif_device_detach + spin_unlock_wait(&dev->xmit_lock);
-+ * 	netif_device_detach + netif_tx_disable;
-  * - set_multicast_list
-- * 	netif_device_detach + spin_unlock_wait(&dev->xmit_lock);
-+ * 	netif_device_detach + netif_tx_disable;
-  * - interrupt handler
-  * 	doesn't touch hw if not present, synchronize_irq waits for
-  * 	running instances of the interrupt handler.
-@@ -1635,11 +1635,10 @@ static int w840_suspend (struct pci_dev
- 		netif_device_detach(dev);
- 		update_csr6(dev, 0);
- 		iowrite32(0, ioaddr + IntrEnable);
--		netif_stop_queue(dev);
- 		spin_unlock_irq(&np->lock);
- 
--		spin_unlock_wait(&dev->xmit_lock);
- 		synchronize_irq(dev->irq);
-+		netif_tx_disable(dev);
- 
- 		np->stats.rx_missed_errors += ioread32(ioaddr + RxMissed) & 0xffff;
- 
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/typhoon.c ./drivers/net/typhoon.c
---- ../orig-linux-2.6.16.29/drivers/net/typhoon.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/net/typhoon.c	2006-09-19 13:59:20.000000000 +0100
-@@ -340,7 +340,7 @@ enum state_values {
- #endif
- 
- #if defined(NETIF_F_TSO)
--#define skb_tso_size(x)		(skb_shinfo(x)->tso_size)
-+#define skb_tso_size(x)		(skb_shinfo(x)->gso_size)
- #define TSO_NUM_DESCRIPTORS	2
- #define TSO_OFFLOAD_ON		TYPHOON_OFFLOAD_TCP_SEGMENT
- #else
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/via-velocity.c ./drivers/net/via-velocity.c
---- ../orig-linux-2.6.16.29/drivers/net/via-velocity.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/net/via-velocity.c	2006-09-19 13:59:20.000000000 +0100
-@@ -1899,6 +1899,13 @@ static int velocity_xmit(struct sk_buff
- 
- 	int pktlen = skb->len;
- 
-+#ifdef VELOCITY_ZERO_COPY_SUPPORT
-+	if (skb_shinfo(skb)->nr_frags > 6 && __skb_linearize(skb)) {
-+		kfree_skb(skb);
-+		return 0;
-+	}
-+#endif
-+
- 	spin_lock_irqsave(&vptr->lock, flags);
- 
- 	index = vptr->td_curr[qnum];
-@@ -1914,8 +1921,6 @@ static int velocity_xmit(struct sk_buff
- 	 */
- 	if (pktlen < ETH_ZLEN) {
- 		/* Cannot occur until ZC support */
--		if(skb_linearize(skb, GFP_ATOMIC))
--			return 0;
- 		pktlen = ETH_ZLEN;
- 		memcpy(tdinfo->buf, skb->data, skb->len);
- 		memset(tdinfo->buf + skb->len, 0, ETH_ZLEN - skb->len);
-@@ -1933,7 +1938,6 @@ static int velocity_xmit(struct sk_buff
- 		int nfrags = skb_shinfo(skb)->nr_frags;
- 		tdinfo->skb = skb;
- 		if (nfrags > 6) {
--			skb_linearize(skb, GFP_ATOMIC);
- 			memcpy(tdinfo->buf, skb->data, skb->len);
- 			tdinfo->skb_dma[0] = tdinfo->buf_dma;
- 			td_ptr->tdesc0.pktsize =
-diff -pruN ../orig-linux-2.6.16.29/drivers/net/wireless/orinoco.c ./drivers/net/wireless/orinoco.c
---- ../orig-linux-2.6.16.29/drivers/net/wireless/orinoco.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/net/wireless/orinoco.c	2006-09-19 13:59:20.000000000 +0100
-@@ -1835,7 +1835,9 @@ static int __orinoco_program_rids(struct
- 	/* Set promiscuity / multicast*/
- 	priv->promiscuous = 0;
- 	priv->mc_count = 0;
--	__orinoco_set_multicast_list(dev); /* FIXME: what about the xmit_lock */
-+
-+	/* FIXME: what about netif_tx_lock */
-+	__orinoco_set_multicast_list(dev);
- 
- 	return 0;
- }
-diff -pruN ../orig-linux-2.6.16.29/drivers/s390/net/qeth_eddp.c ./drivers/s390/net/qeth_eddp.c
---- ../orig-linux-2.6.16.29/drivers/s390/net/qeth_eddp.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/s390/net/qeth_eddp.c	2006-09-19 13:59:20.000000000 +0100
-@@ -421,7 +421,7 @@ __qeth_eddp_fill_context_tcp(struct qeth
- 	}
- 	tcph = eddp->skb->h.th;
- 	while (eddp->skb_offset < eddp->skb->len) {
--		data_len = min((int)skb_shinfo(eddp->skb)->tso_size,
-+		data_len = min((int)skb_shinfo(eddp->skb)->gso_size,
- 			       (int)(eddp->skb->len - eddp->skb_offset));
- 		/* prepare qdio hdr */
- 		if (eddp->qh.hdr.l2.id == QETH_HEADER_TYPE_LAYER2){
-@@ -516,20 +516,20 @@ qeth_eddp_calc_num_pages(struct qeth_edd
- 
- 	QETH_DBF_TEXT(trace, 5, "eddpcanp");
- 	/* can we put multiple skbs in one page? */
--	skbs_per_page = PAGE_SIZE / (skb_shinfo(skb)->tso_size + hdr_len);
-+	skbs_per_page = PAGE_SIZE / (skb_shinfo(skb)->gso_size + hdr_len);
- 	if (skbs_per_page > 1){
--		ctx->num_pages = (skb_shinfo(skb)->tso_segs + 1) /
-+		ctx->num_pages = (skb_shinfo(skb)->gso_segs + 1) /
- 				 skbs_per_page + 1;
- 		ctx->elements_per_skb = 1;
- 	} else {
- 		/* no -> how many elements per skb? */
--		ctx->elements_per_skb = (skb_shinfo(skb)->tso_size + hdr_len +
-+		ctx->elements_per_skb = (skb_shinfo(skb)->gso_size + hdr_len +
- 				     PAGE_SIZE) >> PAGE_SHIFT;
- 		ctx->num_pages = ctx->elements_per_skb *
--				 (skb_shinfo(skb)->tso_segs + 1);
-+				 (skb_shinfo(skb)->gso_segs + 1);
- 	}
- 	ctx->num_elements = ctx->elements_per_skb *
--			    (skb_shinfo(skb)->tso_segs + 1);
-+			    (skb_shinfo(skb)->gso_segs + 1);
- }
- 
- static inline struct qeth_eddp_context *
-diff -pruN ../orig-linux-2.6.16.29/drivers/s390/net/qeth_main.c ./drivers/s390/net/qeth_main.c
---- ../orig-linux-2.6.16.29/drivers/s390/net/qeth_main.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/s390/net/qeth_main.c	2006-09-19 13:59:20.000000000 +0100
-@@ -4454,7 +4454,7 @@ qeth_send_packet(struct qeth_card *card,
- 	queue = card->qdio.out_qs
- 		[qeth_get_priority_queue(card, skb, ipv, cast_type)];
- 
--	if (skb_shinfo(skb)->tso_size)
-+	if (skb_shinfo(skb)->gso_size)
- 		large_send = card->options.large_send;
- 
- 	/*are we able to do TSO ? If so ,prepare and send it from here */
-@@ -4501,7 +4501,7 @@ qeth_send_packet(struct qeth_card *card,
- 		card->stats.tx_packets++;
- 		card->stats.tx_bytes += skb->len;
- #ifdef CONFIG_QETH_PERF_STATS
--		if (skb_shinfo(skb)->tso_size &&
-+		if (skb_shinfo(skb)->gso_size &&
- 		   !(large_send == QETH_LARGE_SEND_NO)) {
- 			card->perf_stats.large_send_bytes += skb->len;
- 			card->perf_stats.large_send_cnt++;
-diff -pruN ../orig-linux-2.6.16.29/drivers/s390/net/qeth_tso.h ./drivers/s390/net/qeth_tso.h
---- ../orig-linux-2.6.16.29/drivers/s390/net/qeth_tso.h	2006-09-12 19:02:10.000000000 +0100
-+++ ./drivers/s390/net/qeth_tso.h	2006-09-19 13:59:20.000000000 +0100
-@@ -51,7 +51,7 @@ qeth_tso_fill_header(struct qeth_card *c
- 	hdr->ext.hdr_version = 1;
- 	hdr->ext.hdr_len     = 28;
- 	/*insert non-fix values */
--	hdr->ext.mss = skb_shinfo(skb)->tso_size;
-+	hdr->ext.mss = skb_shinfo(skb)->gso_size;
- 	hdr->ext.dg_hdr_len = (__u16)(iph->ihl*4 + tcph->doff*4);
- 	hdr->ext.payload_len = (__u16)(skb->len - hdr->ext.dg_hdr_len -
- 				       sizeof(struct qeth_hdr_tso));
-diff -pruN ../orig-linux-2.6.16.29/include/linux/ethtool.h ./include/linux/ethtool.h
---- ../orig-linux-2.6.16.29/include/linux/ethtool.h	2006-09-12 19:02:10.000000000 +0100
-+++ ./include/linux/ethtool.h	2006-09-19 13:59:20.000000000 +0100
-@@ -408,6 +408,8 @@ struct ethtool_ops {
- #define ETHTOOL_GPERMADDR	0x00000020 /* Get permanent hardware address */
- #define ETHTOOL_GUFO		0x00000021 /* Get UFO enable (ethtool_value) */
- #define ETHTOOL_SUFO		0x00000022 /* Set UFO enable (ethtool_value) */
-+#define ETHTOOL_GGSO		0x00000023 /* Get GSO enable (ethtool_value) */
-+#define ETHTOOL_SGSO		0x00000024 /* Set GSO enable (ethtool_value) */
- 
- /* compatibility with older code */
- #define SPARC_ETH_GSET		ETHTOOL_GSET
-diff -pruN ../orig-linux-2.6.16.29/include/linux/netdevice.h ./include/linux/netdevice.h
---- ../orig-linux-2.6.16.29/include/linux/netdevice.h	2006-09-12 19:02:10.000000000 +0100
-+++ ./include/linux/netdevice.h	2006-09-19 13:59:20.000000000 +0100
-@@ -230,7 +230,8 @@ enum netdev_state_t
- 	__LINK_STATE_SCHED,
- 	__LINK_STATE_NOCARRIER,
- 	__LINK_STATE_RX_SCHED,
--	__LINK_STATE_LINKWATCH_PENDING
-+	__LINK_STATE_LINKWATCH_PENDING,
-+	__LINK_STATE_QDISC_RUNNING,
- };
- 
- 
-@@ -306,9 +307,17 @@ struct net_device
- #define NETIF_F_HW_VLAN_RX	256	/* Receive VLAN hw acceleration */
- #define NETIF_F_HW_VLAN_FILTER	512	/* Receive filtering on VLAN */
- #define NETIF_F_VLAN_CHALLENGED	1024	/* Device cannot handle VLAN packets */
--#define NETIF_F_TSO		2048	/* Can offload TCP/IP segmentation */
-+#define NETIF_F_GSO		2048	/* Enable software GSO. */
- #define NETIF_F_LLTX		4096	/* LockLess TX */
--#define NETIF_F_UFO		8192	/* Can offload UDP Large Send*/
-+
-+	/* Segmentation offload features */
-+#define NETIF_F_GSO_SHIFT	16
-+#define NETIF_F_TSO		(SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
-+#define NETIF_F_UFO		(SKB_GSO_UDPV4 << NETIF_F_GSO_SHIFT)
-+#define NETIF_F_GSO_ROBUST	(SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
-+
-+#define NETIF_F_GEN_CSUM	(NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
-+#define NETIF_F_ALL_CSUM	(NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM)
- 
- 	struct net_device	*next_sched;
- 
-@@ -394,6 +403,9 @@ struct net_device
- 	struct list_head	qdisc_list;
- 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
- 
-+	/* Partially transmitted GSO packet. */
-+	struct sk_buff		*gso_skb;
-+
- 	/* ingress path synchronizer */
- 	spinlock_t		ingress_lock;
- 	struct Qdisc		*qdisc_ingress;
-@@ -402,7 +414,7 @@ struct net_device
- 	 * One part is mostly used on xmit path (device)
- 	 */
- 	/* hard_start_xmit synchronizer */
--	spinlock_t		xmit_lock ____cacheline_aligned_in_smp;
-+	spinlock_t		_xmit_lock ____cacheline_aligned_in_smp;
- 	/* cpu id of processor entered to hard_start_xmit or -1,
- 	   if nobody entered there.
- 	 */
-@@ -527,6 +539,8 @@ struct packet_type {
- 					 struct net_device *,
- 					 struct packet_type *,
- 					 struct net_device *);
-+	struct sk_buff		*(*gso_segment)(struct sk_buff *skb,
-+						int features);
- 	void			*af_packet_priv;
- 	struct list_head	list;
- };
-@@ -693,7 +707,8 @@ extern int		dev_change_name(struct net_d
- extern int		dev_set_mtu(struct net_device *, int);
- extern int		dev_set_mac_address(struct net_device *,
- 					    struct sockaddr *);
--extern void		dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
-+extern int		dev_hard_start_xmit(struct sk_buff *skb,
-+					    struct net_device *dev);
- 
- extern void		dev_init(void);
- 
-@@ -900,11 +915,43 @@ static inline void __netif_rx_complete(s
- 	clear_bit(__LINK_STATE_RX_SCHED, &dev->state);
- }
- 
-+static inline void netif_tx_lock(struct net_device *dev)
-+{
-+	spin_lock(&dev->_xmit_lock);
-+	dev->xmit_lock_owner = smp_processor_id();
-+}
-+
-+static inline void netif_tx_lock_bh(struct net_device *dev)
-+{
-+	spin_lock_bh(&dev->_xmit_lock);
-+	dev->xmit_lock_owner = smp_processor_id();
-+}
-+
-+static inline int netif_tx_trylock(struct net_device *dev)
-+{
-+	int err = spin_trylock(&dev->_xmit_lock);
-+	if (!err)
-+		dev->xmit_lock_owner = smp_processor_id();
-+	return err;
-+}
-+
-+static inline void netif_tx_unlock(struct net_device *dev)
-+{
-+	dev->xmit_lock_owner = -1;
-+	spin_unlock(&dev->_xmit_lock);
-+}
-+
-+static inline void netif_tx_unlock_bh(struct net_device *dev)
-+{
-+	dev->xmit_lock_owner = -1;
-+	spin_unlock_bh(&dev->_xmit_lock);
-+}
-+
- static inline void netif_tx_disable(struct net_device *dev)
- {
--	spin_lock_bh(&dev->xmit_lock);
-+	netif_tx_lock_bh(dev);
- 	netif_stop_queue(dev);
--	spin_unlock_bh(&dev->xmit_lock);
-+	netif_tx_unlock_bh(dev);
- }
- 
- /* These functions live elsewhere (drivers/net/net_init.c, but related) */
-@@ -932,6 +979,7 @@ extern int netdev_max_backlog;
- extern int		weight_p;
- extern int		netdev_set_master(struct net_device *dev, struct net_device *master);
- extern int skb_checksum_help(struct sk_buff *skb, int inward);
-+extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features);
- #ifdef CONFIG_BUG
- extern void netdev_rx_csum_fault(struct net_device *dev);
- #else
-@@ -951,6 +999,18 @@ extern void dev_seq_stop(struct seq_file
- 
- extern void linkwatch_run_queue(void);
- 
-+static inline int skb_gso_ok(struct sk_buff *skb, int features)
-+{
-+	int feature = skb_shinfo(skb)->gso_size ?
-+		      skb_shinfo(skb)->gso_type << NETIF_F_GSO_SHIFT : 0;
-+	return (features & feature) == feature;
-+}
-+
-+static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
-+{
-+	return !skb_gso_ok(skb, dev->features);
-+}
-+
- #endif /* __KERNEL__ */
- 
- #endif	/* _LINUX_DEV_H */
-diff -pruN ../orig-linux-2.6.16.29/include/linux/skbuff.h ./include/linux/skbuff.h
---- ../orig-linux-2.6.16.29/include/linux/skbuff.h	2006-09-12 19:02:10.000000000 +0100
-+++ ./include/linux/skbuff.h	2006-09-19 13:59:20.000000000 +0100
-@@ -134,9 +134,10 @@ struct skb_frag_struct {
- struct skb_shared_info {
- 	atomic_t	dataref;
- 	unsigned short	nr_frags;
--	unsigned short	tso_size;
--	unsigned short	tso_segs;
--	unsigned short	ufo_size;
-+	unsigned short	gso_size;
-+	/* Warning: this field is not always filled in (UFO)! */
-+	unsigned short	gso_segs;
-+	unsigned short	gso_type;
- 	unsigned int	ip6_frag_id;
- 	struct sk_buff	*frag_list;
- 	skb_frag_t	frags[MAX_SKB_FRAGS];
-@@ -168,6 +169,14 @@ enum {
- 	SKB_FCLONE_CLONE,
- };
- 
-+enum {
-+	SKB_GSO_TCPV4 = 1 << 0,
-+	SKB_GSO_UDPV4 = 1 << 1,
-+
-+	/* This indicates the skb is from an untrusted source. */
-+	SKB_GSO_DODGY = 1 << 2,
-+};
-+
- /**
-  *	struct sk_buff - socket buffer
-  *	@next: Next buffer in list
-@@ -1148,18 +1157,34 @@ static inline int skb_can_coalesce(struc
- 	return 0;
- }
- 
-+static inline int __skb_linearize(struct sk_buff *skb)
-+{
-+	return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM;
-+}
-+
- /**
-  *	skb_linearize - convert paged skb to linear one
-  *	@skb: buffer to linarize
-- *	@gfp: allocation mode
-  *
-  *	If there is no free memory -ENOMEM is returned, otherwise zero
-  *	is returned and the old skb data released.
-  */
--extern int __skb_linearize(struct sk_buff *skb, gfp_t gfp);
--static inline int skb_linearize(struct sk_buff *skb, gfp_t gfp)
-+static inline int skb_linearize(struct sk_buff *skb)
-+{
-+	return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0;
-+}
-+
-+/**
-+ *	skb_linearize_cow - make sure skb is linear and writable
-+ *	@skb: buffer to process
-+ *
-+ *	If there is no free memory -ENOMEM is returned, otherwise zero
-+ *	is returned and the old skb data released.
-+ */
-+static inline int skb_linearize_cow(struct sk_buff *skb)
- {
--	return __skb_linearize(skb, gfp);
-+	return skb_is_nonlinear(skb) || skb_cloned(skb) ?
-+	       __skb_linearize(skb) : 0;
- }
- 
- /**
-@@ -1254,6 +1279,7 @@ extern void skb_split(struct sk_b
- 				 struct sk_buff *skb1, const u32 len);
- 
- extern void	       skb_release_data(struct sk_buff *skb);
-+extern struct sk_buff *skb_segment(struct sk_buff *skb, int features);
- 
- static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
- 				       int len, void *buffer)
-diff -pruN ../orig-linux-2.6.16.29/include/net/pkt_sched.h ./include/net/pkt_sched.h
---- ../orig-linux-2.6.16.29/include/net/pkt_sched.h	2006-09-12 19:02:10.000000000 +0100
-+++ ./include/net/pkt_sched.h	2006-09-19 13:59:20.000000000 +0100
-@@ -218,12 +218,13 @@ extern struct qdisc_rate_table *qdisc_ge
- 		struct rtattr *tab);
- extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
- 
--extern int qdisc_restart(struct net_device *dev);
-+extern void __qdisc_run(struct net_device *dev);
- 
- static inline void qdisc_run(struct net_device *dev)
- {
--	while (!netif_queue_stopped(dev) && qdisc_restart(dev) < 0)
--		/* NOTHING */;
-+	if (!netif_queue_stopped(dev) &&
-+	    !test_and_set_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
-+		__qdisc_run(dev);
- }
- 
- extern int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
-diff -pruN ../orig-linux-2.6.16.29/include/net/protocol.h ./include/net/protocol.h
---- ../orig-linux-2.6.16.29/include/net/protocol.h	2006-09-12 19:02:10.000000000 +0100
-+++ ./include/net/protocol.h	2006-09-19 13:59:20.000000000 +0100
-@@ -37,6 +37,8 @@
- struct net_protocol {
- 	int			(*handler)(struct sk_buff *skb);
- 	void			(*err_handler)(struct sk_buff *skb, u32 info);
-+	struct sk_buff	       *(*gso_segment)(struct sk_buff *skb,
-+					       int features);
- 	int			no_policy;
- };
- 
-diff -pruN ../orig-linux-2.6.16.29/include/net/sock.h ./include/net/sock.h
---- ../orig-linux-2.6.16.29/include/net/sock.h	2006-09-12 19:02:10.000000000 +0100
-+++ ./include/net/sock.h	2006-09-19 13:59:20.000000000 +0100
-@@ -1064,9 +1064,13 @@ static inline void sk_setup_caps(struct
- {
- 	__sk_dst_set(sk, dst);
- 	sk->sk_route_caps = dst->dev->features;
-+	if (sk->sk_route_caps & NETIF_F_GSO)
-+		sk->sk_route_caps |= NETIF_F_TSO;
- 	if (sk->sk_route_caps & NETIF_F_TSO) {
- 		if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len)
- 			sk->sk_route_caps &= ~NETIF_F_TSO;
-+		else
-+			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
- 	}
- }
- 
-diff -pruN ../orig-linux-2.6.16.29/include/net/tcp.h ./include/net/tcp.h
---- ../orig-linux-2.6.16.29/include/net/tcp.h	2006-09-12 19:02:10.000000000 +0100
-+++ ./include/net/tcp.h	2006-09-19 13:59:20.000000000 +0100
-@@ -552,13 +552,13 @@ struct tcp_skb_cb {
-  */
- static inline int tcp_skb_pcount(const struct sk_buff *skb)
- {
--	return skb_shinfo(skb)->tso_segs;
-+	return skb_shinfo(skb)->gso_segs;
- }
- 
- /* This is valid iff tcp_skb_pcount() > 1. */
- static inline int tcp_skb_mss(const struct sk_buff *skb)
- {
--	return skb_shinfo(skb)->tso_size;
-+	return skb_shinfo(skb)->gso_size;
- }
- 
- static inline void tcp_dec_pcount_approx(__u32 *count,
-@@ -1063,6 +1063,8 @@ extern struct request_sock_ops tcp_reque
- 
- extern int tcp_v4_destroy_sock(struct sock *sk);
- 
-+extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features);
-+
- #ifdef CONFIG_PROC_FS
- extern int tcp4_proc_init(void);
- extern void tcp4_proc_exit(void);
-diff -pruN ../orig-linux-2.6.16.29/net/atm/clip.c ./net/atm/clip.c
---- ../orig-linux-2.6.16.29/net/atm/clip.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./net/atm/clip.c	2006-09-19 13:59:20.000000000 +0100
-@@ -101,7 +101,7 @@ static void unlink_clip_vcc(struct clip_
- 		printk(KERN_CRIT "!clip_vcc->entry (clip_vcc %p)\n",clip_vcc);
- 		return;
- 	}
--	spin_lock_bh(&entry->neigh->dev->xmit_lock);	/* block clip_start_xmit() */
-+	netif_tx_lock_bh(entry->neigh->dev);	/* block clip_start_xmit() */
- 	entry->neigh->used = jiffies;
- 	for (walk = &entry->vccs; *walk; walk = &(*walk)->next)
- 		if (*walk == clip_vcc) {
-@@ -125,7 +125,7 @@ static void unlink_clip_vcc(struct clip_
- 	printk(KERN_CRIT "ATMARP: unlink_clip_vcc failed (entry %p, vcc "
- 	  "0x%p)\n",entry,clip_vcc);
- out:
--	spin_unlock_bh(&entry->neigh->dev->xmit_lock);
-+	netif_tx_unlock_bh(entry->neigh->dev);
- }
- 
- /* The neighbour entry n->lock is held. */
-diff -pruN ../orig-linux-2.6.16.29/net/bridge/br_device.c ./net/bridge/br_device.c
---- ../orig-linux-2.6.16.29/net/bridge/br_device.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./net/bridge/br_device.c	2006-09-19 13:59:20.000000000 +0100
-@@ -146,9 +146,9 @@ static int br_set_tx_csum(struct net_dev
- 	struct net_bridge *br = netdev_priv(dev);
- 
- 	if (data)
--		br->feature_mask |= NETIF_F_IP_CSUM;
-+		br->feature_mask |= NETIF_F_NO_CSUM;
- 	else
--		br->feature_mask &= ~NETIF_F_IP_CSUM;
-+		br->feature_mask &= ~NETIF_F_ALL_CSUM;
- 
- 	br_features_recompute(br);
- 	return 0;
-@@ -185,6 +185,6 @@ void br_dev_setup(struct net_device *dev
- 	dev->set_mac_address = br_set_mac_address;
- 	dev->priv_flags = IFF_EBRIDGE;
- 
--	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST
--		| NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_IP_CSUM;
-+	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
-+			NETIF_F_TSO | NETIF_F_NO_CSUM | NETIF_F_GSO_ROBUST;
- }
-diff -pruN ../orig-linux-2.6.16.29/net/bridge/br_forward.c ./net/bridge/br_forward.c
---- ../orig-linux-2.6.16.29/net/bridge/br_forward.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./net/bridge/br_forward.c	2006-09-19 13:59:20.000000000 +0100
-@@ -32,7 +32,7 @@ static inline int should_deliver(const s
- int br_dev_queue_push_xmit(struct sk_buff *skb)
- {
- 	/* drop mtu oversized packets except tso */
--	if (skb->len > skb->dev->mtu && !skb_shinfo(skb)->tso_size)
-+	if (skb->len > skb->dev->mtu && !skb_shinfo(skb)->gso_size)
- 		kfree_skb(skb);
- 	else {
- #ifdef CONFIG_BRIDGE_NETFILTER
-diff -pruN ../orig-linux-2.6.16.29/net/bridge/br_if.c ./net/bridge/br_if.c
---- ../orig-linux-2.6.16.29/net/bridge/br_if.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./net/bridge/br_if.c	2006-09-19 13:59:20.000000000 +0100
-@@ -385,17 +385,28 @@ void br_features_recompute(struct net_br
- 	struct net_bridge_port *p;
- 	unsigned long features, checksum;
- 
--	features = br->feature_mask &~ NETIF_F_IP_CSUM;
--	checksum = br->feature_mask & NETIF_F_IP_CSUM;
-+	checksum = br->feature_mask & NETIF_F_ALL_CSUM ? NETIF_F_NO_CSUM : 0;
-+	features = br->feature_mask & ~NETIF_F_ALL_CSUM;
- 
- 	list_for_each_entry(p, &br->port_list, list) {
--		if (!(p->dev->features
--		      & (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)))
-+		unsigned long feature = p->dev->features;
-+
-+		if (checksum & NETIF_F_NO_CSUM && !(feature & NETIF_F_NO_CSUM))
-+			checksum ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
-+		if (checksum & NETIF_F_HW_CSUM && !(feature & NETIF_F_HW_CSUM))
-+			checksum ^= NETIF_F_HW_CSUM | NETIF_F_IP_CSUM;
-+		if (!(feature & NETIF_F_IP_CSUM))
- 			checksum = 0;
--		features &= p->dev->features;
-+
-+		if (feature & NETIF_F_GSO)
-+			feature |= NETIF_F_TSO;
-+		feature |= NETIF_F_GSO;
-+
-+		features &= feature;
- 	}
- 
--	br->dev->features = features | checksum | NETIF_F_LLTX;
-+	br->dev->features = features | checksum | NETIF_F_LLTX |
-+			    NETIF_F_GSO_ROBUST;
- }
- 
- /* called with RTNL */
-diff -pruN ../orig-linux-2.6.16.29/net/bridge/br_netfilter.c ./net/bridge/br_netfilter.c
---- ../orig-linux-2.6.16.29/net/bridge/br_netfilter.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./net/bridge/br_netfilter.c	2006-09-19 13:59:20.000000000 +0100
-@@ -743,7 +743,7 @@ static int br_nf_dev_queue_xmit(struct s
- {
- 	if (skb->protocol == htons(ETH_P_IP) &&
- 	    skb->len > skb->dev->mtu &&
--	    !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
-+	    !skb_shinfo(skb)->gso_size)
- 		return ip_fragment(skb, br_dev_queue_push_xmit);
- 	else
- 		return br_dev_queue_push_xmit(skb);
-diff -pruN ../orig-linux-2.6.16.29/net/core/dev.c ./net/core/dev.c
---- ../orig-linux-2.6.16.29/net/core/dev.c	2006-09-12 19:02:10.000000000 +0100
-+++ ./net/core/dev.c	2006-09-19 13:59:20.000000000 +0100
-@@ -115,6 +115,7 @@
- #include <net/iw_handler.h>
- #endif	/* CONFIG_NET_RADIO */
- #include <asm/current.h>
-+#include <linux/err.h>
- 
- /*
-  *	The list of packet types we will receive (as opposed to discard)
-@@ -1032,7 +1033,7 @@ static inline void net_timestamp(struct
-  *	taps currently in use.
-  */
- 
--void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
-+static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
- {
- 	struct packet_type *ptype;
- 
-@@ -1106,6 +1107,45 @@ out:
- 	return ret;
- }
- 
-+/**
-+ *	skb_gso_segment - Perform segmentation on skb.
-+ *	@skb: buffer to segment
-+ *	@features: features for the output path (see dev->features)
-+ *
-+ *	This function segments the given skb and returns a list of segments.
-+ *
-+ *	It may return NULL if the skb requires no segmentation.  This is
-+ *	only possible when GSO is used for verifying header integrity.
-+ */
-+struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
-+{
-+	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
-+	struct packet_type *ptype;
-+	int type = skb->protocol;
-+
-+	BUG_ON(skb_shinfo(skb)->frag_list);
-+	BUG_ON(skb->ip_summed != CHECKSUM_HW);
-+
-+	skb->mac.raw = skb->data;
-+	skb->mac_len = skb->nh.raw - skb->data;
-+	__skb_pull(skb, skb->mac_len);
-+
-+	rcu_read_lock();
-+	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
-+		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
-+			segs = ptype->gso_segment(skb, features);
-+			break;
-+		}
-+	}
-+	rcu_read_unlock();
-+
-+	__skb_push(skb, skb->data - skb->mac.raw);
-+
-+	return segs;
-+}
-+
-+EXPORT_SYMBOL(skb_gso_segment);
-+
- /* Take action when hardware reception checksum errors are detected. */
- #ifdef CONFIG_BUG
- void netdev_rx_csum_fault(struct net_device *dev)
-@@ -1142,75 +1182,108 @@ static inline int illegal_highdma(struct
- #define illegal_highdma(dev, skb)	(0)
- #endif
- 
--/* Keep head the same: replace data */
--int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask)
-+struct dev_gso_cb {
-+	void (*destructor)(struct sk_buff *skb);
-+};
-+
-+#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
-+
-+static void dev_gso_skb_destructor(struct sk_buff *skb)
-+{
-+	struct dev_gso_cb *cb;
-+
-+	do {
-+		struct sk_buff *nskb = skb->next;
-+
-+		skb->next = nskb->next;
-+		nskb->next = NULL;
-+		kfree_skb(nskb);
-+	} while (skb->next);
-+
-+	cb = DEV_GSO_CB(skb);
-+	if (cb->destructor)
-+		cb->destructor(skb);
-+}
-+
-+/**
-+ *	dev_gso_segment - Perform emulated hardware segmentation on skb.
-+ *	@skb: buffer to segment
-+ *
-+ *	This function segments the given skb and stores the list of segments
-+ *	in skb->next.
-+ */
-+static int dev_gso_segment(struct sk_buff *skb)
- {
--	unsigned int size;
--	u8 *data;
--	long offset;
--	struct skb_shared_info *ninfo;
--	int headerlen = skb->data - skb->head;
--	int expand = (skb->tail + skb->data_len) - skb->end;
--
--	if (skb_shared(skb))
--		BUG();
--
--	if (expand <= 0)
--		expand = 0;
--
--	size = skb->end - skb->head + expand;
--	size = SKB_DATA_ALIGN(size);
--	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
--	if (!data)
--		return -ENOMEM;
--
--	/* Copy entire thing */
--	if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
--		BUG();
--
--	/* Set up shinfo */
--	ninfo = (struct skb_shared_info*)(data + size);
--	atomic_set(&ninfo->dataref, 1);
--	ninfo->tso_size = skb_shinfo(skb)->tso_size;
--	ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
--	ninfo->nr_frags = 0;
--	ninfo->frag_list = NULL;
--
--	/* Offset between the two in bytes */
--	offset = data - skb->head;
--
--	/* Free old data. */
--	skb_release_data(skb);
--
--	skb->head = data;
--	skb->end  = data + size;
--
--	/* Set up new pointers */
--	skb->h.raw   += offset;
--	skb->nh.raw  += offset;
--	skb->mac.raw += offset;
--	skb->tail    += offset;
--	skb->data    += offset;
-+	struct net_device *dev = skb->dev;
-+	struct sk_buff *segs;
-+	int features = dev->features & ~(illegal_highdma(dev, skb) ?
-+					 NETIF_F_SG : 0);
-+
-+	segs = skb_gso_segment(skb, features);
-+
-+	/* Verifying header integrity only. */
-+	if (!segs)
-+		return 0;
- 
--	/* We are no longer a clone, even if we were. */
--	skb->cloned = 0;
-+	if (unlikely(IS_ERR(segs)))
-+		return PTR_ERR(segs);
-+
-+	skb->next = segs;
-+	DEV_GSO_CB(skb)->destructor = skb->destructor;
-+	skb->destructor = dev_gso_skb_destructor;
- 
--	skb->tail += skb->data_len;
--	skb->data_len = 0;
-+	return 0;
-+}
-+
-+int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
-+{
-+	if (likely(!skb->next)) {
-+		if (netdev_nit)
-+			dev_queue_xmit_nit(skb, dev);
-+
-+		if (netif_needs_gso(dev, skb)) {
-+			if (unlikely(dev_gso_segment(skb)))
-+				goto out_kfree_skb;
-+			if (skb->next)
-+				goto gso;
-+		}
-+
-+		return dev->hard_start_xmit(skb, dev);
-+	}
-+
-+gso:
-+	do {
-+		struct sk_buff *nskb = skb->next;
-+		int rc;
-+
-+		skb->next = nskb->next;
-+		nskb->next = NULL;
-+		rc = dev->hard_start_xmit(nskb, dev);
-+		if (unlikely(rc)) {
-+			nskb->next = skb->next;
-+			skb->next = nskb;
-+			return rc;
-+		}
-+		if (unlikely(netif_queue_stopped(dev) && skb->next))
-+			return NETDEV_TX_BUSY;
-+	} while (skb->next);
-+
-+	skb->destructor = DEV_GSO_CB(skb)->destructor;
-+
-+out_kfree_skb:
-+	kfree_skb(skb);
- 	return 0;
- }
- 
- #define HARD_TX_LOCK(dev, cpu) {			\
- 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
--		spin_lock(&dev->xmit_lock);		\
--		dev->xmit_lock_owner = cpu;		\
-+		netif_tx_lock(dev);			\
- 	}						\
- }
- 
- #define HARD_TX_UNLOCK(dev) {				\
- 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
--		dev->xmit_lock_owner = -1;		\
--		spin_unlock(&dev->xmit_lock);		\
-+		netif_tx_unlock(dev);			\
- 	}						\
- }
- 
-@@ -1246,9 +1319,13 @@ int dev_queue_xmit(struct sk_buff *skb)
- 	struct Qdisc *q;
- 	int rc = -ENOMEM;
- 
-+	/* GSO will handle the following emulations directly. */
-+	if (netif_needs_gso(dev, skb))
-+		goto gso;
-+
- 	if (skb_shinfo(skb)->frag_list &&
- 	    !(dev->features & NETIF_F_FRAGLIST) &&
--	    __skb_linearize(skb, GFP_ATOMIC))
-+	    __skb_linearize(skb))
- 		goto out_kfree_skb;
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog