[Xen-changelog] [xen-3.1-testing] Update to Linux 2.6.18.8.
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1200582338 0
# Node ID b17dfd182f7c4bda5417e39f5d1c9ee01273ec12
# Parent c9b32b389e62948b20447bd7e0d4b59dd7ecb887
Update to Linux 2.6.18.8.
Signed-off-by: S.Caglar Onur <caglar@xxxxxxxxxxxxx>
---
 patches/linux-2.6.18/allow-i386-crash-kernels-to-handle-x86_64-dumps-fix.patch | 27
 patches/linux-2.6.18/allow-i386-crash-kernels-to-handle-x86_64-dumps.patch | 61
 patches/linux-2.6.18/blktap-aio-16_03_06.patch | 271 ----
 patches/linux-2.6.18/fix-ide-cd-pio-mode.patch | 13
 patches/linux-2.6.18/fixaddr-top.patch | 64 -
 patches/linux-2.6.18/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch | 274 ----
 patches/linux-2.6.18/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch | 301 ----
 patches/linux-2.6.18/git-5ee7737379b1d7f0c977c0f1661fbaf01a8d4721.patch | 23
 patches/linux-2.6.18/git-c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4.patch | 28
 patches/linux-2.6.18/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch | 18
 patches/linux-2.6.18/i386-mach-io-check-nmi.patch | 35
 patches/linux-2.6.18/linux-2.6.18-xen-375-748cd890ea7f | 214 ---
 patches/linux-2.6.18/linux-2.6.18-xen-376-353802ec1caf | 34
 patches/linux-2.6.18/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch | 85 -
 patches/linux-2.6.18/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch | 78 -
 patches/linux-2.6.18/net-csum.patch | 57
 patches/linux-2.6.18/net-gso-5-rcv-mss.patch | 10
 patches/linux-2.6.18/net-gso-6-linear-segmentation.patch | 21
 patches/linux-2.6.18/pmd-shared.patch | 100 -
 patches/linux-2.6.18/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch | 26
 patches/linux-2.6.18/series | 26
 patches/linux-2.6.18/softlockup-no-idle-hz.patch | 52
 patches/linux-2.6.18/x86-elfnote-as-preprocessor-macro.patch | 41
 patches/linux-2.6.18/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch | 138 --
 patches/linux-2.6.18/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch | 72 -
 patches/linux-2.6.18/xen-hotplug.patch | 10
 patches/linux-2.6.18/xenoprof-generic.patch | 615 ----------
 buildconfigs/mk.linux-2.6-xen | 2
 linux-2.6-xen-sparse/arch/i386/Kconfig.cpu | 3
 linux-2.6-xen-sparse/drivers/char/mem.c | 12
 linux-2.6-xen-sparse/mm/memory.c | 41
 linux-2.6-xen-sparse/mm/page_alloc.c | 25
 linux-2.6-xen-sparse/net/core/dev.c | 16
 linux-2.6-xen-sparse/net/core/skbuff.c | 1
 patches/linux-2.6.18.8/allow-i386-crash-kernels-to-handle-x86_64-dumps-fix.patch | 28
 patches/linux-2.6.18.8/allow-i386-crash-kernels-to-handle-x86_64-dumps.patch | 51
 patches/linux-2.6.18.8/blktap-aio-16_03_06.patch | 161 ++
 patches/linux-2.6.18.8/fix-ide-cd-pio-mode.patch | 13
 patches/linux-2.6.18.8/fixaddr-top.patch | 50
 patches/linux-2.6.18.8/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch | 178 ++
 patches/linux-2.6.18.8/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch | 136 ++
 patches/linux-2.6.18.8/git-5ee7737379b1d7f0c977c0f1661fbaf01a8d4721.patch | 21
 patches/linux-2.6.18.8/git-c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4.patch | 27
 patches/linux-2.6.18.8/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch | 17
 patches/linux-2.6.18.8/i386-mach-io-check-nmi.patch | 30
 patches/linux-2.6.18.8/linux-2.6.18-xen-375-748cd890ea7f | 233 +++
 patches/linux-2.6.18.8/linux-2.6.18-xen-376-353802ec1caf | 34
 patches/linux-2.6.18.8/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch | 96 +
 patches/linux-2.6.18.8/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch | 96 +
 patches/linux-2.6.18.8/net-csum.patch | 40
 patches/linux-2.6.18.8/net-gso-5-rcv-mss.patch | 10
 patches/linux-2.6.18.8/pmd-shared.patch |
57 patches/linux-2.6.18.8/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch | 26 patches/linux-2.6.18.8/series | 25 patches/linux-2.6.18.8/softlockup-no-idle-hz.patch | 32 patches/linux-2.6.18.8/x86-elfnote-as-preprocessor-macro.patch | 25 patches/linux-2.6.18.8/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch | 39 patches/linux-2.6.18.8/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch | 63 + patches/linux-2.6.18.8/xen-hotplug.patch | 10 patches/linux-2.6.18.8/xenoprof-generic.patch | 294 ++++ 60 files changed, 1865 insertions(+), 2721 deletions(-) diff -r c9b32b389e62 -r b17dfd182f7c buildconfigs/mk.linux-2.6-xen --- a/buildconfigs/mk.linux-2.6-xen Thu Jan 17 14:35:38 2008 +0000 +++ b/buildconfigs/mk.linux-2.6-xen Thu Jan 17 15:05:38 2008 +0000 @@ -1,5 +1,5 @@ LINUX_SERIES = 2.6 LINUX_SERIES = 2.6 -LINUX_VER = 2.6.18 +LINUX_VER = 2.6.18.8 EXTRAVERSION ?= xen diff -r c9b32b389e62 -r b17dfd182f7c linux-2.6-xen-sparse/arch/i386/Kconfig.cpu --- a/linux-2.6-xen-sparse/arch/i386/Kconfig.cpu Thu Jan 17 14:35:38 2008 +0000 +++ b/linux-2.6-xen-sparse/arch/i386/Kconfig.cpu Thu Jan 17 15:05:38 2008 +0000 @@ -7,6 +7,7 @@ choice config M386 bool "386" + depends on !UML ---help--- This is the processor type of your CPU. This information is used for optimizing purposes. In order to compile a kernel that can run on @@ -301,7 +302,7 @@ config X86_USE_PPRO_CHECKSUM config X86_USE_3DNOW bool - depends on MCYRIXIII || MK7 || MGEODE_LX + depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML default y config X86_OOSTORE diff -r c9b32b389e62 -r b17dfd182f7c linux-2.6-xen-sparse/drivers/char/mem.c --- a/linux-2.6-xen-sparse/drivers/char/mem.c Thu Jan 17 14:35:38 2008 +0000 +++ b/linux-2.6-xen-sparse/drivers/char/mem.c Thu Jan 17 15:05:38 2008 +0000 @@ -618,7 +618,8 @@ static inline size_t read_zero_pagealign count = size; zap_page_range(vma, addr, count, NULL); - zeromap_page_range(vma, addr, count, PAGE_COPY); + if (zeromap_page_range(vma, addr, count, PAGE_COPY)) + break; size -= count; buf += count; @@ -685,11 +686,14 @@ out: static int mmap_zero(struct file * file, struct vm_area_struct * vma) { + int err; + if (vma->vm_flags & VM_SHARED) return shmem_zero_setup(vma); - if (zeromap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot)) - return -EAGAIN; - return 0; + err = zeromap_page_range(vma, vma->vm_start, + vma->vm_end - vma->vm_start, vma->vm_page_prot); + BUG_ON(err == -EEXIST); + return err; } #else /* CONFIG_MMU */ static ssize_t read_zero(struct file * file, char * buf, diff -r c9b32b389e62 -r b17dfd182f7c linux-2.6-xen-sparse/mm/memory.c --- a/linux-2.6-xen-sparse/mm/memory.c Thu Jan 17 14:35:38 2008 +0000 +++ b/linux-2.6-xen-sparse/mm/memory.c Thu Jan 17 15:05:38 2008 +0000 @@ -1131,21 +1131,27 @@ static int zeromap_pte_range(struct mm_s { pte_t *pte; spinlock_t *ptl; + int err = 0; pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) - return -ENOMEM; + return -EAGAIN; do { struct page *page = ZERO_PAGE(addr); pte_t zero_pte = pte_wrprotect(mk_pte(page, prot)); + + if (unlikely(!pte_none(*pte))) { + err = -EEXIST; + pte++; + break; + } page_cache_get(page); page_add_file_rmap(page); inc_mm_counter(mm, file_rss); - BUG_ON(!pte_none(*pte)); set_pte_at(mm, addr, pte, zero_pte); } while (pte++, addr += PAGE_SIZE, addr != end); pte_unmap_unlock(pte - 1, ptl); - return 0; + return err; } static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud, @@ -1153,16 +1159,18 @@ static inline int zeromap_pmd_range(stru { pmd_t *pmd; unsigned 
long next; + int err; pmd = pmd_alloc(mm, pud, addr); if (!pmd) - return -ENOMEM; + return -EAGAIN; do { next = pmd_addr_end(addr, end); - if (zeromap_pte_range(mm, pmd, addr, next, prot)) - return -ENOMEM; + err = zeromap_pte_range(mm, pmd, addr, next, prot); + if (err) + break; } while (pmd++, addr = next, addr != end); - return 0; + return err; } static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd, @@ -1170,16 +1178,18 @@ static inline int zeromap_pud_range(stru { pud_t *pud; unsigned long next; + int err; pud = pud_alloc(mm, pgd, addr); if (!pud) - return -ENOMEM; + return -EAGAIN; do { next = pud_addr_end(addr, end); - if (zeromap_pmd_range(mm, pud, addr, next, prot)) - return -ENOMEM; + err = zeromap_pmd_range(mm, pud, addr, next, prot); + if (err) + break; } while (pud++, addr = next, addr != end); - return 0; + return err; } int zeromap_page_range(struct vm_area_struct *vma, @@ -1674,7 +1684,14 @@ gotten: entry = mk_pte(new_page, vma->vm_page_prot); entry = maybe_mkwrite(pte_mkdirty(entry), vma); lazy_mmu_prot_update(entry); - ptep_establish(vma, address, page_table, entry); + /* + * Clear the pte entry and flush it first, before updating the + * pte with the new entry. This will avoid a race condition + * seen in the presence of one thread doing SMC and another + * thread doing COW. + */ + ptep_clear_flush(vma, address, page_table); + set_pte_at(mm, address, page_table, entry); update_mmu_cache(vma, address, entry); lru_cache_add_active(new_page); page_add_new_anon_rmap(new_page, vma, address); diff -r c9b32b389e62 -r b17dfd182f7c linux-2.6-xen-sparse/mm/page_alloc.c --- a/linux-2.6-xen-sparse/mm/page_alloc.c Thu Jan 17 14:35:38 2008 +0000 +++ b/linux-2.6-xen-sparse/mm/page_alloc.c Thu Jan 17 15:05:38 2008 +0000 @@ -1687,6 +1687,8 @@ void __meminit memmap_init_zone(unsigned for (pfn = start_pfn; pfn < end_pfn; pfn++) { if (!early_pfn_valid(pfn)) continue; + if (!early_pfn_in_nid(pfn, nid)) + continue; page = pfn_to_page(pfn); set_page_links(page, zone, nid, pfn); init_page_count(page); @@ -1859,8 +1861,10 @@ static inline void free_zone_pagesets(in for_each_zone(zone) { struct per_cpu_pageset *pset = zone_pcp(zone, cpu); + /* Free per_cpu_pageset if it is slab allocated */ + if (pset != &boot_pageset[cpu]) + kfree(pset); zone_pcp(zone, cpu) = NULL; - kfree(pset); } } @@ -2022,6 +2026,7 @@ static void __meminit free_area_init_cor #ifdef CONFIG_NUMA zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio) / 100; + zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100; #endif zone->name = zone_names[j]; spin_lock_init(&zone->lock); @@ -2030,7 +2035,7 @@ static void __meminit free_area_init_cor zone->zone_pgdat = pgdat; zone->free_pages = 0; - zone->temp_priority = zone->prev_priority = DEF_PRIORITY; + zone->prev_priority = DEF_PRIORITY; zone_pcp_init(zone); INIT_LIST_HEAD(&zone->active_list); @@ -2332,6 +2337,22 @@ int sysctl_min_unmapped_ratio_sysctl_han sysctl_min_unmapped_ratio) / 100; return 0; } + +int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *length, loff_t *ppos) +{ + struct zone *zone; + int rc; + + rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); + if (rc) + return rc; + + for_each_zone(zone) + zone->min_slab_pages = (zone->present_pages * + sysctl_min_slab_ratio) / 100; + return 0; +} #endif /* diff -r c9b32b389e62 -r b17dfd182f7c linux-2.6-xen-sparse/net/core/dev.c --- a/linux-2.6-xen-sparse/net/core/dev.c Thu Jan 17 14:35:38 2008 +0000 +++ 
b/linux-2.6-xen-sparse/net/core/dev.c Thu Jan 17 15:05:38 2008 +0000 @@ -1528,14 +1528,16 @@ gso: if (q->enqueue) { /* Grab device queue */ spin_lock(&dev->queue_lock); - - rc = q->enqueue(skb, q); - - qdisc_run(dev); - + q = dev->qdisc; + if (q->enqueue) { + rc = q->enqueue(skb, q); + qdisc_run(dev); + spin_unlock(&dev->queue_lock); + + rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; + goto out; + } spin_unlock(&dev->queue_lock); - rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; - goto out; } /* The device has no queue. Common case for software devices: diff -r c9b32b389e62 -r b17dfd182f7c linux-2.6-xen-sparse/net/core/skbuff.c --- a/linux-2.6-xen-sparse/net/core/skbuff.c Thu Jan 17 14:35:38 2008 +0000 +++ b/linux-2.6-xen-sparse/net/core/skbuff.c Thu Jan 17 15:05:38 2008 +0000 @@ -643,6 +643,7 @@ struct sk_buff *pskb_copy(struct sk_buff n->csum = skb->csum; n->ip_summed = skb->ip_summed; + n->truesize += skb->data_len; n->data_len = skb->data_len; n->len = skb->len; diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/allow-i386-crash-kernels-to-handle-x86_64-dumps-fix.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/allow-i386-crash-kernels-to-handle-x86_64-dumps-fix.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,30 @@ +From: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> + +In file included from arch/i386/kernel/setup.c:46: +include/linux/crash_dump.h:19:36: warning: extra tokens at end of #ifndef directive + +Cc: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx> +Cc: Andi Kleen <ak@xxxxxxx> +Cc: Horms <horms@xxxxxxxxxxxx> +Cc: Ian Campbell <ian.campbell@xxxxxxxxxxxxx> +Cc: Magnus Damm <magnus.damm@xxxxxxxxx> +Cc: Vivek Goyal <vgoyal@xxxxxxxxxx> +Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> +--- + + include/linux/crash_dump.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff -puN include/linux/crash_dump.h~allow-i386-crash-kernels-to-handle-x86_64-dumps-fix include/linux/crash_dump.h +--- a/include/linux/crash_dump.h~allow-i386-crash-kernels-to-handle-x86_64-dumps-fix ++++ a/include/linux/crash_dump.h +@@ -16,7 +16,7 @@ extern struct proc_dir_entry *proc_vmcor + + /* Architecture code defines this if there are other possible ELF + * machine types, e.g. on bi-arch capable hardware. */ +-#ifndef vmcore_elf_check_arch_cross(x) ++#ifndef vmcore_elf_check_arch_cross + #define vmcore_elf_check_arch_cross(x) 0 + #endif + +_ diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/allow-i386-crash-kernels-to-handle-x86_64-dumps.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/allow-i386-crash-kernels-to-handle-x86_64-dumps.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,66 @@ +From: Ian Campbell <ian.campbell@xxxxxxxxxxxxx> + +The specific case I am encountering is kdump under Xen with a 64 bit +hypervisor and 32 bit kernel/userspace. The dump created is 64 bit due to +the hypervisor but the dump kernel is 32 bit for maximum compatibility. + +It's possibly less likely to be useful in a purely native scenario but I +see no reason to disallow it. + +Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxxxxx> +Acked-by: Vivek Goyal <vgoyal@xxxxxxxxxx> +Cc: Horms <horms@xxxxxxxxxxxx> +Cc: Magnus Damm <magnus.damm@xxxxxxxxx> +Cc: "Eric W. 
Biederman" <ebiederm@xxxxxxxxxxxx> +Cc: Andi Kleen <ak@xxxxxxx> +Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> +--- + + fs/proc/vmcore.c | 2 +- + include/asm-i386/kexec.h | 3 +++ + include/linux/crash_dump.h | 8 ++++++++ + 3 files changed, 12 insertions(+), 1 deletion(-) + +diff -puN fs/proc/vmcore.c~allow-i386-crash-kernels-to-handle-x86_64-dumps fs/proc/vmcore.c +--- a/fs/proc/vmcore.c~allow-i386-crash-kernels-to-handle-x86_64-dumps ++++ a/fs/proc/vmcore.c +@@ -514,7 +514,7 @@ static int __init parse_crash_elf64_head + /* Do some basic Verification. */ + if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || + (ehdr.e_type != ET_CORE) || +- !elf_check_arch(&ehdr) || ++ !vmcore_elf_check_arch(&ehdr) || + ehdr.e_ident[EI_CLASS] != ELFCLASS64 || + ehdr.e_ident[EI_VERSION] != EV_CURRENT || + ehdr.e_version != EV_CURRENT || +diff -puN include/asm-i386/kexec.h~allow-i386-crash-kernels-to-handle-x86_64-dumps include/asm-i386/kexec.h +--- a/include/asm-i386/kexec.h~allow-i386-crash-kernels-to-handle-x86_64-dumps ++++ a/include/asm-i386/kexec.h +@@ -47,6 +47,9 @@ + /* The native architecture */ + #define KEXEC_ARCH KEXEC_ARCH_386 + ++/* We can also handle crash dumps from 64 bit kernel. */ ++#define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64) ++ + #define MAX_NOTE_BYTES 1024 + + /* CPU does not save ss and esp on stack if execution is already +diff -puN include/linux/crash_dump.h~allow-i386-crash-kernels-to-handle-x86_64-dumps include/linux/crash_dump.h +--- a/include/linux/crash_dump.h~allow-i386-crash-kernels-to-handle-x86_64-dumps ++++ a/include/linux/crash_dump.h +@@ -14,5 +14,13 @@ extern ssize_t copy_oldmem_page(unsigned + extern const struct file_operations proc_vmcore_operations; + extern struct proc_dir_entry *proc_vmcore; + ++/* Architecture code defines this if there are other possible ELF ++ * machine types, e.g. on bi-arch capable hardware. */ ++#ifndef vmcore_elf_check_arch_cross(x) ++#define vmcore_elf_check_arch_cross(x) 0 ++#endif ++ ++#define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) ++ + #endif /* CONFIG_CRASH_DUMP */ + #endif /* LINUX_CRASHDUMP_H */ +_ diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/blktap-aio-16_03_06.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/blktap-aio-16_03_06.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,294 @@ +diff -pruN ../orig-linux-2.6.18/fs/aio.c ./fs/aio.c +--- ../orig-linux-2.6.18/fs/aio.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./fs/aio.c 2007-01-12 16:04:15.000000000 +0000 +@@ -34,6 +34,11 @@ + #include <asm/uaccess.h> + #include <asm/mmu_context.h> + ++#ifdef CONFIG_EPOLL ++#include <linux/poll.h> ++#include <linux/eventpoll.h> ++#endif ++ + #if DEBUG > 1 + #define dprintk printk + #else +@@ -1015,6 +1020,10 @@ put_rq: + if (waitqueue_active(&ctx->wait)) + wake_up(&ctx->wait); + ++#ifdef CONFIG_EPOLL ++ if (ctx->file && waitqueue_active(&ctx->poll_wait)) ++ wake_up(&ctx->poll_wait); ++#endif + if (ret) + put_ioctx(ctx); + +@@ -1024,6 +1033,8 @@ put_rq: + /* aio_read_evt + * Pull an event off of the ioctx's event ring. Returns the number of + * events fetched (0 or 1 ;-) ++ * If ent parameter is 0, just returns the number of events that would ++ * be fetched. + * FIXME: make this use cmpxchg. + * TODO: make the ringbuffer user mmap()able (requires FIXME). 
+ */ +@@ -1046,13 +1057,18 @@ static int aio_read_evt(struct kioctx *i + + head = ring->head % info->nr; + if (head != ring->tail) { +- struct io_event *evp = aio_ring_event(info, head, KM_USER1); +- *ent = *evp; +- head = (head + 1) % info->nr; +- smp_mb(); /* finish reading the event before updatng the head */ +- ring->head = head; +- ret = 1; +- put_aio_ring_event(evp, KM_USER1); ++ if (ent) { /* event requested */ ++ struct io_event *evp = ++ aio_ring_event(info, head, KM_USER1); ++ *ent = *evp; ++ head = (head + 1) % info->nr; ++ /* finish reading the event before updatng the head */ ++ smp_mb(); ++ ring->head = head; ++ ret = 1; ++ put_aio_ring_event(evp, KM_USER1); ++ } else /* only need to know availability */ ++ ret = 1; + } + spin_unlock(&info->ring_lock); + +@@ -1235,9 +1251,78 @@ static void io_destroy(struct kioctx *io + + aio_cancel_all(ioctx); + wait_for_all_aios(ioctx); ++#ifdef CONFIG_EPOLL ++ /* forget the poll file, but it's up to the user to close it */ ++ if (ioctx->file) { ++ ioctx->file->private_data = 0; ++ ioctx->file = 0; ++ } ++#endif + put_ioctx(ioctx); /* once for the lookup */ + } + ++#ifdef CONFIG_EPOLL ++ ++static int aio_queue_fd_close(struct inode *inode, struct file *file) ++{ ++ struct kioctx *ioctx = file->private_data; ++ if (ioctx) { ++ file->private_data = 0; ++ spin_lock_irq(&ioctx->ctx_lock); ++ ioctx->file = 0; ++ spin_unlock_irq(&ioctx->ctx_lock); ++ } ++ return 0; ++} ++ ++static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait) ++{ unsigned int pollflags = 0; ++ struct kioctx *ioctx = file->private_data; ++ ++ if (ioctx) { ++ ++ spin_lock_irq(&ioctx->ctx_lock); ++ /* Insert inside our poll wait queue */ ++ poll_wait(file, &ioctx->poll_wait, wait); ++ ++ /* Check our condition */ ++ if (aio_read_evt(ioctx, 0)) ++ pollflags = POLLIN | POLLRDNORM; ++ spin_unlock_irq(&ioctx->ctx_lock); ++ } ++ ++ return pollflags; ++} ++ ++static const struct file_operations aioq_fops = { ++ .release = aio_queue_fd_close, ++ .poll = aio_queue_fd_poll ++}; ++ ++/* make_aio_fd: ++ * Create a file descriptor that can be used to poll the event queue. ++ * Based and piggybacked on the excellent epoll code. ++ */ ++ ++static int make_aio_fd(struct kioctx *ioctx) ++{ ++ int error, fd; ++ struct inode *inode; ++ struct file *file; ++ ++ error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops); ++ if (error) ++ return error; ++ ++ /* associate the file with the IO context */ ++ file->private_data = ioctx; ++ ioctx->file = file; ++ init_waitqueue_head(&ioctx->poll_wait); ++ return fd; ++} ++#endif ++ ++ + /* sys_io_setup: + * Create an aio_context capable of receiving at least nr_events. + * ctxp must not point to an aio_context that already exists, and +@@ -1250,18 +1335,30 @@ static void io_destroy(struct kioctx *io + * resources are available. May fail with -EFAULT if an invalid + * pointer is passed for ctxp. Will fail with -ENOSYS if not + * implemented. ++ * ++ * To request a selectable fd, the user context has to be initialized ++ * to 1, instead of 0, and the return value is the fd. ++ * This keeps the system call compatible, since a non-zero value ++ * was not allowed so far. 
+ */ + asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp) + { + struct kioctx *ioctx = NULL; + unsigned long ctx; + long ret; ++ int make_fd = 0; + + ret = get_user(ctx, ctxp); + if (unlikely(ret)) + goto out; + + ret = -EINVAL; ++#ifdef CONFIG_EPOLL ++ if (ctx == 1) { ++ make_fd = 1; ++ ctx = 0; ++ } ++#endif + if (unlikely(ctx || nr_events == 0)) { + pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n", + ctx, nr_events); +@@ -1272,8 +1369,12 @@ asmlinkage long sys_io_setup(unsigned nr + ret = PTR_ERR(ioctx); + if (!IS_ERR(ioctx)) { + ret = put_user(ioctx->user_id, ctxp); +- if (!ret) +- return 0; ++#ifdef CONFIG_EPOLL ++ if (make_fd && ret >= 0) ++ ret = make_aio_fd(ioctx); ++#endif ++ if (ret >= 0) ++ return ret; + + get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */ + io_destroy(ioctx); +diff -pruN ../orig-linux-2.6.18/fs/eventpoll.c ./fs/eventpoll.c +--- ../orig-linux-2.6.18/fs/eventpoll.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./fs/eventpoll.c 2007-01-12 16:04:41.000000000 +0000 +@@ -236,8 +236,6 @@ struct ep_pqueue { + + static void ep_poll_safewake_init(struct poll_safewake *psw); + static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq); +-static int ep_getfd(int *efd, struct inode **einode, struct file **efile, +- struct eventpoll *ep); + static int ep_alloc(struct eventpoll **pep); + static void ep_free(struct eventpoll *ep); + static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd); +@@ -267,7 +265,7 @@ static int ep_events_transfer(struct eve + static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, + int maxevents, long timeout); + static int eventpollfs_delete_dentry(struct dentry *dentry); +-static struct inode *ep_eventpoll_inode(void); ++static struct inode *ep_eventpoll_inode(const struct file_operations *fops); + static int eventpollfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt); +@@ -517,7 +515,7 @@ asmlinkage long sys_epoll_create(int siz + * Creates all the items needed to setup an eventpoll file. That is, + * a file structure, and inode and a free file descriptor. + */ +- error = ep_getfd(&fd, &inode, &file, ep); ++ error = ep_getfd(&fd, &inode, &file, ep, &eventpoll_fops); + if (error) + goto eexit_2; + +@@ -702,8 +700,8 @@ eexit_1: + /* + * Creates the file descriptor to be used by the epoll interface. 
+ */ +-static int ep_getfd(int *efd, struct inode **einode, struct file **efile, +- struct eventpoll *ep) ++int ep_getfd(int *efd, struct inode **einode, struct file **efile, ++ struct eventpoll *ep, const struct file_operations *fops) + { + struct qstr this; + char name[32]; +@@ -719,7 +717,7 @@ static int ep_getfd(int *efd, struct ino + goto eexit_1; + + /* Allocates an inode from the eventpoll file system */ +- inode = ep_eventpoll_inode(); ++ inode = ep_eventpoll_inode(fops); + error = PTR_ERR(inode); + if (IS_ERR(inode)) + goto eexit_2; +@@ -750,7 +748,7 @@ static int ep_getfd(int *efd, struct ino + + file->f_pos = 0; + file->f_flags = O_RDONLY; +- file->f_op = &eventpoll_fops; ++ file->f_op = fops; + file->f_mode = FMODE_READ; + file->f_version = 0; + file->private_data = ep; +@@ -1569,7 +1567,7 @@ static int eventpollfs_delete_dentry(str + } + + +-static struct inode *ep_eventpoll_inode(void) ++static struct inode *ep_eventpoll_inode(const struct file_operations *fops) + { + int error = -ENOMEM; + struct inode *inode = new_inode(eventpoll_mnt->mnt_sb); +@@ -1577,7 +1575,7 @@ static struct inode *ep_eventpoll_inode( + if (!inode) + goto eexit_1; + +- inode->i_fop = &eventpoll_fops; ++ inode->i_fop = fops; + + /* + * Mark the inode dirty from the very beginning, +diff -pruN ../orig-linux-2.6.18/include/linux/aio.h ./include/linux/aio.h +--- ../orig-linux-2.6.18/include/linux/aio.h 2006-09-20 04:42:06.000000000 +0100 ++++ ./include/linux/aio.h 2007-01-12 16:04:15.000000000 +0000 +@@ -191,6 +191,11 @@ struct kioctx { + struct aio_ring_info ring_info; + + struct work_struct wq; ++#ifdef CONFIG_EPOLL ++ // poll integration ++ wait_queue_head_t poll_wait; ++ struct file *file; ++#endif + }; + + /* prototypes */ +diff -pruN ../orig-linux-2.6.18/include/linux/eventpoll.h ./include/linux/eventpoll.h +--- ../orig-linux-2.6.18/include/linux/eventpoll.h 2006-09-20 04:42:06.000000000 +0100 ++++ ./include/linux/eventpoll.h 2007-01-12 16:04:15.000000000 +0000 +@@ -90,6 +90,12 @@ static inline void eventpoll_release(str + eventpoll_release_file(file); + } + ++/* ++ * called by aio code to create fd that can poll the aio event queueQ ++ */ ++struct eventpoll; ++int ep_getfd(int *efd, struct inode **einode, struct file **efile, ++ struct eventpoll *ep, const struct file_operations *fops); + #else + + static inline void eventpoll_init_file(struct file *file) {} diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/fix-ide-cd-pio-mode.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/fix-ide-cd-pio-mode.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,18 @@ +diff -pruN ../orig-linux-2.6.18/drivers/ide/ide-lib.c ./drivers/ide/ide-lib.c +--- ../orig-linux-2.6.18/drivers/ide/ide-lib.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./drivers/ide/ide-lib.c 2007-01-12 16:07:37.000000000 +0000 +@@ -408,10 +408,10 @@ void ide_toggle_bounce(ide_drive_t *driv + { + u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */ + +- if (!PCI_DMA_BUS_IS_PHYS) { +- addr = BLK_BOUNCE_ANY; +- } else if (on && drive->media == ide_disk) { +- if (HWIF(drive)->pci_dev) ++ if (on && drive->media == ide_disk) { ++ if (!PCI_DMA_BUS_IS_PHYS) ++ addr = BLK_BOUNCE_ANY; ++ else if (HWIF(drive)->pci_dev) + addr = HWIF(drive)->pci_dev->dma_mask; + } + diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/fixaddr-top.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/fixaddr-top.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,69 @@ +diff -pruN 
../orig-linux-2.6.18/arch/i386/mm/pgtable.c ./arch/i386/mm/pgtable.c +--- ../orig-linux-2.6.18/arch/i386/mm/pgtable.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./arch/i386/mm/pgtable.c 2007-01-17 17:19:36.000000000 +0000 +@@ -12,6 +12,7 @@ + #include <linux/slab.h> + #include <linux/pagemap.h> + #include <linux/spinlock.h> ++#include <linux/module.h> + + #include <asm/system.h> + #include <asm/pgtable.h> +@@ -137,6 +138,10 @@ void set_pmd_pfn(unsigned long vaddr, un + __flush_tlb_one(vaddr); + } + ++static int nr_fixmaps = 0; ++unsigned long __FIXADDR_TOP = 0xfffff000; ++EXPORT_SYMBOL(__FIXADDR_TOP); ++ + void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) + { + unsigned long address = __fix_to_virt(idx); +@@ -146,6 +151,13 @@ void __set_fixmap (enum fixed_addresses + return; + } + set_pte_pfn(address, phys >> PAGE_SHIFT, flags); ++ nr_fixmaps++; ++} ++ ++void set_fixaddr_top(unsigned long top) ++{ ++ BUG_ON(nr_fixmaps > 0); ++ __FIXADDR_TOP = top - PAGE_SIZE; + } + + pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +diff -pruN ../orig-linux-2.6.18/include/asm-i386/fixmap.h ./include/asm-i386/fixmap.h +--- ../orig-linux-2.6.18/include/asm-i386/fixmap.h 2006-09-20 04:42:06.000000000 +0100 ++++ ./include/asm-i386/fixmap.h 2007-01-17 17:19:36.000000000 +0000 +@@ -19,7 +19,7 @@ + * Leave one empty page between vmalloc'ed areas and + * the start of the fixmap. + */ +-#define __FIXADDR_TOP 0xfffff000 ++extern unsigned long __FIXADDR_TOP; + + #ifndef __ASSEMBLY__ + #include <linux/kernel.h> +@@ -94,6 +94,8 @@ enum fixed_addresses { + extern void __set_fixmap (enum fixed_addresses idx, + unsigned long phys, pgprot_t flags); + ++extern void set_fixaddr_top(unsigned long top); ++ + #define set_fixmap(idx, phys) \ + __set_fixmap(idx, phys, PAGE_KERNEL) + /* +diff -pruN ../orig-linux-2.6.18/include/asm-i386/page.h ./include/asm-i386/page.h +--- ../orig-linux-2.6.18/include/asm-i386/page.h 2006-09-20 04:42:06.000000000 +0100 ++++ ./include/asm-i386/page.h 2007-01-17 17:19:36.000000000 +0000 +@@ -122,7 +122,7 @@ extern int page_is_ram(unsigned long pag + + #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) + #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) +-#define MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE) ++#define MAXMEM (__FIXADDR_TOP-__PAGE_OFFSET-__VMALLOC_RESERVE) + #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) + #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) + #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,382 @@ +diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/machine_kexec.c ./arch/i386/kernel/machine_kexec.c +--- ../orig-linux-2.6.18/arch/i386/kernel/machine_kexec.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./arch/i386/kernel/machine_kexec.c 2007-01-12 16:03:23.000000000 +0000 +@@ -20,70 +20,13 @@ + #include <asm/system.h> + + #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) +- +-#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +-#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +-#define L2_ATTR (_PAGE_PRESENT) +- +-#define LEVEL0_SIZE (1UL << 12UL) +- +-#ifndef CONFIG_X86_PAE +-#define LEVEL1_SIZE (1UL << 22UL) +-static u32 pgtable_level1[1024] PAGE_ALIGNED; +- +-static void 
identity_map_page(unsigned long address) +-{ +- unsigned long level1_index, level2_index; +- u32 *pgtable_level2; +- +- /* Find the current page table */ +- pgtable_level2 = __va(read_cr3()); +- +- /* Find the indexes of the physical address to identity map */ +- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE; +- level2_index = address / LEVEL1_SIZE; +- +- /* Identity map the page table entry */ +- pgtable_level1[level1_index] = address | L0_ATTR; +- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR; +- +- /* Flush the tlb so the new mapping takes effect. +- * Global tlb entries are not flushed but that is not an issue. +- */ +- load_cr3(pgtable_level2); +-} +- +-#else +-#define LEVEL1_SIZE (1UL << 21UL) +-#define LEVEL2_SIZE (1UL << 30UL) +-static u64 pgtable_level1[512] PAGE_ALIGNED; +-static u64 pgtable_level2[512] PAGE_ALIGNED; +- +-static void identity_map_page(unsigned long address) +-{ +- unsigned long level1_index, level2_index, level3_index; +- u64 *pgtable_level3; +- +- /* Find the current page table */ +- pgtable_level3 = __va(read_cr3()); +- +- /* Find the indexes of the physical address to identity map */ +- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE; +- level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE; +- level3_index = address / LEVEL2_SIZE; +- +- /* Identity map the page table entry */ +- pgtable_level1[level1_index] = address | L0_ATTR; +- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR; +- set_64bit(&pgtable_level3[level3_index], +- __pa(pgtable_level2) | L2_ATTR); +- +- /* Flush the tlb so the new mapping takes effect. +- * Global tlb entries are not flushed but that is not an issue. +- */ +- load_cr3(pgtable_level3); +-} ++static u32 kexec_pgd[1024] PAGE_ALIGNED; ++#ifdef CONFIG_X86_PAE ++static u32 kexec_pmd0[1024] PAGE_ALIGNED; ++static u32 kexec_pmd1[1024] PAGE_ALIGNED; + #endif ++static u32 kexec_pte0[1024] PAGE_ALIGNED; ++static u32 kexec_pte1[1024] PAGE_ALIGNED; + + static void set_idt(void *newidt, __u16 limit) + { +@@ -127,16 +70,6 @@ static void load_segments(void) + #undef __STR + } + +-typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)( +- unsigned long indirection_page, +- unsigned long reboot_code_buffer, +- unsigned long start_address, +- unsigned int has_pae) ATTRIB_NORET; +- +-extern const unsigned char relocate_new_kernel[]; +-extern void relocate_new_kernel_end(void); +-extern const unsigned int relocate_new_kernel_size; +- + /* + * A architecture hook called to validate the + * proposed image and prepare the control pages +@@ -169,25 +102,29 @@ void machine_kexec_cleanup(struct kimage + */ + NORET_TYPE void machine_kexec(struct kimage *image) + { +- unsigned long page_list; +- unsigned long reboot_code_buffer; +- +- relocate_new_kernel_t rnk; ++ unsigned long page_list[PAGES_NR]; ++ void *control_page; + + /* Interrupts aren't acceptable while we reboot */ + local_irq_disable(); + +- /* Compute some offsets */ +- reboot_code_buffer = page_to_pfn(image->control_code_page) +- << PAGE_SHIFT; +- page_list = image->head; +- +- /* Set up an identity mapping for the reboot_code_buffer */ +- identity_map_page(reboot_code_buffer); +- +- /* copy it out */ +- memcpy((void *)reboot_code_buffer, relocate_new_kernel, +- relocate_new_kernel_size); ++ control_page = page_address(image->control_code_page); ++ memcpy(control_page, relocate_kernel, PAGE_SIZE); ++ ++ page_list[PA_CONTROL_PAGE] = __pa(control_page); ++ page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; ++ page_list[PA_PGD] = __pa(kexec_pgd); ++ 
page_list[VA_PGD] = (unsigned long)kexec_pgd; ++#ifdef CONFIG_X86_PAE ++ page_list[PA_PMD_0] = __pa(kexec_pmd0); ++ page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; ++ page_list[PA_PMD_1] = __pa(kexec_pmd1); ++ page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; ++#endif ++ page_list[PA_PTE_0] = __pa(kexec_pte0); ++ page_list[VA_PTE_0] = (unsigned long)kexec_pte0; ++ page_list[PA_PTE_1] = __pa(kexec_pte1); ++ page_list[VA_PTE_1] = (unsigned long)kexec_pte1; + + /* The segment registers are funny things, they have both a + * visible and an invisible part. Whenever the visible part is +@@ -206,6 +143,6 @@ NORET_TYPE void machine_kexec(struct kim + set_idt(phys_to_virt(0),0); + + /* now call it */ +- rnk = (relocate_new_kernel_t) reboot_code_buffer; +- (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae); ++ relocate_kernel((unsigned long)image->head, (unsigned long)page_list, ++ image->start, cpu_has_pae); + } +diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/relocate_kernel.S ./arch/i386/kernel/relocate_kernel.S +--- ../orig-linux-2.6.18/arch/i386/kernel/relocate_kernel.S 2006-09-20 04:42:06.000000000 +0100 ++++ ./arch/i386/kernel/relocate_kernel.S 2007-01-12 16:03:23.000000000 +0000 +@@ -7,16 +7,138 @@ + */ + + #include <linux/linkage.h> ++#include <asm/page.h> ++#include <asm/kexec.h> ++ ++/* ++ * Must be relocatable PIC code callable as a C function ++ */ ++ ++#define PTR(x) (x << 2) ++#define PAGE_ALIGNED (1 << PAGE_SHIFT) ++#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */ ++#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */ ++ ++ .text ++ .align PAGE_ALIGNED ++ .globl relocate_kernel ++relocate_kernel: ++ movl 8(%esp), %ebp /* list of pages */ ++ ++#ifdef CONFIG_X86_PAE ++ /* map the control page at its virtual address */ ++ ++ movl PTR(VA_PGD)(%ebp), %edi ++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax ++ andl $0xc0000000, %eax ++ shrl $27, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_PMD_0)(%ebp), %edx ++ orl $PAE_PGD_ATTR, %edx ++ movl %edx, (%eax) ++ ++ movl PTR(VA_PMD_0)(%ebp), %edi ++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax ++ andl $0x3fe00000, %eax ++ shrl $18, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_PTE_0)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++ ++ movl PTR(VA_PTE_0)(%ebp), %edi ++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax ++ andl $0x001ff000, %eax ++ shrl $9, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++ ++ /* identity map the control page at its physical address */ ++ ++ movl PTR(VA_PGD)(%ebp), %edi ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax ++ andl $0xc0000000, %eax ++ shrl $27, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_PMD_1)(%ebp), %edx ++ orl $PAE_PGD_ATTR, %edx ++ movl %edx, (%eax) ++ ++ movl PTR(VA_PMD_1)(%ebp), %edi ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax ++ andl $0x3fe00000, %eax ++ shrl $18, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_PTE_1)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++ ++ movl PTR(VA_PTE_1)(%ebp), %edi ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax ++ andl $0x001ff000, %eax ++ shrl $9, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++#else ++ /* map the control page at its virtual address */ ++ ++ movl PTR(VA_PGD)(%ebp), %edi ++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax ++ andl $0xffc00000, %eax ++ shrl $20, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_PTE_0)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++ ++ movl PTR(VA_PTE_0)(%ebp), %edi ++ movl 
PTR(VA_CONTROL_PAGE)(%ebp), %eax ++ andl $0x003ff000, %eax ++ shrl $10, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++ ++ /* identity map the control page at its physical address */ ++ ++ movl PTR(VA_PGD)(%ebp), %edi ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax ++ andl $0xffc00000, %eax ++ shrl $20, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_PTE_1)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++ ++ movl PTR(VA_PTE_1)(%ebp), %edi ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax ++ andl $0x003ff000, %eax ++ shrl $10, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++#endif + +- /* +- * Must be relocatable PIC code callable as a C function, that once +- * it starts can not use the previous processes stack. +- */ +- .globl relocate_new_kernel + relocate_new_kernel: + /* read the arguments and say goodbye to the stack */ + movl 4(%esp), %ebx /* page_list */ +- movl 8(%esp), %ebp /* reboot_code_buffer */ ++ movl 8(%esp), %ebp /* list of pages */ + movl 12(%esp), %edx /* start address */ + movl 16(%esp), %ecx /* cpu_has_pae */ + +@@ -24,11 +146,26 @@ relocate_new_kernel: + pushl $0 + popfl + +- /* set a new stack at the bottom of our page... */ +- lea 4096(%ebp), %esp ++ /* get physical address of control page now */ ++ /* this is impossible after page table switch */ ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edi ++ ++ /* switch to new set of page tables */ ++ movl PTR(PA_PGD)(%ebp), %eax ++ movl %eax, %cr3 ++ ++ /* setup a new stack at the end of the physical control page */ ++ lea 4096(%edi), %esp + +- /* store the parameters back on the stack */ +- pushl %edx /* store the start address */ ++ /* jump to identity mapped page */ ++ movl %edi, %eax ++ addl $(identity_mapped - relocate_kernel), %eax ++ pushl %eax ++ ret ++ ++identity_mapped: ++ /* store the start address on the stack */ ++ pushl %edx + + /* Set cr0 to a known state: + * 31 0 == Paging disabled +@@ -113,8 +250,3 @@ relocate_new_kernel: + xorl %edi, %edi + xorl %ebp, %ebp + ret +-relocate_new_kernel_end: +- +- .globl relocate_new_kernel_size +-relocate_new_kernel_size: +- .long relocate_new_kernel_end - relocate_new_kernel +diff -pruN ../orig-linux-2.6.18/include/asm-i386/kexec.h ./include/asm-i386/kexec.h +--- ../orig-linux-2.6.18/include/asm-i386/kexec.h 2006-09-20 04:42:06.000000000 +0100 ++++ ./include/asm-i386/kexec.h 2007-01-12 16:03:23.000000000 +0000 +@@ -1,6 +1,26 @@ + #ifndef _I386_KEXEC_H + #define _I386_KEXEC_H + ++#define PA_CONTROL_PAGE 0 ++#define VA_CONTROL_PAGE 1 ++#define PA_PGD 2 ++#define VA_PGD 3 ++#define PA_PTE_0 4 ++#define VA_PTE_0 5 ++#define PA_PTE_1 6 ++#define VA_PTE_1 7 ++#ifdef CONFIG_X86_PAE ++#define PA_PMD_0 8 ++#define VA_PMD_0 9 ++#define PA_PMD_1 10 ++#define VA_PMD_1 11 ++#define PAGES_NR 12 ++#else ++#define PAGES_NR 8 ++#endif ++ ++#ifndef __ASSEMBLY__ ++ + #include <asm/fixmap.h> + #include <asm/ptrace.h> + #include <asm/string.h> +@@ -72,5 +92,12 @@ static inline void crash_setup_regs(stru + newregs->eip = (unsigned long)current_text_addr(); + } + } ++asmlinkage NORET_TYPE void ++relocate_kernel(unsigned long indirection_page, ++ unsigned long control_page, ++ unsigned long start_address, ++ unsigned int has_pae) ATTRIB_NORET; ++ ++#endif /* __ASSEMBLY__ */ + + #endif /* _I386_KEXEC_H */ diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/patches/linux-2.6.18.8/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,355 @@ +diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/machine_kexec.c ./arch/x86_64/kernel/machine_kexec.c +--- ../orig-linux-2.6.18/arch/x86_64/kernel/machine_kexec.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./arch/x86_64/kernel/machine_kexec.c 2007-01-12 16:03:49.000000000 +0000 +@@ -15,6 +15,15 @@ + #include <asm/mmu_context.h> + #include <asm/io.h> + ++#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) ++static u64 kexec_pgd[512] PAGE_ALIGNED; ++static u64 kexec_pud0[512] PAGE_ALIGNED; ++static u64 kexec_pmd0[512] PAGE_ALIGNED; ++static u64 kexec_pte0[512] PAGE_ALIGNED; ++static u64 kexec_pud1[512] PAGE_ALIGNED; ++static u64 kexec_pmd1[512] PAGE_ALIGNED; ++static u64 kexec_pte1[512] PAGE_ALIGNED; ++ + static void init_level2_page(pmd_t *level2p, unsigned long addr) + { + unsigned long end_addr; +@@ -144,32 +153,19 @@ static void load_segments(void) + ); + } + +-typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page, +- unsigned long control_code_buffer, +- unsigned long start_address, +- unsigned long pgtable) ATTRIB_NORET; +- +-extern const unsigned char relocate_new_kernel[]; +-extern const unsigned long relocate_new_kernel_size; +- + int machine_kexec_prepare(struct kimage *image) + { +- unsigned long start_pgtable, control_code_buffer; ++ unsigned long start_pgtable; + int result; + + /* Calculate the offsets */ + start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; +- control_code_buffer = start_pgtable + PAGE_SIZE; + + /* Setup the identity mapped 64bit page table */ + result = init_pgtable(image, start_pgtable); + if (result) + return result; + +- /* Place the code in the reboot code buffer */ +- memcpy(__va(control_code_buffer), relocate_new_kernel, +- relocate_new_kernel_size); +- + return 0; + } + +@@ -184,28 +180,34 @@ void machine_kexec_cleanup(struct kimage + */ + NORET_TYPE void machine_kexec(struct kimage *image) + { +- unsigned long page_list; +- unsigned long control_code_buffer; +- unsigned long start_pgtable; +- relocate_new_kernel_t rnk; ++ unsigned long page_list[PAGES_NR]; ++ void *control_page; + + /* Interrupts aren't acceptable while we reboot */ + local_irq_disable(); + +- /* Calculate the offsets */ +- page_list = image->head; +- start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; +- control_code_buffer = start_pgtable + PAGE_SIZE; ++ control_page = page_address(image->control_code_page) + PAGE_SIZE; ++ memcpy(control_page, relocate_kernel, PAGE_SIZE); + +- /* Set the low half of the page table to my identity mapped +- * page table for kexec. Leave the high half pointing at the +- * kernel pages. Don't bother to flush the global pages +- * as that will happen when I fully switch to my identity mapped +- * page table anyway. 
+- */ +- memcpy(__va(read_cr3()), __va(start_pgtable), PAGE_SIZE/2); +- __flush_tlb(); ++ page_list[PA_CONTROL_PAGE] = __pa(control_page); ++ page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; ++ page_list[PA_PGD] = __pa(kexec_pgd); ++ page_list[VA_PGD] = (unsigned long)kexec_pgd; ++ page_list[PA_PUD_0] = __pa(kexec_pud0); ++ page_list[VA_PUD_0] = (unsigned long)kexec_pud0; ++ page_list[PA_PMD_0] = __pa(kexec_pmd0); ++ page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; ++ page_list[PA_PTE_0] = __pa(kexec_pte0); ++ page_list[VA_PTE_0] = (unsigned long)kexec_pte0; ++ page_list[PA_PUD_1] = __pa(kexec_pud1); ++ page_list[VA_PUD_1] = (unsigned long)kexec_pud1; ++ page_list[PA_PMD_1] = __pa(kexec_pmd1); ++ page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; ++ page_list[PA_PTE_1] = __pa(kexec_pte1); ++ page_list[VA_PTE_1] = (unsigned long)kexec_pte1; + ++ page_list[PA_TABLE_PAGE] = ++ (unsigned long)__pa(page_address(image->control_code_page)); + + /* The segment registers are funny things, they have both a + * visible and an invisible part. Whenever the visible part is +@@ -222,7 +224,8 @@ NORET_TYPE void machine_kexec(struct kim + */ + set_gdt(phys_to_virt(0),0); + set_idt(phys_to_virt(0),0); ++ + /* now call it */ +- rnk = (relocate_new_kernel_t) control_code_buffer; +- (*rnk)(page_list, control_code_buffer, image->start, start_pgtable); ++ relocate_kernel((unsigned long)image->head, (unsigned long)page_list, ++ image->start); + } +diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/relocate_kernel.S ./arch/x86_64/kernel/relocate_kernel.S +--- ../orig-linux-2.6.18/arch/x86_64/kernel/relocate_kernel.S 2006-09-20 04:42:06.000000000 +0100 ++++ ./arch/x86_64/kernel/relocate_kernel.S 2007-01-12 16:03:49.000000000 +0000 +@@ -7,31 +7,169 @@ + */ + + #include <linux/linkage.h> ++#include <asm/page.h> ++#include <asm/kexec.h> + +- /* +- * Must be relocatable PIC code callable as a C function, that once +- * it starts can not use the previous processes stack. 
+- */ +- .globl relocate_new_kernel ++/* ++ * Must be relocatable PIC code callable as a C function ++ */ ++ ++#define PTR(x) (x << 3) ++#define PAGE_ALIGNED (1 << PAGE_SHIFT) ++#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */ ++ ++ .text ++ .align PAGE_ALIGNED + .code64 ++ .globl relocate_kernel ++relocate_kernel: ++ /* %rdi indirection_page ++ * %rsi page_list ++ * %rdx start address ++ */ ++ ++ /* map the control page at its virtual address */ ++ ++ movq $0x0000ff8000000000, %r10 /* mask */ ++ mov $(39 - 3), %cl /* bits to shift */ ++ movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ ++ ++ movq %r11, %r9 ++ andq %r10, %r9 ++ shrq %cl, %r9 ++ ++ movq PTR(VA_PGD)(%rsi), %r8 ++ addq %r8, %r9 ++ movq PTR(PA_PUD_0)(%rsi), %r8 ++ orq $PAGE_ATTR, %r8 ++ movq %r8, (%r9) ++ ++ shrq $9, %r10 ++ sub $9, %cl ++ ++ movq %r11, %r9 ++ andq %r10, %r9 ++ shrq %cl, %r9 ++ ++ movq PTR(VA_PUD_0)(%rsi), %r8 ++ addq %r8, %r9 ++ movq PTR(PA_PMD_0)(%rsi), %r8 ++ orq $PAGE_ATTR, %r8 ++ movq %r8, (%r9) ++ ++ shrq $9, %r10 ++ sub $9, %cl ++ ++ movq %r11, %r9 ++ andq %r10, %r9 ++ shrq %cl, %r9 ++ ++ movq PTR(VA_PMD_0)(%rsi), %r8 ++ addq %r8, %r9 ++ movq PTR(PA_PTE_0)(%rsi), %r8 ++ orq $PAGE_ATTR, %r8 ++ movq %r8, (%r9) ++ ++ shrq $9, %r10 ++ sub $9, %cl ++ ++ movq %r11, %r9 ++ andq %r10, %r9 ++ shrq %cl, %r9 ++ ++ movq PTR(VA_PTE_0)(%rsi), %r8 ++ addq %r8, %r9 ++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 ++ orq $PAGE_ATTR, %r8 ++ movq %r8, (%r9) ++ ++ /* identity map the control page at its physical address */ ++ ++ movq $0x0000ff8000000000, %r10 /* mask */ ++ mov $(39 - 3), %cl /* bits to shift */ ++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ ++ ++ movq %r11, %r9 ++ andq %r10, %r9 ++ shrq %cl, %r9 ++ ++ movq PTR(VA_PGD)(%rsi), %r8 ++ addq %r8, %r9 ++ movq PTR(PA_PUD_1)(%rsi), %r8 ++ orq $PAGE_ATTR, %r8 ++ movq %r8, (%r9) ++ ++ shrq $9, %r10 ++ sub $9, %cl ++ ++ movq %r11, %r9 ++ andq %r10, %r9 ++ shrq %cl, %r9 ++ ++ movq PTR(VA_PUD_1)(%rsi), %r8 ++ addq %r8, %r9 ++ movq PTR(PA_PMD_1)(%rsi), %r8 ++ orq $PAGE_ATTR, %r8 ++ movq %r8, (%r9) ++ ++ shrq $9, %r10 ++ sub $9, %cl ++ ++ movq %r11, %r9 ++ andq %r10, %r9 ++ shrq %cl, %r9 ++ ++ movq PTR(VA_PMD_1)(%rsi), %r8 ++ addq %r8, %r9 ++ movq PTR(PA_PTE_1)(%rsi), %r8 ++ orq $PAGE_ATTR, %r8 ++ movq %r8, (%r9) ++ ++ shrq $9, %r10 ++ sub $9, %cl ++ ++ movq %r11, %r9 ++ andq %r10, %r9 ++ shrq %cl, %r9 ++ ++ movq PTR(VA_PTE_1)(%rsi), %r8 ++ addq %r8, %r9 ++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 ++ orq $PAGE_ATTR, %r8 ++ movq %r8, (%r9) ++ + relocate_new_kernel: +- /* %rdi page_list +- * %rsi reboot_code_buffer ++ /* %rdi indirection_page ++ * %rsi page_list + * %rdx start address +- * %rcx page_table +- * %r8 arg5 +- * %r9 arg6 + */ + + /* zero out flags, and disable interrupts */ + pushq $0 + popfq + +- /* set a new stack at the bottom of our page... 
*/ +- lea 4096(%rsi), %rsp ++ /* get physical address of control page now */ ++ /* this is impossible after page table switch */ ++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 ++ ++ /* get physical address of page table now too */ ++ movq PTR(PA_TABLE_PAGE)(%rsi), %rcx ++ ++ /* switch to new set of page tables */ ++ movq PTR(PA_PGD)(%rsi), %r9 ++ movq %r9, %cr3 ++ ++ /* setup a new stack at the end of the physical control page */ ++ lea 4096(%r8), %rsp ++ ++ /* jump to identity mapped page */ ++ addq $(identity_mapped - relocate_kernel), %r8 ++ pushq %r8 ++ ret + +- /* store the parameters back on the stack */ +- pushq %rdx /* store the start address */ ++identity_mapped: ++ /* store the start address on the stack */ ++ pushq %rdx + + /* Set cr0 to a known state: + * 31 1 == Paging enabled +@@ -136,8 +274,3 @@ relocate_new_kernel: + xorq %r15, %r15 + + ret +-relocate_new_kernel_end: +- +- .globl relocate_new_kernel_size +-relocate_new_kernel_size: +- .quad relocate_new_kernel_end - relocate_new_kernel +diff -pruN ../orig-linux-2.6.18/include/asm-x86_64/kexec.h ./include/asm-x86_64/kexec.h +--- ../orig-linux-2.6.18/include/asm-x86_64/kexec.h 2006-09-20 04:42:06.000000000 +0100 ++++ ./include/asm-x86_64/kexec.h 2007-01-12 16:03:49.000000000 +0000 +@@ -1,6 +1,27 @@ + #ifndef _X86_64_KEXEC_H + #define _X86_64_KEXEC_H + ++#define PA_CONTROL_PAGE 0 ++#define VA_CONTROL_PAGE 1 ++#define PA_PGD 2 ++#define VA_PGD 3 ++#define PA_PUD_0 4 ++#define VA_PUD_0 5 ++#define PA_PMD_0 6 ++#define VA_PMD_0 7 ++#define PA_PTE_0 8 ++#define VA_PTE_0 9 ++#define PA_PUD_1 10 ++#define VA_PUD_1 11 ++#define PA_PMD_1 12 ++#define VA_PMD_1 13 ++#define PA_PTE_1 14 ++#define VA_PTE_1 15 ++#define PA_TABLE_PAGE 16 ++#define PAGES_NR 17 ++ ++#ifndef __ASSEMBLY__ ++ + #include <linux/string.h> + + #include <asm/page.h> +@@ -64,4 +85,12 @@ static inline void crash_setup_regs(stru + newregs->rip = (unsigned long)current_text_addr(); + } + } ++ ++NORET_TYPE void ++relocate_kernel(unsigned long indirection_page, ++ unsigned long page_list, ++ unsigned long start_address) ATTRIB_NORET; ++ ++#endif /* __ASSEMBLY__ */ ++ + #endif /* _X86_64_KEXEC_H */ diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/git-5ee7737379b1d7f0c977c0f1661fbaf01a8d4721.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/git-5ee7737379b1d7f0c977c0f1661fbaf01a8d4721.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,28 @@ +--- ./arch/ia64/kernel/smp.c.orig 2007-05-02 19:00:01.000000000 +0900 ++++ ./arch/ia64/kernel/smp.c 2007-05-02 19:04:32.000000000 +0900 +@@ -328,10 +328,14 @@ int + smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait) + { + struct call_data_struct data; +- int cpus = num_online_cpus()-1; ++ int cpus; + +- if (!cpus) ++ spin_lock(&call_lock); ++ cpus = num_online_cpus()-1; ++ if (!cpus) { ++ spin_unlock(&call_lock); + return 0; ++ } + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); +@@ -343,8 +347,6 @@ smp_call_function (void (*func) (void *i + if (wait) + atomic_set(&data.finished, 0); + +- spin_lock(&call_lock); +- + call_data = &data; + mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC */ + send_IPI_allbutself(IPI_CALL_FUNC); diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/git-c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/git-c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,29 @@ +commit 
c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4 +Author: Vivek Goyal <vgoyal@xxxxxxxxxx> +Date: Wed Nov 8 17:44:41 2006 -0800 + + [PATCH] i386: Force data segment to be 4K aligned + + o Currently there is no specific alignment restriction in linker script + and in some cases it can be placed non 4K aligned addresses. This fails + kexec which checks that segment to be loaded is page aligned. + + o I guess, it does not harm data segment to be 4K aligned. + + Signed-off-by: Vivek Goyal <vgoyal@xxxxxxxxxx> + Signed-off-by: Andi Kleen <ak@xxxxxxx> + Signed-off-by: Andrew Morton <akpm@xxxxxxxx> + Signed-off-by: Linus Torvalds <torvalds@xxxxxxxx> + +diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S +index adc1f23..c6f84a0 100644 +--- a/arch/i386/kernel/vmlinux.lds.S ++++ b/arch/i386/kernel/vmlinux.lds.S +@@ -51,6 +51,7 @@ SECTIONS + __tracedata_end = .; + + /* writeable */ ++ . = ALIGN(4096); + .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ + *(.data) + CONSTRUCTORS diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,20 @@ +diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/vmlinux.lds.S ./arch/x86_64/kernel/vmlinux.lds.S +--- ../orig-linux-2.6.18/arch/x86_64/kernel/vmlinux.lds.S 2007-01-12 18:20:02.000000000 +0000 ++++ ./arch/x86_64/kernel/vmlinux.lds.S 2007-01-12 18:20:20.000000000 +0000 +@@ -17,6 +17,7 @@ PHDRS { + text PT_LOAD FLAGS(5); /* R_E */ + data PT_LOAD FLAGS(7); /* RWE */ + user PT_LOAD FLAGS(7); /* RWE */ ++ data.init PT_LOAD FLAGS(7); /* RWE */ + note PT_NOTE FLAGS(4); /* R__ */ + } + SECTIONS +@@ -131,7 +132,7 @@ SECTIONS + . = ALIGN(8192); /* init_task */ + .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { + *(.data.init_task) +- } :data ++ }:data.init + + . 
= ALIGN(4096); + .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/i386-mach-io-check-nmi.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/i386-mach-io-check-nmi.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,45 @@ +diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/traps.c ./arch/i386/kernel/traps.c +--- ../orig-linux-2.6.18/arch/i386/kernel/traps.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./arch/i386/kernel/traps.c 2007-01-12 16:07:49.000000000 +0000 +@@ -642,18 +642,11 @@ static void mem_parity_error(unsigned ch + + static void io_check_error(unsigned char reason, struct pt_regs * regs) + { +- unsigned long i; +- + printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); + show_registers(regs); + + /* Re-enable the IOCK line, wait for a few seconds */ +- reason = (reason & 0xf) | 8; +- outb(reason, 0x61); +- i = 2000; +- while (--i) udelay(1000); +- reason &= ~8; +- outb(reason, 0x61); ++ clear_io_check_error(reason); + } + + static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) +diff -pruN ../orig-linux-2.6.18/include/asm-i386/mach-default/mach_traps.h ./include/asm-i386/mach-default/mach_traps.h +--- ../orig-linux-2.6.18/include/asm-i386/mach-default/mach_traps.h 2006-09-20 04:42:06.000000000 +0100 ++++ ./include/asm-i386/mach-default/mach_traps.h 2007-01-12 16:07:49.000000000 +0000 +@@ -15,6 +15,18 @@ static inline void clear_mem_error(unsig + outb(reason, 0x61); + } + ++static inline void clear_io_check_error(unsigned char reason) ++{ ++ unsigned long i; ++ ++ reason = (reason & 0xf) | 8; ++ outb(reason, 0x61); ++ i = 2000; ++ while (--i) udelay(1000); ++ reason &= ~8; ++ outb(reason, 0x61); ++} ++ + static inline unsigned char get_nmi_reason(void) + { + return inb(0x61); diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/linux-2.6.18-xen-375-748cd890ea7f --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/linux-2.6.18-xen-375-748cd890ea7f Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,294 @@ +# HG changeset patch +# User Keir Fraser <keir.fraser@xxxxxxxxxx> +# Date 1199916724 0 +# Node ID 748cd890ea7f56752311e519e80eece9d850c01a +# Parent fedc10fba9f1d5ec0c72dbcbca87e508222b4c48 +x86_64: Add TIF_RESTORE_SIGMASK (from upstream Linux) + +We need TIF_RESTORE_SIGMASK in order to support ppoll() and pselect() +system calls. This patch originally came from Andi, and was based +heavily on David Howells' implementation of same on i386. I fixed a +typo which was causing do_signal() to use the wrong signal mask. 
+ +Signed-off-by: David Woodhouse <dwmw2@xxxxxxxxxxxxx> +Signed-off-by: Andi Kleen <ak@xxxxxxx> + +diff -r fedc10fba9f1 -r 748cd890ea7f arch/x86_64/ia32/ia32_signal.c +--- a/arch/x86_64/ia32/ia32_signal.c Tue Jan 08 09:55:29 2008 +0000 ++++ b/arch/x86_64/ia32/ia32_signal.c Wed Jan 09 22:12:04 2008 +0000 +@@ -113,25 +113,19 @@ int copy_siginfo_from_user32(siginfo_t * + } + + asmlinkage long +-sys32_sigsuspend(int history0, int history1, old_sigset_t mask, +- struct pt_regs *regs) +-{ +- sigset_t saveset; +- ++sys32_sigsuspend(int history0, int history1, old_sigset_t mask) ++{ + mask &= _BLOCKABLE; + spin_lock_irq(&current->sighand->siglock); +- saveset = current->blocked; ++ current->saved_sigmask = current->blocked; + siginitset(&current->blocked, mask); + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); + +- regs->rax = -EINTR; +- while (1) { +- current->state = TASK_INTERRUPTIBLE; +- schedule(); +- if (do_signal(regs, &saveset)) +- return -EINTR; +- } ++ current->state = TASK_INTERRUPTIBLE; ++ schedule(); ++ set_thread_flag(TIF_RESTORE_SIGMASK); ++ return -ERESTARTNOHAND; + } + + asmlinkage long +@@ -508,11 +502,11 @@ int ia32_setup_frame(int sig, struct k_s + current->comm, current->pid, frame, regs->rip, frame->pretcode); + #endif + +- return 1; ++ return 0; + + give_sigsegv: + force_sigsegv(sig, current); +- return 0; ++ return -EFAULT; + } + + int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +@@ -595,7 +589,7 @@ int ia32_setup_rt_frame(int sig, struct + regs->ss = __USER32_DS; + + set_fs(USER_DS); +- regs->eflags &= ~TF_MASK; ++ regs->eflags &= ~TF_MASK; + if (test_thread_flag(TIF_SINGLESTEP)) + ptrace_notify(SIGTRAP); + +@@ -604,9 +598,9 @@ int ia32_setup_rt_frame(int sig, struct + current->comm, current->pid, frame, regs->rip, frame->pretcode); + #endif + +- return 1; ++ return 0; + + give_sigsegv: + force_sigsegv(sig, current); +- return 0; +-} ++ return -EFAULT; ++} +diff -r fedc10fba9f1 -r 748cd890ea7f arch/x86_64/kernel/signal.c +--- a/arch/x86_64/kernel/signal.c Tue Jan 08 09:55:29 2008 +0000 ++++ b/arch/x86_64/kernel/signal.c Wed Jan 09 22:12:04 2008 +0000 +@@ -36,37 +36,6 @@ int ia32_setup_rt_frame(int sig, struct + sigset_t *set, struct pt_regs * regs); + int ia32_setup_frame(int sig, struct k_sigaction *ka, + sigset_t *set, struct pt_regs * regs); +- +-asmlinkage long +-sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, struct pt_regs *regs) +-{ +- sigset_t saveset, newset; +- +- /* XXX: Don't preclude handling different sized sigset_t's.
*/ +- if (sigsetsize != sizeof(sigset_t)) +- return -EINVAL; +- +- if (copy_from_user(&newset, unewset, sizeof(newset))) +- return -EFAULT; +- sigdelsetmask(&newset, ~_BLOCKABLE); +- +- spin_lock_irq(&current->sighand->siglock); +- saveset = current->blocked; +- current->blocked = newset; +- recalc_sigpending(); +- spin_unlock_irq(&current->sighand->siglock); +-#ifdef DEBUG_SIG +- printk("rt_sigsuspend savset(%lx) newset(%lx) regs(%p) rip(%lx)\n", +- saveset, newset, regs, regs->rip); +-#endif +- regs->rax = -EINTR; +- while (1) { +- current->state = TASK_INTERRUPTIBLE; +- schedule(); +- if (do_signal(regs, &saveset)) +- return -EINTR; +- } +-} + + asmlinkage long + sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, +@@ -341,11 +310,11 @@ static int setup_rt_frame(int sig, struc + current->comm, current->pid, frame, regs->rip, frame->pretcode); + #endif + +- return 1; ++ return 0; + + give_sigsegv: + force_sigsegv(sig, current); +- return 0; ++ return -EFAULT; + } + + /* +@@ -408,7 +377,7 @@ handle_signal(unsigned long sig, siginfo + #endif + ret = setup_rt_frame(sig, ka, info, oldset, regs); + +- if (ret) { ++ if (ret == 0) { + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) +@@ -425,11 +394,12 @@ handle_signal(unsigned long sig, siginfo + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + */ +-int do_signal(struct pt_regs *regs, sigset_t *oldset) ++static void do_signal(struct pt_regs *regs) + { + struct k_sigaction ka; + siginfo_t info; + int signr; ++ sigset_t *oldset; + + /* + * We want the common case to go fast, which +@@ -438,9 +408,11 @@ int do_signal(struct pt_regs *regs, sigs + * if so. + */ + if (!user_mode(regs)) +- return 1; +- +- if (!oldset) ++ return; ++ ++ if (test_thread_flag(TIF_RESTORE_SIGMASK)) ++ oldset = &current->saved_sigmask; ++ else + oldset = &current->blocked; + + signr = get_signal_to_deliver(&info, &ka, regs, NULL); +@@ -454,30 +426,46 @@ int do_signal(struct pt_regs *regs, sigs + set_debugreg(current->thread.debugreg7, 7); + + /* Whee! Actually deliver the signal. */ +- return handle_signal(signr, &info, &ka, oldset, regs); ++ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { ++ /* a signal was successfully delivered; the saved ++ * sigmask will have been stored in the signal frame, ++ * and will be restored by sigreturn, so we can simply ++ * clear the TIF_RESTORE_SIGMASK flag */ ++ clear_thread_flag(TIF_RESTORE_SIGMASK); ++ } ++ return; + } + + /* Did we come from a system call? */ + if ((long)regs->orig_rax >= 0) { + /* Restart the system call - no handlers present */ + long res = regs->rax; +- if (res == -ERESTARTNOHAND || +- res == -ERESTARTSYS || +- res == -ERESTARTNOINTR) { ++ switch (res) { ++ case -ERESTARTNOHAND: ++ case -ERESTARTSYS: ++ case -ERESTARTNOINTR: + regs->rax = regs->orig_rax; + regs->rip -= 2; +- } +- if (regs->rax == (unsigned long)-ERESTART_RESTARTBLOCK) { ++ break; ++ case -ERESTART_RESTARTBLOCK: + regs->rax = test_thread_flag(TIF_IA32) ? + __NR_ia32_restart_syscall : + __NR_restart_syscall; + regs->rip -= 2; +- } +- } +- return 0; +-} +- +-void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, __u32 thread_info_flags) ++ break; ++ } ++ } ++ ++ /* if there's no signal to deliver, we just put the saved sigmask ++ back.
*/ ++ if (test_thread_flag(TIF_RESTORE_SIGMASK)) { ++ clear_thread_flag(TIF_RESTORE_SIGMASK); ++ sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); ++ } ++} ++ ++void ++do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) + { + #ifdef DEBUG_SIG + printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx pending:%lx\n", +@@ -491,8 +479,8 @@ void do_notify_resume(struct pt_regs *re + } + + /* deal with pending signal delivery */ +- if (thread_info_flags & _TIF_SIGPENDING) +- do_signal(regs,oldset); ++ if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK)) ++ do_signal(regs); + } + + void signal_fault(struct pt_regs *regs, void __user *frame, char *where) +diff -r fedc10fba9f1 -r 748cd890ea7f include/asm-x86_64/signal.h +--- a/include/asm-x86_64/signal.h Tue Jan 08 09:55:29 2008 +0000 ++++ b/include/asm-x86_64/signal.h Wed Jan 09 22:12:04 2008 +0000 +@@ -22,10 +22,6 @@ typedef struct { + typedef struct { + unsigned long sig[_NSIG_WORDS]; + } sigset_t; +- +- +-struct pt_regs; +-asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset); + + + #else +diff -r fedc10fba9f1 -r 748cd890ea7f include/asm-x86_64/thread_info.h +--- a/include/asm-x86_64/thread_info.h Tue Jan 08 09:55:29 2008 +0000 ++++ b/include/asm-x86_64/thread_info.h Wed Jan 09 22:12:04 2008 +0000 +@@ -114,6 +114,7 @@ static inline struct thread_info *stack_ + #define TIF_IRET 5 /* force IRET */ + #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ + #define TIF_SECCOMP 8 /* secure computing */ ++#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ + /* 16 free */ + #define TIF_IA32 17 /* 32bit process */ + #define TIF_FORK 18 /* ret_from_fork */ +@@ -128,6 +129,7 @@ static inline struct thread_info *stack_ + #define _TIF_IRET (1<<TIF_IRET) + #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) + #define _TIF_SECCOMP (1<<TIF_SECCOMP) ++#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) + #define _TIF_IA32 (1<<TIF_IA32) + #define _TIF_FORK (1<<TIF_FORK) + #define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING) +diff -r fedc10fba9f1 -r 748cd890ea7f include/asm-x86_64/unistd.h +--- a/include/asm-x86_64/unistd.h Tue Jan 08 09:55:29 2008 +0000 ++++ b/include/asm-x86_64/unistd.h Wed Jan 09 22:12:04 2008 +0000 +@@ -658,6 +658,7 @@ do { \ + #define __ARCH_WANT_SYS_SIGPENDING + #define __ARCH_WANT_SYS_SIGPROCMASK + #define __ARCH_WANT_SYS_RT_SIGACTION ++#define __ARCH_WANT_SYS_RT_SIGSUSPEND + #define __ARCH_WANT_SYS_TIME + #define __ARCH_WANT_COMPAT_SYS_TIME + diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/linux-2.6.18-xen-376-353802ec1caf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/linux-2.6.18-xen-376-353802ec1caf Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,40 @@ +# HG changeset patch +# User Keir Fraser <keir.fraser@xxxxxxxxxx> +# Date 1199916752 0 +# Node ID 353802ec1caf399143e48713a04cedd37a106347 +# Parent 748cd890ea7f56752311e519e80eece9d850c01a +x86_64: Add ppoll/pselect syscalls (from upstream Linux) + +Needed TIF_RESTORE_SIGMASK first + +Signed-off-by: Andi Kleen <ak@xxxxxxx> + +diff -r 748cd890ea7f -r 353802ec1caf arch/x86_64/ia32/ia32entry.S +--- a/arch/x86_64/ia32/ia32entry.S Wed Jan 09 22:12:04 2008 +0000 ++++ b/arch/x86_64/ia32/ia32entry.S Wed Jan 09 22:12:32 2008 +0000 +@@ -703,8 +703,8 @@ ia32_sys_call_table: + .quad sys_readlinkat /* 305 */ + .quad sys_fchmodat + .quad sys_faccessat +- .quad quiet_ni_syscall /* pselect6 for now */ +- .quad quiet_ni_syscall /* ppoll for now */ ++ .quad compat_sys_pselect6 ++ .quad compat_sys_ppoll
+ .quad sys_unshare /* 310 */ + .quad compat_sys_set_robust_list + .quad compat_sys_get_robust_list +diff -r 748cd890ea7f -r 353802ec1caf include/asm-x86_64/unistd.h +--- a/include/asm-x86_64/unistd.h Wed Jan 09 22:12:04 2008 +0000 ++++ b/include/asm-x86_64/unistd.h Wed Jan 09 22:12:32 2008 +0000 +@@ -600,9 +600,9 @@ __SYSCALL(__NR_fchmodat, sys_fchmodat) + #define __NR_faccessat 269 + __SYSCALL(__NR_faccessat, sys_faccessat) + #define __NR_pselect6 270 +-__SYSCALL(__NR_pselect6, sys_ni_syscall) /* for now */ ++__SYSCALL(__NR_pselect6, sys_pselect6) + #define __NR_ppoll 271 +-__SYSCALL(__NR_ppoll, sys_ni_syscall) /* for now */ ++__SYSCALL(__NR_ppoll, sys_ppoll) + #define __NR_unshare 272 + __SYSCALL(__NR_unshare, sys_unshare) + #define __NR_set_robust_list 273 diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,151 @@ +diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/machine_kexec.c ./arch/i386/kernel/machine_kexec.c +--- ../orig-linux-2.6.18/arch/i386/kernel/machine_kexec.c 2007-01-12 16:03:23.000000000 +0000 ++++ ./arch/i386/kernel/machine_kexec.c 2007-01-12 16:03:37.000000000 +0000 +@@ -28,48 +28,6 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED + static u32 kexec_pte0[1024] PAGE_ALIGNED; + static u32 kexec_pte1[1024] PAGE_ALIGNED; + +-static void set_idt(void *newidt, __u16 limit) +-{ +- struct Xgt_desc_struct curidt; +- +- /* ia32 supports unaliged loads & stores */ +- curidt.size = limit; +- curidt.address = (unsigned long)newidt; +- +- load_idt(&curidt); +-}; +- +- +-static void set_gdt(void *newgdt, __u16 limit) +-{ +- struct Xgt_desc_struct curgdt; +- +- /* ia32 supports unaligned loads & stores */ +- curgdt.size = limit; +- curgdt.address = (unsigned long)newgdt; +- +- load_gdt(&curgdt); +-}; +- +-static void load_segments(void) +-{ +-#define __STR(X) #X +-#define STR(X) __STR(X) +- +- __asm__ __volatile__ ( +- "\tljmp $"STR(__KERNEL_CS)",$1f\n" +- "\t1:\n" +- "\tmovl $"STR(__KERNEL_DS)",%%eax\n" +- "\tmovl %%eax,%%ds\n" +- "\tmovl %%eax,%%es\n" +- "\tmovl %%eax,%%fs\n" +- "\tmovl %%eax,%%gs\n" +- "\tmovl %%eax,%%ss\n" +- ::: "eax", "memory"); +-#undef STR +-#undef __STR +-} +- + /* + * A architecture hook called to validate the + * proposed image and prepare the control pages +@@ -126,23 +84,6 @@ NORET_TYPE void machine_kexec(struct kim + page_list[PA_PTE_1] = __pa(kexec_pte1); + page_list[VA_PTE_1] = (unsigned long)kexec_pte1; + +- /* The segment registers are funny things, they have both a +- * visible and an invisible part. Whenever the visible part is +- * set to a specific selector, the invisible part is loaded +- * with from a table in memory. At no other time is the +- * descriptor table in memory accessed. +- * +- * I take advantage of this here by force loading the +- * segments, before I zap the gdt with an invalid value. +- */ +- load_segments(); +- /* The gdt & idt are now invalid. +- * If you want to load them you must set up your own idt & gdt. 
+- */ +- set_gdt(phys_to_virt(0),0); +- set_idt(phys_to_virt(0),0); +- +- /* now call it */ + relocate_kernel((unsigned long)image->head, (unsigned long)page_list, + image->start, cpu_has_pae); + } +diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/relocate_kernel.S ./arch/i386/kernel/relocate_kernel.S +--- ../orig-linux-2.6.18/arch/i386/kernel/relocate_kernel.S 2007-01-12 16:03:23.000000000 +0000 ++++ ./arch/i386/kernel/relocate_kernel.S 2007-01-12 16:03:37.000000000 +0000 +@@ -154,14 +154,45 @@ relocate_new_kernel: + movl PTR(PA_PGD)(%ebp), %eax + movl %eax, %cr3 + ++ /* setup idt */ ++ movl %edi, %eax ++ addl $(idt_48 - relocate_kernel), %eax ++ lidtl (%eax) ++ ++ /* setup gdt */ ++ movl %edi, %eax ++ addl $(gdt - relocate_kernel), %eax ++ movl %edi, %esi ++ addl $((gdt_48 - relocate_kernel) + 2), %esi ++ movl %eax, (%esi) ++ ++ movl %edi, %eax ++ addl $(gdt_48 - relocate_kernel), %eax ++ lgdtl (%eax) ++ ++ /* setup data segment registers */ ++ mov $(gdt_ds - gdt), %eax ++ mov %eax, %ds ++ mov %eax, %es ++ mov %eax, %fs ++ mov %eax, %gs ++ mov %eax, %ss ++ + /* setup a new stack at the end of the physical control page */ + lea 4096(%edi), %esp + +- /* jump to identity mapped page */ +- movl %edi, %eax +- addl $(identity_mapped - relocate_kernel), %eax +- pushl %eax +- ret ++ /* load new code segment and jump to identity mapped page */ ++ movl %edi, %esi ++ xorl %eax, %eax ++ pushl %eax ++ pushl %esi ++ pushl %eax ++ movl $(gdt_cs - gdt), %eax ++ pushl %eax ++ movl %edi, %eax ++ addl $(identity_mapped - relocate_kernel),%eax ++ pushl %eax ++ iretl + + identity_mapped: + /* store the start address on the stack */ +@@ -250,3 +281,20 @@ identity_mapped: + xorl %edi, %edi + xorl %ebp, %ebp + ret ++ ++ .align 16 ++gdt: ++ .quad 0x0000000000000000 /* NULL descriptor */ ++gdt_cs: ++ .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ ++gdt_ds: ++ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ ++gdt_end: ++ ++gdt_48: ++ .word gdt_end - gdt - 1 /* limit */ ++ .long 0 /* base - filled in by code above */ ++ ++idt_48: ++ .word 0 /* limit */ ++ .long 0 /* base */ diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,143 @@ +diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/machine_kexec.c ./arch/x86_64/kernel/machine_kexec.c +--- ../orig-linux-2.6.18/arch/x86_64/kernel/machine_kexec.c 2007-01-12 16:03:49.000000000 +0000 ++++ ./arch/x86_64/kernel/machine_kexec.c 2007-01-12 16:04:02.000000000 +0000 +@@ -112,47 +112,6 @@ static int init_pgtable(struct kimage *i + return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT); + } + +-static void set_idt(void *newidt, u16 limit) +-{ +- struct desc_ptr curidt; +- +- /* x86-64 supports unaliged loads & stores */ +- curidt.size = limit; +- curidt.address = (unsigned long)newidt; +- +- __asm__ __volatile__ ( +- "lidtq %0\n" +- : : "m" (curidt) +- ); +-}; +- +- +-static void set_gdt(void *newgdt, u16 limit) +-{ +- struct desc_ptr curgdt; +- +- /* x86-64 supports unaligned loads & stores */ +- curgdt.size = limit; +- curgdt.address = (unsigned long)newgdt; +- +- __asm__ __volatile__ ( +- "lgdtq %0\n" +- : : "m" (curgdt) +- ); +-}; +- +-static void load_segments(void) +-{ +- __asm__ __volatile__ ( +- "\tmovl %0,%%ds\n" +- "\tmovl %0,%%es\n" +- "\tmovl %0,%%ss\n" +- "\tmovl %0,%%fs\n" +- "\tmovl %0,%%gs\n" +- 
: : "a" (__KERNEL_DS) : "memory" +- ); +-} +- + int machine_kexec_prepare(struct kimage *image) + { + unsigned long start_pgtable; +@@ -209,23 +168,6 @@ NORET_TYPE void machine_kexec(struct kim + page_list[PA_TABLE_PAGE] = + (unsigned long)__pa(page_address(image->control_code_page)); + +- /* The segment registers are funny things, they have both a +- * visible and an invisible part. Whenever the visible part is +- * set to a specific selector, the invisible part is loaded +- * with from a table in memory. At no other time is the +- * descriptor table in memory accessed. +- * +- * I take advantage of this here by force loading the +- * segments, before I zap the gdt with an invalid value. +- */ +- load_segments(); +- /* The gdt & idt are now invalid. +- * If you want to load them you must set up your own idt & gdt. +- */ +- set_gdt(phys_to_virt(0),0); +- set_idt(phys_to_virt(0),0); +- +- /* now call it */ + relocate_kernel((unsigned long)image->head, (unsigned long)page_list, + image->start); + } +diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/relocate_kernel.S ./arch/x86_64/kernel/relocate_kernel.S +--- ../orig-linux-2.6.18/arch/x86_64/kernel/relocate_kernel.S 2007-01-12 16:03:49.000000000 +0000 ++++ ./arch/x86_64/kernel/relocate_kernel.S 2007-01-12 16:04:02.000000000 +0000 +@@ -159,13 +159,39 @@ relocate_new_kernel: + movq PTR(PA_PGD)(%rsi), %r9 + movq %r9, %cr3 + ++ /* setup idt */ ++ movq %r8, %rax ++ addq $(idt_80 - relocate_kernel), %rax ++ lidtq (%rax) ++ ++ /* setup gdt */ ++ movq %r8, %rax ++ addq $(gdt - relocate_kernel), %rax ++ movq %r8, %r9 ++ addq $((gdt_80 - relocate_kernel) + 2), %r9 ++ movq %rax, (%r9) ++ ++ movq %r8, %rax ++ addq $(gdt_80 - relocate_kernel), %rax ++ lgdtq (%rax) ++ ++ /* setup data segment registers */ ++ xorl %eax, %eax ++ movl %eax, %ds ++ movl %eax, %es ++ movl %eax, %fs ++ movl %eax, %gs ++ movl %eax, %ss ++ + /* setup a new stack at the end of the physical control page */ + lea 4096(%r8), %rsp + +- /* jump to identity mapped page */ +- addq $(identity_mapped - relocate_kernel), %r8 +- pushq %r8 +- ret ++ /* load new code segment and jump to identity mapped page */ ++ movq %r8, %rax ++ addq $(identity_mapped - relocate_kernel), %rax ++ pushq $(gdt_cs - gdt) ++ pushq %rax ++ lretq + + identity_mapped: + /* store the start address on the stack */ +@@ -272,5 +298,19 @@ identity_mapped: + xorq %r13, %r13 + xorq %r14, %r14 + xorq %r15, %r15 +- + ret ++ ++ .align 16 ++gdt: ++ .quad 0x0000000000000000 /* NULL descriptor */ ++gdt_cs: ++ .quad 0x00af9a000000ffff ++gdt_end: ++ ++gdt_80: ++ .word gdt_end - gdt - 1 /* limit */ ++ .quad 0 /* base - filled in by code above */ ++ ++idt_80: ++ .word 0 /* limit */ ++ .quad 0 /* base */ diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/net-csum.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/net-csum.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,63 @@ +diff -pruN ../orig-linux-2.6.18/net/ipv4/netfilter/ip_nat_proto_tcp.c ./net/ipv4/netfilter/ip_nat_proto_tcp.c +--- ../orig-linux-2.6.18/net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./net/ipv4/netfilter/ip_nat_proto_tcp.c 2007-01-12 16:08:53.000000000 +0000 +@@ -129,7 +129,12 @@ tcp_manip_pkt(struct sk_buff **pskb, + if (hdrsize < sizeof(*hdr)) + return 1; + +- hdr->check = ip_nat_cheat_check(~oldip, newip, ++#ifdef CONFIG_XEN ++ if ((*pskb)->proto_csum_blank) ++ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); ++ else ++#endif ++ hdr->check = ip_nat_cheat_check(~oldip, newip, + 
ip_nat_cheat_check(oldport ^ 0xFFFF, + newport, + hdr->check)); +diff -pruN ../orig-linux-2.6.18/net/ipv4/netfilter/ip_nat_proto_udp.c ./net/ipv4/netfilter/ip_nat_proto_udp.c +--- ../orig-linux-2.6.18/net/ipv4/netfilter/ip_nat_proto_udp.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./net/ipv4/netfilter/ip_nat_proto_udp.c 2007-01-12 16:08:53.000000000 +0000 +@@ -113,11 +113,17 @@ udp_manip_pkt(struct sk_buff **pskb, + newport = tuple->dst.u.udp.port; + portptr = &hdr->dest; + } +- if (hdr->check) /* 0 is a special case meaning no checksum */ +- hdr->check = ip_nat_cheat_check(~oldip, newip, ++ if (hdr->check) { /* 0 is a special case meaning no checksum */ ++#ifdef CONFIG_XEN ++ if ((*pskb)->proto_csum_blank) ++ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); ++ else ++#endif ++ hdr->check = ip_nat_cheat_check(~oldip, newip, + ip_nat_cheat_check(*portptr ^ 0xFFFF, + newport, + hdr->check)); ++ } + *portptr = newport; + return 1; + } +diff -pruN ../orig-linux-2.6.18/net/ipv4/xfrm4_output.c ./net/ipv4/xfrm4_output.c +--- ../orig-linux-2.6.18/net/ipv4/xfrm4_output.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./net/ipv4/xfrm4_output.c 2007-01-12 17:38:34.000000000 +0000 +@@ -18,6 +18,8 @@ + #include <net/xfrm.h> + #include <net/icmp.h> + ++extern int skb_checksum_setup(struct sk_buff *skb); ++ + static int xfrm4_tunnel_check_size(struct sk_buff *skb) + { + int mtu, ret = 0; +@@ -48,6 +50,10 @@ static int xfrm4_output_one(struct sk_bu + struct xfrm_state *x = dst->xfrm; + int err; + ++ err = skb_checksum_setup(skb); ++ if (err) ++ goto error_nolock; ++ + if (skb->ip_summed == CHECKSUM_HW) { + err = skb_checksum_help(skb, 0); + if (err) diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/net-gso-5-rcv-mss.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/net-gso-5-rcv-mss.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,12 @@ +diff -pruN ../orig-linux-2.6.18/net/ipv4/tcp_input.c ./net/ipv4/tcp_input.c +--- ../orig-linux-2.6.18/net/ipv4/tcp_input.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./net/ipv4/tcp_input.c 2007-01-12 18:10:16.000000000 +0000 +@@ -127,7 +127,7 @@ static void tcp_measure_rcv_mss(struct s + /* skb->len may jitter because of SACKs, even if peer + * sends good full-sized frames. 
+ */ +- len = skb->len; ++ len = skb_shinfo(skb)->gso_size ?: skb->len; + if (len >= icsk->icsk_ack.rcv_mss) { + icsk->icsk_ack.rcv_mss = len; + } else { diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/pmd-shared.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/pmd-shared.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,111 @@ +diff -pruN ../orig-linux-2.6.18/arch/i386/mm/pageattr.c ./arch/i386/mm/pageattr.c +--- ../orig-linux-2.6.18/arch/i386/mm/pageattr.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./arch/i386/mm/pageattr.c 2007-01-12 18:11:06.000000000 +0000 +@@ -84,7 +84,7 @@ static void set_pmd_pte(pte_t *kpte, uns + unsigned long flags; + + set_pte_atomic(kpte, pte); /* change init_mm */ +- if (PTRS_PER_PMD > 1) ++ if (HAVE_SHARED_KERNEL_PMD) + return; + + spin_lock_irqsave(&pgd_lock, flags); +diff -pruN ../orig-linux-2.6.18/arch/i386/mm/pgtable.c ./arch/i386/mm/pgtable.c +--- ../orig-linux-2.6.18/arch/i386/mm/pgtable.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./arch/i386/mm/pgtable.c 2007-01-12 18:11:06.000000000 +0000 +@@ -214,9 +214,10 @@ void pgd_ctor(void *pgd, kmem_cache_t *c + spin_lock_irqsave(&pgd_lock, flags); + } + +- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, +- swapper_pg_dir + USER_PTRS_PER_PGD, +- KERNEL_PGD_PTRS); ++ if (PTRS_PER_PMD == 1 || HAVE_SHARED_KERNEL_PMD) ++ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, ++ swapper_pg_dir + USER_PTRS_PER_PGD, ++ KERNEL_PGD_PTRS); + if (PTRS_PER_PMD > 1) + return; + +@@ -248,6 +249,30 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + goto out_oom; + set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); + } ++ ++ if (!HAVE_SHARED_KERNEL_PMD) { ++ unsigned long flags; ++ ++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { ++ pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); ++ if (!pmd) ++ goto out_oom; ++ set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd))); ++ } ++ ++ spin_lock_irqsave(&pgd_lock, flags); ++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { ++ unsigned long v = (unsigned long)i << PGDIR_SHIFT; ++ pgd_t *kpgd = pgd_offset_k(v); ++ pud_t *kpud = pud_offset(kpgd, v); ++ pmd_t *kpmd = pmd_offset(kpud, v); ++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); ++ memcpy(pmd, kpmd, PAGE_SIZE); ++ } ++ pgd_list_add(pgd); ++ spin_unlock_irqrestore(&pgd_lock, flags); ++ } ++ + return pgd; + + out_oom: +@@ -262,9 +287,23 @@ void pgd_free(pgd_t *pgd) + int i; + + /* in the PAE case user pgd entries are overwritten before usage */ +- if (PTRS_PER_PMD > 1) +- for (i = 0; i < USER_PTRS_PER_PGD; ++i) +- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); ++ if (PTRS_PER_PMD > 1) { ++ for (i = 0; i < USER_PTRS_PER_PGD; ++i) { ++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); ++ kmem_cache_free(pmd_cache, pmd); ++ } ++ if (!HAVE_SHARED_KERNEL_PMD) { ++ unsigned long flags; ++ spin_lock_irqsave(&pgd_lock, flags); ++ pgd_list_del(pgd); ++ spin_unlock_irqrestore(&pgd_lock, flags); ++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { ++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); ++ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); ++ kmem_cache_free(pmd_cache, pmd); ++ } ++ } ++ } + /* in the non-PAE case, free_pgtables() clears user pgd entries */ + kmem_cache_free(pgd_cache, pgd); + } +diff -pruN ../orig-linux-2.6.18/include/asm-i386/pgtable-2level-defs.h ./include/asm-i386/pgtable-2level-defs.h +--- ../orig-linux-2.6.18/include/asm-i386/pgtable-2level-defs.h 2006-09-20 04:42:06.000000000 +0100 ++++ ./include/asm-i386/pgtable-2level-defs.h 2007-01-12 18:11:06.000000000 +0000 +@@ -1,6 
+1,8 @@ + #ifndef _I386_PGTABLE_2LEVEL_DEFS_H + #define _I386_PGTABLE_2LEVEL_DEFS_H + ++#define HAVE_SHARED_KERNEL_PMD 0 ++ + /* + * traditional i386 two-level paging structure: + */ +diff -pruN ../orig-linux-2.6.18/include/asm-i386/pgtable-3level-defs.h ./include/asm-i386/pgtable-3level-defs.h +--- ../orig-linux-2.6.18/include/asm-i386/pgtable-3level-defs.h 2006-09-20 04:42:06.000000000 +0100 ++++ ./include/asm-i386/pgtable-3level-defs.h 2007-01-12 18:11:06.000000000 +0000 +@@ -1,6 +1,8 @@ + #ifndef _I386_PGTABLE_3LEVEL_DEFS_H + #define _I386_PGTABLE_3LEVEL_DEFS_H + ++#define HAVE_SHARED_KERNEL_PMD 1 ++ + /* + * PGDIR_SHIFT determines what a top-level page table entry can map + */ diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,30 @@ +diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/entry.S ./arch/i386/kernel/entry.S +--- ../orig-linux-2.6.18/arch/i386/kernel/entry.S 2006-09-20 04:42:06.000000000 +0100 ++++ ./arch/i386/kernel/entry.S 2007-01-12 18:12:31.000000000 +0000 +@@ -269,7 +269,7 @@ ENTRY(sysenter_entry) + CFI_STARTPROC simple + CFI_DEF_CFA esp, 0 + CFI_REGISTER esp, ebp +- movl TSS_sysenter_esp0(%esp),%esp ++ movl SYSENTER_stack_esp0(%esp),%esp + sysenter_past_esp: + /* + * No need to follow this irqs on/off section: the syscall +@@ -689,7 +689,7 @@ device_not_available_emulate: + * that sets up the real kernel stack. Check here, since we can't + * allow the wrong stack to be used. + * +- * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have ++ * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have + * already pushed 3 words if it hits on the sysenter instruction: + * eflags, cs and eip. 
+ * +@@ -701,7 +701,7 @@ device_not_available_emulate: + cmpw $__KERNEL_CS,4(%esp); \ + jne ok; \ + label: \ +- movl TSS_sysenter_esp0+offset(%esp),%esp; \ ++ movl SYSENTER_stack_esp0+offset(%esp),%esp; \ + pushfl; \ + pushl $__KERNEL_CS; \ + pushl $sysenter_past_esp diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/series --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/series Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,25 @@ +git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch +linux-2.6.19-rc1-kexec-move_segment_code-i386.patch +git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch +linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch +blktap-aio-16_03_06.patch +fix-ide-cd-pio-mode.patch +i386-mach-io-check-nmi.patch +net-csum.patch +net-gso-5-rcv-mss.patch +pmd-shared.patch +rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch +xen-hotplug.patch +xenoprof-generic.patch +x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch +x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch +git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch +x86-elfnote-as-preprocessor-macro.patch +fixaddr-top.patch +git-c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4.patch +softlockup-no-idle-hz.patch +allow-i386-crash-kernels-to-handle-x86_64-dumps.patch +allow-i386-crash-kernels-to-handle-x86_64-dumps-fix.patch +git-5ee7737379b1d7f0c977c0f1661fbaf01a8d4721.patch +linux-2.6.18-xen-375-748cd890ea7f +linux-2.6.18-xen-376-353802ec1caf diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/softlockup-no-idle-hz.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/softlockup-no-idle-hz.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,56 @@ +diff -pruN ../orig-linux-2.6.18/include/linux/sched.h ./include/linux/sched.h +--- ../orig-linux-2.6.18/include/linux/sched.h 2006-09-20 04:42:06.000000000 +0100 ++++ ./include/linux/sched.h 2007-02-07 01:10:24.000000000 +0000 +@@ -211,10 +211,15 @@ extern void update_process_times(int use + extern void scheduler_tick(void); + + #ifdef CONFIG_DETECT_SOFTLOCKUP ++extern unsigned long softlockup_get_next_event(void); + extern void softlockup_tick(void); + extern void spawn_softlockup_task(void); + extern void touch_softlockup_watchdog(void); + #else ++static inline unsigned long softlockup_get_next_event(void) ++{ ++ return MAX_JIFFY_OFFSET; ++} + static inline void softlockup_tick(void) + { + } +diff -pruN ../orig-linux-2.6.18/kernel/softlockup.c ./kernel/softlockup.c +--- ../orig-linux-2.6.18/kernel/softlockup.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./kernel/softlockup.c 2007-02-07 01:53:22.000000000 +0000 +@@ -40,6 +40,19 @@ void touch_softlockup_watchdog(void) + } + EXPORT_SYMBOL(touch_softlockup_watchdog); + ++unsigned long softlockup_get_next_event(void) ++{ ++ int this_cpu = smp_processor_id(); ++ unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu); ++ ++ if (per_cpu(print_timestamp, this_cpu) == touch_timestamp || ++ did_panic || ++ !per_cpu(watchdog_task, this_cpu)) ++ return MAX_JIFFY_OFFSET; ++ ++ return max_t(long, 0, touch_timestamp + HZ - jiffies); ++} ++ + /* + * This callback runs from the timer interrupt, and checks + * whether the watchdog thread has hung or not: +diff -pruN ../orig-linux-2.6.18/kernel/timer.c ./kernel/timer.c +--- ../orig-linux-2.6.18/kernel/timer.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./kernel/timer.c 2007-02-07 01:29:34.000000000 +0000 +@@ -485,7 +485,9 @@ unsigned long next_timer_interrupt(void) + if (hr_expires < 3) + return hr_expires + jiffies; + } +- 
hr_expires += jiffies; ++ hr_expires = min_t(unsigned long, ++ softlockup_get_next_event(), ++ hr_expires) + jiffies; + + base = __get_cpu_var(tvec_bases); + spin_lock(&base->lock); diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/x86-elfnote-as-preprocessor-macro.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/x86-elfnote-as-preprocessor-macro.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,56 @@ +diff -pruN ../orig-linux-2.6.18/include/linux/elfnote.h ./include/linux/elfnote.h +--- ../orig-linux-2.6.18/include/linux/elfnote.h 2007-01-12 18:19:44.000000000 +0000 ++++ ./include/linux/elfnote.h 2007-01-12 18:21:02.000000000 +0000 +@@ -31,22 +31,38 @@ + /* + * Generate a structure with the same shape as Elf{32,64}_Nhdr (which + * turn out to be the same size and shape), followed by the name and +- * desc data with appropriate padding. The 'desc' argument includes +- * the assembler pseudo op defining the type of the data: .asciz +- * "hello, world" ++ * desc data with appropriate padding. The 'desctype' argument is the ++ * assembler pseudo op defining the type of the data e.g. .asciz while ++ * 'descdata' is the data itself e.g. "hello, world". ++ * ++ * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two") ++ * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef) + */ +-.macro ELFNOTE name type desc:vararg +-.pushsection ".note.\name" +- .align 4 +- .long 2f - 1f /* namesz */ +- .long 4f - 3f /* descsz */ +- .long \type +-1:.asciz "\name" +-2:.align 4 +-3:\desc +-4:.align 4 ++#ifdef __STDC__ ++#define ELFNOTE(name, type, desctype, descdata...) \ ++.pushsection .note.name ; \ ++ .align 4 ; \ ++ .long 2f - 1f /* namesz */ ; \ ++ .long 4f - 3f /* descsz */ ; \ ++ .long type ; \ ++1:.asciz #name ; \ ++2:.align 4 ; \ ++3:desctype descdata ; \ ++4:.align 4 ; \ + .popsection +-.endm ++#else /* !__STDC__, i.e. -traditional */ ++#define ELFNOTE(name, type, desctype, descdata) \ ++.pushsection .note.name ; \ ++ .align 4 ; \ ++ .long 2f - 1f /* namesz */ ; \ ++ .long 4f - 3f /* descsz */ ; \ ++ .long type ; \ ++1:.asciz "name" ; \ ++2:.align 4 ; \ ++3:desctype descdata ; \ ++4:.align 4 ; \ ++.popsection ++#endif /* __STDC__ */ + #else /* !__ASSEMBLER__ */ + #include <linux/elf.h> + /* diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,143 @@ +diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/vmlinux.lds.S ./arch/i386/kernel/vmlinux.lds.S +--- ../orig-linux-2.6.18/arch/i386/kernel/vmlinux.lds.S 2006-09-20 04:42:06.000000000 +0100 ++++ ./arch/i386/kernel/vmlinux.lds.S 2007-01-12 18:19:44.000000000 +0000 +@@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386" + OUTPUT_ARCH(i386) + ENTRY(phys_startup_32) + jiffies = jiffies_64; ++ ++PHDRS { ++ text PT_LOAD FLAGS(5); /* R_E */ ++ data PT_LOAD FLAGS(7); /* RWE */ ++ note PT_NOTE FLAGS(4); /* R__ */ ++} + SECTIONS + { + . = __KERNEL_START; +@@ -26,7 +32,7 @@ SECTIONS + KPROBES_TEXT + *(.fixup) + *(.gnu.warning) +- } = 0x9090 ++ } :text = 0x9090 + + _etext = .; /* End of text section */ + +@@ -48,7 +54,7 @@ SECTIONS + .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ + *(.data) + CONSTRUCTORS +- } ++ } :data + + . 
= ALIGN(4096); + __nosave_begin = .; +@@ -184,4 +190,6 @@ SECTIONS + STABS_DEBUG + + DWARF_DEBUG ++ ++ NOTES + } +diff -pruN ../orig-linux-2.6.18/include/asm-generic/vmlinux.lds.h ./include/asm-generic/vmlinux.lds.h +--- ../orig-linux-2.6.18/include/asm-generic/vmlinux.lds.h 2006-09-20 04:42:06.000000000 +0100 ++++ ./include/asm-generic/vmlinux.lds.h 2007-01-12 18:19:44.000000000 +0000 +@@ -194,3 +194,6 @@ + .stab.index 0 : { *(.stab.index) } \ + .stab.indexstr 0 : { *(.stab.indexstr) } \ + .comment 0 : { *(.comment) } ++ ++#define NOTES \ ++ .notes : { *(.note.*) } :note +diff -pruN ../orig-linux-2.6.18/include/linux/elfnote.h ./include/linux/elfnote.h +--- ../orig-linux-2.6.18/include/linux/elfnote.h 1970-01-01 01:00:00.000000000 +0100 ++++ ./include/linux/elfnote.h 2007-01-12 18:19:44.000000000 +0000 +@@ -0,0 +1,88 @@ ++#ifndef _LINUX_ELFNOTE_H ++#define _LINUX_ELFNOTE_H ++/* ++ * Helper macros to generate ELF Note structures, which are put into a ++ * PT_NOTE segment of the final vmlinux image. These are useful for ++ * including name-value pairs of metadata into the kernel binary (or ++ * modules?) for use by external programs. ++ * ++ * Each note has three parts: a name, a type and a desc. The name is ++ * intended to distinguish the note's originator, so it would be a ++ * company, project, subsystem, etc; it must be in a suitable form for ++ * use in a section name. The type is an integer which is used to tag ++ * the data, and is considered to be within the "name" namespace (so ++ * "FooCo"'s type 42 is distinct from "BarProj"'s type 42). The ++ * "desc" field is the actual data. There are no constraints on the ++ * desc field's contents, though typically they're fairly small. ++ * ++ * All notes from a given NAME are put into a section named ++ * .note.NAME. When the kernel image is finally linked, all the notes ++ * are packed into a single .notes section, which is mapped into the ++ * PT_NOTE segment. Because notes for a given name are grouped into ++ * the same section, they'll all be adjacent the output file. ++ * ++ * This file defines macros for both C and assembler use. Their ++ * syntax is slightly different, but they're semantically similar. ++ * ++ * See the ELF specification for more detail about ELF notes. ++ */ ++ ++#ifdef __ASSEMBLER__ ++/* ++ * Generate a structure with the same shape as Elf{32,64}_Nhdr (which ++ * turn out to be the same size and shape), followed by the name and ++ * desc data with appropriate padding. The 'desc' argument includes ++ * the assembler pseudo op defining the type of the data: .asciz ++ * "hello, world" ++ */ ++.macro ELFNOTE name type desc:vararg ++.pushsection ".note.\name" ++ .align 4 ++ .long 2f - 1f /* namesz */ ++ .long 4f - 3f /* descsz */ ++ .long \type ++1:.asciz "\name" ++2:.align 4 ++3:\desc ++4:.align 4 ++.popsection ++.endm ++#else /* !__ASSEMBLER__ */ ++#include <linux/elf.h> ++/* ++ * Use an anonymous structure which matches the shape of ++ * Elf{32,64}_Nhdr, but includes the name and desc data. The size and ++ * type of name and desc depend on the macro arguments. "name" must ++ * be a literal string, and "desc" must be passed by value. You may ++ * only define one note per line, since __LINE__ is used to generate ++ * unique symbols. 
++ */ ++#define _ELFNOTE_PASTE(a,b) a##b ++#define _ELFNOTE(size, name, unique, type, desc) \ ++ static const struct { \ ++ struct elf##size##_note _nhdr; \ ++ unsigned char _name[sizeof(name)] \ ++ __attribute__((aligned(sizeof(Elf##size##_Word)))); \ ++ typeof(desc) _desc \ ++ __attribute__((aligned(sizeof(Elf##size##_Word)))); \ ++ } _ELFNOTE_PASTE(_note_, unique) \ ++ __attribute_used__ \ ++ __attribute__((section(".note." name), \ ++ aligned(sizeof(Elf##size##_Word)), \ ++ unused)) = { \ ++ { \ ++ sizeof(name), \ ++ sizeof(desc), \ ++ type, \ ++ }, \ ++ name, \ ++ desc \ ++ } ++#define ELFNOTE(size, name, type, desc) \ ++ _ELFNOTE(size, name, __LINE__, type, desc) ++ ++#define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc) ++#define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc) ++#endif /* __ASSEMBLER__ */ ++ ++#endif /* _LINUX_ELFNOTE_H */ diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,84 @@ +diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/vmlinux.lds.S ./arch/x86_64/kernel/vmlinux.lds.S +--- ../orig-linux-2.6.18/arch/x86_64/kernel/vmlinux.lds.S 2006-09-20 04:42:06.000000000 +0100 ++++ ./arch/x86_64/kernel/vmlinux.lds.S 2007-01-12 18:20:02.000000000 +0000 +@@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86 + OUTPUT_ARCH(i386:x86-64) + ENTRY(phys_startup_64) + jiffies_64 = jiffies; ++PHDRS { ++ text PT_LOAD FLAGS(5); /* R_E */ ++ data PT_LOAD FLAGS(7); /* RWE */ ++ user PT_LOAD FLAGS(7); /* RWE */ ++ note PT_NOTE FLAGS(4); /* R__ */ ++} + SECTIONS + { + . = __START_KERNEL; +@@ -31,7 +37,7 @@ SECTIONS + KPROBES_TEXT + *(.fixup) + *(.gnu.warning) +- } = 0x9090 ++ } :text = 0x9090 + /* out-of-line lock text */ + .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) { *(.text.lock) } + +@@ -57,17 +63,10 @@ SECTIONS + .data : AT(ADDR(.data) - LOAD_OFFSET) { + *(.data) + CONSTRUCTORS +- } ++ } :data + + _edata = .; /* End of data section */ + +- __bss_start = .; /* BSS */ +- .bss : AT(ADDR(.bss) - LOAD_OFFSET) { +- *(.bss.page_aligned) +- *(.bss) +- } +- __bss_stop = .; +- + . = ALIGN(PAGE_SIZE); + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { +@@ -89,7 +88,7 @@ SECTIONS + #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) + + . = VSYSCALL_ADDR; +- .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } ++ .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } :user + __vsyscall_0 = VSYSCALL_VIRT_ADDR; + + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); +@@ -132,7 +131,7 @@ SECTIONS + . = ALIGN(8192); /* init_task */ + .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { + *(.data.init_task) +- } ++ } :data + + . = ALIGN(4096); + .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { +@@ -222,6 +221,14 @@ SECTIONS + . = ALIGN(4096); + __nosave_end = .; + ++ __bss_start = .; /* BSS */ ++ . = ALIGN(4096); ++ .bss : AT(ADDR(.bss) - LOAD_OFFSET) { ++ *(.bss.page_aligned) ++ *(.bss) ++ } ++ __bss_stop = .; ++ + _end = . 
; + + /* Sections to be discarded */ +@@ -235,4 +242,6 @@ SECTIONS + STABS_DEBUG + + DWARF_DEBUG ++ ++ NOTES + } diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/xen-hotplug.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/xen-hotplug.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,12 @@ +diff -pruN ../orig-linux-2.6.18/fs/proc/proc_misc.c ./fs/proc/proc_misc.c +--- ../orig-linux-2.6.18/fs/proc/proc_misc.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./fs/proc/proc_misc.c 2007-01-12 18:18:36.000000000 +0000 +@@ -471,7 +471,7 @@ static int show_stat(struct seq_file *p, + (unsigned long long)cputime64_to_clock_t(irq), + (unsigned long long)cputime64_to_clock_t(softirq), + (unsigned long long)cputime64_to_clock_t(steal)); +- for_each_online_cpu(i) { ++ for_each_possible_cpu(i) { + + /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ + user = kstat_cpu(i).cpustat.user; diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18.8/xenoprof-generic.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.18.8/xenoprof-generic.patch Thu Jan 17 15:05:38 2008 +0000 @@ -0,0 +1,662 @@ +diff -pruN ../orig-linux-2.6.18/drivers/oprofile/buffer_sync.c ./drivers/oprofile/buffer_sync.c +--- ../orig-linux-2.6.18/drivers/oprofile/buffer_sync.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./drivers/oprofile/buffer_sync.c 2007-01-12 18:19:28.000000000 +0000 +@@ -6,6 +6,10 @@ + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> + * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. ++ * + * This is the core of the buffer management. Each + * CPU buffer is processed and entered into the + * global event buffer. Such processing is necessary +@@ -38,6 +42,7 @@ static cpumask_t marked_cpus = CPU_MASK_ + static DEFINE_SPINLOCK(task_mortuary); + static void process_task_mortuary(void); + ++static int cpu_current_domain[NR_CPUS]; + + /* Take ownership of the task struct and place it on the + * list for processing. Only after two full buffer syncs +@@ -146,6 +151,11 @@ static void end_sync(void) + int sync_start(void) + { + int err; ++ int i; ++ ++ for (i = 0; i < NR_CPUS; i++) { ++ cpu_current_domain[i] = COORDINATOR_DOMAIN; ++ } + + start_cpu_work(); + +@@ -275,15 +285,31 @@ static void add_cpu_switch(int i) + last_cookie = INVALID_COOKIE; + } + +-static void add_kernel_ctx_switch(unsigned int in_kernel) ++static void add_cpu_mode_switch(unsigned int cpu_mode) + { + add_event_entry(ESCAPE_CODE); +- if (in_kernel) +- add_event_entry(KERNEL_ENTER_SWITCH_CODE); +- else +- add_event_entry(KERNEL_EXIT_SWITCH_CODE); ++ switch (cpu_mode) { ++ case CPU_MODE_USER: ++ add_event_entry(USER_ENTER_SWITCH_CODE); ++ break; ++ case CPU_MODE_KERNEL: ++ add_event_entry(KERNEL_ENTER_SWITCH_CODE); ++ break; ++ case CPU_MODE_XEN: ++ add_event_entry(XEN_ENTER_SWITCH_CODE); ++ break; ++ default: ++ break; ++ } + } +- ++ ++static void add_domain_switch(unsigned long domain_id) ++{ ++ add_event_entry(ESCAPE_CODE); ++ add_event_entry(DOMAIN_SWITCH_CODE); ++ add_event_entry(domain_id); ++} ++ + static void + add_user_ctx_switch(struct task_struct const * task, unsigned long cookie) + { +@@ -348,9 +374,9 @@ static int add_us_sample(struct mm_struc + * for later lookup from userspace. 
+ */ + static int +-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel) ++add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode) + { +- if (in_kernel) { ++ if (cpu_mode >= CPU_MODE_KERNEL) { + add_sample_entry(s->eip, s->event); + return 1; + } else if (mm) { +@@ -496,15 +522,21 @@ void sync_buffer(int cpu) + struct mm_struct *mm = NULL; + struct task_struct * new; + unsigned long cookie = 0; +- int in_kernel = 1; ++ int cpu_mode = 1; + unsigned int i; + sync_buffer_state state = sb_buffer_start; + unsigned long available; ++ int domain_switch = 0; + + mutex_lock(&buffer_mutex); + + add_cpu_switch(cpu); + ++ /* We need to assign the first samples in this CPU buffer to the ++ same domain that we were processing at the last sync_buffer */ ++ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) { ++ add_domain_switch(cpu_current_domain[cpu]); ++ } + /* Remember, only we can modify tail_pos */ + + available = get_slots(cpu_buf); +@@ -512,16 +544,18 @@ void sync_buffer(int cpu) + for (i = 0; i < available; ++i) { + struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos]; + +- if (is_code(s->eip)) { +- if (s->event <= CPU_IS_KERNEL) { +- /* kernel/userspace switch */ +- in_kernel = s->event; ++ if (is_code(s->eip) && !domain_switch) { ++ if (s->event <= CPU_MODE_XEN) { ++ /* xen/kernel/userspace switch */ ++ cpu_mode = s->event; + if (state == sb_buffer_start) + state = sb_sample_start; +- add_kernel_ctx_switch(s->event); ++ add_cpu_mode_switch(s->event); + } else if (s->event == CPU_TRACE_BEGIN) { + state = sb_bt_start; + add_trace_begin(); ++ } else if (s->event == CPU_DOMAIN_SWITCH) { ++ domain_switch = 1; + } else { + struct mm_struct * oldmm = mm; + +@@ -535,11 +569,21 @@ void sync_buffer(int cpu) + add_user_ctx_switch(new, cookie); + } + } else { +- if (state >= sb_bt_start && +- !add_sample(mm, s, in_kernel)) { +- if (state == sb_bt_start) { +- state = sb_bt_ignore; +- atomic_inc(&oprofile_stats.bt_lost_no_mapping); ++ if (domain_switch) { ++ cpu_current_domain[cpu] = s->eip; ++ add_domain_switch(s->eip); ++ domain_switch = 0; ++ } else { ++ if (cpu_current_domain[cpu] != ++ COORDINATOR_DOMAIN) { ++ add_sample_entry(s->eip, s->event); ++ } ++ else if (state >= sb_bt_start && ++ !add_sample(mm, s, cpu_mode)) { ++ if (state == sb_bt_start) { ++ state = sb_bt_ignore; ++ atomic_inc(&oprofile_stats.bt_lost_no_mapping); ++ } + } + } + } +@@ -548,6 +592,11 @@ void sync_buffer(int cpu) + } + release_mm(mm); + ++ /* We reset domain to COORDINATOR at each CPU switch */ ++ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) { ++ add_domain_switch(COORDINATOR_DOMAIN); ++ } ++ + mark_done(cpu); + + mutex_unlock(&buffer_mutex); +diff -pruN ../orig-linux-2.6.18/drivers/oprofile/cpu_buffer.c ./drivers/oprofile/cpu_buffer.c +--- ../orig-linux-2.6.18/drivers/oprofile/cpu_buffer.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./drivers/oprofile/cpu_buffer.c 2007-01-12 18:18:50.000000000 +0000 +@@ -6,6 +6,10 @@ + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> + * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. ++ * + * Each CPU has a local buffer that stores PC value/event + * pairs. We also log context switches when we notice them. 
+ * Eventually each CPU's buffer is processed into the global +@@ -34,6 +38,8 @@ static void wq_sync_buffer(void *); + #define DEFAULT_TIMER_EXPIRE (HZ / 10) + static int work_enabled; + ++static int32_t current_domain = COORDINATOR_DOMAIN; ++ + void free_cpu_buffers(void) + { + int i; +@@ -57,7 +63,7 @@ int alloc_cpu_buffers(void) + goto fail; + + b->last_task = NULL; +- b->last_is_kernel = -1; ++ b->last_cpu_mode = -1; + b->tracing = 0; + b->buffer_size = buffer_size; + b->tail_pos = 0; +@@ -113,7 +119,7 @@ void cpu_buffer_reset(struct oprofile_cp + * collected will populate the buffer with proper + * values to initialize the buffer + */ +- cpu_buf->last_is_kernel = -1; ++ cpu_buf->last_cpu_mode = -1; + cpu_buf->last_task = NULL; + } + +@@ -163,13 +169,13 @@ add_code(struct oprofile_cpu_buffer * bu + * because of the head/tail separation of the writer and reader + * of the CPU buffer. + * +- * is_kernel is needed because on some architectures you cannot ++ * cpu_mode is needed because on some architectures you cannot + * tell if you are in kernel or user space simply by looking at +- * pc. We tag this in the buffer by generating kernel enter/exit +- * events whenever is_kernel changes ++ * pc. We tag this in the buffer by generating kernel/user (and xen) ++ * enter events whenever cpu_mode changes + */ + static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc, +- int is_kernel, unsigned long event) ++ int cpu_mode, unsigned long event) + { + struct task_struct * task; + +@@ -180,18 +186,18 @@ static int log_sample(struct oprofile_cp + return 0; + } + +- is_kernel = !!is_kernel; +- + task = current; + + /* notice a switch from user->kernel or vice versa */ +- if (cpu_buf->last_is_kernel != is_kernel) { +- cpu_buf->last_is_kernel = is_kernel; +- add_code(cpu_buf, is_kernel); ++ if (cpu_buf->last_cpu_mode != cpu_mode) { ++ cpu_buf->last_cpu_mode = cpu_mode; ++ add_code(cpu_buf, cpu_mode); + } +- ++ + /* notice a task switch */ +- if (cpu_buf->last_task != task) { ++ /* if not processing other domain samples */ ++ if ((cpu_buf->last_task != task) && ++ (current_domain == COORDINATOR_DOMAIN)) { + cpu_buf->last_task = task; + add_code(cpu_buf, (unsigned long)task); + } +@@ -275,6 +281,25 @@ void oprofile_add_trace(unsigned long pc + add_sample(cpu_buf, pc, 0); + } + ++int oprofile_add_domain_switch(int32_t domain_id) ++{ ++ struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()]; ++ ++ /* should have space for switching into and out of domain ++ (2 slots each) plus one sample and one cpu mode switch */ ++ if (((nr_available_slots(cpu_buf) < 6) && ++ (domain_id != COORDINATOR_DOMAIN)) || ++ (nr_available_slots(cpu_buf) < 2)) ++ return 0; ++ ++ add_code(cpu_buf, CPU_DOMAIN_SWITCH); ++ add_sample(cpu_buf, domain_id, 0); ++ ++ current_domain = domain_id; ++ ++ return 1; ++} ++ + /* + * This serves to avoid cpu buffer overflow, and makes sure + * the task mortuary progresses +diff -pruN ../orig-linux-2.6.18/drivers/oprofile/cpu_buffer.h ./drivers/oprofile/cpu_buffer.h +--- ../orig-linux-2.6.18/drivers/oprofile/cpu_buffer.h 2006-09-20 04:42:06.000000000 +0100 ++++ ./drivers/oprofile/cpu_buffer.h 2007-01-12 18:18:50.000000000 +0000 +@@ -36,7 +36,7 @@ struct oprofile_cpu_buffer { + volatile unsigned long tail_pos; + unsigned long buffer_size; + struct task_struct * last_task; +- int last_is_kernel; ++ int last_cpu_mode; + int tracing; + struct op_sample * buffer; + unsigned long sample_received; +@@ -51,7 +51,10 @@ extern struct oprofile_cpu_buffer cpu_bu + void 
cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf); + + /* transient events for the CPU buffer -> event buffer */ +-#define CPU_IS_KERNEL 1 +-#define CPU_TRACE_BEGIN 2 ++#define CPU_MODE_USER 0 ++#define CPU_MODE_KERNEL 1 ++#define CPU_MODE_XEN 2 ++#define CPU_TRACE_BEGIN 3 ++#define CPU_DOMAIN_SWITCH 4 + + #endif /* OPROFILE_CPU_BUFFER_H */ +diff -pruN ../orig-linux-2.6.18/drivers/oprofile/event_buffer.h ./drivers/oprofile/event_buffer.h +--- ../orig-linux-2.6.18/drivers/oprofile/event_buffer.h 2006-09-20 04:42:06.000000000 +0100 ++++ ./drivers/oprofile/event_buffer.h 2007-01-12 18:18:50.000000000 +0000 +@@ -29,15 +29,20 @@ void wake_up_buffer_waiter(void); + #define CPU_SWITCH_CODE 2 + #define COOKIE_SWITCH_CODE 3 + #define KERNEL_ENTER_SWITCH_CODE 4 +-#define KERNEL_EXIT_SWITCH_CODE 5 ++#define USER_ENTER_SWITCH_CODE 5 + #define MODULE_LOADED_CODE 6 + #define CTX_TGID_CODE 7 + #define TRACE_BEGIN_CODE 8 + #define TRACE_END_CODE 9 ++#define XEN_ENTER_SWITCH_CODE 10 ++#define DOMAIN_SWITCH_CODE 11 + + #define INVALID_COOKIE ~0UL + #define NO_COOKIE 0UL + ++/* Constant used to refer to coordinator domain (Xen) */ ++#define COORDINATOR_DOMAIN -1 ++ + /* add data to the event buffer */ + void add_event_entry(unsigned long data); + +diff -pruN ../orig-linux-2.6.18/drivers/oprofile/oprof.c ./drivers/oprofile/oprof.c +--- ../orig-linux-2.6.18/drivers/oprofile/oprof.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./drivers/oprofile/oprof.c 2007-01-12 18:18:50.000000000 +0000 +@@ -5,6 +5,10 @@ + * @remark Read the file COPYING + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> ++ * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. + */ + + #include <linux/kernel.h> +@@ -19,7 +23,7 @@ + #include "cpu_buffer.h" + #include "buffer_sync.h" + #include "oprofile_stats.h" +- ++ + struct oprofile_operations oprofile_ops; + + unsigned long oprofile_started; +@@ -33,6 +37,32 @@ static DEFINE_MUTEX(start_mutex); + */ + static int timer = 0; + ++int oprofile_set_active(int active_domains[], unsigned int adomains) ++{ ++ int err; ++ ++ if (!oprofile_ops.set_active) ++ return -EINVAL; ++ ++ mutex_lock(&start_mutex); ++ err = oprofile_ops.set_active(active_domains, adomains); ++ mutex_unlock(&start_mutex); ++ return err; ++} ++ ++int oprofile_set_passive(int passive_domains[], unsigned int pdomains) ++{ ++ int err; ++ ++ if (!oprofile_ops.set_passive) ++ return -EINVAL; ++ ++ mutex_lock(&start_mutex); ++ err = oprofile_ops.set_passive(passive_domains, pdomains); ++ mutex_unlock(&start_mutex); ++ return err; ++} ++ + int oprofile_setup(void) + { + int err; +diff -pruN ../orig-linux-2.6.18/drivers/oprofile/oprof.h ./drivers/oprofile/oprof.h +--- ../orig-linux-2.6.18/drivers/oprofile/oprof.h 2006-09-20 04:42:06.000000000 +0100 ++++ ./drivers/oprofile/oprof.h 2007-01-12 18:18:50.000000000 +0000 +@@ -35,5 +35,8 @@ void oprofile_create_files(struct super_ + void oprofile_timer_init(struct oprofile_operations * ops); + + int oprofile_set_backtrace(unsigned long depth); ++ ++int oprofile_set_active(int active_domains[], unsigned int adomains); ++int oprofile_set_passive(int passive_domains[], unsigned int pdomains); + + #endif /* OPROF_H */ +diff -pruN ../orig-linux-2.6.18/drivers/oprofile/oprofile_files.c ./drivers/oprofile/oprofile_files.c +--- ../orig-linux-2.6.18/drivers/oprofile/oprofile_files.c 2006-09-20 04:42:06.000000000 +0100 ++++ ./drivers/oprofile/oprofile_files.c 2007-01-12 18:18:50.000000000 +0000 +@@ -5,15 +5,21 @@ + * @remark Read the file 
COPYING + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> ++ * ++ * Modified by Aravind Menon for Xen ++ * These modifications are: ++ * Copyright (C) 2005 Hewlett-Packard Co. + */ + + #include <linux/fs.h> + #include <linux/oprofile.h> ++#include <asm/uaccess.h> ++#include <linux/ctype.h> + + #include "event_buffer.h" + #include "oprofile_stats.h" + #include "oprof.h" +- ++ + unsigned long fs_buffer_size = 131072; + unsigned long fs_cpu_buffer_size = 8192; + unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */ +@@ -117,11 +123,202 @@ static ssize_t dump_write(struct file * + static struct file_operations dump_fops = { + .write = dump_write, + }; +- ++ ++#define TMPBUFSIZE 512 ++ ++static unsigned int adomains = 0; ++static int active_domains[MAX_OPROF_DOMAINS + 1]; ++static DEFINE_MUTEX(adom_mutex); ++ ++static ssize_t adomain_write(struct file * file, char const __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char *tmpbuf; ++ char *startp, *endp; ++ int i; ++ unsigned long val; ++ ssize_t retval = count; ++ ++ if (*offset) ++ return -EINVAL; ++ if (count > TMPBUFSIZE - 1) ++ return -EINVAL; ++ ++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) ++ return -ENOMEM; ++ ++ if (copy_from_user(tmpbuf, buf, count)) { ++ kfree(tmpbuf); ++ return -EFAULT; ++ } ++ tmpbuf[count] = 0; ++ ++ mutex_lock(&adom_mutex); ++ ++ startp = tmpbuf; ++ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */ ++ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) { ++ val = simple_strtoul(startp, &endp, 0); ++ if (endp == startp) ++ break; ++ while (ispunct(*endp) || isspace(*endp)) ++ endp++; ++ active_domains[i] = val; ++ if (active_domains[i] != val) ++ /* Overflow, force error below */ ++ i = MAX_OPROF_DOMAINS + 1; ++ startp = endp; ++ } ++ /* Force error on trailing junk */ ++ adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i; ++ ++ kfree(tmpbuf); ++ ++ if (adomains > MAX_OPROF_DOMAINS ++ || oprofile_set_active(active_domains, adomains)) { ++ adomains = 0; ++ retval = -EINVAL; ++ } ++ ++ mutex_unlock(&adom_mutex); ++ return retval; ++} ++ ++static ssize_t adomain_read(struct file * file, char __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char * tmpbuf; ++ size_t len; ++ int i; ++ ssize_t retval; ++ ++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) ++ return -ENOMEM; ++ ++ mutex_lock(&adom_mutex); ++ ++ len = 0; ++ for (i = 0; i < adomains; i++) ++ len += snprintf(tmpbuf + len, ++ len < TMPBUFSIZE ? 
TMPBUFSIZE - len : 0, ++ "%u ", active_domains[i]); ++ WARN_ON(len > TMPBUFSIZE); ++ if (len != 0 && len <= TMPBUFSIZE) ++ tmpbuf[len-1] = '\n'; ++ ++ mutex_unlock(&adom_mutex); ++ ++ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len); ++ ++ kfree(tmpbuf); ++ return retval; ++} ++ ++ ++static struct file_operations active_domain_ops = { ++ .read = adomain_read, ++ .write = adomain_write, ++}; ++ ++static unsigned int pdomains = 0; ++static int passive_domains[MAX_OPROF_DOMAINS]; ++static DEFINE_MUTEX(pdom_mutex); ++ ++static ssize_t pdomain_write(struct file * file, char const __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char *tmpbuf; ++ char *startp, *endp; ++ int i; ++ unsigned long val; ++ ssize_t retval = count; ++ ++ if (*offset) ++ return -EINVAL; ++ if (count > TMPBUFSIZE - 1) ++ return -EINVAL; ++ ++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) ++ return -ENOMEM; ++ ++ if (copy_from_user(tmpbuf, buf, count)) { ++ kfree(tmpbuf); ++ return -EFAULT; ++ } ++ tmpbuf[count] = 0; ++ ++ mutex_lock(&pdom_mutex); ++ ++ startp = tmpbuf; ++ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */ ++ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) { ++ val = simple_strtoul(startp, &endp, 0); ++ if (endp == startp) ++ break; ++ while (ispunct(*endp) || isspace(*endp)) ++ endp++; ++ passive_domains[i] = val; ++ if (passive_domains[i] != val) ++ /* Overflow, force error below */ ++ i = MAX_OPROF_DOMAINS + 1; ++ startp = endp; ++ } ++ /* Force error on trailing junk */ ++ pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i; ++ ++ kfree(tmpbuf); ++ ++ if (pdomains > MAX_OPROF_DOMAINS ++ || oprofile_set_passive(passive_domains, pdomains)) { ++ pdomains = 0; ++ retval = -EINVAL; ++ } ++ ++ mutex_unlock(&pdom_mutex); ++ return retval; ++} ++ ++static ssize_t pdomain_read(struct file * file, char __user * buf, ++ size_t count, loff_t * offset) ++{ ++ char * tmpbuf; ++ size_t len; ++ int i; ++ ssize_t retval; ++ ++ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) ++ return -ENOMEM; ++ ++ mutex_lock(&pdom_mutex); ++ ++ len = 0; ++ for (i = 0; i < pdomains; i++) ++ len += snprintf(tmpbuf + len, ++ len < TMPBUFSIZE ? 
TMPBUFSIZE - len : 0, ++ "%u ", passive_domains[i]); ++ WARN_ON(len > TMPBUFSIZE); ++ if (len != 0 && len <= TMPBUFSIZE) ++ tmpbuf[len-1] = '\n'; ++ ++ mutex_unlock(&pdom_mutex); ++ ++ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len); ++ ++ kfree(tmpbuf); ++ return retval; ++} ++ ++static struct file_operations passive_domain_ops = { ++ .read = pdomain_read, ++ .write = pdomain_write, ++}; ++ + void oprofile_create_files(struct super_block * sb, struct dentry * root) + { + oprofilefs_create_file(sb, root, "enable", &enable_fops); + oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666); ++ oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops); ++ oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops); + oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops); + oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size); + oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed); +diff -pruN ../orig-linux-2.6.18/include/linux/oprofile.h ./include/linux/oprofile.h +--- ../orig-linux-2.6.18/include/linux/oprofile.h 2006-09-20 04:42:06.000000000 +0100 ++++ ./include/linux/oprofile.h 2007-01-12 18:18:50.000000000 +0000 +@@ -16,6 +16,8 @@ + #include <linux/types.h> + #include <linux/spinlock.h> + #include <asm/atomic.h> ++ ++#include <xen/interface/xenoprof.h> + + struct super_block; + struct dentry; +@@ -27,6 +29,11 @@ struct oprofile_operations { + /* create any necessary configuration files in the oprofile fs. + * Optional. */ + int (*create_files)(struct super_block * sb, struct dentry * root); ++ /* setup active domains with Xen */ ++ int (*set_active)(int *active_domains, unsigned int adomains); ++ /* setup passive domains with Xen */ ++ int (*set_passive)(int *passive_domains, unsigned int pdomains); ++ + /* Do any necessary interrupt setup. Optional. */ + int (*setup)(void); + /* Do any necessary interrupt shutdown. Optional. */ +@@ -78,6 +85,8 @@ void oprofile_add_pc(unsigned long pc, i + /* add a backtrace entry, to be called from the ->backtrace callback */ + void oprofile_add_trace(unsigned long eip); + ++/* add a domain switch entry */ ++int oprofile_add_domain_switch(int32_t domain_id); + + /** + * Create a file of the given name as a child of the given root, with diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/allow-i386-crash-kernels-to-handle-x86_64-dumps-fix.patch --- a/patches/linux-2.6.18/allow-i386-crash-kernels-to-handle-x86_64-dumps-fix.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,30 +0,0 @@ -From: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> - -In file included from arch/i386/kernel/setup.c:46: -include/linux/crash_dump.h:19:36: warning: extra tokens at end of #ifndef directive - -Cc: "Eric W. 
Biederman" <ebiederm@xxxxxxxxxxxx> -Cc: Andi Kleen <ak@xxxxxxx> -Cc: Horms <horms@xxxxxxxxxxxx> -Cc: Ian Campbell <ian.campbell@xxxxxxxxxxxxx> -Cc: Magnus Damm <magnus.damm@xxxxxxxxx> -Cc: Vivek Goyal <vgoyal@xxxxxxxxxx> -Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> ---- - - include/linux/crash_dump.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff -puN include/linux/crash_dump.h~allow-i386-crash-kernels-to-handle-x86_64-dumps-fix include/linux/crash_dump.h ---- a/include/linux/crash_dump.h~allow-i386-crash-kernels-to-handle-x86_64-dumps-fix -+++ a/include/linux/crash_dump.h -@@ -16,7 +16,7 @@ extern struct proc_dir_entry *proc_vmcor - - /* Architecture code defines this if there are other possible ELF - * machine types, e.g. on bi-arch capable hardware. */ --#ifndef vmcore_elf_check_arch_cross(x) -+#ifndef vmcore_elf_check_arch_cross - #define vmcore_elf_check_arch_cross(x) 0 - #endif - -_ diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/allow-i386-crash-kernels-to-handle-x86_64-dumps.patch --- a/patches/linux-2.6.18/allow-i386-crash-kernels-to-handle-x86_64-dumps.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,66 +0,0 @@ -From: Ian Campbell <ian.campbell@xxxxxxxxxxxxx> - -The specific case I am encountering is kdump under Xen with a 64 bit -hypervisor and 32 bit kernel/userspace. The dump created is 64 bit due to -the hypervisor but the dump kernel is 32 bit for maximum compatibility. - -It's possibly less likely to be useful in a purely native scenario but I -see no reason to disallow it. - -Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxxxxx> -Acked-by: Vivek Goyal <vgoyal@xxxxxxxxxx> -Cc: Horms <horms@xxxxxxxxxxxx> -Cc: Magnus Damm <magnus.damm@xxxxxxxxx> -Cc: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx> -Cc: Andi Kleen <ak@xxxxxxx> -Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> ---- - - fs/proc/vmcore.c | 2 +- - include/asm-i386/kexec.h | 3 +++ - include/linux/crash_dump.h | 8 ++++++++ - 3 files changed, 12 insertions(+), 1 deletion(-) - -diff -puN fs/proc/vmcore.c~allow-i386-crash-kernels-to-handle-x86_64-dumps fs/proc/vmcore.c ---- a/fs/proc/vmcore.c~allow-i386-crash-kernels-to-handle-x86_64-dumps -+++ a/fs/proc/vmcore.c -@@ -514,7 +514,7 @@ static int __init parse_crash_elf64_head - /* Do some basic Verification. */ - if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || - (ehdr.e_type != ET_CORE) || -- !elf_check_arch(&ehdr) || -+ !vmcore_elf_check_arch(&ehdr) || - ehdr.e_ident[EI_CLASS] != ELFCLASS64 || - ehdr.e_ident[EI_VERSION] != EV_CURRENT || - ehdr.e_version != EV_CURRENT || -diff -puN include/asm-i386/kexec.h~allow-i386-crash-kernels-to-handle-x86_64-dumps include/asm-i386/kexec.h ---- a/include/asm-i386/kexec.h~allow-i386-crash-kernels-to-handle-x86_64-dumps -+++ a/include/asm-i386/kexec.h -@@ -47,6 +47,9 @@ - /* The native architecture */ - #define KEXEC_ARCH KEXEC_ARCH_386 - -+/* We can also handle crash dumps from 64 bit kernel. 
*/ -+#define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64) -+ - #define MAX_NOTE_BYTES 1024 - - /* CPU does not save ss and esp on stack if execution is already -diff -puN include/linux/crash_dump.h~allow-i386-crash-kernels-to-handle-x86_64-dumps include/linux/crash_dump.h ---- a/include/linux/crash_dump.h~allow-i386-crash-kernels-to-handle-x86_64-dumps -+++ a/include/linux/crash_dump.h -@@ -14,5 +14,13 @@ extern ssize_t copy_oldmem_page(unsigned - extern const struct file_operations proc_vmcore_operations; - extern struct proc_dir_entry *proc_vmcore; - -+/* Architecture code defines this if there are other possible ELF -+ * machine types, e.g. on bi-arch capable hardware. */ -+#ifndef vmcore_elf_check_arch_cross(x) -+#define vmcore_elf_check_arch_cross(x) 0 -+#endif -+ -+#define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) -+ - #endif /* CONFIG_CRASH_DUMP */ - #endif /* LINUX_CRASHDUMP_H */ -_ diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/blktap-aio-16_03_06.patch --- a/patches/linux-2.6.18/blktap-aio-16_03_06.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,294 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/fs/aio.c ./fs/aio.c ---- ../orig-linux-2.6.18/fs/aio.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./fs/aio.c 2007-01-12 16:04:15.000000000 +0000 -@@ -34,6 +34,11 @@ - #include <asm/uaccess.h> - #include <asm/mmu_context.h> - -+#ifdef CONFIG_EPOLL -+#include <linux/poll.h> -+#include <linux/eventpoll.h> -+#endif -+ - #if DEBUG > 1 - #define dprintk printk - #else -@@ -1015,6 +1020,10 @@ put_rq: - if (waitqueue_active(&ctx->wait)) - wake_up(&ctx->wait); - -+#ifdef CONFIG_EPOLL -+ if (ctx->file && waitqueue_active(&ctx->poll_wait)) -+ wake_up(&ctx->poll_wait); -+#endif - if (ret) - put_ioctx(ctx); - -@@ -1024,6 +1033,8 @@ put_rq: - /* aio_read_evt - * Pull an event off of the ioctx's event ring. Returns the number of - * events fetched (0 or 1 ;-) -+ * If ent parameter is 0, just returns the number of events that would -+ * be fetched. - * FIXME: make this use cmpxchg. - * TODO: make the ringbuffer user mmap()able (requires FIXME). 
- */ -@@ -1046,13 +1057,18 @@ static int aio_read_evt(struct kioctx *i - - head = ring->head % info->nr; - if (head != ring->tail) { -- struct io_event *evp = aio_ring_event(info, head, KM_USER1); -- *ent = *evp; -- head = (head + 1) % info->nr; -- smp_mb(); /* finish reading the event before updatng the head */ -- ring->head = head; -- ret = 1; -- put_aio_ring_event(evp, KM_USER1); -+ if (ent) { /* event requested */ -+ struct io_event *evp = -+ aio_ring_event(info, head, KM_USER1); -+ *ent = *evp; -+ head = (head + 1) % info->nr; -+ /* finish reading the event before updatng the head */ -+ smp_mb(); -+ ring->head = head; -+ ret = 1; -+ put_aio_ring_event(evp, KM_USER1); -+ } else /* only need to know availability */ -+ ret = 1; - } - spin_unlock(&info->ring_lock); - -@@ -1235,9 +1251,78 @@ static void io_destroy(struct kioctx *io - - aio_cancel_all(ioctx); - wait_for_all_aios(ioctx); -+#ifdef CONFIG_EPOLL -+ /* forget the poll file, but it's up to the user to close it */ -+ if (ioctx->file) { -+ ioctx->file->private_data = 0; -+ ioctx->file = 0; -+ } -+#endif - put_ioctx(ioctx); /* once for the lookup */ - } - -+#ifdef CONFIG_EPOLL -+ -+static int aio_queue_fd_close(struct inode *inode, struct file *file) -+{ -+ struct kioctx *ioctx = file->private_data; -+ if (ioctx) { -+ file->private_data = 0; -+ spin_lock_irq(&ioctx->ctx_lock); -+ ioctx->file = 0; -+ spin_unlock_irq(&ioctx->ctx_lock); -+ } -+ return 0; -+} -+ -+static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait) -+{ unsigned int pollflags = 0; -+ struct kioctx *ioctx = file->private_data; -+ -+ if (ioctx) { -+ -+ spin_lock_irq(&ioctx->ctx_lock); -+ /* Insert inside our poll wait queue */ -+ poll_wait(file, &ioctx->poll_wait, wait); -+ -+ /* Check our condition */ -+ if (aio_read_evt(ioctx, 0)) -+ pollflags = POLLIN | POLLRDNORM; -+ spin_unlock_irq(&ioctx->ctx_lock); -+ } -+ -+ return pollflags; -+} -+ -+static const struct file_operations aioq_fops = { -+ .release = aio_queue_fd_close, -+ .poll = aio_queue_fd_poll -+}; -+ -+/* make_aio_fd: -+ * Create a file descriptor that can be used to poll the event queue. -+ * Based and piggybacked on the excellent epoll code. -+ */ -+ -+static int make_aio_fd(struct kioctx *ioctx) -+{ -+ int error, fd; -+ struct inode *inode; -+ struct file *file; -+ -+ error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops); -+ if (error) -+ return error; -+ -+ /* associate the file with the IO context */ -+ file->private_data = ioctx; -+ ioctx->file = file; -+ init_waitqueue_head(&ioctx->poll_wait); -+ return fd; -+} -+#endif -+ -+ - /* sys_io_setup: - * Create an aio_context capable of receiving at least nr_events. - * ctxp must not point to an aio_context that already exists, and -@@ -1250,18 +1335,30 @@ static void io_destroy(struct kioctx *io - * resources are available. May fail with -EFAULT if an invalid - * pointer is passed for ctxp. Will fail with -ENOSYS if not - * implemented. -+ * -+ * To request a selectable fd, the user context has to be initialized -+ * to 1, instead of 0, and the return value is the fd. -+ * This keeps the system call compatible, since a non-zero value -+ * was not allowed so far. 
- */ - asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp) - { - struct kioctx *ioctx = NULL; - unsigned long ctx; - long ret; -+ int make_fd = 0; - - ret = get_user(ctx, ctxp); - if (unlikely(ret)) - goto out; - - ret = -EINVAL; -+#ifdef CONFIG_EPOLL -+ if (ctx == 1) { -+ make_fd = 1; -+ ctx = 0; -+ } -+#endif - if (unlikely(ctx || nr_events == 0)) { - pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n", - ctx, nr_events); -@@ -1272,8 +1369,12 @@ asmlinkage long sys_io_setup(unsigned nr - ret = PTR_ERR(ioctx); - if (!IS_ERR(ioctx)) { - ret = put_user(ioctx->user_id, ctxp); -- if (!ret) -- return 0; -+#ifdef CONFIG_EPOLL -+ if (make_fd && ret >= 0) -+ ret = make_aio_fd(ioctx); -+#endif -+ if (ret >= 0) -+ return ret; - - get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */ - io_destroy(ioctx); -diff -pruN ../orig-linux-2.6.18/fs/eventpoll.c ./fs/eventpoll.c ---- ../orig-linux-2.6.18/fs/eventpoll.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./fs/eventpoll.c 2007-01-12 16:04:41.000000000 +0000 -@@ -236,8 +236,6 @@ struct ep_pqueue { - - static void ep_poll_safewake_init(struct poll_safewake *psw); - static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq); --static int ep_getfd(int *efd, struct inode **einode, struct file **efile, -- struct eventpoll *ep); - static int ep_alloc(struct eventpoll **pep); - static void ep_free(struct eventpoll *ep); - static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd); -@@ -267,7 +265,7 @@ static int ep_events_transfer(struct eve - static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, - int maxevents, long timeout); - static int eventpollfs_delete_dentry(struct dentry *dentry); --static struct inode *ep_eventpoll_inode(void); -+static struct inode *ep_eventpoll_inode(const struct file_operations *fops); - static int eventpollfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data, struct vfsmount *mnt); -@@ -517,7 +515,7 @@ asmlinkage long sys_epoll_create(int siz - * Creates all the items needed to setup an eventpoll file. That is, - * a file structure, and inode and a free file descriptor. - */ -- error = ep_getfd(&fd, &inode, &file, ep); -+ error = ep_getfd(&fd, &inode, &file, ep, &eventpoll_fops); - if (error) - goto eexit_2; - -@@ -702,8 +700,8 @@ eexit_1: - /* - * Creates the file descriptor to be used by the epoll interface. 
- */ --static int ep_getfd(int *efd, struct inode **einode, struct file **efile, -- struct eventpoll *ep) -+int ep_getfd(int *efd, struct inode **einode, struct file **efile, -+ struct eventpoll *ep, const struct file_operations *fops) - { - struct qstr this; - char name[32]; -@@ -719,7 +717,7 @@ static int ep_getfd(int *efd, struct ino - goto eexit_1; - - /* Allocates an inode from the eventpoll file system */ -- inode = ep_eventpoll_inode(); -+ inode = ep_eventpoll_inode(fops); - error = PTR_ERR(inode); - if (IS_ERR(inode)) - goto eexit_2; -@@ -750,7 +748,7 @@ static int ep_getfd(int *efd, struct ino - - file->f_pos = 0; - file->f_flags = O_RDONLY; -- file->f_op = &eventpoll_fops; -+ file->f_op = fops; - file->f_mode = FMODE_READ; - file->f_version = 0; - file->private_data = ep; -@@ -1569,7 +1567,7 @@ static int eventpollfs_delete_dentry(str - } - - --static struct inode *ep_eventpoll_inode(void) -+static struct inode *ep_eventpoll_inode(const struct file_operations *fops) - { - int error = -ENOMEM; - struct inode *inode = new_inode(eventpoll_mnt->mnt_sb); -@@ -1577,7 +1575,7 @@ static struct inode *ep_eventpoll_inode( - if (!inode) - goto eexit_1; - -- inode->i_fop = &eventpoll_fops; -+ inode->i_fop = fops; - - /* - * Mark the inode dirty from the very beginning, -diff -pruN ../orig-linux-2.6.18/include/linux/aio.h ./include/linux/aio.h ---- ../orig-linux-2.6.18/include/linux/aio.h 2006-09-20 04:42:06.000000000 +0100 -+++ ./include/linux/aio.h 2007-01-12 16:04:15.000000000 +0000 -@@ -191,6 +191,11 @@ struct kioctx { - struct aio_ring_info ring_info; - - struct work_struct wq; -+#ifdef CONFIG_EPOLL -+ // poll integration -+ wait_queue_head_t poll_wait; -+ struct file *file; -+#endif - }; - - /* prototypes */ -diff -pruN ../orig-linux-2.6.18/include/linux/eventpoll.h ./include/linux/eventpoll.h ---- ../orig-linux-2.6.18/include/linux/eventpoll.h 2006-09-20 04:42:06.000000000 +0100 -+++ ./include/linux/eventpoll.h 2007-01-12 16:04:15.000000000 +0000 -@@ -90,6 +90,12 @@ static inline void eventpoll_release(str - eventpoll_release_file(file); - } - -+/* -+ * called by aio code to create fd that can poll the aio event queueQ -+ */ -+struct eventpoll; -+int ep_getfd(int *efd, struct inode **einode, struct file **efile, -+ struct eventpoll *ep, const struct file_operations *fops); - #else - - static inline void eventpoll_init_file(struct file *file) {} diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/fix-ide-cd-pio-mode.patch --- a/patches/linux-2.6.18/fix-ide-cd-pio-mode.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,18 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/drivers/ide/ide-lib.c ./drivers/ide/ide-lib.c ---- ../orig-linux-2.6.18/drivers/ide/ide-lib.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./drivers/ide/ide-lib.c 2007-01-12 16:07:37.000000000 +0000 -@@ -408,10 +408,10 @@ void ide_toggle_bounce(ide_drive_t *driv - { - u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */ - -- if (!PCI_DMA_BUS_IS_PHYS) { -- addr = BLK_BOUNCE_ANY; -- } else if (on && drive->media == ide_disk) { -- if (HWIF(drive)->pci_dev) -+ if (on && drive->media == ide_disk) { -+ if (!PCI_DMA_BUS_IS_PHYS) -+ addr = BLK_BOUNCE_ANY; -+ else if (HWIF(drive)->pci_dev) - addr = HWIF(drive)->pci_dev->dma_mask; - } - diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/fixaddr-top.patch --- a/patches/linux-2.6.18/fixaddr-top.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,69 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/arch/i386/mm/pgtable.c 
./arch/i386/mm/pgtable.c ---- ../orig-linux-2.6.18/arch/i386/mm/pgtable.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./arch/i386/mm/pgtable.c 2007-01-17 17:19:36.000000000 +0000 -@@ -12,6 +12,7 @@ - #include <linux/slab.h> - #include <linux/pagemap.h> - #include <linux/spinlock.h> -+#include <linux/module.h> - - #include <asm/system.h> - #include <asm/pgtable.h> -@@ -137,6 +138,10 @@ void set_pmd_pfn(unsigned long vaddr, un - __flush_tlb_one(vaddr); - } - -+static int nr_fixmaps = 0; -+unsigned long __FIXADDR_TOP = 0xfffff000; -+EXPORT_SYMBOL(__FIXADDR_TOP); -+ - void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) - { - unsigned long address = __fix_to_virt(idx); -@@ -146,6 +151,13 @@ void __set_fixmap (enum fixed_addresses - return; - } - set_pte_pfn(address, phys >> PAGE_SHIFT, flags); -+ nr_fixmaps++; -+} -+ -+void set_fixaddr_top(unsigned long top) -+{ -+ BUG_ON(nr_fixmaps > 0); -+ __FIXADDR_TOP = top - PAGE_SIZE; - } - - pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) -diff -pruN ../orig-linux-2.6.18/include/asm-i386/fixmap.h ./include/asm-i386/fixmap.h ---- ../orig-linux-2.6.18/include/asm-i386/fixmap.h 2006-09-20 04:42:06.000000000 +0100 -+++ ./include/asm-i386/fixmap.h 2007-01-17 17:19:36.000000000 +0000 -@@ -19,7 +19,7 @@ - * Leave one empty page between vmalloc'ed areas and - * the start of the fixmap. - */ --#define __FIXADDR_TOP 0xfffff000 -+extern unsigned long __FIXADDR_TOP; - - #ifndef __ASSEMBLY__ - #include <linux/kernel.h> -@@ -94,6 +94,8 @@ enum fixed_addresses { - extern void __set_fixmap (enum fixed_addresses idx, - unsigned long phys, pgprot_t flags); - -+extern void set_fixaddr_top(unsigned long top); -+ - #define set_fixmap(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL) - /* -diff -pruN ../orig-linux-2.6.18/include/asm-i386/page.h ./include/asm-i386/page.h ---- ../orig-linux-2.6.18/include/asm-i386/page.h 2006-09-20 04:42:06.000000000 +0100 -+++ ./include/asm-i386/page.h 2007-01-17 17:19:36.000000000 +0000 -@@ -122,7 +122,7 @@ extern int page_is_ram(unsigned long pag - - #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) - #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) --#define MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE) -+#define MAXMEM (__FIXADDR_TOP-__PAGE_OFFSET-__VMALLOC_RESERVE) - #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) - #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) - #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch --- a/patches/linux-2.6.18/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,382 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/machine_kexec.c ./arch/i386/kernel/machine_kexec.c ---- ../orig-linux-2.6.18/arch/i386/kernel/machine_kexec.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./arch/i386/kernel/machine_kexec.c 2007-01-12 16:03:23.000000000 +0000 -@@ -20,70 +20,13 @@ - #include <asm/system.h> - - #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) -- --#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) --#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) --#define L2_ATTR (_PAGE_PRESENT) -- --#define LEVEL0_SIZE (1UL << 12UL) -- --#ifndef CONFIG_X86_PAE --#define LEVEL1_SIZE (1UL << 22UL) --static u32 pgtable_level1[1024] PAGE_ALIGNED; -- --static void identity_map_page(unsigned long address) --{ -- 
unsigned long level1_index, level2_index; -- u32 *pgtable_level2; -- -- /* Find the current page table */ -- pgtable_level2 = __va(read_cr3()); -- -- /* Find the indexes of the physical address to identity map */ -- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE; -- level2_index = address / LEVEL1_SIZE; -- -- /* Identity map the page table entry */ -- pgtable_level1[level1_index] = address | L0_ATTR; -- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR; -- -- /* Flush the tlb so the new mapping takes effect. -- * Global tlb entries are not flushed but that is not an issue. -- */ -- load_cr3(pgtable_level2); --} -- --#else --#define LEVEL1_SIZE (1UL << 21UL) --#define LEVEL2_SIZE (1UL << 30UL) --static u64 pgtable_level1[512] PAGE_ALIGNED; --static u64 pgtable_level2[512] PAGE_ALIGNED; -- --static void identity_map_page(unsigned long address) --{ -- unsigned long level1_index, level2_index, level3_index; -- u64 *pgtable_level3; -- -- /* Find the current page table */ -- pgtable_level3 = __va(read_cr3()); -- -- /* Find the indexes of the physical address to identity map */ -- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE; -- level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE; -- level3_index = address / LEVEL2_SIZE; -- -- /* Identity map the page table entry */ -- pgtable_level1[level1_index] = address | L0_ATTR; -- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR; -- set_64bit(&pgtable_level3[level3_index], -- __pa(pgtable_level2) | L2_ATTR); -- -- /* Flush the tlb so the new mapping takes effect. -- * Global tlb entries are not flushed but that is not an issue. -- */ -- load_cr3(pgtable_level3); --} -+static u32 kexec_pgd[1024] PAGE_ALIGNED; -+#ifdef CONFIG_X86_PAE -+static u32 kexec_pmd0[1024] PAGE_ALIGNED; -+static u32 kexec_pmd1[1024] PAGE_ALIGNED; - #endif -+static u32 kexec_pte0[1024] PAGE_ALIGNED; -+static u32 kexec_pte1[1024] PAGE_ALIGNED; - - static void set_idt(void *newidt, __u16 limit) - { -@@ -127,16 +70,6 @@ static void load_segments(void) - #undef __STR - } - --typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)( -- unsigned long indirection_page, -- unsigned long reboot_code_buffer, -- unsigned long start_address, -- unsigned int has_pae) ATTRIB_NORET; -- --extern const unsigned char relocate_new_kernel[]; --extern void relocate_new_kernel_end(void); --extern const unsigned int relocate_new_kernel_size; -- - /* - * A architecture hook called to validate the - * proposed image and prepare the control pages -@@ -169,25 +102,29 @@ void machine_kexec_cleanup(struct kimage - */ - NORET_TYPE void machine_kexec(struct kimage *image) - { -- unsigned long page_list; -- unsigned long reboot_code_buffer; -- -- relocate_new_kernel_t rnk; -+ unsigned long page_list[PAGES_NR]; -+ void *control_page; - - /* Interrupts aren't acceptable while we reboot */ - local_irq_disable(); - -- /* Compute some offsets */ -- reboot_code_buffer = page_to_pfn(image->control_code_page) -- << PAGE_SHIFT; -- page_list = image->head; -- -- /* Set up an identity mapping for the reboot_code_buffer */ -- identity_map_page(reboot_code_buffer); -- -- /* copy it out */ -- memcpy((void *)reboot_code_buffer, relocate_new_kernel, -- relocate_new_kernel_size); -+ control_page = page_address(image->control_code_page); -+ memcpy(control_page, relocate_kernel, PAGE_SIZE); -+ -+ page_list[PA_CONTROL_PAGE] = __pa(control_page); -+ page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; -+ page_list[PA_PGD] = __pa(kexec_pgd); -+ page_list[VA_PGD] = (unsigned long)kexec_pgd; 
-+#ifdef CONFIG_X86_PAE -+ page_list[PA_PMD_0] = __pa(kexec_pmd0); -+ page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; -+ page_list[PA_PMD_1] = __pa(kexec_pmd1); -+ page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; -+#endif -+ page_list[PA_PTE_0] = __pa(kexec_pte0); -+ page_list[VA_PTE_0] = (unsigned long)kexec_pte0; -+ page_list[PA_PTE_1] = __pa(kexec_pte1); -+ page_list[VA_PTE_1] = (unsigned long)kexec_pte1; - - /* The segment registers are funny things, they have both a - * visible and an invisible part. Whenever the visible part is -@@ -206,6 +143,6 @@ NORET_TYPE void machine_kexec(struct kim - set_idt(phys_to_virt(0),0); - - /* now call it */ -- rnk = (relocate_new_kernel_t) reboot_code_buffer; -- (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae); -+ relocate_kernel((unsigned long)image->head, (unsigned long)page_list, -+ image->start, cpu_has_pae); - } -diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/relocate_kernel.S ./arch/i386/kernel/relocate_kernel.S ---- ../orig-linux-2.6.18/arch/i386/kernel/relocate_kernel.S 2006-09-20 04:42:06.000000000 +0100 -+++ ./arch/i386/kernel/relocate_kernel.S 2007-01-12 16:03:23.000000000 +0000 -@@ -7,16 +7,138 @@ - */ - - #include <linux/linkage.h> -+#include <asm/page.h> -+#include <asm/kexec.h> -+ -+/* -+ * Must be relocatable PIC code callable as a C function -+ */ -+ -+#define PTR(x) (x << 2) -+#define PAGE_ALIGNED (1 << PAGE_SHIFT) -+#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */ -+#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */ -+ -+ .text -+ .align PAGE_ALIGNED -+ .globl relocate_kernel -+relocate_kernel: -+ movl 8(%esp), %ebp /* list of pages */ -+ -+#ifdef CONFIG_X86_PAE -+ /* map the control page at its virtual address */ -+ -+ movl PTR(VA_PGD)(%ebp), %edi -+ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax -+ andl $0xc0000000, %eax -+ shrl $27, %eax -+ addl %edi, %eax -+ -+ movl PTR(PA_PMD_0)(%ebp), %edx -+ orl $PAE_PGD_ATTR, %edx -+ movl %edx, (%eax) -+ -+ movl PTR(VA_PMD_0)(%ebp), %edi -+ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax -+ andl $0x3fe00000, %eax -+ shrl $18, %eax -+ addl %edi, %eax -+ -+ movl PTR(PA_PTE_0)(%ebp), %edx -+ orl $PAGE_ATTR, %edx -+ movl %edx, (%eax) -+ -+ movl PTR(VA_PTE_0)(%ebp), %edi -+ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax -+ andl $0x001ff000, %eax -+ shrl $9, %eax -+ addl %edi, %eax -+ -+ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx -+ orl $PAGE_ATTR, %edx -+ movl %edx, (%eax) -+ -+ /* identity map the control page at its physical address */ -+ -+ movl PTR(VA_PGD)(%ebp), %edi -+ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax -+ andl $0xc0000000, %eax -+ shrl $27, %eax -+ addl %edi, %eax -+ -+ movl PTR(PA_PMD_1)(%ebp), %edx -+ orl $PAE_PGD_ATTR, %edx -+ movl %edx, (%eax) -+ -+ movl PTR(VA_PMD_1)(%ebp), %edi -+ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax -+ andl $0x3fe00000, %eax -+ shrl $18, %eax -+ addl %edi, %eax -+ -+ movl PTR(PA_PTE_1)(%ebp), %edx -+ orl $PAGE_ATTR, %edx -+ movl %edx, (%eax) -+ -+ movl PTR(VA_PTE_1)(%ebp), %edi -+ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax -+ andl $0x001ff000, %eax -+ shrl $9, %eax -+ addl %edi, %eax -+ -+ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx -+ orl $PAGE_ATTR, %edx -+ movl %edx, (%eax) -+#else -+ /* map the control page at its virtual address */ -+ -+ movl PTR(VA_PGD)(%ebp), %edi -+ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax -+ andl $0xffc00000, %eax -+ shrl $20, %eax -+ addl %edi, %eax -+ -+ movl PTR(PA_PTE_0)(%ebp), %edx -+ orl $PAGE_ATTR, %edx -+ movl %edx, (%eax) -+ -+ movl PTR(VA_PTE_0)(%ebp), %edi -+ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax -+ andl $0x003ff000, %eax -+ 
shrl $10, %eax -+ addl %edi, %eax -+ -+ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx -+ orl $PAGE_ATTR, %edx -+ movl %edx, (%eax) -+ -+ /* identity map the control page at its physical address */ -+ -+ movl PTR(VA_PGD)(%ebp), %edi -+ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax -+ andl $0xffc00000, %eax -+ shrl $20, %eax -+ addl %edi, %eax -+ -+ movl PTR(PA_PTE_1)(%ebp), %edx -+ orl $PAGE_ATTR, %edx -+ movl %edx, (%eax) -+ -+ movl PTR(VA_PTE_1)(%ebp), %edi -+ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax -+ andl $0x003ff000, %eax -+ shrl $10, %eax -+ addl %edi, %eax -+ -+ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx -+ orl $PAGE_ATTR, %edx -+ movl %edx, (%eax) -+#endif - -- /* -- * Must be relocatable PIC code callable as a C function, that once -- * it starts can not use the previous processes stack. -- */ -- .globl relocate_new_kernel - relocate_new_kernel: - /* read the arguments and say goodbye to the stack */ - movl 4(%esp), %ebx /* page_list */ -- movl 8(%esp), %ebp /* reboot_code_buffer */ -+ movl 8(%esp), %ebp /* list of pages */ - movl 12(%esp), %edx /* start address */ - movl 16(%esp), %ecx /* cpu_has_pae */ - -@@ -24,11 +146,26 @@ relocate_new_kernel: - pushl $0 - popfl - -- /* set a new stack at the bottom of our page... */ -- lea 4096(%ebp), %esp -+ /* get physical address of control page now */ -+ /* this is impossible after page table switch */ -+ movl PTR(PA_CONTROL_PAGE)(%ebp), %edi -+ -+ /* switch to new set of page tables */ -+ movl PTR(PA_PGD)(%ebp), %eax -+ movl %eax, %cr3 -+ -+ /* setup a new stack at the end of the physical control page */ -+ lea 4096(%edi), %esp - -- /* store the parameters back on the stack */ -- pushl %edx /* store the start address */ -+ /* jump to identity mapped page */ -+ movl %edi, %eax -+ addl $(identity_mapped - relocate_kernel), %eax -+ pushl %eax -+ ret -+ -+identity_mapped: -+ /* store the start address on the stack */ -+ pushl %edx - - /* Set cr0 to a known state: - * 31 0 == Paging disabled -@@ -113,8 +250,3 @@ relocate_new_kernel: - xorl %edi, %edi - xorl %ebp, %ebp - ret --relocate_new_kernel_end: -- -- .globl relocate_new_kernel_size --relocate_new_kernel_size: -- .long relocate_new_kernel_end - relocate_new_kernel -diff -pruN ../orig-linux-2.6.18/include/asm-i386/kexec.h ./include/asm-i386/kexec.h ---- ../orig-linux-2.6.18/include/asm-i386/kexec.h 2006-09-20 04:42:06.000000000 +0100 -+++ ./include/asm-i386/kexec.h 2007-01-12 16:03:23.000000000 +0000 -@@ -1,6 +1,26 @@ - #ifndef _I386_KEXEC_H - #define _I386_KEXEC_H - -+#define PA_CONTROL_PAGE 0 -+#define VA_CONTROL_PAGE 1 -+#define PA_PGD 2 -+#define VA_PGD 3 -+#define PA_PTE_0 4 -+#define VA_PTE_0 5 -+#define PA_PTE_1 6 -+#define VA_PTE_1 7 -+#ifdef CONFIG_X86_PAE -+#define PA_PMD_0 8 -+#define VA_PMD_0 9 -+#define PA_PMD_1 10 -+#define VA_PMD_1 11 -+#define PAGES_NR 12 -+#else -+#define PAGES_NR 8 -+#endif -+ -+#ifndef __ASSEMBLY__ -+ - #include <asm/fixmap.h> - #include <asm/ptrace.h> - #include <asm/string.h> -@@ -72,5 +92,12 @@ static inline void crash_setup_regs(stru - newregs->eip = (unsigned long)current_text_addr(); - } - } -+asmlinkage NORET_TYPE void -+relocate_kernel(unsigned long indirection_page, -+ unsigned long control_page, -+ unsigned long start_address, -+ unsigned int has_pae) ATTRIB_NORET; -+ -+#endif /* __ASSEMBLY__ */ - - #endif /* _I386_KEXEC_H */ diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch --- a/patches/linux-2.6.18/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 
00:00:00 1970 +0000 @@ -1,355 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/machine_kexec.c ./arch/x86_64/kernel/machine_kexec.c ---- ../orig-linux-2.6.18/arch/x86_64/kernel/machine_kexec.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./arch/x86_64/kernel/machine_kexec.c 2007-01-12 16:03:49.000000000 +0000 -@@ -15,6 +15,15 @@ - #include <asm/mmu_context.h> - #include <asm/io.h> - -+#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) -+static u64 kexec_pgd[512] PAGE_ALIGNED; -+static u64 kexec_pud0[512] PAGE_ALIGNED; -+static u64 kexec_pmd0[512] PAGE_ALIGNED; -+static u64 kexec_pte0[512] PAGE_ALIGNED; -+static u64 kexec_pud1[512] PAGE_ALIGNED; -+static u64 kexec_pmd1[512] PAGE_ALIGNED; -+static u64 kexec_pte1[512] PAGE_ALIGNED; -+ - static void init_level2_page(pmd_t *level2p, unsigned long addr) - { - unsigned long end_addr; -@@ -144,32 +153,19 @@ static void load_segments(void) - ); - } - --typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page, -- unsigned long control_code_buffer, -- unsigned long start_address, -- unsigned long pgtable) ATTRIB_NORET; -- --extern const unsigned char relocate_new_kernel[]; --extern const unsigned long relocate_new_kernel_size; -- - int machine_kexec_prepare(struct kimage *image) - { -- unsigned long start_pgtable, control_code_buffer; -+ unsigned long start_pgtable; - int result; - - /* Calculate the offsets */ - start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; -- control_code_buffer = start_pgtable + PAGE_SIZE; - - /* Setup the identity mapped 64bit page table */ - result = init_pgtable(image, start_pgtable); - if (result) - return result; - -- /* Place the code in the reboot code buffer */ -- memcpy(__va(control_code_buffer), relocate_new_kernel, -- relocate_new_kernel_size); -- - return 0; - } - -@@ -184,28 +180,34 @@ void machine_kexec_cleanup(struct kimage - */ - NORET_TYPE void machine_kexec(struct kimage *image) - { -- unsigned long page_list; -- unsigned long control_code_buffer; -- unsigned long start_pgtable; -- relocate_new_kernel_t rnk; -+ unsigned long page_list[PAGES_NR]; -+ void *control_page; - - /* Interrupts aren't acceptable while we reboot */ - local_irq_disable(); - -- /* Calculate the offsets */ -- page_list = image->head; -- start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; -- control_code_buffer = start_pgtable + PAGE_SIZE; -+ control_page = page_address(image->control_code_page) + PAGE_SIZE; -+ memcpy(control_page, relocate_kernel, PAGE_SIZE); - -- /* Set the low half of the page table to my identity mapped -- * page table for kexec. Leave the high half pointing at the -- * kernel pages. Don't bother to flush the global pages -- * as that will happen when I fully switch to my identity mapped -- * page table anyway. 
-- */ -- memcpy(__va(read_cr3()), __va(start_pgtable), PAGE_SIZE/2); -- __flush_tlb(); -+ page_list[PA_CONTROL_PAGE] = __pa(control_page); -+ page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; -+ page_list[PA_PGD] = __pa(kexec_pgd); -+ page_list[VA_PGD] = (unsigned long)kexec_pgd; -+ page_list[PA_PUD_0] = __pa(kexec_pud0); -+ page_list[VA_PUD_0] = (unsigned long)kexec_pud0; -+ page_list[PA_PMD_0] = __pa(kexec_pmd0); -+ page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; -+ page_list[PA_PTE_0] = __pa(kexec_pte0); -+ page_list[VA_PTE_0] = (unsigned long)kexec_pte0; -+ page_list[PA_PUD_1] = __pa(kexec_pud1); -+ page_list[VA_PUD_1] = (unsigned long)kexec_pud1; -+ page_list[PA_PMD_1] = __pa(kexec_pmd1); -+ page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; -+ page_list[PA_PTE_1] = __pa(kexec_pte1); -+ page_list[VA_PTE_1] = (unsigned long)kexec_pte1; - -+ page_list[PA_TABLE_PAGE] = -+ (unsigned long)__pa(page_address(image->control_code_page)); - - /* The segment registers are funny things, they have both a - * visible and an invisible part. Whenever the visible part is -@@ -222,7 +224,8 @@ NORET_TYPE void machine_kexec(struct kim - */ - set_gdt(phys_to_virt(0),0); - set_idt(phys_to_virt(0),0); -+ - /* now call it */ -- rnk = (relocate_new_kernel_t) control_code_buffer; -- (*rnk)(page_list, control_code_buffer, image->start, start_pgtable); -+ relocate_kernel((unsigned long)image->head, (unsigned long)page_list, -+ image->start); - } -diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/relocate_kernel.S ./arch/x86_64/kernel/relocate_kernel.S ---- ../orig-linux-2.6.18/arch/x86_64/kernel/relocate_kernel.S 2006-09-20 04:42:06.000000000 +0100 -+++ ./arch/x86_64/kernel/relocate_kernel.S 2007-01-12 16:03:49.000000000 +0000 -@@ -7,31 +7,169 @@ - */ - - #include <linux/linkage.h> -+#include <asm/page.h> -+#include <asm/kexec.h> - -- /* -- * Must be relocatable PIC code callable as a C function, that once -- * it starts can not use the previous processes stack. 
-- */ -- .globl relocate_new_kernel -+/* -+ * Must be relocatable PIC code callable as a C function -+ */ -+ -+#define PTR(x) (x << 3) -+#define PAGE_ALIGNED (1 << PAGE_SHIFT) -+#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */ -+ -+ .text -+ .align PAGE_ALIGNED - .code64 -+ .globl relocate_kernel -+relocate_kernel: -+ /* %rdi indirection_page -+ * %rsi page_list -+ * %rdx start address -+ */ -+ -+ /* map the control page at its virtual address */ -+ -+ movq $0x0000ff8000000000, %r10 /* mask */ -+ mov $(39 - 3), %cl /* bits to shift */ -+ movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ -+ -+ movq %r11, %r9 -+ andq %r10, %r9 -+ shrq %cl, %r9 -+ -+ movq PTR(VA_PGD)(%rsi), %r8 -+ addq %r8, %r9 -+ movq PTR(PA_PUD_0)(%rsi), %r8 -+ orq $PAGE_ATTR, %r8 -+ movq %r8, (%r9) -+ -+ shrq $9, %r10 -+ sub $9, %cl -+ -+ movq %r11, %r9 -+ andq %r10, %r9 -+ shrq %cl, %r9 -+ -+ movq PTR(VA_PUD_0)(%rsi), %r8 -+ addq %r8, %r9 -+ movq PTR(PA_PMD_0)(%rsi), %r8 -+ orq $PAGE_ATTR, %r8 -+ movq %r8, (%r9) -+ -+ shrq $9, %r10 -+ sub $9, %cl -+ -+ movq %r11, %r9 -+ andq %r10, %r9 -+ shrq %cl, %r9 -+ -+ movq PTR(VA_PMD_0)(%rsi), %r8 -+ addq %r8, %r9 -+ movq PTR(PA_PTE_0)(%rsi), %r8 -+ orq $PAGE_ATTR, %r8 -+ movq %r8, (%r9) -+ -+ shrq $9, %r10 -+ sub $9, %cl -+ -+ movq %r11, %r9 -+ andq %r10, %r9 -+ shrq %cl, %r9 -+ -+ movq PTR(VA_PTE_0)(%rsi), %r8 -+ addq %r8, %r9 -+ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 -+ orq $PAGE_ATTR, %r8 -+ movq %r8, (%r9) -+ -+ /* identity map the control page at its physical address */ -+ -+ movq $0x0000ff8000000000, %r10 /* mask */ -+ mov $(39 - 3), %cl /* bits to shift */ -+ movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ -+ -+ movq %r11, %r9 -+ andq %r10, %r9 -+ shrq %cl, %r9 -+ -+ movq PTR(VA_PGD)(%rsi), %r8 -+ addq %r8, %r9 -+ movq PTR(PA_PUD_1)(%rsi), %r8 -+ orq $PAGE_ATTR, %r8 -+ movq %r8, (%r9) -+ -+ shrq $9, %r10 -+ sub $9, %cl -+ -+ movq %r11, %r9 -+ andq %r10, %r9 -+ shrq %cl, %r9 -+ -+ movq PTR(VA_PUD_1)(%rsi), %r8 -+ addq %r8, %r9 -+ movq PTR(PA_PMD_1)(%rsi), %r8 -+ orq $PAGE_ATTR, %r8 -+ movq %r8, (%r9) -+ -+ shrq $9, %r10 -+ sub $9, %cl -+ -+ movq %r11, %r9 -+ andq %r10, %r9 -+ shrq %cl, %r9 -+ -+ movq PTR(VA_PMD_1)(%rsi), %r8 -+ addq %r8, %r9 -+ movq PTR(PA_PTE_1)(%rsi), %r8 -+ orq $PAGE_ATTR, %r8 -+ movq %r8, (%r9) -+ -+ shrq $9, %r10 -+ sub $9, %cl -+ -+ movq %r11, %r9 -+ andq %r10, %r9 -+ shrq %cl, %r9 -+ -+ movq PTR(VA_PTE_1)(%rsi), %r8 -+ addq %r8, %r9 -+ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 -+ orq $PAGE_ATTR, %r8 -+ movq %r8, (%r9) -+ - relocate_new_kernel: -- /* %rdi page_list -- * %rsi reboot_code_buffer -+ /* %rdi indirection_page -+ * %rsi page_list - * %rdx start address -- * %rcx page_table -- * %r8 arg5 -- * %r9 arg6 - */ - - /* zero out flags, and disable interrupts */ - pushq $0 - popfq - -- /* set a new stack at the bottom of our page... 
*/ -- lea 4096(%rsi), %rsp -+ /* get physical address of control page now */ -+ /* this is impossible after page table switch */ -+ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 -+ -+ /* get physical address of page table now too */ -+ movq PTR(PA_TABLE_PAGE)(%rsi), %rcx -+ -+ /* switch to new set of page tables */ -+ movq PTR(PA_PGD)(%rsi), %r9 -+ movq %r9, %cr3 -+ -+ /* setup a new stack at the end of the physical control page */ -+ lea 4096(%r8), %rsp -+ -+ /* jump to identity mapped page */ -+ addq $(identity_mapped - relocate_kernel), %r8 -+ pushq %r8 -+ ret - -- /* store the parameters back on the stack */ -- pushq %rdx /* store the start address */ -+identity_mapped: -+ /* store the start address on the stack */ -+ pushq %rdx - - /* Set cr0 to a known state: - * 31 1 == Paging enabled -@@ -136,8 +274,3 @@ relocate_new_kernel: - xorq %r15, %r15 - - ret --relocate_new_kernel_end: -- -- .globl relocate_new_kernel_size --relocate_new_kernel_size: -- .quad relocate_new_kernel_end - relocate_new_kernel -diff -pruN ../orig-linux-2.6.18/include/asm-x86_64/kexec.h ./include/asm-x86_64/kexec.h ---- ../orig-linux-2.6.18/include/asm-x86_64/kexec.h 2006-09-20 04:42:06.000000000 +0100 -+++ ./include/asm-x86_64/kexec.h 2007-01-12 16:03:49.000000000 +0000 -@@ -1,6 +1,27 @@ - #ifndef _X86_64_KEXEC_H - #define _X86_64_KEXEC_H - -+#define PA_CONTROL_PAGE 0 -+#define VA_CONTROL_PAGE 1 -+#define PA_PGD 2 -+#define VA_PGD 3 -+#define PA_PUD_0 4 -+#define VA_PUD_0 5 -+#define PA_PMD_0 6 -+#define VA_PMD_0 7 -+#define PA_PTE_0 8 -+#define VA_PTE_0 9 -+#define PA_PUD_1 10 -+#define VA_PUD_1 11 -+#define PA_PMD_1 12 -+#define VA_PMD_1 13 -+#define PA_PTE_1 14 -+#define VA_PTE_1 15 -+#define PA_TABLE_PAGE 16 -+#define PAGES_NR 17 -+ -+#ifndef __ASSEMBLY__ -+ - #include <linux/string.h> - - #include <asm/page.h> -@@ -64,4 +85,12 @@ static inline void crash_setup_regs(stru - newregs->rip = (unsigned long)current_text_addr(); - } - } -+ -+NORET_TYPE void -+relocate_kernel(unsigned long indirection_page, -+ unsigned long page_list, -+ unsigned long start_address) ATTRIB_NORET; -+ -+#endif /* __ASSEMBLY__ */ -+ - #endif /* _X86_64_KEXEC_H */ diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/git-5ee7737379b1d7f0c977c0f1661fbaf01a8d4721.patch --- a/patches/linux-2.6.18/git-5ee7737379b1d7f0c977c0f1661fbaf01a8d4721.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ ---- ./arch/ia64/kernel/smp.c.orig 2007-05-02 19:00:01.000000000 +0900 -+++ ./arch/ia64/kernel/smp.c 2007-05-02 19:04:32.000000000 +0900 -@@ -328,10 +328,14 @@ int - smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait) - { - struct call_data_struct data; -- int cpus = num_online_cpus()-1; -+ int cpus; - -- if (!cpus) -+ spin_lock(&call_lock); -+ cpus = num_online_cpus()-1; -+ if (!cpus) { -+ spin_unlock(&call_lock); - return 0; -+ } - - /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); -@@ -343,8 +347,6 @@ smp_call_function (void (*func) (void *i - if (wait) - atomic_set(&data.finished, 0); - -- spin_lock(&call_lock); -- - call_data = &data; - mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC */ - send_IPI_allbutself(IPI_CALL_FUNC); diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/git-c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4.patch --- a/patches/linux-2.6.18/git-c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,29 +0,0 @@ -commit 
c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4 -Author: Vivek Goyal <vgoyal@xxxxxxxxxx> -Date: Wed Nov 8 17:44:41 2006 -0800 - - [PATCH] i386: Force data segment to be 4K aligned - - o Currently there is no specific alignment restriction in linker script - and in some cases it can be placed non 4K aligned addresses. This fails - kexec which checks that segment to be loaded is page aligned. - - o I guess, it does not harm data segment to be 4K aligned. - - Signed-off-by: Vivek Goyal <vgoyal@xxxxxxxxxx> - Signed-off-by: Andi Kleen <ak@xxxxxxx> - Signed-off-by: Andrew Morton <akpm@xxxxxxxx> - Signed-off-by: Linus Torvalds <torvalds@xxxxxxxx> - -diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S -index adc1f23..c6f84a0 100644 ---- a/arch/i386/kernel/vmlinux.lds.S -+++ b/arch/i386/kernel/vmlinux.lds.S -@@ -51,6 +51,7 @@ SECTIONS - __tracedata_end = .; - - /* writeable */ -+ . = ALIGN(4096); - .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ - *(.data) - CONSTRUCTORS diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch --- a/patches/linux-2.6.18/git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/vmlinux.lds.S ./arch/x86_64/kernel/vmlinux.lds.S ---- ../orig-linux-2.6.18/arch/x86_64/kernel/vmlinux.lds.S 2007-01-12 18:20:02.000000000 +0000 -+++ ./arch/x86_64/kernel/vmlinux.lds.S 2007-01-12 18:20:20.000000000 +0000 -@@ -17,6 +17,7 @@ PHDRS { - text PT_LOAD FLAGS(5); /* R_E */ - data PT_LOAD FLAGS(7); /* RWE */ - user PT_LOAD FLAGS(7); /* RWE */ -+ data.init PT_LOAD FLAGS(7); /* RWE */ - note PT_NOTE FLAGS(4); /* R__ */ - } - SECTIONS -@@ -131,7 +132,7 @@ SECTIONS - . = ALIGN(8192); /* init_task */ - .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { - *(.data.init_task) -- } :data -+ }:data.init - - . 
= ALIGN(4096); - .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/i386-mach-io-check-nmi.patch --- a/patches/linux-2.6.18/i386-mach-io-check-nmi.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,45 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/traps.c ./arch/i386/kernel/traps.c ---- ../orig-linux-2.6.18/arch/i386/kernel/traps.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./arch/i386/kernel/traps.c 2007-01-12 16:07:49.000000000 +0000 -@@ -642,18 +642,11 @@ static void mem_parity_error(unsigned ch - - static void io_check_error(unsigned char reason, struct pt_regs * regs) - { -- unsigned long i; -- - printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); - show_registers(regs); - - /* Re-enable the IOCK line, wait for a few seconds */ -- reason = (reason & 0xf) | 8; -- outb(reason, 0x61); -- i = 2000; -- while (--i) udelay(1000); -- reason &= ~8; -- outb(reason, 0x61); -+ clear_io_check_error(reason); - } - - static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) -diff -pruN ../orig-linux-2.6.18/include/asm-i386/mach-default/mach_traps.h ./include/asm-i386/mach-default/mach_traps.h ---- ../orig-linux-2.6.18/include/asm-i386/mach-default/mach_traps.h 2006-09-20 04:42:06.000000000 +0100 -+++ ./include/asm-i386/mach-default/mach_traps.h 2007-01-12 16:07:49.000000000 +0000 -@@ -15,6 +15,18 @@ static inline void clear_mem_error(unsig - outb(reason, 0x61); - } - -+static inline void clear_io_check_error(unsigned char reason) -+{ -+ unsigned long i; -+ -+ reason = (reason & 0xf) | 8; -+ outb(reason, 0x61); -+ i = 2000; -+ while (--i) udelay(1000); -+ reason &= ~8; -+ outb(reason, 0x61); -+} -+ - static inline unsigned char get_nmi_reason(void) - { - return inb(0x61); diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/linux-2.6.18-xen-375-748cd890ea7f --- a/patches/linux-2.6.18/linux-2.6.18-xen-375-748cd890ea7f Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,294 +0,0 @@ -# HG changeset patch -# User Keir Fraser <keir.fraser@xxxxxxxxxx> -# Date 1199916724 0 -# Node ID 748cd890ea7f56752311e519e80eece9d850c01a -# Parent fedc10fba9f1d5ec0c72dbcbca87e508222b4c48 -x86_64: Add TIF_RESTORE_SIGMASK (from upstream Linux) - -We need TIF_RESTORE_SIGMASK in order to support ppoll() and pselect() -system calls. This patch originally came from Andi, and was based -heavily on David Howells' implementation of same on i386. I fixed a -typo which was causing do_signal() to use the wrong signal mask. 
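[Illustrative aside: the ppoll()/pselect() support that TIF_RESTORE_SIGMASK enables lets userspace swap the signal mask atomically around a sleep. A minimal userspace sketch, independent of this tree:]

#include <signal.h>
#include <sys/select.h>

/* Wait for fd to become readable with SIGCHLD unblocked only while asleep.
 * The kernel parks the caller's mask in saved_sigmask and restores it on
 * return; that is the TIF_RESTORE_SIGMASK mechanism described above. */
static int wait_readable_or_sigchld(int fd)
{
	sigset_t sleep_mask;
	fd_set rfds;

	sigemptyset(&sleep_mask);   /* nothing blocked during the wait */
	FD_ZERO(&rfds);
	FD_SET(fd, &rfds);

	return pselect(fd + 1, &rfds, NULL, NULL, NULL, &sleep_mask);
}
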
- -Signed-off-by: David Woodhouse <dwmw2@xxxxxxxxxxxxx> -Signed-off-by: Andi Kleen <ak@xxxxxxx> - -diff -r fedc10fba9f1 -r 748cd890ea7f arch/x86_64/ia32/ia32_signal.c ---- a/arch/x86_64/ia32/ia32_signal.c Tue Jan 08 09:55:29 2008 +0000 -+++ b/arch/x86_64/ia32/ia32_signal.c Wed Jan 09 22:12:04 2008 +0000 -@@ -113,25 +113,19 @@ int copy_siginfo_from_user32(siginfo_t * - } - - asmlinkage long --sys32_sigsuspend(int history0, int history1, old_sigset_t mask, -- struct pt_regs *regs) --{ -- sigset_t saveset; -- -+sys32_sigsuspend(int history0, int history1, old_sigset_t mask) -+{ - mask &= _BLOCKABLE; - spin_lock_irq(&current->sighand->siglock); -- saveset = current->blocked; -+ current->saved_sigmask = current->blocked; - siginitset(&current->blocked, mask); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - -- regs->rax = -EINTR; -- while (1) { -- current->state = TASK_INTERRUPTIBLE; -- schedule(); -- if (do_signal(regs, &saveset)) -- return -EINTR; -- } -+ current->state = TASK_INTERRUPTIBLE; -+ schedule(); -+ set_thread_flag(TIF_RESTORE_SIGMASK); -+ return -ERESTARTNOHAND; - } - - asmlinkage long -@@ -508,11 +502,11 @@ int ia32_setup_frame(int sig, struct k_s - current->comm, current->pid, frame, regs->rip, frame->pretcode); - #endif - -- return 1; -+ return 0; - - give_sigsegv: - force_sigsegv(sig, current); -- return 0; -+ return -EFAULT; - } - - int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, -@@ -595,7 +589,7 @@ int ia32_setup_rt_frame(int sig, struct - regs->ss = __USER32_DS; - - set_fs(USER_DS); -- regs->eflags &= ~TF_MASK; -+ regs->eflags &= ~TF_MASK; - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); - -@@ -604,9 +598,9 @@ int ia32_setup_rt_frame(int sig, struct - current->comm, current->pid, frame, regs->rip, frame->pretcode); - #endif - -- return 1; -+ return 0; - - give_sigsegv: - force_sigsegv(sig, current); -- return 0; --} -+ return -EFAULT; -+} -diff -r fedc10fba9f1 -r 748cd890ea7f arch/x86_64/kernel/signal.c ---- a/arch/x86_64/kernel/signal.c Tue Jan 08 09:55:29 2008 +0000 -+++ b/arch/x86_64/kernel/signal.c Wed Jan 09 22:12:04 2008 +0000 -@@ -36,37 +36,6 @@ int ia32_setup_rt_frame(int sig, struct - sigset_t *set, struct pt_regs * regs); - int ia32_setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs * regs); -- --asmlinkage long --sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, struct pt_regs *regs) --{ -- sigset_t saveset, newset; -- -- /* XXX: Don't preclude handling different sized sigset_t's.
*/ -- if (sigsetsize != sizeof(sigset_t)) -- return -EINVAL; -- -- if (copy_from_user(&newset, unewset, sizeof(newset))) -- return -EFAULT; -- sigdelsetmask(&newset, ~_BLOCKABLE); -- -- spin_lock_irq(&current->sighand->siglock); -- saveset = current->blocked; -- current->blocked = newset; -- recalc_sigpending(); -- spin_unlock_irq(&current->sighand->siglock); --#ifdef DEBUG_SIG -- printk("rt_sigsuspend savset(%lx) newset(%lx) regs(%p) rip(%lx)\n", -- saveset, newset, regs, regs->rip); --#endif -- regs->rax = -EINTR; -- while (1) { -- current->state = TASK_INTERRUPTIBLE; -- schedule(); -- if (do_signal(regs, &saveset)) -- return -EINTR; -- } --} - - asmlinkage long - sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, -@@ -341,11 +310,11 @@ static int setup_rt_frame(int sig, struc - current->comm, current->pid, frame, regs->rip, frame->pretcode); - #endif - -- return 1; -+ return 0; - - give_sigsegv: - force_sigsegv(sig, current); -- return 0; -+ return -EFAULT; - } - - /* -@@ -408,7 +377,7 @@ handle_signal(unsigned long sig, siginfo - #endif - ret = setup_rt_frame(sig, ka, info, oldset, regs); - -- if (ret) { -+ if (ret == 0) { - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) -@@ -425,11 +394,12 @@ handle_signal(unsigned long sig, siginfo - * want to handle. Thus you cannot kill init even with a SIGKILL even by - * mistake. - */ --int do_signal(struct pt_regs *regs, sigset_t *oldset) -+static void do_signal(struct pt_regs *regs) - { - struct k_sigaction ka; - siginfo_t info; - int signr; -+ sigset_t *oldset; - - /* - * We want the common case to go fast, which -@@ -438,9 +408,11 @@ int do_signal(struct pt_regs *regs, sigs - * if so. - */ - if (!user_mode(regs)) -- return 1; -- -- if (!oldset) -+ return; -+ -+ if (test_thread_flag(TIF_RESTORE_SIGMASK)) -+ oldset = &current->saved_sigmask; -+ else - oldset = &current->blocked; - - signr = get_signal_to_deliver(&info, &ka, regs, NULL); -@@ -454,30 +426,46 @@ int do_signal(struct pt_regs *regs, sigs - set_debugreg(current->thread.debugreg7, 7); - - /* Whee! Actually deliver the signal. */ -- return handle_signal(signr, &info, &ka, oldset, regs); -+ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { -+ /* a signal was successfully delivered; the saved -+ * sigmask will have been stored in the signal frame, -+ * and will be restored by sigreturn, so we can simply -+ * clear the TIF_RESTORE_SIGMASK flag */ -+ clear_thread_flag(TIF_RESTORE_SIGMASK); -+ } -+ return; - } - - /* Did we come from a system call? */ - if ((long)regs->orig_rax >= 0) { - /* Restart the system call - no handlers present */ - long res = regs->rax; -- if (res == -ERESTARTNOHAND || -- res == -ERESTARTSYS || -- res == -ERESTARTNOINTR) { -+ switch (res) { -+ case -ERESTARTNOHAND: -+ case -ERESTARTSYS: -+ case -ERESTARTNOINTR: - regs->rax = regs->orig_rax; - regs->rip -= 2; -- } -- if (regs->rax == (unsigned long)-ERESTART_RESTARTBLOCK) { -+ break; -+ case -ERESTART_RESTARTBLOCK: - regs->rax = test_thread_flag(TIF_IA32) ? - __NR_ia32_restart_syscall : - __NR_restart_syscall; - regs->rip -= 2; -- } -- } -- return 0; --} -- --void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, __u32 thread_info_flags) -+ break; -+ } -+ } -+ -+ /* if there's no signal to deliver, we just put the saved sigmask -+ back.
*/ -+ if (test_thread_flag(TIF_RESTORE_SIGMASK)) { -+ clear_thread_flag(TIF_RESTORE_SIGMASK); -+ sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); -+ } -+} -+ -+void -+do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) - { - #ifdef DEBUG_SIG - printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx pending:%lx\n", -@@ -491,8 +479,8 @@ void do_notify_resume(struct pt_regs *re - } - - /* deal with pending signal delivery */ -- if (thread_info_flags & _TIF_SIGPENDING) -- do_signal(regs,oldset); -+ if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK)) -+ do_signal(regs); - } - - void signal_fault(struct pt_regs *regs, void __user *frame, char *where) -diff -r fedc10fba9f1 -r 748cd890ea7f include/asm-x86_64/signal.h ---- a/include/asm-x86_64/signal.h Tue Jan 08 09:55:29 2008 +0000 -+++ b/include/asm-x86_64/signal.h Wed Jan 09 22:12:04 2008 +0000 -@@ -22,10 +22,6 @@ typedef struct { - typedef struct { - unsigned long sig[_NSIG_WORDS]; - } sigset_t; -- -- --struct pt_regs; --asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset); - - - #else -diff -r fedc10fba9f1 -r 748cd890ea7f include/asm-x86_64/thread_info.h ---- a/include/asm-x86_64/thread_info.h Tue Jan 08 09:55:29 2008 +0000 -+++ b/include/asm-x86_64/thread_info.h Wed Jan 09 22:12:04 2008 +0000 -@@ -114,6 +114,7 @@ static inline struct thread_info *stack_ - #define TIF_IRET 5 /* force IRET */ - #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ - #define TIF_SECCOMP 8 /* secure computing */ -+#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ - /* 16 free */ - #define TIF_IA32 17 /* 32bit process */ - #define TIF_FORK 18 /* ret_from_fork */ -@@ -128,6 +129,7 @@ static inline struct thread_info *stack_ - #define _TIF_IRET (1<<TIF_IRET) - #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) - #define _TIF_SECCOMP (1<<TIF_SECCOMP) -+#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) - #define _TIF_IA32 (1<<TIF_IA32) - #define _TIF_FORK (1<<TIF_FORK) - #define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING) -diff -r fedc10fba9f1 -r 748cd890ea7f include/asm-x86_64/unistd.h ---- a/include/asm-x86_64/unistd.h Tue Jan 08 09:55:29 2008 +0000 -+++ b/include/asm-x86_64/unistd.h Wed Jan 09 22:12:04 2008 +0000 -@@ -658,6 +658,7 @@ do { \ - #define __ARCH_WANT_SYS_SIGPENDING - #define __ARCH_WANT_SYS_SIGPROCMASK - #define __ARCH_WANT_SYS_RT_SIGACTION -+#define __ARCH_WANT_SYS_RT_SIGSUSPEND - #define __ARCH_WANT_SYS_TIME - #define __ARCH_WANT_COMPAT_SYS_TIME - diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/linux-2.6.18-xen-376-353802ec1caf --- a/patches/linux-2.6.18/linux-2.6.18-xen-376-353802ec1caf Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,40 +0,0 @@ -# HG changeset patch -# User Keir Fraser <keir.fraser@xxxxxxxxxx> -# Date 1199916752 0 -# Node ID 353802ec1caf399143e48713a04cedd37a106347 -# Parent 748cd890ea7f56752311e519e80eece9d850c01a -x86_64: Add ppoll/pselect syscalls (from upstream Linux) - -Needed TIF_RESTORE_SIGMASK first - -Signed-off-by: Andi Kleen <ak@xxxxxxx> - -diff -r 748cd890ea7f -r 353802ec1caf arch/x86_64/ia32/ia32entry.S ---- a/arch/x86_64/ia32/ia32entry.S Wed Jan 09 22:12:04 2008 +0000 -+++ b/arch/x86_64/ia32/ia32entry.S Wed Jan 09 22:12:32 2008 +0000 -@@ -703,8 +703,8 @@ ia32_sys_call_table: - .quad sys_readlinkat /* 305 */ - .quad sys_fchmodat - .quad sys_faccessat -- .quad quiet_ni_syscall /* pselect6 for now */ -- .quad quiet_ni_syscall /* ppoll for now */ -+ .quad compat_sys_pselect6 -+ .quad compat_sys_ppoll -
.quad sys_unshare /* 310 */ - .quad compat_sys_set_robust_list - .quad compat_sys_get_robust_list -diff -r 748cd890ea7f -r 353802ec1caf include/asm-x86_64/unistd.h ---- a/include/asm-x86_64/unistd.h Wed Jan 09 22:12:04 2008 +0000 -+++ b/include/asm-x86_64/unistd.h Wed Jan 09 22:12:32 2008 +0000 -@@ -600,9 +600,9 @@ __SYSCALL(__NR_fchmodat, sys_fchmodat) - #define __NR_faccessat 269 - __SYSCALL(__NR_faccessat, sys_faccessat) - #define __NR_pselect6 270 --__SYSCALL(__NR_pselect6, sys_ni_syscall) /* for now */ -+__SYSCALL(__NR_pselect6, sys_pselect6) - #define __NR_ppoll 271 --__SYSCALL(__NR_ppoll, sys_ni_syscall) /* for now */ -+__SYSCALL(__NR_ppoll, sys_ppoll) - #define __NR_unshare 272 - __SYSCALL(__NR_unshare, sys_unshare) - #define __NR_set_robust_list 273 diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch --- a/patches/linux-2.6.18/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,151 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/machine_kexec.c ./arch/i386/kernel/machine_kexec.c ---- ../orig-linux-2.6.18/arch/i386/kernel/machine_kexec.c 2007-01-12 16:03:23.000000000 +0000 -+++ ./arch/i386/kernel/machine_kexec.c 2007-01-12 16:03:37.000000000 +0000 -@@ -28,48 +28,6 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED - static u32 kexec_pte0[1024] PAGE_ALIGNED; - static u32 kexec_pte1[1024] PAGE_ALIGNED; - --static void set_idt(void *newidt, __u16 limit) --{ -- struct Xgt_desc_struct curidt; -- -- /* ia32 supports unaliged loads & stores */ -- curidt.size = limit; -- curidt.address = (unsigned long)newidt; -- -- load_idt(&curidt); --}; -- -- --static void set_gdt(void *newgdt, __u16 limit) --{ -- struct Xgt_desc_struct curgdt; -- -- /* ia32 supports unaligned loads & stores */ -- curgdt.size = limit; -- curgdt.address = (unsigned long)newgdt; -- -- load_gdt(&curgdt); --}; -- --static void load_segments(void) --{ --#define __STR(X) #X --#define STR(X) __STR(X) -- -- __asm__ __volatile__ ( -- "\tljmp $"STR(__KERNEL_CS)",$1f\n" -- "\t1:\n" -- "\tmovl $"STR(__KERNEL_DS)",%%eax\n" -- "\tmovl %%eax,%%ds\n" -- "\tmovl %%eax,%%es\n" -- "\tmovl %%eax,%%fs\n" -- "\tmovl %%eax,%%gs\n" -- "\tmovl %%eax,%%ss\n" -- ::: "eax", "memory"); --#undef STR --#undef __STR --} -- - /* - * A architecture hook called to validate the - * proposed image and prepare the control pages -@@ -126,23 +84,6 @@ NORET_TYPE void machine_kexec(struct kim - page_list[PA_PTE_1] = __pa(kexec_pte1); - page_list[VA_PTE_1] = (unsigned long)kexec_pte1; - -- /* The segment registers are funny things, they have both a -- * visible and an invisible part. Whenever the visible part is -- * set to a specific selector, the invisible part is loaded -- * with from a table in memory. At no other time is the -- * descriptor table in memory accessed. -- * -- * I take advantage of this here by force loading the -- * segments, before I zap the gdt with an invalid value. -- */ -- load_segments(); -- /* The gdt & idt are now invalid. -- * If you want to load them you must set up your own idt & gdt. 
-- */ -- set_gdt(phys_to_virt(0),0); -- set_idt(phys_to_virt(0),0); -- -- /* now call it */ - relocate_kernel((unsigned long)image->head, (unsigned long)page_list, - image->start, cpu_has_pae); - } -diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/relocate_kernel.S ./arch/i386/kernel/relocate_kernel.S ---- ../orig-linux-2.6.18/arch/i386/kernel/relocate_kernel.S 2007-01-12 16:03:23.000000000 +0000 -+++ ./arch/i386/kernel/relocate_kernel.S 2007-01-12 16:03:37.000000000 +0000 -@@ -154,14 +154,45 @@ relocate_new_kernel: - movl PTR(PA_PGD)(%ebp), %eax - movl %eax, %cr3 - -+ /* setup idt */ -+ movl %edi, %eax -+ addl $(idt_48 - relocate_kernel), %eax -+ lidtl (%eax) -+ -+ /* setup gdt */ -+ movl %edi, %eax -+ addl $(gdt - relocate_kernel), %eax -+ movl %edi, %esi -+ addl $((gdt_48 - relocate_kernel) + 2), %esi -+ movl %eax, (%esi) -+ -+ movl %edi, %eax -+ addl $(gdt_48 - relocate_kernel), %eax -+ lgdtl (%eax) -+ -+ /* setup data segment registers */ -+ mov $(gdt_ds - gdt), %eax -+ mov %eax, %ds -+ mov %eax, %es -+ mov %eax, %fs -+ mov %eax, %gs -+ mov %eax, %ss -+ - /* setup a new stack at the end of the physical control page */ - lea 4096(%edi), %esp - -- /* jump to identity mapped page */ -- movl %edi, %eax -- addl $(identity_mapped - relocate_kernel), %eax -- pushl %eax -- ret -+ /* load new code segment and jump to identity mapped page */ -+ movl %edi, %esi -+ xorl %eax, %eax -+ pushl %eax -+ pushl %esi -+ pushl %eax -+ movl $(gdt_cs - gdt), %eax -+ pushl %eax -+ movl %edi, %eax -+ addl $(identity_mapped - relocate_kernel),%eax -+ pushl %eax -+ iretl - - identity_mapped: - /* store the start address on the stack */ -@@ -250,3 +281,20 @@ identity_mapped: - xorl %edi, %edi - xorl %ebp, %ebp - ret -+ -+ .align 16 -+gdt: -+ .quad 0x0000000000000000 /* NULL descriptor */ -+gdt_cs: -+ .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ -+gdt_ds: -+ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ -+gdt_end: -+ -+gdt_48: -+ .word gdt_end - gdt - 1 /* limit */ -+ .long 0 /* base - filled in by code above */ -+ -+idt_48: -+ .word 0 /* limit */ -+ .long 0 /* base */ diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch --- a/patches/linux-2.6.18/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,143 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/machine_kexec.c ./arch/x86_64/kernel/machine_kexec.c ---- ../orig-linux-2.6.18/arch/x86_64/kernel/machine_kexec.c 2007-01-12 16:03:49.000000000 +0000 -+++ ./arch/x86_64/kernel/machine_kexec.c 2007-01-12 16:04:02.000000000 +0000 -@@ -112,47 +112,6 @@ static int init_pgtable(struct kimage *i - return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT); - } - --static void set_idt(void *newidt, u16 limit) --{ -- struct desc_ptr curidt; -- -- /* x86-64 supports unaliged loads & stores */ -- curidt.size = limit; -- curidt.address = (unsigned long)newidt; -- -- __asm__ __volatile__ ( -- "lidtq %0\n" -- : : "m" (curidt) -- ); --}; -- -- --static void set_gdt(void *newgdt, u16 limit) --{ -- struct desc_ptr curgdt; -- -- /* x86-64 supports unaligned loads & stores */ -- curgdt.size = limit; -- curgdt.address = (unsigned long)newgdt; -- -- __asm__ __volatile__ ( -- "lgdtq %0\n" -- : : "m" (curgdt) -- ); --}; -- --static void load_segments(void) --{ -- __asm__ __volatile__ ( -- "\tmovl %0,%%ds\n" -- "\tmovl %0,%%es\n" -- "\tmovl %0,%%ss\n" -- "\tmovl %0,%%fs\n" -- "\tmovl %0,%%gs\n" -- : : 
"a" (__KERNEL_DS) : "memory" -- ); --} -- - int machine_kexec_prepare(struct kimage *image) - { - unsigned long start_pgtable; -@@ -209,23 +168,6 @@ NORET_TYPE void machine_kexec(struct kim - page_list[PA_TABLE_PAGE] = - (unsigned long)__pa(page_address(image->control_code_page)); - -- /* The segment registers are funny things, they have both a -- * visible and an invisible part. Whenever the visible part is -- * set to a specific selector, the invisible part is loaded -- * with from a table in memory. At no other time is the -- * descriptor table in memory accessed. -- * -- * I take advantage of this here by force loading the -- * segments, before I zap the gdt with an invalid value. -- */ -- load_segments(); -- /* The gdt & idt are now invalid. -- * If you want to load them you must set up your own idt & gdt. -- */ -- set_gdt(phys_to_virt(0),0); -- set_idt(phys_to_virt(0),0); -- -- /* now call it */ - relocate_kernel((unsigned long)image->head, (unsigned long)page_list, - image->start); - } -diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/relocate_kernel.S ./arch/x86_64/kernel/relocate_kernel.S ---- ../orig-linux-2.6.18/arch/x86_64/kernel/relocate_kernel.S 2007-01-12 16:03:49.000000000 +0000 -+++ ./arch/x86_64/kernel/relocate_kernel.S 2007-01-12 16:04:02.000000000 +0000 -@@ -159,13 +159,39 @@ relocate_new_kernel: - movq PTR(PA_PGD)(%rsi), %r9 - movq %r9, %cr3 - -+ /* setup idt */ -+ movq %r8, %rax -+ addq $(idt_80 - relocate_kernel), %rax -+ lidtq (%rax) -+ -+ /* setup gdt */ -+ movq %r8, %rax -+ addq $(gdt - relocate_kernel), %rax -+ movq %r8, %r9 -+ addq $((gdt_80 - relocate_kernel) + 2), %r9 -+ movq %rax, (%r9) -+ -+ movq %r8, %rax -+ addq $(gdt_80 - relocate_kernel), %rax -+ lgdtq (%rax) -+ -+ /* setup data segment registers */ -+ xorl %eax, %eax -+ movl %eax, %ds -+ movl %eax, %es -+ movl %eax, %fs -+ movl %eax, %gs -+ movl %eax, %ss -+ - /* setup a new stack at the end of the physical control page */ - lea 4096(%r8), %rsp - -- /* jump to identity mapped page */ -- addq $(identity_mapped - relocate_kernel), %r8 -- pushq %r8 -- ret -+ /* load new code segment and jump to identity mapped page */ -+ movq %r8, %rax -+ addq $(identity_mapped - relocate_kernel), %rax -+ pushq $(gdt_cs - gdt) -+ pushq %rax -+ lretq - - identity_mapped: - /* store the start address on the stack */ -@@ -272,5 +298,19 @@ identity_mapped: - xorq %r13, %r13 - xorq %r14, %r14 - xorq %r15, %r15 -- - ret -+ -+ .align 16 -+gdt: -+ .quad 0x0000000000000000 /* NULL descriptor */ -+gdt_cs: -+ .quad 0x00af9a000000ffff -+gdt_end: -+ -+gdt_80: -+ .word gdt_end - gdt - 1 /* limit */ -+ .quad 0 /* base - filled in by code above */ -+ -+idt_80: -+ .word 0 /* limit */ -+ .quad 0 /* base */ diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/net-csum.patch --- a/patches/linux-2.6.18/net-csum.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/net/ipv4/netfilter/ip_nat_proto_tcp.c ./net/ipv4/netfilter/ip_nat_proto_tcp.c ---- ../orig-linux-2.6.18/net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./net/ipv4/netfilter/ip_nat_proto_tcp.c 2007-01-12 16:08:53.000000000 +0000 -@@ -129,7 +129,12 @@ tcp_manip_pkt(struct sk_buff **pskb, - if (hdrsize < sizeof(*hdr)) - return 1; - -- hdr->check = ip_nat_cheat_check(~oldip, newip, -+#ifdef CONFIG_XEN -+ if ((*pskb)->proto_csum_blank) -+ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); -+ else -+#endif -+ hdr->check = ip_nat_cheat_check(~oldip, newip, - 
ip_nat_cheat_check(oldport ^ 0xFFFF, - newport, - hdr->check)); -diff -pruN ../orig-linux-2.6.18/net/ipv4/netfilter/ip_nat_proto_udp.c ./net/ipv4/netfilter/ip_nat_proto_udp.c ---- ../orig-linux-2.6.18/net/ipv4/netfilter/ip_nat_proto_udp.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./net/ipv4/netfilter/ip_nat_proto_udp.c 2007-01-12 16:08:53.000000000 +0000 -@@ -113,11 +113,17 @@ udp_manip_pkt(struct sk_buff **pskb, - newport = tuple->dst.u.udp.port; - portptr = &hdr->dest; - } -- if (hdr->check) /* 0 is a special case meaning no checksum */ -- hdr->check = ip_nat_cheat_check(~oldip, newip, -+ if (hdr->check) { /* 0 is a special case meaning no checksum */ -+#ifdef CONFIG_XEN -+ if ((*pskb)->proto_csum_blank) -+ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); -+ else -+#endif -+ hdr->check = ip_nat_cheat_check(~oldip, newip, - ip_nat_cheat_check(*portptr ^ 0xFFFF, - newport, - hdr->check)); -+ } - *portptr = newport; - return 1; - } -diff -pruN ../orig-linux-2.6.18/net/ipv4/xfrm4_output.c ./net/ipv4/xfrm4_output.c ---- ../orig-linux-2.6.18/net/ipv4/xfrm4_output.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./net/ipv4/xfrm4_output.c 2007-01-12 17:38:34.000000000 +0000 -@@ -18,6 +18,8 @@ - #include <net/xfrm.h> - #include <net/icmp.h> - -+extern int skb_checksum_setup(struct sk_buff *skb); -+ - static int xfrm4_tunnel_check_size(struct sk_buff *skb) - { - int mtu, ret = 0; -@@ -48,6 +50,10 @@ static int xfrm4_output_one(struct sk_bu - struct xfrm_state *x = dst->xfrm; - int err; - -+ err = skb_checksum_setup(skb); -+ if (err) -+ goto error_nolock; -+ - if (skb->ip_summed == CHECKSUM_HW) { - err = skb_checksum_help(skb, 0); - if (err) diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/net-gso-5-rcv-mss.patch --- a/patches/linux-2.6.18/net-gso-5-rcv-mss.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/net/ipv4/tcp_input.c ./net/ipv4/tcp_input.c ---- ../orig-linux-2.6.18/net/ipv4/tcp_input.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./net/ipv4/tcp_input.c 2007-01-12 18:10:16.000000000 +0000 -@@ -127,7 +127,7 @@ static void tcp_measure_rcv_mss(struct s - /* skb->len may jitter because of SACKs, even if peer - * sends good full-sized frames. 
- */ -- len = skb->len; -+ len = skb_shinfo(skb)->gso_size ?: skb->len; - if (len >= icsk->icsk_ack.rcv_mss) { - icsk->icsk_ack.rcv_mss = len; - } else { diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/net-gso-6-linear-segmentation.patch --- a/patches/linux-2.6.18/net-gso-6-linear-segmentation.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/net/core/skbuff.c ./net/core/skbuff.c ---- ../orig-linux-2.6.18/net/core/skbuff.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./net/core/skbuff.c 2007-01-12 18:10:37.000000000 +0000 -@@ -1945,7 +1945,7 @@ struct sk_buff *skb_segment(struct sk_bu - do { - struct sk_buff *nskb; - skb_frag_t *frag; -- int hsize, nsize; -+ int hsize; - int k; - int size; - -@@ -1956,11 +1956,10 @@ struct sk_buff *skb_segment(struct sk_bu - hsize = skb_headlen(skb) - offset; - if (hsize < 0) - hsize = 0; -- nsize = hsize + doffset; -- if (nsize > len + doffset || !sg) -- nsize = len + doffset; -+ if (hsize > len || !sg) -+ hsize = len; - -- nskb = alloc_skb(nsize + headroom, GFP_ATOMIC); -+ nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC); - if (unlikely(!nskb)) - goto err; - diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/pmd-shared.patch --- a/patches/linux-2.6.18/pmd-shared.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,111 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/arch/i386/mm/pageattr.c ./arch/i386/mm/pageattr.c ---- ../orig-linux-2.6.18/arch/i386/mm/pageattr.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./arch/i386/mm/pageattr.c 2007-01-12 18:11:06.000000000 +0000 -@@ -84,7 +84,7 @@ static void set_pmd_pte(pte_t *kpte, uns - unsigned long flags; - - set_pte_atomic(kpte, pte); /* change init_mm */ -- if (PTRS_PER_PMD > 1) -+ if (HAVE_SHARED_KERNEL_PMD) - return; - - spin_lock_irqsave(&pgd_lock, flags); -diff -pruN ../orig-linux-2.6.18/arch/i386/mm/pgtable.c ./arch/i386/mm/pgtable.c ---- ../orig-linux-2.6.18/arch/i386/mm/pgtable.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./arch/i386/mm/pgtable.c 2007-01-12 18:11:06.000000000 +0000 -@@ -214,9 +214,10 @@ void pgd_ctor(void *pgd, kmem_cache_t *c - spin_lock_irqsave(&pgd_lock, flags); - } - -- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, -- swapper_pg_dir + USER_PTRS_PER_PGD, -- KERNEL_PGD_PTRS); -+ if (PTRS_PER_PMD == 1 || HAVE_SHARED_KERNEL_PMD) -+ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, -+ swapper_pg_dir + USER_PTRS_PER_PGD, -+ KERNEL_PGD_PTRS); - if (PTRS_PER_PMD > 1) - return; - -@@ -248,6 +249,30 @@ pgd_t *pgd_alloc(struct mm_struct *mm) - goto out_oom; - set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); - } -+ -+ if (!HAVE_SHARED_KERNEL_PMD) { -+ unsigned long flags; -+ -+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { -+ pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); -+ if (!pmd) -+ goto out_oom; -+ set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd))); -+ } -+ -+ spin_lock_irqsave(&pgd_lock, flags); -+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { -+ unsigned long v = (unsigned long)i << PGDIR_SHIFT; -+ pgd_t *kpgd = pgd_offset_k(v); -+ pud_t *kpud = pud_offset(kpgd, v); -+ pmd_t *kpmd = pmd_offset(kpud, v); -+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); -+ memcpy(pmd, kpmd, PAGE_SIZE); -+ } -+ pgd_list_add(pgd); -+ spin_unlock_irqrestore(&pgd_lock, flags); -+ } -+ - return pgd; - - out_oom: -@@ -262,9 +287,23 @@ void pgd_free(pgd_t *pgd) - int i; - - /* in the PAE case user pgd entries are overwritten before usage */ -- if (PTRS_PER_PMD > 1) -- 
for (i = 0; i < USER_PTRS_PER_PGD; ++i) -- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); -+ if (PTRS_PER_PMD > 1) { -+ for (i = 0; i < USER_PTRS_PER_PGD; ++i) { -+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); -+ kmem_cache_free(pmd_cache, pmd); -+ } -+ if (!HAVE_SHARED_KERNEL_PMD) { -+ unsigned long flags; -+ spin_lock_irqsave(&pgd_lock, flags); -+ pgd_list_del(pgd); -+ spin_unlock_irqrestore(&pgd_lock, flags); -+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { -+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); -+ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); -+ kmem_cache_free(pmd_cache, pmd); -+ } -+ } -+ } - /* in the non-PAE case, free_pgtables() clears user pgd entries */ - kmem_cache_free(pgd_cache, pgd); - } -diff -pruN ../orig-linux-2.6.18/include/asm-i386/pgtable-2level-defs.h ./include/asm-i386/pgtable-2level-defs.h ---- ../orig-linux-2.6.18/include/asm-i386/pgtable-2level-defs.h 2006-09-20 04:42:06.000000000 +0100 -+++ ./include/asm-i386/pgtable-2level-defs.h 2007-01-12 18:11:06.000000000 +0000 -@@ -1,6 +1,8 @@ - #ifndef _I386_PGTABLE_2LEVEL_DEFS_H - #define _I386_PGTABLE_2LEVEL_DEFS_H - -+#define HAVE_SHARED_KERNEL_PMD 0 -+ - /* - * traditional i386 two-level paging structure: - */ -diff -pruN ../orig-linux-2.6.18/include/asm-i386/pgtable-3level-defs.h ./include/asm-i386/pgtable-3level-defs.h ---- ../orig-linux-2.6.18/include/asm-i386/pgtable-3level-defs.h 2006-09-20 04:42:06.000000000 +0100 -+++ ./include/asm-i386/pgtable-3level-defs.h 2007-01-12 18:11:06.000000000 +0000 -@@ -1,6 +1,8 @@ - #ifndef _I386_PGTABLE_3LEVEL_DEFS_H - #define _I386_PGTABLE_3LEVEL_DEFS_H - -+#define HAVE_SHARED_KERNEL_PMD 1 -+ - /* - * PGDIR_SHIFT determines what a top-level page table entry can map - */ diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch --- a/patches/linux-2.6.18/rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,30 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/entry.S ./arch/i386/kernel/entry.S ---- ../orig-linux-2.6.18/arch/i386/kernel/entry.S 2006-09-20 04:42:06.000000000 +0100 -+++ ./arch/i386/kernel/entry.S 2007-01-12 18:12:31.000000000 +0000 -@@ -269,7 +269,7 @@ ENTRY(sysenter_entry) - CFI_STARTPROC simple - CFI_DEF_CFA esp, 0 - CFI_REGISTER esp, ebp -- movl TSS_sysenter_esp0(%esp),%esp -+ movl SYSENTER_stack_esp0(%esp),%esp - sysenter_past_esp: - /* - * No need to follow this irqs on/off section: the syscall -@@ -689,7 +689,7 @@ device_not_available_emulate: - * that sets up the real kernel stack. Check here, since we can't - * allow the wrong stack to be used. - * -- * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have -+ * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have - * already pushed 3 words if it hits on the sysenter instruction: - * eflags, cs and eip. 
- * -@@ -701,7 +701,7 @@ device_not_available_emulate: - cmpw $__KERNEL_CS,4(%esp); \ - jne ok; \ - label: \ -- movl TSS_sysenter_esp0+offset(%esp),%esp; \ -+ movl SYSENTER_stack_esp0+offset(%esp),%esp; \ - pushfl; \ - pushl $__KERNEL_CS; \ - pushl $sysenter_past_esp diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/series --- a/patches/linux-2.6.18/series Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch -linux-2.6.19-rc1-kexec-move_segment_code-i386.patch -git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch -linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch -blktap-aio-16_03_06.patch -fix-ide-cd-pio-mode.patch -i386-mach-io-check-nmi.patch -net-csum.patch -net-gso-5-rcv-mss.patch -net-gso-6-linear-segmentation.patch -pmd-shared.patch -rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch -xen-hotplug.patch -xenoprof-generic.patch -x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch -x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch -git-dbaab49f92ff6ae6255762a948375e4036cbdbd2.patch -x86-elfnote-as-preprocessor-macro.patch -fixaddr-top.patch -git-c06cb8b1c4d25e5b4d7a2d7c2462619de1e0dbc4.patch -softlockup-no-idle-hz.patch -allow-i386-crash-kernels-to-handle-x86_64-dumps.patch -allow-i386-crash-kernels-to-handle-x86_64-dumps-fix.patch -git-5ee7737379b1d7f0c977c0f1661fbaf01a8d4721.patch -linux-2.6.18-xen-375-748cd890ea7f -linux-2.6.18-xen-376-353802ec1caf diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/softlockup-no-idle-hz.patch --- a/patches/linux-2.6.18/softlockup-no-idle-hz.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,56 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/include/linux/sched.h ./include/linux/sched.h ---- ../orig-linux-2.6.18/include/linux/sched.h 2006-09-20 04:42:06.000000000 +0100 -+++ ./include/linux/sched.h 2007-02-07 01:10:24.000000000 +0000 -@@ -211,10 +211,15 @@ extern void update_process_times(int use - extern void scheduler_tick(void); - - #ifdef CONFIG_DETECT_SOFTLOCKUP -+extern unsigned long softlockup_get_next_event(void); - extern void softlockup_tick(void); - extern void spawn_softlockup_task(void); - extern void touch_softlockup_watchdog(void); - #else -+static inline unsigned long softlockup_get_next_event(void) -+{ -+ return MAX_JIFFY_OFFSET; -+} - static inline void softlockup_tick(void) - { - } -diff -pruN ../orig-linux-2.6.18/kernel/softlockup.c ./kernel/softlockup.c ---- ../orig-linux-2.6.18/kernel/softlockup.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./kernel/softlockup.c 2007-02-07 01:53:22.000000000 +0000 -@@ -40,6 +40,19 @@ void touch_softlockup_watchdog(void) - } - EXPORT_SYMBOL(touch_softlockup_watchdog); - -+unsigned long softlockup_get_next_event(void) -+{ -+ int this_cpu = smp_processor_id(); -+ unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu); -+ -+ if (per_cpu(print_timestamp, this_cpu) == touch_timestamp || -+ did_panic || -+ !per_cpu(watchdog_task, this_cpu)) -+ return MAX_JIFFY_OFFSET; -+ -+ return max_t(long, 0, touch_timestamp + HZ - jiffies); -+} -+ - /* - * This callback runs from the timer interrupt, and checks - * whether the watchdog thread has hung or not: -diff -pruN ../orig-linux-2.6.18/kernel/timer.c ./kernel/timer.c ---- ../orig-linux-2.6.18/kernel/timer.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./kernel/timer.c 2007-02-07 01:29:34.000000000 +0000 -@@ -485,7 +485,9 @@ unsigned long next_timer_interrupt(void) - if (hr_expires < 3) - return 
hr_expires + jiffies; - } -- hr_expires += jiffies; -+ hr_expires = min_t(unsigned long, -+ softlockup_get_next_event(), -+ hr_expires) + jiffies; - - base = __get_cpu_var(tvec_bases); - spin_lock(&base->lock); diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/x86-elfnote-as-preprocessor-macro.patch --- a/patches/linux-2.6.18/x86-elfnote-as-preprocessor-macro.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,56 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/include/linux/elfnote.h ./include/linux/elfnote.h ---- ../orig-linux-2.6.18/include/linux/elfnote.h 2007-01-12 18:19:44.000000000 +0000 -+++ ./include/linux/elfnote.h 2007-01-12 18:21:02.000000000 +0000 -@@ -31,22 +31,38 @@ - /* - * Generate a structure with the same shape as Elf{32,64}_Nhdr (which - * turn out to be the same size and shape), followed by the name and -- * desc data with appropriate padding. The 'desc' argument includes -- * the assembler pseudo op defining the type of the data: .asciz -- * "hello, world" -+ * desc data with appropriate padding. The 'desctype' argument is the -+ * assembler pseudo op defining the type of the data e.g. .asciz while -+ * 'descdata' is the data itself e.g. "hello, world". -+ * -+ * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two") -+ * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef) - */ --.macro ELFNOTE name type desc:vararg --.pushsection ".note.\name" -- .align 4 -- .long 2f - 1f /* namesz */ -- .long 4f - 3f /* descsz */ -- .long \type --1:.asciz "\name" --2:.align 4 --3:\desc --4:.align 4 -+#ifdef __STDC__ -+#define ELFNOTE(name, type, desctype, descdata...) \ -+.pushsection .note.name ; \ -+ .align 4 ; \ -+ .long 2f - 1f /* namesz */ ; \ -+ .long 4f - 3f /* descsz */ ; \ -+ .long type ; \ -+1:.asciz #name ; \ -+2:.align 4 ; \ -+3:desctype descdata ; \ -+4:.align 4 ; \ - .popsection --.endm -+#else /* !__STDC__, i.e. -traditional */ -+#define ELFNOTE(name, type, desctype, descdata) \ -+.pushsection .note.name ; \ -+ .align 4 ; \ -+ .long 2f - 1f /* namesz */ ; \ -+ .long 4f - 3f /* descsz */ ; \ -+ .long type ; \ -+1:.asciz "name" ; \ -+2:.align 4 ; \ -+3:desctype descdata ; \ -+4:.align 4 ; \ -+.popsection -+#endif /* __STDC__ */ - #else /* !__ASSEMBLER__ */ - #include <linux/elf.h> - /* diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch --- a/patches/linux-2.6.18/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,143 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/arch/i386/kernel/vmlinux.lds.S ./arch/i386/kernel/vmlinux.lds.S ---- ../orig-linux-2.6.18/arch/i386/kernel/vmlinux.lds.S 2006-09-20 04:42:06.000000000 +0100 -+++ ./arch/i386/kernel/vmlinux.lds.S 2007-01-12 18:19:44.000000000 +0000 -@@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386" - OUTPUT_ARCH(i386) - ENTRY(phys_startup_32) - jiffies = jiffies_64; -+ -+PHDRS { -+ text PT_LOAD FLAGS(5); /* R_E */ -+ data PT_LOAD FLAGS(7); /* RWE */ -+ note PT_NOTE FLAGS(4); /* R__ */ -+} - SECTIONS - { - . = __KERNEL_START; -@@ -26,7 +32,7 @@ SECTIONS - KPROBES_TEXT - *(.fixup) - *(.gnu.warning) -- } = 0x9090 -+ } :text = 0x9090 - - _etext = .; /* End of text section */ - -@@ -48,7 +54,7 @@ SECTIONS - .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ - *(.data) - CONSTRUCTORS -- } -+ } :data - - . 
= ALIGN(4096); - __nosave_begin = .; -@@ -184,4 +190,6 @@ SECTIONS - STABS_DEBUG - - DWARF_DEBUG -+ -+ NOTES - } -diff -pruN ../orig-linux-2.6.18/include/asm-generic/vmlinux.lds.h ./include/asm-generic/vmlinux.lds.h ---- ../orig-linux-2.6.18/include/asm-generic/vmlinux.lds.h 2006-09-20 04:42:06.000000000 +0100 -+++ ./include/asm-generic/vmlinux.lds.h 2007-01-12 18:19:44.000000000 +0000 -@@ -194,3 +194,6 @@ - .stab.index 0 : { *(.stab.index) } \ - .stab.indexstr 0 : { *(.stab.indexstr) } \ - .comment 0 : { *(.comment) } -+ -+#define NOTES \ -+ .notes : { *(.note.*) } :note -diff -pruN ../orig-linux-2.6.18/include/linux/elfnote.h ./include/linux/elfnote.h ---- ../orig-linux-2.6.18/include/linux/elfnote.h 1970-01-01 01:00:00.000000000 +0100 -+++ ./include/linux/elfnote.h 2007-01-12 18:19:44.000000000 +0000 -@@ -0,0 +1,88 @@ -+#ifndef _LINUX_ELFNOTE_H -+#define _LINUX_ELFNOTE_H -+/* -+ * Helper macros to generate ELF Note structures, which are put into a -+ * PT_NOTE segment of the final vmlinux image. These are useful for -+ * including name-value pairs of metadata into the kernel binary (or -+ * modules?) for use by external programs. -+ * -+ * Each note has three parts: a name, a type and a desc. The name is -+ * intended to distinguish the note's originator, so it would be a -+ * company, project, subsystem, etc; it must be in a suitable form for -+ * use in a section name. The type is an integer which is used to tag -+ * the data, and is considered to be within the "name" namespace (so -+ * "FooCo"'s type 42 is distinct from "BarProj"'s type 42). The -+ * "desc" field is the actual data. There are no constraints on the -+ * desc field's contents, though typically they're fairly small. -+ * -+ * All notes from a given NAME are put into a section named -+ * .note.NAME. When the kernel image is finally linked, all the notes -+ * are packed into a single .notes section, which is mapped into the -+ * PT_NOTE segment. Because notes for a given name are grouped into -+ * the same section, they'll all be adjacent the output file. -+ * -+ * This file defines macros for both C and assembler use. Their -+ * syntax is slightly different, but they're semantically similar. -+ * -+ * See the ELF specification for more detail about ELF notes. -+ */ -+ -+#ifdef __ASSEMBLER__ -+/* -+ * Generate a structure with the same shape as Elf{32,64}_Nhdr (which -+ * turn out to be the same size and shape), followed by the name and -+ * desc data with appropriate padding. The 'desc' argument includes -+ * the assembler pseudo op defining the type of the data: .asciz -+ * "hello, world" -+ */ -+.macro ELFNOTE name type desc:vararg -+.pushsection ".note.\name" -+ .align 4 -+ .long 2f - 1f /* namesz */ -+ .long 4f - 3f /* descsz */ -+ .long \type -+1:.asciz "\name" -+2:.align 4 -+3:\desc -+4:.align 4 -+.popsection -+.endm -+#else /* !__ASSEMBLER__ */ -+#include <linux/elf.h> -+/* -+ * Use an anonymous structure which matches the shape of -+ * Elf{32,64}_Nhdr, but includes the name and desc data. The size and -+ * type of name and desc depend on the macro arguments. "name" must -+ * be a literal string, and "desc" must be passed by value. You may -+ * only define one note per line, since __LINE__ is used to generate -+ * unique symbols. 
-+ */ -+#define _ELFNOTE_PASTE(a,b) a##b -+#define _ELFNOTE(size, name, unique, type, desc) \ -+ static const struct { \ -+ struct elf##size##_note _nhdr; \ -+ unsigned char _name[sizeof(name)] \ -+ __attribute__((aligned(sizeof(Elf##size##_Word)))); \ -+ typeof(desc) _desc \ -+ __attribute__((aligned(sizeof(Elf##size##_Word)))); \ -+ } _ELFNOTE_PASTE(_note_, unique) \ -+ __attribute_used__ \ -+ __attribute__((section(".note." name), \ -+ aligned(sizeof(Elf##size##_Word)), \ -+ unused)) = { \ -+ { \ -+ sizeof(name), \ -+ sizeof(desc), \ -+ type, \ -+ }, \ -+ name, \ -+ desc \ -+ } -+#define ELFNOTE(size, name, type, desc) \ -+ _ELFNOTE(size, name, __LINE__, type, desc) -+ -+#define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc) -+#define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc) -+#endif /* __ASSEMBLER__ */ -+ -+#endif /* _LINUX_ELFNOTE_H */ diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch --- a/patches/linux-2.6.18/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,84 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/arch/x86_64/kernel/vmlinux.lds.S ./arch/x86_64/kernel/vmlinux.lds.S ---- ../orig-linux-2.6.18/arch/x86_64/kernel/vmlinux.lds.S 2006-09-20 04:42:06.000000000 +0100 -+++ ./arch/x86_64/kernel/vmlinux.lds.S 2007-01-12 18:20:02.000000000 +0000 -@@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86 - OUTPUT_ARCH(i386:x86-64) - ENTRY(phys_startup_64) - jiffies_64 = jiffies; -+PHDRS { -+ text PT_LOAD FLAGS(5); /* R_E */ -+ data PT_LOAD FLAGS(7); /* RWE */ -+ user PT_LOAD FLAGS(7); /* RWE */ -+ note PT_NOTE FLAGS(4); /* R__ */ -+} - SECTIONS - { - . = __START_KERNEL; -@@ -31,7 +37,7 @@ SECTIONS - KPROBES_TEXT - *(.fixup) - *(.gnu.warning) -- } = 0x9090 -+ } :text = 0x9090 - /* out-of-line lock text */ - .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) { *(.text.lock) } - -@@ -57,17 +63,10 @@ SECTIONS - .data : AT(ADDR(.data) - LOAD_OFFSET) { - *(.data) - CONSTRUCTORS -- } -+ } :data - - _edata = .; /* End of data section */ - -- __bss_start = .; /* BSS */ -- .bss : AT(ADDR(.bss) - LOAD_OFFSET) { -- *(.bss.page_aligned) -- *(.bss) -- } -- __bss_stop = .; -- - . = ALIGN(PAGE_SIZE); - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { -@@ -89,7 +88,7 @@ SECTIONS - #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) - - . = VSYSCALL_ADDR; -- .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } -+ .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } :user - __vsyscall_0 = VSYSCALL_VIRT_ADDR; - - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); -@@ -132,7 +131,7 @@ SECTIONS - . = ALIGN(8192); /* init_task */ - .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { - *(.data.init_task) -- } -+ } :data - - . = ALIGN(4096); - .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { -@@ -222,6 +221,14 @@ SECTIONS - . = ALIGN(4096); - __nosave_end = .; - -+ __bss_start = .; /* BSS */ -+ . = ALIGN(4096); -+ .bss : AT(ADDR(.bss) - LOAD_OFFSET) { -+ *(.bss.page_aligned) -+ *(.bss) -+ } -+ __bss_stop = .; -+ - _end = . 
; - - /* Sections to be discarded */ -@@ -235,4 +242,6 @@ SECTIONS - STABS_DEBUG - - DWARF_DEBUG -+ -+ NOTES - } diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/xen-hotplug.patch --- a/patches/linux-2.6.18/xen-hotplug.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/fs/proc/proc_misc.c ./fs/proc/proc_misc.c ---- ../orig-linux-2.6.18/fs/proc/proc_misc.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./fs/proc/proc_misc.c 2007-01-12 18:18:36.000000000 +0000 -@@ -471,7 +471,7 @@ static int show_stat(struct seq_file *p, - (unsigned long long)cputime64_to_clock_t(irq), - (unsigned long long)cputime64_to_clock_t(softirq), - (unsigned long long)cputime64_to_clock_t(steal)); -- for_each_online_cpu(i) { -+ for_each_possible_cpu(i) { - - /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ - user = kstat_cpu(i).cpustat.user; diff -r c9b32b389e62 -r b17dfd182f7c patches/linux-2.6.18/xenoprof-generic.patch --- a/patches/linux-2.6.18/xenoprof-generic.patch Thu Jan 17 14:35:38 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,662 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/drivers/oprofile/buffer_sync.c ./drivers/oprofile/buffer_sync.c ---- ../orig-linux-2.6.18/drivers/oprofile/buffer_sync.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./drivers/oprofile/buffer_sync.c 2007-01-12 18:19:28.000000000 +0000 -@@ -6,6 +6,10 @@ - * - * @author John Levon <levon@xxxxxxxxxxxxxxxxx> - * -+ * Modified by Aravind Menon for Xen -+ * These modifications are: -+ * Copyright (C) 2005 Hewlett-Packard Co. -+ * - * This is the core of the buffer management. Each - * CPU buffer is processed and entered into the - * global event buffer. Such processing is necessary -@@ -38,6 +42,7 @@ static cpumask_t marked_cpus = CPU_MASK_ - static DEFINE_SPINLOCK(task_mortuary); - static void process_task_mortuary(void); - -+static int cpu_current_domain[NR_CPUS]; - - /* Take ownership of the task struct and place it on the - * list for processing. Only after two full buffer syncs -@@ -146,6 +151,11 @@ static void end_sync(void) - int sync_start(void) - { - int err; -+ int i; -+ -+ for (i = 0; i < NR_CPUS; i++) { -+ cpu_current_domain[i] = COORDINATOR_DOMAIN; -+ } - - start_cpu_work(); - -@@ -275,15 +285,31 @@ static void add_cpu_switch(int i) - last_cookie = INVALID_COOKIE; - } - --static void add_kernel_ctx_switch(unsigned int in_kernel) -+static void add_cpu_mode_switch(unsigned int cpu_mode) - { - add_event_entry(ESCAPE_CODE); -- if (in_kernel) -- add_event_entry(KERNEL_ENTER_SWITCH_CODE); -- else -- add_event_entry(KERNEL_EXIT_SWITCH_CODE); -+ switch (cpu_mode) { -+ case CPU_MODE_USER: -+ add_event_entry(USER_ENTER_SWITCH_CODE); -+ break; -+ case CPU_MODE_KERNEL: -+ add_event_entry(KERNEL_ENTER_SWITCH_CODE); -+ break; -+ case CPU_MODE_XEN: -+ add_event_entry(XEN_ENTER_SWITCH_CODE); -+ break; -+ default: -+ break; -+ } - } -- -+ -+static void add_domain_switch(unsigned long domain_id) -+{ -+ add_event_entry(ESCAPE_CODE); -+ add_event_entry(DOMAIN_SWITCH_CODE); -+ add_event_entry(domain_id); -+} -+ - static void - add_user_ctx_switch(struct task_struct const * task, unsigned long cookie) - { -@@ -348,9 +374,9 @@ static int add_us_sample(struct mm_struc - * for later lookup from userspace. 
- */ - static int --add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel) -+add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode) - { -- if (in_kernel) { -+ if (cpu_mode >= CPU_MODE_KERNEL) { - add_sample_entry(s->eip, s->event); - return 1; - } else if (mm) { -@@ -496,15 +522,21 @@ void sync_buffer(int cpu) - struct mm_struct *mm = NULL; - struct task_struct * new; - unsigned long cookie = 0; -- int in_kernel = 1; -+ int cpu_mode = 1; - unsigned int i; - sync_buffer_state state = sb_buffer_start; - unsigned long available; -+ int domain_switch = 0; - - mutex_lock(&buffer_mutex); - - add_cpu_switch(cpu); - -+ /* We need to assign the first samples in this CPU buffer to the -+ same domain that we were processing at the last sync_buffer */ -+ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) { -+ add_domain_switch(cpu_current_domain[cpu]); -+ } - /* Remember, only we can modify tail_pos */ - - available = get_slots(cpu_buf); -@@ -512,16 +544,18 @@ void sync_buffer(int cpu) - for (i = 0; i < available; ++i) { - struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos]; - -- if (is_code(s->eip)) { -- if (s->event <= CPU_IS_KERNEL) { -- /* kernel/userspace switch */ -- in_kernel = s->event; -+ if (is_code(s->eip) && !domain_switch) { -+ if (s->event <= CPU_MODE_XEN) { -+ /* xen/kernel/userspace switch */ -+ cpu_mode = s->event; - if (state == sb_buffer_start) - state = sb_sample_start; -- add_kernel_ctx_switch(s->event); -+ add_cpu_mode_switch(s->event); - } else if (s->event == CPU_TRACE_BEGIN) { - state = sb_bt_start; - add_trace_begin(); -+ } else if (s->event == CPU_DOMAIN_SWITCH) { -+ domain_switch = 1; - } else { - struct mm_struct * oldmm = mm; - -@@ -535,11 +569,21 @@ void sync_buffer(int cpu) - add_user_ctx_switch(new, cookie); - } - } else { -- if (state >= sb_bt_start && -- !add_sample(mm, s, in_kernel)) { -- if (state == sb_bt_start) { -- state = sb_bt_ignore; -- atomic_inc(&oprofile_stats.bt_lost_no_mapping); -+ if (domain_switch) { -+ cpu_current_domain[cpu] = s->eip; -+ add_domain_switch(s->eip); -+ domain_switch = 0; -+ } else { -+ if (cpu_current_domain[cpu] != -+ COORDINATOR_DOMAIN) { -+ add_sample_entry(s->eip, s->event); -+ } -+ else if (state >= sb_bt_start && -+ !add_sample(mm, s, cpu_mode)) { -+ if (state == sb_bt_start) { -+ state = sb_bt_ignore; -+ atomic_inc(&oprofile_stats.bt_lost_no_mapping); -+ } - } - } - } -@@ -548,6 +592,11 @@ void sync_buffer(int cpu) - } - release_mm(mm); - -+ /* We reset domain to COORDINATOR at each CPU switch */ -+ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) { -+ add_domain_switch(COORDINATOR_DOMAIN); -+ } -+ - mark_done(cpu); - - mutex_unlock(&buffer_mutex); -diff -pruN ../orig-linux-2.6.18/drivers/oprofile/cpu_buffer.c ./drivers/oprofile/cpu_buffer.c ---- ../orig-linux-2.6.18/drivers/oprofile/cpu_buffer.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./drivers/oprofile/cpu_buffer.c 2007-01-12 18:18:50.000000000 +0000 -@@ -6,6 +6,10 @@ - * - * @author John Levon <levon@xxxxxxxxxxxxxxxxx> - * -+ * Modified by Aravind Menon for Xen -+ * These modifications are: -+ * Copyright (C) 2005 Hewlett-Packard Co. -+ * - * Each CPU has a local buffer that stores PC value/event - * pairs. We also log context switches when we notice them. 
- * Eventually each CPU's buffer is processed into the global -@@ -34,6 +38,8 @@ static void wq_sync_buffer(void *); - #define DEFAULT_TIMER_EXPIRE (HZ / 10) - static int work_enabled; - -+static int32_t current_domain = COORDINATOR_DOMAIN; -+ - void free_cpu_buffers(void) - { - int i; -@@ -57,7 +63,7 @@ int alloc_cpu_buffers(void) - goto fail; - - b->last_task = NULL; -- b->last_is_kernel = -1; -+ b->last_cpu_mode = -1; - b->tracing = 0; - b->buffer_size = buffer_size; - b->tail_pos = 0; -@@ -113,7 +119,7 @@ void cpu_buffer_reset(struct oprofile_cp - * collected will populate the buffer with proper - * values to initialize the buffer - */ -- cpu_buf->last_is_kernel = -1; -+ cpu_buf->last_cpu_mode = -1; - cpu_buf->last_task = NULL; - } - -@@ -163,13 +169,13 @@ add_code(struct oprofile_cpu_buffer * bu - * because of the head/tail separation of the writer and reader - * of the CPU buffer. - * -- * is_kernel is needed because on some architectures you cannot -+ * cpu_mode is needed because on some architectures you cannot - * tell if you are in kernel or user space simply by looking at -- * pc. We tag this in the buffer by generating kernel enter/exit -- * events whenever is_kernel changes -+ * pc. We tag this in the buffer by generating kernel/user (and xen) -+ * enter events whenever cpu_mode changes - */ - static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc, -- int is_kernel, unsigned long event) -+ int cpu_mode, unsigned long event) - { - struct task_struct * task; - -@@ -180,18 +186,18 @@ static int log_sample(struct oprofile_cp - return 0; - } - -- is_kernel = !!is_kernel; -- - task = current; - - /* notice a switch from user->kernel or vice versa */ -- if (cpu_buf->last_is_kernel != is_kernel) { -- cpu_buf->last_is_kernel = is_kernel; -- add_code(cpu_buf, is_kernel); -+ if (cpu_buf->last_cpu_mode != cpu_mode) { -+ cpu_buf->last_cpu_mode = cpu_mode; -+ add_code(cpu_buf, cpu_mode); - } -- -+ - /* notice a task switch */ -- if (cpu_buf->last_task != task) { -+ /* if not processing other domain samples */ -+ if ((cpu_buf->last_task != task) && -+ (current_domain == COORDINATOR_DOMAIN)) { - cpu_buf->last_task = task; - add_code(cpu_buf, (unsigned long)task); - } -@@ -275,6 +281,25 @@ void oprofile_add_trace(unsigned long pc - add_sample(cpu_buf, pc, 0); - } - -+int oprofile_add_domain_switch(int32_t domain_id) -+{ -+ struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()]; -+ -+ /* should have space for switching into and out of domain -+ (2 slots each) plus one sample and one cpu mode switch */ -+ if (((nr_available_slots(cpu_buf) < 6) && -+ (domain_id != COORDINATOR_DOMAIN)) || -+ (nr_available_slots(cpu_buf) < 2)) -+ return 0; -+ -+ add_code(cpu_buf, CPU_DOMAIN_SWITCH); -+ add_sample(cpu_buf, domain_id, 0); -+ -+ current_domain = domain_id; -+ -+ return 1; -+} -+ - /* - * This serves to avoid cpu buffer overflow, and makes sure - * the task mortuary progresses -diff -pruN ../orig-linux-2.6.18/drivers/oprofile/cpu_buffer.h ./drivers/oprofile/cpu_buffer.h ---- ../orig-linux-2.6.18/drivers/oprofile/cpu_buffer.h 2006-09-20 04:42:06.000000000 +0100 -+++ ./drivers/oprofile/cpu_buffer.h 2007-01-12 18:18:50.000000000 +0000 -@@ -36,7 +36,7 @@ struct oprofile_cpu_buffer { - volatile unsigned long tail_pos; - unsigned long buffer_size; - struct task_struct * last_task; -- int last_is_kernel; -+ int last_cpu_mode; - int tracing; - struct op_sample * buffer; - unsigned long sample_received; -@@ -51,7 +51,10 @@ extern struct oprofile_cpu_buffer cpu_bu - void 
cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf); - - /* transient events for the CPU buffer -> event buffer */ --#define CPU_IS_KERNEL 1 --#define CPU_TRACE_BEGIN 2 -+#define CPU_MODE_USER 0 -+#define CPU_MODE_KERNEL 1 -+#define CPU_MODE_XEN 2 -+#define CPU_TRACE_BEGIN 3 -+#define CPU_DOMAIN_SWITCH 4 - - #endif /* OPROFILE_CPU_BUFFER_H */ -diff -pruN ../orig-linux-2.6.18/drivers/oprofile/event_buffer.h ./drivers/oprofile/event_buffer.h ---- ../orig-linux-2.6.18/drivers/oprofile/event_buffer.h 2006-09-20 04:42:06.000000000 +0100 -+++ ./drivers/oprofile/event_buffer.h 2007-01-12 18:18:50.000000000 +0000 -@@ -29,15 +29,20 @@ void wake_up_buffer_waiter(void); - #define CPU_SWITCH_CODE 2 - #define COOKIE_SWITCH_CODE 3 - #define KERNEL_ENTER_SWITCH_CODE 4 --#define KERNEL_EXIT_SWITCH_CODE 5 -+#define USER_ENTER_SWITCH_CODE 5 - #define MODULE_LOADED_CODE 6 - #define CTX_TGID_CODE 7 - #define TRACE_BEGIN_CODE 8 - #define TRACE_END_CODE 9 -+#define XEN_ENTER_SWITCH_CODE 10 -+#define DOMAIN_SWITCH_CODE 11 - - #define INVALID_COOKIE ~0UL - #define NO_COOKIE 0UL - -+/* Constant used to refer to coordinator domain (Xen) */ -+#define COORDINATOR_DOMAIN -1 -+ - /* add data to the event buffer */ - void add_event_entry(unsigned long data); - -diff -pruN ../orig-linux-2.6.18/drivers/oprofile/oprof.c ./drivers/oprofile/oprof.c ---- ../orig-linux-2.6.18/drivers/oprofile/oprof.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./drivers/oprofile/oprof.c 2007-01-12 18:18:50.000000000 +0000 -@@ -5,6 +5,10 @@ - * @remark Read the file COPYING - * - * @author John Levon <levon@xxxxxxxxxxxxxxxxx> -+ * -+ * Modified by Aravind Menon for Xen -+ * These modifications are: -+ * Copyright (C) 2005 Hewlett-Packard Co. - */ - - #include <linux/kernel.h> -@@ -19,7 +23,7 @@ - #include "cpu_buffer.h" - #include "buffer_sync.h" - #include "oprofile_stats.h" -- -+ - struct oprofile_operations oprofile_ops; - - unsigned long oprofile_started; -@@ -33,6 +37,32 @@ static DEFINE_MUTEX(start_mutex); - */ - static int timer = 0; - -+int oprofile_set_active(int active_domains[], unsigned int adomains) -+{ -+ int err; -+ -+ if (!oprofile_ops.set_active) -+ return -EINVAL; -+ -+ mutex_lock(&start_mutex); -+ err = oprofile_ops.set_active(active_domains, adomains); -+ mutex_unlock(&start_mutex); -+ return err; -+} -+ -+int oprofile_set_passive(int passive_domains[], unsigned int pdomains) -+{ -+ int err; -+ -+ if (!oprofile_ops.set_passive) -+ return -EINVAL; -+ -+ mutex_lock(&start_mutex); -+ err = oprofile_ops.set_passive(passive_domains, pdomains); -+ mutex_unlock(&start_mutex); -+ return err; -+} -+ - int oprofile_setup(void) - { - int err; -diff -pruN ../orig-linux-2.6.18/drivers/oprofile/oprof.h ./drivers/oprofile/oprof.h ---- ../orig-linux-2.6.18/drivers/oprofile/oprof.h 2006-09-20 04:42:06.000000000 +0100 -+++ ./drivers/oprofile/oprof.h 2007-01-12 18:18:50.000000000 +0000 -@@ -35,5 +35,8 @@ void oprofile_create_files(struct super_ - void oprofile_timer_init(struct oprofile_operations * ops); - - int oprofile_set_backtrace(unsigned long depth); -+ -+int oprofile_set_active(int active_domains[], unsigned int adomains); -+int oprofile_set_passive(int passive_domains[], unsigned int pdomains); - - #endif /* OPROF_H */ -diff -pruN ../orig-linux-2.6.18/drivers/oprofile/oprofile_files.c ./drivers/oprofile/oprofile_files.c ---- ../orig-linux-2.6.18/drivers/oprofile/oprofile_files.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./drivers/oprofile/oprofile_files.c 2007-01-12 18:18:50.000000000 +0000 -@@ -5,15 +5,21 @@ - * @remark Read the file 
COPYING - * - * @author John Levon <levon@xxxxxxxxxxxxxxxxx> -+ * -+ * Modified by Aravind Menon for Xen -+ * These modifications are: -+ * Copyright (C) 2005 Hewlett-Packard Co. - */ - - #include <linux/fs.h> - #include <linux/oprofile.h> -+#include <asm/uaccess.h> -+#include <linux/ctype.h> - - #include "event_buffer.h" - #include "oprofile_stats.h" - #include "oprof.h" -- -+ - unsigned long fs_buffer_size = 131072; - unsigned long fs_cpu_buffer_size = 8192; - unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */ -@@ -117,11 +123,202 @@ static ssize_t dump_write(struct file * - static struct file_operations dump_fops = { - .write = dump_write, - }; -- -+ -+#define TMPBUFSIZE 512 -+ -+static unsigned int adomains = 0; -+static int active_domains[MAX_OPROF_DOMAINS + 1]; -+static DEFINE_MUTEX(adom_mutex); -+ -+static ssize_t adomain_write(struct file * file, char const __user * buf, -+ size_t count, loff_t * offset) -+{ -+ char *tmpbuf; -+ char *startp, *endp; -+ int i; -+ unsigned long val; -+ ssize_t retval = count; -+ -+ if (*offset) -+ return -EINVAL; -+ if (count > TMPBUFSIZE - 1) -+ return -EINVAL; -+ -+ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) -+ return -ENOMEM; -+ -+ if (copy_from_user(tmpbuf, buf, count)) { -+ kfree(tmpbuf); -+ return -EFAULT; -+ } -+ tmpbuf[count] = 0; -+ -+ mutex_lock(&adom_mutex); -+ -+ startp = tmpbuf; -+ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */ -+ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) { -+ val = simple_strtoul(startp, &endp, 0); -+ if (endp == startp) -+ break; -+ while (ispunct(*endp) || isspace(*endp)) -+ endp++; -+ active_domains[i] = val; -+ if (active_domains[i] != val) -+ /* Overflow, force error below */ -+ i = MAX_OPROF_DOMAINS + 1; -+ startp = endp; -+ } -+ /* Force error on trailing junk */ -+ adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i; -+ -+ kfree(tmpbuf); -+ -+ if (adomains > MAX_OPROF_DOMAINS -+ || oprofile_set_active(active_domains, adomains)) { -+ adomains = 0; -+ retval = -EINVAL; -+ } -+ -+ mutex_unlock(&adom_mutex); -+ return retval; -+} -+ -+static ssize_t adomain_read(struct file * file, char __user * buf, -+ size_t count, loff_t * offset) -+{ -+ char * tmpbuf; -+ size_t len; -+ int i; -+ ssize_t retval; -+ -+ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) -+ return -ENOMEM; -+ -+ mutex_lock(&adom_mutex); -+ -+ len = 0; -+ for (i = 0; i < adomains; i++) -+ len += snprintf(tmpbuf + len, -+ len < TMPBUFSIZE ? 
TMPBUFSIZE - len : 0, -+ "%u ", active_domains[i]); -+ WARN_ON(len > TMPBUFSIZE); -+ if (len != 0 && len <= TMPBUFSIZE) -+ tmpbuf[len-1] = '\n'; -+ -+ mutex_unlock(&adom_mutex); -+ -+ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len); -+ -+ kfree(tmpbuf); -+ return retval; -+} -+ -+ -+static struct file_operations active_domain_ops = { -+ .read = adomain_read, -+ .write = adomain_write, -+}; -+ -+static unsigned int pdomains = 0; -+static int passive_domains[MAX_OPROF_DOMAINS]; -+static DEFINE_MUTEX(pdom_mutex); -+ -+static ssize_t pdomain_write(struct file * file, char const __user * buf, -+ size_t count, loff_t * offset) -+{ -+ char *tmpbuf; -+ char *startp, *endp; -+ int i; -+ unsigned long val; -+ ssize_t retval = count; -+ -+ if (*offset) -+ return -EINVAL; -+ if (count > TMPBUFSIZE - 1) -+ return -EINVAL; -+ -+ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) -+ return -ENOMEM; -+ -+ if (copy_from_user(tmpbuf, buf, count)) { -+ kfree(tmpbuf); -+ return -EFAULT; -+ } -+ tmpbuf[count] = 0; -+ -+ mutex_lock(&pdom_mutex); -+ -+ startp = tmpbuf; -+ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */ -+ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) { -+ val = simple_strtoul(startp, &endp, 0); -+ if (endp == startp) -+ break; -+ while (ispunct(*endp) || isspace(*endp)) -+ endp++; -+ passive_domains[i] = val; -+ if (passive_domains[i] != val) -+ /* Overflow, force error below */ -+ i = MAX_OPROF_DOMAINS + 1; -+ startp = endp; -+ } -+ /* Force error on trailing junk */ -+ pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i; -+ -+ kfree(tmpbuf); -+ -+ if (pdomains > MAX_OPROF_DOMAINS -+ || oprofile_set_passive(passive_domains, pdomains)) { -+ pdomains = 0; -+ retval = -EINVAL; -+ } -+ -+ mutex_unlock(&pdom_mutex); -+ return retval; -+} -+ -+static ssize_t pdomain_read(struct file * file, char __user * buf, -+ size_t count, loff_t * offset) -+{ -+ char * tmpbuf; -+ size_t len; -+ int i; -+ ssize_t retval; -+ -+ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL))) -+ return -ENOMEM; -+ -+ mutex_lock(&pdom_mutex); -+ -+ len = 0; -+ for (i = 0; i < pdomains; i++) -+ len += snprintf(tmpbuf + len, -+ len < TMPBUFSIZE ? 
TMPBUFSIZE - len : 0, -+ "%u ", passive_domains[i]); -+ WARN_ON(len > TMPBUFSIZE); -+ if (len != 0 && len <= TMPBUFSIZE) -+ tmpbuf[len-1] = '\n'; -+ -+ mutex_unlock(&pdom_mutex); -+ -+ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len); -+ -+ kfree(tmpbuf); -+ return retval; -+} -+ -+static struct file_operations passive_domain_ops = { -+ .read = pdomain_read, -+ .write = pdomain_write, -+}; -+ - void oprofile_create_files(struct super_block * sb, struct dentry * root) - { - oprofilefs_create_file(sb, root, "enable", &enable_fops); - oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666); -+ oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops); -+ oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops); - oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops); - oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size); - oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed); -diff -pruN ../orig-linux-2.6.18/include/linux/oprofile.h ./include/linux/oprofile.h ---- ../orig-linux-2.6.18/include/linux/oprofile.h 2006-09-20 04:42:06.000000000 +0100 -+++ ./include/linux/oprofile.h 2007-01-12 18:18:50.000000000 +0000 -@@ -16,6 +16,8 @@ - #include <linux/types.h> - #include <linux/spinlock.h> - #include <asm/atomic.h> -+ -+#include <xen/interface/xenoprof.h> - - struct super_block; - struct dentry; -@@ -27,6 +29,11 @@ struct oprofile_operations { - /* create any necessary configuration files in the oprofile fs. - * Optional. */ - int (*create_files)(struct super_block * sb, struct dentry * root); -+ /* setup active domains with Xen */ -+ int (*set_active)(int *active_domains, unsigned int adomains); -+ /* setup passive domains with Xen */ -+ int (*set_passive)(int *passive_domains, unsigned int pdomains); -+ - /* Do any necessary interrupt setup. Optional. */ - int (*setup)(void); - /* Do any necessary interrupt shutdown. Optional. */ -@@ -78,6 +85,8 @@ void oprofile_add_pc(unsigned long pc, i - /* add a backtrace entry, to be called from the ->backtrace callback */ - void oprofile_add_trace(unsigned long eip); - -+/* add a domain switch entry */ -+int oprofile_add_domain_switch(int32_t domain_id); - - /** - * Create a file of the given name as a child of the given root, with _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog