[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] Merged.
# HG changeset patch # User emellor@xxxxxxxxxxxxxxxxxxxxxx # Node ID 5262bd9e9d1939baa369f4ec8cfcee341853cf47 # Parent c028ba081c8495fb26f5797fadd673a49d5c87bb # Parent b470657718fec949adbaa9cdba6a202f8314dd4b Merged. diff -r c028ba081c84 -r 5262bd9e9d19 buildconfigs/mk.linux-2.6-xen --- a/buildconfigs/mk.linux-2.6-xen Mon Feb 27 16:26:16 2006 +++ b/buildconfigs/mk.linux-2.6-xen Mon Feb 27 17:44:38 2006 @@ -2,8 +2,8 @@ OS = linux LINUX_SERIES = 2.6 -LINUX_VER = 2.6.16-rc4 -LINUX_SRCS = linux-2.6.15.tar.bz2 patch-2.6.16-rc4.bz2 +LINUX_VER = 2.6.16-rc5 +LINUX_SRCS = linux-2.6.15.tar.bz2 patch-2.6.16-rc5.bz2 LINUX_PDIR = linux-$(LINUX_VER) EXTRAVERSION ?= xen @@ -34,7 +34,7 @@ touch $(@D)/.hgskip touch $@ -pristine-linux-%.16-rc4/.valid-pristine: pristine-$(LINUX_PDIR)/.valid-srcs +pristine-linux-%.16-rc5/.valid-pristine: pristine-$(LINUX_PDIR)/.valid-srcs touch $@ # update timestamp to avoid rebuild $(LINUX_DIR)/include/linux/autoconf.h: ref-$(OS)-$(LINUX_VER)/.valid-ref diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/arch/i386/Kconfig --- a/linux-2.6-xen-sparse/arch/i386/Kconfig Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/arch/i386/Kconfig Mon Feb 27 17:44:38 2006 @@ -770,7 +770,7 @@ config HOTPLUG_CPU bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" - depends on SMP && HOTPLUG && EXPERIMENTAL + depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER ---help--- Say Y here to experiment with turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. @@ -1122,6 +1122,7 @@ config KPROBES bool "Kprobes (EXPERIMENTAL)" + depends on EXPERIMENTAL && MODULES help Kprobes allows you to trap at almost any kernel address and execute a callback function. register_kprobe() establishes diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/arch/i386/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/i386/kernel/Makefile Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/Makefile Mon Feb 27 17:44:38 2006 @@ -7,7 +7,7 @@ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ - quirks.o i8237.o + quirks.o i8237.o topology.o obj-y += cpu/ obj-y += timers/ diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot-xen.c Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot-xen.c Mon Feb 27 17:44:38 2006 @@ -44,9 +44,6 @@ extern int gsi_irq_sharing(int gsi); #include <asm/proto.h> -static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } - - #else /* X86 */ #ifdef CONFIG_X86_LOCAL_APIC diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c Mon Feb 27 17:44:38 2006 @@ -4,6 +4,7 @@ #include <linux/smp.h> #include <linux/module.h> #include <linux/percpu.h> +#include <linux/bootmem.h> #include <asm/semaphore.h> #include <asm/processor.h> #include <asm/i387.h> @@ -18,6 +19,9 @@ #include <asm/hypervisor.h> #include "cpu.h" + +DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); +EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); #ifndef CONFIG_XEN DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); @@ -598,6 +602,8 @@ struct tss_struct * t = &per_cpu(init_tss, cpu); #endif struct thread_struct *thread = ¤t->thread; + struct desc_struct *gdt; + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); if (cpu_test_and_set(cpu, cpu_initialized)) { printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); @@ -614,7 +620,54 @@ set_in_cr4(X86_CR4_TSD); } - cpu_gdt_init(&cpu_gdt_descr[cpu]); +#ifndef CONFIG_XEN + /* + * This is a horrible hack to allocate the GDT. The problem + * is that cpu_init() is called really early for the boot CPU + * (and hence needs bootmem) but much later for the secondary + * CPUs, when bootmem will have gone away + */ + if (NODE_DATA(0)->bdata->node_bootmem_map) { + gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); + /* alloc_bootmem_pages panics on failure, so no check */ + memset(gdt, 0, PAGE_SIZE); + } else { + gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); + if (unlikely(!gdt)) { + printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); + for (;;) + local_irq_enable(); + } + } + + /* + * Initialize the per-CPU GDT with the boot GDT, + * and set up the GDT descriptor: + */ + memcpy(gdt, cpu_gdt_table, GDT_SIZE); + + /* Set up GDT entry for 16bit stack */ + *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |= + ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) | + ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | + (CPU_16BIT_STACK_SIZE - 1); + + cpu_gdt_descr->size = GDT_SIZE - 1; + cpu_gdt_descr->address = (unsigned long)gdt; +#else + if (cpu == 0 && cpu_gdt_descr->address == 0) { + gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); + /* alloc_bootmem_pages panics on failure, so no check */ + memset(gdt, 0, PAGE_SIZE); + + memcpy(gdt, cpu_gdt_table, GDT_SIZE); + + cpu_gdt_descr->size = GDT_SIZE; + cpu_gdt_descr->address = (unsigned long)gdt; + } +#endif + + cpu_gdt_init(cpu_gdt_descr); /* * Set up and load the per-CPU TSS and LDT diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S --- a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Mon Feb 27 17:44:38 2006 @@ -87,19 +87,9 @@ */ .data - ALIGN - .word 0 # 32 bit align gdt_desc.address - .globl cpu_gdt_descr -cpu_gdt_descr: - .word GDT_SIZE - .long cpu_gdt_table - - .fill NR_CPUS-1,8,0 # space for the other GDT descriptors - /* * The Global Descriptor Table contains 28 quadwords, per-CPU. */ - .align PAGE_SIZE_asm ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ .quad 0x0000000000000000 /* 0x0b reserved */ @@ -148,10 +138,6 @@ .quad 0x0000000000000000 /* 0xf0 - unused */ .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ - /* Be sure this is zeroed to avoid false validations in Xen */ - .fill PAGE_SIZE_asm / 8 - GDT_ENTRIES,8,0 - - /* * __xen_guest information */ diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c Mon Feb 27 17:44:38 2006 @@ -2634,8 +2634,10 @@ spin_unlock_irqrestore(&ioapic_lock, flags); /* Sanity check */ - if (reg_00.bits.ID != apic_id) - panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic); + if (reg_00.bits.ID != apic_id) { + printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); + return -1; + } } apic_printk(APIC_VERBOSE, KERN_INFO diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/arch/i386/kernel/mpparse-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/mpparse-xen.c Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/mpparse-xen.c Mon Feb 27 17:44:38 2006 @@ -935,6 +935,7 @@ u32 gsi_base) { int idx = 0; + int tmpid; if (nr_ioapics >= MAX_IO_APICS) { printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " @@ -957,9 +958,14 @@ set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); #endif if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15)) - mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); + tmpid = io_apic_get_unique_id(idx, id); else - mp_ioapics[idx].mpc_apicid = id; + tmpid = id; + if (tmpid == -1) { + nr_ioapics--; + return; + } + mp_ioapics[idx].mpc_apicid = tmpid; mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); /* diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/arch/i386/kernel/smpboot.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/smpboot.c Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/smpboot.c Mon Feb 27 17:44:38 2006 @@ -898,12 +898,6 @@ unsigned long start_eip; unsigned short nmi_high = 0, nmi_low = 0; - if (!cpu_gdt_descr[cpu].address && - !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { - printk("Failed to allocate GDT for CPU %d\n", cpu); - return 1; - } - ++cpucount; /* diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/arch/i386/mach-xen/Makefile --- a/linux-2.6-xen-sparse/arch/i386/mach-xen/Makefile Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/arch/i386/mach-xen/Makefile Mon Feb 27 17:44:38 2006 @@ -2,6 +2,4 @@ # Makefile for the linux kernel. # -obj-y := setup.o topology.o - -topology-y := ../mach-default/topology.o +obj-y := setup.o diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/arch/x86_64/Kconfig --- a/linux-2.6-xen-sparse/arch/x86_64/Kconfig Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/Kconfig Mon Feb 27 17:44:38 2006 @@ -381,21 +381,6 @@ as it is off-chip. You can find the HPET spec at <http://www.intel.com/hardwaredesign/hpetspec.htm>. -config X86_PM_TIMER - bool "PM timer" if EMBEDDED - depends on ACPI && !X86_64_XEN - default y - help - Support the ACPI PM timer for time keeping. This is slow, - but is useful on some chipsets without HPET on systems with more - than one CPU. On a single processor or single socket multi core - system it is normally not required. - When the PM timer is active 64bit vsyscalls are disabled - and should not be enabled (/proc/sys/kernel/vsyscall64 should - not be changed). - The kernel selects the PM timer only as a last resort, so it is - useful to enable just in case. - config HPET_EMULATE_RTC bool "Provide RTC interrupt" depends on HPET_TIMER && RTC=y @@ -640,6 +625,7 @@ config KPROBES bool "Kprobes (EXPERIMENTAL)" + depends on EXPERIMENTAL && MODULES help Kprobes allows you to trap at almost any kernel address and execute a callback function. register_kprobe() establishes diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile Mon Feb 27 17:44:38 2006 @@ -45,7 +45,7 @@ bootflag-y += ../../i386/kernel/bootflag.o cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o -topology-y += ../../i386/mach-default/topology.o +topology-y += ../../i386/kernel/topology.o microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o quirks-y += ../../i386/kernel/quirks.o diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c Mon Feb 27 17:44:38 2006 @@ -114,6 +114,8 @@ irq_exit(); } +int __initdata unsync_tsc_on_multicluster; + /* * This interrupt should _never_ happen with our APIC/SMP architecture */ diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c Mon Feb 27 17:44:38 2006 @@ -51,6 +51,8 @@ int disable_timer_pin_1 __initdata; #ifndef CONFIG_XEN +int timer_over_8254 __initdata = 1; + /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; #endif @@ -300,6 +302,22 @@ __setup("noapic", disable_ioapic_setup); __setup("apic", enable_ioapic_setup); + +#ifndef CONFIG_XEN +static int __init setup_disable_8254_timer(char *s) +{ + timer_over_8254 = -1; + return 1; +} +static int __init setup_enable_8254_timer(char *s) +{ + timer_over_8254 = 2; + return 1; +} + +__setup("disable_8254_timer", setup_disable_8254_timer); +__setup("enable_8254_timer", setup_enable_8254_timer); +#endif /* !CONFIG_XEN */ #include <asm/pci-direct.h> #include <linux/pci_ids.h> @@ -360,27 +378,20 @@ /* RED-PEN skip them on mptables too? */ return; case PCI_VENDOR_ID_ATI: + + /* This should be actually default, but + for 2.6.16 let's do it for ATI only where + it's really needed. */ #ifndef CONFIG_XEN - if (apic_runs_main_timer != 0) - break; -#ifdef CONFIG_ACPI - /* Don't do this for laptops right - right now because their timer - doesn't necessarily tick in C2/3 */ - if (acpi_fadt.revision >= 3 && - (acpi_fadt.plvl2_lat + acpi_fadt.plvl3_lat) < 1100) { - printk(KERN_INFO -"ATI board detected, but seems to be a laptop. Timer might be shakey, sorry\n"); - break; - } -#endif + if (timer_over_8254 == 1) { + timer_over_8254 = 0; printk(KERN_INFO - "ATI board detected. Using APIC/PM timer.\n"); - apic_runs_main_timer = 1; - nohpet = 1; + "ATI board detected. Disabling timer routing over 8254.\n"); + } #endif return; } + /* No multi-function device? */ type = read_pci_config_byte(num,slot,func, @@ -1848,6 +1859,8 @@ * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast * fanatically on his truly buggy board. + * + * FIXME: really need to revamp this for modern platforms only. */ static inline void check_timer(void) { @@ -1870,7 +1883,8 @@ */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - enable_8259A_irq(0); + if (timer_over_8254 > 0) + enable_8259A_irq(0); pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -1925,7 +1939,7 @@ } printk(" failed.\n"); - if (nmi_watchdog) { + if (nmi_watchdog == NMI_IO_APIC) { printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); nmi_watchdog = 0; } diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Mon Feb 27 17:44:38 2006 @@ -462,6 +462,12 @@ else if(!memcmp(from, "elfcorehdr=", 11)) elfcorehdr_addr = memparse(from+11, &from); #endif + +#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_XEN) + else if (!memcmp(from, "additional_cpus=", 16)) + setup_additional_cpus(from+16); +#endif + next_char: c = *(from++); if (!c) diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/drivers/acpi/Kconfig --- a/linux-2.6-xen-sparse/drivers/acpi/Kconfig Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/drivers/acpi/Kconfig Mon Feb 27 17:44:38 2006 @@ -247,7 +247,7 @@ Enter the full path name to the file wich includes the AmlCode declaration. config ACPI_BLACKLIST_YEAR - int "Disable ACPI for systems before Jan 1st this year" if X86 + int "Disable ACPI for systems before Jan 1st this year" if X86_32 default 0 help enter a 4-digit year, eg. 2001 to disable ACPI by default @@ -285,9 +285,9 @@ dump your ACPI DSDT table using /proc/acpi/dsdt. config X86_PM_TIMER - bool "Power Management Timer Support" - depends on X86 - depends on !X86_64 + bool "Power Management Timer Support" if EMBEDDED + depends on X86 + depends on !XEN default y help The Power Management Timer is available on all ACPI-capable, @@ -298,9 +298,8 @@ voltage scaling, unlike the commonly used Time Stamp Counter (TSC) timing source. - So, if you see messages like 'Losing too many ticks!' in the - kernel logs, and/or you are using this on a notebook which - does not yet have an HPET, you should say "Y" here. + You should nearly always say Y here because many modern + systems require this timer. config ACPI_CONTAINER tristate "ACPI0004,PNP0A05 and PNP0A06 Container Driver (EXPERIMENTAL)" diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/drivers/video/Kconfig --- a/linux-2.6-xen-sparse/drivers/video/Kconfig Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/drivers/video/Kconfig Mon Feb 27 17:44:38 2006 @@ -520,7 +520,7 @@ config FB_GBE_MEM int "Video memory size in MB" depends on FB_GBE - default 8 + default 4 help This is the amount of memory reserved for the framebuffer, which can be any value between 1MB and 8MB. diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/drivers/xen/core/smpboot.c --- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Mon Feb 27 17:44:38 2006 @@ -150,6 +150,11 @@ { vcpu_guest_context_t ctxt; struct task_struct *idle = idle_task(vcpu); +#ifdef __x86_64__ + struct desc_ptr *gdt_descr = &cpu_gdt_descr[vcpu]; +#else + struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, vcpu); +#endif if (vcpu == 0) return; @@ -171,8 +176,8 @@ ctxt.ldt_ents = 0; - ctxt.gdt_frames[0] = virt_to_mfn(cpu_gdt_descr[vcpu].address); - ctxt.gdt_ents = cpu_gdt_descr[vcpu].size / 8; + ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address); + ctxt.gdt_ents = gdt_descr->size / 8; #ifdef __i386__ ctxt.user_regs.cs = __KERNEL_CS; @@ -210,6 +215,11 @@ { int cpu; struct task_struct *idle; +#ifdef __x86_64__ + struct desc_ptr *gdt_descr; +#else + struct Xgt_desc_struct *gdt_descr; +#endif cpu_data[0] = boot_cpu_data; @@ -225,6 +235,22 @@ for_each_cpu_mask (cpu, cpu_possible_map) { if (cpu == 0) continue; + +#ifdef __x86_64__ + gdt_descr = &cpu_gdt_descr[cpu]; +#else + gdt_descr = &per_cpu(cpu_gdt_descr, cpu); +#endif + gdt_descr->address = get_zeroed_page(GFP_KERNEL); + if (unlikely(!gdt_descr->address)) { + printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); + continue; + } + gdt_descr->size = GDT_SIZE; + memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE); + make_page_readonly( + (void *)gdt_descr->address, + XENFEAT_writable_descriptor_tables); cpu_data[cpu] = boot_cpu_data; cpu_2_logical_apicid[cpu] = cpu; @@ -241,17 +267,6 @@ #endif irq_ctx_init(cpu); - - cpu_gdt_descr[cpu].address = - __get_free_page(GFP_KERNEL|__GFP_ZERO); - BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE); - cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size; - memcpy((void *)cpu_gdt_descr[cpu].address, - (void *)cpu_gdt_descr[0].address, - cpu_gdt_descr[0].size); - make_page_readonly( - (void *)cpu_gdt_descr[cpu].address, - XENFEAT_writable_descriptor_tables); #ifdef CONFIG_HOTPLUG_CPU if (xen_start_info->flags & SIF_INITDOMAIN) diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h Mon Feb 27 17:44:38 2006 @@ -23,11 +23,13 @@ unsigned short pad; } __attribute__ ((packed)); -extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS]; +extern struct Xgt_desc_struct idt_descr; +DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); + static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) { - return ((struct desc_struct *)cpu_gdt_descr[cpu].address); + return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; } #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pci.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pci.h Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pci.h Mon Feb 27 17:44:38 2006 @@ -18,8 +18,6 @@ #define pcibios_assign_all_busses() 0 #endif #define pcibios_scan_all_fns(a, b) 0 - -extern int no_iommu, force_iommu; extern unsigned long pci_mem_start; #define PCIBIOS_MIN_IO 0x1000 diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h Mon Feb 27 17:44:38 2006 @@ -169,7 +169,7 @@ #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) +#define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1) #define FIRST_USER_ADDRESS 0 #ifndef __ASSEMBLY__ diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/include/linux/mm.h --- a/linux-2.6-xen-sparse/include/linux/mm.h Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/include/linux/mm.h Mon Feb 27 17:44:38 2006 @@ -1064,7 +1064,11 @@ void drop_pagecache(void); void drop_slab(void); +#ifndef CONFIG_MMU +#define randomize_va_space 0 +#else extern int randomize_va_space; +#endif #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/mm/page_alloc.c --- a/linux-2.6-xen-sparse/mm/page_alloc.c Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/mm/page_alloc.c Mon Feb 27 17:44:38 2006 @@ -1017,7 +1017,7 @@ if (page) goto got_pg; - out_of_memory(gfp_mask, order); + out_of_memory(zonelist, gfp_mask, order); goto restart; } diff -r c028ba081c84 -r 5262bd9e9d19 linux-2.6-xen-sparse/net/core/skbuff.c --- a/linux-2.6-xen-sparse/net/core/skbuff.c Mon Feb 27 16:26:16 2006 +++ b/linux-2.6-xen-sparse/net/core/skbuff.c Mon Feb 27 17:44:38 2006 @@ -434,6 +434,9 @@ C(pkt_type); C(ip_summed); C(priority); +#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) + C(ipvs_property); +#endif C(protocol); n->destructor = NULL; #ifdef CONFIG_NETFILTER @@ -441,13 +444,6 @@ C(nfct); nf_conntrack_get(skb->nfct); C(nfctinfo); -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - C(nfct_reasm); - nf_conntrack_get_reasm(skb->nfct_reasm); -#endif -#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) - C(ipvs_property); -#endif #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) C(nfct_reasm); nf_conntrack_get_reasm(skb->nfct_reasm); diff -r c028ba081c84 -r 5262bd9e9d19 tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Mon Feb 27 16:26:16 2006 +++ b/tools/libxc/xc_linux_build.c Mon Feb 27 17:44:38 2006 @@ -45,6 +45,77 @@ #ifdef __ia64__ #define probe_aout9(image,image_size,load_funcs) 1 #endif + +static const char *feature_names[XENFEAT_NR_SUBMAPS*32] = { + [XENFEAT_writable_page_tables] = "writable_page_tables", + [XENFEAT_writable_descriptor_tables] = "writable_descriptor_tables", + [XENFEAT_auto_translated_physmap] = "auto_translated_physmap", + [XENFEAT_supervisor_mode_kernel] = "supervisor_mode_kernel", + [XENFEAT_pae_pgdir_above_4gb] = "pae_pgdir_above_4gb" +}; + +static inline void set_feature_bit (int nr, uint32_t *addr) +{ + addr[nr>>5] |= (1<<(nr&31)); +} + +static inline int test_feature_bit(int nr, uint32_t *addr) +{ + return !!(addr[nr>>5] & (1<<(nr&31))); +} + +static int parse_features( + const char *feats, + uint32_t supported[XENFEAT_NR_SUBMAPS], + uint32_t required[XENFEAT_NR_SUBMAPS]) +{ + const char *end, *p; + int i, req; + + if ( (end = strchr(feats, ',')) == NULL ) + end = feats + strlen(feats); + + while ( feats < end ) + { + p = strchr(feats, '|'); + if ( (p == NULL) || (p > end) ) + p = end; + + req = (*feats == '!'); + if ( req ) + feats++; + + for ( i = 0; i < XENFEAT_NR_SUBMAPS*32; i++ ) + { + if ( feature_names[i] == NULL ) + continue; + + if ( strncmp(feature_names[i], feats, p-feats) == 0 ) + { + set_feature_bit(i, supported); + if ( required && req ) + set_feature_bit(i, required); + break; + } + } + + if ( i == XENFEAT_NR_SUBMAPS*32 ) + { + ERROR("Unknown feature \"%.*s\".\n", (int)(p-feats), feats); + if ( req ) + { + ERROR("Kernel requires an unknown hypervisor feature.\n"); + return -EINVAL; + } + } + + feats = p; + if ( *feats == '|' ) + feats++; + } + + return -EINVAL; +} static int probeimageformat(char *image, unsigned long image_size, @@ -344,7 +415,8 @@ unsigned long shared_info_frame, unsigned long flags, unsigned int store_evtchn, unsigned long *store_mfn, - unsigned int console_evtchn, unsigned long *console_mfn) + unsigned int console_evtchn, unsigned long *console_mfn, + uint32_t required_features[XENFEAT_NR_SUBMAPS]) { unsigned long *page_array = NULL; struct load_funcs load_funcs; @@ -483,7 +555,8 @@ unsigned long shared_info_frame, unsigned long flags, unsigned int store_evtchn, unsigned long *store_mfn, - unsigned int console_evtchn, unsigned long *console_mfn) + unsigned int console_evtchn, unsigned long *console_mfn, + uint32_t required_features[XENFEAT_NR_SUBMAPS]) { unsigned long *page_array = NULL; unsigned long count, i, hypercall_pfn; @@ -515,8 +588,9 @@ unsigned long vpt_start; unsigned long vpt_end; unsigned long v_end; - unsigned shadow_mode_enabled; unsigned long guest_store_mfn, guest_console_mfn, guest_shared_info_mfn; + unsigned long shadow_mode_enabled; + uint32_t supported_features[XENFEAT_NR_SUBMAPS] = { 0, }; rc = probeimageformat(image, image_size, &load_funcs); if ( rc != 0 ) @@ -534,8 +608,6 @@ goto error_out; } - shadow_mode_enabled = !!strstr(dsi.xen_guest_string, - "SHADOW=translate"); /* * Why do we need this? The number of page-table frames depends on the * size of the bootstrap address space. But the size of the address space @@ -637,6 +709,35 @@ (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array, &dsi); + /* Parse and validate kernel features. */ + p = strstr(dsi.xen_guest_string, "FEATURES="); + if ( p != NULL ) + { + if ( !parse_features(p + strlen("FEATURES="), + supported_features, + required_features) ) + { + ERROR("Failed to parse guest kernel features.\n"); + goto error_out; + } + + fprintf(stderr, "Supported features = { %08x }.\n", + supported_features[0]); + fprintf(stderr, "Required features = { %08x }.\n", + required_features[0]); + } + + for ( i = 0; i < XENFEAT_NR_SUBMAPS; i++ ) + { + if ( (supported_features[i]&required_features[i]) != required_features[i] ) + { + ERROR("Guest kernel does not support a required feature.\n"); + goto error_out; + } + } + + shadow_mode_enabled = test_feature_bit(XENFEAT_auto_translated_physmap, required_features); + /* Load the initial ramdisk image. */ if ( initrd_len != 0 ) { @@ -870,6 +971,7 @@ const char *image_name, const char *ramdisk_name, const char *cmdline, + const char *features, unsigned long flags, unsigned int store_evtchn, unsigned long *store_mfn, @@ -886,6 +988,16 @@ char *image = NULL; unsigned long image_size, initrd_size=0; unsigned long vstartinfo_start, vkern_entry, vstack_start; + uint32_t features_bitmap[XENFEAT_NR_SUBMAPS] = { 0, }; + + if ( features != NULL ) + { + if ( !parse_features(features, features_bitmap, NULL) ) + { + PERROR("Failed to parse configured features\n"); + goto error_out; + } + } if ( (nr_pages = get_tot_pages(xc_handle, domid)) < 0 ) { @@ -940,7 +1052,8 @@ &vstack_start, ctxt, cmdline, op.u.getdomaininfo.shared_info_frame, flags, store_evtchn, store_mfn, - console_evtchn, console_mfn) < 0 ) + console_evtchn, console_mfn, + features_bitmap) < 0 ) { ERROR("Error constructing guest OS"); goto error_out; diff -r c028ba081c84 -r 5262bd9e9d19 tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Mon Feb 27 16:26:16 2006 +++ b/tools/libxc/xenguest.h Mon Feb 27 17:44:38 2006 @@ -47,6 +47,7 @@ const char *image_name, const char *ramdisk_name, const char *cmdline, + const char *features, unsigned long flags, unsigned int store_evtchn, unsigned long *store_mfn, diff -r c028ba081c84 -r 5262bd9e9d19 tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Mon Feb 27 16:26:16 2006 +++ b/tools/python/xen/lowlevel/xc/xc.c Mon Feb 27 17:44:38 2006 @@ -326,27 +326,29 @@ PyObject *kwds) { uint32_t dom; - char *image, *ramdisk = NULL, *cmdline = ""; + char *image, *ramdisk = NULL, *cmdline = "", *features = NULL; int flags = 0; int store_evtchn, console_evtchn; unsigned long store_mfn = 0; unsigned long console_mfn = 0; - static char *kwd_list[] = { "dom", "store_evtchn", - "console_evtchn", "image", + static char *kwd_list[] = { "dom", "store_evtchn", + "console_evtchn", "image", /* optional */ - "ramdisk", "cmdline", "flags", NULL }; - - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiis|ssi", kwd_list, + "ramdisk", "cmdline", "flags", + "features", NULL }; + + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiis|ssis", kwd_list, &dom, &store_evtchn, - &console_evtchn, &image, + &console_evtchn, &image, /* optional */ - &ramdisk, &cmdline, &flags) ) + &ramdisk, &cmdline, &flags, + &features) ) return NULL; if ( xc_linux_build(self->xc_handle, dom, image, - ramdisk, cmdline, flags, - store_evtchn, &store_mfn, + ramdisk, cmdline, features, flags, + store_evtchn, &store_mfn, console_evtchn, &console_mfn) != 0 ) { if (!errno) errno = EINVAL; diff -r c028ba081c84 -r 5262bd9e9d19 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Mon Feb 27 16:26:16 2006 +++ b/tools/python/xen/xend/image.py Mon Feb 27 17:44:38 2006 @@ -68,6 +68,7 @@ self.kernel = None self.ramdisk = None self.cmdline = None + self.features = None self.configure(imageConfig, deviceConfig) @@ -89,6 +90,7 @@ if args: self.cmdline += " " + args self.ramdisk = get_cfg("ramdisk", '') + self.features = get_cfg("features", '') self.vm.storeVm(("image/ostype", self.ostype), ("image/kernel", self.kernel), @@ -175,13 +177,15 @@ log.debug("cmdline = %s", self.cmdline) log.debug("ramdisk = %s", self.ramdisk) log.debug("vcpus = %d", self.vm.getVCpuCount()) + log.debug("features = %s", self.features) return xc.linux_build(dom = self.vm.getDomid(), image = self.kernel, store_evtchn = store_evtchn, console_evtchn = console_evtchn, cmdline = self.cmdline, - ramdisk = self.ramdisk) + ramdisk = self.ramdisk, + features = self.features) class HVMImageHandler(ImageHandler): diff -r c028ba081c84 -r 5262bd9e9d19 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Mon Feb 27 16:26:16 2006 +++ b/tools/python/xen/xm/create.py Mon Feb 27 17:44:38 2006 @@ -137,6 +137,10 @@ fn=set_value, default='', use="Path to ramdisk.") +gopts.var('features', val='FEATURES', + fn=set_value, default='', + use="Features to enable in guest kernel") + gopts.var('builder', val='FUNCTION', fn=set_value, default='linux', use="Function to use to build the domain.") @@ -445,6 +449,8 @@ config_image.append(['root', cmdline_root]) if vals.extra: config_image.append(['args', vals.extra]) + if vals.features: + config_image.append(['features', vals.features]) if vals.builder == 'hvm': configure_hvm(config_image, vals) diff -r c028ba081c84 -r 5262bd9e9d19 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Mon Feb 27 16:26:16 2006 +++ b/xen/arch/x86/domain_build.c Mon Feb 27 17:44:38 2006 @@ -802,6 +802,14 @@ v->arch.guest_context.user_regs.fs &= ~3; v->arch.guest_context.user_regs.gs &= ~3; printk("Dom0 runs in ring 0 (supervisor mode)\n"); + if ( !test_bit(XENFEAT_supervisor_mode_kernel, + dom0_features_supported) ) + panic("Dom0 does not support supervisor-mode execution\n"); + } + else + { + if ( test_bit(XENFEAT_supervisor_mode_kernel, dom0_features_required) ) + panic("Dom0 requires supervisor-mode execution\n"); } rc = 0; diff -r c028ba081c84 -r 5262bd9e9d19 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Mon Feb 27 16:26:16 2006 +++ b/xen/arch/x86/mm.c Mon Feb 27 17:44:38 2006 @@ -1570,42 +1570,68 @@ unsigned long old_base_mfn; if ( shadow_mode_refcounts(d) ) + { okay = get_page_from_pagenr(mfn, d); + if ( unlikely(!okay) ) + { + MEM_LOG("Error while installing new baseptr %lx", mfn); + return 0; + } + } else + { okay = get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d); - - if ( likely(okay) ) - { - invalidate_shadow_ldt(v); - - old_base_mfn = pagetable_get_pfn(v->arch.guest_table); - v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT); - update_pagetables(v); /* update shadow_table and monitor_table */ - - write_ptbase(v); - + if ( unlikely(!okay) ) + { + /* Switch to idle pagetable: this VCPU has no active p.t. now. */ + old_base_mfn = pagetable_get_pfn(v->arch.guest_table); + v->arch.guest_table = mk_pagetable(0); + update_pagetables(v); + write_cr3(__pa(idle_pg_table)); + if ( old_base_mfn != 0 ) + put_page_and_type(mfn_to_page(old_base_mfn)); + + /* Retry the validation with no active p.t. for this VCPU. */ + okay = get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d); + if ( !okay ) + { + /* Failure here is unrecoverable: the VCPU has no pagetable! */ + MEM_LOG("Fatal error while installing new baseptr %lx", mfn); + domain_crash(d); + percpu_info[v->processor].deferred_ops = 0; + return 0; + } + } + } + + invalidate_shadow_ldt(v); + + old_base_mfn = pagetable_get_pfn(v->arch.guest_table); + v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT); + update_pagetables(v); /* update shadow_table and monitor_table */ + + write_ptbase(v); + + if ( likely(old_base_mfn != 0) ) + { if ( shadow_mode_refcounts(d) ) put_page(mfn_to_page(old_base_mfn)); else put_page_and_type(mfn_to_page(old_base_mfn)); - - /* CR3 also holds a ref to its shadow... */ - if ( shadow_mode_enabled(d) ) - { - if ( v->arch.monitor_shadow_ref ) - put_shadow_ref(v->arch.monitor_shadow_ref); - v->arch.monitor_shadow_ref = - pagetable_get_pfn(v->arch.monitor_table); - ASSERT(!page_get_owner(mfn_to_page(v->arch.monitor_shadow_ref))); - get_shadow_ref(v->arch.monitor_shadow_ref); - } - } - else - { - MEM_LOG("Error while installing new baseptr %lx", mfn); - } - - return okay; + } + + /* CR3 also holds a ref to its shadow... */ + if ( shadow_mode_enabled(d) ) + { + if ( v->arch.monitor_shadow_ref ) + put_shadow_ref(v->arch.monitor_shadow_ref); + v->arch.monitor_shadow_ref = + pagetable_get_pfn(v->arch.monitor_table); + ASSERT(!page_get_owner(mfn_to_page(v->arch.monitor_shadow_ref))); + get_shadow_ref(v->arch.monitor_shadow_ref); + } + + return 1; } static void process_deferred_ops(unsigned int cpu) @@ -1625,7 +1651,7 @@ else local_flush_tlb(); } - + if ( deferred_ops & DOP_RELOAD_LDT ) (void)map_ldt_shadow_page(0); diff -r c028ba081c84 -r 5262bd9e9d19 patches/linux-2.6.16-rc5/i386-mach-io-check-nmi.patch --- /dev/null Mon Feb 27 16:26:16 2006 +++ b/patches/linux-2.6.16-rc5/i386-mach-io-check-nmi.patch Mon Feb 27 17:44:38 2006 @@ -0,0 +1,45 @@ +diff -pruN ../pristine-linux-2.6.16-rc5/arch/i386/kernel/traps.c ./arch/i386/kernel/traps.c +--- ../pristine-linux-2.6.16-rc5/arch/i386/kernel/traps.c 2006-02-27 15:46:58.000000000 +0000 ++++ ./arch/i386/kernel/traps.c 2006-02-27 15:55:23.000000000 +0000 +@@ -567,18 +567,11 @@ static void mem_parity_error(unsigned ch + + static void io_check_error(unsigned char reason, struct pt_regs * regs) + { +- unsigned long i; +- + printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); + show_registers(regs); + + /* Re-enable the IOCK line, wait for a few seconds */ +- reason = (reason & 0xf) | 8; +- outb(reason, 0x61); +- i = 2000; +- while (--i) udelay(1000); +- reason &= ~8; +- outb(reason, 0x61); ++ clear_io_check_error(reason); + } + + static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) +diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/mach-default/mach_traps.h ./include/asm-i386/mach-default/mach_traps.h +--- ../pristine-linux-2.6.16-rc5/include/asm-i386/mach-default/mach_traps.h 2006-01-03 03:21:10.000000000 +0000 ++++ ./include/asm-i386/mach-default/mach_traps.h 2006-02-27 15:55:23.000000000 +0000 +@@ -15,6 +15,18 @@ static inline void clear_mem_error(unsig + outb(reason, 0x61); + } + ++static inline void clear_io_check_error(unsigned char reason) ++{ ++ unsigned long i; ++ ++ reason = (reason & 0xf) | 8; ++ outb(reason, 0x61); ++ i = 2000; ++ while (--i) udelay(1000); ++ reason &= ~8; ++ outb(reason, 0x61); ++} ++ + static inline unsigned char get_nmi_reason(void) + { + return inb(0x61); diff -r c028ba081c84 -r 5262bd9e9d19 patches/linux-2.6.16-rc5/net-csum.patch --- /dev/null Mon Feb 27 16:26:16 2006 +++ b/patches/linux-2.6.16-rc5/net-csum.patch Mon Feb 27 17:44:38 2006 @@ -0,0 +1,41 @@ +diff -pruN ../pristine-linux-2.6.16-rc5/net/ipv4/netfilter/ip_nat_proto_tcp.c ./net/ipv4/netfilter/ip_nat_proto_tcp.c +--- ../pristine-linux-2.6.16-rc5/net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-02-27 15:47:38.000000000 +0000 ++++ ./net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-02-27 15:55:25.000000000 +0000 +@@ -129,10 +129,14 @@ tcp_manip_pkt(struct sk_buff **pskb, + if (hdrsize < sizeof(*hdr)) + return 1; + +- hdr->check = ip_nat_cheat_check(~oldip, newip, ++ if ((*pskb)->proto_csum_blank) { ++ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); ++ } else { ++ hdr->check = ip_nat_cheat_check(~oldip, newip, + ip_nat_cheat_check(oldport ^ 0xFFFF, + newport, + hdr->check)); ++ } + return 1; + } + +diff -pruN ../pristine-linux-2.6.16-rc5/net/ipv4/netfilter/ip_nat_proto_udp.c ./net/ipv4/netfilter/ip_nat_proto_udp.c +--- ../pristine-linux-2.6.16-rc5/net/ipv4/netfilter/ip_nat_proto_udp.c 2006-02-27 15:47:38.000000000 +0000 ++++ ./net/ipv4/netfilter/ip_nat_proto_udp.c 2006-02-27 15:55:25.000000000 +0000 +@@ -113,11 +113,16 @@ udp_manip_pkt(struct sk_buff **pskb, + newport = tuple->dst.u.udp.port; + portptr = &hdr->dest; + } +- if (hdr->check) /* 0 is a special case meaning no checksum */ +- hdr->check = ip_nat_cheat_check(~oldip, newip, ++ if (hdr->check) { /* 0 is a special case meaning no checksum */ ++ if ((*pskb)->proto_csum_blank) { ++ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); ++ } else { ++ hdr->check = ip_nat_cheat_check(~oldip, newip, + ip_nat_cheat_check(*portptr ^ 0xFFFF, + newport, + hdr->check)); ++ } ++ } + *portptr = newport; + return 1; + } diff -r c028ba081c84 -r 5262bd9e9d19 patches/linux-2.6.16-rc5/pmd-shared.patch --- /dev/null Mon Feb 27 16:26:16 2006 +++ b/patches/linux-2.6.16-rc5/pmd-shared.patch Mon Feb 27 17:44:38 2006 @@ -0,0 +1,111 @@ +diff -pruN ../pristine-linux-2.6.16-rc5/arch/i386/mm/pageattr.c ./arch/i386/mm/pageattr.c +--- ../pristine-linux-2.6.16-rc5/arch/i386/mm/pageattr.c 2006-02-27 15:46:58.000000000 +0000 ++++ ./arch/i386/mm/pageattr.c 2006-02-27 15:55:31.000000000 +0000 +@@ -78,7 +78,7 @@ static void set_pmd_pte(pte_t *kpte, uns + unsigned long flags; + + set_pte_atomic(kpte, pte); /* change init_mm */ +- if (PTRS_PER_PMD > 1) ++ if (HAVE_SHARED_KERNEL_PMD) + return; + + spin_lock_irqsave(&pgd_lock, flags); +diff -pruN ../pristine-linux-2.6.16-rc5/arch/i386/mm/pgtable.c ./arch/i386/mm/pgtable.c +--- ../pristine-linux-2.6.16-rc5/arch/i386/mm/pgtable.c 2006-01-03 03:21:10.000000000 +0000 ++++ ./arch/i386/mm/pgtable.c 2006-02-27 15:55:31.000000000 +0000 +@@ -215,9 +215,10 @@ void pgd_ctor(void *pgd, kmem_cache_t *c + spin_lock_irqsave(&pgd_lock, flags); + } + +- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, +- swapper_pg_dir + USER_PTRS_PER_PGD, +- KERNEL_PGD_PTRS); ++ if (PTRS_PER_PMD == 1 || HAVE_SHARED_KERNEL_PMD) ++ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, ++ swapper_pg_dir + USER_PTRS_PER_PGD, ++ KERNEL_PGD_PTRS); + if (PTRS_PER_PMD > 1) + return; + +@@ -249,6 +250,30 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + goto out_oom; + set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); + } ++ ++ if (!HAVE_SHARED_KERNEL_PMD) { ++ unsigned long flags; ++ ++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { ++ pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); ++ if (!pmd) ++ goto out_oom; ++ set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd))); ++ } ++ ++ spin_lock_irqsave(&pgd_lock, flags); ++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { ++ unsigned long v = (unsigned long)i << PGDIR_SHIFT; ++ pgd_t *kpgd = pgd_offset_k(v); ++ pud_t *kpud = pud_offset(kpgd, v); ++ pmd_t *kpmd = pmd_offset(kpud, v); ++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); ++ memcpy(pmd, kpmd, PAGE_SIZE); ++ } ++ pgd_list_add(pgd); ++ spin_unlock_irqrestore(&pgd_lock, flags); ++ } ++ + return pgd; + + out_oom: +@@ -263,9 +288,23 @@ void pgd_free(pgd_t *pgd) + int i; + + /* in the PAE case user pgd entries are overwritten before usage */ +- if (PTRS_PER_PMD > 1) +- for (i = 0; i < USER_PTRS_PER_PGD; ++i) +- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); ++ if (PTRS_PER_PMD > 1) { ++ for (i = 0; i < USER_PTRS_PER_PGD; ++i) { ++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); ++ kmem_cache_free(pmd_cache, pmd); ++ } ++ if (!HAVE_SHARED_KERNEL_PMD) { ++ unsigned long flags; ++ spin_lock_irqsave(&pgd_lock, flags); ++ pgd_list_del(pgd); ++ spin_unlock_irqrestore(&pgd_lock, flags); ++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { ++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); ++ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); ++ kmem_cache_free(pmd_cache, pmd); ++ } ++ } ++ } + /* in the non-PAE case, free_pgtables() clears user pgd entries */ + kmem_cache_free(pgd_cache, pgd); + } +diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/pgtable-2level-defs.h ./include/asm-i386/pgtable-2level-defs.h +--- ../pristine-linux-2.6.16-rc5/include/asm-i386/pgtable-2level-defs.h 2006-01-03 03:21:10.000000000 +0000 ++++ ./include/asm-i386/pgtable-2level-defs.h 2006-02-27 15:55:31.000000000 +0000 +@@ -1,6 +1,8 @@ + #ifndef _I386_PGTABLE_2LEVEL_DEFS_H + #define _I386_PGTABLE_2LEVEL_DEFS_H + ++#define HAVE_SHARED_KERNEL_PMD 0 ++ + /* + * traditional i386 two-level paging structure: + */ +diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/pgtable-3level-defs.h ./include/asm-i386/pgtable-3level-defs.h +--- ../pristine-linux-2.6.16-rc5/include/asm-i386/pgtable-3level-defs.h 2006-01-03 03:21:10.000000000 +0000 ++++ ./include/asm-i386/pgtable-3level-defs.h 2006-02-27 15:55:31.000000000 +0000 +@@ -1,6 +1,8 @@ + #ifndef _I386_PGTABLE_3LEVEL_DEFS_H + #define _I386_PGTABLE_3LEVEL_DEFS_H + ++#define HAVE_SHARED_KERNEL_PMD 1 ++ + /* + * PGDIR_SHIFT determines what a top-level page table entry can map + */ diff -r c028ba081c84 -r 5262bd9e9d19 patches/linux-2.6.16-rc5/smp-alts.patch --- /dev/null Mon Feb 27 16:26:16 2006 +++ b/patches/linux-2.6.16-rc5/smp-alts.patch Mon Feb 27 17:44:38 2006 @@ -0,0 +1,591 @@ +diff -pruN ../pristine-linux-2.6.16-rc5/arch/i386/Kconfig ./arch/i386/Kconfig +--- ../pristine-linux-2.6.16-rc5/arch/i386/Kconfig 2006-02-27 15:46:58.000000000 +0000 ++++ ./arch/i386/Kconfig 2006-02-27 15:55:34.000000000 +0000 +@@ -202,6 +202,19 @@ config SMP + + If you don't know what to do here, say N. + ++config SMP_ALTERNATIVES ++ bool "SMP alternatives support (EXPERIMENTAL)" ++ depends on SMP && EXPERIMENTAL ++ help ++ Try to reduce the overhead of running an SMP kernel on a uniprocessor ++ host slightly by replacing certain key instruction sequences ++ according to whether we currently have more than one CPU available. ++ This should provide a noticeable boost to performance when ++ running SMP kernels on UP machines, and have negligible impact ++ when running on an true SMP host. ++ ++ If unsure, say N. ++ + config NR_CPUS + int "Maximum number of CPUs (2-255)" + range 2 255 +diff -pruN ../pristine-linux-2.6.16-rc5/arch/i386/kernel/Makefile ./arch/i386/kernel/Makefile +--- ../pristine-linux-2.6.16-rc5/arch/i386/kernel/Makefile 2006-02-27 15:46:58.000000000 +0000 ++++ ./arch/i386/kernel/Makefile 2006-02-27 15:55:34.000000000 +0000 +@@ -37,6 +37,7 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o + obj-$(CONFIG_DOUBLEFAULT) += doublefault.o + obj-$(CONFIG_VM86) += vm86.o + obj-$(CONFIG_EARLY_PRINTK) += early_printk.o ++obj-$(CONFIG_SMP_ALTERNATIVES) += smpalts.o + + EXTRA_AFLAGS := -traditional + +diff -pruN ../pristine-linux-2.6.16-rc5/arch/i386/kernel/smpalts.c ./arch/i386/kernel/smpalts.c +--- ../pristine-linux-2.6.16-rc5/arch/i386/kernel/smpalts.c 1970-01-01 01:00:00.000000000 +0100 ++++ ./arch/i386/kernel/smpalts.c 2006-02-27 15:55:34.000000000 +0000 +@@ -0,0 +1,85 @@ ++#include <linux/kernel.h> ++#include <asm/system.h> ++#include <asm/smp_alt.h> ++#include <asm/processor.h> ++#include <asm/string.h> ++ ++struct smp_replacement_record { ++ unsigned char targ_size; ++ unsigned char smp1_size; ++ unsigned char smp2_size; ++ unsigned char up_size; ++ unsigned char feature; ++ unsigned char data[0]; ++}; ++ ++struct smp_alternative_record { ++ void *targ_start; ++ struct smp_replacement_record *repl; ++}; ++ ++extern struct smp_alternative_record __start_smp_alternatives_table, ++ __stop_smp_alternatives_table; ++extern unsigned long __init_begin, __init_end; ++ ++void prepare_for_smp(void) ++{ ++ struct smp_alternative_record *r; ++ printk(KERN_INFO "Enabling SMP...\n"); ++ for (r = &__start_smp_alternatives_table; ++ r != &__stop_smp_alternatives_table; ++ r++) { ++ BUG_ON(r->repl->targ_size < r->repl->smp1_size); ++ BUG_ON(r->repl->targ_size < r->repl->smp2_size); ++ BUG_ON(r->repl->targ_size < r->repl->up_size); ++ if (system_state == SYSTEM_RUNNING && ++ r->targ_start >= (void *)&__init_begin && ++ r->targ_start < (void *)&__init_end) ++ continue; ++ if (r->repl->feature != (unsigned char)-1 && ++ boot_cpu_has(r->repl->feature)) { ++ memcpy(r->targ_start, ++ r->repl->data + r->repl->smp1_size, ++ r->repl->smp2_size); ++ memset(r->targ_start + r->repl->smp2_size, ++ 0x90, ++ r->repl->targ_size - r->repl->smp2_size); ++ } else { ++ memcpy(r->targ_start, ++ r->repl->data, ++ r->repl->smp1_size); ++ memset(r->targ_start + r->repl->smp1_size, ++ 0x90, ++ r->repl->targ_size - r->repl->smp1_size); ++ } ++ } ++ /* Paranoia */ ++ asm volatile ("jmp 1f\n1:"); ++ mb(); ++} ++ ++void unprepare_for_smp(void) ++{ ++ struct smp_alternative_record *r; ++ printk(KERN_INFO "Disabling SMP...\n"); ++ for (r = &__start_smp_alternatives_table; ++ r != &__stop_smp_alternatives_table; ++ r++) { ++ BUG_ON(r->repl->targ_size < r->repl->smp1_size); ++ BUG_ON(r->repl->targ_size < r->repl->smp2_size); ++ BUG_ON(r->repl->targ_size < r->repl->up_size); ++ if (system_state == SYSTEM_RUNNING && ++ r->targ_start >= (void *)&__init_begin && ++ r->targ_start < (void *)&__init_end) ++ continue; ++ memcpy(r->targ_start, ++ r->repl->data + r->repl->smp1_size + r->repl->smp2_size, ++ r->repl->up_size); ++ memset(r->targ_start + r->repl->up_size, ++ 0x90, ++ r->repl->targ_size - r->repl->up_size); ++ } ++ /* Paranoia */ ++ asm volatile ("jmp 1f\n1:"); ++ mb(); ++} +diff -pruN ../pristine-linux-2.6.16-rc5/arch/i386/kernel/smpboot.c ./arch/i386/kernel/smpboot.c +--- ../pristine-linux-2.6.16-rc5/arch/i386/kernel/smpboot.c 2006-02-27 15:46:58.000000000 +0000 ++++ ./arch/i386/kernel/smpboot.c 2006-02-27 15:55:34.000000000 +0000 +@@ -1208,6 +1208,11 @@ static void __init smp_boot_cpus(unsigne + if (max_cpus <= cpucount+1) + continue; + ++#ifdef CONFIG_SMP_ALTERNATIVES ++ if (kicked == 1) ++ prepare_for_smp(); ++#endif ++ + if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu)) + printk("CPU #%d not responding - cannot use it.\n", + apicid); +@@ -1386,6 +1391,11 @@ int __devinit __cpu_up(unsigned int cpu) + return -EIO; + } + ++#ifdef CONFIG_SMP_ALTERNATIVES ++ if (num_online_cpus() == 1) ++ prepare_for_smp(); ++#endif ++ + local_irq_enable(); + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + /* Unleash the CPU! */ +diff -pruN ../pristine-linux-2.6.16-rc5/arch/i386/kernel/vmlinux.lds.S ./arch/i386/kernel/vmlinux.lds.S +--- ../pristine-linux-2.6.16-rc5/arch/i386/kernel/vmlinux.lds.S 2006-01-03 03:21:10.000000000 +0000 ++++ ./arch/i386/kernel/vmlinux.lds.S 2006-02-27 15:55:34.000000000 +0000 +@@ -34,6 +34,13 @@ SECTIONS + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } + __stop___ex_table = .; + ++ . = ALIGN(16); ++ __start_smp_alternatives_table = .; ++ __smp_alternatives : { *(__smp_alternatives) } ++ __stop_smp_alternatives_table = .; ++ ++ __smp_replacements : { *(__smp_replacements) } ++ + RODATA + + /* writeable */ +diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/atomic.h ./include/asm-i386/atomic.h +--- ../pristine-linux-2.6.16-rc5/include/asm-i386/atomic.h 2006-02-27 15:47:25.000000000 +0000 ++++ ./include/asm-i386/atomic.h 2006-02-27 15:55:34.000000000 +0000 +@@ -4,18 +4,13 @@ + #include <linux/config.h> + #include <linux/compiler.h> + #include <asm/processor.h> ++#include <asm/smp_alt.h> + + /* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + */ + +-#ifdef CONFIG_SMP +-#define LOCK "lock ; " +-#else +-#define LOCK "" +-#endif +- + /* + * Make sure gcc doesn't try to be clever and move things around + * on us. We need to use _exactly_ the address the user gave us, +diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/bitops.h ./include/asm-i386/bitops.h +--- ../pristine-linux-2.6.16-rc5/include/asm-i386/bitops.h 2006-02-27 15:47:25.000000000 +0000 ++++ ./include/asm-i386/bitops.h 2006-02-27 15:55:34.000000000 +0000 +@@ -7,6 +7,7 @@ + + #include <linux/config.h> + #include <linux/compiler.h> ++#include <asm/smp_alt.h> + + /* + * These have to be done with inline assembly: that way the bit-setting +@@ -16,12 +17,6 @@ + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + */ + +-#ifdef CONFIG_SMP +-#define LOCK_PREFIX "lock ; " +-#else +-#define LOCK_PREFIX "" +-#endif +- + #define ADDR (*(volatile long *) addr) + + /** +@@ -41,7 +36,7 @@ + */ + static inline void set_bit(int nr, volatile unsigned long * addr) + { +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btsl %1,%0" + :"+m" (ADDR) + :"Ir" (nr)); +@@ -76,7 +71,7 @@ static inline void __set_bit(int nr, vol + */ + static inline void clear_bit(int nr, volatile unsigned long * addr) + { +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btrl %1,%0" + :"+m" (ADDR) + :"Ir" (nr)); +@@ -121,7 +116,7 @@ static inline void __change_bit(int nr, + */ + static inline void change_bit(int nr, volatile unsigned long * addr) + { +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btcl %1,%0" + :"+m" (ADDR) + :"Ir" (nr)); +@@ -140,7 +135,7 @@ static inline int test_and_set_bit(int n + { + int oldbit; + +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btsl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (ADDR) + :"Ir" (nr) : "memory"); +@@ -180,7 +175,7 @@ static inline int test_and_clear_bit(int + { + int oldbit; + +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (ADDR) + :"Ir" (nr) : "memory"); +@@ -231,7 +226,7 @@ static inline int test_and_change_bit(in + { + int oldbit; + +- __asm__ __volatile__( LOCK_PREFIX ++ __asm__ __volatile__( LOCK + "btcl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"+m" (ADDR) + :"Ir" (nr) : "memory"); +diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/futex.h ./include/asm-i386/futex.h +--- ../pristine-linux-2.6.16-rc5/include/asm-i386/futex.h 2006-02-27 15:47:25.000000000 +0000 ++++ ./include/asm-i386/futex.h 2006-02-27 15:55:34.000000000 +0000 +@@ -28,7 +28,7 @@ + "1: movl %2, %0\n\ + movl %0, %3\n" \ + insn "\n" \ +-"2: " LOCK_PREFIX "cmpxchgl %3, %2\n\ ++"2: " LOCK "cmpxchgl %3, %2\n\ + jnz 1b\n\ + 3: .section .fixup,\"ax\"\n\ + 4: mov %5, %1\n\ +@@ -68,7 +68,7 @@ futex_atomic_op_inuser (int encoded_op, + #endif + switch (op) { + case FUTEX_OP_ADD: +- __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, ++ __futex_atomic_op1(LOCK "xaddl %0, %2", ret, + oldval, uaddr, oparg); + break; + case FUTEX_OP_OR: +diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/rwsem.h ./include/asm-i386/rwsem.h +--- ../pristine-linux-2.6.16-rc5/include/asm-i386/rwsem.h 2006-01-03 03:21:10.000000000 +0000 ++++ ./include/asm-i386/rwsem.h 2006-02-27 15:55:34.000000000 +0000 +@@ -40,6 +40,7 @@ + + #include <linux/list.h> + #include <linux/spinlock.h> ++#include <asm/smp_alt.h> + + struct rwsem_waiter; + +@@ -99,7 +100,7 @@ static inline void __down_read(struct rw + { + __asm__ __volatile__( + "# beginning down_read\n\t" +-LOCK_PREFIX " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */ ++LOCK " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */ + " js 2f\n\t" /* jump if we weren't granted the lock */ + "1:\n\t" + LOCK_SECTION_START("") +@@ -130,7 +131,7 @@ static inline int __down_read_trylock(st + " movl %1,%2\n\t" + " addl %3,%2\n\t" + " jle 2f\n\t" +-LOCK_PREFIX " cmpxchgl %2,%0\n\t" ++LOCK " cmpxchgl %2,%0\n\t" + " jnz 1b\n\t" + "2:\n\t" + "# ending __down_read_trylock\n\t" +@@ -150,7 +151,7 @@ static inline void __down_write(struct r + tmp = RWSEM_ACTIVE_WRITE_BIAS; + __asm__ __volatile__( + "# beginning down_write\n\t" +-LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */ ++LOCK " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */ + " testl %%edx,%%edx\n\t" /* was the count 0 before? */ + " jnz 2f\n\t" /* jump if we weren't granted the lock */ + "1:\n\t" +@@ -188,7 +189,7 @@ static inline void __up_read(struct rw_s + __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; + __asm__ __volatile__( + "# beginning __up_read\n\t" +-LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */ ++LOCK " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */ + " js 2f\n\t" /* jump if the lock is being waited upon */ + "1:\n\t" + LOCK_SECTION_START("") +@@ -214,7 +215,7 @@ static inline void __up_write(struct rw_ + __asm__ __volatile__( + "# beginning __up_write\n\t" + " movl %2,%%edx\n\t" +-LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ ++LOCK " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ + " jnz 2f\n\t" /* jump if the lock is being waited upon */ + "1:\n\t" + LOCK_SECTION_START("") +@@ -239,7 +240,7 @@ static inline void __downgrade_write(str + { + __asm__ __volatile__( + "# beginning __downgrade_write\n\t" +-LOCK_PREFIX " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ ++LOCK " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ + " js 2f\n\t" /* jump if the lock is being waited upon */ + "1:\n\t" + LOCK_SECTION_START("") +@@ -263,7 +264,7 @@ LOCK_PREFIX " addl %2,(%%eax)\n\t" + static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) + { + __asm__ __volatile__( +-LOCK_PREFIX "addl %1,%0" ++LOCK "addl %1,%0" + : "=m"(sem->count) + : "ir"(delta), "m"(sem->count)); + } +@@ -276,7 +277,7 @@ static inline int rwsem_atomic_update(in + int tmp = delta; + + __asm__ __volatile__( +-LOCK_PREFIX "xadd %0,(%2)" ++LOCK "xadd %0,(%2)" + : "+r"(tmp), "=m"(sem->count) + : "r"(sem), "m"(sem->count) + : "memory"); +diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/smp_alt.h ./include/asm-i386/smp_alt.h +--- ../pristine-linux-2.6.16-rc5/include/asm-i386/smp_alt.h 1970-01-01 01:00:00.000000000 +0100 ++++ ./include/asm-i386/smp_alt.h 2006-02-27 15:55:34.000000000 +0000 +@@ -0,0 +1,32 @@ ++#ifndef __ASM_SMP_ALT_H__ ++#define __ASM_SMP_ALT_H__ ++ ++#include <linux/config.h> ++ ++#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE) ++#define LOCK \ ++ "6677: nop\n" \ ++ ".section __smp_alternatives,\"a\"\n" \ ++ ".long 6677b\n" \ ++ ".long 6678f\n" \ ++ ".previous\n" \ ++ ".section __smp_replacements,\"a\"\n" \ ++ "6678: .byte 1\n" \ ++ ".byte 1\n" \ ++ ".byte 0\n" \ ++ ".byte 1\n" \ ++ ".byte -1\n" \ ++ "lock\n" \ ++ "nop\n" \ ++ ".previous\n" ++void prepare_for_smp(void); ++void unprepare_for_smp(void); ++#else ++#define LOCK "lock ; " ++#endif ++#else ++#define LOCK "" ++#endif ++ ++#endif /* __ASM_SMP_ALT_H__ */ +diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/spinlock.h ./include/asm-i386/spinlock.h +--- ../pristine-linux-2.6.16-rc5/include/asm-i386/spinlock.h 2006-01-03 03:21:10.000000000 +0000 ++++ ./include/asm-i386/spinlock.h 2006-02-27 15:55:34.000000000 +0000 +@@ -6,6 +6,7 @@ + #include <asm/page.h> + #include <linux/config.h> + #include <linux/compiler.h> ++#include <asm/smp_alt.h> + + /* + * Your basic SMP spinlocks, allowing only a single CPU anywhere +@@ -23,7 +24,8 @@ + + #define __raw_spin_lock_string \ + "\n1:\t" \ +- "lock ; decb %0\n\t" \ ++ LOCK \ ++ "decb %0\n\t" \ + "jns 3f\n" \ + "2:\t" \ + "rep;nop\n\t" \ +@@ -34,7 +36,8 @@ + + #define __raw_spin_lock_string_flags \ + "\n1:\t" \ +- "lock ; decb %0\n\t" \ ++ LOCK \ ++ "decb %0\n\t" \ + "jns 4f\n\t" \ + "2:\t" \ + "testl $0x200, %1\n\t" \ +@@ -65,10 +68,34 @@ static inline void __raw_spin_lock_flags + static inline int __raw_spin_trylock(raw_spinlock_t *lock) + { + char oldval; ++#ifdef CONFIG_SMP_ALTERNATIVES + __asm__ __volatile__( +- "xchgb %b0,%1" ++ "1:movb %1,%b0\n" ++ "movb $0,%1\n" ++ "2:" ++ ".section __smp_alternatives,\"a\"\n" ++ ".long 1b\n" ++ ".long 3f\n" ++ ".previous\n" ++ ".section __smp_replacements,\"a\"\n" ++ "3: .byte 2b - 1b\n" ++ ".byte 5f-4f\n" ++ ".byte 0\n" ++ ".byte 6f-5f\n" ++ ".byte -1\n" ++ "4: xchgb %b0,%1\n" ++ "5: movb %1,%b0\n" ++ "movb $0,%1\n" ++ "6:\n" ++ ".previous\n" + :"=q" (oldval), "=m" (lock->slock) + :"0" (0) : "memory"); ++#else ++ __asm__ __volatile__( ++ "xchgb %b0,%1\n" ++ :"=q" (oldval), "=m" (lock->slock) ++ :"0" (0) : "memory"); ++#endif + return oldval > 0; + } + +@@ -178,12 +205,12 @@ static inline int __raw_write_trylock(ra + + static inline void __raw_read_unlock(raw_rwlock_t *rw) + { +- asm volatile("lock ; incl %0" :"=m" (rw->lock) : : "memory"); ++ asm volatile(LOCK "incl %0" :"=m" (rw->lock) : : "memory"); + } + + static inline void __raw_write_unlock(raw_rwlock_t *rw) + { +- asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ", %0" ++ asm volatile(LOCK "addl $" RW_LOCK_BIAS_STR ", %0" + : "=m" (rw->lock) : : "memory"); + } + +diff -pruN ../pristine-linux-2.6.16-rc5/include/asm-i386/system.h ./include/asm-i386/system.h +--- ../pristine-linux-2.6.16-rc5/include/asm-i386/system.h 2006-02-27 15:47:25.000000000 +0000 ++++ ./include/asm-i386/system.h 2006-02-27 15:55:34.000000000 +0000 +@@ -5,7 +5,7 @@ + #include <linux/kernel.h> + #include <asm/segment.h> + #include <asm/cpufeature.h> +-#include <linux/bitops.h> /* for LOCK_PREFIX */ ++#include <asm/smp_alt.h> + + #ifdef __KERNEL__ + +@@ -271,19 +271,19 @@ static inline unsigned long __cmpxchg(vo + unsigned long prev; + switch (size) { + case 1: +- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" ++ __asm__ __volatile__(LOCK "cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: +- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" ++ __asm__ __volatile__(LOCK "cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: +- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" ++ __asm__ __volatile__(LOCK "cmpxchgl %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); +@@ -336,7 +336,7 @@ static inline unsigned long long __cmpxc + unsigned long long new) + { + unsigned long long prev; +- __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3" ++ __asm__ __volatile__(LOCK "cmpxchg8b %3" + : "=A"(prev) + : "b"((unsigned long)new), + "c"((unsigned long)(new >> 32)), +@@ -503,11 +503,55 @@ struct alt_instr { + #endif + + #ifdef CONFIG_SMP ++#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE) ++#define smp_alt_mb(instr) \ ++__asm__ __volatile__("6667:\nnop\nnop\nnop\nnop\nnop\nnop\n6668:\n" \ ++ ".section __smp_alternatives,\"a\"\n" \ ++ ".long 6667b\n" \ ++ ".long 6673f\n" \ ++ ".previous\n" \ ++ ".section __smp_replacements,\"a\"\n" \ ++ "6673:.byte 6668b-6667b\n" \ ++ ".byte 6670f-6669f\n" \ ++ ".byte 6671f-6670f\n" \ ++ ".byte 0\n" \ ++ ".byte %c0\n" \ ++ "6669:lock;addl $0,0(%%esp)\n" \ ++ "6670:" instr "\n" \ ++ "6671:\n" \ ++ ".previous\n" \ ++ : \ ++ : "i" (X86_FEATURE_XMM2) \ ++ : "memory") ++#define smp_rmb() smp_alt_mb("lfence") ++#define smp_mb() smp_alt_mb("mfence") ++#define set_mb(var, value) do { \ ++unsigned long __set_mb_temp; \ ++__asm__ __volatile__("6667:movl %1, %0\n6668:\n" \ ++ ".section __smp_alternatives,\"a\"\n" \ ++ ".long 6667b\n" \ ++ ".long 6673f\n" \ ++ ".previous\n" \ ++ ".section __smp_replacements,\"a\"\n" \ ++ "6673: .byte 6668b-6667b\n" \ ++ ".byte 6670f-6669f\n" \ ++ ".byte 0\n" \ ++ ".byte 6671f-6670f\n" \ ++ ".byte -1\n" \ ++ "6669: xchg %1, %0\n" \ ++ "6670:movl %1, %0\n" \ ++ "6671:\n" \ ++ ".previous\n" \ ++ : "=m" (var), "=r" (__set_mb_temp) \ ++ : "1" (value) \ ++ : "memory"); } while (0) ++#else + #define smp_mb() mb() + #define smp_rmb() rmb() ++#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) ++#endif + #define smp_wmb() wmb() + #define smp_read_barrier_depends() read_barrier_depends() +-#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) + #else + #define smp_mb() barrier() + #define smp_rmb() barrier() diff -r c028ba081c84 -r 5262bd9e9d19 patches/linux-2.6.16-rc4/i386-mach-io-check-nmi.patch --- a/patches/linux-2.6.16-rc4/i386-mach-io-check-nmi.patch Mon Feb 27 16:26:16 2006 +++ /dev/null Mon Feb 27 17:44:38 2006 @@ -1,45 +0,0 @@ -diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/traps.c ./arch/i386/kernel/traps.c ---- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/traps.c 2006-02-15 20:38:51.000000000 +0000 -+++ ./arch/i386/kernel/traps.c 2006-02-15 20:40:43.000000000 +0000 -@@ -567,18 +567,11 @@ static void mem_parity_error(unsigned ch - - static void io_check_error(unsigned char reason, struct pt_regs * regs) - { -- unsigned long i; -- - printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); - show_registers(regs); - - /* Re-enable the IOCK line, wait for a few seconds */ -- reason = (reason & 0xf) | 8; -- outb(reason, 0x61); -- i = 2000; -- while (--i) udelay(1000); -- reason &= ~8; -- outb(reason, 0x61); -+ clear_io_check_error(reason); - } - - static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) -diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/mach-default/mach_traps.h ./include/asm-i386/mach-default/mach_traps.h ---- ../pristine-linux-2.6.16-rc3/include/asm-i386/mach-default/mach_traps.h 2006-01-03 03:21:10.000000000 +0000 -+++ ./include/asm-i386/mach-default/mach_traps.h 2006-02-15 20:40:43.000000000 +0000 -@@ -15,6 +15,18 @@ static inline void clear_mem_error(unsig - outb(reason, 0x61); - } - -+static inline void clear_io_check_error(unsigned char reason) -+{ -+ unsigned long i; -+ -+ reason = (reason & 0xf) | 8; -+ outb(reason, 0x61); -+ i = 2000; -+ while (--i) udelay(1000); -+ reason &= ~8; -+ outb(reason, 0x61); -+} -+ - static inline unsigned char get_nmi_reason(void) - { - return inb(0x61); diff -r c028ba081c84 -r 5262bd9e9d19 patches/linux-2.6.16-rc4/net-csum.patch --- a/patches/linux-2.6.16-rc4/net-csum.patch Mon Feb 27 16:26:16 2006 +++ /dev/null Mon Feb 27 17:44:38 2006 @@ -1,41 +0,0 @@ -diff -pruN ../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_tcp.c ./net/ipv4/netfilter/ip_nat_proto_tcp.c ---- ../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-02-02 17:39:51.000000000 +0000 -+++ ./net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-02-02 17:44:18.000000000 +0000 -@@ -129,10 +129,14 @@ tcp_manip_pkt(struct sk_buff **pskb, - if (hdrsize < sizeof(*hdr)) - return 1; - -- hdr->check = ip_nat_cheat_check(~oldip, newip, -+ if ((*pskb)->proto_csum_blank) { -+ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); -+ } else { -+ hdr->check = ip_nat_cheat_check(~oldip, newip, - ip_nat_cheat_check(oldport ^ 0xFFFF, - newport, - hdr->check)); -+ } - return 1; - } - -diff -pruN ../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_udp.c ./net/ipv4/netfilter/ip_nat_proto_udp.c ---- ../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_udp.c 2006-02-02 17:39:51.000000000 +0000 -+++ ./net/ipv4/netfilter/ip_nat_proto_udp.c 2006-02-02 17:44:18.000000000 +0000 -@@ -113,11 +113,16 @@ udp_manip_pkt(struct sk_buff **pskb, - newport = tuple->dst.u.udp.port; - portptr = &hdr->dest; - } -- if (hdr->check) /* 0 is a special case meaning no checksum */ -- hdr->check = ip_nat_cheat_check(~oldip, newip, -+ if (hdr->check) { /* 0 is a special case meaning no checksum */ -+ if ((*pskb)->proto_csum_blank) { -+ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check); -+ } else { -+ hdr->check = ip_nat_cheat_check(~oldip, newip, - ip_nat_cheat_check(*portptr ^ 0xFFFF, - newport, - hdr->check)); -+ } -+ } - *portptr = newport; - return 1; - } diff -r c028ba081c84 -r 5262bd9e9d19 patches/linux-2.6.16-rc4/pmd-shared.patch --- a/patches/linux-2.6.16-rc4/pmd-shared.patch Mon Feb 27 16:26:16 2006 +++ /dev/null Mon Feb 27 17:44:38 2006 @@ -1,111 +0,0 @@ -diff -pruN ../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pageattr.c ./arch/i386/mm/pageattr.c ---- ../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pageattr.c 2006-02-02 17:39:29.000000000 +0000 -+++ ./arch/i386/mm/pageattr.c 2006-02-02 17:45:14.000000000 +0000 -@@ -78,7 +78,7 @@ static void set_pmd_pte(pte_t *kpte, uns - unsigned long flags; - - set_pte_atomic(kpte, pte); /* change init_mm */ -- if (PTRS_PER_PMD > 1) -+ if (HAVE_SHARED_KERNEL_PMD) - return; - - spin_lock_irqsave(&pgd_lock, flags); -diff -pruN ../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pgtable.c ./arch/i386/mm/pgtable.c ---- ../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pgtable.c 2006-01-03 03:21:10.000000000 +0000 -+++ ./arch/i386/mm/pgtable.c 2006-02-02 17:45:14.000000000 +0000 -@@ -215,9 +215,10 @@ void pgd_ctor(void *pgd, kmem_cache_t *c - spin_lock_irqsave(&pgd_lock, flags); - } - -- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, -- swapper_pg_dir + USER_PTRS_PER_PGD, -- KERNEL_PGD_PTRS); -+ if (PTRS_PER_PMD == 1 || HAVE_SHARED_KERNEL_PMD) -+ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, -+ swapper_pg_dir + USER_PTRS_PER_PGD, -+ KERNEL_PGD_PTRS); - if (PTRS_PER_PMD > 1) - return; - -@@ -249,6 +250,30 @@ pgd_t *pgd_alloc(struct mm_struct *mm) - goto out_oom; - set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); - } -+ -+ if (!HAVE_SHARED_KERNEL_PMD) { -+ unsigned long flags; -+ -+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { -+ pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); -+ if (!pmd) -+ goto out_oom; -+ set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd))); -+ } -+ -+ spin_lock_irqsave(&pgd_lock, flags); -+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { -+ unsigned long v = (unsigned long)i << PGDIR_SHIFT; -+ pgd_t *kpgd = pgd_offset_k(v); -+ pud_t *kpud = pud_offset(kpgd, v); -+ pmd_t *kpmd = pmd_offset(kpud, v); -+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); -+ memcpy(pmd, kpmd, PAGE_SIZE); -+ } -+ pgd_list_add(pgd); -+ spin_unlock_irqrestore(&pgd_lock, flags); -+ } -+ - return pgd; - - out_oom: -@@ -263,9 +288,23 @@ void pgd_free(pgd_t *pgd) - int i; - - /* in the PAE case user pgd entries are overwritten before usage */ -- if (PTRS_PER_PMD > 1) -- for (i = 0; i < USER_PTRS_PER_PGD; ++i) -- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); -+ if (PTRS_PER_PMD > 1) { -+ for (i = 0; i < USER_PTRS_PER_PGD; ++i) { -+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); -+ kmem_cache_free(pmd_cache, pmd); -+ } -+ if (!HAVE_SHARED_KERNEL_PMD) { -+ unsigned long flags; -+ spin_lock_irqsave(&pgd_lock, flags); -+ pgd_list_del(pgd); -+ spin_unlock_irqrestore(&pgd_lock, flags); -+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { -+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); -+ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); -+ kmem_cache_free(pmd_cache, pmd); -+ } -+ } -+ } - /* in the non-PAE case, free_pgtables() clears user pgd entries */ - kmem_cache_free(pgd_cache, pgd); - } -diff -pruN ../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-2level-defs.h ./include/asm-i386/pgtable-2level-defs.h ---- ../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-2level-defs.h 2006-01-03 03:21:10.000000000 +0000 -+++ ./include/asm-i386/pgtable-2level-defs.h 2006-02-02 17:45:14.000000000 +0000 -@@ -1,6 +1,8 @@ - #ifndef _I386_PGTABLE_2LEVEL_DEFS_H - #define _I386_PGTABLE_2LEVEL_DEFS_H - -+#define HAVE_SHARED_KERNEL_PMD 0 -+ - /* - * traditional i386 two-level paging structure: - */ -diff -pruN ../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-3level-defs.h ./include/asm-i386/pgtable-3level-defs.h ---- ../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-3level-defs.h 2006-01-03 03:21:10.000000000 +0000 -+++ ./include/asm-i386/pgtable-3level-defs.h 2006-02-02 17:45:14.000000000 +0000 -@@ -1,6 +1,8 @@ - #ifndef _I386_PGTABLE_3LEVEL_DEFS_H - #define _I386_PGTABLE_3LEVEL_DEFS_H - -+#define HAVE_SHARED_KERNEL_PMD 1 -+ - /* - * PGDIR_SHIFT determines what a top-level page table entry can map - */ diff -r c028ba081c84 -r 5262bd9e9d19 patches/linux-2.6.16-rc4/smp-alts.patch --- a/patches/linux-2.6.16-rc4/smp-alts.patch Mon Feb 27 16:26:16 2006 +++ /dev/null Mon Feb 27 17:44:38 2006 @@ -1,591 +0,0 @@ -diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/Kconfig ./arch/i386/Kconfig ---- ../pristine-linux-2.6.16-rc3/arch/i386/Kconfig 2006-02-15 20:38:51.000000000 +0000 -+++ ./arch/i386/Kconfig 2006-02-15 20:45:57.000000000 +0000 -@@ -202,6 +202,19 @@ config SMP - - If you don't know what to do here, say N. - -+config SMP_ALTERNATIVES -+ bool "SMP alternatives support (EXPERIMENTAL)" -+ depends on SMP && EXPERIMENTAL -+ help -+ Try to reduce the overhead of running an SMP kernel on a uniprocessor -+ host slightly by replacing certain key instruction sequences -+ according to whether we currently have more than one CPU available. -+ This should provide a noticeable boost to performance when -+ running SMP kernels on UP machines, and have negligible impact -+ when running on an true SMP host. -+ -+ If unsure, say N. -+ - config NR_CPUS - int "Maximum number of CPUs (2-255)" - range 2 255 -diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/Makefile ./arch/i386/kernel/Makefile ---- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/Makefile 2006-02-15 20:38:51.000000000 +0000 -+++ ./arch/i386/kernel/Makefile 2006-02-15 20:45:57.000000000 +0000 -@@ -37,6 +37,7 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o - obj-$(CONFIG_DOUBLEFAULT) += doublefault.o - obj-$(CONFIG_VM86) += vm86.o - obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -+obj-$(CONFIG_SMP_ALTERNATIVES) += smpalts.o - - EXTRA_AFLAGS := -traditional - -diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpalts.c ./arch/i386/kernel/smpalts.c ---- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpalts.c 1970-01-01 01:00:00.000000000 +0100 -+++ ./arch/i386/kernel/smpalts.c 2006-02-15 20:45:57.000000000 +0000 -@@ -0,0 +1,85 @@ -+#include <linux/kernel.h> -+#include <asm/system.h> -+#include <asm/smp_alt.h> -+#include <asm/processor.h> -+#include <asm/string.h> -+ -+struct smp_replacement_record { -+ unsigned char targ_size; -+ unsigned char smp1_size; -+ unsigned char smp2_size; -+ unsigned char up_size; -+ unsigned char feature; -+ unsigned char data[0]; -+}; -+ -+struct smp_alternative_record { -+ void *targ_start; -+ struct smp_replacement_record *repl; -+}; -+ -+extern struct smp_alternative_record __start_smp_alternatives_table, -+ __stop_smp_alternatives_table; -+extern unsigned long __init_begin, __init_end; -+ -+void prepare_for_smp(void) -+{ -+ struct smp_alternative_record *r; -+ printk(KERN_INFO "Enabling SMP...\n"); -+ for (r = &__start_smp_alternatives_table; -+ r != &__stop_smp_alternatives_table; -+ r++) { -+ BUG_ON(r->repl->targ_size < r->repl->smp1_size); -+ BUG_ON(r->repl->targ_size < r->repl->smp2_size); -+ BUG_ON(r->repl->targ_size < r->repl->up_size); -+ if (system_state == SYSTEM_RUNNING && -+ r->targ_start >= (void *)&__init_begin && -+ r->targ_start < (void *)&__init_end) -+ continue; -+ if (r->repl->feature != (unsigned char)-1 && -+ boot_cpu_has(r->repl->feature)) { -+ memcpy(r->targ_start, -+ r->repl->data + r->repl->smp1_size, -+ r->repl->smp2_size); -+ memset(r->targ_start + r->repl->smp2_size, -+ 0x90, -+ r->repl->targ_size - r->repl->smp2_size); -+ } else { -+ memcpy(r->targ_start, -+ r->repl->data, -+ r->repl->smp1_size); -+ memset(r->targ_start + r->repl->smp1_size, -+ 0x90, -+ r->repl->targ_size - r->repl->smp1_size); -+ } -+ } -+ /* Paranoia */ -+ asm volatile ("jmp 1f\n1:"); -+ mb(); -+} -+ -+void unprepare_for_smp(void) -+{ -+ struct smp_alternative_record *r; -+ printk(KERN_INFO "Disabling SMP...\n"); -+ for (r = &__start_smp_alternatives_table; -+ r != &__stop_smp_alternatives_table; -+ r++) { -+ BUG_ON(r->repl->targ_size < r->repl->smp1_size); -+ BUG_ON(r->repl->targ_size < r->repl->smp2_size); -+ BUG_ON(r->repl->targ_size < r->repl->up_size); -+ if (system_state == SYSTEM_RUNNING && -+ r->targ_start >= (void *)&__init_begin && -+ r->targ_start < (void *)&__init_end) -+ continue; -+ memcpy(r->targ_start, -+ r->repl->data + r->repl->smp1_size + r->repl->smp2_size, -+ r->repl->up_size); -+ memset(r->targ_start + r->repl->up_size, -+ 0x90, -+ r->repl->targ_size - r->repl->up_size); -+ } -+ /* Paranoia */ -+ asm volatile ("jmp 1f\n1:"); -+ mb(); -+} -diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpboot.c ./arch/i386/kernel/smpboot.c ---- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpboot.c 2006-02-15 20:38:51.000000000 +0000 -+++ ./arch/i386/kernel/smpboot.c 2006-02-15 20:45:57.000000000 +0000 -@@ -1214,6 +1214,11 @@ static void __init smp_boot_cpus(unsigne - if (max_cpus <= cpucount+1) - continue; - -+#ifdef CONFIG_SMP_ALTERNATIVES -+ if (kicked == 1) -+ prepare_for_smp(); -+#endif -+ - if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu)) - printk("CPU #%d not responding - cannot use it.\n", - apicid); -@@ -1392,6 +1397,11 @@ int __devinit __cpu_up(unsigned int cpu) - return -EIO; - } - -+#ifdef CONFIG_SMP_ALTERNATIVES -+ if (num_online_cpus() == 1) -+ prepare_for_smp(); -+#endif -+ - local_irq_enable(); - per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; - /* Unleash the CPU! */ -diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/vmlinux.lds.S ./arch/i386/kernel/vmlinux.lds.S ---- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/vmlinux.lds.S 2006-01-03 03:21:10.000000000 +0000 -+++ ./arch/i386/kernel/vmlinux.lds.S 2006-02-15 20:45:57.000000000 +0000 -@@ -34,6 +34,13 @@ SECTIONS - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } - __stop___ex_table = .; - -+ . = ALIGN(16); -+ __start_smp_alternatives_table = .; -+ __smp_alternatives : { *(__smp_alternatives) } -+ __stop_smp_alternatives_table = .; -+ -+ __smp_replacements : { *(__smp_replacements) } -+ - RODATA - - /* writeable */ -diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/atomic.h ./include/asm-i386/atomic.h ---- ../pristine-linux-2.6.16-rc3/include/asm-i386/atomic.h 2006-02-15 20:38:57.000000000 +0000 -+++ ./include/asm-i386/atomic.h 2006-02-15 20:45:57.000000000 +0000 -@@ -4,18 +4,13 @@ - #include <linux/config.h> - #include <linux/compiler.h> - #include <asm/processor.h> -+#include <asm/smp_alt.h> - - /* - * Atomic operations that C can't guarantee us. Useful for - * resource counting etc.. - */ - --#ifdef CONFIG_SMP --#define LOCK "lock ; " --#else --#define LOCK "" --#endif -- - /* - * Make sure gcc doesn't try to be clever and move things around - * on us. We need to use _exactly_ the address the user gave us, -diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/bitops.h ./include/asm-i386/bitops.h ---- ../pristine-linux-2.6.16-rc3/include/asm-i386/bitops.h 2006-02-15 20:38:57.000000000 +0000 -+++ ./include/asm-i386/bitops.h 2006-02-15 20:45:57.000000000 +0000 -@@ -7,6 +7,7 @@ - - #include <linux/config.h> - #include <linux/compiler.h> -+#include <asm/smp_alt.h> - - /* - * These have to be done with inline assembly: that way the bit-setting -@@ -16,12 +17,6 @@ - * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). - */ - --#ifdef CONFIG_SMP --#define LOCK_PREFIX "lock ; " --#else --#define LOCK_PREFIX "" --#endif -- - #define ADDR (*(volatile long *) addr) - - /** -@@ -41,7 +36,7 @@ - */ - static inline void set_bit(int nr, volatile unsigned long * addr) - { -- __asm__ __volatile__( LOCK_PREFIX -+ __asm__ __volatile__( LOCK - "btsl %1,%0" - :"+m" (ADDR) - :"Ir" (nr)); -@@ -76,7 +71,7 @@ static inline void __set_bit(int nr, vol - */ - static inline void clear_bit(int nr, volatile unsigned long * addr) - { -- __asm__ __volatile__( LOCK_PREFIX -+ __asm__ __volatile__( LOCK - "btrl %1,%0" - :"+m" (ADDR) - :"Ir" (nr)); -@@ -121,7 +116,7 @@ static inline void __change_bit(int nr, - */ - static inline void change_bit(int nr, volatile unsigned long * addr) - { -- __asm__ __volatile__( LOCK_PREFIX -+ __asm__ __volatile__( LOCK - "btcl %1,%0" - :"+m" (ADDR) - :"Ir" (nr)); -@@ -140,7 +135,7 @@ static inline int test_and_set_bit(int n - { - int oldbit; - -- __asm__ __volatile__( LOCK_PREFIX -+ __asm__ __volatile__( LOCK - "btsl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"+m" (ADDR) - :"Ir" (nr) : "memory"); -@@ -180,7 +175,7 @@ static inline int test_and_clear_bit(int - { - int oldbit; - -- __asm__ __volatile__( LOCK_PREFIX -+ __asm__ __volatile__( LOCK - "btrl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"+m" (ADDR) - :"Ir" (nr) : "memory"); -@@ -231,7 +226,7 @@ static inline int test_and_change_bit(in - { - int oldbit; - -- __asm__ __volatile__( LOCK_PREFIX -+ __asm__ __volatile__( LOCK - "btcl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"+m" (ADDR) - :"Ir" (nr) : "memory"); -diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/futex.h ./include/asm-i386/futex.h ---- ../pristine-linux-2.6.16-rc3/include/asm-i386/futex.h 2006-02-15 20:38:57.000000000 +0000 -+++ ./include/asm-i386/futex.h 2006-02-15 20:45:57.000000000 +0000 -@@ -28,7 +28,7 @@ - "1: movl %2, %0\n\ - movl %0, %3\n" \ - insn "\n" \ --"2: " LOCK_PREFIX "cmpxchgl %3, %2\n\ -+"2: " LOCK "cmpxchgl %3, %2\n\ - jnz 1b\n\ - 3: .section .fixup,\"ax\"\n\ - 4: mov %5, %1\n\ -@@ -68,7 +68,7 @@ futex_atomic_op_inuser (int encoded_op, - #endif - switch (op) { - case FUTEX_OP_ADD: -- __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, -+ __futex_atomic_op1(LOCK "xaddl %0, %2", ret, - oldval, uaddr, oparg); - break; - case FUTEX_OP_OR: -diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/rwsem.h ./include/asm-i386/rwsem.h ---- ../pristine-linux-2.6.16-rc3/include/asm-i386/rwsem.h 2006-01-03 03:21:10.000000000 +0000 -+++ ./include/asm-i386/rwsem.h 2006-02-15 20:45:57.000000000 +0000 -@@ -40,6 +40,7 @@ - - #include <linux/list.h> - #include <linux/spinlock.h> -+#include <asm/smp_alt.h> - - struct rwsem_waiter; - -@@ -99,7 +100,7 @@ static inline void __down_read(struct rw - { - __asm__ __volatile__( - "# beginning down_read\n\t" --LOCK_PREFIX " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */ -+LOCK " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */ - " js 2f\n\t" /* jump if we weren't granted the lock */ - "1:\n\t" - LOCK_SECTION_START("") -@@ -130,7 +131,7 @@ static inline int __down_read_trylock(st - " movl %1,%2\n\t" - " addl %3,%2\n\t" - " jle 2f\n\t" --LOCK_PREFIX " cmpxchgl %2,%0\n\t" -+LOCK " cmpxchgl %2,%0\n\t" - " jnz 1b\n\t" - "2:\n\t" - "# ending __down_read_trylock\n\t" -@@ -150,7 +151,7 @@ static inline void __down_write(struct r - tmp = RWSEM_ACTIVE_WRITE_BIAS; - __asm__ __volatile__( - "# beginning down_write\n\t" --LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */ -+LOCK " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */ - " testl %%edx,%%edx\n\t" /* was the count 0 before? */ - " jnz 2f\n\t" /* jump if we weren't granted the lock */ - "1:\n\t" -@@ -188,7 +189,7 @@ static inline void __up_read(struct rw_s - __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; - __asm__ __volatile__( - "# beginning __up_read\n\t" --LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */ -+LOCK " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */ - " js 2f\n\t" /* jump if the lock is being waited upon */ - "1:\n\t" - LOCK_SECTION_START("") -@@ -214,7 +215,7 @@ static inline void __up_write(struct rw_ - __asm__ __volatile__( - "# beginning __up_write\n\t" - " movl %2,%%edx\n\t" --LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ -+LOCK " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ - " jnz 2f\n\t" /* jump if the lock is being waited upon */ - "1:\n\t" - LOCK_SECTION_START("") -@@ -239,7 +240,7 @@ static inline void __downgrade_write(str - { - __asm__ __volatile__( - "# beginning __downgrade_write\n\t" --LOCK_PREFIX " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ -+LOCK " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ - " js 2f\n\t" /* jump if the lock is being waited upon */ - "1:\n\t" - LOCK_SECTION_START("") -@@ -263,7 +264,7 @@ LOCK_PREFIX " addl %2,(%%eax)\n\t" - static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) - { - __asm__ __volatile__( --LOCK_PREFIX "addl %1,%0" -+LOCK "addl %1,%0" - : "=m"(sem->count) - : "ir"(delta), "m"(sem->count)); - } -@@ -276,7 +277,7 @@ static inline int rwsem_atomic_update(in - int tmp = delta; - - __asm__ __volatile__( --LOCK_PREFIX "xadd %0,(%2)" -+LOCK "xadd %0,(%2)" - : "+r"(tmp), "=m"(sem->count) - : "r"(sem), "m"(sem->count) - : "memory"); -diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/smp_alt.h ./include/asm-i386/smp_alt.h ---- ../pristine-linux-2.6.16-rc3/include/asm-i386/smp_alt.h 1970-01-01 01:00:00.000000000 +0100 -+++ ./include/asm-i386/smp_alt.h 2006-02-15 20:45:57.000000000 +0000 -@@ -0,0 +1,32 @@ -+#ifndef __ASM_SMP_ALT_H__ -+#define __ASM_SMP_ALT_H__ -+ -+#include <linux/config.h> -+ -+#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE) -+#define LOCK \ -+ "6677: nop\n" \ -+ ".section __smp_alternatives,\"a\"\n" \ -+ ".long 6677b\n" \ -+ ".long 6678f\n" \ -+ ".previous\n" \ -+ ".section __smp_replacements,\"a\"\n" \ -+ "6678: .byte 1\n" \ -+ ".byte 1\n" \ -+ ".byte 0\n" \ -+ ".byte 1\n" \ -+ ".byte -1\n" \ -+ "lock\n" \ -+ "nop\n" \ -+ ".previous\n" -+void prepare_for_smp(void); -+void unprepare_for_smp(void); -+#else -+#define LOCK "lock ; " -+#endif -+#else -+#define LOCK "" -+#endif -+ -+#endif /* __ASM_SMP_ALT_H__ */ -diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/spinlock.h ./include/asm-i386/spinlock.h ---- ../pristine-linux-2.6.16-rc3/include/asm-i386/spinlock.h 2006-01-03 03:21:10.000000000 +0000 -+++ ./include/asm-i386/spinlock.h 2006-02-15 20:45:57.000000000 +0000 -@@ -6,6 +6,7 @@ - #include <asm/page.h> - #include <linux/config.h> - #include <linux/compiler.h> -+#include <asm/smp_alt.h> - - /* - * Your basic SMP spinlocks, allowing only a single CPU anywhere -@@ -23,7 +24,8 @@ - - #define __raw_spin_lock_string \ - "\n1:\t" \ -- "lock ; decb %0\n\t" \ -+ LOCK \ -+ "decb %0\n\t" \ - "jns 3f\n" \ - "2:\t" \ - "rep;nop\n\t" \ -@@ -34,7 +36,8 @@ - - #define __raw_spin_lock_string_flags \ - "\n1:\t" \ -- "lock ; decb %0\n\t" \ -+ LOCK \ -+ "decb %0\n\t" \ - "jns 4f\n\t" \ - "2:\t" \ - "testl $0x200, %1\n\t" \ -@@ -65,10 +68,34 @@ static inline void __raw_spin_lock_flags - static inline int __raw_spin_trylock(raw_spinlock_t *lock) - { - char oldval; -+#ifdef CONFIG_SMP_ALTERNATIVES - __asm__ __volatile__( -- "xchgb %b0,%1" -+ "1:movb %1,%b0\n" -+ "movb $0,%1\n" -+ "2:" -+ ".section __smp_alternatives,\"a\"\n" -+ ".long 1b\n" -+ ".long 3f\n" -+ ".previous\n" -+ ".section __smp_replacements,\"a\"\n" -+ "3: .byte 2b - 1b\n" -+ ".byte 5f-4f\n" -+ ".byte 0\n" -+ ".byte 6f-5f\n" -+ ".byte -1\n" -+ "4: xchgb %b0,%1\n" -+ "5: movb %1,%b0\n" -+ "movb $0,%1\n" -+ "6:\n" -+ ".previous\n" - :"=q" (oldval), "=m" (lock->slock) - :"0" (0) : "memory"); -+#else -+ __asm__ __volatile__( -+ "xchgb %b0,%1\n" -+ :"=q" (oldval), "=m" (lock->slock) -+ :"0" (0) : "memory"); -+#endif - return oldval > 0; - } - -@@ -178,12 +205,12 @@ static inline int __raw_write_trylock(ra - - static inline void __raw_read_unlock(raw_rwlock_t *rw) - { -- asm volatile("lock ; incl %0" :"=m" (rw->lock) : : "memory"); -+ asm volatile(LOCK "incl %0" :"=m" (rw->lock) : : "memory"); - } - - static inline void __raw_write_unlock(raw_rwlock_t *rw) - { -- asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ", %0" -+ asm volatile(LOCK "addl $" RW_LOCK_BIAS_STR ", %0" - : "=m" (rw->lock) : : "memory"); - } - -diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/system.h ./include/asm-i386/system.h ---- ../pristine-linux-2.6.16-rc3/include/asm-i386/system.h 2006-02-15 20:38:57.000000000 +0000 -+++ ./include/asm-i386/system.h 2006-02-15 20:45:57.000000000 +0000 -@@ -5,7 +5,7 @@ - #include <linux/kernel.h> - #include <asm/segment.h> - #include <asm/cpufeature.h> --#include <linux/bitops.h> /* for LOCK_PREFIX */ -+#include <asm/smp_alt.h> - - #ifdef __KERNEL__ - -@@ -271,19 +271,19 @@ static inline unsigned long __cmpxchg(vo - unsigned long prev; - switch (size) { - case 1: -- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" -+ __asm__ __volatile__(LOCK "cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: -- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" -+ __asm__ __volatile__(LOCK "cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: -- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" -+ __asm__ __volatile__(LOCK "cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); -@@ -336,7 +336,7 @@ static inline unsigned long long __cmpxc - unsigned long long new) - { - unsigned long long prev; -- __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3" -+ __asm__ __volatile__(LOCK "cmpxchg8b %3" - : "=A"(prev) - : "b"((unsigned long)new), - "c"((unsigned long)(new >> 32)), -@@ -503,11 +503,55 @@ struct alt_instr { - #endif - - #ifdef CONFIG_SMP -+#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE) -+#define smp_alt_mb(instr) \ -+__asm__ __volatile__("6667:\nnop\nnop\nnop\nnop\nnop\nnop\n6668:\n" \ -+ ".section __smp_alternatives,\"a\"\n" \ -+ ".long 6667b\n" \ -+ ".long 6673f\n" \ -+ ".previous\n" \ -+ ".section __smp_replacements,\"a\"\n" \ -+ "6673:.byte 6668b-6667b\n" \ -+ ".byte 6670f-6669f\n" \ -+ ".byte 6671f-6670f\n" \ -+ ".byte 0\n" \ -+ ".byte %c0\n" \ -+ "6669:lock;addl $0,0(%%esp)\n" \ -+ "6670:" instr "\n" \ -+ "6671:\n" \ -+ ".previous\n" \ -+ : \ -+ : "i" (X86_FEATURE_XMM2) \ -+ : "memory") -+#define smp_rmb() smp_alt_mb("lfence") -+#define smp_mb() smp_alt_mb("mfence") -+#define set_mb(var, value) do { \ -+unsigned long __set_mb_temp; \ -+__asm__ __volatile__("6667:movl %1, %0\n6668:\n" \ -+ ".section __smp_alternatives,\"a\"\n" \ -+ ".long 6667b\n" \ -+ ".long 6673f\n" \ -+ ".previous\n" \ -+ ".section __smp_replacements,\"a\"\n" \ -+ "6673: .byte 6668b-6667b\n" \ -+ ".byte 6670f-6669f\n" \ -+ ".byte 0\n" \ -+ ".byte 6671f-6670f\n" \ -+ ".byte -1\n" \ -+ "6669: xchg %1, %0\n" \ -+ "6670:movl %1, %0\n" \ -+ "6671:\n" \ -+ ".previous\n" \ -+ : "=m" (var), "=r" (__set_mb_temp) \ -+ : "1" (value) \ -+ : "memory"); } while (0) -+#else - #define smp_mb() mb() - #define smp_rmb() rmb() -+#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) -+#endif - #define smp_wmb() wmb() - #define smp_read_barrier_depends() read_barrier_depends() --#define set_mb(var, value) do { (void) xchg(&var, value); } while (0) - #else - #define smp_mb() barrier() - #define smp_rmb() barrier() _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |