
[Xen-changelog] upgrade the sparse repository to 2.6.12



# HG changeset patch
# User vh249@xxxxxxxxxxxxxxxxxxxxxxxx
# Node ID de310533c48375f3008a0484c3a770b807aee5c3
# Parent  fa660d79f69573459949c847743f6341f466c7d0
upgrade the sparse repository to 2.6.12

Signed-off-by: Vincent Hanquez <vincent@xxxxxxxxxxxxx>
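
A quick sketch for applying this changeset by hand (the file name
"upgrade-2.6.12.patch" below is only illustrative): "hg import" recognises
the "# HG changeset patch" header above and keeps the user and description,
so the mail body can be saved and fed to it verbatim:

    hg import upgrade-2.6.12.patch

In a clone that has already pulled this changeset, the same revision can be
inspected directly with:

    hg log -v -r de310533c483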

diff -r fa660d79f695 -r de310533c483 buildconfigs/mk.linux-2.6-xen0
--- a/buildconfigs/mk.linux-2.6-xen0    Tue Aug  9 15:17:45 2005
+++ b/buildconfigs/mk.linux-2.6-xen0    Tue Aug  9 23:57:17 2005
@@ -2,7 +2,7 @@
 OS           = linux
 
 LINUX_SERIES = 2.6
-LINUX_VER    = 2.6.11
+LINUX_VER    = 2.6.12
 
 EXTRAVERSION = xen0
 
diff -r fa660d79f695 -r de310533c483 buildconfigs/mk.linux-2.6-xenU
--- a/buildconfigs/mk.linux-2.6-xenU    Tue Aug  9 15:17:45 2005
+++ b/buildconfigs/mk.linux-2.6-xenU    Tue Aug  9 23:57:17 2005
@@ -2,7 +2,7 @@
 OS           = linux
 
 LINUX_SERIES = 2.6
-LINUX_VER    = 2.6.11
+LINUX_VER    = 2.6.12
 
 EXTRAVERSION = xenU
 
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/Kconfig     Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/Kconfig     Tue Aug  9 23:57:17 2005
@@ -150,3 +150,5 @@
 source "crypto/Kconfig"
 
 source "lib/Kconfig"
+
+source "arch/xen/Kconfig.debug"
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig      Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig      Tue Aug  9 23:57:17 2005
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.11-xen0
-# Tue May  3 13:22:55 2005
+# Linux kernel version: 2.6.12-xen0
+# Sat Jul  9 09:19:47 2005
 #
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
@@ -31,6 +31,7 @@
 CONFIG_BROKEN=y
 CONFIG_BROKEN_ON_SMP=y
 CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
 
 #
 # General setup
@@ -42,7 +43,6 @@
 # CONFIG_BSD_PROCESS_ACCT is not set
 CONFIG_SYSCTL=y
 # CONFIG_AUDIT is not set
-CONFIG_LOG_BUF_SHIFT=14
 CONFIG_HOTPLUG=y
 CONFIG_KOBJECT_UEVENT=y
 # CONFIG_IKCONFIG is not set
@@ -50,15 +50,18 @@
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
 CONFIG_EPOLL=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SHMEM=y
 CONFIG_CC_ALIGN_FUNCTIONS=0
 CONFIG_CC_ALIGN_LABELS=0
 CONFIG_CC_ALIGN_LOOPS=0
 CONFIG_CC_ALIGN_JUMPS=0
 # CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
 
 #
 # Loadable module support
@@ -97,6 +100,7 @@
 # CONFIG_MWINCHIPC6 is not set
 # CONFIG_MWINCHIP2 is not set
 # CONFIG_MWINCHIP3D is not set
+# CONFIG_MGEODEGX1 is not set
 # CONFIG_MCYRIXIII is not set
 # CONFIG_MVIAC3_2 is not set
 # CONFIG_X86_GENERIC is not set
@@ -117,6 +121,7 @@
 # CONFIG_SMP is not set
 CONFIG_PREEMPT=y
 CONFIG_PREEMPT_BKL=y
+# CONFIG_X86_REBOOTFIXUPS is not set
 CONFIG_MICROCODE=y
 CONFIG_X86_CPUID=y
 
@@ -137,6 +142,8 @@
 CONFIG_PCI_DIRECT=y
 CONFIG_PCI_LEGACY_PROC=y
 # CONFIG_PCI_NAMES is not set
+# CONFIG_PCI_DEBUG is not set
+CONFIG_ISA_DMA_API=y
 CONFIG_ISA=y
 # CONFIG_EISA is not set
 # CONFIG_MCA is not set
@@ -148,11 +155,6 @@
 # CONFIG_PCCARD is not set
 
 #
-# PC-card bridges
-#
-CONFIG_PCMCIA_PROBE=y
-
-#
 # PCI Hotplug Support
 #
 # CONFIG_HOTPLUG_PCI is not set
@@ -160,12 +162,14 @@
 #
 # Kernel hacking
 #
+# CONFIG_PRINTK_TIME is not set
 CONFIG_DEBUG_KERNEL=y
 CONFIG_EARLY_PRINTK=y
 # CONFIG_DEBUG_STACKOVERFLOW is not set
 # CONFIG_DEBUG_STACK_USAGE is not set
 # CONFIG_DEBUG_SLAB is not set
 CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_PAGEALLOC is not set
 # CONFIG_DEBUG_INFO is not set
@@ -176,6 +180,7 @@
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_X86_BIOS_REBOOT=y
 CONFIG_PC=y
+CONFIG_SECCOMP=y
 
 #
 # Executable file formats
@@ -332,7 +337,7 @@
 #
 # SCSI Transport Attributes
 #
-# CONFIG_SCSI_SPI_ATTRS is not set
+CONFIG_SCSI_SPI_ATTRS=y
 # CONFIG_SCSI_FC_ATTRS is not set
 # CONFIG_SCSI_ISCSI_ATTRS is not set
 
@@ -409,6 +414,7 @@
 # CONFIG_SCSI_QLA2300 is not set
 # CONFIG_SCSI_QLA2322 is not set
 # CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_LPFC is not set
 # CONFIG_SCSI_SEAGATE is not set
 # CONFIG_SCSI_SYM53C416 is not set
 # CONFIG_SCSI_DC395x is not set
@@ -442,6 +448,7 @@
 CONFIG_DM_SNAPSHOT=y
 CONFIG_DM_MIRROR=y
 # CONFIG_DM_ZERO is not set
+# CONFIG_DM_MULTIPATH is not set
 
 #
 # Fusion MPT device support
@@ -470,7 +477,6 @@
 #
 CONFIG_PACKET=y
 # CONFIG_PACKET_MMAP is not set
-# CONFIG_NETLINK_DEV is not set
 CONFIG_UNIX=y
 # CONFIG_NET_KEY is not set
 CONFIG_INET=y
@@ -650,7 +656,6 @@
 # CONFIG_DGRS is not set
 # CONFIG_EEPRO100 is not set
 CONFIG_E100=y
-# CONFIG_E100_NAPI is not set
 # CONFIG_FEALNX is not set
 # CONFIG_NATSEMI is not set
 CONFIG_NE2K_PCI=y
@@ -683,6 +688,7 @@
 # CONFIG_SK98LIN is not set
 # CONFIG_VIA_VELOCITY is not set
 CONFIG_TIGON3=y
+# CONFIG_BNX2 is not set
 
 #
 # Ethernet (10000 Mbit)
@@ -738,19 +744,6 @@
 # CONFIG_INPUT_TSDEV is not set
 # CONFIG_INPUT_EVDEV is not set
 # CONFIG_INPUT_EVBUG is not set
-
-#
-# Input I/O drivers
-#
-# CONFIG_GAMEPORT is not set
-CONFIG_SOUND_GAMEPORT=y
-CONFIG_SERIO=y
-CONFIG_SERIO_I8042=y
-CONFIG_SERIO_SERPORT=y
-# CONFIG_SERIO_CT82C710 is not set
-# CONFIG_SERIO_PCIPS2 is not set
-CONFIG_SERIO_LIBPS2=y
-# CONFIG_SERIO_RAW is not set
 
 #
 # Input Device Drivers
@@ -773,6 +766,18 @@
 # CONFIG_INPUT_MISC is not set
 
 #
+# Hardware I/O ports
+#
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+CONFIG_SERIO_SERPORT=y
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PCIPS2 is not set
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_SERIO_RAW is not set
+# CONFIG_GAMEPORT is not set
+
+#
 # Character devices
 #
 CONFIG_VT=y
@@ -788,6 +793,7 @@
 #
 # Non-8250 serial port support
 #
+# CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
 CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTY_COUNT=256
@@ -820,7 +826,6 @@
 CONFIG_AGP_AMD=m
 CONFIG_AGP_AMD64=m
 CONFIG_AGP_INTEL=m
-CONFIG_AGP_INTEL_MCH=m
 CONFIG_AGP_NVIDIA=m
 CONFIG_AGP_SIS=m
 CONFIG_AGP_SWORKS=m
@@ -841,6 +846,11 @@
 # CONFIG_HANGCHECK_TIMER is not set
 
 #
+# TPM devices
+#
+# CONFIG_TCG_TPM is not set
+
+#
 # I2C support
 #
 # CONFIG_I2C is not set
@@ -886,6 +896,8 @@
 #
 # USB support
 #
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
 CONFIG_USB=y
 # CONFIG_USB_DEBUG is not set
 
@@ -896,14 +908,14 @@
 # CONFIG_USB_BANDWIDTH is not set
 # CONFIG_USB_DYNAMIC_MINORS is not set
 # CONFIG_USB_OTG is not set
-CONFIG_USB_ARCH_HAS_HCD=y
-CONFIG_USB_ARCH_HAS_OHCI=y
 
 #
 # USB Host Controller Drivers
 #
 # CONFIG_USB_EHCI_HCD is not set
 CONFIG_USB_OHCI_HCD=y
+# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
 CONFIG_USB_UHCI_HCD=y
 # CONFIG_USB_SL811_HCD is not set
 
@@ -940,7 +952,6 @@
 #
 # CONFIG_USB_MDC800 is not set
 # CONFIG_USB_MICROTEK is not set
-# CONFIG_USB_HPUSBSCSI is not set
 
 #
 # USB Multimedia devices
@@ -959,6 +970,7 @@
 # CONFIG_USB_PEGASUS is not set
 # CONFIG_USB_RTL8150 is not set
 # CONFIG_USB_USBNET is not set
+CONFIG_USB_MON=y
 
 #
 # USB port drivers
@@ -1174,6 +1186,7 @@
 # CONFIG_CRYPTO_SHA256 is not set
 # CONFIG_CRYPTO_SHA512 is not set
 # CONFIG_CRYPTO_WP512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
 CONFIG_CRYPTO_DES=m
 # CONFIG_CRYPTO_BLOWFISH is not set
 # CONFIG_CRYPTO_TWOFISH is not set
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig      Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig      Tue Aug  9 23:57:17 2005
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.11-xenU
-# Wed Apr 13 23:18:37 2005
+# Linux kernel version: 2.6.12-xenU
+# Sun Jul 10 17:32:04 2005
 #
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
@@ -28,6 +28,7 @@
 CONFIG_CLEAN_COMPILE=y
 CONFIG_BROKEN_ON_SMP=y
 CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
 
 #
 # General setup
@@ -39,23 +40,26 @@
 # CONFIG_BSD_PROCESS_ACCT is not set
 CONFIG_SYSCTL=y
 # CONFIG_AUDIT is not set
-CONFIG_LOG_BUF_SHIFT=14
 CONFIG_HOTPLUG=y
 CONFIG_KOBJECT_UEVENT=y
 # CONFIG_IKCONFIG is not set
+# CONFIG_CPUSETS is not set
 # CONFIG_EMBEDDED is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
 CONFIG_EPOLL=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SHMEM=y
 CONFIG_CC_ALIGN_FUNCTIONS=0
 CONFIG_CC_ALIGN_LABELS=0
 CONFIG_CC_ALIGN_LOOPS=0
 CONFIG_CC_ALIGN_JUMPS=0
 # CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
 
 #
 # Loadable module support
@@ -94,6 +98,7 @@
 # CONFIG_MWINCHIPC6 is not set
 # CONFIG_MWINCHIP2 is not set
 # CONFIG_MWINCHIP3D is not set
+# CONFIG_MGEODEGX1 is not set
 # CONFIG_MCYRIXIII is not set
 # CONFIG_MVIAC3_2 is not set
 # CONFIG_X86_GENERIC is not set
@@ -114,6 +119,7 @@
 # CONFIG_SMP is not set
 CONFIG_PREEMPT=y
 CONFIG_PREEMPT_BKL=y
+# CONFIG_X86_REBOOTFIXUPS is not set
 CONFIG_X86_CPUID=y
 
 #
@@ -144,6 +150,8 @@
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_X86_BIOS_REBOOT=y
 CONFIG_PC=y
+CONFIG_SECCOMP=y
+CONFIG_EARLY_PRINTK=y
 
 #
 # Executable file formats
@@ -239,7 +247,6 @@
 #
 CONFIG_PACKET=y
 # CONFIG_PACKET_MMAP is not set
-# CONFIG_NETLINK_DEV is not set
 CONFIG_UNIX=y
 # CONFIG_NET_KEY is not set
 CONFIG_INET=y
@@ -506,6 +513,7 @@
 # CONFIG_CRYPTO_SHA256 is not set
 # CONFIG_CRYPTO_SHA512 is not set
 # CONFIG_CRYPTO_WP512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
 # CONFIG_CRYPTO_DES is not set
 # CONFIG_CRYPTO_BLOWFISH is not set
 # CONFIG_CRYPTO_TWOFISH is not set
@@ -534,3 +542,27 @@
 # CONFIG_CRC32 is not set
 CONFIG_LIBCRC32C=m
 CONFIG_ZLIB_INFLATE=y
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_FRAME_POINTER is not set
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_KPROBES is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_4KSTACKS is not set
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/i386/Kconfig        Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/Kconfig        Tue Aug  9 23:57:17 2005
@@ -65,6 +65,7 @@
          - "Winchip-C6" for original IDT Winchip.
          - "Winchip-2" for IDT Winchip 2.
          - "Winchip-2A" for IDT Winchips with 3dNow! capabilities.
+         - "GeodeGX1" for Geode GX1 (Cyrix MediaGX).
          - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
          - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above).
 
@@ -191,6 +192,11 @@
          and alignment reqirements.  Also enable out of order memory
          stores for this CPU, which can increase performance of some
          operations.
+
+config MGEODEGX1
+       bool "GeodeGX1"
+       help
+         Select this for a Geode GX1 (Cyrix MediaGX) chip.
 
 config MCYRIXIII
        bool "CyrixIII/VIA-C3"
@@ -240,7 +246,7 @@
        int
        default "7" if MPENTIUM4 || X86_GENERIC
        default "4" if X86_ELAN || M486 || M386
-       default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2
+       default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1
        default "6" if MK7 || MK8 || MPENTIUMM
 
 config RWSEM_GENERIC_SPINLOCK
@@ -259,7 +265,7 @@
 
 config X86_PPRO_FENCE
        bool
-       depends on M686 || M586MMX || M586TSC || M586 || M486 || M386
+       depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1
        default y
 
 config X86_F00F_BUG
@@ -289,7 +295,7 @@
 
 config X86_ALIGNMENT_16
        bool
-       depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2
+       depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
        default y
 
 config X86_GOOD_APIC
@@ -415,7 +421,7 @@
 
 #config X86_TSC
 #       bool
-#      depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2) && !X86_NUMAQ
+#      depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ
 #       default y
 
 #config X86_MCE
@@ -455,6 +461,24 @@
 #         Enabling this feature will cause a message to be printed when the P4
 #         enters thermal throttling.
 
+config X86_REBOOTFIXUPS
+       bool "Enable X86 board specific fixups for reboot"
+       depends on X86
+       default n
+       ---help---
+         This enables chipset and/or board specific fixups to be done
+         in order to get reboot to work correctly. This is only needed on
+         some combinations of hardware and BIOS. The symptom, for which
+         this config is intended, is when reboot ends with a stalled/hung
+         system.
+
+         Currently, the only fixup is for the Geode GX1/CS5530A/TROM2.1.
+         combination.
+
+         Say Y if you want to enable the fixup. Currently, it's safe to
+         enable this option even if you don't need it.
+         Say N otherwise.
+
 config MICROCODE
        tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
         depends on XEN_PRIVILEGED_GUEST
@@ -578,6 +602,16 @@
 config HAVE_ARCH_BOOTMEM_NODE
        bool
        depends on NUMA
+       default y
+
+config HAVE_MEMORY_PRESENT
+       bool
+       depends on DISCONTIGMEM
+       default y
+
+config NEED_NODE_MEMMAP_SIZE
+       bool
+       depends on DISCONTIGMEM
        default y
 
 #config HIGHPTE
@@ -673,13 +707,18 @@
 
 config X86_LOCAL_APIC
        bool
-       depends on (X86_VISWS || SMP) && !X86_VOYAGER
+       depends on XEN_PRIVILEGED_GUEST && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER))
        default y
 
 config X86_IO_APIC
        bool
-       depends on SMP && !(X86_VISWS || X86_VOYAGER)
-       default y
+       depends on XEN_PRIVILEGED_GUEST && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)))
+       default y
+
+config X86_VISWS_APIC
+       bool
+       depends on X86_VISWS
+       default y
 
 config PCI
        bool "PCI support" if !X86_VISWS
@@ -748,6 +787,10 @@
 
 source "drivers/pci/Kconfig"
 
+config ISA_DMA_API
+       bool
+       default y
+
 config ISA
        bool "ISA support"
        depends on !(X86_VOYAGER || X86_VISWS)
@@ -777,17 +820,13 @@
 source "drivers/eisa/Kconfig"
 
 config MCA
-       bool "MCA support"
-       depends on !(X86_VISWS || X86_VOYAGER)
+       bool "MCA support" if !(X86_VISWS || X86_VOYAGER)
+       default y if X86_VOYAGER
        help
          MicroChannel Architecture is found in some IBM PS/2 machines and
          laptops.  It is a bus system similar to PCI or ISA. See
          <file:Documentation/mca.txt> (and especially the web page given
          there) before attempting to build an MCA bus kernel.
-
-config MCA
-       depends on X86_VOYAGER
-       default y if X86_VOYAGER
 
 source "drivers/mca/Kconfig"
 
@@ -971,4 +1010,21 @@
        depends on X86 && !EMBEDDED
        default y
 
+config SECCOMP
+       bool "Enable seccomp to safely compute untrusted bytecode"
+       depends on PROC_FS
+       default y
+       help
+         This kernel feature is useful for number crunching applications
+         that may need to compute untrusted bytecode during their
+         execution. By using pipes or other transports made available to
+         the process as file descriptors supporting the read/write
+         syscalls, it's possible to isolate those applications in
+         their own address space using seccomp. Once seccomp is
+         enabled via /proc/<pid>/seccomp, it cannot be disabled
+         and the task is only allowed to execute a few safe syscalls
+         defined by each seccomp mode.
+
+         If unsure, say Y. Only embedded should say N here.
+
 endmenu
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/Makefile       Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/Makefile       Tue Aug  9 23:57:17 2005
@@ -14,6 +14,8 @@
 # 19990713  Artur Skawina <skawina@xxxxxxxxxxxxx>
 #           Added '-march' and '-mpreferred-stack-boundary' support
 #
+# 20050320  Kianusch Sayah Karadji <kianusch@xxxxxxxxxxx>
+#           Added support for GEODE CPU
 
 XENARCH        := $(subst ",,$(CONFIG_XENARCH))
 
@@ -55,6 +57,9 @@
 
 # AMD Elan support
 cflags-$(CONFIG_X86_ELAN)      += -march=i486
+
+# Geode GX1 support
+cflags-$(CONFIG_MGEODEGX1)             += $(call cc-option,-march=pentium-mmx,-march=i486)
 
 # -mregparm=3 works ok on gcc-3.0 and later
 #
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile        Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile        Tue Aug  9 23:57:17 2005
@@ -32,6 +32,7 @@
 c-obj-$(CONFIG_X86_MPPARSE)    += mpparse.o
 c-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
 c-obj-$(CONFIG_X86_IO_APIC)    += io_apic.o
+c-obj-$(CONFIG_X86_REBOOTFIXUPS)+= reboot_fixups.o
 c-obj-$(CONFIG_X86_NUMAQ)      += numaq.o
 c-obj-$(CONFIG_X86_SUMMIT_NUMA)        += summit.o
 c-obj-$(CONFIG_MODULES)                += module.o
@@ -51,11 +52,11 @@
 # Note: kbuild does not track this dependency due to usage of .incbin
 $(obj)/vsyscall.o: $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so
 targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so)
-targets += vsyscall.lds
+targets += vsyscall-note.o vsyscall.lds
 
 # The DSO images are built using a special linker script.
 quiet_cmd_syscall = SYSCALL $@
-      cmd_syscall = $(CC) -nostdlib -m32 $(SYSCFLAGS_$(@F)) \
+      cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \
                          -Wl,-T,$(filter-out FORCE,$^) -o $@
 
 export CPPFLAGS_vsyscall.lds += -P -C -U$(ARCH)
@@ -65,7 +66,8 @@
 SYSCFLAGS_vsyscall-int80.so    = $(vsyscall-flags)
 
 $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
-$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
+$(obj)/vsyscall-%.so: $(src)/vsyscall.lds \
+                     $(obj)/vsyscall-%.o FORCE
        $(call if_changed,syscall)
 
 # We also create a special relocatable object that should mirror the symbol
@@ -76,17 +78,20 @@
 $(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o
 
 SYSCFLAGS_vsyscall-syms.o = -r
-$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds $(obj)/vsyscall-sysenter.o FORCE
+$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
+                       $(obj)/vsyscall-sysenter.o FORCE
        $(call if_changed,syscall)
 
 c-link := init_task.o
-s-link := vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o vsyscall.lds.o
+s-link := vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o vsyscall.lds.o syscall_table.o
 
 $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)) $(patsubst %.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
        @ln -fsn $(srctree)/arch/i386/kernel/$(notdir $@) $@
 
 $(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S
 
+$(obj)/entry.o: $(src)/entry.S $(src)/syscall_table.S
+
 obj-y  += $(c-obj-y) $(s-obj-y)
 
 clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c    Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c    Tue Aug  9 23:57:17 2005
@@ -22,6 +22,9 @@
 DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]);
 EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
 
+DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
+EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
+
 static int cachesize_override __initdata = -1;
 static int disable_x86_fxsr __initdata = 0;
 static int disable_x86_serial_nr __initdata = 1;
@@ -202,7 +205,7 @@
 
 
 /* Probe for the CPUID instruction */
-int __init have_cpuid_p(void)
+static int __init have_cpuid_p(void)
 {
        return flag_is_changeable_p(X86_EFLAGS_ID);
 }
@@ -210,7 +213,7 @@
 /* Do minimum CPU detection early.
    Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
    The others are not touched to avoid unwanted side effects. */
-void __init early_cpu_detect(void)
+static void __init early_cpu_detect(void)
 {
        struct cpuinfo_x86 *c = &boot_cpu_data;
 
@@ -243,6 +246,10 @@
        }
 
        early_intel_workaround(c);
+
+#ifdef CONFIG_X86_HT
+       phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
+#endif
 }
 
 void __init generic_identify(struct cpuinfo_x86 * c)
@@ -431,25 +438,15 @@
        mcheck_init(c);
 #endif
 }
-/*
- *     Perform early boot up checks for a valid TSC. See arch/i386/kernel/time.c
- */
- 
-void __init dodgy_tsc(void)
-{
-       if (( boot_cpu_data.x86_vendor == X86_VENDOR_CYRIX ) ||
-           ( boot_cpu_data.x86_vendor == X86_VENDOR_NSC   ))
-               cpu_devs[X86_VENDOR_CYRIX]->c_init(&boot_cpu_data);
-}
 
 #ifdef CONFIG_X86_HT
 void __init detect_ht(struct cpuinfo_x86 *c)
 {
        u32     eax, ebx, ecx, edx;
-       int     index_lsb, index_msb, tmp;
+       int     index_msb, tmp;
        int     cpu = smp_processor_id();
 
-       if (!cpu_has(c, X86_FEATURE_HT))
+       if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
                return;
 
        cpuid(1, &eax, &ebx, &ecx, &edx);
@@ -458,7 +455,6 @@
        if (smp_num_siblings == 1) {
                printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
        } else if (smp_num_siblings > 1 ) {
-               index_lsb = 0;
                index_msb = 31;
 
                if (smp_num_siblings > NR_CPUS) {
@@ -467,21 +463,34 @@
                        return;
                }
                tmp = smp_num_siblings;
-               while ((tmp & 1) == 0) {
-                       tmp >>=1 ;
-                       index_lsb++;
-               }
-               tmp = smp_num_siblings;
                while ((tmp & 0x80000000 ) == 0) {
                        tmp <<=1 ;
                        index_msb--;
                }
-               if (index_lsb != index_msb )
+               if (smp_num_siblings & (smp_num_siblings - 1))
                        index_msb++;
                phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
 
                printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
                       phys_proc_id[cpu]);
+
+               smp_num_siblings = smp_num_siblings / c->x86_num_cores;
+
+               tmp = smp_num_siblings;
+               index_msb = 31;
+               while ((tmp & 0x80000000) == 0) {
+                       tmp <<=1 ;
+                       index_msb--;
+               }
+
+               if (smp_num_siblings & (smp_num_siblings - 1))
+                       index_msb++;
+
+               cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+
+               if (c->x86_num_cores > 1)
+                       printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
+                              cpu_core_id[cpu]);
        }
 }
 #endif
@@ -528,7 +537,6 @@
 extern int rise_init_cpu(void);
 extern int nexgen_init_cpu(void);
 extern int umc_init_cpu(void);
-void early_cpu_detect(void);
 
 void __init early_cpu_init(void)
 {
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c Tue Aug  9 23:57:17 2005
@@ -31,7 +31,7 @@
 unsigned int num_var_ranges;
 unsigned int *usage_table;
 
-void __init set_num_var_ranges(void)
+static void __init set_num_var_ranges(void)
 {
        dom0_op_t op;
 
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Tue Aug  9 23:57:17 2005
@@ -595,8 +595,6 @@
        xorl %edx,%edx                  # error code 0
        movl %esp,%eax                  # pt_regs pointer
        call do_debug
-       testl %eax,%eax
-       jnz restore_all
        jmp ret_from_exception
 
 #if 0 /* XEN */
@@ -651,8 +649,6 @@
        xorl %edx,%edx          # zero error code
        movl %esp,%eax          # pt_regs pointer
        call do_int3
-       testl %eax,%eax
-       jnz restore_all
        jmp ret_from_exception
 
 ENTRY(overflow)
@@ -736,296 +732,6 @@
        pushl $do_fixup_4gb_segment
        jmp error_code
 
-.data
-ENTRY(sys_call_table)
-       .long sys_restart_syscall       /* 0 - old "setup()" system call, used for restarting */
-       .long sys_exit
-       .long sys_fork
-       .long sys_read
-       .long sys_write
-       .long sys_open          /* 5 */
-       .long sys_close
-       .long sys_waitpid
-       .long sys_creat
-       .long sys_link
-       .long sys_unlink        /* 10 */
-       .long sys_execve
-       .long sys_chdir
-       .long sys_time
-       .long sys_mknod
-       .long sys_chmod         /* 15 */
-       .long sys_lchown16
-       .long sys_ni_syscall    /* old break syscall holder */
-       .long sys_stat
-       .long sys_lseek
-       .long sys_getpid        /* 20 */
-       .long sys_mount
-       .long sys_oldumount
-       .long sys_setuid16
-       .long sys_getuid16
-       .long sys_stime         /* 25 */
-       .long sys_ptrace
-       .long sys_alarm
-       .long sys_fstat
-       .long sys_pause
-       .long sys_utime         /* 30 */
-       .long sys_ni_syscall    /* old stty syscall holder */
-       .long sys_ni_syscall    /* old gtty syscall holder */
-       .long sys_access
-       .long sys_nice
-       .long sys_ni_syscall    /* 35 - old ftime syscall holder */
-       .long sys_sync
-       .long sys_kill
-       .long sys_rename
-       .long sys_mkdir
-       .long sys_rmdir         /* 40 */
-       .long sys_dup
-       .long sys_pipe
-       .long sys_times
-       .long sys_ni_syscall    /* old prof syscall holder */
-       .long sys_brk           /* 45 */
-       .long sys_setgid16
-       .long sys_getgid16
-       .long sys_signal
-       .long sys_geteuid16
-       .long sys_getegid16     /* 50 */
-       .long sys_acct
-       .long sys_umount        /* recycled never used phys() */
-       .long sys_ni_syscall    /* old lock syscall holder */
-       .long sys_ioctl
-       .long sys_fcntl         /* 55 */
-       .long sys_ni_syscall    /* old mpx syscall holder */
-       .long sys_setpgid
-       .long sys_ni_syscall    /* old ulimit syscall holder */
-       .long sys_olduname
-       .long sys_umask         /* 60 */
-       .long sys_chroot
-       .long sys_ustat
-       .long sys_dup2
-       .long sys_getppid
-       .long sys_getpgrp       /* 65 */
-       .long sys_setsid
-       .long sys_sigaction
-       .long sys_sgetmask
-       .long sys_ssetmask
-       .long sys_setreuid16    /* 70 */
-       .long sys_setregid16
-       .long sys_sigsuspend
-       .long sys_sigpending
-       .long sys_sethostname
-       .long sys_setrlimit     /* 75 */
-       .long sys_old_getrlimit
-       .long sys_getrusage
-       .long sys_gettimeofday
-       .long sys_settimeofday
-       .long sys_getgroups16   /* 80 */
-       .long sys_setgroups16
-       .long old_select
-       .long sys_symlink
-       .long sys_lstat
-       .long sys_readlink      /* 85 */
-       .long sys_uselib
-       .long sys_swapon
-       .long sys_reboot
-       .long old_readdir
-       .long old_mmap          /* 90 */
-       .long sys_munmap
-       .long sys_truncate
-       .long sys_ftruncate
-       .long sys_fchmod
-       .long sys_fchown16      /* 95 */
-       .long sys_getpriority
-       .long sys_setpriority
-       .long sys_ni_syscall    /* old profil syscall holder */
-       .long sys_statfs
-       .long sys_fstatfs       /* 100 */
-       .long sys_ioperm
-       .long sys_socketcall
-       .long sys_syslog
-       .long sys_setitimer
-       .long sys_getitimer     /* 105 */
-       .long sys_newstat
-       .long sys_newlstat
-       .long sys_newfstat
-       .long sys_uname
-       .long sys_iopl          /* 110 */
-       .long sys_vhangup
-       .long sys_ni_syscall    /* old "idle" system call */
-       .long sys_vm86old
-       .long sys_wait4
-       .long sys_swapoff       /* 115 */
-       .long sys_sysinfo
-       .long sys_ipc
-       .long sys_fsync
-       .long sys_sigreturn
-       .long sys_clone         /* 120 */
-       .long sys_setdomainname
-       .long sys_newuname
-       .long sys_modify_ldt
-       .long sys_adjtimex
-       .long sys_mprotect      /* 125 */
-       .long sys_sigprocmask
-       .long sys_ni_syscall    /* old "create_module" */ 
-       .long sys_init_module
-       .long sys_delete_module
-       .long sys_ni_syscall    /* 130: old "get_kernel_syms" */
-       .long sys_quotactl
-       .long sys_getpgid
-       .long sys_fchdir
-       .long sys_bdflush
-       .long sys_sysfs         /* 135 */
-       .long sys_personality
-       .long sys_ni_syscall    /* reserved for afs_syscall */
-       .long sys_setfsuid16
-       .long sys_setfsgid16
-       .long sys_llseek        /* 140 */
-       .long sys_getdents
-       .long sys_select
-       .long sys_flock
-       .long sys_msync
-       .long sys_readv         /* 145 */
-       .long sys_writev
-       .long sys_getsid
-       .long sys_fdatasync
-       .long sys_sysctl
-       .long sys_mlock         /* 150 */
-       .long sys_munlock
-       .long sys_mlockall
-       .long sys_munlockall
-       .long sys_sched_setparam
-       .long sys_sched_getparam   /* 155 */
-       .long sys_sched_setscheduler
-       .long sys_sched_getscheduler
-       .long sys_sched_yield
-       .long sys_sched_get_priority_max
-       .long sys_sched_get_priority_min  /* 160 */
-       .long sys_sched_rr_get_interval
-       .long sys_nanosleep
-       .long sys_mremap
-       .long sys_setresuid16
-       .long sys_getresuid16   /* 165 */
-       .long sys_vm86
-       .long sys_ni_syscall    /* Old sys_query_module */
-       .long sys_poll
-       .long sys_nfsservctl
-       .long sys_setresgid16   /* 170 */
-       .long sys_getresgid16
-       .long sys_prctl
-       .long sys_rt_sigreturn
-       .long sys_rt_sigaction
-       .long sys_rt_sigprocmask        /* 175 */
-       .long sys_rt_sigpending
-       .long sys_rt_sigtimedwait
-       .long sys_rt_sigqueueinfo
-       .long sys_rt_sigsuspend
-       .long sys_pread64       /* 180 */
-       .long sys_pwrite64
-       .long sys_chown16
-       .long sys_getcwd
-       .long sys_capget
-       .long sys_capset        /* 185 */
-       .long sys_sigaltstack
-       .long sys_sendfile
-       .long sys_ni_syscall    /* reserved for streams1 */
-       .long sys_ni_syscall    /* reserved for streams2 */
-       .long sys_vfork         /* 190 */
-       .long sys_getrlimit
-       .long sys_mmap2
-       .long sys_truncate64
-       .long sys_ftruncate64
-       .long sys_stat64        /* 195 */
-       .long sys_lstat64
-       .long sys_fstat64
-       .long sys_lchown
-       .long sys_getuid
-       .long sys_getgid        /* 200 */
-       .long sys_geteuid
-       .long sys_getegid
-       .long sys_setreuid
-       .long sys_setregid
-       .long sys_getgroups     /* 205 */
-       .long sys_setgroups
-       .long sys_fchown
-       .long sys_setresuid
-       .long sys_getresuid
-       .long sys_setresgid     /* 210 */
-       .long sys_getresgid
-       .long sys_chown
-       .long sys_setuid
-       .long sys_setgid
-       .long sys_setfsuid      /* 215 */
-       .long sys_setfsgid
-       .long sys_pivot_root
-       .long sys_mincore
-       .long sys_madvise
-       .long sys_getdents64    /* 220 */
-       .long sys_fcntl64
-       .long sys_ni_syscall    /* reserved for TUX */
-       .long sys_ni_syscall
-       .long sys_gettid
-       .long sys_readahead     /* 225 */
-       .long sys_setxattr
-       .long sys_lsetxattr
-       .long sys_fsetxattr
-       .long sys_getxattr
-       .long sys_lgetxattr     /* 230 */
-       .long sys_fgetxattr
-       .long sys_listxattr
-       .long sys_llistxattr
-       .long sys_flistxattr
-       .long sys_removexattr   /* 235 */
-       .long sys_lremovexattr
-       .long sys_fremovexattr
-       .long sys_tkill
-       .long sys_sendfile64
-       .long sys_futex         /* 240 */
-       .long sys_sched_setaffinity
-       .long sys_sched_getaffinity
-       .long sys_set_thread_area
-       .long sys_get_thread_area
-       .long sys_io_setup      /* 245 */
-       .long sys_io_destroy
-       .long sys_io_getevents
-       .long sys_io_submit
-       .long sys_io_cancel
-       .long sys_fadvise64     /* 250 */
-       .long sys_ni_syscall
-       .long sys_exit_group
-       .long sys_lookup_dcookie
-       .long sys_epoll_create
-       .long sys_epoll_ctl     /* 255 */
-       .long sys_epoll_wait
-       .long sys_remap_file_pages
-       .long sys_set_tid_address
-       .long sys_timer_create
-       .long sys_timer_settime         /* 260 */
-       .long sys_timer_gettime
-       .long sys_timer_getoverrun
-       .long sys_timer_delete
-       .long sys_clock_settime
-       .long sys_clock_gettime         /* 265 */
-       .long sys_clock_getres
-       .long sys_clock_nanosleep
-       .long sys_statfs64
-       .long sys_fstatfs64     
-       .long sys_tgkill        /* 270 */
-       .long sys_utimes
-       .long sys_fadvise64_64
-       .long sys_ni_syscall    /* sys_vserver */
-       .long sys_mbind
-       .long sys_get_mempolicy
-       .long sys_set_mempolicy
-       .long sys_mq_open
-       .long sys_mq_unlink
-       .long sys_mq_timedsend
-       .long sys_mq_timedreceive       /* 280 */
-       .long sys_mq_notify
-       .long sys_mq_getsetattr
-       .long sys_ni_syscall            /* reserved for kexec */
-       .long sys_waitid
-       .long sys_ni_syscall            /* 285 */ /* available */
-       .long sys_add_key
-       .long sys_request_key
-       .long sys_keyctl
+#include "syscall_table.S"
 
 syscall_table_size=(.-sys_call_table)
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c    Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c    Tue Aug  9 23:57:17 2005
@@ -99,6 +99,11 @@
 EXPORT_SYMBOL(__get_user_2);
 EXPORT_SYMBOL(__get_user_4);
 
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
 EXPORT_SYMBOL(strpbrk);
 EXPORT_SYMBOL(strstr);
 
@@ -114,7 +119,6 @@
 EXPORT_SYMBOL(dma_free_coherent);
 
 #ifdef CONFIG_PCI
-EXPORT_SYMBOL(pcibios_penalize_isa_irq);
 EXPORT_SYMBOL(pci_mem_start);
 #endif
 
@@ -146,7 +150,6 @@
 
 /* TLB flushing */
 EXPORT_SYMBOL(flush_tlb_page);
-EXPORT_SYMBOL_GPL(flush_tlb_all);
 #endif
 
 #ifdef CONFIG_X86_IO_APIC
@@ -168,10 +171,6 @@
 EXPORT_SYMBOL_GPL(set_nmi_callback);
 EXPORT_SYMBOL_GPL(unset_nmi_callback);
 
-#undef memcmp
-extern int memcmp(const void *,const void *,__kernel_size_t);
-EXPORT_SYMBOL(memcmp);
-
 EXPORT_SYMBOL(register_die_notifier);
 #ifdef CONFIG_HAVE_DEC_LOCK
 EXPORT_SYMBOL(_atomic_dec_and_lock);
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c       Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c       Tue Aug  9 23:57:17 2005
@@ -86,7 +86,7 @@
                           dma_addr_t *dma_handle)
 #else
 void *dma_alloc_coherent(struct device *dev, size_t size,
-                          dma_addr_t *dma_handle, int gfp)
+                          dma_addr_t *dma_handle, unsigned int __nocast gfp)
 #endif
 {
        void *ret;
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c       Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c       Tue Aug  9 23:57:17 2005
@@ -36,6 +36,7 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/ptrace.h>
+#include <linux/random.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -57,7 +58,7 @@
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
-int hlt_counter;
+static int hlt_counter;
 
 unsigned long boot_option_idle_override = 0;
 EXPORT_SYMBOL(boot_option_idle_override);
@@ -74,7 +75,7 @@
  * Powermanagement idle function, if any..
  */
 void (*pm_idle)(void);
-static cpumask_t cpu_idle_map;
+static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
 
 void disable_hlt(void)
 {
@@ -120,11 +121,11 @@
        while (1) {
                while (!need_resched()) {
 
-                       if (cpu_isset(cpu, cpu_idle_map))
-                               cpu_clear(cpu, cpu_idle_map);
+                       if (__get_cpu_var(cpu_idle_state))
+                               __get_cpu_var(cpu_idle_state) = 0;
                        rmb();
 
-                       irq_stat[cpu].idle_timestamp = jiffies;
+                       __get_cpu_var(irq_stat).idle_timestamp = jiffies;
                        xen_idle();
                }
                schedule();
@@ -133,16 +134,28 @@
 
 void cpu_idle_wait(void)
 {
-       int cpu;
+       unsigned int cpu, this_cpu = get_cpu();
        cpumask_t map;
 
-       for_each_online_cpu(cpu)
-               cpu_set(cpu, cpu_idle_map);
+       set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+       put_cpu();
+
+       cpus_clear(map);
+       for_each_online_cpu(cpu) {
+               per_cpu(cpu_idle_state, cpu) = 1;
+               cpu_set(cpu, map);
+       }
+
+       __get_cpu_var(cpu_idle_state) = 0;
 
        wmb();
        do {
                ssleep(1);
-               cpus_and(map, cpu_idle_map, cpu_online_map);
+               for_each_online_cpu(cpu) {
+                       if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+                               cpu_clear(cpu, map);
+               }
+               cpus_and(map, map, cpu_online_map);
        } while (!cpus_empty(map));
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
@@ -286,6 +299,17 @@
        unsigned long eflags;
 
        childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
+       /*
+        * The below -8 is to reserve 8 bytes on top of the ring0 stack.
+        * This is necessary to guarantee that the entire "struct pt_regs"
+        * is accessable even if the CPU haven't stored the SS/ESP registers
+        * on the stack (interrupt gate does not save these registers
+        * when switching to the same priv ring).
+        * Therefore beware: accessing the xss/esp fields of the
+        * "struct pt_regs" is possible, but they may contain the
+        * completely wrong values.
+        */
+       childregs = (struct pt_regs *) ((unsigned long) childregs - 8);
        *childregs = *regs;
        childregs->eax = 0;
        childregs->esp = esp;
@@ -439,12 +463,6 @@
         */
        tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
 }
-/*
- * This special macro can be used to load a debugging register
- */
-#define loaddebug(thread,register) \
-               HYPERVISOR_set_debugreg((register),     \
-                       (thread->debugreg[register]))
 
 /*
  *     switch_to(x,yn) should switch tasks from x to y.
@@ -777,3 +795,9 @@
        return 0;
 }
 
+unsigned long arch_align_stack(unsigned long sp)
+{
+       if (randomize_va_space)
+               sp -= get_random_int() % 8192;
+       return sp & ~0xf;
+}
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Tue Aug  9 23:57:17 2005
@@ -40,6 +40,7 @@
 #include <linux/efi.h>
 #include <linux/init.h>
 #include <linux/edd.h>
+#include <linux/nodemask.h>
 #include <linux/kernel.h>
 #include <linux/notifier.h>
 #include <video/edid.h>
@@ -80,7 +81,6 @@
 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
 
 unsigned long mmu_cr4_features;
-EXPORT_SYMBOL_GPL(mmu_cr4_features);
 
 #ifdef CONFIG_ACPI_INTERPRETER
        int acpi_disabled = 0;
@@ -122,8 +122,6 @@
 struct edid_info edid_info;
 struct ist_info ist_info;
 struct e820map e820;
-
-unsigned char aux_device_present;
 
 extern void early_cpu_init(void);
 extern void dmi_scan_machine(void);
@@ -454,10 +452,10 @@
        struct e820entry *pbios; /* pointer to original bios entry */
        unsigned long long addr; /* address for this change point */
 };
-struct change_member change_point_list[2*E820MAX] __initdata;
-struct change_member *change_point[2*E820MAX] __initdata;
-struct e820entry *overlap_list[E820MAX] __initdata;
-struct e820entry new_bios[E820MAX] __initdata;
+static struct change_member change_point_list[2*E820MAX] __initdata;
+static struct change_member *change_point[2*E820MAX] __initdata;
+static struct e820entry *overlap_list[E820MAX] __initdata;
+static struct e820entry new_bios[E820MAX] __initdata;
 
 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
 {
@@ -995,8 +993,6 @@
        return max_low_pfn;
 }
 
-#ifndef CONFIG_DISCONTIGMEM
-
 /*
  * Free all available memory for boot time allocation.  Used
  * as a callback function by efi_memory_walk()
@@ -1070,15 +1066,16 @@
                reserve_bootmem(addr, PAGE_SIZE);       
 }
 
+#ifndef CONFIG_DISCONTIGMEM
+void __init setup_bootmem_allocator(void);
 static unsigned long __init setup_memory(void)
 {
-       unsigned long bootmap_size, start_pfn, max_low_pfn;
 
        /*
         * partially used pages are not usable - thus
         * we are rounding upwards:
         */
-       start_pfn = PFN_UP(__pa(xen_start_info.pt_base)) + xen_start_info.nr_pt_frames;
+       min_low_pfn = PFN_UP(__pa(xen_start_info.pt_base)) + xen_start_info.nr_pt_frames;
 
        find_max_pfn();
 
@@ -1094,10 +1091,43 @@
 #endif
        printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
                        pages_to_mb(max_low_pfn));
+
+       setup_bootmem_allocator();
+
+       return max_low_pfn;
+}
+
+void __init zone_sizes_init(void)
+{
+       unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+       unsigned int max_dma, low;
+
+       max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+       low = max_low_pfn;
+
+       if (low < max_dma)
+               zones_size[ZONE_DMA] = low;
+       else {
+               zones_size[ZONE_DMA] = max_dma;
+               zones_size[ZONE_NORMAL] = low - max_dma;
+#ifdef CONFIG_HIGHMEM
+               zones_size[ZONE_HIGHMEM] = highend_pfn - low;
+#endif
+       }
+       free_area_init(zones_size);
+}
+#else
+extern unsigned long setup_memory(void);
+extern void zone_sizes_init(void);
+#endif /* !CONFIG_DISCONTIGMEM */
+
+void __init setup_bootmem_allocator(void)
+{
+       unsigned long bootmap_size;
        /*
         * Initialize the boot-time allocator (with low memory only):
         */
-       bootmap_size = init_bootmem(start_pfn, max_low_pfn);
+       bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
 
        register_bootmem_low_pages(max_low_pfn);
 
@@ -1107,7 +1137,7 @@
         * the (very unlikely) case of us accidentally initializing the
         * bootmem allocator with an invalid RAM area.
         */
-       reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
+       reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) +
                         bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
 
        /* reserve EBDA region, it's a 4K region */
@@ -1160,12 +1190,25 @@
 #endif
 
        phys_to_machine_mapping = (unsigned int *)xen_start_info.mfn_list;
-
-       return max_low_pfn;
-}
-#else
-extern unsigned long setup_memory(void);
-#endif /* !CONFIG_DISCONTIGMEM */
+}
+
+/*
+ * The node 0 pgdat is initialized before all of these because
+ * it's needed for bootmem.  node>0 pgdats have their virtual
+ * space allocated before the pagetables are in place to access
+ * them, so they can't be cleared then.
+ *
+ * This should all compile down to nothing when NUMA is off.
+ */
+void __init remapped_pgdat_init(void)
+{
+       int nid;
+
+       for_each_online_node(nid) {
+               if (nid != 0)
+                       memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+       }
+}
 
 /*
  * Request address space for all standard RAM and ROM resources
@@ -1440,7 +1483,6 @@
                machine_submodel_id = SYS_DESC_TABLE.table[1];
                BIOS_revision = SYS_DESC_TABLE.table[2];
        }
-       aux_device_present = AUX_DEVICE_INFO;
        bootloader_type = LOADER_TYPE;
 
 #ifdef CONFIG_XEN_PHYSDEV_ACCESS
@@ -1500,6 +1542,8 @@
        smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
 #endif
        paging_init();
+       remapped_pgdat_init();
+       zone_sizes_init();
 
        /* Make sure we have a correctly sized P->M table. */
        if (max_pfn != xen_start_info.nr_pages) {
@@ -1564,11 +1608,13 @@
        if (efi_enabled)
                efi_map_memmap();
 
+#ifdef CONFIG_ACPI_BOOT
        /*
         * Parse the ACPI tables for possible boot-time SMP configuration.
         */
        acpi_boot_table_init();
        acpi_boot_init();
+#endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
        if (smp_found_config)
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/signal.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/signal.c        Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/signal.c        Tue Aug  9 23:57:17 2005
@@ -93,7 +93,7 @@
 
        if (act) {
                old_sigset_t mask;
-               if (verify_area(VERIFY_READ, act, sizeof(*act)) ||
+               if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
                    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
                    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
                        return -EFAULT;
@@ -105,7 +105,7 @@
        ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
 
        if (!ret && oact) {
-               if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) ||
+               if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
                    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
                    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
                        return -EFAULT;
@@ -187,7 +187,7 @@
                struct _fpstate __user * buf;
                err |= __get_user(buf, &sc->fpstate);
                if (buf) {
-                       if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
+                       if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
                                goto badframe;
                        err |= restore_i387(buf);
                } else {
@@ -213,7 +213,7 @@
        sigset_t set;
        int eax;
 
-       if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
+       if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
                goto badframe;
        if (__get_user(set.sig[0], &frame->sc.oldmask)
            || (_NSIG_WORDS > 1
@@ -243,7 +243,7 @@
        sigset_t set;
        int eax;
 
-       if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
+       if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
                goto badframe;
        if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
                goto badframe;
@@ -557,6 +557,16 @@
                }
        }
 
+       /*
+        * If TF is set due to a debugger (PT_DTRACE), clear the TF flag so
+        * that register information in the sigcontext is correct.
+        */
+       if (unlikely(regs->eflags & TF_MASK)
+           && likely(current->ptrace & PT_DTRACE)) {
+               current->ptrace &= ~PT_DTRACE;
+               regs->eflags &= ~TF_MASK;
+       }
+
        /* Set up the stack frame */
        if (ka->sa.sa_flags & SA_SIGINFO)
                setup_rt_frame(sig, ka, info, oldset, regs);
@@ -608,8 +618,7 @@
                 * inside the kernel.
                 */
                if (unlikely(current->thread.debugreg[7])) {
-                       HYPERVISOR_set_debugreg(7,
-                                               current->thread.debugreg[7]);
+                       loaddebug(&current->thread, 7);
                }
 
                /* Whee!  Actually deliver the signal.  */
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Tue Aug  9 23:57:17 2005
@@ -176,6 +176,35 @@
  ({ rmb(); (shadow_time_version == HYPERVISOR_shared_info->time_version2); })
 
 /*
+ * This is a special lock that is owned by the CPU and holds the index
+ * register we are working with.  It is required for NMI access to the
+ * CMOS/RTC registers.  See include/asm-i386/mc146818rtc.h for details.
+ */
+volatile unsigned long cmos_lock = 0;
+EXPORT_SYMBOL(cmos_lock);
+
+/* Routines for accessing the CMOS RAM/RTC. */
+unsigned char rtc_cmos_read(unsigned char addr)
+{
+       unsigned char val;
+       lock_cmos_prefix(addr);
+       outb_p(addr, RTC_PORT(0));
+       val = inb_p(RTC_PORT(1));
+       lock_cmos_suffix(addr);
+       return val;
+}
+EXPORT_SYMBOL(rtc_cmos_read);
+
+void rtc_cmos_write(unsigned char val, unsigned char addr)
+{
+       lock_cmos_prefix(addr);
+       outb_p(addr, RTC_PORT(0));
+       outb_p(val, RTC_PORT(1));
+       lock_cmos_suffix(addr);
+}
+EXPORT_SYMBOL(rtc_cmos_write);
+
+/*
  * This version of gettimeofday has microsecond resolution
  * and better than microsecond precision on fast x86 machines with TSC.
  */
@@ -335,15 +364,22 @@
 {
        int retval;
 
+       WARN_ON(irqs_disabled());
+
        /* gets recalled with irq locally disabled */
-       spin_lock(&rtc_lock);
+       spin_lock_irq(&rtc_lock);
        if (efi_enabled)
                retval = efi_set_rtc_mmss(nowtime);
        else
                retval = mach_set_rtc_mmss(nowtime);
-       spin_unlock(&rtc_lock);
+       spin_unlock_irq(&rtc_lock);
 
        return retval;
+}
+#else
+static int set_rtc_mmss(unsigned long nowtime)
+{
+       return 0;
 }
 #endif
 
@@ -476,29 +512,6 @@
 
                last_update_to_xen = xtime.tv_sec;
        }
-
-       /*
-        * If we have an externally synchronized Linux clock, then update
-        * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
-        * called as close as possible to 500 ms before the new second starts.
-        */
-       if ((time_status & STA_UNSYNC) == 0 &&
-           xtime.tv_sec > last_rtc_update + 660 &&
-           (xtime.tv_nsec / 1000)
-                       >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
-           (xtime.tv_nsec / 1000)
-                       <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) {
-               /* horrible...FIXME */
-               if (efi_enabled) {
-                       if (efi_set_rtc_mmss(xtime.tv_sec) == 0)
-                               last_rtc_update = xtime.tv_sec;
-                       else
-                               last_rtc_update = xtime.tv_sec - 600;
-               } else if (set_rtc_mmss(xtime.tv_sec) == 0)
-                       last_rtc_update = xtime.tv_sec;
-               else
-                       last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
-       }
 #endif
 }
 
@@ -538,10 +551,59 @@
 
        return retval;
 }
+static void sync_cmos_clock(unsigned long dummy);
+
+static struct timer_list sync_cmos_timer =
+                                      TIMER_INITIALIZER(sync_cmos_clock, 0, 0);
+
+static void sync_cmos_clock(unsigned long dummy)
+{
+       struct timeval now, next;
+       int fail = 1;
+
+       /*
+        * If we have an externally synchronized Linux clock, then update
+        * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
+        * called as close as possible to 500 ms before the new second starts.
+        * This code is run on a timer.  If the clock is set, that timer
+        * may not expire at the correct time.  Thus, we adjust...
+        */
+       if ((time_status & STA_UNSYNC) != 0)
+               /*
+                * Not synced, exit, do not restart a timer (if one is
+                * running, let it run out).
+                */
+               return;
+
+       do_gettimeofday(&now);
+       if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
+           now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
+               fail = set_rtc_mmss(now.tv_sec);
+
+       next.tv_usec = USEC_AFTER - now.tv_usec;
+       if (next.tv_usec <= 0)
+               next.tv_usec += USEC_PER_SEC;
+
+       if (!fail)
+               next.tv_sec = 659;
+       else
+               next.tv_sec = 0;
+
+       if (next.tv_usec >= USEC_PER_SEC) {
+               next.tv_sec++;
+               next.tv_usec -= USEC_PER_SEC;
+       }
+       mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
+}
+
+void notify_arch_cmos_timer(void)
+{
+       mod_timer(&sync_cmos_timer, jiffies + 1);
+}
 
 static long clock_cmos_diff, sleep_start;
 
-static int timer_suspend(struct sys_device *dev, u32 state)
+static int timer_suspend(struct sys_device *dev, pm_message_t state)
 {
        /*
         * Estimate time zone so that set_time can update the clock
@@ -599,14 +661,14 @@
 #ifdef CONFIG_HPET_TIMER
 extern void (*late_time_init)(void);
 /* Duplicate of time_init() below, with hpet_enable part added */
-void __init hpet_time_init(void)
+static void __init hpet_time_init(void)
 {
        xtime.tv_sec = get_cmos_time();
        xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
        set_normalized_timespec(&wall_to_monotonic,
                -xtime.tv_sec, -xtime.tv_nsec);
 
-       if (hpet_enable() >= 0) {
+       if ((hpet_enable() >= 0) && hpet_use_timer) {
                printk("Using HPET for base-timer\n");
        }
 
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Tue Aug  9 23:57:17 2005
@@ -342,8 +342,7 @@
 
        if (panic_on_oops) {
                printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n");
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               schedule_timeout(5 * HZ);
+               ssleep(5);
                panic("Fatal exception");
        }
        do_exit(SIGSEGV);
@@ -450,6 +449,7 @@
 DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
 DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
+DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0)
 #ifdef CONFIG_X86_MCE
 DO_ERROR(18, SIGBUS, "machine check", machine_check)
 #endif
@@ -635,16 +635,15 @@
 }
 
 #ifdef CONFIG_KPROBES
-fastcall int do_int3(struct pt_regs *regs, long error_code)
+fastcall void do_int3(struct pt_regs *regs, long error_code)
 {
        if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
                        == NOTIFY_STOP)
-               return 1;
+               return;
        /* This is an interrupt gate, because kprobes wants interrupts
        disabled.  Normal trap handlers don't. */
        restore_interrupts(regs);
        do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
-       return 0;
 }
 #endif
 
@@ -701,8 +700,6 @@
        /*
         * Single-stepping through TF: make sure we ignore any events in
         * kernel space (but re-enable TF when returning to user mode).
-        * And if the event was due to a debugger (PT_DTRACE), clear the
-        * TF flag so that register information is correct.
         */
        if (condition & DR_STEP) {
                /*
@@ -712,11 +709,6 @@
                 */
                if ((regs->xcs & 2) == 0)
                        goto clear_TF_reenable;
-
-               if (likely(tsk->ptrace & PT_DTRACE)) {
-                       tsk->ptrace &= ~PT_DTRACE;
-                       regs->eflags &= ~TF_MASK;
-               }
        }
 
        /* Ok, finally something we can handle */
@@ -806,7 +798,7 @@
        math_error((void __user *)regs->eip);
 }
 
-void simd_math_error(void __user *eip)
+static void simd_math_error(void __user *eip)
 {
        struct task_struct * task;
        siginfo_t info;
@@ -876,6 +868,51 @@
                current->thread.error_code = error_code;
                force_sig(SIGSEGV, current);
        }
+}
+
+fastcall void setup_x86_bogus_stack(unsigned char * stk)
+{
+       unsigned long *switch16_ptr, *switch32_ptr;
+       struct pt_regs *regs;
+       unsigned long stack_top, stack_bot;
+       unsigned short iret_frame16_off;
+       int cpu = smp_processor_id();
+       /* reserve the space on 32bit stack for the magic switch16 pointer */
+       memmove(stk, stk + 8, sizeof(struct pt_regs));
+       switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs));
+       regs = (struct pt_regs *)stk;
+       /* now the switch32 on 16bit stack */
+       stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
+       stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
+       switch32_ptr = (unsigned long *)(stack_top - 8);
+       iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20;
+       /* copy iret frame on 16bit stack */
+       memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20);
+       /* fill in the switch pointers */
+       switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
+       switch16_ptr[1] = __ESPFIX_SS;
+       switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
+               8 - CPU_16BIT_STACK_SIZE;
+       switch32_ptr[1] = __KERNEL_DS;
+}
+
+fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
+{
+       unsigned long *switch32_ptr;
+       unsigned char *stack16, *stack32;
+       unsigned long stack_top, stack_bot;
+       int len;
+       int cpu = smp_processor_id();
+       stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
+       stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
+       switch32_ptr = (unsigned long *)(stack_top - 8);
+       /* copy the data from 16bit stack to 32bit stack */
+       len = CPU_16BIT_STACK_SIZE - 8 - sp;
+       stack16 = (unsigned char *)(stack_bot + sp);
+       stack32 = (unsigned char *)
+               (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len);
+       memcpy(stack32, stack16, len);
+       return stack32;
 }
 
 /*
@@ -978,3 +1015,10 @@
         */
        cpu_init();
 }
+
+static int __init kstack_setup(char *s)
+{
+       kstack_depth_to_print = simple_strtoul(s, NULL, 0);
+       return 0;
+}
+__setup("kstack=", kstack_setup);
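
setup_x86_bogus_stack() and fixup_x86_bogus_stack() above shuttle an iret
frame between the kernel stack and a 1 KB per-CPU 16-bit stack (the ESPFIX
machinery; see the desc.h and segment.h hunks further down).  The offset
arithmetic of the fixup path is easy to misread, so here is a user-space
sketch with plain arrays standing in for both stacks; the sp value and
buffer contents are invented.

#include <stdio.h>
#include <string.h>

#define CPU_16BIT_STACK_SIZE 1024       /* from the desc.h hunk below */

int main(void)
{
        unsigned char stack16[CPU_16BIT_STACK_SIZE] = { 0 };
        unsigned char stack32[CPU_16BIT_STACK_SIZE] = { 0 };
        unsigned short sp = 996;        /* example 16-bit stack pointer */

        /* live bytes between sp and the switch32 pointer at top - 8 */
        int len = CPU_16BIT_STACK_SIZE - 8 - sp; /* 20: a 5-word iret frame */
        unsigned char *src = stack16 + sp;
        /* land the bytes at the same distance from the 32-bit stack top */
        unsigned char *dst = stack32 + CPU_16BIT_STACK_SIZE - 8 - len;

        memcpy(dst, src, len);
        printf("copied %d bytes from offset %u\n", len, sp);
        return 0;
}
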
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c   Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c   Tue Aug  9 23:57:17 2005
@@ -77,7 +77,7 @@
         * force other mappings to Oops if they'll try to access
         * this pte without first remap it
         */
-       pte_clear(kmap_pte-idx);
+       pte_clear(&init_mm, vaddr, kmap_pte-idx);
        __flush_tlb_one(vaddr);
 #endif
 
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c      Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c      Tue Aug  9 23:57:17 2005
@@ -249,13 +249,10 @@
 pte_t *kmap_pte;
 pgprot_t kmap_prot;
 
-EXPORT_SYMBOL(kmap_prot);
-EXPORT_SYMBOL(kmap_pte);
-
 #define kmap_get_fixmap_pte(vaddr)                                     \
        pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr))
 
-void __init kmap_init(void)
+static void __init kmap_init(void)
 {
        unsigned long kmap_vstart;
 
@@ -266,7 +263,7 @@
        kmap_prot = PAGE_KERNEL;
 }
 
-void __init permanent_kmaps_init(pgd_t *pgd_base)
+static void __init permanent_kmaps_init(pgd_t *pgd_base)
 {
        pgd_t *pgd;
        pud_t *pud;
@@ -298,7 +295,7 @@
 }
 
 #ifndef CONFIG_DISCONTIGMEM
-void __init set_highmem_pages_init(int bad_ppro) 
+static void __init set_highmem_pages_init(int bad_ppro)
 {
        int pfn;
        for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
@@ -426,38 +423,6 @@
        flush_tlb_all();
 }
 
-#ifndef CONFIG_DISCONTIGMEM
-void __init zone_sizes_init(void)
-{
-       unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
-       unsigned int /*max_dma,*/ high, low;
-       
-       /*
-        * XEN: Our notion of "DMA memory" is fake when running over Xen.
-        * We simply put all RAM in the DMA zone so that those drivers which
-        * needlessly specify GFP_DMA do not get starved of RAM unnecessarily.
-        * Those drivers that *do* require lowmem are screwed anyway when
-        * running over Xen!
-        */
-       /*max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;*/
-       low = max_low_pfn;
-       high = highend_pfn;
-       
-       /*if (low < max_dma)*/
-               zones_size[ZONE_DMA] = low;
-       /*else*/ {
-               /*zones_size[ZONE_DMA] = max_dma;*/
-               /*zones_size[ZONE_NORMAL] = low - max_dma;*/
-#ifdef CONFIG_HIGHMEM
-               zones_size[ZONE_HIGHMEM] = high - low;
-#endif
-       }
-       free_area_init(zones_size);     
-}
-#else
-extern void zone_sizes_init(void);
-#endif /* !CONFIG_DISCONTIGMEM */
-
 static int disable_nx __initdata = 0;
 u64 __supported_pte_mask = ~_PAGE_NX;
 
@@ -560,7 +525,6 @@
        __flush_tlb_all();
 
        kmap_init();
-       zone_sizes_init();
 
        /* Switch to the real shared_info page, and clear the dummy page. */
        flush_page_update_queue();
@@ -586,7 +550,7 @@
  * but fortunately the switch to using exceptions got rid of all that.
  */
 
-void __init test_wp_bit(void)
+static void __init test_wp_bit(void)
 {
        printk("Checking if this processor honours the WP bit even in supervisor mode... ");
 
@@ -605,20 +569,17 @@
        }
 }
 
+static void __init set_max_mapnr_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+       num_physpages = highend_pfn;
+#else
+       num_physpages = max_low_pfn;
+#endif
 #ifndef CONFIG_DISCONTIGMEM
-static void __init set_max_mapnr_init(void)
-{
-#ifdef CONFIG_HIGHMEM
-       max_mapnr = num_physpages = highend_pfn;
-#else
-       max_mapnr = num_physpages = max_low_pfn;
-#endif
-}
-#define __free_all_bootmem() free_all_bootmem()
-#else
-#define __free_all_bootmem() free_all_bootmem_node(NODE_DATA(0))
-extern void set_max_mapnr_init(void);
-#endif /* !CONFIG_DISCONTIGMEM */
+       max_mapnr = num_physpages;
+#endif
+}
 
 static struct kcore_list kcore_mem, kcore_vmalloc; 
 
@@ -650,16 +611,16 @@
        set_max_mapnr_init();
 
 #ifdef CONFIG_HIGHMEM
-       high_memory = (void *) __va(highstart_pfn * PAGE_SIZE);
+       high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
 #else
-       high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+       high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
        printk("vmalloc area: %lx-%lx, maxmem %lx\n",
               VMALLOC_START,VMALLOC_END,MAXMEM);
        BUG_ON(VMALLOC_START > VMALLOC_END);
        
        /* this will put all low memory onto the freelists */
-       totalram_pages += __free_all_bootmem();
+       totalram_pages += free_all_bootmem();
        /* XEN: init and count low-mem pages outside initial allocation. */
        for (pfn = xen_start_info.nr_pages; pfn < max_low_pfn; pfn++) {
                ClearPageReserved(&mem_map[pfn]);
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c   Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c   Tue Aug  9 23:57:17 2005
@@ -368,7 +368,7 @@
        if (PTRS_PER_PMD > 1)
                for (i = 0; i < USER_PTRS_PER_PGD; ++i)
                        kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
-       /* in the non-PAE case, clear_page_range() clears user pgd entries */
+       /* in the non-PAE case, free_pgtables() clears user pgd entries */
        kmem_cache_free(pgd_cache, pgd);
 }
 
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c      Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c      Tue Aug  9 23:57:17 2005
@@ -114,11 +114,11 @@
        if (pin != 0) {
                if (dev->irq != 0)
                        printk(KERN_INFO "PCI: Obtained IRQ %d for device %s\n",
-                           dev->irq, dev->slot_name);
+                           dev->irq, pci_name(dev));
                else
                        printk(KERN_WARNING "PCI: No IRQ known for interrupt "
                            "pin %c of device %s.\n", 'A' + pin - 1,
-                           dev->slot_name);
+                           pci_name(dev));
        }
 
        return 0;
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/drivers/Makefile
--- a/linux-2.6-xen-sparse/drivers/Makefile     Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/drivers/Makefile     Tue Aug  9 23:57:17 2005
@@ -48,8 +48,8 @@
 obj-$(CONFIG_TC)               += tc/
 obj-$(CONFIG_USB)              += usb/
 obj-$(CONFIG_USB_GADGET)       += usb/gadget/
+obj-$(CONFIG_GAMEPORT)         += input/gameport/
 obj-$(CONFIG_INPUT)            += input/
-obj-$(CONFIG_GAMEPORT)         += input/gameport/
 obj-$(CONFIG_I2O)              += message/
 obj-$(CONFIG_I2C)              += i2c/
 obj-$(CONFIG_W1)               += w1/
@@ -62,5 +62,6 @@
 obj-$(CONFIG_CPU_FREQ)         += cpufreq/
 obj-$(CONFIG_MMC)              += mmc/
 obj-$(CONFIG_INFINIBAND)       += infiniband/
+obj-$(CONFIG_BLK_DEV_SGIIOC4)  += sn/
 obj-y                          += firmware/
 obj-$(CONFIG_CRYPTO)           += crypto/
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/drivers/char/mem.c
--- a/linux-2.6-xen-sparse/drivers/char/mem.c   Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/drivers/char/mem.c   Tue Aug  9 23:57:17 2005
@@ -23,6 +23,7 @@
 #include <linux/devfs_fs_kernel.h>
 #include <linux/ptrace.h>
 #include <linux/device.h>
+#include <linux/backing-dev.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -76,14 +77,6 @@
         * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases.
         */
        return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
-#elif defined(CONFIG_PPC64)
-       /* On PPC64, we always do non-cacheable access to the IO hole and
-        * cacheable elsewhere. Cache paradox can checkstop the CPU and
-        * the high_memory heuristic below is wrong on machines with memory
-        * above the IO hole... Ah, and of course, XFree86 doesn't pass
-        * O_SYNC when mapping us to tap IO space. Surprised ?
-        */
-       return !page_is_ram(addr >> PAGE_SHIFT);
 #else
        /*
         * Accessing memory above the top the kernel knows about or through a file pointer
@@ -111,38 +104,6 @@
 }
 #endif
 
-static ssize_t do_write_mem(void *p, unsigned long realp,
-                           const char __user * buf, size_t count, loff_t *ppos)
-{
-       ssize_t written;
-       unsigned long copied;
-
-       written = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
-       /* we don't have page 0 mapped on sparc and m68k.. */
-       if (realp < PAGE_SIZE) {
-               unsigned long sz = PAGE_SIZE-realp;
-               if (sz > count) sz = count; 
-               /* Hmm. Do something? */
-               buf+=sz;
-               p+=sz;
-               count-=sz;
-               written+=sz;
-       }
-#endif
-       copied = copy_from_user(p, buf, count);
-       if (copied) {
-               ssize_t ret = written + (count - copied);
-
-               if (ret)
-                       return ret;
-               return -EFAULT;
-       }
-       written += count;
-       *ppos += written;
-       return written;
-}
-
 #ifndef ARCH_HAS_DEV_MEM
 /*
  * This funcion reads the *physical* memory. The f_pos points directly to the 
@@ -152,15 +113,16 @@
                        size_t count, loff_t *ppos)
 {
        unsigned long p = *ppos;
-       ssize_t read;
+       ssize_t read, sz;
+       char *ptr;
 
        if (!valid_phys_addr_range(p, &count))
                return -EFAULT;
        read = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
        /* we don't have page 0 mapped on sparc and m68k.. */
        if (p < PAGE_SIZE) {
-               unsigned long sz = PAGE_SIZE-p;
+               sz = PAGE_SIZE - p;
                if (sz > count) 
                        sz = count; 
                if (sz > 0) {
@@ -173,9 +135,33 @@
                }
        }
 #endif
-       if (copy_to_user(buf, __va(p), count))
-               return -EFAULT;
-       read += count;
+
+       while (count > 0) {
+               /*
+                * Handle first page in case it's not aligned
+                */
+               if (-p & (PAGE_SIZE - 1))
+                       sz = -p & (PAGE_SIZE - 1);
+               else
+                       sz = PAGE_SIZE;
+
+               sz = min_t(unsigned long, sz, count);
+
+               /*
+                * On ia64 if a page has been mapped somewhere as
+                * uncached, then it must also be accessed uncached
+                * by the kernel or data corruption may occur
+                */
+               ptr = xlate_dev_mem_ptr(p);
+
+               if (copy_to_user(buf, ptr, sz))
+                       return -EFAULT;
+               buf += sz;
+               p += sz;
+               count -= sz;
+               read += sz;
+       }
+
        *ppos += read;
        return read;
 }
@@ -184,16 +170,76 @@
                         size_t count, loff_t *ppos)
 {
        unsigned long p = *ppos;
+       ssize_t written, sz;
+       unsigned long copied;
+       void *ptr;
 
        if (!valid_phys_addr_range(p, &count))
                return -EFAULT;
-       return do_write_mem(__va(p), p, buf, count, ppos);
+
+       written = 0;
+
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
+       /* we don't have page 0 mapped on sparc and m68k.. */
+       if (p < PAGE_SIZE) {
+               unsigned long sz = PAGE_SIZE - p;
+               if (sz > count)
+                       sz = count;
+               /* Hmm. Do something? */
+               buf += sz;
+               p += sz;
+               count -= sz;
+               written += sz;
+       }
+#endif
+
+       while (count > 0) {
+               /*
+                * Handle first page in case it's not aligned
+                */
+               if (-p & (PAGE_SIZE - 1))
+                       sz = -p & (PAGE_SIZE - 1);
+               else
+                       sz = PAGE_SIZE;
+
+               sz = min_t(unsigned long, sz, count);
+
+               /*
+                * On ia64 if a page has been mapped somewhere as
+                * uncached, then it must also be accessed uncached
+                * by the kernel or data corruption may occur
+                */
+               ptr = xlate_dev_mem_ptr(p);
+
+               copied = copy_from_user(ptr, buf, sz);
+               if (copied) {
+                       ssize_t ret;
+
+                       ret = written + (sz - copied);
+                       if (ret)
+                               return ret;
+                       return -EFAULT;
+               }
+               buf += sz;
+               p += sz;
+               count -= sz;
+               written += sz;
+       }
+
+       *ppos += written;
+       return written;
 }
 #endif
 
 static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
 {
-#ifdef pgprot_noncached
+#if defined(__HAVE_PHYS_MEM_ACCESS_PROT)
+       unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+
+       vma->vm_page_prot = phys_mem_access_prot(file, offset,
+                                                vma->vm_end - vma->vm_start,
+                                                vma->vm_page_prot);
+#elif defined(pgprot_noncached)
        unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
        int uncached;
 
@@ -212,6 +258,25 @@
        return 0;
 }
 
+#if 0
+static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
+{
+        unsigned long long val;
+       /*
+        * RED-PEN: on some architectures there is more mapped memory
+        * than available in mem_map which pfn_valid checks
+        * for. Perhaps should add a new macro here.
+        *
+        * RED-PEN: vmalloc is not supported right now.
+        */
+       if (!pfn_valid(vma->vm_pgoff))
+               return -EIO;
+       val = (u64)vma->vm_pgoff << PAGE_SHIFT;
+       vma->vm_pgoff = __pa(val) >> PAGE_SHIFT;
+       return mmap_mem(file, vma);
+}
+#endif
+
 extern long vread(char *buf, char *addr, unsigned long count);
 extern long vwrite(char *buf, char *addr, unsigned long count);
 
@@ -222,33 +287,55 @@
                         size_t count, loff_t *ppos)
 {
        unsigned long p = *ppos;
-       ssize_t read = 0;
-       ssize_t virtr = 0;
+       ssize_t low_count, read, sz;
        char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
-               
+
+       read = 0;
        if (p < (unsigned long) high_memory) {
-               read = count;
+               low_count = count;
                if (count > (unsigned long) high_memory - p)
-                       read = (unsigned long) high_memory - p;
-
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+                       low_count = (unsigned long) high_memory - p;
+
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
                /* we don't have page 0 mapped on sparc and m68k.. */
-               if (p < PAGE_SIZE && read > 0) {
+               if (p < PAGE_SIZE && low_count > 0) {
                        size_t tmp = PAGE_SIZE - p;
-                       if (tmp > read) tmp = read;
+                       if (tmp > low_count) tmp = low_count;
                        if (clear_user(buf, tmp))
                                return -EFAULT;
                        buf += tmp;
                        p += tmp;
-                       read -= tmp;
+                       read += tmp;
+                       low_count -= tmp;
                        count -= tmp;
                }
 #endif
-               if (copy_to_user(buf, (char *)p, read))
-                       return -EFAULT;
-               p += read;
-               buf += read;
-               count -= read;
+               while (low_count > 0) {
+                       /*
+                        * Handle first page in case it's not aligned
+                        */
+                       if (-p & (PAGE_SIZE - 1))
+                               sz = -p & (PAGE_SIZE - 1);
+                       else
+                               sz = PAGE_SIZE;
+
+                       sz = min_t(unsigned long, sz, low_count);
+
+                       /*
+                        * On ia64 if a page has been mapped somewhere as
+                        * uncached, then it must also be accessed uncached
+                        * by the kernel or data corruption may occur
+                        */
+                       kbuf = xlate_dev_kmem_ptr((char *)p);
+
+                       if (copy_to_user(buf, kbuf, sz))
+                               return -EFAULT;
+                       buf += sz;
+                       p += sz;
+                       read += sz;
+                       low_count -= sz;
+                       count -= sz;
+               }
        }
 
        if (count > 0) {
@@ -269,14 +356,78 @@
                        }
                        count -= len;
                        buf += len;
-                       virtr += len;
+                       read += len;
                        p += len;
                }
                free_page((unsigned long)kbuf);
        }
        *ppos = p;
-       return virtr + read;
-}
+       return read;
+}
+
+
+static inline ssize_t
+do_write_kmem(void *p, unsigned long realp, const char __user * buf,
+             size_t count, loff_t *ppos)
+{
+       ssize_t written, sz;
+       unsigned long copied;
+
+       written = 0;
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
+       /* we don't have page 0 mapped on sparc and m68k.. */
+       if (realp < PAGE_SIZE) {
+               unsigned long sz = PAGE_SIZE - realp;
+               if (sz > count)
+                       sz = count;
+               /* Hmm. Do something? */
+               buf += sz;
+               p += sz;
+               realp += sz;
+               count -= sz;
+               written += sz;
+       }
+#endif
+
+       while (count > 0) {
+               char *ptr;
+               /*
+                * Handle first page in case it's not aligned
+                */
+               if (-realp & (PAGE_SIZE - 1))
+                       sz = -realp & (PAGE_SIZE - 1);
+               else
+                       sz = PAGE_SIZE;
+
+               sz = min_t(unsigned long, sz, count);
+
+               /*
+                * On ia64 if a page has been mapped somewhere as
+                * uncached, then it must also be accessed uncached
+                * by the kernel or data corruption may occur
+                */
+               ptr = xlate_dev_kmem_ptr(p);
+
+               copied = copy_from_user(ptr, buf, sz);
+               if (copied) {
+                       ssize_t ret;
+
+                       ret = written + (sz - copied);
+                       if (ret)
+                               return ret;
+                       return -EFAULT;
+               }
+               buf += sz;
+               p += sz;
+               realp += sz;
+               count -= sz;
+               written += sz;
+       }
+
+       *ppos += written;
+       return written;
+}
+
 
 /*
  * This function writes to the *virtual* memory as seen by the kernel.
@@ -296,7 +447,7 @@
                if (count > (unsigned long) high_memory - p)
                        wrote = (unsigned long) high_memory - p;
 
-               written = do_write_mem((void*)p, p, buf, wrote, ppos);
+               written = do_write_kmem((void*)p, p, buf, wrote, ppos);
                if (written != wrote)
                        return written;
                wrote = written;
@@ -344,7 +495,7 @@
        unsigned long i = *ppos;
        char __user *tmp = buf;
 
-       if (verify_area(VERIFY_WRITE,buf,count))
+       if (!access_ok(VERIFY_WRITE, buf, count))
                return -EFAULT; 
        while (count-- > 0 && i < 65536) {
                if (__put_user(inb(i),tmp) < 0) 
@@ -362,7 +513,7 @@
        unsigned long i = *ppos;
        const char __user * tmp = buf;
 
-       if (verify_area(VERIFY_READ,buf,count))
+       if (!access_ok(VERIFY_READ,buf,count))
                return -EFAULT;
        while (count-- > 0 && i < 65536) {
                char c;
@@ -568,7 +719,6 @@
        return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
 }
 
-#define mmap_mem       mmap_kmem
 #define zero_lseek     null_lseek
 #define full_lseek      null_lseek
 #define write_zero     write_null
@@ -581,7 +731,7 @@
        .llseek         = memory_lseek,
        .read           = read_mem,
        .write          = write_mem,
-       .mmap           = mmap_mem,
+       .mmap           = mmap_kmem,
        .open           = open_mem,
 };
 #else
@@ -616,6 +766,10 @@
        .read           = read_zero,
        .write          = write_zero,
        .mmap           = mmap_zero,
+};
+
+static struct backing_dev_info zero_bdi = {
+       .capabilities   = BDI_CAP_MAP_COPY,
 };
 
 static struct file_operations full_fops = {
@@ -664,6 +818,7 @@
                        break;
 #endif
                case 5:
+                       filp->f_mapping->backing_dev_info = &zero_bdi;
                        filp->f_op = &zero_fops;
                        break;
                case 7:
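
The rewritten /dev/mem and /dev/kmem loops above all rest on one idiom:
"-p & (PAGE_SIZE - 1)" is the distance from p up to the next page boundary
(0 when p is already aligned), so the first iteration trims the copy to
the page containing p and later iterations advance a full page at a time,
letting each chunk be translated individually via xlate_dev_mem_ptr() or
xlate_dev_kmem_ptr().  A standalone sketch of just the chunking, with an
arbitrary start offset and byte count:

#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
        unsigned long p = 0x12345678;   /* arbitrary start offset */
        unsigned long count = 10000;    /* arbitrary transfer size */

        while (count > 0) {
                /* bytes left in the page containing p */
                unsigned long sz = -p & (PAGE_SIZE - 1);
                if (sz == 0)
                        sz = PAGE_SIZE;
                if (sz > count)
                        sz = count;

                printf("copy %4lu bytes at %#lx\n", sz, p);
                p += sz;
                count -= sz;
        }
        /* prints chunks of 2440, 4096 and 3464 bytes */
        return 0;
}
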
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/drivers/char/tty_io.c
--- a/linux-2.6-xen-sparse/drivers/char/tty_io.c        Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/drivers/char/tty_io.c        Tue Aug  9 23:57:17 2005
@@ -187,7 +187,7 @@
 
 EXPORT_SYMBOL(tty_name);
 
-inline int tty_paranoia_check(struct tty_struct *tty, struct inode *inode,
+int tty_paranoia_check(struct tty_struct *tty, struct inode *inode,
                              const char *routine)
 {
 #ifdef TTY_PARANOIA_CHECK
@@ -1791,7 +1791,6 @@
        }
 #ifdef CONFIG_VT
        if (console_use_vt && (device == MKDEV(TTY_MAJOR,0))) {
-               extern int fg_console;
                extern struct tty_driver *console_driver;
                driver = console_driver;
                index = fg_console;
@@ -2018,11 +2017,10 @@
                return 0;
 #ifdef CONFIG_VT
        if (tty->driver->type == TTY_DRIVER_TYPE_CONSOLE) {
-               unsigned int currcons = tty->index;
                int rc;
 
                acquire_console_sem();
-               rc = vc_resize(currcons, tmp_ws.ws_col, tmp_ws.ws_row);
+               rc = vc_resize(tty->driver_data, tmp_ws.ws_col, tmp_ws.ws_row);
                release_console_sem();
                if (rc)
                        return -ENXIO;
@@ -2634,6 +2632,7 @@
        tty->magic = TTY_MAGIC;
        tty_ldisc_assign(tty, tty_ldisc_get(N_TTY));
        tty->pgrp = -1;
+       tty->overrun_time = jiffies;
        tty->flip.char_buf_ptr = tty->flip.char_buf;
        tty->flip.flag_buf_ptr = tty->flip.flag_buf;
        INIT_WORK(&tty->flip.work, flush_to_ldisc, tty);
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-generic/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-generic/pgtable.h        Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-generic/pgtable.h        Tue Aug  9 23:57:17 2005
@@ -16,7 +16,7 @@
 #ifndef __HAVE_ARCH_SET_PTE_ATOMIC
 #define ptep_establish(__vma, __address, __ptep, __entry)              \
 do {                                                                   \
-       set_pte(__ptep, __entry);                                       \
+       set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry);       \
        flush_tlb_page(__vma, __address);                               \
 } while (0)
 #else /* __HAVE_ARCH_SET_PTE_ATOMIC */
@@ -37,7 +37,7 @@
  */
 #define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
 do {                                                                     \
-       set_pte(__ptep, __entry);                                         \
+       set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry);        \
        flush_tlb_page(__vma, __address);                                 \
 } while (0)
 #endif
@@ -53,20 +53,24 @@
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int ptep_test_and_clear_young(pte_t *ptep)
-{
-       pte_t pte = *ptep;
-       if (!pte_young(pte))
-               return 0;
-       set_pte(ptep, pte_mkold(pte));
-       return 1;
-}
+#define ptep_test_and_clear_young(__vma, __address, __ptep)            \
+({                                                                     \
+       pte_t __pte = *(__ptep);                                        \
+       int r = 1;                                                      \
+       if (!pte_young(__pte))                                          \
+               r = 0;                                                  \
+       else                                                            \
+               set_pte_at((__vma)->vm_mm, (__address),                 \
+                          (__ptep), pte_mkold(__pte));                 \
+       r;                                                              \
+})
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
 #define ptep_clear_flush_young(__vma, __address, __ptep)               \
 ({                                                                     \
-       int __young = ptep_test_and_clear_young(__ptep);                \
+       int __young;                                                    \
+       __young = ptep_test_and_clear_young(__vma, __address, __ptep);  \
        if (__young)                                                    \
                flush_tlb_page(__vma, __address);                       \
        __young;                                                        \
@@ -74,20 +78,24 @@
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
-static inline int ptep_test_and_clear_dirty(pte_t *ptep)
-{
-       pte_t pte = *ptep;
-       if (!pte_dirty(pte))
-               return 0;
-       set_pte(ptep, pte_mkclean(pte));
-       return 1;
-}
+#define ptep_test_and_clear_dirty(__vma, __address, __ptep)            \
+({                                                                     \
+       pte_t __pte = *__ptep;                                          \
+       int r = 1;                                                      \
+       if (!pte_dirty(__pte))                                          \
+               r = 0;                                                  \
+       else                                                            \
+               set_pte_at((__vma)->vm_mm, (__address), (__ptep),       \
+                          pte_mkclean(__pte));                         \
+       r;                                                              \
+})
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
 #define ptep_clear_flush_dirty(__vma, __address, __ptep)               \
 ({                                                                     \
-       int __dirty = ptep_test_and_clear_dirty(__ptep);                \
+       int __dirty;                                                    \
+       __dirty = ptep_test_and_clear_dirty(__vma, __address, __ptep);  \
        if (__dirty)                                                    \
                flush_tlb_page(__vma, __address);                       \
        __dirty;                                                        \
@@ -95,36 +103,29 @@
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(pte_t *ptep)
-{
-       pte_t pte = *ptep;
-       pte_clear(ptep);
-       return pte;
-}
+#define ptep_get_and_clear(__mm, __address, __ptep)                    \
+({                                                                     \
+       pte_t __pte = *(__ptep);                                        \
+       pte_clear((__mm), (__address), (__ptep));                       \
+       __pte;                                                          \
+})
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
 #define ptep_clear_flush(__vma, __address, __ptep)                     \
 ({                                                                     \
-       pte_t __pte = ptep_get_and_clear(__ptep);                       \
+       pte_t __pte;                                                    \
+       __pte = ptep_get_and_clear((__vma)->vm_mm, __address, __ptep);  \
        flush_tlb_page(__vma, __address);                               \
        __pte;                                                          \
 })
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(pte_t *ptep)
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
 {
        pte_t old_pte = *ptep;
-       set_pte(ptep, pte_wrprotect(old_pte));
-}
-#endif
-
-#ifndef __HAVE_ARCH_PTEP_MKDIRTY
-static inline void ptep_mkdirty(pte_t *ptep)
-{
-       pte_t old_pte = *ptep;
-       set_pte(ptep, pte_mkdirty(old_pte));
+       set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
 }
 #endif
 
@@ -144,4 +145,77 @@
 #define pgd_offset_gate(mm, addr)      pgd_offset(mm, addr)
 #endif
 
+#ifndef __HAVE_ARCH_LAZY_MMU_PROT_UPDATE
+#define lazy_mmu_prot_update(pte)      do { } while (0)
+#endif
+
+/*
+ * When walking page tables, get the address of the next boundary,
+ * or the end address of the range if that comes earlier.  Although no
+ * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
+ */
+
+#define pgd_addr_end(addr, end)                                                \
+({     unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;  \
+       (__boundary - 1 < (end) - 1)? __boundary: (end);                \
+})
+
+#ifndef pud_addr_end
+#define pud_addr_end(addr, end)                                                \
+({     unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;      \
+       (__boundary - 1 < (end) - 1)? __boundary: (end);                \
+})
+#endif
+
+#ifndef pmd_addr_end
+#define pmd_addr_end(addr, end)                                                \
+({     unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;      \
+       (__boundary - 1 < (end) - 1)? __boundary: (end);                \
+})
+#endif
+
+#ifndef __ASSEMBLY__
+/*
+ * When walking page tables, we usually want to skip any p?d_none entries;
+ * and any p?d_bad entries - reporting the error before resetting to none.
+ * Do the tests inline, but report and clear the bad entry in mm/memory.c.
+ */
+void pgd_clear_bad(pgd_t *);
+void pud_clear_bad(pud_t *);
+void pmd_clear_bad(pmd_t *);
+
+static inline int pgd_none_or_clear_bad(pgd_t *pgd)
+{
+       if (pgd_none(*pgd))
+               return 1;
+       if (unlikely(pgd_bad(*pgd))) {
+               pgd_clear_bad(pgd);
+               return 1;
+       }
+       return 0;
+}
+
+static inline int pud_none_or_clear_bad(pud_t *pud)
+{
+       if (pud_none(*pud))
+               return 1;
+       if (unlikely(pud_bad(*pud))) {
+               pud_clear_bad(pud);
+               return 1;
+       }
+       return 0;
+}
+
+static inline int pmd_none_or_clear_bad(pmd_t *pmd)
+{
+       if (pmd_none(*pmd))
+               return 1;
+       if (unlikely(pmd_bad(*pmd))) {
+               pmd_clear_bad(pmd);
+               return 1;
+       }
+       return 0;
+}
+#endif /* !__ASSEMBLY__ */
+
 #endif /* _ASM_GENERIC_PGTABLE_H */
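
The new p?d_addr_end() macros clamp a page-table walk to the next table
boundary or to end, whichever comes first; comparing "boundary - 1" with
"end - 1" keeps the result correct even when the rounded-up boundary wraps
to 0 at the very top of the address space, as the comment above explains.
A user-space rendition of pmd_addr_end() with an assumed 2 MB PMD size:

#include <stdio.h>

#define PMD_SIZE (1UL << 21)
#define PMD_MASK (~(PMD_SIZE - 1))

static unsigned long pmd_addr_end(unsigned long addr, unsigned long end)
{
        unsigned long boundary = (addr + PMD_SIZE) & PMD_MASK;

        return (boundary - 1 < end - 1) ? boundary : end;
}

int main(void)
{
        /* walk [0x3ff000, 0x601000) one PMD-sized chunk at a time */
        unsigned long addr = 0x3ff000, end = 0x601000, next;

        do {
                next = pmd_addr_end(addr, end);
                printf("chunk [%#lx, %#lx)\n", addr, next);
                addr = next;
        } while (addr != end);
        return 0;
}
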
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h      Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h      Tue Aug  9 23:57:17 2005
@@ -3,6 +3,8 @@
 
 #include <asm/ldt.h>
 #include <asm/segment.h>
+
+#define CPU_16BIT_STACK_SIZE 1024
 
 #ifndef __ASSEMBLY__
 
@@ -12,6 +14,8 @@
 #include <asm/mmu.h>
 
 extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
+
+DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
 
 struct Xgt_desc_struct {
        unsigned short size;
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h       Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h       Tue Aug  9 23:57:17 2005
@@ -11,7 +11,7 @@
 #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
 
 void *dma_alloc_coherent(struct device *dev, size_t size,
-                          dma_addr_t *dma_handle, int flag);
+                          dma_addr_t *dma_handle, unsigned int __nocast flag);
 
 void dma_free_coherent(struct device *dev, size_t size,
                         void *vaddr, dma_addr_t dma_handle);
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/highmem.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/highmem.h   Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/highmem.h   Tue Aug  9 23:57:17 2005
@@ -32,8 +32,6 @@
 extern pte_t *kmap_pte;
 extern pgprot_t kmap_prot;
 extern pte_t *pkmap_page_table;
-
-extern void kmap_init(void);
 
 /*
  * Right now we initialize only a single pte table. It can be extended
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/io.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/io.h        Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/io.h        Tue Aug  9 23:57:17 2005
@@ -49,6 +49,17 @@
 
 #include <linux/vmalloc.h>
 #include <asm/fixmap.h>
+
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)   __va(p)
+
+/*
+ * Convert a virtual cached pointer to an uncached pointer
+ */
+#define xlate_dev_kmem_ptr(p)  p
 
 /**
  *     virt_to_phys    -       map virtual addresses to physical
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h       Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h       Tue Aug  9 23:57:17 2005
@@ -64,7 +64,7 @@
 }
 
 #define deactivate_mm(tsk, mm) \
-       asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
+       asm("mov %0,%%fs ; mov %0,%%gs": :"r" (0))
 
 #define activate_mm(prev, next) do {           \
        switch_mm((prev),(next),NULL);          \
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h   Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h   Tue Aug  9 23:57:17 2005
@@ -2,7 +2,6 @@
 #define _I386_PGALLOC_H
 
 #include <linux/config.h>
-#include <asm/processor.h>
 #include <asm/fixmap.h>
 #include <linux/threads.h>
 #include <linux/mm.h>          /* for struct page */
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h    Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h    Tue Aug  9 23:57:17 2005
@@ -16,6 +16,7 @@
 #define set_pte_batched(pteptr, pteval) \
        queue_l1_entry_update(pteptr, (pteval).pte_low)
 #define set_pte(pteptr, pteval) (*(pteptr) = pteval)
+#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
 #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
 #define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval))
 
@@ -28,14 +29,8 @@
  * each domain will have separate page tables, with their own versions of
  * accessed & dirty state.
  */
-static inline pte_t ptep_get_and_clear(pte_t *xp)
-{
-       pte_t pte = *xp;
-       if (pte.pte_low)
-               set_pte(xp, __pte_ma(0));
-       return pte;
-}
 
+#define ptep_get_and_clear(mm,addr,xp) __pte_ma(xchg(&(xp)->pte_low, 0))
 #define pte_same(a, b)         ((a).pte_low == (b).pte_low)
 /*
  * We detect special mappings in one of two ways:
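
In the 2-level case, ptep_get_and_clear() above becomes a single xchg()
rather than a read followed by a conditional store: the CPU may set the
accessed or dirty bit in a PTE at any instant, and only an atomic exchange
is guaranteed not to lose an update landing between the read and the
clear.  The same idea in user space, with GCC's exchange builtin and a
plain word standing in for the PTE:

#include <stdio.h>

int main(void)
{
        unsigned long pte_low = 0x1234067;      /* pretend PTE word */

        /* atomically fetch the old value and store 0, like xchg() */
        unsigned long old = __sync_lock_test_and_set(&pte_low, 0UL);

        printf("old %#lx, now %#lx\n", old, pte_low);
        return 0;
}
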
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h   Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h   Tue Aug  9 23:57:17 2005
@@ -64,7 +64,7 @@
 #define PGDIR_MASK     (~(PGDIR_SIZE-1))
 
 #define USER_PTRS_PER_PGD      (TASK_SIZE/PGDIR_SIZE)
-#define FIRST_USER_PGD_NR      0
+#define FIRST_USER_ADDRESS     0
 
 #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
 #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
@@ -200,15 +200,15 @@
 /*
  * Define this if things work differently on an i386 and an i486:
  * it will (on an i486) warn about kernel memory accesses that are
- * done without a 'verify_area(VERIFY_WRITE,..)'
- */
-#undef TEST_VERIFY_AREA
+ * done without a 'access_ok(VERIFY_WRITE,..)'
+ */
+#undef TEST_ACCESS_OK
 
 /* The boot page tables (all created as a single array) */
 extern unsigned long pg0[];
 
 #define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
-#define pte_clear(xp)  do { set_pte(xp, __pte(0)); } while (0)
+#define pte_clear(mm,addr,xp)  do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
 
 #define pmd_none(x)    (!pmd_val(x))
 /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
@@ -252,7 +252,7 @@
 # include <asm/pgtable-2level.h>
 #endif
 
-static inline int ptep_test_and_clear_dirty(pte_t *ptep)
+static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
 {
        pte_t pte = *ptep;
        int ret = pte_dirty(pte);
@@ -261,7 +261,7 @@
        return ret;
 }
 
-static inline int ptep_test_and_clear_young(pte_t *ptep)
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
 {
        pte_t pte = *ptep;
        int ret = pte_young(pte);
@@ -270,18 +270,11 @@
        return ret;
 }
 
-static inline void ptep_set_wrprotect(pte_t *ptep)
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
        pte_t pte = *ptep;
        if (pte_write(pte))
                set_pte(ptep, pte_wrprotect(pte));
-}
-
-static inline void ptep_mkdirty(pte_t *ptep)
-{
-       pte_t pte = *ptep;
-       if (!pte_dirty(pte))
-               xen_l1_entry_update(ptep, pte_mkdirty(pte).pte_low);
 }
 
 /*
@@ -495,11 +488,14 @@
 #define io_remap_pfn_range(vma,from,pfn,size,prot) \
 direct_remap_area_pages(vma->vm_mm,from,pfn<<PAGE_SHIFT,size,prot,DOMID_IO)
 
+#define MK_IOSPACE_PFN(space, pfn)     (pfn)
+#define GET_IOSPACE(pfn)               0
+#define GET_PFN(pfn)                   (pfn)
+
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define __HAVE_ARCH_PTEP_MKDIRTY
 #define __HAVE_ARCH_PTE_SAME
 #include <asm-generic/pgtable.h>
 
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h Tue Aug  9 23:57:17 2005
@@ -99,12 +99,12 @@
 #endif
 
 extern int phys_proc_id[NR_CPUS];
+extern int cpu_core_id[NR_CPUS];
 extern char ignore_fpu_irq;
 
 extern void identify_cpu(struct cpuinfo_x86 *);
 extern void print_cpu_info(struct cpuinfo_x86 *);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern void dodgy_tsc(void);
 
 #ifdef CONFIG_X86_HT
 extern void detect_ht(struct cpuinfo_x86 *c);
@@ -138,7 +138,7 @@
  * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
  * resulting in stale register contents being returned.
  */
-static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
+static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
 {
        __asm__("cpuid"
                : "=a" (*eax),
@@ -146,6 +146,18 @@
                  "=c" (*ecx),
                  "=d" (*edx)
                : "0" (op), "c"(0));
+}
+
+/* Some CPUID calls want 'count' to be placed in ecx */
+static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
+               int *edx)
+{
+       __asm__("cpuid"
+               : "=a" (*eax),
+                 "=b" (*ebx),
+                 "=c" (*ecx),
+                 "=d" (*edx)
+               : "0" (op), "c" (count));
 }
 
 /*
@@ -501,6 +513,13 @@
        regs->esp = new_esp;                                    \
 } while (0)
 
+/*
+ * This special macro can be used to load a debugging register
+ */
+#define loaddebug(thread,register) \
+       HYPERVISOR_set_debugreg((register),     \
+                       ((thread)->debugreg[register]))
+
 /* Forward declaration, a strange C thing */
 struct task_struct;
 struct mm_struct;
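
cpuid_count() above differs from cpuid() only in loading a caller-chosen
sub-leaf index into ecx instead of zero; leaf 4, Intel's deterministic
cache parameters, is the classic consumer.  A user-space equivalent
(x86 only, and it assumes a CPU that implements leaf 4):

#include <stdio.h>

static void cpuid_count(unsigned int op, unsigned int count,
                        unsigned int *eax, unsigned int *ebx,
                        unsigned int *ecx, unsigned int *edx)
{
        __asm__("cpuid"
                : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
                : "0" (op), "c" (count));       /* count goes in ecx */
}

int main(void)
{
        unsigned int a, b, c, d;

        cpuid_count(4, 0, &a, &b, &c, &d);      /* sub-leaf 0: first cache */
        printf("cache type %u, level %u\n", a & 0x1f, (a >> 5) & 0x7);
        return 0;
}
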
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/segment.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/segment.h   Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/segment.h   Tue Aug  9 23:57:17 2005
@@ -38,7 +38,7 @@
  *  24 - APM BIOS support
  *  25 - APM BIOS support 
  *
- *  26 - unused
+ *  26 - ESPFIX small SS
  *  27 - unused
  *  28 - unused
  *  29 - unused
@@ -71,6 +71,9 @@
 #define GDT_ENTRY_PNPBIOS_BASE         (GDT_ENTRY_KERNEL_BASE + 6)
 #define GDT_ENTRY_APMBIOS_BASE         (GDT_ENTRY_KERNEL_BASE + 11)
 
+#define GDT_ENTRY_ESPFIX_SS            (GDT_ENTRY_KERNEL_BASE + 14)
+#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
+
 #define GDT_ENTRY_DOUBLEFAULT_TSS      31
 
 /*
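
__ESPFIX_SS above is an ordinary GDT selector: descriptor index times 8,
with table indicator and RPL both zero.  Assuming GDT_ENTRY_KERNEL_BASE is
12 (defined earlier in this header, outside the hunk), the ESPFIX entry
lands in slot 26 -- matching the updated comment block -- and the selector
value is 0xd0:

#include <stdio.h>

#define GDT_ENTRY_KERNEL_BASE 12        /* assumed, from the full header */
#define GDT_ENTRY_ESPFIX_SS   (GDT_ENTRY_KERNEL_BASE + 14)
#define __ESPFIX_SS           (GDT_ENTRY_ESPFIX_SS * 8)

int main(void)
{
        printf("ESPFIX descriptor %d, selector %#x\n",
               GDT_ENTRY_ESPFIX_SS, __ESPFIX_SS);
        return 0;
}
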
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h     Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h     Tue Aug  9 23:57:17 2005
@@ -16,7 +16,7 @@
 #define MAXMEM_PFN     PFN_DOWN(MAXMEM)
 #define MAX_NONPAE_PFN (1 << 20)
 
-#define PARAM_SIZE 2048
+#define PARAM_SIZE 4096
 #define COMMAND_LINE_SIZE 256
 
 #define OLD_CL_MAGIC_ADDR      0x90020
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h    Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h    Tue Aug  9 23:57:17 2005
@@ -84,7 +84,7 @@
 #define loadsegment(seg,value)                 \
        asm volatile("\n"                       \
                "1:\t"                          \
-               "movl %0,%%" #seg "\n"          \
+               "mov %0,%%" #seg "\n"           \
                "2:\n"                          \
                ".section .fixup,\"ax\"\n"      \
                "3:\t"                          \
@@ -96,13 +96,13 @@
                ".align 4\n\t"                  \
                ".long 1b,3b\n"                 \
                ".previous"                     \
-               : :"m" (*(unsigned int *)&(value)))
+               : :"m" (value))
 
 /*
  * Save a segment register away
  */
 #define savesegment(seg, value) \
-       asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value)))
+       asm volatile("mov %%" #seg ",%0":"=m" (value))
 
 /*
  * Clear and set 'TS' bit respectively
@@ -519,4 +519,6 @@
 extern int es7000_plat;
 void cpu_idle_wait(void);
 
+extern unsigned long arch_align_stack(unsigned long sp);
+
 #endif
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/include/linux/gfp.h
--- a/linux-2.6-xen-sparse/include/linux/gfp.h  Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/include/linux/gfp.h  Tue Aug  9 23:57:17 2005
@@ -26,26 +26,28 @@
  *
  * __GFP_NORETRY: The VM implementation must not retry indefinitely.
  */
-#define __GFP_WAIT     0x10    /* Can wait and reschedule? */
-#define __GFP_HIGH     0x20    /* Should access emergency pools? */
-#define __GFP_IO       0x40    /* Can start physical IO? */
-#define __GFP_FS       0x80    /* Can call down to low-level FS? */
-#define __GFP_COLD     0x100   /* Cache-cold page required */
-#define __GFP_NOWARN   0x200   /* Suppress page allocation failure warning */
-#define __GFP_REPEAT   0x400   /* Retry the allocation.  Might fail */
-#define __GFP_NOFAIL   0x800   /* Retry for ever.  Cannot fail */
-#define __GFP_NORETRY  0x1000  /* Do not retry.  Might fail */
-#define __GFP_NO_GROW  0x2000  /* Slab internal usage */
-#define __GFP_COMP     0x4000  /* Add compound page metadata */
-#define __GFP_ZERO     0x8000  /* Return zeroed page on success */
+#define __GFP_WAIT     0x10u   /* Can wait and reschedule? */
+#define __GFP_HIGH     0x20u   /* Should access emergency pools? */
+#define __GFP_IO       0x40u   /* Can start physical IO? */
+#define __GFP_FS       0x80u   /* Can call down to low-level FS? */
+#define __GFP_COLD     0x100u  /* Cache-cold page required */
+#define __GFP_NOWARN   0x200u  /* Suppress page allocation failure warning */
+#define __GFP_REPEAT   0x400u  /* Retry the allocation.  Might fail */
+#define __GFP_NOFAIL   0x800u  /* Retry for ever.  Cannot fail */
+#define __GFP_NORETRY  0x1000u /* Do not retry.  Might fail */
+#define __GFP_NO_GROW  0x2000u /* Slab internal usage */
+#define __GFP_COMP     0x4000u /* Add compound page metadata */
+#define __GFP_ZERO     0x8000u /* Return zeroed page on success */
+#define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */
 
-#define __GFP_BITS_SHIFT 16    /* Room for 16 __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 20    /* Room for 20 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
 
 /* if you forget to add the bitmask here kernel will crash, period */
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
                        __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
-                       __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP)
+                       __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
+                       __GFP_NOMEMALLOC)
 
 #define GFP_ATOMIC     (__GFP_HIGH)
 #define GFP_NOIO       (__GFP_WAIT)
@@ -86,7 +88,7 @@
 extern struct page *
 FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *));
 
-static inline struct page *alloc_pages_node(int nid, unsigned int gfp_mask,
+static inline struct page *alloc_pages_node(int nid, unsigned int __nocast gfp_mask,
                                                unsigned int order)
 {
        if (unlikely(order >= MAX_ORDER))
@@ -97,17 +99,17 @@
 }
 
 #ifdef CONFIG_NUMA
-extern struct page *alloc_pages_current(unsigned gfp_mask, unsigned order);
+extern struct page *alloc_pages_current(unsigned int __nocast gfp_mask, unsigned order);
 
 static inline struct page *
-alloc_pages(unsigned int gfp_mask, unsigned int order)
+alloc_pages(unsigned int __nocast gfp_mask, unsigned int order)
 {
        if (unlikely(order >= MAX_ORDER))
                return NULL;
 
        return alloc_pages_current(gfp_mask, order);
 }
-extern struct page *alloc_page_vma(unsigned gfp_mask,
+extern struct page *alloc_page_vma(unsigned __nocast gfp_mask,
                        struct vm_area_struct *vma, unsigned long addr);
 #else
 #define alloc_pages(gfp_mask, order) \
@@ -116,8 +118,8 @@
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 
-extern unsigned long FASTCALL(__get_free_pages(unsigned int gfp_mask, unsigned int order));
-extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask));
+extern unsigned long FASTCALL(__get_free_pages(unsigned int __nocast gfp_mask, unsigned int order));
+extern unsigned long FASTCALL(get_zeroed_page(unsigned int __nocast gfp_mask));
 
 #define __get_free_page(gfp_mask) \
                __get_free_pages((gfp_mask),0)
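
Per the "kernel will crash, period" comment above, the slab allocator
sanity-checks incoming flags against GFP_LEVEL_MASK (the check itself
lives in mm/slab.c, outside this patch), so a newly minted bit such as
__GFP_NOMEMALLOC must be added to the mask or it trips that check.  A
sketch of the failure mode, with flag values copied from the hunk:

#include <stdio.h>

#define __GFP_WAIT        0x10u
#define __GFP_IO          0x40u
#define __GFP_FS          0x80u
#define __GFP_NOMEMALLOC  0x10000u

/* deliberately stale mask, as it stood before this patch */
#define OLD_LEVEL_MASK (__GFP_WAIT | __GFP_IO | __GFP_FS)
#define NEW_LEVEL_MASK (OLD_LEVEL_MASK | __GFP_NOMEMALLOC)

int main(void)
{
        unsigned int flags = __GFP_WAIT | __GFP_FS | __GFP_NOMEMALLOC;

        /* any bit left over after masking is treated as fatal */
        printf("stale mask leaves %#x -> BUG()\n", flags & ~OLD_LEVEL_MASK);
        printf("fixed mask leaves %#x -> ok\n", flags & ~NEW_LEVEL_MASK);
        return 0;
}
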
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/mm/highmem.c
--- a/linux-2.6-xen-sparse/mm/highmem.c Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/mm/highmem.c Tue Aug  9 23:57:17 2005
@@ -30,9 +30,9 @@
 
 static mempool_t *page_pool, *isa_page_pool;
 
-static void *page_pool_alloc(int gfp_mask, void *data)
-{
-       int gfp = gfp_mask | (int) (long) data;
+static void *page_pool_alloc(unsigned int __nocast gfp_mask, void *data)
+{
+       unsigned int gfp = gfp_mask | (unsigned int) (long) data;
 
        return alloc_page(gfp);
 }
@@ -90,7 +90,8 @@
                 * So no dangers, even with speculative execution.
                 */
                page = pte_page(pkmap_page_table[i]);
-               pte_clear(&pkmap_page_table[i]);
+               pte_clear(&init_mm, (unsigned long)page_address(page),
+                         &pkmap_page_table[i]);
 
                set_page_address(page, NULL);
        }
@@ -138,7 +139,8 @@
                }
        }
        vaddr = PKMAP_ADDR(last_pkmap_nr);
-       set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));
+       set_pte_at(&init_mm, vaddr,
+                  &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));
 
        pkmap_count[last_pkmap_nr] = 1;
        set_page_address(page, (void *)vaddr);
@@ -332,6 +334,7 @@
                        continue;
 
                mempool_free(bvec->bv_page, pool);      
+               dec_page_state(nr_bounce);
        }
 
        bio_endio(bio_orig, bio_orig->bi_size, err);
@@ -412,6 +415,7 @@
                to->bv_page = mempool_alloc(pool, q->bounce_gfp);
                to->bv_len = from->bv_len;
                to->bv_offset = from->bv_offset;
+               inc_page_state(nr_bounce);
 
                if (rw == WRITE) {
                        char *vto, *vfrom;
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/mm/memory.c
--- a/linux-2.6-xen-sparse/mm/memory.c  Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/mm/memory.c  Tue Aug  9 23:57:17 2005
@@ -46,7 +46,6 @@
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
 #include <linux/rmap.h>
-#include <linux/acct.h>
 #include <linux/module.h>
 #include <linux/init.h>
 
@@ -84,116 +83,205 @@
 EXPORT_SYMBOL(vmalloc_earlyreserve);
 
 /*
+ * If a p?d_bad entry is found while walking page tables, report
+ * the error, before resetting entry to p?d_none.  Usually (but
+ * very seldom) called out from the p?d_none_or_clear_bad macros.
+ */
+
+void pgd_clear_bad(pgd_t *pgd)
+{
+       pgd_ERROR(*pgd);
+       pgd_clear(pgd);
+}
+
+void pud_clear_bad(pud_t *pud)
+{
+       pud_ERROR(*pud);
+       pud_clear(pud);
+}
+
+void pmd_clear_bad(pmd_t *pmd)
+{
+       pmd_ERROR(*pmd);
+       pmd_clear(pmd);
+}
+
+/*
  * Note: this doesn't free the actual pages themselves. That
  * has been handled earlier when unmapping all the memory regions.
  */
-static inline void clear_pmd_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long start, unsigned long end)
-{
-       struct page *page;
-
-       if (pmd_none(*pmd))
+static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
+{
+       struct page *page = pmd_page(*pmd);
+       pmd_clear(pmd);
+       pte_free_tlb(tlb, page);
+       dec_page_state(nr_page_table_pages);
+       tlb->mm->nr_ptes--;
+}
+
+static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+                               unsigned long addr, unsigned long end,
+                               unsigned long floor, unsigned long ceiling)
+{
+       pmd_t *pmd;
+       unsigned long next;
+       unsigned long start;
+
+       start = addr;
+       pmd = pmd_offset(pud, addr);
+       do {
+               next = pmd_addr_end(addr, end);
+               if (pmd_none_or_clear_bad(pmd))
+                       continue;
+               free_pte_range(tlb, pmd);
+       } while (pmd++, addr = next, addr != end);
+
+       start &= PUD_MASK;
+       if (start < floor)
                return;
-       if (unlikely(pmd_bad(*pmd))) {
-               pmd_ERROR(*pmd);
-               pmd_clear(pmd);
+       if (ceiling) {
+               ceiling &= PUD_MASK;
+               if (!ceiling)
+                       return;
+       }
+       if (end - 1 > ceiling - 1)
                return;
-       }
-       if (!((start | end) & ~PMD_MASK)) {
-               /* Only clear full, aligned ranges */
-               page = pmd_page(*pmd);
-               pmd_clear(pmd);
-               dec_page_state(nr_page_table_pages);
-               tlb->mm->nr_ptes--;
-               pte_free_tlb(tlb, page);
-       }
-}
-
-static inline void clear_pud_range(struct mmu_gather *tlb, pud_t *pud, unsigned long start, unsigned long end)
-{
-       unsigned long addr = start, next;
-       pmd_t *pmd, *__pmd;
-
-       if (pud_none(*pud))
+
+       pmd = pmd_offset(pud, start);
+       pud_clear(pud);
+       pmd_free_tlb(tlb, pmd);
+}
+
+static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+                               unsigned long addr, unsigned long end,
+                               unsigned long floor, unsigned long ceiling)
+{
+       pud_t *pud;
+       unsigned long next;
+       unsigned long start;
+
+       start = addr;
+       pud = pud_offset(pgd, addr);
+       do {
+               next = pud_addr_end(addr, end);
+               if (pud_none_or_clear_bad(pud))
+                       continue;
+               free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+       } while (pud++, addr = next, addr != end);
+
+       start &= PGDIR_MASK;
+       if (start < floor)
                return;
-       if (unlikely(pud_bad(*pud))) {
-               pud_ERROR(*pud);
-               pud_clear(pud);
+       if (ceiling) {
+               ceiling &= PGDIR_MASK;
+               if (!ceiling)
+                       return;
+       }
+       if (end - 1 > ceiling - 1)
                return;
-       }
-
-       pmd = __pmd = pmd_offset(pud, start);
+
+       pud = pud_offset(pgd, start);
+       pgd_clear(pgd);
+       pud_free_tlb(tlb, pud);
+}
+
+/*
+ * This function frees user-level page tables of a process.
+ *
+ * Must be called with pagetable lock held.
+ */
+void free_pgd_range(struct mmu_gather **tlb,
+                       unsigned long addr, unsigned long end,
+                       unsigned long floor, unsigned long ceiling)
+{
+       pgd_t *pgd;
+       unsigned long next;
+       unsigned long start;
+
+       /*
+        * The next few lines have given us lots of grief...
+        *
+        * Why are we testing PMD* at this top level?  Because often
+        * there will be no work to do at all, and we'd prefer not to
+        * go all the way down to the bottom just to discover that.
+        *
+        * Why all these "- 1"s?  Because 0 represents both the bottom
+        * of the address space and the top of it (using -1 for the
+        * top wouldn't help much: the masks would do the wrong thing).
+        * The rule is that addr 0 and floor 0 refer to the bottom of
+        * the address space, but end 0 and ceiling 0 refer to the top.
+        * Comparisons need to use "end - 1" and "ceiling - 1" (though
+        * that end 0 case should be mythical).
+        *
+        * Wherever addr is brought up or ceiling brought down, we must
+        * be careful to reject "the opposite 0" before it confuses the
+        * subsequent tests.  But what about where end is brought down
+        * by PMD_SIZE below? No, end can't go down to 0 there.
+        *
+        * Whereas we round start (addr) and ceiling down, by different
+        * masks at different levels, in order to test whether a table
+        * now has no other vmas using it, so can be freed, we don't
+        * bother to round floor or end up - the tests don't need that.
+        */
+
+       addr &= PMD_MASK;
+       if (addr < floor) {
+               addr += PMD_SIZE;
+               if (!addr)
+                       return;
+       }
+       if (ceiling) {
+               ceiling &= PMD_MASK;
+               if (!ceiling)
+                       return;
+       }
+       if (end - 1 > ceiling - 1)
+               end -= PMD_SIZE;
+       if (addr > end - 1)
+               return;
+
+       start = addr;
+       pgd = pgd_offset((*tlb)->mm, addr);
        do {
-               next = (addr + PMD_SIZE) & PMD_MASK;
-               if (next > end || next <= addr)
-                       next = end;
-               
-               clear_pmd_range(tlb, pmd, addr, next);
-               pmd++;
-               addr = next;
-       } while (addr && (addr < end));
-
-       if (!((start | end) & ~PUD_MASK)) {
-               /* Only clear full, aligned ranges */
-               pud_clear(pud);
-               pmd_free_tlb(tlb, __pmd);
-       }
-}
-
-
-static inline void clear_pgd_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long start, unsigned long end)
-{
-       unsigned long addr = start, next;
-       pud_t *pud, *__pud;
-
-       if (pgd_none(*pgd))
-               return;
-       if (unlikely(pgd_bad(*pgd))) {
-               pgd_ERROR(*pgd);
-               pgd_clear(pgd);
-               return;
-       }
-
-       pud = __pud = pud_offset(pgd, start);
-       do {
-               next = (addr + PUD_SIZE) & PUD_MASK;
-               if (next > end || next <= addr)
-                       next = end;
-               
-               clear_pud_range(tlb, pud, addr, next);
-               pud++;
-               addr = next;
-       } while (addr && (addr < end));
-
-       if (!((start | end) & ~PGDIR_MASK)) {
-               /* Only clear full, aligned ranges */
-               pgd_clear(pgd);
-               pud_free_tlb(tlb, __pud);
-       }
-}
-
-/*
- * This function clears user-level page tables of a process.
- *
- * Must be called with pagetable lock held.
- */
-void clear_page_range(struct mmu_gather *tlb, unsigned long start, unsigned long end)
-{
-       unsigned long addr = start, next;
-       pgd_t * pgd = pgd_offset(tlb->mm, start);
-       unsigned long i;
-
-       for (i = pgd_index(start); i <= pgd_index(end-1); i++) {
-               next = (addr + PGDIR_SIZE) & PGDIR_MASK;
-               if (next > end || next <= addr)
-                       next = end;
-               
-               clear_pgd_range(tlb, pgd, addr, next);
-               pgd++;
-               addr = next;
-       }
-}
-
-pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+               next = pgd_addr_end(addr, end);
+               if (pgd_none_or_clear_bad(pgd))
+                       continue;
+               free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
+       } while (pgd++, addr = next, addr != end);
+
+       if (!tlb_is_full_mm(*tlb))
+               flush_tlb_pgtables((*tlb)->mm, start, end);
+}
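/*
 * Worked example of the clamping above, assuming 4KB pages and
 * PMD_SIZE = 2MB: with addr = 0x1ff000, end = 0x400000 and
 * floor = 0x200000, "addr &= PMD_MASK" rounds addr down to 0, the
 * "addr < floor" test then bumps it back up to 0x200000, and so the
 * pte table covering [0, 0x200000), which neighbouring vmas below
 * floor may still be using, is left untouched.  Only tables lying
 * wholly inside [floor, ceiling) are freed.
 */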
+
+void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
+               unsigned long floor, unsigned long ceiling)
+{
+       while (vma) {
+               struct vm_area_struct *next = vma->vm_next;
+               unsigned long addr = vma->vm_start;
+
+               if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) {
+                       hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
+                               floor, next? next->vm_start: ceiling);
+               } else {
+                       /*
+                        * Optimization: gather nearby vmas into one call down
+                        */
+                       while (next && next->vm_start <= vma->vm_end + PMD_SIZE
+                         && !is_hugepage_only_range(vma->vm_mm, next->vm_start,
+                                                       HPAGE_SIZE)) {
+                               vma = next;
+                               next = vma->vm_next;
+                       }
+                       free_pgd_range(tlb, addr, vma->vm_end,
+                               floor, next? next->vm_start: ceiling);
+               }
+               vma = next;
+       }
+}
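/*
 * Example of the gathering above: vmas at 0x400000-0x401000 and
 * 0x402000-0x403000 fall within PMD_SIZE of each other, so both are
 * covered by a single free_pgd_range(tlb, 0x400000, 0x403000, ...)
 * call rather than two; they share the same page-table pages, and
 * separate calls per vma would only repeat the boundary checks.
 */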
+
+pte_t fastcall *pte_alloc_map(struct mm_struct *mm, pmd_t *pmd,
+                               unsigned long address)
 {
        if (!pmd_present(*pmd)) {
                struct page *new;
@@ -254,20 +342,7 @@
  */
 
 static inline void
-copy_swap_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t pte)
-{
-       if (pte_file(pte))
-               return;
-       swap_duplicate(pte_to_swp_entry(pte));
-       if (list_empty(&dst_mm->mmlist)) {
-               spin_lock(&mmlist_lock);
-               list_add(&dst_mm->mmlist, &src_mm->mmlist);
-               spin_unlock(&mmlist_lock);
-       }
-}
-
-static inline void
-copy_one_pte(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
+copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                pte_t *dst_pte, pte_t *src_pte, unsigned long vm_flags,
                unsigned long addr)
 {
@@ -275,12 +350,21 @@
        struct page *page;
        unsigned long pfn;
 
-       /* pte contains position in swap, so copy. */
-       if (!pte_present(pte)) {
-               copy_swap_pte(dst_mm, src_mm, pte);
-               set_pte(dst_pte, pte);
+       /* pte contains position in swap or file, so copy. */
+       if (unlikely(!pte_present(pte))) {
+               if (!pte_file(pte)) {
+                       swap_duplicate(pte_to_swp_entry(pte));
+                       /* make sure dst_mm is on swapoff's mmlist. */
+                       if (unlikely(list_empty(&dst_mm->mmlist))) {
+                               spin_lock(&mmlist_lock);
+                               list_add(&dst_mm->mmlist, &src_mm->mmlist);
+                               spin_unlock(&mmlist_lock);
+                       }
+               }
+               set_pte_at(dst_mm, addr, dst_pte, pte);
                return;
        }
+
        pfn = pte_pfn(pte);
        /* the pte points outside of valid memory, the
         * mapping is assumed to be good, meaningful
@@ -292,7 +376,7 @@
                page = pfn_to_page(pfn);
 
        if (!page || PageReserved(page)) {
-               set_pte(dst_pte, pte);
+               set_pte_at(dst_mm, addr, dst_pte, pte);
                return;
        }
 
@@ -301,7 +385,7 @@
         * in the parent and the child
         */
        if ((vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) {
-               ptep_set_wrprotect(src_pte);
+               ptep_set_wrprotect(src_mm, addr, src_pte);
                pte = *src_pte;
        }
 
@@ -313,172 +397,137 @@
                pte = pte_mkclean(pte);
        pte = pte_mkold(pte);
        get_page(page);
-       dst_mm->rss++;
+       inc_mm_counter(dst_mm, rss);
        if (PageAnon(page))
-               dst_mm->anon_rss++;
-       set_pte(dst_pte, pte);
+               inc_mm_counter(dst_mm, anon_rss);
+       set_pte_at(dst_mm, addr, dst_pte, pte);
        page_dup_rmap(page);
 }
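/*
 * Note on the API change running through this file: in 2.6.12 the pte
 * accessors grew mm/address arguments (set_pte_at, pte_clear(mm, addr,
 * ptep), ptep_set_wrprotect(mm, addr, ptep)), letting architectures,
 * and paravirtualized trees such as this one, see which address space
 * and virtual address an update targets.  On architectures that do not
 * need the extra context, the fallback is simply (sketch):
 *
 *	#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
 */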
 
-static int copy_pte_range(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
+static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
                unsigned long addr, unsigned long end)
 {
        pte_t *src_pte, *dst_pte;
-       pte_t *s, *d;
        unsigned long vm_flags = vma->vm_flags;
-
-       d = dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
+       int progress;
+
+again:
+       dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
        if (!dst_pte)
                return -ENOMEM;
-
+       src_pte = pte_offset_map_nested(src_pmd, addr);
+
+       progress = 0;
        spin_lock(&src_mm->page_table_lock);
-       s = src_pte = pte_offset_map_nested(src_pmd, addr);
-       for (; addr < end; addr += PAGE_SIZE, s++, d++) {
-               if (pte_none(*s))
+       do {
+               /*
+                * We are holding two locks at this point - either of them
+                * could generate latencies in another task on another CPU.
+                */
+               if (progress >= 32 && (need_resched() ||
+                   need_lockbreak(&src_mm->page_table_lock) ||
+                   need_lockbreak(&dst_mm->page_table_lock)))
+                       break;
+               if (pte_none(*src_pte)) {
+                       progress++;
                        continue;
-               copy_one_pte(dst_mm, src_mm, d, s, vm_flags, addr);
-       }
-       pte_unmap_nested(src_pte);
-       pte_unmap(dst_pte);
+               }
+               copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vm_flags, addr);
+               progress += 8;
+       } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
        spin_unlock(&src_mm->page_table_lock);
+
+       pte_unmap_nested(src_pte - 1);
+       pte_unmap(dst_pte - 1);
        cond_resched_lock(&dst_mm->page_table_lock);
+       if (addr != end)
+               goto again;
        return 0;
 }
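/*
 * The progress counter above is a latency heuristic: an empty pte
 * costs 1, a copied pte costs 8, and only once 32 units accumulate do
 * we test need_resched()/need_lockbreak() at all.  Stripped-down
 * sketch of the pattern (contended(), do_one_unit() and more_work()
 * are stand-ins, not kernel APIs):
 *
 *	again:
 *		progress = 0;
 *		spin_lock(&lock);
 *		do {
 *			if (progress >= 32 && contended())
 *				break;		// drop the lock, breathe
 *			progress += do_one_unit();
 *		} while (more_work());
 *		spin_unlock(&lock);
 *		if (more_work())
 *			goto again;	// resume where we left off
 */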
 
-static int copy_pmd_range(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
+static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
                unsigned long addr, unsigned long end)
 {
        pmd_t *src_pmd, *dst_pmd;
-       int err = 0;
        unsigned long next;
 
-       src_pmd = pmd_offset(src_pud, addr);
        dst_pmd = pmd_alloc(dst_mm, dst_pud, addr);
        if (!dst_pmd)
                return -ENOMEM;
-
-       for (; addr < end; addr = next, src_pmd++, dst_pmd++) {
-               next = (addr + PMD_SIZE) & PMD_MASK;
-               if (next > end || next <= addr)
-                       next = end;
-               if (pmd_none(*src_pmd))
+       src_pmd = pmd_offset(src_pud, addr);
+       do {
+               next = pmd_addr_end(addr, end);
+               if (pmd_none_or_clear_bad(src_pmd))
                        continue;
-               if (pmd_bad(*src_pmd)) {
-                       pmd_ERROR(*src_pmd);
-                       pmd_clear(src_pmd);
-                       continue;
-               }
-               err = copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
-                                                       vma, addr, next);
-               if (err)
-                       break;
-       }
-       return err;
-}
-
-static int copy_pud_range(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
+               if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
+                                               vma, addr, next))
+                       return -ENOMEM;
+       } while (dst_pmd++, src_pmd++, addr = next, addr != end);
+       return 0;
+}
+
+static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
                unsigned long addr, unsigned long end)
 {
        pud_t *src_pud, *dst_pud;
-       int err = 0;
        unsigned long next;
 
-       src_pud = pud_offset(src_pgd, addr);
        dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
        if (!dst_pud)
                return -ENOMEM;
-
-       for (; addr < end; addr = next, src_pud++, dst_pud++) {
-               next = (addr + PUD_SIZE) & PUD_MASK;
-               if (next > end || next <= addr)
-                       next = end;
-               if (pud_none(*src_pud))
+       src_pud = pud_offset(src_pgd, addr);
+       do {
+               next = pud_addr_end(addr, end);
+               if (pud_none_or_clear_bad(src_pud))
                        continue;
-               if (pud_bad(*src_pud)) {
-                       pud_ERROR(*src_pud);
-                       pud_clear(src_pud);
+               if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
+                                               vma, addr, next))
+                       return -ENOMEM;
+       } while (dst_pud++, src_pud++, addr = next, addr != end);
+       return 0;
+}
+
+int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+               struct vm_area_struct *vma)
+{
+       pgd_t *src_pgd, *dst_pgd;
+       unsigned long next;
+       unsigned long addr = vma->vm_start;
+       unsigned long end = vma->vm_end;
+
+       if (is_vm_hugetlb_page(vma))
+               return copy_hugetlb_page_range(dst_mm, src_mm, vma);
+
+       dst_pgd = pgd_offset(dst_mm, addr);
+       src_pgd = pgd_offset(src_mm, addr);
+       do {
+               next = pgd_addr_end(addr, end);
+               if (pgd_none_or_clear_bad(src_pgd))
                        continue;
-               }
-               err = copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
-                                                       vma, addr, next);
-               if (err)
-                       break;
-       }
-       return err;
-}
-
-int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
-               struct vm_area_struct *vma)
-{
-       pgd_t *src_pgd, *dst_pgd;
-       unsigned long addr, start, end, next;
-       int err = 0;
-
-       if (is_vm_hugetlb_page(vma))
-               return copy_hugetlb_page_range(dst, src, vma);
-
-       start = vma->vm_start;
-       src_pgd = pgd_offset(src, start);
-       dst_pgd = pgd_offset(dst, start);
-
-       end = vma->vm_end;
-       addr = start;
-       while (addr && (addr < end-1)) {
-               next = (addr + PGDIR_SIZE) & PGDIR_MASK;
-               if (next > end || next <= addr)
-                       next = end;
-               if (pgd_none(*src_pgd))
-                       goto next_pgd;
-               if (pgd_bad(*src_pgd)) {
-                       pgd_ERROR(*src_pgd);
-                       pgd_clear(src_pgd);
-                       goto next_pgd;
-               }
-               err = copy_pud_range(dst, src, dst_pgd, src_pgd,
-                                                       vma, addr, next);
-               if (err)
-                       break;
-
-next_pgd:
-               src_pgd++;
-               dst_pgd++;
-               addr = next;
-       }
-
-       return err;
-}
-
-static void zap_pte_range(struct mmu_gather *tlb,
-               pmd_t *pmd, unsigned long address,
-               unsigned long size, struct zap_details *details)
-{
-       unsigned long offset;
-       pte_t *ptep;
-
-       if (pmd_none(*pmd))
-               return;
-       if (unlikely(pmd_bad(*pmd))) {
-               pmd_ERROR(*pmd);
-               pmd_clear(pmd);
-               return;
-       }
-       ptep = pte_offset_map(pmd, address);
-       offset = address & ~PMD_MASK;
-       if (offset + size > PMD_SIZE)
-               size = PMD_SIZE - offset;
-       size &= PAGE_MASK;
-       if (details && !details->check_mapping && !details->nonlinear_vma)
-               details = NULL;
-       for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) {
-               pte_t pte = *ptep;
-               if (pte_none(pte))
+               if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
+                                               vma, addr, next))
+                       return -ENOMEM;
+       } while (dst_pgd++, src_pgd++, addr = next, addr != end);
+       return 0;
+}
+
+static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+                               unsigned long addr, unsigned long end,
+                               struct zap_details *details)
+{
+       pte_t *pte;
+
+       pte = pte_offset_map(pmd, addr);
+       do {
+               pte_t ptent = *pte;
+               if (pte_none(ptent))
                        continue;
-               if (pte_present(pte)) {
+               if (pte_present(ptent)) {
                        struct page *page = NULL;
-                       unsigned long pfn = pte_pfn(pte);
+                       unsigned long pfn = pte_pfn(ptent);
                        if (pfn_valid(pfn)) {
                                page = pfn_to_page(pfn);
                                if (PageReserved(page))
@@ -502,19 +551,20 @@
                                     page->index > details->last_index))
                                        continue;
                        }
-                       pte = ptep_get_and_clear(ptep);
-                       tlb_remove_tlb_entry(tlb, ptep, address+offset);
+                       ptent = ptep_get_and_clear(tlb->mm, addr, pte);
+                       tlb_remove_tlb_entry(tlb, pte, addr);
                        if (unlikely(!page))
                                continue;
                        if (unlikely(details) && details->nonlinear_vma
                            && linear_page_index(details->nonlinear_vma,
-                                       address+offset) != page->index)
-                               set_pte(ptep, pgoff_to_pte(page->index));
-                       if (pte_dirty(pte))
+                                               addr) != page->index)
+                               set_pte_at(tlb->mm, addr, pte,
+                                          pgoff_to_pte(page->index));
+                       if (pte_dirty(ptent))
                                set_page_dirty(page);
                        if (PageAnon(page))
-                               tlb->mm->anon_rss--;
-                       else if (pte_young(pte))
+                               dec_mm_counter(tlb->mm, anon_rss);
+                       else if (pte_young(ptent))
                                mark_page_accessed(page);
                        tlb->freed++;
                        page_remove_rmap(page);
@@ -527,78 +577,64 @@
                 */
                if (unlikely(details))
                        continue;
-               if (!pte_file(pte))
-                       free_swap_and_cache(pte_to_swp_entry(pte));
-               pte_clear(ptep);
-       }
-       pte_unmap(ptep-1);
-}
-
-static void zap_pmd_range(struct mmu_gather *tlb,
-               pud_t *pud, unsigned long address,
-               unsigned long size, struct zap_details *details)
-{
-       pmd_t * pmd;
-       unsigned long end;
-
-       if (pud_none(*pud))
-               return;
-       if (unlikely(pud_bad(*pud))) {
-               pud_ERROR(*pud);
-               pud_clear(pud);
-               return;
-       }
-       pmd = pmd_offset(pud, address);
-       end = address + size;
-       if (end > ((address + PUD_SIZE) & PUD_MASK))
-               end = ((address + PUD_SIZE) & PUD_MASK);
+               if (!pte_file(ptent))
+                       free_swap_and_cache(pte_to_swp_entry(ptent));
+               pte_clear(tlb->mm, addr, pte);
+       } while (pte++, addr += PAGE_SIZE, addr != end);
+       pte_unmap(pte - 1);
+}
+
+static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+                               unsigned long addr, unsigned long end,
+                               struct zap_details *details)
+{
+       pmd_t *pmd;
+       unsigned long next;
+
+       pmd = pmd_offset(pud, addr);
        do {
-               zap_pte_range(tlb, pmd, address, end - address, details);
-               address = (address + PMD_SIZE) & PMD_MASK; 
-               pmd++;
-       } while (address && (address < end));
-}
-
-static void zap_pud_range(struct mmu_gather *tlb,
-               pgd_t * pgd, unsigned long address,
-               unsigned long end, struct zap_details *details)
-{
-       pud_t * pud;
-
-       if (pgd_none(*pgd))
-               return;
-       if (unlikely(pgd_bad(*pgd))) {
-               pgd_ERROR(*pgd);
-               pgd_clear(pgd);
-               return;
-       }
-       pud = pud_offset(pgd, address);
+               next = pmd_addr_end(addr, end);
+               if (pmd_none_or_clear_bad(pmd))
+                       continue;
+               zap_pte_range(tlb, pmd, addr, next, details);
+       } while (pmd++, addr = next, addr != end);
+}
+
+static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+                               unsigned long addr, unsigned long end,
+                               struct zap_details *details)
+{
+       pud_t *pud;
+       unsigned long next;
+
+       pud = pud_offset(pgd, addr);
        do {
-               zap_pmd_range(tlb, pud, address, end - address, details);
-               address = (address + PUD_SIZE) & PUD_MASK; 
-               pud++;
-       } while (address && (address < end));
-}
-
-static void unmap_page_range(struct mmu_gather *tlb,
-               struct vm_area_struct *vma, unsigned long address,
-               unsigned long end, struct zap_details *details)
-{
+               next = pud_addr_end(addr, end);
+               if (pud_none_or_clear_bad(pud))
+                       continue;
+               zap_pmd_range(tlb, pud, addr, next, details);
+       } while (pud++, addr = next, addr != end);
+}
+
+static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+                               unsigned long addr, unsigned long end,
+                               struct zap_details *details)
+{
+       pgd_t *pgd;
        unsigned long next;
-       pgd_t *pgd;
-       int i;
-
-       BUG_ON(address >= end);
-       pgd = pgd_offset(vma->vm_mm, address);
+
+       if (details && !details->check_mapping && !details->nonlinear_vma)
+               details = NULL;
+
+       BUG_ON(addr >= end);
        tlb_start_vma(tlb, vma);
-       for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
-               next = (address + PGDIR_SIZE) & PGDIR_MASK;
-               if (next <= address || next > end)
-                       next = end;
-               zap_pud_range(tlb, pgd, address, next, details);
-               address = next;
-               pgd++;
-       }
+       pgd = pgd_offset(vma->vm_mm, addr);
+       do {
+               next = pgd_addr_end(addr, end);
+               if (pgd_none_or_clear_bad(pgd))
+                       continue;
+               zap_pud_range(tlb, pgd, addr, next, details);
+       } while (pgd++, addr = next, addr != end);
        tlb_end_vma(tlb, vma);
 }
 
@@ -619,7 +655,7 @@
  * @nr_accounted: Place number of unmapped pages in vm-accountable vma's here
  * @details: details of nonlinear truncation or shared cache invalidation
  *
- * Returns the number of vma's which were covered by the unmapping.
+ * Returns the end address of the unmapping (restart addr if interrupted).
  *
  * Unmap all pages in the vma list.  Called under page_table_lock.
  *
@@ -636,7 +672,7 @@
  * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
  * drops the lock and schedules.
  */
-int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
                struct vm_area_struct *vma, unsigned long start_addr,
                unsigned long end_addr, unsigned long *nr_accounted,
                struct zap_details *details)
@@ -644,12 +680,11 @@
        unsigned long zap_bytes = ZAP_BLOCK_SIZE;
        unsigned long tlb_start = 0;    /* For tlb_finish_mmu */
        int tlb_start_valid = 0;
-       int ret = 0;
+       unsigned long start = start_addr;
        spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
        int fullmm = tlb_is_full_mm(*tlbp);
 
        for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
-               unsigned long start;
                unsigned long end;
 
                start = max(vma->vm_start, start_addr);
@@ -662,7 +697,6 @@
                if (vma->vm_flags & VM_ACCOUNT)
                        *nr_accounted += (end - start) >> PAGE_SHIFT;
 
-               ret++;
                while (start != end) {
                        unsigned long block;
 
@@ -693,7 +727,6 @@
                                if (i_mmap_lock) {
                                        /* must reset count of rss freed */
                                        *tlbp = tlb_gather_mmu(mm, fullmm);
-                                       details->break_addr = start;
                                        goto out;
                                }
                                spin_unlock(&mm->page_table_lock);
@@ -707,7 +740,7 @@
                }
        }
 out:
-       return ret;
+       return start;   /* which is now the end (or restart) address */
 }
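/*
 * With this change a caller can tell whether the walk completed by
 * comparing the return value against the end it asked for; a sketch
 * of a hypothetical caller:
 *
 *	restart = unmap_vmas(&tlb, mm, vma, start, end, &nr, details);
 *	if (restart < end)
 *		remember(restart);	// interrupted: resume from here later
 */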
 
 /**
@@ -717,7 +750,7 @@
  * @size: number of bytes to zap
  * @details: details of nonlinear truncation or shared cache invalidation
  */
-void zap_page_range(struct vm_area_struct *vma, unsigned long address,
+unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
                unsigned long size, struct zap_details *details)
 {
        struct mm_struct *mm = vma->vm_mm;
@@ -727,16 +760,16 @@
 
        if (is_vm_hugetlb_page(vma)) {
                zap_hugepage_range(vma, address, size);
-               return;
+               return end;
        }
 
        lru_add_drain();
        spin_lock(&mm->page_table_lock);
        tlb = tlb_gather_mmu(mm, 0);
-       unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
+       end = unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
        tlb_finish_mmu(tlb, address, end);
-       acct_update_integrals();
        spin_unlock(&mm->page_table_lock);
+       return end;
 }
 
 /*
@@ -987,111 +1020,78 @@
 
 EXPORT_SYMBOL(get_user_pages);
 
-static void zeromap_pte_range(pte_t * pte, unsigned long address,
-                                     unsigned long size, pgprot_t prot)
-{
-       unsigned long end;
-
-       address &= ~PMD_MASK;
-       end = address + size;
-       if (end > PMD_SIZE)
-               end = PMD_SIZE;
+static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
+                       unsigned long addr, unsigned long end, pgprot_t prot)
+{
+       pte_t *pte;
+
+       pte = pte_alloc_map(mm, pmd, addr);
+       if (!pte)
+               return -ENOMEM;
        do {
-               pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot));
+               pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(addr), prot));
                BUG_ON(!pte_none(*pte));
-               set_pte(pte, zero_pte);
-               address += PAGE_SIZE;
-               pte++;
-       } while (address && (address < end));
-}
-
-static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd,
-               unsigned long address, unsigned long size, pgprot_t prot)
-{
-       unsigned long base, end;
-
-       base = address & PUD_MASK;
-       address &= ~PUD_MASK;
-       end = address + size;
-       if (end > PUD_SIZE)
-               end = PUD_SIZE;
+               set_pte_at(mm, addr, pte, zero_pte);
+       } while (pte++, addr += PAGE_SIZE, addr != end);
+       pte_unmap(pte - 1);
+       return 0;
+}
+
+static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
+                       unsigned long addr, unsigned long end, pgprot_t prot)
+{
+       pmd_t *pmd;
+       unsigned long next;
+
+       pmd = pmd_alloc(mm, pud, addr);
+       if (!pmd)
+               return -ENOMEM;
        do {
-               pte_t * pte = pte_alloc_map(mm, pmd, base + address);
-               if (!pte)
+               next = pmd_addr_end(addr, end);
+               if (zeromap_pte_range(mm, pmd, addr, next, prot))
                        return -ENOMEM;
-               zeromap_pte_range(pte, base + address, end - address, prot);
-               pte_unmap(pte);
-               address = (address + PMD_SIZE) & PMD_MASK;
-               pmd++;
-       } while (address && (address < end));
+       } while (pmd++, addr = next, addr != end);
        return 0;
 }
 
-static inline int zeromap_pud_range(struct mm_struct *mm, pud_t * pud,
-                                   unsigned long address,
-                                    unsigned long size, pgprot_t prot)
-{
-       unsigned long base, end;
-       int error = 0;
-
-       base = address & PGDIR_MASK;
-       address &= ~PGDIR_MASK;
-       end = address + size;
-       if (end > PGDIR_SIZE)
-               end = PGDIR_SIZE;
+static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+                       unsigned long addr, unsigned long end, pgprot_t prot)
+{
+       pud_t *pud;
+       unsigned long next;
+
+       pud = pud_alloc(mm, pgd, addr);
+       if (!pud)
+               return -ENOMEM;
        do {
-               pmd_t * pmd = pmd_alloc(mm, pud, base + address);
-               error = -ENOMEM;
-               if (!pmd)
+               next = pud_addr_end(addr, end);
+               if (zeromap_pmd_range(mm, pud, addr, next, prot))
+                       return -ENOMEM;
+       } while (pud++, addr = next, addr != end);
+       return 0;
+}
+
+int zeromap_page_range(struct vm_area_struct *vma,
+                       unsigned long addr, unsigned long size, pgprot_t prot)
+{
+       pgd_t *pgd;
+       unsigned long next;
+       unsigned long end = addr + size;
+       struct mm_struct *mm = vma->vm_mm;
+       int err;
+
+       BUG_ON(addr >= end);
+       pgd = pgd_offset(mm, addr);
+       flush_cache_range(vma, addr, end);
+       spin_lock(&mm->page_table_lock);
+       do {
+               next = pgd_addr_end(addr, end);
+               err = zeromap_pud_range(mm, pgd, addr, next, prot);
+               if (err)
                        break;
-               error = zeromap_pmd_range(mm, pmd, base + address,
-                                         end - address, prot);
-               if (error)
-                       break;
-               address = (address + PUD_SIZE) & PUD_MASK;
-               pud++;
-       } while (address && (address < end));
-       return 0;
-}
-
-int zeromap_page_range(struct vm_area_struct *vma, unsigned long address,
-                                       unsigned long size, pgprot_t prot)
-{
-       int i;
-       int error = 0;
-       pgd_t * pgd;
-       unsigned long beg = address;
-       unsigned long end = address + size;
-       unsigned long next;
-       struct mm_struct *mm = vma->vm_mm;
-
-       pgd = pgd_offset(mm, address);
-       flush_cache_range(vma, beg, end);
-       BUG_ON(address >= end);
-       BUG_ON(end > vma->vm_end);
-
-       spin_lock(&mm->page_table_lock);
-       for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
-               pud_t *pud = pud_alloc(mm, pgd, address);
-               error = -ENOMEM;
-               if (!pud)
-                       break;
-               next = (address + PGDIR_SIZE) & PGDIR_MASK;
-               if (next <= beg || next > end)
-                       next = end;
-               error = zeromap_pud_range(mm, pud, address,
-                                               next - address, prot);
-               if (error)
-                       break;
-               address = next;
-               pgd++;
-       }
-       /*
-        * Why flush? zeromap_pte_range has a BUG_ON for !pte_none()
-        */
-       flush_tlb_range(vma, beg, end);
+       } while (pgd++, addr = next, addr != end);
        spin_unlock(&mm->page_table_lock);
-       return error;
+       return err;
 }
 
 /*
@@ -1099,95 +1099,74 @@
 * mappings are removed. Any references to nonexistent pages result
  * in null mappings (currently treated as "copy-on-access")
  */
-static inline void
-remap_pte_range(pte_t * pte, unsigned long address, unsigned long size,
-               unsigned long pfn, pgprot_t prot)
-{
-       unsigned long end;
-
-       address &= ~PMD_MASK;
-       end = address + size;
-       if (end > PMD_SIZE)
-               end = PMD_SIZE;
+static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
+                       unsigned long addr, unsigned long end,
+                       unsigned long pfn, pgprot_t prot)
+{
+       pte_t *pte;
+
+       pte = pte_alloc_map(mm, pmd, addr);
+       if (!pte)
+               return -ENOMEM;
        do {
                BUG_ON(!pte_none(*pte));
                if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
-                       set_pte(pte, pfn_pte(pfn, prot));
-               address += PAGE_SIZE;
+                       set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
                pfn++;
-               pte++;
-       } while (address && (address < end));
-}
-
-static inline int
-remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address,
-               unsigned long size, unsigned long pfn, pgprot_t prot)
-{
-       unsigned long base, end;
-
-       base = address & PUD_MASK;
-       address &= ~PUD_MASK;
-       end = address + size;
-       if (end > PUD_SIZE)
-               end = PUD_SIZE;
-       pfn -= (address >> PAGE_SHIFT);
+       } while (pte++, addr += PAGE_SIZE, addr != end);
+       pte_unmap(pte - 1);
+       return 0;
+}
+
+static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
+                       unsigned long addr, unsigned long end,
+                       unsigned long pfn, pgprot_t prot)
+{
+       pmd_t *pmd;
+       unsigned long next;
+
+       pfn -= addr >> PAGE_SHIFT;
+       pmd = pmd_alloc(mm, pud, addr);
+       if (!pmd)
+               return -ENOMEM;
        do {
-               pte_t * pte = pte_alloc_map(mm, pmd, base + address);
-               if (!pte)
+               next = pmd_addr_end(addr, end);
+               if (remap_pte_range(mm, pmd, addr, next,
+                               pfn + (addr >> PAGE_SHIFT), prot))
                        return -ENOMEM;
-               remap_pte_range(pte, base + address, end - address,
-                               (address >> PAGE_SHIFT) + pfn, prot);
-               pte_unmap(pte);
-               address = (address + PMD_SIZE) & PMD_MASK;
-               pmd++;
-       } while (address && (address < end));
+       } while (pmd++, addr = next, addr != end);
        return 0;
 }
 
-static inline int remap_pud_range(struct mm_struct *mm, pud_t * pud,
-                                 unsigned long address, unsigned long size,
-                                 unsigned long pfn, pgprot_t prot)
-{
-       unsigned long base, end;
-       int error;
-
-       base = address & PGDIR_MASK;
-       address &= ~PGDIR_MASK;
-       end = address + size;
-       if (end > PGDIR_SIZE)
-               end = PGDIR_SIZE;
-       pfn -= address >> PAGE_SHIFT;
+static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+                       unsigned long addr, unsigned long end,
+                       unsigned long pfn, pgprot_t prot)
+{
+       pud_t *pud;
+       unsigned long next;
+
+       pfn -= addr >> PAGE_SHIFT;
+       pud = pud_alloc(mm, pgd, addr);
+       if (!pud)
+               return -ENOMEM;
        do {
-               pmd_t *pmd = pmd_alloc(mm, pud, base+address);
-               error = -ENOMEM;
-               if (!pmd)
-                       break;
-               error = remap_pmd_range(mm, pmd, base + address, end - address,
-                               (address >> PAGE_SHIFT) + pfn, prot);
-               if (error)
-                       break;
-               address = (address + PUD_SIZE) & PUD_MASK;
-               pud++;
-       } while (address && (address < end));
-       return error;
+               next = pud_addr_end(addr, end);
+               if (remap_pmd_range(mm, pud, addr, next,
+                               pfn + (addr >> PAGE_SHIFT), prot))
+                       return -ENOMEM;
+       } while (pud++, addr = next, addr != end);
+       return 0;
 }
 
 /*  Note: this is only safe if the mm semaphore is held when called. */
-int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
+int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
                    unsigned long pfn, unsigned long size, pgprot_t prot)
 {
-       int error = 0;
        pgd_t *pgd;
-       unsigned long beg = from;
-       unsigned long end = from + size;
        unsigned long next;
+       unsigned long end = addr + size;
        struct mm_struct *mm = vma->vm_mm;
-       int i;
-
-       pfn -= from >> PAGE_SHIFT;
-       pgd = pgd_offset(mm, from);
-       flush_cache_range(vma, beg, end);
-       BUG_ON(from >= end);
+       int err;
 
        /*
         * Physically remapped pages are special. Tell the
@@ -1199,31 +1178,21 @@
         */
        vma->vm_flags |= VM_IO | VM_RESERVED;
 
+       BUG_ON(addr >= end);
+       pfn -= addr >> PAGE_SHIFT;
+       pgd = pgd_offset(mm, addr);
+       flush_cache_range(vma, addr, end);
        spin_lock(&mm->page_table_lock);
-       for (i = pgd_index(beg); i <= pgd_index(end-1); i++) {
-               pud_t *pud = pud_alloc(mm, pgd, from);
-               error = -ENOMEM;
-               if (!pud)
+       do {
+               next = pgd_addr_end(addr, end);
+               err = remap_pud_range(mm, pgd, addr, next,
+                               pfn + (addr >> PAGE_SHIFT), prot);
+               if (err)
                        break;
-               next = (from + PGDIR_SIZE) & PGDIR_MASK;
-               if (next > end || next <= from)
-                       next = end;
-               error = remap_pud_range(mm, pud, from, end - from,
-                                       pfn + (from >> PAGE_SHIFT), prot);
-               if (error)
-                       break;
-               from = next;
-               pgd++;
-       }
-       /*
-        * Why flush? remap_pte_range has a BUG_ON for !pte_none()
-        */
-       flush_tlb_range(vma, beg, end);
+       } while (pgd++, addr = next, addr != end);
        spin_unlock(&mm->page_table_lock);
-
-       return error;
-}
-
+       return err;
+}
 EXPORT_SYMBOL(remap_pfn_range);
 
 /*
@@ -1247,11 +1216,11 @@
 {
        pte_t entry;
 
-       flush_cache_page(vma, address);
        entry = maybe_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)),
                              vma);
        ptep_establish(vma, address, page_table, entry);
        update_mmu_cache(vma, address, entry);
+       lazy_mmu_prot_update(entry);
 }
 
 /*
@@ -1299,11 +1268,12 @@
                int reuse = can_share_swap_page(old_page);
                unlock_page(old_page);
                if (reuse) {
-                       flush_cache_page(vma, address);
+                       flush_cache_page(vma, address, pfn);
                        entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
                                              vma);
                        ptep_set_access_flags(vma, address, page_table, entry, 1);
                        update_mmu_cache(vma, address, entry);
+                       lazy_mmu_prot_update(entry);
                        pte_unmap(page_table);
                        spin_unlock(&mm->page_table_lock);
                        return VM_FAULT_MINOR;
@@ -1337,13 +1307,12 @@
        page_table = pte_offset_map(pmd, address);
        if (likely(pte_same(*page_table, pte))) {
                if (PageAnon(old_page))
-                       mm->anon_rss--;
-               if (PageReserved(old_page)) {
-                       ++mm->rss;
-                       acct_update_integrals();
-                       update_mem_hiwater();
-               } else
+                       dec_mm_counter(mm, anon_rss);
+               if (PageReserved(old_page))
+                       inc_mm_counter(mm, rss);
+               else
                        page_remove_rmap(old_page);
+               flush_cache_page(vma, address, pfn);
                break_cow(vma, new_page, address, page_table);
                lru_cache_add_active(new_page);
                page_add_anon_rmap(new_page, vma, address);
@@ -1387,7 +1356,7 @@
  * i_mmap_lock.
  *
  * In order to make forward progress despite repeatedly restarting some
- * large vma, note the break_addr set by unmap_vmas when it breaks out:
+ * large vma, note the restart_addr from unmap_vmas when it breaks out:
  * and restart from that address when we reach that vma again.  It might
  * have been split or merged, shrunk or extended, but never shifted: so
  * restart_addr remains valid so long as it remains in the vma's range.
@@ -1425,8 +1394,8 @@
                }
        }
 
-       details->break_addr = end_addr;
-       zap_page_range(vma, start_addr, end_addr - start_addr, details);
+       restart_addr = zap_page_range(vma, start_addr,
+                                       end_addr - start_addr, details);
 
        /*
         * We cannot rely on the break test in unmap_vmas:
@@ -1437,14 +1406,14 @@
        need_break = need_resched() ||
                        need_lockbreak(details->i_mmap_lock);
 
-       if (details->break_addr >= end_addr) {
+       if (restart_addr >= end_addr) {
                /* We have now completed this vma: mark it so */
                vma->vm_truncate_count = details->truncate_count;
                if (!need_break)
                        return 0;
        } else {
                /* Note restart_addr in vma's truncate_count field */
-               vma->vm_truncate_count = details->break_addr;
+               vma->vm_truncate_count = restart_addr;
                if (!need_break)
                        goto again;
        }
@@ -1732,12 +1701,13 @@
        spin_lock(&mm->page_table_lock);
        page_table = pte_offset_map(pmd, address);
        if (unlikely(!pte_same(*page_table, orig_pte))) {
-               pte_unmap(page_table);
-               spin_unlock(&mm->page_table_lock);
-               unlock_page(page);
-               page_cache_release(page);
                ret = VM_FAULT_MINOR;
-               goto out;
+               goto out_nomap;
+       }
+
+       if (unlikely(!PageUptodate(page))) {
+               ret = VM_FAULT_SIGBUS;
+               goto out_nomap;
        }
 
        /* The page isn't present yet, go ahead with the fault. */
@@ -1746,10 +1716,7 @@
        if (vm_swap_full())
                remove_exclusive_swap_page(page);
 
-       mm->rss++;
-       acct_update_integrals();
-       update_mem_hiwater();
-
+       inc_mm_counter(mm, rss);
        pte = mk_pte(page, vma->vm_page_prot);
        if (write_access && can_share_swap_page(page)) {
                pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -1758,7 +1725,7 @@
        unlock_page(page);
 
        flush_icache_page(vma, page);
-       set_pte(page_table, pte);
+       set_pte_at(mm, address, page_table, pte);
        page_add_anon_rmap(page, vma, address);
 
        if (write_access) {
@@ -1770,10 +1737,17 @@
 
        /* No need to invalidate - it was non-present before */
        update_mmu_cache(vma, address, pte);
+       lazy_mmu_prot_update(pte);
        pte_unmap(page_table);
        spin_unlock(&mm->page_table_lock);
 out:
        return ret;
+out_nomap:
+       pte_unmap(page_table);
+       spin_unlock(&mm->page_table_lock);
+       unlock_page(page);
+       page_cache_release(page);
+       goto out;
 }
 
 /*
@@ -1813,9 +1787,7 @@
                        spin_unlock(&mm->page_table_lock);
                        goto out;
                }
-               mm->rss++;
-               acct_update_integrals();
-               update_mem_hiwater();
+               inc_mm_counter(mm, rss);
                entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
                                                         vma->vm_page_prot)),
                                      vma);
@@ -1824,11 +1796,12 @@
                page_add_anon_rmap(page, vma, addr);
        }
 
-       ptep_establish_new(vma, addr, page_table, entry);
+       set_pte_at(mm, addr, page_table, entry);
        pte_unmap(page_table);
 
        /* No need to invalidate - it was non-present before */
        update_mmu_cache(vma, addr, entry);
+       lazy_mmu_prot_update(entry);
        spin_unlock(&mm->page_table_lock);
 out:
        return VM_FAULT_MINOR;
@@ -1931,15 +1904,13 @@
        /* Only go through if we didn't race with anybody else... */
        if (pte_none(*page_table)) {
                if (!PageReserved(new_page))
-                       ++mm->rss;
-               acct_update_integrals();
-               update_mem_hiwater();
+                       inc_mm_counter(mm, rss);
 
                flush_icache_page(vma, new_page);
                entry = mk_pte(new_page, vma->vm_page_prot);
                if (write_access)
                        entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-               ptep_establish_new(vma, address, page_table, entry);
+               set_pte_at(mm, address, page_table, entry);
                if (anon) {
                        lru_cache_add_active(new_page);
                        page_add_anon_rmap(new_page, vma, address);
@@ -1956,6 +1927,7 @@
 
        /* no need to invalidate: a not-present page shouldn't be cached */
        update_mmu_cache(vma, address, entry);
+       lazy_mmu_prot_update(entry);
        spin_unlock(&mm->page_table_lock);
 out:
        return ret;
@@ -1983,7 +1955,7 @@
         */
        if (!vma->vm_ops || !vma->vm_ops->populate || 
                        (write_access && !(vma->vm_flags & VM_SHARED))) {
-               pte_clear(pte);
+               pte_clear(mm, address, pte);
                return do_no_page(mm, vma, address, write_access, pte, pmd);
        }
 
@@ -2050,6 +2022,7 @@
        entry = pte_mkyoung(entry);
        ptep_set_access_flags(vma, address, pte, entry, write_access);
        update_mmu_cache(vma, address, entry);
+       lazy_mmu_prot_update(entry);
        pte_unmap(pte);
        spin_unlock(&mm->page_table_lock);
        return VM_FAULT_MINOR;
@@ -2099,15 +2072,12 @@
        return VM_FAULT_OOM;
 }
 
-#ifndef __ARCH_HAS_4LEVEL_HACK
+#ifndef __PAGETABLE_PUD_FOLDED
 /*
  * Allocate page upper directory.
  *
  * We've already handled the fast-path in-line, and we own the
  * page table lock.
- *
- * On a two-level or three-level page table, this ends up actually being
- * entirely optimized away.
  */
 pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
 {
@@ -2131,15 +2101,14 @@
  out:
        return pud_offset(pgd, address);
 }
-
+#endif /* __PAGETABLE_PUD_FOLDED */
+
+#ifndef __PAGETABLE_PMD_FOLDED
 /*
  * Allocate page middle directory.
  *
  * We've already handled the fast-path in-line, and we own the
  * page table lock.
- *
- * On a two-level page table, this ends up actually being entirely
- * optimized away.
  */
 pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 {
@@ -2155,38 +2124,24 @@
         * Because we dropped the lock, we should re-check the
         * entry, as somebody else could have populated it..
         */
+#ifndef __ARCH_HAS_4LEVEL_HACK
        if (pud_present(*pud)) {
                pmd_free(new);
                goto out;
        }
        pud_populate(mm, pud, new);
- out:
-       return pmd_offset(pud, address);
-}
 #else
-pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
-{
-       pmd_t *new;
-
-       spin_unlock(&mm->page_table_lock);
-       new = pmd_alloc_one(mm, address);
-       spin_lock(&mm->page_table_lock);
-       if (!new)
-               return NULL;
-
-       /*
-        * Because we dropped the lock, we should re-check the
-        * entry, as somebody else could have populated it..
-        */
        if (pgd_present(*pud)) {
                pmd_free(new);
                goto out;
        }
        pgd_populate(mm, pud, new);
-out:
+#endif /* __ARCH_HAS_4LEVEL_HACK */
+
+ out:
        return pmd_offset(pud, address);
 }
-#endif
+#endif /* __PAGETABLE_PMD_FOLDED */
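/*
 * Background for the new guards: 2.6.12's folded page-table headers
 * (include/asm-generic/pgtable-nopud.h and pgtable-nopmd.h) define
 * __PAGETABLE_PUD_FOLDED / __PAGETABLE_PMD_FOLDED on architectures
 * with fewer than four levels, so __pud_alloc/__pmd_alloc compile
 * away entirely.  Sketch of the folded pud accessor from the nopud
 * layout:
 *
 *	static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
 *	{
 *		return (pud_t *)pgd;	// the pud is the pgd entry itself
 *	}
 */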
 
 int make_pages_present(unsigned long addr, unsigned long end)
 {
@@ -2253,13 +2208,13 @@
  * update_mem_hiwater
  *     - update per process rss and vm high water data
  */
-void update_mem_hiwater(void)
-{
-       struct task_struct *tsk = current;
-
+void update_mem_hiwater(struct task_struct *tsk)
+{
        if (tsk->mm) {
-               if (tsk->mm->hiwater_rss < tsk->mm->rss)
-                       tsk->mm->hiwater_rss = tsk->mm->rss;
+               unsigned long rss = get_mm_counter(tsk->mm, rss);
+
+               if (tsk->mm->hiwater_rss < rss)
+                       tsk->mm->hiwater_rss = rss;
                if (tsk->mm->hiwater_vm < tsk->mm->total_vm)
                        tsk->mm->hiwater_vm = tsk->mm->total_vm;
        }
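/*
 * The inc_mm_counter/dec_mm_counter/get_mm_counter calls used
 * throughout this file wrap what used to be direct field access
 * (mm->rss++ and friends), so the representation can change without
 * touching every caller.  Sketch of the 2.6.12-style accessors
 * (assumed to live in include/linux/sched.h):
 *
 *	#define get_mm_counter(mm, member)	((mm)->_##member)
 *	#define inc_mm_counter(mm, member)	(mm)->_##member++
 *	#define dec_mm_counter(mm, member)	(mm)->_##member--
 */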
diff -r fa660d79f695 -r de310533c483 linux-2.6-xen-sparse/mm/page_alloc.c
--- a/linux-2.6-xen-sparse/mm/page_alloc.c      Tue Aug  9 15:17:45 2005
+++ b/linux-2.6-xen-sparse/mm/page_alloc.c      Tue Aug  9 23:57:17 2005
@@ -31,19 +31,26 @@
 #include <linux/topology.h>
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
+#include <linux/cpuset.h>
 #include <linux/nodemask.h>
 #include <linux/vmalloc.h>
 
 #include <asm/tlbflush.h>
 #include "internal.h"
 
-/* MCD - HACK: Find somewhere to initialize this EARLY, or make this initializer cleaner */
+/*
+ * MCD - HACK: Find somewhere to initialize this EARLY, or make this
+ * initializer cleaner
+ */
 nodemask_t node_online_map = { { [0] = 1UL } };
+EXPORT_SYMBOL(node_online_map);
 nodemask_t node_possible_map = NODE_MASK_ALL;
+EXPORT_SYMBOL(node_possible_map);
 struct pglist_data *pgdat_list;
 unsigned long totalram_pages;
 unsigned long totalhigh_pages;
 long nr_swap_pages;
+
 /*
  * results with 256, 32 in the lowmem_reserve sysctl:
  *     1G machine -> (16M dma, 800M-16M normal, 1G-800M high)
@@ -188,6 +195,37 @@
 {
        __ClearPagePrivate(page);
        page->private = 0;
+}
+
+/*
+ * Locate the struct page for both the matching buddy in our
+ * pair (buddy1) and the combined order O+1 page they form (page).
+ *
+ * 1) Any buddy B1 will have an order O twin B2 which satisfies
+ * the following equation:
+ *     B2 = B1 ^ (1 << O)
+ * For example, if the starting buddy (B1) is #8 its order
+ * 1 buddy is #10:
+ *     B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
+ *
+ * 2) Any buddy B will have an order O+1 parent P which
+ * satisfies the following equation:
+ *     P = B & ~(1 << O)
+ *
+ * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
+ */
+static inline struct page *
+__page_find_buddy(struct page *page, unsigned long page_idx, unsigned int order)
+{
+       unsigned long buddy_idx = page_idx ^ (1 << order);
+
+       return page + (buddy_idx - page_idx);
+}
+
+static inline unsigned long
+__find_combined_index(unsigned long page_idx, unsigned int order)
+{
+       return (page_idx & ~(1 << order));
 }
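/*
 * Continuing the worked example above for page #8: at order 1 its
 * buddy is 8 ^ (1 << 1) = 10 and the pair combines at
 * 8 & ~(1 << 1) = 8; at order 2 the buddy of that block is
 * 8 ^ (1 << 2) = 12, combining again into the order-3 block at
 * index 8.  __page_find_buddy() turns the index difference into a
 * struct page pointer, __find_combined_index() computes the parent.
 */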
 
 /*
@@ -233,50 +271,49 @@
  * -- wli
  */
 
-static inline void __free_pages_bulk (struct page *page, struct page *base,
+static inline void __free_pages_bulk (struct page *page,
                struct zone *zone, unsigned int order)
 {
        unsigned long page_idx;
-       struct page *coalesced;
        int order_size = 1 << order;
 
        if (unlikely(order))
                destroy_compound_page(page, order);
 
-       page_idx = page - base;
+       page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
 
        BUG_ON(page_idx & (order_size - 1));
        BUG_ON(bad_range(zone, page));
 
        zone->free_pages += order_size;
        while (order < MAX_ORDER-1) {
+               unsigned long combined_idx;
                struct free_area *area;
                struct page *buddy;
-               int buddy_idx;
-
-               buddy_idx = (page_idx ^ (1 << order));
-               buddy = base + buddy_idx;
+
+               combined_idx = __find_combined_index(page_idx, order);
+               buddy = __page_find_buddy(page, page_idx, order);
+
                if (bad_range(zone, buddy))
                        break;
                if (!page_is_buddy(buddy, order))
-                       break;
-               /* Move the buddy up one level. */
+                       break;          /* Move the buddy up one level. */
                list_del(&buddy->lru);
                area = zone->free_area + order;
                area->nr_free--;
                rmv_page_order(buddy);
-               page_idx &= buddy_idx;
+               page = page + (combined_idx - page_idx);
+               page_idx = combined_idx;
                order++;
        }
-       coalesced = base + page_idx;
-       set_page_order(coalesced, order);
-       list_add(&coalesced->lru, &zone->free_area[order].free_list);
+       set_page_order(page, order);
+       list_add(&page->lru, &zone->free_area[order].free_list);
        zone->free_area[order].nr_free++;
 }
 
 static inline void free_pages_check(const char *function, struct page *page)
 {
-       if (    page_mapped(page) ||
+       if (    page_mapcount(page) ||
                page->mapping != NULL ||
                page_count(page) != 0 ||
                (page->flags & (
@@ -309,10 +346,9 @@
                struct list_head *list, unsigned int order)
 {
        unsigned long flags;
-       struct page *base, *page = NULL;
+       struct page *page = NULL;
        int ret = 0;
 
-       base = zone->zone_mem_map;
        spin_lock_irqsave(&zone->lock, flags);
        zone->all_unreclaimable = 0;
        zone->pages_scanned = 0;
@@ -320,7 +356,7 @@
                page = list_entry(list->prev, struct page, lru);
                /* have to delete it as __free_pages_bulk list manipulates */
                list_del(&page->lru);
-               __free_pages_bulk(page, base, zone, order);
+               __free_pages_bulk(page, zone, order);
                ret++;
        }
        spin_unlock_irqrestore(&zone->lock, flags);
@@ -405,7 +441,7 @@
  */
 static void prep_new_page(struct page *page, int order)
 {
-       if (page->mapping || page_mapped(page) ||
+       if (page->mapping || page_mapcount(page) ||
            (page->flags & (
                        1 << PG_private |
                        1 << PG_locked  |
@@ -601,7 +637,7 @@
        free_hot_cold_page(page, 1);
 }
 
-static inline void prep_zero_page(struct page *page, int order, int gfp_flags)
+static inline void prep_zero_page(struct page *page, int order, unsigned int __nocast gfp_flags)
 {
        int i;
 
@@ -616,7 +652,7 @@
  * or two.
  */
 static struct page *
-buffered_rmqueue(struct zone *zone, int order, int gfp_flags)
+buffered_rmqueue(struct zone *zone, int order, unsigned int __nocast gfp_flags)
 {
        unsigned long flags;
        struct page *page = NULL;
@@ -694,7 +730,7 @@
  * This is the 'heart' of the zoned buddy allocator.
  */
 struct page * fastcall
-__alloc_pages(unsigned int gfp_mask, unsigned int order,
+__alloc_pages(unsigned int __nocast gfp_mask, unsigned int order,
                struct zonelist *zonelist)
 {
        const int wait = gfp_mask & __GFP_WAIT;
@@ -734,6 +770,9 @@
                                       classzone_idx, 0, 0))
                        continue;
 
+               if (!cpuset_zone_allowed(z))
+                       continue;
+
                page = buffered_rmqueue(z, order, gfp_mask);
                if (page)
                        goto got_pg;
@@ -745,6 +784,9 @@
        /*
         * Go through the zonelist again. Let __GFP_HIGH and allocations
         * coming from realtime tasks to go deeper into reserves
+        *
+        * This is the last chance, in general, before the goto nopage.
+        * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
         */
        for (i = 0; (z = zones[i]) != NULL; i++) {
                if (!zone_watermark_ok(z, order, z->pages_min,
@@ -752,18 +794,27 @@
                                       gfp_mask & __GFP_HIGH))
                        continue;
 
+               if (wait && !cpuset_zone_allowed(z))
+                       continue;
+
                page = buffered_rmqueue(z, order, gfp_mask);
                if (page)
                        goto got_pg;
        }
 
        /* This allocation should allow future memory freeing. */
-       if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE))) && !in_interrupt()) {
-               /* go through the zonelist yet again, ignoring mins */
-               for (i = 0; (z = zones[i]) != NULL; i++) {
-                       page = buffered_rmqueue(z, order, gfp_mask);
-                       if (page)
-                               goto got_pg;
+
+       if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
+                       && !in_interrupt()) {
+               if (!(gfp_mask & __GFP_NOMEMALLOC)) {
+                       /* go through the zonelist yet again, ignoring mins */
+                       for (i = 0; (z = zones[i]) != NULL; i++) {
+                               if (!cpuset_zone_allowed(z))
+                                       continue;
+                               page = buffered_rmqueue(z, order, gfp_mask);
+                               if (page)
+                                       goto got_pg;
+                       }
                }
                goto nopage;
        }
@@ -798,6 +849,9 @@
                        if (!zone_watermark_ok(z, order, z->pages_min,
                                               classzone_idx, can_try_harder,
                                               gfp_mask & __GFP_HIGH))
+                               continue;
+
+                       if (!cpuset_zone_allowed(z))
                                continue;
 
                        page = buffered_rmqueue(z, order, gfp_mask);
@@ -816,6 +870,9 @@
                                               classzone_idx, 0, 0))
                                continue;
 
+                       if (!cpuset_zone_allowed(z))
+                               continue;
+
                        page = buffered_rmqueue(z, order, gfp_mask);
                        if (page)
                                goto got_pg;
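
The cpuset_zone_allowed() checks threaded through __alloc_pages() above give
every zonelist scan the same shape: skip zones below the watermark, then skip
zones the allocating task's cpuset forbids. Only the atomic (!wait) pass
deliberately ignores the cpuset, preferring an off-cpuset page to a failed
GFP_ATOMIC allocation. A hypothetical user-space sketch of that scan shape
(the zone fields and function names below are invented for illustration, not
kernel API):

    #include <stdio.h>

    struct zone {
            const char *name;
            int free_pages;         /* stand-in for zone_watermark_ok() */
            int allowed;            /* stand-in for cpuset_zone_allowed() */
    };

    static struct zone *scan(struct zone **zones, int want)
    {
            int i;

            for (i = 0; zones[i] != NULL; i++) {
                    if (zones[i]->free_pages < want)
                            continue;
                    if (!zones[i]->allowed) /* the check this patch adds */
                            continue;
                    return zones[i];
            }
            return NULL;            /* caller falls back to reclaim, or fails */
    }

    int main(void)
    {
            struct zone highmem = { "HighMem", 512, 0 };    /* outside the cpuset */
            struct zone normal  = { "Normal",  256, 1 };
            struct zone *zonelist[] = { &highmem, &normal, NULL };
            struct zone *z = scan(zonelist, 128);

            printf("allocated from %s\n", z ? z->name : "(nowhere)");
            return 0;       /* prints: allocated from Normal */
    }
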
@@ -862,7 +919,7 @@
 /*
  * Common helper functions.
  */
-fastcall unsigned long __get_free_pages(unsigned int gfp_mask, unsigned int order)
+fastcall unsigned long __get_free_pages(unsigned int __nocast gfp_mask, unsigned int order)
 {
        struct page * page;
        page = alloc_pages(gfp_mask, order);
@@ -873,7 +930,7 @@
 
 EXPORT_SYMBOL(__get_free_pages);
 
-fastcall unsigned long get_zeroed_page(unsigned int gfp_mask)
+fastcall unsigned long get_zeroed_page(unsigned int __nocast gfp_mask)
 {
        struct page * page;
 
@@ -1302,8 +1359,7 @@
 #define MAX_NODE_LOAD (num_online_nodes())
 static int __initdata node_load[MAX_NUMNODES];
 /**
- * find_next_best_node - find the next node that should appear in a given
- *    node's fallback list
+ * find_next_best_node - find the next node that should appear in a given node's fallback list
  * @node: node whose fallback list we're appending
  * @used_node_mask: nodemask_t of already used nodes
  *
@@ -1372,7 +1428,6 @@
        /* initialize zonelists */
        for (i = 0; i < GFP_ZONETYPES; i++) {
                zonelist = pgdat->node_zonelists + i;
-               memset(zonelist, 0, sizeof(*zonelist));
                zonelist->zones[0] = NULL;
        }
 
@@ -1419,7 +1474,6 @@
                struct zonelist *zonelist;
 
                zonelist = pgdat->node_zonelists + i;
-               memset(zonelist, 0, sizeof(*zonelist));
 
                j = 0;
                k = ZONE_NORMAL;
@@ -1461,6 +1515,7 @@
        for_each_online_node(i)
                build_zonelists(NODE_DATA(i));
        printk("Built %i zonelists\n", num_online_nodes());
+       cpuset_init_current_mems_allowed();
 }
 
 /*
@@ -1622,6 +1677,18 @@
                batch /= 4;             /* We effectively *= 4 below */
                if (batch < 1)
                        batch = 1;
+
+               /*
+                * Clamp the batch to a 2^n - 1 value. Having a power
+                * of 2 value was found to be more likely to have
+                * suboptimal cache aliasing properties in some cases.
+                *
+                * For example if 2 tasks are alternately allocating
+                * batches of pages, one task can end up with a lot
+                * of pages of one half of the possible page colors
+                * and the other with pages of the other colors.
+                */
+               batch = (1 << fls(batch + batch/2)) - 1;
 
                for (cpu = 0; cpu < NR_CPUS; cpu++) {
                        struct per_cpu_pages *pcp;
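
The clamp added above rounds the per-cpu batch to one below a power of two,
since an exact power-of-two batch was observed to split the page colors
poorly between two tasks allocating in lockstep. Working the expression
batch = (1 << fls(batch + batch/2)) - 1 for a few inputs, in a stand-alone
sketch with fls() open-coded (fls returns the 1-based index of the highest
set bit):

    #include <stdio.h>

    static int fls_sketch(unsigned int x)   /* fls(16) == 5, fls(0) == 0 */
    {
            int bit = 0;

            while (x) {
                    bit++;
                    x >>= 1;
            }
            return bit;
    }

    int main(void)
    {
            unsigned int batch;

            for (batch = 1; batch <= 64; batch *= 4) {
                    unsigned int clamped = (1u << fls_sketch(batch + batch / 2)) - 1;
                    /* 1 -> 1, 4 -> 7, 16 -> 31, 64 -> 127 */
                    printf("batch %3u -> %3u\n", batch, clamped);
            }
            return 0;
    }
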
@@ -1681,14 +1748,25 @@
        }
 }
 
-void __init node_alloc_mem_map(struct pglist_data *pgdat)
+static void __init alloc_node_mem_map(struct pglist_data *pgdat)
 {
        unsigned long size;
 
-       size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
-       pgdat->node_mem_map = alloc_bootmem_node(pgdat, size);
+       /* Skip empty nodes */
+       if (!pgdat->node_spanned_pages)
+               return;
+
+       /* ia64 gets its own node_mem_map, before this, without bootmem */
+       if (!pgdat->node_mem_map) {
+               size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
+               pgdat->node_mem_map = alloc_bootmem_node(pgdat, size);
+       }
 #ifndef CONFIG_DISCONTIGMEM
-       mem_map = contig_page_data.node_mem_map;
+       /*
+        * With no DISCONTIG, the global mem_map is just set as node 0's
+        */
+       if (pgdat == NODE_DATA(0))
+               mem_map = NODE_DATA(0)->node_mem_map;
 #endif
 }
 
@@ -1700,8 +1778,7 @@
        pgdat->node_start_pfn = node_start_pfn;
        calculate_zone_totalpages(pgdat, zones_size, zholes_size);
 
-       if (!pfn_to_page(node_start_pfn))
-               node_alloc_mem_map(pgdat);
+       alloc_node_mem_map(pgdat);
 
        free_area_init_core(pgdat, zones_size, zholes_size);
 }
@@ -1823,6 +1900,7 @@
        "allocstall",
 
        "pgrotated",
+       "nr_bounce",
 };
 
 static void *vmstat_start(struct seq_file *m, loff_t *pos)
@@ -1926,15 +2004,20 @@
 
        for_each_pgdat(pgdat) {
                for (j = 0; j < MAX_NR_ZONES; j++) {
-                       struct zone * zone = pgdat->node_zones + j;
+                       struct zone *zone = pgdat->node_zones + j;
                        unsigned long present_pages = zone->present_pages;
 
                        zone->lowmem_reserve[j] = 0;
 
                        for (idx = j-1; idx >= 0; idx--) {
-                               struct zone * lower_zone = pgdat->node_zones + idx;
-
-                               lower_zone->lowmem_reserve[j] = present_pages / sysctl_lowmem_reserve_ratio[idx];
+                               struct zone *lower_zone;
+
+                               if (sysctl_lowmem_reserve_ratio[idx] < 1)
+                                       sysctl_lowmem_reserve_ratio[idx] = 1;
+
+                               lower_zone = pgdat->node_zones + idx;
+                               lower_zone->lowmem_reserve[j] = present_pages /
+                                       sysctl_lowmem_reserve_ratio[idx];
                                present_pages += lower_zone->present_pages;
                        }
                }
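
Besides reflowing the over-long lines, the hunk above clamps
sysctl_lowmem_reserve_ratio[idx] to at least 1 before dividing, so a zero
written through /proc/sys/vm/lowmem_reserve_ratio can no longer trigger a
division by zero. As an illustrative worked example (the figures are not
from the patch): with 224000 present pages above ZONE_DMA and a DMA ratio of
256, the DMA zone reserves 224000 / 256 = 875 pages against allocations that
could have been satisfied from a higher zone.
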
@@ -2041,7 +2124,7 @@
  *     changes.
  */
 int min_free_kbytes_sysctl_handler(ctl_table *table, int write, 
-               struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
 {
        proc_dointvec(table, write, file, buffer, length, ppos);
        setup_per_zone_pages_min();
@@ -2058,7 +2141,7 @@
  * if in function of the boot time zone sizes.
  */
 int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
-                struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
 {
        proc_dointvec_minmax(table, write, file, buffer, length, ppos);
        setup_per_zone_lowmem_reserve();

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
