[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Xen-changelog] [linux-2.6.18-xen] Imported linux-2.6-xen-sparse from xen-unstable.hg 15200:bd3d6b4c52ec

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
From: "Xen patchbot-linux-2.6.18-xen" <patchbot-linux-2.6.18-xen@xxxxxxxxxxxxxxxxxxx>
Date: Mon, 11 Jun 2007 02:23:05 -0700
Delivery-date: Tue, 12 Jun 2007 05:08:57 -0700
List-id: BK change log <xen-changelog.lists.xensource.com>
# HG changeset patch
# User Ian Campbell <ian.campbell@xxxxxxxxxxxxx>
# Date 1180947928 -3600
# Node ID a533be77c572209cd133734bab614a102a61ab75
# Parent  42f6970e18c9d339a78789e1a3e50b4cb948d99a
Imported linux-2.6-xen-sparse from xen-unstable.hg 15200:bd3d6b4c52ec
---
 arch/i386/Kconfig                                   |   85 
 arch/i386/Kconfig.cpu                               |    6 
 arch/i386/Kconfig.debug                             |    1 
 arch/i386/Makefile                                  |   24 
 arch/i386/boot-xen/Makefile                         |   19 
 arch/i386/kernel/Makefile                           |   22 
 arch/i386/kernel/acpi/Makefile                      |    4 
 arch/i386/kernel/acpi/boot-xen.c                    | 1168 ++++++++
 arch/i386/kernel/apic-xen.c                         |  155 +
 arch/i386/kernel/asm-offsets.c                      |    7 
 arch/i386/kernel/cpu/Makefile                       |    5 
 arch/i386/kernel/cpu/common-xen.c                   |  743 +++++
 arch/i386/kernel/cpu/mtrr/Makefile                  |    7 
 arch/i386/kernel/cpu/mtrr/main-xen.c                |  197 +
 arch/i386/kernel/crash.c                            |    4 
 arch/i386/kernel/early_printk-xen.c                 |    2 
 arch/i386/kernel/entry-xen.S                        | 1216 ++++++++
 arch/i386/kernel/fixup.c                            |   88 
 arch/i386/kernel/head-xen.S                         |  207 +
 arch/i386/kernel/init_task-xen.c                    |   51 
 arch/i386/kernel/io_apic-xen.c                      | 2777 ++++++++++++++++++++
 arch/i386/kernel/ioport-xen.c                       |  122 
 arch/i386/kernel/irq-xen.c                          |  324 ++
 arch/i386/kernel/ldt-xen.c                          |  270 +
 arch/i386/kernel/machine_kexec.c                    |   40 
 arch/i386/kernel/microcode-xen.c                    |  144 +
 arch/i386/kernel/mpparse-xen.c                      | 1185 ++++++++
 arch/i386/kernel/pci-dma-xen.c                      |  378 ++
 arch/i386/kernel/process-xen.c                      |  853 ++++++
 arch/i386/kernel/quirks-xen.c                       |   47 
 arch/i386/kernel/setup-xen.c                        | 1898 +++++++++++++
 arch/i386/kernel/smp-xen.c                          |  624 ++++
 arch/i386/kernel/swiotlb.c                          |  726 +++++
 arch/i386/kernel/sysenter.c                         |   18 
 arch/i386/kernel/time-xen.c                         | 1143 ++++++++
 arch/i386/kernel/traps-xen.c                        | 1186 ++++++++
 arch/i386/kernel/vm86.c                             |   12 
 arch/i386/kernel/vsyscall-note-xen.S                |   32 
 arch/i386/mach-xen/Makefile                         |    5 
 arch/i386/mach-xen/setup.c                          |  147 +
 arch/i386/mm/Makefile                               |    8 
 arch/i386/mm/fault-xen.c                            |  769 +++++
 arch/i386/mm/highmem-xen.c                          |  136 
 arch/i386/mm/hypervisor.c                           |  432 +++
 arch/i386/mm/init-xen.c                             |  850 ++++++
 arch/i386/mm/ioremap-xen.c                          |  443 +++
 arch/i386/mm/pgtable-xen.c                          |  727 +++++
 arch/i386/oprofile/Makefile                         |    7 
 arch/i386/oprofile/xenoprof.c                       |  179 +
 arch/i386/pci/Makefile                              |    9 
 arch/i386/pci/irq-xen.c                             | 1205 ++++++++
 arch/i386/pci/pcifront.c                            |   55 
 arch/i386/power/Makefile                            |    4 
 arch/ia64/Kconfig                                   |   57 
 arch/ia64/Makefile                                  |   12 
 arch/ia64/hp/common/sba_iommu.c                     |   61 
 arch/ia64/kernel/acpi.c                             |    6 
 arch/ia64/kernel/asm-offsets.c                      |   25 
 arch/ia64/kernel/entry.S                            |   36 
 arch/ia64/kernel/fsys.S                             |   41 
 arch/ia64/kernel/gate.S                             |  103 
 arch/ia64/kernel/gate.lds.S                         |   14 
 arch/ia64/kernel/head.S                             |    6 
 arch/ia64/kernel/iosapic.c                          |   75 
 arch/ia64/kernel/irq_ia64.c                         |  356 ++
 arch/ia64/kernel/pal.S                              |    5 
 arch/ia64/kernel/patch.c                            |   67 
 arch/ia64/kernel/perfmon.c                          |   89 
 arch/ia64/kernel/setup.c                            |  135 
 arch/ia64/kernel/time.c                             |  194 +
 arch/ia64/mm/ioremap.c                              |    3 
 arch/ia64/oprofile/Makefile                         |    4 
 arch/ia64/oprofile/init.c                           |   14 
 arch/ia64/oprofile/oprofile_perfmon.h               |   28 
 arch/ia64/oprofile/perfmon.c                        |   35 
 arch/ia64/oprofile/xenoprof.c                       |  142 +
 arch/ia64/pci/pci.c                                 |   16 
 arch/ia64/xen/Makefile                              |    9 
 arch/ia64/xen/hypercall.S                           |  170 +
 arch/ia64/xen/hypervisor.c                          | 1264 +++++++++
 arch/ia64/xen/machvec.c                             |    4 
 arch/ia64/xen/mem.c                                 |   75 
 arch/ia64/xen/swiotlb.c                             |  882 ++++++
 arch/ia64/xen/util.c                                |  105 
 arch/ia64/xen/xcom_hcall.c                          |  397 ++
 arch/ia64/xen/xcom_mini.c                           |  469 +++
 arch/ia64/xen/xcom_privcmd.c                        |  673 ++++
 arch/ia64/xen/xen_dma.c                             |  145 +
 arch/ia64/xen/xencomm.c                             |  263 +
 arch/ia64/xen/xenentry.S                            |  931 ++++++
 arch/ia64/xen/xenhpski.c                            |   19 
 arch/ia64/xen/xenivt.S                              | 2177 +++++++++++++++
 arch/ia64/xen/xenminstate.h                         |  358 ++
 arch/ia64/xen/xenpal.S                              |   85 
 arch/ia64/xen/xensetup.S                            |   52 
 arch/um/kernel/physmem.c                            |    4 
 arch/x86_64/Kconfig                                 |   68 
 arch/x86_64/Makefile                                |   20 
 arch/x86_64/ia32/Makefile                           |   20 
 arch/x86_64/ia32/ia32entry-xen.S                    |  743 +++++
 arch/x86_64/ia32/syscall32-xen.c                    |  128 
 arch/x86_64/ia32/syscall32_syscall-xen.S            |   28 
 arch/x86_64/ia32/vsyscall-int80.S                   |   58 
 arch/x86_64/ia32/vsyscall-sigreturn.S               |    2 
 arch/x86_64/kernel/Makefile                         |   19 
 arch/x86_64/kernel/acpi/Makefile                    |    1 
 arch/x86_64/kernel/apic-xen.c                       |  197 +
 arch/x86_64/kernel/asm-offsets.c                    |    2 
 arch/x86_64/kernel/crash.c                          |    6 
 arch/x86_64/kernel/e820-xen.c                       |  774 +++++
 arch/x86_64/kernel/early_printk-xen.c               |  302 ++
 arch/x86_64/kernel/entry-xen.S                      | 1325 +++++++++
 arch/x86_64/kernel/genapic-xen.c                    |  143 +
 arch/x86_64/kernel/genapic_xen.c                    |  161 +
 arch/x86_64/kernel/head-xen.S                       |  203 +
 arch/x86_64/kernel/head64-xen.c                     |  162 +
 arch/x86_64/kernel/init_task.c                      |    3 
 arch/x86_64/kernel/io_apic-xen.c                    | 2269 ++++++++++++++++
 arch/x86_64/kernel/ioport-xen.c                     |   99 
 arch/x86_64/kernel/irq-xen.c                        |  197 +
 arch/x86_64/kernel/ldt-xen.c                        |  282 ++
 arch/x86_64/kernel/machine_kexec.c                  |  118 
 arch/x86_64/kernel/mpparse-xen.c                    | 1011 +++++++
 arch/x86_64/kernel/pci-swiotlb-xen.c                |   55 
 arch/x86_64/kernel/process-xen.c                    |  829 +++++
 arch/x86_64/kernel/setup-xen.c                      | 1677 ++++++++++++
 arch/x86_64/kernel/setup64-xen.c                    |  361 ++
 arch/x86_64/kernel/smp-xen.c                        |  600 ++++
 arch/x86_64/kernel/traps-xen.c                      | 1175 ++++++++
 arch/x86_64/kernel/vsyscall-xen.c                   |  239 +
 arch/x86_64/kernel/xen_entry.S                      |   40 
 arch/x86_64/mm/Makefile                             |   10 
 arch/x86_64/mm/fault-xen.c                          |  724 +++++
 arch/x86_64/mm/init-xen.c                           | 1238 ++++++++
 arch/x86_64/mm/pageattr-xen.c                       |  433 +++
 arch/x86_64/oprofile/Makefile                       |   10 
 arch/x86_64/pci/Makefile                            |   12 
 drivers/Makefile                                    |    1 
 drivers/acpi/Kconfig                                |    3 
 drivers/char/mem.c                                  |    6 
 drivers/char/tpm/Kconfig                            |   10 
 drivers/char/tpm/Makefile                           |    2 
 drivers/char/tpm/tpm.h                              |   15 
 drivers/char/tpm/tpm_vtpm.c                         |  542 +++
 drivers/char/tpm/tpm_vtpm.h                         |   55 
 drivers/char/tpm/tpm_xen.c                          |  720 +++++
 drivers/char/tty_io.c                               |    7 
 drivers/firmware/Kconfig                            |    1 
 drivers/pci/Kconfig                                 |    1 
 drivers/serial/Kconfig                              |    1 
 drivers/video/console/Kconfig                       |    1 
 drivers/xen/Kconfig                                 |  260 +
 drivers/xen/Makefile                                |   20 
 drivers/xen/balloon/Makefile                        |    2 
 drivers/xen/balloon/balloon.c                       |  663 ++++
 drivers/xen/balloon/common.h                        |   58 
 drivers/xen/balloon/sysfs.c                         |  170 +
 drivers/xen/blkback/Makefile                        |    3 
 drivers/xen/blkback/blkback.c                       |  614 ++++
 drivers/xen/blkback/common.h                        |  139 +
 drivers/xen/blkback/interface.c                     |  181 +
 drivers/xen/blkback/vbd.c                           |  118 
 drivers/xen/blkback/xenbus.c                        |  533 +++
 drivers/xen/blkfront/Makefile                       |    5 
 drivers/xen/blkfront/blkfront.c                     |  902 ++++++
 drivers/xen/blkfront/block.h                        |  142 +
 drivers/xen/blkfront/vbd.c                          |  372 ++
 drivers/xen/blktap/Makefile                         |    5 
 drivers/xen/blktap/blktap.c                         | 1641 +++++++++++
 drivers/xen/blktap/common.h                         |  121 
 drivers/xen/blktap/interface.c                      |  174 +
 drivers/xen/blktap/xenbus.c                         |  477 +++
 drivers/xen/char/Makefile                           |    2 
 drivers/xen/char/mem.c                              |  203 +
 drivers/xen/console/Makefile                        |    2 
 drivers/xen/console/console.c                       |  721 +++++
 drivers/xen/console/xencons_ring.c                  |  143 +
 drivers/xen/core/Makefile                           |   12 
 drivers/xen/core/cpu_hotplug.c                      |  172 +
 drivers/xen/core/evtchn.c                           | 1015 +++++++
 drivers/xen/core/features.c                         |   34 
 drivers/xen/core/gnttab.c                           |  753 +++++
 drivers/xen/core/hypervisor_sysfs.c                 |   59 
 drivers/xen/core/machine_kexec.c                    |  189 +
 drivers/xen/core/machine_reboot.c                   |  241 +
 drivers/xen/core/reboot.c                           |  249 +
 drivers/xen/core/smpboot.c                          |  452 +++
 drivers/xen/core/xen_proc.c                         |   23 
 drivers/xen/core/xen_sysfs.c                        |  378 ++
 drivers/xen/evtchn/Makefile                         |    2 
 drivers/xen/evtchn/evtchn.c                         |  469 +++
 drivers/xen/fbfront/Makefile                        |    2 
 drivers/xen/fbfront/xenfb.c                         |  752 +++++
 drivers/xen/fbfront/xenkbd.c                        |  333 ++
 drivers/xen/gntdev/Makefile                         |    1 
 drivers/xen/gntdev/gntdev.c                         |  973 +++++++
 drivers/xen/netback/Makefile                        |    5 
 drivers/xen/netback/common.h                        |  165 +
 drivers/xen/netback/interface.c                     |  336 ++
 drivers/xen/netback/loopback.c                      |  320 ++
 drivers/xen/netback/netback.c                       | 1606 +++++++++++
 drivers/xen/netback/xenbus.c                        |  453 +++
 drivers/xen/netfront/Makefile                       |    4 
 drivers/xen/netfront/netfront.c                     | 2133 +++++++++++++++
 drivers/xen/pciback/Makefile                        |   15 
 drivers/xen/pciback/conf_space.c                    |  426 +++
 drivers/xen/pciback/conf_space.h                    |  126 
 drivers/xen/pciback/conf_space_capability.c         |   71 
 drivers/xen/pciback/conf_space_capability.h         |   23 
 drivers/xen/pciback/conf_space_capability_pm.c      |  128 
 drivers/xen/pciback/conf_space_capability_vpd.c     |   42 
 drivers/xen/pciback/conf_space_header.c             |  309 ++
 drivers/xen/pciback/conf_space_quirks.c             |  126 
 drivers/xen/pciback/conf_space_quirks.h             |   35 
 drivers/xen/pciback/passthrough.c                   |  157 +
 drivers/xen/pciback/pci_stub.c                      |  929 ++++++
 drivers/xen/pciback/pciback.h                       |   93 
 drivers/xen/pciback/pciback_ops.c                   |   95 
 drivers/xen/pciback/slot.c                          |  151 +
 drivers/xen/pciback/vpci.c                          |  204 +
 drivers/xen/pciback/xenbus.c                        |  454 +++
 drivers/xen/pcifront/Makefile                       |    7 
 drivers/xen/pcifront/pci.c                          |   46 
 drivers/xen/pcifront/pci_op.c                       |  268 +
 drivers/xen/pcifront/pcifront.h                     |   40 
 drivers/xen/pcifront/xenbus.c                       |  295 ++
 drivers/xen/privcmd/Makefile                        |    2 
 drivers/xen/privcmd/privcmd.c                       |  284 ++
 drivers/xen/tpmback/Makefile                        |    4 
 drivers/xen/tpmback/common.h                        |   85 
 drivers/xen/tpmback/interface.c                     |  167 +
 drivers/xen/tpmback/tpmback.c                       |  944 ++++++
 drivers/xen/tpmback/xenbus.c                        |  289 ++
 drivers/xen/util.c                                  |   70 
 drivers/xen/xenbus/Makefile                         |    9 
 drivers/xen/xenbus/xenbus_backend_client.c          |  147 +
 drivers/xen/xenbus/xenbus_client.c                  |  283 ++
 drivers/xen/xenbus/xenbus_comms.c                   |  232 +
 drivers/xen/xenbus/xenbus_comms.h                   |   46 
 drivers/xen/xenbus/xenbus_dev.c                     |  404 ++
 drivers/xen/xenbus/xenbus_probe.c                   | 1087 +++++++
 drivers/xen/xenbus/xenbus_probe.h                   |   75 
 drivers/xen/xenbus/xenbus_probe_backend.c           |  287 ++
 drivers/xen/xenbus/xenbus_xs.c                      |  880 ++++++
 drivers/xen/xenoprof/xenoprofile.c                  |  500 +++
 fs/Kconfig                                          |    1 
 include/asm-i386/apic.h                             |    2 
 include/asm-i386/kexec.h                            |   14 
 include/asm-i386/mach-xen/asm/agp.h                 |   37 
 include/asm-i386/mach-xen/asm/desc.h                |  164 +
 include/asm-i386/mach-xen/asm/dma-mapping.h         |  157 +
 include/asm-i386/mach-xen/asm/fixmap.h              |  155 +
 include/asm-i386/mach-xen/asm/floppy.h              |  147 +
 include/asm-i386/mach-xen/asm/highmem.h             |   80 
 include/asm-i386/mach-xen/asm/hw_irq.h              |   72 
 include/asm-i386/mach-xen/asm/hypercall.h           |  407 ++
 include/asm-i386/mach-xen/asm/hypervisor.h          |  258 +
 include/asm-i386/mach-xen/asm/io.h                  |  390 ++
 include/asm-i386/mach-xen/asm/irqflags.h            |  127 
 include/asm-i386/mach-xen/asm/maddr.h               |  193 +
 include/asm-i386/mach-xen/asm/mmu.h                 |   29 
 include/asm-i386/mach-xen/asm/mmu_context.h         |  108 
 include/asm-i386/mach-xen/asm/page.h                |  235 +
 include/asm-i386/mach-xen/asm/param.h               |   23 
 include/asm-i386/mach-xen/asm/pci.h                 |  146 +
 include/asm-i386/mach-xen/asm/pgalloc.h             |   59 
 include/asm-i386/mach-xen/asm/pgtable-2level-defs.h |   20 
 include/asm-i386/mach-xen/asm/pgtable-2level.h      |  120 
 include/asm-i386/mach-xen/asm/pgtable-3level-defs.h |   24 
 include/asm-i386/mach-xen/asm/pgtable-3level.h      |  209 +
 include/asm-i386/mach-xen/asm/pgtable.h             |  531 +++
 include/asm-i386/mach-xen/asm/processor.h           |  741 +++++
 include/asm-i386/mach-xen/asm/ptrace.h              |   90 
 include/asm-i386/mach-xen/asm/scatterlist.h         |   22 
 include/asm-i386/mach-xen/asm/segment.h             |  117 
 include/asm-i386/mach-xen/asm/setup.h               |   81 
 include/asm-i386/mach-xen/asm/smp.h                 |  103 
 include/asm-i386/mach-xen/asm/spinlock.h            |  202 +
 include/asm-i386/mach-xen/asm/swiotlb.h             |   43 
 include/asm-i386/mach-xen/asm/synch_bitops.h        |  145 +
 include/asm-i386/mach-xen/asm/system.h              |  488 +++
 include/asm-i386/mach-xen/asm/tlbflush.h            |  101 
 include/asm-i386/mach-xen/asm/vga.h                 |   20 
 include/asm-i386/mach-xen/asm/xenoprof.h            |   48 
 include/asm-i386/mach-xen/irq_vectors.h             |  125 
 include/asm-i386/mach-xen/mach_traps.h              |   33 
 include/asm-i386/mach-xen/setup_arch.h              |    5 
 include/asm-ia64/agp.h                              |   31 
 include/asm-ia64/dma-mapping.h                      |   23 
 include/asm-ia64/gcc_intrin.h                       |   68 
 include/asm-ia64/hw_irq.h                           |   10 
 include/asm-ia64/hypercall.h                        |  416 ++
 include/asm-ia64/hypervisor.h                       |  226 +
 include/asm-ia64/intel_intrin.h                     |   66 
 include/asm-ia64/io.h                               |   37 
 include/asm-ia64/iosapic.h                          |    2 
 include/asm-ia64/irq.h                              |   33 
 include/asm-ia64/machvec.h                          |    2 
 include/asm-ia64/machvec_xen.h                      |   43 
 include/asm-ia64/maddr.h                            |  107 
 include/asm-ia64/meminit.h                          |    5 
 include/asm-ia64/page.h                             |   23 
 include/asm-ia64/pal.h                              |    1 
 include/asm-ia64/pgalloc.h                          |    4 
 include/asm-ia64/privop.h                           |   60 
 include/asm-ia64/processor.h                        |    1 
 include/asm-ia64/sal.h                              |   21 
 include/asm-ia64/swiotlb.h                          |   41 
 include/asm-ia64/synch_bitops.h                     |   61 
 include/asm-ia64/system.h                           |    4 
 include/asm-ia64/uaccess.h                          |   20 
 include/asm-ia64/xen/privop.h                       |  310 ++
 include/asm-ia64/xen/xcom_hcall.h                   |   88 
 include/asm-ia64/xen/xencomm.h                      |   60 
 include/asm-ia64/xenoprof.h                         |   48 
 include/asm-um/page.h                               |    2 
 include/asm-x86_64/apic.h                           |    2 
 include/asm-x86_64/kexec.h                          |   13 
 include/asm-x86_64/mach-xen/asm/agp.h               |   35 
 include/asm-x86_64/mach-xen/asm/arch_hooks.h        |   27 
 include/asm-x86_64/mach-xen/asm/bootsetup.h         |   42 
 include/asm-x86_64/mach-xen/asm/desc.h              |  263 +
 include/asm-x86_64/mach-xen/asm/dma-mapping.h       |  207 +
 include/asm-x86_64/mach-xen/asm/e820.h              |   66 
 include/asm-x86_64/mach-xen/asm/fixmap.h            |  112 
 include/asm-x86_64/mach-xen/asm/floppy.h            |  206 +
 include/asm-x86_64/mach-xen/asm/hw_irq.h            |  136 
 include/asm-x86_64/mach-xen/asm/hypercall.h         |  406 ++
 include/asm-x86_64/mach-xen/asm/hypervisor.h        |    2 
 include/asm-x86_64/mach-xen/asm/io.h                |  330 ++
 include/asm-x86_64/mach-xen/asm/irq.h               |   38 
 include/asm-x86_64/mach-xen/asm/irqflags.h          |  139 +
 include/asm-x86_64/mach-xen/asm/maddr.h             |  161 +
 include/asm-x86_64/mach-xen/asm/mmu.h               |   38 
 include/asm-x86_64/mach-xen/asm/mmu_context.h       |  136 
 include/asm-x86_64/mach-xen/asm/msr.h               |  399 ++
 include/asm-x86_64/mach-xen/asm/nmi.h               |   93 
 include/asm-x86_64/mach-xen/asm/page.h              |  217 +
 include/asm-x86_64/mach-xen/asm/pci.h               |  166 +
 include/asm-x86_64/mach-xen/asm/pgalloc.h           |  204 +
 include/asm-x86_64/mach-xen/asm/pgtable.h           |  575 ++++
 include/asm-x86_64/mach-xen/asm/processor.h         |  506 +++
 include/asm-x86_64/mach-xen/asm/ptrace.h            |  127 
 include/asm-x86_64/mach-xen/asm/smp.h               |  150 +
 include/asm-x86_64/mach-xen/asm/synch_bitops.h      |    2 
 include/asm-x86_64/mach-xen/asm/system.h            |  262 +
 include/asm-x86_64/mach-xen/asm/timer.h             |   67 
 include/asm-x86_64/mach-xen/asm/tlbflush.h          |  103 
 include/asm-x86_64/mach-xen/asm/vga.h               |   20 
 include/asm-x86_64/mach-xen/asm/xenoprof.h          |    1 
 include/asm-x86_64/mach-xen/asm/xor.h               |  328 ++
 include/asm-x86_64/mach-xen/irq_vectors.h           |  123 
 include/asm-x86_64/mach-xen/mach_time.h             |  111 
 include/asm-x86_64/mach-xen/mach_timer.h            |   50 
 include/asm-x86_64/mach-xen/setup_arch_post.h       |   63 
 include/asm-x86_64/mach-xen/setup_arch_pre.h        |    5 
 include/linux/gfp.h                                 |    6 
 include/linux/highmem.h                             |    6 
 include/linux/interrupt.h                           |    6 
 include/linux/kexec.h                               |   13 
 include/linux/mm.h                                  |   14 
 include/linux/page-flags.h                          |   14 
 include/linux/skbuff.h                              |    8 
 include/xen/balloon.h                               |   57 
 include/xen/blkif.h                                 |   97 
 include/xen/cpu_hotplug.h                           |   44 
 include/xen/driver_util.h                           |   14 
 include/xen/evtchn.h                                |  126 
 include/xen/features.h                              |   20 
 include/xen/gnttab.h                                |  167 +
 include/xen/hvm.h                                   |   23 
 include/xen/hypercall.h                             |   24 
 include/xen/hypervisor_sysfs.h                      |   32 
 include/xen/interface/COPYING                       |   38 
 include/xen/interface/acm.h                         |  228 +
 include/xen/interface/acm_ops.h                     |  159 +
 include/xen/interface/arch-ia64.h                   |  628 ++++
 include/xen/interface/arch-powerpc.h                |  125 
 include/xen/interface/arch-x86/xen-x86_32.h         |  168 +
 include/xen/interface/arch-x86/xen-x86_64.h         |  211 +
 include/xen/interface/arch-x86/xen.h                |  204 +
 include/xen/interface/arch-x86_32.h                 |   27 
 include/xen/interface/arch-x86_64.h                 |   27 
 include/xen/interface/callback.h                    |   92 
 include/xen/interface/dom0_ops.h                    |  120 
 include/xen/interface/domctl.h                      |  478 +++
 include/xen/interface/elfnote.h                     |  233 +
 include/xen/interface/elfstructs.h                  |  527 +++
 include/xen/interface/event_channel.h               |  264 +
 include/xen/interface/features.h                    |   71 
 include/xen/interface/foreign/Makefile              |   37 
 include/xen/interface/foreign/mkchecker.py          |   58 
 include/xen/interface/foreign/mkheader.py           |  167 +
 include/xen/interface/foreign/reference.size        |   18 
 include/xen/interface/foreign/structs.py            |   58 
 include/xen/interface/grant_table.h                 |  422 +++
 include/xen/interface/hvm/e820.h                    |   47 
 include/xen/interface/hvm/hvm_info_table.h          |   41 
 include/xen/interface/hvm/hvm_op.h                  |   73 
 include/xen/interface/hvm/ioreq.h                   |  122 
 include/xen/interface/hvm/params.h                  |   60 
 include/xen/interface/hvm/save.h                    |  462 +++
 include/xen/interface/hvm/vmx_assist.h              |  116 
 include/xen/interface/io/blkif.h                    |  128 
 include/xen/interface/io/console.h                  |   51 
 include/xen/interface/io/fbif.h                     |  138 
 include/xen/interface/io/kbdif.h                    |  130 
 include/xen/interface/io/netif.h                    |  184 +
 include/xen/interface/io/pciif.h                    |   83 
 include/xen/interface/io/protocols.h                |   21 
 include/xen/interface/io/ring.h                     |  299 ++
 include/xen/interface/io/tpmif.h                    |   77 
 include/xen/interface/io/xenbus.h                   |   73 
 include/xen/interface/io/xs_wire.h                  |  117 
 include/xen/interface/kexec.h                       |  137 
 include/xen/interface/libelf.h                      |  241 +
 include/xen/interface/memory.h                      |  281 ++
 include/xen/interface/nmi.h                         |   78 
 include/xen/interface/physdev.h                     |  169 +
 include/xen/interface/platform.h                    |  143 +
 include/xen/interface/sched.h                       |  121 
 include/xen/interface/sysctl.h                      |  182 +
 include/xen/interface/trace.h                       |  119 
 include/xen/interface/vcpu.h                        |  192 +
 include/xen/interface/version.h                     |   91 
 include/xen/interface/xen-compat.h                  |   51 
 include/xen/interface/xen.h                         |  610 ++++
 include/xen/interface/xencomm.h                     |   41 
 include/xen/interface/xenoprof.h                    |  132 
 include/xen/pcifront.h                              |   77 
 include/xen/public/evtchn.h                         |   88 
 include/xen/public/gntdev.h                         |  105 
 include/xen/public/privcmd.h                        |   79 
 include/xen/xen_proc.h                              |   12 
 include/xen/xenbus.h                                |  302 ++
 include/xen/xencons.h                               |   19 
 include/xen/xenoprof.h                              |   42 
 kernel/Kconfig.preempt                              |    1 
 kernel/fork.c                                       |    3 
 kernel/irq/spurious.c                               |    3 
 kernel/kexec.c                                      |   73 
 lib/Makefile                                        |    6 
 mm/Kconfig                                          |    3 
 mm/highmem.c                                        |   11 
 mm/memory.c                                         |  129 
 mm/mmap.c                                           |    4 
 mm/page_alloc.c                                     |   24 
 net/core/dev.c                                      |   66 
 net/core/skbuff.c                                   |    5 
 scripts/Makefile.xen                                |   14 
 450 files changed, 100652 insertions(+), 189 deletions(-)

diff -r 42f6970e18c9 -r a533be77c572 arch/i386/Kconfig
--- a/arch/i386/Kconfig Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/Kconfig Mon Jun 04 10:05:28 2007 +0100
@@ -16,6 +16,7 @@ config X86_32
 
 config GENERIC_TIME
        bool
+       depends on !X86_XEN
        default y
 
 config LOCKDEP_SUPPORT
@@ -102,6 +103,15 @@ config X86_PC
        bool "PC-compatible"
        help
          Choose this option if your computer is a standard PC or compatible.
+
+config X86_XEN
+       bool "Xen-compatible"
+       select X86_UP_APIC if !SMP && XEN_PRIVILEGED_GUEST
+       select X86_UP_IOAPIC if !SMP && XEN_PRIVILEGED_GUEST
+       select SWIOTLB
+       help
+         Choose this option if you plan to run this kernel on top of the
+         Xen Hypervisor.
 
 config X86_ELAN
        bool "AMD Elan"
@@ -213,6 +223,7 @@ source "arch/i386/Kconfig.cpu"
 
 config HPET_TIMER
        bool "HPET Timer Support"
+       depends on !X86_XEN
        help
          This enables the use of the HPET for the kernel's internal timer.
          HPET is the next generation timer replacing legacy 8254s.
@@ -263,7 +274,7 @@ source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
        bool "Local APIC support on uniprocessors"
-       depends on !SMP && !(X86_VISWS || X86_VOYAGER)
+       depends on !SMP && !(X86_VISWS || X86_VOYAGER || XEN_UNPRIVILEGED_GUEST)
        help
          A local APIC (Advanced Programmable Interrupt Controller) is an
          integrated interrupt controller in the CPU. If you have a single-CPU
@@ -288,12 +299,12 @@ config X86_UP_IOAPIC
 
 config X86_LOCAL_APIC
        bool
-       depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER)
+       depends on X86_UP_APIC || ((X86_VISWS || SMP) && !(X86_VOYAGER || 
XEN_UNPRIVILEGED_GUEST))
        default y
 
 config X86_IO_APIC
        bool
-       depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER))
+       depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER || 
XEN_UNPRIVILEGED_GUEST))
        default y
 
 config X86_VISWS_APIC
@@ -303,7 +314,7 @@ config X86_VISWS_APIC
 
 config X86_MCE
        bool "Machine Check Exception"
-       depends on !X86_VOYAGER
+       depends on !(X86_VOYAGER || X86_XEN)
        ---help---
          Machine Check Exception support allows the processor to notify the
          kernel if it detects a problem (e.g. overheating, component failure).
@@ -402,6 +413,7 @@ config X86_REBOOTFIXUPS
 
 config MICROCODE
        tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
+       depends on !XEN_UNPRIVILEGED_GUEST
        ---help---
          If you say Y here and also to "/dev file system support" in the
          'File systems' section, you will be able to update the microcode on
@@ -419,6 +431,7 @@ config MICROCODE
 
 config X86_MSR
        tristate "/dev/cpu/*/msr - Model-specific register support"
+       depends on !X86_XEN
        help
          This device gives privileged processes access to the x86
          Model-Specific Registers (MSRs).  It is a character device with
@@ -433,6 +446,10 @@ config X86_CPUID
          be executed on a specific processor.  It is a character device
          with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
          /dev/cpu/31/cpuid.
+
+config SWIOTLB
+       bool
+       default n
 
 source "drivers/firmware/Kconfig"
 
@@ -616,6 +633,7 @@ config HIGHPTE
 
 config MATH_EMULATION
        bool "Math emulation"
+       depends on !X86_XEN
        ---help---
          Linux can emulate a math coprocessor (used for floating point
          operations) if you don't have one. 486DX and Pentium processors have
@@ -641,6 +659,8 @@ config MATH_EMULATION
 
 config MTRR
        bool "MTRR (Memory Type Range Register) support"
+       depends on !XEN_UNPRIVILEGED_GUEST
+       default y if X86_XEN
        ---help---
          On Intel P6 family processors (Pentium Pro, Pentium II and later)
          the Memory Type Range Registers (MTRRs) may be used to control
@@ -675,7 +695,7 @@ config MTRR
 
 config EFI
        bool "Boot from EFI support"
-       depends on ACPI
+       depends on ACPI && !X86_XEN
        default n
        ---help---
        This enables the the kernel to boot on EFI platforms using
@@ -693,7 +713,7 @@ config EFI
 
 config IRQBALANCE
        bool "Enable kernel irq balancing"
-       depends on SMP && X86_IO_APIC
+       depends on SMP && X86_IO_APIC && !X86_XEN
        default y
        help
          The default yes will allow the kernel to do irq load balancing.
@@ -741,7 +761,7 @@ source kernel/Kconfig.hz
 
 config KEXEC
        bool "kexec system call (EXPERIMENTAL)"
-       depends on EXPERIMENTAL
+       depends on EXPERIMENTAL && !XEN_UNPRIVILEGED_GUEST
        help
          kexec is a system call that implements the ability to shutdown your
          current kernel, and to start another kernel.  It is like a reboot
@@ -793,6 +813,7 @@ config HOTPLUG_CPU
 
 config COMPAT_VDSO
        bool "Compat VDSO support"
+       depends on !X86_XEN
        default y
        help
          Map the VDSO to the predictable old-style address too.
@@ -810,18 +831,20 @@ config ARCH_ENABLE_MEMORY_HOTPLUG
        depends on HIGHMEM
 
 menu "Power management options (ACPI, APM)"
-       depends on !X86_VOYAGER
-
+       depends on !(X86_VOYAGER || XEN_UNPRIVILEGED_GUEST)
+
+if !X86_XEN
 source kernel/power/Kconfig
+endif
 
 source "drivers/acpi/Kconfig"
 
 menu "APM (Advanced Power Management) BIOS Support"
-depends on PM && !X86_VISWS
+depends on PM && !(X86_VISWS || X86_XEN)
 
 config APM
        tristate "APM (Advanced Power Management) BIOS support"
-       depends on PM
+       depends on PM && PM_LEGACY
        ---help---
          APM is a BIOS specification for saving power using several different
          techniques. This is mostly useful for battery powered laptops with
@@ -1006,6 +1029,7 @@ choice
 
 config PCI_GOBIOS
        bool "BIOS"
+       depends on !X86_XEN
 
 config PCI_GOMMCONFIG
        bool "MMConfig"
@@ -1013,6 +1037,13 @@ config PCI_GODIRECT
 config PCI_GODIRECT
        bool "Direct"
 
+config PCI_GOXEN_FE
+       bool "Xen PCI Frontend"
+       depends on X86_XEN
+       help
+         The PCI device frontend driver allows the kernel to import arbitrary
+         PCI devices from a PCI backend to support PCI driver domains.
+
 config PCI_GOANY
        bool "Any"
 
@@ -1020,7 +1051,7 @@ endchoice
 
 config PCI_BIOS
        bool
-       depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
+       depends on !(X86_VISWS || X86_XEN) && PCI && (PCI_GOBIOS || PCI_GOANY)
        default y
 
 config PCI_DIRECT
@@ -1032,6 +1063,18 @@ config PCI_MMCONFIG
        bool
        depends on PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY)
        default y
+
+config XEN_PCIDEV_FRONTEND
+       bool
+       depends on PCI && X86_XEN && (PCI_GOXEN_FE || PCI_GOANY)
+       default y
+
+config XEN_PCIDEV_FE_DEBUG
+       bool "Xen PCI Frontend Debugging"
+       depends on XEN_PCIDEV_FRONTEND
+       default n
+       help
+         Enables some debug statements within the PCI Frontend.
 
 source "drivers/pci/pcie/Kconfig"
 
@@ -1043,7 +1086,7 @@ config ISA_DMA_API
 
 config ISA
        bool "ISA support"
-       depends on !(X86_VOYAGER || X86_VISWS)
+       depends on !(X86_VOYAGER || X86_VISWS || X86_XEN)
        help
          Find out whether you have ISA slots on your motherboard.  ISA is the
          name of a bus system, i.e. the way the CPU talks to the other stuff
@@ -1070,7 +1113,7 @@ source "drivers/eisa/Kconfig"
 source "drivers/eisa/Kconfig"
 
 config MCA
-       bool "MCA support" if !(X86_VISWS || X86_VOYAGER)
+       bool "MCA support" if !(X86_VISWS || X86_VOYAGER || X86_XEN)
        default y if X86_VOYAGER
        help
          MicroChannel Architecture is found in some IBM PS/2 machines and
@@ -1146,6 +1189,8 @@ source "security/Kconfig"
 
 source "crypto/Kconfig"
 
+source "drivers/xen/Kconfig"
+
 source "lib/Kconfig"
 
 #
@@ -1171,7 +1216,7 @@ config X86_SMP
 
 config X86_HT
        bool
-       depends on SMP && !(X86_VISWS || X86_VOYAGER)
+       depends on SMP && !(X86_VISWS || X86_VOYAGER || X86_XEN)
        default y
 
 config X86_BIOS_REBOOT
@@ -1184,6 +1229,16 @@ config X86_TRAMPOLINE
        depends on X86_SMP || (X86_VOYAGER && SMP)
        default y
 
+config X86_NO_TSS
+       bool
+       depends on X86_XEN
+       default y
+
+config X86_NO_IDT
+       bool
+       depends on X86_XEN
+       default y
+
 config KTIME_SCALAR
        bool
        default y
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/Kconfig.cpu
--- a/arch/i386/Kconfig.cpu     Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/Kconfig.cpu     Mon Jun 04 10:05:28 2007 +0100
@@ -251,7 +251,7 @@ config X86_PPRO_FENCE
 
 config X86_F00F_BUG
        bool
-       depends on M586MMX || M586TSC || M586 || M486 || M386
+       depends on (M586MMX || M586TSC || M586 || M486 || M386) && !X86_NO_IDT
        default y
 
 config X86_WP_WORKS_OK
@@ -311,5 +311,5 @@ config X86_OOSTORE
 
 config X86_TSC
        bool
-       depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || 
MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII 
|| M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && 
!X86_NUMAQ
-       default y
+       depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || 
MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII 
|| M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && 
!X86_NUMAQ && !X86_XEN
+       default y
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/Kconfig.debug
--- a/arch/i386/Kconfig.debug   Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/Kconfig.debug   Mon Jun 04 10:05:28 2007 +0100
@@ -79,6 +79,7 @@ config DOUBLEFAULT
 config DOUBLEFAULT
        default y
        bool "Enable doublefault exception handler" if EMBEDDED
+       depends on !X86_NO_TSS
        help
           This option allows trapping of rare doublefault exceptions that
           would otherwise cause a system to silently reboot. Disabling this
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/Makefile
--- a/arch/i386/Makefile        Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/Makefile        Mon Jun 04 10:05:28 2007 +0100
@@ -48,6 +48,11 @@ CFLAGS                               += $(shell if [ $(call 
cc-vers
 
 CFLAGS += $(cflags-y)
 
+cppflags-$(CONFIG_XEN) += \
+       -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION)
+
+CPPFLAGS += $(cppflags-y)
+
 # Default subarch .c files
 mcore-y  := mach-default
 
@@ -70,6 +75,10 @@ mcore-$(CONFIG_X86_BIGSMP)   := mach-defau
 #Summit subarch support
 mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-i386/mach-summit
 mcore-$(CONFIG_X86_SUMMIT)  := mach-default
+
+# Xen subarch support
+mflags-$(CONFIG_X86_XEN)       := -Iinclude/asm-i386/mach-xen
+mcore-$(CONFIG_X86_XEN)                := mach-xen
 
 # generic subarchitecture
 mflags-$(CONFIG_X86_GENERICARCH) := -Iinclude/asm-i386/mach-generic
@@ -105,6 +114,19 @@ PHONY += zImage bzImage compressed zlilo
 PHONY += zImage bzImage compressed zlilo bzlilo \
          zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install
 
+ifdef CONFIG_XEN
+CPPFLAGS := -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS)
+head-y := arch/i386/kernel/head-xen.o arch/i386/kernel/init_task-xen.o
+boot := arch/i386/boot-xen
+.PHONY: vmlinuz
+all: vmlinuz
+
+vmlinuz: vmlinux
+       $(Q)$(MAKE) $(build)=$(boot) $@
+
+install:
+       $(Q)$(MAKE) $(build)=$(boot) $@
+else
 all: bzImage
 
 # KBUILD_IMAGE specify target image being built
@@ -127,6 +149,7 @@ fdimage fdimage144 fdimage288 isoimage: 
 
 install:
        $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install
+endif
 
 archclean:
        $(Q)$(MAKE) $(clean)=arch/i386/boot
@@ -145,3 +168,4 @@ CLEAN_FILES += arch/$(ARCH)/boot/fdimage
 CLEAN_FILES += arch/$(ARCH)/boot/fdimage \
               arch/$(ARCH)/boot/image.iso \
               arch/$(ARCH)/boot/mtools.conf
+CLEAN_FILES += vmlinuz vmlinux-stripped
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/boot-xen/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/boot-xen/Makefile       Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,19 @@
+
+OBJCOPYFLAGS := -g --strip-unneeded
+
+vmlinuz: vmlinux-stripped FORCE
+       $(call if_changed,gzip)
+
+vmlinux-stripped: vmlinux FORCE
+       $(call if_changed,objcopy)
+
+INSTALL_ROOT := $(patsubst %/boot,%,$(INSTALL_PATH))
+
+XINSTALL_NAME ?= $(KERNELRELEASE)
+install:
+       mkdir -p $(INSTALL_ROOT)/boot
+       install -m0644 vmlinuz 
$(INSTALL_ROOT)/boot/vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
+       install -m0644 vmlinux 
$(INSTALL_ROOT)/boot/vmlinux-syms-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
+       install -m0664 .config 
$(INSTALL_ROOT)/boot/config-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
+       install -m0664 System.map 
$(INSTALL_ROOT)/boot/System.map-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
+       ln -f -s vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX) 
$(INSTALL_ROOT)/boot/vmlinuz-$(VERSION).$(PATCHLEVEL)$(shell [ -e 
$(objtree)/localversion-xen ] && cat 
$(objtree)/localversion-xen)$(INSTALL_SUFFIX)
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/Makefile
--- a/arch/i386/kernel/Makefile Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/Makefile Mon Jun 04 10:05:28 2007 +0100
@@ -44,6 +44,12 @@ EXTRA_AFLAGS   := -traditional
 
 obj-$(CONFIG_SCx200)           += scx200.o
 
+ifdef CONFIG_XEN
+vsyscall_note := vsyscall-note-xen.o
+else
+vsyscall_note := vsyscall-note.o
+endif
+
 # vsyscall.o contains the vsyscall DSO images as __initdata.
 # We must build both images before we can assemble it.
 # Note: kbuild does not track this dependency due to usage of .incbin
@@ -65,7 +71,7 @@ SYSCFLAGS_vsyscall-int80.so   = $(vsyscall
 
 $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
 $(obj)/vsyscall-%.so: $(src)/vsyscall.lds \
-                     $(obj)/vsyscall-%.o $(obj)/vsyscall-note.o FORCE
+                     $(obj)/vsyscall-%.o $(obj)/$(vsyscall_note) FORCE
        $(call if_changed,syscall)
 
 # We also create a special relocatable object that should mirror the symbol
@@ -77,8 +83,20 @@ extra-y += vsyscall-syms.o
 
 SYSCFLAGS_vsyscall-syms.o = -r
 $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
-                       $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
+                       $(obj)/vsyscall-sysenter.o $(obj)/$(vsyscall_note) FORCE
        $(call if_changed,syscall)
 
 k8-y                      += ../../x86_64/kernel/k8.o
 
+ifdef CONFIG_XEN
+include $(srctree)/scripts/Makefile.xen
+
+obj-y += fixup.o
+microcode-$(subst m,y,$(CONFIG_MICROCODE)) := microcode-xen.o
+n-obj-xen := i8259.o timers/ reboot.o smpboot.o trampoline.o
+
+obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
+obj-y := $(call cherrypickxen, $(obj-y))
+extra-y := $(call cherrypickxen, $(extra-y))
+%/head-xen.o %/head-xen.s: EXTRA_AFLAGS :=
+endif
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/acpi/Makefile
--- a/arch/i386/kernel/acpi/Makefile    Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/acpi/Makefile    Mon Jun 04 10:05:28 2007 +0100
@@ -6,3 +6,7 @@ obj-y                           += cstate.o processor.o
 obj-y                          += cstate.o processor.o
 endif
 
+ifdef CONFIG_XEN
+include $(srctree)/scripts/Makefile.xen
+obj-y := $(call cherrypickxen, $(obj-y), $(src))
+endif
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/acpi/boot-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/acpi/boot-xen.c  Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,1168 @@
+/*
+ *  boot.c - Architecture-Specific Low-Level ACPI Boot Support
+ *
+ *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx>
+ *  Copyright (C) 2001 Jun Nakajima <jun.nakajima@xxxxxxxxx>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <linux/module.h>
+#include <linux/dmi.h>
+#include <linux/irq.h>
+
+#include <asm/pgtable.h>
+#include <asm/io_apic.h>
+#include <asm/apic.h>
+#include <asm/io.h>
+#include <asm/mpspec.h>
+
+#ifdef CONFIG_X86_64
+
+extern void __init clustered_apic_check(void);
+
+extern int gsi_irq_sharing(int gsi);
+#include <asm/proto.h>
+
+static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { 
return 0; }
+
+
+#else                          /* X86 */
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <mach_apic.h>
+#include <mach_mpparse.h>
+#endif                         /* CONFIG_X86_LOCAL_APIC */
+
+static inline int gsi_irq_sharing(int gsi) { return gsi; }
+
+#endif                         /* X86 */
+
+#define BAD_MADT_ENTRY(entry, end) (                                       \
+               (!entry) || (unsigned long)entry + sizeof(*entry) > end ||  \
+               ((acpi_table_entry_header *)entry)->length < sizeof(*entry))
+
+#define PREFIX                 "ACPI: "
+
+int acpi_noirq __initdata;     /* skip ACPI IRQ initialization */
+int acpi_pci_disabled __initdata;      /* skip ACPI PCI scan and IRQ 
initialization */
+int acpi_ht __initdata = 1;    /* enable HT */
+
+int acpi_lapic;
+int acpi_ioapic;
+int acpi_strict;
+EXPORT_SYMBOL(acpi_strict);
+
+acpi_interrupt_flags acpi_sci_flags __initdata;
+int acpi_sci_override_gsi __initdata;
+int acpi_skip_timer_override __initdata;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
+#endif
+
+#ifndef __HAVE_ARCH_CMPXCHG
+#warning ACPI uses CMPXCHG, i486 and later hardware
+#endif
+
+#define MAX_MADT_ENTRIES       256
+u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] =
+    {[0 ... MAX_MADT_ENTRIES - 1] = 0xff };
+EXPORT_SYMBOL(x86_acpiid_to_apicid);
+
+/* --------------------------------------------------------------------------
+                              Boot-time Configuration
+   -------------------------------------------------------------------------- 
*/
+
+/*
+ * The default interrupt routing model is PIC (8259).  This gets
+ * overriden if IOAPICs are enumerated (below).
+ */
+enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
+
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
+
+/* rely on all ACPI tables being in the direct mapping */
+char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
+{
+       if (!phys_addr || !size)
+               return NULL;
+
+       if (phys_addr+size <= (end_pfn_map << PAGE_SHIFT) + PAGE_SIZE)
+               return __va(phys_addr);
+
+       return NULL;
+}
+
+#else
+
+/*
+ * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
+ * to map the target physical address. The problem is that set_fixmap()
+ * provides a single page, and it is possible that the page is not
+ * sufficient.
+ * By using this area, we can map up to MAX_IO_APICS pages temporarily,
+ * i.e. until the next __va_range() call.
+ *
+ * Important Safety Note:  The fixed I/O APIC page numbers are *subtracted*
+ * from the fixed base.  That's why we start at FIX_IO_APIC_BASE_END and
+ * count idx down while incrementing the phys address.
+ */
+char *__acpi_map_table(unsigned long phys, unsigned long size)
+{
+       unsigned long base, offset, mapped_size;
+       int idx;
+
+#ifndef CONFIG_XEN
+       if (phys + size < 8 * 1024 * 1024)
+               return __va(phys);
+#endif
+
+       offset = phys & (PAGE_SIZE - 1);
+       mapped_size = PAGE_SIZE - offset;
+       set_fixmap(FIX_ACPI_END, phys);
+       base = fix_to_virt(FIX_ACPI_END);
+
+       /*
+        * Most cases can be covered by the below.
+        */
+       idx = FIX_ACPI_END;
+       while (mapped_size < size) {
+               if (--idx < FIX_ACPI_BEGIN)
+                       return NULL;    /* cannot handle this */
+               phys += PAGE_SIZE;
+               set_fixmap(idx, phys);
+               mapped_size += PAGE_SIZE;
+       }
+
+       return ((unsigned char *)base + offset);
+}
+#endif
+
+#ifdef CONFIG_PCI_MMCONFIG
+/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
+struct acpi_table_mcfg_config *pci_mmcfg_config;
+int pci_mmcfg_config_num;
+
+int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+{
+       struct acpi_table_mcfg *mcfg;
+       unsigned long i;
+       int config_size;
+
+       if (!phys_addr || !size)
+               return -EINVAL;
+
+       mcfg = (struct acpi_table_mcfg *)__acpi_map_table(phys_addr, size);
+       if (!mcfg) {
+               printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+               return -ENODEV;
+       }
+
+       /* how many config structures do we have */
+       pci_mmcfg_config_num = 0;
+       i = size - sizeof(struct acpi_table_mcfg);
+       while (i >= sizeof(struct acpi_table_mcfg_config)) {
+               ++pci_mmcfg_config_num;
+               i -= sizeof(struct acpi_table_mcfg_config);
+       };
+       if (pci_mmcfg_config_num == 0) {
+               printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
+               return -ENODEV;
+       }
+
+       config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
+       pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
+       if (!pci_mmcfg_config) {
+               printk(KERN_WARNING PREFIX
+                      "No memory for MCFG config tables\n");
+               return -ENOMEM;
+       }
+
+       memcpy(pci_mmcfg_config, &mcfg->config, config_size);
+       for (i = 0; i < pci_mmcfg_config_num; ++i) {
+               if (mcfg->config[i].base_reserved) {
+                       printk(KERN_ERR PREFIX
+                              "MMCONFIG not in low 4GB of memory\n");
+                       kfree(pci_mmcfg_config);
+                       pci_mmcfg_config_num = 0;
+                       return -ENODEV;
+               }
+       }
+
+       return 0;
+}
+#endif                         /* CONFIG_PCI_MMCONFIG */
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size)
+{
+       struct acpi_table_madt *madt = NULL;
+
+       if (!phys_addr || !size || !cpu_has_apic)
+               return -EINVAL;
+
+       madt = (struct acpi_table_madt *)__acpi_map_table(phys_addr, size);
+       if (!madt) {
+               printk(KERN_WARNING PREFIX "Unable to map MADT\n");
+               return -ENODEV;
+       }
+
+       if (madt->lapic_address) {
+               acpi_lapic_addr = (u64) madt->lapic_address;
+
+               printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
+                      madt->lapic_address);
+       }
+
+       acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id);
+
+       return 0;
+}
+
+static int __init
+acpi_parse_lapic(acpi_table_entry_header * header, const unsigned long end)
+{
+       struct acpi_table_lapic *processor = NULL;
+
+       processor = (struct acpi_table_lapic *)header;
+
+       if (BAD_MADT_ENTRY(processor, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       /* Record local apic id only when enabled */
+       if (processor->flags.enabled)
+               x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
+
+       /*
+        * We need to register disabled CPU as well to permit
+        * counting disabled CPUs. This allows us to size
+        * cpus_possible_map more accurately, to permit
+        * to not preallocating memory for all NR_CPUS
+        * when we use CPU hotplug.
+        */
+       mp_register_lapic(processor->id,        /* APIC ID */
+                         processor->flags.enabled);    /* Enabled? */
+
+       return 0;
+}
+
+static int __init
+acpi_parse_lapic_addr_ovr(acpi_table_entry_header * header,
+                         const unsigned long end)
+{
+       struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
+
+       lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr *)header;
+
+       if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
+               return -EINVAL;
+
+       acpi_lapic_addr = lapic_addr_ovr->address;
+
+       return 0;
+}
+
+static int __init
+acpi_parse_lapic_nmi(acpi_table_entry_header * header, const unsigned long end)
+{
+       struct acpi_table_lapic_nmi *lapic_nmi = NULL;
+
+       lapic_nmi = (struct acpi_table_lapic_nmi *)header;
+
+       if (BAD_MADT_ENTRY(lapic_nmi, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       if (lapic_nmi->lint != 1)
+               printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
+
+       return 0;
+}
+
+#endif                         /*CONFIG_X86_LOCAL_APIC */
+
+#ifdef CONFIG_X86_IO_APIC
+
+static int __init
+acpi_parse_ioapic(acpi_table_entry_header * header, const unsigned long end)
+{
+       struct acpi_table_ioapic *ioapic = NULL;
+
+       ioapic = (struct acpi_table_ioapic *)header;
+
+       if (BAD_MADT_ENTRY(ioapic, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       mp_register_ioapic(ioapic->id,
+                          ioapic->address, ioapic->global_irq_base);
+
+       return 0;
+}
+
+/*
+ * Parse Interrupt Source Override for the ACPI SCI
+ */
+static void acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
+{
+       if (trigger == 0)       /* compatible SCI trigger is level */
+               trigger = 3;
+
+       if (polarity == 0)      /* compatible SCI polarity is low */
+               polarity = 3;
+
+       /* Command-line over-ride via acpi_sci= */
+       if (acpi_sci_flags.trigger)
+               trigger = acpi_sci_flags.trigger;
+
+       if (acpi_sci_flags.polarity)
+               polarity = acpi_sci_flags.polarity;
+
+       /*
+        * mp_config_acpi_legacy_irqs() already setup IRQs < 16
+        * If GSI is < 16, this will update its flags,
+        * else it will create a new mp_irqs[] entry.
+        */
+       mp_override_legacy_irq(gsi, polarity, trigger, gsi);
+
+       /*
+        * stash over-ride to indicate we've been here
+        * and for later update of acpi_fadt
+        */
+       acpi_sci_override_gsi = gsi;
+       return;
+}
+
+static int __init
+acpi_parse_int_src_ovr(acpi_table_entry_header * header,
+                      const unsigned long end)
+{
+       struct acpi_table_int_src_ovr *intsrc = NULL;
+
+       intsrc = (struct acpi_table_int_src_ovr *)header;
+
+       if (BAD_MADT_ENTRY(intsrc, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       if (intsrc->bus_irq == acpi_fadt.sci_int) {
+               acpi_sci_ioapic_setup(intsrc->global_irq,
+                                     intsrc->flags.polarity,
+                                     intsrc->flags.trigger);
+               return 0;
+       }
+
+       if (acpi_skip_timer_override &&
+           intsrc->bus_irq == 0 && intsrc->global_irq == 2) {
+               printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
+               return 0;
+       }
+
+       mp_override_legacy_irq(intsrc->bus_irq,
+                              intsrc->flags.polarity,
+                              intsrc->flags.trigger, intsrc->global_irq);
+
+       return 0;
+}
+
+static int __init
+acpi_parse_nmi_src(acpi_table_entry_header * header, const unsigned long end)
+{
+       struct acpi_table_nmi_src *nmi_src = NULL;
+
+       nmi_src = (struct acpi_table_nmi_src *)header;
+
+       if (BAD_MADT_ENTRY(nmi_src, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       /* TBD: Support nimsrc entries? */
+
+       return 0;
+}
+
+#endif                         /* CONFIG_X86_IO_APIC */
+
+/*
+ * acpi_pic_sci_set_trigger()
+ * 
+ * use ELCR to set PIC-mode trigger type for SCI
+ *
+ * If a PIC-mode SCI is not recognized or gives spurious IRQ7's
+ * it may require Edge Trigger -- use "acpi_sci=edge"
+ *
+ * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers
+ * for the 8259 PIC.  bit[n] = 1 means irq[n] is Level, otherwise Edge.
+ * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0)
+ * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0)
+ */
+
+void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
+{
+       unsigned int mask = 1 << irq;
+       unsigned int old, new;
+
+       /* Real old ELCR mask */
+       old = inb(0x4d0) | (inb(0x4d1) << 8);
+
+       /*
+        * If we use ACPI to set PCI irq's, then we should clear ELCR
+        * since we will set it correctly as we enable the PCI irq
+        * routing.
+        */
+       new = acpi_noirq ? old : 0;
+
+       /*
+        * Update SCI information in the ELCR, it isn't in the PCI
+        * routing tables..
+        */
+       switch (trigger) {
+       case 1:         /* Edge - clear */
+               new &= ~mask;
+               break;
+       case 3:         /* Level - set */
+               new |= mask;
+               break;
+       }
+
+       if (old == new)
+               return;
+
+       printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old);
+       outb(new, 0x4d0);
+       outb(new >> 8, 0x4d1);
+}
+
+int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
+{
+#ifdef CONFIG_X86_IO_APIC
+       if (use_pci_vector() && !platform_legacy_irq(gsi))
+               *irq = IO_APIC_VECTOR(gsi);
+       else
+#endif
+               *irq = gsi_irq_sharing(gsi);
+       return 0;
+}
+
+/*
+ * success: return IRQ number (>=0)
+ * failure: return < 0
+ */
+int acpi_register_gsi(u32 gsi, int triggering, int polarity)
+{
+       unsigned int irq;
+       unsigned int plat_gsi = gsi;
+
+#ifdef CONFIG_PCI
+       /*
+        * Make sure all (legacy) PCI IRQs are set as level-triggered.
+        */
+       if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
+               extern void eisa_set_level_irq(unsigned int irq);
+
+               if (triggering == ACPI_LEVEL_SENSITIVE)
+                       eisa_set_level_irq(gsi);
+       }
+#endif
+
+#ifdef CONFIG_X86_IO_APIC
+       if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
+               plat_gsi = mp_register_gsi(gsi, triggering, polarity);
+       }
+#endif
+       acpi_gsi_to_irq(plat_gsi, &irq);
+       return irq;
+}
+
+EXPORT_SYMBOL(acpi_register_gsi);
+
+/*
+ *  ACPI based hotplug support for CPU
+ */
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+int acpi_map_lsapic(acpi_handle handle, int *pcpu)
+{
+       /* TBD */
+       return -EINVAL;
+}
+
+EXPORT_SYMBOL(acpi_map_lsapic);
+
+int acpi_unmap_lsapic(int cpu)
+{
+       /* TBD */
+       return -EINVAL;
+}
+
+EXPORT_SYMBOL(acpi_unmap_lsapic);
+#endif                         /* CONFIG_ACPI_HOTPLUG_CPU */
+
+int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
+{
+       /* TBD */
+       return -EINVAL;
+}
+
+EXPORT_SYMBOL(acpi_register_ioapic);
+
+int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
+{
+       /* TBD */
+       return -EINVAL;
+}
+
+EXPORT_SYMBOL(acpi_unregister_ioapic);
+
+static unsigned long __init
+acpi_scan_rsdp(unsigned long start, unsigned long length)
+{
+       unsigned long offset = 0;
+       unsigned long sig_len = sizeof("RSD PTR ") - 1;
+       unsigned long vstart = (unsigned long)isa_bus_to_virt(start);
+
+       /*
+        * Scan all 16-byte boundaries of the physical memory region for the
+        * RSDP signature.
+        */
+       for (offset = 0; offset < length; offset += 16) {
+               if (strncmp((char *)(vstart + offset), "RSD PTR ", sig_len))
+                       continue;
+               return (start + offset);
+       }
+
+       return 0;
+}
+
+static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
+{
+       struct acpi_table_sbf *sb;
+
+       if (!phys_addr || !size)
+               return -EINVAL;
+
+       sb = (struct acpi_table_sbf *)__acpi_map_table(phys_addr, size);
+       if (!sb) {
+               printk(KERN_WARNING PREFIX "Unable to map SBF\n");
+               return -ENODEV;
+       }
+
+       sbf_port = sb->sbf_cmos;        /* Save CMOS port */
+
+       return 0;
+}
+
+#ifdef CONFIG_HPET_TIMER
+
+static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
+{
+       struct acpi_table_hpet *hpet_tbl;
+
+       if (!phys || !size)
+               return -EINVAL;
+
+       hpet_tbl = (struct acpi_table_hpet *)__acpi_map_table(phys, size);
+       if (!hpet_tbl) {
+               printk(KERN_WARNING PREFIX "Unable to map HPET\n");
+               return -ENODEV;
+       }
+
+       if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) {
+               printk(KERN_WARNING PREFIX "HPET timers must be located in "
+                      "memory.\n");
+               return -1;
+       }
+#ifdef CONFIG_X86_64
+       vxtime.hpet_address = hpet_tbl->addr.addrl |
+           ((long)hpet_tbl->addr.addrh << 32);
+
+       printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+              hpet_tbl->id, vxtime.hpet_address);
+#else                          /* X86 */
+       {
+               extern unsigned long hpet_address;
+
+               hpet_address = hpet_tbl->addr.addrl;
+               printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+                      hpet_tbl->id, hpet_address);
+       }
+#endif                         /* X86 */
+
+       return 0;
+}
+#else
+#define        acpi_parse_hpet NULL
+#endif
+
+#ifdef CONFIG_X86_PM_TIMER
+extern u32 pmtmr_ioport;
+#endif
+
+static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
+{
+       struct fadt_descriptor *fadt = NULL;
+
+       fadt = (struct fadt_descriptor *)__acpi_map_table(phys, size);
+       if (!fadt) {
+               printk(KERN_WARNING PREFIX "Unable to map FADT\n");
+               return 0;
+       }
+       /* initialize sci_int early for INT_SRC_OVR MADT parsing */
+       acpi_fadt.sci_int = fadt->sci_int;
+
+       /* initialize rev and apic_phys_dest_mode for x86_64 genapic */
+       acpi_fadt.revision = fadt->revision;
+       acpi_fadt.force_apic_physical_destination_mode =
+           fadt->force_apic_physical_destination_mode;
+
+#if defined(CONFIG_X86_PM_TIMER) && !defined(CONFIG_XEN)
+       /* detect the location of the ACPI PM Timer */
+       if (fadt->revision >= FADT2_REVISION_ID) {
+               /* FADT rev. 2 */
+               if (fadt->xpm_tmr_blk.address_space_id !=
+                   ACPI_ADR_SPACE_SYSTEM_IO)
+                       return 0;
+
+               pmtmr_ioport = fadt->xpm_tmr_blk.address;
+               /*
+                * "X" fields are optional extensions to the original V1.0
+                * fields, so we must selectively expand V1.0 fields if the
+                * corresponding X field is zero.
+                */
+               if (!pmtmr_ioport)
+                       pmtmr_ioport = fadt->V1_pm_tmr_blk;
+       } else {
+               /* FADT rev. 1 */
+               pmtmr_ioport = fadt->V1_pm_tmr_blk;
+       }
+       if (pmtmr_ioport)
+               printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n",
+                      pmtmr_ioport);
+#endif
+       return 0;
+}
+
+unsigned long __init acpi_find_rsdp(void)
+{
+       unsigned long rsdp_phys = 0;
+
+       if (efi_enabled) {
+               if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
+                       return efi.acpi20;
+               else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
+                       return efi.acpi;
+       }
+       /*
+        * Scan memory looking for the RSDP signature. First search EBDA (low
+        * memory) paragraphs and then search upper memory (E0000-FFFFF).
+        */
+       rsdp_phys = acpi_scan_rsdp(0, 0x400);
+       if (!rsdp_phys)
+               rsdp_phys = acpi_scan_rsdp(0xE0000, 0x20000);
+
+       return rsdp_phys;
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+/*
+ * Parse LAPIC entries in MADT
+ * returns 0 on success, < 0 on error
+ */
+static int __init acpi_parse_madt_lapic_entries(void)
+{
+       int count;
+
+       if (!cpu_has_apic)
+               return -ENODEV;
+
+       /* 
+        * Note that the LAPIC address is obtained from the MADT (32-bit value)
+        * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
+        */
+
+       count =
+           acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR,
+                                 acpi_parse_lapic_addr_ovr, 0);
+       if (count < 0) {
+               printk(KERN_ERR PREFIX
+                      "Error parsing LAPIC address override entry\n");
+               return count;
+       }
+
+       mp_register_lapic_address(acpi_lapic_addr);
+
+       count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic,
+                                     MAX_APICS);
+       if (!count) {
+               printk(KERN_ERR PREFIX "No LAPIC entries present\n");
+               /* TBD: Cleanup to allow fallback to MPS */
+               return -ENODEV;
+       } else if (count < 0) {
+               printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
+               /* TBD: Cleanup to allow fallback to MPS */
+               return count;
+       }
+
+       count =
+           acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0);
+       if (count < 0) {
+               printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
+               /* TBD: Cleanup to allow fallback to MPS */
+               return count;
+       }
+       return 0;
+}
+#endif                         /* CONFIG_X86_LOCAL_APIC */
+
+#ifdef CONFIG_X86_IO_APIC
+/*
+ * Parse IOAPIC related entries in MADT
+ * returns 0 on success, < 0 on error
+ */
+static int __init acpi_parse_madt_ioapic_entries(void)
+{
+       int count;
+
+       /*
+        * ACPI interpreter is required to complete interrupt setup,
+        * so if it is off, don't enumerate the io-apics with ACPI.
+        * If MPS is present, it will handle them,
+        * otherwise the system will stay in PIC mode
+        */
+       if (acpi_disabled || acpi_noirq) {
+               return -ENODEV;
+       }
+
+       if (!cpu_has_apic) 
+               return -ENODEV;
+
+       /*
+        * if "noapic" boot option, don't look for IO-APICs
+        */
+       if (skip_ioapic_setup) {
+               printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
+                      "due to 'noapic' option.\n");
+               return -ENODEV;
+       }
+
+       count =
+           acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic,
+                                 MAX_IO_APICS);
+       if (!count) {
+               printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
+               return -ENODEV;
+       } else if (count < 0) {
+               printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
+               return count;
+       }
+
+       count =
+           acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr,
+                                 NR_IRQ_VECTORS);
+       if (count < 0) {
+               printk(KERN_ERR PREFIX
+                      "Error parsing interrupt source overrides entry\n");
+               /* TBD: Cleanup to allow fallback to MPS */
+               return count;
+       }
+
+       /*
+        * If BIOS did not supply an INT_SRC_OVR for the SCI
+        * pretend we got one so we can set the SCI flags.
+        */
+       if (!acpi_sci_override_gsi)
+               acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
+
+       /* Fill in identity legacy mapings where no override */
+       mp_config_acpi_legacy_irqs();
+
+       count =
+           acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src,
+                                 NR_IRQ_VECTORS);
+       if (count < 0) {
+               printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
+               /* TBD: Cleanup to allow fallback to MPS */
+               return count;
+       }
+
+       return 0;
+}
+#else
+static inline int acpi_parse_madt_ioapic_entries(void)
+{
+       return -1;
+}
+#endif /* !CONFIG_X86_IO_APIC */
+
+static void __init acpi_process_madt(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+       int count, error;
+
+       count = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
+       if (count >= 1) {
+
+               /*
+                * Parse MADT LAPIC entries
+                */
+               error = acpi_parse_madt_lapic_entries();
+               if (!error) {
+                       acpi_lapic = 1;
+
+#ifdef CONFIG_X86_GENERICARCH
+                       generic_bigsmp_probe();
+#endif
+                       /*
+                        * Parse MADT IO-APIC entries
+                        */
+                       error = acpi_parse_madt_ioapic_entries();
+                       if (!error) {
+                               acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
+                               acpi_irq_balance_set(NULL);
+                               acpi_ioapic = 1;
+
+                               smp_found_config = 1;
+                               clustered_apic_check();
+                       }
+               }
+               if (error == -EINVAL) {
+                       /*
+                        * Dell Precision Workstation 410, 610 come here.
+                        */
+                       printk(KERN_ERR PREFIX
+                              "Invalid BIOS MADT, disabling ACPI\n");
+                       disable_acpi();
+               }
+       }
+#endif
+       return;
+}
+
+extern int acpi_force;
+
+#ifdef __i386__
+
+static int __init disable_acpi_irq(struct dmi_system_id *d)
+{
+       if (!acpi_force) {
+               printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
+                      d->ident);
+               acpi_noirq_set();
+       }
+       return 0;
+}
+
+static int __init disable_acpi_pci(struct dmi_system_id *d)
+{
+       if (!acpi_force) {
+               printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
+                      d->ident);
+               acpi_disable_pci();
+       }
+       return 0;
+}
+
+static int __init dmi_disable_acpi(struct dmi_system_id *d)
+{
+       if (!acpi_force) {
+               printk(KERN_NOTICE "%s detected: acpi off\n", d->ident);
+               disable_acpi();
+       } else {
+               printk(KERN_NOTICE
+                      "Warning: DMI blacklist says broken, but acpi forced\n");
+       }
+       return 0;
+}
+
+/*
+ * Limit ACPI to CPU enumeration for HT
+ */
+static int __init force_acpi_ht(struct dmi_system_id *d)
+{
+       if (!acpi_force) {
+               printk(KERN_NOTICE "%s detected: force use of acpi=ht\n",
+                      d->ident);
+               disable_acpi();
+               acpi_ht = 1;
+       } else {
+               printk(KERN_NOTICE
+                      "Warning: acpi=force overrules DMI blacklist: 
acpi=ht\n");
+       }
+       return 0;
+}
+
+/*
+ * If your system is blacklisted here, but you find that acpi=force
+ * works for you, please contact acpi-devel@xxxxxxxxxxxxxxx
+ */
+static struct dmi_system_id __initdata acpi_dmi_table[] = {
+       /*
+        * Boxes that need ACPI disabled
+        */
+       {
+        .callback = dmi_disable_acpi,
+        .ident = "IBM Thinkpad",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+                    DMI_MATCH(DMI_BOARD_NAME, "2629H1G"),
+                    },
+        },
+
+       /*
+        * Boxes that need acpi=ht
+        */
+       {
+        .callback = force_acpi_ht,
+        .ident = "FSC Primergy T850",
+        .matches = {
+                    DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+                    DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "DELL GX240",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
+                    DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "HP VISUALIZE NT Workstation",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
+                    DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "Compaq Workstation W8000",
+        .matches = {
+                    DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
+                    DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "ASUS P4B266",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+                    DMI_MATCH(DMI_BOARD_NAME, "P4B266"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "ASUS P2B-DS",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+                    DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "ASUS CUR-DLS",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+                    DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "ABIT i440BX-W83977",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"),
+                    DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "IBM Bladecenter",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+                    DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "IBM eServer xSeries 360",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+                    DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "IBM eserver xSeries 330",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+                    DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "IBM eserver xSeries 440",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+                    DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
+                    },
+        },
+
+       /*
+        * Boxes that need ACPI PCI IRQ routing disabled
+        */
+       {
+        .callback = disable_acpi_irq,
+        .ident = "ASUS A7V",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"),
+                    DMI_MATCH(DMI_BOARD_NAME, "<A7V>"),
+                    /* newer BIOS, Revision 1011, does work */
+                    DMI_MATCH(DMI_BIOS_VERSION,
+                              "ASUS A7V ACPI BIOS Revision 1007"),
+                    },
+        },
+
+       /*
+        * Boxes that need ACPI PCI IRQ routing and PCI scan disabled
+        */
+       {                       /* _BBN 0 bug */
+        .callback = disable_acpi_pci,
+        .ident = "ASUS PR-DLS",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+                    DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"),
+                    DMI_MATCH(DMI_BIOS_VERSION,
+                              "ASUS PR-DLS ACPI BIOS Revision 1010"),
+                    DMI_MATCH(DMI_BIOS_DATE, "03/21/2003")
+                    },
+        },
+       {
+        .callback = disable_acpi_pci,
+        .ident = "Acer TravelMate 36x Laptop",
+        .matches = {
+                    DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                    DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
+                    },
+        },
+       {}
+};
+
+#endif                         /* __i386__ */
+
+/*
+ * acpi_boot_table_init() and acpi_boot_init()
+ *  called from setup_arch(), always.
+ *     1. checksums all tables
+ *     2. enumerates lapics
+ *     3. enumerates io-apics
+ *
+ * acpi_table_init() is separate to allow reading SRAT without
+ * other side effects.
+ *
+ * side effects of acpi_boot_init:
+ *     acpi_lapic = 1 if LAPIC found
+ *     acpi_ioapic = 1 if IOAPIC found
+ *     if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
+ *     if acpi_blacklisted() acpi_disabled = 1;
+ *     acpi_irq_model=...
+ *     ...
+ *
+ * return value: (currently ignored)
+ *     0: success
+ *     !0: failure
+ */
+
+int __init acpi_boot_table_init(void)
+{
+       int error;
+
+#ifdef __i386__
+       dmi_check_system(acpi_dmi_table);
+#endif
+
+       /*
+        * If acpi_disabled, bail out
+        * One exception: acpi=ht continues far enough to enumerate LAPICs
+        */
+       if (acpi_disabled && !acpi_ht)
+               return 1;
+
+       /* 
+        * Initialize the ACPI boot-time table parser.
+        */
+       error = acpi_table_init();
+       if (error) {
+               disable_acpi();
+               return error;
+       }
+
+       acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
+
+       /*
+        * blacklist may disable ACPI entirely
+        */
+       error = acpi_blacklisted();
+       if (error) {
+               if (acpi_force) {
+                       printk(KERN_WARNING PREFIX "acpi=force override\n");
+               } else {
+                       printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
+                       disable_acpi();
+                       return error;
+               }
+       }
+
+       return 0;
+}
+
+int __init acpi_boot_init(void)
+{
+       /*
+        * If acpi_disabled, bail out
+        * One exception: acpi=ht continues far enough to enumerate LAPICs
+        */
+       if (acpi_disabled && !acpi_ht)
+               return 1;
+
+       acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
+
+       /*
+        * set sci_int and PM timer address
+        */
+       acpi_table_parse(ACPI_FADT, acpi_parse_fadt);
+
+       /*
+        * Process the Multiple APIC Description Table (MADT), if present
+        */
+       acpi_process_madt();
+
+       acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
+
+       return 0;
+}
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/apic-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/apic-xen.c       Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,155 @@
+/*
+ *     Local APIC handling, local APIC timers
+ *
+ *     (c) 1999, 2000 Ingo Molnar <mingo@xxxxxxxxxx>
+ *
+ *     Fixes
+ *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
+ *                                     thanks to Eric Gilmore
+ *                                     and Rolf G. Tews
+ *                                     for testing these extensively.
+ *     Maciej W. Rozycki       :       Various updates and fixes.
+ *     Mikael Pettersson       :       Power Management for UP-APIC.
+ *     Pavel Machek and
+ *     Mikael Pettersson       :       PM converted to driver model.
+ */
+
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/mc146818rtc.h>
+#include <linux/kernel_stat.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+
+#include <asm/atomic.h>
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+#include <asm/desc.h>
+#include <asm/arch_hooks.h>
+#include <asm/hpet.h>
+#include <asm/i8253.h>
+#include <asm/nmi.h>
+
+#include <mach_apic.h>
+#include <mach_apicdef.h>
+#include <mach_ipi.h>
+
+#include "io_ports.h"
+
+#ifndef CONFIG_XEN
+/*
+ * cpu_mask that denotes the CPUs that needs timer interrupt coming in as
+ * IPIs in place of local APIC timers
+ */
+static cpumask_t timer_bcast_ipi;
+#endif
+
+/*
+ * Knob to control our willingness to enable the local APIC.
+ */
+int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+
+/*
+ * Debug level
+ */
+int apic_verbosity;
+
+#ifndef CONFIG_XEN
+static int modern_apic(void)
+{
+       unsigned int lvr, version;
+       /* AMD systems use old APIC versions, so check the CPU */
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+               boot_cpu_data.x86 >= 0xf)
+               return 1;
+       lvr = apic_read(APIC_LVR);
+       version = GET_APIC_VERSION(lvr);
+       return version >= 0x14;
+}
+#endif /* !CONFIG_XEN */
+
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+       printk("unexpected IRQ trap at vector %02x\n", irq);
+       /*
+        * Currently unexpected vectors happen only on SMP and APIC.
+        * We _must_ ack these because every local APIC has only N
+        * irq slots per priority level, and a 'hanging, unacked' IRQ
+        * holds up an irq slot - in excessive cases (when multiple
+        * unexpected vectors occur) that might lock up the APIC
+        * completely.
+        * But only ack when the APIC is enabled -AK
+        */
+       if (cpu_has_apic)
+               ack_APIC_irq();
+}
+
+int get_physical_broadcast(void)
+{
+        return 0xff;
+}
+
+#ifndef CONFIG_XEN
+#ifndef CONFIG_SMP
+static void up_apic_timer_interrupt_call(struct pt_regs *regs)
+{
+       int cpu = smp_processor_id();
+
+       /*
+        * the NMI deadlock-detector uses this.
+        */
+       per_cpu(irq_stat, cpu).apic_timer_irqs++;
+
+       smp_local_timer_interrupt(regs);
+}
+#endif
+
+void smp_send_timer_broadcast_ipi(struct pt_regs *regs)
+{
+       cpumask_t mask;
+
+       cpus_and(mask, cpu_online_map, timer_bcast_ipi);
+       if (!cpus_empty(mask)) {
+#ifdef CONFIG_SMP
+               send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
+#else
+               /*
+                * We can directly call the apic timer interrupt handler
+                * in UP case. Minus all irq related functions
+                */
+               up_apic_timer_interrupt_call(regs);
+#endif
+       }
+}
+#endif
+
+int setup_profiling_timer(unsigned int multiplier)
+{
+       return -EINVAL;
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int __init APIC_init_uniprocessor (void)
+{
+#ifdef CONFIG_X86_IO_APIC
+       if (smp_found_config)
+               if (!skip_ioapic_setup && nr_ioapics)
+                       setup_IO_APIC();
+#endif
+
+       return 0;
+}
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/asm-offsets.c
--- a/arch/i386/kernel/asm-offsets.c    Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/asm-offsets.c    Mon Jun 04 10:05:28 2007 +0100
@@ -66,9 +66,14 @@ void foo(void)
        OFFSET(pbe_orig_address, pbe, orig_address);
        OFFSET(pbe_next, pbe, next);
 
+#ifndef CONFIG_X86_NO_TSS
        /* Offset from the sysenter stack to tss.esp0 */
-       DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) -
+       DEFINE(SYSENTER_stack_esp0, offsetof(struct tss_struct, esp0) -
                 sizeof(struct tss_struct));
+#else
+       /* sysenter stack points directly to esp0 */
+       DEFINE(SYSENTER_stack_esp0, 0);
+#endif
 
        DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
        DEFINE(VDSO_PRELINK, VDSO_PRELINK);
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/cpu/Makefile
--- a/arch/i386/kernel/cpu/Makefile     Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/cpu/Makefile     Mon Jun 04 10:05:28 2007 +0100
@@ -17,3 +17,8 @@ obj-$(CONFIG_X86_MCE) +=      mcheck/
 
 obj-$(CONFIG_MTRR)     +=      mtrr/
 obj-$(CONFIG_CPU_FREQ) +=      cpufreq/
+
+ifdef CONFIG_XEN
+include $(srctree)/scripts/Makefile.xen
+obj-y := $(call cherrypickxen, $(obj-y), $(src))
+endif
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/cpu/common-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/cpu/common-xen.c Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,743 @@
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/bootmem.h>
+#include <asm/semaphore.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/mtrr.h>
+#include <asm/mce.h>
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <mach_apic.h>
+#else
+#ifdef CONFIG_XEN
+#define phys_pkg_id(a,b) a
+#endif
+#endif
+#include <asm/hypervisor.h>
+
+#include "cpu.h"
+
+DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
+EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
+
+#ifndef CONFIG_XEN
+DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
+EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
+#endif
+
+static int cachesize_override __cpuinitdata = -1;
+static int disable_x86_fxsr __cpuinitdata;
+static int disable_x86_serial_nr __cpuinitdata = 1;
+static int disable_x86_sep __cpuinitdata;
+
+struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
+
+extern int disable_pse;
+
+static void default_init(struct cpuinfo_x86 * c)
+{
+       /* Not much we can do here... */
+       /* Check if at least it has cpuid */
+       if (c->cpuid_level == -1) {
+               /* No cpuid. It must be an ancient CPU */
+               if (c->x86 == 4)
+                       strcpy(c->x86_model_id, "486");
+               else if (c->x86 == 3)
+                       strcpy(c->x86_model_id, "386");
+       }
+}
+
+static struct cpu_dev default_cpu = {
+       .c_init = default_init,
+       .c_vendor = "Unknown",
+};
+static struct cpu_dev * this_cpu = &default_cpu;
+
+static int __init cachesize_setup(char *str)
+{
+       get_option (&str, &cachesize_override);
+       return 1;
+}
+__setup("cachesize=", cachesize_setup);
+
+int __cpuinit get_model_name(struct cpuinfo_x86 *c)
+{
+       unsigned int *v;
+       char *p, *q;
+
+       if (cpuid_eax(0x80000000) < 0x80000004)
+               return 0;
+
+       v = (unsigned int *) c->x86_model_id;
+       cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
+       cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
+       cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
+       c->x86_model_id[48] = 0;
+
+       /* Intel chips right-justify this string for some dumb reason;
+          undo that brain damage */
+       p = q = &c->x86_model_id[0];
+       while ( *p == ' ' )
+            p++;
+       if ( p != q ) {
+            while ( *p )
+                 *q++ = *p++;
+            while ( q <= &c->x86_model_id[48] )
+                 *q++ = '\0';  /* Zero-pad the rest */
+       }
+
+       return 1;
+}
+
+
+void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
+{
+       unsigned int n, dummy, ecx, edx, l2size;
+
+       n = cpuid_eax(0x80000000);
+
+       if (n >= 0x80000005) {
+               cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
+               printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache 
%dK (%d bytes/line)\n",
+                       edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
+               c->x86_cache_size=(ecx>>24)+(edx>>24);  
+       }
+
+       if (n < 0x80000006)     /* Some chips just has a large L1. */
+               return;
+
+       ecx = cpuid_ecx(0x80000006);
+       l2size = ecx >> 16;
+       
+       /* do processor-specific cache resizing */
+       if (this_cpu->c_size_cache)
+               l2size = this_cpu->c_size_cache(c,l2size);
+
+       /* Allow user to override all this if necessary. */
+       if (cachesize_override != -1)
+               l2size = cachesize_override;
+
+       if ( l2size == 0 )
+               return;         /* Again, no L2 cache is possible */
+
+       c->x86_cache_size = l2size;
+
+       printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
+              l2size, ecx & 0xFF);
+}
+
+/* Naming convention should be: <Name> [(<Codename>)] */
+/* This table only is used unless init_<vendor>() below doesn't set it; */
+/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used 
*/
+
+/* Look up CPU names by table lookup. */
+static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
+{
+       struct cpu_model_info *info;
+
+       if ( c->x86_model >= 16 )
+               return NULL;    /* Range check */
+
+       if (!this_cpu)
+               return NULL;
+
+       info = this_cpu->c_models;
+
+       while (info && info->family) {
+               if (info->family == c->x86)
+                       return info->model_names[c->x86_model];
+               info++;
+       }
+       return NULL;            /* Not found */
+}
+
+
+static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
+{
+       char *v = c->x86_vendor_id;
+       int i;
+       static int printed;
+
+       for (i = 0; i < X86_VENDOR_NUM; i++) {
+               if (cpu_devs[i]) {
+                       if (!strcmp(v,cpu_devs[i]->c_ident[0]) ||
+                           (cpu_devs[i]->c_ident[1] && 
+                            !strcmp(v,cpu_devs[i]->c_ident[1]))) {
+                               c->x86_vendor = i;
+                               if (!early)
+                                       this_cpu = cpu_devs[i];
+                               return;
+                       }
+               }
+       }
+       if (!printed) {
+               printed++;
+               printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
+               printk(KERN_ERR "CPU: Your system may be unstable.\n");
+       }
+       c->x86_vendor = X86_VENDOR_UNKNOWN;
+       this_cpu = &default_cpu;
+}
+
+
+static int __init x86_fxsr_setup(char * s)
+{
+       disable_x86_fxsr = 1;
+       return 1;
+}
+__setup("nofxsr", x86_fxsr_setup);
+
+
+static int __init x86_sep_setup(char * s)
+{
+       disable_x86_sep = 1;
+       return 1;
+}
+__setup("nosep", x86_sep_setup);
+
+
+/* Standard macro to see if a specific flag is changeable */
+static inline int flag_is_changeable_p(u32 flag)
+{
+       u32 f1, f2;
+
+       asm("pushfl\n\t"
+           "pushfl\n\t"
+           "popl %0\n\t"
+           "movl %0,%1\n\t"
+           "xorl %2,%0\n\t"
+           "pushl %0\n\t"
+           "popfl\n\t"
+           "pushfl\n\t"
+           "popl %0\n\t"
+           "popfl\n\t"
+           : "=&r" (f1), "=&r" (f2)
+           : "ir" (flag));
+
+       return ((f1^f2) & flag) != 0;
+}
+
+
+/* Probe for the CPUID instruction */
+static int __cpuinit have_cpuid_p(void)
+{
+       return flag_is_changeable_p(X86_EFLAGS_ID);
+}
+
+/* Do minimum CPU detection early.
+   Fields really needed: vendor, cpuid_level, family, model, mask, cache 
alignment.
+   The others are not touched to avoid unwanted side effects.
+
+   WARNING: this function is only called on the BP.  Don't add code here
+   that is supposed to run on all CPUs. */
+static void __init early_cpu_detect(void)
+{
+       struct cpuinfo_x86 *c = &boot_cpu_data;
+
+       c->x86_cache_alignment = 32;
+
+       if (!have_cpuid_p())
+               return;
+
+       /* Get vendor name */
+       cpuid(0x00000000, &c->cpuid_level,
+             (int *)&c->x86_vendor_id[0],
+             (int *)&c->x86_vendor_id[8],
+             (int *)&c->x86_vendor_id[4]);
+
+       get_cpu_vendor(c, 1);
+
+       c->x86 = 4;
+       if (c->cpuid_level >= 0x00000001) {
+               u32 junk, tfms, cap0, misc;
+               cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
+               c->x86 = (tfms >> 8) & 15;
+               c->x86_model = (tfms >> 4) & 15;
+               if (c->x86 == 0xf)
+                       c->x86 += (tfms >> 20) & 0xff;
+               if (c->x86 >= 0x6)
+                       c->x86_model += ((tfms >> 16) & 0xF) << 4;
+               c->x86_mask = tfms & 15;
+               if (cap0 & (1<<19))
+                       c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
+       }
+}
+
+void __cpuinit generic_identify(struct cpuinfo_x86 * c)
+{
+       u32 tfms, xlvl;
+       int ebx;
+
+       if (have_cpuid_p()) {
+               /* Get vendor name */
+               cpuid(0x00000000, &c->cpuid_level,
+                     (int *)&c->x86_vendor_id[0],
+                     (int *)&c->x86_vendor_id[8],
+                     (int *)&c->x86_vendor_id[4]);
+               
+               get_cpu_vendor(c, 0);
+               /* Initialize the standard set of capabilities */
+               /* Note that the vendor-specific code below might override */
+       
+               /* Intel-defined flags: level 0x00000001 */
+               if ( c->cpuid_level >= 0x00000001 ) {
+                       u32 capability, excap;
+                       cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
+                       c->x86_capability[0] = capability;
+                       c->x86_capability[4] = excap;
+                       c->x86 = (tfms >> 8) & 15;
+                       c->x86_model = (tfms >> 4) & 15;
+                       if (c->x86 == 0xf)
+                               c->x86 += (tfms >> 20) & 0xff;
+                       if (c->x86 >= 0x6)
+                               c->x86_model += ((tfms >> 16) & 0xF) << 4;
+                       c->x86_mask = tfms & 15;
+#ifdef CONFIG_X86_HT
+                       c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
+#else
+                       c->apicid = (ebx >> 24) & 0xFF;
+#endif
+               } else {
+                       /* Have CPUID level 0 only - unheard of */
+                       c->x86 = 4;
+               }
+
+               /* AMD-defined flags: level 0x80000001 */
+               xlvl = cpuid_eax(0x80000000);
+               if ( (xlvl & 0xffff0000) == 0x80000000 ) {
+                       if ( xlvl >= 0x80000001 ) {
+                               c->x86_capability[1] = cpuid_edx(0x80000001);
+                               c->x86_capability[6] = cpuid_ecx(0x80000001);
+                       }
+                       if ( xlvl >= 0x80000004 )
+                               get_model_name(c); /* Default name */
+               }
+       }
+
+       early_intel_workaround(c);
+
+#ifdef CONFIG_X86_HT
+       c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
+#endif
+}
+
+static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+{
+       if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
+               /* Disable processor serial number */
+               unsigned long lo,hi;
+               rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
+               lo |= 0x200000;
+               wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
+               printk(KERN_NOTICE "CPU serial number disabled.\n");
+               clear_bit(X86_FEATURE_PN, c->x86_capability);
+
+               /* Disabling the serial number may affect the cpuid level */
+               c->cpuid_level = cpuid_eax(0);
+       }
+}
+
+static int __init x86_serial_nr_setup(char *s)
+{
+       disable_x86_serial_nr = 0;
+       return 1;
+}
+__setup("serialnumber", x86_serial_nr_setup);
+
+
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+{
+       int i;
+
+       c->loops_per_jiffy = loops_per_jiffy;
+       c->x86_cache_size = -1;
+       c->x86_vendor = X86_VENDOR_UNKNOWN;
+       c->cpuid_level = -1;    /* CPUID not detected */
+       c->x86_model = c->x86_mask = 0; /* So far unknown... */
+       c->x86_vendor_id[0] = '\0'; /* Unset */
+       c->x86_model_id[0] = '\0';  /* Unset */
+       c->x86_max_cores = 1;
+       memset(&c->x86_capability, 0, sizeof c->x86_capability);
+
+       if (!have_cpuid_p()) {
+               /* First of all, decide if this is a 486 or higher */
+               /* It's a 486 if we can modify the AC flag */
+               if ( flag_is_changeable_p(X86_EFLAGS_AC) )
+                       c->x86 = 4;
+               else
+                       c->x86 = 3;
+       }
+
+       generic_identify(c);
+
+       printk(KERN_DEBUG "CPU: After generic identify, caps:");
+       for (i = 0; i < NCAPINTS; i++)
+               printk(" %08lx", c->x86_capability[i]);
+       printk("\n");
+
+       if (this_cpu->c_identify) {
+               this_cpu->c_identify(c);
+
+               printk(KERN_DEBUG "CPU: After vendor identify, caps:");
+               for (i = 0; i < NCAPINTS; i++)
+                       printk(" %08lx", c->x86_capability[i]);
+               printk("\n");
+       }
+
+       /*
+        * Vendor-specific initialization.  In this section we
+        * canonicalize the feature flags, meaning if there are
+        * features a certain CPU supports which CPUID doesn't
+        * tell us, CPUID claiming incorrect flags, or other bugs,
+        * we handle them here.
+        *
+        * At the end of this section, c->x86_capability better
+        * indicate the features this CPU genuinely supports!
+        */
+       if (this_cpu->c_init)
+               this_cpu->c_init(c);
+
+       /* Disable the PN if appropriate */
+       squash_the_stupid_serial_number(c);
+
+       /*
+        * The vendor-specific functions might have changed features.  Now
+        * we do "generic changes."
+        */
+
+       /* TSC disabled? */
+       if ( tsc_disable )
+               clear_bit(X86_FEATURE_TSC, c->x86_capability);
+
+       /* FXSR disabled? */
+       if (disable_x86_fxsr) {
+               clear_bit(X86_FEATURE_FXSR, c->x86_capability);
+               clear_bit(X86_FEATURE_XMM, c->x86_capability);
+       }
+
+       /* SEP disabled? */
+       if (disable_x86_sep)
+               clear_bit(X86_FEATURE_SEP, c->x86_capability);
+
+       if (disable_pse)
+               clear_bit(X86_FEATURE_PSE, c->x86_capability);
+
+       /* If the model name is still unset, do table lookup. */
+       if ( !c->x86_model_id[0] ) {
+               char *p;
+               p = table_lookup_model(c);
+               if ( p )
+                       strcpy(c->x86_model_id, p);
+               else
+                       /* Last resort... */
+                       sprintf(c->x86_model_id, "%02x/%02x",
+                               c->x86, c->x86_model);
+       }
+
+       /* Now the feature flags better reflect actual CPU features! */
+
+       printk(KERN_DEBUG "CPU: After all inits, caps:");
+       for (i = 0; i < NCAPINTS; i++)
+               printk(" %08lx", c->x86_capability[i]);
+       printk("\n");
+
+       /*
+        * On SMP, boot_cpu_data holds the common feature set between
+        * all CPUs; so make sure that we indicate which features are
+        * common between the CPUs.  The first time this routine gets
+        * executed, c == &boot_cpu_data.
+        */
+       if ( c != &boot_cpu_data ) {
+               /* AND the already accumulated flags with these */
+               for ( i = 0 ; i < NCAPINTS ; i++ )
+                       boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+       }
+
+       /* Init Machine Check Exception if available. */
+       mcheck_init(c);
+
+       if (c == &boot_cpu_data)
+               sysenter_setup();
+       enable_sep_cpu();
+
+       if (c == &boot_cpu_data)
+               mtrr_bp_init();
+       else
+               mtrr_ap_init();
+}
+
+#ifdef CONFIG_X86_HT
+void __cpuinit detect_ht(struct cpuinfo_x86 *c)
+{
+       u32     eax, ebx, ecx, edx;
+       int     index_msb, core_bits;
+
+       cpuid(1, &eax, &ebx, &ecx, &edx);
+
+       if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
+               return;
+
+       smp_num_siblings = (ebx & 0xff0000) >> 16;
+
+       if (smp_num_siblings == 1) {
+               printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
+       } else if (smp_num_siblings > 1 ) {
+
+               if (smp_num_siblings > NR_CPUS) {
+                       printk(KERN_WARNING "CPU: Unsupported number of the "
+                                       "siblings %d", smp_num_siblings);
+                       smp_num_siblings = 1;
+                       return;
+               }
+
+               index_msb = get_count_order(smp_num_siblings);
+               c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+
+               printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
+                      c->phys_proc_id);
+
+               smp_num_siblings = smp_num_siblings / c->x86_max_cores;
+
+               index_msb = get_count_order(smp_num_siblings) ;
+
+               core_bits = get_count_order(c->x86_max_cores);
+
+               c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
+                                              ((1 << core_bits) - 1);
+
+               if (c->x86_max_cores > 1)
+                       printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
+                              c->cpu_core_id);
+       }
+}
+#endif
+
+void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
+{
+       char *vendor = NULL;
+
+       if (c->x86_vendor < X86_VENDOR_NUM)
+               vendor = this_cpu->c_vendor;
+       else if (c->cpuid_level >= 0)
+               vendor = c->x86_vendor_id;
+
+       if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor)))
+               printk("%s ", vendor);
+
+       if (!c->x86_model_id[0])
+               printk("%d86", c->x86);
+       else
+               printk("%s", c->x86_model_id);
+
+       if (c->x86_mask || c->cpuid_level >= 0) 
+               printk(" stepping %02x\n", c->x86_mask);
+       else
+               printk("\n");
+}
+
+cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
+
+/* This is hacky. :)
+ * We're emulating future behavior.
+ * In the future, the cpu-specific init functions will be called implicitly
+ * via the magic of initcalls.
+ * They will insert themselves into the cpu_devs structure.
+ * Then, when cpu_init() is called, we can just iterate over that array.
+ */
+
+extern int intel_cpu_init(void);
+extern int cyrix_init_cpu(void);
+extern int nsc_init_cpu(void);
+extern int amd_init_cpu(void);
+extern int centaur_init_cpu(void);
+extern int transmeta_init_cpu(void);
+extern int rise_init_cpu(void);
+extern int nexgen_init_cpu(void);
+extern int umc_init_cpu(void);
+
+void __init early_cpu_init(void)
+{
+       intel_cpu_init();
+       cyrix_init_cpu();
+       nsc_init_cpu();
+       amd_init_cpu();
+       centaur_init_cpu();
+       transmeta_init_cpu();
+       rise_init_cpu();
+       nexgen_init_cpu();
+       umc_init_cpu();
+       early_cpu_detect();
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+       /* pse is not compatible with on-the-fly unmapping,
+        * disable it even if the cpus claim to support it.
+        */
+       clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
+       disable_pse = 1;
+#endif
+}
+
+void __cpuinit cpu_gdt_init(struct Xgt_desc_struct *gdt_descr)
+{
+       unsigned long frames[16];
+       unsigned long va;
+       int f;
+
+       for (va = gdt_descr->address, f = 0;
+            va < gdt_descr->address + gdt_descr->size;
+            va += PAGE_SIZE, f++) {
+               frames[f] = virt_to_mfn(va);
+               make_lowmem_page_readonly(
+                       (void *)va, XENFEAT_writable_descriptor_tables);
+       }
+       if (HYPERVISOR_set_gdt(frames, gdt_descr->size / 8))
+               BUG();
+}
+
+/*
+ * cpu_init() initializes state that is per-CPU. Some data is already
+ * initialized (naturally) in the bootstrap process, such as the GDT
+ * and IDT. We reload them nevertheless, this function acts as a
+ * 'CPU state barrier', nothing should get across.
+ */
+void __cpuinit cpu_init(void)
+{
+       int cpu = smp_processor_id();
+#ifndef CONFIG_X86_NO_TSS
+       struct tss_struct * t = &per_cpu(init_tss, cpu);
+#endif
+       struct thread_struct *thread = &current->thread;
+       struct desc_struct *gdt;
+       struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+
+       if (cpu_test_and_set(cpu, cpu_initialized)) {
+               printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
+               for (;;) local_irq_enable();
+       }
+       printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+
+       if (cpu_has_vme || cpu_has_de)
+               clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+       if (tsc_disable && cpu_has_tsc) {
+               printk(KERN_NOTICE "Disabling TSC...\n");
+               /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
+               clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
+               set_in_cr4(X86_CR4_TSD);
+       }
+
+#ifndef CONFIG_XEN
+       /* The CPU hotplug case */
+       if (cpu_gdt_descr->address) {
+               gdt = (struct desc_struct *)cpu_gdt_descr->address;
+               memset(gdt, 0, PAGE_SIZE);
+               goto old_gdt;
+       }
+       /*
+        * This is a horrible hack to allocate the GDT.  The problem
+        * is that cpu_init() is called really early for the boot CPU
+        * (and hence needs bootmem) but much later for the secondary
+        * CPUs, when bootmem will have gone away
+        */
+       if (NODE_DATA(0)->bdata->node_bootmem_map) {
+               gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
+               /* alloc_bootmem_pages panics on failure, so no check */
+               memset(gdt, 0, PAGE_SIZE);
+       } else {
+               gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
+               if (unlikely(!gdt)) {
+                       printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
+                       for (;;)
+                               local_irq_enable();
+               }
+       }
+old_gdt:
+       /*
+        * Initialize the per-CPU GDT with the boot GDT,
+        * and set up the GDT descriptor:
+        */
+       memcpy(gdt, cpu_gdt_table, GDT_SIZE);
+
+       /* Set up GDT entry for 16bit stack */
+       *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |=
+               ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) |
+               ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
+               (CPU_16BIT_STACK_SIZE - 1);
+
+       cpu_gdt_descr->size = GDT_SIZE - 1;
+       cpu_gdt_descr->address = (unsigned long)gdt;
+#else
+       if (cpu == 0 && cpu_gdt_descr->address == 0) {
+               gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
+               /* alloc_bootmem_pages panics on failure, so no check */
+               memset(gdt, 0, PAGE_SIZE);
+
+               memcpy(gdt, cpu_gdt_table, GDT_SIZE);
+               
+               cpu_gdt_descr->size = GDT_SIZE;
+               cpu_gdt_descr->address = (unsigned long)gdt;
+       }
+#endif
+
+       cpu_gdt_init(cpu_gdt_descr);
+
+       /*
+        * Set up and load the per-CPU TSS and LDT
+        */
+       atomic_inc(&init_mm.mm_count);
+       current->active_mm = &init_mm;
+       if (current->mm)
+               BUG();
+       enter_lazy_tlb(&init_mm, current);
+
+       load_esp0(t, thread);
+
+       load_LDT(&init_mm.context);
+
+#ifdef CONFIG_DOUBLEFAULT
+       /* Set up doublefault TSS pointer in the GDT */
+       __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
+#endif
+
+       /* Clear %fs and %gs. */
+       asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
+
+       /* Clear all 6 debug registers: */
+       set_debugreg(0, 0);
+       set_debugreg(0, 1);
+       set_debugreg(0, 2);
+       set_debugreg(0, 3);
+       set_debugreg(0, 6);
+       set_debugreg(0, 7);
+
+       /*
+        * Force FPU initialization:
+        */
+       current_thread_info()->status = 0;
+       clear_used_math();
+       mxcsr_feature_mask_init();
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void __cpuinit cpu_uninit(void)
+{
+       int cpu = raw_smp_processor_id();
+       cpu_clear(cpu, cpu_initialized);
+
+       /* lazy TLB state */
+       per_cpu(cpu_tlbstate, cpu).state = 0;
+       per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
+}
+#endif
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/cpu/mtrr/Makefile
--- a/arch/i386/kernel/cpu/mtrr/Makefile        Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/cpu/mtrr/Makefile        Mon Jun 04 10:05:28 2007 +0100
@@ -3,3 +3,10 @@ obj-y          += cyrix.o
 obj-y          += cyrix.o
 obj-y          += centaur.o
 
+ifdef CONFIG_XEN
+include $(srctree)/scripts/Makefile.xen
+n-obj-xen := generic.o state.o amd.o cyrix.o centaur.o
+
+obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
+obj-y := $(call cherrypickxen, $(obj-y))
+endif
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/cpu/mtrr/main-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/cpu/mtrr/main-xen.c      Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,197 @@
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/ctype.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <asm/uaccess.h>
+#include <linux/mutex.h>
+
+#include <asm/mtrr.h>
+#include "mtrr.h"
+
+static DEFINE_MUTEX(mtrr_mutex);
+
+void generic_get_mtrr(unsigned int reg, unsigned long *base,
+                     unsigned int *size, mtrr_type * type)
+{
+       struct xen_platform_op op;
+
+       op.cmd = XENPF_read_memtype;
+       op.u.read_memtype.reg = reg;
+       (void)HYPERVISOR_platform_op(&op);
+
+       *size = op.u.read_memtype.nr_mfns;
+       *base = op.u.read_memtype.mfn;
+       *type = op.u.read_memtype.type;
+}
+
+struct mtrr_ops generic_mtrr_ops = {
+       .use_intel_if      = 1,
+       .get               = generic_get_mtrr,
+};
+
+struct mtrr_ops *mtrr_if = &generic_mtrr_ops;
+unsigned int num_var_ranges;
+unsigned int *usage_table;
+
+static void __init set_num_var_ranges(void)
+{
+       struct xen_platform_op op;
+
+       for (num_var_ranges = 0; ; num_var_ranges++) {
+               op.cmd = XENPF_read_memtype;
+               op.u.read_memtype.reg = num_var_ranges;
+               if (HYPERVISOR_platform_op(&op) != 0)
+                       break;
+       }
+}
+
+static void __init init_table(void)
+{
+       int i, max;
+
+       max = num_var_ranges;
+       if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
+           == NULL) {
+               printk(KERN_ERR "mtrr: could not allocate\n");
+               return;
+       }
+       for (i = 0; i < max; i++)
+               usage_table[i] = 0;
+}
+
+int mtrr_add_page(unsigned long base, unsigned long size, 
+                 unsigned int type, char increment)
+{
+       int error;
+       struct xen_platform_op op;
+
+       mutex_lock(&mtrr_mutex);
+
+       op.cmd = XENPF_add_memtype;
+       op.u.add_memtype.mfn     = base;
+       op.u.add_memtype.nr_mfns = size;
+       op.u.add_memtype.type    = type;
+       error = HYPERVISOR_platform_op(&op);
+       if (error) {
+               mutex_unlock(&mtrr_mutex);
+               BUG_ON(error > 0);
+               return error;
+       }
+
+       if (increment)
+               ++usage_table[op.u.add_memtype.reg];
+
+       mutex_unlock(&mtrr_mutex);
+
+       return op.u.add_memtype.reg;
+}
+
+static int mtrr_check(unsigned long base, unsigned long size)
+{
+       if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
+               printk(KERN_WARNING
+                       "mtrr: size and base must be multiples of 4 kiB\n");
+               printk(KERN_DEBUG
+                       "mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
+               dump_stack();
+               return -1;
+       }
+       return 0;
+}
+
+int
+mtrr_add(unsigned long base, unsigned long size, unsigned int type,
+        char increment)
+{
+       if (mtrr_check(base, size))
+               return -EINVAL;
+       return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
+                            increment);
+}
+
+int mtrr_del_page(int reg, unsigned long base, unsigned long size)
+{
+       unsigned i;
+       mtrr_type ltype;
+       unsigned long lbase;
+       unsigned int lsize;
+       int error = -EINVAL;
+       struct xen_platform_op op;
+
+       mutex_lock(&mtrr_mutex);
+
+       if (reg < 0) {
+               /*  Search for existing MTRR  */
+               for (i = 0; i < num_var_ranges; ++i) {
+                       mtrr_if->get(i, &lbase, &lsize, &ltype);
+                       if (lbase == base && lsize == size) {
+                               reg = i;
+                               break;
+                       }
+               }
+               if (reg < 0) {
+                       printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 
found\n", base,
+                              size);
+                       goto out;
+               }
+       }
+       if (usage_table[reg] < 1) {
+               printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
+               goto out;
+       }
+       if (--usage_table[reg] < 1) {
+               op.cmd = XENPF_del_memtype;
+               op.u.del_memtype.handle = 0;
+               op.u.del_memtype.reg    = reg;
+               error = HYPERVISOR_platform_op(&op);
+               if (error) {
+                       BUG_ON(error > 0);
+                       goto out;
+               }
+       }
+       error = reg;
+ out:
+       mutex_unlock(&mtrr_mutex);
+       return error;
+}
+
+int
+mtrr_del(int reg, unsigned long base, unsigned long size)
+{
+       if (mtrr_check(base, size))
+               return -EINVAL;
+       return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
+}
+
+EXPORT_SYMBOL(mtrr_add);
+EXPORT_SYMBOL(mtrr_del);
+
+void __init mtrr_bp_init(void)
+{
+}
+
+void mtrr_ap_init(void)
+{
+}
+
+static int __init mtrr_init(void)
+{
+       struct cpuinfo_x86 *c = &boot_cpu_data;
+
+       if (!is_initial_xendomain())
+               return -ENODEV;
+
+       if ((!cpu_has(c, X86_FEATURE_MTRR)) &&
+           (!cpu_has(c, X86_FEATURE_K6_MTRR)) &&
+           (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) &&
+           (!cpu_has(c, X86_FEATURE_CENTAUR_MCR)))
+               return -ENODEV;
+
+       set_num_var_ranges();
+       init_table();
+
+       return 0;
+}
+
+subsys_initcall(mtrr_init);
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/crash.c
--- a/arch/i386/kernel/crash.c  Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/crash.c  Mon Jun 04 10:05:28 2007 +0100
@@ -90,6 +90,7 @@ static void crash_save_self(struct pt_re
        crash_save_this_cpu(regs, cpu);
 }
 
+#ifndef CONFIG_XEN
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 static atomic_t waiting_for_crash_ipi;
 
@@ -154,6 +155,7 @@ static void nmi_shootdown_cpus(void)
        /* There are no cpus to shootdown */
 }
 #endif
+#endif /* CONFIG_XEN */
 
 void machine_crash_shutdown(struct pt_regs *regs)
 {
@@ -170,10 +172,12 @@ void machine_crash_shutdown(struct pt_re
 
        /* Make a note of crashing cpu. Will be used in NMI callback.*/
        crashing_cpu = smp_processor_id();
+#ifndef CONFIG_XEN
        nmi_shootdown_cpus();
        lapic_shutdown();
 #if defined(CONFIG_X86_IO_APIC)
        disable_IO_APIC();
 #endif
+#endif /* CONFIG_XEN */
        crash_save_self(regs);
 }
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/early_printk-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/early_printk-xen.c       Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,2 @@
+
+#include "../../x86_64/kernel/early_printk-xen.c"
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/entry-xen.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/entry-xen.S      Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,1216 @@
+/*
+ *  linux/arch/i386/entry.S
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+/*
+ * entry.S contains the system-call and fault low-level handling routines.
+ * This also contains the timer-interrupt handler, as well as all interrupts
+ * and faults that can result in a task-switch.
+ *
+ * NOTE: This code handles signal-recognition, which happens every time
+ * after a timer-interrupt and after each system call.
+ *
+ * I changed all the .align's to 4 (16 byte alignment), as that's faster
+ * on a 486.
+ *
+ * Stack layout in 'ret_from_system_call':
+ *     ptrace needs to have all regs on the stack.
+ *     if the order here is changed, it needs to be
+ *     updated in fork.c:copy_process, signal.c:do_signal,
+ *     ptrace.c and ptrace.h
+ *
+ *      0(%esp) - %ebx
+ *      4(%esp) - %ecx
+ *      8(%esp) - %edx
+ *       C(%esp) - %esi
+ *     10(%esp) - %edi
+ *     14(%esp) - %ebp
+ *     18(%esp) - %eax
+ *     1C(%esp) - %ds
+ *     20(%esp) - %es
+ *     24(%esp) - orig_eax
+ *     28(%esp) - %eip
+ *     2C(%esp) - %cs
+ *     30(%esp) - %eflags
+ *     34(%esp) - %oldesp
+ *     38(%esp) - %oldss
+ *
+ * "current" is in register %ebx during any slow entries.
+ */
+
+#include <linux/linkage.h>
+#include <asm/thread_info.h>
+#include <asm/irqflags.h>
+#include <asm/errno.h>
+#include <asm/segment.h>
+#include <asm/smp.h>
+#include <asm/page.h>
+#include <asm/desc.h>
+#include <asm/dwarf2.h>
+#include "irq_vectors.h"
+#include <xen/interface/xen.h>
+
+#define nr_syscalls ((syscall_table_size)/4)
+
+EBX            = 0x00
+ECX            = 0x04
+EDX            = 0x08
+ESI            = 0x0C
+EDI            = 0x10
+EBP            = 0x14
+EAX            = 0x18
+DS             = 0x1C
+ES             = 0x20
+ORIG_EAX       = 0x24
+EIP            = 0x28
+CS             = 0x2C
+EFLAGS         = 0x30
+OLDESP         = 0x34
+OLDSS          = 0x38
+
+CF_MASK                = 0x00000001
+TF_MASK                = 0x00000100
+IF_MASK                = 0x00000200
+DF_MASK                = 0x00000400 
+NT_MASK                = 0x00004000
+VM_MASK                = 0x00020000
+/* Pseudo-eflags. */
+NMI_MASK       = 0x80000000
+
+#ifndef CONFIG_XEN
+#define DISABLE_INTERRUPTS     cli
+#define ENABLE_INTERRUPTS      sti
+#else
+/* Offsets into shared_info_t. */
+#define evtchn_upcall_pending          /* 0 */
+#define evtchn_upcall_mask             1
+
+#define sizeof_vcpu_shift              6
+
+#ifdef CONFIG_SMP
+#define GET_VCPU_INFO          movl TI_cpu(%ebp),%esi                  ; \
+                               shl  $sizeof_vcpu_shift,%esi            ; \
+                               addl HYPERVISOR_shared_info,%esi
+#else
+#define GET_VCPU_INFO          movl HYPERVISOR_shared_info,%esi
+#endif
+
+#define __DISABLE_INTERRUPTS   movb $1,evtchn_upcall_mask(%esi)
+#define __ENABLE_INTERRUPTS    movb $0,evtchn_upcall_mask(%esi)
+#define DISABLE_INTERRUPTS     GET_VCPU_INFO                           ; \
+                               __DISABLE_INTERRUPTS
+#define ENABLE_INTERRUPTS      GET_VCPU_INFO                           ; \
+                               __ENABLE_INTERRUPTS
+#define __TEST_PENDING         testb $0xFF,evtchn_upcall_pending(%esi)
+#endif
+
+#ifdef CONFIG_PREEMPT
+#define preempt_stop           cli; TRACE_IRQS_OFF
+#else
+#define preempt_stop
+#define resume_kernel          restore_nocheck
+#endif
+
+.macro TRACE_IRQS_IRET
+#ifdef CONFIG_TRACE_IRQFLAGS
+       testl $IF_MASK,EFLAGS(%esp)     # interrupts off?
+       jz 1f
+       TRACE_IRQS_ON
+1:
+#endif
+.endm
+
+#ifdef CONFIG_VM86
+#define resume_userspace_sig   check_userspace
+#else
+#define resume_userspace_sig   resume_userspace
+#endif
+
+#define SAVE_ALL \
+       cld; \
+       pushl %es; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       /*CFI_REL_OFFSET es, 0;*/\
+       pushl %ds; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       /*CFI_REL_OFFSET ds, 0;*/\
+       pushl %eax; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET eax, 0;\
+       pushl %ebp; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET ebp, 0;\
+       pushl %edi; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET edi, 0;\
+       pushl %esi; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET esi, 0;\
+       pushl %edx; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET edx, 0;\
+       pushl %ecx; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET ecx, 0;\
+       pushl %ebx; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET ebx, 0;\
+       movl $(__USER_DS), %edx; \
+       movl %edx, %ds; \
+       movl %edx, %es;
+
+#define RESTORE_INT_REGS \
+       popl %ebx;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE ebx;\
+       popl %ecx;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE ecx;\
+       popl %edx;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE edx;\
+       popl %esi;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE esi;\
+       popl %edi;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE edi;\
+       popl %ebp;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE ebp;\
+       popl %eax;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE eax
+
+#define RESTORE_REGS   \
+       RESTORE_INT_REGS; \
+1:     popl %ds;       \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       /*CFI_RESTORE ds;*/\
+2:     popl %es;       \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       /*CFI_RESTORE es;*/\
+.section .fixup,"ax";  \
+3:     movl $0,(%esp); \
+       jmp 1b;         \
+4:     movl $0,(%esp); \
+       jmp 2b;         \
+.previous;             \
+.section __ex_table,"a";\
+       .align 4;       \
+       .long 1b,3b;    \
+       .long 2b,4b;    \
+.previous
+
+#define RING0_INT_FRAME \
+       CFI_STARTPROC simple;\
+       CFI_DEF_CFA esp, 3*4;\
+       /*CFI_OFFSET cs, -2*4;*/\
+       CFI_OFFSET eip, -3*4
+
+#define RING0_EC_FRAME \
+       CFI_STARTPROC simple;\
+       CFI_DEF_CFA esp, 4*4;\
+       /*CFI_OFFSET cs, -2*4;*/\
+       CFI_OFFSET eip, -3*4
+
+#define RING0_PTREGS_FRAME \
+       CFI_STARTPROC simple;\
+       CFI_DEF_CFA esp, OLDESP-EBX;\
+       /*CFI_OFFSET cs, CS-OLDESP;*/\
+       CFI_OFFSET eip, EIP-OLDESP;\
+       /*CFI_OFFSET es, ES-OLDESP;*/\
+       /*CFI_OFFSET ds, DS-OLDESP;*/\
+       CFI_OFFSET eax, EAX-OLDESP;\
+       CFI_OFFSET ebp, EBP-OLDESP;\
+       CFI_OFFSET edi, EDI-OLDESP;\
+       CFI_OFFSET esi, ESI-OLDESP;\
+       CFI_OFFSET edx, EDX-OLDESP;\
+       CFI_OFFSET ecx, ECX-OLDESP;\
+       CFI_OFFSET ebx, EBX-OLDESP
+
+ENTRY(ret_from_fork)
+       CFI_STARTPROC
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       call schedule_tail
+       GET_THREAD_INFO(%ebp)
+       popl %eax
+       CFI_ADJUST_CFA_OFFSET -4
+       pushl $0x0202                   # Reset kernel eflags
+       CFI_ADJUST_CFA_OFFSET 4
+       popfl
+       CFI_ADJUST_CFA_OFFSET -4
+       jmp syscall_exit
+       CFI_ENDPROC
+
+/*
+ * Return to user mode is not as complex as all this looks,
+ * but we want the default path for a system call return to
+ * go as quickly as possible which is why some of this is
+ * less clear than it otherwise should be.
+ */
+
+       # userspace resumption stub bypassing syscall exit tracing
+       ALIGN
+       RING0_PTREGS_FRAME
+ret_from_exception:
+       preempt_stop
+ret_from_intr:
+       GET_THREAD_INFO(%ebp)
+check_userspace:
+       movl EFLAGS(%esp), %eax         # mix EFLAGS and CS
+       movb CS(%esp), %al
+       testl $(VM_MASK | 2), %eax
+       jz resume_kernel
+ENTRY(resume_userspace)
+       DISABLE_INTERRUPTS              # make sure we don't miss an interrupt
+                                       # setting need_resched or sigpending
+                                       # between sampling and the iret
+       movl TI_flags(%ebp), %ecx
+       andl $_TIF_WORK_MASK, %ecx      # is there any work to be done on
+                                       # int/exception return?
+       jne work_pending
+       jmp restore_all
+
+#ifdef CONFIG_PREEMPT
+ENTRY(resume_kernel)
+       cli
+       cmpl $0,TI_preempt_count(%ebp)  # non-zero preempt_count ?
+       jnz restore_nocheck
+need_resched:
+       movl TI_flags(%ebp), %ecx       # need_resched set ?
+       testb $_TIF_NEED_RESCHED, %cl
+       jz restore_all
+       testl $IF_MASK,EFLAGS(%esp)     # interrupts off (exception path) ?
+       jz restore_all
+       call preempt_schedule_irq
+       jmp need_resched
+#endif
+       CFI_ENDPROC
+
+/* SYSENTER_RETURN points to after the "sysenter" instruction in
+   the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
+
+       # sysenter call handler stub
+ENTRY(sysenter_entry)
+       CFI_STARTPROC simple
+       CFI_DEF_CFA esp, 0
+       CFI_REGISTER esp, ebp
+       movl SYSENTER_stack_esp0(%esp),%esp
+sysenter_past_esp:
+       /*
+        * No need to follow this irqs on/off section: the syscall
+        * disabled irqs and here we enable it straight after entry:
+        */
+       sti
+       pushl $(__USER_DS)
+       CFI_ADJUST_CFA_OFFSET 4
+       /*CFI_REL_OFFSET ss, 0*/
+       pushl %ebp
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET esp, 0
+       pushfl
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl $(__USER_CS)
+       CFI_ADJUST_CFA_OFFSET 4
+       /*CFI_REL_OFFSET cs, 0*/
+       /*
+        * Push current_thread_info()->sysenter_return to the stack.
+        * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
+        * pushed above; +8 corresponds to copy_thread's esp0 setting.
+        */
+       pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET eip, 0
+
+/*
+ * Load the potential sixth argument from user stack.
+ * Careful about security.
+ */
+       cmpl $__PAGE_OFFSET-3,%ebp
+       jae syscall_fault
+1:     movl (%ebp),%ebp
+.section __ex_table,"a"
+       .align 4
+       .long 1b,syscall_fault
+.previous
+
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+       GET_THREAD_INFO(%ebp)
+
+       /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not 
testb */
+       testw 
$(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+       jnz syscall_trace_entry
+       cmpl $(nr_syscalls), %eax
+       jae syscall_badsys
+       call *sys_call_table(,%eax,4)
+       movl %eax,EAX(%esp)
+       DISABLE_INTERRUPTS
+       TRACE_IRQS_OFF
+       movl TI_flags(%ebp), %ecx
+       testw $_TIF_ALLWORK_MASK, %cx
+       jne syscall_exit_work
+/* if something modifies registers it must also disable sysexit */
+       movl EIP(%esp), %edx
+       movl OLDESP(%esp), %ecx
+       xorl %ebp,%ebp
+#ifdef CONFIG_XEN
+       TRACE_IRQS_ON
+       __ENABLE_INTERRUPTS
+sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/
+       __TEST_PENDING
+       jnz  14f                        # process more events if necessary...
+       movl ESI(%esp), %esi
+       sysexit
+14:    __DISABLE_INTERRUPTS
+       TRACE_IRQS_OFF
+sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/
+       push %esp
+       call evtchn_do_upcall
+       add  $4,%esp
+       jmp  ret_from_intr
+#else
+       TRACE_IRQS_ON
+       sti
+       sysexit
+#endif /* !CONFIG_XEN */
+       CFI_ENDPROC
+
+
+       # system call handler stub
+ENTRY(system_call)
+       RING0_INT_FRAME                 # can't unwind into user space anyway
+       pushl %eax                      # save orig_eax
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+       GET_THREAD_INFO(%ebp)
+       testl $TF_MASK,EFLAGS(%esp)
+       jz no_singlestep
+       orl $_TIF_SINGLESTEP,TI_flags(%ebp)
+no_singlestep:
+                                       # system call tracing in operation / 
emulation
+       /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not 
testb */
+       testw 
$(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+       jnz syscall_trace_entry
+       cmpl $(nr_syscalls), %eax
+       jae syscall_badsys
+syscall_call:
+       call *sys_call_table(,%eax,4)
+       movl %eax,EAX(%esp)             # store the return value
+syscall_exit:
+       DISABLE_INTERRUPTS              # make sure we don't miss an interrupt
+                                       # setting need_resched or sigpending
+                                       # between sampling and the iret
+       TRACE_IRQS_OFF
+       movl TI_flags(%ebp), %ecx
+       testw $_TIF_ALLWORK_MASK, %cx   # current->work
+       jne syscall_exit_work
+
+restore_all:
+#ifndef CONFIG_XEN
+       movl EFLAGS(%esp), %eax         # mix EFLAGS, SS and CS
+       # Warning: OLDSS(%esp) contains the wrong/random values if we
+       # are returning to the kernel.
+       # See comments in process.c:copy_thread() for details.
+       movb OLDSS(%esp), %ah
+       movb CS(%esp), %al
+       andl $(VM_MASK | (4 << 8) | 3), %eax
+       cmpl $((4 << 8) | 3), %eax
+       CFI_REMEMBER_STATE
+       je ldt_ss                       # returning to user-space with LDT SS
+restore_nocheck:
+#else
+restore_nocheck:
+       movl EFLAGS(%esp), %eax
+       testl $(VM_MASK|NMI_MASK), %eax
+       CFI_REMEMBER_STATE
+       jnz hypervisor_iret
+       shr $9, %eax                    # EAX[0] == IRET_EFLAGS.IF
+       GET_VCPU_INFO
+       andb evtchn_upcall_mask(%esi),%al
+       andb $1,%al                     # EAX[0] == IRET_EFLAGS.IF & event_mask
+       CFI_REMEMBER_STATE
+       jnz restore_all_enable_events   #        != 0 => enable event delivery
+#endif
+       TRACE_IRQS_IRET
+restore_nocheck_notrace:
+       RESTORE_REGS
+       addl $4, %esp
+       CFI_ADJUST_CFA_OFFSET -4
+1:     iret
+.section .fixup,"ax"
+iret_exc:
+#ifndef CONFIG_XEN
+       TRACE_IRQS_ON
+       sti
+#endif
+       pushl $0                        # no error code
+       pushl $do_iret_error
+       jmp error_code
+.previous
+.section __ex_table,"a"
+       .align 4
+       .long 1b,iret_exc
+.previous
+
+       CFI_RESTORE_STATE
+#ifndef CONFIG_XEN
+ldt_ss:
+       larl OLDSS(%esp), %eax
+       jnz restore_nocheck
+       testl $0x00400000, %eax         # returning to 32bit stack?
+       jnz restore_nocheck             # allright, normal return
+       /* If returning to userspace with 16bit stack,
+        * try to fix the higher word of ESP, as the CPU
+        * won't restore it.
+        * This is an "official" bug of all the x86-compatible
+        * CPUs, which we can try to work around to make
+        * dosemu and wine happy. */
+       subl $8, %esp           # reserve space for switch16 pointer
+       CFI_ADJUST_CFA_OFFSET 8
+       cli
+       TRACE_IRQS_OFF
+       movl %esp, %eax
+       /* Set up the 16bit stack frame with switch32 pointer on top,
+        * and a switch16 pointer on top of the current frame. */
+       call setup_x86_bogus_stack
+       CFI_ADJUST_CFA_OFFSET -8        # frame has moved
+       TRACE_IRQS_IRET
+       RESTORE_REGS
+       lss 20+4(%esp), %esp    # switch to 16bit stack
+1:     iret
+.section __ex_table,"a"
+       .align 4
+       .long 1b,iret_exc
+.previous
+#else
+        ALIGN
+restore_all_enable_events:
+       TRACE_IRQS_ON
+       __ENABLE_INTERRUPTS
+scrit: /**** START OF CRITICAL REGION ****/
+       __TEST_PENDING
+       jnz  14f                        # process more events if necessary...
+       RESTORE_REGS
+       addl $4, %esp
+       CFI_ADJUST_CFA_OFFSET -4
+1:     iret
+.section __ex_table,"a"
+       .align 4
+       .long 1b,iret_exc
+.previous
+14:    __DISABLE_INTERRUPTS
+       TRACE_IRQS_OFF
+       jmp  11f
+ecrit:  /**** END OF CRITICAL REGION ****/
+
+       CFI_RESTORE_STATE
+hypervisor_iret:
+       andl $~NMI_MASK, EFLAGS(%esp)
+       RESTORE_REGS
+       addl $4, %esp
+       CFI_ADJUST_CFA_OFFSET -4
+       jmp  hypercall_page + (__HYPERVISOR_iret * 32)
+#endif
+       CFI_ENDPROC
+
+       # perform work that needs to be done immediately before resumption
+       ALIGN
+       RING0_PTREGS_FRAME              # can't unwind into user space anyway
+work_pending:
+       testb $_TIF_NEED_RESCHED, %cl
+       jz work_notifysig
+work_resched:
+       call schedule
+       DISABLE_INTERRUPTS              # make sure we don't miss an interrupt
+                                       # setting need_resched or sigpending
+                                       # between sampling and the iret
+       TRACE_IRQS_OFF
+       movl TI_flags(%ebp), %ecx
+       andl $_TIF_WORK_MASK, %ecx      # is there any work to be done other
+                                       # than syscall tracing?
+       jz restore_all
+       testb $_TIF_NEED_RESCHED, %cl
+       jnz work_resched
+
+work_notifysig:                                # deal with pending signals and
+                                       # notify-resume requests
+       testl $VM_MASK, EFLAGS(%esp)
+       movl %esp, %eax
+       jne work_notifysig_v86          # returning to kernel-space or
+                                       # vm86-space
+       xorl %edx, %edx
+       call do_notify_resume
+       jmp resume_userspace_sig
+
+       ALIGN
+work_notifysig_v86:
+#ifdef CONFIG_VM86
+       pushl %ecx                      # save ti_flags for do_notify_resume
+       CFI_ADJUST_CFA_OFFSET 4
+       call save_v86_state             # %eax contains pt_regs pointer
+       popl %ecx
+       CFI_ADJUST_CFA_OFFSET -4
+       movl %eax, %esp
+       xorl %edx, %edx
+       call do_notify_resume
+       jmp resume_userspace_sig
+#endif
+
+       # perform syscall exit tracing
+       ALIGN
+syscall_trace_entry:
+       movl $-ENOSYS,EAX(%esp)
+       movl %esp, %eax
+       xorl %edx,%edx
+       call do_syscall_trace
+       cmpl $0, %eax
+       jne resume_userspace            # ret != 0 -> running under 
PTRACE_SYSEMU,
+                                       # so must skip actual syscall
+       movl ORIG_EAX(%esp), %eax
+       cmpl $(nr_syscalls), %eax
+       jnae syscall_call
+       jmp syscall_exit
+
+       # perform syscall exit tracing
+       ALIGN
+syscall_exit_work:
+       testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
+       jz work_pending
+       TRACE_IRQS_ON
+       ENABLE_INTERRUPTS               # could let do_syscall_trace() call
+                                       # schedule() instead
+       movl %esp, %eax
+       movl $1, %edx
+       call do_syscall_trace
+       jmp resume_userspace
+       CFI_ENDPROC
+
+       RING0_INT_FRAME                 # can't unwind into user space anyway
+syscall_fault:
+       pushl %eax                      # save orig_eax
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+       GET_THREAD_INFO(%ebp)
+       movl $-EFAULT,EAX(%esp)
+       jmp resume_userspace
+
+syscall_badsys:
+       movl $-ENOSYS,EAX(%esp)
+       jmp resume_userspace
+       CFI_ENDPROC
+
+#ifndef CONFIG_XEN
+#define FIXUP_ESPFIX_STACK \
+       movl %esp, %eax; \
+       /* switch to 32bit stack using the pointer on top of 16bit stack */ \
+       lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
+       /* copy data from 16bit stack to 32bit stack */ \
+       call fixup_x86_bogus_stack; \
+       /* put ESP to the proper location */ \
+       movl %eax, %esp;
+#define UNWIND_ESPFIX_STACK \
+       pushl %eax; \
+       CFI_ADJUST_CFA_OFFSET 4; \
+       movl %ss, %eax; \
+       /* see if on 16bit stack */ \
+       cmpw $__ESPFIX_SS, %ax; \
+       je 28f; \
+27:    popl %eax; \
+       CFI_ADJUST_CFA_OFFSET -4; \
+.section .fixup,"ax"; \
+28:    movl $__KERNEL_DS, %eax; \
+       movl %eax, %ds; \
+       movl %eax, %es; \
+       /* switch to 32bit stack */ \
+       FIXUP_ESPFIX_STACK; \
+       jmp 27b; \
+.previous
+
+/*
+ * Build the entry stubs and pointer table with
+ * some assembler magic.
+ */
+.data
+ENTRY(interrupt)
+.text
+
+vector=0
+ENTRY(irq_entries_start)
+       RING0_INT_FRAME
+.rept NR_IRQS
+       ALIGN
+ .if vector
+       CFI_ADJUST_CFA_OFFSET -4
+ .endif
+1:     pushl $~(vector)
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp common_interrupt
+.data
+       .long 1b
+.text
+vector=vector+1
+.endr
+
+/*
+ * the CPU automatically disables interrupts when executing an IRQ vector,
+ * so IRQ-flags tracing has to follow that:
+ */
+       ALIGN
+common_interrupt:
+       SAVE_ALL
+       TRACE_IRQS_OFF
+       movl %esp,%eax
+       call do_IRQ
+       jmp ret_from_intr
+       CFI_ENDPROC
+
+#define BUILD_INTERRUPT(name, nr)      \
+ENTRY(name)                            \
+       RING0_INT_FRAME;                \
+       pushl $~(nr);                   \
+       CFI_ADJUST_CFA_OFFSET 4;        \
+       SAVE_ALL;                       \
+       TRACE_IRQS_OFF                  \
+       movl %esp,%eax;                 \
+       call smp_/**/name;              \
+       jmp ret_from_intr;              \
+       CFI_ENDPROC
+
+/* The include is where all of the SMP etc. interrupts come from */
+#include "entry_arch.h"
+#else
+#define UNWIND_ESPFIX_STACK
+#endif
+
+ENTRY(divide_error)
+       RING0_INT_FRAME
+       pushl $0                        # no error code
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl $do_divide_error
+       CFI_ADJUST_CFA_OFFSET 4
+       ALIGN
+error_code:
+       pushl %ds
+       CFI_ADJUST_CFA_OFFSET 4
+       /*CFI_REL_OFFSET ds, 0*/
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET eax, 0
+       xorl %eax, %eax
+       pushl %ebp
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET ebp, 0
+       pushl %edi
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET edi, 0
+       pushl %esi
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET esi, 0
+       pushl %edx
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET edx, 0
+       decl %eax                       # eax = -1
+       pushl %ecx
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET ecx, 0
+       pushl %ebx
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET ebx, 0
+       cld
+       pushl %es
+       CFI_ADJUST_CFA_OFFSET 4
+       /*CFI_REL_OFFSET es, 0*/
+       UNWIND_ESPFIX_STACK
+       popl %ecx
+       CFI_ADJUST_CFA_OFFSET -4
+       /*CFI_REGISTER es, ecx*/
+       movl ES(%esp), %edi             # get the function address
+       movl ORIG_EAX(%esp), %edx       # get the error code
+       movl %eax, ORIG_EAX(%esp)
+       movl %ecx, ES(%esp)
+       /*CFI_REL_OFFSET es, ES*/
+       movl $(__USER_DS), %ecx
+       movl %ecx, %ds
+       movl %ecx, %es
+       movl %esp,%eax                  # pt_regs pointer
+       call *%edi
+       jmp ret_from_exception
+       CFI_ENDPROC
+
+#ifdef CONFIG_XEN
+# A note on the "critical region" in our callback handler.
+# We want to avoid stacking callback handlers due to events occurring
+# during handling of the last event. To do this, we keep events disabled
+# until we've done all processing. HOWEVER, we must enable events before
+# popping the stack frame (can't be done atomically) and so it would still
+# be possible to get enough handler activations to overflow the stack.
+# Although unlikely, bugs of that kind are hard to track down, so we'd
+# like to avoid the possibility.
+# So, on entry to the handler we detect whether we interrupted an
+# existing activation in its critical region -- if so, we pop the current
+# activation and restart the handler using the previous one.
+#
+# The sysexit critical region is slightly different. sysexit
+# atomically removes the entire stack frame. If we interrupt in the
+# critical region we know that the entire frame is present and correct
+# so we can simply throw away the new one.
+ENTRY(hypervisor_callback)
+       RING0_INT_FRAME
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+       movl EIP(%esp),%eax
+       cmpl $scrit,%eax
+       jb   11f
+       cmpl $ecrit,%eax
+       jb   critical_region_fixup
+       cmpl $sysexit_scrit,%eax
+       jb   11f
+       cmpl $sysexit_ecrit,%eax
+       ja   11f
+       addl $OLDESP,%esp               # Remove eflags...ebx from stack frame.
+11:    push %esp
+       CFI_ADJUST_CFA_OFFSET 4
+       call evtchn_do_upcall
+       add  $4,%esp
+       CFI_ADJUST_CFA_OFFSET -4
+       jmp  ret_from_intr
+       CFI_ENDPROC
+
+# [How we do the fixup]. We want to merge the current stack frame with the
+# just-interrupted frame. How we do this depends on where in the critical
+# region the interrupted handler was executing, and so how many saved
+# registers are in each frame. We do this quickly using the lookup table
+# 'critical_fixup_table'. For each byte offset in the critical region, it
+# provides the number of bytes which have already been popped from the
+# interrupted stack frame.
+critical_region_fixup:
+       movzbl critical_fixup_table-scrit(%eax),%ecx # %eax contains num bytes 
popped
+       cmpb $0xff,%cl                  # 0xff => vcpu_info critical region
+       jne  15f
+       xorl %ecx,%ecx
+15:    leal (%esp,%ecx),%esi           # %esi points at end of src region
+       leal OLDESP(%esp),%edi          # %edi points at end of dst region
+       shrl $2,%ecx                    # convert words to bytes
+       je   17f                        # skip loop if nothing to copy
+16:    subl $4,%esi                    # pre-decrementing copy loop
+       subl $4,%edi
+       movl (%esi),%eax
+       movl %eax,(%edi)
+       loop 16b
+17:    movl %edi,%esp                  # final %edi is top of merged stack
+       jmp  11b
+
+.section .rodata,"a"
+critical_fixup_table:
+       .byte 0xff,0xff,0xff            # testb $0xff,(%esi) = __TEST_PENDING
+       .byte 0xff,0xff                 # jnz  14f
+       .byte 0x00                      # pop  %ebx
+       .byte 0x04                      # pop  %ecx
+       .byte 0x08                      # pop  %edx
+       .byte 0x0c                      # pop  %esi
+       .byte 0x10                      # pop  %edi
+       .byte 0x14                      # pop  %ebp
+       .byte 0x18                      # pop  %eax
+       .byte 0x1c                      # pop  %ds
+       .byte 0x20                      # pop  %es
+       .byte 0x24,0x24,0x24            # add  $4,%esp
+       .byte 0x28                      # iret
+       .byte 0xff,0xff,0xff,0xff       # movb $1,1(%esi)
+       .byte 0x00,0x00                 # jmp  11b
+.previous
+
+# Hypervisor uses this for application faults while it executes.
+# We get here for two reasons:
+#  1. Fault while reloading DS, ES, FS or GS
+#  2. Fault while executing IRET
+# Category 1 we fix up by reattempting the load, and zeroing the segment
+# register if the load fails.
+# Category 2 we fix up by jumping to do_iret_error. We cannot use the
+# normal Linux return path in this case because if we use the IRET hypercall
+# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
+# We distinguish between categories by maintaining a status value in EAX.
+ENTRY(failsafe_callback)
+       pushl %eax
+       movl $1,%eax
+1:     mov 4(%esp),%ds
+2:     mov 8(%esp),%es
+3:     mov 12(%esp),%fs
+4:     mov 16(%esp),%gs
+       testl %eax,%eax
+       popl %eax
+       jz 5f
+       addl $16,%esp           # EAX != 0 => Category 2 (Bad IRET)
+       jmp iret_exc
+5:     addl $16,%esp           # EAX == 0 => Category 1 (Bad segment)
+       RING0_INT_FRAME
+       pushl $0
+       SAVE_ALL
+       jmp ret_from_exception
+.section .fixup,"ax";          \
+6:     xorl %eax,%eax;         \
+       movl %eax,4(%esp);      \
+       jmp 1b;                 \
+7:     xorl %eax,%eax;         \
+       movl %eax,8(%esp);      \
+       jmp 2b;                 \
+8:     xorl %eax,%eax;         \
+       movl %eax,12(%esp);     \
+       jmp 3b;                 \
+9:     xorl %eax,%eax;         \
+       movl %eax,16(%esp);     \
+       jmp 4b;                 \
+.previous;                     \
+.section __ex_table,"a";       \
+       .align 4;               \
+       .long 1b,6b;            \
+       .long 2b,7b;            \
+       .long 3b,8b;            \
+       .long 4b,9b;            \
+.previous
+#endif
+       CFI_ENDPROC
+
+ENTRY(coprocessor_error)
+       RING0_INT_FRAME
+       pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl $do_coprocessor_error
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+ENTRY(simd_coprocessor_error)
+       RING0_INT_FRAME
+       pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl $do_simd_coprocessor_error
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+ENTRY(device_not_available)
+       RING0_INT_FRAME
+       pushl $-1                       # mark this as an int
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+#ifndef CONFIG_XEN
+       movl %cr0, %eax
+       testl $0x4, %eax                # EM (math emulation bit)
+       je device_available_emulate
+       pushl $0                        # temporary storage for ORIG_EIP
+       CFI_ADJUST_CFA_OFFSET 4
+       call math_emulate
+       addl $4, %esp
+       CFI_ADJUST_CFA_OFFSET -4
+       jmp ret_from_exception
+device_available_emulate:
+#endif
+       preempt_stop
+       call math_state_restore
+       jmp ret_from_exception
+       CFI_ENDPROC
+
+#ifndef CONFIG_XEN
+/*
+ * Debug traps and NMI can happen at the one SYSENTER instruction
+ * that sets up the real kernel stack. Check here, since we can't
+ * allow the wrong stack to be used.
+ *
+ * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have
+ * already pushed 3 words if it hits on the sysenter instruction:
+ * eflags, cs and eip.
+ *
+ * We just load the right stack, and push the three (known) values
+ * by hand onto the new stack - while updating the return eip past
+ * the instruction that would have done it for sysenter.
+ */
+#define FIX_STACK(offset, ok, label)           \
+       cmpw $__KERNEL_CS,4(%esp);              \
+       jne ok;                                 \
+label:                                         \
+       movl SYSENTER_stack_esp0+offset(%esp),%esp;     \
+       pushfl;                                 \
+       pushl $__KERNEL_CS;                     \
+       pushl $sysenter_past_esp
+#endif /* CONFIG_XEN */
+
+KPROBE_ENTRY(debug)
+       RING0_INT_FRAME
+#ifndef CONFIG_XEN
+       cmpl $sysenter_entry,(%esp)
+       jne debug_stack_correct
+       FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
+debug_stack_correct:
+#endif /* !CONFIG_XEN */
+       pushl $-1                       # mark this as an int
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+       xorl %edx,%edx                  # error code 0
+       movl %esp,%eax                  # pt_regs pointer
+       call do_debug
+       jmp ret_from_exception
+       CFI_ENDPROC
+       .previous .text
+#ifndef CONFIG_XEN
+/*
+ * NMI is doubly nasty. It can happen _while_ we're handling
+ * a debug fault, and the debug fault hasn't yet been able to
+ * clear up the stack. So we first check whether we got  an
+ * NMI on the sysenter entry path, but after that we need to
+ * check whether we got an NMI on the debug path where the debug
+ * fault happened on the sysenter path.
+ */
+ENTRY(nmi)
+       RING0_INT_FRAME
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       movl %ss, %eax
+       cmpw $__ESPFIX_SS, %ax
+       popl %eax
+       CFI_ADJUST_CFA_OFFSET -4
+       je nmi_16bit_stack
+       cmpl $sysenter_entry,(%esp)
+       je nmi_stack_fixup
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       movl %esp,%eax
+       /* Do not access memory above the end of our stack page,
+        * it might not exist.
+        */
+       andl $(THREAD_SIZE-1),%eax
+       cmpl $(THREAD_SIZE-20),%eax
+       popl %eax
+       CFI_ADJUST_CFA_OFFSET -4
+       jae nmi_stack_correct
+       cmpl $sysenter_entry,12(%esp)
+       je nmi_debug_stack_check
+nmi_stack_correct:
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+       xorl %edx,%edx          # zero error code
+       movl %esp,%eax          # pt_regs pointer
+       call do_nmi
+       jmp restore_nocheck_notrace
+       CFI_ENDPROC
+
+nmi_stack_fixup:
+       FIX_STACK(12,nmi_stack_correct, 1)
+       jmp nmi_stack_correct
+nmi_debug_stack_check:
+       cmpw $__KERNEL_CS,16(%esp)
+       jne nmi_stack_correct
+       cmpl $debug,(%esp)
+       jb nmi_stack_correct
+       cmpl $debug_esp_fix_insn,(%esp)
+       ja nmi_stack_correct
+       FIX_STACK(24,nmi_stack_correct, 1)
+       jmp nmi_stack_correct
+
+nmi_16bit_stack:
+       RING0_INT_FRAME
+       /* create the pointer to lss back */
+       pushl %ss
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl %esp
+       CFI_ADJUST_CFA_OFFSET 4
+       movzwl %sp, %esp
+       addw $4, (%esp)
+       /* copy the iret frame of 12 bytes */
+       .rept 3
+       pushl 16(%esp)
+       CFI_ADJUST_CFA_OFFSET 4
+       .endr
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+       FIXUP_ESPFIX_STACK              # %eax == %esp
+       CFI_ADJUST_CFA_OFFSET -20       # the frame has now moved
+       xorl %edx,%edx                  # zero error code
+       call do_nmi
+       RESTORE_REGS
+       lss 12+4(%esp), %esp            # back to 16bit stack
+1:     iret
+       CFI_ENDPROC
+.section __ex_table,"a"
+       .align 4
+       .long 1b,iret_exc
+.previous
+#else
+ENTRY(nmi)
+       RING0_INT_FRAME
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+       xorl %edx,%edx          # zero error code
+       movl %esp,%eax          # pt_regs pointer
+       call do_nmi
+       orl  $NMI_MASK, EFLAGS(%esp)
+       jmp restore_all
+       CFI_ENDPROC
+#endif
+
+KPROBE_ENTRY(int3)
+       RING0_INT_FRAME
+       pushl $-1                       # mark this as an int
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+       xorl %edx,%edx          # zero error code
+       movl %esp,%eax          # pt_regs pointer
+       call do_int3
+       jmp ret_from_exception
+       CFI_ENDPROC
+       .previous .text
+
+ENTRY(overflow)
+       RING0_INT_FRAME
+       pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl $do_overflow
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+ENTRY(bounds)
+       RING0_INT_FRAME
+       pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl $do_bounds
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+ENTRY(invalid_op)
+       RING0_INT_FRAME
+       pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl $do_invalid_op
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+ENTRY(coprocessor_segment_overrun)
+       RING0_INT_FRAME
+       pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl $do_coprocessor_segment_overrun
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+ENTRY(invalid_TSS)
+       RING0_EC_FRAME
+       pushl $do_invalid_TSS
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+ENTRY(segment_not_present)
+       RING0_EC_FRAME
+       pushl $do_segment_not_present
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+ENTRY(stack_segment)
+       RING0_EC_FRAME
+       pushl $do_stack_segment
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+KPROBE_ENTRY(general_protection)
+       RING0_EC_FRAME
+       pushl $do_general_protection
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+       .previous .text
+
+ENTRY(alignment_check)
+       RING0_EC_FRAME
+       pushl $do_alignment_check
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+KPROBE_ENTRY(page_fault)
+       RING0_EC_FRAME
+       pushl $do_page_fault
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+       .previous .text
+
+#ifdef CONFIG_X86_MCE
+ENTRY(machine_check)
+       RING0_INT_FRAME
+       pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl machine_check_vector
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+#endif
+
+#ifndef CONFIG_XEN
+ENTRY(spurious_interrupt_bug)
+       RING0_INT_FRAME
+       pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl $do_spurious_interrupt_bug
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+#endif /* !CONFIG_XEN */
+
+#ifdef CONFIG_STACK_UNWIND
+ENTRY(arch_unwind_init_running)
+       CFI_STARTPROC
+       movl    4(%esp), %edx
+       movl    (%esp), %ecx
+       leal    4(%esp), %eax
+       movl    %ebx, EBX(%edx)
+       xorl    %ebx, %ebx
+       movl    %ebx, ECX(%edx)
+       movl    %ebx, EDX(%edx)
+       movl    %esi, ESI(%edx)
+       movl    %edi, EDI(%edx)
+       movl    %ebp, EBP(%edx)
+       movl    %ebx, EAX(%edx)
+       movl    $__USER_DS, DS(%edx)
+       movl    $__USER_DS, ES(%edx)
+       movl    %ebx, ORIG_EAX(%edx)
+       movl    %ecx, EIP(%edx)
+       movl    12(%esp), %ecx
+       movl    $__KERNEL_CS, CS(%edx)
+       movl    %ebx, EFLAGS(%edx)
+       movl    %eax, OLDESP(%edx)
+       movl    8(%esp), %eax
+       movl    %ecx, 8(%esp)
+       movl    EBX(%edx), %ebx
+       movl    $__KERNEL_DS, OLDSS(%edx)
+       jmpl    *%eax
+       CFI_ENDPROC
+ENDPROC(arch_unwind_init_running)
+#endif
+
+ENTRY(fixup_4gb_segment)
+       RING0_EC_FRAME
+       pushl $do_fixup_4gb_segment
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+.section .rodata,"a"
+.align 4
+#include "syscall_table.S"
+
+syscall_table_size=(.-sys_call_table)
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/fixup.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/fixup.c  Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,88 @@
+/******************************************************************************
+ * fixup.c
+ * 
+ * Binary-rewriting of certain IA32 instructions, on notification by Xen.
+ * Used to avoid repeated slow emulation of common instructions used by the
+ * user-space TLS (Thread-Local Storage) libraries.
+ * 
+ * **** NOTE ****
+ *  Issues with the binary rewriting have caused it to be removed. Instead
+ *  we rely on Xen's emulator to boot the kernel, and then print a banner
+ *  message recommending that the user disables /lib/tls.
+ * 
+ * Copyright (c) 2004, K A Fraser
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/version.h>
+
+#define DP(_f, _args...) printk(KERN_ALERT "  " _f "\n" , ## _args )
+
+fastcall void do_fixup_4gb_segment(struct pt_regs *regs, long error_code)
+{
+       static unsigned long printed = 0;
+       char info[100];
+       int i;
+
+       /* Ignore statically-linked init. */
+       if (current->tgid == 1)
+               return;
+            
+       HYPERVISOR_vm_assist(
+               VMASST_CMD_disable, VMASST_TYPE_4gb_segments_notify);
+
+       if (test_and_set_bit(0, &printed))
+               return;
+
+       sprintf(info, "%s (pid=%d)", current->comm, current->tgid);
+
+       DP("");
+       DP("***************************************************************");
+       DP("***************************************************************");
+       DP("** WARNING: Currently emulating unsupported memory accesses  **");
+       DP("**          in /lib/tls glibc libraries. The emulation is    **");
+       DP("**          slow. To ensure full performance you should      **");
+       DP("**          install a 'xen-friendly' (nosegneg) version of   **");
+       DP("**          the library, or disable tls support by executing **");
+       DP("**          the following as root:                           **");
+       DP("**          mv /lib/tls /lib/tls.disabled                    **");
+       DP("** Offending process: %-38.38s **", info);
+       DP("***************************************************************");
+       DP("***************************************************************");
+       DP("");
+
+       for (i = 5; i > 0; i--) {
+               touch_softlockup_watchdog();
+               printk("Pausing... %d", i);
+               mdelay(1000);
+               printk("\b\b\b\b\b\b\b\b\b\b\b\b");
+       }
+
+       printk("Continuing...\n\n");
+}
+
+static int __init fixup_init(void)
+{
+       HYPERVISOR_vm_assist(
+               VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify);
+       return 0;
+}
+__initcall(fixup_init);
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/head-xen.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/head-xen.S       Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,207 @@
+
+
+.text
+#include <linux/elfnote.h>
+#include <linux/threads.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/dwarf2.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/elfnote.h>
+
+/*
+ * References to members of the new_cpu_data structure.
+ */
+
+#define X86            new_cpu_data+CPUINFO_x86
+#define X86_VENDOR     new_cpu_data+CPUINFO_x86_vendor
+#define X86_MODEL      new_cpu_data+CPUINFO_x86_model
+#define X86_MASK       new_cpu_data+CPUINFO_x86_mask
+#define X86_HARD_MATH  new_cpu_data+CPUINFO_hard_math
+#define X86_CPUID      new_cpu_data+CPUINFO_cpuid_level
+#define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
+#define X86_VENDOR_ID  new_cpu_data+CPUINFO_x86_vendor_id
+
+#define VIRT_ENTRY_OFFSET 0x0
+.org VIRT_ENTRY_OFFSET
+ENTRY(startup_32)
+       movl %esi,xen_start_info
+       cld
+
+       /* Set up the stack pointer */
+       movl $(init_thread_union+THREAD_SIZE),%esp
+
+       /* get vendor info */
+       xorl %eax,%eax                  # call CPUID with 0 -> return vendor ID
+       XEN_CPUID
+       movl %eax,X86_CPUID             # save CPUID level
+       movl %ebx,X86_VENDOR_ID         # lo 4 chars
+       movl %edx,X86_VENDOR_ID+4       # next 4 chars
+       movl %ecx,X86_VENDOR_ID+8       # last 4 chars
+
+       movl $1,%eax            # Use the CPUID instruction to get CPU type
+       XEN_CPUID
+       movb %al,%cl            # save reg for future use
+       andb $0x0f,%ah          # mask processor family
+       movb %ah,X86
+       andb $0xf0,%al          # mask model
+       shrb $4,%al
+       movb %al,X86_MODEL
+       andb $0x0f,%cl          # mask mask revision
+       movb %cl,X86_MASK
+       movl %edx,X86_CAPABILITY
+
+       movb $1,X86_HARD_MATH
+
+       xorl %eax,%eax                  # Clear FS/GS and LDT
+       movl %eax,%fs
+       movl %eax,%gs
+       cld                     # gcc2 wants the direction flag cleared at all 
times
+
+       pushl %eax              # fake return address
+       jmp start_kernel
+
+#define HYPERCALL_PAGE_OFFSET 0x1000
+.org HYPERCALL_PAGE_OFFSET
+ENTRY(hypercall_page)
+       CFI_STARTPROC
+.skip 0x1000
+       CFI_ENDPROC
+
+/*
+ * Real beginning of normal "text" segment
+ */
+ENTRY(stext)
+ENTRY(_stext)
+
+/*
+ * BSS section
+ */
+.section ".bss.page_aligned","w"
+ENTRY(empty_zero_page)
+       .fill 4096,1,0
+
+/*
+ * This starts the data section.
+ */
+.data
+
+/*
+ * The Global Descriptor Table contains 28 quadwords, per-CPU.
+ */
+       .align L1_CACHE_BYTES
+ENTRY(cpu_gdt_table)
+       .quad 0x0000000000000000        /* NULL descriptor */
+       .quad 0x0000000000000000        /* 0x0b reserved */
+       .quad 0x0000000000000000        /* 0x13 reserved */
+       .quad 0x0000000000000000        /* 0x1b reserved */
+       .quad 0x0000000000000000        /* 0x20 unused */
+       .quad 0x0000000000000000        /* 0x28 unused */
+       .quad 0x0000000000000000        /* 0x33 TLS entry 1 */
+       .quad 0x0000000000000000        /* 0x3b TLS entry 2 */
+       .quad 0x0000000000000000        /* 0x43 TLS entry 3 */
+       .quad 0x0000000000000000        /* 0x4b reserved */
+       .quad 0x0000000000000000        /* 0x53 reserved */
+       .quad 0x0000000000000000        /* 0x5b reserved */
+
+       .quad 0x00cf9a000000ffff        /* 0x60 kernel 4GB code at 0x00000000 */
+       .quad 0x00cf92000000ffff        /* 0x68 kernel 4GB data at 0x00000000 */
+       .quad 0x00cffa000000ffff        /* 0x73 user 4GB code at 0x00000000 */
+       .quad 0x00cff2000000ffff        /* 0x7b user 4GB data at 0x00000000 */
+
+       .quad 0x0000000000000000        /* 0x80 TSS descriptor */
+       .quad 0x0000000000000000        /* 0x88 LDT descriptor */
+
+       /*
+        * Segments used for calling PnP BIOS have byte granularity.
+        * They code segments and data segments have fixed 64k limits,
+        * the transfer segment sizes are set at run time.
+        */
+       .quad 0x0000000000000000        /* 0x90 32-bit code */
+       .quad 0x0000000000000000        /* 0x98 16-bit code */
+       .quad 0x0000000000000000        /* 0xa0 16-bit data */
+       .quad 0x0000000000000000        /* 0xa8 16-bit data */
+       .quad 0x0000000000000000        /* 0xb0 16-bit data */
+
+       /*
+        * The APM segments have byte granularity and their bases
+        * are set at run time.  All have 64k limits.
+        */
+       .quad 0x0000000000000000        /* 0xb8 APM CS    code */
+       .quad 0x0000000000000000        /* 0xc0 APM CS 16 code (16 bit) */
+       .quad 0x0000000000000000        /* 0xc8 APM DS    data */
+
+       .quad 0x0000000000000000        /* 0xd0 - ESPFIX 16-bit SS */
+       .quad 0x0000000000000000        /* 0xd8 - unused */
+       .quad 0x0000000000000000        /* 0xe0 - unused */
+       .quad 0x0000000000000000        /* 0xe8 - unused */
+       .quad 0x0000000000000000        /* 0xf0 - unused */
+       .quad 0x0000000000000000        /* 0xf8 - GDT entry 31: double-fault 
TSS */
+
+#if CONFIG_XEN_COMPAT <= 0x030002
+/*
+ * __xen_guest information
+ */
+.macro utoa value
+ .if (\value) < 0 || (\value) >= 0x10
+       utoa (((\value)>>4)&0x0fffffff)
+ .endif
+ .if ((\value) & 0xf) < 10
+  .byte '0' + ((\value) & 0xf)
+ .else
+  .byte 'A' + ((\value) & 0xf) - 10
+ .endif
+.endm
+
+.section __xen_guest
+       .ascii  "GUEST_OS=linux,GUEST_VER=2.6"
+       .ascii  ",XEN_VER=xen-3.0"
+       .ascii  ",VIRT_BASE=0x"
+               utoa __PAGE_OFFSET
+       .ascii  ",ELF_PADDR_OFFSET=0x"
+               utoa __PAGE_OFFSET
+       .ascii  ",VIRT_ENTRY=0x"
+               utoa (__PAGE_OFFSET + __PHYSICAL_START + VIRT_ENTRY_OFFSET)
+       .ascii  ",HYPERCALL_PAGE=0x"
+               utoa ((__PHYSICAL_START+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT)
+       .ascii  ",FEATURES=writable_page_tables"
+       .ascii           "|writable_descriptor_tables"
+       .ascii           "|auto_translated_physmap"
+       .ascii           "|pae_pgdir_above_4gb"
+       .ascii           "|supervisor_mode_kernel"
+#ifdef CONFIG_X86_PAE
+       .ascii  ",PAE=yes[extended-cr3]"
+#else
+       .ascii  ",PAE=no"
+#endif
+       .ascii  ",LOADER=generic"
+       .byte   0
+#endif /* CONFIG_XEN_COMPAT <= 0x030002 */
+
+
+       ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz, "linux")       
+       ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz, "2.6")
+       ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,    .asciz, "xen-3.0")
+       ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      .long,  __PAGE_OFFSET)
+#if CONFIG_XEN_COMPAT <= 0x030002
+       ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   .long,  __PAGE_OFFSET)
+#else
+       ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   .long,  0)
+#endif
+       ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          .long,  startup_32)
+       ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long,  hypercall_page)
+       ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   .long,  HYPERVISOR_VIRT_START)
+       ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz, 
"writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel")
+#ifdef CONFIG_X86_PAE
+       ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz, "yes")
+       ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,   .quad,  
_PAGE_PRESENT,_PAGE_PRESENT)
+#else
+       ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz, "no")
+       ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,   .long,  
_PAGE_PRESENT,_PAGE_PRESENT)
+#endif
+       ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz, "generic")
+       ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long,  1)
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/init_task-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/init_task-xen.c  Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,51 @@
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/init_task.h>
+#include <linux/fs.h>
+#include <linux/mqueue.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/desc.h>
+
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+
+#define swapper_pg_dir ((pgd_t *)NULL)
+struct mm_struct init_mm = INIT_MM(init_mm);
+#undef swapper_pg_dir
+
+EXPORT_SYMBOL(init_mm);
+
+/*
+ * Initial thread structure.
+ *
+ * We need to make sure that this is THREAD_SIZE aligned due to the
+ * way process stacks are handled. This is done by having a special
+ * "init_task" linker map entry..
+ */
+union thread_union init_thread_union 
+       __attribute__((__section__(".data.init_task"))) =
+               { INIT_THREAD_INFO(init_task) };
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+
+EXPORT_SYMBOL(init_task);
+
+#ifndef CONFIG_X86_NO_TSS
+/*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's.
+ */ 
+DEFINE_PER_CPU(struct tss_struct, init_tss) 
____cacheline_internodealigned_in_smp = INIT_TSS;
+#endif
+
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/io_apic-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/io_apic-xen.c    Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,2777 @@
+/*
+ *     Intel IO-APIC support for multi-Pentium hosts.
+ *
+ *     Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
+ *
+ *     Many thanks to Stig Venaas for trying out countless experimental
+ *     patches and reporting/debugging problems patiently!
+ *
+ *     (c) 1999, Multiple IO-APIC support, developed by
+ *     Ken-ichi Yaku <yaku@xxxxxxxxxxxxxxxxxxxx> and
+ *      Hidemi Kishimoto <kisimoto@xxxxxxxxxxxxxxxxxxxx>,
+ *     further tested and cleaned up by Zach Brown <zab@xxxxxxxxxx>
+ *     and Ingo Molnar <mingo@xxxxxxxxxx>
+ *
+ *     Fixes
+ *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
+ *                                     thanks to Eric Gilmore
+ *                                     and Rolf G. Tews
+ *                                     for testing these extensively
+ *     Paul Diefenbaugh        :       Added full ACPI support
+ */
+
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/mc146818rtc.h>
+#include <linux/compiler.h>
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/sysdev.h>
+
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/desc.h>
+#include <asm/timer.h>
+#include <asm/i8259.h>
+#include <asm/nmi.h>
+
+#include <mach_apic.h>
+
+#include "io_ports.h"
+
+#ifdef CONFIG_XEN
+
+#include <xen/interface/xen.h>
+#include <xen/interface/physdev.h>
+
+/* Fake i8259 */
+#define make_8259A_irq(_irq)     (io_apic_irqs &= ~(1UL<<(_irq)))
+#define disable_8259A_irq(_irq)  ((void)0)
+#define i8259A_irq_pending(_irq) (0)
+
+unsigned long io_apic_irqs;
+
+static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int 
reg)
+{
+       struct physdev_apic apic_op;
+       int ret;
+
+       apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
+       apic_op.reg = reg;
+       ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
+       if (ret)
+               return ret;
+       return apic_op.value;
+}
+
+static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, 
unsigned int value)
+{
+       struct physdev_apic apic_op;
+
+       apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
+       apic_op.reg = reg;
+       apic_op.value = value;
+       HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op);
+}
+
+#define io_apic_read(a,r)    xen_io_apic_read(a,r)
+#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
+
+#endif /* CONFIG_XEN */
+
+int (*ioapic_renumber_irq)(int ioapic, int irq);
+atomic_t irq_mis_count;
+
+/* Where if anywhere is the i8259 connect in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
+static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
+
+int timer_over_8254 __initdata = 1;
+
+/*
+ *     Is the SiS APIC rmw bug present ?
+ *     -1 = don't know, 0 = no, 1 = yes
+ */
+int sis_apic_bug = -1;
+
+/*
+ * # of IRQ routing registers
+ */
+int nr_ioapic_registers[MAX_IO_APICS];
+
+int disable_timer_pin_1 __initdata;
+
+/*
+ * Rough estimation of how many shared IRQs there are, can
+ * be changed anytime.
+ */
+#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+static struct irq_pin_list {
+       int apic, pin, next;
+} irq_2_pin[PIN_MAP_SIZE];
+
+int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
+#ifdef CONFIG_PCI_MSI
+#define vector_to_irq(vector)  \
+       (platform_legacy_irq(vector) ? vector : vector_irq[vector])
+#else
+#define vector_to_irq(vector)  (vector)
+#endif
+
+/*
+ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
+ * shared ISA-space IRQs, so we have to support them. We are super
+ * fast in the common case, and fast for shared ISA-space IRQs.
+ */
+static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+{
+       static int first_free_entry = NR_IRQS;
+       struct irq_pin_list *entry = irq_2_pin + irq;
+
+       while (entry->next)
+               entry = irq_2_pin + entry->next;
+
+       if (entry->pin != -1) {
+               entry->next = first_free_entry;
+               entry = irq_2_pin + entry->next;
+               if (++first_free_entry >= PIN_MAP_SIZE)
+                       panic("io_apic.c: whoops");
+       }
+       entry->apic = apic;
+       entry->pin = pin;
+}
+
+#ifdef CONFIG_XEN
+#define clear_IO_APIC() ((void)0)
+#else
+/*
+ * Reroute an IRQ to a different pin.
+ */
+static void __init replace_pin_at_irq(unsigned int irq,
+                                     int oldapic, int oldpin,
+                                     int newapic, int newpin)
+{
+       struct irq_pin_list *entry = irq_2_pin + irq;
+
+       while (1) {
+               if (entry->apic == oldapic && entry->pin == oldpin) {
+                       entry->apic = newapic;
+                       entry->pin = newpin;
+               }
+               if (!entry->next)
+                       break;
+               entry = irq_2_pin + entry->next;
+       }
+}
+
+static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, 
unsigned long disable)
+{
+       struct irq_pin_list *entry = irq_2_pin + irq;
+       unsigned int pin, reg;
+
+       for (;;) {
+               pin = entry->pin;
+               if (pin == -1)
+                       break;
+               reg = io_apic_read(entry->apic, 0x10 + pin*2);
+               reg &= ~disable;
+               reg |= enable;
+               io_apic_modify(entry->apic, 0x10 + pin*2, reg);
+               if (!entry->next)
+                       break;
+               entry = irq_2_pin + entry->next;
+       }
+}
+
+/* mask = 1 */
+static void __mask_IO_APIC_irq (unsigned int irq)
+{
+       __modify_IO_APIC_irq(irq, 0x00010000, 0);
+}
+
+/* mask = 0 */
+static void __unmask_IO_APIC_irq (unsigned int irq)
+{
+       __modify_IO_APIC_irq(irq, 0, 0x00010000);
+}
+
+/* mask = 1, trigger = 0 */
+static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
+{
+       __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
+}
+
+/* mask = 0, trigger = 1 */
+static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
+{
+       __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
+}
+
+static void mask_IO_APIC_irq (unsigned int irq)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __mask_IO_APIC_irq(irq);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void unmask_IO_APIC_irq (unsigned int irq)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __unmask_IO_APIC_irq(irq);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+{
+       struct IO_APIC_route_entry entry;
+       unsigned long flags;
+       
+       /* Check delivery_mode to be sure we're not clearing an SMI pin */
+       spin_lock_irqsave(&ioapic_lock, flags);
+       *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+       *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+       if (entry.delivery_mode == dest_SMI)
+               return;
+
+       /*
+        * Disable it in the IO-APIC irq-routing table:
+        */
+       memset(&entry, 0, sizeof(entry));
+       entry.mask = 1;
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+       io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void clear_IO_APIC (void)
+{
+       int apic, pin;
+
+       for (apic = 0; apic < nr_ioapics; apic++)
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+                       clear_IO_APIC_pin(apic, pin);
+}
+
+#ifdef CONFIG_SMP
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
+{
+       unsigned long flags;
+       int pin;
+       struct irq_pin_list *entry = irq_2_pin + irq;
+       unsigned int apicid_value;
+       cpumask_t tmp;
+       
+       cpus_and(tmp, cpumask, cpu_online_map);
+       if (cpus_empty(tmp))
+               tmp = TARGET_CPUS;
+
+       cpus_and(cpumask, tmp, CPU_MASK_ALL);
+
+       apicid_value = cpu_mask_to_apicid(cpumask);
+       /* Prepare to do the io_apic_write */
+       apicid_value = apicid_value << 24;
+       spin_lock_irqsave(&ioapic_lock, flags);
+       for (;;) {
+               pin = entry->pin;
+               if (pin == -1)
+                       break;
+               io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
+               if (!entry->next)
+                       break;
+               entry = irq_2_pin + entry->next;
+       }
+       set_irq_info(irq, cpumask);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+#if defined(CONFIG_IRQBALANCE)
+# include <asm/processor.h>    /* kernel_thread() */
+# include <linux/kernel_stat.h>        /* kstat */
+# include <linux/slab.h>               /* kmalloc() */
+# include <linux/timer.h>      /* time_after() */
+ 
+#ifdef CONFIG_BALANCED_IRQ_DEBUG
+#  define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, 
__LINE__); printk(x); } while (0)
+#  define Dprintk(x...) do { TDprintk(x); } while (0)
+# else
+#  define TDprintk(x...) 
+#  define Dprintk(x...) 
+# endif
+
+#define IRQBALANCE_CHECK_ARCH -999
+#define MAX_BALANCED_IRQ_INTERVAL      (5*HZ)
+#define MIN_BALANCED_IRQ_INTERVAL      (HZ/2)
+#define BALANCED_IRQ_MORE_DELTA                (HZ/10)
+#define BALANCED_IRQ_LESS_DELTA                (HZ)
+
+static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
+static int physical_balance __read_mostly;
+static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
+
+static struct irq_cpu_info {
+       unsigned long * last_irq;
+       unsigned long * irq_delta;
+       unsigned long irq;
+} irq_cpu_data[NR_CPUS];
+
+#define CPU_IRQ(cpu)           (irq_cpu_data[cpu].irq)
+#define LAST_CPU_IRQ(cpu,irq)   (irq_cpu_data[cpu].last_irq[irq])
+#define IRQ_DELTA(cpu,irq)     (irq_cpu_data[cpu].irq_delta[irq])
+
+#define IDLE_ENOUGH(cpu,now) \
+       (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
+
+#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
+
+#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
+
+static cpumask_t balance_irq_affinity[NR_IRQS] = {
+       [0 ... NR_IRQS-1] = CPU_MASK_ALL
+};
+
+void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+       balance_irq_affinity[irq] = mask;
+}
+
+static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
+                       unsigned long now, int direction)
+{
+       int search_idle = 1;
+       int cpu = curr_cpu;
+
+       goto inside;
+
+       do {
+               if (unlikely(cpu == curr_cpu))
+                       search_idle = 0;
+inside:
+               if (direction == 1) {
+                       cpu++;
+                       if (cpu >= NR_CPUS)
+                               cpu = 0;
+               } else {
+                       cpu--;
+                       if (cpu == -1)
+                               cpu = NR_CPUS-1;
+               }
+       } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
+                       (search_idle && !IDLE_ENOUGH(cpu,now)));
+
+       return cpu;
+}
+
+static inline void balance_irq(int cpu, int irq)
+{
+       unsigned long now = jiffies;
+       cpumask_t allowed_mask;
+       unsigned int new_cpu;
+               
+       if (irqbalance_disabled)
+               return; 
+
+       cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
+       new_cpu = move(cpu, allowed_mask, now, 1);
+       if (cpu != new_cpu) {
+               set_pending_irq(irq, cpumask_of_cpu(new_cpu));
+       }
+}
+
+static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
+{
+       int i, j;
+       Dprintk("Rotating IRQs among CPUs.\n");
+       for_each_online_cpu(i) {
+               for (j = 0; j < NR_IRQS; j++) {
+                       if (!irq_desc[j].action)
+                               continue;
+                       /* Is it a significant load ?  */
+                       if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
+                                               useful_load_threshold)
+                               continue;
+                       balance_irq(i, j);
+               }
+       }
+       balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
+               balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);       
+       return;
+}
+
+static void do_irq_balance(void)
+{
+       int i, j;
+       unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
+       unsigned long move_this_load = 0;
+       int max_loaded = 0, min_loaded = 0;
+       int load;
+       unsigned long useful_load_threshold = balanced_irq_interval + 10;
+       int selected_irq;
+       int tmp_loaded, first_attempt = 1;
+       unsigned long tmp_cpu_irq;
+       unsigned long imbalance = 0;
+       cpumask_t allowed_mask, target_cpu_mask, tmp;
+
+       for_each_possible_cpu(i) {
+               int package_index;
+               CPU_IRQ(i) = 0;
+               if (!cpu_online(i))
+                       continue;
+               package_index = CPU_TO_PACKAGEINDEX(i);
+               for (j = 0; j < NR_IRQS; j++) {
+                       unsigned long value_now, delta;
+                       /* Is this an active IRQ? */
+                       if (!irq_desc[j].action)
+                               continue;
+                       if ( package_index == i )
+                               IRQ_DELTA(package_index,j) = 0;
+                       /* Determine the total count per processor per IRQ */
+                       value_now = (unsigned long) kstat_cpu(i).irqs[j];
+
+                       /* Determine the activity per processor per IRQ */
+                       delta = value_now - LAST_CPU_IRQ(i,j);
+
+                       /* Update last_cpu_irq[][] for the next time */
+                       LAST_CPU_IRQ(i,j) = value_now;
+
+                       /* Ignore IRQs whose rate is less than the clock */
+                       if (delta < useful_load_threshold)
+                               continue;
+                       /* update the load for the processor or package total */
+                       IRQ_DELTA(package_index,j) += delta;
+
+                       /* Keep track of the higher numbered sibling as well */
+                       if (i != package_index)
+                               CPU_IRQ(i) += delta;
+                       /*
+                        * We have sibling A and sibling B in the package
+                        *
+                        * cpu_irq[A] = load for cpu A + load for cpu B
+                        * cpu_irq[B] = load for cpu B
+                        */
+                       CPU_IRQ(package_index) += delta;
+               }
+       }
+       /* Find the least loaded processor package */
+       for_each_online_cpu(i) {
+               if (i != CPU_TO_PACKAGEINDEX(i))
+                       continue;
+               if (min_cpu_irq > CPU_IRQ(i)) {
+                       min_cpu_irq = CPU_IRQ(i);
+                       min_loaded = i;
+               }
+       }
+       max_cpu_irq = ULONG_MAX;
+
+tryanothercpu:
+       /* Look for heaviest loaded processor.
+        * We may come back to get the next heaviest loaded processor.
+        * Skip processors with trivial loads.
+        */
+       tmp_cpu_irq = 0;
+       tmp_loaded = -1;
+       for_each_online_cpu(i) {
+               if (i != CPU_TO_PACKAGEINDEX(i))
+                       continue;
+               if (max_cpu_irq <= CPU_IRQ(i)) 
+                       continue;
+               if (tmp_cpu_irq < CPU_IRQ(i)) {
+                       tmp_cpu_irq = CPU_IRQ(i);
+                       tmp_loaded = i;
+               }
+       }
+
+       if (tmp_loaded == -1) {
+        /* In the case of small number of heavy interrupt sources, 
+         * loading some of the cpus too much. We use Ingo's original 
+         * approach to rotate them around.
+         */
+               if (!first_attempt && imbalance >= useful_load_threshold) {
+                       rotate_irqs_among_cpus(useful_load_threshold);
+                       return;
+               }
+               goto not_worth_the_effort;
+       }
+       
+       first_attempt = 0;              /* heaviest search */
+       max_cpu_irq = tmp_cpu_irq;      /* load */
+       max_loaded = tmp_loaded;        /* processor */
+       imbalance = (max_cpu_irq - min_cpu_irq) / 2;
+       
+       Dprintk("max_loaded cpu = %d\n", max_loaded);
+       Dprintk("min_loaded cpu = %d\n", min_loaded);
+       Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
+       Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
+       Dprintk("load imbalance = %lu\n", imbalance);
+
+       /* if imbalance is less than approx 10% of max load, then
+        * observe diminishing returns action. - quit
+        */
+       if (imbalance < (max_cpu_irq >> 3)) {
+               Dprintk("Imbalance too trivial\n");
+               goto not_worth_the_effort;
+       }
+
+tryanotherirq:
+       /* if we select an IRQ to move that can't go where we want, then
+        * see if there is another one to try.
+        */
+       move_this_load = 0;
+       selected_irq = -1;
+       for (j = 0; j < NR_IRQS; j++) {
+               /* Is this an active IRQ? */
+               if (!irq_desc[j].action)
+                       continue;
+               if (imbalance <= IRQ_DELTA(max_loaded,j))
+                       continue;
+               /* Try to find the IRQ that is closest to the imbalance
+                * without going over.
+                */
+               if (move_this_load < IRQ_DELTA(max_loaded,j)) {
+                       move_this_load = IRQ_DELTA(max_loaded,j);
+                       selected_irq = j;
+               }
+       }
+       if (selected_irq == -1) {
+               goto tryanothercpu;
+       }
+
+       imbalance = move_this_load;
+       
+       /* For physical_balance case, we accumlated both load
+        * values in the one of the siblings cpu_irq[],
+        * to use the same code for physical and logical processors
+        * as much as possible. 
+        *
+        * NOTE: the cpu_irq[] array holds the sum of the load for
+        * sibling A and sibling B in the slot for the lowest numbered
+        * sibling (A), _AND_ the load for sibling B in the slot for
+        * the higher numbered sibling.
+        *
+        * We seek the least loaded sibling by making the comparison
+        * (A+B)/2 vs B
+        */
+       load = CPU_IRQ(min_loaded) >> 1;
+       for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
+               if (load > CPU_IRQ(j)) {
+                       /* This won't change cpu_sibling_map[min_loaded] */
+                       load = CPU_IRQ(j);
+                       min_loaded = j;
+               }
+       }
+
+       cpus_and(allowed_mask,
+               cpu_online_map,
+               balance_irq_affinity[selected_irq]);
+       target_cpu_mask = cpumask_of_cpu(min_loaded);
+       cpus_and(tmp, target_cpu_mask, allowed_mask);
+
+       if (!cpus_empty(tmp)) {
+
+               Dprintk("irq = %d moved to cpu = %d\n",
+                               selected_irq, min_loaded);
+               /* mark for change destination */
+               set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
+
+               /* Since we made a change, come back sooner to 
+                * check for more variation.
+                */
+               balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
+                       balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);       
+               return;
+       }
+       goto tryanotherirq;
+
+not_worth_the_effort:
+       /*
+        * if we did not find an IRQ to move, then adjust the time interval
+        * upward
+        */
+       balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
+               balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);       
+       Dprintk("IRQ worth rotating not found\n");
+       return;
+}
+
+static int balanced_irq(void *unused)
+{
+       int i;
+       unsigned long prev_balance_time = jiffies;
+       long time_remaining = balanced_irq_interval;
+
+       daemonize("kirqd");
+       
+       /* push everything to CPU 0 to give us a starting point.  */
+       for (i = 0 ; i < NR_IRQS ; i++) {
+               irq_desc[i].pending_mask = cpumask_of_cpu(0);
+               set_pending_irq(i, cpumask_of_cpu(0));
+       }
+
+       for ( ; ; ) {
+               time_remaining = schedule_timeout_interruptible(time_remaining);
+               try_to_freeze();
+               if (time_after(jiffies,
+                               prev_balance_time+balanced_irq_interval)) {
+                       preempt_disable();
+                       do_irq_balance();
+                       prev_balance_time = jiffies;
+                       time_remaining = balanced_irq_interval;
+                       preempt_enable();
+               }
+       }
+       return 0;
+}
+
+static int __init balanced_irq_init(void)
+{
+       int i;
+       struct cpuinfo_x86 *c;
+       cpumask_t tmp;
+
+       cpus_shift_right(tmp, cpu_online_map, 2);
+        c = &boot_cpu_data;
+       /* When not overwritten by the command line ask subarchitecture. */
+       if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
+               irqbalance_disabled = NO_BALANCE_IRQ;
+       if (irqbalance_disabled)
+               return 0;
+       
+        /* disable irqbalance completely if there is only one processor online 
*/
+       if (num_online_cpus() < 2) {
+               irqbalance_disabled = 1;
+               return 0;
+       }
+       /*
+        * Enable physical balance only if more than 1 physical processor
+        * is present
+        */
+       if (smp_num_siblings > 1 && !cpus_empty(tmp))
+               physical_balance = 1;
+
+       for_each_online_cpu(i) {
+               irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * 
NR_IRQS, GFP_KERNEL);
+               irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * 
NR_IRQS, GFP_KERNEL);
+               if (irq_cpu_data[i].irq_delta == NULL || 
irq_cpu_data[i].last_irq == NULL) {
+                       printk(KERN_ERR "balanced_irq_init: out of memory");
+                       goto failed;
+               }
+               memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * 
NR_IRQS);
+               memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * 
NR_IRQS);
+       }
+       
+       printk(KERN_INFO "Starting balanced_irq\n");
+       if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 
+               return 0;
+       else 
+               printk(KERN_ERR "balanced_irq_init: failed to spawn 
balanced_irq");
+failed:
+       for_each_possible_cpu(i) {
+               kfree(irq_cpu_data[i].irq_delta);
+               irq_cpu_data[i].irq_delta = NULL;
+               kfree(irq_cpu_data[i].last_irq);
+               irq_cpu_data[i].last_irq = NULL;
+       }
+       return 0;
+}
+
+int __init irqbalance_disable(char *str)
+{
+       irqbalance_disabled = 1;
+       return 1;
+}
+
+__setup("noirqbalance", irqbalance_disable);
+
+late_initcall(balanced_irq_init);
+#endif /* CONFIG_IRQBALANCE */
+#endif /* CONFIG_SMP */
+#endif
+
+#ifndef CONFIG_SMP
+void fastcall send_IPI_self(int vector)
+{
+#ifndef CONFIG_XEN
+       unsigned int cfg;
+
+       /*
+        * Wait for idle.
+        */
+       apic_wait_icr_idle();
+       cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
+       /*
+        * Send the IPI. The write to APIC_ICR fires this off.
+        */
+       apic_write_around(APIC_ICR, cfg);
+#endif
+}
+#endif /* !CONFIG_SMP */
+
+
+/*
+ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+static int pirq_entries [MAX_PIRQS];
+static int pirqs_enabled;
+int skip_ioapic_setup;
+
+static int __init ioapic_setup(char *str)
+{
+       skip_ioapic_setup = 1;
+       return 1;
+}
+
+__setup("noapic", ioapic_setup);
+
+static int __init ioapic_pirq_setup(char *str)
+{
+       int i, max;
+       int ints[MAX_PIRQS+1];
+
+       get_options(str, ARRAY_SIZE(ints), ints);
+
+       for (i = 0; i < MAX_PIRQS; i++)
+               pirq_entries[i] = -1;
+
+       pirqs_enabled = 1;
+       apic_printk(APIC_VERBOSE, KERN_INFO
+                       "PIRQ redirection, working around broken MP-BIOS.\n");
+       max = MAX_PIRQS;
+       if (ints[0] < MAX_PIRQS)
+               max = ints[0];
+
+       for (i = 0; i < max; i++) {
+               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                               "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+               /*
+                * PIRQs are mapped upside down, usually.
+                */
+               pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+       }
+       return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+
+/*
+ * Find the IRQ entry number of a certain pin.
+ */
+static int find_irq_entry(int apic, int pin, int type)
+{
+       int i;
+
+       for (i = 0; i < mp_irq_entries; i++)
+               if (mp_irqs[i].mpc_irqtype == type &&
+                   (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
+                    mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
+                   mp_irqs[i].mpc_dstirq == pin)
+                       return i;
+
+       return -1;
+}
+
+/*
+ * Find the pin to which IRQ[irq] (ISA) is connected
+ */
+static int __init find_isa_irq_pin(int irq, int type)
+{
+       int i;
+
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].mpc_srcbus;
+
+               if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+                   ) &&
+                   (mp_irqs[i].mpc_irqtype == type) &&
+                   (mp_irqs[i].mpc_srcbusirq == irq))
+
+                       return mp_irqs[i].mpc_dstirq;
+       }
+       return -1;
+}
+
+static int __init find_isa_irq_apic(int irq, int type)
+{
+       int i;
+
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].mpc_srcbus;
+
+               if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+                   ) &&
+                   (mp_irqs[i].mpc_irqtype == type) &&
+                   (mp_irqs[i].mpc_srcbusirq == irq))
+                       break;
+       }
+       if (i < mp_irq_entries) {
+               int apic;
+               for(apic = 0; apic < nr_ioapics; apic++) {
+                       if (mp_ioapics[apic].mpc_apicid == 
mp_irqs[i].mpc_dstapic)
+                               return apic;
+               }
+       }
+
+       return -1;
+}
+
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+static int pin_2_irq(int idx, int apic, int pin);
+
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
+{
+       int apic, i, best_guess = -1;
+
+       apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
+               "slot:%d, pin:%d.\n", bus, slot, pin);
+       if (mp_bus_id_to_pci_bus[bus] == -1) {
+               printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus 
%d!\n", bus);
+               return -1;
+       }
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].mpc_srcbus;
+
+               for (apic = 0; apic < nr_ioapics; apic++)
+                       if (mp_ioapics[apic].mpc_apicid == 
mp_irqs[i].mpc_dstapic ||
+                           mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+                               break;
+
+               if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
+                   !mp_irqs[i].mpc_irqtype &&
+                   (bus == lbus) &&
+                   (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
+                       int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
+
+                       if (!(apic || IO_APIC_IRQ(irq)))
+                               continue;
+
+                       if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
+                               return irq;
+                       /*
+                        * Use the first all-but-pin matching entry as a
+                        * best-guess fuzzy result for broken mptables.
+                        */
+                       if (best_guess < 0)
+                               best_guess = irq;
+               }
+       }
+       return best_guess;
+}
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
+/*
+ * This function currently is only a helper for the i386 smp boot process 
where 
+ * we need to reprogram the ioredtbls to cater for the cpus which have come 
online
+ * so mask in all cases should simply be TARGET_CPUS
+ */
+#ifdef CONFIG_SMP
+#ifndef CONFIG_XEN
+void __init setup_ioapic_dest(void)
+{
+       int pin, ioapic, irq, irq_entry;
+
+       if (skip_ioapic_setup == 1)
+               return;
+
+       for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
+               for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+                       irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+                       if (irq_entry == -1)
+                               continue;
+                       irq = pin_2_irq(irq_entry, ioapic, pin);
+                       set_ioapic_affinity_irq(irq, TARGET_CPUS);
+               }
+
+       }
+}
+#endif /* !CONFIG_XEN */
+#endif
+
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static int EISA_ELCR(unsigned int irq)
+{
+       if (irq < 16) {
+               unsigned int port = 0x4d0 + (irq >> 3);
+               return (inb(port) >> (irq & 7)) & 1;
+       }
+       apic_printk(APIC_VERBOSE, KERN_INFO
+                       "Broken MPtable reports ISA irq %d\n", irq);
+       return 0;
+}
+
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value.  If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx)      (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
+#define default_EISA_polarity(idx)     (0)
+
+/* ISA interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_ISA_trigger(idx)       (0)
+#define default_ISA_polarity(idx)      (0)
+
+/* PCI interrupts are always polarity one level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_PCI_trigger(idx)       (1)
+#define default_PCI_polarity(idx)      (1)
+
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx)       (1)
+#define default_MCA_polarity(idx)      (0)
+
+/* NEC98 interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_NEC98_trigger(idx)     (0)
+#define default_NEC98_polarity(idx)    (0)
+
+static int __init MPBIOS_polarity(int idx)
+{
+       int bus = mp_irqs[idx].mpc_srcbus;
+       int polarity;
+
+       /*
+        * Determine IRQ line polarity (high active or low active):
+        */
+       switch (mp_irqs[idx].mpc_irqflag & 3)
+       {
+               case 0: /* conforms, ie. bus-type dependent polarity */
+               {
+                       switch (mp_bus_id_to_type[bus])
+                       {
+                               case MP_BUS_ISA: /* ISA pin */
+                               {
+                                       polarity = default_ISA_polarity(idx);
+                                       break;
+                               }
+                               case MP_BUS_EISA: /* EISA pin */
+                               {
+                                       polarity = default_EISA_polarity(idx);
+                                       break;
+                               }
+                               case MP_BUS_PCI: /* PCI pin */
+                               {
+                                       polarity = default_PCI_polarity(idx);
+                                       break;
+                               }
+                               case MP_BUS_MCA: /* MCA pin */
+                               {
+                                       polarity = default_MCA_polarity(idx);
+                                       break;
+                               }
+                               case MP_BUS_NEC98: /* NEC 98 pin */
+                               {
+                                       polarity = default_NEC98_polarity(idx);
+                                       break;
+                               }
+                               default:
+                               {
+                                       printk(KERN_WARNING "broken BIOS!!\n");
+                                       polarity = 1;
+                                       break;
+                               }
+                       }
+                       break;
+               }
+               case 1: /* high active */
+               {
+                       polarity = 0;
+                       break;
+               }
+               case 2: /* reserved */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       polarity = 1;
+                       break;
+               }
+               case 3: /* low active */
+               {
+                       polarity = 1;
+                       break;
+               }
+               default: /* invalid */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       polarity = 1;
+                       break;
+               }
+       }
+       return polarity;
+}
+
+static int MPBIOS_trigger(int idx)
+{
+       int bus = mp_irqs[idx].mpc_srcbus;
+       int trigger;
+
+       /*
+        * Determine IRQ trigger mode (edge or level sensitive):
+        */
+       switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+       {
+               case 0: /* conforms, ie. bus-type dependent */
+               {
+                       switch (mp_bus_id_to_type[bus])
+                       {
+                               case MP_BUS_ISA: /* ISA pin */
+                               {
+                                       trigger = default_ISA_trigger(idx);
+                                       break;
+                               }
+                               case MP_BUS_EISA: /* EISA pin */
+                               {
+                                       trigger = default_EISA_trigger(idx);
+                                       break;
+                               }
+                               case MP_BUS_PCI: /* PCI pin */
+                               {
+                                       trigger = default_PCI_trigger(idx);
+                                       break;
+                               }
+                               case MP_BUS_MCA: /* MCA pin */
+                               {
+                                       trigger = default_MCA_trigger(idx);
+                                       break;
+                               }
+                               case MP_BUS_NEC98: /* NEC 98 pin */
+                               {
+                                       trigger = default_NEC98_trigger(idx);
+                                       break;
+                               }
+                               default:
+                               {
+                                       printk(KERN_WARNING "broken BIOS!!\n");
+                                       trigger = 1;
+                                       break;
+                               }
+                       }
+                       break;
+               }
+               case 1: /* edge */
+               {
+                       trigger = 0;
+                       break;
+               }
+               case 2: /* reserved */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       trigger = 1;
+                       break;
+               }
+               case 3: /* level */
+               {
+                       trigger = 1;
+                       break;
+               }
+               default: /* invalid */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       trigger = 0;
+                       break;
+               }
+       }
+       return trigger;
+}
+
+static inline int irq_polarity(int idx)
+{
+       return MPBIOS_polarity(idx);
+}
+
+static inline int irq_trigger(int idx)
+{
+       return MPBIOS_trigger(idx);
+}
+
+static int pin_2_irq(int idx, int apic, int pin)
+{
+       int irq, i;
+       int bus = mp_irqs[idx].mpc_srcbus;
+
+       /*
+        * Debugging check, we are in big trouble if this message pops up!
+        */
+       if (mp_irqs[idx].mpc_dstirq != pin)
+               printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+
+       switch (mp_bus_id_to_type[bus])
+       {
+               case MP_BUS_ISA: /* ISA pin */
+               case MP_BUS_EISA:
+               case MP_BUS_MCA:
+               case MP_BUS_NEC98:
+               {
+                       irq = mp_irqs[idx].mpc_srcbusirq;
+                       break;
+               }
+               case MP_BUS_PCI: /* PCI pin */
+               {
+                       /*
+                        * PCI IRQs are mapped in order
+                        */
+                       i = irq = 0;
+                       while (i < apic)
+                               irq += nr_ioapic_registers[i++];
+                       irq += pin;
+
+                       /*
+                        * For MPS mode, so far only needed by ES7000 platform
+                        */
+                       if (ioapic_renumber_irq)
+                               irq = ioapic_renumber_irq(apic, irq);
+
+                       break;
+               }
+               default:
+               {
+                       printk(KERN_ERR "unknown bus type %d.\n",bus); 
+                       irq = 0;
+                       break;
+               }
+       }
+
+       /*
+        * PCI IRQ command line redirection. Yes, limits are hardcoded.
+        */
+       if ((pin >= 16) && (pin <= 23)) {
+               if (pirq_entries[pin-16] != -1) {
+                       if (!pirq_entries[pin-16]) {
+                               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                                               "disabling PIRQ%d\n", pin-16);
+                       } else {
+                               irq = pirq_entries[pin-16];
+                               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                                               "using PIRQ%d -> IRQ %d\n",
+                                               pin-16, irq);
+                       }
+               }
+       }
+       return irq;
+}
+
+static inline int IO_APIC_irq_trigger(int irq)
+{
+       int apic, idx, pin;
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       idx = find_irq_entry(apic,pin,mp_INT);
+                       if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
+                               return irq_trigger(idx);
+               }
+       }
+       /*
+        * nonexistent IRQs are edge default
+        */
+       return 0;
+}
+
+/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
+u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; 
*/
+
+int assign_irq_vector(int irq)
+{
+       unsigned long flags;
+       int vector;
+       struct physdev_irq irq_op;
+
+       BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
+
+       spin_lock_irqsave(&vector_lock, flags);
+
+       if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
+               spin_unlock_irqrestore(&vector_lock, flags);
+               return IO_APIC_VECTOR(irq);
+       }
+
+       irq_op.irq = irq;
+       if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
+               spin_unlock_irqrestore(&vector_lock, flags);
+               return -ENOSPC;
+       }
+
+       vector = irq_op.vector;
+       vector_irq[vector] = irq;
+       if (irq != AUTO_ASSIGN)
+               IO_APIC_VECTOR(irq) = vector;
+
+       spin_unlock_irqrestore(&vector_lock, flags);
+
+       return vector;
+}
+
+#ifndef CONFIG_XEN
+static struct hw_interrupt_type ioapic_level_type;
+static struct hw_interrupt_type ioapic_edge_type;
+
+#define IOAPIC_AUTO    -1
+#define IOAPIC_EDGE    0
+#define IOAPIC_LEVEL   1
+
+static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
+{
+       unsigned idx;
+
+       idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
+
+       if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+                       trigger == IOAPIC_LEVEL)
+               irq_desc[idx].chip = &ioapic_level_type;
+       else
+               irq_desc[idx].chip = &ioapic_edge_type;
+       set_intr_gate(vector, interrupt[idx]);
+}
+#else
+#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
+#endif
+
+static void __init setup_IO_APIC_irqs(void)
+{
+       struct IO_APIC_route_entry entry;
+       int apic, pin, idx, irq, first_notcon = 1, vector;
+       unsigned long flags;
+
+       apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+       for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+
+               /*
+                * add it to the IO-APIC irq-routing table:
+                */
+               memset(&entry,0,sizeof(entry));
+
+               entry.delivery_mode = INT_DELIVERY_MODE;
+               entry.dest_mode = INT_DEST_MODE;
+               entry.mask = 0;                         /* enable IRQ */
+               entry.dest.logical.logical_dest = 
+                                       cpu_mask_to_apicid(TARGET_CPUS);
+
+               idx = find_irq_entry(apic,pin,mp_INT);
+               if (idx == -1) {
+                       if (first_notcon) {
+                               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                                               " IO-APIC (apicid-pin) %d-%d",
+                                               mp_ioapics[apic].mpc_apicid,
+                                               pin);
+                               first_notcon = 0;
+                       } else
+                               apic_printk(APIC_VERBOSE, ", %d-%d",
+                                       mp_ioapics[apic].mpc_apicid, pin);
+                       continue;
+               }
+
+               entry.trigger = irq_trigger(idx);
+               entry.polarity = irq_polarity(idx);
+
+               if (irq_trigger(idx)) {
+                       entry.trigger = 1;
+                       entry.mask = 1;
+               }
+
+               irq = pin_2_irq(idx, apic, pin);
+               /*
+                * skip adding the timer int on secondary nodes, which causes
+                * a small but painful rift in the time-space continuum
+                */
+               if (multi_timer_check(apic, irq))
+                       continue;
+               else
+                       add_pin_to_irq(irq, apic, pin);
+
+               if (/*!apic &&*/ !IO_APIC_IRQ(irq))
+                       continue;
+
+               if (IO_APIC_IRQ(irq)) {
+                       vector = assign_irq_vector(irq);
+                       entry.vector = vector;
+                       ioapic_register_intr(irq, vector, IOAPIC_AUTO);
+               
+                       if (!apic && (irq < 16))
+                               disable_8259A_irq(irq);
+               }
+               spin_lock_irqsave(&ioapic_lock, flags);
+               io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+               io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+               set_native_irq_info(irq, TARGET_CPUS);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+       }
+       }
+
+       if (!first_notcon)
+               apic_printk(APIC_VERBOSE, " not connected.\n");
+}
+
+/*
+ * Set up the 8259A-master output pin:
+ */
+#ifndef CONFIG_XEN
+static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, 
int vector)
+{
+       struct IO_APIC_route_entry entry;
+       unsigned long flags;
+
+       memset(&entry,0,sizeof(entry));
+
+       disable_8259A_irq(0);
+
+       /* mask LVT0 */
+       apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+
+       /*
+        * We use logical delivery to get the timer IRQ
+        * to the first CPU.
+        */
+       entry.dest_mode = INT_DEST_MODE;
+       entry.mask = 0;                                 /* unmask IRQ now */
+       entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+       entry.delivery_mode = INT_DELIVERY_MODE;
+       entry.polarity = 0;
+       entry.trigger = 0;
+       entry.vector = vector;
+
+       /*
+        * The timer IRQ doesn't have to know that behind the
+        * scene we have a 8259A-master in AEOI mode ...
+        */
+       irq_desc[0].chip = &ioapic_edge_type;
+
+       /*
+        * Add it to the IO-APIC irq-routing table:
+        */
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+       io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       enable_8259A_irq(0);
+}
+
+static inline void UNEXPECTED_IO_APIC(void)
+{
+}
+
+void __init print_IO_APIC(void)
+{
+       int apic, i;
+       union IO_APIC_reg_00 reg_00;
+       union IO_APIC_reg_01 reg_01;
+       union IO_APIC_reg_02 reg_02;
+       union IO_APIC_reg_03 reg_03;
+       unsigned long flags;
+
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
+       printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+       for (i = 0; i < nr_ioapics; i++)
+               printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+                      mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+
+       /*
+        * We are a bit conservative about what we expect.  We have to
+        * know about every hardware change ASAP.
+        */
+       printk(KERN_INFO "testing the IO APIC.......................\n");
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_00.raw = io_apic_read(apic, 0);
+       reg_01.raw = io_apic_read(apic, 1);
+       if (reg_01.bits.version >= 0x10)
+               reg_02.raw = io_apic_read(apic, 2);
+       if (reg_01.bits.version >= 0x20)
+               reg_03.raw = io_apic_read(apic, 3);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+       printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
+       printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", 
reg_00.bits.ID);
+       printk(KERN_DEBUG ".......    : Delivery Type: %X\n", 
reg_00.bits.delivery_type);
+       printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
+       if (reg_00.bits.ID >= get_physical_broadcast())
+               UNEXPECTED_IO_APIC();
+       if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
+               UNEXPECTED_IO_APIC();
+
+       printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
+       printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", 
reg_01.bits.entries);
+       if (    (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
+               (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
+               (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
+               (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
+               (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
+               (reg_01.bits.entries != 0x2E) &&
+               (reg_01.bits.entries != 0x3F)
+       )
+               UNEXPECTED_IO_APIC();
+
+       printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", 
reg_01.bits.PRQ);
+       printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", 
reg_01.bits.version);
+       if (    (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
+               (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
+               (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
+               (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
+               (reg_01.bits.version != 0x20)    /* Intel P64H (82806 AA) */
+       )
+               UNEXPECTED_IO_APIC();
+       if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
+               UNEXPECTED_IO_APIC();
+
+       /*
+        * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
+        * but the value of reg_02 is read as the previous read register
+        * value, so ignore it if reg_02 == reg_01.
+        */
+       if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
+               printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
+               printk(KERN_DEBUG ".......     : arbitration: %02X\n", 
reg_02.bits.arbitration);
+               if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
+                       UNEXPECTED_IO_APIC();
+       }
+
+       /*
+        * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
+        * or reg_03, but the value of reg_0[23] is read as the previous read
+        * register value, so ignore it if reg_03 == reg_0[12].
+        */
+       if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
+           reg_03.raw != reg_01.raw) {
+               printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
+               printk(KERN_DEBUG ".......     : Boot DT    : %X\n", 
reg_03.bits.boot_DT);
+               if (reg_03.bits.__reserved_1)
+                       UNEXPECTED_IO_APIC();
+       }
+
+       printk(KERN_DEBUG ".... IRQ redirection table:\n");
+
+       printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
+                         " Stat Dest Deli Vect:   \n");
+
+       for (i = 0; i <= reg_01.bits.entries; i++) {
+               struct IO_APIC_route_entry entry;
+
+               spin_lock_irqsave(&ioapic_lock, flags);
+               *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
+               *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+
+               printk(KERN_DEBUG " %02x %03X %02X  ",
+                       i,
+                       entry.dest.logical.logical_dest,
+                       entry.dest.physical.physical_dest
+               );
+
+               printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
+                       entry.mask,
+                       entry.trigger,
+                       entry.irr,
+                       entry.polarity,
+                       entry.delivery_status,
+                       entry.dest_mode,
+                       entry.delivery_mode,
+                       entry.vector
+               );
+       }
+       }
+       if (use_pci_vector())
+               printk(KERN_INFO "Using vector-based indexing\n");
+       printk(KERN_DEBUG "IRQ to pin mappings:\n");
+       for (i = 0; i < NR_IRQS; i++) {
+               struct irq_pin_list *entry = irq_2_pin + i;
+               if (entry->pin < 0)
+                       continue;
+               if (use_pci_vector() && !platform_legacy_irq(i))
+                       printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
+               else
+                       printk(KERN_DEBUG "IRQ%d ", i);
+               for (;;) {
+                       printk("-> %d:%d", entry->apic, entry->pin);
+                       if (!entry->next)
+                               break;
+                       entry = irq_2_pin + entry->next;
+               }
+               printk("\n");
+       }
+
+       printk(KERN_INFO ".................................... done.\n");
+
+       return;
+}
+
+#if 0
+
+static void print_APIC_bitfield (int base)
+{
+       unsigned int v;
+       int i, j;
+
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
+       printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
+       for (i = 0; i < 8; i++) {
+               v = apic_read(base + i*0x10);
+               for (j = 0; j < 32; j++) {
+                       if (v & (1<<j))
+                               printk("1");
+                       else
+                               printk("0");
+               }
+               printk("\n");
+       }
+}
+
+void /*__init*/ print_local_APIC(void * dummy)
+{
+       unsigned int v, ver, maxlvt;
+
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
+       printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+               smp_processor_id(), hard_smp_processor_id());
+       v = apic_read(APIC_ID);
+       printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(v));
+       v = apic_read(APIC_LVR);
+       printk(KERN_INFO "... APIC VERSION: %08x\n", v);
+       ver = GET_APIC_VERSION(v);
+       maxlvt = get_maxlvt();
+
+       v = apic_read(APIC_TASKPRI);
+       printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & 
APIC_TPRI_MASK);
+
+       if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
+               v = apic_read(APIC_ARBPRI);
+               printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+                       v & APIC_ARBPRI_MASK);
+               v = apic_read(APIC_PROCPRI);
+               printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+       }
+
+       v = apic_read(APIC_EOI);
+       printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
+       v = apic_read(APIC_RRR);
+       printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+       v = apic_read(APIC_LDR);
+       printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
+       v = apic_read(APIC_DFR);
+       printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+       v = apic_read(APIC_SPIV);
+       printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
+
+       printk(KERN_DEBUG "... APIC ISR field:\n");
+       print_APIC_bitfield(APIC_ISR);
+       printk(KERN_DEBUG "... APIC TMR field:\n");
+       print_APIC_bitfield(APIC_TMR);
+       printk(KERN_DEBUG "... APIC IRR field:\n");
+       print_APIC_bitfield(APIC_IRR);
+
+       if (APIC_INTEGRATED(ver)) {             /* !82489DX */
+               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
+                       apic_write(APIC_ESR, 0);
+               v = apic_read(APIC_ESR);
+               printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+       }
+
+       v = apic_read(APIC_ICR);
+       printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
+       v = apic_read(APIC_ICR2);
+       printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+
+       v = apic_read(APIC_LVTT);
+       printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
+
+       if (maxlvt > 3) {                       /* PC is LVT#4. */
+               v = apic_read(APIC_LVTPC);
+               printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
+       }
+       v = apic_read(APIC_LVT0);
+       printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
+       v = apic_read(APIC_LVT1);
+       printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
+
+       if (maxlvt > 2) {                       /* ERR is LVT#3. */
+               v = apic_read(APIC_LVTERR);
+               printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
+       }
+
+       v = apic_read(APIC_TMICT);
+       printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
+       v = apic_read(APIC_TMCCT);
+       printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
+       v = apic_read(APIC_TDCR);
+       printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+       printk("\n");
+}
+
+void print_all_local_APICs (void)
+{
+       on_each_cpu(print_local_APIC, NULL, 1, 1);
+}
+
+void /*__init*/ print_PIC(void)
+{
+       unsigned int v;
+       unsigned long flags;
+
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
+       printk(KERN_DEBUG "\nprinting PIC contents\n");
+
+       spin_lock_irqsave(&i8259A_lock, flags);
+
+       v = inb(0xa1) << 8 | inb(0x21);
+       printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
+
+       v = inb(0xa0) << 8 | inb(0x20);
+       printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
+
+       outb(0x0b,0xa0);
+       outb(0x0b,0x20);
+       v = inb(0xa0) << 8 | inb(0x20);
+       outb(0x0a,0xa0);
+       outb(0x0a,0x20);
+
+       spin_unlock_irqrestore(&i8259A_lock, flags);
+
+       printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
+
+       v = inb(0x4d1) << 8 | inb(0x4d0);
+       printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
+}
+
+#endif  /*  0  */
+
+#else
+void __init print_IO_APIC(void) { }
+#endif /* !CONFIG_XEN */
+
+static void __init enable_IO_APIC(void)
+{
+       union IO_APIC_reg_01 reg_01;
+       int i8259_apic, i8259_pin;
+       int i, apic;
+       unsigned long flags;
+
+       for (i = 0; i < PIN_MAP_SIZE; i++) {
+               irq_2_pin[i].pin = -1;
+               irq_2_pin[i].next = 0;
+       }
+       if (!pirqs_enabled)
+               for (i = 0; i < MAX_PIRQS; i++)
+                       pirq_entries[i] = -1;
+
+       /*
+        * The number of IO-APIC IRQ registers (== #pins):
+        */
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               spin_lock_irqsave(&ioapic_lock, flags);
+               reg_01.raw = io_apic_read(apic, 1);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+       }
+       for(apic = 0; apic < nr_ioapics; apic++) {
+               int pin;
+               /* See if any of the pins is in ExtINT mode */
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       struct IO_APIC_route_entry entry;
+                       spin_lock_irqsave(&ioapic_lock, flags);
+                       *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * 
pin);
+                       *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * 
pin);
+                       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+
+                       /* If the interrupt line is enabled and in ExtInt mode
+                        * I have found the pin where the i8259 is connected.
+                        */
+                       if ((entry.mask == 0) && (entry.delivery_mode == 
dest_ExtINT)) {
+                               ioapic_i8259.apic = apic;
+                               ioapic_i8259.pin  = pin;
+                               goto found_i8259;
+                       }
+               }
+       }
+ found_i8259:
+       /* Look to see what if the MP table has reported the ExtINT */
+       /* If we could not find the appropriate pin by looking at the ioapic
+        * the i8259 probably is not connected the ioapic but give the
+        * mptable a chance anyway.
+        */
+       i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
+       i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
+       /* Trust the MP table if nothing is setup in the hardware */
+       if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
+               printk(KERN_WARNING "ExtINT not setup in hardware but reported 
by MP table\n");
+               ioapic_i8259.pin  = i8259_pin;
+               ioapic_i8259.apic = i8259_apic;
+       }
+       /* Complain if the MP table and the hardware disagree */
+       if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != 
i8259_pin)) &&
+               (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
+       {
+               printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
+       }
+
+       /*
+        * Do not trust the IO-APIC being empty at bootup
+        */
+       clear_IO_APIC();
+}
+
+/*
+ * Not an __init, needed by the reboot code
+ */
+void disable_IO_APIC(void)
+{
+       /*
+        * Clear the IO-APIC before rebooting:
+        */
+       clear_IO_APIC();
+
+#ifndef CONFIG_XEN
+       /*
+        * If the i8259 is routed through an IOAPIC
+        * Put that IOAPIC in virtual wire mode
+        * so legacy interrupts can be delivered.
+        */
+       if (ioapic_i8259.pin != -1) {
+               struct IO_APIC_route_entry entry;
+               unsigned long flags;
+
+               memset(&entry, 0, sizeof(entry));
+               entry.mask            = 0; /* Enabled */
+               entry.trigger         = 0; /* Edge */
+               entry.irr             = 0;
+               entry.polarity        = 0; /* High */
+               entry.delivery_status = 0;
+               entry.dest_mode       = 0; /* Physical */
+               entry.delivery_mode   = dest_ExtINT; /* ExtInt */
+               entry.vector          = 0;
+               entry.dest.physical.physical_dest =
+                                       GET_APIC_ID(apic_read(APIC_ID));
+
+               /*
+                * Add it to the IO-APIC irq-routing table:
+                */
+               spin_lock_irqsave(&ioapic_lock, flags);
+               io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
+                       *(((int *)&entry)+1));
+               io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
+                       *(((int *)&entry)+0));
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+       }
+       disconnect_bsp_APIC(ioapic_i8259.pin != -1);
+#endif
+}
+
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@xxxxxxxx>  Tue Dec 21 12:25:05 CST 1999
+ */
+
+#if !defined(CONFIG_XEN) && !defined(CONFIG_X86_NUMAQ)
+static void __init setup_ioapic_ids_from_mpc(void)
+{
+       union IO_APIC_reg_00 reg_00;
+       physid_mask_t phys_id_present_map;
+       int apic;
+       int i;
+       unsigned char old_id;
+       unsigned long flags;
+
+       /*
+        * Don't check I/O APIC IDs for xAPIC systems.  They have
+        * no meaning without the serial APIC bus.
+        */
+       if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+               || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+               return;
+       /*
+        * This is broken; anything with a real cpu count has to
+        * circumvent this idiocy regardless.
+        */
+       phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+       /*
+        * Set the IOAPIC ID to the value stored in the MPC table.
+        */
+       for (apic = 0; apic < nr_ioapics; apic++) {
+
+               /* Read the register 0 value */
+               spin_lock_irqsave(&ioapic_lock, flags);
+               reg_00.raw = io_apic_read(apic, 0);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               
+               old_id = mp_ioapics[apic].mpc_apicid;
+
+               if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
+                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the 
MPC table!...\n",
+                               apic, mp_ioapics[apic].mpc_apicid);
+                       printk(KERN_ERR "... fixing up to %d. (tell your hw 
vendor)\n",
+                               reg_00.bits.ID);
+                       mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
+               }
+
+               /*
+                * Sanity check, is the ID really free? Every APIC in a
+                * system must have a unique ID or we get lots of nice
+                * 'stuck on smp_invalidate_needed IPI wait' messages.
+                */
+               if (check_apicid_used(phys_id_present_map,
+                                       mp_ioapics[apic].mpc_apicid)) {
+                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already 
used!...\n",
+                               apic, mp_ioapics[apic].mpc_apicid);
+                       for (i = 0; i < get_physical_broadcast(); i++)
+                               if (!physid_isset(i, phys_id_present_map))
+                                       break;
+                       if (i >= get_physical_broadcast())
+                               panic("Max APIC ID exceeded!\n");
+                       printk(KERN_ERR "... fixing up to %d. (tell your hw 
vendor)\n",
+                               i);
+                       physid_set(i, phys_id_present_map);
+                       mp_ioapics[apic].mpc_apicid = i;
+               } else {
+                       physid_mask_t tmp;
+                       tmp = 
apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
+                       apic_printk(APIC_VERBOSE, "Setting %d in the "
+                                       "phys_id_present_map\n",
+                                       mp_ioapics[apic].mpc_apicid);
+                       physids_or(phys_id_present_map, phys_id_present_map, 
tmp);
+               }
+
+
+               /*
+                * We need to adjust the IRQ routing table
+                * if the ID changed.
+                */
+               if (old_id != mp_ioapics[apic].mpc_apicid)
+                       for (i = 0; i < mp_irq_entries; i++)
+                               if (mp_irqs[i].mpc_dstapic == old_id)
+                                       mp_irqs[i].mpc_dstapic
+                                               = mp_ioapics[apic].mpc_apicid;
+
+               /*
+                * Read the right value from the MPC table and
+                * write it into the ID register.
+                */
+               apic_printk(APIC_VERBOSE, KERN_INFO
+                       "...changing IO-APIC physical APIC ID to %d ...",
+                       mp_ioapics[apic].mpc_apicid);
+
+               reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
+               spin_lock_irqsave(&ioapic_lock, flags);
+               io_apic_write(apic, 0, reg_00.raw);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+
+               /*
+                * Sanity check
+                */
+               spin_lock_irqsave(&ioapic_lock, flags);
+               reg_00.raw = io_apic_read(apic, 0);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
+                       printk("could not set ID!\n");
+               else
+                       apic_printk(APIC_VERBOSE, " ok.\n");
+       }
+}
+#else
+static void __init setup_ioapic_ids_from_mpc(void) { }
+#endif
+
+#ifndef CONFIG_XEN
+/*
+ * There is a nasty bug in some older SMP boards, their mptable lies
+ * about the timer IRQ. We do the following to work around the situation:
+ *
+ *     - timer IRQ defaults to IO-APIC IRQ
+ *     - if this function detects that timer IRQs are defunct, then we fall
+ *       back to ISA timer IRQs
+ */
+static int __init timer_irq_works(void)
+{
+       unsigned long t1 = jiffies;
+
+       local_irq_enable();
+       /* Let ten ticks pass... */
+       mdelay((10 * 1000) / HZ);
+
+       /*
+        * Expect a few ticks at least, to be sure some possible
+        * glue logic does not lock up after one or two first
+        * ticks in a non-ExtINT mode.  Also the local APIC
+        * might have cached one ExtINT interrupt.  Finally, at
+        * least one tick may be lost due to delays.
+        */
+       if (jiffies - t1 > 4)
+               return 1;
+
+       return 0;
+}
+
+/*
+ * In the SMP+IOAPIC case it might happen that there are an unspecified
+ * number of pending IRQ events unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way as thus we do not have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ */
+/*
+ * Edge triggered needs to resend any interrupt
+ * that was delayed but this is now handled in the device
+ * independent code.
+ */
+
+/*
+ * Starting up a edge-triggered IO-APIC interrupt is
+ * nasty - we need to make sure that we get the edge.
+ * If it is already asserted for some reason, we need
+ * return 1 to indicate that is was pending.
+ *
+ * This is not complete - we should be able to fake
+ * an edge even if it isn't on the 8259A...
+ */
+static unsigned int startup_edge_ioapic_irq(unsigned int irq)
+{
+       int was_pending = 0;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       if (irq < 16) {
+               disable_8259A_irq(irq);
+               if (i8259A_irq_pending(irq))
+                       was_pending = 1;
+       }
+       __unmask_IO_APIC_irq(irq);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return was_pending;
+}
+
+/*
+ * Once we have recorded IRQ_PENDING already, we can mask the
+ * interrupt for real. This prevents IRQ storms from unhandled
+ * devices.
+ */
+static void ack_edge_ioapic_irq(unsigned int irq)
+{
+       move_irq(irq);
+       if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
+                                       == (IRQ_PENDING | IRQ_DISABLED))
+               mask_IO_APIC_irq(irq);
+       ack_APIC_irq();
+}
+
+/*
+ * Level triggered interrupts can just be masked,
+ * and shutting down and starting up the interrupt
+ * is the same as enabling and disabling them -- except
+ * with a startup need to return a "was pending" value.
+ *
+ * Level triggered interrupts are special because we
+ * do not touch any IO-APIC register while handling
+ * them. We ack the APIC in the end-IRQ handler, not
+ * in the start-IRQ-handler. Protection against reentrance
+ * from the same interrupt is still provided, both by the
+ * generic IRQ layer and by the fact that an unacked local
+ * APIC does not accept IRQs.
+ */
+static unsigned int startup_level_ioapic_irq (unsigned int irq)
+{
+       unmask_IO_APIC_irq(irq);
+
+       return 0; /* don't check for pending */
+}
+
+static void end_level_ioapic_irq (unsigned int irq)
+{
+       unsigned long v;
+       int i;
+
+       move_irq(irq);
+/*
+ * It appears there is an erratum which affects at least version 0x11
+ * of I/O APIC (that's the 82093AA and cores integrated into various
+ * chipsets).  Under certain conditions a level-triggered interrupt is
+ * erroneously delivered as edge-triggered one but the respective IRR
+ * bit gets set nevertheless.  As a result the I/O unit expects an EOI
+ * message but it will never arrive and further interrupts are blocked
+ * from the source.  The exact reason is so far unknown, but the
+ * phenomenon was observed when two consecutive interrupt requests
+ * from a given source get delivered to the same CPU and the source is
+ * temporarily disabled in between.
+ *
+ * A workaround is to simulate an EOI message manually.  We achieve it
+ * by setting the trigger mode to edge and then to level when the edge
+ * trigger mode gets detected in the TMR of a local APIC for a
+ * level-triggered interrupt.  We mask the source for the time of the
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
+ * The idea is from Manfred Spraul.  --macro
+ */
+       i = IO_APIC_VECTOR(irq);
+
+       v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+
+       ack_APIC_irq();
+
+       if (!(v & (1 << (i & 0x1f)))) {
+               atomic_inc(&irq_mis_count);
+               spin_lock(&ioapic_lock);
+               __mask_and_edge_IO_APIC_irq(irq);
+               __unmask_and_level_IO_APIC_irq(irq);
+               spin_unlock(&ioapic_lock);
+       }
+}
+
+#ifdef CONFIG_PCI_MSI
+static unsigned int startup_edge_ioapic_vector(unsigned int vector)
+{
+       int irq = vector_to_irq(vector);
+
+       return startup_edge_ioapic_irq(irq);
+}
+
+static void ack_edge_ioapic_vector(unsigned int vector)
+{
+       int irq = vector_to_irq(vector);
+
+       move_native_irq(vector);
+       ack_edge_ioapic_irq(irq);
+}
+
+static unsigned int startup_level_ioapic_vector (unsigned int vector)
+{
+       int irq = vector_to_irq(vector);
+
+       return startup_level_ioapic_irq (irq);
+}
+
+static void end_level_ioapic_vector (unsigned int vector)
+{
+       int irq = vector_to_irq(vector);
+
+       move_native_irq(vector);
+       end_level_ioapic_irq(irq);
+}
+
+static void mask_IO_APIC_vector (unsigned int vector)
+{
+       int irq = vector_to_irq(vector);
+
+       mask_IO_APIC_irq(irq);
+}
+
+static void unmask_IO_APIC_vector (unsigned int vector)
+{
+       int irq = vector_to_irq(vector);
+
+       unmask_IO_APIC_irq(irq);
+}
+
+#ifdef CONFIG_SMP
+static void set_ioapic_affinity_vector (unsigned int vector,
+                                       cpumask_t cpu_mask)
+{
+       int irq = vector_to_irq(vector);
+
+       set_native_irq_info(vector, cpu_mask);
+       set_ioapic_affinity_irq(irq, cpu_mask);
+}
+#endif
+#endif
+
+static int ioapic_retrigger(unsigned int irq)
+{
+       send_IPI_self(IO_APIC_VECTOR(irq));
+
+       return 1;
+}
+
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
+static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
+       .typename       = "IO-APIC-edge",
+       .startup        = startup_edge_ioapic,
+       .shutdown       = shutdown_edge_ioapic,
+       .enable         = enable_edge_ioapic,
+       .disable        = disable_edge_ioapic,
+       .ack            = ack_edge_ioapic,
+       .end            = end_edge_ioapic,
+#ifdef CONFIG_SMP
+       .set_affinity   = set_ioapic_affinity,
+#endif
+       .retrigger      = ioapic_retrigger,
+};
+
+static struct hw_interrupt_type ioapic_level_type __read_mostly = {
+       .typename       = "IO-APIC-level",
+       .startup        = startup_level_ioapic,
+       .shutdown       = shutdown_level_ioapic,
+       .enable         = enable_level_ioapic,
+       .disable        = disable_level_ioapic,
+       .ack            = mask_and_ack_level_ioapic,
+       .end            = end_level_ioapic,
+#ifdef CONFIG_SMP
+       .set_affinity   = set_ioapic_affinity,
+#endif
+       .retrigger      = ioapic_retrigger,
+};
+#endif /* !CONFIG_XEN */
+
+static inline void init_IO_APIC_traps(void)
+{
+       int irq;
+
+       /*
+        * NOTE! The local APIC isn't very good at handling
+        * multiple interrupts at the same interrupt level.
+        * As the interrupt level is determined by taking the
+        * vector number and shifting that right by 4, we
+        * want to spread these out a bit so that they don't
+        * all fall in the same interrupt level.
+        *
+        * Also, we've got to be careful not to trash gate
+        * 0x80, because int 0x80 is hm, kind of importantish. ;)
+        */
+       for (irq = 0; irq < NR_IRQS ; irq++) {
+               int tmp = irq;
+               if (use_pci_vector()) {
+                       if (!platform_legacy_irq(tmp))
+                               if ((tmp = vector_to_irq(tmp)) == -1)
+                                       continue;
+               }
+               if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
+                       /*
+                        * Hmm.. We don't have an entry for this,
+                        * so default to an old-fashioned 8259
+                        * interrupt if we can..
+                        */
+                       if (irq < 16)
+                               make_8259A_irq(irq);
+#ifndef CONFIG_XEN
+                       else
+                               /* Strange. Oh, well.. */
+                               irq_desc[irq].chip = &no_irq_type;
+#endif
+               }
+       }
+}
+
+#ifndef CONFIG_XEN
+static void enable_lapic_irq (unsigned int irq)
+{
+       unsigned long v;
+
+       v = apic_read(APIC_LVT0);
+       apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}
+
+static void disable_lapic_irq (unsigned int irq)
+{
+       unsigned long v;
+
+       v = apic_read(APIC_LVT0);
+       apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+}
+
+static void ack_lapic_irq (unsigned int irq)
+{
+       ack_APIC_irq();
+}
+
+static void end_lapic_irq (unsigned int i) { /* nothing */ }
+
+static struct hw_interrupt_type lapic_irq_type __read_mostly = {
+       .typename       = "local-APIC-edge",
+       .startup        = NULL, /* startup_irq() not used for IRQ0 */
+       .shutdown       = NULL, /* shutdown_irq() not used for IRQ0 */
+       .enable         = enable_lapic_irq,
+       .disable        = disable_lapic_irq,
+       .ack            = ack_lapic_irq,
+       .end            = end_lapic_irq
+};
+
+static void setup_nmi (void)
+{
+       /*
+        * Dirty trick to enable the NMI watchdog ...
+        * We put the 8259A master into AEOI mode and
+        * unmask on all local APICs LVT0 as NMI.
+        *
+        * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
+        * is from Maciej W. Rozycki - so we do not have to EOI from
+        * the NMI handler or the timer interrupt.
+        */ 
+       apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
+
+       on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1);
+
+       apic_printk(APIC_VERBOSE, " done.\n");
+}
+
+/*
+ * This looks a bit hackish but it's about the only one way of sending
+ * a few INTA cycles to 8259As and any associated glue logic.  ICR does
+ * not support the ExtINT mode, unfortunately.  We need to send these
+ * cycles as some i82489DX-based boards have glue logic that keeps the
+ * 8259A interrupt line asserted until INTA.  --macro
+ */
+static inline void unlock_ExtINT_logic(void)
+{
+       int apic, pin, i;
+       struct IO_APIC_route_entry entry0, entry1;
+       unsigned char save_control, save_freq_select;
+       unsigned long flags;
+
+       pin  = find_isa_irq_pin(8, mp_INT);
+       apic = find_isa_irq_apic(8, mp_INT);
+       if (pin == -1)
+               return;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+       *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+       clear_IO_APIC_pin(apic, pin);
+
+       memset(&entry1, 0, sizeof(entry1));
+
+       entry1.dest_mode = 0;                   /* physical delivery */
+       entry1.mask = 0;                        /* unmask IRQ now */
+       entry1.dest.physical.physical_dest = hard_smp_processor_id();
+       entry1.delivery_mode = dest_ExtINT;
+       entry1.polarity = entry0.polarity;
+       entry1.trigger = 0;
+       entry1.vector = 0;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+       io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       save_control = CMOS_READ(RTC_CONTROL);
+       save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+       CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
+                  RTC_FREQ_SELECT);
+       CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
+
+       i = 100;
+       while (i-- > 0) {
+               mdelay(10);
+               if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
+                       i -= 10;
+       }
+
+       CMOS_WRITE(save_control, RTC_CONTROL);
+       CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+       clear_IO_APIC_pin(apic, pin);
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+       io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+int timer_uses_ioapic_pin_0;
+
+/*
+ * This code may look a bit paranoid, but it's supposed to cooperate with
+ * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
+ * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
+ * fanatically on his truly buggy board.
+ */
+static inline void check_timer(void)
+{
+       int apic1, pin1, apic2, pin2;
+       int vector;
+
+       /*
+        * get/set the timer IRQ vector:
+        */
+       disable_8259A_irq(0);
+       vector = assign_irq_vector(0);
+       set_intr_gate(vector, interrupt[0]);
+
+       /*
+        * Subtle, code in do_timer_interrupt() expects an AEOI
+        * mode for the 8259A whenever interrupts are routed
+        * through I/O APICs.  Also IRQ0 has to be enabled in
+        * the 8259A which implies the virtual wire has to be
+        * disabled in the local APIC.
+        */
+       apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+       init_8259A(1);
+       timer_ack = 1;
+       if (timer_over_8254 > 0)
+               enable_8259A_irq(0);
+
+       pin1  = find_isa_irq_pin(0, mp_INT);
+       apic1 = find_isa_irq_apic(0, mp_INT);
+       pin2  = ioapic_i8259.pin;
+       apic2 = ioapic_i8259.apic;
+
+       if (pin1 == 0)
+               timer_uses_ioapic_pin_0 = 1;
+
+       printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d 
pin2=%d\n",
+               vector, apic1, pin1, apic2, pin2);
+
+       if (pin1 != -1) {
+               /*
+                * Ok, does IRQ0 through the IOAPIC work?
+                */
+               unmask_IO_APIC_irq(0);
+               if (timer_irq_works()) {
+                       if (nmi_watchdog == NMI_IO_APIC) {
+                               disable_8259A_irq(0);
+                               setup_nmi();
+                               enable_8259A_irq(0);
+                       }
+                       if (disable_timer_pin_1 > 0)
+                               clear_IO_APIC_pin(0, pin1);
+                       return;
+               }
+               clear_IO_APIC_pin(apic1, pin1);
+               printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to "
+                               "IO-APIC\n");
+       }
+
+       printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A 
... ");
+       if (pin2 != -1) {
+               printk("\n..... (found pin %d) ...", pin2);
+               /*
+                * legacy devices should be connected to IO APIC #0
+                */
+               setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
+               if (timer_irq_works()) {
+                       printk("works.\n");
+                       if (pin1 != -1)
+                               replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
+                       else
+                               add_pin_to_irq(0, apic2, pin2);
+                       if (nmi_watchdog == NMI_IO_APIC) {
+                               setup_nmi();
+                       }
+                       return;
+               }
+               /*
+                * Cleanup, just in case ...
+                */
+               clear_IO_APIC_pin(apic2, pin2);
+       }
+       printk(" failed.\n");
+
+       if (nmi_watchdog == NMI_IO_APIC) {
+               printk(KERN_WARNING "timer doesn't work through the IO-APIC - 
disabling NMI Watchdog!\n");
+               nmi_watchdog = 0;
+       }
+
+       printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
+
+       disable_8259A_irq(0);
+       irq_desc[0].chip = &lapic_irq_type;
+       apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector);   /* Fixed mode */
+       enable_8259A_irq(0);
+
+       if (timer_irq_works()) {
+               printk(" works.\n");
+               return;
+       }
+       apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
+       printk(" failed.\n");
+
+       printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
+
+       timer_ack = 0;
+       init_8259A(0);
+       make_8259A_irq(0);
+       apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
+
+       unlock_ExtINT_logic();
+
+       if (timer_irq_works()) {
+               printk(" works.\n");
+               return;
+       }
+       printk(" failed :(.\n");
+       panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
+               "report.  Then try booting with the 'noapic' option");
+}
+#else
+int timer_uses_ioapic_pin_0 = 0;
+#define check_timer() ((void)0)
+#endif
+
+/*
+ *
+ * IRQ's that are handled by the PIC in the MPS IOAPIC case.
+ * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
+ *   Linux doesn't really care, as it's not actually used
+ *   for any interrupt handling anyway.
+ */
+#define PIC_IRQS       (1 << PIC_CASCADE_IR)
+
+void __init setup_IO_APIC(void)
+{
+       enable_IO_APIC();
+
+       if (acpi_ioapic)
+               io_apic_irqs = ~0;      /* all IRQs go through IOAPIC */
+       else
+               io_apic_irqs = ~PIC_IRQS;
+
+       printk("ENABLING IO-APIC IRQs\n");
+
+       /*
+        * Set up IO-APIC IRQ routing.
+        */
+       if (!acpi_ioapic)
+               setup_ioapic_ids_from_mpc();
+#ifndef CONFIG_XEN
+       sync_Arb_IDs();
+#endif
+       setup_IO_APIC_irqs();
+       init_IO_APIC_traps();
+       check_timer();
+       if (!acpi_ioapic)
+               print_IO_APIC();
+}
+
+static int __init setup_disable_8254_timer(char *s)
+{
+       timer_over_8254 = -1;
+       return 1;
+}
+static int __init setup_enable_8254_timer(char *s)
+{
+       timer_over_8254 = 2;
+       return 1;
+}
+
+__setup("disable_8254_timer", setup_disable_8254_timer);
+__setup("enable_8254_timer", setup_enable_8254_timer);
+
+/*
+ *     Called after all the initialization is done. If we didnt find any
+ *     APIC bugs then we can allow the modify fast path
+ */
+ 
+static int __init io_apic_bug_finalize(void)
+{
+       if(sis_apic_bug == -1)
+               sis_apic_bug = 0;
+       if (is_initial_xendomain()) {
+               struct xen_platform_op op = { .cmd = XENPF_platform_quirk };
+               op.u.platform_quirk.quirk_id = sis_apic_bug ?
+                       QUIRK_IOAPIC_BAD_REGSEL : QUIRK_IOAPIC_GOOD_REGSEL;
+               HYPERVISOR_platform_op(&op);
+       }
+       return 0;
+}
+
+late_initcall(io_apic_bug_finalize);
+
+struct sysfs_ioapic_data {
+       struct sys_device dev;
+       struct IO_APIC_route_entry entry[0];
+};
+static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
+
+static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
+{
+       struct IO_APIC_route_entry *entry;
+       struct sysfs_ioapic_data *data;
+       unsigned long flags;
+       int i;
+       
+       data = container_of(dev, struct sysfs_ioapic_data, dev);
+       entry = data->entry;
+       spin_lock_irqsave(&ioapic_lock, flags);
+       for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+               *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
+               *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
+       }
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return 0;
+}
+
+static int ioapic_resume(struct sys_device *dev)
+{
+       struct IO_APIC_route_entry *entry;
+       struct sysfs_ioapic_data *data;
+       unsigned long flags;
+       union IO_APIC_reg_00 reg_00;
+       int i;
+       
+       data = container_of(dev, struct sysfs_ioapic_data, dev);
+       entry = data->entry;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_00.raw = io_apic_read(dev->id, 0);
+       if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
+               reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+               io_apic_write(dev->id, 0, reg_00.raw);
+       }
+       for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+               io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
+               io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
+       }
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return 0;
+}
+
+static struct sysdev_class ioapic_sysdev_class = {
+       set_kset_name("ioapic"),
+       .suspend = ioapic_suspend,
+       .resume = ioapic_resume,
+};
+
+static int __init ioapic_init_sysfs(void)
+{
+       struct sys_device * dev;
+       int i, size, error = 0;
+
+       error = sysdev_class_register(&ioapic_sysdev_class);
+       if (error)
+               return error;
+
+       for (i = 0; i < nr_ioapics; i++ ) {
+               size = sizeof(struct sys_device) + nr_ioapic_registers[i] 
+                       * sizeof(struct IO_APIC_route_entry);
+               mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
+               if (!mp_ioapic_data[i]) {
+                       printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+                       continue;
+               }
+               memset(mp_ioapic_data[i], 0, size);
+               dev = &mp_ioapic_data[i]->dev;
+               dev->id = i; 
+               dev->cls = &ioapic_sysdev_class;
+               error = sysdev_register(dev);
+               if (error) {
+                       kfree(mp_ioapic_data[i]);
+                       mp_ioapic_data[i] = NULL;
+                       printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+                       continue;
+               }
+       }
+
+       return 0;
+}
+
+device_initcall(ioapic_init_sysfs);
+
+/* --------------------------------------------------------------------------
+                          ACPI-based IOAPIC Configuration
+   -------------------------------------------------------------------------- 
*/
+
+#ifdef CONFIG_ACPI
+
+int __init io_apic_get_unique_id (int ioapic, int apic_id)
+{
+#ifndef CONFIG_XEN
+       union IO_APIC_reg_00 reg_00;
+       static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
+       physid_mask_t tmp;
+       unsigned long flags;
+       int i = 0;
+
+       /*
+        * The P4 platform supports up to 256 APIC IDs on two separate APIC 
+        * buses (one for LAPICs, one for IOAPICs), where predecessors only 
+        * supports up to 16 on one shared APIC bus.
+        * 
+        * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
+        *      advantage of new APIC bus architecture.
+        */
+
+       if (physids_empty(apic_id_map))
+               apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_00.raw = io_apic_read(ioapic, 0);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       if (apic_id >= get_physical_broadcast()) {
+               printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
+                       "%d\n", ioapic, apic_id, reg_00.bits.ID);
+               apic_id = reg_00.bits.ID;
+       }
+
+       /*
+        * Every APIC in a system must have a unique ID or we get lots of nice 
+        * 'stuck on smp_invalidate_needed IPI wait' messages.
+        */
+       if (check_apicid_used(apic_id_map, apic_id)) {
+
+               for (i = 0; i < get_physical_broadcast(); i++) {
+                       if (!check_apicid_used(apic_id_map, i))
+                               break;
+               }
+
+               if (i == get_physical_broadcast())
+                       panic("Max apic_id exceeded!\n");
+
+               printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
+                       "trying %d\n", ioapic, apic_id, i);
+
+               apic_id = i;
+       } 
+
+       tmp = apicid_to_cpu_present(apic_id);
+       physids_or(apic_id_map, apic_id_map, tmp);
+
+       if (reg_00.bits.ID != apic_id) {
+               reg_00.bits.ID = apic_id;
+
+               spin_lock_irqsave(&ioapic_lock, flags);
+               io_apic_write(ioapic, 0, reg_00.raw);
+               reg_00.raw = io_apic_read(ioapic, 0);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+
+               /* Sanity check */
+               if (reg_00.bits.ID != apic_id) {
+                       printk("IOAPIC[%d]: Unable to change apic_id!\n", 
ioapic);
+                       return -1;
+               }
+       }
+
+       apic_printk(APIC_VERBOSE, KERN_INFO
+                       "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+#endif /* !CONFIG_XEN */
+
+       return apic_id;
+}
+
+
+int __init io_apic_get_version (int ioapic)
+{
+       union IO_APIC_reg_01    reg_01;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_01.raw = io_apic_read(ioapic, 1);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return reg_01.bits.version;
+}
+
+
+int __init io_apic_get_redir_entries (int ioapic)
+{
+       union IO_APIC_reg_01    reg_01;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_01.raw = io_apic_read(ioapic, 1);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return reg_01.bits.entries;
+}
+
+
+int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int 
active_high_low)
+{
+       struct IO_APIC_route_entry entry;
+       unsigned long flags;
+
+       if (!IO_APIC_IRQ(irq)) {
+               printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+                       ioapic);
+               return -EINVAL;
+       }
+
+       /*
+        * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
+        * Note that we mask (disable) IRQs now -- these get enabled when the
+        * corresponding device driver registers for this IRQ.
+        */
+
+       memset(&entry,0,sizeof(entry));
+
+       entry.delivery_mode = INT_DELIVERY_MODE;
+       entry.dest_mode = INT_DEST_MODE;
+       entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+       entry.trigger = edge_level;
+       entry.polarity = active_high_low;
+       entry.mask  = 1;
+
+       /*
+        * IRQs < 16 are already in the irq_2_pin[] map
+        */
+       if (irq >= 16)
+               add_pin_to_irq(irq, ioapic, pin);
+
+       entry.vector = assign_irq_vector(irq);
+
+       apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
+               "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
+               mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
+               edge_level, active_high_low);
+
+       ioapic_register_intr(irq, entry.vector, edge_level);
+
+       if (!ioapic && (irq < 16))
+               disable_8259A_irq(irq);
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
+       io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
+       set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return 0;
+}
+
+#endif /* CONFIG_ACPI */
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/ioport-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/ioport-xen.c     Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,122 @@
+/*
+ *     linux/arch/i386/kernel/ioport.c
+ *
+ * This contains the io-permission bitmap code - written by obz, with changes
+ * by Linus.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/thread_info.h>
+#include <xen/interface/physdev.h>
+
+/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
+static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int 
extent, int new_value)
+{
+       unsigned long mask;
+       unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG);
+       unsigned int low_index = base & (BITS_PER_LONG-1);
+       int length = low_index + extent;
+
+       if (low_index != 0) {
+               mask = (~0UL << low_index);
+               if (length < BITS_PER_LONG)
+                       mask &= ~(~0UL << length);
+               if (new_value)
+                       *bitmap_base++ |= mask;
+               else
+                       *bitmap_base++ &= ~mask;
+               length -= BITS_PER_LONG;
+       }
+
+       mask = (new_value ? ~0UL : 0UL);
+       while (length >= BITS_PER_LONG) {
+               *bitmap_base++ = mask;
+               length -= BITS_PER_LONG;
+       }
+
+       if (length > 0) {
+               mask = ~(~0UL << length);
+               if (new_value)
+                       *bitmap_base++ |= mask;
+               else
+                       *bitmap_base++ &= ~mask;
+       }
+}
+
+
+/*
+ * this changes the io permissions bitmap in the current task.
+ */
+asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
+{
+       struct thread_struct * t = &current->thread;
+       unsigned long *bitmap;
+       struct physdev_set_iobitmap set_iobitmap;
+
+       if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
+               return -EINVAL;
+       if (turn_on && !capable(CAP_SYS_RAWIO))
+               return -EPERM;
+
+       /*
+        * If it's the first ioperm() call in this thread's lifetime, set the
+        * IO bitmap up. ioperm() is much less timing critical than clone(),
+        * this is why we delay this operation until now:
+        */
+       if (!t->io_bitmap_ptr) {
+               bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+               if (!bitmap)
+                       return -ENOMEM;
+
+               memset(bitmap, 0xff, IO_BITMAP_BYTES);
+               t->io_bitmap_ptr = bitmap;
+               set_thread_flag(TIF_IO_BITMAP);
+
+               set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap);
+               set_iobitmap.nr_ports = IO_BITMAP_BITS;
+               HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &set_iobitmap);
+       }
+
+       set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
+
+       return 0;
+}
+
+/*
+ * sys_iopl has to be used when you want to access the IO ports
+ * beyond the 0x3ff range: to get the full 65536 ports bitmapped
+ * you'd need 8kB of bitmaps/process, which is a bit excessive.
+ *
+ * Here we just change the eflags value on the stack: we allow
+ * only the super-user to do it. This depends on the stack-layout
+ * on system-call entry - see also fork() and the signal handling
+ * code.
+ */
+
+asmlinkage long sys_iopl(unsigned long unused)
+{
+       volatile struct pt_regs * regs = (struct pt_regs *) &unused;
+       unsigned int level = regs->ebx;
+       struct thread_struct *t = &current->thread;
+       unsigned int old = (t->iopl >> 12) & 3;
+
+       if (level > 3)
+               return -EINVAL;
+       /* Trying to gain more privileges? */
+       if (level > old) {
+               if (!capable(CAP_SYS_RAWIO))
+                       return -EPERM;
+       }
+       t->iopl = level << 12;
+       set_iopl_mask(t->iopl);
+       return 0;
+}
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/irq-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/irq-xen.c        Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,324 @@
+/*
+ *     linux/arch/i386/kernel/irq.c
+ *
+ *     Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the lowest level x86-specific interrupt
+ * entry, irq-stacks and irq statistics code. All the remaining
+ * irq logic is done by the generic kernel/irq/ code and
+ * by the x86-specific irq controller code. (e.g. i8259.c and
+ * io_apic.c.)
+ */
+
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+
+DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
+EXPORT_PER_CPU_SYMBOL(irq_stat);
+
+#ifndef CONFIG_X86_LOCAL_APIC
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+       printk("unexpected IRQ trap at vector %02x\n", irq);
+}
+#endif
+
+#ifdef CONFIG_4KSTACKS
+/*
+ * per-CPU IRQ handling contexts (thread information and stack)
+ */
+union irq_ctx {
+       struct thread_info      tinfo;
+       u32                     stack[THREAD_SIZE/sizeof(u32)];
+};
+
+static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
+static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
+#endif
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+fastcall unsigned int do_IRQ(struct pt_regs *regs)
+{      
+       /* high bit used in ret_from_ code */
+       int irq = ~regs->orig_eax;
+#ifdef CONFIG_4KSTACKS
+       union irq_ctx *curctx, *irqctx;
+       u32 *isp;
+#endif
+
+       if (unlikely((unsigned)irq >= NR_IRQS)) {
+               printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
+                                       __FUNCTION__, irq);
+               BUG();
+       }
+
+       irq_enter();
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+       /* Debugging check for stack overflow: is there less than 1KB free? */
+       {
+               long esp;
+
+               __asm__ __volatile__("andl %%esp,%0" :
+                                       "=r" (esp) : "0" (THREAD_SIZE - 1));
+               if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
+                       printk("do_IRQ: stack overflow: %ld\n",
+                               esp - sizeof(struct thread_info));
+                       dump_stack();
+               }
+       }
+#endif
+
+#ifdef CONFIG_4KSTACKS
+
+       curctx = (union irq_ctx *) current_thread_info();
+       irqctx = hardirq_ctx[smp_processor_id()];
+
+       /*
+        * this is where we switch to the IRQ stack. However, if we are
+        * already using the IRQ stack (because we interrupted a hardirq
+        * handler) we can't do that and just have to keep using the
+        * current stack (which is the irq stack already after all)
+        */
+       if (curctx != irqctx) {
+               int arg1, arg2, ebx;
+
+               /* build the stack frame on the IRQ stack */
+               isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+               irqctx->tinfo.task = curctx->tinfo.task;
+               irqctx->tinfo.previous_esp = current_stack_pointer;
+
+               /*
+                * Copy the softirq bits in preempt_count so that the
+                * softirq checks work in the hardirq context.
+                */
+               irqctx->tinfo.preempt_count =
+                       (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
+                       (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
+
+               asm volatile(
+                       "       xchgl   %%ebx,%%esp      \n"
+                       "       call    __do_IRQ         \n"
+                       "       movl   %%ebx,%%esp      \n"
+                       : "=a" (arg1), "=d" (arg2), "=b" (ebx)
+                       :  "0" (irq),   "1" (regs),  "2" (isp)
+                       : "memory", "cc", "ecx"
+               );
+       } else
+#endif
+               __do_IRQ(irq, regs);
+
+       irq_exit();
+
+       return 1;
+}
+
+#ifdef CONFIG_4KSTACKS
+
+/*
+ * These should really be __section__(".bss.page_aligned") as well, but
+ * gcc's 3.0 and earlier don't handle that correctly.
+ */
+static char softirq_stack[NR_CPUS * THREAD_SIZE]
+               __attribute__((__aligned__(THREAD_SIZE)));
+
+static char hardirq_stack[NR_CPUS * THREAD_SIZE]
+               __attribute__((__aligned__(THREAD_SIZE)));
+
+/*
+ * allocate per-cpu stacks for hardirq and for softirq processing
+ */
+void irq_ctx_init(int cpu)
+{
+       union irq_ctx *irqctx;
+
+       if (hardirq_ctx[cpu])
+               return;
+
+       irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
+       irqctx->tinfo.task              = NULL;
+       irqctx->tinfo.exec_domain       = NULL;
+       irqctx->tinfo.cpu               = cpu;
+       irqctx->tinfo.preempt_count     = HARDIRQ_OFFSET;
+       irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
+
+       hardirq_ctx[cpu] = irqctx;
+
+       irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE];
+       irqctx->tinfo.task              = NULL;
+       irqctx->tinfo.exec_domain       = NULL;
+       irqctx->tinfo.cpu               = cpu;
+       irqctx->tinfo.preempt_count     = 0;
+       irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
+
+       softirq_ctx[cpu] = irqctx;
+
+       printk("CPU %u irqstacks, hard=%p soft=%p\n",
+               cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
+}
+
+void irq_ctx_exit(int cpu)
+{
+       hardirq_ctx[cpu] = NULL;
+}
+
+extern asmlinkage void __do_softirq(void);
+
+asmlinkage void do_softirq(void)
+{
+       unsigned long flags;
+       struct thread_info *curctx;
+       union irq_ctx *irqctx;
+       u32 *isp;
+
+       if (in_interrupt())
+               return;
+
+       local_irq_save(flags);
+
+       if (local_softirq_pending()) {
+               curctx = current_thread_info();
+               irqctx = softirq_ctx[smp_processor_id()];
+               irqctx->tinfo.task = curctx->task;
+               irqctx->tinfo.previous_esp = current_stack_pointer;
+
+               /* build the stack frame on the softirq stack */
+               isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+
+               asm volatile(
+                       "       xchgl   %%ebx,%%esp     \n"
+                       "       call    __do_softirq    \n"
+                       "       movl    %%ebx,%%esp     \n"
+                       : "=b"(isp)
+                       : "0"(isp)
+                       : "memory", "cc", "edx", "ecx", "eax"
+               );
+               /*
+                * Shouldnt happen, we returned above if in_interrupt():
+                */
+               WARN_ON_ONCE(softirq_count());
+       }
+
+       local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(do_softirq);
+#endif
+
+/*
+ * Interrupt statistics:
+ */
+
+atomic_t irq_err_count;
+
+/*
+ * /proc/interrupts printing:
+ */
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+       int i = *(loff_t *) v, j;
+       struct irqaction * action;
+       unsigned long flags;
+
+       if (i == 0) {
+               seq_printf(p, "           ");
+               for_each_online_cpu(j)
+                       seq_printf(p, "CPU%-8d",j);
+               seq_putc(p, '\n');
+       }
+
+       if (i < NR_IRQS) {
+               spin_lock_irqsave(&irq_desc[i].lock, flags);
+               action = irq_desc[i].action;
+               if (!action)
+                       goto skip;
+               seq_printf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+               seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+#endif
+               seq_printf(p, " %14s", irq_desc[i].chip->typename);
+               seq_printf(p, "  %s", action->name);
+
+               for (action=action->next; action; action = action->next)
+                       seq_printf(p, ", %s", action->name);
+
+               seq_putc(p, '\n');
+skip:
+               spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+       } else if (i == NR_IRQS) {
+               seq_printf(p, "NMI: ");
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ", nmi_count(j));
+               seq_putc(p, '\n');
+#ifdef CONFIG_X86_LOCAL_APIC
+               seq_printf(p, "LOC: ");
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ",
+                               per_cpu(irq_stat,j).apic_timer_irqs);
+               seq_putc(p, '\n');
+#endif
+               seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#if defined(CONFIG_X86_IO_APIC)
+               seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+       }
+       return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+void fixup_irqs(cpumask_t map)
+{
+       unsigned int irq;
+       static int warned;
+
+       for (irq = 0; irq < NR_IRQS; irq++) {
+               cpumask_t mask;
+               if (irq == 2)
+                       continue;
+
+               cpus_and(mask, irq_desc[irq].affinity, map);
+               if (any_online_cpu(mask) == NR_CPUS) {
+                       /*printk("Breaking affinity for irq %i\n", irq);*/
+                       mask = map;
+               }
+               if (irq_desc[irq].chip->set_affinity)
+                       irq_desc[irq].chip->set_affinity(irq, mask);
+               else if (irq_desc[irq].action && !(warned++))
+                       printk("Cannot set affinity for irq %i\n", irq);
+       }
+
+#if 0
+       barrier();
+       /* Ingo Molnar says: "after the IO-APIC masks have been redirected
+          [note the nop - the interrupt-enable boundary on x86 is two
+          instructions from sti] - to flush out pending hardirqs and
+          IPIs. After this point nothing is supposed to reach this CPU." */
+       __asm__ __volatile__("sti; nop; cli");
+       barrier();
+#else
+       /* That doesn't seem sufficient.  Give it 1ms. */
+       local_irq_enable();
+       mdelay(1);
+       local_irq_disable();
+#endif
+}
+#endif
+
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/ldt-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/ldt-xen.c        Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,270 @@
+/*
+ * linux/kernel/ldt.c
+ *
+ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+ * Copyright (C) 1999 Ingo Molnar <mingo@xxxxxxxxxx>
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/ldt.h>
+#include <asm/desc.h>
+#include <asm/mmu_context.h>
+
+#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
+static void flush_ldt(void *null)
+{
+       if (current->active_mm)
+               load_LDT(&current->active_mm->context);
+}
+#endif
+
+static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
+{
+       void *oldldt;
+       void *newldt;
+       int oldsize;
+
+       if (mincount <= pc->size)
+               return 0;
+       oldsize = pc->size;
+       mincount = (mincount+511)&(~511);
+       if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
+               newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+       else
+               newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+
+       if (!newldt)
+               return -ENOMEM;
+
+       if (oldsize)
+               memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
+       oldldt = pc->ldt;
+       memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, 
(mincount-oldsize)*LDT_ENTRY_SIZE);
+       pc->ldt = newldt;
+       wmb();
+       pc->size = mincount;
+       wmb();
+
+       if (reload) {
+#ifdef CONFIG_SMP
+               cpumask_t mask;
+               preempt_disable();
+#endif
+               make_pages_readonly(
+                       pc->ldt,
+                       (pc->size * LDT_ENTRY_SIZE) / PAGE_SIZE,
+                       XENFEAT_writable_descriptor_tables);
+               load_LDT(pc);
+#ifdef CONFIG_SMP
+               mask = cpumask_of_cpu(smp_processor_id());
+               if (!cpus_equal(current->mm->cpu_vm_mask, mask))
+                       smp_call_function(flush_ldt, NULL, 1, 1);
+               preempt_enable();
+#endif
+       }
+       if (oldsize) {
+               make_pages_writable(
+                       oldldt,
+                       (oldsize * LDT_ENTRY_SIZE) / PAGE_SIZE,
+                       XENFEAT_writable_descriptor_tables);
+               if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
+                       vfree(oldldt);
+               else
+                       kfree(oldldt);
+       }
+       return 0;
+}
+
+static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
+{
+       int err = alloc_ldt(new, old->size, 0);
+       if (err < 0)
+               return err;
+       memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
+       make_pages_readonly(
+               new->ldt,
+               (new->size * LDT_ENTRY_SIZE) / PAGE_SIZE,
+               XENFEAT_writable_descriptor_tables);
+       return 0;
+}
+
+/*
+ * we do not have to muck with descriptors here, that is
+ * done in switch_mm() as needed.
+ */
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+       struct mm_struct * old_mm;
+       int retval = 0;
+
+       init_MUTEX(&mm->context.sem);
+       mm->context.size = 0;
+       mm->context.has_foreign_mappings = 0;
+       old_mm = current->mm;
+       if (old_mm && old_mm->context.size > 0) {
+               down(&old_mm->context.sem);
+               retval = copy_ldt(&mm->context, &old_mm->context);
+               up(&old_mm->context.sem);
+       }
+       return retval;
+}
+
+/*
+ * No need to lock the MM as we are the last user
+ */
+void destroy_context(struct mm_struct *mm)
+{
+       if (mm->context.size) {
+               if (mm == current->active_mm)
+                       clear_LDT();
+               make_pages_writable(
+                       mm->context.ldt,
+                       (mm->context.size * LDT_ENTRY_SIZE) / PAGE_SIZE,
+                       XENFEAT_writable_descriptor_tables);
+               if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
+                       vfree(mm->context.ldt);
+               else
+                       kfree(mm->context.ldt);
+               mm->context.size = 0;
+       }
+}
+
+static int read_ldt(void __user * ptr, unsigned long bytecount)
+{
+       int err;
+       unsigned long size;
+       struct mm_struct * mm = current->mm;
+
+       if (!mm->context.size)
+               return 0;
+       if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
+               bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
+
+       down(&mm->context.sem);
+       size = mm->context.size*LDT_ENTRY_SIZE;
+       if (size > bytecount)
+               size = bytecount;
+
+       err = 0;
+       if (copy_to_user(ptr, mm->context.ldt, size))
+               err = -EFAULT;
+       up(&mm->context.sem);
+       if (err < 0)
+               goto error_return;
+       if (size != bytecount) {
+               /* zero-fill the rest */
+               if (clear_user(ptr+size, bytecount-size) != 0) {
+                       err = -EFAULT;
+                       goto error_return;
+               }
+       }
+       return bytecount;
+error_return:
+       return err;
+}
+
+static int read_default_ldt(void __user * ptr, unsigned long bytecount)
+{
+       int err;
+       unsigned long size;
+       void *address;
+
+       err = 0;
+       address = &default_ldt[0];
+       size = 5*sizeof(struct desc_struct);
+       if (size > bytecount)
+               size = bytecount;
+
+       err = size;
+       if (copy_to_user(ptr, address, size))
+               err = -EFAULT;
+
+       return err;
+}
+
+static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
+{
+       struct mm_struct * mm = current->mm;
+       __u32 entry_1, entry_2;
+       int error;
+       struct user_desc ldt_info;
+
+       error = -EINVAL;
+       if (bytecount != sizeof(ldt_info))
+               goto out;
+       error = -EFAULT;        
+       if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
+               goto out;
+
+       error = -EINVAL;
+       if (ldt_info.entry_number >= LDT_ENTRIES)
+               goto out;
+       if (ldt_info.contents == 3) {
+               if (oldmode)
+                       goto out;
+               if (ldt_info.seg_not_present == 0)
+                       goto out;
+       }
+
+       down(&mm->context.sem);
+       if (ldt_info.entry_number >= mm->context.size) {
+               error = alloc_ldt(&current->mm->context, 
ldt_info.entry_number+1, 1);
+               if (error < 0)
+                       goto out_unlock;
+       }
+
+       /* Allow LDTs to be cleared by the user. */
+       if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+               if (oldmode || LDT_empty(&ldt_info)) {
+                       entry_1 = 0;
+                       entry_2 = 0;
+                       goto install;
+               }
+       }
+
+       entry_1 = LDT_entry_a(&ldt_info);
+       entry_2 = LDT_entry_b(&ldt_info);
+       if (oldmode)
+               entry_2 &= ~(1 << 20);
+
+       /* Install the new entry ...  */
+install:
+       error = write_ldt_entry(mm->context.ldt, ldt_info.entry_number,
+                               entry_1, entry_2);
+
+out_unlock:
+       up(&mm->context.sem);
+out:
+       return error;
+}
+
+asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long 
bytecount)
+{
+       int ret = -ENOSYS;
+
+       switch (func) {
+       case 0:
+               ret = read_ldt(ptr, bytecount);
+               break;
+       case 1:
+               ret = write_ldt(ptr, bytecount, 1);
+               break;
+       case 2:
+               ret = read_default_ldt(ptr, bytecount);
+               break;
+       case 0x11:
+               ret = write_ldt(ptr, bytecount, 0);
+               break;
+       }
+       return ret;
+}
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/machine_kexec.c
--- a/arch/i386/kernel/machine_kexec.c  Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/machine_kexec.c  Mon Jun 04 10:05:28 2007 +0100
@@ -19,6 +19,10 @@
 #include <asm/desc.h>
 #include <asm/system.h>
 
+#ifdef CONFIG_XEN
+#include <xen/interface/kexec.h>
+#endif
+
 #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
 static u32 kexec_pgd[1024] PAGE_ALIGNED;
 #ifdef CONFIG_X86_PAE
@@ -27,6 +31,40 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED
 #endif
 static u32 kexec_pte0[1024] PAGE_ALIGNED;
 static u32 kexec_pte1[1024] PAGE_ALIGNED;
+
+#ifdef CONFIG_XEN
+
+#define __ma(x) (pfn_to_mfn(__pa((x)) >> PAGE_SHIFT) << PAGE_SHIFT)
+
+#if PAGES_NR > KEXEC_XEN_NO_PAGES
+#error PAGES_NR is greater than KEXEC_XEN_NO_PAGES - Xen support will break
+#endif
+
+#if PA_CONTROL_PAGE != 0
+#error PA_CONTROL_PAGE is non zero - Xen support will break
+#endif
+
+void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
+{
+       void *control_page;
+
+       memset(xki->page_list, 0, sizeof(xki->page_list));
+
+       control_page = page_address(image->control_code_page);
+       memcpy(control_page, relocate_kernel, PAGE_SIZE);
+
+       xki->page_list[PA_CONTROL_PAGE] = __ma(control_page);
+       xki->page_list[PA_PGD] = __ma(kexec_pgd);
+#ifdef CONFIG_X86_PAE
+       xki->page_list[PA_PMD_0] = __ma(kexec_pmd0);
+       xki->page_list[PA_PMD_1] = __ma(kexec_pmd1);
+#endif
+       xki->page_list[PA_PTE_0] = __ma(kexec_pte0);
+       xki->page_list[PA_PTE_1] = __ma(kexec_pte1);
+
+}
+
+#endif /* CONFIG_XEN */
 
 /*
  * A architecture hook called to validate the
@@ -54,6 +92,7 @@ void machine_kexec_cleanup(struct kimage
 {
 }
 
+#ifndef CONFIG_XEN
 /*
  * Do not allocate memory (or fail in any way) in machine_kexec().
  * We are past the point of no return, committed to rebooting now.
@@ -87,3 +126,4 @@ NORET_TYPE void machine_kexec(struct kim
        relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
                        image->start, cpu_has_pae);
 }
+#endif
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/microcode-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/microcode-xen.c  Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,144 @@
+/*
+ *     Intel CPU Microcode Update Driver for Linux
+ *
+ *     Copyright (C) 2000-2004 Tigran Aivazian
+ *
+ *     This driver allows to upgrade microcode on Intel processors
+ *     belonging to IA-32 family - PentiumPro, Pentium II, 
+ *     Pentium III, Xeon, Pentium 4, etc.
+ *
+ *     Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual, 
+ *     Order Number 245472 or free download from:
+ *             
+ *     http://developer.intel.com/design/pentium4/manuals/245472.htm
+ *
+ *     For more information, go to http://www.urbanmyth.org/microcode
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+
+//#define DEBUG /* pr_debug */
+#include <linux/capability.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/miscdevice.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/syscalls.h>
+
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+
+MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
+MODULE_AUTHOR("Tigran Aivazian <tigran@xxxxxxxxxxx>");
+MODULE_LICENSE("GPL");
+
+static int verbose;
+module_param(verbose, int, 0644);
+
+#define MICROCODE_VERSION      "1.14a-xen"
+
+#define DEFAULT_UCODE_DATASIZE         (2000)    /* 2000 bytes */
+#define MC_HEADER_SIZE         (sizeof (microcode_header_t))     /* 48 bytes */
+#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 
2048 bytes */
+
+/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
+static DEFINE_MUTEX(microcode_mutex);
+                               
+static int microcode_open (struct inode *unused1, struct file *unused2)
+{
+       return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+}
+
+
+static int do_microcode_update (const void __user *ubuf, size_t len)
+{
+       int err;
+       void *kbuf;
+
+       kbuf = vmalloc(len);
+       if (!kbuf)
+               return -ENOMEM;
+
+       if (copy_from_user(kbuf, ubuf, len) == 0) {
+               struct xen_platform_op op;
+
+               op.cmd = XENPF_microcode_update;
+               set_xen_guest_handle(op.u.microcode.data, kbuf);
+               op.u.microcode.length = len;
+               err = HYPERVISOR_platform_op(&op);
+       } else
+               err = -EFAULT;
+
+       vfree(kbuf);
+
+       return err;
+}
+
+static ssize_t microcode_write (struct file *file, const char __user *buf, 
size_t len, loff_t *ppos)
+{
+       ssize_t ret;
+
+       if (len < MC_HEADER_SIZE) {
+               printk(KERN_ERR "microcode: not enough data\n"); 
+               return -EINVAL;
+       }
+
+       mutex_lock(&microcode_mutex);
+
+       ret = do_microcode_update(buf, len);
+       if (!ret)
+               ret = (ssize_t)len;
+
+       mutex_unlock(&microcode_mutex);
+
+       return ret;
+}
+
+static struct file_operations microcode_fops = {
+       .owner          = THIS_MODULE,
+       .write          = microcode_write,
+       .open           = microcode_open,
+};
+
+static struct miscdevice microcode_dev = {
+       .minor          = MICROCODE_MINOR,
+       .name           = "microcode",
+       .fops           = &microcode_fops,
+};
+
+static int __init microcode_init (void)
+{
+       int error;
+
+       error = misc_register(&microcode_dev);
+       if (error) {
+               printk(KERN_ERR
+                       "microcode: can't misc_register on minor=%d\n",
+                       MICROCODE_MINOR);
+               return error;
+       }
+
+       printk(KERN_INFO 
+               "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " 
<tigran@xxxxxxxxxxx>\n");
+       return 0;
+}
+
+static void __exit microcode_exit (void)
+{
+       misc_deregister(&microcode_dev);
+}
+
+module_init(microcode_init)
+module_exit(microcode_exit)
+MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/mpparse-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/mpparse-xen.c    Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,1185 @@
+/*
+ *     Intel Multiprocessor Specification 1.1 and 1.4
+ *     compliant MP-table parsing routines.
+ *
+ *     (c) 1995 Alan Cox, Building #3 <alan@xxxxxxxxxx>
+ *     (c) 1998, 1999, 2000 Ingo Molnar <mingo@xxxxxxxxxx>
+ *
+ *     Fixes
+ *             Erich Boleyn    :       MP v1.4 and additional changes.
+ *             Alan Cox        :       Added EBDA scanning
+ *             Ingo Molnar     :       various cleanups and rewrites
+ *             Maciej W. Rozycki:      Bits for default MP configurations
+ *             Paul Diefenbaugh:       Added full ACPI support
+ */
+
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/bitops.h>
+
+#include <asm/smp.h>
+#include <asm/acpi.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+#include <asm/io_apic.h>
+
+#include <mach_apic.h>
+#include <mach_mpparse.h>
+#include <bios_ebda.h>
+
+/* Have we found an MP table */
+int smp_found_config;
+unsigned int __initdata maxcpus = NR_CPUS;
+
+/*
+ * Various Linux-internal data structures created from the
+ * MP-table.
+ */
+int apic_version [MAX_APICS];
+int mp_bus_id_to_type [MAX_MP_BUSSES];
+int mp_bus_id_to_node [MAX_MP_BUSSES];
+int mp_bus_id_to_local [MAX_MP_BUSSES];
+int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
+static int mp_current_pci_id;
+
+/* I/O APIC entries */
+struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* # of MP IRQ source entries */
+struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* MP IRQ source entries */
+int mp_irq_entries;
+
+int nr_ioapics;
+
+int pic_mode;
+unsigned long mp_lapic_addr;
+
+unsigned int def_to_bigsmp = 0;
+
+/* Processor that is doing the boot up */
+unsigned int boot_cpu_physical_apicid = -1U;
+/* Internal processor count */
+static unsigned int __devinitdata num_processors;
+
+/* Bitmask of physically existing CPUs */
+physid_mask_t phys_cpu_present_map;
+
+u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+/*
+ * Intel MP BIOS table parsing routines:
+ */
+
+
+/*
+ * Checksum an MP configuration block.
+ */
+
+static int __init mpf_checksum(unsigned char *mp, int len)
+{
+       int sum = 0;
+
+       while (len--)
+               sum += *mp++;
+
+       return sum & 0xFF;
+}
+
+/*
+ * Have to match translation table entries to main table entries by counter
+ * hence the mpc_record variable .... can't see a less disgusting way of
+ * doing this ....
+ */
+
+static int mpc_record; 
+static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] 
__initdata;
+
+#ifndef CONFIG_XEN
+static void __devinit MP_processor_info (struct mpc_config_processor *m)
+{
+       int ver, apicid;
+       physid_mask_t phys_cpu;
+       
+       if (!(m->mpc_cpuflag & CPU_ENABLED))
+               return;
+
+       apicid = mpc_apic_id(m, translation_table[mpc_record]);
+
+       if (m->mpc_featureflag&(1<<0))
+               Dprintk("    Floating point unit present.\n");
+       if (m->mpc_featureflag&(1<<7))
+               Dprintk("    Machine Exception supported.\n");
+       if (m->mpc_featureflag&(1<<8))
+               Dprintk("    64 bit compare & exchange supported.\n");
+       if (m->mpc_featureflag&(1<<9))
+               Dprintk("    Internal APIC present.\n");
+       if (m->mpc_featureflag&(1<<11))
+               Dprintk("    SEP present.\n");
+       if (m->mpc_featureflag&(1<<12))
+               Dprintk("    MTRR  present.\n");
+       if (m->mpc_featureflag&(1<<13))
+               Dprintk("    PGE  present.\n");
+       if (m->mpc_featureflag&(1<<14))
+               Dprintk("    MCA  present.\n");
+       if (m->mpc_featureflag&(1<<15))
+               Dprintk("    CMOV  present.\n");
+       if (m->mpc_featureflag&(1<<16))
+               Dprintk("    PAT  present.\n");
+       if (m->mpc_featureflag&(1<<17))
+               Dprintk("    PSE  present.\n");
+       if (m->mpc_featureflag&(1<<18))
+               Dprintk("    PSN  present.\n");
+       if (m->mpc_featureflag&(1<<19))
+               Dprintk("    Cache Line Flush Instruction present.\n");
+       /* 20 Reserved */
+       if (m->mpc_featureflag&(1<<21))
+               Dprintk("    Debug Trace and EMON Store present.\n");
+       if (m->mpc_featureflag&(1<<22))
+               Dprintk("    ACPI Thermal Throttle Registers  present.\n");
+       if (m->mpc_featureflag&(1<<23))
+               Dprintk("    MMX  present.\n");
+       if (m->mpc_featureflag&(1<<24))
+               Dprintk("    FXSR  present.\n");
+       if (m->mpc_featureflag&(1<<25))
+               Dprintk("    XMM  present.\n");
+       if (m->mpc_featureflag&(1<<26))
+               Dprintk("    Willamette New Instructions  present.\n");
+       if (m->mpc_featureflag&(1<<27))
+               Dprintk("    Self Snoop  present.\n");
+       if (m->mpc_featureflag&(1<<28))
+               Dprintk("    HT  present.\n");
+       if (m->mpc_featureflag&(1<<29))
+               Dprintk("    Thermal Monitor present.\n");
+       /* 30, 31 Reserved */
+
+
+       if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+               Dprintk("    Bootup CPU\n");
+               boot_cpu_physical_apicid = m->mpc_apicid;
+       }
+
+       ver = m->mpc_apicver;
+
+       /*
+        * Validate version
+        */
+       if (ver == 0x0) {
+               printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
+                               "fixing up to 0x10. (tell your hw vendor)\n",
+                               m->mpc_apicid);
+               ver = 0x10;
+       }
+       apic_version[m->mpc_apicid] = ver;
+
+       phys_cpu = apicid_to_cpu_present(apicid);
+       physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
+
+       if (num_processors >= NR_CPUS) {
+               printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+                       "  Processor ignored.\n", NR_CPUS);
+               return;
+       }
+
+       if (num_processors >= maxcpus) {
+               printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
+                       " Processor ignored.\n", maxcpus);
+               return;
+       }
+
+       cpu_set(num_processors, cpu_possible_map);
+       num_processors++;
+
+       /*
+        * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
+        * but we need to work other dependencies like SMP_SUSPEND etc
+        * before this can be done without some confusion.
+        * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
+        *       - Ashok Raj <ashok.raj@xxxxxxxxx>
+        */
+       if (num_processors > 8) {
+               switch (boot_cpu_data.x86_vendor) {
+               case X86_VENDOR_INTEL:
+                       if (!APIC_XAPIC(ver)) {
+                               def_to_bigsmp = 0;
+                               break;
+                       }
+                       /* If P4 and above fall through */
+               case X86_VENDOR_AMD:
+                       def_to_bigsmp = 1;
+               }
+       }
+       bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
+}
+#else
+void __init MP_processor_info (struct mpc_config_processor *m)
+{
+       num_processors++;
+}
+#endif /* CONFIG_XEN */
+
+static void __init MP_bus_info (struct mpc_config_bus *m)
+{
+       char str[7];
+
+       memcpy(str, m->mpc_bustype, 6);
+       str[6] = 0;
+
+       mpc_oem_bus_info(m, str, translation_table[mpc_record]);
+
+       if (m->mpc_busid >= MAX_MP_BUSSES) {
+               printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
+                       " is too large, max. supported is %d\n",
+                       m->mpc_busid, str, MAX_MP_BUSSES - 1);
+               return;
+       }
+
+       if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+       } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
+       } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
+               mpc_oem_pci_bus(m, translation_table[mpc_record]);
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
+               mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
+               mp_current_pci_id++;
+       } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
+       } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) {
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98;
+       } else {
+               printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
+       }
+}
+
+static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
+{
+       if (!(m->mpc_flags & MPC_APIC_USABLE))
+               return;
+
+       printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n",
+               m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
+       if (nr_ioapics >= MAX_IO_APICS) {
+               printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found 
%d).\n",
+                       MAX_IO_APICS, nr_ioapics);
+               panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
+       }
+       if (!m->mpc_apicaddr) {
+               printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
+                       " found in MP table, skipping!\n");
+               return;
+       }
+       mp_ioapics[nr_ioapics] = *m;
+       nr_ioapics++;
+}
+
+static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
+{
+       mp_irqs [mp_irq_entries] = *m;
+       Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
+               " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+                       m->mpc_irqtype, m->mpc_irqflag & 3,
+                       (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
+                       m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+       if (++mp_irq_entries == MAX_IRQ_SOURCES)
+               panic("Max # of irq sources exceeded!!\n");
+}
+
+static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
+{
+       Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
+               " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
+                       m->mpc_irqtype, m->mpc_irqflag & 3,
+                       (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+                       m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+       /*
+        * Well it seems all SMP boards in existence
+        * use ExtINT/LVT1 == LINT0 and
+        * NMI/LVT2 == LINT1 - the following check
+        * will show us if this assumptions is false.
+        * Until then we do not have to add baggage.
+        */
+       if ((m->mpc_irqtype == mp_ExtINT) &&
+               (m->mpc_destapiclint != 0))
+                       BUG();
+       if ((m->mpc_irqtype == mp_NMI) &&
+               (m->mpc_destapiclint != 1))
+                       BUG();
+}
+
+#ifdef CONFIG_X86_NUMAQ
+static void __init MP_translation_info (struct mpc_config_translation *m)
+{
+       printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, 
local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, 
m->trans_local);
+
+       if (mpc_record >= MAX_MPC_ENTRY) 
+               printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
+       else
+               translation_table[mpc_record] = m; /* stash this for later */
+       if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
+               node_set_online(m->trans_quad);
+}
+
+/*
+ * Read/parse the MPC oem tables
+ */
+
+static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
+       unsigned short oemsize)
+{
+       int count = sizeof (*oemtable); /* the header size */
+       unsigned char *oemptr = ((unsigned char *)oemtable)+count;
+       
+       mpc_record = 0;
+       printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", 
oemtable);
+       if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
+       {
+               printk(KERN_WARNING "SMP mpc oemtable: bad signature 
[%c%c%c%c]!\n",
+                       oemtable->oem_signature[0],
+                       oemtable->oem_signature[1],
+                       oemtable->oem_signature[2],
+                       oemtable->oem_signature[3]);
+               return;
+       }
+       if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
+       {
+               printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
+               return;
+       }
+       while (count < oemtable->oem_length) {
+               switch (*oemptr) {
+                       case MP_TRANSLATION:
+                       {
+                               struct mpc_config_translation *m=
+                                       (struct mpc_config_translation *)oemptr;
+                               MP_translation_info(m);
+                               oemptr += sizeof(*m);
+                               count += sizeof(*m);
+                               ++mpc_record;
+                               break;
+                       }
+                       default:
+                       {
+                               printk(KERN_WARNING "Unrecognised OEM table 
entry type! - %d\n", (int) *oemptr);
+                               return;
+                       }
+               }
+       }
+}
+
+static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
+               char *productid)
+{
+       if (strncmp(oem, "IBM NUMA", 8))
+               printk("Warning!  May not be a NUMA-Q system!\n");
+       if (mpc->mpc_oemptr)
+               smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr,
+                               mpc->mpc_oemsize);
+}
+#endif /* CONFIG_X86_NUMAQ */
+
+/*
+ * Read/parse the MPC
+ */
+
+static int __init smp_read_mpc(struct mp_config_table *mpc)
+{
+       char str[16];
+       char oem[10];
+       int count=sizeof(*mpc);
+       unsigned char *mpt=((unsigned char *)mpc)+count;
+
+       if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
+               printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n",
+                       *(u32 *)mpc->mpc_signature);
+               return 0;
+       }
+       if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
+               printk(KERN_ERR "SMP mptable: checksum error!\n");
+               return 0;
+       }
+       if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
+               printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
+                       mpc->mpc_spec);
+               return 0;
+       }
+       if (!mpc->mpc_lapic) {
+               printk(KERN_ERR "SMP mptable: null local APIC address!\n");
+               return 0;
+       }
+       memcpy(oem,mpc->mpc_oem,8);
+       oem[8]=0;
+       printk(KERN_INFO "OEM ID: %s ",oem);
+
+       memcpy(str,mpc->mpc_productid,12);
+       str[12]=0;
+       printk("Product ID: %s ",str);
+
+       mps_oem_check(mpc, oem, str);
+
+       printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
+
+       /* 
+        * Save the local APIC address (it might be non-default) -- but only
+        * if we're not using ACPI.
+        */
+       if (!acpi_lapic)
+               mp_lapic_addr = mpc->mpc_lapic;
+
+       /*
+        *      Now process the configuration blocks.
+        */
+       mpc_record = 0;
+       while (count < mpc->mpc_length) {
+               switch(*mpt) {
+                       case MP_PROCESSOR:
+                       {
+                               struct mpc_config_processor *m=
+                                       (struct mpc_config_processor *)mpt;
+                               /* ACPI may have already provided this data */
+                               if (!acpi_lapic)
+                                       MP_processor_info(m);
+                               mpt += sizeof(*m);
+                               count += sizeof(*m);
+                               break;
+                       }
+                       case MP_BUS:
+                       {
+                               struct mpc_config_bus *m=
+                                       (struct mpc_config_bus *)mpt;
+                               MP_bus_info(m);
+                               mpt += sizeof(*m);
+                               count += sizeof(*m);
+                               break;
+                       }
+                       case MP_IOAPIC:
+                       {
+                               struct mpc_config_ioapic *m=
+                                       (struct mpc_config_ioapic *)mpt;
+                               MP_ioapic_info(m);
+                               mpt+=sizeof(*m);
+                               count+=sizeof(*m);
+                               break;
+                       }
+                       case MP_INTSRC:
+                       {
+                               struct mpc_config_intsrc *m=
+                                       (struct mpc_config_intsrc *)mpt;
+
+                               MP_intsrc_info(m);
+                               mpt+=sizeof(*m);
+                               count+=sizeof(*m);
+                               break;
+                       }
+                       case MP_LINTSRC:
+                       {
+                               struct mpc_config_lintsrc *m=
+                                       (struct mpc_config_lintsrc *)mpt;
+                               MP_lintsrc_info(m);
+                               mpt+=sizeof(*m);
+                               count+=sizeof(*m);
+                               break;
+                       }
+                       default:
+                       {
+                               count = mpc->mpc_length;
+                               break;
+                       }
+               }
+               ++mpc_record;
+       }
+       clustered_apic_check();
+       if (!num_processors)
+               printk(KERN_ERR "SMP mptable: no processors registered!\n");
+       return num_processors;
+}
+
+static int __init ELCR_trigger(unsigned int irq)
+{
+       unsigned int port;
+
+       port = 0x4d0 + (irq >> 3);
+       return (inb(port) >> (irq & 7)) & 1;
+}
+
+static void __init construct_default_ioirq_mptable(int mpc_default_type)
+{
+       struct mpc_config_intsrc intsrc;
+       int i;
+       int ELCR_fallback = 0;
+
+       intsrc.mpc_type = MP_INTSRC;
+       intsrc.mpc_irqflag = 0;                 /* conforming */
+       intsrc.mpc_srcbus = 0;
+       intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
+
+       intsrc.mpc_irqtype = mp_INT;
+
+       /*
+        *  If true, we have an ISA/PCI system with no IRQ entries
+        *  in the MP table. To prevent the PCI interrupts from being set up
+        *  incorrectly, we try to use the ELCR. The sanity check to see if
+        *  there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
+        *  never be level sensitive, so we simply see if the ELCR agrees.
+        *  If it does, we assume it's valid.
+        */
+       if (mpc_default_type == 5) {
+               printk(KERN_INFO "ISA/PCI bus type with no IRQ information... 
falling back to ELCR\n");
+
+               if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || 
ELCR_trigger(13))
+                       printk(KERN_WARNING "ELCR contains invalid data... not 
using ELCR\n");
+               else {
+                       printk(KERN_INFO "Using ELCR to identify PCI 
interrupts\n");
+                       ELCR_fallback = 1;
+               }
+       }
+
+       for (i = 0; i < 16; i++) {
+               switch (mpc_default_type) {
+               case 2:
+                       if (i == 0 || i == 13)
+                               continue;       /* IRQ0 & IRQ13 not connected */
+                       /* fall through */
+               default:
+                       if (i == 2)
+                               continue;       /* IRQ2 is never connected */
+               }
+
+               if (ELCR_fallback) {
+                       /*
+                        *  If the ELCR indicates a level-sensitive interrupt, 
we
+                        *  copy that information over to the MP table in the
+                        *  irqflag field (level sensitive, active high 
polarity).
+                        */
+                       if (ELCR_trigger(i))
+                               intsrc.mpc_irqflag = 13;
+                       else
+                               intsrc.mpc_irqflag = 0;
+               }
+
+               intsrc.mpc_srcbusirq = i;
+               intsrc.mpc_dstirq = i ? i : 2;          /* IRQ0 to INTIN2 */
+               MP_intsrc_info(&intsrc);
+       }
+
+       intsrc.mpc_irqtype = mp_ExtINT;
+       intsrc.mpc_srcbusirq = 0;
+       intsrc.mpc_dstirq = 0;                          /* 8259A to INTIN0 */
+       MP_intsrc_info(&intsrc);
+}
+
+static inline void __init construct_default_ISA_mptable(int mpc_default_type)
+{
+       struct mpc_config_processor processor;
+       struct mpc_config_bus bus;
+       struct mpc_config_ioapic ioapic;
+       struct mpc_config_lintsrc lintsrc;
+       int linttypes[2] = { mp_ExtINT, mp_NMI };
+       int i;
+
+       /*
+        * local APIC has default address
+        */
+       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+       /*
+        * 2 CPUs, numbered 0 & 1.
+        */
+       processor.mpc_type = MP_PROCESSOR;
+       /* Either an integrated APIC or a discrete 82489DX. */
+       processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+       processor.mpc_cpuflag = CPU_ENABLED;
+       processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+                                  (boot_cpu_data.x86_model << 4) |
+                                  boot_cpu_data.x86_mask;
+       processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+       processor.mpc_reserved[0] = 0;
+       processor.mpc_reserved[1] = 0;
+       for (i = 0; i < 2; i++) {
+               processor.mpc_apicid = i;
+               MP_processor_info(&processor);
+       }
+
+       bus.mpc_type = MP_BUS;
+       bus.mpc_busid = 0;
+       switch (mpc_default_type) {
+               default:
+                       printk("???\n");
+                       printk(KERN_ERR "Unknown standard configuration %d\n",
+                               mpc_default_type);
+                       /* fall through */
+               case 1:
+               case 5:
+                       memcpy(bus.mpc_bustype, "ISA   ", 6);
+                       break;
+               case 2:
+               case 6:
+               case 3:
+                       memcpy(bus.mpc_bustype, "EISA  ", 6);
+                       break;
+               case 4:
+               case 7:
+                       memcpy(bus.mpc_bustype, "MCA   ", 6);
+       }
+       MP_bus_info(&bus);
+       if (mpc_default_type > 4) {
+               bus.mpc_busid = 1;
+               memcpy(bus.mpc_bustype, "PCI   ", 6);
+               MP_bus_info(&bus);
+       }
+
+       ioapic.mpc_type = MP_IOAPIC;
+       ioapic.mpc_apicid = 2;
+       ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+       ioapic.mpc_flags = MPC_APIC_USABLE;
+       ioapic.mpc_apicaddr = 0xFEC00000;
+       MP_ioapic_info(&ioapic);
+
+       /*
+        * We set up most of the low 16 IO-APIC pins according to MPS rules.
+        */
+       construct_default_ioirq_mptable(mpc_default_type);
+
+       lintsrc.mpc_type = MP_LINTSRC;
+       lintsrc.mpc_irqflag = 0;                /* conforming */
+       lintsrc.mpc_srcbusid = 0;
+       lintsrc.mpc_srcbusirq = 0;
+       lintsrc.mpc_destapic = MP_APIC_ALL;
+       for (i = 0; i < 2; i++) {
+               lintsrc.mpc_irqtype = linttypes[i];
+               lintsrc.mpc_destapiclint = i;
+               MP_lintsrc_info(&lintsrc);
+       }
+}
+
+static struct intel_mp_floating *mpf_found;
+
+/*
+ * Scan the memory blocks for an SMP configuration block.
+ */
+void __init get_smp_config (void)
+{
+       struct intel_mp_floating *mpf = mpf_found;
+
+       /*
+        * ACPI supports both logical (e.g. Hyper-Threading) and physical 
+        * processors, where MPS only supports physical.
+        */
+       if (acpi_lapic && acpi_ioapic) {
+               printk(KERN_INFO "Using ACPI (MADT) for SMP configuration 
information\n");
+               return;
+       }
+       else if (acpi_lapic)
+               printk(KERN_INFO "Using ACPI for processor (LAPIC) 
configuration information\n");
+
+       printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", 
mpf->mpf_specification);
+       if (mpf->mpf_feature2 & (1<<7)) {
+               printk(KERN_INFO "    IMCR and PIC compatibility mode.\n");
+               pic_mode = 1;
+       } else {
+               printk(KERN_INFO "    Virtual Wire compatibility mode.\n");
+               pic_mode = 0;
+       }
+
+       /*
+        * Now see if we need to read further.
+        */
+       if (mpf->mpf_feature1 != 0) {
+
+               printk(KERN_INFO "Default MP configuration #%d\n", 
mpf->mpf_feature1);
+               construct_default_ISA_mptable(mpf->mpf_feature1);
+
+       } else if (mpf->mpf_physptr) {
+
+               /*
+                * Read the physical hardware table.  Anything here will
+                * override the defaults.
+                */
+               if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
+                       smp_found_config = 0;
+                       printk(KERN_ERR "BIOS bug, MP table errors 
detected!...\n");
+                       printk(KERN_ERR "... disabling SMP support. (tell your 
hw vendor)\n");
+                       return;
+               }
+               /*
+                * If there are no explicit MP IRQ entries, then we are
+                * broken.  We set up most of the low 16 IO-APIC pins to
+                * ISA defaults and hope it will work.
+                */
+               if (!mp_irq_entries) {
+                       struct mpc_config_bus bus;
+
+                       printk(KERN_ERR "BIOS bug, no explicit IRQ entries, 
using default mptable. (tell your hw vendor)\n");
+
+                       bus.mpc_type = MP_BUS;
+                       bus.mpc_busid = 0;
+                       memcpy(bus.mpc_bustype, "ISA   ", 6);
+                       MP_bus_info(&bus);
+
+                       construct_default_ioirq_mptable(0);
+               }
+
+       } else
+               BUG();
+
+       printk(KERN_INFO "Processors: %d\n", num_processors);
+       /*
+        * Only use the first configuration found.
+        */
+}
+
+static int __init smp_scan_config (unsigned long base, unsigned long length)
+{
+       unsigned long *bp = isa_bus_to_virt(base);
+       struct intel_mp_floating *mpf;
+
+       Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
+       if (sizeof(*mpf) != 16)
+               printk("Error: MPF size\n");
+
+       while (length > 0) {
+               mpf = (struct intel_mp_floating *)bp;
+               if ((*bp == SMP_MAGIC_IDENT) &&
+                       (mpf->mpf_length == 1) &&
+                       !mpf_checksum((unsigned char *)bp, 16) &&
+                       ((mpf->mpf_specification == 1)
+                               || (mpf->mpf_specification == 4)) ) {
+
+                       smp_found_config = 1;
+#ifndef CONFIG_XEN
+                       printk(KERN_INFO "found SMP MP-table at %08lx\n",
+                                               virt_to_phys(mpf));
+                       reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
+                       if (mpf->mpf_physptr) {
+                               /*
+                                * We cannot access to MPC table to compute
+                                * table size yet, as only few megabytes from
+                                * the bottom is mapped now.
+                                * PC-9800's MPC table places on the very last
+                                * of physical memory; so that simply reserving
+                                * PAGE_SIZE from mpg->mpf_physptr yields BUG()
+                                * in reserve_bootmem.
+                                */
+                               unsigned long size = PAGE_SIZE;
+                               unsigned long end = max_low_pfn * PAGE_SIZE;
+                               if (mpf->mpf_physptr + size > end)
+                                       size = end - mpf->mpf_physptr;
+                               reserve_bootmem(mpf->mpf_physptr, size);
+                       }
+#else
+                       printk(KERN_INFO "found SMP MP-table at %08lx\n",
+                               ((unsigned long)bp - (unsigned 
long)isa_bus_to_virt(base)) + base);
+#endif
+
+                       mpf_found = mpf;
+                       return 1;
+               }
+               bp += 4;
+               length -= 16;
+       }
+       return 0;
+}
+
+void __init find_smp_config (void)
+{
+#ifndef CONFIG_XEN
+       unsigned int address;
+#endif
+
+       /*
+        * FIXME: Linux assumes you have 640K of base ram..
+        * this continues the error...
+        *
+        * 1) Scan the bottom 1K for a signature
+        * 2) Scan the top 1K of base RAM
+        * 3) Scan the 64K of bios
+        */
+       if (smp_scan_config(0x0,0x400) ||
+               smp_scan_config(639*0x400,0x400) ||
+                       smp_scan_config(0xF0000,0x10000))
+               return;
+       /*
+        * If it is an SMP machine we should know now, unless the
+        * configuration is in an EISA/MCA bus machine with an
+        * extended bios data area.
+        *
+        * there is a real-mode segmented pointer pointing to the
+        * 4K EBDA area at 0x40E, calculate and scan it here.
+        *
+        * NOTE! There are Linux loaders that will corrupt the EBDA
+        * area, and as such this kind of SMP config may be less
+        * trustworthy, simply because the SMP table may have been
+        * stomped on during early boot. These loaders are buggy and
+        * should be fixed.
+        *
+        * MP1.4 SPEC states to only scan first 1K of 4K EBDA.
+        */
+
+#ifndef CONFIG_XEN
+       address = get_bios_ebda();
+       if (address)
+               smp_scan_config(address, 0x400);
+#endif
+}
+
+int es7000_plat;
+
+/* --------------------------------------------------------------------------
+                            ACPI-based MP Configuration
+   -------------------------------------------------------------------------- 
*/
+
+#ifdef CONFIG_ACPI
+
+void __init mp_register_lapic_address (
+       u64                     address)
+{
+#ifndef CONFIG_XEN
+       mp_lapic_addr = (unsigned long) address;
+
+       set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
+
+       if (boot_cpu_physical_apicid == -1U)
+               boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+
+       Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
+#endif
+}
+
+
+void __devinit mp_register_lapic (
+       u8                      id, 
+       u8                      enabled)
+{
+       struct mpc_config_processor processor;
+       int                     boot_cpu = 0;
+       
+       if (MAX_APICS - id <= 0) {
+               printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
+                       id, MAX_APICS);
+               return;
+       }
+
+       if (id == boot_cpu_physical_apicid)
+               boot_cpu = 1;
+
+#ifndef CONFIG_XEN
+       processor.mpc_type = MP_PROCESSOR;
+       processor.mpc_apicid = id;
+       processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
+       processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
+       processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
+       processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | 
+               (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
+       processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+       processor.mpc_reserved[0] = 0;
+       processor.mpc_reserved[1] = 0;
+#endif
+
+       MP_processor_info(&processor);
+}
+
+#ifdef CONFIG_X86_IO_APIC
+
+#define MP_ISA_BUS             0
+#define MP_MAX_IOAPIC_PIN      127
+
+static struct mp_ioapic_routing {
+       int                     apic_id;
+       int                     gsi_base;
+       int                     gsi_end;
+       u32                     pin_programmed[4];
+} mp_ioapic_routing[MAX_IO_APICS];
+
+
+static int mp_find_ioapic (
+       int                     gsi)
+{
+       int                     i = 0;
+
+       /* Find the IOAPIC that manages this GSI. */
+       for (i = 0; i < nr_ioapics; i++) {
+               if ((gsi >= mp_ioapic_routing[i].gsi_base)
+                       && (gsi <= mp_ioapic_routing[i].gsi_end))
+                       return i;
+       }
+
+       printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+
+       return -1;
+}
+       
+
+void __init mp_register_ioapic (
+       u8                      id, 
+       u32                     address,
+       u32                     gsi_base)
+{
+       int                     idx = 0;
+       int                     tmpid;
+
+       if (nr_ioapics >= MAX_IO_APICS) {
+               printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+                       "(found %d)\n", MAX_IO_APICS, nr_ioapics);
+               panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+       }
+       if (!address) {
+               printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
+                       " found in MADT table, skipping!\n");
+               return;
+       }
+
+       idx = nr_ioapics++;
+
+       mp_ioapics[idx].mpc_type = MP_IOAPIC;
+       mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
+       mp_ioapics[idx].mpc_apicaddr = address;
+
+#ifndef CONFIG_XEN
+       set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+#endif
+       if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+               && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+               tmpid = io_apic_get_unique_id(idx, id);
+       else
+               tmpid = id;
+       if (tmpid == -1) {
+               nr_ioapics--;
+               return;
+       }
+       mp_ioapics[idx].mpc_apicid = tmpid;
+       mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
+       
+       /* 
+        * Build basic GSI lookup table to facilitate gsi->io_apic lookups
+        * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
+        */
+       mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
+       mp_ioapic_routing[idx].gsi_base = gsi_base;
+       mp_ioapic_routing[idx].gsi_end = gsi_base + 
+               io_apic_get_redir_entries(idx);
+
+       printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
+               "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, 
+               mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
+               mp_ioapic_routing[idx].gsi_base,
+               mp_ioapic_routing[idx].gsi_end);
+
+       return;
+}
+
+
+void __init mp_override_legacy_irq (
+       u8                      bus_irq,
+       u8                      polarity, 
+       u8                      trigger, 
+       u32                     gsi)
+{
+       struct mpc_config_intsrc intsrc;
+       int                     ioapic = -1;
+       int                     pin = -1;
+
+       /* 
+        * Convert 'gsi' to 'ioapic.pin'.
+        */
+       ioapic = mp_find_ioapic(gsi);
+       if (ioapic < 0)
+               return;
+       pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+       /*
+        * TBD: This check is for faulty timer entries, where the override
+        *      erroneously sets the trigger to level, resulting in a HUGE 
+        *      increase of timer interrupts!
+        */
+       if ((bus_irq == 0) && (trigger == 3))
+               trigger = 1;
+
+       intsrc.mpc_type = MP_INTSRC;
+       intsrc.mpc_irqtype = mp_INT;
+       intsrc.mpc_irqflag = (trigger << 2) | polarity;
+       intsrc.mpc_srcbus = MP_ISA_BUS;
+       intsrc.mpc_srcbusirq = bus_irq;                                /* IRQ */
+       intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;        /* APIC ID */
+       intsrc.mpc_dstirq = pin;                                    /* INTIN# */
+
+       Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
+               intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
+               (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
+               intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
+
+       mp_irqs[mp_irq_entries] = intsrc;
+       if (++mp_irq_entries == MAX_IRQ_SOURCES)
+               panic("Max # of irq sources exceeded!\n");
+
+       return;
+}
+
+void __init mp_config_acpi_legacy_irqs (void)
+{
+       struct mpc_config_intsrc intsrc;
+       int                     i = 0;
+       int                     ioapic = -1;
+
+       /* 
+        * Fabricate the legacy ISA bus (bus #31).
+        */
+       mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
+       Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
+
+       /*
+        * Older generations of ES7000 have no legacy identity mappings
+        */
+       if (es7000_plat == 1)
+               return;
+
+       /* 
+        * Locate the IOAPIC that manages the ISA IRQs (0-15). 
+        */
+       ioapic = mp_find_ioapic(0);
+       if (ioapic < 0)
+               return;
+
+       intsrc.mpc_type = MP_INTSRC;
+       intsrc.mpc_irqflag = 0;                                 /* Conforming */
+       intsrc.mpc_srcbus = MP_ISA_BUS;
+       intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
+
+       /* 
+        * Use the default configuration for the IRQs 0-15.  Unless
+        * overriden by (MADT) interrupt source override entries.
+        */
+       for (i = 0; i < 16; i++) {
+               int idx;
+
+               for (idx = 0; idx < mp_irq_entries; idx++) {
+                       struct mpc_config_intsrc *irq = mp_irqs + idx;
+
+                       /* Do we already have a mapping for this ISA IRQ? */
+                       if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq 
== i)
+                               break;
+
+                       /* Do we already have a mapping for this IOAPIC pin */
+                       if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
+                               (irq->mpc_dstirq == i))
+                               break;
+               }
+
+               if (idx != mp_irq_entries) {
+                       printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
+                       continue;                       /* IRQ already used */
+               }
+
+               intsrc.mpc_irqtype = mp_INT;
+               intsrc.mpc_srcbusirq = i;                  /* Identity mapped */
+               intsrc.mpc_dstirq = i;
+
+               Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
+                       "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
+                       (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
+                       intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, 
+                       intsrc.mpc_dstirq);
+
+               mp_irqs[mp_irq_entries] = intsrc;
+               if (++mp_irq_entries == MAX_IRQ_SOURCES)
+                       panic("Max # of irq sources exceeded!\n");
+       }
+}
+
+#define MAX_GSI_NUM    4096
+
+int mp_register_gsi (u32 gsi, int triggering, int polarity)
+{
+       int                     ioapic = -1;
+       int                     ioapic_pin = 0;
+       int                     idx, bit = 0;
+       static int              pci_irq = 16;
+       /*
+        * Mapping between Global System Interrups, which
+        * represent all possible interrupts, and IRQs
+        * assigned to actual devices.
+        */
+       static int              gsi_to_irq[MAX_GSI_NUM];
+
+       /* Don't set up the ACPI SCI because it's already set up */
+       if (acpi_fadt.sci_int == gsi)
+               return gsi;
+
+       ioapic = mp_find_ioapic(gsi);
+       if (ioapic < 0) {
+               printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
+               return gsi;
+       }
+
+       ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+       if (ioapic_renumber_irq)
+               gsi = ioapic_renumber_irq(ioapic, gsi);
+
+       /* 
+        * Avoid pin reprogramming.  PRTs typically include entries  
+        * with redundant pin->gsi mappings (but unique PCI devices);
+        * we only program the IOAPIC on the first.
+        */
+       bit = ioapic_pin % 32;
+       idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
+       if (idx > 3) {
+               printk(KERN_ERR "Invalid reference to IOAPIC pin "
+                       "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, 
+                       ioapic_pin);
+               return gsi;
+       }
+       if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+               Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
+                       mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+               return gsi_to_irq[gsi];
+       }
+
+       mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
+
+       if (triggering == ACPI_LEVEL_SENSITIVE) {
+               /*
+                * For PCI devices assign IRQs in order, avoiding gaps
+                * due to unused I/O APIC pins.
+                */
+               int irq = gsi;
+               if (gsi < MAX_GSI_NUM) {
+                       /*
+                        * Retain the VIA chipset work-around (gsi > 15), but
+                        * avoid a problem where the 8254 timer (IRQ0) is setup
+                        * via an override (so it's not on pin 0 of the ioapic),
+                        * and at the same time, the pin 0 interrupt is a PCI
+                        * type.  The gsi > 15 test could cause these two pins
+                        * to be shared as IRQ0, and they are not shareable.
+                        * So test for this condition, and if necessary, avoid
+                        * the pin collision.
+                        */
+                       if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
+                               gsi = pci_irq++;
+                       /*
+                        * Don't assign IRQ used by ACPI SCI
+                        */
+                       if (gsi == acpi_fadt.sci_int)
+                               gsi = pci_irq++;
+                       gsi_to_irq[irq] = gsi;
+               } else {
+                       printk(KERN_ERR "GSI %u is too high\n", gsi);
+                       return gsi;
+               }
+       }
+
+       io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
+                   triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
+                   polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+       return gsi;
+}
+
+#endif /* CONFIG_X86_IO_APIC */
+#endif /* CONFIG_ACPI */
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/pci-dma-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/pci-dma-xen.c    Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,378 @@
+/*
+ * Dynamic DMA mapping support.
+ *
+ * On i386 there is no hardware dynamic DMA address translation,
+ * so consistent alloc/free are merely page allocation/freeing.
+ * The rest of the dynamic DMA mapping interface is implemented
+ * in asm/pci.h.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <asm/io.h>
+#include <xen/balloon.h>
+#include <xen/gnttab.h>
+#include <asm/swiotlb.h>
+#include <asm/tlbflush.h>
+#include <asm-i386/mach-xen/asm/swiotlb.h>
+#include <asm/bug.h>
+
+#ifdef __x86_64__
+#include <asm/proto.h>
+
+int iommu_merge __read_mostly = 0;
+EXPORT_SYMBOL(iommu_merge);
+
+dma_addr_t bad_dma_address __read_mostly;
+EXPORT_SYMBOL(bad_dma_address);
+
+/* This tells the BIO block layer to assume merging. Default to off
+   because we cannot guarantee merging later. */
+int iommu_bio_merge __read_mostly = 0;
+EXPORT_SYMBOL(iommu_bio_merge);
+
+int force_iommu __read_mostly= 0;
+
+__init int iommu_setup(char *p)
+{
+    return 1;
+}
+
+void __init pci_iommu_alloc(void)
+{
+#ifdef CONFIG_SWIOTLB
+       pci_swiotlb_init();
+#endif
+}
+
+static int __init pci_iommu_init(void)
+{
+       no_iommu_init();
+       return 0;
+}
+
+/* Must execute after PCI subsystem */
+fs_initcall(pci_iommu_init);
+#endif
+
+struct dma_coherent_mem {
+       void            *virt_base;
+       u32             device_base;
+       int             size;
+       int             flags;
+       unsigned long   *bitmap;
+};
+
+#define IOMMU_BUG_ON(test)                             \
+do {                                                   \
+       if (unlikely(test)) {                           \
+               printk(KERN_ALERT "Fatal DMA error! "   \
+                      "Please use 'swiotlb=force'\n"); \
+               BUG();                                  \
+       }                                               \
+} while (0)
+
+int
+dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+          enum dma_data_direction direction)
+{
+       int i, rc;
+
+       if (direction == DMA_NONE)
+               BUG();
+       WARN_ON(nents == 0 || sg[0].length == 0);
+
+       if (swiotlb) {
+               rc = swiotlb_map_sg(hwdev, sg, nents, direction);
+       } else {
+               for (i = 0; i < nents; i++ ) {
+                       sg[i].dma_address =
+                               gnttab_dma_map_page(sg[i].page) + sg[i].offset;
+                       sg[i].dma_length  = sg[i].length;
+                       BUG_ON(!sg[i].page);
+                       IOMMU_BUG_ON(address_needs_mapping(
+                               hwdev, sg[i].dma_address));
+               }
+               rc = nents;
+       }
+
+       flush_write_buffers();
+       return rc;
+}
+EXPORT_SYMBOL(dma_map_sg);
+
+void
+dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+            enum dma_data_direction direction)
+{
+       int i;
+
+       BUG_ON(direction == DMA_NONE);
+       if (swiotlb)
+               swiotlb_unmap_sg(hwdev, sg, nents, direction);
+       else {
+               for (i = 0; i < nents; i++ )
+                       gnttab_dma_unmap_page(sg[i].dma_address);
+       }
+}
+EXPORT_SYMBOL(dma_unmap_sg);
+
+#ifdef CONFIG_HIGHMEM
+dma_addr_t
+dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+            size_t size, enum dma_data_direction direction)
+{
+       dma_addr_t dma_addr;
+
+       BUG_ON(direction == DMA_NONE);
+
+       if (swiotlb) {
+               dma_addr = swiotlb_map_page(
+                       dev, page, offset, size, direction);
+       } else {
+               dma_addr = gnttab_dma_map_page(page) + offset;
+               IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr));
+       }
+
+       return dma_addr;
+}
+EXPORT_SYMBOL(dma_map_page);
+
+void
+dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+              enum dma_data_direction direction)
+{
+       BUG_ON(direction == DMA_NONE);
+       if (swiotlb)
+               swiotlb_unmap_page(dev, dma_address, size, direction);
+       else
+               gnttab_dma_unmap_page(dma_address);
+}
+EXPORT_SYMBOL(dma_unmap_page);
+#endif /* CONFIG_HIGHMEM */
+
+int
+dma_mapping_error(dma_addr_t dma_addr)
+{
+       if (swiotlb)
+               return swiotlb_dma_mapping_error(dma_addr);
+       return 0;
+}
+EXPORT_SYMBOL(dma_mapping_error);
+
+int
+dma_supported(struct device *dev, u64 mask)
+{
+       if (swiotlb)
+               return swiotlb_dma_supported(dev, mask);
+       /*
+        * By default we'll BUG when an infeasible DMA is requested, and
+        * request swiotlb=force (see IOMMU_BUG_ON).
+        */
+       return 1;
+}
+EXPORT_SYMBOL(dma_supported);
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+                          dma_addr_t *dma_handle, gfp_t gfp)
+{
+       void *ret;
+       struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+       unsigned int order = get_order(size);
+       unsigned long vstart;
+       u64 mask;
+
+       /* ignore region specifiers */
+       gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
+
+       if (mem) {
+               int page = bitmap_find_free_region(mem->bitmap, mem->size,
+                                                    order);
+               if (page >= 0) {
+                       *dma_handle = mem->device_base + (page << PAGE_SHIFT);
+                       ret = mem->virt_base + (page << PAGE_SHIFT);
+                       memset(ret, 0, size);
+                       return ret;
+               }
+               if (mem->flags & DMA_MEMORY_EXCLUSIVE)
+                       return NULL;
+       }
+
+       if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
+               gfp |= GFP_DMA;
+
+       vstart = __get_free_pages(gfp, order);
+       ret = (void *)vstart;
+
+       if (dev != NULL && dev->coherent_dma_mask)
+               mask = dev->coherent_dma_mask;
+       else
+               mask = 0xffffffff;
+
+       if (ret != NULL) {
+               if (xen_create_contiguous_region(vstart, order,
+                                                fls64(mask)) != 0) {
+                       free_pages(vstart, order);
+                       return NULL;
+               }
+               memset(ret, 0, size);
+               *dma_handle = virt_to_bus(ret);
+       }
+       return ret;
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+void dma_free_coherent(struct device *dev, size_t size,
+                        void *vaddr, dma_addr_t dma_handle)
+{

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
Prev by Date: [Xen-changelog] [linux-2.6.18-xen] Imported buildconfigs from xen-unstable.hg 15200:bd3d6b4c52ec
Next by Date: [Xen-changelog] [xen-unstable] xend: Fix xm block-detach regression introduced by c/s 15157.
Previous by thread: [Xen-changelog] [linux-2.6.18-xen] Imported buildconfigs from xen-unstable.hg 15200:bd3d6b4c52ec
Next by thread: [Xen-changelog] [xen-unstable] xend: Fix xm block-detach regression introduced by c/s 15157.
Index(es):
- Date
- Thread
Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.