[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] [merge] with xen-unstable



# HG changeset patch
# User Christian Limpach <Christian.Limpach@xxxxxxxxxxxxx>
# Date 1169218137 0
# Node ID 3c8bb086025ee18f077582a5343da631c67fbaca
# Parent  8475a4e0425ed158923d9849a8e5a6821e8bdb34
# Parent  3157835b1d45f7175aba2b4a98cac93f527d6b10
[merge] with xen-unstable

Signed-off-by: Christian Limpach <Christian.Limpach@xxxxxxxxxxxxx>
---
 tools/python/xen/xend/XendRoot.py                        |  322 -
 xen/arch/ia64/linux/cmdline.c                            |  121 
 xen/include/asm-ia64/linux-null/linux/ioport.h           |    1 
 xen/include/asm-ia64/linux-xen/asm/sn/sn_sal.h           |  994 ----
 xen/include/asm-ia64/linux/asm/machvec.h                 |  390 -
 xen/include/asm-ia64/linux/asm/pci.h                     |  161 
 Config.mk                                                |    5 
 buildconfigs/linux-defconfig_xen0_ia64                   |    3 
 buildconfigs/linux-defconfig_xen0_x86_32                 |    6 
 buildconfigs/linux-defconfig_xen0_x86_64                 |    6 
 buildconfigs/linux-defconfig_xenU_ia64                   |    3 
 buildconfigs/linux-defconfig_xenU_x86_32                 |    2 
 buildconfigs/linux-defconfig_xenU_x86_64                 |    2 
 buildconfigs/linux-defconfig_xen_ia64                    |    3 
 buildconfigs/linux-defconfig_xen_x86_32                  |    2 
 buildconfigs/linux-defconfig_xen_x86_64                  |    2 
 buildconfigs/mk.linux-2.6-xen                            |    4 
 extras/mini-os/Makefile                                  |    2 
 extras/mini-os/include/x86/x86_32/hypercall-x86_32.h     |   14 
 extras/mini-os/include/x86/x86_64/hypercall-x86_64.h     |   14 
 linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c         |    4 
 linux-2.6-xen-sparse/arch/ia64/Kconfig                   |    6 
 linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c      |    1 
 linux-2.6-xen-sparse/arch/ia64/kernel/fsys.S             |  925 ++++
 linux-2.6-xen-sparse/arch/ia64/kernel/gate.S             |   24 
 linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S         |    8 
 linux-2.6-xen-sparse/arch/ia64/kernel/patch.c            |    4 
 linux-2.6-xen-sparse/arch/ia64/kernel/setup.c            |   10 
 linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S           |  267 -
 linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c          |   88 
 linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S            |   49 
 linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S              |   54 
 linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h         |   11 
 linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S              |   23 
 linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S            |    7 
 linux-2.6-xen-sparse/drivers/xen/Kconfig                 |    8 
 linux-2.6-xen-sparse/include/asm-ia64/hypercall.h        |  108 
 linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h       |    3 
 linux-2.6-xen-sparse/include/asm-ia64/maddr.h            |    1 
 linux-2.6-xen-sparse/include/asm-ia64/page.h             |    1 
 linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h       |   18 
 linux-2.6-xen-sparse/include/linux/skbuff.h              |    3 
 linux-2.6-xen-sparse/net/core/skbuff.c                   |   41 
 tools/ioemu/hw/cirrus_vga.c                              |   52 
 tools/ioemu/hw/ide.c                                     |    3 
 tools/ioemu/hw/pci.c                                     |    3 
 tools/ioemu/target-i386-dm/helper2.c                     |   15 
 tools/ioemu/vl.c                                         |   38 
 tools/libxc/Makefile                                     |    2 
 tools/libxc/ia64/xc_ia64_hvm_build.c                     |   44 
 tools/libxc/xc_domain.c                                  |   44 
 tools/libxc/xc_hvm_restore.c                             |  360 +
 tools/libxc/xc_hvm_save.c                                |  727 +++
 tools/libxc/xenctrl.h                                    |   24 
 tools/libxc/xenguest.h                                   |   20 
 tools/pygrub/src/pygrub                                  |    6 
 tools/python/setup.py                                    |   13 
 tools/python/xen/lowlevel/scf/scf.c                      |  156 
 tools/python/xen/lowlevel/xc/xc.c                        |   22 
 tools/python/xen/util/xmlrpclib2.py                      |    1 
 tools/python/xen/web/httpserver.py                       |   12 
 tools/python/xen/xend/Vifctl.py                          |    4 
 tools/python/xen/xend/XendCheckpoint.py                  |   77 
 tools/python/xen/xend/XendConfig.py                      |   45 
 tools/python/xen/xend/XendDomain.py                      |   12 
 tools/python/xen/xend/XendDomainInfo.py                  |   64 
 tools/python/xen/xend/XendNode.py                        |    4 
 tools/python/xen/xend/XendOptions.py                     |  373 +
 tools/python/xen/xend/XendProtocol.py                    |    6 
 tools/python/xen/xend/balloon.py                         |    6 
 tools/python/xen/xend/image.py                           |   28 
 tools/python/xen/xend/osdep.py                           |   10 
 tools/python/xen/xend/server/DevController.py            |    8 
 tools/python/xen/xend/server/SrvRoot.py                  |    2 
 tools/python/xen/xend/server/SrvServer.py                |   58 
 tools/python/xen/xend/server/XMLRPCServer.py             |   17 
 tools/python/xen/xend/server/netif.py                    |   13 
 tools/python/xen/xend/server/relocate.py                 |   14 
 tools/python/xen/xend/server/tests/test_controllers.py   |   10 
 tools/python/xen/xend/server/tpmif.py                    |    8 
 tools/python/xen/xend/server/vfbif.py                    |    4 
 tools/python/xen/xm/create.py                            |    6 
 tools/python/xen/xm/main.py                              |   13 
 tools/python/xen/xm/opts.py                              |    8 
 tools/python/xen/xm/tests/test_create.py                 |    6 
 tools/xcutils/xc_restore.c                               |   19 
 tools/xcutils/xc_save.c                                  |    5 
 unmodified_drivers/linux-2.6/platform-pci/platform-pci.c |    9 
 xen/arch/ia64/asm-offsets.c                              |    2 
 xen/arch/ia64/asm-xsi-offsets.c                          |    2 
 xen/arch/ia64/linux-xen/Makefile                         |    3 
 xen/arch/ia64/linux-xen/README.origin                    |    1 
 xen/arch/ia64/linux-xen/cmdline.c                        |  131 
 xen/arch/ia64/linux-xen/entry.S                          |  120 
 xen/arch/ia64/linux-xen/sn/Makefile                      |    1 
 xen/arch/ia64/linux-xen/sn/kernel/Makefile               |    5 
 xen/arch/ia64/linux-xen/sn/kernel/README.origin          |   12 
 xen/arch/ia64/linux-xen/sn/kernel/io_init.c              |  783 +++
 xen/arch/ia64/linux-xen/sn/kernel/iomv.c                 |   82 
 xen/arch/ia64/linux-xen/sn/kernel/irq.c                  |  542 ++
 xen/arch/ia64/linux-xen/sn/kernel/setup.c                |  808 +++
 xen/arch/ia64/linux-xen/sn/kernel/sn2_smp.c              |  548 ++
 xen/arch/ia64/linux/Makefile                             |    6 
 xen/arch/ia64/linux/README.origin                        |    4 
 xen/arch/ia64/linux/dig/Makefile                         |    1 
 xen/arch/ia64/linux/dig/README.origin                    |    7 
 xen/arch/ia64/linux/dig/machvec.c                        |    3 
 xen/arch/ia64/linux/hp/Makefile                          |    1 
 xen/arch/ia64/linux/hp/zx1/Makefile                      |    1 
 xen/arch/ia64/linux/hp/zx1/README.origin                 |    7 
 xen/arch/ia64/linux/hp/zx1/hpzx1_machvec.c               |    3 
 xen/arch/ia64/linux/io.c                                 |  164 
 xen/arch/ia64/linux/sn/Makefile                          |    2 
 xen/arch/ia64/linux/sn/kernel/Makefile                   |    3 
 xen/arch/ia64/linux/sn/kernel/README.origin              |    9 
 xen/arch/ia64/linux/sn/kernel/machvec.c                  |   11 
 xen/arch/ia64/linux/sn/kernel/pio_phys.S                 |   71 
 xen/arch/ia64/linux/sn/kernel/ptc_deadlock.S             |   92 
 xen/arch/ia64/linux/sn/pci/Makefile                      |    1 
 xen/arch/ia64/linux/sn/pci/pcibr/Makefile                |    1 
 xen/arch/ia64/linux/sn/pci/pcibr/README.origin           |    7 
 xen/arch/ia64/linux/sn/pci/pcibr/pcibr_reg.c             |  285 +
 xen/arch/ia64/vmx/mmio.c                                 |   10 
 xen/arch/ia64/vmx/vmx_process.c                          |   23 
 xen/arch/ia64/vmx/vmx_vcpu.c                             |   45 
 xen/arch/ia64/vmx/vmx_virt.c                             |    2 
 xen/arch/ia64/xen/dom0_ops.c                             |   11 
 xen/arch/ia64/xen/dom_fw.c                               |  103 
 xen/arch/ia64/xen/domain.c                               |  149 
 xen/arch/ia64/xen/faults.c                               |   37 
 xen/arch/ia64/xen/hypercall.c                            |  172 
 xen/arch/ia64/xen/hyperprivop.S                          |  153 
 xen/arch/ia64/xen/irq.c                                  |    7 
 xen/arch/ia64/xen/ivt.S                                  |  458 +-
 xen/arch/ia64/xen/mm.c                                   |   19 
 xen/arch/ia64/xen/tlb_track.c                            |   25 
 xen/arch/ia64/xen/vcpu.c                                 |   75 
 xen/arch/ia64/xen/vhpt.c                                 |   13 
 xen/arch/ia64/xen/xenasm.S                               |   22 
 xen/arch/ia64/xen/xencomm.c                              |    4 
 xen/arch/ia64/xen/xensetup.c                             |    6 
 xen/arch/x86/hvm/hpet.c                                  |   24 
 xen/arch/x86/hvm/hvm.c                                   |   11 
 xen/arch/x86/hvm/i8254.c                                 |  153 
 xen/arch/x86/hvm/intercept.c                             |  303 +
 xen/arch/x86/hvm/rtc.c                                   |    2 
 xen/arch/x86/hvm/vioapic.c                               |  132 
 xen/arch/x86/hvm/vlapic.c                                |   74 
 xen/arch/x86/hvm/vmx/vmx.c                               |  296 +
 xen/arch/x86/hvm/vpic.c                                  |   83 
 xen/arch/x86/hvm/vpt.c                                   |   10 
 xen/arch/x86/mm/shadow/common.c                          |    7 
 xen/arch/x86/mm/shadow/multi.c                           |    2 
 xen/common/domain.c                                      |    1 
 xen/common/domctl.c                                      |   73 
 xen/include/asm-ia64/config.h                            |   29 
 xen/include/asm-ia64/domain.h                            |    4 
 xen/include/asm-ia64/hypercall.h                         |    2 
 xen/include/asm-ia64/linux-null/asm/nmi.h                |    1 
 xen/include/asm-ia64/linux-null/linux/dmapool.h          |    1 
 xen/include/asm-ia64/linux-null/linux/rwsem.h            |    1 
 xen/include/asm-ia64/linux-xen/asm/README.origin         |    7 
 xen/include/asm-ia64/linux-xen/asm/machvec.h             |  498 ++
 xen/include/asm-ia64/linux-xen/asm/machvec_dig.h         |   46 
 xen/include/asm-ia64/linux-xen/asm/machvec_hpzx1.h       |   66 
 xen/include/asm-ia64/linux-xen/asm/machvec_sn2.h         |  166 
 xen/include/asm-ia64/linux-xen/asm/page.h                |   10 
 xen/include/asm-ia64/linux-xen/asm/pci.h                 |  185 
 xen/include/asm-ia64/linux-xen/asm/sn/README.origin      |   16 
 xen/include/asm-ia64/linux-xen/asm/sn/addrs.h            |  299 +
 xen/include/asm-ia64/linux-xen/asm/sn/arch.h             |   92 
 xen/include/asm-ia64/linux-xen/asm/sn/hubdev.h           |   95 
 xen/include/asm-ia64/linux-xen/asm/sn/intr.h             |   73 
 xen/include/asm-ia64/linux-xen/asm/sn/io.h               |  281 +
 xen/include/asm-ia64/linux-xen/asm/sn/nodepda.h          |   87 
 xen/include/asm-ia64/linux-xen/asm/sn/pcibr_provider.h   |  153 
 xen/include/asm-ia64/linux-xen/asm/sn/rw_mmr.h           |   32 
 xen/include/asm-ia64/linux-xen/asm/sn/types.h            |   28 
 xen/include/asm-ia64/linux-xen/asm/system.h              |    1 
 xen/include/asm-ia64/linux-xen/asm/types.h               |    8 
 xen/include/asm-ia64/linux-xen/linux/README.origin       |    5 
 xen/include/asm-ia64/linux-xen/linux/device.h            |  489 ++
 xen/include/asm-ia64/linux-xen/linux/kobject.h           |  286 +
 xen/include/asm-ia64/linux-xen/linux/pci.h               |  820 +++
 xen/include/asm-ia64/linux/README.origin                 |   12 
 xen/include/asm-ia64/linux/asm/README.origin             |    4 
 xen/include/asm-ia64/linux/asm/machvec_init.h            |   32 
 xen/include/asm-ia64/linux/asm/sn/README.origin          |   24 
 xen/include/asm-ia64/linux/asm/sn/geo.h                  |  132 
 xen/include/asm-ia64/linux/asm/sn/klconfig.h             |  246 +
 xen/include/asm-ia64/linux/asm/sn/l1.h                   |   51 
 xen/include/asm-ia64/linux/asm/sn/leds.h                 |   33 
 xen/include/asm-ia64/linux/asm/sn/module.h               |  127 
 xen/include/asm-ia64/linux/asm/sn/pcibus_provider_defs.h |   68 
 xen/include/asm-ia64/linux/asm/sn/pcidev.h               |   83 
 xen/include/asm-ia64/linux/asm/sn/pda.h                  |   69 
 xen/include/asm-ia64/linux/asm/sn/pic.h                  |  261 +
 xen/include/asm-ia64/linux/asm/sn/shub_mmr.h             |  502 ++
 xen/include/asm-ia64/linux/asm/sn/shubio.h               | 3358 +++++++++++++++
 xen/include/asm-ia64/linux/asm/sn/simulator.h            |   20 
 xen/include/asm-ia64/linux/asm/sn/sn_cpuid.h             |  132 
 xen/include/asm-ia64/linux/asm/sn/sn_feature_sets.h      |   51 
 xen/include/asm-ia64/linux/asm/sn/sn_sal.h               | 1157 +++++
 xen/include/asm-ia64/linux/asm/sn/tiocp.h                |  257 +
 xen/include/asm-ia64/linux/asm/sn/xbow.h                 |  301 +
 xen/include/asm-ia64/linux/asm/sn/xwidgetdev.h           |   70 
 xen/include/asm-ia64/linux/completion.h                  |   57 
 xen/include/asm-ia64/linux/ioport.h                      |  136 
 xen/include/asm-ia64/linux/klist.h                       |   61 
 xen/include/asm-ia64/linux/kref.h                        |   32 
 xen/include/asm-ia64/linux/mod_devicetable.h             |  323 +
 xen/include/asm-ia64/linux/pci_ids.h                     | 2356 ++++++++++
 xen/include/asm-ia64/linux/pci_regs.h                    |  488 ++
 xen/include/asm-ia64/linux/pm.h                          |  279 +
 xen/include/asm-ia64/linux/sysfs.h                       |  206 
 xen/include/asm-ia64/multicall.h                         |   12 
 xen/include/asm-ia64/tlbflush.h                          |    2 
 xen/include/asm-ia64/vcpu.h                              |    1 
 xen/include/asm-ia64/vmx_vcpu.h                          |    2 
 xen/include/asm-ia64/xensystem.h                         |    1 
 xen/include/asm-ia64/xentypes.h                          |   19 
 xen/include/asm-x86/hvm/domain.h                         |   17 
 xen/include/asm-x86/hvm/hvm.h                            |   38 
 xen/include/asm-x86/hvm/support.h                        |  127 
 xen/include/asm-x86/hvm/vpt.h                            |    2 
 xen/include/public/arch-ia64.h                           |   64 
 xen/include/public/arch-x86/xen.h                        |   64 
 xen/include/public/domctl.h                              |   16 
 xen/include/xlat.lst                                     |    2 
 229 files changed, 23967 insertions(+), 3425 deletions(-)

diff -r 8475a4e0425e -r 3c8bb086025e Config.mk
--- a/Config.mk Thu Jan 18 15:18:07 2007 +0000
+++ b/Config.mk Fri Jan 19 14:48:57 2007 +0000
@@ -6,8 +6,11 @@ XEN_COMPILE_ARCH    ?= $(shell uname -m 
 XEN_COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/ \
                          -e s/ppc/powerpc/ -e s/i86pc/x86_32/)
 XEN_TARGET_ARCH     ?= $(XEN_COMPILE_ARCH)
-XEN_TARGET_X86_PAE  ?= n
 XEN_OS              ?= $(shell uname -s)
+
+ifeq ($(XEN_TARGET_ARCH),x86_32)
+XEN_TARGET_X86_PAE  ?= y
+endif
 
 CONFIG_$(XEN_OS) := y
 
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xen0_ia64
--- a/buildconfigs/linux-defconfig_xen0_ia64    Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/linux-defconfig_xen0_ia64    Fri Jan 19 14:48:57 2007 +0000
@@ -1512,10 +1512,7 @@ CONFIG_CRYPTO_DES=y
 # Hardware crypto devices
 #
 # CONFIG_XEN_UTIL is not set
-CONFIG_HAVE_ARCH_ALLOC_SKB=y
-CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
 CONFIG_XEN_BALLOON=y
-CONFIG_XEN_SKBUFF=y
 # CONFIG_XEN_DEVMEM is not set
 CONFIG_XEN_REBOOT=y
 # CONFIG_XEN_SMPBOOT is not set
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xen0_x86_32
--- a/buildconfigs/linux-defconfig_xen0_x86_32  Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/linux-defconfig_xen0_x86_32  Fri Jan 19 14:48:57 2007 +0000
@@ -569,7 +569,7 @@ CONFIG_MEGARAID_NEWGEN=y
 # CONFIG_MEGARAID_LEGACY is not set
 # CONFIG_MEGARAID_SAS is not set
 CONFIG_SCSI_SATA=y
-# CONFIG_SCSI_SATA_AHCI is not set
+CONFIG_SCSI_SATA_AHCI=y
 # CONFIG_SCSI_SATA_SVW is not set
 CONFIG_SCSI_ATA_PIIX=y
 # CONFIG_SCSI_SATA_MV is not set
@@ -734,7 +734,7 @@ CONFIG_SK98LIN=y
 CONFIG_SK98LIN=y
 # CONFIG_VIA_VELOCITY is not set
 CONFIG_TIGON3=y
-# CONFIG_BNX2 is not set
+CONFIG_BNX2=y
 
 #
 # Ethernet (10000 Mbit)
@@ -1413,8 +1413,6 @@ CONFIG_XEN_COMPAT_030002_AND_LATER=y
 CONFIG_XEN_COMPAT_030002_AND_LATER=y
 # CONFIG_XEN_COMPAT_LATEST_ONLY is not set
 CONFIG_XEN_COMPAT_030002=y
-CONFIG_HAVE_ARCH_ALLOC_SKB=y
-CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y
 CONFIG_NO_IDLE_HZ=y
 CONFIG_XEN_UTIL=y
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xen0_x86_64
--- a/buildconfigs/linux-defconfig_xen0_x86_64  Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/linux-defconfig_xen0_x86_64  Fri Jan 19 14:48:57 2007 +0000
@@ -517,7 +517,7 @@ CONFIG_MEGARAID_NEWGEN=y
 # CONFIG_MEGARAID_LEGACY is not set
 # CONFIG_MEGARAID_SAS is not set
 CONFIG_SCSI_SATA=y
-# CONFIG_SCSI_SATA_AHCI is not set
+CONFIG_SCSI_SATA_AHCI=y
 # CONFIG_SCSI_SATA_SVW is not set
 CONFIG_SCSI_ATA_PIIX=y
 # CONFIG_SCSI_SATA_MV is not set
@@ -683,7 +683,7 @@ CONFIG_SK98LIN=y
 CONFIG_SK98LIN=y
 # CONFIG_VIA_VELOCITY is not set
 CONFIG_TIGON3=y
-# CONFIG_BNX2 is not set
+CONFIG_BNX2=y
 
 #
 # Ethernet (10000 Mbit)
@@ -1363,8 +1363,6 @@ CONFIG_XEN_COMPAT_030002_AND_LATER=y
 CONFIG_XEN_COMPAT_030002_AND_LATER=y
 # CONFIG_XEN_COMPAT_LATEST_ONLY is not set
 CONFIG_XEN_COMPAT_030002=y
-CONFIG_HAVE_ARCH_ALLOC_SKB=y
-CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y
 CONFIG_NO_IDLE_HZ=y
 CONFIG_XEN_UTIL=y
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xenU_ia64
--- a/buildconfigs/linux-defconfig_xenU_ia64    Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/linux-defconfig_xenU_ia64    Fri Jan 19 14:48:57 2007 +0000
@@ -1386,10 +1386,7 @@ CONFIG_CRYPTO_DES=y
 # Hardware crypto devices
 #
 # CONFIG_XEN_UTIL is not set
-CONFIG_HAVE_ARCH_ALLOC_SKB=y
-CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
 CONFIG_XEN_BALLOON=y
-CONFIG_XEN_SKBUFF=y
 # CONFIG_XEN_DEVMEM is not set
 CONFIG_XEN_REBOOT=y
 # CONFIG_XEN_SMPBOOT is not set
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xenU_x86_32
--- a/buildconfigs/linux-defconfig_xenU_x86_32  Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/linux-defconfig_xenU_x86_32  Fri Jan 19 14:48:57 2007 +0000
@@ -922,8 +922,6 @@ CONFIG_XEN_COMPAT_030002_AND_LATER=y
 CONFIG_XEN_COMPAT_030002_AND_LATER=y
 # CONFIG_XEN_COMPAT_LATEST_ONLY is not set
 CONFIG_XEN_COMPAT_030002=y
-CONFIG_HAVE_ARCH_ALLOC_SKB=y
-CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y
 CONFIG_NO_IDLE_HZ=y
 CONFIG_XEN_UTIL=y
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xenU_x86_64
--- a/buildconfigs/linux-defconfig_xenU_x86_64  Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/linux-defconfig_xenU_x86_64  Fri Jan 19 14:48:57 2007 +0000
@@ -1218,8 +1218,6 @@ CONFIG_XEN_COMPAT_030002_AND_LATER=y
 CONFIG_XEN_COMPAT_030002_AND_LATER=y
 # CONFIG_XEN_COMPAT_LATEST_ONLY is not set
 CONFIG_XEN_COMPAT_030002=y
-CONFIG_HAVE_ARCH_ALLOC_SKB=y
-CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y
 CONFIG_NO_IDLE_HZ=y
 CONFIG_XEN_UTIL=y
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xen_ia64
--- a/buildconfigs/linux-defconfig_xen_ia64     Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/linux-defconfig_xen_ia64     Fri Jan 19 14:48:57 2007 +0000
@@ -1518,10 +1518,7 @@ CONFIG_CRYPTO_DES=y
 # Hardware crypto devices
 #
 # CONFIG_XEN_UTIL is not set
-CONFIG_HAVE_ARCH_ALLOC_SKB=y
-CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
 CONFIG_XEN_BALLOON=y
-CONFIG_XEN_SKBUFF=y
 # CONFIG_XEN_DEVMEM is not set
 CONFIG_XEN_REBOOT=y
 # CONFIG_XEN_SMPBOOT is not set
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xen_x86_32
--- a/buildconfigs/linux-defconfig_xen_x86_32   Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/linux-defconfig_xen_x86_32   Fri Jan 19 14:48:57 2007 +0000
@@ -3272,8 +3272,6 @@ CONFIG_XEN_COMPAT_030002_AND_LATER=y
 CONFIG_XEN_COMPAT_030002_AND_LATER=y
 # CONFIG_XEN_COMPAT_LATEST_ONLY is not set
 CONFIG_XEN_COMPAT_030002=y
-CONFIG_HAVE_ARCH_ALLOC_SKB=y
-CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y
 CONFIG_NO_IDLE_HZ=y
 CONFIG_XEN_UTIL=y
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/linux-defconfig_xen_x86_64
--- a/buildconfigs/linux-defconfig_xen_x86_64   Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/linux-defconfig_xen_x86_64   Fri Jan 19 14:48:57 2007 +0000
@@ -3103,8 +3103,6 @@ CONFIG_XEN_COMPAT_030002_AND_LATER=y
 CONFIG_XEN_COMPAT_030002_AND_LATER=y
 # CONFIG_XEN_COMPAT_LATEST_ONLY is not set
 CONFIG_XEN_COMPAT_030002=y
-CONFIG_HAVE_ARCH_ALLOC_SKB=y
-CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
 CONFIG_HAVE_IRQ_IGNORE_UNHANDLED=y
 CONFIG_NO_IDLE_HZ=y
 CONFIG_XEN_UTIL=y
diff -r 8475a4e0425e -r 3c8bb086025e buildconfigs/mk.linux-2.6-xen
--- a/buildconfigs/mk.linux-2.6-xen     Thu Jan 18 15:18:07 2007 +0000
+++ b/buildconfigs/mk.linux-2.6-xen     Fri Jan 19 14:48:57 2007 +0000
@@ -8,6 +8,10 @@ LINUX_DIR    = build-linux-$(LINUX_VER)-
 
 IMAGE_TARGET ?= vmlinuz
 INSTALL_BOOT_PATH ?= $(DESTDIR)
+
+ifeq ($(XEN_TARGET_ARCH),ia64)
+INSTALL_BOOT_PATH := $(DESTDIR)/boot
+endif
 
 LINUX_VER3  := $(LINUX_SERIES).$(word 3, $(subst ., ,$(LINUX_VER)))
 
diff -r 8475a4e0425e -r 3c8bb086025e extras/mini-os/Makefile
--- a/extras/mini-os/Makefile   Thu Jan 18 15:18:07 2007 +0000
+++ b/extras/mini-os/Makefile   Fri Jan 19 14:48:57 2007 +0000
@@ -9,7 +9,7 @@ XEN_ROOT = ../..
 XEN_ROOT = ../..
 include $(XEN_ROOT)/Config.mk
 
-XEN_INTERFACE_VERSION := 0x00030204
+XEN_INTERFACE_VERSION := 0x00030205
 export XEN_INTERFACE_VERSION
 
 # Set TARGET_ARCH
diff -r 8475a4e0425e -r 3c8bb086025e extras/mini-os/include/x86/x86_32/hypercall-x86_32.h
--- a/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h      Thu Jan 18 15:18:07 2007 +0000
+++ b/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h      Fri Jan 19 14:48:57 2007 +0000
@@ -303,6 +303,20 @@ HYPERVISOR_nmi_op(
        unsigned long arg)
 {
        return _hypercall2(int, nmi_op, op, arg);
+}
+
+static inline int
+HYPERVISOR_sysctl(
+       unsigned long op)
+{
+       return _hypercall1(int, sysctl, op);
+}
+
+static inline int
+HYPERVISOR_domctl(
+       unsigned long op)
+{
+       return _hypercall1(int, domctl, op);
 }
 
 #endif /* __HYPERCALL_X86_32_H__ */
diff -r 8475a4e0425e -r 3c8bb086025e extras/mini-os/include/x86/x86_64/hypercall-x86_64.h
--- a/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h      Thu Jan 18 15:18:07 2007 +0000
+++ b/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h      Fri Jan 19 14:48:57 2007 +0000
@@ -303,6 +303,20 @@ HYPERVISOR_nmi_op(
        unsigned long arg)
 {
        return _hypercall2(int, nmi_op, op, arg);
+}
+
+static inline int
+HYPERVISOR_sysctl(
+       unsigned long op)
+{
+       return _hypercall1(int, sysctl, op);
+}
+
+static inline int
+HYPERVISOR_domctl(
+       unsigned long op)
+{
+       return _hypercall1(int, domctl, op);
 }
 
 #endif /* __HYPERCALL_X86_64_H__ */
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c  Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c  Fri Jan 19 14:48:57 2007 +0000
@@ -76,7 +76,9 @@ static void *syscall_page;
 
 int __init sysenter_setup(void)
 {
-       syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
+       void *page = (void *)get_zeroed_page(GFP_ATOMIC);
+
+       syscall_page = page;
 
 #ifdef CONFIG_XEN
        if (boot_cpu_has(X86_FEATURE_SEP)) {
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/Kconfig
--- a/linux-2.6-xen-sparse/arch/ia64/Kconfig    Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig    Fri Jan 19 14:48:57 2007 +0000
@@ -579,12 +579,6 @@ config XEN_UTIL
 config XEN_UTIL
        default n
 
-config HAVE_ARCH_ALLOC_SKB
-       default y
-
-config HAVE_ARCH_DEV_ALLOC_SKB
-       default y
-
 config XEN_BALLOON
        default y
 
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c       Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c       Fri Jan 19 14:48:57 2007 +0000
@@ -287,7 +287,6 @@ void foo(void)
        DEFINE_MAPPED_REG_OFS(XSI_IHA_OFS, iha);
        DEFINE_MAPPED_REG_OFS(XSI_ITIR_OFS, itir);
        DEFINE_MAPPED_REG_OFS(XSI_PSR_IC_OFS, interrupt_collection_enabled);
-       DEFINE_MAPPED_REG_OFS(XSI_INCOMPL_REGFR_OFS, incomplete_regframe);
        DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum);
        DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]);
        DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]);
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/kernel/fsys.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/fsys.S      Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,925 @@
+/*
+ * This file contains the light-weight system call handlers (fsyscall-handlers).
+ *
+ * Copyright (C) 2003 Hewlett-Packard Co
+ *     David Mosberger-Tang <davidm@xxxxxxxxxx>
+ *
+ * 25-Sep-03 davidm    Implement fsys_rt_sigprocmask().
+ * 18-Feb-03 louisk    Implement fsys_gettimeofday().
+ * 28-Feb-03 davidm    Fixed several bugs in fsys_gettimeofday().  Tuned it some more,
+ *                     probably broke it along the way... ;-)
+ * 13-Jul-04 clameter   Implement fsys_clock_gettime and revise fsys_gettimeofday to make
+ *                      it capable of using memory based clocks without falling back to C code.
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/errno.h>
+#include <asm/asm-offsets.h>
+#include <asm/percpu.h>
+#include <asm/thread_info.h>
+#include <asm/sal.h>
+#include <asm/signal.h>
+#include <asm/system.h>
+#include <asm/unistd.h>
+
+#include "entry.h"
+
+/*
+ * See Documentation/ia64/fsys.txt for details on fsyscalls.
+ *
+ * On entry to an fsyscall handler:
+ *   r10       = 0 (i.e., defaults to "successful syscall return")
+ *   r11       = saved ar.pfs (a user-level value)
+ *   r15       = system call number
+ *   r16       = "current" task pointer (in normal kernel-mode, this is in r13)
+ *   r32-r39   = system call arguments
+ *   b6                = return address (a user-level value)
+ *   ar.pfs    = previous frame-state (a user-level value)
+ *   PSR.be    = cleared to zero (i.e., little-endian byte order is in effect)
+ *   all other registers may contain values passed in from user-mode
+ *
+ * On return from an fsyscall handler:
+ *   r11       = saved ar.pfs (as passed into the fsyscall handler)
+ *   r15       = system call number (as passed into the fsyscall handler)
+ *   r32-r39   = system call arguments (as passed into the fsyscall handler)
+ *   b6                = return address (as passed into the fsyscall handler)
+ *   ar.pfs    = previous frame-state (as passed into the fsyscall handler)
+ */
+
+ENTRY(fsys_ni_syscall)
+       .prologue
+       .altrp b6
+       .body
+       mov r8=ENOSYS
+       mov r10=-1
+       FSYS_RETURN
+END(fsys_ni_syscall)
+
+ENTRY(fsys_getpid)
+       .prologue
+       .altrp b6
+       .body
+       add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+       ;;
+       ld4 r9=[r9]
+       add r8=IA64_TASK_TGID_OFFSET,r16
+       ;;
+       and r9=TIF_ALLWORK_MASK,r9
+       ld4 r8=[r8]                             // r8 = current->tgid
+       ;;
+       cmp.ne p8,p0=0,r9
+(p8)   br.spnt.many fsys_fallback_syscall
+       FSYS_RETURN
+END(fsys_getpid)
+
+ENTRY(fsys_getppid)
+       .prologue
+       .altrp b6
+       .body
+       add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
+       ;;
+       ld8 r17=[r17]                           // r17 = current->group_leader
+       add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+       ;;
+
+       ld4 r9=[r9]
+       add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
+       ;;
+       and r9=TIF_ALLWORK_MASK,r9
+
+1:     ld8 r18=[r17]                           // r18 = current->group_leader->real_parent
+       ;;
+       cmp.ne p8,p0=0,r9
+       add r8=IA64_TASK_TGID_OFFSET,r18        // r8 = &current->group_leader->real_parent->tgid
+       ;;
+
+       /*
+        * The .acq is needed to ensure that the read of tgid has returned its data before
+        * we re-check "real_parent".
+        */
+       ld4.acq r8=[r8]                         // r8 = current->group_leader->real_parent->tgid
+#ifdef CONFIG_SMP
+       /*
+        * Re-read current->group_leader->real_parent.
+        */
+       ld8 r19=[r17]                           // r19 = current->group_leader->real_parent
+(p8)   br.spnt.many fsys_fallback_syscall
+       ;;
+       cmp.ne p6,p0=r18,r19                    // did real_parent change?
+       mov r19=0                       // i must not leak kernel bits...
+(p6)   br.cond.spnt.few 1b                     // yes -> redo the read of tgid and the check
+       ;;
+       mov r17=0                       // i must not leak kernel bits...
+       mov r18=0                       // i must not leak kernel bits...
+#else
+       mov r17=0                       // i must not leak kernel bits...
+       mov r18=0                       // i must not leak kernel bits...
+       mov r19=0                       // i must not leak kernel bits...
+#endif
+       FSYS_RETURN
+END(fsys_getppid)
+
+ENTRY(fsys_set_tid_address)
+       .prologue
+       .altrp b6
+       .body
+       add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+       ;;
+       ld4 r9=[r9]
+       tnat.z p6,p7=r32                // check argument register for being NaT
+       ;;
+       and r9=TIF_ALLWORK_MASK,r9
+       add r8=IA64_TASK_PID_OFFSET,r16
+       add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
+       ;;
+       ld4 r8=[r8]
+       cmp.ne p8,p0=0,r9
+       mov r17=-1
+       ;;
+(p6)   st8 [r18]=r32
+(p7)   st8 [r18]=r17
+(p8)   br.spnt.many fsys_fallback_syscall
+       ;;
+       mov r17=0                       // i must not leak kernel bits...
+       mov r18=0                       // i must not leak kernel bits...
+       FSYS_RETURN
+END(fsys_set_tid_address)
+
+/*
+ * Ensure that the time interpolator structure is compatible with the asm code
+ */
+#if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \
+       || IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4
+#error fsys_gettimeofday incompatible with changes to struct time_interpolator
+#endif
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_DIVIDE_BY_1000 0x4000
+#define CLOCK_ADD_MONOTONIC 0x8000
+
+/*
+ * Light-weight gettimeofday handler.  The code from .gettime onwards is also
+ * the back end of fsys_clock_gettime, which branches to that label.
+ *
+ * Entry: r32 is copied to r31 and used as the result pointer; r33 is only
+ * checked for NaT.  r30 is preset to CLOCK_DIVIDE_BY_1000 so the nanosecond
+ * value is divided by 1000 before it is stored.
+ * Success: the seconds are stored at [r31] and the (possibly divided)
+ * sub-second value at [r31 + IA64_TIMESPEC_TV_NSEC_OFFSET]; r8 = 0, r10 = 0.
+ * Failure: r8 = EINVAL (NaT argument) or EFAULT (reached through the EX()
+ * entries on the user-memory accesses), with r10 = -1.
+ * Pending work flags, or a time source other than 0, 1 or 2, divert to
+ * fsys_fallback_syscall.
+ */
+ENTRY(fsys_gettimeofday)
+       .prologue
+       .altrp b6
+       .body
+       mov r31 = r32
+       tnat.nz p6,p0 = r33             // guard against NaT argument
+(p6)    br.cond.spnt.few .fail_einval
+       mov r30 = CLOCK_DIVIDE_BY_1000
+       ;;
+.gettime:
+       // Register map
+       // Incoming r31 = pointer to address where to place result
+       //          r30 = flags determining how time is processed
+       // r2,r3 = temp r4-r7 preserved
+       // r8 = result nanoseconds
+       // r9 = result seconds
+       // r10 = temporary storage for clock difference
+       // r11 = preserved: saved ar.pfs
+       // r12 = preserved: memory stack
+       // r13 = preserved: thread pointer
+       // r14 = address of mask / mask
+       // r15 = preserved: system call number
+       // r16 = preserved: current task pointer
+       // r17 = wall to monotonic use
+       // r18 = time_interpolator->offset
+       // r19 = address of wall_to_monotonic
+       // r20 = pointer to struct time_interpolator / pointer to time_interpolator->address
+       // r21 = shift factor
+       // r22 = address of time interpolator->last_counter
+       // r23 = address of time_interpolator->last_cycle
+       // r24 = address of time_interpolator->offset
+       // r25 = last_cycle value
+       // r26 = last_counter value
+       // r27 = pointer to xtime
+       // r28 = sequence number at the beginning of critical section
+       // r29 = address of seqlock
+       // r30 = time processing flags / memory address
+       // r31 = pointer to result
+       // Predicates
+       // p6,p7 short term use
+       // p8 = timesource ar.itc
+       // p9 = timesource mmio64
+       // p10 = timesource mmio32
+       // p11 = timesource not to be handled by asm code
+       // p12 = memory time source ( = p9 | p10)
+       // p13 = do cmpxchg with time_interpolator_last_cycle
+       // p14 = Divide by 1000
+       // p15 = Add monotonic
+       //
+       // Note that instructions are optimized for McKinley. McKinley can process two
+       // bundles simultaneously and therefore we continuously try to feed the CPU
+       // two bundles and then a stop.
+       tnat.nz p6,p0 = r31     // branch deferred since it does not fit into bundle structure
+       mov pr = r30,0xc000     // Set predicates according to function
+       add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
+       movl r20 = time_interpolator
+       ;;
+       ld8 r20 = [r20]         // get pointer to time_interpolator structure
+       movl r29 = xtime_lock
+       ld4 r2 = [r2]           // process work pending flags
+       movl r27 = xtime
+       ;;      // only one bundle here
+       ld8 r21 = [r20]         // first quad with control information
+       and r2 = TIF_ALLWORK_MASK,r2
+(p6)    br.cond.spnt.few .fail_einval  // deferred branch
+       ;;
+       add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20
+       extr r3 = r21,32,32     // time_interpolator->nsec_per_cyc
+       extr r8 = r21,0,16      // time_interpolator->source
+       cmp.ne p6, p0 = 0, r2   // Fallback if work is scheduled
+(p6)    br.cond.spnt.many fsys_fallback_syscall
+       ;;
+       cmp.eq p8,p12 = 0,r8    // Check for cpu timer
+       cmp.eq p9,p0 = 1,r8     // MMIO64 ?
+       extr r2 = r21,24,8      // time_interpolator->jitter
+       cmp.eq p10,p0 = 2,r8    // MMIO32 ?
+       cmp.ltu p11,p0 = 2,r8   // function or other clock
+(p11)  br.cond.spnt.many fsys_fallback_syscall
+       ;;
+       setf.sig f7 = r3        // Setup for scaling of counter
+(p15)  movl r19 = wall_to_monotonic
+(p12)  ld8 r30 = [r10]
+       cmp.ne p13,p0 = r2,r0   // need jitter compensation?
+       extr r21 = r21,16,8     // shift factor
+       ;;
+.time_redo:
+       .pred.rel.mutex p8,p9,p10
+       ld4.acq r28 = [r29]     // xtime_lock.sequence. Must come first for locking purposes
+(p8)   mov r2 = ar.itc         // CPU_TIMER. 36 clocks latency!!!
+       add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20
+(p9)   ld8 r2 = [r30]          // readq(ti->address). Could also have latency issues..
+(p10)  ld4 r2 = [r30]          // readl(ti->address)
+(p13)  add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20
+       ;;                      // could be removed by moving the last add upward
+       ld8 r26 = [r22]         // time_interpolator->last_counter
+(p13)  ld8 r25 = [r23]         // time interpolator->last_cycle
+       add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20
+(p15)  ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET
+       ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET
+       add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20
+       ;;
+       ld8 r18 = [r24]         // time_interpolator->offset
+       ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET    // xtime.tv_nsec
+(p13)  sub r3 = r25,r2 // Diff needed before comparison (thanks davidm)
+       ;;
+       ld8 r14 = [r14]         // time_interpolator->mask
+(p13)  cmp.gt.unc p6,p7 = r3,r0        // check if it is less than last. p6,p7 cleared
+       sub r10 = r2,r26        // current_counter - last_counter
+       ;;
+(p6)   sub r10 = r25,r26       // time we got was less than last_cycle
+(p7)   mov ar.ccv = r25        // more than last_cycle. Prep for cmpxchg
+       ;;
+       and r10 = r10,r14       // Apply mask
+       ;;
+       setf.sig f8 = r10
+       nop.i 123
+       ;;
+(p7)   cmpxchg8.rel r3 = [r23],r2,ar.ccv
+EX(.fail_efault, probe.w.fault r31, 3) // This takes 5 cycles and we have spare time
+       xmpy.l f8 = f8,f7       // nsec_per_cyc*(counter-last_counter)
+(p15)  add r9 = r9,r17         // Add wall to monotonic.secs to result secs
+       ;;
+(p15)  ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET
+(p7)   cmp.ne p7,p0 = r25,r3   // if cmpxchg not successful redo
+       // simulate tbit.nz.or p7,p0 = r28,0
+       and r28 = ~1,r28        // Make sequence even to force retry if odd
+       getf.sig r2 = f8
+       mf
+       add r8 = r8,r18         // Add time interpolator offset
+       ;;
+       ld4 r10 = [r29]         // xtime_lock.sequence
+(p15)  add r8 = r8, r17        // Add monotonic.nsecs to nsecs
+       shr.u r2 = r2,r21
+       ;;              // overloaded 3 bundles!
+       // End critical section.
+       add r8 = r8,r2          // Add xtime.nsecs
+       cmp4.ne.or p7,p0 = r28,r10
+(p7)   br.cond.dpnt.few .time_redo     // sequence number changed ?
+       // Now r8=tv->tv_nsec and r9=tv->tv_sec
+       mov r10 = r0
+       movl r2 = 1000000000
+       add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31
+(p14)  movl r3 = 2361183241434822607   // Prep for / 1000 hack: 2^68/125 rounded up
+       ;;
+.time_normalize:
+       mov r21 = r8
+       cmp.ge p6,p0 = r8,r2
+(p14)  shr.u r20 = r8, 3               // We can repeat this if necessary just wasting some time
+       ;;
+(p14)  setf.sig f8 = r20
+(p6)   sub r8 = r8,r2
+(p6)   add r9 = 1,r9                   // two nops before the branch.
+(p14)  setf.sig f7 = r3                // Chances for repeats are 1 in 10000 for gettod
+(p6)   br.cond.dpnt.few .time_normalize
+       ;;
+       // Divided by 8 through shift. Now divide by 125
+       // The compiler was able to do that with a multiply
+       // and a shift and we do the same
+EX(.fail_efault, probe.w.fault r23, 3)         // This also costs 5 cycles
+(p14)  xmpy.hu f8 = f8, f7                     // xmpy has 5 cycles latency so use it...
+       ;;
+       mov r8 = r0
+(p14)  getf.sig r2 = f8
+       ;;
+(p14)  shr.u r21 = r2, 4
+       ;;
+EX(.fail_efault, st8 [r31] = r9)
+EX(.fail_efault, st8 [r23] = r21)
+       FSYS_RETURN
+.fail_einval:
+       mov r8 = EINVAL
+       mov r10 = -1
+       FSYS_RETURN
+.fail_efault:
+       mov r8 = EFAULT
+       mov r10 = -1
+       FSYS_RETURN
+END(fsys_gettimeofday)
+
+ENTRY(fsys_clock_gettime)      // light-weight clock_gettime: r32 = clock id, r33 = result pointer
+       .prologue
+       .altrp b6
+       .body
+       cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32  // p6 <- clock id (unsigned) above CLOCK_MONOTONIC
+       // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
+(p6)   br.spnt.few fsys_fallback_syscall
+       mov r31 = r33           // r31 <- result pointer, as .gettime expects
+       shl r30 = r32,15        // r30 <- flags: 0 for CLOCK_REALTIME, 0x8000 (CLOCK_ADD_MONOTONIC) for CLOCK_MONOTONIC
+       br.many .gettime        // continue in the shared code inside fsys_gettimeofday
+END(fsys_clock_gettime)
+
+/*
+ * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
+ *
+ * Arguments arrive in r32 (how), r33 (set), r34 (oset) and r35 (sigsetsize).
+ *
+ * Fails with EINVAL when how is above SIG_SETMASK, when sigsetsize is not
+ * _NSIG_WORDS*8, or when either of those registers is NaT; fails with EFAULT
+ * when set or oset is NaT or a user access faults (the .fail_* labels live
+ * in fsys_gettimeofday).
+ * Diverts to fsys_fallback_syscall when work flags are set, when a signal
+ * would be pending under the new mask, or (SMP) when siglock is contended.
+ * With set == NULL only the current mask is read.  On success the previous
+ * mask is stored through oset when oset != NULL and r8 = 0 is returned.
+ */
+#if _NSIG_WORDS != 1
+# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
+#endif
+ENTRY(fsys_rt_sigprocmask)
+       .prologue
+       .altrp b6
+       .body
+
+       add r2=IA64_TASK_BLOCKED_OFFSET,r16
+       add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+       cmp4.ltu p6,p0=SIG_SETMASK,r32
+
+       cmp.ne p15,p0=r0,r34                    // oset != NULL?
+       tnat.nz p8,p0=r34
+       add r31=IA64_TASK_SIGHAND_OFFSET,r16
+       ;;
+       ld8 r3=[r2]                             // read/prefetch current->blocked
+       ld4 r9=[r9]
+       tnat.nz.or p6,p0=r35
+
+       cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
+       tnat.nz.or p6,p0=r32
+(p6)   br.spnt.few .fail_einval                // fail with EINVAL
+       ;;
+#ifdef CONFIG_SMP
+       ld8 r31=[r31]                           // r31 <- current->sighand
+#endif
+       and r9=TIF_ALLWORK_MASK,r9
+       tnat.nz.or p8,p0=r33
+       ;;
+       cmp.ne p7,p0=0,r9
+       cmp.eq p6,p0=r0,r33                     // set == NULL?
+       add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- current->sighand->siglock
+(p8)   br.spnt.few .fail_efault                // fail with EFAULT
+(p7)   br.spnt.many fsys_fallback_syscall      // got pending kernel work...
+(p6)   br.dpnt.many .store_mask                // -> short-circuit to just reading the signal mask
+
+       /* Argh, we actually have to do some work and _update_ the signal mask: */
+
+EX(.fail_efault, probe.r.fault r33, 3)         // verify user has read-access to *set
+EX(.fail_efault, ld8 r14=[r33])                        // r14 <- *set
+       mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
+       ;;
+
+       rsm psr.i                               // mask interrupt delivery
+       mov ar.ccv=0
+       andcm r14=r14,r17                       // filter out SIGKILL & SIGSTOP
+
+#ifdef CONFIG_SMP
+       mov r17=1
+       ;;
+       cmpxchg4.acq r18=[r31],r17,ar.ccv       // try to acquire the lock
+       mov r8=EINVAL                   // default to EINVAL
+       ;;
+       ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
+       cmp4.ne p6,p0=r18,r0
+(p6)   br.cond.spnt.many .lock_contention
+       ;;
+#else
+       ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
+       mov r8=EINVAL                   // default to EINVAL
+#endif
+       add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
+       add r19=IA64_TASK_SIGNAL_OFFSET,r16
+       cmp4.eq p6,p0=SIG_BLOCK,r32
+       ;;
+       ld8 r19=[r19]                   // r19 <- current->signal
+       cmp4.eq p7,p0=SIG_UNBLOCK,r32
+       cmp4.eq p8,p0=SIG_SETMASK,r32
+       ;;
+       ld8 r18=[r18]                   // r18 <- current->pending.signal
+       .pred.rel.mutex p6,p7,p8
+(p6)   or r14=r3,r14                   // SIG_BLOCK
+(p7)   andcm r14=r3,r14                // SIG_UNBLOCK
+
+(p8)   mov r14=r14                     // SIG_SETMASK
+(p6)   mov r8=0                        // clear error code
+       // recalc_sigpending()
+       add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
+
+       add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
+       ;;
+       ld4 r17=[r17]           // r17 <- current->signal->group_stop_count
+(p7)   mov r8=0                // clear error code
+
+       ld8 r19=[r19]           // r19 <- current->signal->shared_pending
+       ;;
+       cmp4.gt p6,p7=r17,r0    // p6/p7 <- (current->signal->group_stop_count > 0)?
+(p8)   mov r8=0                // clear error code
+
+       or r18=r18,r19          // r18 <- current->pending | current->signal->shared_pending
+       ;;
+       // r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
+       andcm r18=r18,r14
+       add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+       ;;
+
+(p7)   cmp.ne.or.andcm p6,p7=r18,r0            // p6/p7 <- signal pending
+       mov r19=0                                       // i must not leak kernel bits...
+(p6)   br.cond.dpnt.many .sig_pending
+       ;;
+
+1:     ld4 r17=[r9]                            // r17 <- current->thread_info->flags
+       ;;
+       mov ar.ccv=r17
+       and r18=~_TIF_SIGPENDING,r17            // r18 <- r17 & ~(1 << TIF_SIGPENDING)
+       ;;
+
+       st8 [r2]=r14                            // update current->blocked with new mask
+       cmpxchg4.acq r8=[r9],r18,ar.ccv         // current->thread_info->flags <- r18
+       ;;
+       cmp.ne p6,p0=r17,r8                     // update failed?
+(p6)   br.cond.spnt.few 1b                     // yes -> retry
+
+#ifdef CONFIG_SMP
+       st4.rel [r31]=r0                        // release the lock
+#endif
+       ssm psr.i
+       ;;
+
+       srlz.d                                  // ensure psr.i is set again
+       mov r18=0                                       // i must not leak kernel bits...
+
+.store_mask:
+EX(.fail_efault, (p15) probe.w.fault r34, 3)   // verify user has write-access to *oset
+EX(.fail_efault, (p15) st8 [r34]=r3)
+       mov r2=0                                        // i must not leak kernel bits...
+       mov r3=0                                        // i must not leak kernel bits...
+       mov r8=0                                // return 0
+       mov r9=0                                        // i must not leak kernel bits...
+       mov r14=0                                       // i must not leak kernel bits...
+       mov r17=0                                       // i must not leak kernel bits...
+       mov r31=0                                       // i must not leak kernel bits...
+       FSYS_RETURN
+
+.sig_pending:
+#ifdef CONFIG_SMP
+       st4.rel [r31]=r0                        // release the lock
+#endif
+       ssm psr.i
+       ;;
+       srlz.d
+       br.sptk.many fsys_fallback_syscall      // with signal pending, do the heavy-weight syscall
+
+#ifdef CONFIG_SMP
+.lock_contention:
+       /* Rather than spinning here, fall back on doing a heavy-weight syscall.  */
+       ssm psr.i
+       ;;
+       srlz.d
+       br.sptk.many fsys_fallback_syscall
+#endif
+END(fsys_rt_sigprocmask)
+
+/*
+ * Common exit for light-weight handlers that cannot finish the job.
+ *
+ * Masks interrupts (rsm psr.i, or, when running_on_xen is non-zero, by
+ * storing 1 to the byte whose address is read from XSI_PSR_I_ADDR), loads
+ * the heavy-weight entry point sys_call_table[r15 - 1024] into r18, captures
+ * psr, ar.rsc, ar.fpsr and ar.pfs in r29, r27, r21 and r26, and then falls
+ * through into fsys_bubble_down.
+ */
+ENTRY(fsys_fallback_syscall)
+       .prologue
+       .altrp b6
+       .body
+       /*
+        * We only get here from light-weight syscall handlers.  Thus, we already
+        * know that r15 contains a valid syscall number.  No need to re-check.
+        */
+       adds r17=-1024,r15
+       movl r14=sys_call_table
+       ;;
+#ifdef CONFIG_XEN
+       movl r18=running_on_xen;;
+       ld4 r18=[r18];;
+       // p14 = running_on_xen
+       // p15 = !running_on_xen
+       cmp.ne p14,p15=r0,r18
+       ;;
+(p14)  movl r18=XSI_PSR_I_ADDR;;
+(p14)  ld8 r18=[r18]
+(p14)  mov r29=1;;
+(p14)  st1 [r18]=r29
+(p15)  rsm psr.i
+#else
+       rsm psr.i
+#endif
+       shladd r18=r17,3,r14
+       ;;
+       ld8 r18=[r18]                           // load normal (heavy-weight) syscall entry-point
+#ifdef CONFIG_XEN
+(p14)  mov r27=r8                              // park r8 in r27 across XEN_HYPER_GET_PSR
+(p14)  XEN_HYPER_GET_PSR
+       ;;
+(p14)  mov r29=r8                              // r29 <- r8 as left by XEN_HYPER_GET_PSR
+(p14)  mov r8=r27                              // restore r8
+(p15)  mov r29=psr                             // read psr (12 cyc load latency)
+#else
+       mov r29=psr                             // read psr (12 cyc load latency)
+#endif
+       mov r27=ar.rsc
+       mov r21=ar.fpsr
+       mov r26=ar.pfs
+END(fsys_fallback_syscall)
+       /* FALL THROUGH */
+GLOBAL_ENTRY(fsys_bubble_down)
+       .prologue
+       .altrp b6
+       .body
+       /*
+        * We get here for syscalls that don't have a lightweight
+        * handler.  For those, we need to bubble down into the kernel
+        * and that requires setting up a minimal pt_regs structure,
+        * and initializing the CPU state more or less as if an
+        * interruption had occurred.  To make syscall-restarts work,
+        * we setup pt_regs such that cr_iip points to the second
+        * instruction in syscall_via_break.  Decrementing the IP
+        * hence will restart the syscall via break and not
+        * decrementing IP will return us to the caller, as usual.
+        * Note that we preserve the value of psr.pp rather than
+        * initializing it from dcr.pp.  This makes it possible to
+        * distinguish fsyscall execution from other privileged
+        * execution.
+        *
+        * On entry:
+        *      - normal fsyscall handler register usage, except
+        *        that we also have:
+        *      - r18: address of syscall entry point
+        *      - r21: ar.fpsr
+        *      - r26: ar.pfs
+        *      - r27: ar.rsc
+        *      - r29: psr
+        *
+        * We used to clear some PSR bits here but that requires slow
+        * serialization.  Fortunately, that isn't really necessary.
+        * The rationale is as follows: we used to clear bits
+        * ~PSR_PRESERVED_BITS in PSR.L.  Since
+        * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
+        * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
+        * However,
+        *
+        * PSR.BE : already is turned off in __kernel_syscall_via_epc()
+        * PSR.AC : don't care (kernel normally turns PSR.AC on)
+        * PSR.I  : already turned off by the time fsys_bubble_down gets
+        *          invoked
+        * PSR.DFL: always 0 (kernel never turns it on)
+        * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
+        *          initiative
+        * PSR.DI : always 0 (kernel never turns it on)
+        * PSR.SI : always 0 (kernel never turns it on)
+        * PSR.DB : don't care --- kernel never enables kernel-level
+        *          breakpoints
+        * PSR.TB : must be 0 already; if it wasn't zero on entry to
+        *          __kernel_syscall_via_epc, the branch to fsys_bubble_down
+        *          will trigger a taken branch; the taken-trap-handler then
+        *          converts the syscall into a break-based system-call.
+        */
+       /*
+        * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
+        * The rest we have to synthesize.
+        */
+#      define PSR_ONE_BITS             ((3 << IA64_PSR_CPL0_BIT)       \
+                                        | (0x1 << IA64_PSR_RI_BIT)     \
+                                        | IA64_PSR_BN | IA64_PSR_I)
+
+       invala                                  // M0|1
+       movl r14=ia64_ret_from_syscall          // X
+
+       nop.m 0
+       movl r28=__kernel_syscall_via_break     // X    create cr.iip
+       ;;
+
+       mov r2=r16                              // A    get task addr to addl-addressable register
+       adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
+       mov r31=pr                              // I0   save pr (2 cyc)
+       ;;
+       st1 [r16]=r0                            // M2|3 clear current->thread.on_ustack flag
+       addl r22=IA64_RBS_OFFSET,r2             // A    compute base of RBS
+       add r3=TI_FLAGS+IA64_TASK_SIZE,r2       // A
+       ;;
+       ld4 r3=[r3]                             // M0|1 r3 = current_thread_info()->flags
+       lfetch.fault.excl.nt1 [r22]             // M0|1 prefetch register backing-store
+       nop.i 0
+       ;;
+       mov ar.rsc=0                            // M2   set enforced lazy mode, pl 0, LE, loadrs=0
+       nop.m 0
+       nop.i 0
+       ;;
+       mov r23=ar.bspstore                     // M2 (12 cyc) save ar.bspstore
+       mov.m r24=ar.rnat                       // M2 (5 cyc) read ar.rnat (dual-issues!)
+       nop.i 0
+       ;;
+       mov ar.bspstore=r22                     // M2 (6 cyc) switch to kernel RBS
+       movl r8=PSR_ONE_BITS                    // X
+       ;;
+       mov r25=ar.unat                         // M2 (5 cyc) save ar.unat
+       mov r19=b6                              // I0   save b6 (2 cyc)
+       mov r20=r1                              // A    save caller's gp in r20
+       ;;
+       or r29=r8,r29                           // A    construct cr.ipsr value to save
+       mov b6=r18                              // I0   copy syscall entry-point to b6 (7 cyc)
+       addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack
+
+       mov r18=ar.bsp                          // M2   save (kernel) ar.bsp (12 cyc)
+       cmp.ne pKStk,pUStk=r0,r0                // A    set pKStk <- 0, pUStk <- 1
+       br.call.sptk.many b7=ia64_syscall_setup // B
+       ;;
+       mov ar.rsc=0x3                          // M2   set eager mode, pl 0, LE, loadrs=0
+       mov rp=r14                              // I0   set the real return addr
+       and r3=_TIF_SYSCALL_TRACEAUDIT,r3       // A
+       ;;
+#ifdef CONFIG_XEN
+       movl r14=running_on_xen;;
+       ld4 r14=[r14];;
+       // p14 = running_on_xen
+       // p15 = !running_on_xen
+       cmp.ne p14,p15=r0,r14
+       ;;
+(p14)  movl r28=XSI_PSR_I_ADDR;;
+(p14)  ld8 r28=[r28];;
+(p14)  adds r28=-1,r28;;                       // event_pending
+(p14)  ld1 r14=[r28];;
+(p14)  cmp.ne.unc p13,p14=r14,r0;;             // p13 = Xen with an event pending, p14 = Xen without
+(p13)  XEN_HYPER_SSM_I                         // event pending: re-enable via XEN_HYPER_SSM_I
+(p14)  adds r28=1,r28;;                        // event_mask
+(p14)  st1 [r28]=r0;;                          // nothing pending: just clear the mask byte
+(p15)  ssm psr.i
+#else
+       ssm psr.i                               // M2   we're on kernel stacks now, reenable irqs
+#endif
+       cmp.eq p8,p0=r3,r0                      // A
+(p10)  br.cond.spnt.many ia64_ret_from_syscall // B    return if bad call-frame or r15 is a NaT
+
+       nop.m 0
+(p8)   br.call.sptk.many b6=b6                 // B    (ignore return address)
+       br.cond.spnt ia64_trace_syscall         // B
+END(fsys_bubble_down)
+
+       .rodata
+       .align 8                        // the table consists of 8-byte (data8) entries
+       .globl fsyscall_table
+
+       data8 fsys_bubble_down          // sits at fsyscall_table - 8; NOTE(review): its consumer is not visible here, confirm in the dispatcher
+fsyscall_table:                        // one entry per syscall; the row tagged 1025 is the second, so entry 0 is syscall 1024
+       data8 fsys_ni_syscall           // entry 0; a 0 in the rows below means no light-weight handler is listed for that syscall
+       data8 0                         // exit                 // 1025
+       data8 0                         // read
+       data8 0                         // write
+       data8 0                         // open
+       data8 0                         // close
+       data8 0                         // creat                // 1030
+       data8 0                         // link
+       data8 0                         // unlink
+       data8 0                         // execve
+       data8 0                         // chdir
+       data8 0                         // fchdir               // 1035
+       data8 0                         // utimes
+       data8 0                         // mknod
+       data8 0                         // chmod
+       data8 0                         // chown
+       data8 0                         // lseek                // 1040
+       data8 fsys_getpid               // getpid
+       data8 fsys_getppid              // getppid
+       data8 0                         // mount
+       data8 0                         // umount
+       data8 0                         // setuid               // 1045
+       data8 0                         // getuid
+       data8 0                         // geteuid
+       data8 0                         // ptrace
+       data8 0                         // access
+       data8 0                         // sync                 // 1050
+       data8 0                         // fsync
+       data8 0                         // fdatasync
+       data8 0                         // kill
+       data8 0                         // rename
+       data8 0                         // mkdir                // 1055
+       data8 0                         // rmdir
+       data8 0                         // dup
+       data8 0                         // pipe
+       data8 0                         // times
+       data8 0                         // brk                  // 1060
+       data8 0                         // setgid
+       data8 0                         // getgid
+       data8 0                         // getegid
+       data8 0                         // acct
+       data8 0                         // ioctl                // 1065
+       data8 0                         // fcntl
+       data8 0                         // umask
+       data8 0                         // chroot
+       data8 0                         // ustat
+       data8 0                         // dup2                 // 1070
+       data8 0                         // setreuid
+       data8 0                         // setregid
+       data8 0                         // getresuid
+       data8 0                         // setresuid
+       data8 0                         // getresgid            // 1075
+       data8 0                         // setresgid
+       data8 0                         // getgroups
+       data8 0                         // setgroups
+       data8 0                         // getpgid
+       data8 0                         // setpgid              // 1080
+       data8 0                         // setsid
+       data8 0                         // getsid
+       data8 0                         // sethostname
+       data8 0                         // setrlimit
+       data8 0                         // getrlimit            // 1085
+       data8 0                         // getrusage
+       data8 fsys_gettimeofday         // gettimeofday
+       data8 0                         // settimeofday
+       data8 0                         // select
+       data8 0                         // poll                 // 1090
+       data8 0                         // symlink
+       data8 0                         // readlink
+       data8 0                         // uselib
+       data8 0                         // swapon
+       data8 0                         // swapoff              // 1095
+       data8 0                         // reboot
+       data8 0                         // truncate
+       data8 0                         // ftruncate
+       data8 0                         // fchmod
+       data8 0                         // fchown               // 1100
+       data8 0                         // getpriority
+       data8 0                         // setpriority
+       data8 0                         // statfs
+       data8 0                         // fstatfs
+       data8 0                         // gettid               // 1105
+       data8 0                         // semget
+       data8 0                         // semop
+       data8 0                         // semctl
+       data8 0                         // msgget
+       data8 0                         // msgsnd               // 1110
+       data8 0                         // msgrcv
+       data8 0                         // msgctl
+       data8 0                         // shmget
+       data8 0                         // shmat
+       data8 0                         // shmdt                // 1115
+       data8 0                         // shmctl
+       data8 0                         // syslog
+       data8 0                         // setitimer
+       data8 0                         // getitimer
+       data8 0                                                 // 1120
+       data8 0
+       data8 0
+       data8 0                         // vhangup
+       data8 0                         // lchown
+       data8 0                         // remap_file_pages     // 1125
+       data8 0                         // wait4
+       data8 0                         // sysinfo
+       data8 0                         // clone
+       data8 0                         // setdomainname
+       data8 0                         // newuname             // 1130
+       data8 0                         // adjtimex
+       data8 0
+       data8 0                         // init_module
+       data8 0                         // delete_module
+       data8 0                                                 // 1135
+       data8 0
+       data8 0                         // quotactl
+       data8 0                         // bdflush
+       data8 0                         // sysfs
+       data8 0                         // personality          // 1140
+       data8 0                         // afs_syscall
+       data8 0                         // setfsuid
+       data8 0                         // setfsgid
+       data8 0                         // getdents
+       data8 0                         // flock                // 1145
+       data8 0                         // readv
+       data8 0                         // writev
+       data8 0                         // pread64
+       data8 0                         // pwrite64
+       data8 0                         // sysctl               // 1150
+       data8 0                         // mmap
+       data8 0                         // munmap
+       data8 0                         // mlock
+       data8 0                         // mlockall
+       data8 0                         // mprotect             // 1155
+       data8 0                         // mremap
+       data8 0                         // msync
+       data8 0                         // munlock
+       data8 0                         // munlockall
+       data8 0                         // sched_getparam       // 1160
+       data8 0                         // sched_setparam
+       data8 0                         // sched_getscheduler
+       data8 0                         // sched_setscheduler
+       data8 0                         // sched_yield
+       data8 0                         // sched_get_priority_max       // 1165
+       data8 0                         // sched_get_priority_min
+       data8 0                         // sched_rr_get_interval
+       data8 0                         // nanosleep
+       data8 0                         // nfsservctl
+       data8 0                         // prctl                // 1170
+       data8 0                         // getpagesize
+       data8 0                         // mmap2
+       data8 0                         // pciconfig_read
+       data8 0                         // pciconfig_write
+       data8 0                         // perfmonctl           // 1175
+       data8 0                         // sigaltstack
+       data8 0                         // rt_sigaction
+       data8 0                         // rt_sigpending
+       data8 fsys_rt_sigprocmask       // rt_sigprocmask
+       data8 0                         // rt_sigqueueinfo      // 1180
+       data8 0                         // rt_sigreturn
+       data8 0                         // rt_sigsuspend
+       data8 0                         // rt_sigtimedwait
+       data8 0                         // getcwd
+       data8 0                         // capget               // 1185
+       data8 0                         // capset
+       data8 0                         // sendfile
+       data8 0
+       data8 0
+       data8 0                         // socket               // 1190
+       data8 0                         // bind
+       data8 0                         // connect
+       data8 0                         // listen
+       data8 0                         // accept
+       data8 0                         // getsockname          // 1195
+       data8 0                         // getpeername
+       data8 0                         // socketpair
+       data8 0                         // send
+       data8 0                         // sendto
+       data8 0                         // recv                 // 1200
+       data8 0                         // recvfrom
+       data8 0                         // shutdown
+       data8 0                         // setsockopt
+       data8 0                         // getsockopt
+       data8 0                         // sendmsg              // 1205
+       data8 0                         // recvmsg
+       data8 0                         // pivot_root
+       data8 0                         // mincore
+       data8 0                         // madvise
+       data8 0                         // newstat              // 1210
+       data8 0                         // newlstat
+       data8 0                         // newfstat
+       data8 0                         // clone2
+       data8 0                         // getdents64
+       data8 0                         // getunwind            // 1215
+       data8 0                         // readahead
+       data8 0                         // setxattr
+       data8 0                         // lsetxattr
+       data8 0                         // fsetxattr
+       data8 0                         // getxattr             // 1220
+       data8 0                         // lgetxattr
+       data8 0                         // fgetxattr
+       data8 0                         // listxattr
+       data8 0                         // llistxattr
+       data8 0                         // flistxattr           // 1225
+       data8 0                         // removexattr
+       data8 0                         // lremovexattr
+       data8 0                         // fremovexattr
+       data8 0                         // tkill
+       data8 0                         // futex                // 1230
+       data8 0                         // sched_setaffinity
+       data8 0                         // sched_getaffinity
+       data8 fsys_set_tid_address      // set_tid_address
+       data8 0                         // fadvise64_64
+       data8 0                         // tgkill               // 1235
+       data8 0                         // exit_group
+       data8 0                         // lookup_dcookie
+       data8 0                         // io_setup
+       data8 0                         // io_destroy
+       data8 0                         // io_getevents         // 1240
+       data8 0                         // io_submit
+       data8 0                         // io_cancel
+       data8 0                         // epoll_create
+       data8 0                         // epoll_ctl
+       data8 0                         // epoll_wait           // 1245
+       data8 0                         // restart_syscall
+       data8 0                         // semtimedop
+       data8 0                         // timer_create
+       data8 0                         // timer_settime
+       data8 0                         // timer_gettime        // 1250
+       data8 0                         // timer_getoverrun
+       data8 0                         // timer_delete
+       data8 0                         // clock_settime
+       data8 fsys_clock_gettime        // clock_gettime
+
+       // fill in zeros for the remaining entries
+       .zero:                          // marks the end of the explicitly listed entries
+       .space fsyscall_table + 8*NR_syscalls - .zero, 0       // zero-pad so the whole table spans 8*NR_syscalls bytes
diff -r 8475a4e0425e -r 3c8bb086025e 
linux-2.6-xen-sparse/arch/ia64/kernel/gate.S
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.S      Thu Jan 18 15:18:07 
2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.S      Fri Jan 19 14:48:57 
2007 +0000
@@ -55,18 +55,6 @@
 #define LOAD_RUNNING_ON_XEN(reg)                       \
 [1:]   movl reg=0;                                     \
        .xdata4 ".data.patch.running_on_xen", 1b-.
-
-       .section ".data.patch.brl_xen_rsm_be_i", "a"
-       .previous
-#define BRL_COND_XEN_RSM_BE_I(pr)                      \
-[1:](pr)brl.cond.sptk 0;                               \
-       .xdata4 ".data.patch.brl_xen_rsm_be_i", 1b-.
-
-       .section ".data.patch.brl_xen_get_psr", "a"
-       .previous
-#define BRL_COND_XEN_GET_PSR(pr)                       \
-[1:](pr)brl.cond.sptk 0;                               \
-       .xdata4 ".data.patch.brl_xen_get_psr", 1b-.
 
        .section ".data.patch.brl_xen_ssm_i_0", "a"
        .previous
@@ -155,9 +143,9 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc)
        cmp.ne isXen,isRaw=r0,r30
        ;;
 (isRaw)        rsm psr.be | psr.i
-       BRL_COND_XEN_RSM_BE_I(isXen)
-       .global .vdso_rsm_be_i_ret
-.vdso_rsm_be_i_ret:
+(isXen)        st1 [r22]=r20
+(isXen)        rum psr.be
+       ;;
 #else
        rsm psr.be | psr.i                      // M2 (5 cyc to srlz.d)
 #endif
@@ -170,9 +158,9 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc)
        lfetch [r18]                            // M0|1
 #ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
 (isRaw)        mov r29=psr
-       BRL_COND_XEN_GET_PSR(isXen)
-       .global .vdso_get_psr_ret
-.vdso_get_psr_ret:
+(isXen)        XEN_HYPER_GET_PSR
+       ;;
+(isXen)        mov r29=r8
 #else
        mov r29=psr                             // M2 (12 cyc)
 #endif
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S  Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S  Fri Jan 19 14:48:57 2007 +0000
@@ -48,14 +48,6 @@ SECTIONS
                                    __start_gate_running_on_xen_patchlist = .;
                                    *(.data.patch.running_on_xen)
                                    __end_gate_running_on_xen_patchlist = .;
-
-                                   __start_gate_brl_xen_rsm_be_i_patchlist = .;
-                                   *(.data.patch.brl_xen_rsm_be_i)
-                                   __end_gate_brl_xen_rsm_be_i_patchlist = .;
-
-                                   __start_gate_brl_xen_get_psr_patchlist = .;
-                                   *(.data.patch.brl_xen_get_psr)
-                                   __end_gate_brl_xen_get_psr_patchlist = .;
 
                                    __start_gate_brl_xen_ssm_i_0_patchlist = .;
                                    *(.data.patch.brl_xen_ssm_i_0)
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/kernel/patch.c
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c     Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c     Fri Jan 19 14:48:57 2007 +0000
@@ -236,13 +236,9 @@ static void
 static void
 patch_brl_in_vdso(void)
 {
-       EXTERN_PATCHLIST(xen_rsm_be_i);
-       EXTERN_PATCHLIST(xen_get_psr);
        EXTERN_PATCHLIST(xen_ssm_i_0);
        EXTERN_PATCHLIST(xen_ssm_i_1);
 
-       PATCH_BRL_SYMADDR(xen_rsm_be_i);
-       PATCH_BRL_SYMADDR(xen_get_psr);
        PATCH_BRL_SYMADDR(xen_ssm_i_0);
        PATCH_BRL_SYMADDR(xen_ssm_i_1);
 }
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c     Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c     Fri Jan 19 14:48:57 2007 +0000
@@ -76,10 +76,18 @@ EXPORT_SYMBOL(__per_cpu_offset);
 #endif
 
 #ifdef CONFIG_XEN
+static void
+xen_panic_hypercall(struct unw_frame_info *info, void *arg)
+{      /* Callback passed to unw_init_running() by xen_panic_event(); arg is unused here. */
+       current->thread.ksp = (__u64)info->sw - 16;     /* record a kernel stack pointer derived from the unwind frame's sw; NOTE(review): the -16 presumably mirrors the ia64 ksp/switch_stack convention -- confirm */
+       HYPERVISOR_shutdown(SHUTDOWN_crash);            /* issue the shutdown hypercall with reason SHUTDOWN_crash */
+       /* we're never actually going to get here... */
+}
+
 static int
 xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
-       HYPERVISOR_shutdown(SHUTDOWN_crash);
+       unw_init_running(xen_panic_hypercall, NULL);
        /* we're never actually going to get here... */
        return NOTIFY_DONE;
 }
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S    Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S    Fri Jan 19 14:48:57 2007 +0000
@@ -8,336 +8,110 @@
 #include <asm/processor.h>
 #include <asm/asmmacro.h>
 
-/* To clear vpsr.ic, vpsr.i needs to be cleared first */
-#define XEN_CLEAR_PSR_IC                               \
-       mov r14=1;                                      \
-       movl r15=XSI_PSR_I_ADDR;                        \
-       movl r2=XSI_PSR_IC;                             \
-       ;;                                              \
-       ld8 r15=[r15];                                  \
-       ld4 r3=[r2];                                    \
-       ;;                                              \
-       ld1 r16=[r15];                                  \
-       ;;                                              \
-       st1 [r15]=r14;                                  \
-       st4 [r2]=r0;                                    \
-       ;;
-
-/* First restore vpsr.ic, and then vpsr.i */
-#define XEN_RESTORE_PSR_IC                             \
-       st4 [r2]=r3;                                    \
-       st1 [r15]=r16;                                  \
-       ;;
+GLOBAL_ENTRY(xen_get_psr)              // stub: issue the GET_PSR hyperprivop and return
+       XEN_HYPER_GET_PSR               // other users of this macro in this patch read the result from r8
+       br.ret.sptk.many rp             // return to the caller through rp
+    ;;
+END(xen_get_psr)
 
 GLOBAL_ENTRY(xen_get_ivr)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov r8=cr.ivr;;
-(p7)   br.ret.sptk.many rp
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_GET_IVR
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
        ;;
 END(xen_get_ivr)
 
 GLOBAL_ENTRY(xen_get_tpr)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov r8=cr.tpr;;
-(p7)   br.ret.sptk.many rp
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_GET_TPR
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
        ;;
 END(xen_get_tpr)
 
 GLOBAL_ENTRY(xen_set_tpr)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov cr.tpr=r32;;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_SET_TPR
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
        ;;
 END(xen_set_tpr)
 
 GLOBAL_ENTRY(xen_eoi)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov cr.eoi=r0;;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_EOI
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
        ;;
 END(xen_eoi)
 
 GLOBAL_ENTRY(xen_thash)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   thash r8=r32;;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_THASH
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
        ;;
 END(xen_thash)
 
 GLOBAL_ENTRY(xen_set_itm)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov cr.itm=r32;;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_SET_ITM
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
        ;;
 END(xen_set_itm)
 
 GLOBAL_ENTRY(xen_ptcga)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   ptc.ga r32,r33;;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
        mov r9=r33
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_PTC_GA
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
        ;;
 END(xen_ptcga)
 
 GLOBAL_ENTRY(xen_get_rr)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov r8=rr[r32];;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_GET_RR
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
        ;;
 END(xen_get_rr)
 
 GLOBAL_ENTRY(xen_set_rr)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov rr[r32]=r33;;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
        mov r9=r33
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_SET_RR
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
        ;;
 END(xen_set_rr)
 
 GLOBAL_ENTRY(xen_set_kr)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.ne p7,p0=r8,r0;;
-(p7)   br.cond.spnt.few 1f;
-       ;;
-       cmp.eq p7,p0=r8,r0
-       adds r8=-1,r8;;
-(p7)   mov ar0=r9
-(p7)   br.ret.sptk.many rp;;
-       cmp.eq p7,p0=r8,r0
-       adds r8=-1,r8;;
-(p7)   mov ar1=r9
-(p7)   br.ret.sptk.many rp;;
-       cmp.eq p7,p0=r8,r0
-       adds r8=-1,r8;;
-(p7)   mov ar2=r9
-(p7)   br.ret.sptk.many rp;;
-       cmp.eq p7,p0=r8,r0
-       adds r8=-1,r8;;
-(p7)   mov ar3=r9
-(p7)   br.ret.sptk.many rp;;
-       cmp.eq p7,p0=r8,r0
-       adds r8=-1,r8;;
-(p7)   mov ar4=r9
-(p7)   br.ret.sptk.many rp;;
-       cmp.eq p7,p0=r8,r0
-       adds r8=-1,r8;;
-(p7)   mov ar5=r9
-(p7)   br.ret.sptk.many rp;;
-       cmp.eq p7,p0=r8,r0
-       adds r8=-1,r8;;
-(p7)   mov ar6=r9
-(p7)   br.ret.sptk.many rp;;
-       cmp.eq p7,p0=r8,r0
-       adds r8=-1,r8;;
-(p7)   mov ar7=r9
-(p7)   br.ret.sptk.many rp;;
-
-1:     mov r8=r32
+       mov r8=r32
        mov r9=r33
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_SET_KR
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
 END(xen_set_kr)
 
 GLOBAL_ENTRY(xen_fc)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   fc r32;;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_FC
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
 END(xen_fc)
 
 GLOBAL_ENTRY(xen_get_cpuid)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov r8=cpuid[r32];;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_GET_CPUID
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
 END(xen_get_cpuid)
 
 GLOBAL_ENTRY(xen_get_pmd)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov r8=pmd[r32];;
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_GET_PMD
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
 END(xen_get_pmd)
 
 #ifdef CONFIG_IA32_SUPPORT
 GLOBAL_ENTRY(xen_get_eflag)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov r8=ar24;;
-(p7)   br.ret.sptk.many rp
-       ;;
-       mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_GET_EFLAG
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
 END(xen_get_eflag)
        
 // some bits aren't set if pl!=0, see SDM vol1 3.1.8
 GLOBAL_ENTRY(xen_set_eflag)
-       movl r8=running_on_xen;;
-       ld4 r8=[r8];;
-       cmp.eq p7,p0=r8,r0;;
-(p7)   mov ar24=r32
-(p7)   br.ret.sptk.many rp
-       ;;
        mov r8=r32
-       ;;
-       XEN_CLEAR_PSR_IC
-       ;;
        XEN_HYPER_SET_EFLAG
-       ;;
-       XEN_RESTORE_PSR_IC
-       ;;
        br.ret.sptk.many rp
 END(xen_set_eflag)
 #endif
@@ -355,27 +129,6 @@ END(xen_send_ipi)
 #ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
 // Those are vdso specialized.
 // In fsys mode, call, ret can't be used.
-GLOBAL_ENTRY(xen_rsm_be_i)
-       st1 [r22]=r20
-       st4 [r23]=r0
-       XEN_HYPER_RSM_BE
-       st4 [r23]=r20
-       brl.cond.sptk   .vdso_rsm_be_i_ret
-       ;; 
-END(xen_rsm_be_i)
-
-GLOBAL_ENTRY(xen_get_psr)
-       mov r31=r8
-       mov r25=IA64_PSR_IC
-       st4 [r23]=r0
-       XEN_HYPER_GET_PSR
-       ;; 
-       st4 [r23]=r20
-       or r29=r8,r25 // vpsr.ic was cleared for hyperprivop
-       mov r8=r31
-       brl.cond.sptk   .vdso_get_psr_ret
-       ;; 
-END(xen_get_psr)
 
        // see xen_ssm_i() in privop.h
        // r22 = &vcpu->vcpu_info->evtchn_upcall_mask
@@ -395,7 +148,6 @@ END(xen_get_psr)
 (p14)  cmp.ne.unc p11,p0=r0,r25;       \
        ;;                              \
 (p11)  st1 [r22]=r20;                  \
-(p11)  st4 [r23]=r0;                   \
 (p11)  XEN_HYPER_SSM_I;
                
 GLOBAL_ENTRY(xen_ssm_i_0)
@@ -409,4 +161,11 @@ GLOBAL_ENTRY(xen_ssm_i_1)
        brl.cond.sptk   .vdso_ssm_i_1_ret
        ;; 
 END(xen_ssm_i_1)
+
+GLOBAL_ENTRY(__hypercall)              // generic hypercall stub; prototype in asm-ia64/hypercall.h takes (a1..a5, cmd)
+       mov r2=r37                      // r2 carries the hypercall number; r37 is presumably the sixth argument (cmd) -- confirm against the ia64 calling convention
+       break 0x1000                    // same break immediate the former inline-asm _hypercallN macros used
+       br.ret.sptk.many b0             // return to the caller
+       ;; 
+END(__hypercall)
 #endif
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c   Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c   Fri Jan 19 14:48:57 2007 +0000
@@ -25,7 +25,10 @@
 #include <linux/bootmem.h>
 #include <linux/module.h>
 #include <linux/vmalloc.h>
+#include <linux/efi.h>
 #include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/meminit.h>
 #include <asm/hypervisor.h>
 #include <asm/hypercall.h>
 #include <xen/interface/memory.h>
@@ -45,6 +48,8 @@ static int p2m_expose_init(void);
 #else
 #define p2m_expose_init() (-ENOSYS)
 #endif
+
+EXPORT_SYMBOL(__hypercall);
 
 //XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear()
 // move those to lib/contiguous_bitmap?
@@ -56,13 +61,90 @@ static int p2m_expose_init(void);
  */
 unsigned long *contiguous_bitmap;
 
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+/* efi_memmap_walk() callback: back the slice of contiguous_bitmap that covers [start, end) with bootmem pages and map them in the kernel page tables. Logic is borrowed from create_mem_map_table() for virtual memmap. arg is unused; always returns 0. */
+static int
+create_contiguous_bitmap(u64 start, u64 end, void *arg)
+{
+       unsigned long address, start_page, end_page;
+       unsigned long bitmap_start, bitmap_end;
+       unsigned char *bitmap;
+       int node;
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+       /* Byte addresses inside the bitmap for start/end (one bit per page frame, hence >> 3); the end is padded by 2 * BITS_PER_LONG bits, like the size in contiguous_bitmap_init(). */
+       bitmap_start = (unsigned long)contiguous_bitmap +
+                      ((__pa(start) >> PAGE_SHIFT) >> 3);
+       bitmap_end = (unsigned long)contiguous_bitmap +
+                    (((__pa(end) >> PAGE_SHIFT) + 2 * BITS_PER_LONG) >> 3);
+       /* Widen the byte range to whole pages and pick the node id from start's physical address. */
+       start_page = bitmap_start & PAGE_MASK;
+       end_page = PAGE_ALIGN(bitmap_end);
+       node = paddr_to_nid(__pa(start));
+       /* Allocate and zero backing store for the whole [start_page, end_page) window up front. */
+       bitmap = alloc_bootmem_pages_node(NODE_DATA(node),
+                                         end_page - start_page);
+       BUG_ON(!bitmap);
+       memset(bitmap, 0, end_page - start_page);
+       /* Walk the window page by page, populating any missing page-table level from bootmem. */
+       for (address = start_page; address < end_page; address += PAGE_SIZE) {
+               pgd = pgd_offset_k(address);
+               if (pgd_none(*pgd))
+                       pgd_populate(&init_mm, pgd,
+                                    alloc_bootmem_pages_node(NODE_DATA(node),
+                                                             PAGE_SIZE));
+               pud = pud_offset(pgd, address);
+               /* same for the pud level */
+               if (pud_none(*pud))
+                       pud_populate(&init_mm, pud,
+                                    alloc_bootmem_pages_node(NODE_DATA(node),
+                                                             PAGE_SIZE));
+               pmd = pmd_offset(pud, address);
+               /* same for the pmd level */
+               if (pmd_none(*pmd))
+                       pmd_populate_kernel(&init_mm, pmd,
+                                           alloc_bootmem_pages_node
+                                           (NODE_DATA(node), PAGE_SIZE));
+               pte = pte_offset_kernel(pmd, address);
+               /* Map the matching page of `bitmap` only when no PTE exists yet. NOTE(review): if a PTE is already present, that page of `bitmap` is never mapped and looks wasted -- confirm. */
+               if (pte_none(*pte))
+                       set_pte(pte,
+                               pfn_pte(__pa(bitmap + (address - start_page))
+                                       >> PAGE_SHIFT, PAGE_KERNEL));
+       }
+       return 0;
+}
+#endif
+
+static void
+__contiguous_bitmap_init(unsigned long size)
+{      /* Allocate contiguous_bitmap as one size-byte bootmem block and clear it. */
+       contiguous_bitmap = alloc_bootmem_pages(size);
+       BUG_ON(!contiguous_bitmap);             /* allocation failure is fatal */
+       memset(contiguous_bitmap, 0, size);
+}
+
 void
 contiguous_bitmap_init(unsigned long end_pfn)
 {
        unsigned long size = (end_pfn + 2 * BITS_PER_LONG) >> 3;
-       contiguous_bitmap = alloc_bootmem_low_pages(size);
-       BUG_ON(!contiguous_bitmap);
-       memset(contiguous_bitmap, 0, size);
+#ifndef CONFIG_VIRTUAL_MEM_MAP
+       __contiguous_bitmap_init(size);         /* single bootmem allocation */
+#else  /* CONFIG_VIRTUAL_MEM_MAP */
+       unsigned long max_gap = 0;
+       /* find_largest_hole presumably stores the biggest gap between EFI memory ranges in max_gap -- confirm */
+       efi_memmap_walk(find_largest_hole, (u64*)&max_gap);
+       if (max_gap < LARGE_GAP) {              /* below the LARGE_GAP threshold: single allocation as well */
+               __contiguous_bitmap_init(size);
+       } else {                                /* otherwise carve virtual space off the top of the vmalloc area ... */
+               unsigned long map_size = PAGE_ALIGN(size);
+               vmalloc_end -= map_size;
+               contiguous_bitmap = (unsigned long*)vmalloc_end;
+               efi_memmap_walk(create_contiguous_bitmap, NULL);        /* ... and back it per EFI memory range */
+       }
+#endif /* CONFIG_VIRTUAL_MEM_MAP */
 }
 
 #if 0
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S     Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S     Fri Jan 19 14:48:57 2007 +0000
@@ -59,12 +59,6 @@ GLOBAL_ENTRY(ia64_switch_to)
        shr.u r26=r20,IA64_GRANULE_SHIFT
        cmp.eq p7,p6=r25,in0
        ;;
-#ifdef CONFIG_XEN
-       movl r8=XSI_PSR_IC
-       ;;
-       st4 [r8]=r0     // force psr.ic off for hyperprivop(s)
-       ;;
-#endif
        /*
         * If we've already mapped this task's page, we can skip doing it again.
         */
@@ -72,19 +66,13 @@ GLOBAL_ENTRY(ia64_switch_to)
 (p6)   br.cond.dpnt .map
        ;;
 .done:
-#ifdef CONFIG_XEN
-       // psr.ic already off
+       ld8 sp=[r21]                    // load kernel stack pointer of new task
+#ifdef CONFIG_XEN
        // update "current" application register
        mov r8=IA64_KR_CURRENT
        mov r9=in0;;
        XEN_HYPER_SET_KR
-       ld8 sp=[r21]                    // load kernel stack pointer of new task
-       movl r27=XSI_PSR_IC
-       mov r8=1
-       ;;
-       st4 [r27]=r8                    // psr.ic back on
-#else
-       ld8 sp=[r21]                    // load kernel stack pointer of new task
+#else
        mov IA64_KR(CURRENT)=in0        // update "current" application register
 #endif
        mov r8=r13                      // return pointer to previously running task
@@ -99,7 +87,10 @@ GLOBAL_ENTRY(ia64_switch_to)
 
 .map:
 #ifdef CONFIG_XEN
-       // psr.ic already off
+    movl r25=XSI_PSR_IC                        // clear psr.ic
+    ;;
+    st4 [r25]=r0
+    ;;
 #else
        rsm psr.ic                      // interrupts (psr.i) are already disabled here
 #endif
@@ -132,7 +123,13 @@ GLOBAL_ENTRY(ia64_switch_to)
 #endif
        ;;
        itr.d dtr[r25]=r23              // wire in new mapping...
-#ifndef CONFIG_XEN
+#ifdef CONFIG_XEN
+       mov r9=1
+       movl r8=XSI_PSR_IC
+       ;;
+       st4 [r8]=r9
+       ;;
+#else    
        ssm psr.ic                      // reenable the psr.ic bit
        ;;
        srlz.d
@@ -415,7 +412,16 @@ ENTRY(ia64_leave_syscall)
 (pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
        ;;
        ld8 r26=[r2],PT(B0)-PT(AR_PFS)  // M0|1 load ar.pfs
+#ifdef CONFIG_XEN
+(pKStk)        mov r21=r8
+(pKStk)        XEN_HYPER_GET_PSR
+       ;;
+(pKStk)        mov r22=r8
+(pKStk)        mov r8=r21
+       ;;
+#else    
 (pKStk)        mov r22=psr                     // M2   read PSR now that interrupts are disabled
+#endif
        nop 0
        ;;
        ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
@@ -645,7 +651,16 @@ GLOBAL_ENTRY(ia64_leave_kernel)
        adds r16=PT(CR_IPSR)+16,r12
        adds r17=PT(CR_IIP)+16,r12
 
+#ifdef CONFIG_XEN    
+(pKStk)        mov r29=r8
+(pKStk)        XEN_HYPER_GET_PSR
+       ;;
+(pKStk)        mov r22=r8
+(pKStk)        mov r8=r29
+       ;;
+#else
 (pKStk)        mov r22=psr             // M2 read PSR now that interrupts are disabled
+#endif
        nop.i 0
        nop.i 0
        ;;
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S       Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S       Fri Jan 19 14:48:57 2007 +0000
@@ -709,11 +709,23 @@ ENTRY(page_fault)
        ;;
 #endif
 #ifdef CONFIG_XEN
-       br.cond.sptk.many       xen_page_fault
-       ;;
-done_xen_page_fault:
-#endif
+    
+#define MASK_TO_PEND_OFS    (-1)
+    
+(p15)  movl r14=XSI_PSR_I_ADDR
+       ;;
+(p15)  ld8 r14=[r14]
+       ;;
+(p15)  st1 [r14]=r0,MASK_TO_PEND_OFS           // if (p15) vpsr.i = 1
+       ;;              // if (p15) (vcpu->vcpu_info->evtchn_upcall_mask)=0
+(p15)  ld1 r14=[r14]   // if (vcpu->vcpu_info->evtchn_upcall_pending)
+       ;;
+(p15)  cmp.ne  p15,p0=r14,r0
+       ;;
+(p15)  XEN_HYPER_SSM_I
+#else
 (p15)  ssm psr.i                               // restore psr.i
+#endif
        movl r14=ia64_leave_kernel
        ;;
        SAVE_REST
@@ -729,25 +741,6 @@ ENTRY(dkey_miss)
 ENTRY(dkey_miss)
        DBG_FAULT(7)
        FAULT(7)
-#ifdef CONFIG_XEN
-       // Leaving this code inline above results in an IVT section overflow
-       // There is no particular reason for this code to be here...
-xen_page_fault:
-(p15)  movl r3=XSI_PSR_I_ADDR
-       ;;
-(p15)  ld8 r3=[r3]
-       ;;
-(p15)  st1 [r3]=r0,-1  // if (p15) vpsr.i = 1
-       mov r14=r0
-       ;;
-(p15)  ld1 r14=[r3]                            // if (pending_events)
-       adds r3=8,r2                            // re-set up second base pointer
-       ;;
-(p15)  cmp.ne  p15,p0=r14,r0
-       ;;
-       br.cond.sptk.many done_xen_page_fault
-       ;;
-#endif
 END(dkey_miss)
 
        .org ia64_ivt+0x2000
@@ -1170,14 +1163,13 @@ 1:
 #ifdef CONFIG_XEN
 (p15)  ld8 r16=[r16]                           // vpsr.i
        ;;
-(p15)  st1 [r16]=r0,-1         // if (p15) vpsr.i = 1
-       mov r2=r0
-       ;;
-(p15)  ld1 r2=[r16]                            // if (pending_events)
-       ;;
-       cmp.ne  p6,p0=r2,r0
-       ;;
-(p6)   ssm     psr.i                           //   do a real ssm psr.i
+(p15)  st1 [r16]=r0,MASK_TO_PEND_OFS           // if (p15) vpsr.i = 1
+       ;;              // if (p15) (vcpu->vcpu_info->evtchn_upcall_mask)=0
+(p15)  ld1 r2=[r16]    // if (vcpu->vcpu_info->evtchn_upcall_pending)
+       ;;
+(p15)  cmp.ne.unc p6,p0=r2,r0
+       ;;
+(p6)   XEN_HYPER_SSM_I                         //   do a real ssm psr.i
 #else
 (p15)  ssm psr.i                               // M2   restore psr.i
 #endif
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h  Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h  Fri Jan 19 14:48:57 2007 +0000
@@ -151,16 +151,7 @@
 .mem.offset 8,0; st8.spill [r17]=r11,24;                                       \
         ;;                                                                     \
        /* xen special handling for possibly lazy cover */                      \
-       movl r8=XSI_INCOMPL_REGFR;                                              \
-       ;;                                                                      \
-       ld4 r30=[r8];                                                           \
-       ;;                                                                      \
-       /* set XSI_INCOMPL_REGFR 0 */                                           \
-       st4 [r8]=r0;                                                            \
-       cmp.eq  p6,p7=r30,r0;                                                   \
-       ;; /* not sure if this stop bit is necessary */                         \
-(p6)   adds r8=XSI_PRECOVER_IFS-XSI_INCOMPL_REGFR,r8;                          \
-(p7)   adds r8=XSI_IFS-XSI_INCOMPL_REGFR,r8;                                   \
+       movl r8=XSI_PRECOVER_IFS;                                               \
        ;;                                                                      \
        ld8 r30=[r8];                                                           \
        ;;                                                                      \
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S       Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S       Fri Jan 19 14:48:57 2007 +0000
@@ -33,7 +33,16 @@ 1:   {
        mov loc4=ar.rsc                 // save RSE configuration
        ;;
        mov ar.rsc=0                    // put RSE in enforced lazy, LE mode
+#ifdef CONFIG_XEN
+       mov r9 = r8
+       XEN_HYPER_GET_PSR
+       ;;
+       mov loc3 = r8
+       mov r8 = r9
+       ;;
+#else    
        mov loc3 = psr
+#endif    
        mov loc0 = rp
        .body
        mov r30 = in2
@@ -41,16 +50,16 @@ 1:  {
 #ifdef CONFIG_XEN
        // this is low priority for paravirtualization, but is called
        // from the idle loop so confuses privop counting
-       movl r31=XSI_PSR_IC
+       movl r31=XSI_PSR_I_ADDR
        ;;
-(p6)   st4 [r31]=r0
+       ld8 r31=[r31]
+       mov r22=1
        ;;
-(p7)   adds r31=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS,r31
-(p7)   mov r22=1
+       st1 [r31]=r22
+       ;;  
+(p6)   movl r31=XSI_PSR_IC
        ;;
-(p7)   ld8 r31=[r31]
-       ;;
-(p7)   st1 [r31]=r22
+(p6)   st4.rel [r31]=r0
        ;;
        mov r31 = in3
        mov b7 = loc2
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S     Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S     Fri Jan 19 14:48:57 2007 +0000
@@ -38,17 +38,16 @@ END(early_xen_setup)
 /* Stub for suspend.
    Just force the stacked registers to be written in memory.  */       
 GLOBAL_ENTRY(xencomm_arch_hypercall_suspend)
-       mov r15=r32
        ;; 
-       alloc r20=ar.pfs,0,0,0,0
+       alloc r20=ar.pfs,0,0,6,0
        mov r2=__HYPERVISOR_sched_op
        ;; 
        /* We don't want to deal with RSE.  */
        flushrs
-       mov r14=2 // SCHEDOP_shutdown
+       mov r33=r32
+       mov r32=2 // SCHEDOP_shutdown
        ;;
        break 0x1000
        ;; 
-       mov ar.pfs=r20
        br.ret.sptk.many b0
 END(xencomm_arch_hypercall_suspend)
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/drivers/xen/Kconfig
--- a/linux-2.6-xen-sparse/drivers/xen/Kconfig  Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig  Fri Jan 19 14:48:57 2007 +0000
@@ -238,14 +238,6 @@ config XEN_COMPAT_030002
 
 endmenu
 
-config HAVE_ARCH_ALLOC_SKB
-       bool
-       default y
-
-config HAVE_ARCH_DEV_ALLOC_SKB
-       bool
-       default y
-
 config HAVE_IRQ_IGNORE_UNHANDLED
        bool
        default y
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/include/asm-ia64/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Fri Jan 19 14:48:57 2007 +0000
@@ -39,6 +39,9 @@
 
 #include <asm/xen/xcom_hcall.h>
 struct xencomm_handle;
+extern unsigned long __hypercall(unsigned long a1, unsigned long a2,
+                                 unsigned long a3, unsigned long a4,
+                                 unsigned long a5, unsigned long cmd);
 
 /*
  * Assembler stubs for hyper-calls.
@@ -47,115 +50,58 @@ struct xencomm_handle;
 #define _hypercall0(type, name)                                        \
 ({                                                             \
        long __res;                                             \
-       __asm__ __volatile__ (";;\n"                            \
-                             "mov r2=%1\n"                     \
-                             "break 0x1000 ;;\n"               \
-                             "mov %0=r8 ;;\n"                  \
-                             : "=r" (__res)                    \
-                             : "J" (__HYPERVISOR_##name)       \
-                             : "r2","r8",                      \
-                               "memory" );                     \
+       __res=__hypercall(0, 0, 0, 0, 0, __HYPERVISOR_##name);  \
        (type)__res;                                            \
 })
 
 #define _hypercall1(type, name, a1)                            \
 ({                                                             \
        long __res;                                             \
-       __asm__ __volatile__ (";;\n"                            \
-                             "mov r14=%2\n"                    \
-                             "mov r2=%1\n"                     \
-                             "break 0x1000 ;;\n"               \
-                             "mov %0=r8 ;;\n"                  \
-                             : "=r" (__res)                    \
-                             : "J" (__HYPERVISOR_##name),      \
-                               "rI" ((unsigned long)(a1))      \
-                             : "r14","r2","r8",                \
-                               "memory" );                     \
+       __res = __hypercall((unsigned long)a1,                  \
+                            0, 0, 0, 0, __HYPERVISOR_##name);  \
        (type)__res;                                            \
 })
 
 #define _hypercall2(type, name, a1, a2)                                \
 ({                                                             \
        long __res;                                             \
-       __asm__ __volatile__ (";;\n"                            \
-                             "mov r14=%2\n"                    \
-                             "mov r15=%3\n"                    \
-                             "mov r2=%1\n"                     \
-                             "break 0x1000 ;;\n"               \
-                             "mov %0=r8 ;;\n"                  \
-                             : "=r" (__res)                    \
-                             : "J" (__HYPERVISOR_##name),      \
-                               "rI" ((unsigned long)(a1)),     \
-                               "rI" ((unsigned long)(a2))      \
-                             : "r14","r15","r2","r8",          \
-                               "memory" );                     \
+       __res = __hypercall((unsigned long)a1,                  \
+                           (unsigned long)a2,                  \
+                           0, 0, 0, __HYPERVISOR_##name);      \
        (type)__res;                                            \
 })
 
 #define _hypercall3(type, name, a1, a2, a3)                    \
 ({                                                             \
        long __res;                                             \
-       __asm__ __volatile__ (";;\n"                            \
-                             "mov r14=%2\n"                    \
-                             "mov r15=%3\n"                    \
-                             "mov r16=%4\n"                    \
-                             "mov r2=%1\n"                     \
-                             "break 0x1000 ;;\n"               \
-                             "mov %0=r8 ;;\n"                  \
-                             : "=r" (__res)                    \
-                             : "J" (__HYPERVISOR_##name),      \
-                               "rI" ((unsigned long)(a1)),     \
-                               "rI" ((unsigned long)(a2)),     \
-                               "rI" ((unsigned long)(a3))      \
-                             : "r14","r15","r16","r2","r8",    \
-                               "memory" );                     \
-       (type)__res;                                            \
+       __res = __hypercall((unsigned long)a1,                  \
+                           (unsigned long)a2,                  \
+                           (unsigned long)a3,                  \
+                           0, 0, __HYPERVISOR_##name);         \
+       (type)__res;                                            \
 })
 
 #define _hypercall4(type, name, a1, a2, a3, a4)                        \
 ({                                                             \
        long __res;                                             \
-       __asm__ __volatile__ (";;\n"                            \
-                             "mov r14=%2\n"                    \
-                             "mov r15=%3\n"                    \
-                             "mov r16=%4\n"                    \
-                             "mov r17=%5\n"                    \
-                             "mov r2=%1\n"                     \
-                             "break 0x1000 ;;\n"               \
-                             "mov %0=r8 ;;\n"                  \
-                             : "=r" (__res)                    \
-                             : "J" (__HYPERVISOR_##name),      \
-                               "rI" ((unsigned long)(a1)),     \
-                               "rI" ((unsigned long)(a2)),     \
-                               "rI" ((unsigned long)(a3)),     \
-                               "rI" ((unsigned long)(a4))      \
-                             : "r14","r15","r16","r2","r8",    \
-                               "r17","memory" );               \
-       (type)__res;                                            \
+       __res = __hypercall((unsigned long)a1,                  \
+                           (unsigned long)a2,                  \
+                           (unsigned long)a3,                  \
+                           (unsigned long)a4,                  \
+                           0, __HYPERVISOR_##name);            \
+       (type)__res;                                            \
 })
 
 #define _hypercall5(type, name, a1, a2, a3, a4, a5)            \
 ({                                                             \
        long __res;                                             \
-       __asm__ __volatile__ (";;\n"                            \
-                             "mov r14=%2\n"                    \
-                             "mov r15=%3\n"                    \
-                             "mov r16=%4\n"                    \
-                             "mov r17=%5\n"                    \
-                             "mov r18=%6\n"                    \
-                             "mov r2=%1\n"                     \
-                             "break 0x1000 ;;\n"               \
-                             "mov %0=r8 ;;\n"                  \
-                             : "=r" (__res)                    \
-                             : "J" (__HYPERVISOR_##name),      \
-                               "rI" ((unsigned long)(a1)),     \
-                               "rI" ((unsigned long)(a2)),     \
-                               "rI" ((unsigned long)(a3)),     \
-                               "rI" ((unsigned long)(a4)),     \
-                               "rI" ((unsigned long)(a5))      \
-                             : "r14","r15","r16","r2","r8",    \
-                               "r17","r18","memory" );         \
-       (type)__res;                                            \
+       __res = __hypercall((unsigned long)a1,                  \
+                           (unsigned long)a2,                  \
+                           (unsigned long)a3,                  \
+                           (unsigned long)a4,                  \
+                           (unsigned long)a5,                  \
+                           __HYPERVISOR_##name);               \
+       (type)__res;                                            \
 })
 
 
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h        Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h        Fri Jan 19 14:48:57 2007 +0000
@@ -215,7 +215,8 @@ asmlinkage int xprintk(const char *fmt, 
 #endif /* CONFIG_XEN || CONFIG_VMX_GUEST */
 
 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
-#define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN)
+#define is_initial_xendomain()                                         \
+       (is_running_on_xen() ? xen_start_info->flags & SIF_INITDOMAIN : 0)
 #else
 #define is_initial_xendomain() 0
 #endif
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/include/asm-ia64/maddr.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/maddr.h     Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/include/asm-ia64/maddr.h     Fri Jan 19 14:48:57 2007 +0000
@@ -68,7 +68,6 @@ static inline unsigned long
 static inline unsigned long
 mfn_to_local_pfn(unsigned long mfn)
 {
-       extern unsigned long max_mapnr;
        unsigned long pfn = mfn_to_pfn_for_dma(mfn);
        if (!pfn_valid(pfn))
                return INVALID_P2M_ENTRY;
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/include/asm-ia64/page.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/page.h      Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/include/asm-ia64/page.h      Fri Jan 19 14:48:57 2007 +0000
@@ -119,6 +119,7 @@ extern struct page *vmem_map;
 #endif
 
 #ifdef CONFIG_FLATMEM
+extern unsigned long max_mapnr;
 # define pfn_valid(pfn)                (((pfn) < max_mapnr) && ia64_pfn_valid(pfn))
 #elif defined(CONFIG_DISCONTIGMEM)
 extern unsigned long min_low_pfn;
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h        Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h        Fri Jan 19 14:48:57 2007 +0000
@@ -45,12 +45,10 @@
 #define        XEN_HYPER_GET_PMD               break HYPERPRIVOP_GET_PMD
 #define        XEN_HYPER_GET_EFLAG             break HYPERPRIVOP_GET_EFLAG
 #define        XEN_HYPER_SET_EFLAG             break HYPERPRIVOP_SET_EFLAG
-#define        XEN_HYPER_RSM_BE                break HYPERPRIVOP_RSM_BE
 #define        XEN_HYPER_GET_PSR               break HYPERPRIVOP_GET_PSR
 
 #define XSI_IFS                        (XSI_BASE + XSI_IFS_OFS)
 #define XSI_PRECOVER_IFS       (XSI_BASE + XSI_PRECOVER_IFS_OFS)
-#define XSI_INCOMPL_REGFR      (XSI_BASE + XSI_INCOMPL_REGFR_OFS)
 #define XSI_IFA                        (XSI_BASE + XSI_IFA_OFS)
 #define XSI_ISR                        (XSI_BASE + XSI_ISR_OFS)
 #define XSI_IIM                        (XSI_BASE + XSI_IIM_OFS)
@@ -123,8 +121,6 @@ extern void xen_set_eflag(unsigned long)
  * that we inline it */
 #define xen_hyper_ssm_i()                                              \
 ({                                                                     \
-       xen_set_virtual_psr_i(0);                                       \
-       xen_set_virtual_psr_ic(0);                                      \
        XEN_HYPER_SSM_I;                                                \
 })
 
@@ -139,8 +135,12 @@ extern void xen_set_eflag(unsigned long)
 #define xen_ssm_i()                                                    \
 ({                                                                     \
        int old = xen_get_virtual_psr_i();                              \
-       xen_set_virtual_psr_i(1);                                       \
-       if (!old && xen_get_virtual_pend()) xen_hyper_ssm_i();          \
+       if (!old) {                                                     \
+               if (xen_get_virtual_pend())                             \
+                       xen_hyper_ssm_i();                              \
+               else                                                    \
+                       xen_set_virtual_psr_i(1);                       \
+       }                                                               \
 })
 
 #define xen_ia64_intrin_local_irq_restore(x)                           \
@@ -182,6 +182,7 @@ extern void xen_set_eflag(unsigned long)
  * be properly handled by Xen, some are frequent enough that we use
  * hyperprivops for performance. */
 
+extern unsigned long xen_get_psr(void);
 extern unsigned long xen_get_ivr(void);
 extern unsigned long xen_get_tpr(void);
 extern void xen_set_itm(unsigned long);
@@ -201,6 +202,11 @@ extern void xen_ptcga(unsigned long addr
        __u64 ia64_intri_res;                                           \
                                                                        \
        switch(regnum) {                                                \
+       case _IA64_REG_PSR:                                             \
+               ia64_intri_res = (is_running_on_xen()) ?                        \
+                       xen_get_psr() :                                 \
+                       __ia64_getreg(regnum);                          \
+               break;                                                  \
        case _IA64_REG_CR_IVR:                                          \
                ia64_intri_res = (is_running_on_xen()) ?                        \
                        xen_get_ivr() :                                 \
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/include/linux/skbuff.h
--- a/linux-2.6-xen-sparse/include/linux/skbuff.h       Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/include/linux/skbuff.h       Fri Jan 19 14:48:57 2007 +0000
@@ -353,8 +353,7 @@ static inline struct sk_buff *alloc_skb_
 
 extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
                                            unsigned int size,
-                                           gfp_t priority,
-                                           int fclone);
+                                           gfp_t priority);
 extern void           kfree_skbmem(struct sk_buff *skb);
 extern struct sk_buff *skb_clone(struct sk_buff *skb,
                                 gfp_t priority);
diff -r 8475a4e0425e -r 3c8bb086025e linux-2.6-xen-sparse/net/core/skbuff.c
--- a/linux-2.6-xen-sparse/net/core/skbuff.c    Thu Jan 18 15:18:07 2007 +0000
+++ b/linux-2.6-xen-sparse/net/core/skbuff.c    Fri Jan 19 14:48:57 2007 +0000
@@ -210,18 +210,14 @@ nodata:
  */
 struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
                                     unsigned int size,
-                                    gfp_t gfp_mask,
-                                    int fclone)
-{
-       kmem_cache_t *cache;
-       struct skb_shared_info *shinfo;
+                                    gfp_t gfp_mask)
+{
        struct sk_buff *skb;
        u8 *data;
 
-       cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
-
        /* Get the HEAD */
-       skb = kmem_cache_alloc(cache, gfp_mask & ~__GFP_DMA);
+       skb = kmem_cache_alloc(skbuff_head_cache,
+                              gfp_mask & ~__GFP_DMA);
        if (!skb)
                goto out;
 
@@ -238,29 +234,18 @@ struct sk_buff *alloc_skb_from_cache(kme
        skb->data = data;
        skb->tail = data;
        skb->end  = data + size;
-       /* make sure we initialize shinfo sequentially */
-       shinfo = skb_shinfo(skb);
-       atomic_set(&shinfo->dataref, 1);
-       shinfo->nr_frags  = 0;
-       shinfo->gso_size = 0;
-       shinfo->gso_segs = 0;
-       shinfo->gso_type = 0;
-       shinfo->ip6_frag_id = 0;
-       shinfo->frag_list = NULL;
-
-       if (fclone) {
-               struct sk_buff *child = skb + 1;
-               atomic_t *fclone_ref = (atomic_t *) (child + 1);
-
-               skb->fclone = SKB_FCLONE_ORIG;
-               atomic_set(fclone_ref, 1);
-
-               child->fclone = SKB_FCLONE_UNAVAILABLE;
-       }
+
+       atomic_set(&(skb_shinfo(skb)->dataref), 1);
+       skb_shinfo(skb)->nr_frags  = 0;
+       skb_shinfo(skb)->gso_size = 0;
+       skb_shinfo(skb)->gso_segs = 0;
+       skb_shinfo(skb)->gso_type = 0;
+       skb_shinfo(skb)->ip6_frag_id = 0;
+       skb_shinfo(skb)->frag_list = NULL;
 out:
        return skb;
 nodata:
-       kmem_cache_free(cache, skb);
+       kmem_cache_free(skbuff_head_cache, skb);
        skb = NULL;
        goto out;
 }
diff -r 8475a4e0425e -r 3c8bb086025e tools/ioemu/hw/cirrus_vga.c
--- a/tools/ioemu/hw/cirrus_vga.c       Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/ioemu/hw/cirrus_vga.c       Fri Jan 19 14:48:57 2007 +0000
@@ -3010,11 +3010,44 @@ static CPUWriteMemoryFunc *cirrus_mmio_w
     cirrus_mmio_writel,
 };
 
+void cirrus_stop_acc(CirrusVGAState *s)
+{
+    if (s->map_addr){
+        int error;
+        s->map_addr = 0;
+        error = unset_vram_mapping(s->cirrus_lfb_addr,
+                s->cirrus_lfb_end);
+        fprintf(stderr, "cirrus_stop_acc:unset_vram_mapping.\n");
+
+        munmap(s->vram_ptr, VGA_RAM_SIZE);
+    }
+}
+
+void cirrus_restart_acc(CirrusVGAState *s)
+{
+    if (s->cirrus_lfb_addr && s->cirrus_lfb_end) {
+        void *vram_pointer, *old_vram;
+        fprintf(stderr, "cirrus_vga_load:re-enable vga acc.lfb_addr=0x%lx, lfb_end=0x%lx.\n",
+                s->cirrus_lfb_addr, s->cirrus_lfb_end);
+        vram_pointer = set_vram_mapping(s->cirrus_lfb_addr ,s->cirrus_lfb_end);
+        if (!vram_pointer){
+            fprintf(stderr, "cirrus_vga_load:NULL vram_pointer\n");
+        } else {
+            old_vram = vga_update_vram((VGAState *)s, vram_pointer,
+                    VGA_RAM_SIZE);
+            qemu_free(old_vram);
+            s->map_addr = s->cirrus_lfb_addr;
+            s->map_end = s->cirrus_lfb_end;
+        }
+    }
+}
+
 /* load/save state */
 
 static void cirrus_vga_save(QEMUFile *f, void *opaque)
 {
     CirrusVGAState *s = opaque;
+    uint8_t vga_acc;
 
     qemu_put_be32s(f, &s->latch);
     qemu_put_8s(f, &s->sr_index);
@@ -3049,11 +3082,20 @@ static void cirrus_vga_save(QEMUFile *f,
     qemu_put_be32s(f, &s->hw_cursor_y);
     /* XXX: we do not save the bitblt state - we assume we do not save
        the state when the blitter is active */
+
+    vga_acc = (!!s->map_addr);
+    qemu_put_8s(f, &vga_acc);
+    qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_addr);
+    qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_end);
+    qemu_put_buffer(f, s->vram_ptr, VGA_RAM_SIZE); 
+    if (vga_acc)
+        cirrus_stop_acc(s);
 }
 
 static int cirrus_vga_load(QEMUFile *f, void *opaque, int version_id)
 {
     CirrusVGAState *s = opaque;
+    uint8_t vga_acc = 0;
 
     if (version_id != 1)
         return -EINVAL;
@@ -3091,6 +3133,14 @@ static int cirrus_vga_load(QEMUFile *f, 
 
     qemu_get_be32s(f, &s->hw_cursor_x);
     qemu_get_be32s(f, &s->hw_cursor_y);
+
+    qemu_get_8s(f, &vga_acc);
+    qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_addr);
+    qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_end);
+    qemu_get_buffer(f, s->vram_ptr, VGA_RAM_SIZE); 
+    if (vga_acc){
+        cirrus_restart_acc(s);
+    }
 
     /* force refresh */
     s->graphic_mode = -1;
@@ -3297,6 +3347,8 @@ void pci_cirrus_vga_init(PCIBus *bus, Di
                     ds, vga_ram_base, vga_ram_offset, vga_ram_size);
     cirrus_init_common(s, device_id, 1);
 
+    register_savevm("cirrus_vga_pci", 0, 1, generic_pci_save, generic_pci_load, d);
+
     /* setup memory space */
     /* memory #0 LFB */
     /* memory #1 memory-mapped I/O */
diff -r 8475a4e0425e -r 3c8bb086025e tools/ioemu/hw/ide.c
--- a/tools/ioemu/hw/ide.c      Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/ioemu/hw/ide.c      Fri Jan 19 14:48:57 2007 +0000
@@ -2512,6 +2512,9 @@ void pci_piix3_ide_init(PCIBus *bus, Blo
               pic_set_irq_new, isa_pic, 15);
     ide_init_ioport(&d->ide_if[0], 0x1f0, 0x3f6);
     ide_init_ioport(&d->ide_if[2], 0x170, 0x376);
+
+    register_savevm("ide_pci", 0, 1, generic_pci_save, generic_pci_load, d);
+
 #ifdef DMA_MULTI_THREAD    
     dma_create_thread();
 #endif //DMA_MULTI_THREAD    
diff -r 8475a4e0425e -r 3c8bb086025e tools/ioemu/hw/pci.c
--- a/tools/ioemu/hw/pci.c      Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/ioemu/hw/pci.c      Fri Jan 19 14:48:57 2007 +0000
@@ -40,6 +40,8 @@ static int pci_irq_index;
 static int pci_irq_index;
 static PCIBus *first_bus;
 
+static void pci_update_mappings(PCIDevice *d);
+
 PCIBus *pci_register_bus(pci_set_irq_fn set_irq, void *pic, int devfn_min)
 {
     PCIBus *bus;
@@ -71,6 +73,7 @@ int generic_pci_load(QEMUFile* f, void *
         return -EINVAL;
 
     qemu_get_buffer(f, s->config, 256);
+    pci_update_mappings(s);
     return 0;
 }
 
diff -r 8475a4e0425e -r 3c8bb086025e tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c      Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/ioemu/target-i386-dm/helper2.c      Fri Jan 19 14:48:57 2007 +0000
@@ -546,6 +546,7 @@ int main_loop(void)
 {
     extern int vm_running;
     extern int shutdown_requested;
+    extern int suspend_requested;
     CPUState *env = cpu_single_env;
     int evtchn_fd = xc_evtchn_fd(xce_handle);
 
@@ -563,12 +564,24 @@ int main_loop(void)
                 qemu_system_reset();
                 reset_requested = 0;
             }
+            if (suspend_requested) {
+                fprintf(logfile, "device model received suspend signal!\n");
+                break;
+            }
         }
 
         /* Wait up to 10 msec. */
         main_loop_wait(10);
     }
-    destroy_hvm_domain();
+    if (!suspend_requested)
+        destroy_hvm_domain();
+    else {
+        char qemu_file[20];
+        sprintf(qemu_file, "/tmp/xen.qemu-dm.%d", domid);
+        if (qemu_savevm(qemu_file) < 0)
+            fprintf(stderr, "qemu save fail.\n");
+    }
+
     return 0;
 }
 
diff -r 8475a4e0425e -r 3c8bb086025e tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/ioemu/vl.c  Fri Jan 19 14:48:57 2007 +0000
@@ -4441,6 +4441,11 @@ int qemu_loadvm(const char *filename)
         qemu_fseek(f, cur_pos + record_len, SEEK_SET);
     }
     fclose(f);
+
+    /* del tmp file */
+    if (unlink(filename) == -1)
+        fprintf(stderr, "delete tmp qemu state file failed.\n");
+
     ret = 0;
  the_end:
     if (saved_vm_running)
@@ -5027,6 +5032,7 @@ static QEMUResetEntry *first_reset_entry
 static QEMUResetEntry *first_reset_entry;
 int reset_requested;
 int shutdown_requested;
+int suspend_requested;
 static int powerdown_requested;
 
 void qemu_register_reset(QEMUResetHandler *func, void *opaque)
@@ -5806,6 +5812,14 @@ int set_mm_mapping(int xc_handle, uint32
     }
 
     return 0;
+}
+
+void suspend(int sig)
+{
+   fprintf(logfile, "suspend sig handler called with requested=%d!\n", suspend_requested);
+    if (sig != SIGUSR1)
+        fprintf(logfile, "suspend signal dismatch, get sig=%d!\n", sig);
+    suspend_requested = 1;
 }
 
 #if defined(__i386__) || defined(__x86_64__)
@@ -6464,10 +6478,6 @@ int main(int argc, char **argv)
     }
 
 #if defined (__ia64__)
-    /* ram_size passed from xend has added on GFW memory,
-       so we must subtract it here */
-    ram_size -= 16 * MEM_M;
-
     if (ram_size > MMIO_START)
         ram_size += 1 * MEM_G; /* skip 3G-4G MMIO, LEGACY_IO_SPACE etc. */
 #endif
@@ -6718,6 +6728,26 @@ int main(int argc, char **argv)
             vm_start();
         }
     }
+
+    /* register signal for the suspend request when save */
+    {
+        struct sigaction act;
+        sigset_t set;
+        act.sa_handler = suspend;
+        act.sa_flags = SA_RESTART;
+        sigemptyset(&act.sa_mask);
+
+        sigaction(SIGUSR1, &act, NULL);
+
+        /* control panel masks some signals when spawning qemu; unmask them here */
+        sigemptyset(&set);
+        sigaddset(&set, SIGUSR1);
+        sigaddset(&set, SIGTERM);
+        if (sigprocmask(SIG_UNBLOCK, &set, NULL) == -1)
+            fprintf(stderr, "unblock signal fail, possible issue for HVM save!\n");
+
+    }
+
     main_loop();
     quit_timers();
     return 0;
diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/Makefile
--- a/tools/libxc/Makefile      Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/libxc/Makefile      Fri Jan 19 14:48:57 2007 +0000
@@ -27,7 +27,7 @@ GUEST_SRCS-$(CONFIG_X86) += xc_linux_bui
 GUEST_SRCS-$(CONFIG_X86) += xc_linux_build.c
 GUEST_SRCS-$(CONFIG_IA64) += xc_linux_build.c
 GUEST_SRCS-$(CONFIG_MIGRATE) += xc_linux_restore.c xc_linux_save.c
-GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c
+GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_restore.c xc_hvm_save.c
 
 -include $(XEN_TARGET_ARCH)/Makefile
 
diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/ia64/xc_ia64_hvm_build.c
--- a/tools/libxc/ia64/xc_ia64_hvm_build.c      Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/libxc/ia64/xc_ia64_hvm_build.c      Fri Jan 19 14:48:57 2007 +0000
@@ -569,18 +569,13 @@ setup_guest(int xc_handle, uint32_t dom,
     xen_pfn_t *pfn_list;
     shared_iopage_t *sp;
     void *ioreq_buffer_page;
-    // memsize equal to normal memory size(in configure file) + 16M
-    // dom_memsize will pass to xc_ia64_build_hob(), so must be subbed 16M 
-    unsigned long dom_memsize = ((memsize - 16) << 20);
-    unsigned long nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
-    unsigned long normal_pages = nr_pages - GFW_PAGES;
+    unsigned long dom_memsize = memsize << 20;
+    unsigned long nr_pages = memsize << (20 - PAGE_SHIFT);
     unsigned long vcpus;
     int rc;
-    long i, j;
+    long i;
     DECLARE_DOMCTL;
 
-    // ROM size for guest firmware, ioreq page and xenstore page
-    nr_pages += 3; 
 
     if ((image_size > 12 * MEM_M) || (image_size & (PAGE_SIZE - 1))) {
         PERROR("Guest firmware size is incorrect [%ld]?", image_size);
@@ -598,20 +593,20 @@ setup_guest(int xc_handle, uint32_t dom,
         pfn_list[i] = i;
 
     // If normal memory > 3G. Reserve 3G ~ 4G for MMIO, GFW and others.
-    for (j = (MMIO_START >> PAGE_SHIFT); j < (dom_memsize >> PAGE_SHIFT); j++)
-        pfn_list[j] += ((1 * MEM_G) >> PAGE_SHIFT);
+    for (i = (MMIO_START >> PAGE_SHIFT); i < (dom_memsize >> PAGE_SHIFT); i++)
+        pfn_list[i] += ((1 * MEM_G) >> PAGE_SHIFT);
 
     // Allocate memory for VTI guest, up to VGA hole from 0xA0000-0xC0000. 
     rc = xc_domain_memory_populate_physmap(xc_handle, dom,
-                                           (normal_pages > VGA_START_PAGE) ?
-                                           VGA_START_PAGE : normal_pages,
+                                           (nr_pages > VGA_START_PAGE) ?
+                                           VGA_START_PAGE : nr_pages,
                                            0, 0, &pfn_list[0]);
 
     // We're not likely to attempt to create a domain with less than
     // 640k of memory, but test for completeness
     if (rc == 0 && nr_pages > VGA_END_PAGE)
         rc = xc_domain_memory_populate_physmap(xc_handle, dom,
-                                               normal_pages - VGA_END_PAGE,
+                                               nr_pages - VGA_END_PAGE,
                                                0, 0, &pfn_list[VGA_END_PAGE]);
     if (rc != 0) {
         PERROR("Could not allocate normal memory for Vti guest.\n");
@@ -621,24 +616,22 @@ setup_guest(int xc_handle, uint32_t dom,
     // We allocate additional pfn for GFW and other three pages, so
     // the pfn_list is not contiguous.  Due to this we must support
     // old interface xc_ia64_get_pfn_list().
-    // Here i = (dom_memsize >> PAGE_SHIFT)
-    for (j = 0; i < nr_pages - 3; i++, j++) 
-        pfn_list[i] = (GFW_START >> PAGE_SHIFT) + j;
+    for (i = 0; i < GFW_PAGES; i++) 
+        pfn_list[i] = (GFW_START >> PAGE_SHIFT) + i;
 
     rc = xc_domain_memory_populate_physmap(xc_handle, dom, GFW_PAGES,
-                                           0, 0, &pfn_list[normal_pages]);
+                                           0, 0, &pfn_list[0]);
     if (rc != 0) {
         PERROR("Could not allocate GFW memory for Vti guest.\n");
         goto error_out;
     }
 
-    // Here i = (dom_memsize >> PAGE_SHIFT) + GFW_PAGES
-    pfn_list[i] = IO_PAGE_START >> PAGE_SHIFT;
-    pfn_list[i+1] = STORE_PAGE_START >> PAGE_SHIFT;
-    pfn_list[i+2] = BUFFER_IO_PAGE_START >> PAGE_SHIFT; 
+    pfn_list[0] = IO_PAGE_START >> PAGE_SHIFT;
+    pfn_list[1] = STORE_PAGE_START >> PAGE_SHIFT;
+    pfn_list[2] = BUFFER_IO_PAGE_START >> PAGE_SHIFT; 
 
     rc = xc_domain_memory_populate_physmap(xc_handle, dom, 3,
-                                           0, 0, &pfn_list[nr_pages - 3]);
+                                           0, 0, &pfn_list[0]);
     if (rc != 0) {
         PERROR("Could not allocate IO page or store page or buffer io page.\n");
         goto error_out;
@@ -675,13 +668,12 @@ setup_guest(int xc_handle, uint32_t dom,
         goto error_out;
     }
 
-    xc_set_hvm_param(xc_handle, dom,
-                     HVM_PARAM_STORE_PFN, pfn_list[nr_pages - 2]);
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, pfn_list[1]);
 
     // Retrieve special pages like io, xenstore, etc. 
     sp = (shared_iopage_t *)xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                  PROT_READ | PROT_WRITE,
-                                                 pfn_list[nr_pages - 3]);
+                                                 pfn_list[0]);
     if (sp == 0)
         goto error_out;
 
@@ -689,7 +681,7 @@ setup_guest(int xc_handle, uint32_t dom,
     munmap(sp, PAGE_SIZE);
     ioreq_buffer_page = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                              PROT_READ | PROT_WRITE,
-                                             pfn_list[nr_pages - 1]); 
+                                             pfn_list[2]); 
     memset(ioreq_buffer_page,0,PAGE_SIZE);
     munmap(ioreq_buffer_page, PAGE_SIZE);
     free(pfn_list);
diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/libxc/xc_domain.c   Fri Jan 19 14:48:57 2007 +0000
@@ -233,6 +233,50 @@ int xc_domain_getinfolist(int xc_handle,
     unlock_pages(info, max_domains*sizeof(xc_domaininfo_t));
 
     return ret;
+}
+
+/* get info from hvm guest for save */
+int xc_domain_hvm_getcontext(int xc_handle,
+                             uint32_t domid,
+                             hvm_domain_context_t *hvm_ctxt)
+{
+    int rc;
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_gethvmcontext;
+    domctl.domain = (domid_t)domid;
+    set_xen_guest_handle(domctl.u.hvmcontext.ctxt, hvm_ctxt);
+
+    if ( (rc = mlock(hvm_ctxt, sizeof(*hvm_ctxt))) != 0 )
+        return rc;
+
+    rc = do_domctl(xc_handle, &domctl);
+
+    safe_munlock(hvm_ctxt, sizeof(*hvm_ctxt));
+
+    return rc;
+}
+
+/* set info to hvm guest for restore */
+int xc_domain_hvm_setcontext(int xc_handle,
+                             uint32_t domid,
+                             hvm_domain_context_t *hvm_ctxt)
+{
+    int rc;
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_sethvmcontext;
+    domctl.domain = domid;
+    set_xen_guest_handle(domctl.u.hvmcontext.ctxt, hvm_ctxt);
+
+    if ( (rc = mlock(hvm_ctxt, sizeof(*hvm_ctxt))) != 0 )
+        return rc;
+
+    rc = do_domctl(xc_handle, &domctl);
+
+    safe_munlock(hvm_ctxt, sizeof(*hvm_ctxt));
+
+    return rc;
 }
 
 int xc_vcpu_getcontext(int xc_handle,
diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/xc_hvm_restore.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_hvm_restore.c      Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,360 @@
+/******************************************************************************
+ * xc_hvm_restore.c
+ *
+ * Restore the state of a HVM guest.
+ *
+ * Copyright (c) 2003, K A Fraser.
+ * Copyright (c) 2006 Intel Corperation
+ * rewriten for hvm guest by Zhai Edwin <edwin.zhai@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "xg_private.h"
+#include "xg_save_restore.h"
+
+#include <xen/hvm/ioreq.h>
+#include <xen/hvm/params.h>
+#include <xen/hvm/e820.h>
+
+/* max mfn of the whole machine */
+static unsigned long max_mfn;
+
+/* virtual starting address of the hypervisor */
+static unsigned long hvirt_start;
+
+/* #levels of page tables used by the current guest */
+static unsigned int pt_levels;
+
+/* total number of pages used by the current guest */
+static unsigned long max_pfn;
+
+/* A table mapping each PFN to its new MFN. */
+static xen_pfn_t *p2m = NULL;
+
+/*
+ * Read exactly @count bytes from @fd into @buf, retrying reads that were
+ * interrupted by a signal (EINTR) and continuing after short reads.
+ *
+ * Returns 1 if all @count bytes were read, 0 on error or premature EOF.
+ */
+static ssize_t
+read_exact(int fd, void *buf, size_t count)
+{
+    size_t done = 0;    /* size_t, so it compares safely against count */
+    ssize_t s;          /* read() returns ssize_t, not int */
+    unsigned char *b = buf;
+
+    while (done < count) {
+        s = read(fd, &b[done], count - done);
+        if ((s == -1) && (errno == EINTR))
+            continue;
+        if (s <= 0)
+            break;
+        done += (size_t)s;
+    }
+
+    return (done == count) ? 1 : 0;
+}
+
+/*
+ * Restore a saved HVM guest image from @io_fd into the already-created
+ * domain @dom.
+ *
+ * The stream is consumed in the order it is parsed below: batches of guest
+ * pages (a batch count, the pfns of the batch, then the page contents),
+ * terminated by a zero count; the size-prefixed HVM domain context; the
+ * vcpu count; and one size-prefixed context record per vcpu.  A batch count
+ * of -1 switches to verify mode, where pages are compared, not written.
+ *
+ * @nr_pfns        number of guest pfns; sizes the p2m table and maxmem
+ * @store_evtchn   value recorded in HVM_PARAM_STORE_EVTCHN
+ * @store_mfn      in: guest memory size in MB; out: computed store pfn
+ * @pae            value recorded in HVM_PARAM_PAE_ENABLED
+ * @console_evtchn, @console_mfn, @apic  only logged here
+ *
+ * Returns 0 on success and non-zero on failure.  Failures after the two
+ * initial set-up checks destroy the domain (unless it is domain 0).
+ */
+int xc_hvm_restore(int xc_handle, int io_fd,
+                     uint32_t dom, unsigned long nr_pfns,
+                     unsigned int store_evtchn, unsigned long *store_mfn,
+                     unsigned int console_evtchn, unsigned long *console_mfn,
+                     unsigned int pae, unsigned int apic)
+{
+    DECLARE_DOMCTL;
+
+    /* The new domain's shared-info frame number. */
+    unsigned long shared_info_frame;
+
+    /* A copy of the CPU context of the guest. */
+    vcpu_guest_context_t ctxt;
+
+    char *region_base;
+
+    /* Scratch page used in verify mode. */
+    unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
+
+    xc_dominfo_t info;
+    unsigned int rc = 1, n, i;
+    uint32_t rec_len, nr_vcpus;
+    hvm_domain_context_t hvm_ctxt;
+    unsigned long long v_end, memsize;
+    unsigned long shared_page_nr;
+
+    unsigned long mfn, pfn;
+    unsigned int prev_pc, this_pc;
+    int verify = 0;
+
+    /* Types of the pfns in the current region */
+    unsigned long region_pfn_type[MAX_BATCH_SIZE];
+
+    /* hvm guest mem size (Mb) */
+    memsize = (unsigned long long)*store_mfn;
+    v_end = memsize << 20;
+
+    DPRINTF("xc_hvm_restore:dom=%d, nr_pfns=0x%lx, store_evtchn=%d, "
+            "*store_mfn=%ld, console_evtchn=%d, *console_mfn=%ld, "
+            "pae=%u, apic=%u.\n",
+            dom, nr_pfns, store_evtchn, *store_mfn, console_evtchn,
+            *console_mfn, pae, apic);
+
+    max_pfn = nr_pfns;
+
+    if(!get_platform_info(xc_handle, dom,
+                          &max_mfn, &hvirt_start, &pt_levels)) {
+        ERROR("Unable to get platform info.");
+        return 1;
+    }
+
+    DPRINTF("xc_hvm_restore start: max_pfn = %lx, max_mfn = %lx, "
+            "hvirt_start=%lx, pt_levels=%d\n",
+            max_pfn,
+            max_mfn,
+            hvirt_start,
+            pt_levels);
+
+    if (mlock(&ctxt, sizeof(ctxt))) {
+        /* needed for build dom0 op, but might as well do early */
+        ERROR("Unable to mlock ctxt");
+        return 1;
+    }
+
+    p2m        = malloc(max_pfn * sizeof(xen_pfn_t));
+
+    if (p2m == NULL) {
+        ERROR("memory alloc failed");
+        errno = ENOMEM;
+        goto out;
+    }
+
+    /* Get the domain's shared-info frame. */
+    domctl.cmd = XEN_DOMCTL_getdomaininfo;
+    domctl.domain = (domid_t)dom;
+    if (xc_domctl(xc_handle, &domctl) < 0) {
+        ERROR("Could not get information on new domain");
+        goto out;
+    }
+    shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
+
+    if(xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) {
+        errno = ENOMEM;
+        goto out;
+    }
+
+    /* Identity map, with pfns at/above 4G-RAM-end shifted past the MMIO hole. */
+    for ( i = 0; i < max_pfn; i++ )
+        p2m[i] = i;
+    for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < max_pfn; i++ )
+        p2m[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
+
+    /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */
+    rc = xc_domain_memory_populate_physmap(
+        xc_handle, dom, (max_pfn > 0xa0) ? 0xa0 : max_pfn,
+        0, 0, &p2m[0x00]);
+    if ( (rc == 0) && (max_pfn > 0xc0) )
+        rc = xc_domain_memory_populate_physmap(
+            xc_handle, dom, max_pfn - 0xc0, 0, 0, &p2m[0xc0]);
+    if ( rc != 0 )
+    {
+        PERROR("Could not allocate memory for HVM guest.\n");
+        goto out;
+    }
+
+    /*
+     * rc is 0 here.  Re-arm it so that every "goto out" below reports a
+     * failure (and destroys the half-built domain) rather than success.
+     */
+    rc = 1;
+
+    /**********XXXXXXXXXXXXXXXX******************/
+    if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) {
+        ERROR("Could not get domain info");
+        goto out;   /* not "return": p2m must be freed */
+    }
+
+    domctl.cmd = XEN_DOMCTL_getdomaininfo;
+    domctl.domain = (domid_t)dom;
+    if (xc_domctl(xc_handle, &domctl) < 0) {
+        ERROR("Could not get information on new domain");
+        goto out;
+    }
+
+    /* From here on p2m is a plain identity table. */
+    for ( i = 0; i < max_pfn; i++)
+        p2m[i] = i;
+
+    prev_pc = 0;
+
+    n = 0;
+    while (1) {
+
+        int j;
+
+        this_pc = (n * 100) / max_pfn;
+        if ( (this_pc - prev_pc) >= 5 )
+        {
+            PPRINTF("\b\b\b\b%3d%%", this_pc);
+            prev_pc = this_pc;
+        }
+
+        if (!read_exact(io_fd, &j, sizeof(int))) {
+            ERROR("HVM restore Error when reading batch size");
+            goto out;
+        }
+
+        PPRINTF("batch %d\n",j);
+
+        if (j == -1) {
+            verify = 1;
+            DPRINTF("Entering page verify mode\n");
+            continue;
+        }
+
+        if (j == 0)
+            break;  /* our work here is done */
+
+        /*
+         * Reject negative counts too: j*sizeof(unsigned long) would become
+         * a huge size_t and overrun region_pfn_type in the read below.
+         */
+        if (j < 0 || j > MAX_BATCH_SIZE) {
+            ERROR("Max batch size exceeded. Giving up.");
+            goto out;
+        }
+
+        if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) {
+            ERROR("Error when reading region pfn types");
+            goto out;
+        }
+
+        region_base = xc_map_foreign_batch(
+            xc_handle, dom, PROT_WRITE, region_pfn_type, j);
+        if (region_base == NULL) {
+            ERROR("map batch failed");
+            goto out;
+        }
+
+        for ( i = 0; i < j; i++ )
+        {
+            void *page;
+
+            pfn = region_pfn_type[i];
+            /*
+             * NOTE(review): pfn == max_pfn passes this test although p2m
+             * holds only max_pfn entries -- confirm whether ">=" is meant.
+             */
+            if ( pfn > max_pfn )
+            {
+                ERROR("pfn out of range");
+                goto out;
+            }
+
+            if ( pfn >= 0xa0 && pfn < 0xc0) {
+                ERROR("hvm restore:pfn in vga hole");
+                goto out;
+            }
+
+            mfn = p2m[pfn];
+
+            /* In verify mode, we use a copy; otherwise we work in place */
+            page = verify ? (void *)buf : (region_base + i*PAGE_SIZE);
+
+            if (!read_exact(io_fd, page, PAGE_SIZE)) {
+                ERROR("Error when reading page (%x)", i);
+                goto out;
+            }
+
+            if (verify) {
+
+                int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE);
+
+                if (res) {
+
+                    int v;
+
+                    DPRINTF("************** pfn=%lx mfn=%lx gotcs=%08lx "
+                            "actualcs=%08lx\n", pfn, p2m[pfn],
+                            csum_page(region_base + i*PAGE_SIZE),
+                            csum_page(buf));
+
+                    /* Show which of the first four words differ. */
+                    for (v = 0; v < 4; v++) {
+
+                        unsigned long *p = (unsigned long *)
+                            (region_base + i*PAGE_SIZE);
+                        if (buf[v] != p[v])
+                            DPRINTF("    %d: %08lx %08lx\n", v, buf[v], p[v]);
+                    }
+                }
+            }
+
+        } /* end of 'batch' for loop */
+        munmap(region_base, j*PAGE_SIZE);
+        n+= j; /* crude stats */
+
+    }/*while 1*/
+
+/*    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_APIC_ENABLED, apic);*/
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
+
+    /* The ioreq page is the last page of RAM below the 4G boundary. */
+    if ( v_end > HVM_BELOW_4G_RAM_END )
+        shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1;
+    else
+        shared_page_nr = (v_end >> PAGE_SHIFT) - 1;
+
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1);
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2);
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr);
+
+    /* calculate the store_mfn , wrong val cause hang when introduceDomain */
+    *store_mfn = (v_end >> PAGE_SHIFT) - 2;
+    DPRINTF("hvm restore:calculate new store_mfn=0x%lx,v_end=0x%llx..\n",
+            *store_mfn, v_end);
+
+    /* restore hvm context including pic/pit/shpage */
+    if (!read_exact(io_fd, &rec_len, sizeof(uint32_t))) {
+        ERROR("error read hvm context size!\n");
+        goto out;
+    }
+    if (rec_len != sizeof(hvm_ctxt)) {
+        ERROR("hvm context size dismatch!\n");
+        goto out;
+    }
+
+    if (!read_exact(io_fd, &hvm_ctxt, sizeof(hvm_ctxt))) {
+        ERROR("error read hvm context!\n");
+        goto out;
+    }
+
+    if (( rc = xc_domain_hvm_setcontext(xc_handle, dom, &hvm_ctxt))) {
+        ERROR("error set hvm context!\n");
+        goto out;
+    }
+
+    /* rc is 0 again after the successful call above; re-arm it. */
+    rc = 1;
+
+    if (!read_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) {
+        ERROR("error read nr vcpu !\n");
+        goto out;
+    }
+    DPRINTF("hvm restore:get nr_vcpus=%d.\n", nr_vcpus);
+
+    for (i =0; i < nr_vcpus; i++) {
+        if (!read_exact(io_fd, &rec_len, sizeof(uint32_t))) {
+            ERROR("error read vcpu context size!\n");
+            goto out;
+        }
+        if (rec_len != sizeof(ctxt)) {
+            ERROR("vcpu context size dismatch!\n");
+            goto out;
+        }
+
+        if (!read_exact(io_fd, &(ctxt), sizeof(ctxt))) {
+            ERROR("error read vcpu context.\n");
+            goto out;
+        }
+
+        if ( (rc = xc_vcpu_setcontext(xc_handle, dom, i, &ctxt)) ) {
+            ERROR("Could not set vcpu context, rc=%d", rc);
+            goto out;
+        }
+
+        /* As above: keep later failures in this loop from returning 0. */
+        rc = 1;
+    }
+
+    rc = 0;
+
+ out:
+    if ( (rc != 0) && (dom != 0) )
+        xc_domain_destroy(xc_handle, dom);
+    free(p2m);
+
+    DPRINTF("Restore exit with rc=%d\n", rc);
+
+    return rc;
+}
diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/xc_hvm_save.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_hvm_save.c Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,727 @@
+/******************************************************************************
+ * xc_hvm_save.c
+ *
+ * Save the state of a running HVM guest.
+ *
+ * Copyright (c) 2003, K A Fraser.
+ * Copyright (c) 2006 Intel Corperation
+ * rewriten for hvm guest by Zhai Edwin <edwin.zhai@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <inttypes.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#include "xc_private.h"
+#include "xg_private.h"
+#include "xg_save_restore.h"
+
+/*
+** Default values for important tuning parameters. Can override by passing
+** non-zero replacement values to xc_hvm_save().
+**
+** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
+**
+*/
+#define DEF_MAX_ITERS   29   /* limit us to 30 times round loop   */
+#define DEF_MAX_FACTOR   3   /* never send more than 3x nr_pfns   */
+
+/* max mfn of the whole machine */
+static unsigned long max_mfn;
+
+/* virtual starting address of the hypervisor */
+static unsigned long hvirt_start;
+
+/* #levels of page tables used by the current guest */
+static unsigned int pt_levels;
+
+/* total number of pages used by the current guest */
+static unsigned long max_pfn;
+
+/*
+** During (live) save/migrate, we maintain a number of bitmaps to track
+** which pages we have to send, to fixup, and to skip.
+*/
+
+#define BITS_PER_LONG (sizeof(unsigned long) * 8)
+#define BITMAP_SIZE   ((max_pfn + BITS_PER_LONG - 1) / 8)
+
+#define BITMAP_ENTRY(_nr,_bmap) \
+   ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
+
+#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
+
+/* Return 1 if bit @nr is set in the bitmap at @addr, otherwise 0. */
+static inline int test_bit (int nr, volatile void * addr)
+{
+    unsigned long word = BITMAP_ENTRY(nr, addr);
+
+    return (word >> BITMAP_SHIFT(nr)) & 1;
+}
+
+/* Clear bit @nr in the bitmap at @addr. */
+static inline void clear_bit (int nr, volatile void * addr)
+{
+    unsigned long mask = 1UL << BITMAP_SHIFT(nr);
+
+    BITMAP_ENTRY(nr, addr) &= ~mask;
+}
+
+/*
+ * Map page index @i (expected in [0, nr)) to another index in [0, nr).
+ *
+ * Pages are scanned in a pseudo-random order to get a better estimate of
+ * the domain's page dirtying rate as we go: contiguous ranges of pfns
+ * often behave alike, and we want to mix them up.
+ *
+ * @order_nr is the number of bits needed to hold nr-1 (e.g. nr->order
+ * 15->4 16->4 17->5; a 512MB domain has 128k pages, order 17).  The
+ * index is rotated left by 10 bits within an order_nr-bit field:
+ *
+ *    QPONMLKJIHGFEDCBA  ->  GFEDCBAQPONMLKJIH
+ *
+ * and the rotation is repeated while the result falls outside [0, nr).
+ */
+static inline int permute( int i, int nr, int order_nr  )
+{
+    unsigned int v = (unsigned int)i;
+    unsigned int mask;
+
+    /*
+     * The rotation needs at least 10 bits: for smaller orders the shift
+     * count (order_nr-10) would be negative, which is undefined.  Scan such
+     * small domains in plain order instead.
+     */
+    if ( order_nr < 10 )
+        return i;
+
+    /* Unsigned arithmetic: v<<10 and 1<<order_nr may not fit in an int. */
+    mask = (1u << order_nr) - 1;
+
+    do { v = ((v >> (order_nr-10)) | (v << 10)) & mask; }
+    while ( v >= (unsigned int)nr ); /* never loops if nr is a power of 2 */
+
+    return (int)v;
+}
+
+/*
+ * Convert the timeval @new to a count of microseconds.  The seconds are
+ * widened to 64 bits before scaling so that the product cannot overflow
+ * where tv_sec is a 32-bit type.
+ */
+static uint64_t tv_to_us(struct timeval *new)
+{
+    return ((uint64_t)new->tv_sec * 1000000) + (uint64_t)new->tv_usec;
+}
+
+/* Current time of day, as reported by gettimeofday(), in microseconds. */
+static uint64_t llgettimeofday(void)
+{
+    struct timeval tv;
+
+    gettimeofday(&tv, NULL);
+
+    return tv_to_us(&tv);
+}
+
+/*
+ * Return the time from @old to @new in microseconds.  The arithmetic is
+ * done in 64 bits so that a large gap in seconds cannot overflow where
+ * tv_sec is a 32-bit type.
+ */
+static uint64_t tv_delta(struct timeval *new, struct timeval *old)
+{
+    int64_t secs  = (int64_t)new->tv_sec - (int64_t)old->tv_sec;
+    int64_t usecs = (int64_t)new->tv_usec - (int64_t)old->tv_usec;
+
+    return (uint64_t)(secs * 1000000 + usecs);
+}
+
+
+#define RATE_IS_MAX() (0)
+#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n))
+#define initialize_mbit_rate()
+
+/*
+ * Write exactly @count bytes from @buf to @fd.  write() may transfer fewer
+ * bytes than requested (e.g. on a pipe or socket), so keep writing until
+ * everything has gone out.
+ *
+ * Returns 1 if all @count bytes were written, 0 on any write error.
+ */
+static inline ssize_t write_exact(int fd, void *buf, size_t count)
+{
+    const char *p = buf;
+
+    while (count > 0) {
+        ssize_t w = write(fd, p, count);
+
+        if (w <= 0)
+            return 0;
+
+        p += w;
+        count -= (size_t)w;
+    }
+
+    return 1;
+}
+
+/*
+ * Report CPU usage and transfer rates for the interval since the previous
+ * call.  The previous wall-clock time and CPU readings are kept in static
+ * variables, so the first call measures against zeroed values.
+ *
+ * @pages_sent  pages sent during the interval
+ * @stats       shadow statistics; only dirty_count is read
+ * @print       when non-zero the figures are emitted via DPRINTF
+ *
+ * Always returns 0.
+ */
+static int print_stats(int xc_handle, uint32_t domid, int pages_sent,
+                       xc_shadow_op_stats_t *stats, int print)
+{
+    static struct timeval wall_last;
+    static long long      d0_cpu_last;
+    static long long      d1_cpu_last;
+
+    struct timeval        wall_now;
+    long long             wall_delta;
+    long long             d0_cpu_now, d0_cpu_delta;
+    long long             d1_cpu_now, d1_cpu_delta;
+
+    gettimeofday(&wall_now, NULL);
+
+    d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0);
+    d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0);
+
+    /*
+     * Test for the -1 value before scaling: once divided by 1000 a -1
+     * becomes 0 and the check could never fire.
+     */
+    if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
+        DPRINTF("ARRHHH!!\n");
+
+    d0_cpu_now /= 1000;
+    d1_cpu_now /= 1000;
+
+    wall_delta = tv_delta(&wall_now,&wall_last)/1000;
+
+    /* Avoid dividing by zero below. */
+    if (wall_delta == 0) wall_delta = 1;
+
+    d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000;
+    d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
+
+    if (print)
+        DPRINTF(
+                "delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
+                "dirtied %dMb/s %" PRId32 " pages\n",
+                wall_delta,
+                (int)((d0_cpu_delta*100)/wall_delta),
+                (int)((d1_cpu_delta*100)/wall_delta),
+                (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
+                (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
+                stats->dirty_count);
+
+    d0_cpu_last = d0_cpu_now;
+    d1_cpu_last = d1_cpu_now;
+    wall_last   = wall_now;
+
+    return 0;
+}
+
+/*
+ * Debugging aid: for each of @runs rounds, clean the shadow dirty bitmap
+ * into @arr and then peek the shadow statistics 40 times at 50ms
+ * intervals, logging the fault and dirty counts.
+ *
+ * Always returns -1.
+ */
+static int analysis_phase(int xc_handle, uint32_t domid, int max_pfn,
+                          unsigned long *arr, int runs)
+{
+    xc_shadow_op_stats_t stats;
+    long long start = llgettimeofday();
+    long long now;
+    int run = 0;
+
+    while (run < runs) {
+        int sample = 0;
+
+        xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
+                          arr, max_pfn, NULL, 0, NULL);
+        DPRINTF("#Flush\n");
+
+        while (sample < 40) {
+            usleep(50000);
+            now = llgettimeofday();
+            xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK,
+                              NULL, 0, NULL, 0, &stats);
+
+            DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n",
+                    ((now-start)+500)/1000,
+                    stats.fault_count, stats.dirty_count);
+            sample++;
+        }
+
+        run++;
+    }
+
+    return -1;
+}
+
+/*
+ * Ask the domain to suspend via @suspend, then poll until it reports a
+ * shutdown with reason SHUTDOWN_suspend.  @info is refreshed and @ctxt is
+ * re-read for vcpu 0 on every poll.  A paused domain is unpaused and
+ * polled again without using up a retry; otherwise polling gives up after
+ * 100 unsuccessful polls spaced 10ms apart.
+ *
+ * Returns 0 once the domain has suspended, -1 on failure.
+ */
+static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
+                             int dom, xc_dominfo_t *info,
+                             vcpu_guest_context_t *ctxt)
+{
+    int attempts = 0;
+
+    if (!(*suspend)(dom)) {
+        ERROR("Suspend request failed");
+        return -1;
+    }
+
+    for (;;) {
+        if (xc_domain_getinfo(xc_handle, dom, 1, info) != 1) {
+            ERROR("Could not get domain info");
+            return -1;
+        }
+
+        /* A failure here is only reported; polling carries on. */
+        if ( xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, ctxt))
+            ERROR("Could not get vcpu context");
+
+        if (info->shutdown && info->shutdown_reason == SHUTDOWN_suspend)
+            return 0; /* success */
+
+        if (info->paused) {
+            /* try unpausing domain, wait, and retest */
+            xc_domain_unpause( xc_handle, dom );
+
+            ERROR("Domain was paused. Wait and re-test.");
+            usleep(10000);  /* 10ms */
+            continue;
+        }
+
+        if (++attempts >= 100)
+            break;
+
+        ERROR("Retry suspend domain.");
+        usleep(10000);  /* 10ms */
+    }
+
+    ERROR("Unable to suspend domain.");
+
+    return -1;
+}
+
+/*
+ * Save the state of HVM domain @dom to @io_fd.
+ *
+ * Records are written in this order: max_pfn; batches of guest pages (a
+ * count, the pfn list, then the page contents); a zero count terminating
+ * the page data; the size-prefixed HVM domain context; the vcpu count; and
+ * one size-prefixed context per vcpu.  With XCFLAGS_DEBUG a count of -1 is
+ * written after the first pass and every page is sent a second time.
+ *
+ * @max_iters, @max_factor  live-migration tuning; 0 selects the defaults
+ * @flags                   XCFLAGS_LIVE and/or XCFLAGS_DEBUG
+ * @suspend                 callback used to ask the domain to suspend
+ *
+ * Returns 0 on success, 1 on failure.  Live save (XCFLAGS_LIVE) is
+ * currently rejected with an error.
+ */
+int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
+                  uint32_t max_factor, uint32_t flags, int (*suspend)(int))
+{
+    xc_dominfo_t info;
+
+    int rc = 1, i, last_iter, iter = 0;
+    int live  = (flags & XCFLAGS_LIVE);
+    int debug = (flags & XCFLAGS_DEBUG);
+    int sent_last_iter, skip_this_iter;
+
+    /* The new domain's shared-info frame number. */
+    unsigned long shared_info_frame;
+
+    /* A copy of the CPU context of the guest. */
+    vcpu_guest_context_t ctxt;
+
+    /* A table containing the type of each PFN (/not/ MFN!). */
+    unsigned long *pfn_type = NULL;
+    unsigned long *pfn_batch = NULL;
+
+    /* A copy of hvm domain context */
+    hvm_domain_context_t hvm_ctxt;
+
+    /* Live mapping of shared info structure */
+    shared_info_t *live_shinfo = NULL;
+
+    /* base of the region in which domain memory is mapped */
+    unsigned char *region_base = NULL;
+
+    uint32_t nr_pfns, rec_size, nr_vcpus;
+    unsigned long *page_array = NULL;
+
+    /* power of 2 order of max_pfn */
+    int order_nr;
+
+    /* bitmap of pages:
+       - that should be sent this iteration (unless later marked as skip);
+       - to skip this iteration because already dirty; */
+    unsigned long *to_send = NULL, *to_skip = NULL;
+
+    xc_shadow_op_stats_t stats;
+
+    unsigned long total_sent    = 0;
+
+    /*
+     * print_stats() reads stats.dirty_count, and on the non-live path
+     * nothing else ever fills the structure in.
+     */
+    memset(&stats, 0, sizeof(stats));
+
+    DPRINTF("xc_hvm_save:dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, "
+            "live=%d, debug=%d.\n",
+            dom, max_iters, max_factor, flags,
+            live, debug);
+
+    /* If no explicit control parameters given, use defaults */
+    if(!max_iters)
+        max_iters = DEF_MAX_ITERS;
+    if(!max_factor)
+        max_factor = DEF_MAX_FACTOR;
+
+    initialize_mbit_rate();
+
+    if(!get_platform_info(xc_handle, dom,
+                          &max_mfn, &hvirt_start, &pt_levels)) {
+        ERROR("HVM:Unable to get platform info.");
+        return 1;
+    }
+
+    if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) {
+        ERROR("HVM:Could not get domain info");
+        return 1;
+    }
+    nr_vcpus = info.nr_online_vcpus;
+
+    if (mlock(&ctxt, sizeof(ctxt))) {
+        ERROR("HVM:Unable to mlock ctxt");
+        return 1;
+    }
+
+    /* Only have to worry about vcpu 0 even for SMP */
+    if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) {
+        ERROR("HVM:Could not get vcpu context");
+        goto out;
+    }
+    shared_info_frame = info.shared_info_frame;
+
+    /* A cheesy test to see whether the domain contains valid state. */
+    if (ctxt.ctrlreg[3] == 0)
+    {
+        ERROR("Domain is not in a valid HVM guest state");
+        goto out;
+    }
+
+   /* cheesy sanity check */
+    if ((info.max_memkb >> (PAGE_SHIFT - 10)) > max_mfn) {
+        ERROR("Invalid HVM state record -- pfn count out of range: %lu",
+            (info.max_memkb >> (PAGE_SHIFT - 10)));
+        goto out;
+    }
+
+    /* Map the shared info frame */
+    if(!(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                            PROT_READ, shared_info_frame))) {
+        ERROR("HVM:Couldn't map live_shinfo");
+        goto out;
+    }
+
+    max_pfn = live_shinfo->arch.max_pfn;
+
+    DPRINTF("saved hvm domain info:max_memkb=0x%lx, max_mfn=0x%lx, "
+            "nr_pages=0x%lx\n", info.max_memkb, max_mfn, info.nr_pages);
+
+    /* nr_pfns: total pages excluding vga acc mem
+     * max_pfn: nr_pfns + 0x20 vga hole(0xa0~0xc0)
+     * getdomaininfo.tot_pages: all the allocated pages for this domain
+     */
+    if (live) {
+        ERROR("hvm domain doesn't support live migration now.\n");
+        goto out;
+
+        /* Not reached: kept for when live migration is supported. */
+        if (xc_shadow_control(xc_handle, dom,
+                              XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
+                              NULL, 0, NULL, 0, NULL) < 0) {
+            ERROR("Couldn't enable shadow mode");
+            goto out;
+        }
+
+        /* excludes vga acc mem */
+        nr_pfns = info.nr_pages - 0x800;
+
+        last_iter = 0;
+        DPRINTF("hvm domain live migration debug start: logdirty enable.\n");
+    } else {
+        /* This is a non-live suspend. Issue the call back to get the
+           domain suspended */
+
+        last_iter = 1;
+
+        /* suspend hvm domain */
+        if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt)) {
+            ERROR("HVM Domain appears not to have suspended");
+            goto out;
+        }
+        nr_pfns = info.nr_pages;
+        DPRINTF("after suspend hvm domain nr_pages=0x%x.\n", nr_pfns);
+    }
+
+    DPRINTF("after 1st handle hvm domain nr_pfns=0x%x, nr_pages=0x%lx, "
+            "max_memkb=0x%lx, live=%d.\n",
+            nr_pfns,
+            info.nr_pages,
+            info.max_memkb,
+            live);
+
+    nr_pfns = info.nr_pages;
+
+    /*XXX: calculate the VGA hole*/
+    max_pfn = nr_pfns + 0x20;
+
+    skip_this_iter = 0;/*XXX*/
+    /* pretend we sent all the pages last iteration */
+    sent_last_iter = max_pfn;
+
+    /* calculate the power of 2 order of max_pfn, e.g.
+       15->4 16->4 17->5 */
+    for (i = max_pfn-1, order_nr = 0; i ; i >>= 1, order_nr++)
+        continue;
+
+    /* Setup to_send / to_fix and to_skip bitmaps */
+    to_send = malloc(BITMAP_SIZE);
+    to_skip = malloc(BITMAP_SIZE);
+
+    if (!to_send ||!to_skip) {
+        ERROR("Couldn't allocate to_send array");
+        goto out;
+    }
+
+    memset(to_send, 0xff, BITMAP_SIZE);
+
+    if (lock_pages(to_send, BITMAP_SIZE)) {
+        ERROR("Unable to lock to_send");
+        goto out;   /* not "return": the bitmaps must be freed */
+    }
+
+    /* (to fix is local only) */
+    if (lock_pages(to_skip, BITMAP_SIZE)) {
+        ERROR("Unable to lock to_skip");
+        goto out;   /* not "return": the bitmaps must be freed */
+    }
+
+    analysis_phase(xc_handle, dom, max_pfn, to_skip, 0);
+
+    /* get all the HVM domain pfns */
+    if ( (page_array = (unsigned long *) malloc (sizeof(unsigned long) *
+                                                 max_pfn)) == NULL) {
+        ERROR("HVM:malloc fail!\n");
+        goto out;
+    }
+
+    for ( i = 0; i < max_pfn; i++)
+        page_array[i] = i;
+
+
+    /* We want zeroed memory so use calloc rather than malloc. */
+    pfn_type  = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type));
+    pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch));
+
+    if ((pfn_type == NULL) || (pfn_batch == NULL)) {
+        ERROR("failed to alloc memory for pfn_type and/or pfn_batch arrays");
+        errno = ENOMEM;
+        goto out;
+    }
+
+    if (lock_pages(pfn_type, MAX_BATCH_SIZE * sizeof(*pfn_type))) {
+        ERROR("Unable to lock");
+        goto out;
+    }
+
+    /* Start writing out the saved-domain record. */
+    if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) {
+        ERROR("write: max_pfn");
+        goto out;
+    }
+
+    while(1) {
+
+        unsigned int prev_pc, sent_this_iter, N, batch;
+
+        iter++;
+        sent_this_iter = 0;
+        skip_this_iter = 0;
+        prev_pc = 0;
+        N=0;
+
+        DPRINTF("Saving HVM domain memory pages: iter %d   0%%", iter);
+
+        while( N < max_pfn ){
+
+            unsigned int this_pc = (N * 100) / max_pfn;
+
+            if ((this_pc - prev_pc) >= 5) {
+                DPRINTF("\b\b\b\b%3d%%", this_pc);
+                prev_pc = this_pc;
+            }
+
+            /* slightly wasteful to peek the whole array every time,
+               but this is fast enough for the moment. */
+            if (!last_iter && xc_shadow_control(
+                    xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK,
+                    to_skip, max_pfn, NULL, 0, NULL) != max_pfn) {
+                ERROR("Error peeking HVM shadow bitmap");
+                goto out;
+            }
+
+
+            /* load pfn_type[] with the mfn of all the pages we're doing in
+               this batch. */
+            for (batch = 0; batch < MAX_BATCH_SIZE && N < max_pfn ; N++) {
+
+                int n = permute(N, max_pfn, order_nr);
+
+                if (debug) {
+                    DPRINTF("%d pfn= %08lx mfn= %08lx %d \n",
+                            iter, (unsigned long)n, page_array[n],
+                            test_bit(n, to_send));
+                }
+
+                if (!last_iter && test_bit(n, to_send)&& test_bit(n, to_skip))
+                    skip_this_iter++; /* stats keeping */
+
+                if (!((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
+                      (test_bit(n, to_send) && last_iter)))
+                    continue;
+
+                /* Pages in the VGA hole are never sent. */
+                if (n >= 0xa0 && n < 0xc0) {
+/*                    DPRINTF("get a vga hole pfn= %x.\n", n);*/
+                    continue;
+                }
+                /*
+                ** we get here if:
+                **  1. page is marked to_send & hasn't already been re-dirtied
+                **  2. (ignore to_skip in last iteration)
+                */
+
+                pfn_batch[batch] = n;
+                pfn_type[batch]  = page_array[n];
+
+                batch++;
+            }
+
+            if (batch == 0)
+                goto skip; /* vanishingly unlikely... */
+
+            /* map_foreign use pfns now !*/
+            if ((region_base = xc_map_foreign_batch(
+                     xc_handle, dom, PROT_READ, pfn_batch, batch)) == 0) {
+                ERROR("map batch failed");
+                goto out;
+            }
+
+            /* write num of pfns */
+            if(!write_exact(io_fd, &batch, sizeof(unsigned int))) {
+                ERROR("Error when writing to state file (2)");
+                goto out;
+            }
+
+            /* write all the pfns */
+            if(!write_exact(io_fd, pfn_batch, sizeof(unsigned long)*batch)) {
+                ERROR("Error when writing to state file (3)");
+                goto out;
+            }
+
+            /* write the page contents of the whole batch */
+            if (ratewrite(io_fd, region_base, PAGE_SIZE * batch) !=
+                PAGE_SIZE * batch) {
+                ERROR("ERROR when writting to state file (4)");
+                goto out;
+            }
+
+
+            sent_this_iter += batch;
+
+            munmap(region_base, batch*PAGE_SIZE);
+
+        } /* end of this while loop for this iteration */
+
+      skip:
+
+        total_sent += sent_this_iter;
+
+        DPRINTF("\r %d: sent %d, skipped %d, ",
+                iter, sent_this_iter, skip_this_iter );
+
+        if (last_iter) {
+            print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
+
+            DPRINTF("Total pages sent= %ld (%.2fx)\n",
+                    total_sent, ((float)total_sent)/max_pfn );
+        }
+
+        if (last_iter && debug){
+            int minusone = -1;
+            memset(to_send, 0xff, BITMAP_SIZE);
+            debug = 0;
+            DPRINTF("Entering debug resend-all mode\n");
+
+            /* send "-1" to put receiver into debug mode */
+            if(!write_exact(io_fd, &minusone, sizeof(int))) {
+                ERROR("Error when writing to state file (6)");
+                goto out;
+            }
+
+            continue;
+        }
+
+        if (last_iter) break;
+
+        if (live) {
+
+
+            if(
+                ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
+                (iter >= max_iters) ||
+                (sent_this_iter+skip_this_iter < 50) ||
+                (total_sent > max_pfn*max_factor) ) {
+
+                DPRINTF("Start last iteration for HVM domain\n");
+                last_iter = 1;
+
+                if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info,
+                                      &ctxt)) {
+                    ERROR("Domain appears not to have suspended");
+                    goto out;
+                }
+
+                DPRINTF("SUSPEND shinfo %08lx eip %08lx edx %08lx\n",
+                        info.shared_info_frame,
+                        (unsigned long)ctxt.user_regs.eip,
+                        (unsigned long)ctxt.user_regs.edx);
+            }
+
+            if (xc_shadow_control(xc_handle, dom,
+                                  XEN_DOMCTL_SHADOW_OP_CLEAN, to_send,
+                                  max_pfn, NULL, 0, &stats) != max_pfn) {
+                ERROR("Error flushing shadow PT");
+                goto out;
+            }
+
+            sent_last_iter = sent_this_iter;
+
+            print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
+
+        }
+
+
+    } /* end of while 1 */
+
+
+    DPRINTF("All HVM memory is saved\n");
+
+    /* Zero terminate */
+    i = 0;
+    if (!write_exact(io_fd, &i, sizeof(int))) {
+        ERROR("Error when writing to state file (6)");
+        goto out;
+    }
+
+    /* save hvm hypervisor state including pic/pit/shpage */
+    if (mlock(&hvm_ctxt, sizeof(hvm_ctxt))) {
+        ERROR("Unable to mlock ctxt");
+        goto out;   /* not "return": buffers allocated above must be freed */
+    }
+
+    if (xc_domain_hvm_getcontext(xc_handle, dom, &hvm_ctxt)){
+        ERROR("HVM:Could not get hvm context");
+        goto out;
+    }
+
+    rec_size = sizeof(hvm_ctxt);
+    if (!write_exact(io_fd, &rec_size, sizeof(uint32_t))) {
+        ERROR("error write hvm ctxt size");
+        goto out;
+    }
+
+    if ( !write_exact(io_fd, &hvm_ctxt, sizeof(hvm_ctxt)) ) {
+        ERROR("write HVM info failed!\n");
+        goto out;   /* a truncated image must not be reported as success */
+    }
+
+    /* save vcpu/vmcs context */
+    if (!write_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) {
+        ERROR("error write nr vcpus");
+        goto out;
+    }
+
+    /*XXX: need a online map to exclude down cpu */
+    for (i = 0; i < nr_vcpus; i++) {
+
+        if (xc_vcpu_getcontext(xc_handle, dom, i, &ctxt)) {
+            ERROR("HVM:Could not get vcpu context");
+            goto out;
+        }
+
+        rec_size = sizeof(ctxt);
+        DPRINTF("write %d vcpucontext of total %d.\n", i, nr_vcpus);
+        if (!write_exact(io_fd, &rec_size, sizeof(uint32_t))) {
+            ERROR("error write vcpu ctxt size");
+            goto out;
+        }
+
+        if (!write_exact(io_fd, &(ctxt), sizeof(ctxt)) ) {
+            ERROR("write vmcs failed!\n");
+            goto out;
+        }
+    }
+
+    /* Success! */
+    rc = 0;
+
+ out:
+
+    if (live) {
+        if(xc_shadow_control(xc_handle, dom,
+                             XEN_DOMCTL_SHADOW_OP_OFF,
+                             NULL, 0, NULL, 0, NULL) < 0) {
+            DPRINTF("Warning - couldn't disable shadow mode");
+        }
+    }
+
+    /* Drop the shared-info mapping taken above, if any. */
+    if (live_shinfo)
+        munmap(live_shinfo, PAGE_SIZE);
+
+    free(page_array);
+
+    free(pfn_type);
+    free(pfn_batch);
+    free(to_send);
+    free(to_skip);
+
+    return !!rc;
+}
diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/libxc/xenctrl.h     Fri Jan 19 14:48:57 2007 +0000
@@ -313,6 +313,30 @@ int xc_domain_getinfolist(int xc_handle,
                           xc_domaininfo_t *info);
 
 /**
+ * This function returns information about the context of a hvm domain
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain to get information from
+ * @parm hvm_ctxt a pointer to a structure to store the execution context of the
+ *            hvm domain
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_hvm_getcontext(int xc_handle,
+                             uint32_t domid,
+                             hvm_domain_context_t *hvm_ctxt);
+
+/**
+ * This function will set the context for hvm domain
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain to set the hvm domain context for
+ * @parm hvm_ctxt pointer to the hvm context with the values to set
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_hvm_setcontext(int xc_handle,
+                             uint32_t domid,
+                             hvm_domain_context_t *hvm_ctxt);
+
+/**
  * This function returns information about the execution context of a
  * particular vcpu of a domain.
  *
diff -r 8475a4e0425e -r 3c8bb086025e tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h    Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/libxc/xenguest.h    Fri Jan 19 14:48:57 2007 +0000
@@ -11,6 +11,7 @@
 
 #define XCFLAGS_LIVE      1
 #define XCFLAGS_DEBUG     2
+#define XCFLAGS_HVM       4
 
 
 /**
@@ -25,6 +26,13 @@ int xc_linux_save(int xc_handle, int io_
                   uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
                   int (*suspend)(int domid));
 
+/**
+ * This function will save a hvm domain running unmodified guest.
+ * @return 0 on success, 1 on failure
+ */
+int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
+                  uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
+                  int (*suspend)(int domid));
 
 /**
  * This function will restore a saved domain running Linux.
@@ -41,6 +49,18 @@ int xc_linux_restore(int xc_handle, int 
                      unsigned long nr_pfns, unsigned int store_evtchn,
                      unsigned long *store_mfn, unsigned int console_evtchn,
                      unsigned long *console_mfn);
+
+/**
+ * This function will restore a saved hvm domain running unmodified guest.
+ *
+ * @parm store_mfn pass mem size & returned with the mfn of the store page
+ * @return 0 on success, -1 on failure
+ */
+int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom,
+                      unsigned long nr_pfns, unsigned int store_evtchn,
+                      unsigned long *store_mfn, unsigned int console_evtchn,
+                      unsigned long *console_mfn,
+                      unsigned int pae, unsigned int apic);
 
 /**
  * This function will create a domain for a paravirtualized Linux
diff -r 8475a4e0425e -r 3c8bb086025e tools/pygrub/src/pygrub
--- a/tools/pygrub/src/pygrub   Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/pygrub/src/pygrub   Fri Jan 19 14:48:57 2007 +0000
@@ -503,7 +503,7 @@ def run_grub(file, entry, fs):
 # If nothing has been specified, look for a Solaris domU. If found, perform the
 # necessary tweaks.
 def sniff_solaris(fs, cfg):
-    if not fs.file_exists("/platform/i86xen/kernel/unix"):
+    if not fs.file_exists("/platform/i86xpv/kernel/unix"):
         return cfg
     
     # darned python
@@ -516,10 +516,10 @@ def sniff_solaris(fs, cfg):
             longmode = True
 
     if not cfg["kernel"]:
-        cfg["kernel"] = "/platform/i86xen/kernel/unix"
+        cfg["kernel"] = "/platform/i86xpv/kernel/unix"
         cfg["ramdisk"] = "/platform/i86pc/boot_archive"
         if longmode:
-            cfg["kernel"] = "/platform/i86xen/kernel/amd64/unix"
+            cfg["kernel"] = "/platform/i86xpv/kernel/amd64/unix"
             cfg["ramdisk"] = "/platform/i86pc/amd64/boot_archive"
 
     # Unpleasant. Typically we'll have 'root=foo -k' or 'root=foo /kernel -k',
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/setup.py
--- a/tools/python/setup.py     Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/setup.py     Fri Jan 19 14:48:57 2007 +0000
@@ -30,12 +30,23 @@ xs = Extension("xs",
                libraries          = libraries,
                sources            = [ "xen/lowlevel/xs/xs.c" ])
 
+scf = Extension("scf",
+               extra_compile_args = extra_compile_args,
+               include_dirs       = include_dirs + [ "xen/lowlevel/scf" ],
+               library_dirs       = library_dirs,
+               libraries          = libraries,
+               sources            = [ "xen/lowlevel/scf/scf.c" ])
+             
 acm = Extension("acm",
                extra_compile_args = extra_compile_args,
                include_dirs       = include_dirs + [ "xen/lowlevel/acm" ],
                library_dirs       = library_dirs,
                libraries          = libraries,
                sources            = [ "xen/lowlevel/acm/acm.c" ])
+
+modules = [ xc, xs, acm ]
+if os.uname()[0] == 'SunOS':
+    modules.append(scf)
 
 setup(name            = 'xen',
       version         = '3.0',
@@ -56,7 +67,7 @@ setup(name            = 'xen',
                          'xen.xm.tests'
                          ],
       ext_package = "xen.lowlevel",
-      ext_modules = [ xc, xs, acm ]
+      ext_modules = modules
       )
 
 os.chdir('logging')
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/lowlevel/scf/scf.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/lowlevel/scf/scf.c       Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,156 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <Python.h>
+
+#include <libscf.h>
+#include <stdio.h>
+
+#define        XEND_FMRI "svc:/system/xen/xend:default"
+#define        XEND_PG "config"
+
+static PyObject *scf_exc;
+
+/*
+ * Raise scf.error with the tuple (scf error code, err, scf error string,
+ * value) as its argument.  Always returns NULL so that callers can write
+ * "return (scf_exception(...))".
+ */
+static void *
+scf_exception(const char *err, const char *value)
+{
+       int scferr = scf_error();
+       const char *scfstrerr = scf_strerror(scferr);
+       PyObject *obj = Py_BuildValue("(isss)", scferr, err, scfstrerr, value);
+
+       /*
+        * PyErr_SetObject() takes its own reference to obj, so ours must be
+        * dropped or the tuple leaks on every raised error.  If
+        * Py_BuildValue() failed it has already set an exception; leave that
+        * one in place.
+        */
+       if (obj != NULL) {
+               PyErr_SetObject(scf_exc, obj);
+               Py_DECREF(obj);
+       }
+       return (NULL);
+}
+
+/*
+ * get_bool(name): look up property "name" in property group XEND_PG of
+ * XEND_FMRI and return it as Py_True or Py_False.  Raises scf.error (and
+ * returns NULL) if the property cannot be fetched or yields no boolean value.
+ */
+static PyObject *
+pyscf_get_bool(PyObject *o, PyObject *args, PyObject *kwargs)
+{
+       static char *kwlist[] = { "name", NULL };
+       scf_simple_prop_t *prop;
+       PyObject *obj;
+       uint8_t *val;
+       char *name;
+
+       if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &name))
+               return (NULL);
+
+       prop = scf_simple_prop_get(NULL, XEND_FMRI, XEND_PG, name);
+
+       if (prop == NULL)
+               return (scf_exception("scf_simple_prop_get() failed", name));
+
+       if ((val = scf_simple_prop_next_boolean(prop)) == NULL) {
+               /* Release prop on this error path too, as get_string does. */
+               scf_simple_prop_free(prop);
+               return (scf_exception("scf_simple_prop_next_boolean() failed",
+                   name));
+       }
+
+       /*
+        * Choose the result before freeing prop: val was handed out by prop.
+        * NOTE(review): presumably val points into prop's storage -- confirm
+        * against the libscf documentation.
+        */
+       obj = *val ? Py_True : Py_False;
+       scf_simple_prop_free(prop);
+       Py_INCREF(obj);
+       return (obj);
+}
+
+/*
+ * get_int(name): look up property "name" in property group XEND_PG of
+ * XEND_FMRI and return it as a Python int.  Raises scf.error (and returns
+ * NULL) if the property cannot be fetched or yields no integer value.
+ */
+static PyObject *
+pyscf_get_int(PyObject *o, PyObject *args, PyObject *kwargs)
+{
+       static char *kwlist[] = { "name", NULL };
+       scf_simple_prop_t *prop;
+       PyObject *obj;
+       int64_t *val;
+       char *name;
+
+       if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &name))
+               return (NULL);
+
+       prop = scf_simple_prop_get(NULL, XEND_FMRI, XEND_PG, name);
+
+       if (prop == NULL)
+               return (scf_exception("scf_simple_prop_get() failed", name));
+
+       if ((val = scf_simple_prop_next_integer(prop)) == NULL) {
+               /* Release prop on this error path too, as get_string does. */
+               scf_simple_prop_free(prop);
+               return (scf_exception("scf_simple_prop_next_integer() failed",
+                   name));
+       }
+
+       /*
+        * Convert before freeing prop: val was handed out by prop.
+        * NOTE(review): the int64_t value is narrowed to long by the cast;
+        * values outside the range of long would not survive where long is
+        * 32 bits wide.
+        */
+       obj = PyInt_FromLong((long)*val);
+       scf_simple_prop_free(prop);
+       return (obj);
+}
+
+/*
+ * get_string(name): look up property "name" in property group XEND_PG of
+ * XEND_FMRI and return its string value as a Python string.  Raises scf.error
+ * (and returns NULL) if the property cannot be fetched or yields no string.
+ */
+static PyObject *
+pyscf_get_string(PyObject *o, PyObject *args, PyObject *kwargs)
+{
+       static char *kwlist[] = { "name", NULL };
+       scf_simple_prop_t *prop;
+       PyObject *result;
+       char *name;
+       char *text;
+
+       if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &name))
+               return (NULL);
+
+       prop = scf_simple_prop_get(NULL, XEND_FMRI, XEND_PG, name);
+       if (prop == NULL)
+               return (scf_exception("scf_simple_prop_get() failed", name));
+
+       text = scf_simple_prop_next_astring(prop);
+       if (text != NULL) {
+               /* Build the Python string first, then release the property. */
+               result = PyString_FromString(text);
+               scf_simple_prop_free(prop);
+               return (result);
+       }
+
+       /* No string value: release the property, then raise. */
+       scf_simple_prop_free(prop);
+       return (scf_exception("scf_simple_prop_next_astring() failed", name));
+}
+
+PyDoc_STRVAR(pyscf_get_bool__doc__,
+   "get_bool(name) - get the value of the named boolean property");
+PyDoc_STRVAR(pyscf_get_int__doc__,
+   "get_int(name) - get the value of the named integer property");
+PyDoc_STRVAR(pyscf_get_string__doc__,
+   "get_string(name) - get the value of the named string property");
+
+/*
+ * Method table handed to Py_InitModule() for the scf module.  Every entry is
+ * registered with METH_VARARGS|METH_KEYWORDS, which is why the three-argument
+ * handlers are cast to PyCFunction.  The all-NULL entry terminates the table.
+ */
+static struct PyMethodDef pyscf_module_methods[] = {
+       { "get_bool", (PyCFunction) pyscf_get_bool,
+         METH_VARARGS|METH_KEYWORDS, pyscf_get_bool__doc__ },
+       { "get_int", (PyCFunction) pyscf_get_int,
+         METH_VARARGS|METH_KEYWORDS, pyscf_get_int__doc__ },
+       { "get_string", (PyCFunction) pyscf_get_string,
+         METH_VARARGS|METH_KEYWORDS, pyscf_get_string__doc__ },
+       { NULL, NULL, 0, NULL } 
+};
+
+/*
+ * Module initialiser for "scf": registers the module methods, creates the
+ * scf.error exception type and exports the SCF_ERROR_NOT_FOUND constant.
+ * On failure it returns early, leaving whatever Python exception was set by
+ * the failing call.
+ */
+PyMODINIT_FUNC
+initscf(void)
+{
+       PyObject *m;
+
+       m = Py_InitModule("scf", pyscf_module_methods);
+       if (m == NULL)
+               return;
+
+       scf_exc = PyErr_NewException("scf.error", NULL, NULL);
+       if (scf_exc == NULL)
+               return;
+
+       /*
+        * PyModule_AddObject() steals a reference on success; take an extra
+        * one so the scf_exc global keeps the exception type alive.
+        */
+       Py_INCREF(scf_exc);
+       PyModule_AddObject(m, "error", scf_exc);
+       PyModule_AddIntConstant(m, "SCF_ERROR_NOT_FOUND", SCF_ERROR_NOT_FOUND);
+}
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/lowlevel/xc/xc.c Fri Jan 19 14:48:57 2007 +0000
@@ -158,6 +158,20 @@ static PyObject *pyxc_domain_destroy(XcO
 static PyObject *pyxc_domain_destroy(XcObject *self, PyObject *args)
 {
     return dom_op(self, args, xc_domain_destroy);
+}
+
+/*
+ * domain_shutdown(dom, reason): parse two integers from args and pass them
+ * to xc_domain_shutdown().  Returns the shared "zero" object on success; on
+ * failure returns whatever pyxc_error_to_exception() produces.
+ */
+static PyObject *pyxc_domain_shutdown(XcObject *self, PyObject *args)
+{
+    uint32_t dom, reason;
+    int rc;
+
+    if (!PyArg_ParseTuple(args, "ii", &dom, &reason))
+        return NULL;
+
+    rc = xc_domain_shutdown(self->xc_handle, dom, reason);
+    if (rc == 0) {
+        Py_INCREF(zero);
+        return zero;
+    }
+
+    return pyxc_error_to_exception();
 }
 
 
@@ -1027,6 +1041,14 @@ static PyMethodDef pyxc_methods[] = {
       METH_VARARGS, "\n"
       "Destroy a domain.\n"
       " dom [int]:    Identifier of domain to be destroyed.\n\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
+
+    { "domain_shutdown", 
+      (PyCFunction)pyxc_domain_shutdown,
+      METH_VARARGS, "\n"
+      "Shutdown a domain.\n"
+      " dom       [int, 0]:      Domain identifier to use.\n"
+      " reason     [int, 0]:      Reason for shutdown.\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
 
     { "vcpu_setaffinity", 
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/util/xmlrpclib2.py
--- a/tools/python/xen/util/xmlrpclib2.py       Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/util/xmlrpclib2.py       Fri Jan 19 14:48:57 2007 +0000
@@ -256,6 +256,7 @@ class UnixXMLRPCRequestHandler(XMLRPCReq
 
 class UnixXMLRPCServer(TCPXMLRPCServer):
     address_family = socket.AF_UNIX
+    allow_address_reuse = True
 
     def __init__(self, addr, allowed, xenapi, logRequests = 1):
         mkdir.parents(os.path.dirname(addr), stat.S_IRWXU, True)
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/web/httpserver.py
--- a/tools/python/xen/web/httpserver.py        Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/web/httpserver.py        Fri Jan 19 14:48:57 2007 +0000
@@ -294,8 +294,6 @@ class HttpServer:
 
     backlog = 5
 
-    closed = False
-
     def __init__(self, root, interface, port=8080):
         self.root = root
         self.interface = interface
@@ -303,6 +301,7 @@ class HttpServer:
         # ready indicates when we are ready to begin accept connections
         # it should be set after a successful bind
         self.ready = False
+        self.closed = False
 
     def run(self):
         self.bind()
@@ -316,7 +315,6 @@ class HttpServer:
 
     def stop(self):
         self.close()
-
 
     def bind(self):
         self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -334,7 +332,10 @@ class HttpServer:
 
     def close(self):
         self.closed = True
-        try:
+        self.ready = False
+        try:
+            # shutdown socket explicitly to allow reuse
+            self.socket.shutdown(socket.SHUT_RDWR)
             self.socket.close()
         except:
             pass
@@ -344,6 +345,9 @@ class HttpServer:
 
     def getResource(self, req):
         return self.root.getRequestResource(req)
+
+    def shutdown(self):
+        """Shut the server down; simply delegates to close()."""
+        self.close()
 
 
 class UnixHttpServer(HttpServer):
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/Vifctl.py
--- a/tools/python/xen/xend/Vifctl.py   Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/Vifctl.py   Fri Jan 19 14:48:57 2007 +0000
@@ -20,7 +20,7 @@
 """
 import os
 
-import XendRoot
+import XendOptions
 
 
 def network(op):
@@ -30,7 +30,7 @@ def network(op):
     """
     if op not in ['start', 'stop']:
         raise ValueError('Invalid operation: ' + op)
-    script = XendRoot.instance().get_network_script()
+    script = XendOptions.instance().get_network_script()
     if script:
         script.insert(1, op)
         os.spawnv(os.P_WAIT, script[0], script)
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py   Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/XendCheckpoint.py   Fri Jan 19 14:48:57 2007 +0000
@@ -22,11 +22,14 @@ from xen.xend.XendConstants import *
 from xen.xend.XendConstants import *
 
 SIGNATURE = "LinuxGuestRecord"
+QEMU_SIGNATURE = "QemuDeviceModelRecord"
+dm_batch = 512
 XC_SAVE = "xc_save"
 XC_RESTORE = "xc_restore"
 
 
 sizeof_int = calcsize("i")
+sizeof_unsigned_int = calcsize("I")
 sizeof_unsigned_long = calcsize("L")
 
 
@@ -69,6 +72,11 @@ def save(fd, dominfo, network, live, dst
                     "could not write guest state file: config len")
         write_exact(fd, config, "could not write guest state file: config")
 
+        image_cfg = dominfo.info.get('image', {})
+        hvm = image_cfg.has_key('hvm')
+
+        if hvm:
+            log.info("save hvm domain")
         # xc_save takes three customization parameters: maxit, max_f, and
         # flags the last controls whether or not save is 'live', while the
         # first two further customize behaviour when 'live' save is
@@ -76,7 +84,7 @@ def save(fd, dominfo, network, live, dst
         # libxenguest; see the comments and/or code in xc_linux_save() for
         # more information.
         cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd),
-               str(dominfo.getDomid()), "0", "0", str(int(live)) ]
+               str(dominfo.getDomid()), "0", "0", str(int(live) | (int(hvm) << 2)) ]
         log.debug("[xc_save]: %s", string.join(cmd))
 
         def saveInputHandler(line, tochild):
@@ -90,11 +98,28 @@ def save(fd, dominfo, network, live, dst
                 log.info("Domain %d suspended.", dominfo.getDomid())
                 dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3,
                                        domain_name)
+                #send signal to device model for save
+                if hvm:
+                    log.info("release_devices for hvm domain")
+                    dominfo._releaseDevices(True)
                 tochild.write("done\n")
                 tochild.flush()
                 log.debug('Written done')
 
         forkHelper(cmd, fd, saveInputHandler, False)
+
+        # put qemu device model state
+        if hvm:
+            write_exact(fd, QEMU_SIGNATURE, "could not write qemu signature")
+            qemu_fd = os.open("/tmp/xen.qemu-dm.%d" % dominfo.getDomid(), os.O_RDONLY)
+            while True:
+                buf = os.read(qemu_fd, dm_batch)
+                if len(buf):
+                    write_exact(fd, buf, "could not write device model state")
+                else:
+                    break
+            os.close(qemu_fd)
+            os.remove("/tmp/xen.qemu-dm.%d" % dominfo.getDomid())
 
         dominfo.destroyDomain()
         try:
@@ -149,19 +174,43 @@ def restore(xd, fd, dominfo = None, paus
 
     nr_pfns = (dominfo.getMemoryTarget() + 3) / 4 
 
+    # if hvm, pass mem size to calculate the store_mfn
+    image_cfg = dominfo.info.get('image', {})
+    is_hvm  = image_cfg.has_key('hvm')
+    if is_hvm:
+        hvm  = dominfo.info['memory_static_min']
+        apic = dominfo.info['image']['hvm'].get('apic', 0)
+        pae  = dominfo.info['image']['hvm'].get('pae',  0)
+        log.info("restore hvm domain %d, mem=%d, apic=%d, pae=%d",
+                 dominfo.domid, hvm, apic, pae)
+    else:
+        hvm  = 0
+        apic = 0
+        pae  = 0
+
     try:
         l = read_exact(fd, sizeof_unsigned_long,
                        "not a valid guest state file: pfn count read")
         max_pfn = unpack("L", l)[0]    # native sizeof long
+
         if max_pfn > 16*1024*1024:     # XXX 
             raise XendError(
                 "not a valid guest state file: pfn count out of range")
 
-        balloon.free(xc.pages_to_kib(nr_pfns))
+        shadow = dominfo.info['shadow_memory']
+        log.debug("restore:shadow=0x%x, _static_max=0x%x, _static_min=0x%x, "
+                  "nr_pfns=0x%x.", dominfo.info['shadow_memory'],
+                  dominfo.info['memory_static_max'],
+                  dominfo.info['memory_static_min'], nr_pfns)
+
+        balloon.free(xc.pages_to_kib(nr_pfns) + shadow * 1024)
+
+        shadow_cur = xc.shadow_mem_control(dominfo.getDomid(), shadow)
+        dominfo.info['shadow_memory'] = shadow_cur
 
         cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE),
                         fd, dominfo.getDomid(), max_pfn,
-                        store_port, console_port])
+                        store_port, console_port, hvm, pae, apic])
         log.debug("[xc_restore]: %s", string.join(cmd))
 
         handler = RestoreInputHandler()
@@ -171,10 +220,30 @@ def restore(xd, fd, dominfo = None, paus
         if handler.store_mfn is None or handler.console_mfn is None:
             raise XendError('Could not read store/console MFN')
 
-        os.read(fd, 1)           # Wait for source to close connection
         dominfo.waitForDevices() # Wait for backends to set up
         if not paused:
             dominfo.unpause()
+
+         # get qemu state and create a tmp file for dm restore
+        if is_hvm:
+            qemu_signature = read_exact(fd, len(QEMU_SIGNATURE),
+                                        "invalid device model signature read")
+            if qemu_signature != QEMU_SIGNATURE:
+                raise XendError("not a valid device model state: found '%s'" %
+                                qemu_signature)
+            qemu_fd = os.open("/tmp/xen.qemu-dm.%d" % dominfo.getDomid(),
+                              os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
+            while True:
+                buf = os.read(fd, dm_batch)
+                if len(buf):
+                    write_exact(qemu_fd, buf,
+                                "could not write dm state to tmp file")
+                else:
+                    break
+            os.close(qemu_fd)
+
+
+        os.read(fd, 1)           # Wait for source to close connection
         
         dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
         
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py       Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/XendConfig.py       Fri Jan 19 14:48:57 2007 +0000
@@ -77,6 +77,25 @@ def scrub_password(data):
         return re.sub(r'\(vncpasswd\s+[^\)]+\)','(vncpasswd XXXXXX)', data)
     else:
         return data
+
+#
+# CPU fields:
+#
+# vcpus_number -- the maximum number of vcpus that this domain may ever have.
+#                 aka XendDomainInfo.getVCpuCount().
+# vcpus        -- the legacy configuration name for above.
+# max_vcpu_id  -- vcpus_number - 1.  This is given to us by Xen.
+#
+# cpus         -- the list of pCPUs available to each vCPU.
+#
+#   vcpu_avail:  a bitmap telling the guest domain whether it may use each of
+#                its VCPUs.  This is translated to
+#                <dompath>/cpu/<id>/availability = {online,offline} for use
+#                by the guest domain.
+# online_vcpus -- the number of VCPUs currently up, as reported by Xen.  This
+#                 is changed by changing vcpu_avail, and waiting for the
+#                 domain to respond.
+#
 
 
 # Mapping from XendConfig configuration keys to the old
@@ -185,7 +204,7 @@ LEGACY_CFG_TYPES = {
     'uuid':          str,
     'name':          str,
     'vcpus':         int,
-    'vcpu_avail':    int,
+    'vcpu_avail':    long,
     'memory':        int,
     'shadow_memory': int,
     'maxmem':        int,
@@ -355,9 +374,6 @@ class XendConfig(dict):
             'cpu_weight': 256,
             'cpu_cap': 0,
             'vcpus_number': 1,
-            'online_vcpus': 1,
-            'max_vcpu_id': 0,
-            'vcpu_avail': 1,
             'console_refs': [],
             'vif_refs': [],
             'vbd_refs': [],
@@ -389,7 +405,7 @@ class XendConfig(dict):
                                       event)
 
     def _vcpus_sanity_check(self):
-        if self.get('vcpus_number') != None:
+        if 'vcpus_number' in self and 'vcpu_avail' not in self:
             self['vcpu_avail'] = (1 << self['vcpus_number']) - 1
 
     def _uuid_sanity_check(self):
@@ -405,7 +421,7 @@ class XendConfig(dict):
     def _dominfo_to_xapi(self, dominfo):
         self['domid'] = dominfo['domid']
         self['online_vcpus'] = dominfo['online_vcpus']
-        self['max_vcpu_id'] = dominfo['max_vcpu_id']
+        self['vcpus_number'] = dominfo['max_vcpu_id'] + 1
         self['memory_dynamic_min'] = (dominfo['mem_kb'] + 1023)/1024
         self['memory_dynamic_max'] = (dominfo['maxmem_kb'] + 1023)/1024
         self['cpu_time'] = dominfo['cpu_time']/1e9
@@ -636,9 +652,6 @@ class XendConfig(dict):
         self['memory_dynamic_max'] = self['memory_static_max']
         self['memory_dynamic_min'] = self['memory_static_min']
 
-        # make sure max_vcpu_id is set correctly
-        self['max_vcpu_id'] = self['vcpus_number'] - 1
-
         # set device references in the configuration
         self['devices'] = cfg.get('devices', {})
         
@@ -720,13 +733,11 @@ class XendConfig(dict):
         _set_cfg_if_exists('on_xend_stop')
         _set_cfg_if_exists('on_xend_start')
         _set_cfg_if_exists('vcpu_avail')
-        _set_cfg_if_exists('max_vcpu_id') # needed for vcpuDomDetails
         _set_cfg_if_exists('cpu_weight')
         _set_cfg_if_exists('cpu_cap')
         
         # Parse and store runtime configuration 
         _set_cfg_if_exists('start_time')
-        _set_cfg_if_exists('online_vcpus')
         _set_cfg_if_exists('cpu_time')
         _set_cfg_if_exists('shutdown_reason')
         _set_cfg_if_exists('up_time')
@@ -1115,19 +1126,17 @@ class XendConfig(dict):
         # configuration
         log.debug("update_with_image_sxp(%s)" % scrub_password(image_sxp))
 
-        kernel_args = ""
+        # user-specified args must come last: previous releases did this and
+        # some domU kernels rely upon the ordering.
+        kernel_args = sxp.child_value(image_sxp, 'args', '')
 
         # attempt to extract extra arguments from SXP config
         arg_ip = sxp.child_value(image_sxp, 'ip')
         if arg_ip and not re.search(r'ip=[^ ]+', kernel_args):
-            kernel_args += 'ip=%s ' % arg_ip
+            kernel_args = 'ip=%s ' % arg_ip + kernel_args
         arg_root = sxp.child_value(image_sxp, 'root')
         if arg_root and not re.search(r'root=', kernel_args):
-            kernel_args += 'root=%s ' % arg_root
-
-        # user-specified args must come last: previous releases did this and
-        # some domU kernels rely upon the ordering.
-        kernel_args += sxp.child_value(image_sxp, 'args', '')
+            kernel_args = 'root=%s ' % arg_root + kernel_args
 
         if bootloader:
             self['_temp_using_bootloader'] = '1'
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/XendDomain.py       Fri Jan 19 14:48:57 2007 +0000
@@ -32,7 +32,7 @@ import xen.lowlevel.xc
 import xen.lowlevel.xc
 
 
-from xen.xend import XendRoot, XendCheckpoint, XendDomainInfo
+from xen.xend import XendOptions, XendCheckpoint, XendDomainInfo
 from xen.xend.PrettyPrint import prettyprint
 from xen.xend.XendConfig import XendConfig
 from xen.xend.XendError import XendError, XendInvalidDomain, VmError
@@ -51,7 +51,7 @@ from xen.xend import uuid
 from xen.xend import uuid
 
 xc = xen.lowlevel.xc.xc()
-xroot = XendRoot.instance() 
+xoptions = XendOptions.instance() 
 
 __all__ = [ "XendDomain" ]
 
@@ -214,7 +214,7 @@ class XendDomain:
         @rtype: String
         @return: Path.
         """
-        dom_path = xroot.get_xend_domains_path()
+        dom_path = xoptions.get_xend_domains_path()
         if domuuid:
             dom_path = os.path.join(dom_path, domuuid)
         return dom_path
@@ -361,7 +361,7 @@ class XendDomain:
 
     def _setDom0CPUCount(self):
         """Sets the number of VCPUs dom0 has. Retreived from the
-        Xend configuration, L{XendRoot}.
+        Xend configuration, L{XendOptions}.
 
         @requires: Expects to be protected by domains_lock.
         @rtype: None
@@ -369,7 +369,7 @@ class XendDomain:
         dom0 = self.privilegedDomain()
 
         # get max number of vcpus to use for dom0 from config
-        target = int(xroot.get_dom0_vcpus())
+        target = int(xoptions.get_dom0_vcpus())
         log.debug("number of vcpus to use is %d", target)
    
         # target == 0 means use all processors
@@ -1164,7 +1164,7 @@ class XendDomain:
             dominfo.checkLiveMigrateMemory()
 
         if port == 0:
-            port = xroot.get_xend_relocation_port()
+            port = xoptions.get_xend_relocation_port()
         try:
             sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
             sock.connect((dst, port))
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/XendDomainInfo.py   Fri Jan 19 14:48:57 2007 +0000
@@ -38,7 +38,7 @@ from xen.util import security
 from xen.util import security
 
 from xen.xend import balloon, sxp, uuid, image, arch, osdep
-from xen.xend import XendRoot, XendNode, XendConfig
+from xen.xend import XendOptions, XendNode, XendConfig
 
 from xen.xend.XendConfig import scrub_password
 from xen.xend.XendBootloader import bootloader
@@ -54,29 +54,10 @@ BOOTLOADER_LOOPBACK_DEVICE = '/dev/xvdp'
 BOOTLOADER_LOOPBACK_DEVICE = '/dev/xvdp'
 
 xc = xen.lowlevel.xc.xc()
-xroot = XendRoot.instance()
+xoptions = XendOptions.instance()
 
 log = logging.getLogger("xend.XendDomainInfo")
 #log.setLevel(logging.TRACE)
-
-
-#
-# There are a number of CPU-related fields:
-#
-#   vcpus:       the number of virtual CPUs this domain is configured to use.
-#   vcpu_avail:  a bitmap telling the guest domain whether it may use each of
-#                its VCPUs.  This is translated to
-#                <dompath>/cpu/<id>/availability = {online,offline} for use
-#                by the guest domain.
-#   cpumap:      a list of bitmaps, one for each VCPU, giving the physical
-#                CPUs that that VCPU may use.
-#   cpu:         a configuration setting requesting that VCPU 0 is pinned to
-#                the specified physical CPU.
-#
-# vcpus and vcpu_avail settings persist with the VM (i.e. they are persistent
-# across save, restore, migrate, and restart).  The other settings are only
-# specific to the domain, so are lost when the VM moves.
-#
 
 
 def create(config):
@@ -451,6 +432,16 @@ class XendDomainInfo:
         self._removeVm('xend/previous_restart_time')
         self.storeDom("control/shutdown", reason)
 
+        ## shutdown hypercall for hvm domain besides xenstore write
+        image_cfg = self.info.get('image', {})
+        hvm = image_cfg.has_key('hvm')
+        if hvm:
+            for code in DOMAIN_SHUTDOWN_REASONS.keys():
+                if DOMAIN_SHUTDOWN_REASONS[code] == reason:
+                    break
+            xc.domain_shutdown(self.domid, code)
+
+
     def pause(self):
         """Pause domain
         
@@ -614,7 +605,7 @@ class XendDomainInfo:
                     ['name',       self.info['name_label']],
                     ['vcpu_count', self.info['vcpus_number']]]
 
-            for i in range(0, self.info['max_vcpu_id']+1):
+            for i in range(0, self.info['vcpus_number']):
                 info = xc.vcpu_getinfo(self.domid, i)
 
                 sxpr.append(['vcpu',
@@ -739,7 +730,7 @@ class XendDomainInfo:
             'domid':              str(self.domid),
             'vm':                 self.vmpath,
             'name':               self.info['name_label'],
-            'console/limit':      str(xroot.get_console_limit() * 1024),
+            'console/limit':      str(xoptions.get_console_limit() * 1024),
             'memory/target':      str(self.info['memory_static_min'] * 1024)
             }
 
@@ -898,8 +889,9 @@ class XendDomainInfo:
                 self._writeDom(self._vcpuDomDetails())
         else:
             self.info['vcpus_number'] = vcpus
-            self.info['online_vcpus'] = vcpus
             xen.xend.XendDomain.instance().managed_config_save(self)
+        log.info("Set VCPU count on domain %s to %d", self.info['name_label'],
+                 vcpus)
 
     def getLabel(self):
         return security.get_security_info(self.info, 'label')
@@ -976,7 +968,7 @@ class XendDomainInfo:
                          self.info['name_label'], self.domid)
                 self._writeVm(LAST_SHUTDOWN_REASON, 'crash')
 
-                if xroot.get_enable_dump():
+                if xoptions.get_enable_dump():
                     try:
                         self.dumpCore()
                     except XendError:
@@ -1228,8 +1220,11 @@ class XendDomainInfo:
         if self.image:
             self.image.createDeviceModel()
 
-    def _releaseDevices(self):
+    def _releaseDevices(self, suspend = False):
         """Release all domain's devices.  Nothrow guarantee."""
+        if suspend and self.image:
+            self.image.destroy(suspend)
+            return
 
         while True:
             t = xstransact("%s/device" % self.dompath)
@@ -1381,7 +1376,7 @@ class XendDomainInfo:
             # this is done prior to memory allocation to aide in memory
             # distribution for NUMA systems.
             if self.info['cpus'] is not None and len(self.info['cpus']) > 0:
-                for v in range(0, self.info['max_vcpu_id']+1):
+                for v in range(0, self.info['vcpus_number']):
                     xc.vcpu_setaffinity(self.domid, v, self.info['cpus'])
 
             # Use architecture- and image-specific calculations to determine
@@ -1395,6 +1390,7 @@ class XendDomainInfo:
                 self.info['shadow_memory'] * 1024,
                 self.info['memory_static_max'] * 1024)
 
+            log.debug("_initDomain:shadow_memory=0x%x, memory_static_max=0x%x, memory_static_min=0x%x.", self.info['shadow_memory'], self.info['memory_static_max'], self.info['memory_static_min'],)
             # Round shadow up to a multiple of a MiB, as shadow_mem_control
             # takes MiB and we must not round down and end up under-providing.
             shadow = ((shadow + 1023) / 1024) * 1024
@@ -1494,6 +1490,16 @@ class XendDomainInfo:
         self.console_mfn = console_mfn
 
         self._introduceDomain()
+        image_cfg = self.info.get('image', {})
+        hvm = image_cfg.has_key('hvm')
+        if hvm:
+            self.image = image.create(self,
+                    self.info,
+                    self.info['image'],
+                    self.info['devices'])
+            if self.image:
+                self.image.createDeviceModel(True)
+                self.image.register_shutdown_watch()
         self._storeDomDetails()
         self._registerWatches()
         self.refreshShutdown()
@@ -2028,8 +2034,8 @@ class XendDomainInfo:
         # TODO: spec says that key is int, however, python does not allow
         #       non-string keys to dictionaries.
         vcpu_util = {}
-        if 'max_vcpu_id' in self.info and self.domid != None:
-            for i in range(0, self.info['max_vcpu_id']+1):
+        if 'vcpus_number' in self.info and self.domid != None:
+            for i in range(0, self.info['vcpus_number']):
                 info = xc.vcpu_getinfo(self.domid, i)
                 vcpu_util[str(i)] = info['cpu_time']/1000000000.0
                 
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/XendNode.py Fri Jan 19 14:48:57 2007 +0000
@@ -24,7 +24,7 @@ from xen.util import Brctl
 
 from xen.xend import uuid
 from xen.xend.XendError import XendError, NetworkAlreadyConnected
-from xen.xend.XendRoot import instance as xendroot
+from xen.xend.XendOptions import instance as xendoptions
 from xen.xend.XendStorageRepository import XendStorageRepository
 from xen.xend.XendLogging import log
 from xen.xend.XendPIF import *
@@ -45,7 +45,7 @@ class XendNode:
         """
         
         self.xc = xen.lowlevel.xc.xc()
-        self.state_store = XendStateStore(xendroot().get_xend_state_path())
+        self.state_store = XendStateStore(xendoptions().get_xend_state_path())
 
         # load host state from XML file
         saved_host = self.state_store.load_state('host')
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendOptions.py
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/xend/XendOptions.py      Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,373 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
+#============================================================================
+
+"""Xend root class.
+Creates the servers and handles configuration.
+
+Other classes get config variables by importing this module,
+using instance() to get a XendOptions instance, and then
+the config functions (e.g. get_xend_port()) to get
+configured values.
+"""
+
+import os
+import os.path
+import string
+import sys
+
+from xen.xend import sxp, osdep, XendLogging
+from xen.xend.XendError import XendError
+
+if os.uname()[0] == 'SunOS':
+    from xen.lowlevel import scf
+
+class XendOptions:
+    """Configuration options."""
+
+    """Where network control scripts live."""
+    network_script_dir = osdep.scripts_dir
+
+    """Where block control scripts live."""
+    block_script_dir = osdep.scripts_dir
+
+    """Default path to the log file. """
+    logfile_default = "/var/log/xen/xend.log"
+
+    """Default level of information to be logged."""
+    loglevel_default = 'DEBUG'
+
+    """Default Xen-API server configuration. """
+    xen_api_server_default = [['unix']]
+
+    """Default for the flag indicating whether xend should run an http server
+    (deprecated)."""
+    xend_http_server_default = 'no'
+
+    xend_tcp_xmlrpc_server_default = 'no'
+
+    xend_unix_xmlrpc_server_default = 'yes'
+
+    """Default interface address xend listens at. """
+    xend_address_default      = ''
+
+    """Default for the flag indicating whether xend should run a relocation server."""
+    xend_relocation_server_default = 'no'
+
+    """Default interface address the xend relocation server listens at. """
+    xend_relocation_address_default = ''
+
+    """Default port xend serves HTTP at. """
+    xend_port_default         = 8000
+
+    """Default port xend serves relocation at. """
+    xend_relocation_port_default = 8002
+
+    xend_relocation_hosts_allow_default = ''
+
+    """Default for the flag indicating whether xend should run a unix-domain
+    server (deprecated)."""
+    xend_unix_server_default = 'no'
+
+    """Default external migration tool """
+    external_migration_tool_default = ''
+
+    """Default path the unix-domain server listens at."""
+    xend_unix_path_default = '/var/lib/xend/xend-socket'
+
+    dom0_min_mem_default = 0
+
+    dom0_vcpus_default = 0
+
+    vncpasswd_default = None
+
+    """Default interface to listen for VNC connections on"""
+    xend_vnc_listen_default = '127.0.0.1'
+
+    """Default session storage path."""
+    xend_domains_path_default = '/var/lib/xend/domains'
+
+    """Default xend management state storage."""
+    xend_state_path_default = '/var/lib/xend/state'
+
+    """Default type of backend network interfaces"""
+    netback_type = osdep.netback_type
+
+    """Default script to configure a backend network interface"""
+    vif_script = osdep.vif_script
+
+    def __init__(self):
+        self.configure()
+
+    def _logError(self, fmt, *args):
+        """Logging function to log to stderr. We use this for XendOptions log
+        messages because they may be logged before the logger has been
+        configured.  Other components can safely use the logger.
+        """
+        print >>sys.stderr, "xend [ERROR]", fmt % args
+
+
+    def configure(self):
+        self.set_config()
+        XendLogging.init(self.get_config_string("logfile",
+                                               self.logfile_default),
+                         self.get_config_string("loglevel",
+                                               self.loglevel_default))
+
+    def set_config(self):
+        raise NotImplementedError()
+
+    def get_config_bool(self, name, val=None):
+        raise NotImplementedError()
+         
+    def get_config_int(self, name, val=None):
+        raise NotImplementedError()
+
+    def get_config_string(self, name, val=None):
+        raise NotImplementedError()
+
+    def get_xen_api_server(self):
+        raise NotImplementedError()
+
+    def get_xend_http_server(self):
+        """Get the flag indicating whether xend should run an http server.
+        """
+        return self.get_config_bool("xend-http-server", self.xend_http_server_default)
+
+    def get_xend_tcp_xmlrpc_server(self):
+        return self.get_config_bool("xend-tcp-xmlrpc-server",
+                                    self.xend_tcp_xmlrpc_server_default)
+
+    def get_xend_unix_xmlrpc_server(self):
+        return self.get_config_bool("xend-unix-xmlrpc-server",
+                                    self.xend_unix_xmlrpc_server_default)
+
+    def get_xend_relocation_server(self):
+        """Get the flag indicating whether xend should run a relocation server.
+        """
+        return self.get_config_bool("xend-relocation-server",
+                                    self.xend_relocation_server_default)
+
+    def get_xend_port(self):
+        """Get the port xend listens at for its HTTP interface.
+        """
+        return self.get_config_int('xend-port', self.xend_port_default)
+
+    def get_xend_relocation_port(self):
+        """Get the port xend listens at for connection to its relocation server.
+        """
+        return self.get_config_int('xend-relocation-port',
+                                   self.xend_relocation_port_default)
+
+    def get_xend_relocation_hosts_allow(self):
+        return self.get_config_string("xend-relocation-hosts-allow",
+                                     self.xend_relocation_hosts_allow_default)
+
+    def get_xend_address(self):
+        """Get the address xend listens at for its HTTP port.
+        This defaults to the empty string which allows all hosts to connect.
+        If this is set to 'localhost' only the localhost will be able to connect
+        to the HTTP port.
+        """
+        return self.get_config_string('xend-address', self.xend_address_default)
+
+    def get_xend_relocation_address(self):
+        """Get the address xend listens at for its relocation server port.
+        This defaults to the empty string which allows all hosts to connect.
+        If this is set to 'localhost' only the localhost will be able to connect
+        to the relocation port.
+        """
+        return self.get_config_string('xend-relocation-address', self.xend_relocation_address_default)
+
+    def get_xend_unix_server(self):
+        """Get the flag indicating whether xend should run a unix-domain server.
+        """
+        return self.get_config_bool("xend-unix-server", self.xend_unix_server_default)
+
+    def get_xend_unix_path(self):
+        """Get the path the xend unix-domain server listens at.
+        """
+        return self.get_config_string("xend-unix-path", self.xend_unix_path_default)
+
+    def get_xend_domains_path(self):
+        """ Get the path for persistent domain configuration storage
+        """
+        return self.get_config_string("xend-domains-path", self.xend_domains_path_default)
+
+    def get_xend_state_path(self):
+        """ Get the path for persistent domain configuration storage
+        """
+        return self.get_config_string("xend-state-path", self.xend_state_path_default)    
+
+    def get_network_script(self):
+        """@return the script used to alter the network configuration when
+        Xend starts and stops, or None if no such script is specified."""
+        
+        s = self.get_config_string('network-script')
+
+        if s:
+            result = s.split(" ")
+            result[0] = os.path.join(self.network_script_dir, result[0])
+            return result
+        else:
+            return None
+
+    def get_external_migration_tool(self):
+        """@return the name of the tool to handle virtual TPM migration."""
+        return self.get_config_string('external-migration-tool', self.external_migration_tool_default)
+
+    def get_enable_dump(self):
+        return self.get_config_bool('enable-dump', 'no')
+
+    def get_vif_script(self):
+        return self.get_config_string('vif-script', self.vif_script)
+
+    def get_dom0_min_mem(self):
+        return self.get_config_int('dom0-min-mem', self.dom0_min_mem_default)
+
+    def get_dom0_vcpus(self):
+        return self.get_config_int('dom0-cpus', self.dom0_vcpus_default)
+
+    def get_console_limit(self):
+        return self.get_config_int('console-limit', 1024)
+
+    def get_vnclisten_address(self):
+        return self.get_config_string('vnc-listen', self.xend_vnc_listen_default)
+
+    def get_vncpasswd_default(self):
+        return self.get_config_string('vncpasswd',
+                                     self.vncpasswd_default)
+
+class XendOptionsFile(XendOptions):
+
+    """Default path to the config file."""
+    config_default = "/etc/xen/xend-config.sxp"
+
+    """Environment variable used to override config_default."""
+    config_var     = "XEND_CONFIG"
+
+    def set_config(self):
+        """If the config file exists, read it. If not, ignore it.
+
+        The config file is a sequence of sxp forms.
+        """
+        self.config_path = os.getenv(self.config_var, self.config_default)
+        if os.path.exists(self.config_path):
+            try:
+                fin = file(self.config_path, 'rb')
+                try:
+                    config = sxp.parse(fin)
+                finally:
+                    fin.close()
+                if config is None:
+                    config = ['xend-config']
+                else:
+                    config.insert(0, 'xend-config')
+                self.config = config
+            except Exception, ex:
+                self._logError('Reading config file %s: %s',
+                               self.config_path, str(ex))
+                raise
+        else:
+            self._logError('Config file does not exist: %s',
+                           self.config_path)
+            self.config = ['xend-config']
+
+    def get_config_value(self, name, val=None):
+        """Get the value of an atomic configuration element.
+
+        @param name: element name
+        @param val:  default value (optional, defaults to None)
+        @return: value
+        """
+        return sxp.child_value(self.config, name, val=val)
+
+    def get_config_bool(self, name, val=None):
+        v = string.lower(str(self.get_config_value(name, val)))
+        if v in ['yes', 'y', '1', 'on',  'true',  't']:
+            return True
+        if v in ['no',  'n', '0', 'off', 'false', 'f']:
+            return False
+        raise XendError("invalid xend config %s: expected bool: %s" % (name, v))
+
+    def get_config_int(self, name, val=None):
+        v = self.get_config_value(name, val)
+        try:
+            return int(v)
+        except Exception:
+            raise XendError("invalid xend config %s: expected int: %s" % (name, v))
+
+    def get_config_string(self, name, val=None):
+        return self.get_config_value(name, val)
+
+    def get_xen_api_server(self):
+        """Get the Xen-API server configuration.
+        """
+        return self.get_config_value('xen-api-server',
+                                     self.xen_api_server_default)
+
+if os.uname()[0] == 'SunOS':
+    class XendOptionsSMF(XendOptions):
+
+        def set_config(self):
+            pass
+
+        def get_config_bool(self, name, val=None):
+            try:
+                return scf.get_bool(name)
+            except scf.error, e:
+                if e[0] == scf.SCF_ERROR_NOT_FOUND:
+                    return val
+                else:
+                    raise XendError("option %s: %s:%s" % (name, e[1], e[2]))
+
+        def get_config_int(self, name, val=None):
+            try:
+                return scf.get_int(name)
+            except scf.error, e:
+                if e[0] == scf.SCF_ERROR_NOT_FOUND:
+                    return val
+                else:
+                    raise XendError("option %s: %s:%s" % (name, e[1], e[2]))
+
+        def get_config_string(self, name, val=None):
+            try:
+                return scf.get_string(name)
+            except scf.error, e:
+                if e[0] == scf.SCF_ERROR_NOT_FOUND:
+                    return val
+                else:
+                    raise XendError("option %s: %s:%s" % (name, e[1], e[2]))
+
+        def get_xen_api_server(self):
+            # When the new server is a supported configuration, we should
+            # expand this.
+            return [["unix"]]
+
+def instance():
+    """Get an instance of XendOptions.
+    Use this instead of the constructor.
+    """
+    global inst
+    try:
+        inst
+    except:
+        if os.uname()[0] == 'SunOS':
+            inst = XendOptionsSMF()
+        else:
+            inst = XendOptionsFile()
+    return inst
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendProtocol.py
--- a/tools/python/xen/xend/XendProtocol.py     Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/XendProtocol.py     Fri Jan 19 14:48:57 2007 +0000
@@ -24,7 +24,7 @@ from encode import *
 from encode import *
 from xen.xend import sxp
 
-from xen.xend import XendRoot
+from xen.xend import XendOptions
 
 DEBUG = 0
 
@@ -34,7 +34,7 @@ HTTP_NO_CONTENT                      = 2
 HTTP_NO_CONTENT                      = 204
 
 
-xroot = XendRoot.instance()
+xoptions = XendOptions.instance()
 
 
 class XendError(RuntimeError):
@@ -218,7 +218,7 @@ class UnixXendClientProtocol(HttpXendCli
 
     def __init__(self, path=None):
         if path is None:
-            path = xroot.get_xend_unix_path()
+            path = xoptions.get_xend_unix_path()
         self.path = path
 
     def makeConnection(self, _):
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/XendRoot.py
--- a/tools/python/xen/xend/XendRoot.py Thu Jan 18 15:18:07 2007 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,322 +0,0 @@
-#============================================================================
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of version 2.1 of the GNU Lesser General Public
-# License as published by the Free Software Foundation.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-#============================================================================
-# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
-# Copyright (C) 2005 XenSource Ltd
-#============================================================================
-
-"""Xend root class.
-Creates the servers and handles configuration.
-
-Other classes get config variables by importing this module,
-using instance() to get a XendRoot instance, and then
-the config functions (e.g. get_xend_port()) to get
-configured values.
-"""
-
-import os
-import os.path
-import string
-import sys
-
-from xen.xend import sxp, osdep, XendLogging
-from xen.xend.XendError import XendError
-
-class XendRoot:
-    """Root of the management classes."""
-
-    """Default path to the config file."""
-    config_default = "/etc/xen/xend-config.sxp"
-
-    """Environment variable used to override config_default."""
-    config_var     = "XEND_CONFIG"
-
-    """Where network control scripts live."""
-    network_script_dir = osdep.scripts_dir
-
-    """Where block control scripts live."""
-    block_script_dir = osdep.scripts_dir
-
-    """Default path to the log file. """
-    logfile_default = "/var/log/xen/xend.log"
-
-    """Default level of information to be logged."""
-    loglevel_default = 'DEBUG'
-
-    """Default Xen-API server configuration. """
-    xen_api_server_default = [['unix']]
-
-    """Default for the flag indicating whether xend should run an http server
-    (deprecated)."""
-    xend_http_server_default = 'no'
-
-    xend_tcp_xmlrpc_server_default = 'no'
-
-    xend_unix_xmlrpc_server_default = 'yes'
-
-    """Default interface address xend listens at. """
-    xend_address_default      = ''
-
-    """Default for the flag indicating whether xend should run a relocation server."""
-    xend_relocation_server_default = 'no'
-
-    """Default interface address the xend relocation server listens at. """
-    xend_relocation_address_default = ''
-
-    """Default port xend serves HTTP at. """
-    xend_port_default         = '8000'
-
-    """Default port xend serves relocation at. """
-    xend_relocation_port_default = '8002'
-
-    xend_relocation_hosts_allow_default = ''
-
-    """Default for the flag indicating whether xend should run a unix-domain
-    server (deprecated)."""
-    xend_unix_server_default = 'no'
-
-    """Default external migration tool """
-    external_migration_tool_default = ''
-
-    """Default path the unix-domain server listens at."""
-    xend_unix_path_default = '/var/lib/xend/xend-socket'
-
-    dom0_min_mem_default = '0'
-
-    dom0_vcpus_default = '0'
-
-    vncpasswd_default = None
-
-    """Default interface to listen for VNC connections on"""
-    xend_vnc_listen_default = '127.0.0.1'
-
-    """Default session storage path."""
-    xend_domains_path_default = '/var/lib/xend/domains'
-
-    """Default xend management state storage."""
-    xend_state_path_default = '/var/lib/xend/state'
-
-    components = {}
-
-    def __init__(self):
-        self.config_path = None
-        self.config = None
-        self.configure()
-
-
-    def _logError(self, fmt, *args):
-        """Logging function to log to stderr. We use this for XendRoot log
-        messages because they may be logged before the logger has been
-        configured.  Other components can safely use the logger.
-        """
-        print >>sys.stderr, "xend [ERROR]", fmt % args
-
-
-    def configure(self):
-        self.set_config()
-        XendLogging.init(self.get_config_value("logfile",
-                                               self.logfile_default),
-                         self.get_config_value("loglevel",
-                                               self.loglevel_default))
-
-
-    def set_config(self):
-        """If the config file exists, read it. If not, ignore it.
-
-        The config file is a sequence of sxp forms.
-        """
-        self.config_path = os.getenv(self.config_var, self.config_default)
-        if os.path.exists(self.config_path):
-            try:
-                fin = file(self.config_path, 'rb')
-                try:
-                    config = sxp.parse(fin)
-                finally:
-                    fin.close()
-                if config is None:
-                    config = ['xend-config']
-                else:
-                    config.insert(0, 'xend-config')
-                self.config = config
-            except Exception, ex:
-                self._logError('Reading config file %s: %s',
-                               self.config_path, str(ex))
-                raise
-        else:
-            self._logError('Config file does not exist: %s',
-                           self.config_path)
-            self.config = ['xend-config']
-
-    def get_config(self, name=None):
-        """Get the configuration element with the given name, or
-        the whole configuration if no name is given.
-
-        @param name: element name (optional)
-        @return: config or none
-        """
-        if name is None:
-            val = self.config
-        else:
-            val = sxp.child(self.config, name)
-        return val
-
-    def get_config_value(self, name, val=None):
-        """Get the value of an atomic configuration element.
-
-        @param name: element name
-        @param val:  default value (optional, defaults to None)
-        @return: value
-        """
-        return sxp.child_value(self.config, name, val=val)
-
-    def get_config_bool(self, name, val=None):
-        v = string.lower(str(self.get_config_value(name, val)))
-        if v in ['yes', 'y', '1', 'on',  'true',  't']:
-            return True
-        if v in ['no',  'n', '0', 'off', 'false', 'f']:
-            return False
-        raise XendError("invalid xend config %s: expected bool: %s" % (name, v))
-
-    def get_config_int(self, name, val=None):
-        v = self.get_config_value(name, val)
-        try:
-            return int(v)
-        except Exception:
-            raise XendError("invalid xend config %s: expected int: %s" % (name, v))
-
-    def get_xen_api_server(self):
-        """Get the Xen-API server configuration.
-        """
-        return self.get_config_value('xen-api-server',
-                                     self.xen_api_server_default)
-
-    def get_xend_http_server(self):
-        """Get the flag indicating whether xend should run an http server.
-        """
-        return self.get_config_bool("xend-http-server", self.xend_http_server_default)
-
-    def get_xend_tcp_xmlrpc_server(self):
-        return self.get_config_bool("xend-tcp-xmlrpc-server",
-                                    self.xend_tcp_xmlrpc_server_default)
-
-    def get_xend_unix_xmlrpc_server(self):
-        return self.get_config_bool("xend-unix-xmlrpc-server",
-                                    self.xend_unix_xmlrpc_server_default)
-
-    def get_xend_relocation_server(self):
-        """Get the flag indicating whether xend should run a relocation server.
-        """
-        return self.get_config_bool("xend-relocation-server",
-                                    self.xend_relocation_server_default)
-
-    def get_xend_port(self):
-        """Get the port xend listens at for its HTTP interface.
-        """
-        return self.get_config_int('xend-port', self.xend_port_default)
-
-    def get_xend_relocation_port(self):
-        """Get the port xend listens at for connection to its relocation server.
-        """
-        return self.get_config_int('xend-relocation-port',
-                                   self.xend_relocation_port_default)
-
-    def get_xend_relocation_hosts_allow(self):
-        return self.get_config_value("xend-relocation-hosts-allow",
-                                     self.xend_relocation_hosts_allow_default)
-
-    def get_xend_address(self):
-        """Get the address xend listens at for its HTTP port.
-        This defaults to the empty string which allows all hosts to connect.
-        If this is set to 'localhost' only the localhost will be able to connect
-        to the HTTP port.
-        """
-        return self.get_config_value('xend-address', self.xend_address_default)
-
-    def get_xend_relocation_address(self):
-        """Get the address xend listens at for its relocation server port.
-        This defaults to the empty string which allows all hosts to connect.
-        If this is set to 'localhost' only the localhost will be able to connect
-        to the relocation port.
-        """
-        return self.get_config_value('xend-relocation-address', self.xend_relocation_address_default)
-
-    def get_xend_unix_server(self):
-        """Get the flag indicating whether xend should run a unix-domain server.
-        """
-        return self.get_config_bool("xend-unix-server", self.xend_unix_server_default)
-
-    def get_xend_unix_path(self):
-        """Get the path the xend unix-domain server listens at.
-        """
-        return self.get_config_value("xend-unix-path", self.xend_unix_path_default)
-
-    def get_xend_domains_path(self):
-        """ Get the path for persistent domain configuration storage
-        """
-        return self.get_config_value("xend-domains-path", self.xend_domains_path_default)
-
-    def get_xend_state_path(self):
-        """ Get the path for persistent domain configuration storage
-        """
-        return self.get_config_value("xend-state-path", self.xend_state_path_default)    
-
-    def get_network_script(self):
-        """@return the script used to alter the network configuration when
-        Xend starts and stops, or None if no such script is specified."""
-        
-        s = self.get_config_value('network-script')
-
-        if s:
-            result = s.split(" ")
-            result[0] = os.path.join(self.network_script_dir, result[0])
-            return result
-        else:
-            return None
-
-    def get_external_migration_tool(self):
-        """@return the name of the tool to handle virtual TPM migration."""
-        return self.get_config_value('external-migration-tool', self.external_migration_tool_default)
-
-    def get_enable_dump(self):
-        return self.get_config_bool('enable-dump', 'no')
-
-    def get_vif_script(self):
-        return self.get_config_value('vif-script', 'vif-bridge')
-
-    def get_dom0_min_mem(self):
-        return self.get_config_int('dom0-min-mem', self.dom0_min_mem_default)
-
-    def get_dom0_vcpus(self):
-        return self.get_config_int('dom0-cpus', self.dom0_vcpus_default)
-
-    def get_console_limit(self):
-        return self.get_config_int('console-limit', 1024)
-
-    def get_vnclisten_address(self):
-        return self.get_config_value('vnc-listen', self.xend_vnc_listen_default)
-
-    def get_vncpasswd_default(self):
-        return self.get_config_value('vncpasswd',
-                                     self.vncpasswd_default)
-
-def instance():
-    """Get an instance of XendRoot.
-    Use this instead of the constructor.
-    """
-    global inst
-    try:
-        inst
-    except:
-        inst = XendRoot()
-    return inst
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py  Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/balloon.py  Fri Jan 19 14:48:57 2007 +0000
@@ -22,7 +22,7 @@ import xen.lowlevel.xc
 import xen.lowlevel.xc
 
 import XendDomain
-import XendRoot
+import XendOptions
 from XendLogging import log
 from XendError import VmError
 
@@ -107,11 +107,11 @@ def free(need_mem):
     # usage, so we recheck the required alloc each time around the loop, but
     # track the last used value so that we don't trigger too many watches.
 
-    xroot = XendRoot.instance()
+    xoptions = XendOptions.instance()
     xc = xen.lowlevel.xc.xc()
 
     try:
-        dom0_min_mem = xroot.get_dom0_min_mem() * 1024
+        dom0_min_mem = xoptions.get_dom0_min_mem() * 1024
 
         retries = 0
         sleep_time = SLEEP_TIME_GROWTH
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/image.py    Fri Jan 19 14:48:57 2007 +0000
@@ -173,7 +173,7 @@ class ImageHandler:
         """Build the domain. Define in subclass."""
         raise NotImplementedError()
 
-    def createDeviceModel(self):
+    def createDeviceModel(self, restore = False):
         """Create device model for the domain (define in subclass if needed)."""
         pass
     
@@ -377,11 +377,12 @@ class HVMImageHandler(ImageHandler):
     # xm config file
     def parseDeviceModelArgs(self, imageConfig, deviceConfig):
         dmargs = [ 'boot', 'fda', 'fdb', 'soundhw',
-                   'localtime', 'serial', 'stdvga', 'isa', 'vcpus',
+                   'localtime', 'serial', 'stdvga', 'isa',
                    'acpi', 'usb', 'usbdevice', 'keymap' ]
-        ret = []
         hvmDeviceConfig = imageConfig['hvm']['devices']
-        
+
+        ret = ['-vcpus', str(self.vm.getVCpuCount())]
+
         for a in dmargs:
             v = hvmDeviceConfig.get(a)
 
@@ -461,14 +462,14 @@ class HVMImageHandler(ImageHandler):
             vnclisten = imageConfig.get('vnclisten')
 
             if not(vnclisten):
-                vnclisten = (xen.xend.XendRoot.instance().
+                vnclisten = (xen.xend.XendOptions.instance().
                              get_vnclisten_address())
             if vnclisten:
                 ret += ['-vnclisten', vnclisten]
 
             vncpasswd = vncpasswd_vmconfig
             if vncpasswd is None:
-                vncpasswd = (xen.xend.XendRoot.instance().
+                vncpasswd = (xen.xend.XendOptions.instance().
                              get_vncpasswd_default())
                 if vncpasswd is None:
                     raise VmError('vncpasswd is not set up in ' +
@@ -478,7 +479,7 @@ class HVMImageHandler(ImageHandler):
 
         return ret
 
-    def createDeviceModel(self):
+    def createDeviceModel(self, restore = False):
         if self.pid:
             return
         # Execute device model.
@@ -487,6 +488,8 @@ class HVMImageHandler(ImageHandler):
         args = args + ([ "-d",  "%d" % self.vm.getDomid(),
                   "-m", "%s" % (self.getRequiredInitialReservation() / 1024)])
         args = args + self.dmargs
+        if restore:
+            args = args + ([ "-loadvm", "/tmp/xen.qemu-dm.%d" % self.vm.getDomid() ])
         env = dict(os.environ)
         if self.display:
             env['DISPLAY'] = self.display
@@ -505,12 +508,16 @@ class HVMImageHandler(ImageHandler):
         self.register_reboot_feature_watch()
         self.pid = self.vm.gatherDom(('image/device-model-pid', int))
 
-    def destroy(self):
+    def destroy(self, suspend = False):
         self.unregister_shutdown_watch()
         self.unregister_reboot_feature_watch();
         if self.pid:
             try:
-                os.kill(self.pid, signal.SIGKILL)
+                sig = signal.SIGKILL
+                if suspend:
+                    log.info("use sigusr1 to signal qemu %d", self.pid)
+                    sig = signal.SIGUSR1
+                os.kill(self.pid, sig)
             except OSError, exn:
                 log.exception(exn)
             try:
@@ -598,6 +605,9 @@ class IA64_HVM_ImageHandler(HVMImageHand
         # ROM size for guest firmware, ioreq page and xenstore page
         extra_pages = 1024 + 3
         return mem_kb + extra_pages * page_kb
+
+    def getRequiredInitialReservation(self):
+        return self.vm.getMemoryTarget()
 
     def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb):
         # Explicit shadow memory is not a concept 
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/osdep.py
--- a/tools/python/xen/xend/osdep.py    Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/osdep.py    Fri Jan 19 14:48:57 2007 +0000
@@ -33,9 +33,19 @@ _pygrub_path = {
     "SunOS": "/usr/lib/xen/bin/pygrub"
 }
 
+_netback_type = {
+    "SunOS": "SUNW_mac"
+}
+
+_vif_script = {
+    "SunOS": "vif-vnic"
+}
+
 def _get(var, default=None):
     return var.get(os.uname()[0], default)
 
 scripts_dir = _get(_scripts_dir, "/etc/xen/scripts")
 xend_autorestart = _get(_xend_autorestart)
 pygrub_path = _get(_pygrub_path, "/usr/bin/pygrub")
+netback_type = _get(_netback_type, "netfront")
+vif_script = _get(_vif_script, "vif-bridge")
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/DevController.py
--- a/tools/python/xen/xend/server/DevController.py     Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/server/DevController.py     Fri Jan 19 14:48:57 2007 +0000
@@ -19,7 +19,7 @@ from threading import Event
 from threading import Event
 import types
 
-from xen.xend import sxp, XendRoot
+from xen.xend import sxp, XendOptions
 from xen.xend.XendError import VmError
 from xen.xend.XendLogging import log
 
@@ -50,7 +50,7 @@ xenbusState = {
     'Closed'       : 6,
     }
 
-xroot = XendRoot.instance()
+xoptions = XendOptions.instance()
 
 xenbusState.update(dict(zip(xenbusState.values(), xenbusState.keys())))
 
@@ -324,7 +324,7 @@ class DevController:
                       Make sure that the migration has finished and only
                       then return from the call.
         """
-        tool = xroot.get_external_migration_tool()
+        tool = xoptions.get_external_migration_tool()
         if tool:
             log.info("Calling external migration tool for step %d" % step)
             fd = os.popen("%s -type %s -step %d -host %s -domname %s" %
@@ -341,7 +341,7 @@ class DevController:
         """ Recover from device migration. The given step was the
             last one that was successfully executed.
         """
-        tool = xroot.get_external_migration_tool()
+        tool = xoptions.get_external_migration_tool()
         if tool:
             log.info("Calling external migration tool")
             fd = os.popen("%s -type %s -step %d -host %s -domname %s -recover" %
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/SrvRoot.py
--- a/tools/python/xen/xend/server/SrvRoot.py   Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/server/SrvRoot.py   Fri Jan 19 14:48:57 2007 +0000
@@ -25,7 +25,7 @@ class SrvRoot(SrvDir):
     """Server sub-components. Each entry is (name, class), where
     'name' is the entry name and  'class' is the name of its class.
     """
-    #todo Get this list from the XendRoot config.
+    #todo Get this list from the XendOptions config.
     subdirs = [
         ('node',    'SrvNode'       ),
         ('domain',  'SrvDomainDir'  ),
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/SrvServer.py
--- a/tools/python/xen/xend/server/SrvServer.py Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/server/SrvServer.py Fri Jan 19 14:48:57 2007 +0000
@@ -48,7 +48,7 @@ from threading import Thread
 
 from xen.web.httpserver import HttpServer, UnixHttpServer
 
-from xen.xend import XendNode, XendRoot, XendAPI
+from xen.xend import XendNode, XendOptions, XendAPI
 from xen.xend import Vifctl
 from xen.xend.XendLogging import log
 from xen.xend.XendClient import XEN_API_SOCKET
@@ -57,7 +57,7 @@ from SrvRoot import SrvRoot
 from SrvRoot import SrvRoot
 from XMLRPCServer import XMLRPCServer
 
-xroot = XendRoot.instance()
+xoptions = XendOptions.instance()
 
 
 class XendServers:
@@ -65,6 +65,7 @@ class XendServers:
     def __init__(self, root):
         self.servers = []
         self.root = root
+        self.running = False
         self.cleaningUp = False
         self.reloadingConfig = False
 
@@ -79,6 +80,7 @@ class XendServers:
                 server.shutdown()
             except:
                 pass
+        self.running = False
 
     def reloadConfig(self, signum = 0, frame = None):
         log.debug("SrvServer.reloadConfig()")
@@ -107,12 +109,11 @@ class XendServers:
                 if server.ready:
                     continue
 
-                thread = Thread(target=server.run, name=server.__class__.__name__)
-                if isinstance(server, HttpServer):
-                    thread.setDaemon(True)
+                thread = Thread(target=server.run,
+                                name=server.__class__.__name__)
+                thread.setDaemon(True)
                 thread.start()
                 threads.append(thread)
-
 
             # check for when all threads have initialized themselves and then
             # close the status pipe
@@ -143,47 +144,32 @@ class XendServers:
                 status.close()
                 status = None
 
-            # Interruptible Thread.join - Python Bug #1167930
-            #   Replaces: for t in threads: t.join()
-            #   Reason:   The above will cause python signal handlers to be
-            #             blocked so we're not able to catch SIGTERM in any
-            #             way for cleanup
-            runningThreads = threads
-            while len(runningThreads) > 0:
-                try:
-                    for t in threads:
-                        t.join(1.0)
-                    runningThreads = [t for t in threads
-                                      if t.isAlive() and not t.isDaemon()]
-                    if self.cleaningUp and len(runningThreads) > 0:
-                        log.debug("Waiting for %s." %
-                                  [x.getName() for x in runningThreads])
-                except:
-                    pass
-
+            # loop to keep main thread alive until it receives a SIGTERM
+            self.running = True
+            while self.running:
+                time.sleep(100000000)
+                
             if self.reloadingConfig:
                 log.info("Restarting all XML-RPC and Xen-API servers...")
                 self.cleaningUp = False
                 self.reloadingConfig = False
-                xroot.set_config()
-                new_servers = [x for x in self.servers
-                               if isinstance(x, HttpServer)]
-                self.servers = new_servers
+                xoptions.set_config()
+                self.servers = []
                 _loadConfig(self, self.root, True)
             else:
                 break
 
 def _loadConfig(servers, root, reload):
-    if not reload and xroot.get_xend_http_server():
+    if xoptions.get_xend_http_server():
         servers.add(HttpServer(root,
-                               xroot.get_xend_address(),
-                               xroot.get_xend_port()))
-    if not reload and xroot.get_xend_unix_server():
-        path = xroot.get_xend_unix_path()
+                               xoptions.get_xend_address(),
+                               xoptions.get_xend_port()))
+    if  xoptions.get_xend_unix_server():
+        path = xoptions.get_xend_unix_path()
         log.info('unix path=' + path)
         servers.add(UnixHttpServer(root, path))
 
-    api_cfg = xroot.get_xen_api_server()
+    api_cfg = xoptions.get_xen_api_server()
     if api_cfg:
         try:
             addrs = [(str(x[0]).split(':'),
@@ -218,10 +204,10 @@ def _loadConfig(servers, root, reload):
         except TypeError, exn:
             log.error('Xen-API server configuration %s is invalid.', api_cfg)
 
-    if xroot.get_xend_tcp_xmlrpc_server():
+    if xoptions.get_xend_tcp_xmlrpc_server():
         servers.add(XMLRPCServer(XendAPI.AUTH_PAM, False, True))
 
-    if xroot.get_xend_unix_xmlrpc_server():
+    if xoptions.get_xend_unix_xmlrpc_server():
         servers.add(XMLRPCServer(XendAPI.AUTH_PAM, False))
 
 
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/XMLRPCServer.py
--- a/tools/python/xen/xend/server/XMLRPCServer.py      Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/server/XMLRPCServer.py      Fri Jan 19 14:48:57 2007 +0000
@@ -179,21 +179,24 @@ class XMLRPCServer:
         # Custom runloop so we can cleanup when exiting.
         # -----------------------------------------------------------------
         try:
-            self.server.socket.settimeout(1.0)
             while self.running:
                 self.server.handle_request()
         finally:
-            self.cleanup()
+            self.shutdown()
 
     def cleanup(self):
-        log.debug("XMLRPCServer.cleanup()")
-        try:
-            self.server.socket.close()
+        log.debug('XMLRPCServer.cleanup()')
+        try:
+            if hasattr(self, 'server'):
+                # shutdown socket explicitly to allow reuse
+                self.server.socket.shutdown(socket.SHUT_RDWR)
+                self.server.socket.close()
         except Exception, exn:
             log.exception(exn)
             pass
 
     def shutdown(self):
         self.running = False
-        self.ready = False
-
+        if self.ready:
+            self.ready = False
+            self.cleanup()
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/netif.py
--- a/tools/python/xen/xend/server/netif.py     Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/server/netif.py     Fri Jan 19 14:48:57 2007 +0000
@@ -24,10 +24,10 @@ import random
 import random
 import re
 
-from xen.xend import XendRoot
+from xen.xend import XendOptions
 from xen.xend.server.DevController import DevController
 
-xroot = XendRoot.instance()
+xoptions = XendOptions.instance()
 
 def randomMAC():
     """Generate a random MAC address.
@@ -138,8 +138,8 @@ class NetifController(DevController):
     def getDeviceDetails(self, config):
         """@see DevController.getDeviceDetails"""
 
-        script = os.path.join(xroot.network_script_dir,
-                              config.get('script', xroot.get_vif_script()))
+        script = os.path.join(xoptions.network_script_dir,
+                              config.get('script', xoptions.get_vif_script()))
         typ     = config.get('type')
         bridge  = config.get('bridge')
         mac     = config.get('mac')
@@ -150,9 +150,8 @@ class NetifController(DevController):
 
         devid = self.allocateDeviceID()
 
-        # The default type is 'netfront'.
         if not typ:
-            typ = 'netfront'
+            typ = xoptions.netback_type
             
         if not mac:
             mac = randomMAC()
@@ -190,7 +189,7 @@ class NetifController(DevController):
         (script, ip, bridge, mac, typ, vifname, rate, uuid) = devinfo
 
         if script:
-            network_script_dir = xroot.network_script_dir + os.sep
+            network_script_dir = xoptions.network_script_dir + os.sep
             result['script'] = script.replace(network_script_dir, "")
         if ip:
             result['ip'] = ip
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/relocate.py
--- a/tools/python/xen/xend/server/relocate.py  Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/server/relocate.py  Fri Jan 19 14:48:57 2007 +0000
@@ -24,7 +24,7 @@ from xen.web import protocol, tcp, unix
 
 from xen.xend import sxp
 from xen.xend import XendDomain
-from xen.xend import XendRoot
+from xen.xend import XendOptions
 from xen.xend.XendError import XendError
 from xen.xend.XendLogging import log
 
@@ -114,15 +114,15 @@ class RelocationProtocol(protocol.Protoc
 
 
 def listenRelocation():
-    xroot = XendRoot.instance()
-    if xroot.get_xend_unix_server():
+    xoptions = XendOptions.instance()
+    if xoptions.get_xend_unix_server():
         path = '/var/lib/xend/relocation-socket'
         unix.UnixListener(path, RelocationProtocol)
-    if xroot.get_xend_relocation_server():
-        port = xroot.get_xend_relocation_port()
-        interface = xroot.get_xend_relocation_address()
+    if xoptions.get_xend_relocation_server():
+        port = xoptions.get_xend_relocation_port()
+        interface = xoptions.get_xend_relocation_address()
 
-        hosts_allow = xroot.get_xend_relocation_hosts_allow()
+        hosts_allow = xoptions.get_xend_relocation_hosts_allow()
         if hosts_allow == '':
             hosts_allow = None
         else:
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/tests/test_controllers.py
--- a/tools/python/xen/xend/server/tests/test_controllers.py    Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/server/tests/test_controllers.py    Fri Jan 19 14:48:57 2007 +0000
@@ -2,9 +2,9 @@ import re
 import re
 import unittest
 
-import xen.xend.XendRoot
+import xen.xend.XendOptions
 
-xen.xend.XendRoot.XendRoot.config_default = '/dev/null'
+xen.xend.XendOptions.XendOptions.config_default = '/dev/null'
 
 from xen.xend.server import netif
 
@@ -13,7 +13,7 @@ FAKE_DEVID = 63
 FAKE_DEVID = 63
 
 
-xroot = xen.xend.XendRoot.instance()
+xoptions = xen.xend.XendOptions.instance()
 
 
 class test_controllers(unittest.TestCase):
@@ -36,8 +36,8 @@ class test_controllers(unittest.TestCase
 
         self.assertEqual(backdets['handle'], str(FAKE_DEVID))
         self.assertEqual(backdets['script'],
-                         os.path.join(xroot.network_script_dir,
-                                      xroot.get_vif_script()))
+                         os.path.join(xoptions.network_script_dir,
+                                      xoptions.get_vif_script()))
         self.assertValidMac(backdets['mac'], expectedMac)
 
         self.assertEqual(frontdets['handle'], str(FAKE_DEVID))
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/tpmif.py
--- a/tools/python/xen/xend/server/tpmif.py     Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/server/tpmif.py     Fri Jan 19 14:48:57 2007 +0000
@@ -20,7 +20,7 @@
 
 """Support for virtual TPM interfaces."""
 
-from xen.xend import XendRoot
+from xen.xend import XendOptions
 from xen.xend.XendLogging import log
 from xen.xend.XendError import XendError
 from xen.xend.XendConstants import DEV_MIGRATE_TEST, VTPM_DELETE_SCRIPT
@@ -29,7 +29,7 @@ import os
 import os
 import re
 
-xroot = XendRoot.instance()
+xoptions = XendOptions.instance()
 
 def destroy_vtpmstate(name):
     if os.path.exists(VTPM_DELETE_SCRIPT):
@@ -88,7 +88,7 @@ class TPMifController(DevController):
     def migrate(self, deviceConfig, network, dst, step, domName):
         """@see DevContoller.migrate"""
         if network:
-            tool = xroot.get_external_migration_tool()
+            tool = xoptions.get_external_migration_tool()
             if tool != '':
                 log.info("Request to network-migrate device to %s. step=%d.",
                          dst, step)
@@ -116,7 +116,7 @@ class TPMifController(DevController):
     def recover_migrate(self, deviceConfig, network, dst, step, domName):
         """@see DevContoller.recover_migrate"""
         if network:
-            tool = xroot.get_external_migration_tool()
+            tool = xoptions.get_external_migration_tool()
             if tool != '':
                 log.info("Request to recover network-migrated device. last good step=%d.",
                          step)
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xend/server/vfbif.py
--- a/tools/python/xen/xend/server/vfbif.py     Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xend/server/vfbif.py     Fri Jan 19 14:48:57 2007 +0000
@@ -52,7 +52,7 @@ class VfbifController(DevController):
             if config.has_key("vncpasswd"):
                 passwd = config["vncpasswd"]
             else:
-                passwd = xen.xend.XendRoot.instance().get_vncpasswd_default()
+                passwd = xen.xend.XendOptions.instance().get_vncpasswd_default()
             if passwd:
                 self.vm.storeVm("vncpasswd", passwd)
                 log.debug("Stored a VNC password for vfb access")
@@ -66,7 +66,7 @@ class VfbifController(DevController):
             elif config.has_key("vncdisplay"):
                 args += ["--vncport", "%d" % (5900 + int(config["vncdisplay"]))]
             vnclisten = config.get("vnclisten",
-                                   xen.xend.XendRoot.instance().get_vnclisten_address())
+                                   xen.xend.XendOptions.instance().get_vnclisten_address())
             args += [ "--listen", vnclisten ]
             spawn_detached(args[0], args + std_args, os.environ)
         elif t == "sdl":
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xm/create.py     Fri Jan 19 14:48:57 2007 +0000
@@ -189,6 +189,10 @@ gopts.var('vcpus', val='VCPUS',
 gopts.var('vcpus', val='VCPUS',
           fn=set_int, default=1,
           use="# of Virtual CPUS in domain.")
+
+gopts.var('vcpu_avail', val='VCPUS',
+          fn=set_long, default=None,
+          use="Bitmask for virtual CPUs to make available immediately.")
 
 gopts.var('cpu_cap', val='CAP',
           fn=set_int, default=None,
@@ -740,7 +744,7 @@ def make_config(vals):
 
     map(add_conf, ['name', 'memory', 'maxmem', 'shadow_memory',
                    'restart', 'on_poweroff',
-                   'on_reboot', 'on_crash', 'vcpus', 'features',
+                   'on_reboot', 'on_crash', 'vcpus', 'vcpu_avail', 'features',
                    'on_xend_start', 'on_xend_stop'])
 
     if vals.uuid is not None:
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xm/main.py       Fri Jan 19 14:48:57 2007 +0000
@@ -693,12 +693,15 @@ def parse_doms_info(info):
         up_time = time.time() - start_time
 
     return {
-        'domid'    : get_info('domid',        str,   ''),
-        'name'     : get_info('name',         str,   '??'),
+        'domid'    : get_info('domid',              str,   ''),
+        'name'     : get_info('name',               str,   '??'),
         'mem'      : get_info('memory_dynamic_min', int,   0),
-        'vcpus'    : get_info('online_vcpus',        int,   0),
-        'state'    : get_info('state',        str,    ''),
-        'cpu_time' : get_info('cpu_time',     float, 0),
+        'state'    : get_info('state',              str,   ''),
+        'cpu_time' : get_info('cpu_time',           float, 0.0),
+        # VCPUs is the number online when the VM is up, or the number
+        # configured otherwise.
+        'vcpus'    : get_info('online_vcpus', int,
+                              get_info('vcpus', int, 0)),
         'up_time'  : up_time,
         'seclabel' : security.get_security_printlabel(info),
         }
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xm/opts.py
--- a/tools/python/xen/xm/opts.py       Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xm/opts.py       Fri Jan 19 14:48:57 2007 +0000
@@ -571,6 +571,14 @@ def set_int(opt, k, v):
         opt.opts.err('Invalid value: ' + str(v))
     opt.set(v)
 
+def set_long(opt, k, v):
+    """Set an option to a long integer value."""
+    try:
+        v = long(v)
+    except:
+        opt.opts.err('Invalid value: ' + str(v))
+    opt.set(v)
+
 def set_float(opt, k, v):
     """Set an option to a float value."""
     try:
diff -r 8475a4e0425e -r 3c8bb086025e tools/python/xen/xm/tests/test_create.py
--- a/tools/python/xen/xm/tests/test_create.py  Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/python/xen/xm/tests/test_create.py  Fri Jan 19 14:48:57 2007 +0000
@@ -3,9 +3,9 @@ import tempfile
 import tempfile
 import unittest
 
-import xen.xend.XendRoot
-
-xen.xend.XendRoot.XendRoot.config_default = '/dev/null'
+import xen.xend.XendOptions
+
+xen.xend.XendOptions.XendOptions.config_default = '/dev/null'
 
 import xen.xm.create
 
diff -r 8475a4e0425e -r 3c8bb086025e tools/xcutils/xc_restore.c
--- a/tools/xcutils/xc_restore.c        Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/xcutils/xc_restore.c        Fri Jan 19 14:48:57 2007 +0000
@@ -19,12 +19,13 @@ main(int argc, char **argv)
 main(int argc, char **argv)
 {
     unsigned int xc_fd, io_fd, domid, nr_pfns, store_evtchn, console_evtchn;
+    unsigned int hvm, pae, apic;
     int ret;
     unsigned long store_mfn, console_mfn;
 
-    if (argc != 6)
+    if (argc != 9)
        errx(1,
-            "usage: %s iofd domid nr_pfns store_evtchn console_evtchn",
+            "usage: %s iofd domid nr_pfns store_evtchn console_evtchn hvm pae apic",
             argv[0]);
 
     xc_fd = xc_interface_open();
@@ -36,9 +37,19 @@ main(int argc, char **argv)
     nr_pfns = atoi(argv[3]);
     store_evtchn = atoi(argv[4]);
     console_evtchn = atoi(argv[5]);
+    hvm  = atoi(argv[6]);
+    pae  = atoi(argv[7]);
+    apic = atoi(argv[8]);
 
-    ret = xc_linux_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn,
-                          &store_mfn, console_evtchn, &console_mfn);
+    if (hvm) {
+         /* pass the memsize to xc_hvm_restore to find the store_mfn */
+        store_mfn = hvm;
+        ret = xc_hvm_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn,
+                &store_mfn, console_evtchn, &console_mfn, pae, apic);
+    } else 
+        ret = xc_linux_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn,
+                &store_mfn, console_evtchn, &console_mfn);
+
     if (ret == 0) {
        printf("store-mfn %li\n", store_mfn);
        printf("console-mfn %li\n", console_mfn);
diff -r 8475a4e0425e -r 3c8bb086025e tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c   Thu Jan 18 15:18:07 2007 +0000
+++ b/tools/xcutils/xc_save.c   Fri Jan 19 14:48:57 2007 +0000
@@ -51,7 +51,10 @@ main(int argc, char **argv)
     max_f = atoi(argv[4]);
     flags = atoi(argv[5]);
 
-    ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend);
+    if (flags & XCFLAGS_HVM)
+        ret = xc_hvm_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend);
+    else 
+        ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend);
 
     xc_interface_close(xc_fd);
 
diff -r 8475a4e0425e -r 3c8bb086025e unmodified_drivers/linux-2.6/platform-pci/platform-pci.c
--- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c  Thu Jan 18 15:18:07 2007 +0000
+++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c  Fri Jan 19 14:48:57 2007 +0000
@@ -182,12 +182,17 @@ static uint64_t get_callback_via(struct 
 static uint64_t get_callback_via(struct pci_dev *pdev)
 {
 #ifdef __ia64__
-       int irq;
+       int irq, rid;
        for (irq = 0; irq < 16; irq++) {
                if (isa_irq_to_vector(irq) == pdev->irq)
                        return irq;
        }
-       return 0;
+       /* use Requester-ID as callback_irq */
+       /* RID: '<#bus(8)><#dev(5)><#func(3)>' (cf. PCI-Express spec) */
+       rid = ((pdev->bus->number & 0xff) << 8) | pdev->devfn;
+       printk(KERN_INFO DRV_NAME ":use Requester-ID(%04x) as callback irq\n",
+              rid);
+       return rid | IA64_CALLBACK_IRQ_RID;
 #else /* !__ia64__ */
        if (pdev->irq < 16)
                return pdev->irq; /* ISA IRQ */
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/asm-offsets.c
--- a/xen/arch/ia64/asm-offsets.c       Thu Jan 18 15:18:07 2007 +0000
+++ b/xen/arch/ia64/asm-offsets.c       Fri Jan 19 14:48:57 2007 +0000
@@ -56,10 +56,12 @@ void foo(void)
        DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct vcpu, arch._thread.on_ustack));
 
        DEFINE(IA64_VCPU_DOMAIN_OFFSET, offsetof (struct vcpu, domain));
+       DEFINE(IA64_VCPU_HYPERCALL_CONTINUATION_OFS, offsetof (struct vcpu, arch.hypercall_continuation));
        DEFINE(IA64_VCPU_META_RR0_OFFSET, offsetof (struct vcpu, arch.metaphysical_rr0));
        DEFINE(IA64_VCPU_META_SAVED_RR0_OFFSET, offsetof (struct vcpu, arch.metaphysical_saved_rr0));
        DEFINE(IA64_VCPU_BREAKIMM_OFFSET, offsetof (struct vcpu, arch.breakimm));
        DEFINE(IA64_VCPU_IVA_OFFSET, offsetof (struct vcpu, arch.iva));
+       DEFINE(IA64_VCPU_EVENT_CALLBACK_IP_OFFSET, offsetof (struct vcpu, arch.event_callback_ip));
        DEFINE(IA64_VCPU_IRR0_OFFSET, offsetof (struct vcpu, arch.irr[0]));
        DEFINE(IA64_VCPU_IRR3_OFFSET, offsetof (struct vcpu, arch.irr[3]));
        DEFINE(IA64_VCPU_INSVC3_OFFSET, offsetof (struct vcpu, arch.insvc[3]));
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/asm-xsi-offsets.c
--- a/xen/arch/ia64/asm-xsi-offsets.c   Thu Jan 18 15:18:07 2007 +0000
+++ b/xen/arch/ia64/asm-xsi-offsets.c   Fri Jan 19 14:48:57 2007 +0000
@@ -62,7 +62,7 @@ void foo(void)
        DEFINE_MAPPED_REG_OFS(XSI_ITV_OFS, itv);
        DEFINE_MAPPED_REG_OFS(XSI_PTA_OFS, pta);
        DEFINE_MAPPED_REG_OFS(XSI_PSR_IC_OFS, interrupt_collection_enabled);
-       DEFINE_MAPPED_REG_OFS(XSI_INCOMPL_REGFR_OFS, incomplete_regframe);
+       DEFINE_MAPPED_REG_OFS(XSI_VPSR_PP_OFS, vpsr_pp);
        DEFINE_MAPPED_REG_OFS(XSI_METAPHYS_OFS, metaphysical_mode);
        DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum);
        DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]);
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/Makefile
--- a/xen/arch/ia64/linux-xen/Makefile  Thu Jan 18 15:18:07 2007 +0000
+++ b/xen/arch/ia64/linux-xen/Makefile  Fri Jan 19 14:48:57 2007 +0000
@@ -1,3 +1,6 @@ obj-y += efi.o
+subdir-y += sn
+
+obj-y += cmdline.o
 obj-y += efi.o
 obj-y += entry.o
 obj-y += irq_ia64.o
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/README.origin
--- a/xen/arch/ia64/linux-xen/README.origin     Thu Jan 18 15:18:07 2007 +0000
+++ b/xen/arch/ia64/linux-xen/README.origin     Fri Jan 19 14:48:57 2007 +0000
@@ -5,6 +5,7 @@
 # (e.g. with #ifdef XEN or XEN in a comment) so that they can be
 # easily updated to future versions of the corresponding Linux files.
 
+cmdline.c              -> linux/lib/cmdline.c
 efi.c                  -> linux/arch/ia64/kernel/efi.c
 entry.h                        -> linux/arch/ia64/kernel/entry.h
 entry.S                        -> linux/arch/ia64/kernel/entry.S
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/cmdline.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/cmdline.c Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,131 @@
+/*
+ * linux/lib/cmdline.c
+ * Helper functions generally used for parsing kernel command line
+ * and module options.
+ *
+ * Code and copyrights come from init/main.c and arch/i386/kernel/setup.c.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ *
+ * GNU Indent formatting options for this file: -kr -i8 -npsl -pcs
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#ifdef XEN
+#include <xen/lib.h>
+#endif
+
+
+/**
+ *     get_option - Parse integer from an option string
+ *     @str: option string
+ *     @pint: (output) integer value parsed from @str
+ *
+ *     Read an int from an option string; if available accept a subsequent
+ *     comma as well.
+ *
+ *     Return values:
+ *     0 : no int in string
+ *     1 : int found, no subsequent comma
+ *     2 : int found including a subsequent comma
+ */
+
+int get_option (char **str, int *pint)
+{
+       char *cur = *str;
+
+       if (!cur || !(*cur))
+               return 0;
+#ifndef XEN
+       *pint = simple_strtol (cur, str, 0);
+#else
+       *pint = simple_strtol (cur, (const char**)str, 0);
+#endif
+       if (cur == *str)
+               return 0;
+       if (**str == ',') {
+               (*str)++;
+               return 2;
+       }
+
+       return 1;
+}
+
+/**
+ *     get_options - Parse a string into a list of integers
+ *     @str: String to be parsed
+ *     @nints: size of integer array
+ *     @ints: integer array
+ *
+ *     This function parses a string containing a comma-separated
+ *     list of integers.  The parse halts when the array is
+ *     full, or when no more numbers can be retrieved from the
+ *     string.
+ *
+ *     Return value is the character in the string which caused
+ *     the parse to end (typically a null terminator, if @str is
+ *     completely parseable).
+ */
+ 
+char *get_options(const char *str, int nints, int *ints)
+{
+       int res, i = 1;
+
+       while (i < nints) {
+               res = get_option ((char **)&str, ints + i);
+               if (res == 0)
+                       break;
+               i++;
+               if (res == 1)
+                       break;
+       }
+       ints[0] = i - 1;
+       return (char *)str;
+}
+
+/**
+ *     memparse - parse a string with mem suffixes into a number
+ *     @ptr: Where parse begins
+ *     @retptr: (output) Pointer to next char after parse completes
+ *
+ *     Parses a string into a number.  The number stored at @ptr is
+ *     potentially suffixed with %K (for kilobytes, or 1024 bytes),
+ *     %M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or
+ *     1073741824).  If the number is suffixed with K, M, or G, then
+ *     the return value is the number multiplied by one kilobyte, one
+ *     megabyte, or one gigabyte, respectively.
+ */
+
+unsigned long long memparse (char *ptr, char **retptr)
+{
+#ifndef XEN
+       unsigned long long ret = simple_strtoull (ptr, retptr, 0);
+#else
+       unsigned long long ret = simple_strtoull (ptr, (const char**)retptr, 0);
+#endif
+
+       switch (**retptr) {
+       case 'G':
+       case 'g':
+               ret <<= 10;
+       case 'M':
+       case 'm':
+               ret <<= 10;
+       case 'K':
+       case 'k':
+               ret <<= 10;
+               (*retptr)++;
+       default:
+               break;
+       }
+       return ret;
+}
+
+
+EXPORT_SYMBOL(memparse);
+EXPORT_SYMBOL(get_option);
+EXPORT_SYMBOL(get_options);
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/entry.S
--- a/xen/arch/ia64/linux-xen/entry.S   Thu Jan 18 15:18:07 2007 +0000
+++ b/xen/arch/ia64/linux-xen/entry.S   Fri Jan 19 14:48:57 2007 +0000
@@ -676,7 +676,9 @@ GLOBAL_ENTRY(ia64_ret_from_syscall)
        cmp.ge p6,p7=r8,r0                      // syscall executed successfully?
        adds r2=PT(R8)+16,sp                    // r2 = &pt_regs.r8
        mov r10=r0                              // clear error indication in r10
+#ifndef XEN    
 (p7)   br.cond.spnt handle_syscall_error       // handle potential syscall failure
+#endif
 END(ia64_ret_from_syscall)
        // fall through
 /*
@@ -764,7 +766,9 @@ ENTRY(ia64_leave_syscall)
        ld8 r19=[r2],PT(B6)-PT(LOADRS)          // load ar.rsc value for "loadrs"
        nop.i 0
        ;;
+#ifndef XEN    
        mov r16=ar.bsp                          // M2  get existing backing store pointer
+#endif    
        ld8 r18=[r2],PT(R9)-PT(B6)              // load b6
 #ifndef XEN
 (p6)   and r15=TIF_WORK_MASK,r31               // any work other than TIF_SYSCALL_TRACE?
@@ -814,7 +818,11 @@ ENTRY(ia64_leave_syscall)
        mov f8=f0                               // F    clear f8
        ;;
        ld8.fill r12=[r2]                       // M0|1 restore r12 (sp)
+#ifdef XEN    
+       ld8.fill r2=[r3]                        // M0|1
+#else    
        ld8.fill r15=[r3]                       // M0|1 restore r15
+#endif    
        mov b6=r18                              // I0   restore b6
 
 #ifdef XEN
@@ -827,7 +835,9 @@ ENTRY(ia64_leave_syscall)
 
        srlz.d                          // M0   ensure interruption collection is off (for cover)
        shr.u r18=r19,16                // I0|1 get byte size of existing "dirty" partition
+#ifndef XEN    
        cover                           // B    add current frame into dirty partition & set cr.ifs
+#endif    
        ;;
 (pUStk) ld4 r17=[r17]                  // M0|1 r17 = cpu_data->phys_stacked_size_p8
        mov r19=ar.bsp                  // M2   get new backing store pointer
@@ -893,23 +903,18 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 .work_processed_kernel:
 #ifdef XEN
        ;;
-(pUStk) ssm psr.i
-(pUStk)    br.call.sptk.many b0=do_softirq
-(pUStk) rsm psr.i
-    ;;
-       alloc loc0=ar.pfs,0,1,1,0
-       adds out0=16,r12
+(pUStk)        ssm psr.i
+(pUStk)        br.call.sptk.many b0=do_softirq
+(pUStk)        ssm psr.i
+       ;;
+(pUStk)        br.call.sptk.many b0=reflect_event
+       ;;
        adds r7 = PT(EML_UNAT)+16,r12
        ;;
        ld8 r7 = [r7]
        ;;
-(pUStk)        br.call.sptk.many b0=reflect_event
-//(pUStk)      br.call.sptk.many b0=deliver_pending_interrupt
-    ;;
-       mov ar.pfs=loc0
        mov ar.unat=r7  /* load eml_unat  */
        mov r31=r0
-
 
 #else
        adds r17=TI_FLAGS+IA64_TASK_SIZE,r13
@@ -1184,8 +1189,11 @@ skip_rbs_switch:
        mov cr.ipsr=r29         // M2
        mov ar.pfs=r26          // I0
 (pLvSys)mov r17=r0             // A  clear r17 for leave_syscall, no-op otherwise
-
+#ifdef XEN
+       mov cr.ifs=r30          // M2
+#else    
 (p9)   mov cr.ifs=r30          // M2
+#endif
        mov b0=r21              // I0
 (pLvSys)mov r18=r0             // A  clear r18 for leave_syscall, no-op otherwise
 
@@ -1195,7 +1203,11 @@ skip_rbs_switch:
        ;;
 (pUStk)        mov ar.rnat=r24         // M2 must happen with RSE in lazy mode
        nop 0
+#ifdef XEN    
+(pLvSys)mov r15=r0
+#else
 (pLvSys)mov r2=r0
+#endif
 
        mov ar.rsc=r27          // M2
        mov pr=r31,-1           // I0
@@ -1459,7 +1471,89 @@ 1:       mov gp=loc2                             // restore gp
        br.ret.sptk.many rp
 END(unw_init_running)
 
-#ifndef XEN
+#ifdef XEN
+GLOBAL_ENTRY(ia64_do_multicall_call)
+       movl r2=ia64_hypercall_table;;
+       shladd r2=r38,3,r2;;
+       ld8 r2=[r2];;
+       mov b6=r2
+       br.sptk.many b6;;
+END(ia64_do_multicall_call)
+
+    
+       .rodata
+       .align 8
+       .globl ia64_hypercall_table
+ia64_hypercall_table:
+       data8 do_ni_hypercall           /* do_set_trap_table *//*  0 */
+       data8 do_ni_hypercall           /* do_mmu_update */
+       data8 do_ni_hypercall           /* do_set_gdt */
+       data8 do_ni_hypercall           /* do_stack_switch */
+       data8 do_ni_hypercall           /* do_set_callbacks */
+       data8 do_ni_hypercall           /* do_fpu_taskswitch *//*  5 */
+       data8 do_sched_op_compat
+       data8 do_ni_hypercall
+       data8 do_ni_hypercall           /* do_set_debugreg */
+       data8 do_ni_hypercall           /* do_get_debugreg */
+       data8 do_ni_hypercall           /* do_update_descriptor * 10 */
+       data8 do_ni_hypercall           /* do_ni_hypercall */
+       data8 do_memory_op
+       data8 do_multicall
+       data8 do_ni_hypercall           /* do_update_va_mapping */
+       data8 do_ni_hypercall           /* do_set_timer_op */  /* 15 */
+       data8 do_ni_hypercall
+       data8 do_xen_version
+       data8 do_console_io
+       data8 do_ni_hypercall
+       data8 do_grant_table_op                                /* 20 */
+       data8 do_ni_hypercall           /* do_vm_assist */
+       data8 do_ni_hypercall           /* do_update_va_mapping_othe */
+       data8 do_ni_hypercall           /* (x86 only) */
+       data8 do_ni_hypercall           /* do_vcpu_op */
+       data8 do_ni_hypercall           /* (x86_64 only) */    /* 25 */
+       data8 do_ni_hypercall           /* do_mmuext_op */
+       data8 do_ni_hypercall           /* do_acm_op */
+       data8 do_ni_hypercall           /* do_nmi_op */
+       data8 do_sched_op
+       data8 do_callback_op            /*  */                 /* 30 */
+       data8 do_xenoprof_op            /*  */
+       data8 do_event_channel_op
+       data8 do_physdev_op
+       data8 do_hvm_op                 /*  */
+       data8 do_sysctl                 /*  */                  /* 35 */
+       data8 do_domctl                 /*  */
+       data8 do_ni_hypercall           /*  */
+       data8 do_ni_hypercall           /*  */
+       data8 do_ni_hypercall           /*  */
+       data8 do_ni_hypercall           /*  */                 /* 40 */
+       data8 do_ni_hypercall           /*  */
+       data8 do_ni_hypercall           /*  */
+       data8 do_ni_hypercall           /*  */
+       data8 do_ni_hypercall           /*  */
+       data8 do_ni_hypercall           /*  */                 /* 45 */
+       data8 do_ni_hypercall           /*  */
+       data8 do_ni_hypercall           /*  */
+       data8 do_dom0vp_op              /* dom0vp_op */
+       data8 do_pirq_guest_eoi         /* arch_1 */
+       data8 do_ni_hypercall           /* arch_2 */           /* 50 */
+       data8 do_ni_hypercall           /* arch_3 */
+       data8 do_ni_hypercall           /* arch_4 */
+       data8 do_ni_hypercall           /* arch_5 */
+       data8 do_ni_hypercall           /* arch_6 */
+       data8 do_ni_hypercall           /* arch_7 */           /* 55 */
+       data8 do_ni_hypercall
+       data8 do_ni_hypercall
+       data8 do_ni_hypercall
+       data8 do_ni_hypercall
+       data8 do_ni_hypercall                                  /* 60 */
+       data8 do_ni_hypercall
+       data8 do_ni_hypercall
+       data8 do_ni_hypercall
+
+       // guard against failures to increase NR_hypercalls
+       .org ia64_hypercall_table + 8*NR_hypercalls
+
+#else
        .rodata
        .align 8
        .globl sys_call_table
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/sn/Makefile       Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,1 @@
+subdir-y += kernel
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/sn/kernel/Makefile        Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,5 @@
+obj-y += sn2_smp.o
+obj-y += setup.o
+obj-y += iomv.o
+obj-y += irq.o
+obj-y += io_init.o
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/README.origin
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/sn/kernel/README.origin   Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,12 @@
+# Source files in this directory are near-identical copies of linux-2.6.19
+# files:
+
+# NOTE: ALL changes to these files should be clearly marked
+# (e.g. with #ifdef XEN or XEN in a comment) so that they can be
+# easily updated to future versions of the corresponding Linux files.
+
+io_init.c              -> linux/arch/ia64/sn/kernel/io_init.c
+iomv.c                 -> linux/arch/ia64/sn/kernel/iomv.c
+irq.c                  -> linux/arch/ia64/sn/kernel/irq.c
+setup.c                        -> linux/arch/ia64/sn/kernel/setup.c
+sn2_smp.c              -> linux/arch/ia64/sn/kernel/sn2/sn2_smp.c
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/io_init.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/sn/kernel/io_init.c       Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,783 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/bootmem.h>
+#include <linux/nodemask.h>
+#ifdef XEN
+#include <linux/init.h>
+#endif
+#include <asm/sn/types.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/sn_feature_sets.h>
+#include <asm/sn/geo.h>
+#include <asm/sn/io.h>
+#include <asm/sn/l1.h>
+#include <asm/sn/module.h>
+#include <asm/sn/pcibr_provider.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#ifndef XEN
+#include <asm/sn/pcidev.h>
+#endif
+#include <asm/sn/simulator.h>
+#include <asm/sn/sn_sal.h>
+#ifndef XEN
+#include <asm/sn/tioca_provider.h>
+#include <asm/sn/tioce_provider.h>
+#endif
+#ifdef XEN
+#include "asm/sn/hubdev.h"
+#include "asm/sn/xwidgetdev.h"
+#else
+#include "xtalk/hubdev.h"
+#include "xtalk/xwidgetdev.h"
+#endif
+
+
+extern void sn_init_cpei_timer(void);
+extern void register_sn_procfs(void);
+#ifdef XEN
+extern void sn_irq_lh_init(void);
+#endif
+
+static struct list_head sn_sysdata_list;
+
+/* sysdata list struct */
+struct sysdata_el {
+       struct list_head entry;
+       void *sysdata;
+};
+
+struct slab_info {
+       struct hubdev_info hubdev;
+};
+
+struct brick {
+       moduleid_t id;          /* Module ID of this module        */
+       struct slab_info slab_info[MAX_SLABS + 1];
+};
+
+int sn_ioif_inited;            /* SN I/O infrastructure initialized? */
+
+struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES];      /* indexed by asic type */
+
+#ifndef XEN
+static int max_segment_number;          /* Default highest segment number */
+static int max_pcibus_number = 255;    /* Default highest pci bus number */
+
+/*
+ * Hooks and struct for unsupported pci providers
+ */
+
+static dma_addr_t
+sn_default_pci_map(struct pci_dev *pdev, unsigned long paddr, size_t size, int type)
+{
+       return 0;
+}
+
+static void
+sn_default_pci_unmap(struct pci_dev *pdev, dma_addr_t addr, int direction)
+{
+       return;
+}
+
+static void *
+sn_default_pci_bus_fixup(struct pcibus_bussoft *soft, struct pci_controller *controller)
+{
+       return NULL;
+}
+
+static struct sn_pcibus_provider sn_pci_default_provider = {
+       .dma_map = sn_default_pci_map,
+       .dma_map_consistent = sn_default_pci_map,
+       .dma_unmap = sn_default_pci_unmap,
+       .bus_fixup = sn_default_pci_bus_fixup,
+};
+#endif
+
+/*
+ * Retrieve the DMA Flush List given nasid, widget, and device.
+ * This list is needed to implement the WAR - Flush DMA data on PIO Reads.
+ */
+static inline u64
+sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num,
+                            u64 address)
+{
+       struct ia64_sal_retval ret_stuff;
+       ret_stuff.status = 0;
+       ret_stuff.v0 = 0;
+
+       SAL_CALL_NOLOCK(ret_stuff,
+                       (u64) SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST,
+                       (u64) nasid, (u64) widget_num,
+                       (u64) device_num, (u64) address, 0, 0, 0);
+       return ret_stuff.status;
+}
+
+/*
+ * Retrieve the hub device info structure for the given nasid.
+ */
+static inline u64 sal_get_hubdev_info(u64 handle, u64 address)
+{
+       struct ia64_sal_retval ret_stuff;
+       ret_stuff.status = 0;
+       ret_stuff.v0 = 0;
+
+       SAL_CALL_NOLOCK(ret_stuff,
+                       (u64) SN_SAL_IOIF_GET_HUBDEV_INFO,
+                       (u64) handle, (u64) address, 0, 0, 0, 0, 0);
+       return ret_stuff.v0;
+}
+
+/*
+ * Retrieve the pci bus information given the bus number.
+ */
+static inline u64 sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
+{
+       struct ia64_sal_retval ret_stuff;
+       ret_stuff.status = 0;
+       ret_stuff.v0 = 0;
+
+       SAL_CALL_NOLOCK(ret_stuff,
+                       (u64) SN_SAL_IOIF_GET_PCIBUS_INFO,
+                       (u64) segment, (u64) busnum, (u64) address, 0, 0, 0, 0);
+       return ret_stuff.v0;
+}
+
+#ifndef XEN
+/*
+ * Retrieve the pci device information given the bus and device|function number.
+ */
+static inline u64
+sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev,
+                   u64 sn_irq_info)
+{
+       struct ia64_sal_retval ret_stuff;
+       ret_stuff.status = 0;
+       ret_stuff.v0 = 0;
+
+       SAL_CALL_NOLOCK(ret_stuff,
+                       (u64) SN_SAL_IOIF_GET_PCIDEV_INFO,
+                       (u64) segment, (u64) bus_number, (u64) devfn,
+                       (u64) pci_dev,
+                       sn_irq_info, 0, 0);
+       return ret_stuff.v0;
+}
+
+/*
+ * sn_pcidev_info_get() - Retrieve the pcidev_info struct for the specified
+ *                       device.
+ */
+inline struct pcidev_info *
+sn_pcidev_info_get(struct pci_dev *dev)
+{
+       struct pcidev_info *pcidev;
+
+       list_for_each_entry(pcidev,
+                           &(SN_PCI_CONTROLLER(dev)->pcidev_info), pdi_list) {
+               if (pcidev->pdi_linux_pcidev == dev) {
+                       return pcidev;
+               }
+       }
+       return NULL;
+}
+
+/* Older PROM flush WAR
+ *
+ * 01/16/06 -- This war will be in place until a new official PROM is released.
+ * Additionally note that the struct sn_flush_device_war also has to be
+ * removed from arch/ia64/sn/include/xtalk/hubdev.h
+ */
+static u8 war_implemented = 0;
+
+static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device,
+                              struct sn_flush_device_common *common)
+{
+       struct sn_flush_device_war *war_list;
+       struct sn_flush_device_war *dev_entry;
+       struct ia64_sal_retval isrv = {0,0,0,0};
+
+       if (!war_implemented) {
+               printk(KERN_WARNING "PROM version < 4.50 -- implementing old "
+                      "PROM flush WAR\n");
+               war_implemented = 1;
+       }
+
+       war_list = kzalloc(DEV_PER_WIDGET * sizeof(*war_list), GFP_KERNEL);
+       if (!war_list)
+               BUG();
+
+       SAL_CALL_NOLOCK(isrv, SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST,
+                       nasid, widget, __pa(war_list), 0, 0, 0 ,0);
+       if (isrv.status)
+               panic("sn_device_fixup_war failed: %s\n",
+                     ia64_sal_strerror(isrv.status));
+
+       dev_entry = war_list + device;
+       memcpy(common,dev_entry, sizeof(*common));
+       kfree(war_list);
+
+       return isrv.status;
+}
+
+/*
+ * sn_fixup_ionodes() - This routine initializes the HUB data strcuture for
+ *     each node in the system.
+ */
+static void __init sn_fixup_ionodes(void)
+{
+       struct sn_flush_device_kernel *sn_flush_device_kernel;
+       struct sn_flush_device_kernel *dev_entry;
+       struct hubdev_info *hubdev;
+       u64 status;
+       u64 nasid;
+       int i, widget, device, size;
+
+       /*
+        * Get SGI Specific HUB chipset information.
+        * Inform Prom that this kernel can support domain bus numbering.
+        */
+       for (i = 0; i < num_cnodes; i++) {
+               hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo);
+               nasid = cnodeid_to_nasid(i);
+               hubdev->max_segment_number = 0xffffffff;
+               hubdev->max_pcibus_number = 0xff;
+               status = sal_get_hubdev_info(nasid, (u64) __pa(hubdev));
+               if (status)
+                       continue;
+
+               /* Save the largest Domain and pcibus numbers found. */
+               if (hubdev->max_segment_number) {
+                       /*
+                        * Dealing with a Prom that supports segments.
+                        */
+                       max_segment_number = hubdev->max_segment_number;
+                       max_pcibus_number = hubdev->max_pcibus_number;
+               }
+
+               /* Attach the error interrupt handlers */
+               if (nasid & 1)
+                       ice_error_init(hubdev);
+               else
+                       hub_error_init(hubdev);
+
+               for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++)
+                       hubdev->hdi_xwidget_info[widget].xwi_hubinfo = hubdev;
+
+               if (!hubdev->hdi_flush_nasid_list.widget_p)
+                       continue;
+
+               size = (HUB_WIDGET_ID_MAX + 1) *
+                       sizeof(struct sn_flush_device_kernel *);
+               hubdev->hdi_flush_nasid_list.widget_p =
+                       kzalloc(size, GFP_KERNEL);
+               if (!hubdev->hdi_flush_nasid_list.widget_p)
+                       BUG();
+
+               for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) {
+                       size = DEV_PER_WIDGET *
+                               sizeof(struct sn_flush_device_kernel);
+                       sn_flush_device_kernel = kzalloc(size, GFP_KERNEL);
+                       if (!sn_flush_device_kernel)
+                               BUG();
+
+                       dev_entry = sn_flush_device_kernel;
+                       for (device = 0; device < DEV_PER_WIDGET;
+                            device++,dev_entry++) {
+                               size = sizeof(struct sn_flush_device_common);
+                               dev_entry->common = kzalloc(size, GFP_KERNEL);
+                               if (!dev_entry->common)
+                                       BUG();
+
+                               if (sn_prom_feature_available(
+                                                      PRF_DEVICE_FLUSH_LIST))
+                                       status = sal_get_device_dmaflush_list(
+                                                    nasid, widget, device,
+                                                    (u64)(dev_entry->common));
+                               else
+#ifdef XEN
+                                       BUG();
+#else
+                                       status = sn_device_fixup_war(nasid,
+                                                    widget, device,
+                                                    dev_entry->common);
+#endif
+                               if (status != SALRET_OK)
+                                       panic("SAL call failed: %s\n",
+                                             ia64_sal_strerror(status));
+
+                               spin_lock_init(&dev_entry->sfdl_flush_lock);
+                       }
+
+                       if (sn_flush_device_kernel)
+                               hubdev->hdi_flush_nasid_list.widget_p[widget] =
+                                                      sn_flush_device_kernel;
+               }
+       }
+}
+
+/*
+ * sn_pci_window_fixup() - Create a pci_window for each device resource.
+ *                        Until ACPI support is added, we need this code
+ *                        to setup pci_windows for use by
+ *                        pcibios_bus_to_resource(),
+ *                        pcibios_resource_to_bus(), etc.
+ */
+static void
+sn_pci_window_fixup(struct pci_dev *dev, unsigned int count,
+                   s64 * pci_addrs)
+{
+       struct pci_controller *controller = PCI_CONTROLLER(dev->bus);
+       unsigned int i;
+       unsigned int idx;
+       unsigned int new_count;
+       struct pci_window *new_window;
+
+       if (count == 0)
+               return;
+       idx = controller->windows;
+       new_count = controller->windows + count;
+       new_window = kcalloc(new_count, sizeof(struct pci_window), GFP_KERNEL);
+       if (new_window == NULL)
+               BUG();
+       if (controller->window) {
+               memcpy(new_window, controller->window,
+                      sizeof(struct pci_window) * controller->windows);
+               kfree(controller->window);
+       }
+
+       /* Setup a pci_window for each device resource. */
+       for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+               if (pci_addrs[i] == -1)
+                       continue;
+
+               new_window[idx].offset = dev->resource[i].start - pci_addrs[i];
+               new_window[idx].resource = dev->resource[i];
+               idx++;
+       }
+
+       controller->windows = new_count;
+       controller->window = new_window;
+}
+
+void sn_pci_unfixup_slot(struct pci_dev *dev)
+{
+       struct pci_dev *host_pci_dev = SN_PCIDEV_INFO(dev)->host_pci_dev;
+
+       sn_irq_unfixup(dev);
+       pci_dev_put(host_pci_dev);
+       pci_dev_put(dev);
+}
+
+/*
+ * sn_pci_fixup_slot() - This routine sets up a slot's resources
+ * consistent with the Linux PCI abstraction layer.  Resources acquired
+ * from our PCI provider include PIO maps to BAR space and interrupt
+ * objects.
+ */
+void sn_pci_fixup_slot(struct pci_dev *dev)
+{
+       unsigned int count = 0;
+       int idx;
+       int segment = pci_domain_nr(dev->bus);
+       int status = 0;
+       struct pcibus_bussoft *bs;
+       struct pci_bus *host_pci_bus;
+       struct pci_dev *host_pci_dev;
+       struct pcidev_info *pcidev_info;
+       s64 pci_addrs[PCI_ROM_RESOURCE + 1];
+       struct sn_irq_info *sn_irq_info;
+       unsigned long size;
+       unsigned int bus_no, devfn;
+
+       pci_dev_get(dev); /* for the sysdata pointer */
+       pcidev_info = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL);
+       if (!pcidev_info)
+               BUG();          /* Cannot afford to run out of memory */
+
+       sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
+       if (!sn_irq_info)
+               BUG();          /* Cannot afford to run out of memory */
+
+       /* Call to retrieve pci device information needed by kernel. */
+       status = sal_get_pcidev_info((u64) segment, (u64) dev->bus->number, 
+                                    dev->devfn,
+                                    (u64) __pa(pcidev_info),
+                                    (u64) __pa(sn_irq_info));
+       if (status)
+               BUG(); /* Cannot get platform pci device information */
+
+       /* Add pcidev_info to list in sn_pci_controller struct */
+       list_add_tail(&pcidev_info->pdi_list,
+                     &(SN_PCI_CONTROLLER(dev->bus)->pcidev_info));
+
+       /* Copy over PIO Mapped Addresses */
+       for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) {
+               unsigned long start, end, addr;
+
+               if (!pcidev_info->pdi_pio_mapped_addr[idx]) {
+                       pci_addrs[idx] = -1;
+                       continue;
+               }
+
+               start = dev->resource[idx].start;
+               end = dev->resource[idx].end;
+               size = end - start;
+               if (size == 0) {
+                       pci_addrs[idx] = -1;
+                       continue;
+               }
+               pci_addrs[idx] = start;
+               count++;
+               addr = pcidev_info->pdi_pio_mapped_addr[idx];
+               addr = ((addr << 4) >> 4) | __IA64_UNCACHED_OFFSET;
+               dev->resource[idx].start = addr;
+               dev->resource[idx].end = addr + size;
+               if (dev->resource[idx].flags & IORESOURCE_IO)
+                       dev->resource[idx].parent = &ioport_resource;
+               else
+                       dev->resource[idx].parent = &iomem_resource;
+       }
+       /* Create a pci_window in the pci_controller struct for
+        * each device resource.
+        */
+       if (count > 0)
+               sn_pci_window_fixup(dev, count, pci_addrs);
+
+       /*
+        * Using the PROMs values for the PCI host bus, get the Linux
+        * PCI host_pci_dev struct and set up host bus linkages
+        */
+
+       bus_no = (pcidev_info->pdi_slot_host_handle >> 32) & 0xff;
+       devfn = pcidev_info->pdi_slot_host_handle & 0xffffffff;
+       host_pci_bus = pci_find_bus(segment, bus_no);
+       host_pci_dev = pci_get_slot(host_pci_bus, devfn);
+
+       pcidev_info->host_pci_dev = host_pci_dev;
+       pcidev_info->pdi_linux_pcidev = dev;
+       pcidev_info->pdi_host_pcidev_info = SN_PCIDEV_INFO(host_pci_dev);
+       bs = SN_PCIBUS_BUSSOFT(dev->bus);
+       pcidev_info->pdi_pcibus_info = bs;
+
+       if (bs && bs->bs_asic_type < PCIIO_ASIC_MAX_TYPES) {
+               SN_PCIDEV_BUSPROVIDER(dev) = sn_pci_provider[bs->bs_asic_type];
+       } else {
+               SN_PCIDEV_BUSPROVIDER(dev) = &sn_pci_default_provider;
+       }
+
+       /* Only set up IRQ stuff if this device has a host bus context */
+       if (bs && sn_irq_info->irq_irq) {
+               pcidev_info->pdi_sn_irq_info = sn_irq_info;
+               dev->irq = pcidev_info->pdi_sn_irq_info->irq_irq;
+               sn_irq_fixup(dev, sn_irq_info);
+       } else {
+               pcidev_info->pdi_sn_irq_info = NULL;
+               kfree(sn_irq_info);
+       }
+}
+
+/*
+ * sn_pci_controller_fixup() - This routine sets up a bus's resources
+ * consistent with the Linux PCI abstraction layer.
+ */
+void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
+{
+       int status;
+       int nasid, cnode;
+       struct pci_controller *controller;
+       struct sn_pci_controller *sn_controller;
+       struct pcibus_bussoft *prom_bussoft_ptr;
+       struct hubdev_info *hubdev_info;
+       void *provider_soft;
+       struct sn_pcibus_provider *provider;
+
+       status = sal_get_pcibus_info((u64) segment, (u64) busnum,
+                                    (u64) ia64_tpa(&prom_bussoft_ptr));
+       if (status > 0)
+               return;         /*bus # does not exist */
+       prom_bussoft_ptr = __va(prom_bussoft_ptr);
+
+       /* Allocate a sn_pci_controller, which has a pci_controller struct
+        * as the first member.
+        */
+       sn_controller = kzalloc(sizeof(struct sn_pci_controller), GFP_KERNEL);
+       if (!sn_controller)
+               BUG();
+       INIT_LIST_HEAD(&sn_controller->pcidev_info);
+       controller = &sn_controller->pci_controller;
+       controller->segment = segment;
+
+       if (bus == NULL) {
+               bus = pci_scan_bus(busnum, &pci_root_ops, controller);
+               if (bus == NULL)
+                       goto error_return; /* error, or bus already scanned */
+               bus->sysdata = NULL;
+       }
+
+       if (bus->sysdata)
+               goto error_return; /* sysdata already alloc'd */
+
+       /*
+        * Per-provider fixup.  Copies the contents from prom to local
+        * area and links SN_PCIBUS_BUSSOFT().
+        */
+
+       if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES)
+               goto error_return; /* unsupported asic type */
+
+       if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB)
+               goto error_return; /* no further fixup necessary */
+
+       provider = sn_pci_provider[prom_bussoft_ptr->bs_asic_type];
+       if (provider == NULL)
+               goto error_return; /* no provider registerd for this asic */
+
+       bus->sysdata = controller;
+       if (provider->bus_fixup)
+               provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr, controller);
+       else
+               provider_soft = NULL;
+
+       if (provider_soft == NULL) {
+               /* fixup failed or not applicable */
+               bus->sysdata = NULL;
+               goto error_return;
+       }
+
+       /*
+        * Setup pci_windows for legacy IO and MEM space.
+        * (Temporary until ACPI support is in place.)
+        */
+       controller->window = kcalloc(2, sizeof(struct pci_window), GFP_KERNEL);
+       if (controller->window == NULL)
+               BUG();
+       controller->window[0].offset = prom_bussoft_ptr->bs_legacy_io;
+       controller->window[0].resource.name = "legacy_io";
+       controller->window[0].resource.flags = IORESOURCE_IO;
+       controller->window[0].resource.start = prom_bussoft_ptr->bs_legacy_io;
+       controller->window[0].resource.end =
+           controller->window[0].resource.start + 0xffff;
+       controller->window[0].resource.parent = &ioport_resource;
+       controller->window[1].offset = prom_bussoft_ptr->bs_legacy_mem;
+       controller->window[1].resource.name = "legacy_mem";
+       controller->window[1].resource.flags = IORESOURCE_MEM;
+       controller->window[1].resource.start = prom_bussoft_ptr->bs_legacy_mem;
+       controller->window[1].resource.end =
+           controller->window[1].resource.start + (1024 * 1024) - 1;
+       controller->window[1].resource.parent = &iomem_resource;
+       controller->windows = 2;
+
+       /*
+        * Generic bus fixup goes here.  Don't reference prom_bussoft_ptr
+        * after this point.
+        */
+
+       PCI_CONTROLLER(bus)->platform_data = provider_soft;
+       nasid = NASID_GET(SN_PCIBUS_BUSSOFT(bus)->bs_base);
+       cnode = nasid_to_cnodeid(nasid);
+       hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
+       SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info =
+           &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]);
+
+       /*
+        * If the node information we obtained during the fixup phase is invalid
+        * then set controller->node to -1 (undetermined)
+        */
+       if (controller->node >= num_online_nodes()) {
+               struct pcibus_bussoft *b = SN_PCIBUS_BUSSOFT(bus);
+
+               printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%u"
+                                   "L_IO=%lx L_MEM=%lx BASE=%lx\n",
+                       b->bs_asic_type, b->bs_xid, b->bs_persist_busnum,
+                       b->bs_legacy_io, b->bs_legacy_mem, b->bs_base);
+               printk(KERN_WARNING "on node %d but only %d nodes online."
+                       "Association set to undetermined.\n",
+                       controller->node, num_online_nodes());
+               controller->node = -1;
+       }
+       return;
+
+error_return:
+
+       kfree(sn_controller);
+       return;
+}
+
+void sn_bus_store_sysdata(struct pci_dev *dev)
+{
+       struct sysdata_el *element;
+
+       element = kzalloc(sizeof(struct sysdata_el), GFP_KERNEL);
+       if (!element) {
+               dev_dbg(dev, "%s: out of memory!\n", __FUNCTION__);
+               return;
+       }
+       element->sysdata = SN_PCIDEV_INFO(dev);
+       list_add(&element->entry, &sn_sysdata_list);
+}
+
+void sn_bus_free_sysdata(void)
+{
+       struct sysdata_el *element;
+       struct list_head *list, *safe;
+
+       list_for_each_safe(list, safe, &sn_sysdata_list) {
+               element = list_entry(list, struct sysdata_el, entry);
+               list_del(&element->entry);
+               list_del(&(((struct pcidev_info *)
+                            (element->sysdata))->pdi_list));
+               kfree(element->sysdata);
+               kfree(element);
+       }
+       return;
+}
+#endif
+
+/*
+ * Ugly hack to get PCI setup until we have a proper ACPI namespace.
+ */
+
+#define PCI_BUSES_TO_SCAN 256
+
+static int __init sn_pci_init(void)
+{
+#ifndef XEN
+       int i, j;
+       struct pci_dev *pci_dev = NULL;
+#endif
+
+       if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM())
+               return 0;
+
+#ifndef XEN
+       /*
+        * prime sn_pci_provider[].  Individial provider init routines will
+        * override their respective default entries.
+        */
+
+       for (i = 0; i < PCIIO_ASIC_MAX_TYPES; i++)
+               sn_pci_provider[i] = &sn_pci_default_provider;
+
+       pcibr_init_provider();
+       tioca_init_provider();
+       tioce_init_provider();
+#endif
+
+       /*
+        * This is needed to avoid bounce limit checks in the blk layer
+        */
+       ia64_max_iommu_merge_mask = ~PAGE_MASK;
+#ifndef XEN
+       sn_fixup_ionodes();
+#endif
+       sn_irq_lh_init();
+       INIT_LIST_HEAD(&sn_sysdata_list);
+#ifndef XEN
+       sn_init_cpei_timer();
+
+#ifdef CONFIG_PROC_FS
+       register_sn_procfs();
+#endif
+
+       /* busses are not known yet ... */
+       for (i = 0; i <= max_segment_number; i++)
+               for (j = 0; j <= max_pcibus_number; j++)
+                       sn_pci_controller_fixup(i, j, NULL);
+
+       /*
+        * Generic Linux PCI Layer has created the pci_bus and pci_dev 
+        * structures - time for us to add our SN PLatform specific 
+        * information.
+        */
+
+       while ((pci_dev =
+               pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pci_dev)) != NULL)
+               sn_pci_fixup_slot(pci_dev);
+#endif
+
+       sn_ioif_inited = 1;     /* sn I/O infrastructure now initialized */
+
+       return 0;
+}
+
+/*
+ * hubdev_init_node() - Creates the HUB data structure and link them to it's 
+ *     own NODE specific data area.
+ */
+void hubdev_init_node(nodepda_t * npda, cnodeid_t node)
+{
+       struct hubdev_info *hubdev_info;
+       int size;
+#ifndef XEN
+       pg_data_t *pg;
+#else
+       struct pglist_data *pg;
+#endif
+
+       size = sizeof(struct hubdev_info);
+
+       if (node >= num_online_nodes()) /* Headless/memless IO nodes */
+               pg = NODE_DATA(0);
+       else
+               pg = NODE_DATA(node);
+
+       hubdev_info = (struct hubdev_info *)alloc_bootmem_node(pg, size);
+
+       npda->pdinfo = (void *)hubdev_info;
+}
+
+geoid_t
+cnodeid_get_geoid(cnodeid_t cnode)
+{
+       struct hubdev_info *hubdev;
+
+       hubdev = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
+       return hubdev->hdi_geoid;
+}
+
+#ifndef XEN
+void sn_generate_path(struct pci_bus *pci_bus, char *address)
+{
+       nasid_t nasid;
+       cnodeid_t cnode;
+       geoid_t geoid;
+       moduleid_t moduleid;
+       u16 bricktype;
+
+       nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base);
+       cnode = nasid_to_cnodeid(nasid);
+       geoid = cnodeid_get_geoid(cnode);
+       moduleid = geo_module(geoid);
+
+       sprintf(address, "module_%c%c%c%c%.2d",
+               '0'+RACK_GET_CLASS(MODULE_GET_RACK(moduleid)),
+               '0'+RACK_GET_GROUP(MODULE_GET_RACK(moduleid)),
+               '0'+RACK_GET_NUM(MODULE_GET_RACK(moduleid)),
+               MODULE_GET_BTCHAR(moduleid), MODULE_GET_BPOS(moduleid));
+
+       /* Tollhouse requires slot id to be displayed */
+       bricktype = MODULE_GET_BTYPE(moduleid);
+       if ((bricktype == L1_BRICKTYPE_191010) ||
+           (bricktype == L1_BRICKTYPE_1932))
+                       sprintf(address, "%s^%d", address, geo_slot(geoid));
+}
+#endif
+
+#ifdef XEN
+__initcall(sn_pci_init);
+#else
+subsys_initcall(sn_pci_init);
+#endif
+#ifndef XEN
+EXPORT_SYMBOL(sn_pci_fixup_slot);
+EXPORT_SYMBOL(sn_pci_unfixup_slot);
+EXPORT_SYMBOL(sn_pci_controller_fixup);
+EXPORT_SYMBOL(sn_bus_store_sysdata);
+EXPORT_SYMBOL(sn_bus_free_sysdata);
+EXPORT_SYMBOL(sn_generate_path);
+#endif
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/iomv.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/sn/kernel/iomv.c  Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,82 @@
+/* 
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2003 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <asm/io.h>
+#include <asm/delay.h>
+#ifndef XEN
+#include <asm/vga.h>
+#endif
+#include <asm/sn/nodepda.h>
+#include <asm/sn/simulator.h>
+#include <asm/sn/pda.h>
+#include <asm/sn/sn_cpuid.h>
+#include <asm/sn/shub_mmr.h>
+
+#define IS_LEGACY_VGA_IOPORT(p) \
+       (((p) >= 0x3b0 && (p) <= 0x3bb) || ((p) >= 0x3c0 && (p) <= 0x3df))
+
+#ifdef XEN
+#define vga_console_iobase     0
+#endif
+
+/**
+ * sn_io_addr - convert an in/out port to an i/o address
+ * @port: port to convert
+ *
+ * Legacy in/out instructions are converted to ld/st instructions
+ * on IA64.  This routine will convert a port number into a valid 
+ * SN i/o address.  Used by sn_in*() and sn_out*().
+ */
+void *sn_io_addr(unsigned long port)
+{
+       if (!IS_RUNNING_ON_SIMULATOR()) {
+               if (IS_LEGACY_VGA_IOPORT(port))
+                       port += vga_console_iobase;
+               /* On sn2, legacy I/O ports don't point at anything */
+               if (port < (64 * 1024))
+                       return NULL;
+               return ((void *)(port | __IA64_UNCACHED_OFFSET));
+       } else {
+               /* but the simulator uses them... */
+               unsigned long addr;
+
+               /*
+                * word align port, but need more than 10 bits
+                * for accessing registers in bedrock local block
+                * (so we don't do port&0xfff)
+                */
+               addr = (is_shub2() ? 0xc00000028c000000UL : 0xc0000087cc000000UL) | ((port >> 2) << 12);
+               if ((port >= 0x1f0 && port <= 0x1f7) || port == 0x3f6 || port == 0x3f7)
+                       addr |= port;
+               return (void *)addr;
+       }
+}
+
+EXPORT_SYMBOL(sn_io_addr);
+
+/**
+ * __sn_mmiowb - I/O space memory barrier
+ *
+ * See include/asm-ia64/io.h and Documentation/DocBook/deviceiobook.tmpl
+ * for details.
+ *
+ * On SN2, we wait for the PIO_WRITE_STATUS SHub register to clear.
+ * See PV 871084 for details about the WAR about zero value.
+ *
+ */
+void __sn_mmiowb(void)
+{
+       volatile unsigned long *adr = pda->pio_write_status_addr;
+       unsigned long val = pda->pio_write_status_val;
+
+       while ((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != val)
+               cpu_relax();
+}
+
+EXPORT_SYMBOL(__sn_mmiowb);
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/irq.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/sn/kernel/irq.c   Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,542 @@
+/*
+ * Platform dependent support for SGI SN
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#include <linux/irq.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#ifdef XEN
+#include <linux/pci.h>
+#include <asm/hw_irq.h>
+#endif
+#include <asm/sn/addrs.h>
+#include <asm/sn/arch.h>
+#include <asm/sn/intr.h>
+#include <asm/sn/pcibr_provider.h>
+#include <asm/sn/pcibus_provider_defs.h>
+#ifndef XEN
+#include <asm/sn/pcidev.h>
+#endif
+#include <asm/sn/shub_mmr.h>
+#include <asm/sn/sn_sal.h>
+
+#ifdef XEN
+#define move_native_irq(foo)   do {} while(0)
+#endif
+
+static void force_interrupt(int irq);
+#ifndef XEN
+static void register_intr_pda(struct sn_irq_info *sn_irq_info);
+static void unregister_intr_pda(struct sn_irq_info *sn_irq_info);
+#endif
+
+int sn_force_interrupt_flag = 1;
+extern int sn_ioif_inited;
+struct list_head **sn_irq_lh;
+static DEFINE_SPINLOCK(sn_irq_info_lock); /* non-IRQ lock */
+
+u64 sn_intr_alloc(nasid_t local_nasid, int local_widget,
+                                    struct sn_irq_info *sn_irq_info,
+                                    int req_irq, nasid_t req_nasid,
+                                    int req_slice)
+{
+       struct ia64_sal_retval ret_stuff;
+       ret_stuff.status = 0;
+       ret_stuff.v0 = 0;
+
+       SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT,
+                       (u64) SAL_INTR_ALLOC, (u64) local_nasid,
+                       (u64) local_widget, __pa(sn_irq_info), (u64) req_irq,
+                       (u64) req_nasid, (u64) req_slice);
+
+       return ret_stuff.status;
+}
+
+void sn_intr_free(nasid_t local_nasid, int local_widget,
+                               struct sn_irq_info *sn_irq_info)
+{
+       struct ia64_sal_retval ret_stuff;
+       ret_stuff.status = 0;
+       ret_stuff.v0 = 0;
+
+       SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT,
+                       (u64) SAL_INTR_FREE, (u64) local_nasid,
+                       (u64) local_widget, (u64) sn_irq_info->irq_irq,
+                       (u64) sn_irq_info->irq_cookie, 0, 0);
+}
+
+static unsigned int sn_startup_irq(unsigned int irq)
+{
+       return 0;
+}
+
+static void sn_shutdown_irq(unsigned int irq)
+{
+}
+
+static void sn_disable_irq(unsigned int irq)
+{
+}
+
+static void sn_enable_irq(unsigned int irq)
+{
+}
+
+static void sn_ack_irq(unsigned int irq)
+{
+       u64 event_occurred, mask;
+
+       irq = irq & 0xff;
+       event_occurred = HUB_L((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED));
+       mask = event_occurred & SH_ALL_INT_MASK;
+       HUB_S((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS), mask);
+       __set_bit(irq, (volatile void *)pda->sn_in_service_ivecs);
+
+       move_native_irq(irq);
+}
+
+static void sn_end_irq(unsigned int irq)
+{
+       int ivec;
+       u64 event_occurred;
+
+       ivec = irq & 0xff;
+       if (ivec == SGI_UART_VECTOR) {
+               event_occurred = HUB_L((u64*)LOCAL_MMR_ADDR (SH_EVENT_OCCURRED));
+               /* If the UART bit is set here, we may have received an
+                * interrupt from the UART that the driver missed.  To
+                * make sure, we IPI ourselves to force us to look again.
+                */
+               if (event_occurred & SH_EVENT_OCCURRED_UART_INT_MASK) {
+                       platform_send_ipi(smp_processor_id(), SGI_UART_VECTOR,
+                                         IA64_IPI_DM_INT, 0);
+               }
+       }
+       __clear_bit(ivec, (volatile void *)pda->sn_in_service_ivecs);
+       if (sn_force_interrupt_flag)
+               force_interrupt(irq);
+}
+
+#ifndef XEN
+static void sn_irq_info_free(struct rcu_head *head);
+
+struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info,
+                                      nasid_t nasid, int slice)
+{
+       int vector;
+       int cpuphys;
+       int64_t bridge;
+       int local_widget, status;
+       nasid_t local_nasid;
+       struct sn_irq_info *new_irq_info;
+       struct sn_pcibus_provider *pci_provider;
+
+       new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC);
+       if (new_irq_info == NULL)
+               return NULL;
+
+       memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info));
+
+       bridge = (u64) new_irq_info->irq_bridge;
+       if (!bridge) {
+               kfree(new_irq_info);
+               return NULL; /* irq is not a device interrupt */
+       }
+
+       local_nasid = NASID_GET(bridge);
+
+       if (local_nasid & 1)
+               local_widget = TIO_SWIN_WIDGETNUM(bridge);
+       else
+               local_widget = SWIN_WIDGETNUM(bridge);
+
+       vector = sn_irq_info->irq_irq;
+       /* Free the old PROM new_irq_info structure */
+       sn_intr_free(local_nasid, local_widget, new_irq_info);
+       /* Update kernels new_irq_info with new target info */
+       unregister_intr_pda(new_irq_info);
+
+       /* allocate a new PROM new_irq_info struct */
+       status = sn_intr_alloc(local_nasid, local_widget,
+                              new_irq_info, vector,
+                              nasid, slice);
+
+       /* SAL call failed */
+       if (status) {
+               kfree(new_irq_info);
+               return NULL;
+       }
+
+       cpuphys = nasid_slice_to_cpuid(nasid, slice);
+       new_irq_info->irq_cpuid = cpuphys;
+       register_intr_pda(new_irq_info);
+
+       pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type];
+
+       /*
+        * If this represents a line interrupt, target it.  If it's
+        * an msi (irq_int_bit < 0), it's already targeted.
+        */
+       if (new_irq_info->irq_int_bit >= 0 &&
+           pci_provider && pci_provider->target_interrupt)
+               (pci_provider->target_interrupt)(new_irq_info);
+
+       spin_lock(&sn_irq_info_lock);
+#ifdef XEN
+       list_replace(&sn_irq_info->list, &new_irq_info->list);
+#else
+       list_replace_rcu(&sn_irq_info->list, &new_irq_info->list);
+#endif
+       spin_unlock(&sn_irq_info_lock);
+#ifndef XEN
+       call_rcu(&sn_irq_info->rcu, sn_irq_info_free);
+#endif
+
+#ifdef CONFIG_SMP
+       set_irq_affinity_info((vector & 0xff), cpuphys, 0);
+#endif
+
+       return new_irq_info;
+}
+
+static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+       struct sn_irq_info *sn_irq_info, *sn_irq_info_safe;
+       nasid_t nasid;
+       int slice;
+
+       nasid = cpuid_to_nasid(first_cpu(mask));
+       slice = cpuid_to_slice(first_cpu(mask));
+
+       list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
+                                sn_irq_lh[irq], list)
+               (void)sn_retarget_vector(sn_irq_info, nasid, slice);
+}
+#endif
+
+struct hw_interrupt_type irq_type_sn = {
+#ifndef XEN
+       .name           = "SN hub",
+#endif
+       .startup        = sn_startup_irq,
+       .shutdown       = sn_shutdown_irq,
+       .enable         = sn_enable_irq,
+       .disable        = sn_disable_irq,
+       .ack            = sn_ack_irq,
+       .end            = sn_end_irq,
+#ifndef XEN
+       .set_affinity   = sn_set_affinity_irq
+#endif
+};
+
+unsigned int sn_local_vector_to_irq(u8 vector)
+{
+       return (CPU_VECTOR_TO_IRQ(smp_processor_id(), vector));
+}
+
+void sn_irq_init(void)
+{
+#ifndef XEN
+       int i;
+       irq_desc_t *base_desc = irq_desc;
+
+       ia64_first_device_vector = IA64_SN2_FIRST_DEVICE_VECTOR;
+       ia64_last_device_vector = IA64_SN2_LAST_DEVICE_VECTOR;
+
+       for (i = 0; i < NR_IRQS; i++) {
+               if (base_desc[i].chip == &no_irq_type) {
+                       base_desc[i].chip = &irq_type_sn;
+               }
+       }
+#endif
+}
+
+#ifndef XEN
+static void register_intr_pda(struct sn_irq_info *sn_irq_info)
+{
+       int irq = sn_irq_info->irq_irq;
+       int cpu = sn_irq_info->irq_cpuid;
+
+       if (pdacpu(cpu)->sn_last_irq < irq) {
+               pdacpu(cpu)->sn_last_irq = irq;
+       }
+
+       if (pdacpu(cpu)->sn_first_irq == 0 || pdacpu(cpu)->sn_first_irq > irq)
+               pdacpu(cpu)->sn_first_irq = irq;
+}
+
+static void unregister_intr_pda(struct sn_irq_info *sn_irq_info)
+{
+       int irq = sn_irq_info->irq_irq;
+       int cpu = sn_irq_info->irq_cpuid;
+       struct sn_irq_info *tmp_irq_info;
+       int i, foundmatch;
+
+#ifndef XEN
+       rcu_read_lock();
+#else
+       spin_lock(&sn_irq_info_lock);
+#endif
+       if (pdacpu(cpu)->sn_last_irq == irq) {
+               foundmatch = 0;
+               for (i = pdacpu(cpu)->sn_last_irq - 1;
+                    i && !foundmatch; i--) {
+#ifdef XEN
+                       list_for_each_entry(tmp_irq_info,
+                                               sn_irq_lh[i],
+                                               list) {
+#else
+                       list_for_each_entry_rcu(tmp_irq_info,
+                                               sn_irq_lh[i],
+                                               list) {
+#endif
+                               if (tmp_irq_info->irq_cpuid == cpu) {
+                                       foundmatch = 1;
+                                       break;
+                               }
+                       }
+               }
+               pdacpu(cpu)->sn_last_irq = i;
+       }
+
+       if (pdacpu(cpu)->sn_first_irq == irq) {
+               foundmatch = 0;
+               for (i = pdacpu(cpu)->sn_first_irq + 1;
+                    i < NR_IRQS && !foundmatch; i++) {
+#ifdef XEN
+                       list_for_each_entry(tmp_irq_info,
+                                               sn_irq_lh[i],
+                                               list) {
+#else
+                       list_for_each_entry_rcu(tmp_irq_info,
+                                               sn_irq_lh[i],
+                                               list) {
+#endif
+                               if (tmp_irq_info->irq_cpuid == cpu) {
+                                       foundmatch = 1;
+                                       break;
+                               }
+                       }
+               }
+               pdacpu(cpu)->sn_first_irq = ((i == NR_IRQS) ? 0 : i);
+       }
+#ifndef XEN
+       rcu_read_unlock();
+#else
+       spin_unlock(&sn_irq_info_lock);
+#endif
+}
+#endif /* XEN */
+
+#ifndef XEN
+static void sn_irq_info_free(struct rcu_head *head)
+{
+       struct sn_irq_info *sn_irq_info;
+
+       sn_irq_info = container_of(head, struct sn_irq_info, rcu);
+       kfree(sn_irq_info);
+}
+#endif
+
+#ifndef XEN
+void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info)
+{
+       nasid_t nasid = sn_irq_info->irq_nasid;
+       int slice = sn_irq_info->irq_slice;
+       int cpu = nasid_slice_to_cpuid(nasid, slice);
+
+       pci_dev_get(pci_dev);
+       sn_irq_info->irq_cpuid = cpu;
+       sn_irq_info->irq_pciioinfo = SN_PCIDEV_INFO(pci_dev);
+
+       /* link it into the sn_irq[irq] list */
+       spin_lock(&sn_irq_info_lock);
+#ifdef XEN
+       list_add(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]);
+#else
+       list_add_rcu(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]);
+#endif
+#ifndef XEN
+       reserve_irq_vector(sn_irq_info->irq_irq);
+#endif
+       spin_unlock(&sn_irq_info_lock);
+
+       register_intr_pda(sn_irq_info);
+}
+
+void sn_irq_unfixup(struct pci_dev *pci_dev)
+{
+       struct sn_irq_info *sn_irq_info;
+
+       /* Only cleanup IRQ stuff if this device has a host bus context */
+       if (!SN_PCIDEV_BUSSOFT(pci_dev))
+               return;
+
+       sn_irq_info = SN_PCIDEV_INFO(pci_dev)->pdi_sn_irq_info;
+       if (!sn_irq_info)
+               return;
+       if (!sn_irq_info->irq_irq) {
+               kfree(sn_irq_info);
+               return;
+       }
+
+       unregister_intr_pda(sn_irq_info);
+       spin_lock(&sn_irq_info_lock);
+#ifdef XEN
+       list_del(&sn_irq_info->list);
+#else
+       list_del_rcu(&sn_irq_info->list);
+#endif
+       spin_unlock(&sn_irq_info_lock);
+       if (list_empty(sn_irq_lh[sn_irq_info->irq_irq]))
+               free_irq_vector(sn_irq_info->irq_irq);
+#ifndef XEN
+       call_rcu(&sn_irq_info->rcu, sn_irq_info_free);
+#endif
+       pci_dev_put(pci_dev);
+
+}
+#endif
+
+static inline void
+sn_call_force_intr_provider(struct sn_irq_info *sn_irq_info)
+{
+       struct sn_pcibus_provider *pci_provider;
+
+       pci_provider = sn_pci_provider[sn_irq_info->irq_bridge_type];
+       if (pci_provider && pci_provider->force_interrupt)
+               (*pci_provider->force_interrupt)(sn_irq_info);
+}
+
+static void force_interrupt(int irq)
+{
+       struct sn_irq_info *sn_irq_info;
+
+#ifndef XEN
+       if (!sn_ioif_inited)
+               return;
+#endif
+
+#ifdef XEN
+       spin_lock(&sn_irq_info_lock);
+#else
+       rcu_read_lock();
+#endif
+#ifdef XEN
+       list_for_each_entry(sn_irq_info, sn_irq_lh[irq], list)
+#else
+       list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[irq], list)
+#endif
+               sn_call_force_intr_provider(sn_irq_info);
+
+#ifdef XEN
+       spin_unlock(&sn_irq_info_lock);
+#else
+       rcu_read_unlock();
+#endif
+}
+
+#ifndef XEN
+/*
+ * Check for lost interrupts.  If the PIC int_status reg. says that
+ * an interrupt has been sent, but not handled, and the interrupt
+ * is not pending in either the cpu irr regs or in the soft irr regs,
+ * and the interrupt is not in service, then the interrupt may have
+ * been lost.  Force an interrupt on that pin.  It is possible that
+ * the interrupt is in flight, so we may generate a spurious interrupt,
+ * but we should never miss a real lost interrupt.
+ */
+static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info)
+{
+       u64 regval;
+       struct pcidev_info *pcidev_info;
+       struct pcibus_info *pcibus_info;
+
+       /*
+        * Bridge types attached to TIO (anything but PIC) do not need this WAR
+        * since they do not target Shub II interrupt registers.  If that
+        * ever changes, this check needs to accomodate.
+        */
+       if (sn_irq_info->irq_bridge_type != PCIIO_ASIC_TYPE_PIC)
+               return;
+
+       pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+       if (!pcidev_info)
+               return;
+
+       pcibus_info =
+           (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info->
+           pdi_pcibus_info;
+       regval = pcireg_intr_status_get(pcibus_info);
+
+       if (!ia64_get_irr(irq_to_vector(irq))) {
+               if (!test_bit(irq, pda->sn_in_service_ivecs)) {
+                       regval &= 0xff;
+                       if (sn_irq_info->irq_int_bit & regval &
+                           sn_irq_info->irq_last_intr) {
+                               regval &= ~(sn_irq_info->irq_int_bit & regval);
+                               sn_call_force_intr_provider(sn_irq_info);
+                       }
+               }
+       }
+       sn_irq_info->irq_last_intr = regval;
+}
+#endif
+
+void sn_lb_int_war_check(void)
+{
+#ifndef XEN
+       struct sn_irq_info *sn_irq_info;
+       int i;
+
+#ifdef XEN
+       if (pda->sn_first_irq == 0)
+#else
+       if (!sn_ioif_inited || pda->sn_first_irq == 0)
+#endif
+               return;
+
+#ifdef XEN
+       spin_lock(&sn_irq_info_lock);
+#else
+       rcu_read_lock();
+#endif
+       for (i = pda->sn_first_irq; i <= pda->sn_last_irq; i++) {
+#ifdef XEN
+               list_for_each_entry(sn_irq_info, sn_irq_lh[i], list) {
+#else
+               list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[i], list) {
+#endif
+                       sn_check_intr(i, sn_irq_info);
+               }
+       }
+#ifdef XEN
+       spin_unlock(&sn_irq_info_lock);
+#else
+       rcu_read_unlock();
+#endif
+#endif
+}
+
+void __init sn_irq_lh_init(void)
+{
+       int i;
+
+       sn_irq_lh = kmalloc(sizeof(struct list_head *) * NR_IRQS, GFP_KERNEL);
+       if (!sn_irq_lh)
+               panic("SN PCI INIT: Failed to allocate memory for PCI init\n");
+
+       for (i = 0; i < NR_IRQS; i++) {
+               sn_irq_lh[i] = kmalloc(sizeof(struct list_head), GFP_KERNEL);
+               if (!sn_irq_lh[i])
+                       panic("SN PCI INIT: Failed IRQ memory allocation\n");
+
+               INIT_LIST_HEAD(sn_irq_lh[i]);
+       }
+}
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/setup.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/sn/kernel/setup.c Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,808 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#ifndef XEN
+#include <linux/kdev_t.h>
+#endif
+#include <linux/string.h>
+#ifndef XEN
+#include <linux/screen_info.h>
+#endif
+#include <linux/console.h>
+#include <linux/timex.h>
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/serial.h>
+#include <linux/irq.h>
+#include <linux/bootmem.h>
+#include <linux/mmzone.h>
+#include <linux/interrupt.h>
+#include <linux/acpi.h>
+#include <linux/compiler.h>
+#include <linux/sched.h>
+#ifndef XEN
+#include <linux/root_dev.h>
+#endif
+#include <linux/nodemask.h>
+#include <linux/pm.h>
+#include <linux/efi.h>
+
+#include <asm/io.h>
+#include <asm/sal.h>
+#include <asm/machvec.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#ifndef XEN
+#include <asm/vga.h>
+#endif
+#include <asm/sn/arch.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/pda.h>
+#include <asm/sn/nodepda.h>
+#include <asm/sn/sn_cpuid.h>
+#include <asm/sn/simulator.h>
+#include <asm/sn/leds.h>
+#ifndef XEN
+#include <asm/sn/bte.h>
+#endif
+#include <asm/sn/shub_mmr.h>
+#ifndef XEN
+#include <asm/sn/clksupport.h>
+#endif
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/geo.h>
+#include <asm/sn/sn_feature_sets.h>
+#ifndef XEN
+#include "xtalk/xwidgetdev.h"
+#include "xtalk/hubdev.h"
+#else
+#include "asm/sn/xwidgetdev.h"
+#include "asm/sn/hubdev.h"
+#endif
+#include <asm/sn/klconfig.h>
+#ifdef XEN
+#include <asm/sn/shubio.h>
+
+/* Xen has no clue about NUMA ....  grrrr */
+#define pxm_to_node(foo)               0
+#define node_to_pxm(foo)               0
+#define numa_node_id()                 0
+#endif
+
+
+DEFINE_PER_CPU(struct pda_s, pda_percpu);
+
+#define MAX_PHYS_MEMORY                (1UL << IA64_MAX_PHYS_BITS)     /* Max physical address supported */
+
+extern void bte_init_node(nodepda_t *, cnodeid_t);
+
+extern void sn_timer_init(void);
+extern unsigned long last_time_offset;
+extern void (*ia64_mark_idle) (int);
+extern void snidle(int);
+extern unsigned long long (*ia64_printk_clock)(void);
+
+unsigned long sn_rtc_cycles_per_second;
+EXPORT_SYMBOL(sn_rtc_cycles_per_second);
+
+DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
+EXPORT_PER_CPU_SYMBOL(__sn_hub_info);
+
+DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]);
+EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid);
+
+DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda);
+EXPORT_PER_CPU_SYMBOL(__sn_nodepda);
+
+char sn_system_serial_number_string[128];
+EXPORT_SYMBOL(sn_system_serial_number_string);
+u64 sn_partition_serial_number;
+EXPORT_SYMBOL(sn_partition_serial_number);
+u8 sn_partition_id;
+EXPORT_SYMBOL(sn_partition_id);
+u8 sn_system_size;
+EXPORT_SYMBOL(sn_system_size);
+u8 sn_sharing_domain_size;
+EXPORT_SYMBOL(sn_sharing_domain_size);
+u8 sn_coherency_id;
+EXPORT_SYMBOL(sn_coherency_id);
+u8 sn_region_size;
+EXPORT_SYMBOL(sn_region_size);
+int sn_prom_type;      /* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */
+
+short physical_node_map[MAX_NUMALINK_NODES];
+static unsigned long sn_prom_features[MAX_PROM_FEATURE_SETS];
+
+EXPORT_SYMBOL(physical_node_map);
+
+int num_cnodes;
+
+static void sn_init_pdas(char **);
+static void build_cnode_tables(void);
+
+static nodepda_t *nodepdaindr[MAX_COMPACT_NODES];
+
+#ifndef XEN
+/*
+ * The format of "screen_info" is strange, and due to early i386-setup
+ * code. This is just enough to make the console code think we're on a
+ * VGA color display.
+ */
+struct screen_info sn_screen_info = {
+       .orig_x = 0,
+       .orig_y = 0,
+       .orig_video_mode = 3,
+       .orig_video_cols = 80,
+       .orig_video_ega_bx = 3,
+       .orig_video_lines = 25,
+       .orig_video_isVGA = 1,
+       .orig_video_points = 16
+};
+#endif
+
+/*
+ * This routine can only be used during init, since
+ * smp_boot_data is an init data structure.
+ * We have to use smp_boot_data.cpu_phys_id to find
+ * the physical id of the processor because the normal
+ * cpu_physical_id() relies on data structures that
+ * may not be initialized yet.
+ */
+
+static int __init pxm_to_nasid(int pxm)
+{
+       int i;
+       int nid;
+
+       nid = pxm_to_node(pxm);
+       for (i = 0; i < num_node_memblks; i++) {
+               if (node_memblk[i].nid == nid) {
+                       return NASID_GET(node_memblk[i].start_paddr);
+               }
+       }
+       return -1;
+}
+
+/**
+ * early_sn_setup - early setup routine for SN platforms
+ *
+ * Sets up an initial console to aid debugging.  Intended primarily
+ * for bringup.  See start_kernel() in init/main.c.
+ */
+
+void __init early_sn_setup(void)
+{
+       efi_system_table_t *efi_systab;
+       efi_config_table_t *config_tables;
+       struct ia64_sal_systab *sal_systab;
+       struct ia64_sal_desc_entry_point *ep;
+       char *p;
+       int i, j;
+
+       /*
+        * Parse enough of the SAL tables to locate the SAL entry point. Since, console
+        * IO on SN2 is done via SAL calls, early_printk won't work without this.
+        *
+        * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c.
+        * Any changes to those file may have to be made hereas well.
+        */
+       efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab);
+       config_tables = __va(efi_systab->tables);
+       for (i = 0; i < efi_systab->nr_tables; i++) {
+               if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) ==
+                   0) {
+                       sal_systab = __va(config_tables[i].table);
+                       p = (char *)(sal_systab + 1);
+                       for (j = 0; j < sal_systab->entry_count; j++) {
+                               if (*p == SAL_DESC_ENTRY_POINT) {
+                                       ep = (struct ia64_sal_desc_entry_point
+                                             *)p;
+                                       ia64_sal_handler_init(__va
+                                                             (ep->sal_proc),
+                                                             __va(ep->gp));
+                                       return;
+                               }
+                               p += SAL_DESC_SIZE(*p);
+                       }
+               }
+       }
+       /* Uh-oh, SAL not available?? */
+       printk(KERN_ERR "failed to find SAL entry point\n");
+}
+
+extern int platform_intr_list[];
+static int __initdata shub_1_1_found;
+
+/*
+ * sn_check_for_wars
+ *
+ * Set flag for enabling shub specific wars
+ */
+
+static inline int __init is_shub_1_1(int nasid)
+{
+       unsigned long id;
+       int rev;
+
+       if (is_shub2())
+               return 0;
+       id = REMOTE_HUB_L(nasid, SH1_SHUB_ID);
+       rev = (id & SH1_SHUB_ID_REVISION_MASK) >> SH1_SHUB_ID_REVISION_SHFT;
+       return rev <= 2;
+}
+
+static void __init sn_check_for_wars(void)
+{
+       int cnode;
+
+       if (is_shub2()) {
+               /* none yet */
+       } else {
+               for_each_online_node(cnode) {
+                       if (is_shub_1_1(cnodeid_to_nasid(cnode)))
+                               shub_1_1_found = 1;
+               }
+       }
+}
+
+#ifndef XEN
+/*
+ * Scan the EFI PCDP table (if it exists) for an acceptable VGA console
+ * output device.  If one exists, pick it and set sn_legacy_{io,mem} to
+ * reflect the bus offsets needed to address it.
+ *
+ * Since pcdp support in SN is not supported in the 2.4 kernel (or at least
+ * the one lbs is based on) just declare the needed structs here.
+ *
+ * Reference spec http://www.dig64.org/specifications/DIG64_PCDPv20.pdf
+ *
+ * Returns 0 if no acceptable vga is found, !0 otherwise.
+ *
+ * Note:  This stuff is duped here because Altix requires the PCDP to
+ * locate a usable VGA device due to lack of proper ACPI support.  Structures
+ * could be used from drivers/firmware/pcdp.h, but it was decided that moving
+ * this file to a more public location just for Altix use was undesireable.
+ */
+
+struct hcdp_uart_desc {
+       u8      pad[45];
+};
+
+struct pcdp {
+       u8      signature[4];   /* should be 'HCDP' */
+       u32     length;
+       u8      rev;            /* should be >=3 for pcdp, <3 for hcdp */
+       u8      sum;
+       u8      oem_id[6];
+       u64     oem_tableid;
+       u32     oem_rev;
+       u32     creator_id;
+       u32     creator_rev;
+       u32     num_type0;
+       struct hcdp_uart_desc uart[0];  /* num_type0 of these */
+       /* pcdp descriptors follow */
+}  __attribute__((packed));
+
+struct pcdp_device_desc {
+       u8      type;
+       u8      primary;
+       u16     length;
+       u16     index;
+       /* interconnect specific structure follows */
+       /* device specific structure follows that */
+}  __attribute__((packed));
+
+struct pcdp_interface_pci {
+       u8      type;           /* 1 == pci */
+       u8      reserved;
+       u16     length;
+       u8      segment;
+       u8      bus;
+       u8      dev;
+       u8      fun;
+       u16     devid;
+       u16     vendid;
+       u32     acpi_interrupt;
+       u64     mmio_tra;
+       u64     ioport_tra;
+       u8      flags;
+       u8      translation;
+}  __attribute__((packed));
+
+struct pcdp_vga_device {
+       u8      num_eas_desc;
+       /* ACPI Extended Address Space Desc follows */
+}  __attribute__((packed));
+
+/* from pcdp_device_desc.primary */
+#define PCDP_PRIMARY_CONSOLE   0x01
+
+/* from pcdp_device_desc.type */
+#define PCDP_CONSOLE_INOUT     0x0
+#define PCDP_CONSOLE_DEBUG     0x1
+#define PCDP_CONSOLE_OUT       0x2
+#define PCDP_CONSOLE_IN                0x3
+#define PCDP_CONSOLE_TYPE_VGA  0x8
+
+#define PCDP_CONSOLE_VGA       (PCDP_CONSOLE_TYPE_VGA | PCDP_CONSOLE_OUT)
+
+/* from pcdp_interface_pci.type */
+#define PCDP_IF_PCI            1
+
+/* from pcdp_interface_pci.translation */
+#define PCDP_PCI_TRANS_IOPORT  0x02
+#define PCDP_PCI_TRANS_MMIO    0x01
+
+#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
+static void
+sn_scan_pcdp(void)
+{
+       u8 *bp;
+       struct pcdp *pcdp;
+       struct pcdp_device_desc device;
+       struct pcdp_interface_pci if_pci;
+       extern struct efi efi;
+
+       if (efi.hcdp == EFI_INVALID_TABLE_ADDR)
+               return;         /* no hcdp/pcdp table */
+
+       pcdp = __va(efi.hcdp);
+
+       if (pcdp->rev < 3)
+               return;         /* only support PCDP (rev >= 3) */
+
+       for (bp = (u8 *)&pcdp->uart[pcdp->num_type0];
+            bp < (u8 *)pcdp + pcdp->length;
+            bp += device.length) {
+               memcpy(&device, bp, sizeof(device));
+               if (! (device.primary & PCDP_PRIMARY_CONSOLE))
+                       continue;       /* not primary console */
+
+               if (device.type != PCDP_CONSOLE_VGA)
+                       continue;       /* not VGA descriptor */
+
+               memcpy(&if_pci, bp+sizeof(device), sizeof(if_pci));
+               if (if_pci.type != PCDP_IF_PCI)
+                       continue;       /* not PCI interconnect */
+
+               if (if_pci.translation & PCDP_PCI_TRANS_IOPORT)
+                       vga_console_iobase =
+                               if_pci.ioport_tra | __IA64_UNCACHED_OFFSET;
+
+               if (if_pci.translation & PCDP_PCI_TRANS_MMIO)
+                       vga_console_membase =
+                               if_pci.mmio_tra | __IA64_UNCACHED_OFFSET;
+
+               break; /* once we find the primary, we're done */
+       }
+}
+#endif
+
+static unsigned long sn2_rtc_initial;
+
+static unsigned long long ia64_sn2_printk_clock(void)
+{
+       unsigned long rtc_now = rtc_time();
+
+       return (rtc_now - sn2_rtc_initial) *
+               (1000000000 / sn_rtc_cycles_per_second);
+}
+#endif
+
+/**
+ * sn_setup - SN platform setup routine
+ * @cmdline_p: kernel command line
+ *
+ * Handles platform setup for SN machines.  This includes building the
+ * cnode tables, determining the RTC frequency (via a SAL call), setting
+ * up per-node data areas and running sn_cpu_init() for the boot cpu.
+ * When not built for Xen, the console, printk clock, root device, timer
+ * and power-off hook are also set up here.
+ */
+#ifdef XEN
+void __cpuinit sn_cpu_init(void);
+#endif
+
+void __init sn_setup(char **cmdline_p)
+{
+#ifndef XEN
+       long status, ticks_per_sec, drift;
+#else
+       unsigned long status, ticks_per_sec, drift;
+#endif
+       u32 version = sn_sal_rev();
+#ifndef XEN
+       extern void sn_cpu_init(void);
+
+       sn2_rtc_initial = rtc_time();
+       ia64_sn_plat_set_error_handling_features();     // obsolete
+       ia64_sn_set_os_feature(OSF_MCA_SLV_TO_OS_INIT_SLV);
+       ia64_sn_set_os_feature(OSF_FEAT_LOG_SBES);
+
+
+#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
+       /*
+        * Handle SN vga console.
+        *
+        * SN systems do not have enough ACPI table information
+        * being passed from prom to identify VGA adapters and the legacy
+        * addresses to access them.  Until that is done, SN systems rely
+        * on the PCDP table to identify the primary VGA console if one
+        * exists.
+        *
+        * However, kernel PCDP support is optional, and even if it is built
+        * into the kernel, it will not be used if the boot cmdline contains
+        * console= directives.
+        *
+        * So, to work around this mess, we duplicate some of the PCDP code
+        * here so that the primary VGA console (as defined by PCDP) will
+        * work on SN systems even if a different console (e.g. serial) is
+        * selected on the boot line (or CONFIG_EFI_PCDP is off).
+        */
+
+       if (! vga_console_membase)
+               sn_scan_pcdp();
+
+       if (vga_console_membase) {
+               /* usable vga ... make tty0 the preferred default console */
+               if (!strstr(*cmdline_p, "console="))
+                       add_preferred_console("tty", 0, NULL);
+       } else {
+               printk(KERN_DEBUG "SGI: Disabling VGA console\n");
+               if (!strstr(*cmdline_p, "console="))
+                       add_preferred_console("ttySG", 0, NULL);
+#ifdef CONFIG_DUMMY_CONSOLE
+               conswitchp = &dummy_con;
+#else
+               conswitchp = NULL;
+#endif                         /* CONFIG_DUMMY_CONSOLE */
+       }
+#endif                         /* def(CONFIG_VT) && def(CONFIG_VGA_CONSOLE) */
+
+       MAX_DMA_ADDRESS = PAGE_OFFSET + MAX_PHYS_MEMORY;
+#endif
+
+       /*
+        * Build the tables for managing cnodes.
+        */
+       build_cnode_tables();
+
+       status =
+           ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
+                              &drift);
+       if (status != 0 || ticks_per_sec < 100000) {
+               /* The message is kept on one line so the literal stays intact. */
+               printk(KERN_WARNING
+                      "unable to determine platform RTC clock frequency, guessing.\n");
+               /* PROM gives wrong value for clock freq. so guess */
+               sn_rtc_cycles_per_second = 1000000000000UL / 30000UL;
+       } else
+               sn_rtc_cycles_per_second = ticks_per_sec;
+#ifndef XEN
+
+       platform_intr_list[ACPI_INTERRUPT_CPEI] = IA64_CPE_VECTOR;
+
+       ia64_printk_clock = ia64_sn2_printk_clock;
+#endif
+
+       /* version holds the SAL revision as major in the high byte, minor in the low byte. */
+       printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF);
+
+       /*
+        * we set the default root device to /dev/hda
+        * to make simulation easy
+        */
+#ifndef XEN
+       ROOT_DEV = Root_HDA1;
+#endif
+
+       /*
+        * Create the PDAs and NODEPDAs for all the cpus.
+        */
+       sn_init_pdas(cmdline_p);
+
+#ifndef XEN
+       ia64_mark_idle = &snidle;
+#endif
+
+       /*
+        * For the bootcpu, we do this here. All other cpus will make the
+        * call as part of cpu_init in slave cpu initialization.
+        */
+       sn_cpu_init();
+
+#ifndef XEN
+#ifdef CONFIG_SMP
+       init_smp_config();
+#endif
+       screen_info = sn_screen_info;
+
+       sn_timer_init();
+
+       /*
+        * set pm_power_off to a SAL call to allow
+        * sn machines to power off. The SAL call can be replaced
+        * by an ACPI interface call when ACPI is fully implemented
+        * for sn.
+        */
+       pm_power_off = ia64_sn_power_down;
+       current->thread.flags |= IA64_THREAD_MIGRATION;
+#endif
+}
+
+/**
+ * sn_init_pdas - setup node data areas
+ * @cmdline_p: kernel command line; not referenced by this function
+ *
+ * One time setup for Node Data Area.  Called by sn_setup().
+ *
+ * A zeroed nodepda_t is taken from bootmem for every online node and for
+ * every cnode from num_online_nodes() up to num_cnodes, and each nodepda
+ * then receives a copy of the complete nodepdaindr[] pointer array.  The
+ * bte and hubdev initialisation at the end is compiled out under XEN.
+ */
+static void __init sn_init_pdas(char **cmdline_p)
+{
+       cnodeid_t cnode;
+
+       /*
+        * Allocate & initialize the nodepda for each online node.  The
+        * phys_cpuid table is filled with -1 bytes and the ptc_lock is
+        * initialized here as well.
+        */
+       for_each_online_node(cnode) {
+               nodepdaindr[cnode] =
+                   alloc_bootmem_node(NODE_DATA(cnode), sizeof(nodepda_t));
+               memset(nodepdaindr[cnode], 0, sizeof(nodepda_t));
+               memset(nodepdaindr[cnode]->phys_cpuid, -1,
+                   sizeof(nodepdaindr[cnode]->phys_cpuid));
+               spin_lock_init(&nodepdaindr[cnode]->ptc_lock);
+       }
+
+       /*
+        * Allocate & initialize nodepda for TIOs.  For now, put them on node 0.
+        * These entries are only zeroed; phys_cpuid and ptc_lock are not
+        * touched in this loop.
+        */
+       for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++) {
+               nodepdaindr[cnode] =
+                   alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t));
+               memset(nodepdaindr[cnode], 0, sizeof(nodepda_t));
+       }
+
+       /*
+        * Now copy the array of nodepda pointers to each nodepda.
+        */
+       for (cnode = 0; cnode < num_cnodes; cnode++)
+               memcpy(nodepdaindr[cnode]->pernode_pdaindr, nodepdaindr,
+                      sizeof(nodepdaindr));
+
+#ifndef XEN
+       /*
+        * Set up IO related platform-dependent nodepda fields.
+        * The following routine actually sets up the hubinfo struct
+        * in nodepda.
+        */
+       for_each_online_node(cnode) {
+               bte_init_node(nodepdaindr[cnode], cnode);
+       }
+
+       /*
+        * Initialize the per node hubdev.  This includes IO Nodes and
+        * headless/memless nodes.
+        */
+       for (cnode = 0; cnode < num_cnodes; cnode++) {
+               hubdev_init_node(nodepdaindr[cnode], cnode);
+       }
+#endif
+}
+
+/**
+ * sn_cpu_init - initialize per-cpu data areas
+ *
+ * Called during cpu initialization on each cpu as it starts; the cpu id
+ * is taken from smp_processor_id().
+ * Currently, initializes the per-cpu data area for SNIA.
+ * Also sets up a few fields in the nodepda.  Also known as
+ * platform_cpu_init() by the ia64 machvec code.
+ *
+ * Returns early, after the SAL queries, while nodepdaindr[0] is still
+ * NULL; sn_setup() calls this again for the boot cpu once the nodepdas
+ * exist.
+ */
+void __cpuinit sn_cpu_init(void)
+{
+       int cpuid;
+       int cpuphyid;
+       int nasid;
+       int subnode;
+       int slice;
+       int cnode;
+       int i;
+       static int wars_have_been_checked;
+
+       cpuid = smp_processor_id();
+#ifndef XEN
+       if (cpuid == 0 && IS_MEDUSA()) {
+               if (ia64_sn_is_fake_prom())
+                       sn_prom_type = 2;
+               else
+                       sn_prom_type = 1;
+               printk(KERN_INFO "Running on medusa with %s PROM\n",
+                      (sn_prom_type == 1) ? "real" : "fake");
+       }
+#endif
+
+       /*
+        * pda is dereferenced as a pointer throughout this function, so
+        * sizeof(pda) would only cover a pointer's worth of bytes.  Clear
+        * the whole structure it points to.
+        */
+       memset(pda, 0, sizeof(*pda));
+       if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2,
+                               &sn_hub_info->nasid_bitmask,
+                               &sn_hub_info->nasid_shift,
+                               &sn_system_size, &sn_sharing_domain_size,
+                               &sn_partition_id, &sn_coherency_id,
+                               &sn_region_size))
+               BUG();
+       sn_hub_info->as_shift = sn_hub_info->nasid_shift - 2;
+
+       /*
+        * Don't check status. The SAL call is not supported on all PROMs
+        * but a failure is harmless.
+        */
+       (void) ia64_sn_set_cpu_number(cpuid);
+
+       /*
+        * The boot cpu makes this call again after platform initialization is
+        * complete.
+        */
+       if (nodepdaindr[0] == NULL)
+               return;
+
+       /* Read PROM feature sets until the first one the SAL call rejects. */
+       for (i = 0; i < MAX_PROM_FEATURE_SETS; i++)
+               if (ia64_sn_get_prom_feature_set(i, &sn_prom_features[i]) != 0)
+                       break;
+
+       cpuphyid = get_sapicid();
+
+       if (ia64_sn_get_sapic_info(cpuphyid, &nasid, &subnode, &slice))
+               BUG();
+
+       /* Record this cpu's nasid/slice/subnode in every allocated nodepda. */
+       for (i=0; i < MAX_NUMNODES; i++) {
+               if (nodepdaindr[i]) {
+                       nodepdaindr[i]->phys_cpuid[cpuid].nasid = nasid;
+                       nodepdaindr[i]->phys_cpuid[cpuid].slice = slice;
+                       nodepdaindr[i]->phys_cpuid[cpuid].subnode = subnode;
+               }
+       }
+
+       cnode = nasid_to_cnodeid(nasid);
+
+       sn_nodepda = nodepdaindr[cnode];
+
+       pda->led_address =
+           (typeof(pda->led_address)) (LED0 + (slice << LED_CPU_SHIFT));
+       pda->led_state = LED_ALWAYS_SET;
+       pda->hb_count = HZ / 2;
+       pda->hb_state = 0;
+       pda->idle_flag = 0;
+
+       if (cpuid != 0) {
+               /* copy cpu 0's sn_cnodeid_to_nasid table to this cpu's */
+               memcpy(sn_cnodeid_to_nasid,
+                      (&per_cpu(__sn_cnodeid_to_nasid, 0)),
+                      sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid)));
+       }
+
+       /*
+        * Check for WARs.
+        * Only needs to be done once, on BSP.
+        * Has to be done after loop above, because it uses this cpu's
+        * sn_cnodeid_to_nasid table which was just initialized if this
+        * isn't cpu 0.
+        * Has to be done before assignment below.
+        */
+       if (!wars_have_been_checked) {
+               sn_check_for_wars();
+               wars_have_been_checked = 1;
+       }
+       sn_hub_info->shub_1_1_found = shub_1_1_found;
+
+       /*
+        * Set up addresses of PIO/MEM write status registers.
+        * The register is selected by this cpu's slice.
+        */
+       {
+               u64 pio1[] = {SH1_PIO_WRITE_STATUS_0, 0, SH1_PIO_WRITE_STATUS_1, 0};
+               u64 pio2[] = {SH2_PIO_WRITE_STATUS_0, SH2_PIO_WRITE_STATUS_2,
+                       SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3};
+               u64 *pio;
+               pio = is_shub1() ? pio1 : pio2;
+               pda->pio_write_status_addr =
+                  (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, pio[slice]);
+               pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0;
+       }
+
+#ifndef XEN  /* local_node_data is not allocated .... yet */
+       /*
+        * WAR addresses for SHUB 1.x.
+        * NOTE(review): buddy_nasid is computed but not used afterwards.
+        */
+       if (local_node_data->active_cpu_count++ == 0 && is_shub1()) {
+               int buddy_nasid;
+               buddy_nasid =
+                   cnodeid_to_nasid(numa_node_id() ==
+                                    num_online_nodes() - 1 ? 0 : numa_node_id() + 1);
+               pda->pio_shub_war_cam_addr =
+                   (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid,
+                                                             SH1_PI_CAM_CONTROL);
+       }
+#endif
+}
+
+/*
+ * Build tables for converting between NASIDs and cnodes.
+ */
+/* Non-zero when a board of this klconfig type is to be given a cnode. */
+static inline int __init board_needs_cnode(int type)
+{
+       if (type == KLTYPE_SNIA)
+               return 1;
+       return type == KLTYPE_TIO;
+}
+
+/*
+ * Fill physical_node_map[] (indexed by nasid, yielding a cnode) and
+ * sn_cnodeid_to_nasid[] (indexed by cnode, yielding a nasid) and set
+ * num_cnodes.  Both tables start out filled with -1 bytes.
+ */
+void __init build_cnode_tables(void)
+{
+       int nasid;
+       int node;
+       lboard_t *brd;
+
+       memset(physical_node_map, -1, sizeof(physical_node_map));
+       memset(sn_cnodeid_to_nasid, -1,
+                       sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid)));
+
+       /*
+        * First populate the tables with C/M bricks. This ensures that
+        * cnode == node for all C & M bricks.
+        */
+       for_each_online_node(node) {
+               nasid = pxm_to_nasid(node_to_pxm(node));
+               sn_cnodeid_to_nasid[node] = nasid;
+               physical_node_map[nasid] = node;
+       }
+
+       /*
+        * num_cnodes is total number of C/M/TIO bricks. Because of the 256 node
+        * limit on the number of nodes, we can't use the generic node numbers
+        * for this. Note that num_cnodes is incremented below as TIOs or
+        * headless/memoryless nodes are discovered.
+        */
+       num_cnodes = num_online_nodes();
+
+       /* fakeprom does not support klgraph */
+       if (IS_RUNNING_ON_FAKE_PROM())
+               return;
+
+       /* Find TIOs & headless/memoryless nodes and add them to the tables */
+       for_each_online_node(node) {
+               kl_config_hdr_t *klgraph_header;
+               nasid = cnodeid_to_nasid(node);
+               klgraph_header = ia64_sn_get_klconfig_addr(nasid);
+               if (klgraph_header == NULL)
+                       BUG();
+               brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info);
+               while (brd) {
+                       /* Only boards whose nasid has no cnode yet get a new one. */
+                       if (board_needs_cnode(brd->brd_type) && physical_node_map[brd->brd_nasid] < 0) {
+                               sn_cnodeid_to_nasid[num_cnodes] = brd->brd_nasid;
+                               physical_node_map[brd->brd_nasid] = num_cnodes++;
+                       }
+                       brd = find_lboard_next(brd);
+               }
+       }
+}
+
+/*
+ * Look up the logical cpu whose nasid and slice both match the arguments.
+ * Returns the cpu number, or -1 when no cpu below NR_CPUS matches.
+ */
+int
+nasid_slice_to_cpuid(int nasid, int slice)
+{
+       long cpu = 0;
+
+       while (cpu < NR_CPUS) {
+               if (cpuid_to_nasid(cpu) == nasid &&
+                   cpuid_to_slice(cpu) == slice)
+                       return cpu;
+               cpu++;
+       }
+
+       return -1;
+}
+
+/*
+ * Report whether PROM feature bit @id is set in sn_prom_features[].
+ *
+ * Ids outside 0 .. BITS_PER_LONG * MAX_PROM_FEATURE_SETS - 1 are reported
+ * as unavailable (0) rather than being handed to test_bit(), which would
+ * otherwise index outside the feature array.
+ */
+int sn_prom_feature_available(int id)
+{
+       if (id < 0 || id >= BITS_PER_LONG * MAX_PROM_FEATURE_SETS)
+               return 0;
+       return test_bit(id, sn_prom_features);
+}
+EXPORT_SYMBOL(sn_prom_feature_available);
+
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux-xen/sn/kernel/sn2_smp.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux-xen/sn/kernel/sn2_smp.c       Fri Jan 19 14:48:57 
2007 +0000
@@ -0,0 +1,548 @@
+/*
+ * SN2 Platform specific SMP Support
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/threads.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/nodemask.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/processor.h>
+#include <asm/irq.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+#include <asm/delay.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/tlb.h>
+#include <asm/numa.h>
+#include <asm/hw_irq.h>
+#include <asm/current.h>
+#ifdef XEN
+#include <asm/sn/arch.h>
+#endif
+#include <asm/sn/sn_cpuid.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/shub_mmr.h>
+#include <asm/sn/nodepda.h>
+#include <asm/sn/rw_mmr.h>
+
+DEFINE_PER_CPU(struct ptc_stats, ptcstats);    /* per-cpu purge counters, printed by sn2_ptc_seq_show() */
+DECLARE_PER_CPU(struct ptc_stats, ptcstats);
+
+static  __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);      /* taken through PTC_LOCK() and in sn_send_IPI_phys() */
+
+extern unsigned long
+sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
+                              volatile unsigned long *, unsigned long,
+                              volatile unsigned long *, unsigned long);        /* defined outside this file */
+void
+sn2_ptc_deadlock_recovery(short *, short, short, int,
+                         volatile unsigned long *, unsigned long,
+                         volatile unsigned long *, unsigned long);     /* forward declaration; defined below */
+
+/*
+ * Note: some of the following is captured here to make debugging easier
+ * (the macros make more sense if you see the debug patch - not posted)
+ *
+ * Each macro takes sh1, which is non-zero on shub1 hardware.  Under XEN
+ * PTC_LOCK() ignores it and always names the global lock.
+ */
+#define sn2_ptctest    0
+#define local_node_uses_ptc_ga(sh1)    ((sh1) ? 1 : 0)
+#define max_active_pio(sh1)            ((sh1) ? 32 : 7)
+#define reset_max_active_on_deadlock() 1
+#ifndef XEN
+#define PTC_LOCK(sh1)                  ((sh1) ? &sn2_global_ptc_lock : &sn_nodepda->ptc_lock)
+#else
+#define PTC_LOCK(sh1)                  (&sn2_global_ptc_lock)
+#endif
+
+struct ptc_stats {
+       unsigned long ptc_l;                    /* purges satisfied locally with ia64_ptcl() */
+       unsigned long change_rid;               /* purges handled by flush_tlb_mm() instead */
+       unsigned long shub_ptc_flushes;         /* purges that took the PTC lock */
+       unsigned long nodes_flushed;            /* sum of the nasid counts targeted by those purges */
+       unsigned long deadlocks;                /* calls to sn2_ptc_deadlock_recovery() */
+       unsigned long deadlocks2;               /* sum of sn2_ptc_deadlock_recovery_core() return values */
+       unsigned long lock_itc_clocks;          /* ITC ticks spent acquiring the PTC lock */
+       unsigned long shub_itc_clocks;          /* ITC ticks from lock acquisition to the end of the purge loop */
+       unsigned long shub_itc_clocks_max;      /* largest single value added to shub_itc_clocks */
+       unsigned long shub_ptc_flushes_not_my_mm;       /* locked purges where mymm was false */
+};
+
+#define sn2_ptctest    0
+
+/*
+ * Spin until the pending-write-count field of this cpu's PIO write status
+ * register equals the idle value cached in the pda.
+ *
+ * Returns non-zero when the last value read has the write-deadlock bit set.
+ */
+static inline unsigned long wait_piowc(void)
+{
+       volatile unsigned long *piows;
+       unsigned long zeroval, ws;
+
+       piows = pda->pio_write_status_addr;
+       zeroval = pda->pio_write_status_val;
+       do {
+               cpu_relax();
+       } while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval);
+       return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0;
+}
+
+#ifndef XEN  /* No idea if Xen will ever support this */
+/**
+ * sn_migrate - SN-specific task migration actions
+ * @task: Task being migrated to new CPU
+ *
+ * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order.
+ * Context switching user threads which have memory-mapped MMIO may cause
+ * PIOs to issue from separate CPUs, thus the PIO writes must be drained
+ * from the previous CPU's Shub before execution resumes on the new CPU.
+ */
+void sn_migrate(struct task_struct *task)
+{
+       pda_t *prev_pda = pdacpu(task_thread_info(task)->last_cpu);
+       volatile unsigned long *status_reg = prev_pda->pio_write_status_addr;
+       unsigned long idle_val = prev_pda->pio_write_status_val;
+
+       /*
+        * Poll the write status register recorded in the previous cpu's pda
+        * until its pending-write count reads as idle.
+        */
+       for (;;) {
+               unsigned long pending =
+                       *status_reg & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK;
+
+               if (likely(pending == idle_val))
+                       break;
+               cpu_relax();
+       }
+}
+
+void sn_tlb_migrate_finish(struct mm_struct *mm)
+{
+       /*
+        * Flush only when mm is the current mm and has exactly one user:
+        * flush_tlb_mm is inefficient if more than 1 users of mm.
+        */
+       if (!mm)
+               return;
+#ifndef XEN
+       if (mm != current->mm)
+#else
+       if (mm != &current->arch.mm)
+#endif
+               return;
+       if (atomic_read(&mm->mm_users) != 1)
+               return;
+
+       flush_tlb_mm(mm);
+}
+#endif
+
+/**
+ * sn2_global_tlb_purge - globally purge translation cache of virtual address range
+ * @mm: mm_struct containing virtual address range (not present under XEN)
+ * @start: start of virtual address range
+ * @end: end of virtual address range
+ * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc))
+ *
+ * Purges the translation caches of all processors of the given virtual address
+ * range.
+ *
+ * Note:
+ *     - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context.
+ *     - cpu_vm_mask is converted into a nodemask of the nodes containing the
+ *       cpus in cpu_vm_mask.
+ *     - if only one bit is set in cpu_vm_mask & it is the current cpu & the
+ *       process is purging its own virtual address range, then only the
+ *       local TLB needs to be flushed. This flushing can be done using
+ *       ptc.l. This is the common case & avoids the global spinlock.
+ *     - if multiple cpus have loaded the context, then flushing has to be
+ *       done with ptc.g/MMRs under protection of the global ptc_lock.
+ *
+ * Under XEN there is no @mm: every cpu visited by for_each_cpu() is
+ * treated as a target and mymm is fixed at 0, so neither the ptc.l nor
+ * the flush_tlb_mm() shortcut is taken.
+ */
+
+#ifdef XEN  /* mask_all is left commented out: its only user below is compiled out */
+// static cpumask_t mask_all = CPU_MASK_ALL;
+#endif
+
+void
+#ifndef XEN
+sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
+#else
+sn2_global_tlb_purge(unsigned long start,
+#endif
+                    unsigned long end, unsigned long nbits)
+{
+       int i, ibegin, shub1, cnode, mynasid, cpu, lcpu = 0, nasid;
+#ifndef XEN
+       int mymm = (mm == current->active_mm && mm == current->mm);
+#else
+       // struct mm_struct *mm;
+       int mymm = 0;
+#endif
+       int use_cpu_ptcga;
+       volatile unsigned long *ptc0, *ptc1;
+       unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0;
+       short nasids[MAX_NUMNODES], nix;
+       nodemask_t nodes_flushed;
+       int active, max_active, deadlock;
+
+       nodes_clear(nodes_flushed);
+       i = 0;
+
+       /* Collect the nodes to purge; i counts cpus and lcpu keeps the last one seen. */
+#ifndef XEN  /* Linux walks the cpus recorded in mm->cpu_vm_mask */
+       for_each_cpu_mask(cpu, mm->cpu_vm_mask) {
+               cnode = cpu_to_node(cpu);
+               node_set(cnode, nodes_flushed);
+               lcpu = cpu;
+               i++;
+       }
+#else
+       for_each_cpu(cpu) {
+               cnode = cpu_to_node(cpu);
+               node_set(cnode, nodes_flushed);
+               lcpu = cpu;
+               i++;
+       }
+#endif
+
+       if (i == 0)
+               return;
+
+       preempt_disable();
+
+       /* Common case: only this cpu is involved and the mm is our own, so ptc.l suffices. */
+       if (likely(i == 1 && lcpu == smp_processor_id() && mymm)) {
+               do {
+                       ia64_ptcl(start, nbits << 2);
+                       start += (1UL << nbits);
+               } while (start < end);
+               ia64_srlz_i();
+               __get_cpu_var(ptcstats).ptc_l++;
+               preempt_enable();
+               return;
+       }
+
+#ifndef XEN
+       if (atomic_read(&mm->mm_users) == 1 && mymm) {
+#ifndef XEN  /* always taken here: the enclosing block is already #ifndef XEN */
+               flush_tlb_mm(mm);
+#else
+               flush_tlb_mask(mask_all);
+#endif
+               __get_cpu_var(ptcstats).change_rid++;
+               preempt_enable();
+               return;
+       }
+#endif
+
+       itc = ia64_get_itc();
+       nix = 0;
+       for_each_node_mask(cnode, nodes_flushed)
+               nasids[nix++] = cnodeid_to_nasid(cnode);
+
+#ifndef XEN
+       rr_value = (mm->context << 3) | REGION_NUMBER(start);
+#else
+       rr_value = REGION_NUMBER(start);
+#endif
+
+       shub1 = is_shub1();
+       if (shub1) {
+               data0 = (1UL << SH1_PTC_0_A_SHFT) |
+                       (nbits << SH1_PTC_0_PS_SHFT) |
+                       (rr_value << SH1_PTC_0_RID_SHFT) |
+                       (1UL << SH1_PTC_0_START_SHFT);
+#ifndef XEN
+               ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0);
+               ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1);
+#else
+               ptc0 = (unsigned long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0);
+               ptc1 = (unsigned long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1);
+#endif
+       } else {
+               data0 = (1UL << SH2_PTC_A_SHFT) |
+                       (nbits << SH2_PTC_PS_SHFT) |
+                       (1UL << SH2_PTC_START_SHFT);
+               /*
+                * Each branch is a complete statement, so no preprocessor
+                * directive sits inside the GLOBAL_MMR_PHYS_ADDR() argument
+                * list.
+                */
+#ifndef XEN
+               ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC +
+                       (rr_value << SH2_PTC_RID_SHFT));
+#else
+               ptc0 = (unsigned long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC +
+                       (rr_value << SH2_PTC_RID_SHFT));
+#endif
+               ptc1 = NULL;
+       }
+
+
+       mynasid = get_nasid();
+       use_cpu_ptcga = local_node_uses_ptc_ga(shub1);
+       max_active = max_active_pio(shub1);
+
+       /* itc/itc2 bracket the lock acquisition for the lock_itc_clocks statistic. */
+       itc = ia64_get_itc();
+       spin_lock_irqsave(PTC_LOCK(shub1), flags);
+       itc2 = ia64_get_itc();
+
+       __get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc;
+       __get_cpu_var(ptcstats).shub_ptc_flushes++;
+       __get_cpu_var(ptcstats).nodes_flushed += nix;
+       if (!mymm)
+                __get_cpu_var(ptcstats).shub_ptc_flushes_not_my_mm++;
+
+       /* Purging another context with ptc.ga: temporarily install rr_value in the region register. */
+       if (use_cpu_ptcga && !mymm) {
+               old_rr = ia64_get_rr(start);
+               ia64_set_rr(start, (old_rr & 0xff) | (rr_value << 8));
+               ia64_srlz_d();
+       }
+
+       wait_piowc();
+       do {
+               if (shub1)
+                       data1 = start | (1UL << SH1_PTC_1_START_SHFT);
+               else
+                       data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK);
+               deadlock = 0;
+               active = 0;
+               /* ibegin..i is the batch of nasids written since the last wait_piowc(). */
+               for (ibegin = 0, i = 0; i < nix; i++) {
+                       nasid = nasids[i];
+                       if (use_cpu_ptcga && unlikely(nasid == mynasid)) {
+                               ia64_ptcga(start, nbits << 2);
+                               ia64_srlz_i();
+                       } else {
+                               ptc0 = CHANGE_NASID(nasid, ptc0);
+                               if (ptc1)
+                                       ptc1 = CHANGE_NASID(nasid, ptc1);
+                               pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1);
+                               active++;
+                       }
+                       if (active >= max_active || i == (nix - 1)) {
+                               if ((deadlock = wait_piowc())) {
+                                       sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1);
+                                       if (reset_max_active_on_deadlock())
+                                               max_active = 1;
+                               }
+                               active = 0;
+                               ibegin = i + 1;
+                       }
+               }
+               start += (1UL << nbits);
+       } while (start < end);
+
+       itc2 = ia64_get_itc() - itc2;
+       __get_cpu_var(ptcstats).shub_itc_clocks += itc2;
+       if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max)
+               __get_cpu_var(ptcstats).shub_itc_clocks_max = itc2;
+
+       /*
+        * NOTE(review): start has been advanced by the loop above; confirm it
+        * still selects the region register that was modified earlier.
+        */
+       if (old_rr) {
+               ia64_set_rr(start, old_rr);
+               ia64_srlz_d();
+       }
+
+       spin_unlock_irqrestore(PTC_LOCK(shub1), flags);
+
+       preempt_enable();
+}
+
+/*
+ * sn2_ptc_deadlock_recovery
+ *
+ * Recover from PTC deadlock conditions. Recovery requires stepping through
+ * each TLB flush transaction.  The recovery sequence is somewhat tricky & is
+ * coded in assembly language.
+ *
+ * @nasids:    nasids targeted by the purge
+ * @ib, @ie:   first and last index (inclusive) in @nasids to replay
+ * @mynasid:   nasid of the calling cpu; skipped when the local node uses
+ *             ptc.ga
+ * @ptc0, @data0, @ptc1, @data1: MMR addresses and values of the purge being
+ *             replayed; the addresses are re-targeted per nasid
+ */
+
+void
+sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid,
+                         volatile unsigned long *ptc0, unsigned long data0,
+                         volatile unsigned long *ptc1, unsigned long data1)
+{
+       short nasid, i;
+       unsigned long *piows, zeroval, n;
+
+       __get_cpu_var(ptcstats).deadlocks++;
+
+       piows = (unsigned long *) pda->pio_write_status_addr;
+       zeroval = pda->pio_write_status_val;
+
+
+       for (i=ib; i <= ie; i++) {
+               nasid = nasids[i];
+               if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid)
+                       continue;
+               ptc0 = CHANGE_NASID(nasid, ptc0);
+               if (ptc1)
+                       ptc1 = CHANGE_NASID(nasid, ptc1);
+
+               /* The value returned by the core routine is accumulated in deadlocks2. */
+               n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
+               __get_cpu_var(ptcstats).deadlocks2 += n;
+       }
+
+}
+
+/**
+ * sn_send_IPI_phys - send an IPI to a Nasid and slice
+ * @nasid: nasid to receive the interrupt (may be outside partition)
+ * @physid: physical cpuid to receive the interrupt.
+ * @vector: command to send
+ * @delivery_mode: delivery mechanism
+ *
+ * Sends an IPI (interprocessor interrupt) to the processor specified by
+ * @physid
+ *
+ * @delivery_mode can be one of the following
+ *
+ * %IA64_IPI_DM_INT - pend an interrupt
+ * %IA64_IPI_DM_PMI - pend a PMI
+ * %IA64_IPI_DM_NMI - pend an NMI
+ * %IA64_IPI_DM_INIT - pend an INIT interrupt
+ */
+void sn_send_IPI_phys(int nasid, long physid, int vector, int delivery_mode)
+{
+       volatile long *ipi_reg = (long *)GLOBAL_MMR_PHYS_ADDR(nasid, SH_IPI_INT);
+       unsigned long irq_flags = 0;
+       long ipi_word;
+
+       /* Compose the value written to the target node's SH_IPI_INT register. */
+       ipi_word = (1UL << SH_IPI_INT_SEND_SHFT) |
+                  (physid << SH_IPI_INT_PID_SHFT) |
+                  ((long)delivery_mode << SH_IPI_INT_TYPE_SHFT) |
+                  ((long)vector << SH_IPI_INT_IDX_SHFT) |
+                  (0x000feeUL << SH_IPI_INT_BASE_SHFT);
+
+       mb();
+
+       /*
+        * With the shub 1.1 workarounds enabled, the write and the wait for
+        * it to drain both run under the global PTC lock.
+        */
+       if (enable_shub_wars_1_1()) {
+               spin_lock_irqsave(&sn2_global_ptc_lock, irq_flags);
+       }
+
+       pio_phys_write_mmr(ipi_reg, ipi_word);
+
+       if (enable_shub_wars_1_1()) {
+               wait_piowc();
+               spin_unlock_irqrestore(&sn2_global_ptc_lock, irq_flags);
+       }
+}
+
+EXPORT_SYMBOL(sn_send_IPI_phys);
+
+/**
+ * sn2_send_IPI - send an IPI to a processor
+ * @cpuid: target of the IPI
+ * @vector: command to send
+ * @delivery_mode: delivery mechanism
+ * @redirect: redirect the IPI?
+ *
+ * Sends an IPI (InterProcessor Interrupt) to the processor specified by
+ * @cpuid.  @vector specifies the command to send, while @delivery_mode can 
+ * be one of the following
+ *
+ * %IA64_IPI_DM_INT - pend an interrupt
+ * %IA64_IPI_DM_PMI - pend a PMI
+ * %IA64_IPI_DM_NMI - pend an NMI
+ * %IA64_IPI_DM_INIT - pend an INIT interrupt
+ */
+void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect)
+{
+       int nasid;
+       long physid = cpu_physical_id(cpuid);
+
+       /* @redirect is accepted but not consulted here. */
+#ifdef XEN
+       /* No nodepda yet: ask SAL for the nasid instead of the cpuid table. */
+       if (!sn_nodepda) {
+               ia64_sn_get_sapic_info(physid, &nasid, NULL, NULL);
+       } else
+#endif
+               nasid = cpuid_to_nasid(cpuid);
+
+       /* -1 is only expected while cpus are being started at boot time. */
+       if (unlikely(nasid == -1))
+               ia64_sn_get_sapic_info(physid, &nasid, NULL, NULL);
+
+       sn_send_IPI_phys(nasid, physid, vector, delivery_mode);
+}
+
+#ifdef CONFIG_PROC_FS
+
+#define PTC_BASENAME   "sgi_sn/ptc_statistics"
+
+/* seq_file start hook: the cursor is the offset pointer itself, or NULL once *offset reaches NR_CPUS. */
+static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset)
+{
+       return (*offset < NR_CPUS) ? offset : NULL;
+}
+
+/* seq_file next hook: advance *offset and return it while it is below NR_CPUS, else NULL. */
+static void *sn2_ptc_seq_next(struct seq_file *file, void *data, loff_t * offset)
+{
+       (*offset)++;
+       if (*offset < NR_CPUS)
+               return offset;
+       return NULL;
+}
+
+static void sn2_ptc_seq_stop(struct seq_file *file, void *data)
+{      /* intentionally empty: start/next only hand back the offset pointer */
+}
+
+/*
+ * seq_file show hook: @data points at the cpu number to report.
+ *
+ * For cpu 0 a two-line header is printed first.  One statistics line is
+ * printed for the cpu if it is online; ITC tick counts are converted to
+ * nanoseconds with that cpu's cyc_per_usec.  Always returns 0.
+ */
+static int sn2_ptc_seq_show(struct seq_file *file, void *data)
+{
+       struct ptc_stats *stat;
+       int cpu;
+
+       cpu = *(loff_t *) data;
+
+       if (!cpu) {
+               seq_printf(file,
+                          "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2\n");
+               seq_printf(file, "# ptctest %d\n", sn2_ptctest);
+       }
+
+       if (cpu < NR_CPUS && cpu_online(cpu)) {
+               stat = &per_cpu(ptcstats, cpu);
+               seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
+                               stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed,
+                               stat->deadlocks,
+                               1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
+                               1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
+                               1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec,
+                               stat->shub_ptc_flushes_not_my_mm,
+                               stat->deadlocks2);
+       }
+       return 0;
+}
+
+static struct seq_operations sn2_ptc_seq_ops = {       /* iterator passed to seq_open() in sn2_ptc_proc_open() */
+       .start = sn2_ptc_seq_start,
+       .next = sn2_ptc_seq_next,
+       .stop = sn2_ptc_seq_stop,
+       .show = sn2_ptc_seq_show
+};
+
+/* open hook of the proc entry: attach the PTC statistics iterator to @file. */
+static int sn2_ptc_proc_open(struct inode *inode, struct file *file)
+{
+       int rc;
+
+       rc = seq_open(file, &sn2_ptc_seq_ops);
+       return rc;
+}
+
+static struct file_operations proc_sn2_ptc_operations = {      /* installed as proc_fops by sn2_ptc_init() */
+       .open = sn2_ptc_proc_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = seq_release,
+};
+
+static struct proc_dir_entry *proc_sn2_ptc;
+
+/*
+ * Create the PTC_BASENAME proc entry and attach the statistics file
+ * operations to it.  Does nothing (and returns 0) unless the platform is
+ * "sn2".
+ *
+ * Returns 0 on success, -EINVAL if the proc entry cannot be created.
+ */
+static int __init sn2_ptc_init(void)
+{
+       if (!ia64_platform_is("sn2"))
+               return 0;
+
+       if (!(proc_sn2_ptc = create_proc_entry(PTC_BASENAME, 0444, NULL))) {
+               /* Newline-terminated so the next log message starts on its own line. */
+               printk(KERN_ERR "unable to create %s proc entry\n", PTC_BASENAME);
+               return -EINVAL;
+       }
+       proc_sn2_ptc->proc_fops = &proc_sn2_ptc_operations;
+       /*
+        * NOTE(review): sn2_global_ptc_lock is already set up by
+        * DEFINE_SPINLOCK; confirm that re-initialising it here is intended.
+        */
+       spin_lock_init(&sn2_global_ptc_lock);
+       return 0;
+}
+
+static void __exit sn2_ptc_exit(void)
+{
+       remove_proc_entry(PTC_BASENAME, NULL);  /* removes the entry created by sn2_ptc_init() */
+}
+
+module_init(sn2_ptc_init);
+module_exit(sn2_ptc_exit);
+#endif /* CONFIG_PROC_FS */
+
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/Makefile
--- a/xen/arch/ia64/linux/Makefile      Thu Jan 18 15:18:07 2007 +0000
+++ b/xen/arch/ia64/linux/Makefile      Fri Jan 19 14:48:57 2007 +0000
@@ -1,6 +1,9 @@ obj-y += bitop.o
+subdir-y += dig
+subdir-y += hp
+subdir-y += sn
+
 obj-y += bitop.o
 obj-y += clear_page.o
-obj-y += cmdline.o
 obj-y += copy_page_mck.o
 obj-y += efi_stub.o
 obj-y += extable.o
@@ -23,6 +26,7 @@ obj-y += __moddi3.o
 obj-y += __moddi3.o
 obj-y += __umoddi3.o
 obj-y += carta_random.o
+obj-y += io.o
 
 ## variants of divide/modulo
 ## see files in xen/arch/ia64/linux/lib (linux/arch/ia64/lib)
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/README.origin
--- a/xen/arch/ia64/linux/README.origin Thu Jan 18 15:18:07 2007 +0000
+++ b/xen/arch/ia64/linux/README.origin Fri Jan 19 14:48:57 2007 +0000
@@ -4,7 +4,6 @@ needs to be changed, move it to ../linux
 needs to be changed, move it to ../linux-xen and follow
 the instructions in the README there.
 
-cmdline.c              -> linux/lib/cmdline.c
 efi_stub.S             -> linux/arch/ia64/kernel/efi_stub.S
 extable.c              -> linux/arch/ia64/mm/extable.c
 hpsim.S                        -> linux/arch/ia64/hp/sim/hpsim.S
@@ -27,3 +26,6 @@ strlen.S              -> linux/arch/ia64/lib/strlen.
 
 # The files below are from Linux-2.6.16.33
 carta_random.S         -> linux/arch/ia64/lib/carta_random.S
+
+# The files below are from Linux-2.6.19
+io.c                   -> linux/arch/ia64/lib/io.c
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/cmdline.c
--- a/xen/arch/ia64/linux/cmdline.c     Thu Jan 18 15:18:07 2007 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,121 +0,0 @@
-/*
- * linux/lib/cmdline.c
- * Helper functions generally used for parsing kernel command line
- * and module options.
- *
- * Code and copyrights come from init/main.c and arch/i386/kernel/setup.c.
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2.  See the file COPYING for more details.
- *
- * GNU Indent formatting options for this file: -kr -i8 -npsl -pcs
- *
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <xen/lib.h>
-
-
-/**
- *     get_option - Parse integer from an option string
- *     @str: option string
- *     @pint: (output) integer value parsed from @str
- *
- *     Read an int from an option string; if available accept a subsequent
- *     comma as well.
- *
- *     Return values:
- *     0 : no int in string
- *     1 : int found, no subsequent comma
- *     2 : int found including a subsequent comma
- */
-
-int get_option (char **str, int *pint)
-{
-       char *cur = *str;
-
-       if (!cur || !(*cur))
-               return 0;
-       *pint = simple_strtol (cur, str, 0);
-       if (cur == *str)
-               return 0;
-       if (**str == ',') {
-               (*str)++;
-               return 2;
-       }
-
-       return 1;
-}
-
-/**
- *     get_options - Parse a string into a list of integers
- *     @str: String to be parsed
- *     @nints: size of integer array
- *     @ints: integer array
- *
- *     This function parses a string containing a comma-separated
- *     list of integers.  The parse halts when the array is
- *     full, or when no more numbers can be retrieved from the
- *     string.
- *
- *     Return value is the character in the string which caused
- *     the parse to end (typically a null terminator, if @str is
- *     completely parseable).
- */
- 
-char *get_options(const char *str, int nints, int *ints)
-{
-       int res, i = 1;
-
-       while (i < nints) {
-               res = get_option ((char **)&str, ints + i);
-               if (res == 0)
-                       break;
-               i++;
-               if (res == 1)
-                       break;
-       }
-       ints[0] = i - 1;
-       return (char *)str;
-}
-
-/**
- *     memparse - parse a string with mem suffixes into a number
- *     @ptr: Where parse begins
- *     @retptr: (output) Pointer to next char after parse completes
- *
- *     Parses a string into a number.  The number stored at @ptr is
- *     potentially suffixed with %K (for kilobytes, or 1024 bytes),
- *     %M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or
- *     1073741824).  If the number is suffixed with K, M, or G, then
- *     the return value is the number multiplied by one kilobyte, one
- *     megabyte, or one gigabyte, respectively.
- */
-
-unsigned long long memparse (char *ptr, char **retptr)
-{
-       unsigned long long ret = simple_strtoull (ptr, retptr, 0);
-
-       switch (**retptr) {
-       case 'G':
-       case 'g':
-               ret <<= 10;
-       case 'M':
-       case 'm':
-               ret <<= 10;
-       case 'K':
-       case 'k':
-               ret <<= 10;
-               (*retptr)++;
-       default:
-               break;
-       }
-       return ret;
-}
-
-
-EXPORT_SYMBOL(memparse);
-EXPORT_SYMBOL(get_option);
-EXPORT_SYMBOL(get_options);
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/dig/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux/dig/Makefile  Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,1 @@
+obj-y += machvec.o
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/dig/README.origin
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux/dig/README.origin     Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,7 @@
+Source files in this directory are identical copies of linux-2.6.19 files:
+
+NOTE: DO NOT commit changes to these files!   If a file
+needs to be changed, move it to ../linux-xen and follow
+the instructions in the README there.
+
+machvec.c              -> linux/arch/ia64/dig/machvec.c
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/dig/machvec.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux/dig/machvec.c Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,3 @@
+#define MACHVEC_PLATFORM_NAME          dig
+#define MACHVEC_PLATFORM_HEADER                <asm/machvec_dig.h>
+#include <asm/machvec_init.h>
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/hp/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux/hp/Makefile   Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,1 @@
+subdir-y += zx1
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/hp/zx1/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux/hp/zx1/Makefile       Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,1 @@
+obj-y += hpzx1_machvec.o
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/hp/zx1/README.origin
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux/hp/zx1/README.origin  Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,7 @@
+Source files in this directory are identical copies of linux-2.6.19 files:
+
+NOTE: DO NOT commit changes to these files!   If a file
+needs to be changed, move it to ../linux-xen and follow
+the instructions in the README there.
+
+hpzx1_machvec.c                -> linux/arch/ia64/hp/zx1/hpzx1_machvec.c
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/hp/zx1/hpzx1_machvec.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux/hp/zx1/hpzx1_machvec.c        Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,3 @@
+#define MACHVEC_PLATFORM_NAME          hpzx1
+#define MACHVEC_PLATFORM_HEADER                <asm/machvec_hpzx1.h>
+#include <asm/machvec_init.h>
diff -r 8475a4e0425e -r 3c8bb086025e xen/arch/ia64/linux/io.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/linux/io.c  Fri Jan 19 14:48:57 2007 +0000
@@ -0,0 +1,164 @@
+#include <linux/module.h>
+#include <linux/types.h>
+
+#include <asm/io.h>
+
+/*
+ * Copy data from IO memory space to "real" memory space.
+ * This needs to be optimized.
+ */
+void memcpy_fromio(void *to, const volatile void __iomem *from, long count)
+{
+       char *dst = to;
+
+       while (count) {
+               count--;
+               *dst++ = readb(from++);
+       }
+}
+EXPORT_SYMBOL(memcpy_fromio);
+
+/*
+ * Copy data from "real" memory space to IO memory space.
+ * This needs to be optimized.
+ */
+void memcpy_toio(volatile void __iomem *to, const void *from, long count)
+{
+       const char *src = from;
+
+       while (count) {
+               count--;
+               writeb(*src++, to++);
+       }
+}
+EXPORT_SYMBOL(memcpy_toio);
+
+/*
+ * "memset" on IO memory space.
+ * This needs to be optimized.
+ */
+void memset_io(volatile void __iomem *dst, int c, long count)
+{
+       unsigned char ch = (char)(c & 0xff);
+
+       while (count) {
+               count--;
+               writeb(ch, dst);
+               dst++;
+       }
+}
+EXPORT_SYMBOL(memset_io);
+
+#ifdef CONFIG_IA64_GENERIC
+
+#undef __ia64_inb
+#undef __ia64_inw
+#undef __ia64_inl
+#undef __ia64_outb
+#undef __ia64_outw
+#undef __ia64_outl
+#undef __ia64_readb

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.