[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg



# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1233115605 -32400
# Node ID 79f259a26a11cb57617982ce3bc829cdd76fff46
# Parent  4fd4dcf2f8916ab4656911a76e52fc6b1ad42c2f
# Parent  31983c30c460fb405b4fc6ab8e2ae49ada2cfec5
merge with xen-unstable.hg
---
 tools/firmware/rombios/32bitgateway.h          |   18 
 xen/arch/ia64/tools/p2m_foreign/Makefile       |   51 
 xen/arch/ia64/tools/p2m_foreign/p2m_foreign.c  |  233 ----
 xen/include/public/elfstructs.h                |  527 ----------
 xen/include/public/libelf.h                    |  265 -----
 Config.mk                                      |   15 
 buildconfigs/mk.linux-2.6-common               |    4 
 docs/check_pkgs                                |   12 
 docs/misc/dump-core-format.txt                 |   13 
 docs/misc/vtd.txt                              |   29 
 stubdom/Makefile                               |    1 
 stubdom/README                                 |    5 
 tools/Rules.mk                                 |    5 
 tools/blktap/drivers/Makefile                  |    6 
 tools/console/Makefile                         |    4 
 tools/examples/xmexample.hvm                   |   33 
 tools/firmware/Makefile                        |    4 
 tools/firmware/Rules.mk                        |    8 
 tools/firmware/hvmloader/32bitbios_support.c   |   32 
 tools/firmware/hvmloader/Makefile              |    4 
 tools/firmware/hvmloader/acpi/Makefile         |   18 
 tools/firmware/hvmloader/acpi/build.c          |   56 -
 tools/firmware/hvmloader/acpi/dsdt.asl         |    2 
 tools/firmware/hvmloader/acpi/dsdt.c           |    8 
 tools/firmware/hvmloader/cacheattr.c           |   24 
 tools/firmware/hvmloader/config.h              |   32 
 tools/firmware/hvmloader/hvmloader.c           |  206 ++-
 tools/firmware/hvmloader/mp_tables.c           |    4 
 tools/firmware/hvmloader/smbios.c              |   29 
 tools/firmware/hvmloader/smp.c                 |    2 
 tools/firmware/hvmloader/util.c                |  185 +--
 tools/firmware/hvmloader/util.h                |   17 
 tools/firmware/rombios/32bit/32bitbios.c       |   41 
 tools/firmware/rombios/32bit/Makefile          |   15 
 tools/firmware/rombios/32bit/pmm.c             |  531 ++++++++++
 tools/firmware/rombios/32bit/rombios_compat.h  |    4 
 tools/firmware/rombios/32bit/tcgbios/Makefile  |   14 
 tools/firmware/rombios/32bit/tcgbios/tcgbios.c |   18 
 tools/firmware/rombios/32bitgateway.c          |  459 ++------
 tools/firmware/rombios/32bitprotos.h           |   63 -
 tools/firmware/rombios/Makefile                |    2 
 tools/firmware/rombios/rombios.c               |   88 +
 tools/firmware/rombios/tcgbios.c               |  168 ---
 tools/firmware/vgabios/vbe.c                   |   18 
 tools/firmware/vgabios/vbe.h                   |   64 -
 tools/firmware/vgabios/vbetables-gen.c         |   41 
 tools/firmware/vgabios/vgabios.c               |    6 
 tools/flask/libflask/Makefile                  |    1 
 tools/flask/loadpolicy/Makefile                |    6 
 tools/fs-back/Makefile                         |    4 
 tools/include/Makefile                         |    3 
 tools/include/xen-foreign/reference.size       |    2 
 tools/libaio/src/Makefile                      |    2 
 tools/libfsimage/Rules.mk                      |    4 
 tools/libfsimage/common/Makefile               |    3 
 tools/libxc/Makefile                           |    5 
 tools/libxc/xc_core.c                          |   68 +
 tools/libxc/xc_core.h                          |   10 
 tools/libxc/xc_core_ia64.c                     |   17 
 tools/libxc/xc_core_x86.c                      |   81 +
 tools/libxc/xc_core_x86.h                      |   13 
 tools/libxc/xc_dom.h                           |    2 
 tools/libxc/xc_domain.c                        |   30 
 tools/libxc/xc_elf.h                           |    2 
 tools/libxc/xc_hvm_build.c                     |  189 +--
 tools/libxc/xc_private.c                       |   10 
 tools/libxc/xc_ptrace_core.c                   |    4 
 tools/libxc/xenctrl.h                          |    6 
 tools/misc/Makefile                            |    4 
 tools/misc/xenpm.c                             |  894 ++++++++++-------
 tools/pygrub/Makefile                          |    4 
 tools/python/Makefile                          |    3 
 tools/python/xen/lowlevel/xc/xc.c              |   10 
 tools/python/xen/util/oshelp.py                |    2 
 tools/python/xen/xend/XendConfig.py            |   12 
 tools/python/xen/xend/XendDPCI.py              |    7 
 tools/python/xen/xend/XendDomain.py            |    4 
 tools/python/xen/xend/XendDomainInfo.py        |   23 
 tools/python/xen/xend/balloon.py               |    4 
 tools/python/xen/xend/image.py                 |   13 
 tools/python/xen/xend/server/pciif.py          |   20 
 tools/python/xen/xend/server/relocate.py       |    2 
 tools/python/xen/xm/create.dtd                 |    7 
 tools/python/xen/xm/create.py                  |   54 -
 tools/python/xen/xm/main.py                    |   34 
 tools/python/xen/xm/xenapi_create.py           |   12 
 tools/tests/blowfish.mk                        |    8 
 tools/vnet/libxutil/Makefile                   |    7 
 tools/vtpm/Makefile                            |    2 
 tools/vtpm/Rules.mk                            |    6 
 tools/vtpm_manager/Rules.mk                    |    6 
 tools/xcutils/Makefile                         |    7 
 tools/xcutils/readnotes.c                      |    2 
 tools/xenmon/Makefile                          |    4 
 tools/xenpmd/Makefile                          |    4 
 tools/xenstat/libxenstat/Makefile              |    4 
 tools/xenstat/xentop/Makefile                  |    4 
 tools/xenstore/Makefile                        |    7 
 tools/xentrace/Makefile                        |    5 
 xen/Rules.mk                                   |   39 
 xen/arch/ia64/Makefile                         |    6 
 xen/arch/ia64/Rules.mk                         |   17 
 xen/arch/ia64/xen/domain.c                     |    2 
 xen/arch/ia64/xen/irq.c                        |    2 
 xen/arch/ia64/xen/machine_kexec.c              |    1 
 xen/arch/ia64/xen/mm.c                         |   99 -
 xen/arch/ia64/xen/xensetup.c                   |    5 
 xen/arch/x86/Makefile                          |    7 
 xen/arch/x86/Rules.mk                          |   16 
 xen/arch/x86/acpi/cpu_idle.c                   |    9 
 xen/arch/x86/acpi/power.c                      |    1 
 xen/arch/x86/apic.c                            |    4 
 xen/arch/x86/boot/Makefile                     |    3 
 xen/arch/x86/boot/mkelf32.c                    |    2 
 xen/arch/x86/bzimage.c                         |  242 ++++
 xen/arch/x86/cpu/common.c                      |   37 
 xen/arch/x86/cpu/mcheck/mce_intel.c            |   43 
 xen/arch/x86/domain.c                          |   13 
 xen/arch/x86/domain_build.c                    |  157 ++-
 xen/arch/x86/hvm/hvm.c                         |   15 
 xen/arch/x86/hvm/mtrr.c                        |   20 
 xen/arch/x86/hvm/vmsi.c                        |    2 
 xen/arch/x86/hvm/vmx/vmcs.c                    |   34 
 xen/arch/x86/io_apic.c                         |   14 
 xen/arch/x86/irq.c                             |   22 
 xen/arch/x86/machine_kexec.c                   |    3 
 xen/arch/x86/microcode.c                       |   58 -
 xen/arch/x86/microcode_amd.c                   |  265 ++---
 xen/arch/x86/microcode_intel.c                 |   18 
 xen/arch/x86/mm.c                              |  527 +++++-----
 xen/arch/x86/mm/Makefile                       |    2 
 xen/arch/x86/mm/hap/Makefile                   |    2 
 xen/arch/x86/mm/hap/hap.c                      |    2 
 xen/arch/x86/mm/hap/p2m-ept.c                  |   61 -
 xen/arch/x86/mm/p2m.c                          |   61 -
 xen/arch/x86/mm/shadow/Makefile                |    2 
 xen/arch/x86/mm/shadow/common.c                |   15 
 xen/arch/x86/mm/shadow/multi.c                 |    7 
 xen/arch/x86/mm/shadow/private.h               |   15 
 xen/arch/x86/msi.c                             |   15 
 xen/arch/x86/nmi.c                             |    2 
 xen/arch/x86/oprofile/nmi_int.c                |   42 
 xen/arch/x86/oprofile/op_model_p4.c            |    4 
 xen/arch/x86/physdev.c                         |   20 
 xen/arch/x86/setup.c                           |  114 +-
 xen/arch/x86/smpboot.c                         |   12 
 xen/arch/x86/tboot.c                           |   12 
 xen/arch/x86/time.c                            |    7 
 xen/arch/x86/traps.c                           |   28 
 xen/arch/x86/x86_32/machine_kexec.c            |    3 
 xen/arch/x86/x86_32/mm.c                       |   15 
 xen/arch/x86/x86_64/Makefile                   |   13 
 xen/arch/x86/x86_64/compat/mm.c                |   14 
 xen/arch/x86/x86_64/machine_kexec.c            |    9 
 xen/arch/x86/x86_64/mm.c                       |  126 +-
 xen/common/Makefile                            |   10 
 xen/common/compat/Makefile                     |    4 
 xen/common/compat/memory.c                     |   69 -
 xen/common/inflate.c                           | 1303 +++++++++++++++++++++++++
 xen/common/kexec.c                             |    1 
 xen/common/keyhandler.c                        |    4 
 xen/common/libelf/libelf-dominfo.c             |   15 
 xen/common/libelf/libelf-private.h             |    4 
 xen/common/memory.c                            |   79 -
 xen/common/page_alloc.c                        |  133 +-
 xen/common/xenoprof.c                          |   63 -
 xen/drivers/acpi/pmstat.c                      |   10 
 xen/drivers/acpi/reboot.c                      |    4 
 xen/drivers/cpufreq/utility.c                  |   30 
 xen/drivers/passthrough/amd/iommu_init.c       |   36 
 xen/drivers/passthrough/amd/iommu_intr.c       |   20 
 xen/drivers/passthrough/amd/iommu_map.c        |  158 +--
 xen/drivers/passthrough/amd/pci_amd_iommu.c    |  143 +-
 xen/drivers/passthrough/io.c                   |  123 +-
 xen/drivers/passthrough/iommu.c                |    1 
 xen/drivers/passthrough/vtd/dmar.c             |   18 
 xen/drivers/passthrough/vtd/iommu.c            |   17 
 xen/drivers/passthrough/vtd/iommu.h            |    3 
 xen/drivers/video/vga.c                        |    2 
 xen/include/Makefile                           |    2 
 xen/include/asm-ia64/domain.h                  |    6 
 xen/include/asm-x86/config.h                   |    8 
 xen/include/asm-x86/domain.h                   |    7 
 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h  |   38 
 xen/include/asm-x86/hvm/vmx/vmcs.h             |    9 
 xen/include/asm-x86/hvm/vmx/vmx.h              |    3 
 xen/include/asm-x86/irq.h                      |    9 
 xen/include/asm-x86/microcode.h                |   49 
 xen/include/asm-x86/mm.h                       |  105 --
 xen/include/asm-x86/mtrr.h                     |    8 
 xen/include/asm-x86/paging.h                   |    7 
 xen/include/asm-x86/processor.h                |    3 
 xen/include/asm-x86/smp.h                      |    1 
 xen/include/asm-x86/time.h                     |    1 
 xen/include/public/domctl.h                    |    1 
 xen/include/public/elfnote.h                   |   13 
 xen/include/public/hvm/hvm_info_table.h        |   28 
 xen/include/public/memory.h                    |   43 
 xen/include/public/xen.h                       |    4 
 xen/include/xen/elf.h                          |    2 
 xen/include/xen/elfstructs.h                   |  527 ++++++++++
 xen/include/xen/hvm/iommu.h                    |    2 
 xen/include/xen/hvm/irq.h                      |   11 
 xen/include/xen/iommu.h                        |    1 
 xen/include/xen/libelf.h                       |  271 +++++
 xen/include/xen/mm.h                           |    4 
 xen/include/xen/sched.h                        |    6 
 xen/include/xen/xenoprof.h                     |    7 
 xen/include/xlat.lst                           |    2 
 xen/include/xsm/xsm.h                          |   12 
 xen/xsm/dummy.c                                |   11 
 xen/xsm/flask/hooks.c                          |   21 
 212 files changed, 6397 insertions(+), 4382 deletions(-)

diff -r 4fd4dcf2f891 -r 79f259a26a11 Config.mk
--- a/Config.mk Wed Jan 28 12:22:58 2009 +0900
+++ b/Config.mk Wed Jan 28 13:06:45 2009 +0900
@@ -1,7 +1,7 @@
 # -*- mode: Makefile; -*-
 
 # A debug build of Xen and tools?
-debug ?= n
+debug ?= y ## TEMPORARILY ENABLED
 
 XEN_COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/ \
                          -e s/i86pc/x86_32/ -e s/amd64/x86_64/)
@@ -38,6 +38,15 @@ cc-option = $(shell if test -z "`$(1) $(
 cc-option = $(shell if test -z "`$(1) $(2) -S -o /dev/null -xc \
               /dev/null 2>&1`"; then echo "$(2)"; else echo "$(3)"; fi ;)
 
+# cc-option-add: Add an option to compilation flags, but only if supported.
+# Usage: $(call cc-option-add,CFLAGS,CC,-march=winchip-c6)
+cc-option-add = $(eval $(call cc-option-add-closure,$(1),$(2),$(3)))
+define cc-option-add-closure
+    ifneq ($$(call cc-option,$$($(2)),$(3),n),n)
+        $(1) += $(3)
+    endif
+endef
+
 # cc-ver: Check compiler is at least specified version. Return boolean 'y'/'n'.
 # Usage: ifeq ($(call cc-ver,$(CC),0x030400),y)
 cc-ver = $(shell if [ $$((`$(1) -dumpversion | awk -F. \
@@ -84,8 +93,8 @@ CFLAGS += -Wall -Wstrict-prototypes
 # result of any casted expression causes a warning.
 CFLAGS += -Wno-unused-value
 
-HOSTCFLAGS += $(call cc-option,$(HOSTCC),-Wdeclaration-after-statement,)
-CFLAGS     += $(call cc-option,$(CC),-Wdeclaration-after-statement,)
+$(call cc-option-add,HOSTCFLAGS,HOSTCC,-Wdeclaration-after-statement)
+$(call cc-option-add,CFLAGS,CC,-Wdeclaration-after-statement)
 
 LDFLAGS += $(foreach i, $(EXTRA_LIB), -L$(i)) 
 CFLAGS += $(foreach i, $(EXTRA_INCLUDES), -I$(i))
diff -r 4fd4dcf2f891 -r 79f259a26a11 buildconfigs/mk.linux-2.6-common
--- a/buildconfigs/mk.linux-2.6-common  Wed Jan 28 12:22:58 2009 +0900
+++ b/buildconfigs/mk.linux-2.6-common  Wed Jan 28 13:06:45 2009 +0900
@@ -100,10 +100,10 @@ endif
 endif
        $(__NONINT_CONFIG) $(MAKE) -C $(LINUX_SRCDIR) ARCH=$(LINUX_ARCH) oldconfig O=$$(/bin/pwd)/$(LINUX_DIR)
        @set -e ; if [ ! -f $(LINUX_DIR)/Makefile ] ; then \
-           echo "***********************************"; \
+           echo "==================================="; \
            echo "oldconfig did not create a Makefile"; \
            echo "Generating $(LINUX_DIR)/Makefile   "; \
-           echo "***********************************"; \
+           echo "==================================="; \
            ( echo "# Automatically generated: don't edit"; \
              echo ""; \
              echo "VERSION = 2"; \
diff -r 4fd4dcf2f891 -r 79f259a26a11 docs/check_pkgs
--- a/docs/check_pkgs   Wed Jan 28 12:22:58 2009 +0900
+++ b/docs/check_pkgs   Wed Jan 28 13:06:45 2009 +0900
@@ -2,12 +2,12 @@ silent_which ()
 silent_which ()
 {
         which $1 1>/dev/null 2>/dev/null || {
-                echo "*************************************************"
-                echo "*************************************************"
-                echo "* WARNING: Package '$1' is required"
-                echo "*          to build Xen documentation"
-                echo "*************************************************"
-                echo "*************************************************"
+                echo "================================================="
+                echo "================================================="
+                echo "= WARNING: Package '$1' is required"
+                echo "=          to build Xen documentation"
+                echo "================================================="
+                echo "================================================="
         }
         which $1 1>/dev/null 2>/dev/null
 }
diff -r 4fd4dcf2f891 -r 79f259a26a11 docs/misc/dump-core-format.txt
--- a/docs/misc/dump-core-format.txt    Wed Jan 28 12:22:58 2009 +0900
+++ b/docs/misc/dump-core-format.txt    Wed Jan 28 13:06:45 2009 +0900
@@ -30,8 +30,13 @@ The elf header members are set as follow
         e_ident[EI_OSABI] = ELFOSABI_SYSV = 0
         e_type = ET_CORE = 4
 ELFCLASS64 is always used independent of architecture.
-e_ident[EI_DATA] and e_flags are set according to the dumping system's
-architecture. Other members are set as usual.
+e_ident[EI_DATA] is set as follows
+  For x86 PV domain case, it is set according to the guest configuration
+  (i.e. if guest is 32bit it is set to EM_386 even when the dom0 is 64 bit.)
+  For other domain case (x86 HVM domain case and ia64 domain case),
+  it is set according to the dumping system's architecture.
+e_flags is set according to the dumping system's architecture.
+Other members are set as usual.
 
 Sections
 --------
@@ -241,3 +246,7 @@ Currently only (major, minor) = (0, 1) i
   The format version isn't bumped because analysis tools can distinguish it.
 - .xen_ia64_mapped_regs section was made only for ia64 PV domain.
   In case of IA64 HVM domain, this section doesn't exist.
+- elf header e_ident[EI_DATA]
+  On x86 PV domain case, it is set according to the guest configuration.
+  I.e. 32-on-64 case, the file will be set EM_386 instead of EM_X86_64.
+  This is the same as 32-on-32 case, so there is no impact on analysis tools.
diff -r 4fd4dcf2f891 -r 79f259a26a11 docs/misc/vtd.txt
--- a/docs/misc/vtd.txt Wed Jan 28 12:22:58 2009 +0900
+++ b/docs/misc/vtd.txt Wed Jan 28 13:06:45 2009 +0900
@@ -38,6 +38,30 @@ Add "msi=1" option in kernel line of hos
 Add "msi=1" option in kernel line of host grub.
 
 
+MSI-INTx translation for passthrough devices in HVM
+---------------------------------------------------
+
+If the assigned device uses a physical IRQ that is shared by more than
+one device among multiple domains, there may be significant impact on
+device performance. Unfortunately, this is quite a common case if the
+IO-APIC (INTx) IRQ is used. MSI can avoid this issue, but was only
+available if the guest enables it.
+
+With MSI-INTx translation turned on, Xen enables device MSI if it's
+available, regardless of whether the guest uses INTx or MSI. If the
+guest uses INTx IRQ, Xen will inject a translated INTx IRQ to guest's
+virtual ioapic whenever an MSI message is received. This reduces the
+interrupt sharing of the system. If the guest OS enables MSI or MSI-X,
+the translation is automatically turned off.
+
+To enable or disable MSI-INTx translation globally, add "pci_msitranslate"
+in the config file:
+       pci_msitranslate = 1         (default is 1)
+
+To override for a specific device:
+       pci = [ '01:00.0,msitranslate=0', '03:00.0' ]
+
+
 Caveat on Conventional PCI Device Passthrough
 ---------------------------------------------
 
@@ -79,6 +103,11 @@ 2 virtual PCI slots (6~7) are reserved i
  3. Attach a PCI device to the guest by the physical BDF and desired virtual slot(optional). Following command would insert the physical device into guest's virtual slot 7
 
        [root@vt-vtd ~]# xm pci-attach HVMDomainVtd 0:2:0.0 7
+
+    To specify options for the device, use -o or --options=. The following command would disable MSI-INTx translation for the device:
+
+       [root@vt-vtd ~]# xm pci-attach -o msitranslate=0 0:2:0.0 7
+
 
 VTd hotplug usage model:
 ------------------------
diff -r 4fd4dcf2f891 -r 79f259a26a11 stubdom/Makefile
--- a/stubdom/Makefile  Wed Jan 28 12:22:58 2009 +0900
+++ b/stubdom/Makefile  Wed Jan 28 13:06:45 2009 +0900
@@ -194,6 +194,7 @@ mk-headers-$(XEN_TARGET_ARCH): ioemu/lin
           ln -sf $(addprefix ../../,$(wildcard $(XEN_ROOT)/xen/include/public/*.h)) include/xen && \
           ln -sf $(addprefix ../../$(XEN_ROOT)/xen/include/public/,arch-ia64 arch-x86 hvm io xsm) include/xen && \
           ( [ -h include/xen/sys ] || ln -sf ../../$(XEN_ROOT)/tools/include/xen-sys/MiniOS include/xen/sys ) && \
+          ( [ -h include/xen/libelf ] || ln -sf ../../$(XEN_ROOT)/tools/include/xen/libelf include/xen/libelf ) && \
          mkdir -p include/xen-foreign && \
          ln -sf $(addprefix ../../,$(wildcard $(XEN_ROOT)/tools/include/xen-foreign/*)) include/xen-foreign/ && \
          $(MAKE) -C include/xen-foreign/ && \
diff -r 4fd4dcf2f891 -r 79f259a26a11 stubdom/README
--- a/stubdom/README    Wed Jan 28 12:22:58 2009 +0900
+++ b/stubdom/README    Wed Jan 28 13:06:45 2009 +0900
@@ -55,6 +55,11 @@ sdl = 0
   - In hvmconfig-dm, set an sdl vfb:
 
 vfb = [ 'type=sdl' ]
+
+    by default qemu will use sdl together with opengl for rendering, if
+    you do not want qemu to use opengl then also pass opengl=0:
+
+vfb = [ 'type=sdl, opengl=0' ]
 
 * Using a VNC server in the stub domain
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/Rules.mk
--- a/tools/Rules.mk    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/Rules.mk    Wed Jan 28 13:06:45 2009 +0900
@@ -29,6 +29,10 @@ X11_LDPATH = -L/usr/X11R6/$(LIBLEAFDIR)
 
 CFLAGS += -D__XEN_TOOLS__
 
+# Get gcc to generate the dependencies for us.
+CFLAGS += -MMD -MF .$(@F).d
+DEPS = .*.d
+
 # Enable implicit LFS support *and* explicit LFS names.
 CFLAGS  += $(shell getconf LFS_CFLAGS)
 CFLAGS  += -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
@@ -59,4 +63,3 @@ subdirs-all subdirs-clean subdirs-instal
 
 subdir-all-% subdir-clean-% subdir-install-%: .phony
        $(MAKE) -C $* $(patsubst subdir-%-$*,%,$@)
-
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/blktap/drivers/Makefile
--- a/tools/blktap/drivers/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/blktap/drivers/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -13,16 +13,12 @@ CFLAGS   += -I $(LIBAIO_DIR)
 CFLAGS   += -I $(LIBAIO_DIR)
 CFLAGS   += -D_GNU_SOURCE
 
-# Get gcc to generate the dependencies for us.
-CFLAGS   += -Wp,-MD,.$(@F).d
-DEPS      = .*.d
-
 ifeq ($(shell . ./check_gcrypt),"yes")
 CFLAGS += -DUSE_GCRYPT
 CRYPT_LIB := -lgcrypt
 else
 CRYPT_LIB := -lcrypto
-$(warning *** libgcrypt not installed: falling back to libcrypto ***)
+$(warning === libgcrypt not installed: falling back to libcrypto ===)
 endif
 
 LDFLAGS_blktapctrl := $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenstore) -L../lib -lblktap
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/console/Makefile
--- a/tools/console/Makefile    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/console/Makefile    Wed Jan 28 13:06:45 2009 +0900
@@ -16,7 +16,7 @@ all: $(BIN)
 
 .PHONY: clean
 clean:
-       $(RM) *.a *.so *.o *.rpm $(BIN)
+       $(RM) *.a *.so *.o *.rpm $(BIN) $(DEPS)
        $(RM) client/*.o daemon/*.o
 
 xenconsoled: $(patsubst %.c,%.o,$(wildcard daemon/*.c))
@@ -33,3 +33,5 @@ install: $(BIN)
        $(INSTALL_PROG) xenconsoled $(DESTDIR)/$(SBINDIR)
        $(INSTALL_DIR) $(DESTDIR)$(PRIVATE_BINDIR)
        $(INSTALL_PROG) xenconsole $(DESTDIR)$(PRIVATE_BINDIR)
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/examples/xmexample.hvm
--- a/tools/examples/xmexample.hvm      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/examples/xmexample.hvm      Wed Jan 28 13:06:45 2009 +0900
@@ -288,6 +288,39 @@ serial='pty'
 #  'x' -> we don't care (do not check)
 #  's' -> the bit must be the same as on the host that started this VM
 
+#-----------------------------------------------------------------------------
+#   Configure passthrough PCI{,-X,e} devices:
+#
+#   pci=[ '[SSSS:]BB:DD.F[,option1[,option2[...]]]', ... ]
+#
+#   [SSSS]:BB:DD.F  "bus segment:bus:device.function"(1) of the device to
+#                   be assigned, bus segment is optional. All fields are
+#                   in hexadecimal and no field should be longer than that
+#                   as shown in the pattern. Successful assignment may need
+#                   certain hardware support and additional configurations
+#                   (e.g. VT-d, see docs/misc/vtd.txt for more details).
+#
+#       (1) bus segment is sometimes also referred to as the PCI "domain",
+#           not to be confused with Xen domain.
+#
+#
+#   optionN         per-device options in "key=val" format. Current
+#                   available options are:
+#                   - msitranslate=0|1
+#                      per-device override of pci_msitranslate, see below
+#
+#pci=[ '07:00.0', '07:00.1' ]
+
+#   MSI-INTx translation for MSI capable devices:
+#
+#   If it's set, Xen will enable MSI for the device that supports it even
+# if the guest doesn't use MSI. In that case, an IO-APIC type interrupt will
+# be injected to the guest every time a corresponding MSI message is
+# received.
+#   If the guest enables MSI or MSI-X, the translation is automatically
+# turned off.
+# 
+#pci_msitranslate=1
 
 #-----------------------------------------------------------------------------
 #   Configure PVSCSI devices:
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/Makefile
--- a/tools/firmware/Makefile   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/Makefile   Wed Jan 28 13:06:45 2009 +0900
@@ -15,10 +15,10 @@ SUBDIRS += hvmloader
 .PHONY: all
 all:
        @set -e; if [ $$((`( bcc -v 2>&1 | grep version || echo 0.0.0 ) | cut -d' ' -f 3 | awk -F. '{ printf "0x%02x%02x%02x", $$1, $$2, $$3}'`)) -lt $$((0x00100e)) ] ; then \
-       echo "***********************************************************"; \
+       echo "==========================================================="; \
        echo "Require dev86 package version >= 0.16.14 to build firmware!"; \
        echo "(visit http://www.cix.co.uk/~mayday for more information)"; \
-       echo "***********************************************************"; \
+       echo "==========================================================="; \
        else \
        $(MAKE) subdirs-$@; \
        fi
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/Rules.mk
--- a/tools/firmware/Rules.mk   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/Rules.mk   Wed Jan 28 13:06:45 2009 +0900
@@ -2,7 +2,7 @@ override XEN_TARGET_ARCH = x86_32
 override XEN_TARGET_ARCH = x86_32
 
 # User-supplied CFLAGS are not useful here.
-CFLAGS :=
+CFLAGS =
 
 include $(XEN_ROOT)/tools/Rules.mk
 
@@ -13,9 +13,9 @@ CFLAGS += -Werror
 CFLAGS += -Werror
 
 # Disable PIE/SSP if GCC supports them. They can break us.
-CFLAGS += $(call cc-option,$(CC),-nopie,)
-CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,)
-CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,)
+$(call cc-option-add,CFLAGS,CC,-nopie)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all)
 
 # Extra CFLAGS suitable for an embedded type of environment.
 CFLAGS += -fno-builtin -msoft-float
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/32bitbios_support.c
--- a/tools/firmware/hvmloader/32bitbios_support.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/32bitbios_support.c      Wed Jan 28 13:06:45 2009 +0900
@@ -32,15 +32,13 @@
 
 #include "../rombios/32bit/32bitbios_flat.h"
 
-static void relocate_32bitbios(char *elfarray, uint32_t elfarraysize)
+static uint32_t relocate_32bitbios(char *elfarray, uint32_t elfarraysize)
 {
     Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfarray;
     Elf32_Shdr *shdr = (Elf32_Shdr *)&elfarray[ehdr->e_shoff];
-    char *secstrings = &elfarray[shdr[ehdr->e_shstrndx].sh_offset];
-    char *jump_table;
     uint32_t reloc_off, reloc_size;
     char *highbiosarea;
-    int i, jump_sec_idx = 0;
+    int i;
 
     /*
      * Step 1. General elf cleanup, and compute total relocation size.
@@ -50,13 +48,6 @@ static void relocate_32bitbios(char *elf
     {
         /* By default all section data points into elf image data array. */
         shdr[i].sh_addr = (Elf32_Addr)&elfarray[shdr[i].sh_offset];
-
-        if ( !strcmp(".biosjumptable", secstrings + shdr[i].sh_name) )
-        {
-            /* We do not relocate the BIOS jump table to high memory. */
-            shdr[i].sh_flags &= ~SHF_ALLOC;
-            jump_sec_idx = i;
-        }
 
         /* Fix up a corner case of address alignment. */
         if ( shdr[i].sh_addralign == 0 )
@@ -76,7 +67,7 @@ static void relocate_32bitbios(char *elf
      */
     reloc_size = reloc_off;
     printf("%d bytes of ROMBIOS high-memory extensions:\n", reloc_size);
-    highbiosarea = (char *)(long)e820_malloc(reloc_size, 0);
+    highbiosarea = mem_alloc(reloc_size, 0);
     BUG_ON(highbiosarea == NULL);
     printf("  Relocating to 0x%x-0x%x ... ",
            (uint32_t)&highbiosarea[0],
@@ -148,21 +139,12 @@ static void relocate_32bitbios(char *elf
         }
     }
 
-    /* Step 5. Find the ROMBIOS jump-table stub and copy in the real table. */
-    for ( jump_table = (char *)ROMBIOS_BEGIN;
-          jump_table != (char *)ROMBIOS_END;
-          jump_table++ )
-        if ( !strncmp(jump_table, "___JMPT", 7) )
-            break;
-    BUG_ON(jump_table == NULL);
-    BUG_ON(jump_sec_idx == 0);
-    memcpy(jump_table, (char *)shdr[jump_sec_idx].sh_addr,
-           shdr[jump_sec_idx].sh_size);
+    printf("done\n");
 
-    printf("done\n");
+    return (uint32_t)highbiosarea;
 }
 
-void highbios_setup(void)
+uint32_t highbios_setup(void)
 {
-    relocate_32bitbios((char *)highbios_array, sizeof(highbios_array));
+    return relocate_32bitbios((char *)highbios_array, sizeof(highbios_array));
 }
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/Makefile
--- a/tools/firmware/hvmloader/Makefile Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/Makefile Wed Jan 28 13:06:45 2009 +0900
@@ -58,4 +58,6 @@ roms.h: ../rombios/BIOS-bochs-latest ../
 .PHONY: clean
 clean: subdirs-clean
        rm -f roms.h acpi.h
-       rm -f hvmloader hvmloader.tmp *.o
+       rm -f hvmloader hvmloader.tmp *.o $(DEPS)
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/acpi/Makefile
--- a/tools/firmware/hvmloader/acpi/Makefile    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/acpi/Makefile    Wed Jan 28 13:06:45 2009 +0900
@@ -22,10 +22,6 @@ H_SRC = $(wildcard *.h)
 H_SRC = $(wildcard *.h)
 OBJS  = $(patsubst %.c,%.o,$(C_SRC))
 
-IASL_VER = acpica-unix-20080729
-#IASL_URL = http://acpica.org/download/$(IASL_VER).tar.gz
-IASL_URL = $(XEN_EXTFILES_URL)/$(IASL_VER).tar.gz
-
 CFLAGS += -I. -I.. $(CFLAGS_include)
 
 vpath iasl $(PATH)
@@ -46,15 +42,11 @@ dsdt.c: dsdt.asl
 
 iasl:
        @echo
-       @echo "ACPI ASL compiler(iasl) is needed"
-       @echo "Download Intel ACPI CA"
-       @echo "If wget failed, please download and compile manually from"
+       @echo "ACPI ASL compiler (iasl) is needed"
+       @echo "Download and install Intel ACPI CA from"
        @echo "http://acpica.org/downloads/";
        @echo 
-       wget $(IASL_URL)
-       tar xzf $(IASL_VER).tar.gz
-       make -C $(IASL_VER)/compiler
-       $(INSTALL_PROG) $(IASL_VER)/compiler/iasl $(DESTDIR)$(BINDIR)/iasl
+       @exit 1
 
 acpi.a: $(OBJS)
        $(AR) rc $@ $(OBJS)
@@ -63,6 +55,8 @@ acpi.a: $(OBJS)
        $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
 
 clean:
-       rm -rf *.a *.o $(IASL_VER) $(IASL_VER).tar.gz
+       rm -rf *.a *.o $(IASL_VER) $(IASL_VER).tar.gz $(DEPS)
 
 install: all
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/acpi/build.c
--- a/tools/firmware/hvmloader/acpi/build.c     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/acpi/build.c     Wed Jan 28 13:06:45 2009 +0900
@@ -48,48 +48,9 @@ static void set_checksum(
     p[checksum_offset] = -sum;
 }
 
-static int uart_exists(uint16_t uart_base)
-{
-    uint16_t ier = uart_base + 1;
-    uint8_t a, b, c;
-
-    a = inb(ier);
-    outb(ier, 0);
-    b = inb(ier);
-    outb(ier, 0xf);
-    c = inb(ier);
-    outb(ier, a);
-
-    return ((b == 0) && (c == 0xf));
-}
-
-static int hpet_exists(unsigned long hpet_base)
-{
-    uint32_t hpet_id = *(uint32_t *)hpet_base;
-    return ((hpet_id >> 16) == 0x8086);
-}
-
 static uint8_t battery_port_exists(void)
 {
     return (inb(0x88) == 0x1F);
-}
-
-static int construct_bios_info_table(uint8_t *buf)
-{
-    struct bios_info *bios_info = (struct bios_info *)buf;
-
-    memset(bios_info, 0, sizeof(*bios_info));
-
-    bios_info->com1_present = uart_exists(0x3f8);
-    bios_info->com2_present = uart_exists(0x2f8);
-
-    bios_info->hpet_present = hpet_exists(ACPI_HPET_ADDRESS);
-
-    bios_info->pci_min = PCI_MEMBASE;
-    bios_info->pci_len = PCI_MEMSIZE;
-    bios_info->xen_pfiob = 0xdead;
-
-    return align16(sizeof(*bios_info));
 }
 
 static int construct_madt(struct acpi_20_madt *madt)
@@ -150,7 +111,7 @@ static int construct_madt(struct acpi_20
     offset += sizeof(*io_apic);
 
     lapic = (struct acpi_20_madt_lapic *)(io_apic + 1);
-    for ( i = 0; i < get_vcpu_nr(); i++ )
+    for ( i = 0; i < hvm_info->nr_vcpus; i++ )
     {
         memset(lapic, 0, sizeof(*lapic));
         lapic->type    = ACPI_PROCESSOR_LOCAL_APIC;
@@ -199,9 +160,10 @@ static int construct_secondary_tables(ui
     struct acpi_20_tcpa *tcpa;
     static const uint16_t tis_signature[] = {0x0001, 0x0001, 0x0001};
     uint16_t *tis_hdr;
+    void *lasa;
 
     /* MADT. */
-    if ( (get_vcpu_nr() > 1) || get_apic_mode() )
+    if ( (hvm_info->nr_vcpus > 1) || hvm_info->apic_mode )
     {
         madt = (struct acpi_20_madt *)&buf[offset];
         offset += construct_madt(madt);
@@ -246,11 +208,11 @@ static int construct_secondary_tables(ui
         tcpa->header.oem_revision = ACPI_OEM_REVISION;
         tcpa->header.creator_id   = ACPI_CREATOR_ID;
         tcpa->header.creator_revision = ACPI_CREATOR_REVISION;
-        tcpa->lasa = e820_malloc(ACPI_2_0_TCPA_LAML_SIZE, 0);
-        if ( tcpa->lasa )
-        {
+        if ( (lasa = mem_alloc(ACPI_2_0_TCPA_LAML_SIZE, 0)) != NULL )
+        {
+            tcpa->lasa = virt_to_phys(lasa);
             tcpa->laml = ACPI_2_0_TCPA_LAML_SIZE;
-            memset((char *)(unsigned long)tcpa->lasa, 0, tcpa->laml);
+            memset(lasa, 0, tcpa->laml);
             set_checksum(tcpa,
                          offsetof(struct acpi_header, checksum),
                          tcpa->header.length);
@@ -348,9 +310,7 @@ static void __acpi_build_tables(uint8_t 
     buf = (uint8_t *)ACPI_PHYSICAL_ADDRESS;
     offset = 0;
 
-    offset += construct_bios_info_table(&buf[offset]);
     rsdp = (struct acpi_20_rsdp *)&buf[offset];
-
     memcpy(rsdp, &Rsdp, sizeof(struct acpi_20_rsdp));
     offset += align16(sizeof(struct acpi_20_rsdp));
     rsdp->rsdt_address = (unsigned long)rsdt;
@@ -376,7 +336,7 @@ void acpi_build_tables(void)
     memset(buf, 0, high_sz);
 
     /* Allocate data area and set up ACPI tables there. */
-    buf = (uint8_t *)e820_malloc(high_sz, 0);
+    buf = mem_alloc(high_sz, 0);
     __acpi_build_tables(buf, &low_sz, &high_sz);
 
     printf(" - Lo data: %08lx-%08lx\n"
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/acpi/dsdt.asl
--- a/tools/firmware/hvmloader/acpi/dsdt.asl    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/acpi/dsdt.asl    Wed Jan 28 13:06:45 2009 +0900
@@ -86,7 +86,7 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, 
 
     Scope (\_SB)
     {
-       /* ACPI_PHYSICAL_ADDRESS == 0xEA000 */
+       /* BIOS_INFO_PHYSICAL_ADDRESS == 0xEA000 */
        OperationRegion(BIOS, SystemMemory, 0xEA000, 16)
        Field(BIOS, ByteAcc, NoLock, Preserve) {
            UAR1, 1,
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/acpi/dsdt.c
--- a/tools/firmware/hvmloader/acpi/dsdt.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/acpi/dsdt.c      Wed Jan 28 13:06:45 2009 +0900
@@ -1,11 +1,11 @@
 /*
  * 
  * Intel ACPI Component Architecture
- * ASL Optimizing Compiler version 20080729 [Dec 25 2008]
+ * ASL Optimizing Compiler version 20081204 [Jan 23 2009]
  * Copyright (C) 2000 - 2008 Intel Corporation
  * Supports ACPI Specification Revision 3.0a
  * 
- * Compilation of "dsdt.asl" - Thu Dec 25 17:00:32 2008
+ * Compilation of "dsdt.asl" - Fri Jan 23 14:30:29 2009
  * 
  * C source code output
  *
@@ -13,10 +13,10 @@ unsigned char AmlCode[] =
 unsigned char AmlCode[] =
 {
     0x44,0x53,0x44,0x54,0x5E,0x11,0x00,0x00,  /* 00000000    "DSDT^..." */
-    0x02,0xD1,0x58,0x65,0x6E,0x00,0x00,0x00,  /* 00000008    "..Xen..." */
+    0x02,0xEB,0x58,0x65,0x6E,0x00,0x00,0x00,  /* 00000008    "..Xen..." */
     0x48,0x56,0x4D,0x00,0x00,0x00,0x00,0x00,  /* 00000010    "HVM....." */
     0x00,0x00,0x00,0x00,0x49,0x4E,0x54,0x4C,  /* 00000018    "....INTL" */
-    0x29,0x07,0x08,0x20,0x08,0x50,0x4D,0x42,  /* 00000020    ").. .PMB" */
+    0x04,0x12,0x08,0x20,0x08,0x50,0x4D,0x42,  /* 00000020    "... .PMB" */
     0x53,0x0B,0x00,0x0C,0x08,0x50,0x4D,0x4C,  /* 00000028    "S....PML" */
     0x4E,0x0A,0x08,0x08,0x49,0x4F,0x42,0x31,  /* 00000030    "N...IOB1" */
     0x00,0x08,0x49,0x4F,0x4C,0x31,0x00,0x08,  /* 00000038    "..IOL1.." */
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/cacheattr.c
--- a/tools/firmware/hvmloader/cacheattr.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/cacheattr.c      Wed Jan 28 13:06:45 2009 +0900
@@ -88,11 +88,25 @@ void cacheattr_init(void)
     nr_var_ranges = (uint8_t)mtrr_cap;
     if ( nr_var_ranges != 0 )
     {
-        /* A single UC range covering PCI space. */
-        wrmsr(MSR_MTRRphysBase(0), PCI_MEMBASE);
-        wrmsr(MSR_MTRRphysMask(0),
-              ((uint64_t)(int32_t)PCI_MEMBASE & addr_mask) | (1u << 11));
-        printf("var MTRRs ... ");
+        unsigned long base = pci_mem_start, size;
+        int i;
+
+        for ( i = 0; (base != pci_mem_end) && (i < nr_var_ranges); i++ )
+        {
+            size = PAGE_SIZE;
+            while ( !(base & size) )
+                size <<= 1;
+            while ( ((base + size) < base) || ((base + size) > pci_mem_end) )
+                size >>= 1;
+
+            wrmsr(MSR_MTRRphysBase(i), base);
+            wrmsr(MSR_MTRRphysMask(i),
+                  (~(uint64_t)(size-1) & addr_mask) | (1u << 11));
+
+            base += size;
+        }
+
+        printf("var MTRRs [%d/%d] ... ", i, nr_var_ranges);
     }
 
     wrmsr(MSR_MTRRdefType, mtrr_def);
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/config.h
--- a/tools/firmware/hvmloader/config.h Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/config.h Wed Jan 28 13:06:45 2009 +0900
@@ -1,5 +1,8 @@
 #ifndef __HVMLOADER_CONFIG_H__
 #define __HVMLOADER_CONFIG_H__
+
+#define PAGE_SHIFT 12
+#define PAGE_SIZE  (1ul << PAGE_SHIFT)
 
 #define IOAPIC_BASE_ADDRESS 0xfec00000
 #define IOAPIC_ID           0x01
@@ -11,8 +14,14 @@
 #define PCI_ISA_DEVFN       0x08    /* dev 1, fn 0 */
 #define PCI_ISA_IRQ_MASK    0x0c20U /* ISA IRQs 5,10,11 are PCI connected */
 
-#define PCI_MEMBASE         0xf0000000
-#define PCI_MEMSIZE         0x0c000000
+/* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */
+#define PCI_MEM_START       0xf0000000
+#define PCI_MEM_END         0xfc000000
+extern unsigned long pci_mem_start, pci_mem_end;
+
+/* We reserve 16MB for special BIOS mappings, etc. */
+#define RESERVED_MEMBASE    0xfc000000
+#define RESERVED_MEMSIZE    0x01000000
 
 #define ROMBIOS_SEG            0xF000
 #define ROMBIOS_BEGIN          0x000F0000
@@ -21,16 +30,17 @@
 #define ROMBIOS_END            (ROMBIOS_BEGIN + ROMBIOS_SIZE)
 
 /* Memory map. */
+#define SCRATCH_PHYSICAL_ADDRESS      0x00010000
 #define HYPERCALL_PHYSICAL_ADDRESS    0x00080000
 #define VGABIOS_PHYSICAL_ADDRESS      0x000C0000
 #define OPTIONROM_PHYSICAL_ADDRESS    0x000C8000
 #define OPTIONROM_PHYSICAL_END        0x000EA000
-#define ACPI_PHYSICAL_ADDRESS         0x000EA000
+#define BIOS_INFO_PHYSICAL_ADDRESS    0x000EA000
+#define ACPI_PHYSICAL_ADDRESS         0x000EA020
 #define E820_PHYSICAL_ADDRESS         0x000EA100
 #define SMBIOS_PHYSICAL_ADDRESS       0x000EB000
 #define SMBIOS_MAXIMUM_SIZE           0x00005000
 #define ROMBIOS_PHYSICAL_ADDRESS      0x000F0000
-#define SCRATCH_PHYSICAL_ADDRESS      0x00010000
 
 /* Offsets from E820_PHYSICAL_ADDRESS. */
 #define E820_NR_OFFSET                0x0
@@ -39,12 +49,16 @@
 /* Xen Platform Device */
 #define PFFLAG_ROM_LOCK 1 /* Sets whether ROM memory area is RW or RO */
 
+/* Located at BIOS_INFO_PHYSICAL_ADDRESS. */
 struct bios_info {
-    uint8_t  com1_present:1;
-    uint8_t  com2_present:1;
-    uint8_t  hpet_present:1;
-    uint32_t pci_min, pci_len;
-    uint16_t xen_pfiob;
+    uint8_t  com1_present:1;    /* 0[0] - System has COM1? */
+    uint8_t  com2_present:1;    /* 0[1] - System has COM2? */
+    uint8_t  hpet_present:1;    /* 0[2] - System has HPET? */
+    uint32_t pci_min, pci_len;  /* 4, 8 - PCI I/O hole boundaries */
+    uint32_t bios32_entry;      /* 12   - Entry point for 32-bit BIOS */
+    uint16_t xen_pfiob;         /* 16   - Xen platform device I/O ports */
 };
+#define BIOSINFO_OFF_bios32_entry 12
+#define BIOSINFO_OFF_xen_pfiob    16
 
 #endif /* __HVMLOADER_CONFIG_H__ */
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/hvmloader.c
--- a/tools/firmware/hvmloader/hvmloader.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/hvmloader.c      Wed Jan 28 13:06:45 2009 +0900
@@ -31,6 +31,7 @@
 #include "option_rom.h"
 #include <xen/version.h>
 #include <xen/hvm/params.h>
+#include <xen/memory.h>
 
 asm (
     "    .text                       \n"
@@ -99,6 +100,9 @@ asm (
     "    .text                       \n"
     );
 
+unsigned long pci_mem_start = PCI_MEM_START;
+unsigned long pci_mem_end = PCI_MEM_END;
+
 static enum { VGA_none, VGA_std, VGA_cirrus } virtual_vga = VGA_none;
 
 static void init_hypercalls(void)
@@ -148,16 +152,14 @@ static void apic_setup(void)
 
 static void pci_setup(void)
 {
-    uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd;
+    uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd, mmio_total = 0;
     uint16_t class, vendor_id, device_id;
     unsigned int bar, pin, link, isa_irq;
 
     /* Resources assignable to PCI devices via BARs. */
     struct resource {
         uint32_t base, max;
-    } *resource;
-    struct resource mem_resource = { PCI_MEMBASE, PCI_MEMBASE + PCI_MEMSIZE };
-    struct resource io_resource  = { 0xc000, 0x10000 };
+    } *resource, mem_resource, io_resource;
 
     /* Create a list of device BARs in descending order of size. */
     struct bars {
@@ -248,6 +250,10 @@ static void pci_setup(void)
             bars[i].bar_reg = bar_reg;
             bars[i].bar_sz  = bar_sz;
 
+            if ( (bar_data & PCI_BASE_ADDRESS_SPACE) ==
+                 PCI_BASE_ADDRESS_SPACE_MEMORY )
+                mmio_total += bar_sz;
+
             nr_bars++;
 
             /* Skip the upper-half of the address for a 64-bit BAR. */
@@ -276,6 +282,28 @@ static void pci_setup(void)
         pci_writew(devfn, PCI_COMMAND, cmd);
     }
 
+    while ( (mmio_total > (pci_mem_end - pci_mem_start)) &&
+            ((pci_mem_start << 1) != 0) )
+        pci_mem_start <<= 1;
+
+    while ( (pci_mem_start >> PAGE_SHIFT) < hvm_info->low_mem_pgend )
+    {
+        struct xen_add_to_physmap xatp;
+        if ( hvm_info->high_mem_pgend == 0 )
+            hvm_info->high_mem_pgend = 1ull << (32 - PAGE_SHIFT);
+        xatp.domid = DOMID_SELF;
+        xatp.space = XENMAPSPACE_gmfn;
+        xatp.idx   = --hvm_info->low_mem_pgend;
+        xatp.gpfn  = hvm_info->high_mem_pgend++;
+        if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
+            BUG();
+    }
+
+    mem_resource.base = pci_mem_start;
+    mem_resource.max = pci_mem_end;
+    io_resource.base = 0xc000;
+    io_resource.max = 0x10000;
+
     /* Assign iomem and ioport resources in descending order of size. */
     for ( i = 0; i < nr_bars; i++ )
     {
@@ -488,22 +516,13 @@ static int pci_load_option_roms(uint32_t
 /* Replace possibly erroneous memory-size CMOS fields with correct values. */
 static void cmos_write_memory_size(void)
 {
-    struct e820entry *map = E820;
-    int i, nr = *E820_NR;
-    uint32_t base_mem = 640, ext_mem = 0, alt_mem = 0;
-
-    for ( i = 0; i < nr; i++ )
-        if ( (map[i].addr >= 0x100000) && (map[i].type == E820_RAM) )
-            break;
-
-    if ( i != nr )
-    {
-        alt_mem = ext_mem = map[i].addr + map[i].size;
-        ext_mem = (ext_mem > 0x0100000) ? (ext_mem - 0x0100000) >> 10 : 0;
-        if ( ext_mem > 0xffff )
-            ext_mem = 0xffff;
-        alt_mem = (alt_mem > 0x1000000) ? (alt_mem - 0x1000000) >> 16 : 0;
-    }
+    uint32_t base_mem = 640, ext_mem, alt_mem;
+
+    alt_mem = ext_mem = hvm_info->low_mem_pgend << PAGE_SHIFT;
+    ext_mem = (ext_mem > 0x0100000) ? (ext_mem - 0x0100000) >> 10 : 0;
+    if ( ext_mem > 0xffff )
+        ext_mem = 0xffff;
+    alt_mem = (alt_mem > 0x1000000) ? (alt_mem - 0x1000000) >> 16 : 0;
 
     /* All BIOSes: conventional memory (CMOS *always* reports 640kB). */
     cmos_outb(0x15, (uint8_t)(base_mem >> 0));
@@ -520,25 +539,23 @@ static void cmos_write_memory_size(void)
     cmos_outb(0x35, (uint8_t)( alt_mem >> 8));
 }
 
-static uint16_t init_xen_platform_io_base(void)
-{
-    struct bios_info *bios_info = (struct bios_info *)ACPI_PHYSICAL_ADDRESS;
+static uint16_t xen_platform_io_base(void)
+{
     uint32_t devfn, bar_data;
     uint16_t vendor_id, device_id;
 
-    bios_info->xen_pfiob = 0;
-
     for ( devfn = 0; devfn < 128; devfn++ )
     {
         vendor_id = pci_readw(devfn, PCI_VENDOR_ID);
         device_id = pci_readw(devfn, PCI_DEVICE_ID);
-        if ( (vendor_id != 0x5853) || (device_id != 0x0001) )
-            continue;
-        bar_data = pci_readl(devfn, PCI_BASE_ADDRESS_0);
-        bios_info->xen_pfiob = bar_data & PCI_BASE_ADDRESS_IO_MASK;
-    }
-
-    return bios_info->xen_pfiob;
+        if ( (vendor_id == 0x5853) && (device_id == 0x0001) )
+        {
+            bar_data = pci_readl(devfn, PCI_BASE_ADDRESS_0);
+            return bar_data & PCI_BASE_ADDRESS_IO_MASK;
+        }
+    }
+
+    return 0;
 }
 
 /*
@@ -548,27 +565,80 @@ static uint16_t init_xen_platform_io_bas
  */
 static void init_vm86_tss(void)
 {
-    uint32_t tss;
+    void *tss;
     struct xen_hvm_param p;
 
-    tss = e820_malloc(128, 128);
-    memset((char *)tss, 0, 128);
+    tss = mem_alloc(128, 128);
+    memset(tss, 0, 128);
     p.domid = DOMID_SELF;
     p.index = HVM_PARAM_VM86_TSS;
-    p.value = tss;
+    p.value = virt_to_phys(tss);
     hypercall_hvm_op(HVMOP_set_param, &p);
-    printf("vm86 TSS at %08x\n", tss);
-}
-
-/*
- * Copy the E820 table provided by the HVM domain builder into the correct
- * place in the memory map we share with the rombios.
- */
-static void copy_e820_table(void)
-{
-    uint8_t nr = *(uint8_t *)(HVM_E820_PAGE + HVM_E820_NR_OFFSET);
-    BUG_ON(nr > 16);
-    memcpy(E820, (char *)HVM_E820_PAGE + HVM_E820_OFFSET, nr * sizeof(*E820));
+    printf("vm86 TSS at %08lx\n", virt_to_phys(tss));
+}
+
+/* Create an E820 table based on memory parameters provided in hvm_info. */
+static void build_e820_table(void)
+{
+    struct e820entry *e820 = E820;
+    unsigned int nr = 0;
+
+    /* 0x0-0x9FC00: Ordinary RAM. */
+    e820[nr].addr = 0x0;
+    e820[nr].size = 0x9FC00;
+    e820[nr].type = E820_RAM;
+    nr++;
+
+    /* 0x9FC00-0xA0000: Extended BIOS Data Area (EBDA). */
+    e820[nr].addr = 0x9FC00;
+    e820[nr].size = 0x400;
+    e820[nr].type = E820_RESERVED;
+    nr++;
+
+    /*
+     * Following regions are standard regions of the PC memory map.
+     * They are not covered by e820 regions. OSes will not use them as RAM.
+     * 0xA0000-0xC0000: VGA memory-mapped I/O. Not covered by E820.
+     * 0xC0000-0xE0000: 16-bit devices, expansion ROMs (inc. vgabios).
+     * TODO: free pages which turn out to be unused.
+     */
+
+    /*
+     * 0xE0000-0x0F0000: PC-specific area. We place various tables here.
+     * 0xF0000-0x100000: System BIOS.
+     * TODO: free pages which turn out to be unused.
+     */
+    e820[nr].addr = 0xE0000;
+    e820[nr].size = 0x20000;
+    e820[nr].type = E820_RESERVED;
+    nr++;
+
+    /* Low RAM goes here. Reserve space for special pages. */
+    BUG_ON((hvm_info->low_mem_pgend << PAGE_SHIFT) < (2u << 20));
+    e820[nr].addr = 0x100000;
+    e820[nr].size = (hvm_info->low_mem_pgend << PAGE_SHIFT) - e820[nr].addr;
+    e820[nr].type = E820_RAM;
+    nr++;
+
+    /*
+     * Explicitly reserve space for special pages.
+     * This space starts at RESERVED_MEMBASE and extends to cover various
+     * fixed hardware mappings (e.g., LAPIC, IOAPIC, default SVGA framebuffer).
+     */
+    e820[nr].addr = RESERVED_MEMBASE;
+    e820[nr].size = (uint32_t)-e820[nr].addr;
+    e820[nr].type = E820_RESERVED;
+    nr++;
+
+    if ( hvm_info->high_mem_pgend )
+    {
+        e820[nr].addr = ((uint64_t)1 << 32);
+        e820[nr].size =
+            ((uint64_t)hvm_info->high_mem_pgend << PAGE_SHIFT) - e820[nr].addr;
+        e820[nr].type = E820_RAM;
+        nr++;
+    }
+
     *E820_NR = nr;
 }
 
@@ -576,16 +646,17 @@ int main(void)
 {
     int option_rom_sz = 0, vgabios_sz = 0, etherboot_sz = 0;
     int rombios_sz, smbios_sz;
-    uint32_t etherboot_phys_addr, option_rom_phys_addr, vga_ram = 0;
-    uint16_t xen_pfiob;
+    uint32_t etherboot_phys_addr, option_rom_phys_addr, bios32_addr;
+    struct bios_info *bios_info;
 
     printf("HVM Loader\n");
 
-    copy_e820_table();
-
     init_hypercalls();
 
     printf("CPU speed is %u MHz\n", get_cpu_mhz());
+
+    apic_setup();
+    pci_setup();
 
     smp_initialise();
 
@@ -599,12 +670,9 @@ int main(void)
     if ( rombios_sz > 0x10000 )
         rombios_sz = 0x10000;
     memcpy((void *)ROMBIOS_PHYSICAL_ADDRESS, rombios, rombios_sz);
-    highbios_setup();
-
-    apic_setup();
-    pci_setup();
-
-    if ( (get_vcpu_nr() > 1) || get_apic_mode() )
+    bios32_addr = highbios_setup();
+
+    if ( (hvm_info->nr_vcpus > 1) || hvm_info->apic_mode )
         create_mp_tables();
 
     switch ( virtual_vga )
@@ -626,12 +694,6 @@ int main(void)
         break;
     }
 
-    if ( virtual_vga != VGA_none )
-    {
-        vga_ram = e820_malloc(8 << 20, 4096);
-        printf("VGA RAM at %08x\n", vga_ram);
-    }
-
     etherboot_phys_addr = VGABIOS_PHYSICAL_ADDRESS + vgabios_sz;
     if ( etherboot_phys_addr < OPTIONROM_PHYSICAL_ADDRESS )
         etherboot_phys_addr = OPTIONROM_PHYSICAL_ADDRESS;
@@ -640,7 +702,7 @@ int main(void)
     option_rom_phys_addr = etherboot_phys_addr + etherboot_sz;
     option_rom_sz = pci_load_option_roms(option_rom_phys_addr);
 
-    if ( get_acpi_enabled() )
+    if ( hvm_info->acpi_enabled )
     {
         printf("Loading ACPI ...\n");
         acpi_build_tables();
@@ -672,9 +734,17 @@ int main(void)
                ROMBIOS_PHYSICAL_ADDRESS,
                ROMBIOS_PHYSICAL_ADDRESS + rombios_sz - 1);
 
-    xen_pfiob = init_xen_platform_io_base();
-    if ( xen_pfiob && vga_ram )
-        outl(xen_pfiob + 4, vga_ram);
+    build_e820_table();
+
+    bios_info = (struct bios_info *)BIOS_INFO_PHYSICAL_ADDRESS;
+    memset(bios_info, 0, sizeof(*bios_info));
+    bios_info->com1_present = uart_exists(0x3f8);
+    bios_info->com2_present = uart_exists(0x2f8);
+    bios_info->hpet_present = hpet_exists(ACPI_HPET_ADDRESS);
+    bios_info->pci_min = pci_mem_start;
+    bios_info->pci_len = pci_mem_end - pci_mem_start;
+    bios_info->bios32_entry = bios32_addr;
+    bios_info->xen_pfiob = xen_platform_io_base();
 
     printf("Invoking ROMBIOS ...\n");
     return 0;
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/mp_tables.c
--- a/tools/firmware/hvmloader/mp_tables.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/mp_tables.c      Wed Jan 28 13:06:45 2009 +0900
@@ -155,7 +155,7 @@ static void fill_mp_config_table(struct 
     int vcpu_nr, i;
     uint8_t checksum;
 
-    vcpu_nr = get_vcpu_nr();
+    vcpu_nr = hvm_info->nr_vcpus;
 
     /* fill in the MP configuration table signature, "PCMP" */
     mpct->signature[0] = 'P';
@@ -317,7 +317,7 @@ void create_mp_tables(void)
     char *p;
     int vcpu_nr, i, length;
 
-    vcpu_nr = get_vcpu_nr();
+    vcpu_nr = hvm_info->nr_vcpus;
 
     printf("Creating MP tables ...\n");
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/smbios.c
--- a/tools/firmware/hvmloader/smbios.c Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/smbios.c Wed Jan 28 13:06:45 2009 +0900
@@ -118,8 +118,9 @@ write_smbios_tables(void *start,
     do_struct(smbios_type_16_init(p, memsize, nr_mem_devs));
     for ( i = 0; i < nr_mem_devs; i++ )
     {
-        uint32_t dev_memsize = ((i == (nr_mem_devs - 1))
-                                ? (memsize & 0x3fff) : 0x4000);
+        uint32_t dev_memsize = 0x4000; /* all but last covers 16GB */
+        if ( (i == (nr_mem_devs - 1)) && ((memsize & 0x3fff) != 0) )
+            dev_memsize = memsize & 0x3fff; /* last dev is <16GB */
         do_struct(smbios_type_17_init(p, dev_memsize, i));
         do_struct(smbios_type_19_init(p, dev_memsize, i));
         do_struct(smbios_type_20_init(p, dev_memsize, i));
@@ -143,28 +144,18 @@ static uint64_t
 static uint64_t
 get_memsize(void)
 {
-    struct e820entry *map = E820;
-    uint8_t num_entries = *E820_NR;
-    uint64_t memsize = 0;
-    int i;
-
-    /*
-     * Walk through e820map, ignoring any entries that aren't marked
-     * as usable or reserved.
-     */
-    for ( i = 0; i < num_entries; i++ )
-    {
-        if ( (map->type == E820_RAM) || (map->type == E820_RESERVED) )
-            memsize += map->size;
-        map++;
-    }
+    uint64_t sz;
+
+    sz = (uint64_t)hvm_info->low_mem_pgend << PAGE_SHIFT;
+    if ( hvm_info->high_mem_pgend )
+        sz += (hvm_info->high_mem_pgend << PAGE_SHIFT) - (1ull << 32);
 
     /*
      * Round up to the nearest MB.  The user specifies domU pseudo-physical 
      * memory in megabytes, so not doing this could easily lead to reporting 
      * one less MB than the user specified.
      */
-    return (memsize + (1 << 20) - 1) >> 20;
+    return (sz + (1ul << 20) - 1) >> 20;
 }
 
 int
@@ -229,7 +220,7 @@ hvm_write_smbios_tables(void)
 
     /* SCRATCH_PHYSICAL_ADDRESS is a safe large memory area for scratch. */
     len = write_smbios_tables((void *)SCRATCH_PHYSICAL_ADDRESS,
-                              get_vcpu_nr(), get_memsize(),
+                              hvm_info->nr_vcpus, get_memsize(),
                               uuid, xen_version_str,
                               xen_major_version, xen_minor_version);
     if ( len > SMBIOS_MAXIMUM_SIZE )
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/smp.c
--- a/tools/firmware/hvmloader/smp.c    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/smp.c    Wed Jan 28 13:06:45 2009 +0900
@@ -121,7 +121,7 @@ static void boot_cpu(unsigned int cpu)
 
 void smp_initialise(void)
 {
-    unsigned int i, nr_cpus = get_vcpu_nr();
+    unsigned int i, nr_cpus = hvm_info->nr_vcpus;
 
     memcpy((void *)AP_BOOT_EIP, ap_boot_start, ap_boot_end - ap_boot_start);
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/util.c
--- a/tools/firmware/hvmloader/util.c   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/util.c   Wed Jan 28 13:06:45 2009 +0900
@@ -25,7 +25,6 @@
 #include <stdint.h>
 #include <xen/xen.h>
 #include <xen/memory.h>
-#include <xen/hvm/hvm_info_table.h>
 
 void wrmsr(uint32_t idx, uint64_t v)
 {
@@ -304,63 +303,63 @@ uuid_to_string(char *dest, uint8_t *uuid
     *p = '\0';
 }
 
-static void e820_collapse(void)
-{
-    int i = 0;
-    struct e820entry *ent = E820;
-
-    while ( i < (*E820_NR-1) )
-    {
-        if ( (ent[i].type == ent[i+1].type) &&
-             ((ent[i].addr + ent[i].size) == ent[i+1].addr) )
-        {
-            ent[i].size += ent[i+1].size;
-            memcpy(&ent[i+1], &ent[i+2], (*E820_NR-i-2) * sizeof(*ent));
-            (*E820_NR)--;
-        }
-        else
-        {
-            i++;
-        }
-    }
-}
-
-uint32_t e820_malloc(uint32_t size, uint32_t align)
-{
-    uint32_t addr;
-    int i;
-    struct e820entry *ent = E820;
+void *mem_alloc(uint32_t size, uint32_t align)
+{
+    static uint32_t reserve = RESERVED_MEMBASE - 1;
+    static int over_allocated;
+    struct xen_add_to_physmap xatp;
+    struct xen_memory_reservation xmr;
+    xen_pfn_t mfn;
+    uint32_t s, e;
 
     /* Align to at least one kilobyte. */
     if ( align < 1024 )
         align = 1024;
 
-    for ( i = *E820_NR - 1; i >= 0; i-- )
-    {
-        addr = (ent[i].addr + ent[i].size - size) & ~(align-1);
-        if ( (ent[i].type != E820_RAM) || /* not ram? */
-             (addr < ent[i].addr) ||      /* too small or starts above 4gb? */
-             ((addr + size) < addr) )     /* ends above 4gb? */
-            continue;
-
-        if ( addr != ent[i].addr )
-        {
-            memmove(&ent[i+1], &ent[i], (*E820_NR-i) * sizeof(*ent));
-            (*E820_NR)++;
-            ent[i].size = addr - ent[i].addr;
-            ent[i+1].addr = addr;
-            ent[i+1].size -= ent[i].size;
-            i++;
-        }
-
-        ent[i].type = E820_RESERVED;
-
-        e820_collapse();
-
-        return addr;
-    }
-
-    return 0;
+    s = (reserve + align) & ~(align - 1);
+    e = s + size - 1;
+
+    BUG_ON((e < s) || (e >> PAGE_SHIFT) >= hvm_info->reserved_mem_pgstart);
+
+    while ( (reserve >> PAGE_SHIFT) != (e >> PAGE_SHIFT) )
+    {
+        reserve += PAGE_SIZE;
+        mfn = reserve >> PAGE_SHIFT;
+
+        /* Try to allocate a brand new page in the reserved area. */
+        if ( !over_allocated )
+        {
+            xmr.domid = DOMID_SELF;
+            xmr.mem_flags = 0;
+            xmr.extent_order = 0;
+            xmr.nr_extents = 1;
+            set_xen_guest_handle(xmr.extent_start, &mfn);
+            if ( hypercall_memory_op(XENMEM_populate_physmap, &xmr) == 1 )
+                continue;
+            over_allocated = 1;
+        }
+
+        /* Otherwise, relocate a page from the ordinary RAM map. */
+        if ( hvm_info->high_mem_pgend )
+        {
+            xatp.idx = --hvm_info->high_mem_pgend;
+            if ( xatp.idx == (1ull << (32 - PAGE_SHIFT)) )
+                hvm_info->high_mem_pgend = 0;
+        }
+        else
+        {
+            xatp.idx = --hvm_info->low_mem_pgend;
+        }
+        xatp.domid = DOMID_SELF;
+        xatp.space = XENMAPSPACE_gmfn;
+        xatp.gpfn  = mfn;
+        if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
+            BUG();
+    }
+
+    reserve = e;
+
+    return (void *)(unsigned long)s;
 }
 
 uint32_t ioapic_read(uint32_t reg)
@@ -543,30 +542,35 @@ void __bug(char *file, int line)
         asm volatile ( "ud2" );
 }
 
-static int validate_hvm_info(struct hvm_info_table *t)
-{
-    char signature[] = "HVM INFO";
+static void validate_hvm_info(struct hvm_info_table *t)
+{
     uint8_t *ptr = (uint8_t *)t;
     uint8_t sum = 0;
     int i;
 
-    /* strncmp(t->signature, "HVM INFO", 8) */
-    for ( i = 0; i < 8; i++ )
-    {
-        if ( signature[i] != t->signature[i] )
-        {
-            printf("Bad hvm info signature\n");
-            return 0;
-        }
+    if ( strncmp(t->signature, "HVM INFO", 8) )
+    {
+        printf("Bad hvm info signature\n");
+        BUG();
+    }
+
+    if ( t->length < sizeof(struct hvm_info_table) )
+    {
+        printf("Bad hvm info length\n");
+        BUG();
     }
 
     for ( i = 0; i < t->length; i++ )
         sum += ptr[i];
 
-    return (sum == 0);
-}
-
-static struct hvm_info_table *get_hvm_info_table(void)
+    if ( sum != 0 )
+    {
+        printf("Bad hvm info checksum\n");
+        BUG();
+    }
+}
+
+struct hvm_info_table *get_hvm_info_table(void)
 {
     static struct hvm_info_table *table;
     struct hvm_info_table *t;
@@ -576,33 +580,11 @@ static struct hvm_info_table *get_hvm_in
 
     t = (struct hvm_info_table *)HVM_INFO_PADDR;
 
-    if ( !validate_hvm_info(t) )
-    {
-        printf("Bad hvm info table\n");
-        return NULL;
-    }
+    validate_hvm_info(t);
 
     table = t;
 
     return table;
-}
-
-int get_vcpu_nr(void)
-{
-    struct hvm_info_table *t = get_hvm_info_table();
-    return (t ? t->nr_vcpus : 1);
-}
-
-int get_acpi_enabled(void)
-{
-    struct hvm_info_table *t = get_hvm_info_table();
-    return (t ? t->acpi_enabled : 1);
-}
-
-int get_apic_mode(void)
-{
-    struct hvm_info_table *t = get_hvm_info_table();
-    return (t ? t->apic_mode : 1);
 }
 
 uint16_t get_cpu_mhz(void)
@@ -645,6 +627,27 @@ uint16_t get_cpu_mhz(void)
 
     cpu_mhz = (uint16_t)(((uint32_t)cpu_khz + 500) / 1000);
     return cpu_mhz;
+}
+
+int uart_exists(uint16_t uart_base)
+{
+    uint16_t ier = uart_base + 1;
+    uint8_t a, b, c;
+
+    a = inb(ier);
+    outb(ier, 0);
+    b = inb(ier);
+    outb(ier, 0xf);
+    c = inb(ier);
+    outb(ier, a);
+
+    return ((b == 0) && (c == 0xf));
+}
+
+int hpet_exists(unsigned long hpet_base)
+{
+    uint32_t hpet_id = *(uint32_t *)hpet_base;
+    return ((hpet_id >> 16) == 0x8086);
 }
 
 /*
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/util.h
--- a/tools/firmware/hvmloader/util.h   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/util.h   Wed Jan 28 13:06:45 2009 +0900
@@ -3,6 +3,7 @@
 
 #include <stdarg.h>
 #include <stdint.h>
+#include <xen/hvm/hvm_info_table.h>
 
 #undef offsetof
 #define offsetof(t, m) ((unsigned long)&((t *)0)->m)
@@ -56,6 +57,10 @@ void pci_write(uint32_t devfn, uint32_t 
 /* Get CPU speed in MHz. */
 uint16_t get_cpu_mhz(void);
 
+/* Hardware detection. */
+int uart_exists(uint16_t uart_base);
+int hpet_exists(unsigned long hpet_base);
+
 /* Do cpuid instruction, with operation 'idx' */
 void cpuid(uint32_t idx, uint32_t *eax, uint32_t *ebx,
            uint32_t *ecx, uint32_t *edx);
@@ -103,9 +108,8 @@ static inline void cpu_relax(void)
 })
 
 /* HVM-builder info. */
-int get_vcpu_nr(void);
-int get_acpi_enabled(void);
-int get_apic_mode(void);
+struct hvm_info_table *get_hvm_info_table(void);
+#define hvm_info (get_hvm_info_table())
 
 /* String and memory functions */
 int strcmp(const char *cs, const char *ct);
@@ -131,11 +135,12 @@ int printf(const char *fmt, ...) __attri
 int printf(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
 int vprintf(const char *fmt, va_list ap);
 
-/* Reserve a RAM region in the e820 table. */
-uint32_t e820_malloc(uint32_t size, uint32_t align);
+/* Allocate memory in a reserved region below 4GB. */
+void *mem_alloc(uint32_t size, uint32_t align);
+#define virt_to_phys(v) ((unsigned long)(v))
 
 /* Prepare the 32bit BIOS */
-void highbios_setup(void);
+uint32_t highbios_setup(void);
 
 /* Miscellaneous. */
 void cacheattr_init(void);
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bit/32bitbios.c
--- a/tools/firmware/rombios/32bit/32bitbios.c  Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/32bit/32bitbios.c  Wed Jan 28 13:06:45 2009 +0900
@@ -19,35 +19,16 @@
  *
  * Author: Stefan Berger <stefanb@xxxxxxxxxx>
  */
+
 #include "rombios_compat.h"
+
+asm (
+    "    .text                       \n"
+    "     movzwl %bx,%eax            \n"
+    "     jmp *jumptable(,%eax,4)    \n"
+    "    .data                       \n"
+    "jumptable:                      \n"
+#define X(idx, ret, fn, args...) " .long "#fn"\n"
 #include "32bitprotos.h"
-
-/*
-   the jumptable that will be copied into the rombios in the 0xf000 segment
-   for every function that is to be called from the lower BIOS, make an entry
-   here.
- */
-#define TABLE_ENTRY(idx, func) [idx] = (uint32_t)func
-uint32_t jumptable[IDX_LAST+1] __attribute__((section (".biosjumptable"))) =
-{
-       TABLE_ENTRY(IDX_TCPA_ACPI_INIT, tcpa_acpi_init),
-       TABLE_ENTRY(IDX_TCPA_EXTEND_ACPI_LOG, tcpa_extend_acpi_log),
-
-       TABLE_ENTRY(IDX_TCGINTERRUPTHANDLER, TCGInterruptHandler),
-
-       TABLE_ENTRY(IDX_TCPA_CALLING_INT19H, tcpa_calling_int19h),
-       TABLE_ENTRY(IDX_TCPA_RETURNED_INT19H, tcpa_returned_int19h),
-       TABLE_ENTRY(IDX_TCPA_ADD_EVENT_SEPARATORS, tcpa_add_event_separators),
-       TABLE_ENTRY(IDX_TCPA_WAKE_EVENT, tcpa_wake_event),
-       TABLE_ENTRY(IDX_TCPA_ADD_BOOTDEVICE, tcpa_add_bootdevice),
-       TABLE_ENTRY(IDX_TCPA_START_OPTION_ROM_SCAN, tcpa_start_option_rom_scan),
-       TABLE_ENTRY(IDX_TCPA_OPTION_ROM, tcpa_option_rom),
-       TABLE_ENTRY(IDX_TCPA_IPL, tcpa_ipl),
-       TABLE_ENTRY(IDX_TCPA_MEASURE_POST, tcpa_measure_post),
-
-       TABLE_ENTRY(IDX_TCPA_INITIALIZE_TPM, tcpa_initialize_tpm),
-
-       TABLE_ENTRY(IDX_GET_S3_WAKING_VECTOR, get_s3_waking_vector),
-
-       TABLE_ENTRY(IDX_LAST       , 0)     /* keep last */
-};
+#undef X
+    );
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bit/Makefile
--- a/tools/firmware/rombios/32bit/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/32bit/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -1,14 +1,11 @@ XEN_ROOT = ../../../..
 XEN_ROOT = ../../../..
 include $(XEN_ROOT)/tools/firmware/Rules.mk
 
-SOURCES = util.c
 TARGET = 32bitbios_flat.h
 
-CFLAGS += $(CFLAGS_include) -I.. -DGCC_PROTOS
+CFLAGS += $(CFLAGS_include) -I..
 
 SUBDIRS = tcgbios
-
-MODULES = tcgbios/tcgbiosext.o
 
 .PHONY: all
 all: subdirs-all
@@ -16,9 +13,12 @@ all: subdirs-all
 
 .PHONY: clean
 clean: subdirs-clean
-       rm -rf *.o $(TARGET)
+       rm -rf *.o $(TARGET) $(DEPS)
 
-$(TARGET): 32bitbios.o $(MODULES) util.o
+$(TARGET): 32bitbios_all.o
+       sh mkhex highbios_array 32bitbios_all.o > $@
+
+32bitbios_all.o: 32bitbios.o tcgbios/tcgbiosext.o util.o pmm.o
        $(LD) $(LDFLAGS_DIRECT) -s -r $^ -o 32bitbios_all.o
        @nm 32bitbios_all.o |                                \
          egrep '^ +U ' >/dev/null && {                      \
@@ -26,4 +26,5 @@ clean: subdirs-clean
            nm -u 32bitbios_all.o;                           \
            exit 11;                                         \
          } || :
-       sh mkhex highbios_array 32bitbios_all.o > $@
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bit/pmm.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/firmware/rombios/32bit/pmm.c        Wed Jan 28 13:06:45 2009 +0900
@@ -0,0 +1,531 @@
+/*
+ *  pmm.c - POST(Power On Self Test) Memory Manager
+ *  according to the specification described in
+ *  http://www.phoenix.com/NR/rdonlyres/873A00CF-33AC-4775-B77E-08E7B9754993/0/specspmm101.pdf
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *  Copyright (C) 2009 FUJITSU LIMITED
+ *
+ *  Author: Kouya Shimura <kouya@xxxxxxxxxxxxxx>
+ */
+
+/*
+ * Algorithm:
+ *
+ * This is not a fast storage allocator but a simple one.  There is no
+ * segregated management by block size and it does nothing special to
+ * avoid fragmentation.
+ *
+ * The allocation algorithm is first-fit. All memory blocks are
+ * managed by a linear singly linked list in address order
+ * (i.e. there is no backward pointer). It searches for the first
+ * available block of equal or larger size from the head (lowest
+ * address) of the heap. A larger block is split into two blocks
+ * unless one side would become too small.
+ * 
+ * For de-allocation, the specified block is just marked as available
+ * and nothing else is done. Thus, fragmentation will occur. The
+ * collection of contiguous available blocks is done during the search
+ * phase of a later block allocation.
+ *
+ * The following is an abstract of this algorithm. The actual code
+ * looks complicated on account of alignment and checking the handle.
+ *
+ *     static memblk_t *
+ *     alloc(heap_t *heap, uint32_t size)
+ *     {
+ *         static memblk_t *mb;
+ *         for_each_memblk(heap, mb) // search memory blocks
+ *             if (memblk_is_avail(mb))
+ *             {
+ *                 collect_avail_memblks(heap, mb);
+ *                 if (size <= memblk_bufsize(mb))
+ *                 {
+ *                     split_memblk(mb, size);
+ *                     set_inuse(mb);
+ *                     return mb;
+ *                 }
+ *             }
+ *         return NULL;
+ *     }
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <../hvmloader/config.h>
+#include <../hvmloader/e820.h>
+#include "util.h"
+
+#define DEBUG_PMM 0
+
+#define ASSERT(_expr, _action)                                  \
+    if (!(_expr)) {                                             \
+        printf("ASSERTION FAIL: %s %s:%d %s()\n",               \
+               __STRING(_expr), __FILE__, __LINE__, __func__);  \
+        _action;                                                \
+    } else
+
+#if DEBUG_PMM
+# define PMM_DEBUG(format, p...) printf("PMM " format, ##p)
+#else
+# define PMM_DEBUG(format, p...)
+#endif
+
+struct pmmAllocArgs {
+    uint16_t function;
+    uint32_t length;
+    uint32_t handle;
+    uint16_t flags;
+} __attribute__ ((packed));
+
+struct pmmFindArgs {
+    uint16_t function;
+    uint32_t handle;
+} __attribute__ ((packed));
+
+struct pmmDeallocateArgs {
+    uint16_t function;
+    uint32_t buffer;
+} __attribute__ ((packed));
+
+#define PMM_FUNCTION_ALLOCATE   0
+#define PMM_FUNCTION_FIND       1         
+#define PMM_FUNCTION_DEALLOC    2
+
+#define PARAGRAPH_LENGTH        16  // unit of length
+
+#define PMM_HANDLE_ANONYMOUS    0xffffffff
+
+#define PMM_FLAGS_MEMORY_TYPE_MASK      0x0003
+#define PMM_FLAGS_MEMORY_INVALID        0
+#define PMM_FLAGS_MEMORY_CONVENTIONAL   1  // 0 to 1MB
+#define PMM_FLAGS_MEMORY_EXTENDED       2  // 1MB to 4GB
+#define PMM_FLAGS_MEMORY_ANY            3  // whichever is available
+#define PMM_FLAGS_ALIGINMENT            0x0004
+
+/* Error code */
+#define PMM_ENOMEM      (0)     // Out of memory, duplicate handle
+#define PMM_EINVAL      (-1)    // Invalid argument
+
+#define ALIGN_UP(addr, size)    (((addr)+((size)-1))&(~((size)-1)))
+#define ALIGN_DOWN(addr, size)  ((addr)&(~((size)-1)))
+
+typedef struct memblk {
+    uint32_t magic;      // inuse or available
+    struct memblk *next; // points the very next of this memblk
+    uint32_t handle;     // identifier of this block
+    uint32_t __fill;     // for 16byte alignment, not used
+    uint8_t buffer[0];
+} memblk_t;
+
+typedef struct heap {
+    memblk_t *head;     // start address of heap
+    memblk_t *end;      // end address of heap
+} heap_t;
+
+#define HEAP_NOT_INITIALIZED    (memblk_t *)-1
+#define HEAP_ALIGNMENT          16
+
+/*
+ * PMM handles two memory heaps, the caller chooses either.
+ *
+ * - conventional memory (below 1MB)
+ *    In HVM, the area is fixed. 0x00010000-0x0007FFFF
+ *    (from SCRATCH_PHYSICAL_ADDRESS to HYPERCALL_PHYSICAL_ADDRESS)
+ *
+ * - extended memory (start at 1MB, below 4GB)
+ *    In HVM, the area starts at memory address 0x00100000.
+ *    The end address is variable. We read low RAM address from e820 table.
+ *
+ * The following struct must be located in the data segment since bss
+ * in 32bitbios is not relocated.
+ */
+static struct {
+    heap_t heap;     // conventional memory
+    heap_t ext_heap; // extended memory
+} pmm_data = { {HEAP_NOT_INITIALIZED, NULL}, {NULL, NULL} };
+
+/* These values are private use, not a spec in PMM */
+#define MEMBLK_MAGIC_INUSE   0x2A4D4D50  // 'PMM*'
+#define MEMBLK_MAGIC_AVAIL   0x5F4D4D50  // 'PMM_'
+
+#define memblk_is_inuse(_mb)  ((_mb)->magic == MEMBLK_MAGIC_INUSE)
+#define memblk_is_avail(_mb)  ((_mb)->magic == MEMBLK_MAGIC_AVAIL)
+
+static void set_inuse(memblk_t *mb, uint32_t handle)
+{
+    mb->magic = MEMBLK_MAGIC_INUSE;
+    mb->handle = handle;
+}
+
+static void set_avail(memblk_t *mb)
+{
+    mb->magic = MEMBLK_MAGIC_AVAIL;
+    mb->handle = PMM_HANDLE_ANONYMOUS;
+}
+
+#define MEMBLK_HEADER_SIZE   ((int)(&((memblk_t *)0)->buffer))
+#define MIN_MEMBLK_SIZE      (MEMBLK_HEADER_SIZE + PARAGRAPH_LENGTH)
+
+#define memblk_size(_mb)     ((void *)((_mb)->next) - (void *)(_mb))
+#define memblk_buffer(_mb)   ((uint32_t)(&(_mb)->buffer))
+#define memblk_bufsize(_mb)  (memblk_size(_mb) - MEMBLK_HEADER_SIZE)
+
+#define buffer_memblk(_buf)  (memblk_t *)((_buf) - MEMBLK_HEADER_SIZE)
+
+#define memblk_loop_mbondition(_h, _mb) \
+    (((_mb) < (_h)->end) && (/* avoid infinite loop */ (_mb) < (_mb)->next))
+
+#define for_each_memblk(_h, _mb)                \
+    for ((_mb) = (_h)->head;                    \
+         memblk_loop_mbondition(_h, _mb);       \
+         (_mb) = (_mb)->next)
+
+#define for_remain_memblk(_h, _mb)              \
+    for (;                                      \
+         memblk_loop_mbondition(_h, _mb);       \
+         (_mb) = (_mb)->next)
+
+/*
+ *                                       <-size->
+ *    +==================+======+       +========+========+======+
+ *    |      avail       |      |       | avail  | avail  |      |
+ *    |      memblk      |memblk|...    | memblk | memblk |memblk|...
+ *    +==================+======+   =>  +========+========+======+
+ *    ^ |                ^ |    ^         |      ^ |      ^ |    ^
+ *    | |next            | |next|         |next  | |next  | |next|
+ *    | \________________/ \____/         \______/ \______/ \____/
+ *    |                                          ^
+ *    |                                          |
+ *    mb                                         +- sb(return value)
+ */
+static memblk_t *
+split_memblk(memblk_t *mb, uint32_t size)
+{
+    memblk_t *sb = (void *)memblk_buffer(mb) + size;
+
+    /* Only split if the remaining fragment is big enough. */
+    if ( (memblk_bufsize(mb) - size) < MIN_MEMBLK_SIZE)
+        return mb;
+
+    sb->next = mb->next;
+    set_avail(sb);
+
+    mb->next = sb;
+    return sb;
+}
+
+/*
+ *    +======+======+======+======+       +=================+======+
+ *    |avail |avail |avail |inuse |       |      avail      |inuse |   
+ *    |memblk|memblk|memblk|memblk|...    |      memblk     |memblk|...
+ *    +======+======+======+======+   =>  +=================+======+
+ *    ^ |    ^ |    ^ |    ^ |    ^         |               ^ |    ^
+ *    | |next| |next| |next| |next|         |next           | |next|
+ *    | \____/ \____/ \____/ \____/         \_______________/ \____/
+ *    |
+ *    mb
+ */
+static void
+collect_avail_memblks(heap_t *heap, memblk_t *mb)
+{
+    memblk_t *nb = mb->next;
+
+    for_remain_memblk ( heap, nb )
+        if ( memblk_is_inuse(nb) )
+            break;
+    mb->next = nb;
+}
+
+static void
+pmm_init_heap(heap_t *heap, uint32_t from_addr, uint32_t to_addr)
+{
+    memblk_t *mb = (memblk_t *)ALIGN_UP(from_addr, HEAP_ALIGNMENT);
+
+    mb->next = (memblk_t *)ALIGN_DOWN(to_addr, HEAP_ALIGNMENT);
+    set_avail(mb);
+
+    heap->head = mb;
+    heap->end = mb->next;
+}
+
+static void
+pmm_initalize(void)
+{
+    int i, e820_nr = *E820_NR;
+    struct e820entry *e820 = E820;
+
+    /* Extended memory: RAM below 4GB, 0x100000-0xXXXXXXXX */
+    for ( i = 0; i < e820_nr; i++ )
+    {
+        if ( (e820[i].type == E820_RAM) && (e820[i].addr >= 0x00100000) )
+        {
+            pmm_init_heap(&pmm_data.ext_heap, e820[i].addr, 
+                          e820[i].addr + e820[i].size);
+            break;
+        }
+    }
+
+    /* Conventional memory: RAM below 1MB, 0x10000-0x7FFFF */
+    pmm_init_heap(&pmm_data.heap, SCRATCH_PHYSICAL_ADDRESS,
+                  HYPERCALL_PHYSICAL_ADDRESS);
+}
+
+static uint32_t
+pmm_max_avail_length(heap_t *heap)
+{
+    memblk_t *mb;
+    uint32_t size, max = 0;
+
+    for_each_memblk ( heap, mb )
+    {
+        if ( !memblk_is_avail(mb) )
+            continue;
+        collect_avail_memblks(heap, mb);
+        size = memblk_bufsize(mb);
+        if ( size > max )
+            max = size;
+    }
+
+    return (max / PARAGRAPH_LENGTH);
+}
+
+static memblk_t *
+first_fit(heap_t *heap, uint32_t size, uint32_t handle, uint32_t flags)
+{
+    memblk_t *mb;
+    int32_t align = 0;
+
+    if ( flags & PMM_FLAGS_ALIGINMENT )
+        align = ((size ^ (size - 1)) >> 1) + 1;
+
+    for_each_memblk ( heap, mb )
+    {
+        if ( memblk_is_avail(mb) )
+        {
+            collect_avail_memblks(heap, mb);
+
+            if ( align )
+            {
+                uint32_t addr = memblk_buffer(mb);
+                uint32_t offset = ALIGN_UP(addr, align) - addr;
+
+                if ( offset > 0 )
+                {
+                    ASSERT(offset >= MEMBLK_HEADER_SIZE, continue);
+
+                    if ( (offset + size) > memblk_bufsize(mb) )
+                        continue;
+
+                    mb = split_memblk(mb, offset - MEMBLK_HEADER_SIZE);
+                    return mb;
+                }
+            }
+
+            if ( size <= memblk_bufsize(mb) )
+                return mb;
+        }
+        else
+        {
+            ASSERT(memblk_is_inuse(mb), return NULL);
+
+            /* Duplication check for handle. */
+            if ( (handle != PMM_HANDLE_ANONYMOUS) && (mb->handle == handle) )
+                return NULL;
+        }
+    }
+
+    return NULL;
+}
+
+static memblk_t *
+pmm_find_handle(heap_t *heap, uint32_t handle)
+{
+    memblk_t *mb;
+
+    if ( handle == PMM_HANDLE_ANONYMOUS )
+        return NULL;
+
+    for_each_memblk ( heap, mb )
+        if ( mb->handle == handle )
+            return mb;
+
+    return NULL;
+}
+
+/*
+ * Allocates a memory block of the specified type and size, and returns
+ * the address of the memory block.
+ *
+ * The handle is a client-specified identifier to be associated with the
+ * allocated memory block. A handle of 0xFFFFFFFF indicates that no
+ * identifier should be associated with the block. Such a memory block is
+ * known as an "anonymous" memory block and cannot be found using the
+ * pmmFind function. If a specified handle for a requested memory
+ * block is already used in a currently allocated memory block, the
+ * error value of 0x00000000 is returned.
+ *
+ * If length is 0x00000000, no memory is allocated and the value
+ * returned is the size of the largest memory block available for the
+ * memory type specified in the flags parameter. The alignment bit in
+ * the flags register is ignored when calculating the largest memory
+ * block available.
+ *
+ * If a specified handle for a requested memory block is already used
+ * in a currently allocated memory block, the error value of
+ * 0x00000000 is returned.
+ * 
+ * A return value of 0x00000000 indicates that an error occurred and
+ * no memory has been allocated. 
+ */
+static uint32_t
+pmmAllocate(uint32_t length, uint32_t handle, uint16_t flags)
+{
+    heap_t *heap;
+    memblk_t *mb;
+    uint32_t size;
+
+    switch ( flags & PMM_FLAGS_MEMORY_TYPE_MASK )
+    {
+    case PMM_FLAGS_MEMORY_CONVENTIONAL:
+        heap = &pmm_data.heap;
+        break;
+
+    case PMM_FLAGS_MEMORY_EXTENDED:
+    case PMM_FLAGS_MEMORY_ANY: /* XXX: ignore conventional memory for now */
+        heap = &pmm_data.ext_heap;
+        break;
+
+    default:
+        return PMM_EINVAL;
+    }
+
+    /* return the largest memory block available */
+    if ( length == 0 )
+        return pmm_max_avail_length(heap);
+
+    size = length * PARAGRAPH_LENGTH;
+    mb = first_fit(heap, size, handle, flags);
+
+    if ( mb == NULL )
+        return PMM_ENOMEM;
+
+    /* duplication check for handle */
+    if ( handle != PMM_HANDLE_ANONYMOUS )
+    {
+        memblk_t *nb = mb->next;
+
+        for_remain_memblk(heap, nb)
+            if (nb->handle == handle)
+                return PMM_ENOMEM;
+    }
+
+    split_memblk(mb, size);
+    set_inuse(mb, handle);
+
+    return memblk_buffer(mb);
+}
+
+/*
+ * returns the address of the memory block associated with the
+ * specified handle.  
+ *
+ * A return value of 0x00000000 indicates that the handle does not
+ * correspond to a currently allocated memory block.
+ */
+static uint32_t
+pmmFind(uint32_t handle)
+{
+    memblk_t *mb;
+
+    if ( handle == PMM_HANDLE_ANONYMOUS )
+        return 0;
+
+    mb = pmm_find_handle(&pmm_data.heap, handle);
+    if ( mb == NULL )
+        mb = pmm_find_handle(&pmm_data.ext_heap, handle);
+
+    return mb ? memblk_buffer(mb) : 0;
+}
+
+/* 
+ * frees the specified memory block that was previously allocated by
+ * pmmAllocate.
+ *
+ * If the memory block was deallocated correctly, the return value is
+ * 0x00000000. If there was an error, the return value is non-zero.
+ */
+static uint32_t
+pmmDeallocate(uint32_t buffer)
+{
+    memblk_t *mb = buffer_memblk(buffer);
+
+    if ( !memblk_is_inuse(mb) )
+        return PMM_EINVAL;
+
+    set_avail(mb);
+    return 0;
+}
+
+
+union pmm_args {
+    uint16_t function;
+    struct pmmAllocArgs alloc;
+    struct pmmFindArgs find;
+    struct pmmDeallocateArgs dealloc;
+} __attribute__ ((packed));
+
+/*
+ * entry function of all PMM services.
+ *
+ * Values returned to the caller are placed in the DX:AX register
+ * pair. The flags and all registers, other than DX and AX, are
+ * preserved across calls to PMM services.
+ */
+uint32_t
+pmm(void *argp)
+{
+    union pmm_args *ap = argp;
+    uint32_t ret = PMM_EINVAL;
+
+    if ( pmm_data.heap.head == HEAP_NOT_INITIALIZED )
+        pmm_initalize();
+
+    switch ( ap->function )
+    {
+    case PMM_FUNCTION_ALLOCATE:
+        ret = pmmAllocate(ap->alloc.length, ap->alloc.handle, ap->alloc.flags);
+        PMM_DEBUG("Alloc length=%x handle=%x flags=%x ret=%x\n", 
+                  ap->alloc.length, ap->alloc.handle, ap->alloc.flags, ret);
+        break;
+
+    case PMM_FUNCTION_FIND:
+        ret = pmmFind(ap->find.handle);
+        PMM_DEBUG("Find handle=%x ret=%x\n", ap->find.handle, ret);
+        break;
+
+    case PMM_FUNCTION_DEALLOC:
+        ret = pmmDeallocate(ap->dealloc.buffer);
+        PMM_DEBUG("Dealloc buffer=%x ret=%x\n", ap->dealloc.buffer, ret);
+        break;
+
+    default:
+        PMM_DEBUG("Invalid function:%d\n", ap->function);
+        break;
+    }
+
+    return ret;
+}
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bit/rombios_compat.h
--- a/tools/firmware/rombios/32bit/rombios_compat.h     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/32bit/rombios_compat.h     Wed Jan 28 13:06:45 2009 +0900
@@ -89,4 +89,8 @@ static inline void write_byte(Bit16u seg
        *addr = val;
 }
 
+#define X(idx, ret, fn, args...) ret fn (args);
+#include "32bitprotos.h"
+#undef X
+
 #endif
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bit/tcgbios/Makefile
--- a/tools/firmware/rombios/32bit/tcgbios/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/32bit/tcgbios/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -2,17 +2,17 @@ include $(XEN_ROOT)/tools/firmware/Rules
 include $(XEN_ROOT)/tools/firmware/Rules.mk
 
 TARGET  = tcgbiosext.o
-FILES   = tcgbios tpm_drivers
-OBJECTS = $(foreach f,$(FILES),$(f).o)
 
-CFLAGS += $(CFLAGS_include) -I.. -I../.. -DGCC_PROTOS
+CFLAGS += $(CFLAGS_include) -I.. -I../..
 
-.PHONY: all clean
-
+.PHONY: all
 all: $(TARGET)
 
+.PHONY: clean
 clean:
-       rm -rf *.o $(TARGET)
+       rm -rf *.o $(TARGET) $(DEPS)
 
-$(TARGET): $(OBJECTS)
+$(TARGET): tcgbios.o tpm_drivers.o
        $(LD) $(LDFLAGS_DIRECT) -r $^ -o $@
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bit/tcgbios/tcgbios.c
--- a/tools/firmware/rombios/32bit/tcgbios/tcgbios.c    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/32bit/tcgbios/tcgbios.c    Wed Jan 28 13:06:45 2009 +0900
@@ -26,7 +26,6 @@
 
 #include "util.h"
 #include "tcgbios.h"
-#include "32bitprotos.h"
 
 /* local structure and variables */
 struct ptti_cust {
@@ -259,6 +258,10 @@ uint8_t acpi_validate_entry(struct acpi_
 }
 
 
+/*
+   initialize the TCPA ACPI subsystem; find the ACPI tables and determine
+   where the TCPA table is.
+ */
 void tcpa_acpi_init(void)
 {
        struct acpi_20_rsdt *rsdt;
@@ -313,6 +316,16 @@ static void tcpa_reset_acpi_log(void)
 }
 
 
+/*
+ * Extend the ACPI log with the given entry by copying the
+ * entry data into the log.
+ * Input
+ *  Pointer to the structure to be copied into the log
+ *
+ * Output:
+ *  lower 16 bits of return code contain entry number
+ *  if entry number is '0', then upper 16 bits contain error code.
+ */
 uint32_t tcpa_extend_acpi_log(uint32_t entry_ptr)
 {
        uint32_t res = 0;
@@ -622,7 +635,8 @@ void tcpa_wake_event()
 }
 
 /*
- * add the boot device to the measurement log
+ * Add a measurement regarding the boot device (CDRom, Floppy, HDD) to
+ * the list of measurements.
  */
 void tcpa_add_bootdevice(uint32_t bootcd, uint32_t bootdrv)
 {
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bitgateway.c
--- a/tools/firmware/rombios/32bitgateway.c     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/32bitgateway.c     Wed Jan 28 13:06:45 2009 +0900
@@ -19,8 +19,10 @@
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  *
  * Copyright (C) IBM Corporation, 2006
+ * Copyright (c) 2008, Citrix Systems, Inc.
  *
  * Author: Stefan Berger <stefanb@xxxxxxxxxx>
+ * Author: Keir Fraser <keir.fraser@xxxxxxxxxx>
  */
 
 /*
@@ -34,389 +36,142 @@
  *  (4 bytes) even for uint16_t, so casting to 32bit from bcc is a good idea.
  */
 
-#define SEGMENT_OFFSET  0xf0000
-#define REAL_MODE_CODE_SEGMENT  0xf000
+/* At most 32 bytes in argument list to a 32-bit function. */
+#define MAX_ARG_BYTES 32
 
-#define START_PM_CODE  USE32
-#define END_PM_CODE    USE16
+#define REAL_MODE_CODE_OFFSET  0xf0000
 
-/* definition of used code/data segment descriptors */
-#define PM_NORMAL_CS (gdt_entry_pm_cs       - gdt_base)
+/* Definitions of code/data segment descriptors. */
+#define PM_32BIT_CS  (gdt_entry_pm_32bit_cs - gdt_base)
 #define PM_16BIT_CS  (gdt_entry_pm_16bit_cs - gdt_base)
 #define PM_32BIT_DS  (gdt_entry_pm_32bit_ds - gdt_base)
+#define PM_16BIT_DS  (gdt_entry_pm_16bit_ds - gdt_base)
 
-  ASM_START
+    .align 16
+gdt_base:
+    .word 0,0
+    .byte 0,0,0,0
+gdt_entry_pm_32bit_cs:
+    .word 0xffff, 0x0000
+    .byte 0x00, 0x9b, 0xcf, 0x00
+gdt_entry_pm_16bit_cs:
+    .word 0xffff, 0x0000
+    .byte REAL_MODE_CODE_OFFSET >> 16, 0x9b, 0x0, 0x0
+gdt_entry_pm_32bit_ds:
+    .word 0xffff, 0x0000
+    .byte 0x0, 0x93, 0xcf, 0x0
+gdt_entry_pm_16bit_ds:
+    .word 0xffff, 0x0000
+    .byte 0x0, 0x93, 0x0, 0x0
+gdt_entry_end:
 
-    ; Switch into protected mode to allow access to 32 bit addresses.
-    ; This function allows switching into protected mode.
-    ; (the specs says big real mode, but that will not work)
+protmode_gdtdesc:
+    .word (gdt_entry_end - gdt_base) - 1
+    .long gdt_base | REAL_MODE_CODE_OFFSET
+
+realmode_gdtdesc:
+    .word 0xffff
+    .long 0x0
+
+Upcall:
+    ; Do an upcall into 32 bit space
     ;
-    ; preserves all registers and prepares cs, ds, es, ss for usage
-    ; in protected mode; while in prot.mode interrupts remain disabled
-switch_to_protmode:
+    ; Input:
+    ; bx: index of function to call
+    ; Output:
+    ; dx, ax: 32 bit result of call (even if 'void' is expected)
+
+    ; Save caller state, stack frame offsets listed below
+#define esp_off     0
+#define ss_off      4
+#define es_off      6
+#define ds_off      8
+#define flags_off   10
+#define retaddr_off 12
+#define args_off    14
+    pushf
     cli
+    push ds
+    push es
+    push ss
+    push esp
 
-    ; have to fix the stack for proper return address in 32 bit mode
-    push WORD #(REAL_MODE_CODE_SEGMENT>>12)    ;extended return address
-    push bp                                    ;pop@A1
-    mov bp, sp
-    push eax                                   ;pop@A2
-    mov eax, 2[bp]                             ; fix return address
-    rol eax, #16
-    mov 2[bp], eax
-
-    mov eax, esp
-    ror eax, #16                               ; hi(esp)
-
-    push bx                                    ; preserve before function call
-    push cx
-    push dx
-
-    push ax                                    ; prepare stack for
-    push es                                    ; call
-    push ds
-    push cs
-    push ss
-    call _store_segment_registers
-    add sp, #10                                        ; pop ax,es-ss
-
-    pop dx                                     ; restore after function call
-    pop cx
-    pop bx
-
-    ; calculate protected-mode esp from ss:sp
+    ; Calculate protected-mode esp from ss:sp
     and esp, #0xffff
     xor eax, eax
     mov ax, ss
-    rol eax, #4
-    add eax, esp
-    mov esp, eax
+    shl eax, #4
+    add esp, eax
 
+    ; Switch to protected mode
     seg cs
-    lgdt my_gdtdesc                            ; switch to own table
-
+    lgdt protmode_gdtdesc
     mov eax, cr0
-    or al, #0x1                                ; protected mode 'on'
+    or al, #0x1  ; protected mode on
     mov cr0, eax
-
-    jmpf DWORD (SEGMENT_OFFSET | switch_to_protmode_goon_1), #PM_NORMAL_CS
-
-    START_PM_CODE
-
-switch_to_protmode_goon_1:
-    mov ax, #PM_32BIT_DS                       ; 32 bit segment that allows
-    mov ds, ax                                 ; to reach all 32 bit
-    mov es, ax                                 ; addresses
+    jmpf DWORD (REAL_MODE_CODE_OFFSET|upcall1), #PM_32BIT_CS
+upcall1:
+    USE32
+    mov ax, #PM_32BIT_DS
+    mov ds, ax
+    mov es, ax
     mov ss, ax
 
-    pop eax                                    ;@A2
-    pop bp                                     ;@A1
-    ret
+    ; Marshal arguments and call 32-bit function
+    mov ecx, #MAX_ARG_BYTES/4
+upcall2:
+    push MAX_ARG_BYTES-4+args_off[esp]
+    loop upcall2
+    mov eax, [BIOS_INFO_PHYSICAL_ADDRESS + BIOSINFO_OFF_bios32_entry]
+    call eax
+    add esp, #MAX_ARG_BYTES
+    mov ecx, eax  ; Result in ecx
 
-    END_PM_CODE
-
-
-
-    .align 16
-gdt_base:
-    ; see Intel SW Dev. Manuals section 3.4.5, Volume 3 for meaning of bits
-    .word 0,0
-    .byte 0,0,0,0
-
-gdt_entry_pm_cs:
-    ; 32 bit code segment for protected mode
-    .word 0xffff, 0x0000
-    .byte 0x00, 0x9a, 0xcf, 0x00
-
-gdt_entry_pm_16bit_cs:
-    ; temp. 16 bit code segment used while in protected mode
-    .word 0xffff, 0x0000
-    .byte SEGMENT_OFFSET >> 16, 0x9a, 0x0, 0x0
-
-gdt_entry_pm_32bit_ds:
-    ; (32 bit) data segment (r/w) reaching all possible areas in 32bit memory
-    ; 4kb granularity
-    .word 0xffff, 0x0000
-    .byte 0x0, 0x92, 0xcf, 0x0
-gdt_entry_end:
-
-my_gdtdesc:
-    .word (gdt_entry_end - gdt_base) - 1
-    .long gdt_base | SEGMENT_OFFSET
-
-
-realmode_gdtdesc:                              ;to be used in real mode
-    .word 0xffff
-    .long 0x0
-
-
-
-switch_to_realmode:
-    ; Implementation of switching from protected mode to real mode
-    ; prepares cs, es, ds, ss to be used in real mode
-    ; spills   eax
-    START_PM_CODE
-
-    ; need to fix up the stack to return in 16 bit mode
-    ; currently the 32 bit return address is on the stack
-    pop eax
-    push ax
-
-    push bx                                    ;pop@1
-    push si                                    ;pop@2
-
-    call _ebda_ss_offset32                     ; get the offset of the ss
-    mov bx, ax                                 ; entry within the ebda.
-
-    jmpf switch_to_realmode_goon_1, #PM_16BIT_CS
-
-    END_PM_CODE
-
-switch_to_realmode_goon_1:
-    mov eax, cr0
-    and al, #0xfe                              ; protected mode 'off'
-    mov cr0, eax
-
-    jmpf switch_to_realmode_goon_2, #REAL_MODE_CODE_SEGMENT
-
-switch_to_realmode_goon_2:
-
-    ; get orig. 'ss' without using the stack (no 'call'!)
-    xor eax, eax                       ; clear upper 16 bits (and lower)
-    mov ax, #0x40                      ; where is the ebda located?
-    mov ds, ax
-    mov si, #0xe
-    seg ds
-    mov ax, [si]                       ; ax = segment of ebda
-
-    mov ds, ax                         ; segment of ebda
-    seg ds
-    mov ax, [bx]                       ; stack segment - bx has been set above
-    mov ss, ax
-
-    ; from esp and ss calculate real-mode sp
-    rol eax, #4
+    ; Restore real-mode stack pointer
+    xor eax, eax
+    mov ax, ss_off[esp]
+    mov bx, ax    ; Real-mode ss in bx
+    shl eax, 4
     sub esp, eax
 
-    push dx                            ;preserve before call(s)
-    push cx
-    push bx
-
-    call _get_register_ds              ; get orig. 'ds'
+    ; Return to real mode
+    jmpf upcall3, #PM_16BIT_CS
+upcall3:
+    USE16
+    mov ax, #PM_16BIT_DS
     mov ds, ax
-    call _get_register_es              ; get orig. 'es'
     mov es, ax
-    call _get_register_esp_hi          ; fix the upper 16 bits of esp
-    ror esp, #16
-    mov sp, ax
-    rol esp, #16
-
-    pop bx
-    pop cx
-    pop dx
-
+    mov ss, ax
+    mov eax, cr0
+    and al, #0xfe ; protected mode off
+    mov cr0, eax
+    jmpf upcall4, #REAL_MODE_CODE_OFFSET>>4
+upcall4:
     seg cs
     lgdt realmode_gdtdesc
 
-    sti                                                ; allow interrupts
+    ; Restore real-mode ss
+    mov ss, bx
 
-    pop si                                     ;@2
-    pop bx                                     ;@1
+    ; Convert result into dx:ax format
+    mov eax, ecx
+    ror eax, #16
+    mov dx, ax
+    ror eax, #16
 
+    ; Restore caller state and return
+    pop esp
+    pop bx ; skip ss
+    pop es
+    pop ds
+    popf
     ret
 
-    ASM_END
-
-/*
- * Helper function to get the offset of the reg_ss within the ebda struct
- * Only 'C' can tell the offset.
- */
-Bit16u
-ebda_ss_offset32()
-{
-    ASM_START
-    START_PM_CODE                              // need to have this
-    ASM_END                                    // compiled for protected mode
-    return &EbdaData->upcall.reg_ss;           // 'C' knows the offset!
-    ASM_START
-    END_PM_CODE
-    ASM_END
-}
-
-/*
- * Two often-used functions
- */
-Bit16u
-read_word_from_ebda(offset)
-    Bit16u offset;
-{
-       Bit16u ebda_seg = read_word(0x0040, 0x000E);
-       return read_word(ebda_seg, offset);
-}
-
-Bit32u
-read_dword_from_ebda(offset)
-    Bit16u offset;
-{
-       Bit16u ebda_seg = read_word(0x0040, 0x000E);
-       return read_dword(ebda_seg, offset);
-}
-
-/*
- * Store registers in the EBDA; used to keep the registers'
- * content in a well-defined place during protected mode execution
- */
-  void
-store_segment_registers(ss, cs, ds, es, esp_hi)
-  Bit16u ss, cs, ds, es, esp_hi;
-{
-       Bit16u ebda_seg = read_word(0x0040, 0x000E);
-       write_word(ebda_seg, &EbdaData->upcall.reg_ss, ss);
-       write_word(ebda_seg, &EbdaData->upcall.reg_cs, cs);
-       write_word(ebda_seg, &EbdaData->upcall.reg_ds, ds);
-       write_word(ebda_seg, &EbdaData->upcall.reg_es, es);
-       write_word(ebda_seg, &EbdaData->upcall.esp_hi, esp_hi);
-}
-
-
-  void
-store_returnaddress(retaddr)
-   Bit16u retaddr;
-{
-       Bit16u ebda_seg = read_word(0x0040, 0x000E);
-       write_word(ebda_seg, &EbdaData->upcall.retaddr, retaddr);
-}
-
-Bit16u
-get_returnaddress()
-{
-       return read_word_from_ebda(&EbdaData->upcall.retaddr);
-}
-
-/*
- * get the segment register 'cs' value from the EBDA
- */
-Bit16u
-get_register_cs()
-{
-       return read_word_from_ebda(&EbdaData->upcall.reg_cs);
-}
-
-/*
- * get the segment register 'ds' value from the EBDA
- */
-Bit16u
-get_register_ds()
-{
-       return read_word_from_ebda(&EbdaData->upcall.reg_ds);
-}
-
-/*
- * get the segment register 'es' value from the EBDA
- */
-Bit16u
-get_register_es()
-{
-       return read_word_from_ebda(&EbdaData->upcall.reg_es);
-}
-
-/*
- * get the upper 16 bits of the esp from the EBDA
- */
-Bit16u
-get_register_esp_hi()
-{
-       return read_word_from_ebda(&EbdaData->upcall.esp_hi);
-}
-
-
-
-/********************************************************/
-
-
-ASM_START
-
-Upcall:
-       ; do the upcall into 32 bit space
-       ; clear the stack frame so that 32 bit space sees all the parameters
-       ; on the stack as if they were prepared for it
-       ; ---> take the 16 bit return address off the stack and remember it
-       ;
-       ; Input:
-       ; bx: index of function to call
-       ; Ouput:
-       ; dx, ax: 32 bit result of call (even if 'void' is expected)
-
-       push bp                         ;pop @1
-       mov bp, sp
-       push si                         ;pop @2
-
-       mov ax, 2[bp]                   ; 16 bit return address
-       push ax
-       call _store_returnaddress       ; store away
-       pop ax
-
-       ; XXX GDT munging requires ROM to be writable!
-       call _enable_rom_write_access
-
-       rol bx, #2
-       mov si, #jmptable
-       seg cs
-       mov eax, dword ptr [si+bx]      ; address to call from table
-
-       pop si                          ;@2
-       pop bp                          ;@1
-
-       add sp, #2                      ; remove 16bit return address from stack
-
-       call switch_to_protmode
-       START_PM_CODE
-
-       call eax                        ; call 32bit function
-       push eax                        ; preserve result
-
-       call switch_to_realmode         ; back to realmode
-       END_PM_CODE
-
-       pop eax                         ; get result
-
-       push word 0x0000                ; placeholder for 16 bit return address
-       push bp
-       mov bp,sp
-       push eax                        ; preserve work register
-
-       call _disable_rom_write_access
-
-       call _get_returnaddress
-       mov 2[bp], ax                   ; 16bit return address onto stack
-
-       pop eax
-       pop bp
-
-       ror eax, #16                    ; result into dx/ax
-       mov dx, ax                      ; hi(res) -> dx
-       ror eax, #16
-
-       ret
-
-
-/* macro for functions to declare their call into 32bit space */
 MACRO DoUpcall
-       mov bx, #?1
-       jmp Upcall
+    mov bx, #?1
+    jmp Upcall
 MEND
 
-
-ASM_END
-
+#define X(idx, ret, fn, args...) _ ## fn: DoUpcall(idx)
 #include "32bitprotos.h"
-#include "32bitgateway.h"
-
-#include "tcgbios.c"
-
-Bit32u get_s3_waking_vector()
-{
-       ASM_START
-       DoUpcall(IDX_GET_S3_WAKING_VECTOR)
-       ASM_END
-}
+#undef X
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bitgateway.h
--- a/tools/firmware/rombios/32bitgateway.h     Wed Jan 28 12:22:58 2009 +0900
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-#ifndef GATEWAY
-#define GATEWAY
-
-#include "32bitprotos.h"
-
-void test_gateway();
-
-/* extension for the EBDA */
-typedef struct {
-  Bit16u reg_ss;
-  Bit16u reg_cs;
-  Bit16u reg_ds;
-  Bit16u reg_es;
-  Bit16u esp_hi;
-  Bit16u retaddr;
-} upcall_t;
-
-#endif
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bitprotos.h
--- a/tools/firmware/rombios/32bitprotos.h      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/32bitprotos.h      Wed Jan 28 13:06:45 2009 +0900
@@ -1,47 +1,16 @@
-#ifndef PROTOS_HIGHBIOS
-#define PROTOS_HIGHBIOS
-
-/* shared include file for bcc and gcc */
-
-/* bcc does not like 'enum' */
-#define IDX_TCGINTERRUPTHANDLER            0
-#define IDX_TCPA_ACPI_INIT                 1
-#define IDX_TCPA_EXTEND_ACPI_LOG           2
-#define IDX_TCPA_CALLING_INT19H            3
-#define IDX_TCPA_RETURNED_INT19H           4
-#define IDX_TCPA_ADD_EVENT_SEPARATORS      5
-#define IDX_TCPA_WAKE_EVENT                6
-#define IDX_TCPA_ADD_BOOTDEVICE            7
-#define IDX_TCPA_START_OPTION_ROM_SCAN     8
-#define IDX_TCPA_OPTION_ROM                9
-#define IDX_TCPA_IPL                       10
-#define IDX_TCPA_INITIALIZE_TPM            11
-#define IDX_TCPA_MEASURE_POST              12
-#define IDX_GET_S3_WAKING_VECTOR           13
-#define IDX_LAST                           14 /* keep last! */
-
-#ifdef GCC_PROTOS
-  #define PARMS(x...) x
-#else
-  /* bcc doesn't want any parameter types in prototypes */
-  #define PARMS(x...)
-#endif
-
-Bit32u TCGInterruptHandler( PARMS(pushad_regs_t *regs, Bit32u esds, Bit32u flags_ptr));
-
-void tcpa_acpi_init( PARMS(void) );
-Bit32u tcpa_extend_acpi_log( PARMS(Bit32u entry_ptr) );
-void tcpa_calling_int19h( PARMS(void) );
-void tcpa_returned_int19h( PARMS(void) );
-void tcpa_add_event_separators( PARMS(void) );
-void tcpa_wake_event( PARMS(void) );
-void tcpa_add_bootdevice( PARMS(Bit32u bootcd, Bit32u bootdrv) );
-void tcpa_start_option_rom_scan( PARMS(void) );
-void tcpa_option_rom( PARMS(Bit32u seg) );
-void tcpa_ipl( PARMS(Bit32u bootcd,Bit32u seg,Bit32u off,Bit32u count) );
-void tcpa_measure_post( PARMS(Bit32u from, Bit32u to) );
-Bit32u tcpa_initialize_tpm( PARMS(Bit32u physpres) );
-
-Bit32u get_s3_waking_vector( PARMS(void) );
-
-#endif
+X(0,  Bit32u, TCGInterruptHandler,
+  pushad_regs_t *regs, Bit32u esds, Bit32u flags_ptr)
+X(1,  void,   tcpa_acpi_init, void)
+X(2,  Bit32u, tcpa_extend_acpi_log, Bit32u entry_ptr)
+X(3,  void,   tcpa_calling_int19h,void)
+X(4,  void,   tcpa_returned_int19h, void)
+X(5,  void,   tcpa_add_event_separators, void)
+X(6,  void,   tcpa_wake_event, void)
+X(7,  void,   tcpa_add_bootdevice, Bit32u bootcd, Bit32u bootdrv)
+X(8,  void,   tcpa_start_option_rom_scan, void)
+X(9,  void,   tcpa_option_rom, Bit32u seg)
+X(10, void,   tcpa_ipl, Bit32u bootcd, Bit32u seg, Bit32u off, Bit32u count)
+X(11, void,   tcpa_measure_post, Bit32u from, Bit32u to)
+X(12, Bit32u, tcpa_initialize_tpm, Bit32u physpres)
+X(13, Bit32u, get_s3_waking_vector, void)
+X(14, Bit32u, pmm, void *argp)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/Makefile
--- a/tools/firmware/rombios/Makefile   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/Makefile   Wed Jan 28 13:06:45 2009 +0900
@@ -13,6 +13,7 @@ clean: subdirs-clean
        rm -f  as86-sym.txt ld86-sym.txt 
        rm -f  rombios*.txt rombios*.sym usage biossums
        rm -f  BIOS-bochs-*
+       rm -f  $(DEPS)
 
 BIOS-bochs-latest: rombios.c biossums 32bitgateway.c tcgbios.c
        gcc -DBX_SMP_PROCESSORS=1 -E -P $< > _rombios_.c
@@ -27,3 +28,4 @@ biossums: biossums.c
 biossums: biossums.c
        gcc -o biossums biossums.c
 
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/rombios.c
--- a/tools/firmware/rombios/rombios.c  Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/rombios.c  Wed Jan 28 13:06:45 2009 +0900
@@ -161,6 +161,8 @@
 
 #define BX_TCGBIOS       0   /* main switch for TCG BIOS ext. */
 
+#define BX_PMM           1   /* POST Memory Manager */
+
 #define BX_MAX_ATA_INTERFACES   4
 #define BX_MAX_ATA_DEVICES      (BX_MAX_ATA_INTERFACES*2)
 
@@ -726,7 +728,9 @@ typedef struct {
     } cdemu_t;
 #endif // BX_ELTORITO_BOOT
 
-#include "32bitgateway.h"
+#define X(idx, ret, fn, arg...) ret fn ();
+#include "32bitprotos.h"
+#undef X
 
   // for access to EBDA area
   //     The EBDA structure should conform to
@@ -752,8 +756,6 @@ typedef struct {
     // El Torito Emulation data
     cdemu_t cdemu;
 #endif // BX_ELTORITO_BOOT
-
-    upcall_t upcall;
     } ebda_data_t;
 
   #define EBDA_CMOS_SHUTDOWN_STATUS_OFFSET 1
@@ -1416,31 +1418,24 @@ fixup_base_mem_in_k()
   write_word(0x40, 0x13, base_mem >> 10);
 }
 
-void
-set_rom_write_access(action)
-  Bit16u action;
-{
-    Bit16u off = (Bit16u)&((struct bios_info *)0)->xen_pfiob;
 ASM_START
-    mov si,.set_rom_write_access.off[bp]
+_rom_write_access_control:
     push ds
-    mov ax,#(ACPI_PHYSICAL_ADDRESS >> 4)
+    mov ax,#(BIOS_INFO_PHYSICAL_ADDRESS >> 4)
     mov ds,ax
-    mov dx,[si]
+    mov ax,[BIOSINFO_OFF_xen_pfiob]
     pop ds
-    mov ax,.set_rom_write_access.action[bp]
-    out dx,al
+    ret
 ASM_END
-}
 
 void enable_rom_write_access()
 {
-    set_rom_write_access(0);
+    outb(rom_write_access_control(), 0);
 }
 
 void disable_rom_write_access()
 {
-    set_rom_write_access(PFFLAG_ROM_LOCK);
+    outb(rom_write_access_control(), PFFLAG_ROM_LOCK);
 }
     
 #endif /* HVMASSIST */
@@ -2054,7 +2049,10 @@ print_bios_banner()
   "rombios32 "
 #endif
 #if BX_TCGBIOS
-  "TCG-enabled"
+  "TCG-enabled "
+#endif
+#if BX_PMM
+  "PMM "
 #endif
   "\n\n");
 }
@@ -9499,8 +9497,9 @@ use16 386
 
 #endif
 
+#include "32bitgateway.c"
 ASM_END
-#include "32bitgateway.c"
+#include "tcgbios.c"
 ASM_START
 
 ;--------------------
@@ -10355,6 +10354,48 @@ rombios32_gdt:
   dw 0xffff, 0, 0x9300, 0x0000 ; 16 bit data segment base=0x0 limit=0xffff
 #endif // BX_ROMBIOS32
 
+#if BX_PMM
+; according to POST Memory Manager Specification Version 1.01
+.align 16
+pmm_structure:
+  db 0x24,0x50,0x4d,0x4d ;; "$PMM" signature
+  db 0x01 ;; revision
+  db 16 ;; length
+  db (-((pmm_entry_point>>8)+pmm_entry_point+0x20f))&0xff;; checksum
+  dw pmm_entry_point,0xf000 ;; far call entrypoint
+  db 0,0,0,0,0 ;; reserved
+
+pmm_entry_point:
+  pushf
+  pushad
+; Calculate protected-mode address of PMM function args
+  xor  eax, eax
+  mov  ax, sp
+  xor  ebx, ebx
+  mov  bx, ss
+  shl  ebx, 4
+  lea  ebx, [eax+ebx+38] ;; ebx=(ss<<4)+sp+4(far call)+2(pushf)+32(pushad)
+  push ebx
+;
+; Stack layout at this point:
+;
+;        : +0x0    +0x2    +0x4    +0x6    +0x8    +0xa    +0xc    +0xe
+; -----------------------------------------------------------------------
+; sp     : [&arg1         ][edi           ][esi           ][ebp           ]
+; sp+0x10: [esp           ][ebx           ][edx           ][ecx           ]
+; sp+0x20: [eax           ][flags ][ip    ][cs    ][arg1  ][arg2, ...
+;
+  call _pmm
+  mov  bx, sp
+SEG SS
+  mov  [bx+0x20], ax
+SEG SS
+  mov  [bx+0x18], dx
+  pop  ebx
+  popad
+  popf
+  retf
+#endif // BX_PMM
 
 ; parallel port detection: base address in DX, index in BX, timeout in CL
 detect_parport:
@@ -10447,7 +10488,9 @@ rom_scan:
   ;;   3         ROM initialization entry point (FAR CALL)
 
 #if BX_TCGBIOS
+  push ax
   call _tcpa_start_option_rom_scan    /* specs: 3.2.3.3 + 10.4.3 */
+  pop ax
 #endif
 
 rom_scan_loop:
@@ -11790,15 +11833,6 @@ static Bit8u vgafont8[128*8]=
 #ifdef HVMASSIST
 ASM_START
 
-// space for addresses in 32bit BIOS area; currently 256/4 entries
-// are allocated
-.org 0xcb00
-jmptable:
-db 0x5F, 0x5F, 0x5F, 0x4A, 0x4D, 0x50, 0x54 ;; ___JMPT
-dw 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ;;  64 bytes
-dw 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ;; 128 bytes
-dw 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ;; 192 bytes
-
 //
 // MP Tables
 // just carve out some blank space for HVMLOADER to write the MP tables to
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/tcgbios.c
--- a/tools/firmware/rombios/tcgbios.c  Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/tcgbios.c  Wed Jan 28 13:06:45 2009 +0900
@@ -25,162 +25,6 @@
   Support for TCPA ACPI logging
  ******************************************************************/
 
-/*
- * Extend the ACPI log with the given entry by copying the
- * entry data into the log.
- * Input
- *  Pointer to the structure to be copied into the log
- *
- * Output:
- *  lower 16 bits of return code contain entry number
- *  if entry number is '0', then upper 16 bits contain error code.
- */
-Bit32u tcpa_extend_acpi_log(entry_ptr)
-    Bit32u entry_ptr;
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_EXTEND_ACPI_LOG)
-       ASM_END
-}
-
-
-/*
-   initialize the TCPA ACPI subsystem; find the ACPI tables and determine
-   where the TCPA table is.
- */
- void
-tcpa_acpi_init()
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_ACPI_INIT)
-       ASM_END
-}
-
-
-/*
- * Add measurement to log about call of int 19h
- */
- void
-tcpa_calling_int19h()
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_CALLING_INT19H)
-       ASM_END
-}
-
-/*
- * Add measurement to log about retuning from int 19h
- */
- void
-tcpa_returned_int19h()
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_RETURNED_INT19H)
-       ASM_END
-}
-
-/*
- * Add event separators for PCRs 0 to 7; specs 8.2.3
- */
- void
-tcpa_add_event_separators()
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_ADD_EVENT_SEPARATORS)
-       ASM_END
-}
-
-
-/*
- * Add a wake event to the log
- */
- void
-tcpa_wake_event()
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_WAKE_EVENT)
-       ASM_END
-}
-
-
-/*
- * Add measurement to the log about option rom scan
- * 10.4.3 : action 14
- */
- void
-tcpa_start_option_rom_scan()
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_START_OPTION_ROM_SCAN)
-       ASM_END
-}
-
-
-/*
- * Add measurement to the log about an option rom
- */
- void
-tcpa_option_rom(seg)
-    Bit32u seg;
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_OPTION_ROM)
-       ASM_END
-}
-
-/*
- * Add a measurement regarding the boot device (CDRom, Floppy, HDD) to
- * the list of measurements.
- */
-void
- tcpa_add_bootdevice(bootcd, bootdrv)
-  Bit32u bootcd;
-  Bit32u bootdrv;
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_ADD_BOOTDEVICE)
-       ASM_END
-}
-
-/*
- * Add a measurement to the log in support of 8.2.5.3
- * Creates two log entries
- *
- * Input parameter:
- *  seg    : segment where the IPL data are located
- */
- void
- tcpa_ipl(bootcd,seg,off,count)
-    Bit32u bootcd;
-    Bit32u seg;
-    Bit32u off;
-    Bit32u count;
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_IPL)
-       ASM_END
-}
-
-
-Bit32u
-tcpa_initialize_tpm(physpres)
-  Bit32u physpres;
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_INITIALIZE_TPM)
-       ASM_END
-}
-
-void
-tcpa_measure_post(from, to)
-   Bit32u from;
-   Bit32u to;
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_MEASURE_POST)
-       ASM_END
-}
-
 ASM_START
 MACRO POST_MEASURE
        push word #0x000f
@@ -205,18 +49,6 @@ tcpa_do_measure_POSTs()
        POST_MEASURE(timer_tick_post, int76_handler)
 
        ret
-       ASM_END
-}
-
-Bit32u
-TCGInterruptHandler(regs_ptr, es, ds, flags_ptr)
-   Bit32u regs_ptr;
-   Bit16u es;
-   Bit16u ds;
-   Bit32u flags_ptr;
-{
-       ASM_START
-       DoUpcall(IDX_TCGINTERRUPTHANDLER)
        ASM_END
 }
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/vgabios/vbe.c
--- a/tools/firmware/vgabios/vbe.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/vgabios/vbe.c      Wed Jan 28 13:06:45 2009 +0900
@@ -37,8 +37,6 @@
 
 #include "vbe.h"
 #include "vbetables.h"
-
-#define VBE_TOTAL_VIDEO_MEMORY_DIV_64K (VBE_DISPI_TOTAL_VIDEO_MEMORY_MB*1024/64)
 
 // The current OEM Software Revision of this VBE Bios
 #define VBE_OEM_SOFTWARE_REV 0x0002;
@@ -821,7 +819,8 @@ Bit16u *AX;Bit16u ES;Bit16u DI;
         vbe_info_block.VideoModePtr_Off= DI + 34;
 
         // VBE Total Memory (in 64b blocks)
-        vbe_info_block.TotalMemory = VBE_TOTAL_VIDEO_MEMORY_DIV_64K;
+        outw(VBE_DISPI_IOPORT_INDEX, VBE_DISPI_INDEX_VIDEO_MEMORY_64K);
+        vbe_info_block.TotalMemory = inw(VBE_DISPI_IOPORT_DATA);
 
         if (vbe2_info)
        {
@@ -846,7 +845,8 @@ Bit16u *AX;Bit16u ES;Bit16u DI;
         do
         {
                 if ((cur_info->info.XResolution <= dispi_get_max_xres()) &&
-                    (cur_info->info.BitsPerPixel <= dispi_get_max_bpp())) {
+                    (cur_info->info.BitsPerPixel <= dispi_get_max_bpp()) &&
+                    (cur_info->info.XResolution * cur_info->info.XResolution * cur_info->info.BitsPerPixel <= vbe_info_block.TotalMemory << 19 )) {
 #ifdef DEBUG
                   printf("VBE found mode %x => %x\n", cur_info->mode,cur_mode);
 #endif
@@ -855,7 +855,7 @@ Bit16u *AX;Bit16u ES;Bit16u DI;
                   cur_ptr+=2;
                 } else {
 #ifdef DEBUG
-                  printf("VBE mode %x (xres=%x / bpp=%02x) not supported by display\n", cur_info->mode,cur_info->info.XResolution,cur_info->info.BitsPerPixel);
+                  printf("VBE mode %x (xres=%x / bpp=%02x) not supported \n", cur_info->mode,cur_info->info.XResolution,cur_info->info.BitsPerPixel);
 #endif
                 }
                 cur_info++;
@@ -913,7 +913,13 @@ Bit16u *AX;Bit16u CX; Bit16u ES;Bit16u D
                   info.WinFuncPtr = 0xC0000000UL;
                   *(Bit16u *)&(info.WinFuncPtr) = (Bit16u)(dispi_set_bank_farcall);
                 }
-                
+                outw(VBE_DISPI_IOPORT_INDEX,VBE_DISPI_INDEX_LFB_ADDRESS_H);
+                info.PhysBasePtr = inw(VBE_DISPI_IOPORT_DATA);
+                info.PhysBasePtr = info.PhysBasePtr << 16;
+#if 0                                  
+                outw(VBE_DISPI_IOPORT_INDEX,VBE_DISPI_INDEX_LFB_ADDRESS_L);
+                info.PhysBasePtr |= inw(VBE_DISPI_IOPORT_DATA);
+#endif                                                         
                 result = 0x4f;
         }
         else
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/vgabios/vbe.h
--- a/tools/firmware/vgabios/vbe.h      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/vgabios/vbe.h      Wed Jan 28 13:06:45 2009 +0900
@@ -275,39 +275,41 @@ typedef struct ModeInfoListItem
 //        like 0xE0000000
 
 
-  #define VBE_DISPI_BANK_ADDRESS          0xA0000
-  #define VBE_DISPI_BANK_SIZE_KB          64
+  #define VBE_DISPI_BANK_ADDRESS           0xA0000
+  #define VBE_DISPI_BANK_SIZE_KB           64
   
-  #define VBE_DISPI_MAX_XRES              1024
-  #define VBE_DISPI_MAX_YRES              768
+  #define VBE_DISPI_MAX_XRES               2560
+  #define VBE_DISPI_MAX_YRES               1600
   
-  #define VBE_DISPI_IOPORT_INDEX          0x01CE
-  #define VBE_DISPI_IOPORT_DATA           0x01CF
+  #define VBE_DISPI_IOPORT_INDEX           0x01CE
+  #define VBE_DISPI_IOPORT_DATA            0x01CF
   
-  #define VBE_DISPI_INDEX_ID              0x0
-  #define VBE_DISPI_INDEX_XRES            0x1
-  #define VBE_DISPI_INDEX_YRES            0x2
-  #define VBE_DISPI_INDEX_BPP             0x3
-  #define VBE_DISPI_INDEX_ENABLE          0x4
-  #define VBE_DISPI_INDEX_BANK            0x5
-  #define VBE_DISPI_INDEX_VIRT_WIDTH      0x6
-  #define VBE_DISPI_INDEX_VIRT_HEIGHT     0x7
-  #define VBE_DISPI_INDEX_X_OFFSET        0x8
-  #define VBE_DISPI_INDEX_Y_OFFSET        0x9
-      
-  #define VBE_DISPI_ID0                   0xB0C0
-  #define VBE_DISPI_ID1                   0xB0C1
-  #define VBE_DISPI_ID2                   0xB0C2
-  #define VBE_DISPI_ID3                   0xB0C3
-  #define VBE_DISPI_ID4                   0xB0C4
-  
-  #define VBE_DISPI_DISABLED              0x00
-  #define VBE_DISPI_ENABLED               0x01
-  #define VBE_DISPI_GETCAPS               0x02
-  #define VBE_DISPI_8BIT_DAC              0x20
-  #define VBE_DISPI_LFB_ENABLED           0x40
-  #define VBE_DISPI_NOCLEARMEM            0x80
-  
-  #define VBE_DISPI_LFB_PHYSICAL_ADDRESS  0xE0000000
+  #define VBE_DISPI_INDEX_ID               0x0
+  #define VBE_DISPI_INDEX_XRES             0x1
+  #define VBE_DISPI_INDEX_YRES             0x2
+  #define VBE_DISPI_INDEX_BPP              0x3
+  #define VBE_DISPI_INDEX_ENABLE           0x4
+  #define VBE_DISPI_INDEX_BANK             0x5
+  #define VBE_DISPI_INDEX_VIRT_WIDTH       0x6
+  #define VBE_DISPI_INDEX_VIRT_HEIGHT      0x7
+  #define VBE_DISPI_INDEX_X_OFFSET         0x8
+  #define VBE_DISPI_INDEX_Y_OFFSET         0x9
+  #define VBE_DISPI_INDEX_VIDEO_MEMORY_64K 0xa
+  #define VBE_DISPI_INDEX_LFB_ADDRESS_H    0xb
+  #define VBE_DISPI_INDEX_LFB_ADDRESS_L    0xc
+
+  #define VBE_DISPI_LFB_PHYSICAL_ADDRESS   0xE0000000
+  #define VBE_DISPI_ID0                    0xB0C0
+  #define VBE_DISPI_ID1                    0xB0C1
+  #define VBE_DISPI_ID2                    0xB0C2
+  #define VBE_DISPI_ID3                    0xB0C3
+  #define VBE_DISPI_ID4                    0xB0C4
+
+  #define VBE_DISPI_DISABLED               0x00
+  #define VBE_DISPI_ENABLED                0x01
+  #define VBE_DISPI_GETCAPS                0x02
+  #define VBE_DISPI_8BIT_DAC               0x20
+  #define VBE_DISPI_LFB_ENABLED            0x40
+  #define VBE_DISPI_NOCLEARMEM             0x80
 
 #endif
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/vgabios/vbetables-gen.c
--- a/tools/firmware/vgabios/vbetables-gen.c    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/vgabios/vbetables-gen.c    Wed Jan 28 13:06:45 2009 +0900
@@ -2,7 +2,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 
-#define VBE_DISPI_TOTAL_VIDEO_MEMORY_MB 8
+#define VBE_DISPI_TOTAL_VIDEO_MEMORY_MB 16
 
 typedef struct {
     int width;
@@ -42,19 +42,40 @@ ModeInfo modes[] = {
 { 1600, 1200, 24                      , 0x11F},
 
       /* BOCHS/PLE, 86 'own' mode numbers */
-{ 320, 200, 32                        , 0x140},
-{ 640, 400, 32                        , 0x141},
-{ 640, 480, 32                        , 0x142},
-{ 800, 600, 32                        , 0x143},
-{ 1024, 768, 32                       , 0x144},
-{ 1280, 1024, 32                      , 0x145},
-{ 320, 200, 8                           , 0x146},
-{ 1600, 1200, 32                      , 0x147},
-{ 1152, 864, 8                      , 0x148},
+{ 320, 200, 32                       , 0x140},
+{ 640, 400, 32                       , 0x141},
+{ 640, 480, 32                       , 0x142},
+{ 800, 600, 32                       , 0x143},
+{ 1024, 768, 32                      , 0x144},
+{ 1280, 1024, 32                     , 0x145},
+{ 320, 200, 8                        , 0x146},
+{ 1600, 1200, 32                     , 0x147},
+{ 1152, 864, 8                       , 0x148},
 { 1152, 864, 15                      , 0x149},
 { 1152, 864, 16                      , 0x14a},
 { 1152, 864, 24                      , 0x14b},
 { 1152, 864, 32                      , 0x14c},
+{ 1280, 800, 16                      , 0x178},
+{ 1280, 800, 24                      , 0x179},
+{ 1280, 800, 32                      , 0x17a},
+{ 1280, 960, 16                      , 0x17b},
+{ 1280, 960, 24                      , 0x17c},
+{ 1280, 960, 32                      , 0x17d},
+{ 1440, 900, 16                      , 0x17e},
+{ 1440, 900, 24                      , 0x17f},
+{ 1440, 900, 32                      , 0x180},
+{ 1400, 1050, 16                     , 0x181},
+{ 1400, 1050, 24                     , 0x182},
+{ 1400, 1050, 32                     , 0x183},
+{ 1680, 1050, 16                     , 0x184},
+{ 1680, 1050, 24                     , 0x185},
+{ 1680, 1050, 32                     , 0x186},
+{ 1920, 1200, 16                     , 0x187},
+{ 1920, 1200, 24                     , 0x188},
+{ 1920, 1200, 32                     , 0x189},
+{ 2560, 1600, 16                     , 0x18a},
+{ 2560, 1600, 24                     , 0x18b},
+{ 2560, 1600, 32                     , 0x18c},
 { 0, },
 };
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/vgabios/vgabios.c
--- a/tools/firmware/vgabios/vgabios.c  Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/vgabios/vgabios.c  Wed Jan 28 13:06:45 2009 +0900
@@ -3811,9 +3811,9 @@ void printf(s)
         for (i=0; i<format_width; i++) {
           nibble = (arg >> (4 * digit)) & 0x000f;
           if (nibble <= 9)
-            outb(0x0500, nibble + '0');
+            outb(0xe9, nibble + '0');
           else
-            outb(0x0500, (nibble - 10) + 'A');
+            outb(0xe9, (nibble - 10) + 'A');
           digit--;
           }
         in_format = 0;
@@ -3823,7 +3823,7 @@ void printf(s)
       //  }
       }
     else {
-      outb(0x0500, c);
+      outb(0xe9, c);
       }
     s ++;
     }
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/flask/libflask/Makefile
--- a/tools/flask/libflask/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/flask/libflask/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -16,7 +16,6 @@ CFLAGS   += $(INCLUDES) -I./include -I$(
 # Get gcc to generate the dependencies for us.
 CFLAGS   += -Wp,-MD,.$(@F).d
 LDFLAGS  += -L.
-DEPS     = .*.d
 
 LIB_OBJS := $(patsubst %.c,%.o,$(SRCS))
 PIC_OBJS := $(patsubst %.c,%.opic,$(SRCS))
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/flask/loadpolicy/Makefile
--- a/tools/flask/loadpolicy/Makefile   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/flask/loadpolicy/Makefile   Wed Jan 28 13:06:45 2009 +0900
@@ -7,9 +7,6 @@ LIBFLASK_ROOT = $(XEN_ROOT)/tools/flask/
 
 PROFILE=#-pg
 BASECFLAGS=-Wall -g -Werror
-# Make gcc generate dependencies.
-BASECFLAGS += -Wp,-MD,.$(@F).d
-PROG_DEP = .*.d
 BASECFLAGS+= $(PROFILE)
 #BASECFLAGS+= -I$(XEN_ROOT)/tools
 BASECFLAGS+= $(CFLAGS_libxenctrl)
@@ -39,7 +36,7 @@ clean:
 clean: 
        rm -f *.o *.opic *.so
        rm -f $(CLIENTS)
-       $(RM) $(PROG_DEP)
+       $(RM) $(DEPS)
 
 .PHONY: print-dir
 print-dir:
@@ -54,7 +51,7 @@ install: all
        $(INSTALL_DIR) $(DESTDIR)$(SBINDIR)
        $(INSTALL_PROG) $(CLIENTS) $(DESTDIR)$(SBINDIR)
 
--include $(PROG_DEP)
+-include $(DEPS)
 
 # never delete any intermediate files.
 .SECONDARY:
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/fs-back/Makefile
--- a/tools/fs-back/Makefile    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/fs-back/Makefile    Wed Jan 28 13:06:45 2009 +0900
@@ -12,10 +12,6 @@ CFLAGS   += $(CFLAGS_libxenstore)
 CFLAGS   += $(CFLAGS_libxenstore)
 CFLAGS   += $(INCLUDES) -I.
 CFLAGS   += -D_GNU_SOURCE
-
-# Get gcc to generate the dependencies for us.
-CFLAGS   += -Wp,-MD,.$(@F).d
-DEPS      = .*.d
 
 LIBS      := -L. -L.. -L../lib
 LIBS      += $(LDFLAGS_libxenctrl)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/include/Makefile
--- a/tools/include/Makefile    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/include/Makefile    Wed Jan 28 13:06:45 2009 +0900
@@ -10,11 +10,12 @@ xen-foreign:
 
 xen/.dir:
        @rm -rf xen
-       mkdir xen
+       mkdir -p xen/libelf
        ln -sf ../$(XEN_ROOT)/xen/include/public/COPYING xen
        ln -sf $(addprefix ../,$(wildcard $(XEN_ROOT)/xen/include/public/*.h)) xen
        ln -sf $(addprefix ../$(XEN_ROOT)/xen/include/public/,arch-ia64 arch-x86 hvm io xsm) xen
        ln -sf ../xen-sys/$(XEN_OS) xen/sys
+       ln -sf $(addprefix ../../$(XEN_ROOT)/xen/include/xen/,libelf.h elfstructs.h) xen/libelf/
        ln -s ../xen-foreign xen/foreign
        touch $@
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/include/xen-foreign/reference.size
--- a/tools/include/xen-foreign/reference.size  Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/include/xen-foreign/reference.size  Wed Jan 28 13:06:45 2009 +0900
@@ -1,7 +1,7 @@
 
 structs                   |  x86_32  x86_64    ia64
 
-start_info                |    1104    1152    1152
+start_info                |    1112    1168    1168
 trap_info                 |       8      16       -
 pt_fpreg                  |       -       -      16
 cpu_user_regs             |      68     200       -
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libaio/src/Makefile
--- a/tools/libaio/src/Makefile Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libaio/src/Makefile Wed Jan 28 13:06:45 2009 +0900
@@ -6,7 +6,7 @@ libdir=$(prefix)/lib
 libdir=$(prefix)/lib
 
 ARCH := $(shell uname -m | sed -e s/i.86/i386/)
-CFLAGS := -nostdlib -nostartfiles -Wall -I. -g -fomit-frame-pointer -O2 -fPIC
+CFLAGS = -nostdlib -nostartfiles -Wall -I. -g -fomit-frame-pointer -O2 -fPIC
 SO_CFLAGS=-shared $(CFLAGS)
 L_CFLAGS=$(CFLAGS)
 LINK_FLAGS=
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libfsimage/Rules.mk
--- a/tools/libfsimage/Rules.mk Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libfsimage/Rules.mk Wed Jan 28 13:06:45 2009 +0900
@@ -1,8 +1,6 @@ include $(XEN_ROOT)/tools/Rules.mk
 include $(XEN_ROOT)/tools/Rules.mk
 
-DEPS = .*.d
-
-CFLAGS += -I$(XEN_ROOT)/tools/libfsimage/common/ -Werror -Wp,-MD,.$(@F).d
+CFLAGS += -I$(XEN_ROOT)/tools/libfsimage/common/ -Werror
 LDFLAGS += -L../common/
 
 PIC_OBJS := $(patsubst %.c,%.opic,$(LIB_SRCS-y))
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libfsimage/common/Makefile
--- a/tools/libfsimage/common/Makefile  Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libfsimage/common/Makefile  Wed Jan 28 13:06:45 2009 +0900
@@ -3,9 +3,6 @@ include $(XEN_ROOT)/tools/Rules.mk
 
 MAJOR = 1.0
 MINOR = 0
-
-CFLAGS += -Werror -Wp,-MD,.$(@F).d
-DEPS = .*.d
 
 LDFLAGS-$(CONFIG_SunOS) = -Wl,-M -Wl,mapfile-SunOS
 LDFLAGS-$(CONFIG_Linux) = -Wl,mapfile-GNU
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/Makefile
--- a/tools/libxc/Makefile      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/Makefile      Wed Jan 28 13:06:45 2009 +0900
@@ -1,7 +1,7 @@ XEN_ROOT = ../..
 XEN_ROOT = ../..
 include $(XEN_ROOT)/tools/Rules.mk
 
-MAJOR    = 3.2
+MAJOR    = 3.4
 MINOR    = 0
 
 CTRL_SRCS-y       :=
@@ -62,10 +62,7 @@ CFLAGS-$(CONFIG_Linux) += -D_GNU_SOURCE
 # libraries.
 #CFLAGS   += -DVALGRIND -O0 -ggdb3
 
-# Get gcc to generate the dependencies for us.
-CFLAGS   += -Wp,-MD,.$(@F).d
 LDFLAGS  += -L.
-DEPS     = .*.d
 
 CTRL_LIB_OBJS := $(patsubst %.c,%.o,$(CTRL_SRCS-y))
 CTRL_PIC_OBJS := $(patsubst %.c,%.opic,$(CTRL_SRCS-y))
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_core.c     Wed Jan 28 13:06:45 2009 +0900
@@ -57,9 +57,6 @@
 
 /* number of pages to write at a time */
 #define DUMP_INCREMENT (4 * 1024)
-
-/* Don't yet support cross-address-size core dump */
-#define guest_width (sizeof (unsigned long))
 
 /* string table */
 struct xc_core_strtab {
@@ -240,7 +237,7 @@ xc_core_ehdr_init(Elf64_Ehdr *ehdr)
     ehdr->e_ident[EI_ABIVERSION] = EV_CURRENT;
 
     ehdr->e_type = ET_CORE;
-    ehdr->e_machine = ELF_ARCH_MACHINE;
+    /* e_machine will be filled in later */
     ehdr->e_version = EV_CURRENT;
     ehdr->e_entry = 0;
     ehdr->e_phoff = 0;
@@ -359,7 +356,8 @@ elfnote_dump_core_header(
 }
 
 static int
-elfnote_dump_xen_version(void *args, dumpcore_rtn_t dump_rtn, int xc_handle)
+elfnote_dump_xen_version(void *args, dumpcore_rtn_t dump_rtn, int xc_handle,
+                         unsigned int guest_width)
 {
     int sts;
     struct elfnote elfnote;
@@ -371,6 +369,12 @@ elfnote_dump_xen_version(void *args, dum
     elfnote.descsz = sizeof(xen_version);
     elfnote.type = XEN_ELFNOTE_DUMPCORE_XEN_VERSION;
     elfnote_fill_xen_version(xc_handle, &xen_version);
+    if (guest_width < sizeof(unsigned long))
+    {
+        // 32 bit elf file format differs in pagesize's alignment
+        char *p = (char *)&xen_version.pagesize;
+        memmove(p - 4, p, sizeof(xen_version.pagesize));
+    }
     sts = dump_rtn(args, (char*)&elfnote, sizeof(elfnote));
     if ( sts != 0 )
         return sts;
@@ -396,6 +400,24 @@ elfnote_dump_format_version(void *args, 
     return dump_rtn(args, (char*)&format_version, sizeof(format_version));
 }
 
+static int
+get_guest_width(int xc_handle,
+                uint32_t domid,
+                unsigned int *guest_width)
+{
+    DECLARE_DOMCTL;
+
+    memset(&domctl, 0, sizeof(domctl));
+    domctl.domain = domid;
+    domctl.cmd = XEN_DOMCTL_get_address_size;
+
+    if ( do_domctl(xc_handle, &domctl) != 0 )
+        return 1;
+        
+    *guest_width = domctl.u.address_size.size / 8;
+    return 0;
+}
+
 int
 xc_domain_dumpcore_via_callback(int xc_handle,
                                 uint32_t domid,
@@ -403,7 +425,8 @@ xc_domain_dumpcore_via_callback(int xc_h
                                 dumpcore_rtn_t dump_rtn)
 {
     xc_dominfo_t info;
-    shared_info_t *live_shinfo = NULL;
+    shared_info_any_t *live_shinfo = NULL;
+    unsigned int guest_width; 
 
     int nr_vcpus = 0;
     char *dump_mem, *dump_mem_start = NULL;
@@ -437,6 +460,12 @@ xc_domain_dumpcore_via_callback(int xc_h
     uint16_t strtab_idx;
     struct xc_core_section_headers *sheaders = NULL;
     Elf64_Shdr *shdr;
+ 
+    if ( get_guest_width(xc_handle, domid, &guest_width) != 0 )
+    {
+        PERROR("Could not get address size for domain");
+        return sts;
+    }
 
     xc_core_arch_context_init(&arch_ctxt);
     if ( (dump_mem_start = malloc(DUMP_INCREMENT*PAGE_SIZE)) == NULL )
@@ -500,7 +529,7 @@ xc_domain_dumpcore_via_callback(int xc_h
             goto out;
         }
 
-        sts = xc_core_arch_map_p2m(xc_handle, &info, live_shinfo,
+        sts = xc_core_arch_map_p2m(xc_handle, guest_width, &info, live_shinfo,
                                    &p2m, &p2m_size);
         if ( sts != 0 )
             goto out;
@@ -676,6 +705,7 @@ xc_domain_dumpcore_via_callback(int xc_h
     /* write out elf header */
     ehdr.e_shnum = sheaders->num;
     ehdr.e_shstrndx = strtab_idx;
+    ehdr.e_machine = ELF_ARCH_MACHINE;
     sts = dump_rtn(args, (char*)&ehdr, sizeof(ehdr));
     if ( sts != 0 )
         goto out;
@@ -697,7 +727,7 @@ xc_domain_dumpcore_via_callback(int xc_h
         goto out;
 
     /* elf note section: xen version */
-    sts = elfnote_dump_xen_version(args, dump_rtn, xc_handle);
+    sts = elfnote_dump_xen_version(args, dump_rtn, xc_handle, guest_width);
     if ( sts != 0 )
         goto out;
 
@@ -757,9 +787,21 @@ xc_domain_dumpcore_via_callback(int xc_h
 
             if ( !auto_translated_physmap )
             {
-                gmfn = p2m[i];
-                if ( gmfn == INVALID_P2M_ENTRY )
-                    continue;
+                if ( guest_width >= sizeof(unsigned long) )
+                {
+                    if ( guest_width == sizeof(unsigned long) )
+                        gmfn = p2m[i];
+                    else
+                        gmfn = ((uint64_t *)p2m)[i];
+                    if ( gmfn == INVALID_P2M_ENTRY )
+                        continue;
+                }
+                else
+                {
+                    gmfn = ((uint32_t *)p2m)[i];
+                    if ( gmfn == (uint32_t)INVALID_P2M_ENTRY )
+                       continue;
+                }
 
                 p2m_array[j].pfn = i;
                 p2m_array[j].gmfn = gmfn;
@@ -802,7 +844,7 @@ copy_done:
         /* When live dump-mode (-L option) is specified,
          * guest domain may reduce memory. pad with zero pages.
          */
-        IPRINTF("j (%ld) != nr_pages (%ld)", j , nr_pages);
+        IPRINTF("j (%ld) != nr_pages (%ld)", j, nr_pages);
         memset(dump_mem_start, 0, PAGE_SIZE);
         for (; j < nr_pages; j++) {
             sts = dump_rtn(args, dump_mem_start, PAGE_SIZE);
@@ -891,7 +933,7 @@ xc_domain_dumpcore(int xc_handle,
     struct dump_args da;
     int sts;
 
-    if ( (da.fd = open(corename, O_CREAT|O_RDWR, S_IWUSR|S_IRUSR)) < 0 )
+    if ( (da.fd = open(corename, O_CREAT|O_RDWR|O_TRUNC, S_IWUSR|S_IRUSR)) < 0 )
     {
         PERROR("Could not open corefile %s", corename);
         return -errno;
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_core.h
--- a/tools/libxc/xc_core.h     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_core.h     Wed Jan 28 13:06:45 2009 +0900
@@ -23,7 +23,7 @@
 
 #include "xen/version.h"
 #include "xg_private.h"
-#include "xen/elfstructs.h"
+#include "xen/libelf/elfstructs.h"
 
 /* section names */
 #define XEN_DUMPCORE_SEC_NOTE                   ".note.Xen"
@@ -136,12 +136,12 @@ struct xc_core_arch_context;
 struct xc_core_arch_context;
 int xc_core_arch_memory_map_get(int xc_handle,
                                 struct xc_core_arch_context *arch_ctxt,
-                                xc_dominfo_t *info, shared_info_t *live_shinfo,
+                                xc_dominfo_t *info, shared_info_any_t *live_shinfo,
                                 xc_core_memory_map_t **mapp,
                                 unsigned int *nr_entries);
-int xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info,
-                         shared_info_t *live_shinfo, xen_pfn_t **live_p2m,
-                         unsigned long *pfnp);
+int xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width,
+                         xc_dominfo_t *info, shared_info_any_t *live_shinfo,
+                         xen_pfn_t **live_p2m, unsigned long *pfnp);
 
 
 #if defined (__i386__) || defined (__x86_64__)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_core_ia64.c
--- a/tools/libxc/xc_core_ia64.c        Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_core_ia64.c        Wed Jan 28 13:06:45 2009 +0900
@@ -68,7 +68,7 @@ xc_core_arch_auto_translated_physmap(con
 /* see setup_guest() @ xc_linux_build.c */
 static int
 memory_map_get_old_domu(int xc_handle, xc_dominfo_t *info,
-                        shared_info_t *live_shinfo,
+                        shared_info_any_t *live_shinfo,
                         xc_core_memory_map_t **mapp, unsigned int *nr_entries)
 {
     xc_core_memory_map_t *map = NULL;
@@ -96,7 +96,7 @@ out:
 /* see setup_guest() @ xc_ia64_hvm_build.c */
 static int
 memory_map_get_old_hvm(int xc_handle, xc_dominfo_t *info, 
-                       shared_info_t *live_shinfo,
+                       shared_info_any_t *live_shinfo,
                        xc_core_memory_map_t **mapp, unsigned int *nr_entries)
 {
     const xc_core_memory_map_t gfw_map[] = {
@@ -155,7 +155,7 @@ out:
 
 static int
 memory_map_get_old(int xc_handle, xc_dominfo_t *info, 
-                   shared_info_t *live_shinfo,
+                   shared_info_any_t *live_shinfo,
                    xc_core_memory_map_t **mapp, unsigned int *nr_entries)
 {
     if ( info->hvm )
@@ -170,7 +170,8 @@ int
 int
 xc_core_arch_memory_map_get(int xc_handle,
                             struct xc_core_arch_context *arch_ctxt,
-                            xc_dominfo_t *info, shared_info_t *live_shinfo,
+                            xc_dominfo_t *info,
+                            shared_info_any_t *live_shinfo,
                             xc_core_memory_map_t **mapp,
                             unsigned int *nr_entries)
 {
@@ -190,8 +191,8 @@ xc_core_arch_memory_map_get(int xc_handl
     }
 
     /* copy before use in case someone updating them */
-    if (xc_ia64_copy_memmap(xc_handle, info->domid, live_shinfo, &memmap_info,
-                            NULL)) {
+    if (xc_ia64_copy_memmap(xc_handle, info->domid, &live_shinfo->s,
+                            &memmap_info, NULL)) {
         goto old;
     }
 
@@ -235,8 +236,8 @@ old:
 }
 
 int
-xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info,
-                     shared_info_t *live_shinfo, xen_pfn_t **live_p2m,
+xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width, xc_dominfo_t *info,
+                     shared_info_any_t *live_shinfo, xen_pfn_t **live_p2m,
                      unsigned long *pfnp)
 {
     /*
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_core_x86.c
--- a/tools/libxc/xc_core_x86.c Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_core_x86.c Wed Jan 28 13:06:45 2009 +0900
@@ -20,9 +20,25 @@
 
 #include "xg_private.h"
 #include "xc_core.h"
-
-/* Don't yet support cross-address-size core dump */
-#define guest_width (sizeof (unsigned long))
+#include "xc_e820.h"
+
+#define GET_FIELD(_p, _f) ((guest_width==8) ? ((_p)->x64._f) : ((_p)->x32._f))
+
+#ifndef MAX
+#define MAX(_a, _b) ((_a) >= (_b) ? (_a) : (_b))
+#endif
+
+int
+xc_core_arch_gpfn_may_present(struct xc_core_arch_context *arch_ctxt,
+                              unsigned long pfn)
+{
+    if ((pfn >= 0xa0 && pfn < 0xc0) /* VGA hole */
+        || (pfn >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT)
+            && pfn < (1ULL<<32) >> PAGE_SHIFT)) /* MMIO */
+        return 0;
+    return 1;
+}
+
 
 static int nr_gpfns(int xc_handle, domid_t domid)
 {
@@ -37,7 +53,7 @@ xc_core_arch_auto_translated_physmap(con
 
 int
 xc_core_arch_memory_map_get(int xc_handle, struct xc_core_arch_context *unused,
-                            xc_dominfo_t *info, shared_info_t *live_shinfo,
+                            xc_dominfo_t *info, shared_info_any_t *live_shinfo,
                             xc_core_memory_map_t **mapp,
                             unsigned int *nr_entries)
 {
@@ -60,17 +76,22 @@ xc_core_arch_memory_map_get(int xc_handl
 }
 
 int
-xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info,
-                     shared_info_t *live_shinfo, xen_pfn_t **live_p2m,
+xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width, xc_dominfo_t *info,
+                     shared_info_any_t *live_shinfo, xen_pfn_t **live_p2m,
                      unsigned long *pfnp)
 {
     /* Double and single indirect references to the live P2M table */
     xen_pfn_t *live_p2m_frame_list_list = NULL;
     xen_pfn_t *live_p2m_frame_list = NULL;
+    /* Copies of the above. */
+    xen_pfn_t *p2m_frame_list_list = NULL;
+    xen_pfn_t *p2m_frame_list = NULL;
+
     uint32_t dom = info->domid;
     unsigned long p2m_size = nr_gpfns(xc_handle, info->domid);
     int ret = -1;
     int err;
+    int i;
 
     if ( p2m_size < info->nr_pages  )
     {
@@ -80,17 +101,36 @@ xc_core_arch_map_p2m(int xc_handle, xc_d
 
     live_p2m_frame_list_list =
         xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ,
-                             live_shinfo->arch.pfn_to_mfn_frame_list_list);
+                             GET_FIELD(live_shinfo, arch.pfn_to_mfn_frame_list_list));
 
     if ( !live_p2m_frame_list_list )
     {
         PERROR("Couldn't map p2m_frame_list_list (errno %d)", errno);
         goto out;
     }
+
+    /* Get a local copy of the live_P2M_frame_list_list */
+    if ( !(p2m_frame_list_list = malloc(PAGE_SIZE)) )
+    {
+        ERROR("Couldn't allocate p2m_frame_list_list array");
+        goto out;
+    }
+    memcpy(p2m_frame_list_list, live_p2m_frame_list_list, PAGE_SIZE);
+
+    /* Canonicalize guest's unsigned long vs ours */
+    if ( guest_width > sizeof(unsigned long) )
+        for ( i = 0; i < PAGE_SIZE/sizeof(unsigned long); i++ )
+            if ( i < PAGE_SIZE/guest_width )
+                p2m_frame_list_list[i] = ((uint64_t *)p2m_frame_list_list)[i];
+            else
+                p2m_frame_list_list[i] = 0;
+    else if ( guest_width < sizeof(unsigned long) )
+        for ( i = PAGE_SIZE/sizeof(unsigned long) - 1; i >= 0; i-- )
+            p2m_frame_list_list[i] = ((uint32_t *)p2m_frame_list_list)[i];
 
     live_p2m_frame_list =
         xc_map_foreign_pages(xc_handle, dom, PROT_READ,
-                             live_p2m_frame_list_list,
+                             p2m_frame_list_list,
                              P2M_FLL_ENTRIES);
 
     if ( !live_p2m_frame_list )
@@ -99,8 +139,25 @@ xc_core_arch_map_p2m(int xc_handle, xc_d
         goto out;
     }
 
+    /* Get a local copy of the live_P2M_frame_list */
+    if ( !(p2m_frame_list = malloc(P2M_TOOLS_FL_SIZE)) )
+    {
+        ERROR("Couldn't allocate p2m_frame_list array");
+        goto out;
+    }
+    memset(p2m_frame_list, 0, P2M_TOOLS_FL_SIZE);
+    memcpy(p2m_frame_list, live_p2m_frame_list, P2M_GUEST_FL_SIZE);
+
+    /* Canonicalize guest's unsigned long vs ours */
+    if ( guest_width > sizeof(unsigned long) )
+        for ( i = 0; i < P2M_FL_ENTRIES; i++ )
+            p2m_frame_list[i] = ((uint64_t *)p2m_frame_list)[i];
+    else if ( guest_width < sizeof(unsigned long) )
+        for ( i = P2M_FL_ENTRIES - 1; i >= 0; i-- )
+            p2m_frame_list[i] = ((uint32_t *)p2m_frame_list)[i];
+
     *live_p2m = xc_map_foreign_pages(xc_handle, dom, PROT_READ,
-                                    live_p2m_frame_list,
+                                    p2m_frame_list,
                                     P2M_FL_ENTRIES);
 
     if ( !*live_p2m )
@@ -121,6 +178,12 @@ out:
 
     if ( live_p2m_frame_list )
         munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
+
+    if ( p2m_frame_list_list )
+        free(p2m_frame_list_list);
+
+    if ( p2m_frame_list )
+        free(p2m_frame_list);
 
     errno = err;
     return ret;
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_core_x86.h
--- a/tools/libxc/xc_core_x86.h Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_core_x86.h Wed Jan 28 13:06:45 2009 +0900
@@ -21,15 +21,8 @@
 #ifndef XC_CORE_X86_H
 #define XC_CORE_X86_H
 
-#if defined(__i386__) || defined(__x86_64__)
 #define ELF_ARCH_DATA           ELFDATA2LSB
-#if defined (__i386__)
-# define ELF_ARCH_MACHINE       EM_386
-#else
-# define ELF_ARCH_MACHINE       EM_X86_64
-#endif
-#endif /* __i386__ or __x86_64__ */
-
+#define ELF_ARCH_MACHINE       (guest_width == 8 ? EM_X86_64 : EM_386)
 
 struct xc_core_arch_context {
     /* nothing */
@@ -40,8 +33,10 @@ struct xc_core_arch_context {
 #define xc_core_arch_context_get(arch_ctxt, ctxt, xc_handle, domid) \
                                                                 (0)
 #define xc_core_arch_context_dump(arch_ctxt, args, dump_rtn)    (0)
-#define xc_core_arch_gpfn_may_present(arch_ctxt, i)             (1)
 
+int
+xc_core_arch_gpfn_may_present(struct xc_core_arch_context *arch_ctxt,
+                              unsigned long pfn);
 static inline int
 xc_core_arch_context_get_shdr(struct xc_core_arch_context *arch_ctxt, 
                               struct xc_core_section_headers *sheaders,
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_dom.h
--- a/tools/libxc/xc_dom.h      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_dom.h      Wed Jan 28 13:06:45 2009 +0900
@@ -1,4 +1,4 @@
-#include <xen/libelf.h>
+#include <xen/libelf/libelf.h>
 
 #define INVALID_P2M_ENTRY   ((xen_pfn_t)-1)
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_domain.c   Wed Jan 28 13:06:45 2009 +0900
@@ -531,33 +531,6 @@ int xc_domain_memory_populate_physmap(in
         DPRINTF("Failed allocation for dom %d: %ld extents of order %d\n",
                 domid, nr_extents, extent_order);
         errno = EBUSY;
-        err = -1;
-    }
-
-    return err;
-}
-
-int xc_domain_memory_translate_gpfn_list(int xc_handle,
-                                         uint32_t domid,
-                                         unsigned long nr_gpfns,
-                                         xen_pfn_t *gpfn_list,
-                                         xen_pfn_t *mfn_list)
-{
-    int err;
-    struct xen_translate_gpfn_list translate_gpfn_list = {
-        .domid    = domid,
-        .nr_gpfns = nr_gpfns,
-    };
-    set_xen_guest_handle(translate_gpfn_list.gpfn_list, gpfn_list);
-    set_xen_guest_handle(translate_gpfn_list.mfn_list, mfn_list);
-
-    err = xc_memory_op(xc_handle, XENMEM_translate_gpfn_list, &translate_gpfn_list);
-
-    if ( err != 0 )
-    {
-        DPRINTF("Failed translation for dom %d (%ld PFNs)\n",
-                domid, nr_gpfns);
-        errno = -err;
         err = -1;
     }
 
@@ -958,7 +931,8 @@ int xc_domain_bind_pt_irq(
     bind->hvm_domid = domid;
     bind->irq_type = irq_type;
     bind->machine_irq = machine_irq;
-    if ( irq_type == PT_IRQ_TYPE_PCI )
+    if ( irq_type == PT_IRQ_TYPE_PCI ||
+         irq_type == PT_IRQ_TYPE_MSI_TRANSLATE )
     {
         bind->u.pci.bus = bus;
         bind->u.pci.device = device;    
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_elf.h
--- a/tools/libxc/xc_elf.h      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_elf.h      Wed Jan 28 13:06:45 2009 +0900
@@ -1,1 +1,1 @@
-#include <xen/elfstructs.h>
+#include <xen/libelf/elfstructs.h>
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c        Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_hvm_build.c        Wed Jan 28 13:06:45 2009 +0900
@@ -15,100 +15,55 @@
 #include <xen/foreign/x86_64.h>
 #include <xen/hvm/hvm_info_table.h>
 #include <xen/hvm/params.h>
-#include "xc_e820.h"
-
-#include <xen/libelf.h>
+#include <xen/hvm/e820.h>
+
+#include <xen/libelf/libelf.h>
 
 #define SUPERPAGE_PFN_SHIFT  9
 #define SUPERPAGE_NR_PFNS    (1UL << SUPERPAGE_PFN_SHIFT)
 
-#define SCRATCH_PFN 0xFFFFF
-
-#define SPECIALPAGE_GUARD    0
-#define SPECIALPAGE_BUFIOREQ 1
-#define SPECIALPAGE_XENSTORE 2
-#define SPECIALPAGE_IOREQ    3
-#define SPECIALPAGE_IDENT_PT 4
+#define SPECIALPAGE_BUFIOREQ 0
+#define SPECIALPAGE_XENSTORE 1
+#define SPECIALPAGE_IOREQ    2
+#define SPECIALPAGE_IDENT_PT 3
+#define SPECIALPAGE_SHINFO   4
 #define NR_SPECIAL_PAGES     5
-
-static void build_e820map(void *e820_page, unsigned long long mem_size)
-{
-    struct e820entry *e820entry =
-        (struct e820entry *)(((unsigned char *)e820_page) + HVM_E820_OFFSET);
-    unsigned long long extra_mem_size = 0;
-    unsigned char nr_map = 0;
-
-    /*
-     * Physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved
-     * for PCI devices MMIO. So if HVM has more than HVM_BELOW_4G_RAM_END
-     * RAM, memory beyond HVM_BELOW_4G_RAM_END will go to 4G above.
-     */
-    if ( mem_size > HVM_BELOW_4G_RAM_END )
-    {
-        extra_mem_size = mem_size - HVM_BELOW_4G_RAM_END;
-        mem_size = HVM_BELOW_4G_RAM_END;
-    }
-
-    /* 0x0-0x9FC00: Ordinary RAM. */
-    e820entry[nr_map].addr = 0x0;
-    e820entry[nr_map].size = 0x9FC00;
-    e820entry[nr_map].type = E820_RAM;
-    nr_map++;
-
-    /* 0x9FC00-0xA0000: Extended BIOS Data Area (EBDA). */
-    e820entry[nr_map].addr = 0x9FC00;
-    e820entry[nr_map].size = 0x400;
-    e820entry[nr_map].type = E820_RESERVED;
-    nr_map++;
-
-    /*
-     * Following regions are standard regions of the PC memory map.
-     * They are not covered by e820 regions. OSes will not use as RAM.
-     * 0xA0000-0xC0000: VGA memory-mapped I/O. Not covered by E820.
-     * 0xC0000-0xE0000: 16-bit devices, expansion ROMs (inc. vgabios).
-     * TODO: hvmloader should free pages which turn out to be unused.
-     */
-
-    /*
-     * 0xE0000-0x0F0000: PC-specific area. We place ACPI tables here.
-     *                   We *cannot* mark as E820_ACPI, for two reasons:
-     *                    1. ACPI spec. says that E820_ACPI regions below
-     *                       16MB must clip INT15h 0x88 and 0xe801 queries.
-     *                       Our rombios doesn't do this.
-     *                    2. The OS is allowed to reclaim ACPI memory after
-     *                       parsing the tables. But our FACS is in this
-     *                       region and it must not be reclaimed (it contains
-     *                       the ACPI global lock!).
-     * 0xF0000-0x100000: System BIOS.
-     * TODO: hvmloader should free pages which turn out to be unused.
-     */
-    e820entry[nr_map].addr = 0xE0000;
-    e820entry[nr_map].size = 0x20000;
-    e820entry[nr_map].type = E820_RESERVED;
-    nr_map++;
-
-    /* Low RAM goes here. Reserve space for special pages. */
-    e820entry[nr_map].addr = 0x100000;
-    e820entry[nr_map].size = (mem_size - 0x100000 -
-                              PAGE_SIZE * NR_SPECIAL_PAGES);
-    e820entry[nr_map].type = E820_RAM;
-    nr_map++;
-
-    /* Explicitly reserve space for special pages (excluding guard page). */
-    e820entry[nr_map].addr = mem_size - PAGE_SIZE * (NR_SPECIAL_PAGES - 1);
-    e820entry[nr_map].size = PAGE_SIZE * (NR_SPECIAL_PAGES - 1);
-    e820entry[nr_map].type = E820_RESERVED;
-    nr_map++;
-
-    if ( extra_mem_size )
-    {
-        e820entry[nr_map].addr = (1ULL << 32);
-        e820entry[nr_map].size = extra_mem_size;
-        e820entry[nr_map].type = E820_RAM;
-        nr_map++;
-    }
-
-    *(((unsigned char *)e820_page) + HVM_E820_NR_OFFSET) = nr_map;
+#define special_pfn(x) (0xff000u - NR_SPECIAL_PAGES + (x))
+
+static void build_hvm_info(void *hvm_info_page, uint64_t mem_size)
+{
+    struct hvm_info_table *hvm_info = (struct hvm_info_table *)
+        (((unsigned char *)hvm_info_page) + HVM_INFO_OFFSET);
+    uint64_t lowmem_end = mem_size, highmem_end = 0;
+    uint8_t sum;
+    int i;
+
+    if ( lowmem_end > HVM_BELOW_4G_RAM_END )
+    {
+        highmem_end = lowmem_end + (1ull<<32) - HVM_BELOW_4G_RAM_END;
+        lowmem_end = HVM_BELOW_4G_RAM_END;
+    }
+
+    memset(hvm_info_page, 0, PAGE_SIZE);
+
+    /* Fill in the header. */
+    strncpy(hvm_info->signature, "HVM INFO", 8);
+    hvm_info->length = sizeof(struct hvm_info_table);
+
+    /* Sensible defaults: these can be overridden by the caller. */
+    hvm_info->acpi_enabled = 1;
+    hvm_info->apic_mode = 1;
+    hvm_info->nr_vcpus = 1;
+
+    /* Memory parameters. */
+    hvm_info->low_mem_pgend = lowmem_end >> PAGE_SHIFT;
+    hvm_info->high_mem_pgend = highmem_end >> PAGE_SHIFT;
+    hvm_info->reserved_mem_pgstart = special_pfn(0);
+
+    /* Finish with the checksum. */
+    for ( i = 0, sum = 0; i < hvm_info->length; i++ )
+        sum += ((uint8_t *)hvm_info)[i];
+    hvm_info->checksum = -sum;
 }
 
 static int loadelfimage(
@@ -153,10 +108,10 @@ static int setup_guest(int xc_handle,
     unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
     unsigned long target_pages = (unsigned long)target << (20 - PAGE_SHIFT);
     unsigned long pod_pages = 0;
-    unsigned long special_page_nr, entry_eip, cur_pages;
+    unsigned long entry_eip, cur_pages;
     struct xen_add_to_physmap xatp;
     struct shared_info *shared_info;
-    void *e820_page;
+    void *hvm_info_page;
     uint32_t *ident_pt;
     struct elf_binary elf;
     uint64_t v_start, v_end;
@@ -289,23 +244,22 @@ static int setup_guest(int xc_handle,
     if ( loadelfimage(&elf, xc_handle, dom, page_array) != 0 )
         goto error_out;
 
-    if ( (e820_page = xc_map_foreign_range(
+    if ( (hvm_info_page = xc_map_foreign_range(
               xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
-              HVM_E820_PAGE >> PAGE_SHIFT)) == NULL )
-        goto error_out;
-    memset(e820_page, 0, PAGE_SIZE);
-    build_e820map(e820_page, v_end);
-    munmap(e820_page, PAGE_SIZE);
+              HVM_INFO_PFN)) == NULL )
+        goto error_out;
+    build_hvm_info(hvm_info_page, v_end);
+    munmap(hvm_info_page, PAGE_SIZE);
 
     /* Map and initialise shared_info page. */
     xatp.domid = dom;
     xatp.space = XENMAPSPACE_shared_info;
     xatp.idx   = 0;
-    xatp.gpfn  = SCRATCH_PFN;
+    xatp.gpfn  = special_pfn(SPECIALPAGE_SHINFO);
     if ( (xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp) != 0) ||
          ((shared_info = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
-             SCRATCH_PFN)) == NULL) )
+             special_pfn(SPECIALPAGE_SHINFO))) == NULL) )
         goto error_out;
     memset(shared_info, 0, PAGE_SIZE);
     /* NB. evtchn_upcall_mask is unused: leave as zero. */
@@ -313,31 +267,28 @@ static int setup_guest(int xc_handle,
            sizeof(shared_info->evtchn_mask));
     munmap(shared_info, PAGE_SIZE);
 
-    special_page_nr = (((v_end > HVM_BELOW_4G_RAM_END)
-                        ? (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT)
-                        : (v_end >> PAGE_SHIFT))
-                       - NR_SPECIAL_PAGES);
-
-    /* Paranoia: clean special pages. */
+    /* Allocate and clear special pages. */
     for ( i = 0; i < NR_SPECIAL_PAGES; i++ )
-        if ( xc_clear_domain_page(xc_handle, dom, special_page_nr + i) )
+    {
+        xen_pfn_t pfn = special_pfn(i);
+        if ( i == SPECIALPAGE_SHINFO )
+            continue;
+        rc = xc_domain_memory_populate_physmap(xc_handle, dom, 1, 0, 0, &pfn);
+        if ( rc != 0 )
+        {
+            PERROR("Could not allocate %d'th special page.\n", i);
             goto error_out;
-
-    /* Free the guard page that separates low RAM from special pages. */
-    rc = xc_domain_memory_decrease_reservation(
-        xc_handle, dom, 1, 0, &page_array[special_page_nr]);
-    if ( rc != 0 )
-    {
-        PERROR("Could not deallocate guard page for HVM guest.\n");
-        goto error_out;
+        }
+        if ( xc_clear_domain_page(xc_handle, dom, special_pfn(i)) )
+            goto error_out;
     }
 
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN,
-                     special_page_nr + SPECIALPAGE_XENSTORE);
+                     special_pfn(SPECIALPAGE_XENSTORE));
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN,
-                     special_page_nr + SPECIALPAGE_BUFIOREQ);
+                     special_pfn(SPECIALPAGE_BUFIOREQ));
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN,
-                     special_page_nr + SPECIALPAGE_IOREQ);
+                     special_pfn(SPECIALPAGE_IOREQ));
 
     /*
      * Identity-map page table is required for running with CR0.PG=0 when
@@ -345,14 +296,14 @@ static int setup_guest(int xc_handle,
      */
     if ( (ident_pt = xc_map_foreign_range(
               xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
-              special_page_nr + SPECIALPAGE_IDENT_PT)) == NULL )
+              special_pfn(SPECIALPAGE_IDENT_PT))) == NULL )
         goto error_out;
     for ( i = 0; i < PAGE_SIZE / sizeof(*ident_pt); i++ )
         ident_pt[i] = ((i << 22) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
                        _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
     munmap(ident_pt, PAGE_SIZE);
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT,
-                     (special_page_nr + SPECIALPAGE_IDENT_PT) << PAGE_SHIFT);
+                     special_pfn(SPECIALPAGE_IDENT_PT) << PAGE_SHIFT);
 
     /* Insert JMP <rel32> instruction at address 0x0 to reach entry point. */
     entry_eip = elf_uval(&elf, elf.ehdr, e_entry);
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c  Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_private.c  Wed Jan 28 13:06:45 2009 +0900
@@ -307,13 +307,6 @@ int xc_memory_op(int xc_handle,
             goto out1;
         }
         break;
-    case XENMEM_remove_from_physmap:
-        if ( lock_pages(arg, sizeof(struct xen_remove_from_physmap)) )
-        {
-            PERROR("Could not lock");
-            goto out1;
-        }
-        break;
     case XENMEM_current_reservation:
     case XENMEM_maximum_reservation:
     case XENMEM_maximum_gpfn:
@@ -354,9 +347,6 @@ int xc_memory_op(int xc_handle,
         break;
     case XENMEM_add_to_physmap:
         unlock_pages(arg, sizeof(struct xen_add_to_physmap));
-        break;
-    case XENMEM_remove_from_physmap:
-        unlock_pages(arg, sizeof(struct xen_remove_from_physmap));
         break;
     case XENMEM_current_reservation:
     case XENMEM_maximum_reservation:
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_ptrace_core.c
--- a/tools/libxc/xc_ptrace_core.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_ptrace_core.c      Wed Jan 28 13:06:45 2009 +0900
@@ -540,7 +540,9 @@ xc_waitdomain_core_elf(
                              XEN_ELFNOTE_DUMPCORE_XEN_VERSION,
                              (void**)&xen_version) < 0)
         goto out;
-    if (xen_version->xen_version.pagesize != PAGE_SIZE)
+    /* shifted case covers 32 bit FV guest core file created on 64 bit Dom0 */
+    if (xen_version->xen_version.pagesize != PAGE_SIZE &&
+        (xen_version->xen_version.pagesize >> 32) != PAGE_SIZE)
         goto out;
 
     /* .note.Xen: format_version */
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xenctrl.h     Wed Jan 28 13:06:45 2009 +0900
@@ -628,12 +628,6 @@ int xc_domain_memory_populate_physmap(in
                                       unsigned int mem_flags,
                                       xen_pfn_t *extent_start);
 
-int xc_domain_memory_translate_gpfn_list(int xc_handle,
-                                         uint32_t domid,
-                                         unsigned long nr_gpfns,
-                                         xen_pfn_t *gpfn_list,
-                                         xen_pfn_t *mfn_list);
-
 int xc_domain_memory_set_pod_target(int xc_handle,
                                     uint32_t domid,
                                     uint64_t target_pages,
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/misc/Makefile
--- a/tools/misc/Makefile       Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/misc/Makefile       Wed Jan 28 13:06:45 2009 +0900
@@ -47,7 +47,7 @@ install: build
 
 .PHONY: clean
 clean:
-       $(RM) *.o $(TARGETS) *~
+       $(RM) *.o $(TARGETS) *~ $(DEPS)
        set -e; for d in $(SUBDIRS); do $(MAKE) -C $$d clean; done
 
 %.o: %.c $(HDRS) Makefile
@@ -55,3 +55,5 @@ clean:
 
 xenperf xenpm: %: %.o Makefile
        $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) $(LDFLAGS_libxenctrl)
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/misc/xenpm.c
--- a/tools/misc/xenpm.c        Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/misc/xenpm.c        Wed Jan 28 13:06:45 2009 +0900
@@ -21,83 +21,56 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <string.h>
 #include <getopt.h>
 #include <errno.h>
+#include <signal.h>
 
 #include <xenctrl.h>
 #include <inttypes.h>
+#include <sys/time.h>
 
 #define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
+
+static int xc_fd;
+static int max_cpu_nr;
 
 /* help message */
 void show_help(void)
 {
     fprintf(stderr,
-            "Usage:\n"
-            "       xenpm get-cpuidle-states [cpuid]: list cpu idle information on CPU cpuid or all CPUs.\n"
-            "       xenpm get-cpufreq-states [cpuid]: list cpu frequency information on CPU cpuid or all CPUs.\n"
-            "       xenpm get-cpufreq-para [cpuid]: list cpu frequency information on CPU cpuid or all CPUs.\n"
-            "       xenpm set-scaling-maxfreq <cpuid> <HZ>: set max cpu frequency <HZ> on CPU <cpuid>.\n"
-            "       xenpm set-scaling-minfreq <cpuid> <HZ>: set min cpu frequency <HZ> on CPU <cpuid>.\n"
-            "       xenpm set-scaling-governor <cpuid> <name>: set scaling governor on CPU <cpuid>.\n"
-            "       xenpm set-scaling-speed <cpuid> <num>: set scaling speed on CPU <cpuid>.\n"
-            "       xenpm set-sampling-rate <cpuid> <num>: set sampling rate on CPU <cpuid>.\n"
-            "       xenpm set-up-threshold <cpuid> <num>: set up threshold on CPU <cpuid>.\n");
-}
-
+            "xen power management control tool\n\n"
+            "usage: xenpm <command> [args]\n\n"
+            "xenpm command list:\n\n"
+            " get-cpuidle-states    [cpuid]       list cpu idle info of CPU <cpuid> or all\n"
+            " get-cpufreq-states    [cpuid]       list cpu freq info of CPU <cpuid> or all\n"
+            " get-cpufreq-para      [cpuid]       list cpu freq parameter of CPU <cpuid> or all\n"
+            " set-scaling-maxfreq   [cpuid] <HZ>  set max cpu frequency <HZ> on CPU <cpuid>\n"
+            "                                     or all CPUs\n"
+            " set-scaling-minfreq   [cpuid] <HZ>  set min cpu frequency <HZ> on CPU <cpuid>\n"
+            "                                     or all CPUs\n"
+            " set-scaling-speed     [cpuid] <num> set scaling speed on CPU <cpuid> or all\n"
+            "                                     it is used in userspace governor.\n"
+            " set-scaling-governor  [cpuid] <gov> set scaling governor on CPU <cpuid> or all\n"
+            "                                     as userspace/performance/powersave/ondemand\n"
+            " set-sampling-rate     [cpuid] <num> set sampling rate on CPU <cpuid> or all\n"
+            "                                     it is used in ondemand governor.\n"
+            " set-up-threshold      [cpuid] <num> set up threshold on CPU <cpuid> or all\n"
+            "                                     it is used in ondemand governor.\n"
+            " start                               start collect Cx/Px statistics,\n"
+            "                                     output after CTRL-C or SIGINT.\n"
+            );
+}
 /* wrapper function */
-int help_func(int xc_fd, int cpuid, uint32_t value)
+void help_func(int argc, char *argv[])
 {
     show_help();
-    return 0;
-}
-
-/* show cpu idle information on CPU cpuid */
-static int show_cx_cpuid(int xc_fd, int cpuid)
-{
-    int i, ret = 0;
-    int max_cx_num = 0;
-    struct xc_cx_stat cxstatinfo, *cxstat = &cxstatinfo;
-
-    ret = xc_pm_get_max_cx(xc_fd, cpuid, &max_cx_num);
-    if ( ret )
-    {
-        if ( errno == ENODEV )
-        {
-            fprintf(stderr, "Xen cpuidle is not enabled!\n");
-            return -ENODEV;
-        }
-        else
-        {
-            fprintf(stderr, "[CPU%d] failed to get max C-state\n", cpuid);
-            return -EINVAL;
-        }
-    }
-
-    cxstat->triggers = malloc(max_cx_num * sizeof(uint64_t));
-    if ( !cxstat->triggers )
-    {
-        fprintf(stderr, "[CPU%d] failed to malloc for C-states triggers\n", cpuid);
-        return -ENOMEM;
-    }
-    cxstat->residencies = malloc(max_cx_num * sizeof(uint64_t));
-    if ( !cxstat->residencies )
-    {
-        fprintf(stderr, "[CPU%d] failed to malloc for C-states residencies\n", cpuid);
-        free(cxstat->triggers);
-        return -ENOMEM;
-    }
-
-    ret = xc_pm_get_cxstat(xc_fd, cpuid, cxstat);
-    if( ret )
-    {
-        fprintf(stderr, "[CPU%d] failed to get C-states statistics "
-                "information\n", cpuid);
-        free(cxstat->triggers);
-        free(cxstat->residencies);
-        return -EINVAL;
-    }
+}
+
+static void print_cxstat(int cpuid, struct xc_cx_stat *cxstat)
+{
+    int i;
 
     printf("cpu id               : %d\n", cpuid);
     printf("total C-states       : %d\n", cxstat->nr);
@@ -110,88 +83,87 @@ static int show_cx_cpuid(int xc_fd, int 
         printf("                       residency  [%020"PRIu64" ms]\n",
                cxstat->residencies[i]/1000000UL);
     }
-
-    free(cxstat->triggers);
-    free(cxstat->residencies);
-
     printf("\n");
+}
+
+/* show cpu idle information on CPU cpuid */
+static int get_cxstat_by_cpuid(int xc_fd, int cpuid, struct xc_cx_stat *cxstat)
+{
+    int ret = 0;
+    int max_cx_num = 0;
+
+    ret = xc_pm_get_max_cx(xc_fd, cpuid, &max_cx_num);
+    if ( ret )
+        return errno;
+
+    if ( !cxstat )
+        return -EINVAL;
+
+    cxstat->triggers = malloc(max_cx_num * sizeof(uint64_t));
+    if ( !cxstat->triggers )
+        return -ENOMEM;
+    cxstat->residencies = malloc(max_cx_num * sizeof(uint64_t));
+    if ( !cxstat->residencies )
+    {
+        free(cxstat->triggers);
+        return -ENOMEM;
+    }
+
+    ret = xc_pm_get_cxstat(xc_fd, cpuid, cxstat);
+    if( ret )
+    {
+        int temp = errno;
+        free(cxstat->triggers);
+        free(cxstat->residencies);
+        cxstat->triggers = NULL;
+        cxstat->residencies = NULL;
+        return temp;
+    }
+
     return 0;
 }
 
-int cxstates_func(int xc_fd, int cpuid, uint32_t value)
+static int show_cxstat_by_cpuid(int xc_fd, int cpuid)
 {
     int ret = 0;
-    xc_physinfo_t physinfo = { 0 };
-
-    if ( cpuid < 0 )
-    {
-        /* show cxstates on all cpu */
-        ret = xc_physinfo(xc_fd, &physinfo);
-        if ( ret )
-        {
-            fprintf(stderr, "failed to get the processor information\n");
-        }
-        else
-        {
-            int i;
-            for ( i = 0; i < physinfo.nr_cpus; i++ )
-            {
-                if ( (ret = show_cx_cpuid(xc_fd, i)) == -ENODEV )
-                    break;
-            }
-        }
-    }
-    else
-        ret = show_cx_cpuid(xc_fd, cpuid);
-
-    return ret;
-}
-
-/* show cpu frequency information on CPU cpuid */
-static int show_px_cpuid(int xc_fd, int cpuid)
-{
-    int i, ret = 0;
-    int max_px_num = 0;
-    struct xc_px_stat pxstatinfo, *pxstat = &pxstatinfo;
-
-    ret = xc_pm_get_max_px(xc_fd, cpuid, &max_px_num);
+    struct xc_cx_stat cxstatinfo;
+
+    ret = get_cxstat_by_cpuid(xc_fd, cpuid, &cxstatinfo);
     if ( ret )
-    {
-        if ( errno == ENODEV )
-        {
-            printf("Xen cpufreq is not enabled!\n");
-            return -ENODEV;
-        }
-        else
-        {
-            fprintf(stderr, "[CPU%d] failed to get max P-state\n", cpuid);
-            return -EINVAL;
-        }
-    }
-
-    pxstat->trans_pt = malloc(max_px_num * max_px_num *
-                              sizeof(uint64_t));
-    if ( !pxstat->trans_pt )
-    {
-        fprintf(stderr, "[CPU%d] failed to malloc for P-states transition table\n", cpuid);
-        return -ENOMEM;
-    }
-    pxstat->pt = malloc(max_px_num * sizeof(struct xc_px_val));
-    if ( !pxstat->pt )
-    {
-        fprintf(stderr, "[CPU%d] failed to malloc for P-states table\n", cpuid);
-        free(pxstat->trans_pt);
-        return -ENOMEM;
-    }
-
-    ret = xc_pm_get_pxstat(xc_fd, cpuid, pxstat);
-    if( ret )
-    {
-        fprintf(stderr, "[CPU%d] failed to get P-states statistics information\n", cpuid);
-        free(pxstat->trans_pt);
-        free(pxstat->pt);
-        return -ENOMEM;
-    }
+        return ret;
+
+    print_cxstat(cpuid, &cxstatinfo);
+
+    free(cxstatinfo.triggers);
+    free(cxstatinfo.residencies);
+    return 0;
+}
+
+void cxstat_func(int argc, char *argv[])
+{
+    int cpuid = -1;
+
+    if ( argc > 0 && sscanf(argv[0], "%d", &cpuid) != 1 )
+        cpuid = -1;
+
+    if ( cpuid >= max_cpu_nr )
+        cpuid = -1;
+
+    if ( cpuid < 0 )
+    {
+        /* show cxstates on all cpus */
+        int i;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( show_cxstat_by_cpuid(xc_fd, i) == -ENODEV )
+                break;
+    }
+    else
+        show_cxstat_by_cpuid(xc_fd, cpuid);
+}
+
+static void print_pxstat(int cpuid, struct xc_px_stat *pxstat)
+{
+    int i;
 
     printf("cpu id               : %d\n", cpuid);
     printf("total P-states       : %d\n", pxstat->total);
@@ -211,40 +183,233 @@ static int show_px_cpuid(int xc_fd, int 
         printf("                       residency  [%020"PRIu64" ms]\n",
                pxstat->pt[i].residency/1000000UL);
     }
-
-    free(pxstat->trans_pt);
-    free(pxstat->pt);
-
     printf("\n");
+}
+
+/* show cpu frequency information on CPU cpuid */
+static int get_pxstat_by_cpuid(int xc_fd, int cpuid, struct xc_px_stat *pxstat)
+{
+    int ret = 0;
+    int max_px_num = 0;
+
+    ret = xc_pm_get_max_px(xc_fd, cpuid, &max_px_num);
+    if ( ret )
+        return errno;
+
+    if ( !pxstat)
+        return -EINVAL;
+
+    pxstat->trans_pt = malloc(max_px_num * max_px_num *
+                              sizeof(uint64_t));
+    if ( !pxstat->trans_pt )
+        return -ENOMEM;
+    pxstat->pt = malloc(max_px_num * sizeof(struct xc_px_val));
+    if ( !pxstat->pt )
+    {
+        free(pxstat->trans_pt);
+        return -ENOMEM;
+    }
+
+    ret = xc_pm_get_pxstat(xc_fd, cpuid, pxstat);
+    if( ret )
+    {
+        int temp = errno;
+        free(pxstat->trans_pt);
+        free(pxstat->pt);
+        pxstat->trans_pt = NULL;
+        pxstat->pt = NULL;
+        return temp;
+    }
+
     return 0;
 }
 
-int pxstates_func(int xc_fd, int cpuid, uint32_t value)
+static int show_pxstat_by_cpuid(int xc_fd, int cpuid)
 {
     int ret = 0;
-    xc_physinfo_t physinfo = { 0 };
-
-    if ( cpuid < 0 )
-    {
-        ret = xc_physinfo(xc_fd, &physinfo);
-        if ( ret )
+    struct xc_px_stat pxstatinfo;
+
+    ret = get_pxstat_by_cpuid(xc_fd, cpuid, &pxstatinfo);
+    if ( ret )
+        return ret;
+
+    print_pxstat(cpuid, &pxstatinfo);
+
+    free(pxstatinfo.trans_pt);
+    free(pxstatinfo.pt);
+    return 0;
+}
+
+void pxstat_func(int argc, char *argv[])
+{
+    int cpuid = -1;
+
+    if ( argc > 0 && sscanf(argv[0], "%d", &cpuid) != 1 )
+        cpuid = -1;
+
+    if ( cpuid >= max_cpu_nr )
+        cpuid = -1;
+
+    if ( cpuid < 0 )
+    {
+        /* show pxstates on all cpus */
+        int i;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( show_pxstat_by_cpuid(xc_fd, i) == -ENODEV )
+                break;
+    }
+    else
+        show_pxstat_by_cpuid(xc_fd, cpuid);
+}
+
+static uint64_t usec_start, usec_end;
+static struct xc_cx_stat *cxstat, *cxstat_start, *cxstat_end;
+static struct xc_px_stat *pxstat, *pxstat_start, *pxstat_end;
+static uint64_t *sum, *sum_cx, *sum_px;
+
+static void signal_int_handler(int signo)
+{
+    int i, j;
+    struct timeval tv;
+    int cx_cap = 0, px_cap = 0;
+
+    if ( gettimeofday(&tv, NULL) == -1 )
+    {
+        fprintf(stderr, "failed to get timeofday\n");
+        return ;
+    }
+    usec_end = tv.tv_sec * 1000000UL + tv.tv_usec;
+
+    if ( get_cxstat_by_cpuid(xc_fd, 0, NULL) != -ENODEV )
+    {
+        cx_cap = 1;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( !get_cxstat_by_cpuid(xc_fd, i, &cxstat_end[i]) )
+                for ( j = 0; j < cxstat_end[i].nr; j++ )
+                    sum_cx[i] += cxstat_end[i].residencies[j] -
+                                 cxstat_start[i].residencies[j];
+    }
+
+    if ( get_pxstat_by_cpuid(xc_fd, 0, NULL) != -ENODEV )
+    {
+        px_cap = 1;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( !get_pxstat_by_cpuid(xc_fd, i , &pxstat_end[i]) )
+                for ( j = 0; j < pxstat_end[i].total; j++ )
+                    sum_px[i] += pxstat_end[i].pt[j].residency -
+                                 pxstat_start[i].pt[j].residency;
+    }
+
+    printf("Elapsed time (ms): %"PRIu64"\n", (usec_end - usec_start) / 1000UL);
+    for ( i = 0; i < max_cpu_nr; i++ )
+    {
+        uint64_t temp;
+        printf("CPU%d:\n\tresidency\tpercentage\n", i);
+        if ( cx_cap )
         {
-            fprintf(stderr, "failed to get the processor information\n");
-        }
-        else
-        {
-            int i;
-            for ( i = 0; i < physinfo.nr_cpus; i++ )
+            for ( j = 0; j < cxstat_end[i].nr; j++ )
             {
-                if ( (ret = show_px_cpuid(xc_fd, i)) == -ENODEV )
-                    break;
+                if ( sum_cx[i] > 0 )
+                {
+                    temp = cxstat_end[i].residencies[j] -
+                           cxstat_start[i].residencies[j];
+                    printf("  C%d\t%"PRIu64" ms\t%.2f%%\n", j,
+                           temp / 1000000UL, 100UL * temp / (double)sum_cx[i]);
+                }
             }
         }
-    }
-    else
-        ret = show_px_cpuid(xc_fd, cpuid);
-
-    return ret;
+        if ( px_cap )
+        {
+            for ( j = 0; j < pxstat_end[i].total; j++ )
+            {
+                if ( sum_px[i] > 0 )
+                {
+                    temp = pxstat_end[i].pt[j].residency -
+                           pxstat_start[i].pt[j].residency;
+                    printf("  P%d\t%"PRIu64" ms\t%.2f%%\n", j,
+                           temp / 1000000UL, 100UL * temp / (double)sum_px[i]);
+                }
+            }
+        }
+        printf("\n");
+    }
+
+    /* some clean up and then exits */
+    for ( i = 0; i < 2 * max_cpu_nr; i++ )
+    {
+        free(cxstat[i].triggers);
+        free(cxstat[i].residencies);
+        free(pxstat[i].trans_pt);
+        free(pxstat[i].pt);
+    }
+    free(cxstat);
+    free(pxstat);
+    free(sum);
+    xc_interface_close(xc_fd);
+    exit(0);
+}
+
+void start_gather_func(int argc, char *argv[])
+{
+    int i;
+    struct timeval tv;
+
+    if ( gettimeofday(&tv, NULL) == -1 )
+    {
+        fprintf(stderr, "failed to get timeofday\n");
+        return ;
+    }
+    usec_start = tv.tv_sec * 1000000UL + tv.tv_usec;
+
+    sum = malloc(sizeof(uint64_t) * 2 * max_cpu_nr);
+    if ( sum == NULL )
+        return ;
+    cxstat = malloc(sizeof(struct xc_cx_stat) * 2 * max_cpu_nr);
+    if ( cxstat == NULL )
+    {
+        free(sum);
+        return ;
+    }
+    pxstat = malloc(sizeof(struct xc_px_stat) * 2 * max_cpu_nr);
+    if ( pxstat == NULL )
+    {
+        free(sum);
+        free(cxstat);
+        return ;
+    }
+    memset(sum, 0, sizeof(uint64_t) * 2 * max_cpu_nr);
+    memset(cxstat, 0, sizeof(struct xc_cx_stat) * 2 * max_cpu_nr);
+    memset(pxstat, 0, sizeof(struct xc_px_stat) * 2 * max_cpu_nr);
+    sum_cx = sum;
+    sum_px = sum + max_cpu_nr;
+    cxstat_start = cxstat;
+    cxstat_end = cxstat + max_cpu_nr;
+    pxstat_start = pxstat;
+    pxstat_end = pxstat + max_cpu_nr;
+
+    if ( get_cxstat_by_cpuid(xc_fd, 0, NULL) == -ENODEV &&
+         get_pxstat_by_cpuid(xc_fd, 0, NULL) == -ENODEV )
+    {
+        fprintf(stderr, "Xen cpu idle and frequency is disabled!\n");
+        return ;
+    }
+
+    for ( i = 0; i < max_cpu_nr; i++ )
+    {
+        get_cxstat_by_cpuid(xc_fd, i, &cxstat_start[i]);
+        get_pxstat_by_cpuid(xc_fd, i, &pxstat_start[i]);
+    }
+
+    if (signal(SIGINT, signal_int_handler) == SIG_ERR)
+    {
+        fprintf(stderr, "failed to set signal int handler\n");
+        free(sum);
+        free(pxstat);
+        free(cxstat);
+        return ;
+    }
+
+    pause();
 }
 
 /* print out parameters about cpu frequency */
@@ -294,7 +459,8 @@ static void print_cpufreq_para(int cpuid
 
     printf("scaling_avail_freq   :");
     for ( i = 0; i < p_cpufreq->freq_num; i++ )
-        if ( p_cpufreq->scaling_available_frequencies[i] == p_cpufreq->scaling_cur_freq )
+        if ( p_cpufreq->scaling_available_frequencies[i] ==
+             p_cpufreq->scaling_cur_freq )
             printf(" *%d", p_cpufreq->scaling_available_frequencies[i]);
         else
             printf(" %d", p_cpufreq->scaling_available_frequencies[i]);
@@ -308,7 +474,7 @@ static void print_cpufreq_para(int cpuid
 }
 
 /* show cpu frequency parameters information on CPU cpuid */
-static int show_cpufreq_para_cpuid(int xc_fd, int cpuid)
+static int show_cpufreq_para_by_cpuid(int xc_fd, int cpuid)
 {
     int ret = 0;
     struct xc_get_cpufreq_para cpufreq_para, *p_cpufreq = &cpufreq_para;
@@ -381,159 +547,221 @@ out:
     return ret;
 }
 
-int cpufreq_para_func(int xc_fd, int cpuid, uint32_t value)
-{
-    int ret = 0;
-    xc_physinfo_t physinfo = { 0 };
-
-    if ( cpuid < 0 )
-    {
-        ret = xc_physinfo(xc_fd, &physinfo);
-        if ( ret )
+void cpufreq_para_func(int argc, char *argv[])
+{
+    int cpuid = -1;
+
+    if ( argc > 0 && sscanf(argv[0], "%d", &cpuid) != 1 )
+        cpuid = -1;
+
+    if ( cpuid >= max_cpu_nr )
+        cpuid = -1;
+
+    if ( cpuid < 0 )
+    {
+        /* show cpu freqency information on all cpus */
+        int i;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( show_cpufreq_para_by_cpuid(xc_fd, i) == -ENODEV )
+                break;
+    }
+    else
+        show_cpufreq_para_by_cpuid(xc_fd, cpuid);
+}
+
+void scaling_max_freq_func(int argc, char *argv[])
+{
+    int cpuid = -1, freq = -1;
+
+    if ( (argc >= 2 && (sscanf(argv[1], "%d", &freq) != 1 ||
+                        sscanf(argv[0], "%d", &cpuid) != 1)) ||
+         (argc == 1 && sscanf(argv[0], "%d", &freq) != 1 ) ||
+         argc == 0 )
+    {
+        fprintf(stderr, "failed to set scaling max freq\n");
+        return ;
+    }
+
+    if ( cpuid < 0 )
+    {
+        int i;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( xc_set_cpufreq_para(xc_fd, i, SCALING_MAX_FREQ, freq) )
+                fprintf(stderr, "[CPU%d] failed to set scaling max freq\n", i);
+    }
+    else
+    {
+        if ( xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MAX_FREQ, freq) )
+            fprintf(stderr, "failed to set scaling max freq\n");
+    }
+}
+
+void scaling_min_freq_func(int argc, char *argv[])
+{
+    int cpuid = -1, freq = -1;
+
+    if ( (argc >= 2 && (sscanf(argv[1], "%d", &freq) != 1 ||
+                        sscanf(argv[0], "%d", &cpuid) != 1) ) ||
+         (argc == 1 && sscanf(argv[0], "%d", &freq) != 1 ) ||
+         argc == 0 )
+    {
+        fprintf(stderr, "failed to set scaling min freq\n");
+        return ;
+    }
+
+    if ( cpuid < 0 )
+    {
+        int i;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( xc_set_cpufreq_para(xc_fd, i, SCALING_MIN_FREQ, freq) )
+                fprintf(stderr, "[CPU%d] failed to set scaling min freq\n", i);
+    }
+    else
+    {
+        if ( xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MIN_FREQ, freq) )
+            fprintf(stderr, "failed to set scaling min freq\n");
+    }
+}
+
+void scaling_speed_func(int argc, char *argv[])
+{
+    int cpuid = -1, speed = -1;
+
+    if ( (argc >= 2 && (sscanf(argv[1], "%d", &speed) != 1 ||
+                        sscanf(argv[0], "%d", &cpuid) != 1) ) ||
+         (argc == 1 && sscanf(argv[0], "%d", &speed) != 1 ) ||
+         argc == 0 )
+    {
+        fprintf(stderr, "failed to set scaling speed\n");
+        return ;
+    }
+
+    if ( cpuid < 0 )
+    {
+        int i;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( xc_set_cpufreq_para(xc_fd, i, SCALING_SETSPEED, speed) )
+                fprintf(stderr, "[CPU%d] failed to set scaling speed\n", i);
+    }
+    else
+    {
+        if ( xc_set_cpufreq_para(xc_fd, cpuid, SCALING_SETSPEED, speed) )
+            fprintf(stderr, "failed to set scaling speed\n");
+    }
+}
+
+void scaling_sampling_rate_func(int argc, char *argv[])
+{
+    int cpuid = -1, rate = -1;
+
+    if ( (argc >= 2 && (sscanf(argv[1], "%d", &rate) != 1 ||
+                        sscanf(argv[0], "%d", &cpuid) != 1) ) ||
+         (argc == 1 && sscanf(argv[0], "%d", &rate) != 1 ) ||
+         argc == 0 )
+    {
+        fprintf(stderr, "failed to set scaling sampling rate\n");
+        return ;
+    }
+
+    if ( cpuid < 0 )
+    {
+        int i;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( xc_set_cpufreq_para(xc_fd, i, SAMPLING_RATE, rate) )
+                fprintf(stderr,
+                        "[CPU%d] failed to set scaling sampling rate\n", i);
+    }
+    else
+    {
+        if ( xc_set_cpufreq_para(xc_fd, cpuid, SAMPLING_RATE, rate) )
+            fprintf(stderr, "failed to set scaling sampling rate\n");
+    }
+}
+
+void scaling_up_threshold_func(int argc, char *argv[])
+{
+    int cpuid = -1, threshold = -1;
+
+    if ( (argc >= 2 && (sscanf(argv[1], "%d", &threshold) != 1 ||
+                        sscanf(argv[0], "%d", &cpuid) != 1) ) ||
+         (argc == 1 && sscanf(argv[0], "%d", &threshold) != 1 ) ||
+         argc == 0 )
+    {
+        fprintf(stderr, "failed to set up scaling threshold\n");
+        return ;
+    }
+
+    if ( cpuid < 0 )
+    {
+        int i;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( xc_set_cpufreq_para(xc_fd, i, UP_THRESHOLD, threshold) )
+                fprintf(stderr,
+                        "[CPU%d] failed to set up scaling threshold\n", i);
+    }
+    else
+    {
+        if ( xc_set_cpufreq_para(xc_fd, cpuid, UP_THRESHOLD, threshold) )
+            fprintf(stderr, "failed to set up scaling threshold\n");
+    }
+}
+
+void scaling_governor_func(int argc, char *argv[])
+{
+    int cpuid = -1;
+    char *name = NULL;
+
+    if ( argc >= 2 )
+    {
+        name = strdup(argv[1]);
+        if ( name == NULL )
+            goto out;
+        if ( sscanf(argv[0], "%d", &cpuid) != 1 )
         {
-            fprintf(stderr, "failed to get the processor information\n");
+            free(name);
+            goto out;
         }
-        else
-        {
-            int i;
-            for ( i = 0; i < physinfo.nr_cpus; i++ )
-            {
-                if ( (ret = show_cpufreq_para_cpuid(xc_fd, i)) == -ENODEV )
-                    break;
-            }
-        }
-    }
-    else
-        ret = show_cpufreq_para_cpuid(xc_fd, cpuid);
-
-    return ret;
-}
-
-int scaling_max_freq_func(int xc_fd, int cpuid, uint32_t value)
-{
-    int ret = 0;
-
-    if ( cpuid < 0 )
-    {
-        show_help();
-        return -EINVAL;
-    }
-
-    ret = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MAX_FREQ, value);
-    if ( ret )
-    {
-        fprintf(stderr, "[CPU%d] failed to set scaling max freq\n", cpuid);
-    }
-
-    return ret;
-}
-
-int scaling_min_freq_func(int xc_fd, int cpuid, uint32_t value)
-{
-    int ret;
-
-    if ( cpuid < 0 )
-    {
-        show_help();
-        return -EINVAL;
-    }
-
-    ret = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MIN_FREQ, value);
-    if ( ret )
-    {
-        fprintf(stderr, "[CPU%d] failed to set scaling min freq\n", cpuid);
-    }
-
-    return ret;
-}
-
-int scaling_speed_func(int xc_fd, int cpuid, uint32_t value)
-{
-    int ret;
-
-    if ( cpuid < 0 )
-    {
-        show_help();
-        return -EINVAL;
-    }
-
-    ret = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_SETSPEED, value);
-    if ( ret )
-    {
-        fprintf(stderr, "[CPU%d] failed to set scaling speed\n", cpuid);
-    }
-
-    return ret;
-}
-
-int scaling_sampling_rate_func(int xc_fd, int cpuid, uint32_t value)
-{
-    int ret;
-
-    if ( cpuid < 0 )
-    {
-        show_help();
-        return -EINVAL;
-    }
-
-    ret = xc_set_cpufreq_para(xc_fd, cpuid, SAMPLING_RATE, value);
-    if ( ret ) 
-    {
-        fprintf(stderr, "[CPU%d] failed to set scaling sampling rate\n", cpuid);
-    }
-
-    return ret;
-}
-
-int scaling_up_threshold_func(int xc_fd, int cpuid, uint32_t value)
-{
-    int ret;
-
-    if ( cpuid < 0 )
-    {
-        show_help();
-        return -EINVAL;
-    }
-
-    ret = xc_set_cpufreq_para(xc_fd, cpuid, UP_THRESHOLD, value);
-    if ( ret )
-    {
-        fprintf(stderr, "[CPU%d] failed to set scaling threshold\n", cpuid);
-    }
-
-    return ret;
-}
-
-int scaling_governor_func(int xc_fd, int cpuid, char *name)
-{
-    int ret = 0;
-
-    if ( cpuid < 0 )
-    {
-        show_help();
-        return -EINVAL;
-    }
-
-    ret = xc_set_cpufreq_gov(xc_fd, cpuid, name);
-    if ( ret )
-    {
-        fprintf(stderr, "failed to set cpufreq governor to %s\n", name);
-    }
-
-    return ret;
+    }
+    else if ( argc > 0 )
+    {
+        name = strdup(argv[0]);
+        if ( name == NULL )
+            goto out;
+    }
+    else
+        goto out;
+
+    if ( cpuid < 0 )
+    {
+        int i;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( xc_set_cpufreq_gov(xc_fd, i, name) )
+                fprintf(stderr, "[CPU%d] failed to set governor name\n", i);
+    }
+    else
+    {
+        if ( xc_set_cpufreq_gov(xc_fd, cpuid, name) )
+            fprintf(stderr, "failed to set governor name\n");
+    }
+
+    free(name);
+    return ;
+out:
+    fprintf(stderr, "failed to set governor name\n");
 }
 
 struct {
     const char *name;
-    int (*function)(int xc_fd, int cpuid, uint32_t value);
+    void (*function)(int argc, char *argv[]);
 } main_options[] = {
     { "help", help_func },
-    { "get-cpuidle-states", cxstates_func },
-    { "get-cpufreq-states", pxstates_func },
+    { "get-cpuidle-states", cxstat_func },
+    { "get-cpufreq-states", pxstat_func },
+    { "start", start_gather_func },
     { "get-cpufreq-para", cpufreq_para_func },
     { "set-scaling-maxfreq", scaling_max_freq_func },
     { "set-scaling-minfreq", scaling_min_freq_func },
-    { "set-scaling-governor", NULL },
+    { "set-scaling-governor", scaling_governor_func },
     { "set-scaling-speed", scaling_speed_func },
     { "set-sampling-rate", scaling_sampling_rate_func },
     { "set-up-threshold", scaling_up_threshold_func },
@@ -541,38 +769,37 @@ struct {
 
 int main(int argc, char *argv[])
 {
-    int i, ret = -EINVAL;
-    int xc_fd;
-    int cpuid = -1;
-    uint32_t value = 0;
+    int i, ret = 0;
+    xc_physinfo_t physinfo = { 0 };
     int nr_matches = 0;
     int matches_main_options[ARRAY_SIZE(main_options)];
 
     if ( argc < 2 )
     {
         show_help();
-        return ret;
-    }
-
-    if ( argc > 2 )
-    {
-        if ( sscanf(argv[2], "%d", &cpuid) != 1 )
-            cpuid = -1;
+        return 0;
     }
 
     xc_fd = xc_interface_open();
     if ( xc_fd < 0 )
     {
         fprintf(stderr, "failed to get the handler\n");
-    }
-
+        return 0;
+    }
+
+    ret = xc_physinfo(xc_fd, &physinfo);
+    if ( ret )
+    {
+        fprintf(stderr, "failed to get the processor information\n");
+        xc_interface_close(xc_fd);
+        return 0;
+    }
+    max_cpu_nr = physinfo.nr_cpus;
+
+    /* calculate how many options match with user's input */
     for ( i = 0; i < ARRAY_SIZE(main_options); i++ )
-    {
         if ( !strncmp(main_options[i].name, argv[1], strlen(argv[1])) )
-        {
             matches_main_options[nr_matches++] = i;
-        }
-    }
 
     if ( nr_matches > 1 )
     {
@@ -582,27 +809,12 @@ int main(int argc, char *argv[])
         fprintf(stderr, "\n");
     }
     else if ( nr_matches == 1 )
-    {
-        if ( !strcmp("set-scaling-governor", main_options[matches_main_options[0]].name) )
-        {
-            char *name = strdup(argv[3]);
-            ret = scaling_governor_func(xc_fd, cpuid, name);
-            free(name);
-        }
-        else
-        {
-            if ( argc > 3 )
-            {
-                if ( sscanf(argv[3], "%d", &value) != 1 )
-                    value = 0;
-            }
-            ret = main_options[matches_main_options[0]].function(xc_fd, cpuid, value);
-        }
-    }
+        /* dispatch to the corresponding function handler */
+        main_options[matches_main_options[0]].function(argc - 2, argv + 2);
     else
         show_help();
 
     xc_interface_close(xc_fd);
-    return ret;
-}
-
+    return 0;
+}
+
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/pygrub/Makefile
--- a/tools/pygrub/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/pygrub/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -22,4 +22,6 @@ endif
 
 .PHONY: clean
 clean:
-       rm -rf build tmp *.pyc *.pyo *.o *.a *~ a.out
+       rm -rf build tmp *.pyc *.pyo *.o *.a *~ a.out $(DEPS)
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/Makefile
--- a/tools/python/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -85,3 +85,6 @@ test:
 .PHONY: clean
 clean:
        rm -rf build *.pyc *.pyo *.o *.a *~ $(CATALOGS) xen/util/auxbin.pyc
+       rm -f $(DEPS)
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/lowlevel/xc/xc.c Wed Jan 28 13:06:45 2009 +0900
@@ -903,26 +903,24 @@ static PyObject *pyxc_hvm_build(XcObject
     if ( target == -1 )
         target = memsize;
 
-    if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize, target, image) != 0 )
+    if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize,
+                                 target, image) != 0 )
         return pyxc_error_to_exception();
 
 #if !defined(__ia64__)
-    /* Set up the HVM info table. */
+    /* Fix up the HVM info table. */
     va_map = xc_map_foreign_range(self->xc_handle, dom, XC_PAGE_SIZE,
                                   PROT_READ | PROT_WRITE,
                                   HVM_INFO_PFN);
     if ( va_map == NULL )
         return PyErr_SetFromErrno(xc_error_obj);
     va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
-    memset(va_hvm, 0, sizeof(*va_hvm));
-    strncpy(va_hvm->signature, "HVM INFO", 8);
-    va_hvm->length       = sizeof(struct hvm_info_table);
     va_hvm->acpi_enabled = acpi;
     va_hvm->apic_mode    = apic;
     va_hvm->nr_vcpus     = vcpus;
     for ( i = 0, sum = 0; i < va_hvm->length; i++ )
         sum += ((uint8_t *)va_hvm)[i];
-    va_hvm->checksum = -sum;
+    va_hvm->checksum -= sum;
     munmap(va_map, XC_PAGE_SIZE);
 #endif
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/util/oshelp.py
--- a/tools/python/xen/util/oshelp.py   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/util/oshelp.py   Wed Jan 28 13:06:45 2009 +0900
@@ -5,7 +5,7 @@ def fcntl_setfd_cloexec(file, bool):
         f = fcntl.fcntl(file, fcntl.F_GETFD)
         if bool: f |= fcntl.FD_CLOEXEC
         else: f &= ~fcntl.FD_CLOEXEC
-        fcntl.fcntl(file, fcntl.F_SETFD)
+        fcntl.fcntl(file, fcntl.F_SETFD, f)
 
 def waitstatus_description(st):
         if os.WIFEXITED(st):
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py       Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xend/XendConfig.py       Wed Jan 28 13:06:45 2009 +0900
@@ -149,6 +149,7 @@ XENAPI_PLATFORM_CFG_TYPES = {
     'opengl': int,
     'soundhw': str,
     'stdvga': int,
+    'videoram': int,
     'usb': int,
     'usbdevice': str,
     'hpet': int,
@@ -166,6 +167,7 @@ XENAPI_PLATFORM_CFG_TYPES = {
     'guest_os_type': str,
     'hap': int,
     'xen_extended_power_mgmt': int,
+    'pci_msitranslate': int,
 }
 
 # Xen API console 'other_config' keys.
@@ -1247,6 +1249,11 @@ class XendConfig(dict):
                         'PPCI': ppci_uuid,
                         'hotplug_slot': pci_dev.get('vslot', 0)
                     }
+
+                    dpci_opts = pci_dev.get('opts')
+                    if dpci_opts and len(dpci_opts) > 0:
+                        dpci_record['options'] = dpci_opts
+
                     XendDPCI(dpci_uuid, dpci_record)
 
                 target['devices'][pci_devs_uuid] = (dev_type,
@@ -1762,6 +1769,11 @@ class XendConfig(dict):
                         'PPCI': ppci_uuid,
                         'hotplug_slot': pci_dev.get('vslot', 0)
                     }
+
+                    dpci_opts = pci_dev.get('opts')
+                    if dpci_opts and len(dpci_opts) > 0:
+                        dpci_record['options'] = dpci_opts
+
                     XendDPCI(dpci_uuid, dpci_record)
 
                 self['devices'][dev_uuid] = (dev_type,
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/XendDPCI.py
--- a/tools/python/xen/xend/XendDPCI.py Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xend/XendDPCI.py Wed Jan 28 13:06:45 2009 +0900
@@ -41,7 +41,8 @@ class XendDPCI(XendBase):
                   'virtual_name',
                   'VM',
                   'PPCI',
-                  'hotplug_slot']
+                  'hotplug_slot',
+                  'options']
         return XendBase.getAttrRO() + attrRO
 
     def getAttrRW(self):
@@ -119,6 +120,8 @@ class XendDPCI(XendBase):
         self.VM = record['VM']
         self.PPCI = record['PPCI']
         self.hotplug_slot = record['hotplug_slot']
+        if 'options' in record.keys():
+            self.options = record['options']
 
     def destroy(self):
         xendom = XendDomain.instance()
@@ -152,3 +155,5 @@ class XendDPCI(XendBase):
     def get_hotplug_slot(self):
         return self.hotplug_slot
 
+    def get_options(self):
+        return self.options
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xend/XendDomain.py       Wed Jan 28 13:06:45 2009 +0900
@@ -423,7 +423,7 @@ class XendDomain:
                     log.exception("Unable to recreate domain")
                     try:
                         xc.domain_pause(domid)
-                        do_FLR(domid)
+                        XendDomainInfo.do_FLR(domid)
                         xc.domain_destroy(domid)
                     except:
                         log.exception("Hard destruction of domain failed: %d" %
@@ -1264,7 +1264,7 @@ class XendDomain:
         else:
             try:
                 xc.domain_pause(int(domid))
-                do_FLR(int(domid))
+                XendDomainInfo.do_FLR(int(domid))
                 val = xc.domain_destroy(int(domid))
             except ValueError:
                 raise XendInvalidDomain(domid)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xend/XendDomainInfo.py   Wed Jan 28 13:06:45 2009 +0900
@@ -696,10 +696,17 @@ class XendDomainInfo:
                     " assigned to other domain.' \
                     )% (pci_device.name, self.domid, pci_str))
 
-        bdf_str = "%s:%s:%s.%s@%s" % (new_dev['domain'],
+        opts = ''
+        if 'opts' in new_dev and len(new_dev['opts']) > 0:
+            config_opts = new_dev['opts']
+            config_opts = map(lambda (x, y): x+'='+y, config_opts)
+            opts = ',' + reduce(lambda x, y: x+','+y, config_opts)
+
+        bdf_str = "%s:%s:%s.%s%s@%s" % (new_dev['domain'],
                 new_dev['bus'],
                 new_dev['slot'],
                 new_dev['func'],
+                opts,
                 new_dev['vslt'])
         self.image.signalDeviceModel('pci-ins', 'pci-inserted', bdf_str)
 
@@ -1192,7 +1199,7 @@ class XendDomainInfo:
 
         if self.domid >= 0:
             if target > memory_cur:
-                balloon.free( (target-memory_cur)*1024 )
+                balloon.free((target - memory_cur) * 1024, self)
             self.storeVm("memory", target)
             self.storeDom("memory/target", target << 10)
             xc.domain_set_target_mem(self.domid,
@@ -2234,7 +2241,11 @@ class XendDomainInfo:
         xc.domain_max_vcpus(self.domid, int(self.info['VCPUs_max']))
 
         # Test whether the devices can be assigned with VT-d
-        pci_str = str(self.info["platform"].get("pci"))
+        pci = self.info["platform"].get("pci")
+        pci_str = ''
+        if pci and len(pci) > 0:
+            pci = map(lambda x: x[0:4], pci)  # strip options 
+            pci_str = str(pci)
         if hvm and pci_str:
             bdf = xc.test_assign_device(self.domid, pci_str)
             if bdf != 0:
@@ -3527,6 +3538,11 @@ class XendDomainInfo:
 
         dpci_uuid = uuid.createString()
 
+        dpci_opts = []
+        opts_dict = xenapi_pci.get('options')
+        for k in opts_dict.keys():
+            dpci_opts.append([k, opts_dict[k]])
+
         # Convert xenapi to sxp
         ppci = XendAPIStore.get(xenapi_pci.get('PPCI'), 'PPCI')
 
@@ -3538,6 +3554,7 @@ class XendDomainInfo:
                     ['slot', '0x%02x' % ppci.get_slot()],
                     ['func', '0x%1x' % ppci.get_func()],
                     ['vslt', '0x%02x' % xenapi_pci.get('hotplug_slot')],
+                    ['opts', dpci_opts],
                     ['uuid', dpci_uuid]
                 ],
                 ['state', 'Initialising']
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py  Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xend/balloon.py  Wed Jan 28 13:06:45 2009 +0900
@@ -67,7 +67,7 @@ def get_dom0_target_alloc():
         raise VmError('Failed to query target memory allocation of dom0.')
     return kb
 
-def free(need_mem ,self):
+def free(need_mem, dominfo):
     """Balloon out memory from the privileged domain so that there is the
     specified required amount (in KiB) free.
     """
@@ -130,7 +130,7 @@ def free(need_mem ,self):
         if physinfo['nr_nodes'] > 1 and retries == 0:
             oldnode = -1
             waitscrub = 1
-            vcpus = self.info['cpus'][0]
+            vcpus = dominfo.info['cpus'][0]
             for vcpu in vcpus:
                 nodenum = 0
                 for node in physinfo['node_to_cpu']:
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xend/image.py    Wed Jan 28 13:06:45 2009 +0900
@@ -264,6 +264,10 @@ class ImageHandler:
             # skip vnc init if nographic is set
             ret.append('-nographic')
             return ret
+
+        vram = str(vmConfig['platform'].get('videoram',4))
+        ret.append('-videoram')
+        ret.append(vram)
 
         vnc_config = {}
         has_vnc = int(vmConfig['platform'].get('vnc', 0)) != 0
@@ -833,6 +837,7 @@ class IA64_HVM_ImageHandler(HVMImageHand
     def configure(self, vmConfig):
         HVMImageHandler.configure(self, vmConfig)
         self.vhpt = int(vmConfig['platform'].get('vhpt',  0))
+        self.vramsize = int(vmConfig['platform'].get('videoram',4)) * 1024
 
     def buildDomain(self):
         xc.nvram_init(self.vm.getName(), self.vm.getDomid())
@@ -847,8 +852,8 @@ class IA64_HVM_ImageHandler(HVMImageHand
         # buffer io page, buffer pio page and memmap info page
         extra_pages = 1024 + 5
         mem_kb += extra_pages * page_kb
-        # Add 8 MiB overhead for QEMU's video RAM.
-        return mem_kb + 8192
+        mem_kb += self.vramsize
+        return mem_kb
 
     def getRequiredInitialReservation(self):
         return self.vm.getMemoryTarget()
@@ -882,6 +887,7 @@ class X86_HVM_ImageHandler(HVMImageHandl
     def configure(self, vmConfig):
         HVMImageHandler.configure(self, vmConfig)
         self.pae = int(vmConfig['platform'].get('pae',  0))
+        self.vramsize = int(vmConfig['platform'].get('videoram',4)) * 1024
 
     def buildDomain(self):
         xc.hvm_set_param(self.vm.getDomid(), HVM_PARAM_PAE_ENABLED, self.pae)
@@ -890,8 +896,7 @@ class X86_HVM_ImageHandler(HVMImageHandl
         return rc
 
     def getRequiredAvailableMemory(self, mem_kb):
-        # Add 8 MiB overhead for QEMU's video RAM.
-        return mem_kb + 8192
+        return mem_kb + self.vramsize
 
     def getRequiredInitialReservation(self):
         return self.vm.getMemoryTarget()
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xend/server/pciif.py     Wed Jan 28 13:06:45 2009 +0900
@@ -75,6 +75,12 @@ class PciController(DevController):
             slot = parse_hex(pci_config.get('slot', 0))
             func = parse_hex(pci_config.get('func', 0))            
 
+            opts = pci_config.get('opts', '')
+            if len(opts) > 0:
+                opts = map(lambda (x, y): x+'='+y, opts)
+                opts = reduce(lambda x, y: x+','+y, opts)
+                back['opts-%i' % pcidevid] = opts
+
             vslt = pci_config.get('vslt')
             if vslt is not None:
                 vslots = vslots + vslt + ";"
@@ -89,6 +95,9 @@ class PciController(DevController):
 
         back['num_devs']=str(pcidevid)
         back['uuid'] = config.get('uuid','')
+        if 'pci_msitranslate' in self.vm.info['platform']:
+            back['msitranslate']=str(self.vm.info['platform']['pci_msitranslate'])
+
         return (0, back, {})
 
 
@@ -108,6 +117,9 @@ class PciController(DevController):
                 dev = back['dev-%i' % i]
                 state = states[i]
                 uuid = back['uuid-%i' %i]
+                opts = ''
+                if 'opts-%i' % i in back:
+                    opts = back['opts-%i' % i]
             except:
                 raise XendError('Error reading config')
 
@@ -129,6 +141,8 @@ class PciController(DevController):
                 self.writeBackend(devid, 'state-%i' % (num_olddevs + i),
                                   str(xenbusState['Initialising']))
                 self.writeBackend(devid, 'uuid-%i' % (num_olddevs + i), uuid)
+                if len(opts) > 0:
+                    self.writeBackend(devid, 'opts-%i' % (num_olddevs + i), opts)
                 self.writeBackend(devid, 'num_devs', str(num_olddevs + i + 1))
 
                 # Update vslots
@@ -540,6 +554,9 @@ class PciController(DevController):
                 self.removeBackend(devid, 'vdev-%i' % i)
                 self.removeBackend(devid, 'state-%i' % i)
                 self.removeBackend(devid, 'uuid-%i' % i)
+                tmpopts = self.readBackend(devid, 'opts-%i' % i)
+                if tmpopts is not None:
+                    self.removeBackend(devid, 'opts-%i' % i)
             else:
                 if new_num_devs != i:
                     tmpdev = self.readBackend(devid, 'dev-%i' % i)
@@ -556,6 +573,9 @@ class PciController(DevController):
                     tmpuuid = self.readBackend(devid, 'uuid-%i' % i)
                     self.writeBackend(devid, 'uuid-%i' % new_num_devs, tmpuuid)
                     self.removeBackend(devid, 'uuid-%i' % i)
+                    tmpopts = self.readBackend(devid, 'opts-%i' % i)
+                    if tmpopts is not None:
+                        self.removeBackend(devid, 'opts-%i' % i)
                 new_num_devs = new_num_devs + 1
 
         self.writeBackend(devid, 'num_devs', str(new_num_devs))
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/server/relocate.py
--- a/tools/python/xen/xend/server/relocate.py  Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xend/server/relocate.py  Wed Jan 28 13:06:45 2009 +0900
@@ -122,6 +122,8 @@ class RelocationProtocol(protocol.Protoc
         if self.transport:
             self.send_reply(["ready", name])
             p2cread, p2cwrite = os.pipe()
+            from xen.util import oshelp
+            oshelp.fcntl_setfd_cloexec(p2cwrite, True)
             threading.Thread(target=connection.SSLSocketServerConnection.recv2fd,
                              args=(self.transport.sock, p2cwrite)).start()
             try:
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xm/create.dtd
--- a/tools/python/xen/xm/create.dtd    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xm/create.dtd    Wed Jan 28 13:06:45 2009 +0900
@@ -82,11 +82,12 @@
 <!ELEMENT vtpm   (name*)>
 <!ATTLIST vtpm   backend         CDATA #REQUIRED>
 
-<!ELEMENT pci    EMPTY>
+<!ELEMENT pci    (pci_opt*)>
 <!ATTLIST pci    domain          CDATA #REQUIRED
                  bus             CDATA #REQUIRED
                  slot            CDATA #REQUIRED
                  func            CDATA #REQUIRED
+                 opts_str        CDATA #IMPLIED
                  vslt            CDATA #IMPLIED>
 
 <!ELEMENT vscsi  EMPTY>
@@ -138,6 +139,10 @@
 <!ATTLIST vcpu_param key   CDATA #REQUIRED
                      value CDATA #REQUIRED>
 
+<!ELEMENT pci_opt    EMPTY>
+<!ATTLIST pci_opt    key   CDATA #REQUIRED
+                     value CDATA #REQUIRED>
+
 <!ELEMENT other_config EMPTY>
 <!ATTLIST other_config key   CDATA #REQUIRED
                        value CDATA #REQUIRED>
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xm/create.py     Wed Jan 28 13:06:45 2009 +0900
@@ -318,11 +318,14 @@ gopts.var('disk', val='phy:DEV,VDEV,MODE
           backend driver domain to use for the disk.
           The option may be repeated to add more than one disk.""")
 
-gopts.var('pci', val='BUS:DEV.FUNC',
+gopts.var('pci', val='BUS:DEV.FUNC[,msitranslate=0|1]',
           fn=append_value, default=[],
           use="""Add a PCI device to a domain, using given params (in hex).
-         For example 'pci=c0:02.1'.
-         The option may be repeated to add more than one pci device.""")
+          For example 'pci=c0:02.1'.
+          If msitranslate is set, MSI-INTx translation is enabled if possible.
+          Guest that doesn't support MSI will get IO-APIC type IRQs
+          translated from physical MSI, HVM only. Default is 1.
+          The option may be repeated to add more than one pci device.""")
 
 gopts.var('vscsi', val='PDEV,VDEV[,DOM]',
           fn=append_value, default=[],
@@ -523,9 +526,9 @@ gopts.var('vncunused', val='',
           use="""Try to find an unused port for the VNC server.
           Only valid when vnc=1.""")
 
-gopts.var('videoram', val='',
-          fn=set_value, default=None,
-          use="""Maximum amount of videoram PV guest can allocate
+gopts.var('videoram', val='MEMORY',
+          fn=set_int, default=4,
+          use="""Maximum amount of videoram a guest can allocate
           for frame buffer.""")
 
 gopts.var('sdl', val='',
@@ -587,6 +590,11 @@ gopts.var('suppress_spurious_page_faults
 gopts.var('suppress_spurious_page_faults', val='yes|no',
           fn=set_bool, default=None,
           use="""Do not inject spurious page faults into this guest""")
+
+gopts.var('pci_msitranslate', val='TRANSLATE',
+          fn=set_int, default=1,
+          use="""Global PCI MSI-INTx translation flag (0=disable;
+          1=enable.""")
 
 def err(msg):
     """Print an error to stderr and exit.
@@ -667,9 +675,23 @@ def configure_pci(config_devs, vals):
     """Create the config for pci devices.
     """
     config_pci = []
-    for (domain, bus, slot, func) in vals.pci:
-        config_pci.append(['dev', ['domain', domain], ['bus', bus], \
-                        ['slot', slot], ['func', func]])
+    for (domain, bus, slot, func, opts) in vals.pci:
+        config_pci_opts = []
+        d = comma_sep_kv_to_dict(opts)
+
+        def f(k):
+            if k not in ['msitranslate']:
+                err('Invalid pci option: ' + k)
+
+            config_pci_opts.append([k, d[k]])
+
+        config_pci_bdf = ['dev', ['domain', domain], ['bus', bus], \
+                          ['slot', slot], ['func', func]]
+        map(f, d.keys())
+        if len(config_pci_opts)>0:
+            config_pci_bdf.append(['opts', config_pci_opts])
+
+        config_pci.append(config_pci_bdf)
 
     if len(config_pci)>0:
         config_pci.insert(0, 'pci')
@@ -862,12 +884,12 @@ def configure_hvm(config_image, vals):
     """Create the config for HVM devices.
     """
     args = [ 'device_model', 'pae', 'vcpus', 'boot', 'fda', 'fdb', 'timer_mode',
-             'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'soundhw',
+             'localtime', 'serial', 'stdvga', 'videoram', 'isa', 'nographic', 'soundhw',
              'vnc', 'vncdisplay', 'vncunused', 'vncconsole', 'vnclisten',
              'sdl', 'display', 'xauthority', 'rtc_timeoffset', 'monitor',
              'acpi', 'apic', 'usb', 'usbdevice', 'keymap', 'pci', 'hpet',
              'guest_os_type', 'hap', 'opengl', 'cpuid', 'cpuid_check',
-             'viridian', 'xen_extended_power_mgmt' ]
+             'viridian', 'xen_extended_power_mgmt', 'pci_msitranslate' ]
 
     for a in args:
         if a in vals.__dict__ and vals.__dict__[a] is not None:
@@ -991,14 +1013,18 @@ def preprocess_pci(vals):
         pci_match = re.match(r"((?P<domain>[0-9a-fA-F]{1,4})[:,])?" + \
                 r"(?P<bus>[0-9a-fA-F]{1,2})[:,]" + \
                 r"(?P<slot>[0-9a-fA-F]{1,2})[.,]" + \
-                r"(?P<func>[0-7])$", pci_dev_str)
+                r"(?P<func>[0-7])" + \
+                r"(,(?P<opts>.*))?$", pci_dev_str)
         if pci_match!=None:
-            pci_dev_info = pci_match.groupdict('0')
+            pci_dev_info = pci_match.groupdict('')
+            if pci_dev_info['domain']=='':
+                pci_dev_info['domain']='0'
             try:
                 pci.append( ('0x'+pci_dev_info['domain'], \
                         '0x'+pci_dev_info['bus'], \
                         '0x'+pci_dev_info['slot'], \
-                        '0x'+pci_dev_info['func']))
+                        '0x'+pci_dev_info['func'], \
+                        pci_dev_info['opts']))
             except IndexError:
                 err('Error in PCI slot syntax "%s"'%(pci_dev_str))
     vals.pci = pci
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xm/main.py       Wed Jan 28 13:06:45 2009 +0900
@@ -187,7 +187,7 @@ SUBCOMMAND_HELP = {
     'vnet-delete'   :  ('<VnetId>', 'Delete a Vnet.'),
     'vnet-list'     :  ('[-l|--long]', 'List Vnets.'),
     'vtpm-list'     :  ('<Domain> [--long]', 'List virtual TPM devices.'),
-    'pci-attach'    :  ('<Domain> <domain:bus:slot.func> [virtual slot]',
+    'pci-attach'    :  ('[-o|--options=<opt>] <Domain> <domain:bus:slot.func> [virtual slot]',
                         'Insert a new pass-through pci device.'),
     'pci-detach'    :  ('<Domain> <domain:bus:slot.func>',
                         'Remove a domain\'s pass-through pci device.'),
@@ -2428,7 +2428,7 @@ def xm_network_attach(args):
             vif.append(vif_param)
         server.xend.domain.device_create(dom, vif)
 
-def parse_pci_configuration(args, state):
+def parse_pci_configuration(args, state, opts = ''):
     dom = args[0]
     pci_dev_str = args[1]
     if len(args) == 3:
@@ -2443,12 +2443,17 @@ def parse_pci_configuration(args, state)
     if pci_match == None:
         raise OptionError("Invalid argument: %s %s" % (pci_dev_str,vslt))
     pci_dev_info = pci_match.groupdict('0')
+
     try:
-        pci.append(['dev', ['domain', '0x'+ pci_dev_info['domain']], \
+        pci_bdf =['dev', ['domain', '0x'+ pci_dev_info['domain']], \
                 ['bus', '0x'+ pci_dev_info['bus']],
                 ['slot', '0x'+ pci_dev_info['slot']],
                 ['func', '0x'+ pci_dev_info['func']],
-                ['vslt', '0x%x' % int(vslt, 16)]])
+                ['vslt', '0x%x' % int(vslt, 16)]]
+        if len(opts) > 0:
+            pci_bdf.append(['opts', opts])
+        pci.append(pci_bdf)
+
     except:
         raise OptionError("Invalid argument: %s %s" % (pci_dev_str,vslt))
     pci.append(['state', state])
@@ -2456,8 +2461,22 @@ def parse_pci_configuration(args, state)
     return (dom, pci)
 
 def xm_pci_attach(args):
-    arg_check(args, 'pci-attach', 2, 3)
-    (dom, pci) = parse_pci_configuration(args, 'Initialising')
+    config_pci_opts = []
+    (options, params) = getopt.gnu_getopt(args, 'o:', ['options='])
+    for (k, v) in options:
+        if k in ('-o', '--options'):
+            if len(v.split('=')) != 2:
+                err("Invalid pci attach option: %s" % v)
+                usage('pci-attach')
+            config_pci_opts.append(v.split('='))
+
+    n = len([i for i in params if i != '--'])
+    if n < 2 or n > 3:
+        err("Invalid argument for 'xm pci-attach'")
+        usage('pci-attach')
+
+    (dom, pci) = parse_pci_configuration(params, 'Initialising',
+                     config_pci_opts)
 
     if serverType == SERVER_XEN_API:
 
@@ -2480,7 +2499,8 @@ def xm_pci_attach(args):
         dpci_record = {
             "VM":           get_single_vm(dom),
             "PPCI":         target_ref,
-            "hotplug_slot": vslt
+            "hotplug_slot": vslt,
+            "options":      dict(config_pci_opts)
         }
         server.xenapi.DPCI.create(dpci_record)
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xm/xenapi_create.py
--- a/tools/python/xen/xm/xenapi_create.py      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xm/xenapi_create.py      Wed Jan 28 13:06:45 2009 +0900
@@ -533,7 +533,10 @@ class xenapi_create:
             "PPCI":
                 target_ref,
             "hotplug_slot":
-                int(pci.attributes["func"].value, 16)
+                int(pci.attributes["func"].value, 16),
+            "options":
+                get_child_nodes_as_dict(pci,
+                  "pci_opt", "key", "value")
         }
 
         return server.xenapi.DPCI.create(dpci_record)
@@ -931,6 +934,12 @@ class sxp2xml:
                     = get_child_by_name(dev_sxp, "func", "0")
                 pci.attributes["vslt"] \
                     = get_child_by_name(dev_sxp, "vslt", "0")
+                for opt in get_child_by_name(dev_sxp, "opts", ""):
+                    if len(opt) > 0:
+                        pci_opt = document.createElement("pci_opt")
+                        pci_opt.attributes["key"] = opt[0]
+                        pci_opt.attributes["value"] = opt[1]
+                        pci.appendChild(pci_opt)
 
                 pcis.append(pci)
 
@@ -1032,6 +1041,7 @@ class sxp2xml:
             'vhpt',
             'guest_os_type',
             'hap',
+            'pci_msitranslate',
         ]
 
         platform_configs = []
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/tests/blowfish.mk
--- a/tools/tests/blowfish.mk   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/tests/blowfish.mk   Wed Jan 28 13:06:45 2009 +0900
@@ -1,13 +1,13 @@
 
 override XEN_TARGET_ARCH = x86_32
 XEN_ROOT = ../..
-CFLAGS :=
+CFLAGS =
 include $(XEN_ROOT)/tools/Rules.mk
 
 # Disable PIE/SSP if GCC supports them. They can break us.
-CFLAGS += $(call cc-option,$(CC),-nopie,)
-CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,)
-CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,)
+$(call cc-option-add,CFLAGS,CC,-nopie)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all)
 
 CFLAGS += -fno-builtin -msoft-float
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/vnet/libxutil/Makefile
--- a/tools/vnet/libxutil/Makefile      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/vnet/libxutil/Makefile      Wed Jan 28 13:06:45 2009 +0900
@@ -24,13 +24,10 @@ LIB_OBJS := $(LIB_SRCS:.c=.o)
 LIB_OBJS := $(LIB_SRCS:.c=.o)
 PIC_OBJS := $(LIB_SRCS:.c=.opic)
 
-CFLAGS   += -Werror -fno-strict-aliasing $(call cc-option,$(CC),-fgnu89-inline,)
+$(call cc-option-add,CFLAGS,CC,-fgnu89-inline)
+CFLAGS   += -Werror -fno-strict-aliasing
 CFLAGS   += -O3
 #CFLAGS   += -g
-
-# Get gcc to generate the dependencies for us.
-CFLAGS   += -Wp,-MD,.$(@F).d
-DEPS     = .*.d
 
 MAJOR    := 3.0
 MINOR    := 0
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/vtpm/Makefile
--- a/tools/vtpm/Makefile       Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/vtpm/Makefile       Wed Jan 28 13:06:45 2009 +0900
@@ -89,6 +89,6 @@ build_sub:
                        $(MAKE) -C $(TPM_EMULATOR_DIR); \
                fi \
        else \
-               echo "*** Unable to build VTPMs. libgmp could not be found."; \
+               echo "=== Unable to build VTPMs. libgmp could not be found."; \
        fi
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/vtpm/Rules.mk
--- a/tools/vtpm/Rules.mk       Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/vtpm/Rules.mk       Wed Jan 28 13:06:45 2009 +0900
@@ -11,11 +11,6 @@ TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin
 # General compiler flags
 CFLAGS   = -Werror -g3 -I.
 
-# For generating dependencies
-CFLAGS += -Wp,-MD,.$(@F).d
-
-DEP_FILES      = .*.d
-
 # Generic project files
 HDRS   = $(wildcard *.h)
 SRCS   = $(wildcard *.c)
@@ -26,7 +21,7 @@ OBJS  = $(patsubst %.c,%.o,$(SRCS))
 
 $(OBJS): $(SRCS)
 
--include $(DEP_FILES)
+-include $(DEPS)
 
 BUILD_EMULATOR = y
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/vtpm_manager/Rules.mk
--- a/tools/vtpm_manager/Rules.mk       Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/vtpm_manager/Rules.mk       Wed Jan 28 13:06:45 2009 +0900
@@ -11,11 +11,6 @@ TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin
 # General compiler flags
 CFLAGS = -Werror -g3 -I.
 
-# For generating dependencies
-CFLAGS += -Wp,-MD,.$(@F).d
-
-DEP_FILES      = .*.d
-
 # Generic project files
 HDRS   = $(wildcard *.h)
 SRCS   = $(wildcard *.c)
@@ -26,7 +21,7 @@ OBJS  = $(patsubst %.c,%.o,$(SRCS))
 
 $(OBJS): $(SRCS)
 
--include $(DEP_FILES)
+-include $(FILES)
 
 # Make sure these are just rules
 .PHONY : all build install clean
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xcutils/Makefile
--- a/tools/xcutils/Makefile    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/xcutils/Makefile    Wed Jan 28 13:06:45 2009 +0900
@@ -13,10 +13,6 @@ include $(XEN_ROOT)/tools/Rules.mk
 
 CFLAGS += -Werror
 CFLAGS += $(CFLAGS_libxenctrl) $(CFLAGS_libxenguest) $(CFLAGS_libxenstore)
-
-# Make gcc generate dependencies.
-CFLAGS += -Wp,-MD,.$(@F).d
-PROG_DEP = .*.d
 
 PROGRAMS = xc_restore xc_save readnotes lsevtchn
 
@@ -40,6 +36,6 @@ install: build
 .PHONY: clean
 clean:
        $(RM) *.o $(PROGRAMS)
-       $(RM) $(PROG_DEP)
+       $(RM) $(DEPS)
 
--include $(PROG_DEP)
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xcutils/readnotes.c
--- a/tools/xcutils/readnotes.c Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/xcutils/readnotes.c Wed Jan 28 13:06:45 2009 +0900
@@ -13,7 +13,7 @@
 #include <xg_private.h>
 #include <xc_dom.h> /* gunzip bits */
 
-#include <xen/libelf.h>
+#include <xen/libelf/libelf.h>
 
 static void print_string_note(const char *prefix, struct elf_binary *elf,
                              const elf_note *note)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xenmon/Makefile
--- a/tools/xenmon/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/xenmon/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -38,10 +38,12 @@ install: build
 
 .PHONY: clean
 clean:
-       rm -f $(BIN)
+       rm -f $(BIN) $(DEPS)
 
 
 %: %.c Makefile
        $(CC) $(CFLAGS) $< $(LDFLAGS) -o $@
 xentrace_%: %.c Makefile
        $(CC) $(CFLAGS) $< $(LDFLAGS) -o $@
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xenpmd/Makefile
--- a/tools/xenpmd/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/xenpmd/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -17,4 +17,6 @@ install: all
 
 .PHONY: clean
 clean:
-       $(RM) -f $(BIN)
+       $(RM) -f $(BIN) $(DEPS)
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xenstat/libxenstat/Makefile
--- a/tools/xenstat/libxenstat/Makefile Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/xenstat/libxenstat/Makefile Wed Jan 28 13:06:45 2009 +0900
@@ -155,4 +155,6 @@ endif
 .PHONY: clean
 clean:
        rm -f $(LIB) $(SHLIB) $(SHLIB_LINKS) $(OBJECTS-y) \
-             $(BINDINGS) $(BINDINGSRC)
+             $(BINDINGS) $(BINDINGSRC) $(DEPS)
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xenstat/xentop/Makefile
--- a/tools/xenstat/xentop/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/xenstat/xentop/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -37,4 +37,6 @@ endif
 
 .PHONY: clean
 clean:
-       rm -f xentop xentop.o
+       rm -f xentop xentop.o $(DEPS)
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xenstore/Makefile
--- a/tools/xenstore/Makefile   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/xenstore/Makefile   Wed Jan 28 13:06:45 2009 +0900
@@ -7,10 +7,6 @@ CFLAGS += -Werror
 CFLAGS += -Werror
 CFLAGS += -I.
 CFLAGS += $(CFLAGS_libxenctrl)
-
-# Make gcc generate dependencies.
-CFLAGS += -Wp,-MD,.$(@F).d
-DEP    = .*.d
 
 CLIENTS := xenstore-exists xenstore-list xenstore-read xenstore-rm xenstore-chmod
 CLIENTS += xenstore-write xenstore-ls
@@ -82,7 +78,7 @@ clean:
        rm -f xenstored xs_random xs_stress xs_crashme
        rm -f xs_tdb_dump xenstore-control
        rm -f xenstore $(CLIENTS)
-       $(RM) $(DEP)
+       $(RM) $(DEPS)
 
 .PHONY: TAGS
 TAGS:
@@ -113,7 +109,7 @@ install: all
        $(INSTALL_DATA) xs.h $(DESTDIR)$(INCLUDEDIR)
        $(INSTALL_DATA) xs_lib.h $(DESTDIR)$(INCLUDEDIR)
 
--include $(DEP)
+-include $(DEPS)
 
 # never delete any intermediate files.
 .SECONDARY:
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xentrace/Makefile
--- a/tools/xentrace/Makefile   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/xentrace/Makefile   Wed Jan 28 13:06:45 2009 +0900
@@ -46,9 +46,12 @@ install: build
 
 .PHONY: clean
 clean:
-       $(RM) *.a *.so *.o *.rpm $(BIN) $(LIBBIN)
+       $(RM) *.a *.so *.o *.rpm $(BIN) $(LIBBIN) $(DEPS)
 
 %: %.c $(HDRS) Makefile
        $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
 xentrace_%: %.c $(HDRS) Makefile
        $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+
+-include $(DEPS)
+
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/Rules.mk
--- a/xen/Rules.mk      Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/Rules.mk      Wed Jan 28 13:06:45 2009 +0900
@@ -23,9 +23,6 @@ ifeq ($(perfc_arrays),y)
 ifeq ($(perfc_arrays),y)
 perfc := y
 endif
-ifeq ($(frame_pointer),y)
-CFLAGS := $(shell echo $(CFLAGS) | sed -e 's/-f[^ ]*omit-frame-pointer//g')
-endif
 
 # Set ARCH/SUBARCH appropriately.
 override TARGET_SUBARCH  := $(XEN_TARGET_ARCH)
@@ -34,20 +31,7 @@ override TARGET_ARCH     := $(shell echo
 
 TARGET := $(BASEDIR)/xen
 
-HDRS := $(wildcard *.h)
-HDRS += $(wildcard $(BASEDIR)/include/xen/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/xen/hvm/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/public/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/public/*/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/compat/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/$(TARGET_SUBARCH)/*.h)
-
 include $(BASEDIR)/arch/$(TARGET_ARCH)/Rules.mk
-
-# Do not depend on auto-generated header files.
-AHDRS := $(filter-out %/include/xen/compile.h,$(HDRS))
-HDRS  := $(filter-out %/asm-offsets.h,$(AHDRS))
 
 # Note that link order matters!
 ALL_OBJS-y               += $(BASEDIR)/common/built_in.o
@@ -77,15 +61,18 @@ AFLAGS-y                += -D__ASSEMBLY_
 
 ALL_OBJS := $(ALL_OBJS-y)
 
-CFLAGS   := $(strip $(CFLAGS) $(CFLAGS-y))
+# Get gcc to generate the dependencies for us.
+CFLAGS-y += -MMD -MF .$(@F).d
+DEPS = .*.d
+
+CFLAGS += $(CFLAGS-y)
 
 # Most CFLAGS are safe for assembly files:
 #  -std=gnu{89,99} gets confused by #-prefixed end-of-line comments
-AFLAGS   := $(strip $(AFLAGS) $(AFLAGS-y))
-AFLAGS   += $(patsubst -std=gnu%,,$(CFLAGS))
+AFLAGS += $(AFLAGS-y) $(filter-out -std=gnu%,$(CFLAGS))
 
 # LDFLAGS are only passed directly to $(LD)
-LDFLAGS  := $(strip $(LDFLAGS) $(LDFLAGS_DIRECT))
+LDFLAGS += $(LDFLAGS_DIRECT)
 
 include Makefile
 
@@ -115,19 +102,21 @@ FORCE:
 
 .PHONY: clean
 clean:: $(addprefix _clean_, $(subdir-all))
-       rm -f *.o *~ core
+       rm -f *.o *~ core $(DEPS)
 _clean_%/: FORCE
        $(MAKE) -f $(BASEDIR)/Rules.mk -C $* clean
 
-%.o: %.c $(HDRS) Makefile
+%.o: %.c Makefile
        $(CC) $(CFLAGS) -c $< -o $@
 
-%.o: %.S $(AHDRS) Makefile
+%.o: %.S Makefile
        $(CC) $(AFLAGS) -c $< -o $@
 
-%.i: %.c $(HDRS) Makefile
+%.i: %.c Makefile
        $(CPP) $(CFLAGS) $< -o $@
 
 # -std=gnu{89,99} gets confused by # as an end-of-line comment marker
-%.s: %.S $(AHDRS) Makefile
+%.s: %.S Makefile
        $(CPP) $(AFLAGS) $< -o $@
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/Makefile
--- a/xen/arch/ia64/Makefile    Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/ia64/Makefile    Wed Jan 28 13:06:45 2009 +0900
@@ -29,11 +29,11 @@ subdir-y += linux-xen
 # Headers do not depend on auto-generated header, but object files do.
 $(ALL_OBJS): $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h
 
-asm-offsets.s: asm-offsets.c $(HDRS) \
+asm-offsets.s: asm-offsets.c \
     $(BASEDIR)/include/asm-ia64/.offsets.h.stamp 
        $(CC) $(CFLAGS) -DGENERATE_ASM_OFFSETS -DIA64_TASK_SIZE=0 -S -o $@ $<
 
-asm-xsi-offsets.s: asm-xsi-offsets.c $(HDRS)
+asm-xsi-offsets.s: asm-xsi-offsets.c
        $(CC) $(CFLAGS) -S -o $@ $<
 
 $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h: asm-xsi-offsets.s
@@ -61,7 +61,7 @@ asm-xsi-offsets.s: asm-xsi-offsets.c $(H
        touch $@
 
 # I'm sure a Makefile wizard would know a better way to do this
-xen.lds.s: xen/xen.lds.S $(HDRS)
+xen.lds.s: xen/xen.lds.S
        $(CC) -E $(CPPFLAGS) -P -DXEN $(AFLAGS) \
                -o xen.lds.s xen/xen.lds.S
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/Rules.mk
--- a/xen/arch/ia64/Rules.mk    Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/ia64/Rules.mk    Wed Jan 28 13:06:45 2009 +0900
@@ -72,19 +72,4 @@ CFLAGS += -DCONFIG_XEN_IA64_DISABLE_OPTV
 CFLAGS += -DCONFIG_XEN_IA64_DISABLE_OPTVFAULT
 endif
 
-LDFLAGS := -g
-
-# Additionnal IA64 include dirs.
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-null/asm/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-null/asm/sn/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-null/linux/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-xen/asm/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-xen/asm/sn/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-xen/linux/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/asm-generic/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/asm/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/byteorder/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/hvm/*.h)
-
-HDRS := $(filter-out %/include/asm-ia64/asm-xsi-offsets.h,$(HDRS))
+LDFLAGS = -g
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/tools/p2m_foreign/Makefile
--- a/xen/arch/ia64/tools/p2m_foreign/Makefile  Wed Jan 28 12:22:58 2009 +0900
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,52 +0,0 @@
-#
-# xen/arch/ia64/tools/p2m_foreign
-#
-# Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
-#                    VA Linux Systems Japan K.K.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-XEN_ROOT       = ../../../../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-CFLAGS += -Werror -ggdb3
-CFLAGS += -I$(XEN_LIBXC) -I$(XEN_XENSTORE)
-
-# Make gcc generate dependencies.
-CFLAGS += -Wp,-MD,.$(@F).d
-DEPS = .*.d
-
-PROGRAMS = p2m_foreign
-LDLIBS   = -L$(XEN_LIBXC) -L$(XEN_XENSTORE) -lxenguest -lxenctrl
-
-.PHONY: all
-all: build
-
-.PHONY: build
-build: $(PROGRAMS)
-
-$(PROGRAMS): %: %.o
-       $(CC) $(CFLAGS) $^ $(LDLIBS) -o $@
-
-
-.PHONY: install
-install:
-
-.PHONY: clean
-clean:
-       $(RM) *.o $(PROGRAMS)
-       $(RM) $(DEPS)
-
--include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/tools/p2m_foreign/p2m_foreign.c
--- a/xen/arch/ia64/tools/p2m_foreign/p2m_foreign.c     Wed Jan 28 12:22:58 2009 +0900
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,233 +0,0 @@
-/*
- * Foreign p2m exposure test.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- * Copyright (c) 2007 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *
- */
-
-#include <sys/mman.h>
-#include <err.h>
-#include <errno.h>
-#include <assert.h>
-
-#include <xc_private.h>
-#include <xenctrl.h>
-#include <xenguest.h>
-#include <xc_efi.h>
-#include <ia64/xc_ia64.h>
-
-#if 1
-# define printd(fmt, args...)  printf(fmt, ##args)
-#else
-# define printd(fmt, args...)  ((void)0)
-#endif
-
-/* xc_memory_op() in xc_private.c doesn't support translate_gpfn_list */
-static int
-__xc_memory_op(int xc_handle, int cmd, void *arg)
-{
-       DECLARE_HYPERCALL;
-       struct xen_translate_gpfn_list* translate = arg;
-
-       xen_ulong_t* gpfns;
-       xen_ulong_t* mfns;
-       size_t len;
-
-       long ret = -EINVAL;
-
-       hypercall.op     = __HYPERVISOR_memory_op;
-       hypercall.arg[0] = (unsigned long)cmd;
-       hypercall.arg[1] = (unsigned long)arg;
-
-       assert(cmd == XENMEM_translate_gpfn_list);
-
-       get_xen_guest_handle(gpfns, translate->gpfn_list);
-       get_xen_guest_handle(mfns, translate->mfn_list);
-       len = sizeof(gpfns[0]) * translate->nr_gpfns;
-       if (lock_pages(translate, sizeof(*translate)) ||
-           lock_pages(gpfns, len) ||
-           lock_pages(mfns, len))
-               goto out;
-
-       ret = do_xen_hypercall(xc_handle, &hypercall);
-
-out:
-       unlock_pages(mfns, len);
-       unlock_pages(gpfns, len);
-       unlock_pages(translate, sizeof(*translate));
-
-       return ret;
-}
-
-int
-xc_translate_gpfn_list(int xc_handle, uint32_t domid, xen_ulong_t nr_gpfns,
-                      xen_ulong_t* gpfns, xen_ulong_t* mfns)
-{
-       struct xen_translate_gpfn_list translate = {
-               .domid = domid,
-               .nr_gpfns = nr_gpfns,
-       };
-       set_xen_guest_handle(translate.gpfn_list, gpfns);
-       set_xen_guest_handle(translate.mfn_list, mfns);
-
-       return __xc_memory_op(xc_handle,
-                             XENMEM_translate_gpfn_list, &translate);
-}
-
-int
-main(int argc, char** argv)
-{
-       uint32_t domid;
-       int xc_handle;
-
-       xc_dominfo_t info;
-       shared_info_t* shinfo;
-
-       unsigned long map_size;
-       xen_ia64_memmap_info_t* memmap_info;
-       struct xen_ia64_p2m_table p2m_table;
-
-       char* p;
-       char* start;
-       char* end;
-       xen_ulong_t nr_gpfns;
-
-       xen_ulong_t* gpfns;
-       xen_ulong_t* mfns;
-
-       unsigned long i;
-
-       if (argc != 2)
-               errx(EXIT_FAILURE, "usage: %s <domid>", argv[0]);
-       domid = atol(argv[1]);
-
-       printd("xc_interface_open()\n");
-       xc_handle = xc_interface_open();
-       if (xc_handle < 0)
-               errx(EXIT_FAILURE, "can't open control interface");
-
-       printd("xc_domain_getinfo\n");
-       if (xc_domain_getinfo(xc_handle, domid, 1, &info) != 1)
-               errx(EXIT_FAILURE, "Could not get info for domain");
-
-
-       printd("shared info\n");
-       shinfo = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                     PROT_READ, info.shared_info_frame);
-       if (shinfo == NULL)
-               errx(EXIT_FAILURE, "can't map shared info");
-
-       printd("memmap_info\n");
-       map_size = PAGE_SIZE * shinfo->arch.memmap_info_num_pages;
-       memmap_info = xc_map_foreign_range(xc_handle, info.domid,
-                                          map_size, PROT_READ,
-                                          shinfo->arch.memmap_info_pfn);
-       if (memmap_info == NULL)
-               errx(EXIT_FAILURE, "can't map memmap_info");
-
-#if 1
-       start = (char*)&memmap_info->memdesc;
-       end = start + memmap_info->efi_memmap_size;
-       i = 0;
-       for (p = start; p < end; p += memmap_info->efi_memdesc_size) {
-               efi_memory_desc_t* md = (efi_memory_desc_t*)p;
-               printd("%ld [0x%lx, 0x%lx) 0x%lx pages\n",
-                      i, md->phys_addr,
-                      md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
-                      md->num_pages >> (PAGE_SHIFT - EFI_PAGE_SHIFT));
-               i++;
-       }
-#endif
-
-
-       printd("p2m map\n");
-       if (xc_ia64_p2m_map(&p2m_table, xc_handle, domid, memmap_info, 0) < 0)
-               errx(EXIT_FAILURE, "can't map foreign p2m table");
-       printd("p2m map done\n");
-
-       start = (char*)&memmap_info->memdesc;
-       end = start + memmap_info->efi_memmap_size;
-       nr_gpfns = 0;
-       i = 0;
-       for (p = start; p < end; p += memmap_info->efi_memdesc_size) {
-               efi_memory_desc_t* md = (efi_memory_desc_t*)p;
-               if ( md->type != EFI_CONVENTIONAL_MEMORY ||
-                    md->attribute != EFI_MEMORY_WB ||
-                    md->num_pages == 0 )
-                       continue;
-
-               printd("%ld [0x%lx, 0x%lx) 0x%lx pages\n",
-                      i, md->phys_addr,
-                      md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
-                      md->num_pages >> (PAGE_SHIFT - EFI_PAGE_SHIFT));
-               nr_gpfns += md->num_pages >> (PAGE_SHIFT - EFI_PAGE_SHIFT);
-               i++;
-       }
-
-       printd("total 0x%lx gpfns\n", nr_gpfns);
-       gpfns = malloc(sizeof(gpfns[0]) * nr_gpfns);
-       mfns = malloc(sizeof(mfns[0]) * nr_gpfns);
-       if (gpfns == NULL || mfns == NULL)
-               err(EXIT_FAILURE, "can't allocate memory for gpfns/mfns");
-
-       i = 0;
-       for (p = start; p < end; p += memmap_info->efi_memdesc_size) {
-               efi_memory_desc_t* md = (efi_memory_desc_t*)p;
-               unsigned long j;
-               if ( md->type != EFI_CONVENTIONAL_MEMORY ||
-                    md->attribute != EFI_MEMORY_WB ||
-                    md->num_pages == 0 )
-                       continue;
-
-               for (j = 0;
-                    j < md->num_pages >> (PAGE_SHIFT - EFI_PAGE_SHIFT);
-                    j++) {
-                       gpfns[i] = (md->phys_addr >> PAGE_SHIFT) + j;
-                       i++;
-               }
-       }
-       for (i = 0; i < nr_gpfns; i++)
-               mfns[i] = INVALID_MFN;
-
-       printd("issue translate gpfn list hypercall. "
-              "this may take a while\n");
-       if (xc_translate_gpfn_list(xc_handle,
-                                  domid, nr_gpfns, gpfns, mfns) < 0)
-               err(EXIT_FAILURE, "translate gpfn list hypercall failure");
-       printd("translate gpfn list hypercall done\n");
-
-       printd("checking p2m table\n");
-       for (i = 0; i < nr_gpfns; i++) {
-               unsigned long mfn_by_translated = mfns[i];
-               unsigned long mfn_by_p2m =
-                       xc_ia64_p2m_mfn(&p2m_table, gpfns[i]);
-               if (mfn_by_translated != mfn_by_p2m &&
-                   !(mfn_by_translated == 0 && mfn_by_p2m == INVALID_MFN)) {
-                       printf("ERROR! i 0x%lx gpfn "
-                              "0x%lx trnslated 0x%lx p2m 0x%lx\n",
-                              i, gpfns[i], mfn_by_translated, mfn_by_p2m);
-               }
-       }
-       printd("checking p2m table done\n");
-
-       xc_ia64_p2m_unmap(&p2m_table);
-       munmap(memmap_info, map_size);
-       munmap(shinfo, PAGE_SIZE);
-
-       return EXIT_SUCCESS;
-}
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/ia64/xen/domain.c        Wed Jan 28 13:06:45 2009 +0900
@@ -31,7 +31,7 @@
 #include <xen/event.h>
 #include <xen/console.h>
 #include <xen/version.h>
-#include <public/libelf.h>
+#include <xen/libelf.h>
 #include <asm/pgalloc.h>
 #include <asm/offsets.h>  /* for IA64_THREAD_INFO_SIZE */
 #include <asm/vcpu.h>   /* for function declarations */
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/xen/irq.c
--- a/xen/arch/ia64/xen/irq.c   Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/ia64/xen/irq.c   Wed Jan 28 13:06:45 2009 +0900
@@ -402,7 +402,7 @@ void __do_IRQ_guest(int irq)
        }
 }
 
-int pirq_acktype(int irq)
+static int pirq_acktype(int irq)
 {
     irq_desc_t *desc = &irq_desc[irq];
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/xen/machine_kexec.c
--- a/xen/arch/ia64/xen/machine_kexec.c Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/ia64/xen/machine_kexec.c Wed Jan 28 13:06:45 2009 +0900
@@ -195,6 +195,7 @@ int machine_kexec_get(xen_kexec_range_t 
 
 void arch_crash_save_vmcoreinfo(void)
 {
+    VMCOREINFO_SYMBOL(xenheap_phys_end);
        VMCOREINFO_SYMBOL(dom_xen);
        VMCOREINFO_SYMBOL(dom_io);
        VMCOREINFO_SYMBOL(xen_pstart);
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c    Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/ia64/xen/mm.c    Wed Jan 28 13:06:45 2009 +0900
@@ -3246,9 +3246,9 @@ int get_page_type(struct page_info *page
     return 1;
 }
 
-int memory_is_conventional_ram(paddr_t p)
-{
-    return (efi_mem_type(p) == EFI_CONVENTIONAL_MEMORY);
+int page_is_conventional_ram(unsigned long mfn)
+{
+    return (efi_mem_type(pfn_to_paddr(mfn)) == EFI_CONVENTIONAL_MEMORY);
 }
 
 
@@ -3295,38 +3295,39 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
 
             spin_unlock(&d->grant_table->lock);
             break;
-        case XENMAPSPACE_mfn:
-        {
-            if ( get_page_from_pagenr(xatp.idx, d) ) {
-                struct xen_ia64_memmap_info memmap_info;
-                efi_memory_desc_t md;
-                int ret;
-
-                mfn = xatp.idx;
-                page = mfn_to_page(mfn);
-
-                memmap_info.efi_memmap_size = sizeof(md);
-                memmap_info.efi_memdesc_size = sizeof(md);
-                memmap_info.efi_memdesc_version =
-                    EFI_MEMORY_DESCRIPTOR_VERSION;
-
-                md.type = EFI_CONVENTIONAL_MEMORY;
-                md.pad = 0;
-                md.phys_addr = xatp.gpfn << PAGE_SHIFT;
-                md.virt_addr = 0;
-                md.num_pages = 1UL << (PAGE_SHIFT - EFI_PAGE_SHIFT);
-                md.attribute = EFI_MEMORY_WB;
-
-                ret = __dom0vp_add_memdesc(d, &memmap_info, (char*)&md);
-                if (ret != 0) {
-                    put_page(page);
-                    rcu_unlock_domain(d);
-                    gdprintk(XENLOG_DEBUG,
-                             "%s:%d td %d gpfn 0x%lx mfn 0x%lx ret %d\n",
-                             __func__, __LINE__,
-                             d->domain_id, xatp.gpfn, xatp.idx, ret);
-                    return ret;
-                }
+        case XENMAPSPACE_gmfn: {
+            struct xen_ia64_memmap_info memmap_info;
+            efi_memory_desc_t md;
+            int ret;
+
+            xatp.idx = gmfn_to_mfn(d, xatp.idx);
+            if ( !get_page_from_pagenr(xatp.idx, d) )
+                break;
+
+            mfn = xatp.idx;
+            page = mfn_to_page(mfn);
+
+            memmap_info.efi_memmap_size = sizeof(md);
+            memmap_info.efi_memdesc_size = sizeof(md);
+            memmap_info.efi_memdesc_version =
+                EFI_MEMORY_DESCRIPTOR_VERSION;
+
+            md.type = EFI_CONVENTIONAL_MEMORY;
+            md.pad = 0;
+            md.phys_addr = xatp.gpfn << PAGE_SHIFT;
+            md.virt_addr = 0;
+            md.num_pages = 1UL << (PAGE_SHIFT - EFI_PAGE_SHIFT);
+            md.attribute = EFI_MEMORY_WB;
+
+            ret = __dom0vp_add_memdesc(d, &memmap_info, (char*)&md);
+            if (ret != 0) {
+                put_page(page);
+                rcu_unlock_domain(d);
+                gdprintk(XENLOG_DEBUG,
+                         "%s:%d td %d gpfn 0x%lx mfn 0x%lx ret %d\n",
+                         __func__, __LINE__,
+                         d->domain_id, xatp.gpfn, xatp.idx, ret);
+                return ret;
             }
             break;
         }
@@ -3377,34 +3378,6 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
 
         break;
     }
-
-    case XENMEM_remove_from_physmap:
-    {
-        struct xen_remove_from_physmap xrfp;
-        unsigned long mfn;
-        struct domain *d;
-
-        if ( copy_from_guest(&xrfp, arg, 1) )
-            return -EFAULT;
-
-        rc = rcu_lock_target_domain_by_id(xrfp.domid, &d);
-        if ( rc != 0 )
-            return rc;
-
-        domain_lock(d);
-
-        mfn = gmfn_to_mfn(d, xrfp.gpfn);
-
-        if ( mfn_valid(mfn) )
-            guest_physmap_remove_page(d, xrfp.gpfn, mfn, 0);
-
-        domain_unlock(d);
-
-        rcu_unlock_domain(d);
-
-        break;
-    }
-
 
     case XENMEM_machine_memory_map:
     {
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/xen/xensetup.c
--- a/xen/arch/ia64/xen/xensetup.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/ia64/xen/xensetup.c      Wed Jan 28 13:06:45 2009 +0900
@@ -747,8 +747,3 @@ int xen_in_range(paddr_t start, paddr_t 
 
     return start < end;
 }
-
-int tboot_in_range(paddr_t start, paddr_t end)
-{
-    return 0;
-}
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -53,6 +53,7 @@ obj-y += crash.o
 obj-y += crash.o
 obj-y += tboot.o
 obj-y += hpet.o
+obj-y += bzimage.o
 
 obj-$(crash_debug) += gdbstub.o
 
@@ -78,10 +79,10 @@ ALL_OBJS := $(BASEDIR)/arch/x86/boot/bui
            $(@D)/.$(@F).1.o -o $@
        rm -f $(@D)/.$(@F).[0-9]*
 
-asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(HDRS)
+asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c
        $(CC) $(CFLAGS) -S -o $@ $<
 
-xen.lds: $(TARGET_SUBARCH)/xen.lds.S $(HDRS)
+xen.lds: $(TARGET_SUBARCH)/xen.lds.S
        $(CC) -P -E -Ui386 $(AFLAGS) -o $@ $<
 
 boot/mkelf32: boot/mkelf32.c
@@ -90,4 +91,4 @@ boot/mkelf32: boot/mkelf32.c
 .PHONY: clean
 clean::
        rm -f asm-offsets.s xen.lds boot/*.o boot/*~ boot/core boot/mkelf32
-       rm -f $(BASEDIR)/.xen-syms.[0-9]*
+       rm -f $(BASEDIR)/.xen-syms.[0-9]* boot/.*.d
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/Rules.mk
--- a/xen/arch/x86/Rules.mk     Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/Rules.mk     Wed Jan 28 13:06:45 2009 +0900
@@ -26,9 +26,9 @@ CFLAGS += -msoft-float
 CFLAGS += -msoft-float
 
 # Disable PIE/SSP if GCC supports them. They can break us.
-CFLAGS += $(call cc-option,$(CC),-nopie,)
-CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,)
-CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,)
+$(call cc-option-add,CFLAGS,CC,-nopie)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all)
 
 ifeq ($(supervisor_mode_kernel),y)
 CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1
@@ -45,16 +45,12 @@ CFLAGS += -mno-red-zone -fpic -fno-reord
 CFLAGS += -mno-red-zone -fpic -fno-reorder-blocks
 CFLAGS += -fno-asynchronous-unwind-tables
 # -fvisibility=hidden reduces -fpic cost, if it's available
-CFLAGS += $(call cc-option,$(CC),-fvisibility=hidden,)
-CFLAGS := $(subst -fvisibility=hidden,-DGCC_HAS_VISIBILITY_ATTRIBUTE,$(CFLAGS))
+ifneq ($(call cc-option,$(CC),-fvisibility=hidden,n),n)
+CFLAGS += -DGCC_HAS_VISIBILITY_ATTRIBUTE
+endif
 x86_32 := n
 x86_64 := y
 endif
 
-HDRS += $(wildcard $(BASEDIR)/include/asm-x86/hvm/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-x86/hvm/svm/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-x86/hvm/vmx/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-x86/mach-*/*.h)
-
 # Require GCC v3.4+ (to avoid issues with alignment constraints in Xen headers)
 $(call cc-ver-check,CC,0x030400,"Xen requires at least gcc-3.4")
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/acpi/cpu_idle.c      Wed Jan 28 13:06:45 2009 +0900
@@ -50,11 +50,6 @@
 
 #define DEBUG_PM_CX
 
-#define US_TO_PM_TIMER_TICKS(t)     ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
-#define PM_TIMER_TICKS_TO_US(t)     ((t * 1000) / (PM_TIMER_FREQUENCY / 1000))
-#define C2_OVERHEAD         4   /* 1us (3.579 ticks per us) */
-#define C3_OVERHEAD         4   /* 1us (3.579 ticks per us) */
-
 static void (*lapic_timer_off)(void);
 static void (*lapic_timer_on)(void);
 
@@ -366,7 +361,7 @@ static void acpi_processor_idle(void)
     cx->usage++;
     if ( sleep_ticks > 0 )
     {
-        power->last_residency = PM_TIMER_TICKS_TO_US(sleep_ticks);
+        power->last_residency = acpi_pm_tick_to_ns(sleep_ticks) / 1000UL;
         cx->time += sleep_ticks;
     }
 
@@ -611,7 +606,7 @@ static void set_cx(
     cx->latency  = xen_cx->latency;
     cx->power    = xen_cx->power;
     
-    cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
+    cx->latency_ticks = ns_to_acpi_pm_tick(cx->latency * 1000UL);
     cx->target_residency = cx->latency * latency_factor;
     if ( cx->type == ACPI_STATE_C1 || cx->type == ACPI_STATE_C2 )
         acpi_power->safe_state = cx;
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/acpi/power.c
--- a/xen/arch/x86/acpi/power.c Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/acpi/power.c Wed Jan 28 13:06:45 2009 +0900
@@ -221,6 +221,7 @@ static int enter_state(u32 state)
 
  enable_cpu:
     cpufreq_add_cpu(0);
+    microcode_resume_cpu(0);
     enable_nonboot_cpus();
     thaw_domains();
     spin_unlock(&pm_lock);
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c       Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/apic.c       Wed Jan 28 13:06:45 2009 +0900
@@ -40,7 +40,7 @@
 /*
  * Knob to control our willingness to enable the local APIC.
  */
-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
 
 /*
  * Debug level
@@ -742,7 +742,7 @@ static void __init lapic_disable(char *s
 static void __init lapic_disable(char *str)
 {
     enable_local_apic = -1;
-    clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+    setup_clear_cpu_cap(X86_FEATURE_APIC);
 }
 custom_param("nolapic", lapic_disable);
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/boot/Makefile
--- a/xen/arch/x86/boot/Makefile        Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/boot/Makefile        Wed Jan 28 13:06:45 2009 +0900
@@ -1,4 +1,1 @@ obj-y += head.o
 obj-y += head.o
-
-head.o: head.S $(TARGET_SUBARCH).S trampoline.S mem.S video.S \
-       cmdline.S edd.S wakeup.S
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/boot/mkelf32.c
--- a/xen/arch/x86/boot/mkelf32.c       Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/boot/mkelf32.c       Wed Jan 28 13:06:45 2009 +0900
@@ -25,7 +25,7 @@
 #define s16 int16_t
 #define s32 int32_t
 #define s64 int64_t
-#include "../../../include/public/elfstructs.h"
+#include "../../../include/xen/elfstructs.h"
 
 #define DYNAMICALLY_FILLED   0
 #define RAW_OFFSET         128
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/bzimage.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/bzimage.c    Wed Jan 28 13:06:45 2009 +0900
@@ -0,0 +1,242 @@
+#include <xen/cache.h>
+#include <xen/errno.h>
+#include <xen/lib.h>
+#include <xen/mm.h>
+#include <xen/string.h>
+#include <xen/types.h>
+
+#define HEAPORDER 3
+
+static unsigned char *window;
+#define memptr long
+static memptr free_mem_ptr;
+static memptr free_mem_end_ptr;
+
+#define WSIZE           0x80000000
+
+static unsigned char    *inbuf;
+static unsigned         insize;
+
+/* Index of next byte to be processed in inbuf: */
+static unsigned         inptr;
+
+/* Bytes in output buffer: */
+static unsigned         outcnt;
+
+#define OF(args)        args
+#define STATIC          static
+
+#define memzero(s, n)   memset((s), 0, (n))
+
+typedef unsigned char   uch;
+typedef unsigned short  ush;
+typedef unsigned long   ulg;
+
+#define INIT __init
+
+#define get_byte()      (inptr < insize ? inbuf[inptr++] : fill_inbuf())
+
+/* Diagnostic functions */
+#ifdef DEBUG
+#  define Assert(cond, msg) do { if (!(cond)) error(msg); } while (0)
+#  define Trace(x)      do { fprintf x; } while (0)
+#  define Tracev(x)     do { if (verbose) fprintf x ; } while (0)
+#  define Tracevv(x)    do { if (verbose > 1) fprintf x ; } while (0)
+#  define Tracec(c, x)  do { if (verbose && (c)) fprintf x ; } while (0)
+#  define Tracecv(c, x) do { if (verbose > 1 && (c)) fprintf x ; } while (0)
+#else
+#  define Assert(cond, msg)
+#  define Trace(x)
+#  define Tracev(x)
+#  define Tracevv(x)
+#  define Tracec(c, x)
+#  define Tracecv(c, x)
+#endif
+
+static long bytes_out;
+static void flush_window(void);
+
+static __init void error(char *x)
+{
+    printk("%s\n", x);
+    BUG();
+}
+
+static __init int fill_inbuf(void)
+{
+        error("ran out of input data");
+        return 0;
+}
+
+
+#include "../../common/inflate.c"
+
+static __init void flush_window(void)
+{
+    /*
+     * The window is equal to the output buffer therefore only need to
+     * compute the crc.
+     */
+    unsigned long c = crc;
+    unsigned n;
+    unsigned char *in, ch;
+
+    in = window;
+    for ( n = 0; n < outcnt; n++ )
+    {
+        ch = *in++;
+        c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
+    }
+    crc = c;
+
+    bytes_out += (unsigned long)outcnt;
+    outcnt = 0;
+}
+
+static __init int gzip_length(char *image, unsigned long image_len)
+{
+    return *(uint32_t *)&image[image_len - 4];
+}
+
+static  __init int perform_gunzip(char *output, char **_image_start, unsigned long *image_len)
+{
+    char *image = *_image_start;
+    int rc;
+    unsigned char magic0 = (unsigned char)image[0];
+    unsigned char magic1 = (unsigned char)image[1];
+
+    if ( magic0 != 0x1f || ( (magic1 != 0x8b) && (magic1 != 0x9e) ) )
+        return 0;
+
+    window = (unsigned char *)output;
+
+    free_mem_ptr = (unsigned long)alloc_xenheap_pages(HEAPORDER);
+    free_mem_end_ptr = free_mem_ptr + (PAGE_SIZE << HEAPORDER);
+
+    inbuf = (unsigned char *)image;
+    insize = *image_len;
+    inptr = 0;
+
+    makecrc();
+
+    if ( gunzip() < 0 )
+    {
+        rc = -EINVAL;
+    }
+    else
+    {
+        *_image_start = (char *)window;
+        *image_len = gzip_length(image, *image_len);
+        rc = 0;
+    }
+
+    free_xenheap_pages((void *)free_mem_ptr, HEAPORDER);
+
+    return rc;
+}
+
+struct setup_header {
+        uint8_t         _pad0[0x1f1];           /* skip uninteresting stuff */
+        uint8_t         setup_sects;
+        uint16_t        root_flags;
+        uint32_t        syssize;
+        uint16_t        ram_size;
+        uint16_t        vid_mode;
+        uint16_t        root_dev;
+        uint16_t        boot_flag;
+        uint16_t        jump;
+        uint32_t        header;
+#define HDR_MAGIC               "HdrS"
+#define HDR_MAGIC_SZ    4
+        uint16_t        version;
+#define VERSION(h,l)    (((h)<<8) | (l))
+        uint32_t        realmode_swtch;
+        uint16_t        start_sys;
+        uint16_t        kernel_version;
+        uint8_t         type_of_loader;
+        uint8_t         loadflags;
+        uint16_t        setup_move_size;
+        uint32_t        code32_start;
+        uint32_t        ramdisk_image;
+        uint32_t        ramdisk_size;
+        uint32_t        bootsect_kludge;
+        uint16_t        heap_end_ptr;
+        uint16_t        _pad1;
+        uint32_t        cmd_line_ptr;
+        uint32_t        initrd_addr_max;
+        uint32_t        kernel_alignment;
+        uint8_t         relocatable_kernel;
+        uint8_t         _pad2[3];
+        uint32_t        cmdline_size;
+        uint32_t        hardware_subarch;
+        uint64_t        hardware_subarch_data;
+        uint32_t        payload_offset;
+        uint32_t        payload_length;
+    } __attribute__((packed));
+
+static __init int bzimage_check(struct setup_header *hdr, unsigned long len)
+{
+    if ( len < sizeof(struct setup_header) )
+        return 0;
+
+    if ( memcmp(&hdr->header, HDR_MAGIC, HDR_MAGIC_SZ) != 0 )
+        return 0;
+
+    if ( hdr->version < VERSION(2,8) ) {
+        printk("Cannot load bzImage v%d.%02d at least v2.08 is required\n",
+           hdr->version >> 8, hdr->version & 0xff);
+        return -EINVAL;
+    }
+    return 1;
+}
+
+int __init bzimage_headroom(char *image_start, unsigned long image_length)
+{
+    struct setup_header *hdr = (struct setup_header *)image_start;
+    char *img;
+    int err, headroom;
+
+    err = bzimage_check(hdr, image_length);
+    if (err < 1)
+        return err;
+
+    img = image_start + (hdr->setup_sects+1) * 512;
+    img += hdr->payload_offset;
+
+    headroom = gzip_length(img, hdr->payload_length);
+    headroom += headroom >> 12; /* Add 8 bytes for every 32K input block */
+    headroom += (32768 + 18); /* Add 32K + 18 bytes of extra headroom */
+    headroom = (headroom + 4095) & ~4095;
+
+    return headroom;
+}
+
+int __init bzimage_parse(char *image_base, char **image_start, unsigned long *image_len)
+{
+    struct setup_header *hdr = (struct setup_header *)(*image_start);
+    int err = bzimage_check(hdr, *image_len);
+
+    if (err < 1)
+        return err;
+
+    BUG_ON(!(image_base < *image_start));
+
+    *image_start += (hdr->setup_sects+1) * 512;
+    *image_start += hdr->payload_offset;
+    *image_len = hdr->payload_length;
+
+    if ( (err = perform_gunzip(image_base, image_start, image_len)) < 0 )
+        return err;
+
+    return 0;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/cpu/common.c
--- a/xen/arch/x86/cpu/common.c Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/cpu/common.c Wed Jan 28 13:06:45 2009 +0900
@@ -29,6 +29,14 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM
  */
 u64 host_pat = 0x050100070406;
 
+static unsigned int __cpuinitdata cleared_caps[NCAPINTS];
+
+void __init setup_clear_cpu_cap(unsigned int cap)
+{
+       __clear_bit(cap, boot_cpu_data.x86_capability);
+       __set_bit(cap, cleared_caps);
+}
+
 static void default_init(struct cpuinfo_x86 * c)
 {
        /* Not much we can do here... */
@@ -235,6 +243,7 @@ static void __init early_cpu_detect(void
                if (c->x86 >= 0x6)
                        c->x86_model += ((tfms >> 16) & 0xF) << 4;
                c->x86_mask = tfms & 15;
+               cap0 &= ~cleared_caps[0];
                if (cap0 & (1<<19))
                        c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
                c->x86_capability[0] = cap0; /* Added for Xen bootstrap */
@@ -329,6 +338,7 @@ void __cpuinit identify_cpu(struct cpuin
        c->x86_vendor_id[0] = '\0'; /* Unset */
        c->x86_model_id[0] = '\0';  /* Unset */
        c->x86_max_cores = 1;
+       c->x86_num_siblings = 1;
        c->x86_clflush_size = 0;
        memset(&c->x86_capability, 0, sizeof c->x86_capability);
 
@@ -395,6 +405,9 @@ void __cpuinit identify_cpu(struct cpuin
        if (disable_pse)
                clear_bit(X86_FEATURE_PSE, c->x86_capability);
 
+       for (i = 0 ; i < NCAPINTS ; ++i)
+               c->x86_capability[i] &= ~cleared_caps[i];
+
        /* If the model name is still unset, do table lookup. */
        if ( !c->x86_model_id[0] ) {
                char *p;
@@ -468,27 +481,27 @@ void __cpuinit detect_ht(struct cpuinfo_
        if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
                return;
 
-       smp_num_siblings = (ebx & 0xff0000) >> 16;
-
-       if (smp_num_siblings == 1) {
+       c->x86_num_siblings = (ebx & 0xff0000) >> 16;
+
+       if (c->x86_num_siblings == 1) {
                printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
-       } else if (smp_num_siblings > 1 ) {
-
-               if (smp_num_siblings > NR_CPUS) {
-                       printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
-                       smp_num_siblings = 1;
+       } else if (c->x86_num_siblings > 1 ) {
+
+               if (c->x86_num_siblings > NR_CPUS) {
+                       printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", c->x86_num_siblings);
+                       c->x86_num_siblings = 1;
                        return;
                }
 
-               index_msb = get_count_order(smp_num_siblings);
+               index_msb = get_count_order(c->x86_num_siblings);
                phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
 
                printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
                       phys_proc_id[cpu]);
 
-               smp_num_siblings = smp_num_siblings / c->x86_max_cores;
-
-               index_msb = get_count_order(smp_num_siblings) ;
+               c->x86_num_siblings = c->x86_num_siblings / c->x86_max_cores;
+
+               index_msb = get_count_order(c->x86_num_siblings) ;
 
                core_bits = get_count_order(c->x86_max_cores);
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c       Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c       Wed Jan 28 13:06:45 2009 +0900
@@ -14,7 +14,6 @@ DEFINE_PER_CPU(cpu_banks_t, mce_banks_ow
 
 static int nr_intel_ext_msrs = 0;
 static int cmci_support = 0;
-extern int firstbank;
 
 #ifdef CONFIG_X86_MCE_THERMAL
 static void unexpected_thermal_interrupt(struct cpu_user_regs *regs)
@@ -121,7 +120,7 @@ static inline void intel_get_extended_ms
     if (nr_intel_ext_msrs == 0)
         return;
 
-    /*this function will called when CAP(9).MCG_EXT_P = 1*/
+    /* this function will called when CAP(9).MCG_EXT_P = 1 */
     memset(mc_ext, 0, sizeof(struct mcinfo_extended));
     mc_ext->common.type = MC_TYPE_EXTENDED;
     mc_ext->common.size = sizeof(mc_ext);
@@ -157,7 +156,7 @@ static inline void intel_get_extended_ms
  * 3. called in polling handler
  * It will generate a new mc_info item if found CE/UC errors. DOM0 is the 
  * consumer.
-*/
+ */
 static struct mc_info *machine_check_poll(int calltype)
 {
     struct mc_info *mi = NULL;
@@ -174,9 +173,9 @@ static struct mc_info *machine_check_pol
     memset(&mcg, 0, sizeof(mcg));
     mcg.common.type = MC_TYPE_GLOBAL;
     mcg.common.size = sizeof(mcg);
-    /*If called from cpu-reset check, don't need to fill them.
-     *If called from cmci context, we'll try to fill domid by memory addr
-    */
+    /* If called from cpu-reset check, don't need to fill them.
+     * If called from cmci context, we'll try to fill domid by memory addr
+     */
     mcg.mc_domid = -1;
     mcg.mc_vcpuid = -1;
     if (calltype == MC_FLAG_POLLED || calltype == MC_FLAG_RESET)
@@ -186,12 +185,13 @@ static struct mc_info *machine_check_pol
     mcg.mc_socketid = phys_proc_id[cpu];
     mcg.mc_coreid = cpu_core_id[cpu];
     mcg.mc_apicid = cpu_physical_id(cpu);
-    mcg.mc_core_threadid = mcg.mc_apicid & ( 1 << (smp_num_siblings - 1)); 
+    mcg.mc_core_threadid =
+        mcg.mc_apicid & ( 1 << (cpu_data[cpu].x86_num_siblings - 1)); 
     rdmsrl(MSR_IA32_MCG_STATUS, mcg.mc_gstatus);
 
     for ( i = 0; i < nr_mce_banks; i++ ) {
         struct mcinfo_bank mcb;
-        /*For CMCI, only owners checks the owned MSRs*/
+        /* For CMCI, only owners checks the owned MSRs */
         if ( !test_bit(i, __get_cpu_var(mce_banks_owned)) &&
              (calltype & MC_FLAG_CMCI) )
             continue;
@@ -240,7 +240,7 @@ static struct mc_info *machine_check_pol
         x86_mcinfo_add(mi, &mcb);
         nr_unit++;
         add_taint(TAINT_MACHINE_CHECK);
-        /*Clear state for this bank */
+        /* Clear state for this bank */
         wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0);
         printk(KERN_DEBUG "mcheck_poll: bank%i CPU%d status[%"PRIx64"]\n", 
                 i, cpu, status);
@@ -249,12 +249,12 @@ static struct mc_info *machine_check_pol
                 mcg.mc_coreid, mcg.mc_apicid, mcg.mc_core_threadid);
  
     }
-    /*if pcc = 1, uc must be 1*/
+    /* if pcc = 1, uc must be 1 */
     if (pcc)
         mcg.mc_flags |= MC_FLAG_UNCORRECTABLE;
     else if (uc)
         mcg.mc_flags |= MC_FLAG_RECOVERABLE;
-    else /*correctable*/
+    else /* correctable */
         mcg.mc_flags |= MC_FLAG_CORRECTABLE;
 
     if (nr_unit && nr_intel_ext_msrs && 
@@ -264,7 +264,7 @@ static struct mc_info *machine_check_pol
     }
     if (nr_unit) 
         x86_mcinfo_add(mi, &mcg);
-    /*Clear global state*/
+    /* Clear global state */
     return mi;
 }
 
@@ -541,8 +541,7 @@ static void mce_init(void)
      * This also clears all registers*/
 
     mi = machine_check_poll(MC_FLAG_RESET);
-    /*in the boot up stage, not expect inject to DOM0, but go print out
-    */
+    /* in the boot up stage, don't inject to DOM0, but print out */
     if (mi)
         x86_mcinfo_dump(mi);
 
@@ -553,22 +552,22 @@ static void mce_init(void)
 
     for (i = firstbank; i < nr_mce_banks; i++)
     {
-        /*Some banks are shared across cores, use MCi_CTRL to judge whether
-         * this bank has been initialized by other cores already.*/
+        /* Some banks are shared across cores, use MCi_CTRL to judge whether
+         * this bank has been initialized by other cores already. */
         rdmsr(MSR_IA32_MC0_CTL + 4*i, l, h);
-        if (!l & !h)
+        if (!(l | h))
         {
-            /*if ctl is 0, this bank is never initialized*/
+            /* if ctl is 0, this bank is never initialized */
             printk(KERN_DEBUG "mce_init: init bank%d\n", i);
             wrmsr (MSR_IA32_MC0_CTL + 4*i, 0xffffffff, 0xffffffff);
             wrmsr (MSR_IA32_MC0_STATUS + 4*i, 0x0, 0x0);
-       }
-    }
-    if (firstbank) /*if cmci enabled, firstbank = 0*/
+        }
+    }
+    if (firstbank) /* if cmci enabled, firstbank = 0 */
         wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
 }
 
-/*p4/p6 faimily has similar MCA initialization process*/
+/* p4/p6 families have a similar MCA initialization process */
 void intel_mcheck_init(struct cpuinfo_x86 *c)
 {
     mce_cap_init(c);
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/domain.c     Wed Jan 28 13:06:45 2009 +0900
@@ -143,7 +143,7 @@ void dump_pageframe_info(struct domain *
     {
         list_for_each_entry ( page, &d->page_list, list )
         {
-            printk("    DomPage %p: caf=%08x, taf=%" PRtype_info "\n",
+            printk("    DomPage %p: caf=%08lx, taf=%" PRtype_info "\n",
                    _p(page_to_mfn(page)),
                    page->count_info, page->u.inuse.type_info);
         }
@@ -156,7 +156,7 @@ void dump_pageframe_info(struct domain *
 
     list_for_each_entry ( page, &d->xenpage_list, list )
     {
-        printk("    XenPage %p: caf=%08x, taf=%" PRtype_info "\n",
+        printk("    XenPage %p: caf=%08lx, taf=%" PRtype_info "\n",
                _p(page_to_mfn(page)),
                page->count_info, page->u.inuse.type_info);
     }
@@ -405,8 +405,17 @@ int arch_domain_create(struct domain *d,
         if ( d->arch.ioport_caps == NULL )
             goto fail;
 
+#ifdef __i386__
         if ( (d->shared_info = alloc_xenheap_page()) == NULL )
             goto fail;
+#else
+        pg = alloc_domheap_page(
+            NULL, MEMF_node(domain_to_node(d)) | MEMF_bits(32));
+        if ( pg == NULL )
+            goto fail;
+        pg->count_info |= PGC_xen_heap;
+        d->shared_info = page_to_virt(pg);
+#endif
 
         clear_page(d->shared_info);
         share_xen_page_with_guest(
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/domain_build.c       Wed Jan 28 13:06:45 2009 +0900
@@ -19,6 +19,7 @@
 #include <xen/iocap.h>
 #include <xen/bitops.h>
 #include <xen/compat.h>
+#include <xen/libelf.h>
 #include <asm/regs.h>
 #include <asm/system.h>
 #include <asm/io.h>
@@ -30,7 +31,9 @@
 #include <asm/e820.h>
 
 #include <public/version.h>
-#include <public/libelf.h>
+
+int __init bzimage_parse(
+    char *output, char **image_start, unsigned long *image_len);
 
 extern unsigned long initial_images_nrpages(void);
 extern void discard_initial_images(void);
@@ -196,7 +199,8 @@ static void __init process_dom0_ioports_
 
 int __init construct_dom0(
     struct domain *d,
-    unsigned long _image_start, unsigned long image_len, 
+    unsigned long _image_base,
+    unsigned long _image_start, unsigned long image_len,
     unsigned long _initrd_start, unsigned long initrd_len,
     char *cmdline)
 {
@@ -213,9 +217,11 @@ int __init construct_dom0(
     struct vcpu *v = d->vcpu[0];
     unsigned long long value;
 #if defined(__i386__)
+    char *image_base   = (char *)_image_base;   /* use lowmem mappings */
     char *image_start  = (char *)_image_start;  /* use lowmem mappings */
     char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
 #elif defined(__x86_64__)
+    char *image_base   = __va(_image_base);
     char *image_start  = __va(_image_start);
     char *initrd_start = __va(_initrd_start);
 #endif
@@ -262,6 +268,9 @@ int __init construct_dom0(
 
     nr_pages = compute_dom0_nr_pages();
 
+    if ( (rc = bzimage_parse(image_base, &image_start, &image_len)) != 0 )
+        return rc;
+
     if ( (rc = elf_init(&elf, image_start, image_len)) != 0 )
         return rc;
 #ifdef VERBOSE
@@ -341,6 +350,12 @@ int __init construct_dom0(
 #endif
     }
 
+    if ( (parms.p2m_base != UNSET_ADDR) && elf_32bit(&elf) )
+    {
+        printk(XENLOG_WARNING "P2M table base ignored\n");
+        parms.p2m_base = UNSET_ADDR;
+    }
+
     domain_set_alloc_bitsize(d);
 
     /*
@@ -359,6 +374,8 @@ int __init construct_dom0(
     vphysmap_end     = vphysmap_start + (nr_pages * (!is_pv_32on64_domain(d) ?
                                                      sizeof(unsigned long) :
                                                      sizeof(unsigned int)));
+    if ( parms.p2m_base != UNSET_ADDR )
+        vphysmap_end = vphysmap_start;
     vstartinfo_start = round_pgup(vphysmap_end);
     vstartinfo_end   = (vstartinfo_start +
                         sizeof(struct start_info) +
@@ -400,6 +417,11 @@ int __init construct_dom0(
     /* Ensure that our low-memory 1:1 mapping covers the allocation. */
     page = alloc_domheap_pages(d, order, MEMF_bits(30));
 #else
+    if ( parms.p2m_base != UNSET_ADDR )
+    {
+        vphysmap_start = parms.p2m_base;
+        vphysmap_end   = vphysmap_start + nr_pages * sizeof(unsigned long);
+    }
     page = alloc_domheap_pages(d, order, 0);
 #endif
     if ( page == NULL )
@@ -429,14 +451,6 @@ int __init construct_dom0(
            _p(vstack_start), _p(vstack_end),
            _p(v_start), _p(v_end));
     printk(" ENTRY ADDRESS: %p\n", _p(parms.virt_entry));
-
-    if ( ((v_end - v_start)>>PAGE_SHIFT) > nr_pages )
-    {
-        printk("Initial guest OS requires too much space\n"
-               "(%luMB is greater than %luMB limit)\n",
-               (v_end-v_start)>>20, nr_pages>>(20-PAGE_SHIFT));
-        return -ENOMEM;
-    }
 
     mpt_alloc = (vpt_start - v_start) +
         (unsigned long)pfn_to_paddr(alloc_spfn);
@@ -748,8 +762,109 @@ int __init construct_dom0(
     snprintf(si->magic, sizeof(si->magic), "xen-3.0-x86_%d%s",
              elf_64bit(&elf) ? 64 : 32, parms.pae ? "p" : "");
 
+    count = d->tot_pages;
+#ifdef __x86_64__
+    /* Set up the phys->machine table if not part of the initial mapping. */
+    if ( parms.p2m_base != UNSET_ADDR )
+    {
+        unsigned long va = vphysmap_start;
+
+        if ( v_start <= vphysmap_end && vphysmap_start <= v_end )
+            panic("DOM0 P->M table overlaps initial mapping");
+
+        while ( va < vphysmap_end )
+        {
+            if ( d->tot_pages + ((round_pgup(vphysmap_end) - va)
+                                 >> PAGE_SHIFT) + 3 > nr_pages )
+                panic("Dom0 allocation too small for initial P->M table.\n");
+
+            l4tab = l4start + l4_table_offset(va);
+            if ( !l4e_get_intpte(*l4tab) )
+            {
+                page = alloc_domheap_page(d, 0);
+                if ( !page )
+                    break;
+                /* No mapping, PGC_allocated + page-table page. */
+                page->count_info = PGC_allocated | 2;
+                page->u.inuse.type_info =
+                    PGT_l3_page_table | PGT_validated | 1;
+                clear_page(page_to_virt(page));
+                *l4tab = l4e_from_page(page, L4_PROT);
+            }
+            l3tab = page_to_virt(l4e_get_page(*l4tab));
+            l3tab += l3_table_offset(va);
+            if ( !l3e_get_intpte(*l3tab) )
+            {
+                if ( cpu_has_page1gb &&
+                     !(va & ((1UL << L3_PAGETABLE_SHIFT) - 1)) &&
+                     vphysmap_end >= va + (1UL << L3_PAGETABLE_SHIFT) &&
+                     (page = alloc_domheap_pages(d,
+                                                 L3_PAGETABLE_SHIFT -
+                                                     PAGE_SHIFT,
+                                                 0)) != NULL )
+                {
+                    *l3tab = l3e_from_page(page,
+                                           L1_PROT|_PAGE_DIRTY|_PAGE_PSE);
+                    va += 1UL << L3_PAGETABLE_SHIFT;
+                    continue;
+                }
+                if ( (page = alloc_domheap_page(d, 0)) == NULL )
+                    break;
+                else
+                {
+                    /* No mapping, PGC_allocated + page-table page. */
+                    page->count_info = PGC_allocated | 2;
+                    page->u.inuse.type_info =
+                        PGT_l2_page_table | PGT_validated | 1;
+                    clear_page(page_to_virt(page));
+                    *l3tab = l3e_from_page(page, L3_PROT);
+                }
+            }
+            l2tab = page_to_virt(l3e_get_page(*l3tab));
+            l2tab += l2_table_offset(va);
+            if ( !l2e_get_intpte(*l2tab) )
+            {
+                if ( !(va & ((1UL << L2_PAGETABLE_SHIFT) - 1)) &&
+                     vphysmap_end >= va + (1UL << L2_PAGETABLE_SHIFT) &&
+                     (page = alloc_domheap_pages(d,
+                                                 L2_PAGETABLE_SHIFT -
+                                                     PAGE_SHIFT,
+                                                 0)) != NULL )
+                {
+                    *l2tab = l2e_from_page(page,
+                                           L1_PROT|_PAGE_DIRTY|_PAGE_PSE);
+                    va += 1UL << L2_PAGETABLE_SHIFT;
+                    continue;
+                }
+                if ( (page = alloc_domheap_page(d, 0)) == NULL )
+                    break;
+                else
+                {
+                    /* No mapping, PGC_allocated + page-table page. */
+                    page->count_info = PGC_allocated | 2;
+                    page->u.inuse.type_info =
+                        PGT_l1_page_table | PGT_validated | 1;
+                    clear_page(page_to_virt(page));
+                    *l2tab = l2e_from_page(page, L2_PROT);
+                }
+            }
+            l1tab = page_to_virt(l2e_get_page(*l2tab));
+            l1tab += l1_table_offset(va);
+            BUG_ON(l1e_get_intpte(*l1tab));
+            page = alloc_domheap_page(d, 0);
+            if ( !page )
+                break;
+            *l1tab = l1e_from_page(page, L1_PROT|_PAGE_DIRTY);
+            va += PAGE_SIZE;
+            va &= PAGE_MASK;
+        }
+        if ( !page )
+            panic("Not enough RAM for DOM0 P->M table.\n");
+    }
+#endif
+
     /* Write the phys->machine and machine->phys table entries. */
-    for ( pfn = 0; pfn < d->tot_pages; pfn++ )
+    for ( pfn = 0; pfn < count; pfn++ )
     {
         mfn = pfn + alloc_spfn;
 #ifndef NDEBUG
@@ -763,6 +878,26 @@ int __init construct_dom0(
             ((unsigned int *)vphysmap_start)[pfn] = mfn;
         set_gpfn_from_mfn(mfn, pfn);
     }
+    si->first_p2m_pfn = pfn;
+    si->nr_p2m_frames = d->tot_pages - count;
+    list_for_each_entry ( page, &d->page_list, list )
+    {
+        mfn = page_to_mfn(page);
+        if ( get_gpfn_from_mfn(mfn) >= count )
+        {
+            BUG_ON(is_pv_32bit_domain(d));
+            if ( !page->u.inuse.type_info &&
+                 !get_page_and_type(page, d, PGT_writable_page) )
+                BUG();
+            ((unsigned long *)vphysmap_start)[pfn] = mfn;
+            set_gpfn_from_mfn(mfn, pfn);
+            ++pfn;
+#ifndef NDEBUG
+            ++alloc_epfn;
+#endif
+        }
+    }
+    BUG_ON(pfn != d->tot_pages);
     while ( pfn < nr_pages )
     {
         if ( (page = alloc_chunk(d, nr_pages - d->tot_pages)) == NULL )
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/hvm/hvm.c    Wed Jan 28 13:06:45 2009 +0900
@@ -20,6 +20,7 @@
  */
 
 #include <xen/config.h>
+#include <xen/ctype.h>
 #include <xen/init.h>
 #include <xen/lib.h>
 #include <xen/trace.h>
@@ -272,6 +273,10 @@ static int hvm_print_line(
     char c = *val;
 
     BUG_ON(bytes != 1);
+
+    /* Accept only printable characters, newline, and horizontal tab. */
+    if ( !isprint(c) && (c != '\n') && (c != '\t') )
+        return X86EMUL_OKAY;
 
     spin_lock(&hd->pbuf_lock);
     hd->pbuf[hd->pbuf_idx++] = c;
@@ -1503,7 +1508,15 @@ static enum hvm_copy_result __hvm_copy(
 
         if ( flags & HVMCOPY_to_guest )
         {
-            if ( p2mt != p2m_ram_ro )
+            if ( p2mt == p2m_ram_ro )
+            {
+                static unsigned long lastpage;
+                if ( xchg(&lastpage, gfn) != gfn )
+                    gdprintk(XENLOG_DEBUG, "guest attempted write to read-only"
+                             " memory page. gfn=%#lx, mfn=%#lx\n",
+                             gfn, mfn);
+            }
+            else
             {
                 memcpy(p, buf, count);
                 paging_mark_dirty(curr->domain, mfn);
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/hvm/mtrr.c
--- a/xen/arch/x86/hvm/mtrr.c   Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/hvm/mtrr.c   Wed Jan 28 13:06:45 2009 +0900
@@ -702,12 +702,15 @@ HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save
                           1, HVMSR_PER_VCPU);
 
 uint8_t epte_get_entry_emt(
-    struct domain *d, unsigned long gfn, unsigned long mfn)
+    struct domain *d, unsigned long gfn, 
+    unsigned long mfn, uint8_t *igmt, int direct_mmio)
 {
     uint8_t gmtrr_mtype, hmtrr_mtype;
     uint32_t type;
     struct vcpu *v = current;
 
+    *igmt = 0;
+
     if ( (current->domain != d) && ((v = d->vcpu[0]) == NULL) )
         return MTRR_TYPE_WRBACK;
 
@@ -722,6 +725,21 @@ uint8_t epte_get_entry_emt(
 
     if ( hvm_get_mem_pinned_cacheattr(d, gfn, &type) )
         return type;
+
+    if ( !iommu_enabled )
+    {
+        *igmt = 1;
+        return MTRR_TYPE_WRBACK;
+    }
+
+    if ( direct_mmio )
+        return MTRR_TYPE_UNCACHABLE;
+
+    if ( iommu_snoop )
+    {
+        *igmt = 1;
+        return MTRR_TYPE_WRBACK;
+    }
 
     gmtrr_mtype = get_mtrr_type(&v->arch.hvm_vcpu.mtrr, (gfn << PAGE_SHIFT));
     hmtrr_mtype = get_mtrr_type(&mtrr_state, (mfn << PAGE_SHIFT));
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/hvm/vmsi.c
--- a/xen/arch/x86/hvm/vmsi.c   Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/hvm/vmsi.c   Wed Jan 28 13:06:45 2009 +0900
@@ -134,7 +134,7 @@ int vmsi_deliver(struct domain *d, int p
                 "vector=%x trig_mode=%x\n",
                 dest, dest_mode, delivery_mode, vector, trig_mode);
 
-    if ( !test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[pirq].flags) )
+    if ( !( hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_GUEST_MSI ) )
     {
         gdprintk(XENLOG_WARNING, "pirq %x not msi \n", pirq);
         return 0;
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Wed Jan 28 13:06:45 2009 +0900
@@ -167,14 +167,15 @@ static void vmx_init_vmcs_config(void)
 #endif
 
     min = VM_EXIT_ACK_INTR_ON_EXIT;
-    opt = 0;
+    opt = VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_LOAD_HOST_PAT;
 #ifdef __x86_64__
     min |= VM_EXIT_IA32E_MODE;
 #endif
     _vmx_vmexit_control = adjust_vmx_controls(
         min, opt, MSR_IA32_VMX_EXIT_CTLS);
 
-    min = opt = 0;
+    min = 0;
+    opt = VM_ENTRY_LOAD_GUEST_PAT;
     _vmx_vmentry_control = adjust_vmx_controls(
         min, opt, MSR_IA32_VMX_ENTRY_CTLS);
 
@@ -519,8 +520,6 @@ static int construct_vmcs(struct vcpu *v
 
     /* VMCS controls. */
     __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
-    __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
-    __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
 
     v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
     v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control;
@@ -534,12 +533,18 @@ static int construct_vmcs(struct vcpu *v
     else
     {
         v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
+        vmx_vmexit_control &= ~(VM_EXIT_SAVE_GUEST_PAT |
+                                VM_EXIT_LOAD_HOST_PAT);
+        vmx_vmentry_control &= ~VM_ENTRY_LOAD_GUEST_PAT;
     }
 
     /* Do not enable Monitor Trap Flag unless start single step debug */
     v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
 
     __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
+    __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
+    __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
+
     if ( cpu_has_vmx_secondary_exec_control )
         __vmwrite(SECONDARY_VM_EXEC_CONTROL,
                   v->arch.hvm_vmx.secondary_exec_control);
@@ -561,6 +566,8 @@ static int construct_vmcs(struct vcpu *v
         vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS);
         vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP);
         vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP);
+        if ( cpu_has_vmx_pat && paging_mode_hap(d) )
+            vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT);
     }
 
     /* I/O access bitmap. */
@@ -690,6 +697,21 @@ static int construct_vmcs(struct vcpu *v
         v->arch.hvm_vmx.vpid =
             v->domain->arch.hvm_domain.vmx.vpid_base + v->vcpu_id;
         __vmwrite(VIRTUAL_PROCESSOR_ID, v->arch.hvm_vmx.vpid);
+    }
+
+    if ( cpu_has_vmx_pat && paging_mode_hap(d) )
+    {
+        u64 host_pat, guest_pat;
+
+        rdmsrl(MSR_IA32_CR_PAT, host_pat);
+        guest_pat = 0x7040600070406ULL;
+
+        __vmwrite(HOST_PAT, host_pat);
+        __vmwrite(GUEST_PAT, guest_pat);
+#ifdef __i386__
+        __vmwrite(HOST_PAT_HIGH, host_pat >> 32);
+        __vmwrite(GUEST_PAT_HIGH, guest_pat >> 32);
+#endif
     }
 
     vmx_vmcs_exit(v);
@@ -989,6 +1011,8 @@ void vmcs_dump_vcpu(struct vcpu *v)
     vmx_dump_sel("LDTR", GUEST_LDTR_SELECTOR);
     vmx_dump_sel2("IDTR", GUEST_IDTR_LIMIT);
     vmx_dump_sel("TR", GUEST_TR_SELECTOR);
+    printk("Guest PAT = 0x%08x%08x\n",
+           (uint32_t)vmr(GUEST_PAT_HIGH), (uint32_t)vmr(GUEST_PAT));
     x  = (unsigned long long)vmr(TSC_OFFSET_HIGH) << 32;
     x |= (uint32_t)vmr(TSC_OFFSET);
     printk("TSC Offset = %016llx\n", x);
@@ -1027,6 +1051,8 @@ void vmcs_dump_vcpu(struct vcpu *v)
            (unsigned long long)vmr(HOST_SYSENTER_ESP),
            (int)vmr(HOST_SYSENTER_CS),
            (unsigned long long)vmr(HOST_SYSENTER_EIP));
+    printk("Host PAT = 0x%08x%08x\n",
+           (uint32_t)vmr(HOST_PAT_HIGH), (uint32_t)vmr(HOST_PAT));
 
     printk("*** Control State ***\n");
     printk("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/io_apic.c    Wed Jan 28 13:06:45 2009 +0900
@@ -84,7 +84,9 @@ int disable_timer_pin_1 __initdata;
 
 static struct irq_pin_list {
     int apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
+} irq_2_pin[PIN_MAP_SIZE] = {
+    [0 ... PIN_MAP_SIZE-1].pin = -1
+};
 static int irq_2_pin_free_entry = NR_IRQS;
 
 int vector_irq[NR_VECTORS] __read_mostly = {
@@ -1017,11 +1019,6 @@ static void __init enable_IO_APIC(void)
     int i8259_apic, i8259_pin;
     int i, apic;
     unsigned long flags;
-
-    for (i = 0; i < PIN_MAP_SIZE; i++) {
-        irq_2_pin[i].pin = -1;
-        irq_2_pin[i].next = 0;
-    }
 
     /* Initialise dynamic irq_2_pin free list. */
     for (i = NR_IRQS; i < PIN_MAP_SIZE; i++)
@@ -1557,11 +1554,14 @@ static unsigned int startup_msi_vector(u
 
 static void ack_msi_vector(unsigned int vector)
 {
-    ack_APIC_irq();
+    if ( msi_maskable_irq(irq_desc[vector].msi_desc) )
+        ack_APIC_irq(); /* ACKTYPE_NONE */
 }
 
 static void end_msi_vector(unsigned int vector)
 {
+    if ( !msi_maskable_irq(irq_desc[vector].msi_desc) )
+        ack_APIC_irq(); /* ACKTYPE_EOI */
 }
 
 static void shutdown_msi_vector(unsigned int vector)
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/irq.c        Wed Jan 28 13:06:45 2009 +0900
@@ -491,7 +491,7 @@ int pirq_guest_unmask(struct domain *d)
 }
 
 extern int ioapic_ack_new;
-int pirq_acktype(struct domain *d, int irq)
+static int pirq_acktype(struct domain *d, int irq)
 {
     irq_desc_t  *desc;
     unsigned int vector;
@@ -705,6 +705,10 @@ static irq_guest_action_t *__pirq_guest_
             spin_lock_irq(&desc->lock);
         }
         break;
+    case ACKTYPE_NONE:
+        stop_timer(&irq_guest_eoi_timer[vector]);
+        _irq_guest_eoi(desc);
+        break;
     }
 
     /*
@@ -853,10 +857,6 @@ int map_domain_pirq(
     ASSERT(spin_is_locked(&pcidevs_lock));
     ASSERT(spin_is_locked(&d->event_lock));
 
-    /* XXX Until pcidev and msi locking is fixed. */
-    if ( type == MAP_PIRQ_TYPE_MSI )
-        return -EINVAL;
-
     if ( !IS_PRIV(current->domain) )
         return -EPERM;
 
@@ -867,8 +867,8 @@ int map_domain_pirq(
         return -EINVAL;
     }
 
-    old_vector = d->arch.pirq_vector[pirq];
-    old_pirq = d->arch.vector_pirq[vector];
+    old_vector = domain_irq_to_vector(d, pirq);
+    old_pirq = domain_vector_to_irq(d, vector);
 
     if ( (old_vector && (old_vector != vector) ) ||
          (old_pirq && (old_pirq != pirq)) )
@@ -891,6 +891,10 @@ int map_domain_pirq(
     if ( type == MAP_PIRQ_TYPE_MSI )
     {
         struct msi_info *msi = (struct msi_info *)data;
+
+        ret = -ENODEV;
+        if ( !cpu_has_apic )
+            goto done;
 
         pdev = pci_get_pdev(msi->bus, msi->devfn);
         ret = pci_enable_msi(msi, &msi_desc);
@@ -937,7 +941,7 @@ int unmap_domain_pirq(struct domain *d, 
     ASSERT(spin_is_locked(&pcidevs_lock));
     ASSERT(spin_is_locked(&d->event_lock));
 
-    vector = d->arch.pirq_vector[pirq];
+    vector = domain_irq_to_vector(d, pirq);
     if ( vector <= 0 )
     {
         dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n",
@@ -958,7 +962,7 @@ int unmap_domain_pirq(struct domain *d, 
 
     spin_lock_irqsave(&desc->lock, flags);
 
-    BUG_ON(vector != d->arch.pirq_vector[pirq]);
+    BUG_ON(vector != domain_irq_to_vector(d, pirq));
 
     if ( msi_desc )
         teardown_msi_vector(vector);
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/machine_kexec.c
--- a/xen/arch/x86/machine_kexec.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/machine_kexec.c      Wed Jan 28 13:06:45 2009 +0900
@@ -150,6 +150,9 @@ void arch_crash_save_vmcoreinfo(void)
        VMCOREINFO_SYMBOL(dom_xen);
        VMCOREINFO_SYMBOL(dom_io);
 
+#ifdef CONFIG_X86_32
+    VMCOREINFO_SYMBOL(xenheap_phys_end);
+#endif
 #ifdef CONFIG_X86_PAE
        VMCOREINFO_SYMBOL_ALIAS(pgd_l3, idle_pg_table);
 #endif
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/microcode.c
--- a/xen/arch/x86/microcode.c  Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/microcode.c  Wed Jan 28 13:06:45 2009 +0900
@@ -49,31 +49,22 @@ struct microcode_info {
     char buffer[1];
 };
 
-static void microcode_fini_cpu(int cpu)
+static void __microcode_fini_cpu(int cpu)
 {
     struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
+    xfree(uci->mc.mc_valid);
+    memset(uci, 0, sizeof(*uci));
+}
+
+static void microcode_fini_cpu(int cpu)
+{
     spin_lock(&microcode_mutex);
-    xfree(uci->mc.valid_mc);
-    uci->mc.valid_mc = NULL;
-    uci->valid = 0;
+    __microcode_fini_cpu(cpu);
     spin_unlock(&microcode_mutex);
 }
 
-static int collect_cpu_info(int cpu)
-{
-    int err = 0;
-    struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
-    memset(uci, 0, sizeof(*uci));
-    err = microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
-    if ( !err )
-        uci->valid = 1;
-
-    return err;
-}
-
-static int microcode_resume_cpu(int cpu)
+int microcode_resume_cpu(int cpu)
 {
     int err = 0;
     struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
@@ -81,7 +72,7 @@ static int microcode_resume_cpu(int cpu)
 
     gdprintk(XENLOG_INFO, "microcode: CPU%d resumed\n", cpu);
 
-    if ( !uci->mc.valid_mc )
+    if ( !uci->mc.mc_valid )
         return -EIO;
 
     /*
@@ -95,16 +86,15 @@ static int microcode_resume_cpu(int cpu)
         return err;
     }
 
-    if ( memcmp(&nsig, &uci->cpu_sig, sizeof(nsig)) )
+    if ( microcode_ops->microcode_resume_match(cpu, &nsig) )
+    {
+        return microcode_ops->apply_microcode(cpu);
+    }
+    else
     {
         microcode_fini_cpu(cpu);
-        /* Should we look for a new ucode here? */
         return -EIO;
     }
-
-    err = microcode_ops->apply_microcode(cpu);
-
-    return err;
 }
 
 static int microcode_update_cpu(const void *buf, size_t size)
@@ -115,20 +105,11 @@ static int microcode_update_cpu(const vo
 
     spin_lock(&microcode_mutex);
 
-    /*
-     * Check if the system resume is in progress (uci->valid != NULL),
-     * otherwise just request a firmware:
-     */
-    if ( uci->valid )
-    {
-        err = microcode_resume_cpu(cpu);
-    }
+    err = microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
+    if ( likely(!err) )
+        err = microcode_ops->cpu_request_microcode(cpu, buf, size);
     else
-    {
-        err = collect_cpu_info(cpu);
-        if ( !err && uci->valid )
-            err = microcode_ops->cpu_request_microcode(cpu, buf, size);
-    }
+        __microcode_fini_cpu(cpu);
 
     spin_unlock(&microcode_mutex);
 
@@ -153,7 +134,6 @@ static long do_microcode_update(void *_i
     error = info->error;
     xfree(info);
     return error;
-
 }
 
 int microcode_update(XEN_GUEST_HANDLE(const_void) buf, unsigned long len)
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/microcode_amd.c
--- a/xen/arch/x86/microcode_amd.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/microcode_amd.c      Wed Jan 28 13:06:45 2009 +0900
@@ -38,21 +38,16 @@
 #define MC_HEADER_SIZE          (sizeof(struct microcode_header_amd))
 #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
 #define DWSIZE                  (sizeof(uint32_t))
-/* For now we support a fixed ucode total size only */
-#define get_totalsize(mc) \
-        ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \
-         + MC_HEADER_SIZE)
 
 /* serialize access to the physical write */
 static DEFINE_SPINLOCK(microcode_update_lock);
 
 struct equiv_cpu_entry *equiv_cpu_table;
 
-static long install_equiv_cpu_table(const void *, uint32_t, long);
-
 static int collect_cpu_info(int cpu, struct cpu_signature *csig)
 {
     struct cpuinfo_x86 *c = &cpu_data[cpu];
+    uint32_t dummy;
 
     memset(csig, 0, sizeof(*csig));
 
@@ -60,13 +55,10 @@ static int collect_cpu_info(int cpu, str
     {
         printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n",
                cpu);
-        return -1;
-    }
-
-    asm volatile (
-        "movl %1, %%ecx; rdmsr"
-        : "=a" (csig->rev)
-        : "i" (MSR_AMD_PATCHLEVEL) : "ecx" );
+        return -EINVAL;
+    }
+
+    rdmsr(MSR_AMD_PATCHLEVEL, csig->rev, dummy);
 
     printk(KERN_INFO "microcode: collect_cpu_info: patch_id=0x%x\n",
            csig->rev);
@@ -74,29 +66,17 @@ static int collect_cpu_info(int cpu, str
     return 0;
 }
 
-static int get_matching_microcode(void *mc, int cpu)
+static int microcode_fits(void *mc, int cpu)
 {
     struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
     struct microcode_header_amd *mc_header = mc;
-    unsigned long total_size = get_totalsize(mc_header);
-    void *new_mc;
     unsigned int current_cpu_id;
-    unsigned int equiv_cpu_id = 0x00;
+    unsigned int equiv_cpu_id = 0x0;
     unsigned int i;
 
     /* We should bind the task to the CPU */
     BUG_ON(cpu != raw_smp_processor_id());
 
-    /* This is a tricky part. We might be called from a write operation
-     * to the device file instead of the usual process of firmware
-     * loading. This routine needs to be able to distinguish both
-     * cases. This is done by checking if there already is a equivalent
-     * CPU table installed. If not, we're written through
-     * /dev/cpu/microcode.
-     * Since we ignore all checks. The error case in which going through
-     * firmware loading and that table is not loaded has already been
-     * checked earlier.
-     */
     if ( equiv_cpu_table == NULL )
     {
         printk(KERN_INFO "microcode: CPU%d microcode update with "
@@ -111,7 +91,7 @@ static int get_matching_microcode(void *
     {
         if ( current_cpu_id == equiv_cpu_table[i].installed_cpu )
         {
-            equiv_cpu_id = equiv_cpu_table[i].equiv_cpu;
+            equiv_cpu_id = equiv_cpu_table[i].equiv_cpu & 0xffff;
             break;
         }
     }
@@ -119,171 +99,136 @@ static int get_matching_microcode(void *
     if ( !equiv_cpu_id )
     {
         printk(KERN_ERR "microcode: CPU%d cpu_id "
-               "not found in equivalent cpu table \n", cpu);
-        return 0;
-    }
-
-    if ( (mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff) )
-    {
-        printk(KERN_INFO
-               "microcode: CPU%d patch does not match "
-               "(patch is %x, cpu extended is %x) \n",
-               cpu, mc_header->processor_rev_id[0],
-               (equiv_cpu_id & 0xff));
-        return 0;
-    }
-
-    if ( (mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff) )
+               "not found in equivalent cpu table\n", cpu);
+        return -EINVAL;
+    }
+
+    if ( (mc_header->processor_rev_id) != equiv_cpu_id )
     {
         printk(KERN_INFO "microcode: CPU%d patch does not match "
                "(patch is %x, cpu base id is %x) \n",
-               cpu, mc_header->processor_rev_id[1],
-               ((equiv_cpu_id >> 16) & 0xff));
-        return 0;
+               cpu, mc_header->processor_rev_id, equiv_cpu_id);
+        return -EINVAL;
     }
 
     if ( mc_header->patch_id <= uci->cpu_sig.rev )
-        return 0;
+        return -EINVAL;
 
     printk(KERN_INFO "microcode: CPU%d found a matching microcode "
            "update with version 0x%x (current=0x%x)\n",
            cpu, mc_header->patch_id, uci->cpu_sig.rev);
 
- out:
-    new_mc = xmalloc_bytes(UCODE_MAX_SIZE);
-    if ( new_mc == NULL )
-    {
-        printk(KERN_ERR "microcode: error, can't allocate memory\n");
-        return -ENOMEM;
-    }
-    memset(new_mc, 0, UCODE_MAX_SIZE);
-
-    /* free previous update file */
-    xfree(uci->mc.mc_amd);
-
-    memcpy(new_mc, mc, total_size);
-
-    uci->mc.mc_amd = new_mc;
-    return 1;
+out:
+    return 0;
 }
 
 static int apply_microcode(int cpu)
 {
     unsigned long flags;
-    uint32_t eax, edx, rev;
-    int cpu_num = raw_smp_processor_id();
-    struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
-    uint64_t addr;
+    struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+    uint32_t rev, dummy;
+    struct microcode_amd *mc_amd = uci->mc.mc_amd;
 
     /* We should bind the task to the CPU */
-    BUG_ON(cpu_num != cpu);
-
-    if ( uci->mc.mc_amd == NULL )
+    BUG_ON(raw_smp_processor_id() != cpu);
+
+    if ( mc_amd == NULL )
         return -EINVAL;
 
     spin_lock_irqsave(&microcode_update_lock, flags);
 
-    addr = (unsigned long)&uci->mc.mc_amd->hdr.data_code;
-    edx = (uint32_t)(addr >> 32);
-    eax = (uint32_t)addr;
-
-    asm volatile (
-        "movl %0, %%ecx; wrmsr" :
-        : "i" (MSR_AMD_PATCHLOADER), "a" (eax), "d" (edx) : "ecx" );
+    wrmsrl(MSR_AMD_PATCHLOADER, (unsigned long)&mc_amd->hdr.data_code);
 
     /* get patch id after patching */
-    asm volatile (
-        "movl %1, %%ecx; rdmsr"
-        : "=a" (rev)
-        : "i" (MSR_AMD_PATCHLEVEL) : "ecx");
+    rdmsr(MSR_AMD_PATCHLEVEL, rev, dummy);
 
     spin_unlock_irqrestore(&microcode_update_lock, flags);
 
     /* check current patch id and patch's id for match */
-    if ( rev != uci->mc.mc_amd->hdr.patch_id )
+    if ( rev != mc_amd->hdr.patch_id )
     {
         printk(KERN_ERR "microcode: CPU%d update from revision "
-               "0x%x to 0x%x failed\n", cpu_num,
-               uci->mc.mc_amd->hdr.patch_id, rev);
+               "0x%x to 0x%x failed\n", cpu,
+               mc_amd->hdr.patch_id, rev);
         return -EIO;
     }
 
     printk("microcode: CPU%d updated from revision "
            "0x%x to 0x%x \n",
-           cpu_num, uci->cpu_sig.rev, uci->mc.mc_amd->hdr.patch_id);
+           cpu, uci->cpu_sig.rev, mc_amd->hdr.patch_id);
 
     uci->cpu_sig.rev = rev;
 
     return 0;
 }
 
-static long get_next_ucode_from_buffer_amd(void **mc, const void *buf,
-                                           unsigned long size, long offset)
+static int get_next_ucode_from_buffer_amd(void *mc, const void *buf,
+                                         size_t size, unsigned long *offset)
 {
     struct microcode_header_amd *mc_header;
-    unsigned long total_size;
-    const uint8_t *buf_pos = buf;
+    size_t total_size;
+    const uint8_t *bufp = buf;
+    unsigned long off;
+
+    off = *offset;
 
     /* No more data */
-    if ( offset >= size )
-        return 0;
-
-    if ( buf_pos[offset] != UCODE_UCODE_TYPE )
+    if ( off >= size )
+        return 1;
+
+    if ( bufp[off] != UCODE_UCODE_TYPE )
     {
         printk(KERN_ERR "microcode: error! "
                "Wrong microcode payload type field\n");
         return -EINVAL;
     }
 
-    mc_header = (struct microcode_header_amd *)(&buf_pos[offset+8]);
-
-    total_size = (unsigned long) (buf_pos[offset+4] +
-                                  (buf_pos[offset+5] << 8));
+    mc_header = (struct microcode_header_amd *)(&bufp[off+8]);
+
+    total_size = (unsigned long) (bufp[off+4] + (bufp[off+5] << 8));
 
     printk(KERN_INFO "microcode: size %lu, total_size %lu, offset %ld\n",
-           size, total_size, offset);
-
-    if ( (offset + total_size) > size )
+           (unsigned long)size, total_size, off);
+
+    if ( (off + total_size) > size )
     {
         printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
         return -EINVAL;
     }
 
-    *mc = xmalloc_bytes(UCODE_MAX_SIZE);
-    if ( *mc == NULL )
-    {
-        printk(KERN_ERR "microcode: error! "
-               "Can not allocate memory for microcode patch\n");
-        return -ENOMEM;
-    }
-
-    memset(*mc, 0, UCODE_MAX_SIZE);
-    memcpy(*mc, (const void *)(buf + offset + 8), total_size);
-
-    return offset + total_size + 8;
-}
-
-static long install_equiv_cpu_table(const void *buf,
-                                    uint32_t size, long offset)
+    memset(mc, 0, UCODE_MAX_SIZE);
+    memcpy(mc, (const void *)(&bufp[off + 8]), total_size);
+
+    *offset = off + total_size + 8;
+
+    return 0;
+}
+
+static int install_equiv_cpu_table(const void *buf, uint32_t size,
+                                   unsigned long *offset)
 {
     const uint32_t *buf_pos = buf;
+    unsigned long off;
+
+    off = *offset;
+    *offset = 0;
 
     /* No more data */
-    if ( offset >= size )
-        return 0;
+    if ( off >= size )
+        return -EINVAL;
 
     if ( buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE )
     {
         printk(KERN_ERR "microcode: error! "
-               "Wrong microcode equivalnet cpu table type field\n");
-        return 0;
+               "Wrong microcode equivalent cpu table type field\n");
+        return -EINVAL;
     }
 
     if ( size == 0 )
     {
         printk(KERN_ERR "microcode: error! "
                "Wrong microcode equivalnet cpu table length\n");
-        return 0;
+        return -EINVAL;
     }
 
     equiv_cpu_table = xmalloc_bytes(size);
@@ -291,20 +236,24 @@ static long install_equiv_cpu_table(cons
     {
         printk(KERN_ERR "microcode: error, can't allocate "
                "memory for equiv CPU table\n");
-        return 0;
+        return -ENOMEM;
     }
 
     memset(equiv_cpu_table, 0, size);
     memcpy(equiv_cpu_table, (const void *)&buf_pos[3], size);
 
-    return size + 12; /* add header length */
+    *offset = size + 12;       /* add header length */
+
+    return 0;
 }
 
 static int cpu_request_microcode(int cpu, const void *buf, size_t size)
 {
     const uint32_t *buf_pos;
-    long offset = 0;
+    unsigned long offset = 0;
     int error = 0;
+    int ret;
+    struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
     void *mc;
 
     /* We should bind the task to the CPU */
@@ -319,41 +268,63 @@ static int cpu_request_microcode(int cpu
         return -EINVAL;
     }
 
-    offset = install_equiv_cpu_table(buf, (uint32_t)(buf_pos[2]), offset);
-    if ( !offset )
+    error = install_equiv_cpu_table(buf, (uint32_t)(buf_pos[2]), &offset);
+    if ( error )
     {
         printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
         return -EINVAL;
     }
 
-    while ( (offset =
-             get_next_ucode_from_buffer_amd(&mc, buf, size, offset)) > 0 )
-    {
-        error = get_matching_microcode(mc, cpu);
-        if ( error < 0 )
+    mc = xmalloc_bytes(UCODE_MAX_SIZE);
+    if ( mc == NULL )
+    {
+        printk(KERN_ERR "microcode: error! "
+               "Can not allocate memory for microcode patch\n");
+        error = -ENOMEM;
+        goto out;
+    }
+
+    /* implicitly validates uci->mc.mc_valid */
+    uci->mc.mc_amd = mc;
+
+    /*
+     * It's possible the data file has multiple matching ucode,
+     * lets keep searching till the latest version
+     */
+    while ( (ret = get_next_ucode_from_buffer_amd(mc, buf, size, &offset)) == 0)
+    {
+        error = microcode_fits(mc, cpu);
+        if (error != 0)
+            continue;
+
+        error = apply_microcode(cpu);
+        if (error == 0)
             break;
-        /*
-         * It's possible the data file has multiple matching ucode,
-         * lets keep searching till the latest version
-         */
-        if ( error == 1 )
-            error = apply_microcode(cpu);
+    }
+
+    /* On success keep the microcode patch for
+     * re-apply on resume.
+     */
+    if (error) {
         xfree(mc);
-    }
-    if ( offset > 0 )
-    {
-        xfree(mc);
-        xfree(equiv_cpu_table);
-        equiv_cpu_table = NULL;
-    }
-    if ( offset < 0 )
-        error = offset;
+        mc = NULL;
+    }
+    uci->mc.mc_amd = mc;
+
+out:
+    xfree(equiv_cpu_table);
+    equiv_cpu_table = NULL;
 
     return error;
 }
 
+static int microcode_resume_match(int cpu, struct cpu_signature *nsig)
+{
+    return 0;
+}
+
 static struct microcode_ops microcode_amd_ops = {
-    .get_matching_microcode           = get_matching_microcode,
+    .microcode_resume_match           = microcode_resume_match,
     .cpu_request_microcode            = cpu_request_microcode,
     .collect_cpu_info                 = collect_cpu_info,
     .apply_microcode                  = apply_microcode,
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/microcode_intel.c
--- a/xen/arch/x86/microcode_intel.c    Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/microcode_intel.c    Wed Jan 28 13:06:45 2009 +0900
@@ -64,6 +64,8 @@ static int collect_cpu_info(int cpu_num,
     struct cpuinfo_x86 *c = &cpu_data[cpu_num];
     unsigned int val[2];
 
+    BUG_ON(cpu_num != smp_processor_id());
+
     memset(csig, 0, sizeof(*csig));
 
     if ( (c->x86_vendor != X86_VENDOR_INTEL) || (c->x86 < 6) ||
@@ -323,6 +325,7 @@ static int cpu_request_microcode(int cpu
     long offset = 0;
     int error = 0;
     void *mc;
+    unsigned int matching_count = 0;
 
     /* We should bind the task to the CPU */
     BUG_ON(cpu != raw_smp_processor_id());
@@ -341,7 +344,7 @@ static int cpu_request_microcode(int cpu
          */
         if ( error == 1 )
         {
-            apply_microcode(cpu);
+            matching_count++;
             error = 0;
         }
         xfree(mc);
@@ -351,11 +354,22 @@ static int cpu_request_microcode(int cpu
     if ( offset < 0 )
         error = offset;
 
+    if ( !error && matching_count )
+        apply_microcode(cpu);
+
     return error;
 }
 
+static int microcode_resume_match(int cpu, struct cpu_signature *nsig)
+{
+    struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+    return (sigmatch(nsig->sig, uci->cpu_sig.sig, nsig->pf, uci->cpu_sig.pf) &&
+            (uci->cpu_sig.rev > nsig->rev));
+}
+
 static struct microcode_ops microcode_intel_ops = {
-    .get_matching_microcode           = get_matching_microcode,
+    .microcode_resume_match           = microcode_resume_match,
     .cpu_request_microcode            = cpu_request_microcode,
     .collect_cpu_info                 = collect_cpu_info,
     .apply_microcode                  = apply_microcode,
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/mm.c Wed Jan 28 13:06:45 2009 +0900
@@ -205,11 +205,6 @@ void __init init_frametable(void)
     }
 
     memset(frame_table, 0, nr_pages << PAGE_SHIFT);
-
-#if defined(__x86_64__)
-    for ( i = 0; i < max_page; i ++ )
-        spin_lock_init(&frame_table[i].lock);
-#endif
 }
 
 void __init arch_init_memory(void)
@@ -290,15 +285,16 @@ void __init arch_init_memory(void)
     subarch_init_memory();
 }
 
-int memory_is_conventional_ram(paddr_t p)
-{
+int page_is_conventional_ram(unsigned long mfn)
+{
+    uint64_t maddr = pfn_to_paddr(mfn);
     int i;
 
     for ( i = 0; i < e820.nr_map; i++ )
     {
         if ( (e820.map[i].type == E820_RAM) &&
-             (e820.map[i].addr <= p) &&
-             (e820.map[i].size > p) )
+             (e820.map[i].addr <= maddr) &&
+             ((e820.map[i].addr + e820.map[i].size) >= (maddr + PAGE_SIZE)) )
             return 1;
     }
 
@@ -329,7 +325,7 @@ void share_xen_page_with_guest(
 
     page_set_owner(page, d);
     wmb(); /* install valid domain ptr before updating refcnt. */
-    ASSERT(page->count_info == 0);
+    ASSERT((page->count_info & ~PGC_xen_heap) == 0);
 
     /* Only add to the allocation list if the domain isn't dying. */
     if ( !d->is_dying )
@@ -738,8 +734,8 @@ get_page_from_l1e(
     else if ( pte_flags_to_cacheattr(l1f) !=
               ((page->count_info >> PGC_cacheattr_base) & 7) )
     {
-        uint32_t x, nx, y = page->count_info;
-        uint32_t cacheattr = pte_flags_to_cacheattr(l1f);
+        unsigned long x, nx, y = page->count_info;
+        unsigned long cacheattr = pte_flags_to_cacheattr(l1f);
 
         if ( is_xen_heap_page(page) )
         {
@@ -1013,7 +1009,8 @@ static int put_page_from_l2e(l2_pgentry_
     {
         unsigned long mfn = l2e_get_pfn(l2e), m = mfn;
         int writeable = l2e_get_flags(l2e) & _PAGE_RW;
-        ASSERT(opt_allow_hugepage && !(mfn & (L1_PAGETABLE_ENTRIES-1)));
+
+        ASSERT(!(mfn & (L1_PAGETABLE_ENTRIES-1)));
         do {
             put_data_page(mfn_to_page(m), writeable);
         } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
@@ -1031,14 +1028,28 @@ static int put_page_from_l3e(l3_pgentry_
 static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
                              int partial, int preemptible)
 {
-    if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) && 
-         (l3e_get_pfn(l3e) != pfn) )
-    {
-        if ( unlikely(partial > 0) )
-            return __put_page_type(l3e_get_page(l3e), preemptible);
-        return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
-    }
-    return 1;
+    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) )
+        return 1;
+
+#ifdef __x86_64__
+    if ( unlikely(l3e_get_flags(l3e) & _PAGE_PSE) )
+    {
+        unsigned long mfn = l3e_get_pfn(l3e);
+        int writeable = l3e_get_flags(l3e) & _PAGE_RW;
+
+        ASSERT(!(mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)));
+        do {
+            put_data_page(mfn_to_page(mfn), writeable);
+        } while ( ++mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1) );
+
+        return 0;
+    }
+#endif
+
+    if ( unlikely(partial > 0) )
+        return __put_page_type(l3e_get_page(l3e), preemptible);
+
+    return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
 }
 
 #if CONFIG_PAGING_LEVELS >= 4
@@ -1523,24 +1534,31 @@ static int free_l4_table(struct page_inf
 #define free_l4_table(page, preemptible) (-EINVAL)
 #endif
 
-static void page_lock(struct page_info *page)
-{
-#if defined(__i386__)
-    while ( unlikely(test_and_set_bit(_PGC_locked, &page->count_info)) )
-        while ( test_bit(_PGC_locked, &page->count_info) )
+static int page_lock(struct page_info *page)
+{
+    unsigned long x, nx;
+
+    do {
+        while ( (x = page->u.inuse.type_info) & PGT_locked )
             cpu_relax();
-#else
-    spin_lock(&page->lock);
-#endif
+        nx = x + (1 | PGT_locked);
+        if ( !(x & PGT_validated) ||
+             !(x & PGT_count_mask) ||
+             !(nx & PGT_count_mask) )
+            return 0;
+    } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x );
+
+    return 1;
 }
 
 static void page_unlock(struct page_info *page)
 {
-#if defined(__i386__)
-    clear_bit(_PGC_locked, &page->count_info);
-#else
-    spin_unlock(&page->lock);
-#endif
+    unsigned long x, nx, y = page->u.inuse.type_info;
+
+    do {
+        x = y;
+        nx = x - (1 | PGT_locked);
+    } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x );
 }
 
 /* How to write an entry to the guest pagetables.
@@ -1603,19 +1621,15 @@ static int mod_l1_entry(l1_pgentry_t *pl
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
     unsigned long mfn;
-    struct page_info *l1pg = mfn_to_page(gl1mfn);
     p2m_type_t p2mt;
     int rc = 1;
 
-    page_lock(l1pg);
-
     if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
-        return page_unlock(l1pg), 0;
+        return 0;
 
     if ( unlikely(paging_mode_refcounts(d)) )
     {
         rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, preserve_ad);
-        page_unlock(l1pg);
         return rc;
     }
 
@@ -1624,13 +1638,12 @@ static int mod_l1_entry(l1_pgentry_t *pl
         /* Translate foreign guest addresses. */
         mfn = mfn_x(gfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e), &p2mt));
         if ( !p2m_is_ram(p2mt) || unlikely(mfn == INVALID_MFN) )
-            return page_unlock(l1pg), 0;
+            return 0;
         ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0);
         nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e));
 
         if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(d)) )
         {
-            page_unlock(l1pg);
             MEM_LOG("Bad L1 flags %x",
                     l1e_get_flags(nl1e) & l1_disallow_mask(d));
             return 0;
@@ -1642,12 +1655,11 @@ static int mod_l1_entry(l1_pgentry_t *pl
             adjust_guest_l1e(nl1e, d);
             rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
                               preserve_ad);
-            page_unlock(l1pg);
             return rc;
         }
 
         if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
-            return page_unlock(l1pg), 0;
+            return 0;
         
         adjust_guest_l1e(nl1e, d);
         if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
@@ -1660,11 +1672,9 @@ static int mod_l1_entry(l1_pgentry_t *pl
     else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
                                      preserve_ad)) )
     {
-        page_unlock(l1pg);
         return 0;
     }
 
-    page_unlock(l1pg);
     put_page_from_l1e(ol1e, d);
     return rc;
 }
@@ -1674,13 +1684,13 @@ static int mod_l2_entry(l2_pgentry_t *pl
 static int mod_l2_entry(l2_pgentry_t *pl2e, 
                         l2_pgentry_t nl2e, 
                         unsigned long pfn,
-                        unsigned long type,
                         int preserve_ad)
 {
     l2_pgentry_t ol2e;
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
     struct page_info *l2pg = mfn_to_page(pfn);
+    unsigned long type = l2pg->u.inuse.type_info;
     int rc = 1;
 
     if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
@@ -1689,16 +1699,13 @@ static int mod_l2_entry(l2_pgentry_t *pl
         return 0;
     }
 
-    page_lock(l2pg);
-
     if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
-        return page_unlock(l2pg), 0;
+        return 0;
 
     if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
     {
         if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
         {
-            page_unlock(l2pg);
             MEM_LOG("Bad L2 flags %x",
                     l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
             return 0;
@@ -1709,12 +1716,11 @@ static int mod_l2_entry(l2_pgentry_t *pl
         {
             adjust_guest_l2e(nl2e, d);
             rc = UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr, preserve_ad);
-            page_unlock(l2pg);
             return rc;
         }
 
         if ( unlikely(get_page_from_l2e(nl2e, pfn, d) < 0) )
-            return page_unlock(l2pg), 0;
+            return 0;
 
         adjust_guest_l2e(nl2e, d);
         if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
@@ -1727,11 +1733,9 @@ static int mod_l2_entry(l2_pgentry_t *pl
     else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
                                      preserve_ad)) )
     {
-        page_unlock(l2pg);
         return 0;
     }
 
-    page_unlock(l2pg);
     put_page_from_l2e(ol2e, pfn);
     return rc;
 }
@@ -1746,7 +1750,6 @@ static int mod_l3_entry(l3_pgentry_t *pl
     l3_pgentry_t ol3e;
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
-    struct page_info *l3pg = mfn_to_page(pfn);
     int rc = 0;
 
     if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
@@ -1762,16 +1765,13 @@ static int mod_l3_entry(l3_pgentry_t *pl
     if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) )
         return -EINVAL;
 
-    page_lock(l3pg);
-
     if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
-        return page_unlock(l3pg), -EFAULT;
+        return -EFAULT;
 
     if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
     {
         if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
         {
-            page_unlock(l3pg);
             MEM_LOG("Bad L3 flags %x",
                     l3e_get_flags(nl3e) & l3_disallow_mask(d));
             return -EINVAL;
@@ -1782,13 +1782,12 @@ static int mod_l3_entry(l3_pgentry_t *pl
         {
             adjust_guest_l3e(nl3e, d);
             rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
-            page_unlock(l3pg);
             return rc ? 0 : -EFAULT;
         }
 
         rc = get_page_from_l3e(nl3e, pfn, d, 0, preemptible);
         if ( unlikely(rc < 0) )
-            return page_unlock(l3pg), rc;
+            return rc;
         rc = 0;
 
         adjust_guest_l3e(nl3e, d);
@@ -1802,7 +1801,6 @@ static int mod_l3_entry(l3_pgentry_t *pl
     else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
                                      preserve_ad)) )
     {
-        page_unlock(l3pg);
         return -EFAULT;
     }
 
@@ -1814,7 +1812,6 @@ static int mod_l3_entry(l3_pgentry_t *pl
         pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
     }
 
-    page_unlock(l3pg);
     put_page_from_l3e(ol3e, pfn, 0, 0);
     return rc;
 }
@@ -1831,7 +1828,6 @@ static int mod_l4_entry(l4_pgentry_t *pl
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
     l4_pgentry_t ol4e;
-    struct page_info *l4pg = mfn_to_page(pfn);
     int rc = 0;
 
     if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
@@ -1840,16 +1836,13 @@ static int mod_l4_entry(l4_pgentry_t *pl
         return -EINVAL;
     }
 
-    page_lock(l4pg);
-
     if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
-        return page_unlock(l4pg), -EFAULT;
+        return -EFAULT;
 
     if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
     {
         if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
         {
-            page_unlock(l4pg);
             MEM_LOG("Bad L4 flags %x",
                     l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
             return -EINVAL;
@@ -1860,13 +1853,12 @@ static int mod_l4_entry(l4_pgentry_t *pl
         {
             adjust_guest_l4e(nl4e, d);
             rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad);
-            page_unlock(l4pg);
             return rc ? 0 : -EFAULT;
         }
 
         rc = get_page_from_l4e(nl4e, pfn, d, 0, preemptible);
         if ( unlikely(rc < 0) )
-            return page_unlock(l4pg), rc;
+            return rc;
         rc = 0;
 
         adjust_guest_l4e(nl4e, d);
@@ -1880,11 +1872,9 @@ static int mod_l4_entry(l4_pgentry_t *pl
     else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
                                      preserve_ad)) )
     {
-        page_unlock(l4pg);
         return -EFAULT;
     }
 
-    page_unlock(l4pg);
     put_page_from_l4e(ol4e, pfn, 0, 0);
     return rc;
 }
@@ -1893,7 +1883,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
 
 void put_page(struct page_info *page)
 {
-    u32 nx, x, y = page->count_info;
+    unsigned long nx, x, y = page->count_info;
 
     do {
         x  = y;
@@ -1911,36 +1901,30 @@ void put_page(struct page_info *page)
 
 int get_page(struct page_info *page, struct domain *domain)
 {
-    u32 x, nx, y = page->count_info;
-    u32 d, nd = page->u.inuse._domain;
-    u32 _domain = pickle_domptr(domain);
+    unsigned long x, y = page->count_info;
 
     do {
-        x  = y;
-        nx = x + 1;
-        d  = nd;
+        x = y;
         if ( unlikely((x & PGC_count_mask) == 0) ||  /* Not allocated? */
              /* Keep one spare reference to be acquired by get_page_light(). */
-             unlikely(((nx + 1) & PGC_count_mask) <= 1) || /* Overflow? */
-             unlikely(d != _domain) )                /* Wrong owner? */
-        {
-            if ( !_shadow_mode_refcounts(domain) && !domain->is_dying )
-                gdprintk(XENLOG_INFO,
-                         "Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%"
-                         PRtype_info "\n",
-                         page_to_mfn(page), domain, unpickle_domptr(d),
-                         x, page->u.inuse.type_info);
-            return 0;
-        }
-        asm volatile (
-            LOCK_PREFIX "cmpxchg8b %2"
-            : "=d" (nd), "=a" (y),
-            "=m" (*(volatile u64 *)(&page->count_info))
-            : "0" (d), "1" (x), "c" (d), "b" (nx) );
-    }
-    while ( unlikely(nd != d) || unlikely(y != x) );
-
-    return 1;
+             unlikely(((x + 2) & PGC_count_mask) <= 1) ) /* Overflow? */
+            goto fail;
+    }
+    while ( (y = cmpxchg(&page->count_info, x, x + 1)) != x );
+
+    if ( likely(page_get_owner(page) == domain) )
+        return 1;
+
+    put_page(page);
+
+ fail:
+    if ( !_shadow_mode_refcounts(domain) && !domain->is_dying )
+        gdprintk(XENLOG_INFO,
+                 "Error pfn %lx: rd=%p, od=%p, caf=%08lx, taf=%"
+                 PRtype_info "\n",
+                 page_to_mfn(page), domain, page_get_owner(page),
+                 y, page->u.inuse.type_info);
+    return 0;
 }
 
 /*
@@ -1953,7 +1937,7 @@ int get_page(struct page_info *page, str
  */
 static void get_page_light(struct page_info *page)
 {
-    u32 x, nx, y = page->count_info;
+    unsigned long x, nx, y = page->count_info;
 
     do {
         x  = y;
@@ -1994,7 +1978,7 @@ static int alloc_page_type(struct page_i
         rc = alloc_segdesc_page(page);
         break;
     default:
-        printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n", 
+        printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%lx\n", 
                type, page->u.inuse.type_info,
                page->count_info);
         rc = -EINVAL;
@@ -2018,7 +2002,7 @@ static int alloc_page_type(struct page_i
     {
         ASSERT(rc < 0);
         MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
-                PRtype_info ": caf=%08x taf=%" PRtype_info,
+                PRtype_info ": caf=%08lx taf=%" PRtype_info,
                 page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
                 type, page->count_info, page->u.inuse.type_info);
         page->u.inuse.type_info = 0;
@@ -2949,7 +2933,6 @@ int do_mmu_update(
     unsigned int cmd, done = 0;
     struct vcpu *v = current;
     struct domain *d = v->domain;
-    unsigned long type_info;
     struct domain_mmap_cache mapcache;
 
     if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
@@ -3021,24 +3004,9 @@ int do_mmu_update(
                           (unsigned long)(req.ptr & ~PAGE_MASK));
             page = mfn_to_page(mfn);
 
-            switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask )
+            if ( page_lock(page) )
             {
-            case PGT_l1_page_table:
-            case PGT_l2_page_table:
-            case PGT_l3_page_table:
-            case PGT_l4_page_table:
-            {
-                if ( paging_mode_refcounts(d) )
-                {
-                    MEM_LOG("mmu update on auto-refcounted domain!");
-                    break;
-                }
-
-                if ( unlikely(!get_page_type(
-                    page, type_info & (PGT_type_mask|PGT_pae_xen_l2))) )
-                    goto not_a_pt;
-
-                switch ( type_info & PGT_type_mask )
+                switch ( page->u.inuse.type_info & PGT_type_mask )
                 {
                 case PGT_l1_page_table:
                 {
@@ -3050,7 +3018,7 @@ int do_mmu_update(
                 case PGT_l2_page_table:
                 {
                     l2_pgentry_t l2e = l2e_from_intpte(req.val);
-                    okay = mod_l2_entry(va, l2e, mfn, type_info,
+                    okay = mod_l2_entry(va, l2e, mfn,
                                         cmd == MMU_PT_UPDATE_PRESERVE_AD);
                 }
                 break;
@@ -3072,31 +3040,23 @@ int do_mmu_update(
                 }
                 break;
 #endif
+                case PGT_writable_page:
+                    perfc_incr(writable_mmu_updates);
+                    okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
+                    break;
                 }
-
-                put_page_type(page);
+                page_unlock(page);
                 if ( rc == -EINTR )
                     rc = -EAGAIN;
             }
-            break;
-
-            default:
-            not_a_pt:
+            else if ( get_page_type(page, PGT_writable_page) )
             {
-                if ( unlikely(!get_page_type(page, PGT_writable_page)) )
-                    break;
-
                 perfc_incr(writable_mmu_updates);
-
                 okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
-
                 put_page_type(page);
             }
-            break;
-            }
 
             unmap_domain_page_with_cache(va, &mapcache);
-
             put_page(page);
             break;
 
@@ -3175,7 +3135,6 @@ static int create_grant_pte_mapping(
     void *va;
     unsigned long gmfn, mfn;
     struct page_info *page;
-    u32 type;
     l1_pgentry_t ol1e;
     struct domain *d = v->domain;
 
@@ -3196,21 +3155,23 @@ static int create_grant_pte_mapping(
     va = (void *)((unsigned long)va + ((unsigned long)pte_addr & ~PAGE_MASK));
     page = mfn_to_page(mfn);
 
-    type = page->u.inuse.type_info & PGT_type_mask;
-    if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
-    {
-        MEM_LOG("Grant map attempted to update a non-L1 page");
+    if ( !page_lock(page) )
+    {
         rc = GNTST_general_error;
         goto failed;
     }
 
-    page_lock(page);
+    if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+    {
+        page_unlock(page);
+        rc = GNTST_general_error;
+        goto failed;
+    }
 
     ol1e = *(l1_pgentry_t *)va;
     if ( !UPDATE_ENTRY(l1, (l1_pgentry_t *)va, ol1e, nl1e, mfn, v, 0) )
     {
         page_unlock(page);
-        put_page_type(page);
         rc = GNTST_general_error;
         goto failed;
     } 
@@ -3220,8 +3181,6 @@ static int create_grant_pte_mapping(
     if ( !paging_mode_refcounts(d) )
         put_page_from_l1e(ol1e, d);
 
-    put_page_type(page);
- 
  failed:
     unmap_domain_page(va);
     put_page(page);
@@ -3236,7 +3195,6 @@ static int destroy_grant_pte_mapping(
     void *va;
     unsigned long gmfn, mfn;
     struct page_info *page;
-    u32 type;
     l1_pgentry_t ol1e;
 
     gmfn = addr >> PAGE_SHIFT;
@@ -3252,15 +3210,18 @@ static int destroy_grant_pte_mapping(
     va = (void *)((unsigned long)va + ((unsigned long)addr & ~PAGE_MASK));
     page = mfn_to_page(mfn);
 
-    type = page->u.inuse.type_info & PGT_type_mask;
-    if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
-    {
-        MEM_LOG("Grant map attempted to update a non-L1 page");
+    if ( !page_lock(page) )
+    {
         rc = GNTST_general_error;
         goto failed;
     }
 
-    page_lock(page);
+    if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+    {
+        page_unlock(page);
+        rc = GNTST_general_error;
+        goto failed;
+    }
 
     ol1e = *(l1_pgentry_t *)va;
     
@@ -3270,7 +3231,6 @@ static int destroy_grant_pte_mapping(
         page_unlock(page);
         MEM_LOG("PTE entry %lx for address %"PRIx64" doesn't match frame %lx",
                 (unsigned long)l1e_get_intpte(ol1e), addr, frame);
-        put_page_type(page);
         rc = GNTST_general_error;
         goto failed;
     }
@@ -3284,13 +3244,11 @@ static int destroy_grant_pte_mapping(
     {
         page_unlock(page);
         MEM_LOG("Cannot delete PTE entry at %p", va);
-        put_page_type(page);
         rc = GNTST_general_error;
         goto failed;
     }
 
     page_unlock(page);
-    put_page_type(page);
 
  failed:
     unmap_domain_page(va);
@@ -3318,21 +3276,40 @@ static int create_grant_va_mapping(
         MEM_LOG("Could not find L1 PTE for address %lx", va);
         return GNTST_general_error;
     }
+
+    if ( !get_page_from_pagenr(gl1mfn, current->domain) )
+    {
+        guest_unmap_l1e(v, pl1e);
+        return GNTST_general_error;
+    }
+
     l1pg = mfn_to_page(gl1mfn);
-    page_lock(l1pg);
+    if ( !page_lock(l1pg) )
+    {
+        put_page(l1pg);
+        guest_unmap_l1e(v, pl1e);
+        return GNTST_general_error;
+    }
+
+    if ( (l1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+    {
+        page_unlock(l1pg);
+        put_page(l1pg);
+        guest_unmap_l1e(v, pl1e);
+        return GNTST_general_error;
+    }
+
     ol1e = *pl1e;
     okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0);
+
     page_unlock(l1pg);
+    put_page(l1pg);
     guest_unmap_l1e(v, pl1e);
-    pl1e = NULL;
-
-    if ( !okay )
-            return GNTST_general_error;
-
-    if ( !paging_mode_refcounts(d) )
+
+    if ( okay && !paging_mode_refcounts(d) )
         put_page_from_l1e(ol1e, d);
 
-    return GNTST_okay;
+    return okay ? GNTST_okay : GNTST_general_error;
 }
 
 static int replace_grant_va_mapping(
@@ -3350,31 +3327,48 @@ static int replace_grant_va_mapping(
         return GNTST_general_error;
     }
 
+    if ( !get_page_from_pagenr(gl1mfn, current->domain) )
+    {
+        rc = GNTST_general_error;
+        goto out;
+    }
+
     l1pg = mfn_to_page(gl1mfn);
-    page_lock(l1pg);
+    if ( !page_lock(l1pg) )
+    {
+        rc = GNTST_general_error;
+        put_page(l1pg);
+        goto out;
+    }
+
+    if ( (l1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+    {
+        rc = GNTST_general_error;
+        goto unlock_and_out;
+    }
+
     ol1e = *pl1e;
 
     /* Check that the virtual address supplied is actually mapped to frame. */
     if ( unlikely(l1e_get_pfn(ol1e) != frame) )
     {
-        page_unlock(l1pg);
         MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
                 l1e_get_pfn(ol1e), addr, frame);
         rc = GNTST_general_error;
-        goto out;
+        goto unlock_and_out;
     }
 
     /* Delete pagetable entry. */
     if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) )
     {
-        page_unlock(l1pg);
         MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
         rc = GNTST_general_error;
-        goto out;
-    }
-
+        goto unlock_and_out;
+    }
+
+ unlock_and_out:
     page_unlock(l1pg);
-
+    put_page(l1pg);
  out:
     guest_unmap_l1e(v, pl1e);
     return rc;
@@ -3436,20 +3430,42 @@ int replace_grant_host_mapping(
         return GNTST_general_error;
     }
 
+    if ( !get_page_from_pagenr(gl1mfn, current->domain) )
+    {
+        guest_unmap_l1e(curr, pl1e);
+        return GNTST_general_error;
+    }
+
     l1pg = mfn_to_page(gl1mfn);
-    page_lock(l1pg);
+    if ( !page_lock(l1pg) )
+    {
+        put_page(l1pg);
+        guest_unmap_l1e(curr, pl1e);
+        return GNTST_general_error;
+    }
+
+    if ( (l1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+    {
+        page_unlock(l1pg);
+        put_page(l1pg);
+        guest_unmap_l1e(curr, pl1e);
+        return GNTST_general_error;
+    }
+
     ol1e = *pl1e;
 
     if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(),
                                 gl1mfn, curr, 0)) )
     {
         page_unlock(l1pg);
+        put_page(l1pg);
         MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
         guest_unmap_l1e(curr, pl1e);
         return GNTST_general_error;
     }
 
     page_unlock(l1pg);
+    put_page(l1pg);
     guest_unmap_l1e(curr, pl1e);
 
     rc = replace_grant_va_mapping(addr, frame, ol1e, curr);
@@ -3462,49 +3478,47 @@ int steal_page(
 int steal_page(
     struct domain *d, struct page_info *page, unsigned int memflags)
 {
-    u32 _d, _nd, x, y;
+    unsigned long x, y;
 
     spin_lock(&d->page_alloc_lock);
 
+    if ( is_xen_heap_page(page) || (page_get_owner(page) != d) )
+        goto fail;
+
     /*
-     * The tricky bit: atomically release ownership while there is just one 
-     * benign reference to the page (PGC_allocated). If that reference 
-     * disappears then the deallocation routine will safely spin.
+     * We require there is just one reference (PGC_allocated). We temporarily
+     * drop this reference now so that we can safely swizzle the owner.
      */
-    _d  = pickle_domptr(d);
-    _nd = page->u.inuse._domain;
-    y   = page->count_info;
+    y = page->count_info;
     do {
         x = y;
-        if ( unlikely((x & (PGC_count_mask|PGC_allocated)) !=
-                      (1 | PGC_allocated)) || unlikely(_nd != _d) )
-        { 
-            MEM_LOG("gnttab_transfer: Bad page %p: ed=%p(%u), sd=%p,"
-                    " caf=%08x, taf=%" PRtype_info "\n", 
-                    (void *) page_to_mfn(page),
-                    d, d->domain_id, unpickle_domptr(_nd), x, 
-                    page->u.inuse.type_info);
-            spin_unlock(&d->page_alloc_lock);
-            return -1;
-        }
-        asm volatile (
-            LOCK_PREFIX "cmpxchg8b %2"
-            : "=d" (_nd), "=a" (y),
-            "=m" (*(volatile u64 *)(&page->count_info))
-            : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
-    } while (unlikely(_nd != _d) || unlikely(y != x));
-
-    /*
-     * Unlink from 'd'. At least one reference remains (now anonymous), so 
-     * noone else is spinning to try to delete this page from 'd'.
-     */
+        if ( (x & (PGC_count_mask|PGC_allocated)) != (1 | PGC_allocated) )
+            goto fail;
+        y = cmpxchg(&page->count_info, x, x & ~PGC_count_mask);
+    } while ( y != x );
+
+    /* Swizzle the owner then reinstate the PGC_allocated reference. */
+    page_set_owner(page, NULL);
+    y = page->count_info;
+    do {
+        x = y;
+        BUG_ON((x & (PGC_count_mask|PGC_allocated)) != PGC_allocated);
+    } while ( (y = cmpxchg(&page->count_info, x, x | 1)) != x );
+
+    /* Unlink from original owner. */
     if ( !(memflags & MEMF_no_refcount) )
         d->tot_pages--;
     list_del(&page->list);
 
     spin_unlock(&d->page_alloc_lock);
-
     return 0;
+
+ fail:
+    spin_unlock(&d->page_alloc_lock);
+    MEM_LOG("Bad page %p: ed=%p(%u), sd=%p, caf=%08lx, taf=%" PRtype_info,
+            (void *)page_to_mfn(page), d, d->domain_id,
+            page_get_owner(page), page->count_info, page->u.inuse.type_info);
+    return -1;
 }
 
 int do_update_va_mapping(unsigned long va, u64 val64,
@@ -3513,28 +3527,45 @@ int do_update_va_mapping(unsigned long v
     l1_pgentry_t   val = l1e_from_intpte(val64);
     struct vcpu   *v   = current;
     struct domain *d   = v->domain;
+    struct page_info *gl1pg;
     l1_pgentry_t  *pl1e;
     unsigned long  vmask, bmap_ptr, gl1mfn;
     cpumask_t      pmask;
-    int            rc  = 0;
+    int            rc;
 
     perfc_incr(calls_to_update_va);
-
-    if ( unlikely(!access_ok(va, 1) && !paging_mode_external(d)) )
-        return -EINVAL;
 
     rc = xsm_update_va_mapping(d, FOREIGNDOM, val);
     if ( rc )
         return rc;
 
+    rc = -EINVAL;
     pl1e = guest_map_l1e(v, va, &gl1mfn);
-
-    if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn, 0)) )
-        rc = -EINVAL;
-
+    if ( unlikely(!pl1e || !get_page_from_pagenr(gl1mfn, d)) )
+        goto out;
+
+    gl1pg = mfn_to_page(gl1mfn);
+    if ( !page_lock(gl1pg) )
+    {
+        put_page(gl1pg);
+        goto out;
+    }
+
+    if ( (gl1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+    {
+        page_unlock(gl1pg);
+        put_page(gl1pg);
+        goto out;
+    }
+
+    rc = mod_l1_entry(pl1e, val, gl1mfn, 0) ? 0 : -EINVAL;
+
+    page_unlock(gl1pg);
+    put_page(gl1pg);
+
+ out:
     if ( pl1e )
         guest_unmap_l1e(v, pl1e);
-    pl1e = NULL;
 
     process_deferred_ops();
 
@@ -3793,14 +3824,13 @@ long arch_memory_op(int op, XEN_GUEST_HA
 
             spin_unlock(&d->grant_table->lock);
             break;
-        case XENMAPSPACE_mfn:
-        {
-            if ( get_page_from_pagenr(xatp.idx, d) ) {
-                mfn = xatp.idx;
-                page = mfn_to_page(mfn);
-            }
+        case XENMAPSPACE_gmfn:
+            xatp.idx = gmfn_to_mfn(d, xatp.idx);
+            if ( !get_page_from_pagenr(xatp.idx, d) )
+                break;
+            mfn = xatp.idx;
+            page = mfn_to_page(mfn);
             break;
-        }
         default:
             break;
         }
@@ -3839,39 +3869,6 @@ long arch_memory_op(int op, XEN_GUEST_HA
 
         if ( page )
             put_page(page);
-
-        rcu_unlock_domain(d);
-
-        break;
-    }
-
-    case XENMEM_remove_from_physmap:
-    {
-        struct xen_remove_from_physmap xrfp;
-        unsigned long mfn;
-        struct domain *d;
-
-        if ( copy_from_guest(&xrfp, arg, 1) )
-            return -EFAULT;
-
-        rc = rcu_lock_target_domain_by_id(xrfp.domid, &d);
-        if ( rc != 0 )
-            return rc;
-
-        if ( xsm_remove_from_physmap(current->domain, d) )
-        {
-            rcu_unlock_domain(d);
-            return -EPERM;
-        }
-
-        domain_lock(d);
-
-        mfn = gmfn_to_mfn(d, xrfp.gpfn);
-
-        if ( mfn_valid(mfn) )
-            guest_physmap_remove_page(d, xrfp.gpfn, mfn, 0);
-
-        domain_unlock(d);
 
         rcu_unlock_domain(d);
 
@@ -4245,15 +4242,25 @@ int ptwr_do_page_fault(struct vcpu *v, u
 
     /* Attempt to read the PTE that maps the VA being accessed. */
     guest_get_eff_l1e(v, addr, &pte);
-    page = l1e_get_page(pte);
 
     /* We are looking only for read-only mappings of p.t. pages. */
     if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) ||
-         !mfn_valid(l1e_get_pfn(pte)) ||
-         ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
-         ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
-         (page_get_owner(page) != d) )
+         !get_page_from_pagenr(l1e_get_pfn(pte), d) )
         goto bail;
+
+    page = l1e_get_page(pte);
+    if ( !page_lock(page) )
+    {
+        put_page(page);
+        goto bail;
+    }
+
+    if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+    {
+        page_unlock(page);
+        put_page(page);
+        goto bail;
+    }
 
     ptwr_ctxt.ctxt.regs = regs;
     ptwr_ctxt.ctxt.force_writeback = 0;
@@ -4262,9 +4269,11 @@ int ptwr_do_page_fault(struct vcpu *v, u
     ptwr_ctxt.cr2 = addr;
     ptwr_ctxt.pte = pte;
 
-    page_lock(page);
     rc = x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops);
+
     page_unlock(page);
+    put_page(page);
+
     if ( rc == X86EMUL_UNHANDLEABLE )
         goto bail;
 
@@ -4741,12 +4750,18 @@ void memguard_init(void)
 void memguard_init(void)
 {
     unsigned long start = max_t(unsigned long, xen_phys_start, 1UL << 20);
+#ifdef __i386__
     map_pages_to_xen(
         (unsigned long)__va(start),
         start >> PAGE_SHIFT,
         (xenheap_phys_end - start) >> PAGE_SHIFT,
         __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
-#ifdef __x86_64__
+#else
+    map_pages_to_xen(
+        (unsigned long)__va(start),
+        start >> PAGE_SHIFT,
+        (__pa(&_end) + PAGE_SIZE - 1 - start) >> PAGE_SHIFT,
+        __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
     BUG_ON(start != xen_phys_start);
     map_pages_to_xen(
         XEN_VIRT_START,
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/mm/Makefile
--- a/xen/arch/x86/mm/Makefile  Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/mm/Makefile  Wed Jan 28 13:06:45 2009 +0900
@@ -7,5 +7,5 @@ obj-y += guest_walk_3.o
 obj-y += guest_walk_3.o
 obj-$(x86_64) += guest_walk_4.o
 
-guest_walk_%.o: guest_walk.c $(HDRS) Makefile
+guest_walk_%.o: guest_walk.c Makefile
        $(CC) $(CFLAGS) -DGUEST_PAGING_LEVELS=$* -c $< -o $@
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/mm/hap/Makefile
--- a/xen/arch/x86/mm/hap/Makefile      Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/mm/hap/Makefile      Wed Jan 28 13:06:45 2009 +0900
@@ -7,5 +7,5 @@ guest_levels  = $(subst level,,$(filter 
 guest_levels  = $(subst level,,$(filter %level,$(subst ., ,$(subst _, ,$(1)))))
 guest_walk_defns = -DGUEST_PAGING_LEVELS=$(call guest_levels,$(1))
 
-guest_walk_%level.o: guest_walk.c $(HDRS) Makefile
+guest_walk_%level.o: guest_walk.c Makefile
        $(CC) $(CFLAGS) $(call guest_walk_defns,$(@F)) -c $< -o $@
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/mm/hap/hap.c Wed Jan 28 13:06:45 2009 +0900
@@ -166,7 +166,7 @@ void hap_free_p2m_page(struct domain *d,
     ASSERT(page_get_owner(pg) == d);
     /* Should have just the one ref we gave it in alloc_p2m_page() */
     if ( (pg->count_info & PGC_count_mask) != 1 )
-        HAP_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n",
+        HAP_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n",
                   pg->count_info, pg->u.inuse.type_info);
     pg->count_info = 0;
     /* Free should not decrement domain's total allocation, since
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c     Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/mm/hap/p2m-ept.c     Wed Jan 28 13:06:45 2009 +0900
@@ -66,6 +66,7 @@ static int ept_set_middle_entry(struct d
     list_add_tail(&pg->list, &d->arch.p2m->pages);
 
     ept_entry->emt = 0;
+    ept_entry->igmt = 0;
     ept_entry->sp_avail = 0;
     ept_entry->avail1 = 0;
     ept_entry->mfn = page_to_mfn(pg);
@@ -114,9 +115,13 @@ static int ept_next_level(struct domain 
     }
 }
 
+/*
+ * TODO: ept_set_entry() computes 'need_modify_vtd_table' for itself,
+ * by observing whether any gfn->mfn translations are modified.
+ */
 static int
-ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 
-              unsigned int order, p2m_type_t p2mt)
+_ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 
+              unsigned int order, p2m_type_t p2mt, int need_modify_vtd_table)
 {
     ept_entry_t *table = NULL;
     unsigned long gfn_remainder = gfn, offset = 0;
@@ -124,6 +129,8 @@ ept_set_entry(struct domain *d, unsigned
     u32 index;
     int i, rv = 0, ret = 0;
     int walk_level = order / EPT_TABLE_ORDER;
+    int direct_mmio = (p2mt == p2m_mmio_direct);
+    uint8_t igmt = 0;
 
     /* we only support 4k and 2m pages now */
 
@@ -157,7 +164,9 @@ ept_set_entry(struct domain *d, unsigned
     {
         if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
         {
-            ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn));
+            ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn),
+                                &igmt, direct_mmio);
+            ept_entry->igmt = igmt;
             ept_entry->sp_avail = walk_level ? 1 : 0;
 
             if ( ret == GUEST_TABLE_SUPER_PAGE )
@@ -208,7 +217,10 @@ ept_set_entry(struct domain *d, unsigned
         {
             split_ept_entry = split_table + i;
             split_ept_entry->emt = epte_get_entry_emt(d,
-                                        gfn-offset+i, split_mfn+i);
+                                        gfn-offset+i, split_mfn+i, 
+                                        &igmt, direct_mmio);
+            split_ept_entry->igmt = igmt;
+
             split_ept_entry->sp_avail =  0;
 
             split_ept_entry->mfn = split_mfn+i;
@@ -223,7 +235,10 @@ ept_set_entry(struct domain *d, unsigned
 
         /* Set the destinated 4k page as normal */
         split_ept_entry = split_table + offset;
-        split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn));
+        split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn), 
+                                                &igmt, direct_mmio);
+        split_ept_entry->igmt = igmt;
+
         split_ept_entry->mfn = mfn_x(mfn);
         split_ept_entry->avail1 = p2mt;
         ept_p2m_type_to_flags(split_ept_entry, p2mt);
@@ -246,7 +261,8 @@ out:
 
     /* Now the p2m table is not shared with vt-d page table */
 
-    if ( iommu_enabled && is_hvm_domain(d) )
+    if ( iommu_enabled && is_hvm_domain(d)  
+             && need_modify_vtd_table )
     {
         if ( p2mt == p2m_ram_rw )
         {
@@ -271,6 +287,17 @@ out:
     }
 
     return rv;
+}
+
+static int
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+              unsigned int order, p2m_type_t p2mt)
+{
+    /* ept_set_entry() is called from set_entry().
+     * We should always create the VT-d page table according
+     * to the gfn to mfn translation changes.
+     */
+    return _ept_set_entry(d, gfn, mfn, order, p2mt, 1); 
 }
 
 /* Read ept p2m entries */
@@ -395,18 +422,30 @@ void ept_change_entry_emt_with_range(str
                  * Set emt for super page.
                  */
                 order = EPT_TABLE_ORDER;
-                ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
+                /* vmx_set_uc_mode() doesn't touch the gfn to mfn
+                 * translations; it only modifies the emt field of the EPT
+                 * entries, so we need not modify the current VT-d page tables.
+                 */
+                _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0);
                 gfn += 0x1FF;
             }
             else
             {
-                /* change emt for partial entries of the 2m area */
-                ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
+                /* 1)change emt for partial entries of the 2m area.
+                 * 2)vmx_set_uc_mode() doesn't touch the gfn to mfn
+                 * translations; it only modifies the emt field of the EPT
+                 * entries, so we need not modify the current VT-d page tables.
+                 */
+                _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt,0);
                 gfn = ((gfn >> EPT_TABLE_ORDER) << EPT_TABLE_ORDER) + 0x1FF;
             }
         }
-        else /* gfn assigned with 4k */
-            ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
+        else /* 1)gfn assigned with 4k
+              * 2)vmx_set_uc_mode() doesn't touch the gfn to mfn
+              * translations; it only modifies the emt field of the EPT
+              * entries, so we need not modify the current VT-d page tables.
+             */
+            _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0);
     }
 }
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.