[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg



# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1218077854 -32400
# Node ID a39913db6e51d060e312c27ef46d1442390bbf67
# Parent  7affdebb7a1ed4f80fb46c679f25c642b1a67034
# Parent  eff5fcfa69bc601f1872a808d7a2134c460f2135
merge with xen-unstable.hg
---
 tools/examples/stubdom-ExampleHVMDomain                    |   14 
 .hgignore                                                  |   20 
 .hgtags                                                    |    2 
 Config.mk                                                  |    6 
 Makefile                                                   |    2 
 README                                                     |   22 
 docs/Docs.mk                                               |    4 
 docs/Makefile                                              |   16 
 docs/misc/vtd.txt                                          |    9 
 docs/src/user.tex                                          |    8 
 docs/xen-api/Makefile                                      |    8 
 docs/xen-api/coversheet.tex                                |    4 
 docs/xen-api/revision-history.tex                          |    9 
 docs/xen-api/xenapi-coversheet.tex                         |    4 
 docs/xen-api/xenapi-datamodel-graph.dot                    |    7 
 docs/xen-api/xenapi-datamodel.tex                          | 1259 +++++++++++++
 extras/mini-os/fs-front.c                                  |  266 ++
 extras/mini-os/gntmap.c                                    |  252 ++
 extras/mini-os/include/fs.h                                |    5 
 extras/mini-os/include/gntmap.h                            |   35 
 extras/mini-os/include/lib.h                               |    3 
 extras/mini-os/lib/sys.c                                   |   12 
 extras/mini-os/minios.mk                                   |    1 
 extras/mini-os/pcifront.c                                  |   13 
 stubdom/Makefile                                           |   38 
 stubdom/README                                             |   18 
 stubdom/stubdom-dm                                         |    6 
 tools/Makefile                                             |   15 
 tools/blktap/Makefile                                      |    3 
 tools/blktap/lib/Makefile                                  |    2 
 tools/console/Makefile                                     |    2 
 tools/console/daemon/io.c                                  |   14 
 tools/examples/Makefile                                    |   18 
 tools/examples/README                                      |    4 
 tools/examples/xend-config.sxp                             |    4 
 tools/examples/xmexample.hvm                               |    5 
 tools/examples/xmexample.hvm-dm                            |   14 
 tools/examples/xmexample.hvm-stubdom                       |    4 
 tools/examples/xmexample.pv-grub                           |  212 ++
 tools/examples/xmexample.vti                               |    5 
 tools/examples/xmexample3                                  |   24 
 tools/firmware/extboot/Makefile                            |    2 
 tools/firmware/hvmloader/Makefile                          |    8 
 tools/firmware/hvmloader/acpi/acpi2_0.h                    |    2 
 tools/firmware/hvmloader/acpi/build.c                      |   41 
 tools/firmware/hvmloader/hvmloader.c                       |    9 
 tools/firmware/hvmloader/util.h                            |    2 
 tools/firmware/rombios/32bit/32bitbios.c                   |    2 
 tools/firmware/rombios/32bit/Makefile                      |    9 
 tools/firmware/rombios/32bit/tcgbios/Makefile              |    2 
 tools/firmware/rombios/32bit/tcgbios/tcgbios.c             |   47 
 tools/firmware/rombios/32bit/tcgbios/tcgbios.h             |   41 
 tools/firmware/rombios/32bit/util.c                        |   72 
 tools/firmware/rombios/32bit/util.h                        |    3 
 tools/firmware/rombios/32bitgateway.c                      |   12 
 tools/firmware/rombios/32bitprotos.h                       |    6 
 tools/firmware/rombios/Makefile                            |    3 
 tools/firmware/rombios/rombios.c                           |  154 -
 tools/fs-back/fs-backend.c                                 |   36 
 tools/fs-back/fs-backend.h                                 |   17 
 tools/fs-back/fs-ops.c                                     |  143 -
 tools/fs-back/fs-xenbus.c                                  |   19 
 tools/include/xen-sys/MiniOS/privcmd.h                     |    2 
 tools/ioemu/block-vbd.c                                    |    4 
 tools/ioemu/hw/pass-through.c                              |  329 +--
 tools/ioemu/hw/pass-through.h                              |    8 
 tools/ioemu/hw/pc.c                                        |   19 
 tools/ioemu/hw/pci.c                                       |    7 
 tools/ioemu/hw/pt-msi.c                                    |    6 
 tools/ioemu/hw/serial.c                                    |    1 
 tools/ioemu/hw/vga.c                                       |    4 
 tools/ioemu/vl.c                                           |    2 
 tools/libaio/src/Makefile                                  |    2 
 tools/libxc/xc_dom_boot.c                                  |   30 
 tools/libxc/xc_domain_save.c                               |   54 
 tools/libxc/xc_hvm_build.c                                 |   34 
 tools/libxc/xc_linux.c                                     |   35 
 tools/libxc/xc_minios.c                                    |  150 +
 tools/libxc/xc_netbsd.c                                    |   53 
 tools/libxc/xc_physdev.c                                   |    6 
 tools/libxc/xc_private.h                                   |    5 
 tools/libxc/xc_solaris.c                                   |   35 
 tools/libxc/xenctrl.h                                      |    2 
 tools/pygrub/src/pygrub                                    |    2 
 tools/python/xen/lowlevel/xc/xc.c                          |    3 
 tools/python/xen/util/pci.py                               |  116 -
 tools/python/xen/util/utils.py                             |   44 
 tools/python/xen/xend/XendDomainInfo.py                    |   30 
 tools/python/xen/xend/image.py                             |   10 
 tools/python/xen/xend/server/pciif.py                      |   43 
 tools/python/xen/xm/console.py                             |   63 
 tools/python/xen/xm/create.dtd                             |    8 
 tools/python/xen/xm/create.py                              |   65 
 tools/python/xen/xm/main.py                                |  130 +
 tools/python/xen/xm/shutdown.py                            |    5 
 tools/python/xen/xm/xenapi_create.py                       |   81 
 tools/xenmon/Makefile                                      |    2 
 tools/xenstat/libxenstat/Makefile                          |    2 
 tools/xenstat/libxenstat/src/xenstat.c                     |    2 
 unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c |    4 
 xen/Makefile                                               |    6 
 xen/arch/ia64/xen/domain.c                                 |    5 
 xen/arch/ia64/xen/irq.c                                    |    3 
 xen/arch/x86/acpi/cpufreq/utility.c                        |   10 
 xen/arch/x86/acpi/pmstat.c                                 |    6 
 xen/arch/x86/domain.c                                      |    1 
 xen/arch/x86/domain_build.c                                |    1 
 xen/arch/x86/domctl.c                                      |   15 
 xen/arch/x86/hvm/io.c                                      |    2 
 xen/arch/x86/hvm/stdvga.c                                  |   34 
 xen/arch/x86/hvm/svm/svm.c                                 |    7 
 xen/arch/x86/hvm/vmx/vmx.c                                 |    2 
 xen/arch/x86/io_apic.c                                     |    8 
 xen/arch/x86/irq.c                                         |    3 
 xen/arch/x86/mm.c                                          |   38 
 xen/arch/x86/mm/shadow/multi.c                             |   23 
 xen/arch/x86/msi.c                                         |   17 
 xen/arch/x86/numa.c                                        |   26 
 xen/arch/x86/physdev.c                                     |    9 
 xen/arch/x86/platform_hypercall.c                          |   31 
 xen/arch/x86/setup.c                                       |   12 
 xen/arch/x86/shutdown.c                                    |   13 
 xen/arch/x86/time.c                                        |  180 -
 xen/arch/x86/x86_64/physdev.c                              |    9 
 xen/arch/x86/x86_emulate/x86_emulate.c                     |    2 
 xen/common/compat/grant_table.c                            |    1 
 xen/common/domain.c                                        |    4 
 xen/common/event_channel.c                                 |   57 
 xen/common/keyhandler.c                                    |   58 
 xen/common/page_alloc.c                                    |   93 
 xen/common/shutdown.c                                      |    5 
 xen/drivers/char/console.c                                 |    3 
 xen/drivers/passthrough/amd/iommu_acpi.c                   |  233 +-
 xen/drivers/passthrough/amd/iommu_detect.c                 |  227 --
 xen/drivers/passthrough/amd/iommu_init.c                   |  236 ++
 xen/drivers/passthrough/amd/iommu_intr.c                   |   17 
 xen/drivers/passthrough/amd/iommu_map.c                    |    2 
 xen/drivers/passthrough/amd/pci_amd_iommu.c                |  289 --
 xen/drivers/passthrough/iommu.c                            |   72 
 xen/drivers/passthrough/pci.c                              |  118 -
 xen/drivers/passthrough/vtd/dmar.c                         |   57 
 xen/drivers/passthrough/vtd/extern.h                       |    1 
 xen/drivers/passthrough/vtd/iommu.c                        |  152 -
 xen/drivers/passthrough/vtd/utils.c                        |  136 +
 xen/include/acpi/cpufreq/processor_perf.h                  |    2 
 xen/include/asm-ia64/config.h                              |    2 
 xen/include/asm-x86/amd-iommu.h                            |    8 
 xen/include/asm-x86/config.h                               |    2 
 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h              |   27 
 xen/include/asm-x86/io_apic.h                              |    2 
 xen/include/asm-x86/p2m.h                                  |    2 
 xen/include/public/io/fsif.h                               |   24 
 xen/include/public/xen.h                                   |    1 
 xen/include/xen/iommu.h                                    |    2 
 xen/include/xen/irq.h                                      |    2 
 xen/include/xen/sched.h                                    |    4 
 xen/include/xen/shutdown.h                                 |    2 
 xen/include/xen/spinlock.h                                 |    4 
 158 files changed, 4775 insertions(+), 1930 deletions(-)

diff -r 7affdebb7a1e -r a39913db6e51 .hgignore
--- a/.hgignore Thu Aug 07 11:47:34 2008 +0900
+++ b/.hgignore Thu Aug 07 11:57:34 2008 +0900
@@ -21,8 +21,7 @@
 ^[^/]*\.bz2$
 ^\.config$
 ^\.pc
-^TAGS$
-^tags$
+(^|/)(tags|TAGS)$
 ^build-.*$
 ^dist/.*$
 ^docs/.*\.aux$
@@ -60,10 +59,13 @@
 ^docs/xen-api/vm_lifecycle.eps$
 ^docs/xen-api/xenapi-datamodel-graph.eps$
 ^docs/xen-api/xenapi.out$
-^extras/mini-os/h/hypervisor-ifs$
-^extras/mini-os/h/xen-public$
+^extras/mini-os/arch/ia64/gen_off.s$
+^extras/mini-os/include/mini-os$
+^extras/mini-os/include/ia64/mini-os$
+^extras/mini-os/include/ia64/offsets.h$
+^extras/mini-os/include/x86/mini-os$
+^extras/mini-os/include/xen$
 ^extras/mini-os/mini-os.*$
-^extras/mini-os/*-stubdom.*$
 ^install/.*$
 ^linux-[^/]*-paravirt/.*$
 ^linux-2.6[^/]*/.*$
@@ -91,13 +93,17 @@
 ^stubdom/libxc$
 ^stubdom/lwip-.*$
 ^stubdom/mini-os-.*$
+^stubdom/mk-headers$
 ^stubdom/newlib-.*$
 ^stubdom/pciutils-.*$
 ^stubdom/zlib-.*$
 ^stubdom/grub-cvs$
 ^stubdom/grub/stage2$
 ^stubdom/grub/netboot$
-^tools/.*/TAGS$
+^stubdom/grub/dirs$
+^stubdom/lwip/
+^stubdom/ioemu/
+^stubdom/grub-upstream/
 ^tools/.*/build/lib.*/.*\.py$
 ^tools/blktap/Makefile\.smh$
 ^tools/blktap/drivers/blktapctrl$
@@ -253,7 +259,6 @@
 ^xen/\.banner.*$
 ^xen/BLOG$
 ^xen/System.map$
-^xen/TAGS$
 ^xen/arch/x86/asm-offsets\.s$
 ^xen/arch/x86/boot/mkelf32$
 ^xen/arch/x86/xen\.lds$
@@ -271,7 +276,6 @@
 ^xen/include/xen/acm_policy\.h$
 ^xen/include/xen/banner\.h$
 ^xen/include/xen/compile\.h$
-^xen/tags$
 ^xen/tools/figlet/figlet$
 ^xen/tools/symbols$
 ^xen/xen$
diff -r 7affdebb7a1e -r a39913db6e51 .hgtags
--- a/.hgtags   Thu Aug 07 11:47:34 2008 +0900
+++ b/.hgtags   Thu Aug 07 11:57:34 2008 +0900
@@ -25,3 +25,5 @@ c5deb251b9dcece9e466a48a66d3528ca1797db4
 c5deb251b9dcece9e466a48a66d3528ca1797db4 3.2.0-rc4
 36bb2ab4722733d919d32e4555eb46cc6a06cb8f 3.2.0-rc5
 9facc624a238f2b9437b07fa28ff65884aa867f2 3.2.0-rc6
+c3494402098e26507fc61a6579832c0149351d6a 3.3.0-rc1
+dde12ff94c96331668fe38a7b09506fa94d03c34 3.3.0-rc2
diff -r 7affdebb7a1e -r a39913db6e51 Config.mk
--- a/Config.mk Thu Aug 07 11:47:34 2008 +0900
+++ b/Config.mk Thu Aug 07 11:57:34 2008 +0900
@@ -19,6 +19,8 @@ HOSTCFLAGS += -fno-strict-aliasing
 
 DISTDIR     ?= $(XEN_ROOT)/dist
 DESTDIR     ?= /
+DOCDIR      ?= /usr/share/doc/xen
+MANDIR      ?= /usr/share/man
 
 # Allow phony attribute to be listed as dependency rather than fake target
 .PHONY: .phony
@@ -84,7 +86,11 @@ QEMU_REMOTE=http://xenbits.xensource.com
 # Mercurial in-tree version, or a local directory, or a git URL.
 # CONFIG_QEMU   ?= ioemu
 # CONFIG_QEMU   ?= ../qemu-xen.git
+ifeq ($(XEN_TARGET_ARCH),ia64)
+CONFIG_QEMU   ?= ioemu
+else
 CONFIG_QEMU   ?= $(QEMU_REMOTE)
+endif
 
 # Optional components
 XENSTAT_XENTOP     ?= y
diff -r 7affdebb7a1e -r a39913db6e51 Makefile
--- a/Makefile  Thu Aug 07 11:47:34 2008 +0900
+++ b/Makefile  Thu Aug 07 11:57:34 2008 +0900
@@ -149,7 +149,7 @@ help:
        @echo '                     trees then make dist'
        @echo '  xen              - build and install Xen hypervisor'
        @echo '  tools            - build and install tools'
-       @echo '  stubdomain       - build and install the stubdomain images'
+       @echo '  stubdom          - build and install the stubdomain images'
        @echo '  kernels          - build and install guest kernels'
        @echo '  kbuild           - synonym for make kernels'
        @echo '  docs             - build and install user documentation'
diff -r 7affdebb7a1e -r a39913db6e51 README
--- a/README    Thu Aug 07 11:47:34 2008 +0900
+++ b/README    Thu Aug 07 11:57:34 2008 +0900
@@ -1,10 +1,10 @@
 #################################
- __  __            _____  _____  
- \ \/ /___ _ __   |___ / |___ /  
-  \  // _ \ '_ \    |_ \   |_ \  
-  /  \  __/ | | |  ___) | ___) | 
- /_/\_\___|_| |_| |____(_)____/  
-                                 
+ __  __            _  _    ___  
+ \ \/ /___ _ __   | || |  / _ \ 
+  \  // _ \ '_ \  | || |_| | | |
+  /  \  __/ | | | |__   _| |_| |
+ /_/\_\___|_| |_|    |_|(_)___/ 
+                                
 #################################
 
 http://www.xen.org/
@@ -21,7 +21,7 @@ by the original Xen development team to 
 by the original Xen development team to build enterprise products
 around Xen.
 
-The 3.3 release offers excellent performance, hardware support and
+The 4.0 release offers excellent performance, hardware support and
 enterprise-grade features such as x86_32-PAE, x86_64, SMP guests and
 live relocation of VMs. Ports to Linux 2.6, Linux 2.4, NetBSD, FreeBSD
 and Solaris are available from the community.
@@ -54,8 +54,8 @@ 2. Configure your bootloader to boot Xen
    /boot/grub/menu.lst: edit this file to include an entry like the
    following:
 
-    title Xen 3.3 / XenLinux 2.6
-       kernel /boot/xen-3.3.gz console=vga
+    title Xen 4.0 / XenLinux 2.6
+       kernel /boot/xen-4.0.gz console=vga
        module /boot/vmlinuz-2.6-xen root=<root-dev> ro console=tty0
        module /boot/initrd-2.6-xen.img
 
@@ -74,7 +74,7 @@ 2. Configure your bootloader to boot Xen
    32MB memory for internal use, which is not available for allocation
    to virtual machines.
 
-3. Reboot your system and select the "Xen 3.3 / XenLinux 2.6" menu
+3. Reboot your system and select the "Xen 4.0 / XenLinux 2.6" menu
    option. After booting Xen, Linux will start and your initialisation
    scripts should execute in the usual way.
 
@@ -224,6 +224,6 @@ to modify grub.conf to use tboot to laun
 to modify grub.conf to use tboot to launch Xen.
 
 There are optional targets as part of Xen's top-level makefile that will
-downlaod and build tboot: install-tboot, build-tboot, dist-tboot, clean-tboot.
+download and build tboot: install-tboot, build-tboot, dist-tboot, clean-tboot.
 These will download the latest tar file from the SourceForge site using wget,
 then build/install/dist according to Xen's settings.
diff -r 7affdebb7a1e -r a39913db6e51 docs/Docs.mk
--- a/docs/Docs.mk      Thu Aug 07 11:47:34 2008 +0900
+++ b/docs/Docs.mk      Thu Aug 07 11:57:34 2008 +0900
@@ -7,7 +7,3 @@ POD2MAN         := pod2man
 POD2MAN                := pod2man
 DOT            := dot
 NEATO          := neato
-
-pkgdocdir      := /usr/share/doc/xen
-mandir         := /usr/share/man
-
diff -r 7affdebb7a1e -r a39913db6e51 docs/Makefile
--- a/docs/Makefile     Thu Aug 07 11:47:34 2008 +0900
+++ b/docs/Makefile     Thu Aug 07 11:57:34 2008 +0900
@@ -80,17 +80,17 @@ distclean: clean
 
 .PHONY: install
 install: all
-       rm -rf $(DESTDIR)$(pkgdocdir)
-       $(INSTALL_DIR) $(DESTDIR)$(pkgdocdir)
+       rm -rf $(DESTDIR)$(DOCDIR)
+       $(INSTALL_DIR) $(DESTDIR)$(DOCDIR)
 
        $(MAKE) -C xen-api install
 
-       cp -dR ps $(DESTDIR)$(pkgdocdir)
-       cp -dR pdf $(DESTDIR)$(pkgdocdir)
-       $(INSTALL_DIR) $(DESTDIR)$(mandir)
-       cp -dR man1 $(DESTDIR)$(mandir)
-       cp -dR man5 $(DESTDIR)$(mandir)
-       [ ! -d html ] || cp -dR html $(DESTDIR)$(pkgdocdir)
+       cp -dR ps $(DESTDIR)$(DOCDIR)
+       cp -dR pdf $(DESTDIR)$(DOCDIR)
+       $(INSTALL_DIR) $(DESTDIR)$(MANDIR)
+       cp -dR man1 $(DESTDIR)$(MANDIR)
+       cp -dR man5 $(DESTDIR)$(MANDIR)
+       [ ! -d html ] || cp -dR html $(DESTDIR)$(DOCDIR)
 
 pdf/%.pdf: ps/%.ps
        $(INSTALL_DIR) $(@D)
diff -r 7affdebb7a1e -r a39913db6e51 docs/misc/vtd.txt
--- a/docs/misc/vtd.txt Thu Aug 07 11:47:34 2008 +0900
+++ b/docs/misc/vtd.txt Thu Aug 07 11:57:34 2008 +0900
@@ -2,7 +2,7 @@ Authors : Allen Kay    <allen.m.kay@inte
 Authors : Allen Kay    <allen.m.kay@xxxxxxxxx>
           Weidong Han  <weidong.han@xxxxxxxxx>
 Created : October-24-2007
-Updated : May-07-2008
+Updated : August-06-2008
 
 How to turn on VT-d in Xen
 --------------------------
@@ -21,7 +21,7 @@ 11) "hide" pci device from dom0 as follo
 
 title Xen-Fedora Core (2.6.18-xen)
         root (hd0,0)
-        kernel /boot/xen.gz com1=115200,8n1 console=com1
+        kernel /boot/xen.gz com1=115200,8n1 console=com1 iommu=1
         module /boot/vmlinuz-2.6.18.8-xen root=LABEL=/ ro xencons=ttyS console=tty0 console=ttyS0, pciback.hide=(01:00.0)(03:00.0)
         module /boot/initrd-2.6.18-xen.img
 
@@ -30,6 +30,11 @@ 13) add "pci" line in /etc/xen/hvm.conf 
         pci = [ '01:00.0', '03:00.0' ]
 15) start hvm guest and use "lspci" to see the passthru device and
     "ifconfig" to see if IP address has been assigned to NIC devices.
+
+
+Enable MSI/MSI-x for assigned devices
+-------------------------------------
+Add "msi=1" option in kernel line of host grub.
 
 
 Caveat on Conventional PCI Device Passthrough
diff -r 7affdebb7a1e -r a39913db6e51 docs/src/user.tex
--- a/docs/src/user.tex Thu Aug 07 11:47:34 2008 +0900
+++ b/docs/src/user.tex Thu Aug 07 11:57:34 2008 +0900
@@ -4204,11 +4204,9 @@ writing to the VGA console after domain 
   enabled by the BIOS.
 \item [ apic=bigsmp,default,es7000,summit ] Specify NUMA platform.
   This can usually be probed automatically.
-\item [ dma\_bits=xxx ] Specify width of DMA
-  addresses in bits. Default is 30 bits (addresses up to 1GB are DMAable).
-\item [ dma\_emergency\_pool=xxx ] Specify lower bound on size of DMA
-  pool below which ordinary allocations will fail rather than fall
-  back to allocating from the DMA pool.
+\item [ dma\_bits=xxx ] Specify width of DMA addresses in bits. This
+  is used in NUMA systems to prevent this special DMA memory from
+  being exhausted in one node when remote nodes have available memory.
 \end{description}
 
 In addition, the following options may be specified on the Xen command
diff -r 7affdebb7a1e -r a39913db6e51 docs/xen-api/Makefile
--- a/docs/xen-api/Makefile     Thu Aug 07 11:47:34 2008 +0900
+++ b/docs/xen-api/Makefile     Thu Aug 07 11:57:34 2008 +0900
@@ -16,11 +16,11 @@ build: xenapi.pdf xenapi.ps
 build: xenapi.pdf xenapi.ps
 
 install:
-       $(INSTALL_DIR) $(DESTDIR)$(pkgdocdir)/ps
-       $(INSTALL_DIR) $(DESTDIR)$(pkgdocdir)/pdf
+       $(INSTALL_DIR) $(DESTDIR)$(DOCDIR)/ps
+       $(INSTALL_DIR) $(DESTDIR)$(DOCDIR)/pdf
 
-       [ -e xenapi.ps ] && cp xenapi.ps $(DESTDIR)$(pkgdocdir)/ps || true
-       [ -e xenapi.pdf ] && cp xenapi.pdf $(DESTDIR)$(pkgdocdir)/pdf || true
+       [ -e xenapi.ps ] && cp xenapi.ps $(DESTDIR)$(DOCDIR)/ps || true
+       [ -e xenapi.pdf ] && cp xenapi.pdf $(DESTDIR)$(DOCDIR)/pdf || true
 
 xenapi.dvi: $(TEX) $(EPS) $(EPSDOT)
        $(LATEX) xenapi.tex
diff -r 7affdebb7a1e -r a39913db6e51 docs/xen-api/coversheet.tex
--- a/docs/xen-api/coversheet.tex       Thu Aug 07 11:47:34 2008 +0900
+++ b/docs/xen-api/coversheet.tex       Thu Aug 07 11:57:34 2008 +0900
@@ -50,7 +50,7 @@ Hollis Blanchard, IBM & Alastair Tse, Xe
 Hollis Blanchard, IBM & Alastair Tse, XenSource \\
 Mike Day, IBM & Daniel Veillard, Red Hat \\
 Jim Fehlig, Novell & Tom Wilkie, University of Cambridge \\
-Jon Harrop, XenSource & \\
+Jon Harrop, XenSource & Yosuke Iwamatsu, NEC \\
 \end{tabular}
 \end{large}
 
@@ -60,4 +60,4 @@ Jon Harrop, XenSource & \\
 \legalnotice{}
 
 \newpage
-\pagestyle{fancy}
\ No newline at end of file
+\pagestyle{fancy}
diff -r 7affdebb7a1e -r a39913db6e51 docs/xen-api/revision-history.tex
--- a/docs/xen-api/revision-history.tex Thu Aug 07 11:47:34 2008 +0900
+++ b/docs/xen-api/revision-history.tex Thu Aug 07 11:57:34 2008 +0900
@@ -47,5 +47,14 @@
     \end{flushleft}
    \end{minipage}\\
   \hline
+  1.0.6 & 24th Jul. 08 & Y. Iwamatsu &
+   \begin{minipage}[t]{7cm}
+    \begin{flushleft}
+     Added definitions of new classes DPCI and PPCI. Updated the table
+     and the diagram representing relationships between classes.
+     Added host.PPCIs and VM.DPCIs fields.
+    \end{flushleft}
+   \end{minipage}\\
+  \hline
  \end{tabular}
 \end{center}
diff -r 7affdebb7a1e -r a39913db6e51 docs/xen-api/xenapi-coversheet.tex
--- a/docs/xen-api/xenapi-coversheet.tex        Thu Aug 07 11:47:34 2008 +0900
+++ b/docs/xen-api/xenapi-coversheet.tex        Thu Aug 07 11:57:34 2008 +0900
@@ -17,12 +17,12 @@
 \newcommand{\coversheetlogo}{xen.eps}
 
 %% Document date
-\newcommand{\datestring}{11th February 2008}
+\newcommand{\datestring}{24th July 2008}
 
 \newcommand{\releasestatement}{Stable Release}
 
 %% Document revision
-\newcommand{\revstring}{API Revision 1.0.5}
+\newcommand{\revstring}{API Revision 1.0.6}
 
 %% Document authors
 \newcommand{\docauthors}{
diff -r 7affdebb7a1e -r a39913db6e51 docs/xen-api/xenapi-datamodel-graph.dot
--- a/docs/xen-api/xenapi-datamodel-graph.dot   Thu Aug 07 11:47:34 2008 +0900
+++ b/docs/xen-api/xenapi-datamodel-graph.dot   Thu Aug 07 11:57:34 2008 +0900
@@ -14,7 +14,7 @@ fontname="Verdana";
 
 node [ shape=box ]; session VM host network VIF PIF SR VDI VBD PBD user XSPolicy ACMPolicy;
 node [shape=ellipse]; PIF_metrics VIF_metrics VM_metrics VBD_metrics PBD_metrics VM_guest_metrics host_metrics;
-node [shape=box]; host_cpu console
+node [shape=box]; DPCI PPCI host_cpu console
 session -> host [ arrowhead="none" ]
 session -> user [ arrowhead="none" ]
 VM -> VM_metrics [ arrowhead="none" ]
@@ -22,7 +22,7 @@ VM -> console [ arrowhead="crow" ]
 VM -> console [ arrowhead="crow" ]
 host -> PBD [ arrowhead="crow", arrowtail="none" ]
 host -> host_metrics [ arrowhead="none" ]
-host -> host_cpu [ arrowhead="none" ]
+host -> host_cpu [ arrowhead="crow", arrowtail="none" ]
 VIF -> VM [ arrowhead="none", arrowtail="crow" ]
 VIF -> network [ arrowhead="none", arrowtail="crow" ]
 VIF -> VIF_metrics [ arrowhead="none" ]
@@ -38,4 +38,7 @@ VBD -> VBD_metrics [ arrowhead="none" ]
 VBD -> VBD_metrics [ arrowhead="none" ]
 XSPolicy -> host [ arrowhead="none" ]
 XSPolicy -> ACMPolicy [ arrowhead="none" ]
+DPCI -> VM [ arrowhead="none", arrowtail="crow" ]
+DPCI -> PPCI [ arrowhead="none" ]
+PPCI -> host [ arrowhead="none", arrowtail="crow" ]
 }
diff -r 7affdebb7a1e -r a39913db6e51 docs/xen-api/xenapi-datamodel.tex
--- a/docs/xen-api/xenapi-datamodel.tex Thu Aug 07 11:47:34 2008 +0900
+++ b/docs/xen-api/xenapi-datamodel.tex Thu Aug 07 11:57:34 2008 +0900
@@ -44,6 +44,8 @@ Name & Description \\
 {\tt crashdump} & A VM crashdump \\
 {\tt VTPM} & A virtual TPM device \\
 {\tt console} & A console \\
+{\tt DPCI} & A pass-through PCI device \\
+{\tt PPCI} & A physical PCI device \\
 {\tt user} & A user of the system \\
 {\tt debug} & A basic class for testing \\
 {\tt XSPolicy} & A class for handling Xen Security Policies \\
@@ -70,6 +72,8 @@ SR.VDIs & VDI.SR & many-to-one\\
 SR.VDIs & VDI.SR & many-to-one\\
 VTPM.VM & VM.VTPMs & one-to-many\\
 console.VM & VM.consoles & one-to-many\\
+DPCI.VM & VM.DPCIs & one-to-many\\
+PPCI.host & host.PPCIs & one-to-many\\
 host.resident\_VMs & VM.resident\_on & many-to-one\\
 host.host\_CPUs & host\_cpu.host & many-to-one\\
 \hline
@@ -1402,6 +1406,7 @@ Quals & Field & Type & Description \\
 $\mathit{RO}_\mathit{run}$ &  {\tt VBDs} & (VBD ref) Set & virtual block devices \\
 $\mathit{RO}_\mathit{run}$ &  {\tt crash\_dumps} & (crashdump ref) Set & crash dumps associated with this VM \\
 $\mathit{RO}_\mathit{run}$ &  {\tt VTPMs} & (VTPM ref) Set & virtual TPMs \\
+$\mathit{RO}_\mathit{run}$ &  {\tt DPCIs} & (DPCI ref) Set & pass-through PCI devices \\
 $\mathit{RW}$ &  {\tt PV/bootloader} & string & name of or path to bootloader \\
 $\mathit{RW}$ &  {\tt PV/kernel} & string & path to the kernel \\
 $\mathit{RW}$ &  {\tt PV/ramdisk} & string & path to the initrd \\
@@ -3406,6 +3411,38 @@ Get the VTPMs field of the given VM.
  \noindent {\bf Return Type:} 
 {\tt 
 (VTPM ref) Set
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_DPCIs}
+
+{\bf Overview:} 
+Get the DPCIs field of the given VM.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} ((DPCI ref) Set) get_DPCIs (session_id s, VM ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt VM ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+(DPCI ref) Set
 }
 
 
@@ -5480,6 +5517,7 @@ Quals & Field & Type & Description \\
 $\mathit{RW}$ &  {\tt suspend\_image\_sr} & SR ref & The SR in which VDIs for suspend images are created \\
 $\mathit{RW}$ &  {\tt crash\_dump\_sr} & SR ref & The SR in which VDIs for crash dumps are created \\
 $\mathit{RO}_\mathit{run}$ &  {\tt PBDs} & (PBD ref) Set & physical blockdevices \\
+$\mathit{RO}_\mathit{run}$ &  {\tt PPCIs} & (PPCI ref) Set & physical PCI devices \\
 $\mathit{RO}_\mathit{run}$ &  {\tt host\_CPUs} & (host\_cpu ref) Set & The physical CPUs on this host \\
 $\mathit{RO}_\mathit{run}$ &  {\tt metrics} & host\_metrics ref & metrics associated with this host \\
 \hline
@@ -6767,6 +6805,38 @@ Get the PBDs field of the given host.
  \noindent {\bf Return Type:} 
 {\tt 
 (PBD ref) Set
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_PPCIs}
+
+{\bf Overview:} 
+Get the PPCIs field of the given host.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} ((PPCI ref) Set) get_PPCIs (session_id s, host ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt host ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+(PPCI ref) Set
 }
 
 
@@ -14464,6 +14534,1195 @@ all fields from the object
 
 \vspace{1cm}
 \newpage
+\section{Class: DPCI}
+\subsection{Fields for class: DPCI}
+\begin{longtable}{|lllp{0.38\textwidth}|}
+\hline
+\multicolumn{1}{|l}{Name} & \multicolumn{3}{l|}{\bf DPCI} \\
+\multicolumn{1}{|l}{Description} & \multicolumn{3}{l|}{\parbox{11cm}{\em A
+pass-through PCI device.}} \\
+\hline
+Quals & Field & Type & Description \\
+\hline
+$\mathit{RO}_\mathit{run}$ &  {\tt uuid} & string & unique identifier/object reference \\
+$\mathit{RO}_\mathit{inst}$ &  {\tt VM} & VM ref & the virtual machine \\
+$\mathit{RO}_\mathit{inst}$ &  {\tt PPCI} & PPCI ref & the physical PCI device \\
+$\mathit{RO}_\mathit{inst}$ &  {\tt hotplug\_slot} & int & the slot number to which this PCI device is inserted \\
+$\mathit{RO}_\mathit{run}$ &  {\tt virtual\_domain} & int & the virtual domain number \\
+$\mathit{RO}_\mathit{run}$ &  {\tt virtual\_bus} & int & the virtual bus number \\
+$\mathit{RO}_\mathit{run}$ &  {\tt virtual\_slot} & int & the virtual slot number \\
+$\mathit{RO}_\mathit{run}$ &  {\tt virtual\_func} & int & the virtual func number \\
+$\mathit{RO}_\mathit{run}$ &  {\tt virtual\_name} & string & the virtual PCI name \\
+\hline
+\end{longtable}
+\subsection{RPCs associated with class: DPCI}
+\subsubsection{RPC name:~get\_all}
+
+{\bf Overview:} 
+Return a list of all the DPCIs known to the system.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} ((DPCI ref) Set) get_all (session_id s)\end{verbatim}
+
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+(DPCI ref) Set
+}
+
+
+references to all objects
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_uuid}
+
+{\bf Overview:} 
+Get the uuid field of the given DPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_uuid (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_VM}
+
+{\bf Overview:} 
+Get the VM field of the given DPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} (VM ref) get_VM (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+VM ref
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_PPCI}
+
+{\bf Overview:} 
+Get the PPCI field of the given DPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} (PPCI ref) get_PPCI (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+PPCI ref
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_hotplug\_slot}
+
+{\bf Overview:} 
+Get the hotplug\_slot field of the given DPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_hotplug_slot (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_domain}
+
+{\bf Overview:} 
+Get the virtual\_domain field of the given DPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_virtual_domain (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_bus}
+
+{\bf Overview:} 
+Get the virtual\_bus field of the given DPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_virtual_bus (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_slot}
+
+{\bf Overview:} 
+Get the virtual\_slot field of the given DPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_virtual_slot (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_func}
+
+{\bf Overview:} 
+Get the virtual\_func field of the given DPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_virtual_func (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_name}
+
+{\bf Overview:} 
+Get the virtual\_name field of the given DPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_virtual_name (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~create}
+
+{\bf Overview:} 
+Create a new DPCI instance, and return its handle.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} (DPCI ref) create (session_id s, DPCI record args)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI record } & args & All constructor arguments \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+DPCI ref
+}
+
+
+reference to the newly created object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~destroy}
+
+{\bf Overview:} 
+Destroy the specified DPCI instance.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} void destroy (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+void
+}
+
+
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_by\_uuid}
+
+{\bf Overview:} 
+Get a reference to the DPCI instance with the specified UUID.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} (DPCI ref) get_by_uuid (session_id s, string uuid)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt string } & uuid & UUID of object to return \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+DPCI ref
+}
+
+
+reference to the object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_record}
+
+{\bf Overview:} 
+Get a record containing the current state of the given DPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} (DPCI record) get_record (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+DPCI record
+}
+
+
+all fields from the object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+
+\vspace{1cm}
+\newpage
+\section{Class: PPCI}
+\subsection{Fields for class: PPCI}
+\begin{longtable}{|lllp{0.38\textwidth}|}
+\hline
+\multicolumn{1}{|l}{Name} & \multicolumn{3}{l|}{\bf PPCI} \\
+\multicolumn{1}{|l}{Description} & \multicolumn{3}{l|}{\parbox{11cm}{\em A
+physical PCI device.}} \\
+\hline
+Quals & Field & Type & Description \\
+\hline
+$\mathit{RO}_\mathit{run}$ &  {\tt uuid} & string & unique identifier/object reference \\
+$\mathit{RO}_\mathit{run}$ &  {\tt host} & host ref &  the physical machine to which this PPCI is connected \\
+$\mathit{RO}_\mathit{run}$ &  {\tt domain} & int & the domain number \\
+$\mathit{RO}_\mathit{run}$ &  {\tt bus} & int & the bus number \\
+$\mathit{RO}_\mathit{run}$ &  {\tt slot} & int & the slot number \\
+$\mathit{RO}_\mathit{run}$ &  {\tt func} & int & the func number \\
+$\mathit{RO}_\mathit{run}$ &  {\tt name} & string & the PCI name \\
+$\mathit{RO}_\mathit{run}$ &  {\tt vendor\_id} & int & the vendor ID \\
+$\mathit{RO}_\mathit{run}$ &  {\tt vendor\_name} & string & the vendor name \\
+$\mathit{RO}_\mathit{run}$ &  {\tt device\_id} & int & the device ID \\
+$\mathit{RO}_\mathit{run}$ &  {\tt device\_name} & string & the device name \\
+$\mathit{RO}_\mathit{run}$ &  {\tt revision\_id} & int & the revision ID \\
+$\mathit{RO}_\mathit{run}$ &  {\tt class\_code} & int & the class code \\
+$\mathit{RO}_\mathit{run}$ &  {\tt class\_name} & string & the class name \\
+$\mathit{RO}_\mathit{run}$ &  {\tt subsystem\_vendor\_id} & int & the subsystem vendor ID \\
+$\mathit{RO}_\mathit{run}$ &  {\tt subsystem\_vendor\_name} & string & the subsystem vendor name \\
+$\mathit{RO}_\mathit{run}$ &  {\tt subsystem\_id} & int & the subsystem ID \\
+$\mathit{RO}_\mathit{run}$ &  {\tt subsystem\_name} & string & the subsystem name \\
+$\mathit{RO}_\mathit{run}$ &  {\tt driver} & string & the driver name \\
+\hline
+\end{longtable}
+\subsection{RPCs associated with class: PPCI}
+\subsubsection{RPC name:~get\_all}
+
+{\bf Overview:} 
+Return a list of all the PPCIs known to the system.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} ((PPCI ref) Set) get_all (session_id s)\end{verbatim}
+
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+(PPCI ref) Set
+}
+
+
+references to all objects
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_uuid}
+
+{\bf Overview:} 
+Get the uuid field of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_uuid (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_host}
+
+{\bf Overview:} 
+Get the host field of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} (host ref) get_host (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+host ref
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_domain}
+
+{\bf Overview:} 
+Get the domain field of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_domain (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_bus}
+
+{\bf Overview:} 
+Get the bus field of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_bus (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_slot}
+
+{\bf Overview:} 
+Get the slot field of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_slot (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_func}
+
+{\bf Overview:} 
+Get the func field of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_func (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_name}
+
+{\bf Overview:} 
+Get the name field of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_name (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_vendor\_id}
+
+{\bf Overview:} 
+Get the vendor\_id field of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_vendor_id (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_vendor\_name}
+
+{\bf Overview:} 
+Get the vendor\_name field of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_vendor_name (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_device\_id}
+
+{\bf Overview:} 
+Get the device\_id field of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_device_id (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_device\_name}
+
+{\bf Overview:} 
+Get the device\_name field of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_device_name (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_revision\_id}
+
+{\bf Overview:} 
+Get the revision\_id field of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_revision_id (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_class\_code}
+
+{\bf Overview:} 
+Get the class\_code field of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_class_code (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_class\_name}
+
+{\bf Overview:} 
+Get the class\_name field of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_class_name (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_subsystem\_vendor\_id}
+
+{\bf Overview:} 
+Get the subsystem\_vendor\_id field of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_subsystem_vendor_id (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_subsystem\_vendor\_name}
+
+{\bf Overview:} 
+Get the subsystem\_vendor\_name field of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_subsystem_vendor_name (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_subsystem\_id}
+
+{\bf Overview:} 
+Get the subsystem\_id field of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_subsystem_id (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_subsystem\_name}
+
+{\bf Overview:} 
+Get the subsystem\_name field of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_subsystem_name (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_driver}
+
+{\bf Overview:} 
+Get the driver field of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_driver (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_by\_uuid}
+
+{\bf Overview:} 
+Get a reference to the PPCI instance with the specified UUID.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} (PPCI ref) get_by_uuid (session_id s, string uuid)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt string } & uuid & UUID of object to return \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+PPCI ref
+}
+
+
+reference to the object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_record}
+
+{\bf Overview:} 
+Get a record containing the current state of the given PPCI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} (PPCI record) get_record (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+PPCI record
+}
+
+
+all fields from the object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+
+\vspace{1cm}
+\newpage
 \section{Class: user}
 \subsection{Fields for class: user}
 \begin{longtable}{|lllp{0.38\textwidth}|}
diff -r 7affdebb7a1e -r a39913db6e51 extras/mini-os/fs-front.c
--- a/extras/mini-os/fs-front.c Thu Aug 07 11:47:34 2008 +0900
+++ b/extras/mini-os/fs-front.c Thu Aug 07 11:57:34 2008 +0900
@@ -50,6 +50,8 @@
 
 struct fs_request;
 struct fs_import *fs_import;
+void *alloc_buffer_page(struct fs_request *req, domid_t domid, grant_ref_t *gref);
+void free_buffer_page(struct fs_request *req);
 
 /******************************************************************************/
 /*                      RING REQUEST/RESPONSES HANDLING                       */
@@ -57,11 +59,19 @@ struct fs_import *fs_import;
 
 struct fs_request
 {
-    void *page;
-    grant_ref_t gref;
+    void *private1;                        /* Specific to request type */
+    void *private2;
     struct thread *thread;                 /* Thread blocked on this request */
     struct fsif_response shadow_rsp;       /* Response copy writen by the 
                                               interrupt handler */  
+};
+
+struct fs_rw_gnts
+{
+    /* TODO 16 bit? */
+    int count;
+    grant_ref_t grefs[FSIF_NR_READ_GNTS];  
+    void *pages[FSIF_NR_READ_GNTS];  
 };
 
 /* Ring operations:
@@ -177,6 +187,8 @@ int fs_open(struct fs_import *import, ch
 {
     struct fs_request *fsr;
     unsigned short priv_req_id;
+    grant_ref_t gref;
+    void *buffer;
     RING_IDX back_req_id; 
     struct fsif_request *req;
     int fd;
@@ -189,14 +201,15 @@ int fs_open(struct fs_import *import, ch
     priv_req_id = get_id_from_freelist(import->freelist);
     DEBUG("Request id for fs_open call is: %d\n", priv_req_id);
     fsr = &import->requests[priv_req_id];
-    DEBUG("gref id=%d\n", fsr->gref);
-    fsr->thread = current;
-    sprintf(fsr->page, "%s", file);
+    buffer = alloc_buffer_page(fsr, import->dom_id, &gref);
+    DEBUG("gref id=%d\n", gref);
+    fsr->thread = current;
+    sprintf(buffer, "%s", file);
 
     req = RING_GET_REQUEST(&import->ring, back_req_id);
     req->type = REQ_FILE_OPEN;
     req->id = priv_req_id;
-    req->u.fopen.gref = fsr->gref;
+    req->u.fopen.gref = gref;
 
     /* Set blocked flag before commiting the request, thus avoiding missed
      * response race */
@@ -207,6 +220,7 @@ int fs_open(struct fs_import *import, ch
     /* Read the response */
     fd = (int)fsr->shadow_rsp.ret_val;
     DEBUG("The following FD returned: %d\n", fd);
+    free_buffer_page(fsr);
     add_id_to_freelist(priv_req_id, import->freelist);
 
     return fd;
@@ -254,11 +268,13 @@ ssize_t fs_read(struct fs_import *import
 {
     struct fs_request *fsr;
     unsigned short priv_req_id;
+    struct fs_rw_gnts gnts;
     RING_IDX back_req_id; 
     struct fsif_request *req;
     ssize_t ret;
-
-    BUG_ON(len > PAGE_SIZE);
+    int i;
+
+    BUG_ON(len > PAGE_SIZE * FSIF_NR_READ_GNTS);
 
     /* Prepare request for the backend */
     back_req_id = reserve_fsif_request(import);
@@ -268,17 +284,27 @@ ssize_t fs_read(struct fs_import *import
     priv_req_id = get_id_from_freelist(import->freelist);
     DEBUG("Request id for fs_read call is: %d\n", priv_req_id);
     fsr = &import->requests[priv_req_id];
-    DEBUG("gref=%d\n", fsr->gref);
-    fsr->thread = current;
-    memset(fsr->page, 0, PAGE_SIZE);
 
     req = RING_GET_REQUEST(&import->ring, back_req_id);
     req->type = REQ_FILE_READ;
     req->id = priv_req_id;
     req->u.fread.fd = fd;
-    req->u.fread.gref = fsr->gref;
     req->u.fread.len = len;
     req->u.fread.offset = offset;
+
+
+    ASSERT(len > 0);
+    gnts.count = ((len - 1) / PAGE_SIZE) + 1; 
+    for(i=0; i<gnts.count; i++)
+    {
+        gnts.pages[i] = (void *)alloc_page(); 
+        gnts.grefs[i] = gnttab_grant_access(import->dom_id, 
+                                            virt_to_mfn(gnts.pages[i]), 
+                                            0); 
+        memset(gnts.pages[i], 0, PAGE_SIZE);
+        req->u.fread.grefs[i] = gnts.grefs[i];
+    }
+    fsr->thread = current;
 
     /* Set blocked flag before commiting the request, thus avoiding missed
      * response race */
@@ -290,7 +316,19 @@ ssize_t fs_read(struct fs_import *import
     ret = (ssize_t)fsr->shadow_rsp.ret_val;
     DEBUG("The following ret value returned %d\n", ret);
     if(ret > 0)
-        memcpy(buf, fsr->page, ret);
+    {
+        ssize_t to_copy = ret, current_copy;
+        for(i=0; i<gnts.count; i++)
+        {
+            gnttab_end_access(gnts.grefs[i]);
+            current_copy = to_copy > PAGE_SIZE ? PAGE_SIZE : to_copy;
+            if(current_copy > 0)
+                memcpy(buf, gnts.pages[i], current_copy); 
+            to_copy -= current_copy; 
+            buf = (char*) buf + current_copy;
+            free_page(gnts.pages[i]);
+        }
+    }
     add_id_to_freelist(priv_req_id, import->freelist);
 
     return ret;
@@ -301,11 +339,13 @@ ssize_t fs_write(struct fs_import *impor
 {
     struct fs_request *fsr;
     unsigned short priv_req_id;
-    RING_IDX back_req_id; 
-    struct fsif_request *req;
-    ssize_t ret;
-
-    BUG_ON(len > PAGE_SIZE);
+    struct fs_rw_gnts gnts;
+    RING_IDX back_req_id; 
+    struct fsif_request *req;
+    ssize_t ret, to_copy;
+    int i;
+
+    BUG_ON(len > PAGE_SIZE * FSIF_NR_WRITE_GNTS);
 
     /* Prepare request for the backend */
     back_req_id = reserve_fsif_request(import);
@@ -315,20 +355,35 @@ ssize_t fs_write(struct fs_import *impor
     priv_req_id = get_id_from_freelist(import->freelist);
     DEBUG("Request id for fs_read call is: %d\n", priv_req_id);
     fsr = &import->requests[priv_req_id];
-    DEBUG("gref=%d\n", fsr->gref);
-    fsr->thread = current;
-    memcpy(fsr->page, buf, len);
-    BUG_ON(len > PAGE_SIZE);
-    memset((char *)fsr->page + len, 0, PAGE_SIZE - len); 
 
     req = RING_GET_REQUEST(&import->ring, back_req_id);
     req->type = REQ_FILE_WRITE;
     req->id = priv_req_id;
     req->u.fwrite.fd = fd;
-    req->u.fwrite.gref = fsr->gref;
     req->u.fwrite.len = len;
     req->u.fwrite.offset = offset;
 
+    ASSERT(len > 0);
+    gnts.count = ((len - 1) / PAGE_SIZE) + 1; 
+    to_copy = len;
+    for(i=0; i<gnts.count; i++)
+    {
+        int current_copy = (to_copy > PAGE_SIZE ? PAGE_SIZE : to_copy);
+        gnts.pages[i] = (void *)alloc_page(); 
+        gnts.grefs[i] = gnttab_grant_access(import->dom_id, 
+                                            virt_to_mfn(gnts.pages[i]), 
+                                            0); 
+        memcpy(gnts.pages[i], buf, current_copy);
+        if(current_copy < PAGE_SIZE)
+            memset((char *)gnts.pages[i] + current_copy, 
+                    0, 
+                    PAGE_SIZE - current_copy); 
+        req->u.fwrite.grefs[i] = gnts.grefs[i];
+        to_copy -= current_copy; 
+        buf = (char*) buf + current_copy;
+    }
+    fsr->thread = current;
+
     /* Set blocked flag before commiting the request, thus avoiding missed
      * response race */
     block(current);
@@ -338,6 +393,11 @@ ssize_t fs_write(struct fs_import *impor
     /* Read the response */
     ret = (ssize_t)fsr->shadow_rsp.ret_val;
     DEBUG("The following ret value returned %d\n", ret);
+    for(i=0; i<gnts.count; i++)
+    {
+        gnttab_end_access(gnts.grefs[i]);
+        free_page(gnts.pages[i]);
+    }
     add_id_to_freelist(priv_req_id, import->freelist);
 
     return ret;
@@ -361,15 +421,12 @@ int fs_stat(struct fs_import *import,
     priv_req_id = get_id_from_freelist(import->freelist);
     DEBUG("Request id for fs_stat call is: %d\n", priv_req_id);
     fsr = &import->requests[priv_req_id];
-    DEBUG("gref=%d\n", fsr->gref);
-    fsr->thread = current;
-    memset(fsr->page, 0, PAGE_SIZE);
+    fsr->thread = current;
 
     req = RING_GET_REQUEST(&import->ring, back_req_id);
     req->type = REQ_STAT;
     req->id = priv_req_id;
     req->u.fstat.fd   = fd;
-    req->u.fstat.gref = fsr->gref;
 
     /* Set blocked flag before commiting the request, thus avoiding missed
      * response race */
@@ -380,7 +437,9 @@ int fs_stat(struct fs_import *import,
     /* Read the response */
     ret = (int)fsr->shadow_rsp.ret_val;
     DEBUG("Following ret from fstat: %d\n", ret);
-    memcpy(stat, fsr->page, sizeof(struct fsif_stat_response));
+    memcpy(stat, 
+           &fsr->shadow_rsp.fstat, 
+           sizeof(struct fsif_stat_response));
     add_id_to_freelist(priv_req_id, import->freelist);
 
     return ret;
@@ -430,6 +489,8 @@ int fs_remove(struct fs_import *import, 
 {
     struct fs_request *fsr;
     unsigned short priv_req_id;
+    grant_ref_t gref;
+    void *buffer;
     RING_IDX back_req_id; 
     struct fsif_request *req;
     int ret;
@@ -442,14 +503,15 @@ int fs_remove(struct fs_import *import, 
     priv_req_id = get_id_from_freelist(import->freelist);
     DEBUG("Request id for fs_open call is: %d\n", priv_req_id);
     fsr = &import->requests[priv_req_id];
-    DEBUG("gref=%d\n", fsr->gref);
-    fsr->thread = current;
-    sprintf(fsr->page, "%s", file);
+    buffer = alloc_buffer_page(fsr, import->dom_id, &gref);
+    DEBUG("gref=%d\n", gref);
+    fsr->thread = current;
+    sprintf(buffer, "%s", file);
 
     req = RING_GET_REQUEST(&import->ring, back_req_id);
     req->type = REQ_REMOVE;
     req->id = priv_req_id;
-    req->u.fremove.gref = fsr->gref;
+    req->u.fremove.gref = gref;
 
     /* Set blocked flag before commiting the request, thus avoiding missed
      * response race */
@@ -460,6 +522,7 @@ int fs_remove(struct fs_import *import, 
     /* Read the response */
     ret = (int)fsr->shadow_rsp.ret_val;
     DEBUG("The following ret: %d\n", ret);
+    free_buffer_page(fsr);
     add_id_to_freelist(priv_req_id, import->freelist);
 
     return ret;
@@ -472,6 +535,8 @@ int fs_rename(struct fs_import *import,
 {
     struct fs_request *fsr;
     unsigned short priv_req_id;
+    grant_ref_t gref;
+    void *buffer;
     RING_IDX back_req_id; 
     struct fsif_request *req;
     int ret;
@@ -486,15 +551,16 @@ int fs_rename(struct fs_import *import,
     priv_req_id = get_id_from_freelist(import->freelist);
     DEBUG("Request id for fs_open call is: %d\n", priv_req_id);
     fsr = &import->requests[priv_req_id];
-    DEBUG("gref=%d\n", fsr->gref);
-    fsr->thread = current;
-    sprintf(fsr->page, "%s%s%c%s%s", 
+    buffer = alloc_buffer_page(fsr, import->dom_id, &gref);
+    DEBUG("gref=%d\n", gref);
+    fsr->thread = current;
+    sprintf(buffer, "%s%s%c%s%s", 
             old_header, old_file_name, '\0', new_header, new_file_name);
 
     req = RING_GET_REQUEST(&import->ring, back_req_id);
     req->type = REQ_RENAME;
     req->id = priv_req_id;
-    req->u.frename.gref = fsr->gref;
+    req->u.frename.gref = gref;
     req->u.frename.old_name_offset = strlen(old_header);
     req->u.frename.new_name_offset = strlen(old_header) +
                                      strlen(old_file_name) +
@@ -511,6 +577,7 @@ int fs_rename(struct fs_import *import,
     /* Read the response */
     ret = (int)fsr->shadow_rsp.ret_val;
     DEBUG("The following ret: %d\n", ret);
+    free_buffer_page(fsr);
     add_id_to_freelist(priv_req_id, import->freelist);
 
     return ret;
@@ -521,6 +588,8 @@ int fs_create(struct fs_import *import, 
 {
     struct fs_request *fsr;
     unsigned short priv_req_id;
+    grant_ref_t gref;
+    void *buffer;
     RING_IDX back_req_id; 
     struct fsif_request *req;
     int ret;
@@ -533,14 +602,15 @@ int fs_create(struct fs_import *import, 
     priv_req_id = get_id_from_freelist(import->freelist);
     DEBUG("Request id for fs_create call is: %d\n", priv_req_id);
     fsr = &import->requests[priv_req_id];
-    DEBUG("gref=%d\n", fsr->gref);
-    fsr->thread = current;
-    sprintf(fsr->page, "%s", name);
+    buffer = alloc_buffer_page(fsr, import->dom_id, &gref);
+    DEBUG("gref=%d\n", gref);
+    fsr->thread = current;
+    sprintf(buffer, "%s", name);
 
     req = RING_GET_REQUEST(&import->ring, back_req_id);
     req->type = REQ_CREATE;
     req->id = priv_req_id;
-    req->u.fcreate.gref = fsr->gref;
+    req->u.fcreate.gref = gref;
     req->u.fcreate.directory = directory;
     req->u.fcreate.mode = mode;
 
@@ -553,6 +623,7 @@ int fs_create(struct fs_import *import, 
     /* Read the response */
     ret = (int)fsr->shadow_rsp.ret_val;
     DEBUG("The following ret: %d\n", ret);
+    free_buffer_page(fsr);
     add_id_to_freelist(priv_req_id, import->freelist);
 
     return ret;
@@ -563,6 +634,8 @@ char** fs_list(struct fs_import *import,
 {
     struct fs_request *fsr;
     unsigned short priv_req_id;
+    grant_ref_t gref;
+    void *buffer;
     RING_IDX back_req_id; 
     struct fsif_request *req;
     char **files, *current_file;
@@ -579,14 +652,15 @@ char** fs_list(struct fs_import *import,
     priv_req_id = get_id_from_freelist(import->freelist);
     DEBUG("Request id for fs_list call is: %d\n", priv_req_id);
     fsr = &import->requests[priv_req_id];
-    DEBUG("gref=%d\n", fsr->gref);
-    fsr->thread = current;
-    sprintf(fsr->page, "%s", name);
+    buffer = alloc_buffer_page(fsr, import->dom_id, &gref);
+    DEBUG("gref=%d\n", gref);
+    fsr->thread = current;
+    sprintf(buffer, "%s", name);
 
     req = RING_GET_REQUEST(&import->ring, back_req_id);
     req->type = REQ_DIR_LIST;
     req->id = priv_req_id;
-    req->u.flist.gref = fsr->gref;
+    req->u.flist.gref = gref;
     req->u.flist.offset = offset;
 
     /* Set blocked flag before commiting the request, thus avoiding missed
@@ -600,7 +674,7 @@ char** fs_list(struct fs_import *import,
     files = NULL;
     if(*nr_files <= 0) goto exit;
     files = malloc(sizeof(char*) * (*nr_files));
-    current_file = fsr->page;
+    current_file = buffer; 
     for(i=0; i<*nr_files; i++)
     {
         files[i] = strdup(current_file); 
@@ -608,6 +682,7 @@ char** fs_list(struct fs_import *import,
     }
     if(has_more != NULL)
         *has_more = fsr->shadow_rsp.ret_val & HAS_MORE_FLAG;
+    free_buffer_page(fsr);
     add_id_to_freelist(priv_req_id, import->freelist);
 exit:
     return files;
@@ -655,6 +730,8 @@ int64_t fs_space(struct fs_import *impor
 {
     struct fs_request *fsr;
     unsigned short priv_req_id;
+    grant_ref_t gref;
+    void *buffer;
     RING_IDX back_req_id; 
     struct fsif_request *req;
     int64_t ret;
@@ -667,14 +744,15 @@ int64_t fs_space(struct fs_import *impor
     priv_req_id = get_id_from_freelist(import->freelist);
     DEBUG("Request id for fs_space is: %d\n", priv_req_id);
     fsr = &import->requests[priv_req_id];
-    DEBUG("gref=%d\n", fsr->gref);
-    fsr->thread = current;
-    sprintf(fsr->page, "%s", location);
+    buffer = alloc_buffer_page(fsr, import->dom_id, &gref);
+    DEBUG("gref=%d\n", gref);
+    fsr->thread = current;
+    sprintf(buffer, "%s", location);
 
     req = RING_GET_REQUEST(&import->ring, back_req_id);
     req->type = REQ_FS_SPACE;
     req->id = priv_req_id;
-    req->u.fspace.gref = fsr->gref;
+    req->u.fspace.gref = gref;
 
     /* Set blocked flag before commiting the request, thus avoiding missed
      * response race */
@@ -685,6 +763,7 @@ int64_t fs_space(struct fs_import *impor
     /* Read the response */
     ret = (int64_t)fsr->shadow_rsp.ret_val;
     DEBUG("The following returned: %lld\n", ret);
+    free_buffer_page(fsr);
     add_id_to_freelist(priv_req_id, import->freelist);
 
     return ret;
@@ -732,6 +811,23 @@ int fs_sync(struct fs_import *import, in
 /*                       END OF INDIVIDUAL FILE OPERATIONS                    
*/
 
/******************************************************************************/
 
+/*
+ * Allocate one page to serve as the data buffer of a request and grant
+ * the given domain access to it.
+ *
+ * req   - request slot; the page and its grant reference are recorded in
+ *         req->private1 / req->private2 so that free_buffer_page() can
+ *         release them later
+ * domid - domain that is granted access to the page
+ * gref  - out: grant reference covering the new page
+ *
+ * Returns the virtual address of the page. Running out of pages is treated
+ * as fatal, because callers write into the buffer unconditionally.
+ */
+void *alloc_buffer_page(struct fs_request *req, domid_t domid, grant_ref_t *gref)
+{
+    void *page;
+
+    page = (void *)alloc_page(); 
+    /* Never grant or hand out a page that was not actually allocated. */
+    BUG_ON(page == NULL);
+    *gref = gnttab_grant_access(domid, virt_to_mfn(page), 0); 
+    req->private1 = page;
+    req->private2 = (void *)(uintptr_t)(*gref);
+
+    return page;
+}
+
+/*
+ * Release the buffer recorded in a request slot: end the grant stored in
+ * req->private2, then free the page stored in req->private1.
+ */
+void free_buffer_page(struct fs_request *req)
+{
+    grant_ref_t gref = (grant_ref_t)(uintptr_t)req->private2;
+    void *page = req->private1;
+
+    gnttab_end_access(gref);
+    free_page(page);
+}
 
 static void fsfront_handler(evtchn_port_t port, struct pt_regs *regs, void 
*data)
 {
@@ -797,15 +893,7 @@ static void alloc_request_table(struct f
     import->freelist = xmalloc_array(unsigned short, import->nr_entries + 1);
     memset(import->freelist, 0, sizeof(unsigned short) * (import->nr_entries + 
1));
     for(i=0; i<import->nr_entries; i++)
-    {
-       /* TODO: that's a lot of memory */
-        requests[i].page = (void *)alloc_page(); 
-        requests[i].gref = gnttab_grant_access(import->dom_id, 
-                                               virt_to_mfn(requests[i].page),
-                                               0);
-        //printk("   ===>> Page=%lx, gref=%d, mfn=%lx\n", requests[i].page, 
requests[i].gref, virt_to_mfn(requests[i].page));
         add_id_to_freelist(i, import->freelist);
-    }
     import->requests = requests;
 }
 
@@ -818,22 +906,27 @@ void test_fs_import(void *data)
 void test_fs_import(void *data)
 {
     struct fs_import *import = (struct fs_import *)data; 
-    int ret, fd, i;
+    int ret, fd, i, repeat_count;
     int32_t nr_files;
     char buffer[1024];
     ssize_t offset;
     char **files;
     long ret64;
-   
+    struct fsif_stat_response stat;
+    
+    repeat_count = 10; 
     /* Sleep for 1s and then try to open a file */
     msleep(1000);
+again:
     ret = fs_create(import, "mini-os-created-directory", 1, 0777);
     printk("Directory create: %d\n", ret);
 
-    ret = fs_create(import, "mini-os-created-directory/mini-os-created-file", 
0, 0666);
+    sprintf(buffer, "mini-os-created-directory/mini-os-created-file-%d", 
+            repeat_count);
+    ret = fs_create(import, buffer, 0, 0666);
     printk("File create: %d\n", ret);
 
-    fd = fs_open(import, "mini-os-created-directory/mini-os-created-file");
+    fd = fs_open(import, buffer);
     printk("File descriptor: %d\n", fd);
     if(fd < 0) return;
 
@@ -847,7 +940,16 @@ void test_fs_import(void *data)
             return;
         offset += ret;
     }
-
+    ret = fs_stat(import, fd, &stat);
+    printk("Ret after stat: %d\n", ret);
+    printk(" st_mode=%o\n", stat.stat_mode);
+    printk(" st_uid =%d\n", stat.stat_uid);
+    printk(" st_gid =%d\n", stat.stat_gid);
+    printk(" st_size=%ld\n", stat.stat_size);
+    printk(" st_atime=%ld\n", stat.stat_atime);
+    printk(" st_mtime=%ld\n", stat.stat_mtime);
+    printk(" st_ctime=%ld\n", stat.stat_ctime);
+ 
     ret = fs_close(import, fd);
     printk("Closed fd: %d, ret=%d\n", fd, ret);
    
@@ -858,6 +960,9 @@ void test_fs_import(void *data)
 
     ret64 = fs_space(import, "/");
     printk("Free space: %lld (=%lld Mb)\n", ret64, (ret64 >> 20));
+    repeat_count--;
+    if(repeat_count > 0)
+        goto again;
     
 }
 
@@ -924,20 +1029,21 @@ static int init_fs_import(struct fs_impo
     xenbus_transaction_t xbt;
     char nodename[1024], r_nodename[1024], token[128], *message = NULL;
     struct fsif_sring *sring;
-    int retry = 0;
+    int i, retry = 0;
     domid_t self_id;
     xenbus_event_queue events = NULL;
 
     printk("Initialising FS fortend to backend dom %d\n", import->dom_id);
     /* Allocate page for the shared ring */
-    sring = (struct fsif_sring*) alloc_page();
-    memset(sring, 0, PAGE_SIZE);
+    sring = (struct fsif_sring*) alloc_pages(FSIF_RING_SIZE_ORDER);
+    memset(sring, 0, PAGE_SIZE * FSIF_RING_SIZE_PAGES);
 
     /* Init the shared ring */
     SHARED_RING_INIT(sring);
+    ASSERT(FSIF_NR_READ_GNTS == FSIF_NR_WRITE_GNTS);
 
     /* Init private frontend ring */
-    FRONT_RING_INIT(&import->ring, sring, PAGE_SIZE);
+    FRONT_RING_INIT(&import->ring, sring, PAGE_SIZE * FSIF_RING_SIZE_PAGES);
     import->nr_entries = import->ring.nr_ents;
 
     /* Allocate table of requests */
@@ -945,7 +1051,11 @@ static int init_fs_import(struct fs_impo
     init_SEMAPHORE(&import->reqs_sem, import->nr_entries);
 
     /* Grant access to the shared ring */
-    import->gnt_ref = gnttab_grant_access(import->dom_id, virt_to_mfn(sring), 
0);
+    for(i=0; i<FSIF_RING_SIZE_PAGES; i++) 
+        import->gnt_refs[i] = 
+            gnttab_grant_access(import->dom_id, 
+                                virt_to_mfn((char *)sring + i * PAGE_SIZE), 
+                                0);
    
     /* Allocate event channel */ 
     BUG_ON(evtchn_alloc_unbound(import->dom_id, 
@@ -969,12 +1079,26 @@ again:
     
     err = xenbus_printf(xbt, 
                         nodename, 
-                        "ring-ref",
+                        "ring-size",
                         "%u",
-                        import->gnt_ref);
+                        FSIF_RING_SIZE_PAGES);
     if (err) {
-        message = "writing ring-ref";
+        message = "writing ring-size";
         goto abort_transaction;
+    }
+    
+    for(i=0; i<FSIF_RING_SIZE_PAGES; i++)
+    {
+        sprintf(r_nodename, "ring-ref-%d", i);
+        err = xenbus_printf(xbt, 
+                            nodename, 
+                            r_nodename,
+                            "%u",
+                            import->gnt_refs[i]);
+        if (err) {
+            message = "writing ring-refs";
+            goto abort_transaction;
+        }
     }
 
     err = xenbus_printf(xbt, 
diff -r 7affdebb7a1e -r a39913db6e51 extras/mini-os/gntmap.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/extras/mini-os/gntmap.c   Thu Aug 07 11:57:34 2008 +0900
@@ -0,0 +1,252 @@
+/*
+ * Manages grant mappings from other domains.
+ *
+ * Diego Ongaro <diego.ongaro@xxxxxxxxxx>, July 2008
+ *
+ * Files of type FTYPE_GNTMAP contain a gntmap, which is an array of
+ * (host address, grant handle) pairs. Grant handles come from a hypervisor map
+ * operation and are needed for the corresponding unmap.
+ *
+ * This is a rather naive implementation in terms of performance. If we start
+ * using it frequently, there's definitely some low-hanging fruit here.
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <os.h>
+#include <xmalloc.h>
+#include <errno.h>
+#include <xen/grant_table.h>
+#include <inttypes.h>
+#include "gntmap.h"
+
+#define DEFAULT_MAX_GRANTS 128
+
+struct gntmap_entry {
+    unsigned long host_addr;    /* address the grant is mapped at; 0 marks a free slot */
+    grant_handle_t handle;      /* handle stored by the map operation, passed back on unmap */
+};
+
+/*
+ * Tell whether a slot currently holds a mapping.
+ * Returns 1 if the slot is in use, 0 if it is free.
+ */
+static inline int
+gntmap_entry_used(struct gntmap_entry *entry)
+{
+    /* A zero host address is the free-slot marker. */
+    if (entry->host_addr == 0)
+        return 0;
+    return 1;
+}
+
+/*
+ * Return the first free slot of the map, or NULL when every slot is in use
+ * (or the map has no slots at all).
+ */
+static struct gntmap_entry*
+gntmap_find_free_entry(struct gntmap *map)
+{
+    int idx = 0;
+
+    while (idx < map->nentries) {
+        struct gntmap_entry *slot = &map->entries[idx];
+
+        if (slot->host_addr == 0)
+            return slot;
+        idx++;
+    }
+
+#ifdef GNTMAP_DEBUG
+    printk("gntmap_find_free_entry(map=%p): all %d entries full\n",
+           map, map->nentries);
+#endif
+    return NULL;
+}
+
+/*
+ * Look up the in-use slot whose mapping starts at addr.
+ *
+ * Free slots carry host_addr == 0, so they are skipped explicitly; otherwise
+ * a lookup for address 0 would hand back a free slot and the caller would
+ * try to unmap a grant that was never mapped.
+ *
+ * Returns the matching slot, or NULL if addr is not mapped through this map.
+ */
+static struct gntmap_entry*
+gntmap_find_entry(struct gntmap *map, unsigned long addr)
+{
+    int i;
+
+    for (i = 0; i < map->nentries; i++) {
+        if (gntmap_entry_used(&map->entries[i]) &&
+            map->entries[i].host_addr == addr)
+            return &map->entries[i];
+    }
+    return NULL;
+}
+
+/*
+ * Size the slot table of a map. This can only be done while the map has no
+ * slots yet.
+ *
+ * map   - map to size
+ * count - number of slots to allocate; must be positive
+ *
+ * Returns 0 on success, -EBUSY if the map already has slots, -EINVAL if
+ * count is not positive, -ENOMEM if the table cannot be allocated.
+ */
+int
+gntmap_set_max_grants(struct gntmap *map, int count)
+{
+#ifdef GNTMAP_DEBUG
+    printk("gntmap_set_max_grants(map=%p, count=%d)\n", map, count);
+#endif
+
+    if (map->nentries != 0)
+        return -EBUSY;
+
+    /*
+     * A negative count must never reach the allocator, and a zero count
+     * would leave nentries at 0, so a later call would allocate again and
+     * leak this table.
+     */
+    if (count <= 0)
+        return -EINVAL;
+
+    map->entries = xmalloc_array(struct gntmap_entry, count);
+    if (map->entries == NULL)
+        return -ENOMEM;
+
+    memset(map->entries, 0, sizeof(struct gntmap_entry) * count);
+    map->nentries = count;
+    return 0;
+}
+
+/*
+ * Map a single grant reference at host_addr and record the mapping in entry.
+ *
+ * The mapping is requested read-only unless writable is non-zero.
+ *
+ * Returns 0 on success. On failure the slot is left untouched and the
+ * hypercall return value is returned if it is non-zero, otherwise the status
+ * reported for the operation.
+ */
+static int
+_gntmap_map_grant_ref(struct gntmap_entry *entry, 
+                      unsigned long host_addr,
+                      uint32_t domid,
+                      uint32_t ref,
+                      int writable)
+{
+    struct gnttab_map_grant_ref op;
+    int rc;
+
+    op.host_addr = (uint64_t) host_addr;
+    op.flags = writable ? GNTMAP_host_map
+                        : (GNTMAP_host_map | GNTMAP_readonly);
+    op.ref = (grant_ref_t) ref;
+    op.dom = (domid_t) domid;
+
+    rc = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
+    if (rc == 0 && op.status == GNTST_okay) {
+        /* Only a successful mapping turns the slot into a used one. */
+        entry->host_addr = host_addr;
+        entry->handle = op.handle;
+        return 0;
+    }
+
+    printk("GNTTABOP_map_grant_ref failed: "
+           "returned %d, status %" PRId16 "\n",
+           rc, op.status);
+    if (rc != 0)
+        return rc;
+    return op.status;
+}
+
+/*
+ * Unmap the grant recorded in entry and mark the slot free again.
+ *
+ * Returns 0 on success. On failure the slot stays marked as used and the
+ * hypercall return value is returned if it is non-zero, otherwise the status
+ * reported for the operation.
+ */
+static int
+_gntmap_unmap_grant_ref(struct gntmap_entry *entry)
+{
+    struct gnttab_unmap_grant_ref op;
+    int rc;
+
+    op.handle       = entry->handle;
+    op.host_addr    = (uint64_t) entry->host_addr;
+    op.dev_bus_addr = 0;
+
+    rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
+    if (rc == 0 && op.status == GNTST_okay) {
+        /* A zero host address is what marks the slot as free. */
+        entry->host_addr = 0;
+        return 0;
+    }
+
+    printk("GNTTABOP_unmap_grant_ref failed: "
+           "returned %d, status %" PRId16 "\n",
+           rc, op.status);
+    if (rc != 0)
+        return rc;
+    return op.status;
+}
+
+/*
+ * Unmap count consecutive pages starting at start_address.
+ *
+ * Pages are processed in ascending order and processing stops at the first
+ * failure, so earlier pages stay unmapped when a later one fails.
+ *
+ * Returns 0 on success, -EINVAL if a page is not known to this map, or the
+ * error from the unmap operation.
+ */
+int
+gntmap_munmap(struct gntmap *map, unsigned long start_address, int count)
+{
+    int page;
+
+#ifdef GNTMAP_DEBUG
+    printk("gntmap_munmap(map=%p, start_address=%lx, count=%d)\n",
+           map, start_address, count);
+#endif
+
+    for (page = 0; page < count; page++) {
+        unsigned long page_addr = start_address + PAGE_SIZE * page;
+        struct gntmap_entry *slot = gntmap_find_entry(map, page_addr);
+        int err;
+
+        if (slot == NULL) {
+            printk("gntmap: tried to munmap unknown page\n");
+            return -EINVAL;
+        }
+
+        err = _gntmap_unmap_grant_ref(slot);
+        if (err != 0)
+            return err;
+    }
+
+    return 0;
+}
+
+/*
+ * Map count grant references into one contiguous range of pages.
+ *
+ * map           - map that tracks the resulting mappings; it is sized to
+ *                 DEFAULT_MAX_GRANTS slots if it has no slots yet
+ * count         - number of grant references to map
+ * domids        - granting domains; entry i is read at
+ *                 domids[i * domids_stride]
+ * domids_stride - distance between consecutive domids entries
+ * refs          - grant references, one per page
+ * writable      - non-zero to map the pages writable, zero for read-only
+ *
+ * Returns the start address of the range, or NULL on failure. Pages already
+ * mapped by this call are unmapped again before a failure is reported.
+ */
+void*
+gntmap_map_grant_refs(struct gntmap *map, 
+                      uint32_t count,
+                      uint32_t *domids,
+                      int domids_stride,
+                      uint32_t *refs,
+                      int writable)
+{
+    unsigned long addr;
+    struct gntmap_entry *ent;
+    uint32_t i;    /* same type as count: no signed/unsigned comparison */
+
+#ifdef GNTMAP_DEBUG
+    printk("gntmap_map_grant_refs(map=%p, count=%" PRIu32 ", "
+           "domids=%p [%" PRIu32 "...], domids_stride=%d, "
+           "refs=%p [%" PRIu32 "...], writable=%d)\n",
+           map, count,
+           domids, domids == NULL ? 0 : domids[0], domids_stride,
+           refs, refs == NULL ? 0 : refs[0], writable);
+#endif
+
+    /* Both arrays are indexed below; refuse to walk a missing one. */
+    if (count > 0 && (domids == NULL || refs == NULL))
+        return NULL;
+
+    /*
+     * Best effort: this fails with -EBUSY when the map is already sized,
+     * which is fine. If the table cannot be allocated no free slot is found
+     * below and the call fails there.
+     */
+    (void) gntmap_set_max_grants(map, DEFAULT_MAX_GRANTS);
+
+    addr = allocate_ondemand((unsigned long) count, 1);
+    if (addr == 0)
+        return NULL;
+
+    for (i = 0; i < count; i++) {
+        ent = gntmap_find_free_entry(map);
+        if (ent == NULL ||
+            _gntmap_map_grant_ref(ent,
+                                  addr + PAGE_SIZE * i,
+                                  domids[i * domids_stride],
+                                  refs[i],
+                                  writable) != 0) {
+
+            /* Roll back the i pages that were mapped before the failure. */
+            (void) gntmap_munmap(map, addr, (int) i);
+            return NULL;
+        }
+    }
+
+    return (void*) addr;
+}
+
+/*
+ * Put a map into its empty state: no slot table and zero slots.
+ */
+void
+gntmap_init(struct gntmap *map)
+{
+#ifdef GNTMAP_DEBUG
+    printk("gntmap_init(map=%p)\n", map);
+#endif
+    map->entries = NULL;
+    map->nentries = 0;
+}
+
+/*
+ * Tear a map down: unmap every slot that is still in use, free the slot
+ * table and return the map to its empty state. Unmap failures are ignored.
+ */
+void
+gntmap_fini(struct gntmap *map)
+{
+    int idx;
+
+#ifdef GNTMAP_DEBUG
+    printk("gntmap_fini(map=%p)\n", map);
+#endif
+
+    for (idx = 0; idx < map->nentries; idx++) {
+        if (!gntmap_entry_used(&map->entries[idx]))
+            continue;
+        (void) _gntmap_unmap_grant_ref(&map->entries[idx]);
+    }
+
+    xfree(map->entries);
+    map->nentries = 0;
+    map->entries = NULL;
+}
diff -r 7affdebb7a1e -r a39913db6e51 extras/mini-os/include/fs.h
--- a/extras/mini-os/include/fs.h       Thu Aug 07 11:47:34 2008 +0900
+++ b/extras/mini-os/include/fs.h       Thu Aug 07 11:57:34 2008 +0900
@@ -4,6 +4,9 @@
 #include <xen/io/fsif.h>
 #include <mini-os/semaphore.h>
 #include <mini-os/types.h>
+
+#define FSIF_RING_SIZE_ORDER   1    /* allocation order (log2 of the page count) of the shared ring */
+#define FSIF_RING_SIZE_PAGES   (1<<FSIF_RING_SIZE_ORDER)    /* number of pages making up the shared ring */
 
 struct fs_import 
 {
@@ -14,7 +17,7 @@ struct fs_import
     unsigned int nr_entries;        /* Number of entries in rings & request
                                        array                                */
     struct fsif_front_ring ring;    /* frontend ring (contains shared ring) */
-    int gnt_ref;                    /* grant reference to the shared ring   */
+    u32 gnt_refs[FSIF_RING_SIZE_PAGES];  /* grant references to the shared 
ring  */
     evtchn_port_t local_port;       /* local event channel port             */
     char *backend;                  /* XenBus location of the backend       */
     struct fs_request *requests;    /* Table of requests                    */
diff -r 7affdebb7a1e -r a39913db6e51 extras/mini-os/include/gntmap.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/extras/mini-os/include/gntmap.h   Thu Aug 07 11:57:34 2008 +0900
@@ -0,0 +1,35 @@
+#ifndef __GNTMAP_H__
+#define __GNTMAP_H__
+
+#include <os.h>
+
+/*
+ * Please consider struct gntmap opaque. If instead you choose to disregard
+ * this message, I insist that you keep an eye out for raptors.
+ */
+struct gntmap {
+    int nentries;                   /* number of slots in entries; 0 while the map is unsized */
+    struct gntmap_entry *entries;   /* slot table; NULL after gntmap_init() and gntmap_fini() */
+};
+
+int
+gntmap_set_max_grants(struct gntmap *map, int count);
+
+int
+gntmap_munmap(struct gntmap *map, unsigned long start_address, int count);
+
+void*
+gntmap_map_grant_refs(struct gntmap *map, 
+                      uint32_t count,
+                      uint32_t *domids,
+                      int domids_stride,
+                      uint32_t *refs,
+                      int writable);
+
+void
+gntmap_init(struct gntmap *map);
+
+void
+gntmap_fini(struct gntmap *map);
+
+#endif /* !__GNTMAP_H__ */
diff -r 7affdebb7a1e -r a39913db6e51 extras/mini-os/include/lib.h
--- a/extras/mini-os/include/lib.h      Thu Aug 07 11:47:34 2008 +0900
+++ b/extras/mini-os/include/lib.h      Thu Aug 07 11:57:34 2008 +0900
@@ -59,6 +59,7 @@
 #include <stddef.h>
 #include <xen/xen.h>
 #include <xen/event_channel.h>
+#include "gntmap.h"
 
 #ifdef HAVE_LIBC
 #include <stdio.h>
@@ -138,6 +139,7 @@ enum fd_type {
     FTYPE_XENBUS,
     FTYPE_XC,
     FTYPE_EVTCHN,
+    FTYPE_GNTMAP,
     FTYPE_SOCKET,
     FTYPE_TAP,
     FTYPE_BLK,
@@ -168,6 +170,7 @@ extern struct file {
                 int bound;
             } ports[MAX_EVTCHN_PORTS];
        } evtchn;
+       struct gntmap gntmap;
        struct {
            struct netfront_dev *dev;
        } tap;
diff -r 7affdebb7a1e -r a39913db6e51 extras/mini-os/lib/sys.c
--- a/extras/mini-os/lib/sys.c  Thu Aug 07 11:47:34 2008 +0900
+++ b/extras/mini-os/lib/sys.c  Thu Aug 07 11:57:34 2008 +0900
@@ -84,6 +84,7 @@
 #define NOFILE 32
 extern int xc_evtchn_close(int fd);
 extern int xc_interface_close(int fd);
+extern int xc_gnttab_close(int fd);
 
 pthread_mutex_t fd_lock = PTHREAD_MUTEX_INITIALIZER;
 struct file files[NOFILE] = {
@@ -230,8 +231,8 @@ int read(int fd, void *buf, size_t nbyte
         }
        case FTYPE_FILE: {
            ssize_t ret;
-           if (nbytes > PAGE_SIZE)
-               nbytes = PAGE_SIZE;
+           if (nbytes > PAGE_SIZE * FSIF_NR_READ_GNTS)
+               nbytes = PAGE_SIZE * FSIF_NR_READ_GNTS;
            ret = fs_read(fs_import, files[fd].file.fd, buf, nbytes, 
files[fd].file.offset);
            if (ret > 0) {
                files[fd].file.offset += ret;
@@ -291,8 +292,8 @@ int write(int fd, const void *buf, size_
            return nbytes;
        case FTYPE_FILE: {
            ssize_t ret;
-           if (nbytes > PAGE_SIZE)
-               nbytes = PAGE_SIZE;
+           if (nbytes > PAGE_SIZE * FSIF_NR_WRITE_GNTS)
+               nbytes = PAGE_SIZE * FSIF_NR_WRITE_GNTS;
            ret = fs_write(fs_import, files[fd].file.fd, (void *) buf, nbytes, 
files[fd].file.offset);
            if (ret > 0) {
                files[fd].file.offset += ret;
@@ -401,6 +402,9 @@ int close(int fd)
        case FTYPE_EVTCHN:
             xc_evtchn_close(fd);
             return 0;
+       case FTYPE_GNTMAP:
+           xc_gnttab_close(fd);
+           return 0;
        case FTYPE_TAP:
            shutdown_netfront(files[fd].tap.dev);
            files[fd].type = FTYPE_NONE;
diff -r 7affdebb7a1e -r a39913db6e51 extras/mini-os/minios.mk
--- a/extras/mini-os/minios.mk  Thu Aug 07 11:47:34 2008 +0900
+++ b/extras/mini-os/minios.mk  Thu Aug 07 11:57:34 2008 +0900
@@ -21,6 +21,7 @@ DEF_CFLAGS += -g
 #DEF_CFLAGS += -DFS_DEBUG
 #DEF_CFLAGS += -DLIBC_DEBUG
 DEF_CFLAGS += -DGNT_DEBUG
+DEF_CFLAGS += -DGNTMAP_DEBUG
 else
 DEF_CFLAGS += -O3
 endif
diff -r 7affdebb7a1e -r a39913db6e51 extras/mini-os/pcifront.c
--- a/extras/mini-os/pcifront.c Thu Aug 07 11:47:34 2008 +0900
+++ b/extras/mini-os/pcifront.c Thu Aug 07 11:57:34 2008 +0900
@@ -57,19 +57,26 @@ struct pcifront_dev *init_pcifront(char 
     int retry=0;
     char* msg;
     char* nodename = _nodename ? _nodename : "device/pci/0";
+    int dom;
 
     struct pcifront_dev *dev;
 
     char path[strlen(nodename) + 1 + 10 + 1];
 
     printk("******************* PCIFRONT for %s **********\n\n\n", nodename);
+
+    snprintf(path, sizeof(path), "%s/backend-id", nodename);
+    dom = xenbus_read_integer(path); 
+    if (dom == -1) {
+        printk("no backend\n");
+        return NULL;
+    }
 
     dev = malloc(sizeof(*dev));
     memset(dev, 0, sizeof(*dev));
     dev->nodename = strdup(nodename);
-
-    snprintf(path, sizeof(path), "%s/backend-id", nodename);
-    dev->dom = xenbus_read_integer(path); 
+    dev->dom = dom;
+
     evtchn_alloc_unbound(dev->dom, pcifront_handler, dev, &dev->evtchn);
 
     dev->info = (struct xen_pci_sharedinfo*) alloc_page();
diff -r 7affdebb7a1e -r a39913db6e51 stubdom/Makefile
--- a/stubdom/Makefile  Thu Aug 07 11:47:34 2008 +0900
+++ b/stubdom/Makefile  Thu Aug 07 11:57:34 2008 +0900
@@ -2,18 +2,23 @@ MINI_OS = $(XEN_ROOT)/extras/mini-os
 MINI_OS = $(XEN_ROOT)/extras/mini-os
 
 export XEN_OS=MiniOS
-
-CONFIG_QEMU=ioemu
 
 export stubdom=y
 export debug=y
 include $(XEN_ROOT)/Config.mk
 
+override CONFIG_QEMU=ioemu
+
 IOEMU_OPTIONS=--disable-sdl --disable-opengl --disable-gfx-check 
--disable-vnc-tls --disable-brlapi --disable-kqemu
+ZLIB_URL?=http://www.zlib.net
 ZLIB_VERSION=1.2.3
+LIBPCI_URL?=http://www.kernel.org/pub/software/utils/pciutils
 LIBPCI_VERSION=2.2.9
+NEWLIB_URL?=ftp://sources.redhat.com/pub/newlib
 NEWLIB_VERSION=1.16.0
+LWIP_URL?=http://download.savannah.gnu.org/releases/lwip
 LWIP_VERSION=1.3.0
+GRUB_URL?=http://alpha.gnu.org/gnu/grub
 GRUB_VERSION=0.97
 
 WGET=wget -c
@@ -75,7 +80,7 @@ endif
 ##############
 
 newlib-$(NEWLIB_VERSION).tar.gz:
-       $(WGET) ftp://sources.redhat.com/pub/newlib/$@
+       $(WGET) $(NEWLIB_URL)/$@
 
 newlib-$(NEWLIB_VERSION): newlib-$(NEWLIB_VERSION).tar.gz
        tar xzf $<
@@ -97,7 +102,7 @@ cross-newlib: $(NEWLIB_STAMPFILE)
 ############
 
 zlib-$(ZLIB_VERSION).tar.gz:
-       $(WGET) http://www.zlib.net/$@
+       $(WGET) $(ZLIB_URL)/$@
 
 ZLIB_STAMPFILE=$(CROSS_ROOT)/$(GNU_TARGET_ARCH)-xen-elf/lib/libz.a
 .PHONY: cross-zlib
@@ -114,7 +119,7 @@ cross-zlib: $(ZLIB_STAMPFILE)
 ##############
 
 pciutils-$(LIBPCI_VERSION).tar.bz2:
-       $(WGET) http://www.kernel.org/pub/software/utils/pciutils/$@
+       $(WGET) $(LIBPCI_URL)/$@
 
 pciutils-$(LIBPCI_VERSION): pciutils-$(LIBPCI_VERSION).tar.bz2
        tar xjf $<
@@ -132,7 +137,7 @@ cross-libpci: $(LIBPCI_STAMPFILE)
          $(MAKE) CC="$(CC) $(TARGET_CPPFLAGS) $(TARGET_CFLAGS) -I$(realpath 
$(MINI_OS)/include)" lib/libpci.a && \
          $(INSTALL_DATA) lib/libpci.a 
$(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/lib/ && \
          $(INSTALL_DIR) $(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/include/pci 
&& \
-         $(INSTALL_DATA) lib/{config,header,pci,types}.h 
$(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/include/pci/ \
+         $(INSTALL_DATA) lib/config.h lib/header.h lib/pci.h lib/types.h 
$(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/include/pci/ \
        )
 
 ######
@@ -140,7 +145,7 @@ cross-libpci: $(LIBPCI_STAMPFILE)
 ######
 
 lwip-$(LWIP_VERSION).tar.gz:
-       $(WGET) http://download.savannah.gnu.org/releases/lwip/$@
+       $(WGET) $(LWIP_URL)/$@
 
 lwip: lwip-$(LWIP_VERSION).tar.gz
        tar xzf $<
@@ -154,7 +159,6 @@ lwip: lwip-$(LWIP_VERSION).tar.gz
 .PHONY: $(CROSS_ROOT)
 $(CROSS_ROOT): cross-newlib cross-zlib cross-libpci
 
-.PHONY: mk-headers
 mk-headers:
        mkdir -p include/xen && \
           ln -sf $(addprefix ../../,$(wildcard 
$(XEN_ROOT)/xen/include/public/*.h)) include/xen && \
@@ -191,6 +195,7 @@ endif
        [ ! -h ioemu/config-host.h ] || rm -f ioemu/config-host.h
        [ ! -h ioemu/config-host.mak ] || rm -f ioemu/config-host.mak
        $(MAKE) -C $(MINI_OS) links
+       touch mk-headers
 
 TARGETS_MINIOS=$(addprefix mini-os-,$(TARGETS))
 $(TARGETS_MINIOS): mini-os-%:
@@ -247,7 +252,7 @@ c: $(CROSS_ROOT)
 ######
 
 grub-$(GRUB_VERSION).tar.gz:
-       $(WGET) ftp://alpha.gnu.org/gnu/grub/$@
+       $(WGET) $(GRUB_URL)/$@
 
 grub-upstream: grub-$(GRUB_VERSION).tar.gz
        tar xzf $<
@@ -291,20 +296,24 @@ pv-grub: mini-os-grub libxc grub
 #########
 
 ifeq ($(STUBDOM_SUPPORTED),1)
-install: install-ioemu install-grub
+install: install-readme install-ioemu install-grub
 else
 install:
 endif
+
+install-readme:
+       $(INSTALL_DIR) $(DESTDIR)$(DOCDIR)
+       $(INSTALL_DATA) README $(DESTDIR)$(DOCDIR)/README.stubdom
 
 install-ioemu: ioemu-stubdom
        $(INSTALL_DIR) "$(DESTDIR)/usr/lib/xen/bin"
        $(INSTALL_PROG) stubdom-dm "$(DESTDIR)/usr/lib/xen/bin"
        $(INSTALL_DIR) "$(DESTDIR)/usr/lib/xen/boot"
-       $(INSTALL_PROG) mini-os-ioemu/mini-os.gz 
"$(DESTDIR)/usr/lib/xen/boot/ioemu-stubdom.gz"
+       $(INSTALL_DATA) mini-os-ioemu/mini-os.gz 
"$(DESTDIR)/usr/lib/xen/boot/ioemu-stubdom.gz"
 
 install-grub: pv-grub
        $(INSTALL_DIR) "$(DESTDIR)/usr/lib/xen/boot"
-       $(INSTALL_PROG) mini-os-grub/mini-os.gz 
"$(DESTDIR)/usr/lib/xen/boot/pv-grub.gz"
+       $(INSTALL_DATA) mini-os-grub/mini-os.gz 
"$(DESTDIR)/usr/lib/xen/boot/pv-grub.gz"
 
 #######
 # clean
@@ -320,7 +329,8 @@ clean:
        $(MAKE) -C caml clean
        $(MAKE) -C c clean
        $(MAKE) -C grub clean
-       rm -fr libxc ioemu mini-os include
+       [ ! -d libxc ] || $(MAKE) -C libxc clean
+       [ ! -d ioemu ] || $(MAKE) -C ioemu clean
 
 # clean the cross-compilation result
 .PHONY: crossclean
@@ -328,6 +338,8 @@ crossclean: clean
        rm -fr $(CROSS_ROOT)
        rm -fr newlib-build
        rm -fr zlib-$(ZLIB_VERSION) pciutils-$(LIBPCI_VERSION)
+       rm -fr libxc ioemu
+       rm -f mk-headers
 
 # clean patched sources
 .PHONY: patchclean
diff -r 7affdebb7a1e -r a39913db6e51 stubdom/README
--- a/stubdom/README    Thu Aug 07 11:47:34 2008 +0900
+++ b/stubdom/README    Thu Aug 07 11:57:34 2008 +0900
@@ -1,13 +1,3 @@ To compile
-To compile
-==========
-
-Just run make -j 4, that will download / patch / compile
-Then make install to install the result.
-
-Also, run make and make install in $XEN_ROOT/tools/fs-back
-
-
-
                                 IOEMU stubdom
                                 =============
 
@@ -15,6 +5,14 @@ Also, run make and make install in $XEN_
 
 General Configuration
 =====================
+
+Due to a race between the creation of the IOEMU stubdomain itself and allocation
+of video memory for the HVM domain, you need to avoid the need for ballooning,
+by using the hypervisor dom0_mem= option for instance.
+
+
+There is a sample configuration set in xmexample.hvm-stubdom and
+xmexample.hvm-dm
 
 In your HVM config "hvmconfig",
 
diff -r 7affdebb7a1e -r a39913db6e51 stubdom/stubdom-dm
--- a/stubdom/stubdom-dm        Thu Aug 07 11:47:34 2008 +0900
+++ b/stubdom/stubdom-dm        Thu Aug 07 11:57:34 2008 +0900
@@ -55,7 +55,7 @@ term() {
     kill %1
     (
        [ -n "$vncpid" ] && kill -9 $vncpid
-       xm destroy stubdom-$domname
+       xm destroy $domname-dm
        #xm destroy $domname
     ) &
     # We need to exit immediately so as to let xend do the commands above
@@ -67,12 +67,12 @@ trap term SIGHUP
 ############
 # stubdomain
 # Wait for any previous stubdom to terminate
-while xm list | grep stubdom-$domname
+while xm list | grep $domname-dm
 do
        sleep 1
 done
 
-creation="xm create -c stubdom-$domname target=$domid memory=32 extra=\"$extra\""
+creation="xm create -c $domname-dm target=$domid memory=32 extra=\"$extra\""
 
 (while true ; do sleep 60 ; done) | /bin/sh -c "$creation" &
 #xterm -geometry +0+0 -e /bin/sh -c "$creation ; echo ; echo press ENTER to shut down ; read" &
diff -r 7affdebb7a1e -r a39913db6e51 tools/Makefile
--- a/tools/Makefile    Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/Makefile    Thu Aug 07 11:57:34 2008 +0900
@@ -55,13 +55,14 @@ clean distclean: subdirs-clean
 clean distclean: subdirs-clean
 
 ifneq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH))
-IOEMU_CONFIGURE_CROSS ?= --cross-prefix=$(CROSS_COMPILE) \
+IOEMU_CONFIGURE_CROSS ?= --cpu=$(XEN_TARGET_ARCH) \
+                        --cross-prefix=$(CROSS_COMPILE) \
                         --interp-prefix=$(CROSS_SYS_ROOT)
 endif
 
 ioemu/config-host.mak:
-       cd ioemu && XEN_TARGET_ARCH=$(XEN_TARGET_ARCH) sh configure --prefix=/usr \
-               $(IOEMU_CONFIGURE_CROSS)
+       cd ioemu && XEN_TARGET_ARCH=$(XEN_TARGET_ARCH) sh configure \
+               --prefix=$(PREFIX) $(IOEMU_CONFIGURE_CROSS)
 
 subdir-all-ioemu subdir-install-ioemu: ioemu/config-host.mak
 
@@ -78,6 +79,12 @@ ioemu-dir-find:
                        rm -rf ioemu-remote ioemu-remote.tmp; \
                        mkdir ioemu-remote.tmp; rmdir ioemu-remote.tmp; \
                        $(GIT) clone $(CONFIG_QEMU) ioemu-remote.tmp; \
+                       if [ "$(QEMU_TAG)" ]; then                      \
+                               cd ioemu-remote.tmp;                    \
+                               $(GIT) branch -D dummy >/dev/null 2>&1 ||:; \
+                               $(GIT) checkout -b dummy $(QEMU_TAG);   \
+                               cd ..;                                  \
+                       fi;                                             \
                        mv ioemu-remote.tmp ioemu-remote; \
                fi; \
                rm -f ioemu-dir; \
@@ -90,7 +97,7 @@ ioemu-dir-find:
                esac; \
                export XEN_ROOT; \
                cd ioemu-dir; \
-               ./xen-setup
+               ./xen-setup $(IOEMU_CONFIGURE_CROSS)
 
 subdir-all-ioemu-dir subdir-install-ioemu-dir: ioemu-dir-find
 
diff -r 7affdebb7a1e -r a39913db6e51 tools/blktap/Makefile
--- a/tools/blktap/Makefile     Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/blktap/Makefile     Thu Aug 07 11:57:34 2008 +0900
@@ -8,3 +8,6 @@ SUBDIRS-y += drivers
 .PHONY: all clean install
 all clean install: %: subdirs-%
 
+install:
+       $(INSTALL_DIR) $(DESTDIR)$(DOCDIR)
+       $(INSTALL_DATA) README $(DESTDIR)$(DOCDIR)/README.blktap
diff -r 7affdebb7a1e -r a39913db6e51 tools/blktap/lib/Makefile
--- a/tools/blktap/lib/Makefile Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/blktap/lib/Makefile Thu Aug 07 11:57:34 2008 +0900
@@ -43,7 +43,7 @@ install: all
 
 .PHONY: clean
 clean:
-       rm -rf *.a *.so* *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS
+       rm -rf *.a *.so* *.o *.opic *.rpm $(LIB) *~ $(DEPS) xen TAGS
 
 libblktap.so.$(MAJOR).$(MINOR): $(OBJS_PIC) 
        $(CC) $(CFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,$(SONAME) $(SHLIB_CFLAGS) \
diff -r 7affdebb7a1e -r a39913db6e51 tools/console/Makefile
--- a/tools/console/Makefile    Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/console/Makefile    Thu Aug 07 11:57:34 2008 +0900
@@ -21,7 +21,7 @@ clean:
 
 xenconsoled: $(patsubst %.c,%.o,$(wildcard daemon/*.c))
        $(CC) $(CFLAGS) $^ -o $@ $(LDFLAGS) \
-              $(UTIL_LIBS) $(SOCKET_LIBS)
+              $(UTIL_LIBS) $(SOCKET_LIBS) -lrt
 
 xenconsole: $(patsubst %.c,%.o,$(wildcard client/*.c))
        $(CC) $(CFLAGS) $^ -o $@ $(LDFLAGS) \
diff -r 7affdebb7a1e -r a39913db6e51 tools/console/daemon/io.c
--- a/tools/console/daemon/io.c Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/console/daemon/io.c Thu Aug 07 11:57:34 2008 +0900
@@ -622,9 +622,9 @@ static struct domain *create_domain(int 
 {
        struct domain *dom;
        char *s;
-       struct timeval tv;
-
-       if (gettimeofday(&tv, NULL) < 0) {
+       struct timespec ts;
+
+       if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0) {
                dolog(LOG_ERR, "Cannot get time of day %s:%s:L%d",
                      __FILE__, __FUNCTION__, __LINE__);
                return NULL;
@@ -666,7 +666,7 @@ static struct domain *create_domain(int 
        dom->buffer.capacity = 0;
        dom->buffer.max_capacity = 0;
        dom->event_count = 0;
-       dom->next_period = (tv.tv_sec * 1000) + (tv.tv_usec / 1000) + RATE_LIMIT_PERIOD;
+       dom->next_period = (ts.tv_sec * 1000) + (ts.tv_nsec / 1000000) + RATE_LIMIT_PERIOD;
        dom->next = NULL;
 
        dom->ring_ref = -1;
@@ -971,7 +971,7 @@ void handle_io(void)
                struct domain *d, *n;
                int max_fd = -1;
                struct timeval timeout;
-               struct timeval tv;
+               struct timespec ts;
                long long now, next_timeout = 0;
 
                FD_ZERO(&readfds);
@@ -985,9 +985,9 @@ void handle_io(void)
                        max_fd = MAX(xc_evtchn_fd(xce_handle), max_fd);
                }
 
-               if (gettimeofday(&tv, NULL) < 0)
+               if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0)
                        return;
-               now = (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+               now = (ts.tv_sec * 1000) + (ts.tv_nsec / 1000000);
 
                /* Re-calculate any event counter allowances & unblock
                   domains with new allowance */
diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/Makefile
--- a/tools/examples/Makefile   Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/examples/Makefile   Thu Aug 07 11:57:34 2008 +0900
@@ -8,11 +8,18 @@ XENDOMAINS_SYSCONFIG = init.d/sysconfig.
 
 # Xen configuration dir and configs to go there.
 XEN_CONFIG_DIR = /etc/xen
+XEN_READMES = README
+XEN_READMES += README.incompatibilities
 XEN_CONFIGS = xend-config.sxp
 XEN_CONFIGS += xm-config.xml
 XEN_CONFIGS += xmexample1 
 XEN_CONFIGS += xmexample2
+XEN_CONFIGS += xmexample3
 XEN_CONFIGS += xmexample.hvm
+XEN_CONFIGS += xmexample.hvm-stubdom
+XEN_CONFIGS += xmexample.hvm-dm
+XEN_CONFIGS += xmexample.pv-grub
+XEN_CONFIGS += xmexample.nbd
 XEN_CONFIGS += xmexample.vti
 XEN_CONFIGS += xend-pci-quirks.sxp
 XEN_CONFIGS += xend-pci-permissive.sxp
@@ -59,7 +66,16 @@ build:
 build:
 
 .PHONY: install
-install: all install-initd install-configs install-scripts $(HOTPLUGS)
+install: all install-readmes install-initd install-configs install-scripts $(HOTPLUGS)
+
+.PHONY: install-readmes
+install-readmes:
+       [ -d $(DESTDIR)$(XEN_CONFIG_DIR) ] || \
+               $(INSTALL_DIR) $(DESTDIR)$(XEN_CONFIG_DIR)
+       set -e; for i in $(XEN_READMES); \
+           do [ -e $(DESTDIR)$(XEN_CONFIG_DIR)/$$i ] || \
+           $(INSTALL_DATA) $$i $(DESTDIR)$(XEN_CONFIG_DIR); \
+       done
 
 .PHONY: install-initd
 install-initd:
diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/README
--- a/tools/examples/README     Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/examples/README     Thu Aug 07 11:57:34 2008 +0900
@@ -44,4 +44,8 @@ xmexample.nbd       - configuration scri
 xmexample.nbd       - configuration script that uses NBD filesystems
 xmexample.hvm       - a configuration script for creating a hvm domain with
                       'xm create'
+xmexample.hvm-stubdom - a configuration script for creating a hvm domain with
+                        'xm create' that utilizes a stubdomain for device model
+xmexample.pv-grub   - a configuration script for creating a domain with 'xm create'
+                      which boots PV-GRUB.
 xmexample.vti       - a configuration script for creating a domain on vti
diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/stubdom-ExampleHVMDomain
--- a/tools/examples/stubdom-ExampleHVMDomain   Thu Aug 07 11:47:34 2008 +0900
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-# Not to be started directly,
-# See xmexample.hvm-stubdom and stubdom/README for more details
-
-kernel = "/usr/lib/xen/boot/ioemu-stubdom.gz"
-
-# Must be the same as in xmexample.hvm-stubdom, with a prepended vif for TCP/IP
-# networking in the stubdomain itself, here just ''
-vif = [ '', 'type=ioemu, bridge=xenbr0' ]
-
-# Set here instead of in xmexample.hvm-stubdom
-disk = [ 'file:/var/images/min-el3-i386.img,hda,w', ',hdc:cdrom,r' ]
-
-# Actual output via PVFB
-vfb = [ 'type=sdl' ]
diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/xend-config.sxp
--- a/tools/examples/xend-config.sxp    Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/examples/xend-config.sxp    Thu Aug 07 11:57:34 2008 +0900
@@ -245,3 +245,7 @@
 
 # Rotation count of qemu-dm log file.
 #(qemu-dm-logrotate-count 10)
+
+# Path where persistent domain configuration is stored.
+# Default is /var/lib/xend/domains/
+#(xend-domains-path /var/lib/xend/domains)
diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/xmexample.hvm
--- a/tools/examples/xmexample.hvm      Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/examples/xmexample.hvm      Thu Aug 07 11:57:34 2008 +0900
@@ -156,11 +156,6 @@ vnc=1
 #----------------------------------------------------------------------------
 # try to find an unused port for the VNC server, default = 1
 #vncunused=1
-
-#----------------------------------------------------------------------------
-# enable spawning vncviewer for domain's console
-# (only valid when vnc=1), default = 0
-#vncconsole=0
 
 #----------------------------------------------------------------------------
 # set password for domain's VNC console
diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/xmexample.hvm-dm
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/examples/xmexample.hvm-dm   Thu Aug 07 11:57:34 2008 +0900
@@ -0,0 +1,14 @@
+# Not to be started directly,
+# See xmexample.hvm-stubdom and stubdom/README for more details
+
+kernel = "/usr/lib/xen/boot/ioemu-stubdom.gz"
+
+# Must be the same as in xmexample.hvm-stubdom, with a prepended vif for TCP/IP
+# networking in the stubdomain itself, here just ''
+vif = [ '', 'type=ioemu, bridge=xenbr0' ]
+
+# Set here instead of in xmexample.hvm-stubdom
+disk = [ 'file:/var/images/min-el3-i386.img,hda,w', ',hdc:cdrom,r' ]
+
+# Actual output via PVFB
+vfb = [ 'type=sdl' ]
diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/xmexample.hvm-stubdom
--- a/tools/examples/xmexample.hvm-stubdom      Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/examples/xmexample.hvm-stubdom      Thu Aug 07 11:57:34 2008 +0900
@@ -7,7 +7,7 @@
 #============================================================================
 #
 # This is a version using a stubdomain for device model, see
-# stubdom-ExampleHVMDomain and stubdom/README for more details
+# xmexample.hvm-dm and README.stubdom for more details
 # The differences with xmexample.hvm are marked with "STUBDOM"
 
 #----------------------------------------------------------------------------
@@ -30,7 +30,7 @@ memory = 128
 # shadow_memory = 8
 
 # A name for your domain. All domains must have different names.
-name = "ExampleHVMDomain"
+name = "xmexample.hvm"
 
 # 128-bit UUID for the domain.  The default behavior is to generate a new UUID
 # on each call to 'xm create'.
diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/xmexample.pv-grub
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/examples/xmexample.pv-grub  Thu Aug 07 11:57:34 2008 +0900
@@ -0,0 +1,212 @@
+#  -*- mode: python; -*-
+#============================================================================
+# Python configuration setup for 'xm create'.
+# This script sets the parameters used when a domain is created using 'xm create'.
+# You use a separate script for each domain you want to create, or 
+# you can set the parameters for the domain on the xm command line.
+#============================================================================
+
+#----------------------------------------------------------------------------
+# PV GRUB image file.
+kernel = "/usr/lib/xen/boot/pv-grub.gz"
+
+# Optional provided menu.lst.
+#ramdisk = "/boot/guests/menu.lst"
+
+# Sets path to menu.lst
+extra = "(hd0,0)/boot/grub/menu.lst"
+# can be a TFTP-served path (DHCP will automatically be run)
+# extra = "(nd)/netboot/menu.lst"
+# can be configured automatically by GRUB's DHCP option 150 (see grub manual)
+# extra = ""
+
+# Initial memory allocation (in megabytes) for the new domain.
+#
+# WARNING: Creating a domain with insufficient memory may cause out of
+#          memory errors. The domain needs enough memory to boot kernel
+#          and modules. Allocating less than 32MBs is not recommended.
+memory = 64
+
+# A name for your domain. All domains must have different names.
+name = "ExampleDomain"
+
+# 128-bit UUID for the domain.  The default behavior is to generate a new UUID
+# on each call to 'xm create'.
+#uuid = "06ed00fe-1162-4fc4-b5d8-11993ee4a8b9"
+
+# List of which CPUS this domain is allowed to use, default Xen picks
+#cpus = ""         # leave to Xen to pick
+#cpus = "0"        # all vcpus run on CPU0
+#cpus = "0-3,5,^1" # all vcpus run on cpus 0,2,3,5
+#cpus = ["2", "3"] # VCPU0 runs on CPU2, VCPU1 runs on CPU3
+
+# Number of Virtual CPUS to use, default is 1
+#vcpus = 1
+
+#----------------------------------------------------------------------------
+# Define network interfaces.
+
+# By default, no network interfaces are configured.  You may have one created
+# with sensible defaults using an empty vif clause:
+#
+# vif = [ '' ]
+#
+# or optionally override backend, bridge, ip, mac, script, type, or vifname:
+#
+# vif = [ 'mac=00:16:3e:00:00:11, bridge=xenbr0' ]
+#
+# or more than one interface may be configured:
+#
+# vif = [ '', 'bridge=xenbr1' ]
+
+vif = [ '' ]
+
+#----------------------------------------------------------------------------
+# Define the disk devices you want the domain to have access to, and
+# what you want them accessible as.
+# Each disk entry is of the form phy:UNAME,DEV,MODE
+# where UNAME is the device, DEV is the device name the domain will see,
+# and MODE is r for read-only, w for read-write.
+
+disk = [ 'phy:hda1,hda1,w' ]
+
+#----------------------------------------------------------------------------
+# Define frame buffer device.
+#
+# By default, no frame buffer device is configured.
+#
+# To create one using the SDL backend and sensible defaults:
+#
+# vfb = [ 'type=sdl' ]
+#
+# This uses environment variables XAUTHORITY and DISPLAY.  You
+# can override that:
+#
+# vfb = [ 'type=sdl,xauthority=/home/bozo/.Xauthority,display=:1' ]
+#
+# To create one using the VNC backend and sensible defaults:
+#
+# vfb = [ 'type=vnc' ]
+#
+# The backend listens on 127.0.0.1 port 5900+N by default, where N is
+# the domain ID.  You can override both address and N:
+#
+# vfb = [ 'type=vnc,vnclisten=127.0.0.1,vncdisplay=1' ]
+#
+# Or you can bind the first unused port above 5900:
+#
+# vfb = [ 'type=vnc,vnclisten=0.0.0.0,vncunused=1' ]
+#
+# You can override the password:
+#
+# vfb = [ 'type=vnc,vncpasswd=MYPASSWD' ]
+#
+# Empty password disables authentication.  Defaults to the vncpasswd
+# configured in xend-config.sxp.
+
+#----------------------------------------------------------------------------
+# Define to which TPM instance the user domain should communicate.
+# The vtpm entry is of the form 'instance=INSTANCE,backend=DOM'
+# where INSTANCE indicates the instance number of the TPM the VM
+# should be talking to and DOM provides the domain where the backend
+# is located.
+# Note that no two virtual machines should try to connect to the same
+# TPM instance. The handling of all TPM instances does require
+# some management effort in so far that VM configuration files (and thus
+# a VM) should be associated with a TPM instance throughout the lifetime
+# of the VM / VM configuration file. The instance number must be
+# greater or equal to 1.
+#vtpm = [ 'instance=1,backend=0' ]
+
+#----------------------------------------------------------------------------
+# Set the kernel command line for the new domain.
+# You only need to define the IP parameters and hostname if the domain's
+# IP config doesn't, e.g. in ifcfg-eth0 or via DHCP.
+# You can use 'extra' to set the runlevel and custom environment
+# variables used by custom rc scripts (e.g. VMID=, usr= ).
+
+# Set if you want dhcp to allocate the IP address.
+#dhcp="dhcp"
+# Set netmask.
+#netmask=
+# Set default gateway.
+#gateway=
+# Set the hostname.
+#hostname= "vm%d" % vmid
+
+# Set root device.
+root = "/dev/hda1 ro"
+
+# Root device for nfs.
+#root = "/dev/nfs"
+# The nfs server.
+#nfs_server = '192.0.2.1'  
+# Root directory on the nfs server.
+#nfs_root   = '/full/path/to/root/directory'
+
+#----------------------------------------------------------------------------
+# Configure the behaviour when a domain exits.  There are three 'reasons'
+# for a domain to stop: poweroff, reboot, and crash.  For each of these you
+# may specify:
+#
+#   "destroy",        meaning that the domain is cleaned up as normal;
+#   "restart",        meaning that a new domain is started in place of the old
+#                     one;
+#   "preserve",       meaning that no clean-up is done until the domain is
+#                     manually destroyed (using xm destroy, for example); or
+#   "rename-restart", meaning that the old domain is not cleaned up, but is
+#                     renamed and a new domain started in its place.
+#
+# In the event a domain stops due to a crash, you have the additional options:
+#
+#   "coredump-destroy", meaning dump the crashed domain's core and then destroy;
+#   "coredump-restart", meaning dump the crashed domain's core and then restart.
+#
+# The default is
+#
+#   on_poweroff = 'destroy'
+#   on_reboot   = 'restart'
+#   on_crash    = 'restart'
+#
+# For backwards compatibility we also support the deprecated option restart
+#
+# restart = 'onreboot' means on_poweroff = 'destroy'
+#                            on_reboot   = 'restart'
+#                            on_crash    = 'destroy'
+#
+# restart = 'always'   means on_poweroff = 'restart'
+#                            on_reboot   = 'restart'
+#                            on_crash    = 'restart'
+#
+# restart = 'never'    means on_poweroff = 'destroy'
+#                            on_reboot   = 'destroy'
+#                            on_crash    = 'destroy'
+
+#on_poweroff = 'destroy'
+#on_reboot   = 'restart'
+#on_crash    = 'restart'
+
+#-----------------------------------------------------------------------------
+#   Configure PVSCSI devices:
+#
+#vscsi=[ 'PDEV, VDEV' ]
+#
+#   PDEV   gives physical SCSI device to be attached to specified guest
+#          domain by one of the following identifier format.
+#          - XX:XX:XX:XX (4-tuples with decimal notation which shows
+#                          "host:channel:target:lun")
+#          - /dev/sdxx or sdx
+#          - /dev/stxx or stx
+#          - /dev/sgxx or sgx
+#          - result of 'scsi_id -gu -s'.
+#            ex. # scsi_id -gu -s /block/sdb
+#                  36000b5d0006a0000006a0257004c0000
+#
+#   VDEV   gives virtual SCSI device by 4-tuples (XX:XX:XX:XX) as 
+#          which the specified guest domain recognize.
+#
+
+#vscsi = [ '/dev/sdx, 0:0:0:0' ]
+
+#============================================================================
+
diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/xmexample.vti
--- a/tools/examples/xmexample.vti      Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/examples/xmexample.vti      Thu Aug 07 11:57:34 2008 +0900
@@ -95,11 +95,6 @@ vnc=0
 #vncunused=1
 
 #----------------------------------------------------------------------------
-# enable spawning vncviewer for domain's console
-# (only valid when vnc=1), default = 0
-#vncconsole=0
-
-#----------------------------------------------------------------------------
 # set password for domain's VNC console
 # default is depents on vncpasswd in xend-config.sxp
 vncpasswd=''
diff -r 7affdebb7a1e -r a39913db6e51 tools/examples/xmexample3
--- a/tools/examples/xmexample3 Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/examples/xmexample3 Thu Aug 07 11:57:34 2008 +0900
@@ -207,4 +207,26 @@ extra = "4 VMID=%d" % vmid
 #on_reboot   = 'restart'
 #on_crash    = 'restart'
 
-#============================================================================
+#-----------------------------------------------------------------------------
+#   Configure PVSCSI devices:
+#
+#vscsi=[ 'PDEV, VDEV' ]
+#
+#   PDEV   gives physical SCSI device to be attached to specified guest
+#          domain by one of the following identifier format.
+#          - XX:XX:XX:XX (4-tuples with decimal notation which shows
+#                          "host:channel:target:lun")
+#          - /dev/sdxx or sdx
+#          - /dev/stxx or stx
+#          - /dev/sgxx or sgx
+#          - result of 'scsi_id -gu -s'.
+#            ex. # scsi_id -gu -s /block/sdb
+#                  36000b5d0006a0000006a0257004c0000
+#
+#   VDEV   gives virtual SCSI device by 4-tuples (XX:XX:XX:XX) as 
+#          which the specified guest domain recognize.
+#
+
+#vscsi = [ '/dev/sdx, 0:0:0:0' ]
+
+#============================================================================
diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/extboot/Makefile
--- a/tools/firmware/extboot/Makefile   Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/firmware/extboot/Makefile   Thu Aug 07 11:57:34 2008 +0900
@@ -1,7 +1,5 @@ XEN_ROOT = ../../..
 XEN_ROOT = ../../..
 include $(XEN_ROOT)/tools/firmware/Rules.mk
-
-CFLAGS += -I$(XEN_ROOT)/tools/libxc -I.
 
 .PHONY: all
 all: extboot.bin
diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/hvmloader/Makefile
--- a/tools/firmware/hvmloader/Makefile Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/firmware/hvmloader/Makefile Thu Aug 07 11:57:34 2008 +0900
@@ -33,14 +33,14 @@ OBJS  = $(patsubst %.c,%.o,$(SRCS))
 OBJS  = $(patsubst %.c,%.o,$(SRCS))
 
 .PHONY: all
-all: hvmloader
+all: subdirs-all
+       $(MAKE) hvmloader
 
 hvmloader.o: roms.h
 smbios.o: CFLAGS += -D__SMBIOS_DATE__="\"$(shell date +%m/%d/%Y)\""
 
-hvmloader: subdirs-all $(OBJS)
-       $(LD) $(LDFLAGS_DIRECT) -N -Ttext $(LOADADDR) \
-               -o hvmloader.tmp $(OBJS) acpi/acpi.a
+hvmloader: $(OBJS) acpi/acpi.a
+       $(LD) $(LDFLAGS_DIRECT) -N -Ttext $(LOADADDR) -o hvmloader.tmp $^
        $(OBJCOPY) hvmloader.tmp hvmloader
        rm -f hvmloader.tmp
 
diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/hvmloader/acpi/acpi2_0.h
--- a/tools/firmware/hvmloader/acpi/acpi2_0.h   Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/firmware/hvmloader/acpi/acpi2_0.h   Thu Aug 07 11:57:34 2008 +0900
@@ -381,7 +381,7 @@ struct acpi_20_madt_intsrcovr {
 
 #pragma pack ()
 
-int acpi_build_tables(uint8_t *);
+void acpi_build_tables(void);
 
 #endif /* _ACPI_2_0_H_ */
 
diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/hvmloader/acpi/build.c
--- a/tools/firmware/hvmloader/acpi/build.c     Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/firmware/hvmloader/acpi/build.c     Thu Aug 07 11:57:34 2008 +0900
@@ -248,8 +248,7 @@ static int construct_secondary_tables(ui
     return align16(offset);
 }
 
-/* Copy all the ACPI table to buffer. */
-int acpi_build_tables(uint8_t *buf)
+static void __acpi_build_tables(uint8_t *buf, int *low_sz, int *high_sz)
 {
     struct acpi_20_rsdp *rsdp;
     struct acpi_20_rsdt *rsdt;
@@ -261,7 +260,9 @@ int acpi_build_tables(uint8_t *buf)
     unsigned long        secondary_tables[16];
     int                  offset = 0, i;
 
-    offset += construct_bios_info_table(&buf[offset]);
+    /*
+     * Fill in high-memory data structures, starting at @buf.
+     */
 
     facs = (struct acpi_20_facs *)&buf[offset];
     memcpy(facs, &Facs, sizeof(struct acpi_20_facs));
@@ -325,7 +326,18 @@ int acpi_build_tables(uint8_t *buf)
                  offsetof(struct acpi_header, checksum),
                  rsdt->header.length);
 
+    *high_sz = offset;
+
+    /*
+     * Fill in low-memory data structures: bios_info_table and RSDP.
+     */
+
+    buf = (uint8_t *)ACPI_PHYSICAL_ADDRESS;
+    offset = 0;
+
+    offset += construct_bios_info_table(&buf[offset]);
     rsdp = (struct acpi_20_rsdp *)&buf[offset];
+
     memcpy(rsdp, &Rsdp, sizeof(struct acpi_20_rsdp));
     offset += align16(sizeof(struct acpi_20_rsdp));
     rsdp->rsdt_address = (unsigned long)rsdt;
@@ -337,7 +349,28 @@ int acpi_build_tables(uint8_t *buf)
                  offsetof(struct acpi_20_rsdp, extended_checksum),
                  sizeof(struct acpi_20_rsdp));
 
-    return offset;
+    *low_sz = offset;
+}
+
+void acpi_build_tables(void)
+{
+    int high_sz, low_sz;
+    uint8_t *buf;
+
+    /* Find out size of high-memory ACPI data area. */
+    buf = (uint8_t *)&_end;
+    __acpi_build_tables(buf, &low_sz, &high_sz);
+    memset(buf, 0, high_sz);
+
+    /* Allocate data area and set up ACPI tables there. */
+    buf = (uint8_t *)e820_malloc(high_sz);
+    __acpi_build_tables(buf, &low_sz, &high_sz);
+
+    printf(" - Lo data: %08lx-%08lx\n"
+           " - Hi data: %08lx-%08lx\n",
+           (unsigned long)ACPI_PHYSICAL_ADDRESS,
+           (unsigned long)ACPI_PHYSICAL_ADDRESS + low_sz - 1,
+           (unsigned long)buf, (unsigned long)buf + high_sz - 1);
 }
 
 /*
diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/hvmloader/hvmloader.c
--- a/tools/firmware/hvmloader/hvmloader.c      Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/firmware/hvmloader/hvmloader.c      Thu Aug 07 11:57:34 2008 +0900
@@ -449,7 +449,7 @@ static void init_xen_platform_io_base(vo
 
 int main(void)
 {
-    int acpi_sz = 0, vgabios_sz = 0, etherboot_sz = 0, rombios_sz, smbios_sz;
+    int vgabios_sz = 0, etherboot_sz = 0, rombios_sz, smbios_sz;
     int extboot_sz = 0;
 
     printf("HVM Loader\n");
@@ -508,8 +508,7 @@ int main(void)
     if ( get_acpi_enabled() )
     {
         printf("Loading ACPI ...\n");
-        acpi_sz = acpi_build_tables((uint8_t *)ACPI_PHYSICAL_ADDRESS);
-        ASSERT((ACPI_PHYSICAL_ADDRESS + acpi_sz) <= 0xF0000);
+        acpi_build_tables();
     }
 
     cmos_write_memory_size();
@@ -531,10 +530,6 @@ int main(void)
         printf(" %05x-%05x: SMBIOS tables\n",
                SMBIOS_PHYSICAL_ADDRESS,
                SMBIOS_PHYSICAL_ADDRESS + smbios_sz - 1);
-    if ( acpi_sz )
-        printf(" %05x-%05x: ACPI tables\n",
-               ACPI_PHYSICAL_ADDRESS,
-               ACPI_PHYSICAL_ADDRESS + acpi_sz - 1);
     if ( rombios_sz )
         printf(" %05x-%05x: Main BIOS\n",
                ROMBIOS_PHYSICAL_ADDRESS,
diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/hvmloader/util.h
--- a/tools/firmware/hvmloader/util.h   Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/firmware/hvmloader/util.h   Thu Aug 07 11:57:34 2008 +0900
@@ -145,4 +145,6 @@ void smp_initialise(void);
 
 #define isdigit(c) ((c) >= '0' && (c) <= '9')
 
+extern char _start[], _end[];
+
 #endif /* __HVMLOADER_UTIL_H__ */
diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/32bit/32bitbios.c
--- a/tools/firmware/rombios/32bit/32bitbios.c  Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/firmware/rombios/32bit/32bitbios.c  Thu Aug 07 11:57:34 2008 +0900
@@ -47,5 +47,7 @@ uint32_t jumptable[IDX_LAST+1] __attribu
 
        TABLE_ENTRY(IDX_TCPA_INITIALIZE_TPM, tcpa_initialize_tpm),
 
+       TABLE_ENTRY(IDX_GET_S3_WAKING_VECTOR, get_s3_waking_vector),
+
        TABLE_ENTRY(IDX_LAST       , 0)     /* keep last */
 };
diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/32bit/Makefile
--- a/tools/firmware/rombios/32bit/Makefile     Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/firmware/rombios/32bit/Makefile     Thu Aug 07 11:57:34 2008 +0900
@@ -4,21 +4,22 @@ SOURCES = util.c
 SOURCES = util.c
 TARGET = 32bitbios_flat.h
 
-CFLAGS += -I../ -DGCC_PROTOS
+CFLAGS += $(CFLAGS_include) -I.. -DGCC_PROTOS
 
 SUBDIRS = tcgbios
 
 MODULES = tcgbios/tcgbiosext.o
 
 .PHONY: all
-all: $(TARGET)
+all: subdirs-all
+       $(MAKE) $(TARGET)
 
 .PHONY: clean
 clean: subdirs-clean
        rm -rf *.o $(TARGET)
 
-$(TARGET): subdirs-all 32bitbios.o util.o
-       $(LD) $(LDFLAGS_DIRECT) -s -r 32bitbios.o $(MODULES) util.o -o 32bitbios_all.o
+$(TARGET): 32bitbios.o $(MODULES) util.o
+       $(LD) $(LDFLAGS_DIRECT) -s -r $^ -o 32bitbios_all.o
        @nm 32bitbios_all.o |                                \
          egrep '^ +U ' >/dev/null && {                      \
            echo "There are undefined symbols in the BIOS:"; \
diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/32bit/tcgbios/Makefile
--- a/tools/firmware/rombios/32bit/tcgbios/Makefile     Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/firmware/rombios/32bit/tcgbios/Makefile     Thu Aug 07 11:57:34 2008 +0900
@@ -5,7 +5,7 @@ FILES   = tcgbios tpm_drivers
 FILES   = tcgbios tpm_drivers
 OBJECTS = $(foreach f,$(FILES),$(f).o)
 
-CFLAGS += -I../ -I../../ -DGCC_PROTOS
+CFLAGS += $(CFLAGS_include) -I.. -I../.. -DGCC_PROTOS
 
 .PHONY: all clean
 
diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/32bit/tcgbios/tcgbios.c
--- a/tools/firmware/rombios/32bit/tcgbios/tcgbios.c    Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/firmware/rombios/32bit/tcgbios/tcgbios.c    Thu Aug 07 11:57:34 2008 +0900
@@ -24,10 +24,9 @@
 #include "rombios_compat.h"
 #include "tpm_drivers.h"
 
+#include "util.h"
 #include "tcgbios.h"
 #include "32bitprotos.h"
-#include "util.h"
-
 
 /* local structure and variables */
 struct ptti_cust {
@@ -135,7 +134,7 @@ static inline uint32_t bswap(uint32_t a)
  *******************************************************/
 
 typedef struct {
-       struct acpi_20_tcpa *tcpa_ptr;
+       struct acpi_20_tcpa_clisrv *tcpa_ptr;
        unsigned char       *lasa_last_ptr;
        uint16_t            entry_count;
        uint16_t            flags;
@@ -260,45 +259,19 @@ uint8_t acpi_validate_entry(struct acpi_
 }
 
 
-/*
- * Search for the RSDP ACPI table in the memory starting at addr and
- * ending at addr + len - 1.
- */
-static struct acpi_20_rsdp *find_rsdp(const void *start, unsigned int len)
-{
-       char *rsdp = (char *)start;
-       char *end = rsdp + len;
-       /* scan memory in steps of 16 bytes */
-       while (rsdp < end) {
-               /* check for expected string */
-               if (!strncmp( rsdp, "RSD PTR ", 8))
-                       return (struct acpi_20_rsdp *)rsdp;
-               rsdp += 0x10;
-       }
-       return 0;
-}
-
 void tcpa_acpi_init(void)
 {
        struct acpi_20_rsdt *rsdt;
-       struct acpi_20_tcpa *tcpa = (void *)0;
+       struct acpi_20_tcpa_clisrv *tcpa = (void *)0;
        struct acpi_20_rsdp *rsdp;
        uint32_t length;
        uint16_t off;
        int found = 0;
-       uint16_t ebda_seg;
-
-       if (MA_IsTPMPresent() == 0) {
+
+       if (MA_IsTPMPresent() == 0)
                return;
-       }
-
-       /* RSDP in EBDA? */
-       ebda_seg = *(uint16_t *)ADDR_FROM_SEG_OFF(0x40, 0xe);
-       rsdp = find_rsdp((void *)(ebda_seg << 16), 1024);
-
-       if (!rsdp)
-               rsdp = find_rsdp((void *)(ACPI_SEGMENT << 4), 0x20000);
-
+
+       rsdp = find_rsdp();
        if (rsdp) {
                uint32_t ctr = 0;
                /* get RSDT from RSDP */
@@ -307,7 +280,7 @@ void tcpa_acpi_init(void)
                off = 36;
                while ((off + 3) < length) {
                        /* try all pointers to structures */
-                       tcpa = (struct acpi_20_tcpa *)rsdt->entry[ctr];
+                       tcpa = (struct acpi_20_tcpa_clisrv *)rsdt->entry[ctr];
                        /* valid TCPA ACPI table ? */
                        if (ACPI_2_0_TCPA_SIGNATURE == tcpa->header.signature
                            && acpi_validate_entry(&tcpa->header) == 0) {
@@ -398,7 +371,7 @@ unsigned char *tcpa_get_lasa_base_ptr(vo
 unsigned char *tcpa_get_lasa_base_ptr(void)
 {
        unsigned char *lasa = 0;
-       struct acpi_20_tcpa *tcpa = tcpa_acpi.tcpa_ptr;
+       struct acpi_20_tcpa_clisrv *tcpa = tcpa_acpi.tcpa_ptr;
        if (tcpa != 0) {
                uint32_t class = tcpa->platform_class;
                if (class == TCPA_ACPI_CLASS_CLIENT) {
@@ -416,7 +389,7 @@ uint32_t tcpa_get_laml(void)
 uint32_t tcpa_get_laml(void)
 {
        uint32_t laml = 0;
-       struct acpi_20_tcpa *tcpa = tcpa_acpi.tcpa_ptr;
+       struct acpi_20_tcpa_clisrv *tcpa = tcpa_acpi.tcpa_ptr;
        if (tcpa != 0) {
                uint32_t class = tcpa->platform_class;
                if (class == TCPA_ACPI_CLASS_CLIENT) {
diff -r 7affdebb7a1e -r a39913db6e51 
tools/firmware/rombios/32bit/tcgbios/tcgbios.h
--- a/tools/firmware/rombios/32bit/tcgbios/tcgbios.h    Thu Aug 07 11:47:34 
2008 +0900
+++ b/tools/firmware/rombios/32bit/tcgbios/tcgbios.h    Thu Aug 07 11:57:34 
2008 +0900
@@ -1,6 +1,5 @@
 #ifndef TCGBIOS_H
 #define TCGBIOS_H
-
 
 /* TCPA ACPI definitions */
 #define TCPA_ACPI_CLASS_CLIENT          0
@@ -117,14 +116,7 @@
 /* address of locality 0 (TIS) */
 #define TPM_TIS_BASE_ADDRESS        0xfed40000
 
-#define ASCII32(a,b,c,d)     ((((Bit32u)a) <<  0) | (((Bit32u)b) <<  8) | \
-                              (((Bit32u)c) << 16) | (((Bit32u)d) << 24)  )
-#define ACPI_2_0_TCPA_SIGNATURE ASCII32('T','C','P','A') /* "TCPA" */
-
-
 #define STATUS_FLAG_SHUTDOWN                (1 << 0)
-
-#define ACPI_SEGMENT    0xE000
 
 /* Input and Output blocks for the TCG BIOS commands */
 
@@ -232,37 +224,6 @@ struct pcpes
        uint32_t    event;
 } __attribute__((packed));
 
-
-struct acpi_header
-{
-       uint32_t signature;
-       uint32_t length;
-       uint8_t  revision;
-       uint8_t  checksum;
-       uint8_t  oem_id[6];
-       uint64_t oem_table_id;
-       uint32_t oem_revision;
-       uint32_t creator_id;
-       uint32_t creator_revision;
-} __attribute__((packed));
-
-struct acpi_20_rsdt {
-       struct acpi_header header;
-       uint32_t entry[1];
-} __attribute__((packed));
-
-struct acpi_20_rsdp {
-       uint64_t signature;
-       uint8_t  checksum;
-       uint8_t  oem_id[6];
-       uint8_t  revision;
-       uint32_t rsdt_address;
-       uint32_t length;
-       uint64_t xsdt_address;
-       uint8_t  extended_checksum;
-       uint8_t  reserved[3];
-} __attribute__((packed));
-
 struct acpi_20_tcpa_client {
        uint32_t laml;
        uint64_t lasa;
@@ -275,7 +236,7 @@ struct acpi_20_tcpa_server {
        /* more here */
 } __attribute__((packed));
 
-struct acpi_20_tcpa {
+struct acpi_20_tcpa_clisrv {
        struct acpi_header header;
        uint16_t platform_class;
        union {
diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/32bit/util.c
--- a/tools/firmware/rombios/32bit/util.c       Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/firmware/rombios/32bit/util.c       Thu Aug 07 11:57:34 2008 +0900
@@ -19,6 +19,7 @@
  */
 #include <stdarg.h>
 #include <stdint.h>
+#include "rombios_compat.h"
 #include "util.h"
 
 static void putchar(char c);
@@ -92,11 +93,11 @@ int strcmp(const char *cs, const char *c
 
 int strncmp(const char *s1, const char *s2, uint32_t n)
 {
-       uint32_t ctr;
-       for (ctr = 0; ctr < n; ctr++)
-               if (s1[ctr] != s2[ctr])
-                       return (int)(s1[ctr] - s2[ctr]);
-       return 0;
+    uint32_t ctr; /* index of the byte pair currently being compared */
+    for (ctr = 0; ctr < n; ctr++) /* NOTE(review): no '\0' check, unlike standard strncmp -- confirm callers */
+        if (s1[ctr] != s2[ctr])
+            return (int)(s1[ctr] - s2[ctr]); /* difference of the first mismatching chars */
+    return 0; /* the first n bytes are equal */
 }
 
 void *memcpy(void *dest, const void *src, unsigned n)
@@ -402,3 +403,64 @@ void mssleep(uint32_t waittime)
         y = x;
     }
 }
+
+/*
+ * Search for the "RSD PTR " signature of the ACPI RSDP in the memory
+ * starting at start and ending at start + len - 1; returns 0 if not found.
+ */
+static struct acpi_20_rsdp *__find_rsdp(const void *start, unsigned int len)
+{
+    char *rsdp = (char *)start;
+    char *end = rsdp + len;
+    /* scan memory in steps of 16 bytes */
+    while (rsdp < end) {
+        /* check for the 8-byte signature at the current candidate address */
+        if (!strncmp(rsdp, "RSD PTR ", 8))
+            return (struct acpi_20_rsdp *)rsdp;
+        rsdp += 0x10;
+    }
+    return 0;
+}
+
+struct acpi_20_rsdp *find_rsdp(void)
+{
+    /* Look in the first 1KB of the EBDA, then in 0xE0000-0xFFFFF; 0 if absent. */
+    struct acpi_20_rsdp *rsdp;
+    uint16_t ebda_seg;
+    /* 0x40:0x0e holds the EBDA real-mode segment; linear address = seg << 4. */
+    ebda_seg = *(uint16_t *)ADDR_FROM_SEG_OFF(0x40, 0xe);
+    rsdp = __find_rsdp((void *)((unsigned long)ebda_seg << 4), 1024);
+    if (!rsdp)
+        rsdp = __find_rsdp((void *)0xE0000, 0x20000);
+    return rsdp;
+}
+
+uint32_t get_s3_waking_vector(void)
+{
+    struct acpi_20_rsdp *rsdp = find_rsdp();
+    struct acpi_20_xsdt *xsdt;
+    struct acpi_20_fadt *fadt;
+    struct acpi_20_facs *facs;
+    uint32_t vector;
+    /* Follow RSDP -> XSDT -> FADT -> FACS; return 0 if any link is missing. */
+    if (!rsdp)
+        return 0;
+
+    xsdt = (struct acpi_20_xsdt *)(long)rsdp->xsdt_address;
+    if (!xsdt)
+        return 0;
+    /* Only the first XSDT entry is examined; it must carry the FADT signature. */
+    fadt = (struct acpi_20_fadt *)(long)xsdt->entry[0];
+    if (!fadt || (fadt->header.signature != ACPI_2_0_FADT_SIGNATURE))
+        return 0;
+
+    facs = (struct acpi_20_facs *)(long)fadt->x_firmware_ctrl;
+    if (!facs)
+        return 0;
+    /* Use x_firmware_waking_vector, or firmware_waking_vector when that is 0. */
+    vector = facs->x_firmware_waking_vector;
+    if (!vector)
+        vector = facs->firmware_waking_vector;
+
+    return vector;
+}
diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/32bit/util.h
--- a/tools/firmware/rombios/32bit/util.h       Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/firmware/rombios/32bit/util.h       Thu Aug 07 11:57:34 2008 +0900
@@ -1,5 +1,7 @@
 #ifndef UTIL_H
 #define UTIL_H
+
+#include "../hvmloader/acpi/acpi2_0.h"
 
 void outb(uint16_t addr, uint8_t val);
 void outw(uint16_t addr, uint16_t val);
@@ -39,5 +41,6 @@ static inline uint32_t mmio_readl(uint32
        return *(volatile uint32_t *)addr;
 }
 
+struct acpi_20_rsdp *find_rsdp(void);
 
 #endif
diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/32bitgateway.c
--- a/tools/firmware/rombios/32bitgateway.c     Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/firmware/rombios/32bitgateway.c     Thu Aug 07 11:57:34 2008 +0900
@@ -356,6 +356,9 @@ Upcall:
        call _store_returnaddress       ; store away
        pop ax
 
+       ; XXX GDT munging requires ROM to be writable!
+       call _enable_rom_write_access
+
        rol bx, #2
        mov si, #jmptable
        seg cs
@@ -381,6 +384,8 @@ Upcall:
        push bp
        mov bp,sp
        push eax                        ; preserve work register
+
+       call _disable_rom_write_access
 
        call _get_returnaddress
        mov 2[bp], ax                   ; 16bit return address onto stack
@@ -408,3 +413,10 @@ ASM_END
 #include "32bitgateway.h"
 
 #include "tcgbios.c"
+
+Bit32u get_s3_waking_vector()
+{ /* stub: forwards the call through DoUpcall using index IDX_GET_S3_WAKING_VECTOR */
+       ASM_START
+       DoUpcall(IDX_GET_S3_WAKING_VECTOR)
+       ASM_END
+}
diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/32bitprotos.h
--- a/tools/firmware/rombios/32bitprotos.h      Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/firmware/rombios/32bitprotos.h      Thu Aug 07 11:57:34 2008 +0900
@@ -17,8 +17,8 @@
 #define IDX_TCPA_IPL                       10
 #define IDX_TCPA_INITIALIZE_TPM            11
 #define IDX_TCPA_MEASURE_POST              12
-
-#define IDX_LAST                           13 /* keep last! */
+#define IDX_GET_S3_WAKING_VECTOR           13
+#define IDX_LAST                           14 /* keep last! */
 
 #ifdef GCC_PROTOS
   #define PARMS(x...) x
@@ -42,4 +42,6 @@ void tcpa_measure_post( PARMS(Bit32u fro
 void tcpa_measure_post( PARMS(Bit32u from, Bit32u to) );
 Bit32u tcpa_initialize_tpm( PARMS(Bit32u physpres) );
 
+Bit32u get_s3_waking_vector( PARMS(void) );
+
 #endif
diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/Makefile
--- a/tools/firmware/rombios/Makefile   Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/firmware/rombios/Makefile   Thu Aug 07 11:57:34 2008 +0900
@@ -4,7 +4,8 @@ SUBDIRS := 32bit
 SUBDIRS := 32bit
 
 .PHONY: all
-all: subdirs-all BIOS-bochs-latest
+all: subdirs-all
+       $(MAKE) BIOS-bochs-latest
 
 .PHONY: clean
 clean: subdirs-clean
diff -r 7affdebb7a1e -r a39913db6e51 tools/firmware/rombios/rombios.c
--- a/tools/firmware/rombios/rombios.c  Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/firmware/rombios/rombios.c  Thu Aug 07 11:57:34 2008 +0900
@@ -738,7 +738,9 @@ typedef struct {
   // EBDA must be at most 768 bytes; it lives at 0x9fc00, and the boot 
   // device tables are at 0x9ff00 -- 0x9ffff
   typedef struct {
-    unsigned char filler1[0x3D];
+    unsigned char ebda_size;
+    unsigned char cmos_shutdown_status;
+    unsigned char filler1[0x3B];
 
     // FDPT - Can be splitted in data members if needed
     unsigned char fdpt0[0x10];
@@ -757,6 +759,7 @@ typedef struct {
     upcall_t upcall;
     } ebda_data_t;
   
+  #define EBDA_CMOS_SHUTDOWN_STATUS_OFFSET 1
   #define EbdaData ((ebda_data_t *) 0)
 
   // for access to the int13ext structure
@@ -1464,19 +1467,30 @@ copy_e820_table()
 }
 
 void
-disable_rom_write_access()
+set_rom_write_access(action)
+  Bit16u action; /* low byte is written to the I/O port number read from bios_info.xen_pfiob */
 {
     Bit16u off = (Bit16u)&((struct bios_info *)0)->xen_pfiob;
 ASM_START
-    mov si,.disable_rom_write_access.off[bp]
+    mov si,.set_rom_write_access.off[bp]
     push ds
     mov ax,#(ACPI_PHYSICAL_ADDRESS >> 4)
     mov ds,ax
     mov dx,[si]
     pop ds
-    mov ax,#PFFLAG_ROM_LOCK
+    mov ax,.set_rom_write_access.action[bp]
     out dx,al
 ASM_END
+}
+
+void enable_rom_write_access()
+{
+    set_rom_write_access(0); /* write 0, i.e. a value without PFFLAG_ROM_LOCK set */
+}
+
+void disable_rom_write_access()
+{
+    set_rom_write_access(PFFLAG_ROM_LOCK); /* write the ROM lock flag */
 }
     
 #endif /* HVMASSIST */
@@ -2325,78 +2339,38 @@ debugger_off()
   outb(0xfedc, 0x00);
 }
 
-/* according to memory layout defined in acpi_build_tables(),
-   acpi FACS table is located in ACPI_PHYSICAL_ADDRESS(0xEA000) */
-#define ACPI_FACS_ADDRESS 0xEA000
-#define ACPI_FACS_OFFSET 0x10
-/* S3 resume status in CMOS 0Fh shutdown status byte*/
-
-Bit32u facs_get32(offs)
-Bit16u offs;
-{
-ASM_START
-  push bp
-  mov  bp, sp
-
-    push ds
-    mov ax, #(ACPI_FACS_ADDRESS >> 4)
-    mov ds, ax
-
-    mov bx, 4[bp]
-    mov ax, [bx]
-    mov dx, 2[bx]
-    pop ds
-
-  pop  bp
-ASM_END
-}
-
-
 void 
 s3_resume()
 {
     Bit32u s3_wakeup_vector;
-    extern Bit16u s3_wakeup_ip;
-    extern Bit16u s3_wakeup_cs;
-    extern Bit8u s3_resume_flag;
+    Bit16u s3_wakeup_ip, s3_wakeup_cs; /* far-pointer halves pushed for the final retf */
+    Bit8u cmos_shutdown_status; /* copy of the byte at EBDA_SEG:EBDA_CMOS_SHUTDOWN_STATUS_OFFSET */
 
 ASM_START
     push ds
-    mov ax, #0xF000
+    push ax
+    mov ax, #EBDA_SEG
     mov ds, ax
+    mov al, [EBDA_CMOS_SHUTDOWN_STATUS_OFFSET]
+    mov .s3_resume.cmos_shutdown_status[bp], al
+    pop ax
+    pop ds
 ASM_END
 
-    if (s3_resume_flag!=CMOS_SHUTDOWN_S3){
-        goto s3_out;
-    }
-    s3_resume_flag = 0;
-
-    /* get x_firmware_waking_vector */
-    s3_wakeup_vector = facs_get32(ACPI_FACS_OFFSET+24);
-    if (!s3_wakeup_vector) {
-        /* get firmware_waking_vector */
-       s3_wakeup_vector = facs_get32(ACPI_FACS_OFFSET+12);
-       if (!s3_wakeup_vector) {
-            goto s3_out;
-       }
-    }
-
-    /* setup wakeup vector */
+    if (cmos_shutdown_status != CMOS_SHUTDOWN_S3)
+        return; /* saved shutdown status is not the S3 value: nothing to do */
+    /* A zero waking vector means there is nothing to jump to: just return. */
+    s3_wakeup_vector = get_s3_waking_vector();
+    if (!s3_wakeup_vector)
+        return;
+    /* Split the vector into a cs:ip pair: ip = low 4 bits, cs = vector >> 4. */
     s3_wakeup_ip = s3_wakeup_vector & 0xF;
     s3_wakeup_cs = s3_wakeup_vector >> 4;
 
 ASM_START
-    jmpf [_s3_wakeup_ip]
-
-; S3 data
-_s3_wakeup_ip:    dw 0x0a      
-_s3_wakeup_cs:    dw 0x0      
-_s3_resume_flag:  db 0   ; set at POST time by CMOS[0xF] shutdown status
-ASM_END
-
-s3_out:
-ASM_START
-   pop ds 
+    push .s3_resume.s3_wakeup_cs[bp]
+    push .s3_resume.s3_wakeup_ip[bp]
+    retf
 ASM_END
 }
 
@@ -9865,52 +9839,9 @@ post:
 
   ;; Examine CMOS shutdown status.
   mov al, bl
-
-  ;; 0xFE S3 resume
-  cmp AL, #0xFE
-  jnz not_s3_resume
-
-  ;; set S3 resume flag
-  mov dx, #0xF000
+  mov dx, #EBDA_SEG
   mov ds, dx
-  mov [_s3_resume_flag], AL
-  jmp normal_post
-
-not_s3_resume:
-
-  ;; 0x00, 0x09, 0x0D+ = normal startup
-  cmp AL, #0x00
-  jz normal_post
-  cmp AL, #0x0d
-  jae normal_post
-  cmp AL, #0x09
-  je normal_post
-
-  ;; 0x05 = eoi + jmp via [0x40:0x67] jump
-  cmp al, #0x05
-  je  eoi_jmp_post
-
-  ;; Examine CMOS shutdown status.
-  ;;  0x01,0x02,0x03,0x04,0x06,0x07,0x08, 0x0a, 0x0b, 0x0c = Unimplemented 
shutdown status.
-  push bx
-  call _shutdown_status_panic
-
-#if 0 
-  HALT(__LINE__)
-  ;
-  ;#if 0
-  ;  0xb0, 0x20,       /* mov al, #0x20 */
-  ;  0xe6, 0x20,       /* out 0x20, al    ;send EOI to PIC */
-  ;#endif
-  ;
-  pop es
-  pop ds
-  popa
-  iret
-#endif
-
-normal_post:
-  ; case 0: normal startup
+  mov [EBDA_CMOS_SHUTDOWN_STATUS_OFFSET], AL
 
   cli
   mov  ax, #0xfffe
@@ -9928,8 +9859,6 @@ normal_post:
     stosw
 
   call _log_bios_start
-
-  call _clobber_entry_point
 
   ;; set all interrupts to default handler
   mov  bx, #0x0000    ;; offset index
@@ -10123,8 +10052,11 @@ post_default_ints:
   out  0xa1, AL ;slave  pic: unmask IRQ 12, 13, 14
 
 #ifdef HVMASSIST
+  call _enable_rom_write_access
+  call _clobber_entry_point
   call _copy_e820_table
   call smbios_init
+  call _disable_rom_write_access
 #endif
 
   call _init_boot_vectors
@@ -10174,10 +10106,6 @@ post_default_ints:
 #if BX_TCGBIOS
   call tcpa_post_part2
 #endif
-
-#ifdef HVMASSIST
-  call _disable_rom_write_access
-#endif 
 
   ;; Start the boot sequence.   See the comments in int19_relocated 
   ;; for why we use INT 18h instead of INT 19h here.
diff -r 7affdebb7a1e -r a39913db6e51 tools/fs-back/fs-backend.c
--- a/tools/fs-back/fs-backend.c        Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/fs-back/fs-backend.c        Thu Aug 07 11:57:34 2008 +0900
@@ -16,7 +16,7 @@ static int export_id = 0;
 static int export_id = 0;
 static int mount_id = 0;
 
-void dispatch_response(struct mount *mount, int priv_req_id)
+static void dispatch_response(struct fs_mount *mount, int priv_req_id)
 {
     int i;
     struct fs_op *op;
@@ -41,7 +41,7 @@ void dispatch_response(struct mount *mou
     add_id_to_freelist(priv_req_id, mount->freelist);
 }
 
-static void handle_aio_events(struct mount *mount)
+static void handle_aio_events(struct fs_mount *mount)
 {
     int fd, ret, count, i, notify;
     evtchn_port_t port;
@@ -103,7 +103,7 @@ read_event_channel:
 }
 
 
-void allocate_request_array(struct mount *mount)
+static void allocate_request_array(struct fs_mount *mount)
 {
     int i, nr_entries = mount->nr_entries;
     struct fs_request *requests;
@@ -123,10 +123,10 @@ void allocate_request_array(struct mount
 }
 
 
-void* handle_mount(void *data)
+static void *handle_mount(void *data)
 {
     int more, notify;
-    struct mount *mount = (struct mount *)data;
+    struct fs_mount *mount = (struct fs_mount *)data;
     
     printf("Starting a thread for mount: %d\n", mount->mount_id);
     allocate_request_array(mount);
@@ -147,7 +147,8 @@ moretodo:
             int i;
             struct fs_op *op;
 
-            printf("Got a request at %d\n", cons);
+            printf("Got a request at %d (of %d)\n", 
+                    cons, RING_SIZE(&mount->ring));
             req = RING_GET_REQUEST(&mount->ring, cons);
             printf("Request type=%d\n", req->type); 
             for(i=0;;i++)
@@ -193,11 +194,12 @@ moretodo:
 
 static void handle_connection(int frontend_dom_id, int export_id, char 
*frontend)
 {
-    struct mount *mount;
+    struct fs_mount *mount;
     struct fs_export *export;
     int evt_port;
     pthread_t handling_thread;
     struct fsif_sring *sring;
+    uint32_t dom_ids[MAX_RING_SIZE];
     int i;
 
     printf("Handling connection from dom=%d, for export=%d\n", 
@@ -216,13 +218,13 @@ static void handle_connection(int fronte
         return;
     }
 
-    mount = (struct mount*)malloc(sizeof(struct mount));
+    mount = (struct fs_mount*)malloc(sizeof(struct fs_mount));
     mount->dom_id = frontend_dom_id;
     mount->export = export;
     mount->mount_id = mount_id++;
     xenbus_read_mount_request(mount, frontend);
     printf("Frontend found at: %s (gref=%d, evtchn=%d)\n", 
-            mount->frontend, mount->gref, mount->remote_evtchn);
+            mount->frontend, mount->grefs[0], mount->remote_evtchn);
     xenbus_write_backend_node(mount);
     mount->evth = -1;
     mount->evth = xc_evtchn_open(); 
@@ -235,11 +237,15 @@ static void handle_connection(int fronte
     mount->gnth = -1;
     mount->gnth = xc_gnttab_open(); 
     assert(mount->gnth != -1);
-    sring = xc_gnttab_map_grant_ref(mount->gnth,
-                                    mount->dom_id,
-                                    mount->gref,
-                                    PROT_READ | PROT_WRITE);
-    BACK_RING_INIT(&mount->ring, sring, XC_PAGE_SIZE);
+    for(i=0; i<mount->shared_ring_size; i++)
+        dom_ids[i] = mount->dom_id;
+    sring = xc_gnttab_map_grant_refs(mount->gnth,
+                                     mount->shared_ring_size,
+                                     dom_ids,
+                                     mount->grefs,
+                                     PROT_READ | PROT_WRITE);
+
+    BACK_RING_INIT(&mount->ring, sring, mount->shared_ring_size * 
XC_PAGE_SIZE);
     mount->nr_entries = mount->ring.nr_ents; 
     for (i = 0; i < MAX_FDS; i++)
         mount->fds[i] = -1;
@@ -287,7 +293,7 @@ next_select:
     } while (1);
 }
 
-struct fs_export* create_export(char *name, char *export_path)
+static struct fs_export* create_export(char *name, char *export_path)
 {
     struct fs_export *curr_export, **last_export;
 
diff -r 7affdebb7a1e -r a39913db6e51 tools/fs-back/fs-backend.h
--- a/tools/fs-back/fs-backend.h        Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/fs-back/fs-backend.h        Thu Aug 07 11:57:34 2008 +0900
@@ -13,6 +13,7 @@
 #define EXPORTS_NODE        ROOT_NODE"/"EXPORTS_SUBNODE
 #define WATCH_NODE          EXPORTS_NODE"/requests"
 #define MAX_FDS             16
+#define MAX_RING_SIZE       16
 
 struct fs_export
 {
@@ -26,22 +27,24 @@ struct fs_request
 {
     int active;
     void *page;                         /* Pointer to mapped grant */
+    int count;
     struct fsif_request req_shadow;
     struct aiocb aiocb; 
 };
 
 
-struct mount
+struct fs_mount
 {
     struct fs_export *export;
     int dom_id;
     char *frontend;
     int mount_id;                     /* = backend id */
-    grant_ref_t gref;
+    grant_ref_t grefs[MAX_RING_SIZE];
     evtchn_port_t remote_evtchn;
     int evth;                         /* Handle to the event channel */
     evtchn_port_t local_evtchn;
     int gnth;
+    int shared_ring_size;             /* in pages */
     struct fsif_back_ring ring;
     int nr_entries;
     struct fs_request *requests;
@@ -56,17 +59,17 @@ bool xenbus_create_request_node(void);
 bool xenbus_create_request_node(void);
 int xenbus_register_export(struct fs_export *export);
 int xenbus_get_watch_fd(void);
-void xenbus_read_mount_request(struct mount *mount, char *frontend);
-void xenbus_write_backend_node(struct mount *mount);
-void xenbus_write_backend_ready(struct mount *mount);
+void xenbus_read_mount_request(struct fs_mount *mount, char *frontend);
+void xenbus_write_backend_node(struct fs_mount *mount);
+void xenbus_write_backend_ready(struct fs_mount *mount);
 
 /* File operations, implemented in fs-ops.c */
 struct fs_op
 {
     int type;       /* Type of request (from fsif.h) this handlers 
                        are responsible for */
-    void (*dispatch_handler)(struct mount *mount, struct fsif_request *req);
-    void (*response_handler)(struct mount *mount, struct fs_request *req);
+    void (*dispatch_handler)(struct fs_mount *mount, struct fsif_request *req); /* gets the ring request; presumably starts the operation -- confirm */
+    void (*response_handler)(struct fs_mount *mount, struct fs_request *req); /* gets the private fs_request; presumably runs on completion -- confirm */
 };
 
 /* This NULL terminated array of all file requests handlers */
diff -r 7affdebb7a1e -r a39913db6e51 tools/fs-back/fs-ops.c
--- a/tools/fs-back/fs-ops.c    Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/fs-back/fs-ops.c    Thu Aug 07 11:57:34 2008 +0900
@@ -10,7 +10,7 @@
 #include <sys/mman.h>
 #include <sys/types.h>
 #include <sys/stat.h>
-#include <sys/vfs.h>
+#include <sys/statvfs.h>
 #include <sys/mount.h>
 #include <unistd.h>
 #include "fs-backend.h"
@@ -23,7 +23,7 @@
 #define BUFFER_SIZE 1024
 
 
-unsigned short get_request(struct mount *mount, struct fsif_request *req)
+static unsigned short get_request(struct fs_mount *mount, struct fsif_request 
*req)
 {
     unsigned short id = get_id_from_freelist(mount->freelist); 
 
@@ -34,7 +34,7 @@ unsigned short get_request(struct mount 
     return id;
 }
 
-int get_fd(struct mount *mount)
+static int get_fd(struct fs_mount *mount)
 {
     int i;
 
@@ -45,7 +45,7 @@ int get_fd(struct mount *mount)
 }
 
 
-void dispatch_file_open(struct mount *mount, struct fsif_request *req)
+static void dispatch_file_open(struct fs_mount *mount, struct fsif_request 
*req)
 {
     char *file_name, full_path[BUFFER_SIZE];
     int fd;
@@ -93,7 +93,7 @@ void dispatch_file_open(struct mount *mo
     rsp->ret_val = (uint64_t)fd;
 }
 
-void dispatch_file_close(struct mount *mount, struct fsif_request *req)
+static void dispatch_file_close(struct fs_mount *mount, struct fsif_request 
*req)
 {
     int ret;
     RING_IDX rsp_idx;
@@ -122,19 +122,25 @@ void dispatch_file_close(struct mount *m
     rsp->id = req_id; 
     rsp->ret_val = (uint64_t)ret;
 }
-void dispatch_file_read(struct mount *mount, struct fsif_request *req)
+
+#define MAX_GNTS 16
+static void dispatch_file_read(struct fs_mount *mount, struct fsif_request 
*req)
 {
     void *buf;
-    int fd;
+    int fd, i, count;
     uint16_t req_id;
     unsigned short priv_id;
     struct fs_request *priv_req;
 
     /* Read the request */
-    buf = xc_gnttab_map_grant_ref(mount->gnth,
-                                  mount->dom_id,
-                                  req->u.fread.gref,
-                                  PROT_WRITE);
+    assert(req->u.fread.len > 0); 
+    count = (req->u.fread.len - 1) / XC_PAGE_SIZE + 1;
+    assert(count <= FSIF_NR_READ_GNTS);
+    buf = xc_gnttab_map_domain_grant_refs(mount->gnth,
+                                          count,
+                                          mount->dom_id,
+                                          req->u.fread.grefs,
+                                          PROT_WRITE);
    
     req_id = req->id;
     printf("File read issued for FD=%d (len=%"PRIu64", offest=%"PRIu64")\n", 
@@ -149,6 +155,7 @@ void dispatch_file_read(struct mount *mo
     printf("Private id is: %d\n", priv_id);
     priv_req = &mount->requests[priv_id];
     priv_req->page = buf;
+    priv_req->count = count;
 
     /* Dispatch AIO read request */
     bzero(&priv_req->aiocb, sizeof(struct aiocb));
@@ -164,14 +171,16 @@ out:
     mount->ring.req_cons++;
 }
 
-void end_file_read(struct mount *mount, struct fs_request *priv_req)
+static void end_file_read(struct fs_mount *mount, struct fs_request *priv_req)
 {
     RING_IDX rsp_idx;
     fsif_response_t *rsp;
     uint16_t req_id;
 
     /* Release the grant */
-    assert(xc_gnttab_munmap(mount->gnth, priv_req->page, 1) == 0);
+    assert(xc_gnttab_munmap(mount->gnth, 
+                            priv_req->page, 
+                            priv_req->count) == 0);
 
     /* Get a response from the ring */
     rsp_idx = mount->ring.rsp_prod_pvt++;
@@ -182,19 +191,23 @@ void end_file_read(struct mount *mount, 
     rsp->ret_val = (uint64_t)aio_return(&priv_req->aiocb);
 }
 
-void dispatch_file_write(struct mount *mount, struct fsif_request *req)
+static void dispatch_file_write(struct fs_mount *mount, struct fsif_request 
*req)
 {
     void *buf;
-    int fd;
+    int fd, count, i;
     uint16_t req_id;
     unsigned short priv_id;
     struct fs_request *priv_req;
 
     /* Read the request */
-    buf = xc_gnttab_map_grant_ref(mount->gnth,
-                                  mount->dom_id,
-                                  req->u.fwrite.gref,
-                                  PROT_READ);
+    assert(req->u.fwrite.len > 0); 
+    count = (req->u.fwrite.len - 1) / XC_PAGE_SIZE + 1;
+    assert(count <= FSIF_NR_WRITE_GNTS);
+    buf = xc_gnttab_map_domain_grant_refs(mount->gnth,
+                                          count,
+                                          mount->dom_id,
+                                          req->u.fwrite.grefs,
+                                          PROT_READ);
    
     req_id = req->id;
     printf("File write issued for FD=%d (len=%"PRIu64", offest=%"PRIu64")\n", 
@@ -209,6 +222,7 @@ void dispatch_file_write(struct mount *m
     printf("Private id is: %d\n", priv_id);
     priv_req = &mount->requests[priv_id];
     priv_req->page = buf;
+    priv_req->count = count;
 
     /* Dispatch AIO write request */
     bzero(&priv_req->aiocb, sizeof(struct aiocb));
@@ -224,14 +238,16 @@ void dispatch_file_write(struct mount *m
     mount->ring.req_cons++;
 }
 
-void end_file_write(struct mount *mount, struct fs_request *priv_req)
+static void end_file_write(struct fs_mount *mount, struct fs_request *priv_req)
 {
     RING_IDX rsp_idx;
     fsif_response_t *rsp;
     uint16_t req_id;
 
     /* Release the grant */
-    assert(xc_gnttab_munmap(mount->gnth, priv_req->page, 1) == 0);
+    assert(xc_gnttab_munmap(mount->gnth, 
+                            priv_req->page, 
+                            priv_req->count) == 0);
     
     /* Get a response from the ring */
     rsp_idx = mount->ring.rsp_prod_pvt++;
@@ -242,7 +258,7 @@ void end_file_write(struct mount *mount,
     rsp->ret_val = (uint64_t)aio_return(&priv_req->aiocb);
 }
 
-void dispatch_stat(struct mount *mount, struct fsif_request *req)
+static void dispatch_stat(struct fs_mount *mount, struct fsif_request *req)
 {
     struct fsif_stat_response *buf;
     struct stat stat;
@@ -251,12 +267,6 @@ void dispatch_stat(struct mount *mount, 
     RING_IDX rsp_idx;
     fsif_response_t *rsp;
 
-    /* Read the request */
-    buf = xc_gnttab_map_grant_ref(mount->gnth,
-                                  mount->dom_id,
-                                  req->u.fstat.gref,
-                                  PROT_WRITE);
-   
     req_id = req->id;
     if (req->u.fstat.fd < MAX_FDS)
         fd = mount->fds[req->u.fstat.fd];
@@ -272,38 +282,35 @@ void dispatch_stat(struct mount *mount, 
     /* Stat, and create the response */ 
     ret = fstat(fd, &stat);
     printf("Mode=%o, uid=%d, a_time=%ld\n",
-            stat.st_mode, stat.st_uid, stat.st_atime);
-    buf->stat_mode  = stat.st_mode;
-    buf->stat_uid   = stat.st_uid;
-    buf->stat_gid   = stat.st_gid;
+            stat.st_mode, stat.st_uid, (long)stat.st_atime);
+    
+    /* Get a response from the ring */
+    rsp_idx = mount->ring.rsp_prod_pvt++;
+    printf("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
+    rsp = RING_GET_RESPONSE(&mount->ring, rsp_idx);
+    rsp->id = req_id; 
+    rsp->fstat.stat_ret = (uint32_t)ret;
+    rsp->fstat.stat_mode  = stat.st_mode;
+    rsp->fstat.stat_uid   = stat.st_uid;
+    rsp->fstat.stat_gid   = stat.st_gid;
 #ifdef BLKGETSIZE
     if (S_ISBLK(stat.st_mode)) {
        unsigned long sectors;
        if (ioctl(fd, BLKGETSIZE, &sectors)) {
            perror("getting device size\n");
-           buf->stat_size = 0;
+           rsp->fstat.stat_size = 0;
        } else
-           buf->stat_size = sectors << 9;
+           rsp->fstat.stat_size = sectors << 9;
     } else
 #endif
-       buf->stat_size  = stat.st_size;
-    buf->stat_atime = stat.st_atime;
-    buf->stat_mtime = stat.st_mtime;
-    buf->stat_ctime = stat.st_ctime;
-
-    /* Release the grant */
-    assert(xc_gnttab_munmap(mount->gnth, buf, 1) == 0);
-    
-    /* Get a response from the ring */
-    rsp_idx = mount->ring.rsp_prod_pvt++;
-    printf("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
-    rsp = RING_GET_RESPONSE(&mount->ring, rsp_idx);
-    rsp->id = req_id; 
-    rsp->ret_val = (uint64_t)ret;
-}
-
-
-void dispatch_truncate(struct mount *mount, struct fsif_request *req)
+       rsp->fstat.stat_size  = stat.st_size;
+    rsp->fstat.stat_atime = stat.st_atime;
+    rsp->fstat.stat_mtime = stat.st_mtime;
+    rsp->fstat.stat_ctime = stat.st_ctime;
+}
+
+
+static void dispatch_truncate(struct fs_mount *mount, struct fsif_request *req)
 {
     int fd, ret;
     uint16_t req_id;
@@ -335,7 +342,7 @@ void dispatch_truncate(struct mount *mou
     rsp->ret_val = (uint64_t)ret;
 }
 
-void dispatch_remove(struct mount *mount, struct fsif_request *req)
+static void dispatch_remove(struct fs_mount *mount, struct fsif_request *req)
 {
     char *file_name, full_path[BUFFER_SIZE];
     int ret;
@@ -374,7 +381,7 @@ void dispatch_remove(struct mount *mount
 }
 
 
-void dispatch_rename(struct mount *mount, struct fsif_request *req)
+static void dispatch_rename(struct fs_mount *mount, struct fsif_request *req)
 {
     char *buf, *old_file_name, *new_file_name;
     char old_full_path[BUFFER_SIZE], new_full_path[BUFFER_SIZE];
@@ -421,7 +428,7 @@ void dispatch_rename(struct mount *mount
 }
 
 
-void dispatch_create(struct mount *mount, struct fsif_request *req)
+static void dispatch_create(struct fs_mount *mount, struct fsif_request *req)
 {
     char *file_name, full_path[BUFFER_SIZE];
     int ret;
@@ -459,7 +466,17 @@ void dispatch_create(struct mount *mount
     else
     {
         printf("Issuing create for file: %s\n", full_path);
-        ret = creat(full_path, mode); 
+        ret = get_fd(mount);
+        if (ret >= 0) {
+            int real_fd = creat(full_path, mode); 
+            if (real_fd < 0)
+                ret = -1;
+            else
+            {
+                mount->fds[ret] = real_fd;
+                printf("Got FD: %d for real %d\n", ret, real_fd);
+            }
+        }
     }
     printf("Got ret %d (errno=%d)\n", ret, errno);
 
@@ -471,7 +488,7 @@ void dispatch_create(struct mount *mount
     rsp->ret_val = (uint64_t)ret;
 }
 
-void dispatch_list(struct mount *mount, struct fsif_request *req)
+static void dispatch_list(struct fs_mount *mount, struct fsif_request *req)
 {
     char *file_name, *buf, full_path[BUFFER_SIZE];
     uint32_t offset, nr_files, error_code; 
@@ -541,7 +558,7 @@ error_out:
     rsp->ret_val = ret_val;
 }
 
-void dispatch_chmod(struct mount *mount, struct fsif_request *req)
+static void dispatch_chmod(struct fs_mount *mount, struct fsif_request *req)
 {
     int fd, ret;
     RING_IDX rsp_idx;
@@ -572,13 +589,13 @@ void dispatch_chmod(struct mount *mount,
     rsp->ret_val = (uint64_t)ret;
 }
 
-void dispatch_fs_space(struct mount *mount, struct fsif_request *req)
+static void dispatch_fs_space(struct fs_mount *mount, struct fsif_request *req)
 {
     char *file_name, full_path[BUFFER_SIZE];
     RING_IDX rsp_idx;
     fsif_response_t *rsp;
     uint16_t req_id;
-    struct statfs stat;
+    struct statvfs stat;
     int64_t ret;
 
     printf("Dispatching fs space operation (gref=%d).\n", req->u.fspace.gref);
@@ -596,7 +613,7 @@ void dispatch_fs_space(struct mount *mou
            mount->export->export_path, file_name);
     assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
     printf("Issuing fs space for %s\n", full_path);
-    ret = statfs(full_path, &stat);
+    ret = statvfs(full_path, &stat);
     if(ret >= 0)
         ret = stat.f_bsize * stat.f_bfree;
 
@@ -613,7 +630,7 @@ void dispatch_fs_space(struct mount *mou
     rsp->ret_val = (uint64_t)ret;
 }
 
-void dispatch_file_sync(struct mount *mount, struct fsif_request *req)
+static void dispatch_file_sync(struct fs_mount *mount, struct fsif_request 
*req)
 {
     int fd;
     uint16_t req_id;
@@ -643,7 +660,7 @@ void dispatch_file_sync(struct mount *mo
     mount->ring.req_cons++;
 }
 
-void end_file_sync(struct mount *mount, struct fs_request *priv_req)
+static void end_file_sync(struct fs_mount *mount, struct fs_request *priv_req)
 {
     RING_IDX rsp_idx;
     fsif_response_t *rsp;
diff -r 7affdebb7a1e -r a39913db6e51 tools/fs-back/fs-xenbus.c
--- a/tools/fs-back/fs-xenbus.c Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/fs-back/fs-xenbus.c Thu Aug 07 11:57:34 2008 +0900
@@ -109,10 +109,11 @@ int xenbus_get_watch_fd(void)
     return xs_fileno(xsh); 
 }
 
-void xenbus_read_mount_request(struct mount *mount, char *frontend)
+void xenbus_read_mount_request(struct fs_mount *mount, char *frontend)
 {
     char node[1024];
     char *s;
+    int i;
 
     assert(xsh != NULL);
 #if 0
@@ -125,10 +126,18 @@ void xenbus_read_mount_request(struct mo
     s = xs_read(xsh, XBT_NULL, node, NULL);
     assert(strcmp(s, STATE_READY) == 0);
     free(s);
-    snprintf(node, sizeof(node), "%s/ring-ref", frontend);
+    snprintf(node, sizeof(node), "%s/ring-size", frontend);
     s = xs_read(xsh, XBT_NULL, node, NULL);
-    mount->gref = atoi(s);
+    mount->shared_ring_size = atoi(s);
+    assert(mount->shared_ring_size <= MAX_RING_SIZE);
     free(s);
+    for(i=0; i<mount->shared_ring_size; i++)
+    {
+        snprintf(node, sizeof(node), "%s/ring-ref-%d", frontend, i);
+        s = xs_read(xsh, XBT_NULL, node, NULL);
+        mount->grefs[i] = atoi(s);
+        free(s);
+    }
     snprintf(node, sizeof(node), "%s/event-channel", frontend);
     s = xs_read(xsh, XBT_NULL, node, NULL);
     mount->remote_evtchn = atoi(s);
@@ -150,7 +159,7 @@ static int get_self_id(void)
 } 
 
 
-void xenbus_write_backend_node(struct mount *mount)
+void xenbus_write_backend_node(struct fs_mount *mount)
 {
     char node[1024], backend_node[1024];
     int self_id;
@@ -167,7 +176,7 @@ void xenbus_write_backend_node(struct mo
     xs_write(xsh, XBT_NULL, node, STATE_INITIALISED, strlen(STATE_INITIALISED));
 }
 
-void xenbus_write_backend_ready(struct mount *mount)
+void xenbus_write_backend_ready(struct fs_mount *mount)
 {
     char node[1024];
     int self_id;
diff -r 7affdebb7a1e -r a39913db6e51 tools/include/xen-sys/MiniOS/privcmd.h
--- a/tools/include/xen-sys/MiniOS/privcmd.h    Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/include/xen-sys/MiniOS/privcmd.h    Thu Aug 07 11:57:34 2008 +0900
@@ -10,9 +10,7 @@ typedef struct privcmd_hypercall
 } privcmd_hypercall_t;
 
 typedef struct privcmd_mmap_entry {
-       u64 va;
        u64 mfn;
-       u64 npages;
 } privcmd_mmap_entry_t; 
 
 #endif /* __MINIOS_PUBLIC_PRIVCMD_H__ */
diff -r 7affdebb7a1e -r a39913db6e51 tools/ioemu/block-vbd.c
--- a/tools/ioemu/block-vbd.c   Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/ioemu/block-vbd.c   Thu Aug 07 11:57:34 2008 +0900
@@ -273,6 +273,10 @@ static BlockDriverAIOCB *vbd_aio_flush(B
     BDRVVbdState *s = bs->opaque;
     VbdAIOCB *acb = NULL;
 
+    if (s->info.mode == O_RDONLY) {
+        cb(opaque, 0);
+        return NULL;
+    }
     if (s->info.barrier == 1) {
         acb = vbd_aio_setup(bs, 0, NULL, 0,
                 s->info.flush == 1 ? vbd_nop_cb : cb, opaque);
diff -r 7affdebb7a1e -r a39913db6e51 tools/ioemu/hw/pass-through.c
--- a/tools/ioemu/hw/pass-through.c     Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/ioemu/hw/pass-through.c     Thu Aug 07 11:57:34 2008 +0900
@@ -138,6 +138,13 @@ static int pt_msixctrl_reg_write(struct 
     struct pt_reg_tbl *cfg_entry, 
     uint16_t *value, uint16_t dev_value, uint16_t valid_mask);
 
+/* pt_reg_info_tbl declaration
+ * - only for emulated register (either a part or whole bit).
+ * - for passthrough register that need special behavior (like interacting with
+ *   other component), set emu_mask to all 0 and specify r/w func properly.
+ * - do NOT use ALL F for init_val, otherwise the tbl will not be registered.
+ */
+ 
 /* Header Type0 reg static infomation table */
 static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
     /* Command reg */
@@ -564,6 +571,13 @@ static struct pt_reg_info_tbl pt_emu_reg
     }, 
 };
 
+/* pt_reg_grp_info_tbl declaration
+ * - only for emulated or zero-hardwired register group.
+ * - for register group with dynamic size, just set grp_size to 0xFF and 
+ *   specify size_init func properly.
+ * - no need to specify emu_reg_tbl for zero-hardwired type.
+ */
+
 /* emul reg group static infomation table */
 static const struct pt_reg_grp_info_tbl pt_emu_reg_grp_tbl[] = {
     /* Header Type0 reg group */
@@ -821,7 +835,7 @@ void pt_iomem_map(PCIDevice *d, int i, u
     assigned_device->bases[i].e_size= e_size;
 
     PT_LOG("e_phys=%08x maddr=%lx type=%d len=%d index=%d first_map=%d\n",
-        e_phys, assigned_device->bases[i].access.maddr, 
+        e_phys, (unsigned long)assigned_device->bases[i].access.maddr, 
         type, e_size, i, first_map);
 
     if ( e_size == 0 )
@@ -843,7 +857,7 @@ void pt_iomem_map(PCIDevice *d, int i, u
         }
     }
 
-    /* map only valid guest address (include 0) */
+    /* map only valid guest address */
     if (e_phys != -1)
     {
         /* Create new mapping */
@@ -860,7 +874,7 @@ void pt_iomem_map(PCIDevice *d, int i, u
         
         ret = remove_msix_mapping(assigned_device, i);
         if ( ret != 0 )
-            PT_LOG("Error: remove MSX-X mmio mapping failed!\n");
+            PT_LOG("Error: remove MSI-X mmio mapping failed!\n");
     }
 }
 
@@ -996,8 +1010,11 @@ static void pt_pci_write_config(PCIDevic
     int index = 0;
     int ret = 0;
 
-    PT_LOG("write(%x.%x): address=%04x val=0x%08x len=%d\n",
-        (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
+#ifdef PT_DEBUG_PCI_CONFIG_ACCESS
+    PT_LOG("[%02x:%02x.%x]: address=%04x val=0x%08x len=%d\n",
+       pci_bus_num(d->bus), (d->devfn >> 3) & 0x1F, (d->devfn & 0x7),
+       address, val, len);
+#endif
 
     /* check offset range */
     if (address >= 0xFF)
@@ -1049,7 +1066,10 @@ static void pt_pci_write_config(PCIDevic
         if (reg_grp->grp_type == GRP_TYPE_HARDWIRED)
         {
             /* ignore silently */
-            PT_LOG("Access to 0 Hardwired register.\n");
+            PT_LOG("Access to 0 Hardwired register. "
+                "[%02x:%02x.%x][Offset:%02xh][Length:%d]\n",
+                pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), 
+                (d->devfn & 0x7), address, len);
             goto exit;
         }
     }
@@ -1067,22 +1087,22 @@ static void pt_pci_write_config(PCIDevic
         break;
     }
 
-    /* check libpci error */
+    /* check libpci result */
     valid_mask = (0xFFFFFFFF >> ((4 - len) << 3));
     if ((read_val & valid_mask) == valid_mask)
     {
-        PT_LOG("libpci read error. No emulation. "
+        PT_LOG("Warning: Return ALL F from libpci read. "
             "[%02x:%02x.%x][Offset:%02xh][Length:%d]\n",
             pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
             address, len);
-        goto exit;
     }
     
     /* pass directly to libpci for passthrough type register group */
     if (reg_grp_entry == NULL)
         goto out;
 
-    /* adjust the write value to appropriate CFC-CFF window */
+    /* adjust the read and write value to appropriate CFC-CFF window */
+    read_val <<= ((address & 3) << 3);
     val <<= ((address & 3) << 3);
     emul_len = len;
 
@@ -1131,7 +1151,8 @@ static void pt_pci_write_config(PCIDevic
             if (ret < 0)
             {
                 /* exit I/O emulator */
-                PT_LOG("I/O emulator exit()\n");
+                PT_LOG("Internal error: Invalid write emulation "
+                    "return value[%d]. I/O emulator exit.\n", ret);
                 exit(1);
             }
 
@@ -1186,9 +1207,6 @@ static uint32_t pt_pci_read_config(PCIDe
     int emul_len = 0;
     int ret = 0;
 
-    PT_LOG("read(%x.%x): address=%04x len=%d\n",
-        (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, len);
-
     /* check offset range */
     if (address >= 0xFF)
     {
@@ -1246,15 +1264,14 @@ static uint32_t pt_pci_read_config(PCIDe
         break;
     }
 
-    /* check libpci error */
+    /* check libpci result */
     valid_mask = (0xFFFFFFFF >> ((4 - len) << 3));
     if ((val & valid_mask) == valid_mask)
     {
-        PT_LOG("libpci read error. No emulation. "
+        PT_LOG("Warning: Return ALL F from libpci read. "
             "[%02x:%02x.%x][Offset:%02xh][Length:%d]\n",
             pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
             address, len);
-        goto exit;
     }
 
     /* just return the I/O device register value for 
@@ -1309,7 +1326,8 @@ static uint32_t pt_pci_read_config(PCIDe
             if (ret < 0)
             {
                 /* exit I/O emulator */
-                PT_LOG("I/O emulator exit()\n");
+                PT_LOG("Internal error: Invalid read emulation "
+                    "return value[%d]. I/O emulator exit.\n", ret);
                 exit(1);
             }
 
@@ -1332,6 +1350,13 @@ static uint32_t pt_pci_read_config(PCIDe
     val >>= ((address & 3) << 3);
 
 exit:
+
+#ifdef PT_DEBUG_PCI_CONFIG_ACCESS
+    PT_LOG("[%02x:%02x.%x]: address=%04x val=0x%08x len=%d\n",
+       pci_bus_num(d->bus), (d->devfn >> 3) & 0x1F, (d->devfn & 0x7),
+       address, val, len);
+#endif
+
     return val;
 }
 
@@ -1389,7 +1414,7 @@ static int pt_register_regions(struct pt
     return 0;
 }
 
-static int pt_unregister_regions(struct pt_dev *assigned_device)
+static void pt_unregister_regions(struct pt_dev *assigned_device)
 {
     int i, type, ret;
     uint32_t e_size;
@@ -1488,7 +1513,9 @@ static int pt_bar_reg_parse(
     /* check 64bit BAR */
     index = pt_bar_offset_to_index(reg->offset);
     if ((index > 0) && (index < PCI_ROM_SLOT) &&
-        (d->config[bar_64] & PCI_BASE_ADDRESS_MEM_TYPE_64))
+        ((d->config[bar_64] & (PCI_BASE_ADDRESS_SPACE |
+                               PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
+         (PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64)))
     {
         region = &ptdev->bases[index-1];
         if (region->bar_flag != PT_BAR_FLAG_UPPER)
@@ -1502,6 +1529,13 @@ static int pt_bar_reg_parse(
     r = &d->io_regions[index];
     if (!r->size)
         goto out;
+
+    /* for ExpROM BAR */
+    if (index == PCI_ROM_SLOT)
+    {
+        bar_flag = PT_BAR_FLAG_MEM;
+        goto out;
+    }
 
     /* check BAR I/O indicator */
     if (d->config[reg->offset] & PCI_BASE_ADDRESS_SPACE_IO)
@@ -1540,7 +1574,7 @@ static void pt_bar_mapping(struct pt_dev
         /* copy region address to temporary */
         r_addr = r->addr;
 
-        /* clear region address in case I/O Space or Memory Space disable */
+        /* need unmapping in case I/O Space or Memory Space disable */
         if (((base->bar_flag == PT_BAR_FLAG_IO) && !io_enable ) ||
             ((base->bar_flag == PT_BAR_FLAG_MEM) && !mem_enable ))
             r_addr = -1;
@@ -1556,8 +1590,10 @@ static void pt_bar_mapping(struct pt_dev
         /* check overlapped address */
         ret = pt_chk_bar_overlap(dev->bus, dev->devfn, r_addr, r_size);
         if (ret > 0)
-            PT_LOG("Base Address[%d] is overlapped. "
-                "[Address:%08xh][Size:%04xh]\n", i, r_addr, r_size);
+            PT_LOG("ptdev[%02x:%02x.%x][Region:%d][Address:%08xh][Size:%08xh] "
+                "is overlapped.\n", pci_bus_num(dev->bus), 
+                (dev->devfn >> 3) & 0x1F, (dev->devfn & 0x7),
+                i, r_addr, r_size);
 
         /* check whether we need to update the mapping or not */
         if (r_addr != ptdev->bases[i].e_physbase)
@@ -1776,14 +1812,16 @@ static uint32_t pt_status_reg_init(struc
         else
         {
             /* exit I/O emulator */
-            PT_LOG("I/O emulator exit()\n");
+            PT_LOG("Internal error: Couldn't find pt_reg_tbl for "
+                "Capabilities Pointer register. I/O emulator exit.\n");
             exit(1);
         }
     }
     else
     {
         /* exit I/O emulator */
-        PT_LOG("I/O emulator exit()\n");
+        PT_LOG("Internal error: Couldn't find pt_reg_grp_tbl for Header. "
+            "I/O emulator exit.\n");
         exit(1);
     }
 
@@ -1815,7 +1853,8 @@ static uint32_t pt_bar_reg_init(struct p
     if (index < 0)
     {
         /* exit I/O emulator */
-        PT_LOG("I/O emulator exit()\n");
+        PT_LOG("Internal error: Invalid BAR index[%d]. "
+            "I/O emulator exit.\n", index);
         exit(1);
     }
 
@@ -1962,9 +2001,8 @@ static uint8_t pt_msi_size_init(struct p
     ptdev->msi = malloc(sizeof(struct pt_msi_info));
     if ( !ptdev->msi )
     {
-        PT_LOG("error allocation pt_msi_info\n");
         /* exit I/O emulator */
-        PT_LOG("I/O emulator exit()\n");
+        PT_LOG("error allocation pt_msi_info. I/O emulator exit.\n");
         exit(1);
     }
     memset(ptdev->msi, 0, sizeof(struct pt_msi_info));
@@ -1983,7 +2021,8 @@ static uint8_t pt_msix_size_init(struct 
     if (ret == -1)
     {
         /* exit I/O emulator */
-        PT_LOG("I/O emulator exit()\n");
+        PT_LOG("Internal error: Invalid pt_msix_init return value[%d]. "
+            "I/O emulator exit.\n", ret);
         exit(1);
     }
 
@@ -2060,7 +2099,8 @@ static int pt_bar_reg_read(struct pt_dev
     if (index < 0)
     {
         /* exit I/O emulator */
-        PT_LOG("I/O emulator exit()\n");
+        PT_LOG("Internal error: Invalid BAR index[%d]. "
+            "I/O emulator exit.\n", index);
         exit(1);
     }
 
@@ -2074,8 +2114,8 @@ static int pt_bar_reg_read(struct pt_dev
         bar_emu_mask = PT_BAR_IO_EMU_MASK;
         break;
     case PT_BAR_FLAG_UPPER:
-        *value = 0;
-        goto out;
+        bar_emu_mask = PT_BAR_ALLF;
+        break;
     default:
         break;
     }
@@ -2085,7 +2125,6 @@ static int pt_bar_reg_read(struct pt_dev
     *value = ((*value & ~valid_emu_mask) | 
               (cfg_entry->data & valid_emu_mask));
 
-out:
    return 0;
 }
 
@@ -2201,12 +2240,13 @@ static int pt_bar_reg_write(struct pt_de
     uint32_t r_size = 0;
     int index = 0;
 
-   /* get BAR index */
+    /* get BAR index */
     index = pt_bar_offset_to_index(reg->offset);
     if (index < 0)
     {
         /* exit I/O emulator */
-        PT_LOG("I/O emulator exit()\n");
+        PT_LOG("Internal error: Invalid BAR index[%d]. "
+            "I/O emulator exit.\n", index);
         exit(1);
     }
 
@@ -2216,89 +2256,113 @@ static int pt_bar_reg_write(struct pt_de
     /* align resource size (memory type only) */
     PT_GET_EMUL_SIZE(base->bar_flag, r_size);
 
-    /* check guest write value */
-    if (*value == PT_BAR_ALLF)
-    {
-        /* set register with resource size alligned to page size */
-        cfg_entry->data = ~(r_size - 1);
-        /* avoid writing ALL F to I/O device register */
-        *value = dev_value;
-    }
-    else
-    {
-        /* set emulate mask and read-only mask depend on BAR flag */
-        switch (ptdev->bases[index].bar_flag)
-        {
-        case PT_BAR_FLAG_MEM:
-            bar_emu_mask = PT_BAR_MEM_EMU_MASK;
-            bar_ro_mask = PT_BAR_MEM_RO_MASK;
-            break;
-        case PT_BAR_FLAG_IO:
-            new_addr = *value;
-            last_addr = new_addr + r_size - 1;
+    /* set emulate mask and read-only mask depend on BAR flag */
+    switch (ptdev->bases[index].bar_flag)
+    {
+    case PT_BAR_FLAG_MEM:
+        bar_emu_mask = PT_BAR_MEM_EMU_MASK;
+        bar_ro_mask = PT_BAR_MEM_RO_MASK | (r_size - 1);
+        break;
+    case PT_BAR_FLAG_IO:
+        bar_emu_mask = PT_BAR_IO_EMU_MASK;
+        bar_ro_mask = PT_BAR_IO_RO_MASK | (r_size - 1);
+        break;
+    case PT_BAR_FLAG_UPPER:
+        bar_emu_mask = PT_BAR_ALLF;
+        bar_ro_mask = 0;    /* all upper 32bit are R/W */
+        break;
+    default:
+        break;
+    }
+
+    /* modify emulate register */
+    writable_mask = bar_emu_mask & ~bar_ro_mask & valid_mask;
+    cfg_entry->data = ((*value & writable_mask) |
+                       (cfg_entry->data & ~writable_mask));
+
+    /* check whether we need to update the virtual region address or not */
+    switch (ptdev->bases[index].bar_flag)
+    {
+    case PT_BAR_FLAG_MEM:
+        /* nothing to do */
+        break;
+    case PT_BAR_FLAG_IO:
+        new_addr = cfg_entry->data;
+        last_addr = new_addr + r_size - 1;
+        /* check invalid address */
+        if (last_addr <= new_addr || !new_addr || last_addr >= 0x10000)
+        {
             /* check 64K range */
-            if (last_addr <= new_addr || !new_addr || last_addr >= 0x10000)
+            if ((last_addr >= 0x10000) &&
+                (cfg_entry->data != (PT_BAR_ALLF & ~bar_ro_mask)))
             {
                 PT_LOG("Guest attempt to set Base Address over the 64KB. "
-                    "[%02x:%02x.%x][Offset:%02xh][Range:%08xh-%08xh]\n",
+                    "[%02x:%02x.%x][Offset:%02xh][Address:%08xh][Size:%08xh]\n",
                     pci_bus_num(d->bus), 
                     ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
-                    reg->offset, new_addr, last_addr);
-                /* just remove mapping */
-                r->addr = -1;
-                goto exit;
+                    reg->offset, new_addr, r_size);
             }
-            bar_emu_mask = PT_BAR_IO_EMU_MASK;
-            bar_ro_mask = PT_BAR_IO_RO_MASK;
-            break;
-        case PT_BAR_FLAG_UPPER:
-            if (*value)
+            /* just remove mapping */
+            r->addr = -1;
+            goto exit;
+        }
+        break;
+    case PT_BAR_FLAG_UPPER:
+        if (cfg_entry->data)
+        {
+            if (cfg_entry->data != (PT_BAR_ALLF & ~bar_ro_mask))
             {
                 PT_LOG("Guest attempt to set high MMIO Base Address. "
-                   "Ignore mapping. "
-                   "[%02x:%02x.%x][Offset:%02xh][High Address:%08xh]\n",
+                    "Ignore mapping. "
+                    "[%02x:%02x.%x][Offset:%02xh][High Address:%08xh]\n",
                     pci_bus_num(d->bus), 
                     ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
-                    reg->offset, *value);
-                /* clear lower address */
-                d->io_regions[index-1].addr = -1;
+                    reg->offset, cfg_entry->data);
             }
-            else
+            /* clear lower address */
+            d->io_regions[index-1].addr = -1;
+        }
+        else
+        {
+            /* find lower 32bit BAR */
+            prev_offset = (reg->offset - 4);
+            reg_grp_entry = pt_find_reg_grp(ptdev, prev_offset);
+            if (reg_grp_entry)
             {
-                /* find lower 32bit BAR */
-                prev_offset = (reg->offset - 4);
-                reg_grp_entry = pt_find_reg_grp(ptdev, prev_offset);
-                if (reg_grp_entry)
-                {
-                    reg_entry = pt_find_reg(reg_grp_entry, prev_offset);
-                    if (reg_entry)
-                        /* restore lower address */
-                        d->io_regions[index-1].addr = reg_entry->data;
-                    else
-                        return -1;
-                }
+                reg_entry = pt_find_reg(reg_grp_entry, prev_offset);
+                if (reg_entry)
+                    /* restore lower address */
+                    d->io_regions[index-1].addr = reg_entry->data;
                 else
                     return -1;
             }
-            cfg_entry->data = 0;
-            r->addr = -1;
-            goto exit;
-        }
-
-        /* modify emulate register */
-        writable_mask = bar_emu_mask & ~bar_ro_mask & valid_mask;
-        cfg_entry->data = ((*value & writable_mask) |
-                           (cfg_entry->data & ~writable_mask));
-        /* update the corresponding virtual region address */
-        r->addr = cfg_entry->data;
-
-        /* create value for writing to I/O device register */
-        throughable_mask = ~bar_emu_mask & valid_mask;
-        *value = ((*value & throughable_mask) |
-                  (dev_value & ~throughable_mask));
-    }
+            else
+                return -1;
+        }
+
+        /* always keep the emulate register value to 0,
+         * because hvmloader does not support high MMIO for now.
+         */
+        cfg_entry->data = 0;
+
+        /* never mapping the 'empty' upper region,
+         * because we'll do it enough for the lower region.
+         */
+        r->addr = -1;
+        goto exit;
+    default:
+        break;
+    }
+
+    /* update the corresponding virtual region address */
+    r->addr = cfg_entry->data;
 
 exit:
+    /* create value for writing to I/O device register */
+    throughable_mask = ~bar_emu_mask & valid_mask;
+    *value = ((*value & throughable_mask) |
+              (dev_value & ~throughable_mask));
+
     return 0;
 }
 
@@ -2314,6 +2378,8 @@ static int pt_exp_rom_bar_reg_write(stru
     uint32_t writable_mask = 0;
     uint32_t throughable_mask = 0;
     uint32_t r_size = 0;
+    uint32_t bar_emu_mask = 0;
+    uint32_t bar_ro_mask = 0;
 
     r = &d->io_regions[PCI_ROM_SLOT];
     r_size = r->size;
@@ -2321,28 +2387,22 @@ static int pt_exp_rom_bar_reg_write(stru
     /* align memory type resource size */
     PT_GET_EMUL_SIZE(base->bar_flag, r_size);
 
-    /* check guest write value */
-    if (*value == PT_BAR_ALLF)
-    {
-        /* set register with resource size alligned to page size */
-        cfg_entry->data = ~(r_size - 1);
-        /* avoid writing ALL F to I/O device register */
-        *value = dev_value;
-    }
-    else
-    {
-        /* modify emulate register */
-        writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
-        cfg_entry->data = ((*value & writable_mask) |
-                           (cfg_entry->data & ~writable_mask));
-        /* update the corresponding virtual region address */
-        r->addr = cfg_entry->data;
-
-        /* create value for writing to I/O device register */
-        throughable_mask = ~reg->emu_mask & valid_mask;
-        *value = ((*value & throughable_mask) |
-                  (dev_value & ~throughable_mask));
-    }
+    /* set emulate mask and read-only mask */
+    bar_emu_mask = reg->emu_mask;
+    bar_ro_mask = reg->ro_mask | (r_size - 1);
+
+    /* modify emulate register */
+    writable_mask = bar_emu_mask & ~bar_ro_mask & valid_mask;
+    cfg_entry->data = ((*value & writable_mask) |
+                       (cfg_entry->data & ~writable_mask));
+
+    /* update the corresponding virtual region address */
+    r->addr = cfg_entry->data;
+    
+    /* create value for writing to I/O device register */
+    throughable_mask = ~bar_emu_mask & valid_mask;
+    *value = ((*value & throughable_mask) |
+              (dev_value & ~throughable_mask));
 
     return 0;
 }
@@ -2483,8 +2543,6 @@ static int pt_msgctrl_reg_write(struct p
     uint16_t throughable_mask = 0;
     uint16_t old_ctrl = cfg_entry->data;
     PCIDevice *pd = (PCIDevice *)ptdev;
-
-    PT_LOG("[before] dev_val:%xh wr_val:%xh\n", dev_value, *value);
 
     /* Currently no support for multi-vector */
     if ((*value & PCI_MSI_FLAGS_QSIZE) != 0x0)
@@ -2527,8 +2585,6 @@ static int pt_msgctrl_reg_write(struct p
     else
         ptdev->msi->flags &= ~PCI_MSI_FLAGS_ENABLE;
 
-    PT_LOG("[after] wr_val:%xh\n", *value);
-
     return 0;
 }
 
@@ -2541,8 +2597,6 @@ static int pt_msgaddr32_reg_write(struct
     uint32_t writable_mask = 0;
     uint32_t throughable_mask = 0;
     uint32_t old_addr = cfg_entry->data;
-
-    PT_LOG("[before] dev_val:%xh wr_val:%xh\n", dev_value, *value);
 
     /* modify emulate register */
     writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
@@ -2564,8 +2618,6 @@ static int pt_msgaddr32_reg_write(struct
             pt_msi_update(ptdev);
     }
 
-    PT_LOG("[after] wr_val:%xh\n", *value);
-
     return 0;
 }
 
@@ -2578,8 +2630,6 @@ static int pt_msgaddr64_reg_write(struct
     uint32_t writable_mask = 0;
     uint32_t throughable_mask = 0;
     uint32_t old_addr = cfg_entry->data;
-
-    PT_LOG("[before] dev_val:%xh wr_val:%xh\n", dev_value, *value);
 
     /* check whether the type is 64 bit or not */
     if (!(ptdev->msi->flags & PCI_MSI_FLAGS_64BIT))
@@ -2609,8 +2659,6 @@ static int pt_msgaddr64_reg_write(struct
             pt_msi_update(ptdev);
     }
 
-    PT_LOG("[after] wr_val:%xh\n", *value);
-
     return 0;
 }
 
@@ -2627,8 +2675,6 @@ static int pt_msgdata_reg_write(struct p
     uint32_t flags = ptdev->msi->flags;
     uint32_t offset = reg->offset;
 
-    PT_LOG("[before] dev_val:%xh wr_val:%xh\n", dev_value, *value);
-
     /* check the offset whether matches the type or not */
     if (!((offset == PCI_MSI_DATA_64) &&  (flags & PCI_MSI_FLAGS_64BIT)) &&
         !((offset == PCI_MSI_DATA_32) && !(flags & PCI_MSI_FLAGS_64BIT)))
@@ -2658,8 +2704,6 @@ static int pt_msgdata_reg_write(struct p
             pt_msi_update(ptdev);
     }
 
-    PT_LOG("[after] wr_val:%xh\n", *value);
-
     return 0;
 }
 
@@ -2672,8 +2716,6 @@ static int pt_msixctrl_reg_write(struct 
     uint16_t writable_mask = 0;
     uint16_t throughable_mask = 0;
     uint16_t old_ctrl = cfg_entry->data;
-
-    PT_LOG("[before] dev_val:%xh wr_val:%xh\n", dev_value, *value);
 
     /* modify emulate register */
     writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
@@ -2691,8 +2733,6 @@ static int pt_msixctrl_reg_write(struct 
         pt_msix_update(ptdev);
 
     ptdev->msix->enabled = !!(*value & PCI_MSIX_ENABLE);
-
-    PT_LOG("[after] wr_val:%xh\n", *value);
 
     return 0;
 }
@@ -2785,8 +2825,7 @@ struct pt_dev * register_real_device(PCI
         int pirq = pci_dev->irq;
 
         machine_irq = pci_dev->irq;
-        rc = xc_physdev_map_pirq(xc_handle, domid, MAP_PIRQ_TYPE_GSI,
-                                machine_irq, &pirq);
+        rc = xc_physdev_map_pirq(xc_handle, domid, machine_irq, &pirq);
 
         if ( rc )
         {
diff -r 7affdebb7a1e -r a39913db6e51 tools/ioemu/hw/pass-through.h
--- a/tools/ioemu/hw/pass-through.h     Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/ioemu/hw/pass-through.h     Thu Aug 07 11:57:34 2008 +0900
@@ -47,12 +47,20 @@
 /* because the current version of libpci (2.2.0) doesn't define these ID,
  * so we define Capability ID here.
  */
+#ifndef PCI_CAP_ID_HOTPLUG
 /* SHPC Capability List Item reg group */
 #define PCI_CAP_ID_HOTPLUG      0x0C
+#endif
+
+#ifndef PCI_CAP_ID_SSVID
 /* Subsystem ID and Subsystem Vendor ID Capability List Item reg group */
 #define PCI_CAP_ID_SSVID        0x0D
+#endif
+
+#ifndef PCI_MSI_FLAGS_MASK_BIT
 /* interrupt masking & reporting supported */
 #define PCI_MSI_FLAGS_MASK_BIT  0x0100
+#endif
 
 #define PT_INVALID_REG          0xFFFFFFFF      /* invalid register value */
 #define PT_BAR_ALLF             0xFFFFFFFF      /* BAR ALLF value */
diff -r 7affdebb7a1e -r a39913db6e51 tools/ioemu/hw/pc.c
--- a/tools/ioemu/hw/pc.c       Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/ioemu/hw/pc.c       Thu Aug 07 11:57:34 2008 +0900
@@ -30,9 +30,6 @@
 #define VGABIOS_FILENAME "vgabios.bin"
 #define VGABIOS_CIRRUS_FILENAME "vgabios-cirrus.bin"
 #define LINUX_BOOT_FILENAME "linux_boot.bin"
-
-/* Leave a chunk of memory at the top of RAM for the BIOS ACPI tables.  */
-#define ACPI_DATA_SIZE        0x10000
 
 static fdctrl_t *floppy_controller;
 static RTCState *rtc_state;
@@ -542,6 +539,7 @@ static void load_linux(const char *kerne
     uint16_t seg[6];
     uint16_t real_seg;
     int setup_size, kernel_size, initrd_size, cmdline_size;
+    unsigned long end_low_ram;
     uint32_t initrd_max;
     uint8_t header[1024];
     target_phys_addr_t real_addr, reloc_prot_addr, prot_addr, cmdline_addr, initrd_addr;
@@ -595,15 +593,14 @@ static void load_linux(const char *kerne
             (size_t)cmdline_addr,
             (size_t)prot_addr);
 
+    /* Special pages are placed at end of low RAM: pick an arbitrary one and
+     * subtract a suitably large amount of padding (64kB) to skip BIOS data. */
+    xc_get_hvm_param(xc_handle, domid, HVM_PARAM_BUFIOREQ_PFN, &end_low_ram);
+    end_low_ram = (end_low_ram << 12) - (64*1024);
+
     /* highest address for loading the initrd */
-    if (protocol >= 0x203)
-        initrd_max = ldl_p(header+0x22c);
-    else
-        initrd_max = 0x37ffffff;
-
-    if (initrd_max >= ram_size-ACPI_DATA_SIZE)
-        initrd_max = ram_size-ACPI_DATA_SIZE-1;
-
+    initrd_max = (protocol >= 0x203) ? ldl_p(header+0x22c) : 0x37ffffff;
+    initrd_max = MIN(initrd_max, (uint32_t)end_low_ram);
 
     /* kernel command line */
     ncmdline = strlen(kernel_cmdline);
diff -r 7affdebb7a1e -r a39913db6e51 tools/ioemu/hw/pci.c
--- a/tools/ioemu/hw/pci.c      Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/ioemu/hw/pci.c      Thu Aug 07 11:57:34 2008 +0900
@@ -664,9 +664,10 @@ int pt_chk_bar_overlap(PCIBus *bus, int 
             r = &devices->io_regions[j];
             if ((addr < (r->addr + r->size)) && ((addr + size) > r->addr))
             {
-                printf("Overlapped to device[%02x:%02x.%x] region:%d addr:%08x"
-                    " size:%08x\n", bus->bus_num, (devices->devfn >> 3) & 0x1F,
-                    (devices->devfn & 0x7), j, r->addr, r->size);
+                printf("Overlapped to device[%02x:%02x.%x][Region:%d]"
+                    "[Address:%08xh][Size:%08xh]\n", bus->bus_num,
+                    (devices->devfn >> 3) & 0x1F, (devices->devfn & 0x7),
+                    j, r->addr, r->size);
                 ret = 1;
                 goto out;
             }
diff -r 7affdebb7a1e -r a39913db6e51 tools/ioemu/hw/pt-msi.c
--- a/tools/ioemu/hw/pt-msi.c   Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/ioemu/hw/pt-msi.c   Thu Aug 07 11:57:34 2008 +0900
@@ -37,8 +37,7 @@ int pt_msi_setup(struct pt_dev *dev)
         return -1;
     }
 
-    if ( xc_physdev_map_pirq_msi(xc_handle, domid, MAP_PIRQ_TYPE_MSI,
-                            AUTO_ASSIGN, &pirq,
+    if ( xc_physdev_map_pirq_msi(xc_handle, domid, AUTO_ASSIGN, &pirq,
                                                        dev->pci_dev->dev << 3 | dev->pci_dev->func,
                                                        dev->pci_dev->bus, 0, 1) )
     {
@@ -120,8 +119,7 @@ static int pt_msix_update_one(struct pt_
     /* Check if this entry is already mapped */
     if ( entry->pirq == -1 )
     {
-        ret = xc_physdev_map_pirq_msi(xc_handle, domid, MAP_PIRQ_TYPE_MSI,
-                                AUTO_ASSIGN, &pirq,
+        ret = xc_physdev_map_pirq_msi(xc_handle, domid, AUTO_ASSIGN, &pirq,
                                 dev->pci_dev->dev << 3 | dev->pci_dev->func,
                                 dev->pci_dev->bus, entry_nr, 0);
         if ( ret )
diff -r 7affdebb7a1e -r a39913db6e51 tools/ioemu/hw/serial.c
--- a/tools/ioemu/hw/serial.c   Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/ioemu/hw/serial.c   Thu Aug 07 11:57:34 2008 +0900
@@ -728,7 +728,6 @@ static int serial_load(QEMUFile *f, void
     qemu_get_8s(f,&s->lsr);
     qemu_get_8s(f,&s->msr);
     qemu_get_8s(f,&s->scr);
-    qemu_get_8s(f,&s->fcr);
 
     if (version_id >= 2)
         qemu_get_8s(f,&fcr);
diff -r 7affdebb7a1e -r a39913db6e51 tools/ioemu/hw/vga.c
--- a/tools/ioemu/hw/vga.c      Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/ioemu/hw/vga.c      Thu Aug 07 11:57:34 2008 +0900
@@ -1548,8 +1548,8 @@ static void vga_draw_graphic(VGAState *s
             } else {
                 /* ENODATA just means we have changed mode and will succeed
                  * next time */
-                if (err != -ENODATA)
-                    fprintf(stderr, "track_dirty_vram(%lx, %lx) failed (%d)\n", s->lfb_addr + y, npages, err);
+                if (errno != ENODATA)
+                    fprintf(stderr, "track_dirty_vram(%lx, %lx) failed (%d, %d)\n", s->lfb_addr + y, npages, err, errno);
             }
         }
 
diff -r 7affdebb7a1e -r a39913db6e51 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/ioemu/vl.c  Thu Aug 07 11:57:34 2008 +0900
@@ -7136,8 +7136,10 @@ int main(int argc, char **argv)
         sigaddset(&set, aio_sig_num);
         sigprocmask(SIG_BLOCK, &set, NULL);
     }
+#endif
 
     QEMU_LIST_INIT (&vm_change_state_head);
+#ifndef CONFIG_STUBDOM
 #ifndef _WIN32
     {
         struct sigaction act;
diff -r 7affdebb7a1e -r a39913db6e51 tools/libaio/src/Makefile
--- a/tools/libaio/src/Makefile Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/libaio/src/Makefile Thu Aug 07 11:57:34 2008 +0900
@@ -1,7 +1,7 @@ XEN_ROOT = ../../..
 XEN_ROOT = ../../..
 include $(XEN_ROOT)/tools/Rules.mk
 
-prefix=/usr
+prefix=$(PREFIX)
 includedir=$(prefix)/include
 libdir=$(prefix)/lib
 
diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xc_dom_boot.c
--- a/tools/libxc/xc_dom_boot.c Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/libxc/xc_dom_boot.c Thu Aug 07 11:57:34 2008 +0900
@@ -4,7 +4,7 @@
  * This is the code which actually boots a fresh
  * prepared domain image as xen guest domain.
  *
- * ==>  this is the only domain bilder code piece
+ * ==>  this is the only domain builder code piece
  *          where xen hypercalls are allowed        <==
  *
  * This code is licenced under the GPL.
@@ -153,7 +153,7 @@ void *xc_dom_boot_domU_map(struct xc_dom
     int page_shift = XC_DOM_PAGE_SHIFT(dom);
     privcmd_mmap_entry_t *entries;
     void *ptr;
-    int i, rc;
+    int i;
     int err;
 
     entries = xc_dom_malloc(dom, count * sizeof(privcmd_mmap_entry_t));
@@ -165,9 +165,13 @@ void *xc_dom_boot_domU_map(struct xc_dom
         return NULL;
     }
 
-    ptr = mmap(NULL, count << page_shift, PROT_READ | PROT_WRITE,
-               MAP_SHARED, dom->guest_xc, 0);
-    if ( ptr == MAP_FAILED )
+    for ( i = 0; i < count; i++ )
+        entries[i].mfn = xc_dom_p2m_host(dom, pfn + i);
+
+    ptr = xc_map_foreign_ranges(dom->guest_xc, dom->guest_domid,
+                count << page_shift, PROT_READ | PROT_WRITE, 1 << page_shift,
+                entries, count);
+    if ( ptr == NULL )
     {
         err = errno;
         xc_dom_panic(XC_INTERNAL_ERROR,
@@ -177,22 +181,6 @@ void *xc_dom_boot_domU_map(struct xc_dom
         return NULL;
     }
 
-    for ( i = 0; i < count; i++ )
-    {
-        entries[i].va = (uintptr_t) ptr + (i << page_shift);
-        entries[i].mfn = xc_dom_p2m_host(dom, pfn + i);
-        entries[i].npages = 1;
-    }
-
-    rc = xc_map_foreign_ranges(dom->guest_xc, dom->guest_domid,
-                               entries, count);
-    if ( rc < 0 )
-    {
-        xc_dom_panic(XC_INTERNAL_ERROR,
-                     "%s: failed to mmap domU pages 0x%" PRIpfn "+0x%" PRIpfn
-                     " [xenctl, rc=%d]\n", __FUNCTION__, pfn, count, rc);
-        return NULL;
-    }
     return ptr;
 }
 
diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c      Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/libxc/xc_domain_save.c      Thu Aug 07 11:57:34 2008 +0900
@@ -568,16 +568,19 @@ static xen_pfn_t *xc_map_m2p(int xc_hand
     unsigned long m2p_chunks, m2p_size;
     xen_pfn_t *m2p;
     xen_pfn_t *extent_start;
-    int i, rc;
-
+    int i;
+
+    m2p = NULL;
     m2p_size   = M2P_SIZE(max_mfn);
     m2p_chunks = M2P_CHUNKS(max_mfn);
 
     xmml.max_extents = m2p_chunks;
-    if ( !(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t))) )
+
+    extent_start = calloc(m2p_chunks, sizeof(xen_pfn_t));
+    if ( !extent_start )
     {
         ERROR("failed to allocate space for m2p mfns");
-        return NULL;
+        goto err0;
     }
     set_xen_guest_handle(xmml.extent_start, extent_start);
 
@@ -585,41 +588,36 @@ static xen_pfn_t *xc_map_m2p(int xc_hand
          (xmml.nr_extents != m2p_chunks) )
     {
         ERROR("xc_get_m2p_mfns");
-        return NULL;
-    }
-
-    if ( (m2p = mmap(NULL, m2p_size, prot,
-                     MAP_SHARED, xc_handle, 0)) == MAP_FAILED )
-    {
-        ERROR("failed to mmap m2p");
-        return NULL;
-    }
-
-    if ( !(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t))) )
+        goto err1;
+    }
+
+    entries = calloc(m2p_chunks, sizeof(privcmd_mmap_entry_t));
+    if (entries == NULL)
     {
         ERROR("failed to allocate space for mmap entries");
-        return NULL;
+        goto err1;
     }
 
     for ( i = 0; i < m2p_chunks; i++ )
-    {
-        entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE));
         entries[i].mfn = extent_start[i];
-        entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT;
-    }
-
-    if ( (rc = xc_map_foreign_ranges(xc_handle, DOMID_XEN,
-                                     entries, m2p_chunks)) < 0 )
-    {
-        ERROR("xc_mmap_foreign_ranges failed (rc = %d)", rc);
-        return NULL;
+
+    m2p = xc_map_foreign_ranges(xc_handle, DOMID_XEN,
+                       m2p_size, prot, M2P_CHUNK_SIZE,
+                       entries, m2p_chunks);
+    if (m2p == NULL)
+    {
+        ERROR("xc_mmap_foreign_ranges failed");
+        goto err2;
     }
 
     m2p_mfn0 = entries[0].mfn;
 
+err2:
+    free(entries);
+err1:
     free(extent_start);
-    free(entries);
-
+
+err0:
     return m2p;
 }
 
diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c        Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/libxc/xc_hvm_build.c        Thu Aug 07 11:57:34 2008 +0900
@@ -115,42 +115,32 @@ static int loadelfimage(
     struct elf_binary *elf, int xch, uint32_t dom, unsigned long *parray)
 {
     privcmd_mmap_entry_t *entries = NULL;
-    int pages = (elf->pend - elf->pstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
+    size_t pages = (elf->pend - elf->pstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
     int i, rc = -1;
 
     /* Map address space for initial elf image. */
-    entries = malloc(pages * sizeof(privcmd_mmap_entry_t));
+    entries = calloc(pages, sizeof(privcmd_mmap_entry_t));
     if ( entries == NULL )
         goto err;
-    elf->dest = mmap(NULL, pages << PAGE_SHIFT, PROT_READ | PROT_WRITE,
-                     MAP_SHARED, xch, 0);
-    if ( elf->dest == MAP_FAILED )
-        goto err;
 
     for ( i = 0; i < pages; i++ )
-    {
-        entries[i].va = (uintptr_t)elf->dest + (i << PAGE_SHIFT);
         entries[i].mfn = parray[(elf->pstart >> PAGE_SHIFT) + i];
-        entries[i].npages = 1;
-    }
-
-    rc = xc_map_foreign_ranges(xch, dom, entries, pages);
-    if ( rc < 0 )
+
+    elf->dest = xc_map_foreign_ranges(
+        xch, dom, pages << PAGE_SHIFT, PROT_READ | PROT_WRITE, 1 << PAGE_SHIFT,
+        entries, pages);
+    if ( elf->dest == NULL )
         goto err;
 
     /* Load the initial elf image. */
     elf_load_binary(elf);
     rc = 0;
 
+    munmap(elf->dest, pages << PAGE_SHIFT);
+    elf->dest = NULL;
+
  err:
-    if ( elf->dest )
-    {
-        munmap(elf->dest, pages << PAGE_SHIFT);
-        elf->dest = NULL;
-    }
-
-    if ( entries )
-        free(entries);
+    free(entries);
 
     return rc;
 }
@@ -239,7 +229,7 @@ static int setup_guest(int xc_handle,
         if ( ((count | cur_pages) & (SUPERPAGE_NR_PFNS - 1)) == 0 )
         {
             long done;
-            xen_pfn_t sp_extents[2048 >> SUPERPAGE_PFN_SHIFT];
+            xen_pfn_t sp_extents[count >> SUPERPAGE_PFN_SHIFT];
             struct xen_memory_reservation sp_req = {
                 .nr_extents   = count >> SUPERPAGE_PFN_SHIFT,
                 .extent_order = SUPERPAGE_PFN_SHIFT,
diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xc_linux.c
--- a/tools/libxc/xc_linux.c    Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/libxc/xc_linux.c    Thu Aug 07 11:57:34 2008 +0900
@@ -118,16 +118,41 @@ void *xc_map_foreign_range(int xc_handle
     return addr;
 }
 
-int xc_map_foreign_ranges(int xc_handle, uint32_t dom,
-                          privcmd_mmap_entry_t *entries, int nr)
+void *xc_map_foreign_ranges(int xc_handle, uint32_t dom,
+                            size_t size, int prot, size_t chunksize,
+                            privcmd_mmap_entry_t entries[], int nentries)
 {
     privcmd_mmap_t ioctlx;
-
-    ioctlx.num   = nr;
+    int i, rc;
+    void *addr;
+
+    addr = mmap(NULL, size, prot, MAP_SHARED, xc_handle, 0);
+    if ( addr == MAP_FAILED )
+        goto mmap_failed;
+
+    for ( i = 0; i < nentries; i++ )
+    {
+        entries[i].va = (unsigned long)addr + (i * chunksize);
+        entries[i].npages = chunksize >> PAGE_SHIFT;
+    }
+
+    ioctlx.num   = nentries;
     ioctlx.dom   = dom;
     ioctlx.entry = entries;
 
-    return ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx);
+    rc = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx);
+    if ( rc )
+        goto ioctl_failed;
+
+    return addr;
+
+ioctl_failed:
+    rc = munmap(addr, size);
+    if ( rc == -1 )
+        ERROR("%s: error in error path\n", __FUNCTION__);
+
+mmap_failed:
+    return NULL;
 }
 
 static int do_privcmd(int xc_handle, unsigned int cmd, unsigned long data)
diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xc_minios.c
--- a/tools/libxc/xc_minios.c   Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/libxc/xc_minios.c   Thu Aug 07 11:57:34 2008 +0900
@@ -15,6 +15,7 @@
 #include <os.h>
 #include <mm.h>
 #include <lib.h>
+#include <gntmap.h>
 #include <events.h>
 #include <wait.h>
 #include <sys/mman.h>
@@ -76,16 +77,30 @@ void *xc_map_foreign_range(int xc_handle
     return map_frames_ex(&mfn, size / getpagesize(), 0, 1, 1, dom, 0, pt_prot);
 }
 
-int xc_map_foreign_ranges(int xc_handle, uint32_t dom,
-                          privcmd_mmap_entry_t *entries, int nr)
-{
-    int i;
-    for (i = 0; i < nr; i++) {
-       unsigned long mfn = entries[i].mfn;
-        do_map_frames(entries[i].va, &mfn, entries[i].npages, 0, 1, dom, 0, L1_PROT);
-    }
-    return 0;
-}
+void *xc_map_foreign_ranges(int xc_handle, uint32_t dom,
+                            size_t size, int prot, size_t chunksize,
+                            privcmd_mmap_entry_t entries[], int nentries)
+{
+    unsigned long mfns[size / PAGE_SIZE];
+    int i, j, n;
+    unsigned long pt_prot = 0;
+#ifdef __ia64__
+    /* TODO */
+#else
+    if (prot & PROT_READ)
+       pt_prot = L1_PROT_RO;
+    if (prot & PROT_WRITE)
+       pt_prot = L1_PROT;
+#endif
+
+    n = 0;
+    for (i = 0; i < nentries; i++)
+        for (j = 0; j < chunksize / PAGE_SIZE; j++)
+            mfns[n++] = entries[i].mfn + j;
+
+    return map_frames_ex(mfns, n, 1, 0, 1, dom, 0, pt_prot);
+}
+
 
 int do_xen_hypercall(int xc_handle, privcmd_hypercall_t *hypercall)
 {
@@ -102,8 +117,8 @@ int do_xen_hypercall(int xc_handle, priv
        errno = -ret;
        return -1;
     }
-    if (call.result < 0) {
-        errno = -call.result;
+    if ((long) call.result < 0) {
+        errno = - (long) call.result;
         return -1;
     }
     return call.result;
@@ -244,8 +259,11 @@ int xc_evtchn_unbind(int xce_handle, evt
            files[xce_handle].evtchn.ports[i].port = -1;
            break;
        }
-    if (i == MAX_EVTCHN_PORTS)
+    if (i == MAX_EVTCHN_PORTS) {
        printf("Warning: couldn't find port %"PRId32" for xc handle %x\n", port, xce_handle);
+       errno = -EINVAL;
+       return -1;
+    }
     files[xce_handle].evtchn.ports[i].bound = 0;
     unbind_evtchn(port);
     return 0;
@@ -278,18 +296,24 @@ evtchn_port_or_error_t xc_evtchn_pending
 {
     int i;
     unsigned long flags;
+    evtchn_port_t ret = -1;
+
     local_irq_save(flags);
+    files[xce_handle].read = 0;
     for (i = 0; i < MAX_EVTCHN_PORTS; i++) {
-       evtchn_port_t port = files[xce_handle].evtchn.ports[i].port;
-       if (port != -1 && files[xce_handle].evtchn.ports[i].pending) {
-           files[xce_handle].evtchn.ports[i].pending = 0;
-           local_irq_restore(flags);
-           return port;
-       }
-    }
-    files[xce_handle].read = 0;
+        evtchn_port_t port = files[xce_handle].evtchn.ports[i].port;
+        if (port != -1 && files[xce_handle].evtchn.ports[i].pending) {
+            if (ret == -1) {
+                ret = port;
+                files[xce_handle].evtchn.ports[i].pending = 0;
+            } else {
+                files[xce_handle].read = 1;
+                break;
+            }
+        }
+    }
     local_irq_restore(flags);
-    return -1;
+    return ret;
 }
 
 int xc_evtchn_unmask(int xce_handle, evtchn_port_t port)
@@ -304,6 +328,88 @@ void discard_file_cache(int fd, int flus
     if (flush)
         fsync(fd);
 }
+
+int xc_gnttab_open(void)
+{
+    int xcg_handle;
+    xcg_handle = alloc_fd(FTYPE_GNTMAP);
+    gntmap_init(&files[xcg_handle].gntmap);
+    return xcg_handle;
+}
+
+int xc_gnttab_close(int xcg_handle)
+{
+    gntmap_fini(&files[xcg_handle].gntmap);
+    files[xcg_handle].type = FTYPE_NONE;
+    return 0;
+}
+
+void *xc_gnttab_map_grant_ref(int xcg_handle,
+                              uint32_t domid,
+                              uint32_t ref,
+                              int prot)
+{
+    return gntmap_map_grant_refs(&files[xcg_handle].gntmap,
+                                 1,
+                                 &domid, 0,
+                                 &ref,
+                                 prot & PROT_WRITE);
+}
+
+void *xc_gnttab_map_grant_refs(int xcg_handle,
+                               uint32_t count,
+                               uint32_t *domids,
+                               uint32_t *refs,
+                               int prot)
+{
+    return gntmap_map_grant_refs(&files[xcg_handle].gntmap,
+                                 count,
+                                 domids, 1,
+                                 refs,
+                                 prot & PROT_WRITE);
+}
+
+void *xc_gnttab_map_domain_grant_refs(int xcg_handle,
+                                      uint32_t count,
+                                      uint32_t domid,
+                                      uint32_t *refs,
+                                      int prot)
+{
+    return gntmap_map_grant_refs(&files[xcg_handle].gntmap,
+                                 count,
+                                 &domid, 0,
+                                 refs,
+                                 prot & PROT_WRITE);
+}
+
+int xc_gnttab_munmap(int xcg_handle,
+                     void *start_address,
+                     uint32_t count)
+{
+    int ret;
+    ret = gntmap_munmap(&files[xcg_handle].gntmap,
+                        (unsigned long) start_address,
+                        count);
+    if (ret < 0) {
+        errno = -ret;
+        return -1;
+    }
+    return ret;
+}
+
+int xc_gnttab_set_max_grants(int xcg_handle,
+                             uint32_t count)
+{
+    int ret;
+    ret = gntmap_set_max_grants(&files[xcg_handle].gntmap,
+                                count);
+    if (ret < 0) {
+        errno = -ret;
+        return -1;
+    }
+    return ret;
+}
+
 /*
  * Local variables:
  * mode: C
diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xc_netbsd.c
--- a/tools/libxc/xc_netbsd.c   Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/libxc/xc_netbsd.c   Thu Aug 07 11:57:34 2008 +0900
@@ -11,7 +11,6 @@
 
 #include "xc_private.h"
 
-#include <xen/memory.h>
 #include <xen/sys/evtchn.h>
 #include <unistd.h>
 #include <fcntl.h>
@@ -114,22 +113,42 @@ void *xc_map_foreign_range(int xc_handle
     return addr;
 }
 
-int xc_map_foreign_ranges(int xc_handle, uint32_t dom,
-                          privcmd_mmap_entry_t *entries, int nr)
-{
-    privcmd_mmap_t ioctlx;
-    int err;
-
-    ioctlx.num   = nr;
-    ioctlx.dom   = dom;
-    ioctlx.entry = entries;
-
-    err = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx);
-    if (err == 0)
-       return 0;
-    else
-       return -errno;
-}
+void *xc_map_foreign_ranges(int xc_handle, uint32_t dom,
+                            size_t size, int prot, size_t chunksize,
+                            privcmd_mmap_entry_t entries[], int nentries)
+{
+       privcmd_mmap_t ioctlx;
+       int i, rc;
+       void *addr;
+
+       addr = mmap(NULL, size, prot, MAP_ANON | MAP_SHARED, -1, 0);
+       if (addr == MAP_FAILED)
+               goto mmap_failed;
+
+       for (i = 0; i < nentries; i++) {
+               entries[i].va = (uintptr_t)addr + (i * chunksize);
+               entries[i].npages = chunksize >> PAGE_SHIFT;
+       }
+
+       ioctlx.num   = nentries;
+       ioctlx.dom   = dom;
+       ioctlx.entry = entries;
+
+       rc = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx);
+       if (rc)
+               goto ioctl_failed;
+
+       return addr;
+
+ioctl_failed:
+       rc = munmap(addr, size);
+       if (rc == -1)
+               ERROR("%s: error in error path\n", __FUNCTION__);
+
+mmap_failed:
+       return NULL;
+}
+
 
 static int do_privcmd(int xc_handle, unsigned int cmd, unsigned long data)
 {
diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xc_physdev.c
--- a/tools/libxc/xc_physdev.c  Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/libxc/xc_physdev.c  Thu Aug 07 11:57:34 2008 +0900
@@ -22,7 +22,6 @@ int xc_physdev_pci_access_modify(int xc_
 
 int xc_physdev_map_pirq(int xc_handle,
                         int domid,
-                        int type,
                         int index,
                         int *pirq)
 {
@@ -33,7 +32,7 @@ int xc_physdev_map_pirq(int xc_handle,
         return -EINVAL;
 
     map.domid = domid;
-    map.type = type;
+    map.type = MAP_PIRQ_TYPE_GSI;
     map.index = index;
     map.pirq = *pirq;
 
@@ -47,7 +46,6 @@ int xc_physdev_map_pirq(int xc_handle,
 
 int xc_physdev_map_pirq_msi(int xc_handle,
                             int domid,
-                            int type,
                             int index,
                             int *pirq,
                             int devfn,
@@ -62,7 +60,7 @@ int xc_physdev_map_pirq_msi(int xc_handl
         return -EINVAL;
 
     map.domid = domid;
-    map.type = type;
+    map.type = MAP_PIRQ_TYPE_MSI;
     map.index = index;
     map.pirq = *pirq;
     map.msi_info.devfn = devfn;
diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xc_private.h
--- a/tools/libxc/xc_private.h  Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/libxc/xc_private.h  Thu Aug 07 11:57:34 2008 +0900
@@ -184,8 +184,9 @@ static inline int do_sysctl(int xc_handl
     return ret;
 }
 
-int xc_map_foreign_ranges(int xc_handle, uint32_t dom,
-                          privcmd_mmap_entry_t *entries, int nr);
+void *xc_map_foreign_ranges(int xc_handle, uint32_t dom,
+                            size_t size, int prot, size_t chunksize,
+                            privcmd_mmap_entry_t entries[], int nentries);
 
 void *map_domain_va_core(unsigned long domfd, int cpu, void *guest_va,
                          vcpu_guest_context_any_t *ctxt);
diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xc_solaris.c
--- a/tools/libxc/xc_solaris.c  Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/libxc/xc_solaris.c  Thu Aug 07 11:57:34 2008 +0900
@@ -109,17 +109,40 @@ void *xc_map_foreign_range(int xc_handle
     return addr;
 }
 
-int xc_map_foreign_ranges(int xc_handle, uint32_t dom,
-                          privcmd_mmap_entry_t *entries, int nr)
+void *xc_map_foreign_ranges(int xc_handle, uint32_t dom,
+                            size_t size, int prot, size_t chunksize,
+                            privcmd_mmap_entry_t entries[], int nentries)
 {
     privcmd_mmap_t ioctlx;
-
-    ioctlx.num   = nr;
+    int i, rc;
+    void *addr;
+
+    addr = mmap(NULL, size, prot, MAP_SHARED, xc_handle, 0);
+    if (addr == MAP_FAILED)
+        goto mmap_failed;
+
+    for (i = 0; i < nentries; i++) {
+        entries[i].va = (uintptr_t)addr + (i * chunksize);
+        entries[i].npages = chunksize >> PAGE_SHIFT;
+    }
+
+    ioctlx.num   = nentries;
     ioctlx.dom   = dom;
     ioctlx.entry = entries;
 
-    return ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx);
-}
+    rc = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx);
+    if (rc)
+        goto ioctl_failed;
+
+ioctl_failed:
+    rc = munmap(addr, size);
+    if (rc == -1)
+        ERROR("%s: error in error path\n", __FUNCTION__);
+
+mmap_failed:
+    return NULL;
+}
+
 
 static int do_privcmd(int xc_handle, unsigned int cmd, unsigned long data)
 {
diff -r 7affdebb7a1e -r a39913db6e51 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/libxc/xenctrl.h     Thu Aug 07 11:57:34 2008 +0900
@@ -907,13 +907,11 @@ int xc_gnttab_set_max_grants(int xcg_han
 
 int xc_physdev_map_pirq(int xc_handle,
                         int domid,
-                        int type,
                         int index,
                         int *pirq);
 
 int xc_physdev_map_pirq_msi(int xc_handle,
                             int domid,
-                            int type,
                             int index,
                             int *pirq,
                             int devfn,
diff -r 7affdebb7a1e -r a39913db6e51 tools/pygrub/src/pygrub
--- a/tools/pygrub/src/pygrub   Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/pygrub/src/pygrub   Thu Aug 07 11:57:34 2008 +0900
@@ -21,7 +21,7 @@ import curses, _curses, curses.wrapper, 
 import curses, _curses, curses.wrapper, curses.textpad, curses.ascii
 import getopt
 
-sys.path = [ '/usr/lib/python' ] + sys.path
+sys.path = [ '/usr/lib/python', '/usr/lib64/python' ] + sys.path
 
 import fsimage
 import grub.GrubConf
diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/python/xen/lowlevel/xc/xc.c Thu Aug 07 11:57:34 2008 +0900
@@ -958,8 +958,7 @@ static PyObject *pyxc_physdev_map_pirq(P
     if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iii", kwd_list,
                                       &dom, &index, &pirq) )
         return NULL;
-    ret = xc_physdev_map_pirq(xc->xc_handle, dom, MAP_PIRQ_TYPE_GSI,
-                             index, &pirq);
+    ret = xc_physdev_map_pirq(xc->xc_handle, dom, index, &pirq);
     if ( ret != 0 )
           return pyxc_error_to_exception();
     return PyLong_FromUnsignedLong(pirq);
diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/util/pci.py
--- a/tools/python/xen/util/pci.py      Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/python/xen/util/pci.py      Thu Aug 07 11:57:34 2008 +0900
@@ -44,6 +44,12 @@ PCI_CLASS_DEVICE = 0x0a
 PCI_CLASS_DEVICE = 0x0a
 PCI_CLASS_BRIDGE_PCI = 0x0604
 
+PCI_HEADER_TYPE = 0x0e
+PCI_HEADER_TYPE_MASK = 0x7f
+PCI_HEADER_TYPE_NORMAL  = 0
+PCI_HEADER_TYPE_BRIDGE  = 1
+PCI_HEADER_TYPE_CARDBUS = 2
+
 PCI_CAPABILITY_LIST = 0x34
 PCI_CB_BRIDGE_CONTROL = 0x3e
 PCI_BRIDGE_CTL_BUS_RESET= 0x40
@@ -56,6 +62,12 @@ PCI_EXP_DEVCAP_FLR = (0x1 << 28)
 PCI_EXP_DEVCAP_FLR = (0x1 << 28)
 PCI_EXP_DEVCTL = 0x8
 PCI_EXP_DEVCTL_FLR = (0x1 << 15)
+
+PCI_CAP_ID_PM = 0x01
+PCI_PM_CTRL = 4
+PCI_PM_CTRL_NO_SOFT_RESET = 0x0004
+PCI_PM_CTRL_STATE_MASK = 0x0003
+PCI_D3hot = 3
 
 PCI_CAP_ID_AF = 0x13
 PCI_AF_CAPs   = 0x3
@@ -105,15 +117,22 @@ def parse_hex(val):
         return None
 
 def parse_pci_name(pci_name_string):
-    # Format: xxxx:xx:xx:x
-    s = pci_name_string
-    s = s.split(':')
-    dom = parse_hex(s[0])
-    bus = parse_hex(s[1])
-    s = s[2].split('.')
-    dev = parse_hex(s[0])
-    func =  parse_hex(s[1])
-    return (dom, bus, dev, func)
+    pci_match = re.match(r"((?P<domain>[0-9a-fA-F]{1,4})[:,])?" + \
+            r"(?P<bus>[0-9a-fA-F]{1,2})[:,]" + \
+            r"(?P<slot>[0-9a-fA-F]{1,2})[.,]" + \
+            r"(?P<func>[0-7])$", pci_name_string)
+    if pci_match is None:
+        raise PciDeviceParseError(('Failed to parse pci device name: %s' %
+            pci_name_string))
+    pci_dev_info = pci_match.groupdict('0')
+
+    domain = parse_hex(pci_dev_info['domain'])
+    bus = parse_hex(pci_dev_info['bus'])
+    slot = parse_hex(pci_dev_info['slot'])
+    func = parse_hex(pci_dev_info['func'])
+
+    return (domain, bus, slot, func)
+ 
 
 def find_sysfs_mnt():
     global sysfs_mnt_point
@@ -169,14 +188,14 @@ def create_lspci_info():
 
     # Execute 'lspci' command and parse the result.
     # If the command does not exist, lspci_info will be kept blank ({}).
-    for paragraph in os.popen(LSPCI_CMD + ' -vmmD').read().split('\n\n'):
+    for paragraph in os.popen(LSPCI_CMD + ' -vmm').read().split('\n\n'):
         device_name = None
         device_info = {}
         for line in paragraph.split('\n'):
             try:
                 (opt, value) = line.split(':\t')
                 if opt == 'Slot':
-                    device_name = value
+                    device_name = PCI_DEV_FORMAT_STR % parse_pci_name(value)
                 else:
                     device_info[opt] = value
             except:
@@ -246,18 +265,8 @@ def transform_list(target, src):
     return  result
 
 def check_FLR_capability(dev_list):
-    i = len(dev_list)
-    if i == 0:
+    if len(dev_list) == 0:
         return []
-    i = i - 1;
-    while i >= 0:
-        dev = dev_list[i]
-        if dev.bus == 0:
-            if dev.dev_type == DEV_TYPE_PCIe_ENDPOINT and not dev.pcie_flr:
-                del dev_list[i]
-            elif dev.dev_type == DEV_TYPE_PCI and not dev.pci_af_flr:
-                del dev_list[i]
-        i = i - 1
 
     pci_list = []
     pci_dev_dict = {}
@@ -270,6 +279,8 @@ def check_FLR_capability(dev_list):
         for pci in pci_list:
             if isinstance(pci, types.StringTypes):
                 dev = pci_dev_dict[pci]
+                if dev.bus == 0:
+                    continue
                 if dev.dev_type == DEV_TYPE_PCIe_ENDPOINT and not dev.pcie_flr:
                     coassigned_pci_list = dev.find_all_the_multi_functions()
                     need_transform = True
@@ -336,13 +347,6 @@ class PciDeviceAssignmentError(Exception
         self.message = msg
     def __str__(self):
         return 'pci: impproper device assignment spcified: ' + \
-            self.message
-
-class PciDeviceFlrError(PciDeviceAssignmentError):
-    def __init__(self,msg):
-        self.message = msg
-    def __str__(self):
-        return 'Can not find a suitable FLR method for the device(s): ' + \
             self.message
 
 class PciDevice:
@@ -480,6 +484,27 @@ class PciDevice:
         # Restore the config spaces
         restore_pci_conf_space((pci_list, cfg_list))
         
+    def do_Dstate_transition(self):
+        pos = self.find_cap_offset(PCI_CAP_ID_PM)
+        if pos == 0:
+            return 
+        
+        (pci_list, cfg_list) = save_pci_conf_space([self.name])
+        
+        # Enter D3hot without soft reset
+        pm_ctl = self.pci_conf_read32(pos + PCI_PM_CTRL)
+        pm_ctl |= PCI_PM_CTRL_NO_SOFT_RESET
+        pm_ctl &= ~PCI_PM_CTRL_STATE_MASK
+        pm_ctl |= PCI_D3hot
+        self.pci_conf_write32(pos + PCI_PM_CTRL, pm_ctl)
+        time.sleep(0.010)
+
+        # From D3hot to D0
+        self.pci_conf_write32(pos + PCI_PM_CTRL, 0)
+        time.sleep(0.010)
+
+        restore_pci_conf_space((pci_list, cfg_list))
+
     def find_all_the_multi_functions(self):
         sysfs_mnt = find_sysfs_mnt()
         pci_names = os.popen('ls ' + sysfs_mnt + SYSFS_PCI_DEVS_PATH).read()
@@ -650,13 +675,16 @@ class PciDevice:
                 time.sleep(0.200)
                 restore_pci_conf_space((pci_list, cfg_list))
             else:
-                funcs = self.find_all_the_multi_functions()
-                self.devs_check_driver(funcs)
-
-                parent = '%04x:%02x:%02x.%01x' % self.find_parent()
-
-                # Do Secondary Bus Reset.
-                self.do_secondary_bus_reset(parent, funcs)
+                if self.bus == 0:
+                    self.do_Dstate_transition()
+                else:
+                    funcs = self.find_all_the_multi_functions()
+                    self.devs_check_driver(funcs)
+
+                    parent = '%04x:%02x:%02x.%01x' % self.find_parent()
+
+                    # Do Secondary Bus Reset.
+                    self.do_secondary_bus_reset(parent, funcs)
         # PCI devices
         else:
             # For PCI device on host bus, we test "PCI Advanced Capabilities".
@@ -669,9 +697,7 @@ class PciDevice:
                 restore_pci_conf_space((pci_list, cfg_list))
             else:
                 if self.bus == 0:
-                    err_msg = 'pci: %s is not assignable: it is on bus 0, '+ \
-                        'but it has no PCI Advanced Capabilities.'
-                    raise PciDeviceFlrError(err_msg % self.name)
+                    self.do_Dstate_transition()
                 else:
                     devs = self.find_coassigned_devices(False)
                     # Remove the element 0 which is a bridge
@@ -690,12 +716,24 @@ class PciDevice:
                self.name+SYSFS_PCI_DEV_CONFIG_PATH
         try:
             conf_file = open(path, 'rb')
+            conf_file.seek(PCI_HEADER_TYPE)
+            header_type = ord(conf_file.read(1)) & PCI_HEADER_TYPE_MASK
+            if header_type == PCI_HEADER_TYPE_CARDBUS:
+                return
             conf_file.seek(PCI_STATUS_OFFSET)
             status = ord(conf_file.read(1))
             if status&PCI_STATUS_CAP_MASK:
                 conf_file.seek(PCI_CAP_OFFSET)
                 capa_pointer = ord(conf_file.read(1))
+                capa_count = 0
                 while capa_pointer:
+                    if capa_pointer < 0x40:
+                        raise PciDeviceParseError(
+                            ('Broken capability chain: %s' % self.name))
+                    capa_count += 1
+                    if capa_count > 96:
+                        raise PciDeviceParseError(
+                            ('Looped capability chain: %s' % self.name))
                     conf_file.seek(capa_pointer)
                     capa_id = ord(conf_file.read(1))
                     capa_pointer = ord(conf_file.read(1))
diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/util/utils.py
--- a/tools/python/xen/util/utils.py    Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/python/xen/util/utils.py    Thu Aug 07 11:57:34 2008 +0900
@@ -1,6 +1,50 @@ import traceback
 import traceback
 import sys
+import os
 
 def exception_string(e):
         (ty,v,tb) = sys.exc_info()
         return traceback.format_exception_only(ty,v)
+
+def daemonize(prog, args, stdin_tmpfile=None):
+    """Runs a program as a daemon with the list of arguments.  Returns the PID
+    of the daemonized program, or returns 0 on error.
+    """
+    r, w = os.pipe()
+    pid = os.fork()
+
+    if pid == 0:
+        os.close(r)
+        w = os.fdopen(w, 'w')
+        os.setsid()
+        try:
+            pid2 = os.fork()
+        except:
+            pid2 = None
+        if pid2 == 0:
+            os.chdir("/")
+            null_fd = os.open("/dev/null", os.O_RDWR)
+            if stdin_tmpfile is not None:
+                os.dup2(stdin_tmpfile.fileno(), 0)
+            else:
+                os.dup2(null_fd, 0)
+            os.dup2(null_fd, 1)
+            os.dup2(null_fd, 2)
+            for fd in range(3, 256):
+                try:
+                    os.close(fd)
+                except:
+                    pass
+            os.execvp(prog, args)
+            os._exit(1)
+        else:
+            w.write(str(pid2 or 0))
+            w.close()
+            os._exit(0)
+    os.close(w)
+    r = os.fdopen(r)
+    daemon_pid = int(r.read())
+    r.close()
+    os.waitpid(pid, 0)
+    return daemon_pid
+
diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/python/xen/xend/XendDomainInfo.py   Thu Aug 07 11:57:34 2008 +0900
@@ -599,14 +599,17 @@ class XendDomainInfo:
                 new_dev['func'])
         bdf = xc.test_assign_device(self.domid, pci_str)
         if bdf != 0:
+            if bdf == -1:
+                raise VmError("failed to assign device: maybe the platform"
+                              " doesn't support VT-d, or VT-d isn't enabled"
+                              " properly?")
             bus = (bdf >> 16) & 0xff
             devfn = (bdf >> 8) & 0xff
             dev = (devfn >> 3) & 0x1f
             func = devfn & 0x7
-            raise VmError("Fail to hot insert device(%x:%x.%x): maybe VT-d is "
-                          "not enabled, or the device is not exist, or it "
-                          "has already been assigned to other domain"
-                          % (bus, dev, func))
+            raise VmError("fail to assign device(%x:%x.%x): maybe it has"
+                          " already been assigned to other domain, or maybe"
+                          " it doesn't exist." % (bus, dev, func))
 
         bdf_str = "%s:%s:%s.%s@%s" % (new_dev['domain'],
                 new_dev['bus'],
@@ -635,7 +638,10 @@ class XendDomainInfo:
                 self._waitForDevice(dev_type, devid)
             except VmError, ex:
                 del self.info['devices'][dev_uuid]
-                if dev_type == 'tap':
+                if dev_type == 'pci':
+                    for dev in dev_config_dict['devs']:
+                        XendAPIStore.deregister(dev['uuid'], 'DPCI')
+                elif dev_type == 'tap':
                     self.info['vbd_refs'].remove(dev_uuid)
                 else:
                     self.info['%s_refs' % dev_type].remove(dev_uuid)
@@ -2086,14 +2092,17 @@ class XendDomainInfo:
         if hvm and pci_str:
             bdf = xc.test_assign_device(self.domid, pci_str)
             if bdf != 0:
+                if bdf == -1:
+                    raise VmError("failed to assign device: maybe the platform"
+                                  " doesn't support VT-d, or VT-d isn't enabled"
+                                  " properly?")
                 bus = (bdf >> 16) & 0xff
                 devfn = (bdf >> 8) & 0xff
                 dev = (devfn >> 3) & 0x1f
                 func = devfn & 0x7
-                raise VmError("Fail to assign device(%x:%x.%x): maybe VT-d is "
-                              "not enabled, or the device is not exist, or it "
-                              "has already been assigned to other domain"
-                              % (bus, dev, func))
+                raise VmError("fail to assign device(%x:%x.%x): maybe it has"
+                              " already been assigned to other domain, or maybe"
+                              " it doesn't exist." % (bus, dev, func))
 
         # register the domain in the list 
         from xen.xend import XendDomain
@@ -2374,6 +2383,9 @@ class XendDomainInfo:
     def destroy(self):
         """Cleanup VM and destroy domain.  Nothrow guarantee."""
 
+        if self.domid is None:
+            return
+
         log.debug("XendDomainInfo.destroy: domid=%s", str(self.domid))
 
         paths = self._prepare_phantom_paths()
diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/python/xen/xend/image.py    Thu Aug 07 11:57:34 2008 +0900
@@ -114,7 +114,7 @@ class ImageHandler:
 
         self.display = vmConfig['platform'].get('display')
         self.xauthority = vmConfig['platform'].get('xauthority')
-        self.vncconsole = vmConfig['platform'].get('vncconsole')
+        self.vncconsole = int(vmConfig['platform'].get('vncconsole', 0))
         self.dmargs = self.parseDeviceModelArgs(vmConfig)
         self.pid = None
         rtc_timeoffset = vmConfig['platform'].get('rtc_timeoffset')
@@ -249,10 +249,6 @@ class ImageHandler:
     # xm config file
     def parseDeviceModelArgs(self, vmConfig):
         ret = ["-domain-name", str(self.vm.info['name_label'])]
-
-        # Tell QEMU how large the guest's memory allocation is
-        # to help it when loading the initrd (if neccessary)
-        ret += ["-m", str(self.getRequiredInitialReservation() / 1024)]
 
         # Find RFB console device, and if it exists, make QEMU enable
         # the VNC console.
@@ -777,6 +773,10 @@ class HVMImageHandler(ImageHandler):
             ret.append("tap,vlan=%d,ifname=tap%d.%d,bridge=%s" %
                        (nics, self.vm.getDomid(), nics-1, bridge))
 
+        if nics == 0:
+            ret.append("-net")
+            ret.append("none")
+
         return ret
 
     def getDeviceModelArgs(self, restore = False):
diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py     Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/python/xen/xend/server/pciif.py     Thu Aug 07 11:57:34 2008 +0900
@@ -375,17 +375,34 @@ class PciController(DevController):
                 raise VmError("pci: failed to locate device and "+
                         "parse it's resources - "+str(e))
             if (dev.dev_type == DEV_TYPE_PCIe_ENDPOINT) and not dev.pcie_flr:
-                funcs = dev.find_all_the_multi_functions()
-                for f in funcs:
-                    if not f in pci_str_list:
-                        err_msg = 'pci: % must be co-assigned to guest with %s'
-                        raise VmError(err_msg % (f, dev.name))
+                if dev.bus == 0:
+                    # We cope with this case by using the Dstate transition
+                    # method for now.
+                    err_msg = 'pci: %s: it is on bus 0, but has no PCIe' +\
+                        ' FLR Capability. Will try the Dstate transition'+\
+                        ' method if available.'
+                    log.warn(err_msg % dev.name)
+                else:
+                    funcs = dev.find_all_the_multi_functions()
+                    for f in funcs:
+                        if not f in pci_str_list:
+                            (f_dom, f_bus, f_slot, f_func) = parse_pci_name(f)
+                            f_pci_str = '0x%x,0x%x,0x%x,0x%x' % \
+                                (f_dom, f_bus, f_slot, f_func)
+                            # f has been assigned to other guest?
+                            if xc.test_assign_device(0, f_pci_str) != 0:
+                                err_msg = 'pci: %s must be co-assigned to' + \
+                                    ' the same guest with %s'
+                                raise VmError(err_msg % (f, dev.name))
             elif dev.dev_type == DEV_TYPE_PCI:
                 if dev.bus == 0:
                     if not dev.pci_af_flr:
-                        err_msg = 'pci: %s is not assignable: it is on ' + \
-                            'bus 0,  but lacks of FLR capability'
-                        raise VmError(err_msg % dev.name)
+                        # We cope with this case by using the Dstate transition
+                        # method for now.
+                        err_msg = 'pci: %s: it is on bus 0, but has no PCI' +\
+                            ' Advanced Capabilities for FLR. Will try the'+\
+                            ' Dstate transition method if available.'
+                        log.warn(err_msg % dev.name)
                 else:
                     # All devices behind the uppermost PCI/PCI-X bridge must be\
                     # co-assigned to the same guest.
@@ -395,8 +412,14 @@ class PciController(DevController):
 
                     for s in devs_str:
                         if not s in pci_str_list:
-                            err_msg = 'pci: %s must be co-assigned to guest with %s'
-                            raise VmError(err_msg % (s, dev.name))
+                            (s_dom, s_bus, s_slot, s_func) = parse_pci_name(s)
+                            s_pci_str = '0x%x,0x%x,0x%x,0x%x' % \
+                                (s_dom, s_bus, s_slot, s_func)
+                            # s has been assigned to other guest?
+                            if xc.test_assign_device(0, s_pci_str) != 0:
+                                err_msg = 'pci: %s must be co-assigned to the'+\
+                                    ' same guest with %s'
+                                raise VmError(err_msg % (s, dev.name))
 
         for (domain, bus, slot, func) in pci_dev_list:
             self.setupOneDevice(domain, bus, slot, func)
diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/xm/console.py
--- a/tools/python/xen/xm/console.py    Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/python/xen/xm/console.py    Thu Aug 07 11:57:34 2008 +0900
@@ -15,10 +15,69 @@
 # Copyright (C) 2005 XenSource Ltd
 #============================================================================
 
+import xen.util.auxbin
+import xen.lowlevel.xs
+import os
+import sys
+import signal
+from xen.util import utils
 
 XENCONSOLE = "xenconsole"
 
-import xen.util.auxbin
-
 def execConsole(domid):
     xen.util.auxbin.execute(XENCONSOLE, [str(domid)])
+
+
+class OurXenstoreConnection:  # thin wrapper around a xen.lowlevel.xs handle
+    def __init__(self):
+        self.handle = xen.lowlevel.xs.xs()
+    def read_eventually(self, path):
+        """Return path's value, blocking on a watch until it is non-None."""
+        watch = None
+        try:
+            signal.alarm(10)  # cancelled again in the finally clause
+            watch = self.handle.watch(path, None)
+            while True:
+                result = self.handle.read('0', path)
+                if result is not None:
+                    signal.alarm(0)
+                    return result
+                self.handle.read_watch()
+        finally:
+            signal.alarm(0)
+            if watch is not None: self.handle.unwatch(path, watch)
+    def read_maybe(self, path):
+        return self.handle.read('0', path)  # single read, no waiting
+
+def runVncViewer(domid, do_autopass, do_daemonize=False):
+    """Run vncviewer on domid's display; do_autopass feeds the password on stdin."""
+    xs = OurXenstoreConnection()
+    d = '/local/domain/%d/' % domid
+    vnc_port = xs.read_eventually(d + 'console/vnc-port')
+    vfb_backend = xs.read_maybe(d + 'device/vfb/0/backend')
+    vnc_listen = None
+    vnc_password_tmpfile = None
+    cmdl = ['vncviewer']
+    if vfb_backend is not None:
+        vnc_listen = xs.read_maybe(vfb_backend + '/vnclisten')
+        if do_autopass:
+            vnc_password = xs.read_maybe(vfb_backend + '/vncpasswd')
+            if vnc_password is not None:
+                cmdl.append('-autopass')
+                vnc_password_tmpfile = os.tmpfile()
+                print >>vnc_password_tmpfile, vnc_password
+                vnc_password_tmpfile.seek(0)
+                vnc_password_tmpfile.flush()
+    if vnc_listen is None:
+        vnc_listen = 'localhost'
+    cmdl.append('%s:%d' % (vnc_listen, int(vnc_port) - 5900))
+    if do_daemonize:
+        pid = utils.daemonize('vncviewer', cmdl, vnc_password_tmpfile)
+        if pid == 0:
+            print >>sys.stderr, 'failed to invoke vncviewer'
+            os._exit(-1)
+    else:
+        print 'invoking ', ' '.join(cmdl)
+        if vnc_password_tmpfile is not None:
+            os.dup2(vnc_password_tmpfile.fileno(), 0)
+        os.execvp('vncviewer', cmdl)  # replaces this process; does not return
diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/xm/create.dtd
--- a/tools/python/xen/xm/create.dtd    Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/python/xen/xm/create.dtd    Thu Aug 07 11:57:34 2008 +0900
@@ -39,6 +39,7 @@
                  vbd*,
                  vif*,
                  vtpm*,
+                 pci*,
                  console*,
                  platform*,
                  vcpu_param*,
@@ -79,6 +80,13 @@
 
 <!ELEMENT vtpm   (name*)>
 <!ATTLIST vtpm   backend         CDATA #REQUIRED>
+
+<!ELEMENT pci    EMPTY>
+<!ATTLIST pci    domain          CDATA #REQUIRED
+                 bus             CDATA #REQUIRED
+                 slot            CDATA #REQUIRED
+                 func            CDATA #REQUIRED
+                 vslt            CDATA #IMPLIED>
 
 <!ELEMENT console (other_config*)>
 <!ATTLIST console protocol       (vt100|rfb|rdp) #REQUIRED>
diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/python/xen/xm/create.py     Thu Aug 07 11:57:34 2008 +0900
@@ -36,10 +36,12 @@ from xen.util import vscsi_util
 from xen.util import vscsi_util
 import xen.util.xsm.xsm as security
 from xen.xm.main import serverType, SERVER_XEN_API, get_single_vm
+from xen.util import utils
 
 from xen.xm.opts import *
 
 from main import server
+from main import domain_name_to_domid
 import console
 
 
@@ -118,6 +120,14 @@ gopts.opt('console_autoconnect', short='
           fn=set_true, default=0,
           use="Connect to the console after the domain is created.")
 
+gopts.opt('vncviewer',
+          fn=set_true, default=0,
+          use="Connect to the VNC display after the domain is created.")
+
+gopts.opt('vncviewer-autopass',
+          fn=set_true, default=0,
+          use="Pass VNC password to viewer via stdin and -autopass.")
+
 gopts.var('vncpasswd', val='NAME',
           fn=set_value, default=None,
           use="Password for VNC console on HVM domain.")
@@ -128,7 +138,7 @@ gopts.var('vncviewer', val='no|yes',
            "The address of the vncviewer is passed to the domain on the "
            "kernel command line using 'VNC_SERVER=<host>:<port>'. The port "
            "used by vnc is 5500 + DISPLAY. A display value with a free port "
-           "is chosen if possible.\nOnly valid when vnc=1.")
+           "is chosen if possible.\nOnly valid when vnc=1.\nDEPRECATED")
 
 gopts.var('vncconsole', val='no|yes',
           fn=set_bool, default=None,
@@ -1108,44 +1118,6 @@ def choose_vnc_display():
     return None
 vncpid = None
 
-def daemonize(prog, args):
-    """Runs a program as a daemon with the list of arguments.  Returns the PID
-    of the daemonized program, or returns 0 on error.
-    """
-    r, w = os.pipe()
-    pid = os.fork()
-
-    if pid == 0:
-        os.close(r)
-        w = os.fdopen(w, 'w')
-        os.setsid()
-        try:
-            pid2 = os.fork()
-        except:
-            pid2 = None
-        if pid2 == 0:
-            os.chdir("/")
-            for fd in range(0, 256):
-                try:
-                    os.close(fd)
-                except:
-                    pass
-            os.open("/dev/null", os.O_RDWR)
-            os.dup2(0, 1)
-            os.dup2(0, 2)
-            os.execvp(prog, args)
-            os._exit(1)
-        else:
-            w.write(str(pid2 or 0))
-            w.close()
-            os._exit(0)
-    os.close(w)
-    r = os.fdopen(r)
-    daemon_pid = int(r.read())
-    r.close()
-    os.waitpid(pid, 0)
-    return daemon_pid
-
 def spawn_vnc(display):
     """Spawns a vncviewer that listens on the specified display.  On success,
     returns the port that the vncviewer is listening on and sets the global
@@ -1154,7 +1126,7 @@ def spawn_vnc(display):
     vncargs = (["vncviewer", "-log", "*:stdout:0",
             "-listen", "%d" % (VNC_BASE_PORT + display) ])
     global vncpid
-    vncpid = daemonize("vncviewer", vncargs)
+    vncpid = utils.daemonize("vncviewer", vncargs)
     if vncpid == 0:
         return 0
 
@@ -1362,6 +1334,11 @@ def main(argv):
     elif not opts.is_xml:
         dom = make_domain(opts, config)
         
+    if opts.vals.vncviewer:
+        domid = domain_name_to_domid(sxp.child_value(config, 'name', -1))
+        vncviewer_autopass = getattr(opts.vals,'vncviewer-autopass', False)
+        console.runVncViewer(domid, vncviewer_autopass, True)
+    
 def do_console(domain_name):
     cpid = os.fork() 
     if cpid != 0:
@@ -1373,13 +1350,7 @@ def do_console(domain_name):
                 if os.WEXITSTATUS(rv) != 0:
                     sys.exit(os.WEXITSTATUS(rv))
             try:
-                # Acquire the console of the created dom
-                if serverType == SERVER_XEN_API:
-                    domid = server.xenapi.VM.get_domid(
-                               get_single_vm(domain_name))
-                else:
-                    dom = server.xend.domain(domain_name)
-                    domid = int(sxp.child_value(dom, 'domid', '-1'))
+                domid = domain_name_to_domid(domain_name)
                 console.execConsole(domid)
             except:
                 pass
diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/python/xen/xm/main.py       Thu Aug 07 11:57:34 2008 +0900
@@ -64,6 +64,9 @@ from xen.xend import XendOptions
 from xen.xend import XendOptions
 xoptions = XendOptions.instance()
 
+import signal
+signal.signal(signal.SIGINT, signal.SIG_DFL)
+
 # getopt.gnu_getopt is better, but only exists in Python 2.3+.  Use
 # getopt.getopt if gnu_getopt is not available.  This will mean that options
 # may only be specified before positional arguments.
@@ -97,6 +100,8 @@ SUBCOMMAND_HELP = {
     
     'console'     : ('[-q|--quiet] <Domain>',
                      'Attach to <Domain>\'s console.'),
+    'vncviewer'   : ('[--[vncviewer-]autopass] <Domain>',
+                     'Attach to <Domain>\'s VNC server.'),
     'create'      : ('<ConfigFile> [options] [vars]',
                      'Create a domain based on <ConfigFile>.'),
     'destroy'     : ('<Domain>',
@@ -243,6 +248,10 @@ SUBCOMMAND_OPTIONS = {
     'console': (
        ('-q', '--quiet', 'Do not print an error message if the domain does not exist'),
     ),
+    'vncviewer': (
+       ('', '--autopass', 'Pass VNC password to viewer via stdin and -autopass'),
+       ('', '--vncviewer-autopass', '(consistency alias for --autopass)'),
+    ),
     'dmesg': (
        ('-c', '--clear', 'Clear dmesg buffer as well as printing it'),
     ),
@@ -260,6 +269,8 @@ SUBCOMMAND_OPTIONS = {
     'start': (
        ('-p', '--paused', 'Do not unpause domain after starting it'),
        ('-c', '--console_autoconnect', 'Connect to the console after the domain is created'),
+       ('', '--vncviewer', 'Connect to display via VNC after the domain is created'),
+       ('', '--vncviewer-autopass', 'Pass VNC password to viewer via stdin and -autopass'),
     ),
     'resume': (
        ('-p', '--paused', 'Do not unpause domain after resuming it'),
@@ -277,6 +288,7 @@ SUBCOMMAND_OPTIONS = {
 
 common_commands = [
     "console",
+    "vncviewer",
     "create",
     "new",
     "delete",
@@ -304,6 +316,7 @@ common_commands = [
 
 domain_commands = [
     "console",
+    "vncviewer",
     "create",
     "new",
     "delete",
@@ -1185,14 +1198,20 @@ def xm_start(args):
 
     paused = False
     console_autoconnect = False
+    vncviewer = False
+    vncviewer_autopass = False
 
     try:
-        (options, params) = getopt.gnu_getopt(args, 'cp', ['console_autoconnect','paused'])
+        (options, params) = getopt.gnu_getopt(args, 'cp', ['console_autoconnect','paused','vncviewer','vncviewer-autopass'])
         for (k, v) in options:
             if k in ('-p', '--paused'):
                 paused = True
             if k in ('-c', '--console_autoconnect'):
                 console_autoconnect = True
+            if k in ('--vncviewer'):
+                vncviewer = True
+            if k in ('--vncviewer-autopass'):
+                vncviewer_autopass = True
 
         if len(params) != 1:
             raise OptionError("Expects 1 argument")
@@ -1204,6 +1223,9 @@ def xm_start(args):
 
     if console_autoconnect:
         start_do_console(dom)
+
+    if console_autoconnect:
+        console.runVncViewer(domid, vncviewer_autopass, True)
 
     try:
         if serverType == SERVER_XEN_API:
@@ -1783,6 +1805,40 @@ def xm_console(args):
     console.execConsole(domid)
 
 
+def domain_name_to_domid(domain_name):
+    """Return domain_name's domid as an int (Xen-API or legacy xend path)."""
+    if serverType == SERVER_XEN_API:
+        domid = int(server.xenapi.VM.get_domid(get_single_vm(domain_name)))
+    else:
+        dom = server.xend.domain(domain_name)
+        domid = int(sxp.child_value(dom, 'domid', '-1'))
+    return domid
+
+def xm_vncviewer(args):
+    """Attach a VNC viewer to the domain given in args (see --autopass)."""
+    autopass = False
+    try:
+        (options, params) = getopt.gnu_getopt(args, '', ['autopass','vncviewer-autopass'])
+    except getopt.GetoptError, opterr:
+        err(opterr)
+        usage('vncviewer')
+
+    for (k, v) in options:
+        if k in ['--autopass','--vncviewer-autopass']:
+            autopass = True
+        else:
+            assert False
+
+    if len(params) != 1:
+        err('No domain given (or several parameters specified)')
+        usage('vncviewer')
+
+    dom = params[0]
+    domid = domain_name_to_domid(dom)
+
+    console.runVncViewer(domid, autopass)
+
+
 def xm_uptime(args):
     short_mode = 0
 
@@ -2102,7 +2158,23 @@ def xm_pci_list(args):
 
     dom = params[0]
 
-    devs = server.xend.domain.getDeviceSxprs(dom, 'pci')
+    devs = []
+    if serverType == SERVER_XEN_API:
+        for dpci_ref in server.xenapi.VM.get_DPCIs(get_single_vm(dom)):
+            ppci_ref = server.xenapi.DPCI.get_PPCI(dpci_ref)
+            ppci_record = server.xenapi.PPCI.get_record(ppci_ref)
+            dev = {
+                "domain":   "0x%04x" % int(ppci_record["domain"]),
+                "bus":      "0x%02x" % int(ppci_record["bus"]),
+                "slot":     "0x%02x" % int(ppci_record["slot"]),
+                "func":     "0x%01x" % int(ppci_record["func"]),
+                "vslt":     "0x%02x" % \
+                            int(server.xenapi.DPCI.get_hotplug_slot(dpci_ref))
+            }
+            devs.append(dev)
+
+    else:
+        devs = server.xend.domain.getDeviceSxprs(dom, 'pci')
 
     if len(devs) == 0:
         return
@@ -2362,7 +2434,34 @@ def xm_pci_attach(args):
 def xm_pci_attach(args):
     arg_check(args, 'pci-attach', 2, 3)
     (dom, pci) = parse_pci_configuration(args, 'Initialising')
-    server.xend.domain.device_configure(dom, pci)
+
+    if serverType == SERVER_XEN_API:
+
+        pci_dev = sxp.children(pci, 'dev')[0]
+        domain = int(sxp.child_value(pci_dev, 'domain'), 16)
+        bus = int(sxp.child_value(pci_dev, 'bus'), 16)
+        slot = int(sxp.child_value(pci_dev, 'slot'), 16)
+        func = int(sxp.child_value(pci_dev, 'func'), 16)
+        vslt = int(sxp.child_value(pci_dev, 'vslt'), 16)
+        name = "%04x:%02x:%02x.%01x" % (domain, bus, slot, func)
+
+        target_ref = None
+        for ppci_ref in server.xenapi.PPCI.get_all():
+            if name == server.xenapi.PPCI.get_name(ppci_ref):
+                target_ref = ppci_ref
+                break
+        if target_ref is None:
+            raise OptionError("Device %s not found" % name)
+
+        dpci_record = {
+            "VM":           get_single_vm(dom),
+            "PPCI":         target_ref,
+            "hotplug_slot": vslt
+        }
+        server.xenapi.DPCI.create(dpci_record)
+
+    else:
+        server.xend.domain.device_configure(dom, pci)
 
 def xm_scsi_attach(args):
     xenapi_unsupported()
@@ -2462,7 +2561,29 @@ def xm_pci_detach(args):
 def xm_pci_detach(args):
     arg_check(args, 'pci-detach', 2)
     (dom, pci) = parse_pci_configuration(args, 'Closing')
-    server.xend.domain.device_configure(dom, pci)
+
+    if serverType == SERVER_XEN_API:
+
+        pci_dev = sxp.children(pci, 'dev')[0]
+        domain = int(sxp.child_value(pci_dev, 'domain'), 16)
+        bus = int(sxp.child_value(pci_dev, 'bus'), 16)
+        slot = int(sxp.child_value(pci_dev, 'slot'), 16)
+        func = int(sxp.child_value(pci_dev, 'func'), 16)
+        vslt = int(sxp.child_value(pci_dev, 'vslt'), 16)
+        name = "%04x:%02x:%02x.%01x" % (domain, bus, slot, func)
+
+        target_ref = None
+        for dpci_ref in server.xenapi.VM.get_DPCIs(get_single_vm(dom)):
+            ppci_ref = server.xenapi.DPCI.get_PPCI(dpci_ref)
+            if name == server.xenapi.PPCI.get_name(ppci_ref):
+                target_ref = ppci_ref
+                server.xenapi.DPCI.destroy(dpci_ref)
+                break
+        if target_ref is None:
+            raise OptionError("Device %s not assigned" % name)
+
+    else:
+        server.xend.domain.device_configure(dom, pci)
 
 def xm_scsi_detach(args):
     xenapi_unsupported()
@@ -2617,6 +2738,7 @@ commands = {
     "event-monitor": xm_event_monitor,
     # console commands
     "console": xm_console,
+    "vncviewer": xm_vncviewer,
     # xenstat commands
     "top": xm_top,
     # domain commands
diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/xm/shutdown.py
--- a/tools/python/xen/xm/shutdown.py   Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/python/xen/xm/shutdown.py   Thu Aug 07 11:57:34 2008 +0900
@@ -144,9 +144,10 @@ def main_dom(opts, args):
 def main_dom(opts, args):
     if len(args) == 0: opts.err('No domain parameter given')
     if len(args) >  1: opts.err('No multiple domain parameters allowed')
-    dom = sxp.child_value(server.xend.domain(args[0]), 'name')
     if serverType == SERVER_XEN_API:
-        dom = get_single_vm(dom)
+        dom = get_single_vm(args[0])
+    else:
+        dom = sxp.child_value(server.xend.domain(args[0]), 'name')
     mode = shutdown_mode(opts)  
     shutdown(opts, [ dom ], mode, opts.vals.wait)
     
diff -r 7affdebb7a1e -r a39913db6e51 tools/python/xen/xm/xenapi_create.py
--- a/tools/python/xen/xm/xenapi_create.py      Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/python/xen/xm/xenapi_create.py      Thu Aug 07 11:57:34 2008 +0900
@@ -369,6 +369,12 @@ class xenapi_create:
 
             self.create_consoles(vm_ref, consoles)
 
+            # Now create pcis
+
+            pcis = vm.getElementsByTagName("pci")
+
+            self.create_pcis(vm_ref, pcis)
+
             return vm_ref
         except:
             server.xenapi.VM.destroy(vm_ref)
@@ -389,7 +395,7 @@ class xenapi_create:
             "device":
                 vbd.attributes["device"].value,
             "bootable":
-                vbd.attributes["bootable"].value == "True",
+                vbd.attributes["bootable"].value == "1",
             "mode":
                 vbd.attributes["mode"].value,
             "type":
@@ -493,6 +499,39 @@ class xenapi_create:
 
         return server.xenapi.console.create(console_record)
 
+    def create_pcis(self, vm_ref, pcis):
+        log(DEBUG, "create_pcis")
+        return map(lambda pci: self.create_pci(vm_ref, pci), pcis)  # one result per element of pcis
+
+    def create_pci(self, vm_ref, pci):
+        """Create a DPCI binding vm_ref to the PPCI named by <pci>, or None."""
+        log(DEBUG, "create_pci")
+        domain = int(pci.attributes["domain"].value, 16)
+        bus = int(pci.attributes["bus"].value, 16)
+        slot = int(pci.attributes["slot"].value, 16)
+        func = int(pci.attributes["func"].value, 16)
+        name = "%04x:%02x:%02x.%01x" % (domain, bus, slot, func)
+
+        target_ref = None
+        for ppci_ref in server.xenapi.PPCI.get_all():
+            if name == server.xenapi.PPCI.get_name(ppci_ref):
+                target_ref = ppci_ref
+                break
+        if target_ref is None:
+            log(DEBUG, "create_pci: pci device not found")
+            return None
+
+        dpci_record = {
+            "VM":
+                vm_ref,
+            "PPCI":
+                target_ref,
+            "hotplug_slot":
+                int(pci.getAttribute("vslt") or "0", 16)  # vslt is #IMPLIED
+        }
+
+        return server.xenapi.DPCI.create(dpci_record)
+
 def get_child_by_name(exp, childname, default = None):
     try:
         return [child for child in sxp.children(exp)
@@ -520,6 +559,9 @@ class sxp2xml:
 
         vfbs_sxp = map(lambda x: x[1], [device for device in devices
                                         if device[1][0] == "vfb"])
+
+        pcis_sxp = map(lambda x: x[1], [device for device in devices
+                                        if device[1][0] == "pci"])
 
         # Create XML Document
         
@@ -597,13 +639,15 @@ class sxp2xml:
             pv = document.createElement("pv")
             pv.attributes["kernel"] \
                 = get_child_by_name(image, "kernel", "")
-            pv.attributes["bootloader"] = ""
+            pv.attributes["bootloader"] \
+                = get_child_by_name(config, "bootloader", "")
             pv.attributes["ramdisk"] \
                 = get_child_by_name(image, "ramdisk", "")
             pv.attributes["args"] \
                 = "root=" + get_child_by_name(image, "root", "") \
                 + " " + get_child_by_name(image, "args", "")
-            pv.attributes["bootloader_args"] = ""
+            pv.attributes["bootloader_args"] \
+                = get_child_by_name(config, "bootloader_args","")
 
             vm.appendChild(pv)
         elif image[0] == "hvm":
@@ -653,6 +697,12 @@ class sxp2xml:
         vtpms = map(lambda vtpm: self.extract_vtpm(vtpm, document), vtpms_sxp)
 
         map(vm.appendChild, vtpms)
+
+        # And now the pcis
+
+        pcis = self.extract_pcis(pcis_sxp, document)
+
+        map(vm.appendChild, pcis)
 
         # Last but not least the consoles...
 
@@ -821,7 +871,28 @@ class sxp2xml:
 
         return vfb
 
-    _eths = -1
+    def extract_pcis(self, pcis_sxp, document):
+        """Build one <pci> element per "dev" child of each entry in pcis_sxp."""
+        pcis = []
+
+        for pci_sxp in pcis_sxp:
+            for dev_sxp in sxp.children(pci_sxp, "dev"):
+                pci = document.createElement("pci")
+
+                pci.attributes["domain"] \
+                    = get_child_by_name(dev_sxp, "domain", "0")
+                pci.attributes["bus"] \
+                    = get_child_by_name(dev_sxp, "bus", "0")
+                pci.attributes["slot"] \
+                    = get_child_by_name(dev_sxp, "slot", "0")
+                pci.attributes["func"] \
+                    = get_child_by_name(dev_sxp, "func", "0")
+                pci.attributes["vslt"] \
+                    = get_child_by_name(dev_sxp, "vslt", "0")
+
+                pcis.append(pci)
+
+        return pcis
 
     def mk_other_config(self, key, value, document):
         other_config = document.createElement("other_config")
@@ -914,6 +985,8 @@ class sxp2xml:
  
         return platform_configs
     
+    _eths = -1
+
     def getFreshEthDevice(self):
         self._eths += 1
         return "eth%i" % self._eths
diff -r 7affdebb7a1e -r a39913db6e51 tools/xenmon/Makefile
--- a/tools/xenmon/Makefile     Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/xenmon/Makefile     Thu Aug 07 11:57:34 2008 +0900
@@ -33,6 +33,8 @@ install: build
        $(INSTALL_PROG) xenbaked $(DESTDIR)$(SBINDIR)/xenbaked
        $(INSTALL_PROG) xentrace_setmask  $(DESTDIR)$(SBINDIR)/xentrace_setmask
        $(INSTALL_PROG) xenmon.py  $(DESTDIR)$(SBINDIR)/xenmon.py
+       $(INSTALL_DIR) $(DESTDIR)$(DOCDIR)
+       $(INSTALL_DATA) README $(DESTDIR)$(DOCDIR)/README.xenmon
 
 .PHONY: clean
 clean:
diff -r 7affdebb7a1e -r a39913db6e51 tools/xenstat/libxenstat/Makefile
--- a/tools/xenstat/libxenstat/Makefile Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/xenstat/libxenstat/Makefile Thu Aug 07 11:57:34 2008 +0900
@@ -15,7 +15,7 @@ XEN_ROOT=../../..
 XEN_ROOT=../../..
 include $(XEN_ROOT)/tools/Rules.mk
 
-prefix=/usr
+prefix=$(PREFIX)
 includedir=$(prefix)/include
 libdir=$(prefix)/lib
 
diff -r 7affdebb7a1e -r a39913db6e51 tools/xenstat/libxenstat/src/xenstat.c
--- a/tools/xenstat/libxenstat/src/xenstat.c    Thu Aug 07 11:47:34 2008 +0900
+++ b/tools/xenstat/libxenstat/src/xenstat.c    Thu Aug 07 11:57:34 2008 +0900
@@ -109,7 +109,7 @@ xenstat_handle *xenstat_init(void)
 
        handle->xshandle = xs_daemon_open_readonly(); /* open handle to xenstore*/
        if (handle->xshandle == NULL) {
-               perror("unable to open xenstore\n");
+               perror("unable to open xenstore");
                xc_interface_close(handle->xc_handle);
                free(handle);
                return NULL;
diff -r 7affdebb7a1e -r a39913db6e51 unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c
--- a/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c        Thu Aug 07 11:47:34 2008 +0900
+++ b/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c        Thu Aug 07 11:57:34 2008 +0900
@@ -71,7 +71,7 @@ static int bp_suspend(void)
        return suspend_cancelled;
 }
 
-int __xen_suspend(int fast_suspend, void (*resume_notifier)(void))
+int __xen_suspend(int fast_suspend, void (*resume_notifier)(int))
 {
        int err, suspend_cancelled, nr_cpus;
        struct ap_suspend_info info;
@@ -101,7 +101,7 @@ int __xen_suspend(int fast_suspend, void
 
        local_irq_disable();
        suspend_cancelled = bp_suspend();
-       resume_notifier();
+       resume_notifier(suspend_cancelled);
        local_irq_enable();
 
        smp_mb();
diff -r 7affdebb7a1e -r a39913db6e51 xen/Makefile
--- a/xen/Makefile      Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/Makefile      Thu Aug 07 11:57:34 2008 +0900
@@ -1,8 +1,8 @@
 # This is the correct place to edit the build version.
 # All other places this is stored (eg. compile.h) should be autogenerated.
-export XEN_VERSION       = 3
-export XEN_SUBVERSION    = 3
-export XEN_EXTRAVERSION ?= -unstable$(XEN_VENDORVERSION)
+export XEN_VERSION       = 4
+export XEN_SUBVERSION    = 0
+export XEN_EXTRAVERSION ?= .0-rc3-pre$(XEN_VENDORVERSION)
 export XEN_FULLVERSION   = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION)
 -include xen-version
 
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/ia64/xen/domain.c        Thu Aug 07 11:57:34 2008 +0900
@@ -2212,8 +2212,9 @@ int __init construct_dom0(struct domain 
        return 0;
 }
 
-void machine_restart(void)
-{
+void machine_restart(unsigned int delay_millisecs)
+{
+       mdelay(delay_millisecs);
        console_start_sync();
        if (running_on_sim)
                printk ("machine_restart called.  spinning...\n");
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/ia64/xen/irq.c
--- a/xen/arch/ia64/xen/irq.c   Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/ia64/xen/irq.c   Thu Aug 07 11:57:34 2008 +0900
@@ -459,7 +459,7 @@ int pirq_guest_bind(struct vcpu *v, int 
     return rc;
 }
 
-int pirq_guest_unbind(struct domain *d, int irq)
+void pirq_guest_unbind(struct domain *d, int irq)
 {
     irq_desc_t         *desc = &irq_desc[irq];
     irq_guest_action_t *action;
@@ -493,7 +493,6 @@ int pirq_guest_unbind(struct domain *d, 
     }
 
     spin_unlock_irqrestore(&desc->lock, flags);    
-    return 0;
 }
 
 void
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/acpi/cpufreq/utility.c
--- a/xen/arch/x86/acpi/cpufreq/utility.c       Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/acpi/cpufreq/utility.c       Thu Aug 07 11:57:34 2008 +0900
@@ -296,12 +296,11 @@ void cpufreq_suspend(void)
 {
     int cpu;
 
-    /* to protect the case when Px was controlled by dom0-kernel */
-    /* or when CPU_FREQ not set in which case ACPI Px objects not parsed */
+    /* to protect the case when Px was not controlled by xen */
     for_each_online_cpu(cpu) {
         struct processor_performance *perf = &processor_pminfo[cpu].perf;
 
-        if (!perf->init)
+        if (!(perf->init & XEN_PX_INIT))
             return;
     }
 
@@ -316,14 +315,13 @@ int cpufreq_resume(void)
 {
     int cpu, ret = 0;
 
-    /* 1. to protect the case when Px was controlled by dom0-kernel */
-    /* or when CPU_FREQ not set in which case ACPI Px objects not parsed */
+    /* 1. to protect the case when Px was not controlled by xen */
     /* 2. set state and resume flag to sync cpu to right state and freq */
     for_each_online_cpu(cpu) {
         struct processor_performance *perf = &processor_pminfo[cpu].perf;
         struct cpufreq_policy *policy = &xen_px_policy[cpu];
 
-        if (!perf->init)
+        if (!(perf->init & XEN_PX_INIT))
             goto err;
         perf->state = 0;
         policy->resume = 1;
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/acpi/pmstat.c
--- a/xen/arch/x86/acpi/pmstat.c        Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/acpi/pmstat.c        Thu Aug 07 11:57:34 2008 +0900
@@ -52,9 +52,9 @@ int do_get_pm_info(struct xen_sysctl_get
     struct pm_px *pxpt = &px_statistic_data[op->cpuid];
     struct processor_pminfo *pmpt = &processor_pminfo[op->cpuid];
 
-    /* to protect the case when Px was controlled by dom0-kernel */
-    /* or when CPU_FREQ not set in which case ACPI Px objects not parsed */
-    if ( !pmpt->perf.init && (op->type & PMSTAT_CATEGORY_MASK) == PMSTAT_PX )
+    /* to protect the case when Px was not controlled by xen */
+    if ( (!(pmpt->perf.init & XEN_PX_INIT)) && 
+        (op->type & PMSTAT_CATEGORY_MASK) == PMSTAT_PX )
         return -EINVAL;
 
     if ( !cpu_online(op->cpuid) )
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/domain.c     Thu Aug 07 11:57:34 2008 +0900
@@ -1811,7 +1811,6 @@ int domain_relinquish_resources(struct d
         if ( ret )
             return ret;
 #endif
-        WARN_ON(d->xenheap_pages);
         break;
 
     default:
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/domain_build.c       Thu Aug 07 11:57:34 2008 +0900
@@ -757,6 +757,7 @@ int __init construct_dom0(
     si->shared_info = virt_to_maddr(d->shared_info);
 
     si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
+    si->flags       |= (xen_processor_pmbits << 8) & SIF_PM_MASK;
     si->pt_base      = vpt_start + 2 * PAGE_SIZE * !!is_pv_32on64_domain(d);
     si->nr_pt_frames = nr_pt_pages;
     si->mfn_list     = vphysmap_start;
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/domctl.c     Thu Aug 07 11:57:34 2008 +0900
@@ -661,6 +661,7 @@ long arch_do_domctl(
         if ( !iommu_pv_enabled && !is_hvm_domain(d) )
         {
             ret = -ENOSYS;
+            put_domain(d);
             break;
         }
 
@@ -669,12 +670,16 @@ long arch_do_domctl(
             gdprintk(XENLOG_ERR, "XEN_DOMCTL_assign_device: "
                      "%x:%x:%x already assigned, or non-existent\n",
                      bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+            put_domain(d);
             break;
         }
 
         ret = assign_device(d, bus, devfn);
-        gdprintk(XENLOG_INFO, "XEN_DOMCTL_assign_device: bdf = %x:%x:%x\n",
-                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+        if ( ret )
+            gdprintk(XENLOG_ERR, "XEN_DOMCTL_assign_device: "
+                     "assign device (%x:%x:%x) failed\n",
+                     bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
         put_domain(d);
     }
     break;
@@ -701,11 +706,15 @@ long arch_do_domctl(
         if ( !iommu_pv_enabled && !is_hvm_domain(d) )
         {
             ret = -ENOSYS;
+            put_domain(d);
             break;
         }
 
         if ( !device_assigned(bus, devfn) )
-            break;
+        {
+            put_domain(d);
+            break;
+        }
 
         ret = 0;
         deassign_device(d, bus, devfn);
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c     Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/hvm/io.c     Thu Aug 07 11:57:34 2008 +0900
@@ -284,7 +284,7 @@ void dpci_ioport_write(uint32_t mport, i
         data = p->data;
         if ( p->data_is_ptr )
             (void)hvm_copy_from_guest_phys(
-                &data, p->data + (sign * i & p->size), p->size);
+                &data, p->data + (sign * i * p->size), p->size);
 
         switch ( p->size )
         {
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/hvm/stdvga.c
--- a/xen/arch/x86/hvm/stdvga.c Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/hvm/stdvga.c Thu Aug 07 11:57:34 2008 +0900
@@ -33,6 +33,10 @@
 #include <xen/domain_page.h>
 #include <asm/hvm/support.h>
 #include <xen/numa.h>
+#include <xen/paging.h>
+
+#define VGA_MEM_BASE 0xa0000
+#define VGA_MEM_SIZE 0x20000
 
 #define PAT(x) (x)
 static const uint32_t mask16[16] = {
@@ -464,6 +468,7 @@ static int mmio_move(struct hvm_hw_stdvg
 {
     int i;
     int sign = p->df ? -1 : 1;
+    p2m_type_t p2mt;
 
     if ( p->data_is_ptr )
     {
@@ -473,7 +478,19 @@ static int mmio_move(struct hvm_hw_stdvg
             for ( i = 0; i < p->count; i++ ) 
             {
                 tmp = stdvga_mem_read(addr, p->size);
-                hvm_copy_to_guest_phys(data, &tmp, p->size);
+                if ( hvm_copy_to_guest_phys(data, &tmp, p->size) ==
+                     HVMCOPY_bad_gfn_to_mfn )
+                {
+                    (void)gfn_to_mfn_current(data >> PAGE_SHIFT, &p2mt);
+                    /*
+                     * The only case we handle is vga_mem <-> vga_mem.
+                     * Anything else disables caching and leaves it to qemu-dm.
+                     */
+                    if ( (p2mt != p2m_mmio_dm) || (data < VGA_MEM_BASE) ||
+                         ((data + p->size) > (VGA_MEM_BASE + VGA_MEM_SIZE)) )
+                        return 0;
+                    stdvga_mem_write(data, tmp, p->size);
+                }
                 data += sign * p->size;
                 addr += sign * p->size;
             }
@@ -483,7 +500,15 @@ static int mmio_move(struct hvm_hw_stdvg
             uint32_t addr = p->addr, data = p->data, tmp;
             for ( i = 0; i < p->count; i++ )
             {
-                hvm_copy_from_guest_phys(&tmp, data, p->size);
+                if ( hvm_copy_from_guest_phys(&tmp, data, p->size) ==
+                     HVMCOPY_bad_gfn_to_mfn )
+                {
+                    (void)gfn_to_mfn_current(data >> PAGE_SHIFT, &p2mt);
+                    if ( (p2mt != p2m_mmio_dm) || (data < VGA_MEM_BASE) ||
+                         ((data + p->size) > (VGA_MEM_BASE + VGA_MEM_SIZE)) )
+                        return 0;
+                    tmp = stdvga_mem_read(data, p->size);
+                }
                 stdvga_mem_write(addr, tmp, p->size);
                 data += sign * p->size;
                 addr += sign * p->size;
@@ -536,7 +561,8 @@ static int stdvga_intercept_mmio(ioreq_t
         {
         case IOREQ_TYPE_COPY:
             buf = mmio_move(s, p);
-            break;
+            if ( buf )
+                break;
         default:
             gdprintk(XENLOG_WARNING, "unsupported mmio request type:%d "
                      "addr:0x%04x data:0x%04x size:%d count:%d state:%d "
@@ -588,7 +614,7 @@ void stdvga_init(struct domain *d)
         register_portio_handler(d, 0x3ce, 2, stdvga_intercept_pio);
         /* MMIO. */
         register_buffered_io_handler(
-            d, 0xa0000, 0x20000, stdvga_intercept_mmio);
+            d, VGA_MEM_BASE, VGA_MEM_SIZE, stdvga_intercept_mmio);
     }
 }
 
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/hvm/svm/svm.c        Thu Aug 07 11:57:34 2008 +0900
@@ -874,9 +874,12 @@ static void svm_do_nested_pgfault(paddr_
     mfn_t mfn;
     unsigned long gfn = gpa >> PAGE_SHIFT;
 
-    /* If this GFN is emulated MMIO, pass the fault to the mmio handler */
+    /*
+     * If this GFN is emulated MMIO or marked as read-only, pass the fault
+     * to the mmio handler.
+     */
     mfn = gfn_to_mfn_current(gfn, &p2mt);
-    if ( p2mt == p2m_mmio_dm )
+    if ( (p2mt == p2m_mmio_dm) || (p2mt == p2m_ram_ro) )
     {
         if ( !handle_mmio() )
             hvm_inject_exception(TRAP_gp_fault, 0, 0);
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Thu Aug 07 11:57:34 2008 +0900
@@ -1971,7 +1971,7 @@ static void ept_handle_violation(unsigne
     }
 
     mfn = gfn_to_mfn(d, gfn, &t);
-    if ( p2m_is_ram(t) && paging_mode_log_dirty(d) )
+    if ( (t != p2m_ram_ro) && p2m_is_ram(t) && paging_mode_log_dirty(d) )
     {
         paging_mark_dirty(d, mfn_x(mfn));
         p2m_change_type(d, gfn, p2m_ram_logdirty, p2m_ram_rw);
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/io_apic.c    Thu Aug 07 11:57:34 2008 +0900
@@ -45,12 +45,12 @@ int (*ioapic_renumber_irq)(int ioapic, i
 int (*ioapic_renumber_irq)(int ioapic, int irq);
 atomic_t irq_mis_count;
 
-int msi_irq_enable = 0;
-boolean_param("msi_irq_enable", msi_irq_enable);
+int msi_enable = 0;
+boolean_param("msi", msi_enable);
 
 int domain_irq_to_vector(struct domain *d, int irq)
 {
-    if ( !msi_irq_enable )
+    if ( !msi_enable )
         return irq_to_vector(irq);
     else
         return d->arch.pirq_vector[irq];
@@ -58,7 +58,7 @@ int domain_irq_to_vector(struct domain *
 
 int domain_vector_to_irq(struct domain *d, int vector)
 {
-    if ( !msi_irq_enable )
+    if ( !msi_enable )
         return vector_to_irq(vector);
     else
         return d->arch.vector_pirq[vector];
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/irq.c        Thu Aug 07 11:57:34 2008 +0900
@@ -573,7 +573,7 @@ int pirq_guest_bind(struct vcpu *v, int 
     return rc;
 }
 
-int pirq_guest_unbind(struct domain *d, int irq)
+void pirq_guest_unbind(struct domain *d, int irq)
 {
     unsigned int        vector;
     irq_desc_t         *desc;
@@ -660,7 +660,6 @@ int pirq_guest_unbind(struct domain *d, 
 
  out:
     spin_unlock_irqrestore(&desc->lock, flags);    
-    return 0;
 }
 
 extern void dump_ioapic_irq_info(void);
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/mm.c Thu Aug 07 11:57:34 2008 +0900
@@ -1138,8 +1138,10 @@ static int alloc_l2_table(struct page_in
 
     for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
     {
-        if ( is_guest_l2_slot(d, type, i) &&
-             unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) )
+        if ( !is_guest_l2_slot(d, type, i) )
+            continue;
+
+        if ( unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) )
             goto fail;
         
         adjust_guest_l2e(pl2e[i], d);
@@ -1206,8 +1208,9 @@ static int alloc_l3_table(struct page_in
                                                 d) )
                 goto fail;
         }
-        else if ( is_guest_l3_slot(i) &&
-                  unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
+        else if ( !is_guest_l3_slot(i) )
+            continue;
+        else if ( unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
             goto fail;
 
         adjust_guest_l3e(pl3e[i], d);
@@ -1222,8 +1225,12 @@ static int alloc_l3_table(struct page_in
  fail:
     MEM_LOG("Failure in alloc_l3_table: entry %d", i);
     while ( i-- > 0 )
-        if ( is_guest_l3_slot(i) )
-            put_page_from_l3e(pl3e[i], pfn);
+    {
+        if ( !is_guest_l3_slot(i) )
+            continue;
+        unadjust_guest_l3e(pl3e[i], d);
+        put_page_from_l3e(pl3e[i], pfn);
+    }
 
     unmap_domain_page(pl3e);
     return 0;
@@ -1242,8 +1249,10 @@ static int alloc_l4_table(struct page_in
 
     for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
     {
-        if ( is_guest_l4_slot(d, i) &&
-             unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
+        if ( !is_guest_l4_slot(d, i) )
+            continue;
+
+        if ( unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
             goto fail;
 
         adjust_guest_l4e(pl4e[i], d);
@@ -1585,7 +1594,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
     struct page_info *l3pg = mfn_to_page(pfn);
-    int okay, rc = 1;
+    int rc = 1;
 
     if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
     {
@@ -1642,10 +1651,13 @@ static int mod_l3_entry(l3_pgentry_t *pl
         return 0;
     }
 
-    okay = create_pae_xen_mappings(d, pl3e);
-    BUG_ON(!okay);
-
-    pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
+    if ( likely(rc) )
+    {
+        if ( !create_pae_xen_mappings(d, pl3e) )
+            BUG();
+
+        pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
+    }
 
     page_unlock(l3pg);
     put_page_from_l3e(ol3e, pfn);
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/mm/shadow/multi.c    Thu Aug 07 11:57:34 2008 +0900
@@ -3359,7 +3359,7 @@ static int sh_page_fault(struct vcpu *v,
             gdprintk(XENLOG_DEBUG, "guest attempted write to read-only memory"
                      " page. va page=%#lx, mfn=%#lx\n",
                      va & PAGE_MASK, mfn_x(gmfn));
-        goto emulate; /* skip over the instruction */
+        goto emulate_readonly; /* skip over the instruction */
     }
 
     /* In HVM guests, we force CR0.WP always to be set, so that the
@@ -3404,6 +3404,11 @@ static int sh_page_fault(struct vcpu *v,
         goto done;
     }
 
+    /*
+     * Write from userspace to ro-mem needs to jump here to avoid getting
+     * caught by user-mode page-table check above.
+     */
+ emulate_readonly:
     /*
      * We don't need to hold the lock for the whole emulation; we will
      * take it again when we write to the pagetables.
@@ -4640,14 +4645,8 @@ static void *emulate_map_dest(struct vcp
                               u32 bytes,
                               struct sh_emulate_ctxt *sh_ctxt)
 {
-    struct segment_register *sreg;
     unsigned long offset;
     void *map = NULL;
-
-    /* We don't emulate user-mode writes to page tables */
-    sreg = hvm_get_seg_reg(x86_seg_ss, sh_ctxt);
-    if ( sreg->attr.fields.dpl == 3 )
-        return MAPPING_UNHANDLEABLE;
 
     sh_ctxt->mfn1 = emulate_gva_to_mfn(v, vaddr, sh_ctxt);
     if ( !mfn_valid(sh_ctxt->mfn1) ) 
@@ -4656,6 +4655,16 @@ static void *emulate_map_dest(struct vcp
                 (mfn_x(sh_ctxt->mfn1) == READONLY_GFN) ?
                 MAPPING_SILENT_FAIL : MAPPING_UNHANDLEABLE);
 
+#ifndef NDEBUG
+    /* We don't emulate user-mode writes to page tables */
+    if ( hvm_get_seg_reg(x86_seg_ss, sh_ctxt)->attr.fields.dpl == 3 )
+    {
+        gdprintk(XENLOG_DEBUG, "User-mode write to pagetable reached "
+                 "emulate_map_dest(). This should never happen!\n");
+        return MAPPING_UNHANDLEABLE;
+    }
+#endif
+                
     /* Unaligned writes mean probably this isn't a pagetable */
     if ( vaddr & (bytes - 1) )
         sh_remove_shadows(v, sh_ctxt->mfn1, 0, 0 /* Slow, can fail */ );
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c        Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/msi.c        Thu Aug 07 11:57:34 2008 +0900
@@ -27,8 +27,6 @@
 #include <public/physdev.h>
 #include <xen/iommu.h>
 
-extern int msi_irq_enable;
-
 /* bitmap indicate which fixed map is free */
 DEFINE_SPINLOCK(msix_fixmap_lock);
 DECLARE_BITMAP(msix_fixmap_pages, MAX_MSIX_PAGES);
@@ -763,14 +761,13 @@ retry:
     {
         desc = &irq_desc[entry->vector];
 
-       local_irq_save(flags);
-       if ( !spin_trylock(&desc->lock) )
-       {
-           local_irq_restore(flags);
-           goto retry;
-       }
-
-        spin_lock_irqsave(&desc->lock, flags);
+        local_irq_save(flags);
+        if ( !spin_trylock(&desc->lock) )
+        {
+             local_irq_restore(flags);
+            goto retry;
+        }
+
         if ( desc->handler == &pci_msi_type )
         {
             /* MSI is not shared, so should be released already */
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/numa.c
--- a/xen/arch/x86/numa.c       Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/numa.c       Thu Aug 07 11:57:34 2008 +0900
@@ -14,6 +14,7 @@
 #include <xen/time.h>
 #include <xen/smp.h>
 #include <asm/acpi.h>
+#include <xen/sched.h>
 
 static int numa_setup(char *s);
 custom_param("numa", numa_setup);
@@ -281,6 +282,9 @@ static void dump_numa(unsigned char key)
 {
        s_time_t now = NOW();
        int i;
+       struct domain *d;
+       struct page_info *page;
+       unsigned int page_num_node[MAX_NUMNODES];
 
        printk("'%c' pressed -> dumping numa info (now-0x%X:%08X)\n", key,
                  (u32)(now>>32), (u32)now);
@@ -297,6 +301,28 @@ static void dump_numa(unsigned char key)
        }
        for_each_online_cpu(i)
                printk("CPU%d -> NODE%d\n", i, cpu_to_node[i]);
+
+       rcu_read_lock(&domlist_read_lock);
+
+       printk("Memory location of each domain:\n");
+       for_each_domain(d)
+       {
+               printk("Domain %u (total: %u):\n", d->domain_id, d->tot_pages);
+
+               for_each_online_node(i)
+                       page_num_node[i] = 0;
+
+               list_for_each_entry(page, &d->page_list, list)
+               {
+                       i = phys_to_nid(page_to_mfn(page) << PAGE_SHIFT);
+                       page_num_node[i]++;
+               }
+
+               for_each_online_node(i)
+                       printk("    Node %u: %u\n", i, page_num_node[i]);
+       }
+
+       rcu_read_unlock(&domlist_read_lock);
 }
 
 static __init int register_numa_trigger(void)
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/physdev.c    Thu Aug 07 11:57:34 2008 +0900
@@ -184,15 +184,14 @@ static int unmap_domain_pirq(struct doma
     return ret;
 }
 
-extern int msi_irq_enable;
 static int physdev_map_pirq(struct physdev_map_pirq *map)
 {
     struct domain *d;
     int vector, pirq, ret = 0;
     unsigned long flags;
 
-    /* if msi_irq_enable is not enabled,map always success */
-    if ( !msi_irq_enable )
+    /* if msi_enable is not enabled, map always succeeds */
+    if ( !msi_enable )
         return 0;
 
     if ( !IS_PRIV(current->domain) )
@@ -304,7 +303,7 @@ static int physdev_unmap_pirq(struct phy
     unsigned long flags;
     int ret;
 
-    if ( !msi_irq_enable )
+    if ( !msi_enable )
         return 0;
 
     if ( !IS_PRIV(current->domain) )
@@ -455,7 +454,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
 
         ret = 0;
 
-        if ( msi_irq_enable )
+        if ( msi_enable )
         {
             spin_lock_irqsave(&dom0->arch.irq_lock, flags);
             if ( irq != AUTO_ASSIGN )
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/platform_hypercall.c Thu Aug 07 11:57:34 2008 +0900
@@ -355,6 +355,11 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
             struct processor_pminfo *pmpt;
             struct processor_performance *pxpt;
 
+            if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
+            {
+                ret = -ENOSYS;
+                break;
+            }
             if ( cpuid < 0 )
             {
                 ret = -EINVAL;
@@ -373,6 +378,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
                 memcpy ((void *)&pxpt->status_register,
                     (void *)&xenpxpt->status_register,
                     sizeof(struct xen_pct_register));
+                pxpt->init |= XEN_PX_PCT;
             }
             if ( xenpxpt->flags & XEN_PX_PSS ) 
             {
@@ -390,6 +396,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
                     break;
                 }
                 pxpt->state_count = xenpxpt->state_count;
+                pxpt->init |= XEN_PX_PSS;
             }
             if ( xenpxpt->flags & XEN_PX_PSD )
             {
@@ -397,14 +404,18 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
                 memcpy ((void *)&pxpt->domain_info,
                     (void *)&xenpxpt->domain_info,
                     sizeof(struct xen_psd_package));
+                pxpt->init |= XEN_PX_PSD;
             }
             if ( xenpxpt->flags & XEN_PX_PPC )
+            {
                 pxpt->ppc = xenpxpt->ppc;
-
-            if ( xenpxpt->flags == ( XEN_PX_PCT | XEN_PX_PSS | 
-                XEN_PX_PSD | XEN_PX_PPC ) )
-            {
-                pxpt->init =1;
+                pxpt->init |= XEN_PX_PPC;
+            }
+
+            if ( pxpt->init == ( XEN_PX_PCT | XEN_PX_PSS |
+                                 XEN_PX_PSD | XEN_PX_PPC ) )
+            {
+                pxpt->init |= XEN_PX_INIT;
                 cpu_count++;
             }
             if ( cpu_count == num_online_cpus() )
@@ -418,10 +429,20 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
         }
  
         case XEN_PM_CX:
+            if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CX) )
+            {
+                ret = -ENOSYS;
+                break;
+            }
             ret = set_cx_pminfo(op->u.set_pminfo.id, &op->u.set_pminfo.power);
             break;
 
         case XEN_PM_TX:
+            if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_TX) )
+            {
+                ret = -ENOSYS;
+                break;
+            }
             ret = -EINVAL;
             break;
 
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/setup.c      Thu Aug 07 11:57:34 2008 +0900
@@ -997,7 +997,6 @@ void __init __start_xen(unsigned long mb
     if ( (cmdline != NULL) || (kextra != NULL) )
     {
         static char dom0_cmdline[MAX_GUEST_CMDLINE];
-        char xen_pm_param[32];
 
         cmdline = cmdline_cook(cmdline);
         safe_strcpy(dom0_cmdline, cmdline);
@@ -1022,14 +1021,6 @@ void __init __start_xen(unsigned long mb
             safe_strcat(dom0_cmdline, " acpi=");
             safe_strcat(dom0_cmdline, acpi_param);
         }
-        if ( xen_cpuidle )
-            xen_processor_pmbits |= XEN_PROCESSOR_PM_CX;
-
-        snprintf(xen_pm_param, sizeof(xen_pm_param), 
-            " xen_processor_pmbits=%d", xen_processor_pmbits);
-
-        if ( !strstr(dom0_cmdline, "xen_processor_pmbits=") )
-            safe_strcat(dom0_cmdline, xen_pm_param);
 
         cmdline = dom0_cmdline;
     }
@@ -1040,6 +1031,9 @@ void __init __start_xen(unsigned long mb
             (mod[initrdidx].mod_start - mod[0].mod_start);
         _initrd_len   = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
     }
+
+    if ( xen_cpuidle )
+        xen_processor_pmbits |= XEN_PROCESSOR_PM_CX;
 
     /*
      * We're going to setup domain0 using the module(s) that we stashed safely
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/shutdown.c
--- a/xen/arch/x86/shutdown.c   Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/shutdown.c   Thu Aug 07 11:57:34 2008 +0900
@@ -291,7 +291,12 @@ __initcall(reboot_init);
 
 #endif
 
-void machine_restart(void)
+static void __machine_restart(void *pdelay)
+{
+    machine_restart(*(unsigned int *)pdelay);
+}
+
+void machine_restart(unsigned int delay_millisecs)
 {
     int i;
 
@@ -304,13 +309,15 @@ void machine_restart(void)
     if ( get_apic_id() != boot_cpu_physical_apicid )
     {
         /* Send IPI to the boot CPU (logical cpu 0). */
-        on_selected_cpus(cpumask_of_cpu(0), (void *)machine_restart,
-                         NULL, 1, 0);
+        on_selected_cpus(cpumask_of_cpu(0), __machine_restart,
+                         &delay_millisecs, 1, 0);
         for ( ; ; )
             halt();
     }
 
     smp_send_stop();
+
+    mdelay(delay_millisecs);
 
     if ( tboot_in_measured_env() )
         tboot_shutdown(TB_SHUTDOWN_REBOOT);
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/time.c       Thu Aug 07 11:57:34 2008 +0900
@@ -35,8 +35,6 @@ static char opt_clocksource[10];
 static char opt_clocksource[10];
 string_param("clocksource", opt_clocksource);
 
-#define EPOCH MILLISECS(1000)
-
 unsigned long cpu_khz;  /* CPU clock frequency in kHz. */
 DEFINE_SPINLOCK(rtc_lock);
 unsigned long pit0_ticks;
@@ -55,7 +53,6 @@ struct cpu_time {
     s_time_t stime_master_stamp;
     struct time_scale tsc_scale;
     u64 cstate_plt_count_stamp;
-    struct timer calibration_timer;
 };
 
 struct platform_timesource {
@@ -66,6 +63,10 @@ struct platform_timesource {
 };
 
 static DEFINE_PER_CPU(struct cpu_time, cpu_time);
+
+/* Calibrate all CPUs to platform timer every EPOCH. */
+#define EPOCH MILLISECS(1000)
+static struct timer calibration_timer;
 
 /* TSC is invariant on C state entry? */
 static bool_t tsc_invariant;
@@ -481,35 +482,6 @@ static int init_pmtimer(struct platform_
 }
 
 /************************************************************
- * PLATFORM TIMER 5: TSC
- */
-
-static const char plt_tsc_name[] = "TSC";
-#define platform_timer_is_tsc() (plt_src.name == plt_tsc_name)
-
-static int init_tsctimer(struct platform_timesource *pts)
-{
-    if ( !tsc_invariant )
-        return 0;
-
-    pts->name = (char *)plt_tsc_name;
-    return 1;
-}
-
-static void make_tsctimer_record(void)
-{
-    struct cpu_time *t = &this_cpu(cpu_time);
-    s_time_t now;
-    u64 tsc;
-
-    rdtscll(tsc);
-    now = scale_delta(tsc, &t->tsc_scale);
-
-    t->local_tsc_stamp = tsc;
-    t->stime_local_stamp = t->stime_master_stamp = now;
-}
-
-/************************************************************
  * GENERIC PLATFORM TIMER INFRASTRUCTURE
  */
 
@@ -530,11 +502,11 @@ static void plt_overflow(void *unused)
 {
     u64 count;
 
-    spin_lock(&platform_timer_lock);
+    spin_lock_irq(&platform_timer_lock);
     count = plt_src.read_counter();
     plt_stamp64 += (count - plt_stamp) & plt_mask;
     plt_stamp = count;
-    spin_unlock(&platform_timer_lock);
+    spin_unlock_irq(&platform_timer_lock);
 
     set_timer(&plt_overflow_timer, NOW() + plt_overflow_period);
 }
@@ -550,6 +522,8 @@ static s_time_t read_platform_stime(void
 {
     u64 count;
     s_time_t stime;
+
+    ASSERT(!local_irq_is_enabled());
 
     spin_lock(&platform_timer_lock);
     count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
@@ -564,22 +538,16 @@ static void platform_time_calibration(vo
     u64 count;
     s_time_t stamp;
 
-    spin_lock(&platform_timer_lock);
+    spin_lock_irq(&platform_timer_lock);
     count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
     stamp = __read_platform_stime(count);
     stime_platform_stamp = stamp;
     platform_timer_stamp = count;
-    spin_unlock(&platform_timer_lock);
+    spin_unlock_irq(&platform_timer_lock);
 }
 
 static void resume_platform_timer(void)
 {
-    if ( platform_timer_is_tsc() )
-    {
-        /* TODO: Save/restore TSC values. */
-        return;
-    }
-
     /* No change in platform_stime across suspend/resume. */
     platform_timer_stamp = plt_stamp64;
     plt_stamp = plt_src.read_counter();
@@ -600,8 +568,6 @@ static void init_platform_timer(void)
             rc = init_cyclone(pts);
         else if ( !strcmp(opt_clocksource, "acpi") )
             rc = init_pmtimer(pts);
-        else if ( !strcmp(opt_clocksource, "tsc") )
-            rc = init_tsctimer(pts);
 
         if ( rc <= 0 )
             printk("WARNING: %s clocksource '%s'.\n",
@@ -614,12 +580,6 @@ static void init_platform_timer(void)
          !init_hpet(pts) &&
          !init_pmtimer(pts) )
         init_pit(pts);
-
-    if ( platform_timer_is_tsc() )
-    {
-        printk("Platform timer is TSC\n");
-        return;
-    }
 
     plt_mask = (u64)~0ull >> (64 - pts->counter_bits);
 
@@ -823,10 +783,6 @@ int cpu_frequency_change(u64 freq)
     struct cpu_time *t = &this_cpu(cpu_time);
     u64 curr_tsc;
 
-    /* Nothing to do if TSC is platform timer. Assume it is constant-rate. */
-    if ( platform_timer_is_tsc() )
-        return 0;
-
     /* Sanity check: CPU frequency allegedly dropping below 1MHz? */
     if ( freq < 1000000u )
     {
@@ -847,9 +803,11 @@ int cpu_frequency_change(u64 freq)
     local_irq_enable();
 
     /* A full epoch should pass before we check for deviation. */
-    set_timer(&t->calibration_timer, NOW() + EPOCH);
     if ( smp_processor_id() == 0 )
+    {
+        set_timer(&calibration_timer, NOW() + EPOCH);
         platform_time_calibration();
+    }
 
     return 0;
 }
@@ -875,9 +833,20 @@ void do_settime(unsigned long secs, unsi
     rcu_read_unlock(&domlist_read_lock);
 }
 
+/* Per-CPU communication between rendezvous IRQ and softirq handler. */
+struct cpu_calibration {
+    u64 local_tsc_stamp;
+    s_time_t stime_local_stamp;
+    s_time_t stime_master_stamp;
+    struct timer softirq_callback;
+};
+static DEFINE_PER_CPU(struct cpu_calibration, cpu_calibration);
+
+/* Softirq handler for per-CPU time calibration. */
 static void local_time_calibration(void *unused)
 {
     struct cpu_time *t = &this_cpu(cpu_time);
+    struct cpu_calibration *c = &this_cpu(cpu_calibration);
 
     /*
      * System timestamps, extrapolated from local and master oscillators,
@@ -908,26 +877,15 @@ static void local_time_calibration(void 
     /* The overall calibration scale multiplier. */
     u32 calibration_mul_frac;
 
-    if ( platform_timer_is_tsc() )
-    {
-        make_tsctimer_record(); 
-        update_vcpu_system_time(current);
-        set_timer(&t->calibration_timer, NOW() + MILLISECS(10*1000));
-        return;
-    }
-
     prev_tsc          = t->local_tsc_stamp;
     prev_local_stime  = t->stime_local_stamp;
     prev_master_stime = t->stime_master_stamp;
 
-    /*
-     * Disable IRQs to get 'instantaneous' current timestamps. We read platform
-     * time first, as we may be delayed when acquiring platform_timer_lock.
-     */
+    /* Disabling IRQs ensures we atomically read cpu_calibration struct. */
     local_irq_disable();
-    curr_master_stime = read_platform_stime();
-    curr_local_stime  = get_s_time();
-    rdtscll(curr_tsc);
+    curr_tsc          = c->local_tsc_stamp;
+    curr_local_stime  = c->stime_local_stamp;
+    curr_master_stime = c->stime_master_stamp;
     local_irq_enable();
 
 #if 0
@@ -1021,10 +979,62 @@ static void local_time_calibration(void 
     update_vcpu_system_time(current);
 
  out:
-    set_timer(&t->calibration_timer, NOW() + EPOCH);
-
     if ( smp_processor_id() == 0 )
+    {
+        set_timer(&calibration_timer, NOW() + EPOCH);
         platform_time_calibration();
+    }
+}
+
+/*
+ * Rendezvous for all CPUs in IRQ context.
+ * Master CPU snapshots the platform timer.
+ * All CPUs snapshot their local TSC and extrapolation of system time.
+ */
+struct calibration_rendezvous {
+    atomic_t nr_cpus;
+    s_time_t master_stime;
+};
+
+static void time_calibration_rendezvous(void *_r)
+{
+    unsigned int total_cpus = num_online_cpus();
+    struct cpu_calibration *c = &this_cpu(cpu_calibration);
+    struct calibration_rendezvous *r = _r;
+
+    local_irq_disable();
+
+    if ( smp_processor_id() == 0 )
+    {
+        while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) )
+            cpu_relax();
+        r->master_stime = read_platform_stime();
+        atomic_inc(&r->nr_cpus);
+    }
+    else
+    {
+        atomic_inc(&r->nr_cpus);
+        while ( atomic_read(&r->nr_cpus) != total_cpus )
+            cpu_relax();
+    }
+
+    rdtscll(c->local_tsc_stamp);
+    c->stime_local_stamp = get_s_time();
+    c->stime_master_stamp = r->master_stime;
+
+    local_irq_enable();
+
+    /* Callback in softirq context as soon as possible. */
+    set_timer(&c->softirq_callback, c->stime_local_stamp);
+}
+
+static void time_calibration(void *unused)
+{
+    struct calibration_rendezvous r = {
+        .nr_cpus = ATOMIC_INIT(0)
+    };
+
+    on_each_cpu(time_calibration_rendezvous, &r, 0, 1);
 }
 
 void init_percpu_time(void)
@@ -1032,12 +1042,6 @@ void init_percpu_time(void)
     struct cpu_time *t = &this_cpu(cpu_time);
     unsigned long flags;
     s_time_t now;
-
-    if ( platform_timer_is_tsc() )
-    {
-        make_tsctimer_record();
-        goto out;
-    }
 
     local_irq_save(flags);
     rdtscll(t->local_tsc_stamp);
@@ -1047,10 +1051,14 @@ void init_percpu_time(void)
     t->stime_master_stamp = now;
     t->stime_local_stamp  = now;
 
- out:
-    init_timer(&t->calibration_timer, local_time_calibration,
-               NULL, smp_processor_id());
-    set_timer(&t->calibration_timer, NOW() + EPOCH);
+    init_timer(&this_cpu(cpu_calibration).softirq_callback,
+               local_time_calibration, NULL, smp_processor_id());
+
+    if ( smp_processor_id() == 0 )
+    {
+        init_timer(&calibration_timer, time_calibration, NULL, 0);
+        set_timer(&calibration_timer, NOW() + EPOCH);
+    }
 }
 
 /* Late init function (after all CPUs are booted). */
@@ -1134,7 +1142,10 @@ void pit_broadcast_enter(void)
 
 void pit_broadcast_exit(void)
 {
-    cpu_clear(smp_processor_id(), pit_broadcast_mask);
+    int cpu = smp_processor_id();
+
+    if ( cpu_test_and_clear(cpu, pit_broadcast_mask) )
+        reprogram_timer(per_cpu(timer_deadline, cpu));
 }
 
 int pit_broadcast_is_available(void)
@@ -1163,10 +1174,11 @@ int time_suspend(void)
     {
         cmos_utc_offset = -get_cmos_time();
         cmos_utc_offset += (wc_sec + (wc_nsec + NOW()) / 1000000000ULL);
+        kill_timer(&calibration_timer);
     }
 
     /* Better to cancel calibration timer for accuracy. */
-    kill_timer(&this_cpu(cpu_time).calibration_timer);
+    kill_timer(&this_cpu(cpu_calibration).softirq_callback);
 
     return 0;
 }
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/x86_64/physdev.c
--- a/xen/arch/x86/x86_64/physdev.c     Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/x86_64/physdev.c     Thu Aug 07 11:57:34 2008 +0900
@@ -30,6 +30,15 @@
 #define physdev_irq_status_query   compat_physdev_irq_status_query
 #define physdev_irq_status_query_t physdev_irq_status_query_compat_t
 
+#define physdev_map_pirq           compat_physdev_map_pirq
+#define physdev_map_pirq_t         physdev_map_pirq_compat_t
+
+#define physdev_unmap_pirq         compat_physdev_unmap_pirq
+#define physdev_unmap_pirq_t       physdev_unmap_pirq_compat_t
+
+#define physdev_manage_pci         compat_physdev_manage_pci
+#define physdev_manage_pci_t       physdev_manage_pci_compat_t
+
 #define COMPAT
 #undef guest_handle_okay
 #define guest_handle_okay          compat_handle_okay
diff -r 7affdebb7a1e -r a39913db6e51 xen/arch/x86/x86_emulate/x86_emulate.c
--- a/xen/arch/x86/x86_emulate/x86_emulate.c    Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c    Thu Aug 07 11:57:34 2008 +0900
@@ -1606,6 +1606,7 @@ x86_emulate(
         dst.val = _regs.eax;
     case 0x38 ... 0x3b: cmp: /* cmp */
         emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
+        dst.type = OP_NONE;
         break;
 
     case 0x62: /* bound */ {
@@ -1707,6 +1708,7 @@ x86_emulate(
         dst.val = _regs.eax;
     case 0x84 ... 0x85: test: /* test */
         emulate_2op_SrcV("test", src, dst, _regs.eflags);
+        dst.type = OP_NONE;
         break;
 
     case 0x86 ... 0x87: xchg: /* xchg */
diff -r 7affdebb7a1e -r a39913db6e51 xen/common/compat/grant_table.c
--- a/xen/common/compat/grant_table.c   Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/common/compat/grant_table.c   Thu Aug 07 11:57:34 2008 +0900
@@ -138,7 +138,6 @@ int compat_grant_table_op(unsigned int c
                         for ( i = 0; i < (_s_)->nr_frames; ++i ) \
                         { \
                             unsigned int frame = (_s_)->frame_list.p[i]; \
-                            BUG_ON(frame != (_s_)->frame_list.p[i]); \
                             (void)__copy_to_compat_offset((_d_)->frame_list, i, &frame, 1); \
                         } \
                     } \
diff -r 7affdebb7a1e -r a39913db6e51 xen/common/domain.c
--- a/xen/common/domain.c       Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/common/domain.c       Thu Aug 07 11:57:34 2008 +0900
@@ -50,7 +50,7 @@ static void __init setup_cpufreq_option(
     else if ( !strcmp(str, "xen") )
     {
         xen_processor_pmbits |= XEN_PROCESSOR_PM_PX;
-        cpufreq_controller = FREQCTL_none;
+        cpufreq_controller = FREQCTL_xen;
     }
 }
 custom_param("cpufreq", setup_cpufreq_option);
@@ -136,6 +136,8 @@ struct vcpu *alloc_vcpu(
 
     v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
     v->runstate.state_entry_time = NOW();
+
+    spin_lock_init(&v->virq_lock);
 
     if ( !is_idle_domain(d) )
     {
diff -r 7affdebb7a1e -r a39913db6e51 xen/common/event_channel.c
--- a/xen/common/event_channel.c        Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/common/event_channel.c        Thu Aug 07 11:57:34 2008 +0900
@@ -386,14 +386,18 @@ static long __evtchn_close(struct domain
         break;
 
     case ECS_PIRQ:
-        if ( (rc = pirq_guest_unbind(d1, chn1->u.pirq)) == 0 )
-            d1->pirq_to_evtchn[chn1->u.pirq] = 0;
+        pirq_guest_unbind(d1, chn1->u.pirq);
+        d1->pirq_to_evtchn[chn1->u.pirq] = 0;
         break;
 
     case ECS_VIRQ:
         for_each_vcpu ( d1, v )
-            if ( v->virq_to_evtchn[chn1->u.virq] == port1 )
-                v->virq_to_evtchn[chn1->u.virq] = 0;
+        {
+            if ( v->virq_to_evtchn[chn1->u.virq] != port1 )
+                continue;
+            v->virq_to_evtchn[chn1->u.virq] = 0;
+            spin_barrier(&v->virq_lock);
+        }
         break;
 
     case ECS_IPI:
@@ -447,6 +451,9 @@ static long __evtchn_close(struct domain
         BUG();
     }
 
+    /* Clear pending event to avoid unexpected behavior on re-bind. */
+    clear_bit(port1, &shared_info(d1, evtchn_pending));
+
     /* Reset binding to vcpu0 when the channel is freed. */
     chn1->state          = ECS_FREE;
     chn1->notify_vcpu_id = 0;
@@ -573,37 +580,33 @@ static int evtchn_set_pending(struct vcp
     return 0;
 }
 
+int guest_enabled_event(struct vcpu *v, int virq)
+{
+    return ((v != NULL) && (v->virq_to_evtchn[virq] != 0));
+}
 
 void send_guest_vcpu_virq(struct vcpu *v, int virq)
 {
+    unsigned long flags;
     int port;
 
     ASSERT(!virq_is_global(virq));
+
+    spin_lock_irqsave(&v->virq_lock, flags);
 
     port = v->virq_to_evtchn[virq];
     if ( unlikely(port == 0) )
-        return;
+        goto out;
 
     evtchn_set_pending(v, port);
-}
-
-int guest_enabled_event(struct vcpu *v, int virq)
-{
-    int port;
-
-    if ( unlikely(v == NULL) )
-        return 0;
-
-    port = v->virq_to_evtchn[virq];
-    if ( port == 0 )
-        return 0;
-
-    /* virq is in use */
-    return 1;
+
+ out:
+    spin_unlock_irqrestore(&v->virq_lock, flags);
 }
 
 void send_guest_global_virq(struct domain *d, int virq)
 {
+    unsigned long flags;
     int port;
     struct vcpu *v;
     struct evtchn *chn;
@@ -617,20 +620,28 @@ void send_guest_global_virq(struct domai
     if ( unlikely(v == NULL) )
         return;
 
+    spin_lock_irqsave(&v->virq_lock, flags);
+
     port = v->virq_to_evtchn[virq];
     if ( unlikely(port == 0) )
-        return;
+        goto out;
 
     chn = evtchn_from_port(d, port);
     evtchn_set_pending(d->vcpu[chn->notify_vcpu_id], port);
-}
-
+
+ out:
+    spin_unlock_irqrestore(&v->virq_lock, flags);
+}
 
 int send_guest_pirq(struct domain *d, int pirq)
 {
     int port = d->pirq_to_evtchn[pirq];
     struct evtchn *chn;
 
+    /*
+     * It should not be possible to race with __evtchn_close():
+     * The caller of this function must synchronise with pirq_guest_unbind().
+     */
     ASSERT(port != 0);
 
     chn = evtchn_from_port(d, port);
diff -r 7affdebb7a1e -r a39913db6e51 xen/common/keyhandler.c
--- a/xen/common/keyhandler.c   Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/common/keyhandler.c   Thu Aug 07 11:57:34 2008 +0900
@@ -143,7 +143,7 @@ static void halt_machine(unsigned char k
 static void halt_machine(unsigned char key, struct cpu_user_regs *regs)
 {
     printk("'%c' pressed -> rebooting machine\n", key);
-    machine_restart();
+    machine_restart(0);
 }
 
 static void cpuset_print(char *set, int size, cpumask_t mask)
@@ -236,6 +236,7 @@ static void dump_domains(unsigned char k
 
 static cpumask_t read_clocks_cpumask = CPU_MASK_NONE;
 static s_time_t read_clocks_time[NR_CPUS];
+static u64 read_cycles_time[NR_CPUS];
 
 static void read_clocks_slave(void *unused)
 {
@@ -244,14 +245,20 @@ static void read_clocks_slave(void *unus
     while ( !cpu_isset(cpu, read_clocks_cpumask) )
         cpu_relax();
     read_clocks_time[cpu] = NOW();
+    read_cycles_time[cpu] = get_cycles();
     cpu_clear(cpu, read_clocks_cpumask);
     local_irq_enable();
 }
 
 static void read_clocks(unsigned char key)
 {
-    unsigned int cpu = smp_processor_id(), min_cpu, max_cpu;
-    u64 min, max, dif, difus;
+    unsigned int cpu = smp_processor_id(), min_stime_cpu, max_stime_cpu;
+    unsigned int min_cycles_cpu, max_cycles_cpu;
+    u64 min_stime, max_stime, dif_stime;
+    u64 min_cycles, max_cycles, dif_cycles;
+    static u64 sumdif_stime = 0, maxdif_stime = 0;
+    static u64 sumdif_cycles = 0, maxdif_cycles = 0;
+    static u32 count = 0;
     static DEFINE_SPINLOCK(lock);
 
     spin_lock(&lock);
@@ -261,31 +268,48 @@ static void read_clocks(unsigned char ke
     local_irq_disable();
     read_clocks_cpumask = cpu_online_map;
     read_clocks_time[cpu] = NOW();
+    read_cycles_time[cpu] = get_cycles();
     cpu_clear(cpu, read_clocks_cpumask);
     local_irq_enable();
 
     while ( !cpus_empty(read_clocks_cpumask) )
         cpu_relax();
 
-    min_cpu = max_cpu = cpu;
+    min_stime_cpu = max_stime_cpu = min_cycles_cpu = max_cycles_cpu = cpu;
     for_each_online_cpu ( cpu )
     {
-        if ( read_clocks_time[cpu] < read_clocks_time[min_cpu] )
-            min_cpu = cpu;
-        if ( read_clocks_time[cpu] > read_clocks_time[max_cpu] )
-            max_cpu = cpu;
-    }
-
-    min = read_clocks_time[min_cpu];
-    max = read_clocks_time[max_cpu];
+        if ( read_clocks_time[cpu] < read_clocks_time[min_stime_cpu] )
+            min_stime_cpu = cpu;
+        if ( read_clocks_time[cpu] > read_clocks_time[max_stime_cpu] )
+            max_stime_cpu = cpu;
+        if ( read_cycles_time[cpu] < read_cycles_time[min_cycles_cpu] )
+            min_cycles_cpu = cpu;
+        if ( read_cycles_time[cpu] > read_cycles_time[max_cycles_cpu] )
+            max_cycles_cpu = cpu;
+    }
+
+    min_stime = read_clocks_time[min_stime_cpu];
+    max_stime = read_clocks_time[max_stime_cpu];
+    min_cycles = read_cycles_time[min_cycles_cpu];
+    max_cycles = read_cycles_time[max_cycles_cpu];
 
     spin_unlock(&lock);
 
-    dif = difus = max - min;
-    do_div(difus, 1000);
-    printk("Min = %"PRIu64" ; Max = %"PRIu64" ; Diff = %"PRIu64
-           " (%"PRIu64" microseconds)\n",
-           min, max, dif, difus);
+    dif_stime = max_stime - min_stime;
+    if ( dif_stime > maxdif_stime )
+        maxdif_stime = dif_stime;
+    sumdif_stime += dif_stime;
+    dif_cycles = max_cycles - min_cycles;
+    if ( dif_cycles > maxdif_cycles )
+        maxdif_cycles = dif_cycles;
+    sumdif_cycles += dif_cycles;
+    count++;
+    printk("Synced stime skew: max=%"PRIu64"ns avg=%"PRIu64"ns "
+           "samples=%"PRIu32" current=%"PRIu64"ns\n",
+           maxdif_stime, sumdif_stime/count, count, dif_stime);
+    printk("Synced cycles skew: max=%"PRIu64" avg=%"PRIu64" "
+           "samples=%"PRIu32" current=%"PRIu64"\n",
+           maxdif_cycles, sumdif_cycles/count, count, dif_cycles);
 }
 
 extern void dump_runq(unsigned char key);
diff -r 7affdebb7a1e -r a39913db6e51 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/common/page_alloc.c   Thu Aug 07 11:57:34 2008 +0900
@@ -53,34 +53,11 @@ boolean_param("bootscrub", opt_bootscrub
 boolean_param("bootscrub", opt_bootscrub);
 
 /*
- * Bit width of the DMA heap.
- */
-static unsigned int dma_bitsize = CONFIG_DMA_BITSIZE;
-static void __init parse_dma_bits(char *s)
-{
-    unsigned int v = simple_strtol(s, NULL, 0);
-    if ( v >= (BITS_PER_LONG + PAGE_SHIFT) )
-        dma_bitsize = BITS_PER_LONG + PAGE_SHIFT;
-    else if ( v > PAGE_SHIFT + 1 )
-        dma_bitsize = v;
-    else
-        printk("Invalid dma_bits value of %u ignored.\n", v);
-}
-custom_param("dma_bits", parse_dma_bits);
-
-/*
- * Amount of memory to reserve in a low-memory (<4GB) pool for specific
- * allocation requests. Ordinary requests will not fall back to the
- * lowmem emergency pool.
- */
-static unsigned long dma_emergency_pool_pages;
-static void __init parse_dma_emergency_pool(char *s)
-{
-    unsigned long long bytes;
-    bytes = parse_size_and_unit(s, NULL);
-    dma_emergency_pool_pages = bytes >> PAGE_SHIFT;
-}
-custom_param("dma_emergency_pool", parse_dma_emergency_pool);
+ * Bit width of the DMA heap -- used to override NUMA-node-first
+ * allocation strategy, which can otherwise exhaust low memory.
+ */
+static unsigned int dma_bitsize;
+integer_param("dma_bits", dma_bitsize);
 
 #define round_pgdown(_p)  ((_p)&PAGE_MASK)
 #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
@@ -281,11 +258,7 @@ unsigned long __init alloc_boot_pages(
  */
 
 #define MEMZONE_XEN 0
-#ifdef PADDR_BITS
 #define NR_ZONES    (PADDR_BITS - PAGE_SHIFT)
-#else
-#define NR_ZONES    (BITS_PER_LONG - PAGE_SHIFT)
-#endif
 
 #define pfn_dom_zone_type(_pfn) (fls(_pfn) - 1)
 
@@ -583,7 +556,22 @@ void __init end_boot_allocator(void)
             init_heap_pages(pfn_dom_zone_type(i), mfn_to_page(i), 1);
     }
 
-    printk("Domain heap initialised: DMA width %u bits\n", dma_bitsize);
+    if ( !dma_bitsize && (num_online_nodes() > 1) )
+    {
+#ifdef CONFIG_X86
+        dma_bitsize = min_t(unsigned int,
+                            fls(NODE_DATA(0)->node_spanned_pages) - 1
+                            + PAGE_SHIFT - 2,
+                            32);
+#else
+        dma_bitsize = 32;
+#endif
+    }
+
+    printk("Domain heap initialised");
+    if ( dma_bitsize )
+        printk(" DMA width %u bits", dma_bitsize);
+    printk("\n");
 }
 #undef avail_for_domheap
 
@@ -803,19 +791,9 @@ struct page_info *alloc_domheap_pages(
     if ( bits < zone_hi )
         zone_hi = bits;
 
-    if ( (zone_hi + PAGE_SHIFT) >= dma_bitsize )
-    {
+    if ( (dma_bitsize > PAGE_SHIFT) &&
+         ((zone_hi + PAGE_SHIFT) >= dma_bitsize) )
         pg = alloc_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi, node, order);
-
-        /* Failure? Then check if we can fall back to the DMA pool. */
-        if ( unlikely(pg == NULL) &&
-             ((order > MAX_ORDER) ||
-              (avail_heap_pages(MEMZONE_XEN + 1,
-                                dma_bitsize - PAGE_SHIFT - 1,
-                                -1) <
-               (dma_emergency_pool_pages + (1UL << order)))) )
-            return NULL;
-    }
 
     if ( (pg == NULL) &&
          ((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi,
@@ -917,28 +895,15 @@ unsigned long avail_domheap_pages_region
 
 unsigned long avail_domheap_pages(void)
 {
-    unsigned long avail_nrm, avail_dma;
-    
-    avail_nrm = avail_heap_pages(dma_bitsize - PAGE_SHIFT,
-                                 NR_ZONES - 1,
-                                 -1);
-
-    avail_dma = avail_heap_pages(MEMZONE_XEN + 1,
-                                 dma_bitsize - PAGE_SHIFT - 1,
-                                 -1);
-
-    if ( avail_dma > dma_emergency_pool_pages )
-        avail_dma -= dma_emergency_pool_pages;
-    else
-        avail_dma = 0;
-
-    return avail_nrm + avail_dma;
+    return avail_heap_pages(MEMZONE_XEN + 1,
+                            NR_ZONES - 1,
+                            -1);
 }
 
 static void pagealloc_keyhandler(unsigned char key)
 {
     unsigned int zone = MEMZONE_XEN;
-    unsigned long total = 0;
+    unsigned long n, total = 0;
 
     printk("Physical memory information:\n");
     printk("    Xen heap: %lukB free\n",
@@ -946,9 +911,7 @@ static void pagealloc_keyhandler(unsigne
 
     while ( ++zone < NR_ZONES )
     {
-        unsigned long n;
-
-        if ( zone == dma_bitsize - PAGE_SHIFT )
+        if ( (zone + PAGE_SHIFT) == dma_bitsize )
         {
             printk("    DMA heap: %lukB free\n", total << (PAGE_SHIFT-10));
             total = 0;
diff -r 7affdebb7a1e -r a39913db6e51 xen/common/shutdown.c
--- a/xen/common/shutdown.c     Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/common/shutdown.c     Thu Aug 07 11:57:34 2008 +0900
@@ -23,8 +23,7 @@ static void maybe_reboot(void)
     {
         printk("rebooting machine in 5 seconds.\n");
         watchdog_disable();
-        mdelay(5000);
-        machine_restart();
+        machine_restart(5000);
     }
 }
 
@@ -50,7 +49,7 @@ void dom0_shutdown(u8 reason)
     case SHUTDOWN_reboot:
     {
         printk("Domain 0 shutdown: rebooting machine.\n");
-        machine_restart();
+        machine_restart(0);
         break; /* not reached */
     }
 
diff -r 7affdebb7a1e -r a39913db6e51 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/drivers/char/console.c        Thu Aug 07 11:57:34 2008 +0900
@@ -939,8 +939,7 @@ void panic(const char *fmt, ...)
     else
     {
         watchdog_disable();
-        mdelay(5000);
-        machine_restart();
+        machine_restart(5000);
     }
 }
 
diff -r 7affdebb7a1e -r a39913db6e51 xen/drivers/passthrough/amd/iommu_acpi.c
--- a/xen/drivers/passthrough/amd/iommu_acpi.c  Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c  Thu Aug 07 11:57:34 2008 +0900
@@ -27,6 +27,7 @@ extern unsigned long amd_iommu_page_entr
 extern unsigned long amd_iommu_page_entries;
 extern unsigned short ivrs_bdf_entries;
 extern struct ivrs_mappings *ivrs_mappings;
+extern unsigned short last_bdf;
 
 static struct amd_iommu * __init find_iommu_from_bdf_cap(
     u16 bdf, u8 cap_offset)
@@ -85,10 +86,8 @@ static void __init reserve_unity_map_for
     }
 
     /* extend r/w permissioms and keep aggregate */
-    if ( iw )
-        ivrs_mappings[bdf].write_permission = IOMMU_CONTROL_ENABLED;
-    if ( ir )
-        ivrs_mappings[bdf].read_permission = IOMMU_CONTROL_ENABLED;
+    ivrs_mappings[bdf].write_permission = iw;
+    ivrs_mappings[bdf].read_permission = ir;
     ivrs_mappings[bdf].unity_map_enable = IOMMU_CONTROL_ENABLED;
     ivrs_mappings[bdf].addr_range_start = base;
     ivrs_mappings[bdf].addr_range_length = length;
@@ -112,7 +111,7 @@ static int __init register_exclusion_ran
         length = range_top - base;
         /* reserve r/w unity-mapped page entries for devices */
         /* note: these entries are part of the exclusion range */
-        for (bdf = 0; bdf < ivrs_bdf_entries; ++bdf)
+        for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
             reserve_unity_map_for_device(bdf, base, length, iw, ir);
         /* push 'base' just outside of virtual address space */
         base = iommu_top;
@@ -190,7 +189,7 @@ static int __init register_exclusion_ran
         length = range_top - base;
         /* reserve r/w unity-mapped page entries for devices */
         /* note: these entries are part of the exclusion range */
-        for ( bdf = 0; bdf < ivrs_bdf_entries; ++bdf )
+        for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
         {
             bus = bdf >> 8;
             devfn = bdf & 0xFF;
@@ -357,7 +356,7 @@ static u16 __init parse_ivhd_device_padd
 }
 
 static u16 __init parse_ivhd_device_select(
-    union acpi_ivhd_device *ivhd_device)
+    union acpi_ivhd_device *ivhd_device, struct amd_iommu *iommu)
 {
     u16 bdf;
 
@@ -373,13 +372,14 @@ static u16 __init parse_ivhd_device_sele
         get_field_from_byte(ivhd_device->header.flags,
                             AMD_IOMMU_ACPI_SYS_MGT_MASK,
                             AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
+    ivrs_mappings[bdf].iommu = iommu;
 
     return sizeof(struct acpi_ivhd_device_header);
 }
 
 static u16 __init parse_ivhd_device_range(
     union acpi_ivhd_device *ivhd_device,
-    u16 header_length, u16 block_length)
+    u16 header_length, u16 block_length, struct amd_iommu *iommu)
 {
     u16 dev_length, first_bdf, last_bdf, bdf;
     u8 sys_mgt;
@@ -423,14 +423,17 @@ static u16 __init parse_ivhd_device_rang
                                   AMD_IOMMU_ACPI_SYS_MGT_MASK,
                                   AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
     for ( bdf = first_bdf; bdf <= last_bdf; bdf++ )
+    {
         ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt;
+        ivrs_mappings[bdf].iommu = iommu;
+    }
 
     return dev_length;
 }
 
 static u16 __init parse_ivhd_device_alias(
     union acpi_ivhd_device *ivhd_device,
-    u16 header_length, u16 block_length)
+    u16 header_length, u16 block_length, struct amd_iommu *iommu)
 {
     u16 dev_length, alias_id, bdf;
 
@@ -463,15 +466,18 @@ static u16 __init parse_ivhd_device_alia
         get_field_from_byte(ivhd_device->header.flags,
                             AMD_IOMMU_ACPI_SYS_MGT_MASK,
                             AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
+    ivrs_mappings[bdf].iommu = iommu;
+
     ivrs_mappings[alias_id].dte_sys_mgt_enable =
         ivrs_mappings[bdf].dte_sys_mgt_enable;
+    ivrs_mappings[alias_id].iommu = iommu;
 
     return dev_length;
 }
 
 static u16 __init parse_ivhd_device_alias_range(
     union acpi_ivhd_device *ivhd_device,
-    u16 header_length, u16 block_length)
+    u16 header_length, u16 block_length, struct amd_iommu *iommu)
 {
 
     u16 dev_length, first_bdf, last_bdf, alias_id, bdf;
@@ -527,15 +533,17 @@ static u16 __init parse_ivhd_device_alia
     {
         ivrs_mappings[bdf].dte_requestor_id = alias_id;
         ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt;
+        ivrs_mappings[bdf].iommu = iommu;
     }
     ivrs_mappings[alias_id].dte_sys_mgt_enable = sys_mgt;
+    ivrs_mappings[alias_id].iommu = iommu;
 
     return dev_length;
 }
 
 static u16 __init parse_ivhd_device_extended(
     union acpi_ivhd_device *ivhd_device,
-    u16 header_length, u16 block_length)
+    u16 header_length, u16 block_length, struct amd_iommu *iommu)
 {
     u16 dev_length, bdf;
 
@@ -558,13 +566,14 @@ static u16 __init parse_ivhd_device_exte
         get_field_from_byte(ivhd_device->header.flags,
                             AMD_IOMMU_ACPI_SYS_MGT_MASK,
                             AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
+    ivrs_mappings[bdf].iommu = iommu;
 
     return dev_length;
 }
 
 static u16 __init parse_ivhd_device_extended_range(
     union acpi_ivhd_device *ivhd_device,
-    u16 header_length, u16 block_length)
+    u16 header_length, u16 block_length, struct amd_iommu *iommu)
 {
     u16 dev_length, first_bdf, last_bdf, bdf;
     u8 sys_mgt;
@@ -609,7 +618,10 @@ static u16 __init parse_ivhd_device_exte
                                   AMD_IOMMU_ACPI_SYS_MGT_MASK,
                                   AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
     for ( bdf = first_bdf; bdf <= last_bdf; bdf++ )
+    {
         ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt;
+        ivrs_mappings[bdf].iommu = iommu;
+    }
 
     return dev_length;
 }
@@ -635,33 +647,6 @@ static int __init parse_ivhd_block(struc
                 ivhd_block->header.dev_id, ivhd_block->cap_offset);
         return -ENODEV;
     }
-
-    amd_iov_info("IVHD Block:\n");
-    amd_iov_info(" Cap_Offset 0x%x\n", ivhd_block->cap_offset);
-    amd_iov_info(" MMIO_BAR_Phys 0x%"PRIx64"\n",ivhd_block->mmio_base);
-    amd_iov_info( " PCI_Segment 0x%x\n", ivhd_block->pci_segment);
-    amd_iov_info( " IOMMU_Info 0x%x\n", ivhd_block->iommu_info);
-
-    /* override IOMMU support flags */
-    iommu->coherent = get_field_from_byte(ivhd_block->header.flags,
-                                          AMD_IOMMU_ACPI_COHERENT_MASK,
-                                          AMD_IOMMU_ACPI_COHERENT_SHIFT);
-    iommu->iotlb_support = get_field_from_byte(ivhd_block->header.flags,
-                                               AMD_IOMMU_ACPI_IOTLB_SUP_MASK,
-                                               AMD_IOMMU_ACPI_IOTLB_SUP_SHIFT);
-    iommu->isochronous = get_field_from_byte(ivhd_block->header.flags,
-                                             AMD_IOMMU_ACPI_ISOC_MASK,
-                                             AMD_IOMMU_ACPI_ISOC_SHIFT);
-    iommu->res_pass_pw = get_field_from_byte(ivhd_block->header.flags,
-                                             AMD_IOMMU_ACPI_RES_PASS_PW_MASK,
-                                             AMD_IOMMU_ACPI_RES_PASS_PW_SHIFT);
-    iommu->pass_pw = get_field_from_byte(ivhd_block->header.flags,
-                                         AMD_IOMMU_ACPI_PASS_PW_MASK,
-                                         AMD_IOMMU_ACPI_PASS_PW_SHIFT);
-    iommu->ht_tunnel_enable = get_field_from_byte(
-        ivhd_block->header.flags,
-        AMD_IOMMU_ACPI_HT_TUN_ENB_MASK,
-        AMD_IOMMU_ACPI_HT_TUN_ENB_SHIFT);
 
     /* parse Device Entries */
     block_length = sizeof(struct acpi_ivhd_block_header);
@@ -689,32 +674,32 @@ static int __init parse_ivhd_block(struc
                 ivhd_block->header.length, block_length);
             break;
         case AMD_IOMMU_ACPI_IVHD_DEV_SELECT:
-            dev_length = parse_ivhd_device_select(ivhd_device);
+            dev_length = parse_ivhd_device_select(ivhd_device, iommu);
             break;
         case AMD_IOMMU_ACPI_IVHD_DEV_RANGE_START:
             dev_length = parse_ivhd_device_range(
                 ivhd_device,
-                ivhd_block->header.length, block_length);
+                ivhd_block->header.length, block_length, iommu);
             break;
         case AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_SELECT:
             dev_length = parse_ivhd_device_alias(
                 ivhd_device,
-                ivhd_block->header.length, block_length);
+                ivhd_block->header.length, block_length, iommu);
             break;
         case AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_RANGE:
             dev_length = parse_ivhd_device_alias_range(
                 ivhd_device,
-                ivhd_block->header.length, block_length);
+                ivhd_block->header.length, block_length, iommu);
             break;
         case AMD_IOMMU_ACPI_IVHD_DEV_EXT_SELECT:
             dev_length = parse_ivhd_device_extended(
                 ivhd_device,
-                ivhd_block->header.length, block_length);
+                ivhd_block->header.length, block_length, iommu);
             break;
         case AMD_IOMMU_ACPI_IVHD_DEV_EXT_RANGE:
             dev_length = parse_ivhd_device_extended_range(
                 ivhd_device,
-                ivhd_block->header.length, block_length);
+                ivhd_block->header.length, block_length, iommu);
             break;
         default:
             amd_iov_error("IVHD Error: Invalid Device Type!\n");
@@ -794,29 +779,16 @@ static void __init dump_acpi_table_heade
 
 }
 
-int __init parse_ivrs_table(struct acpi_table_header *_table)
+static int __init parse_ivrs_table(struct acpi_table_header *_table)
 {
     struct acpi_ivrs_block_header *ivrs_block;
-    unsigned long length, i;
-    u8 checksum, *raw_table;
+    unsigned long length;
     int error = 0;
     struct acpi_table_header *table = (struct acpi_table_header *)_table;
 
     BUG_ON(!table);
 
     dump_acpi_table_header(table);
-
-    /* validate checksum: sum of entire table == 0 */
-    checksum = 0;
-    raw_table = (u8 *)table;
-    for ( i = 0; i < table->length; i++ )
-        checksum += raw_table[i];
-    if ( checksum )
-    {
-        amd_iov_error("IVRS Error: "
-                "Invalid Checksum 0x%x\n", checksum);
-        return -ENODEV;
-    }
 
     /* parse IVRS blocks */
     length = sizeof(struct acpi_ivrs_table_header);
@@ -846,3 +818,144 @@ int __init parse_ivrs_table(struct acpi_
 
     return error;
 }
+
+static int __init detect_iommu_acpi(struct acpi_table_header *_table)
+{
+    struct acpi_ivrs_block_header *ivrs_block;
+    struct acpi_table_header *table = (struct acpi_table_header *)_table;
+    unsigned long i;
+    unsigned long length = sizeof(struct acpi_ivrs_table_header);
+    u8 checksum, *raw_table;
+
+    /* validate checksum: sum of entire table == 0 */
+    checksum = 0;
+    raw_table = (u8 *)table;
+    for ( i = 0; i < table->length; i++ )
+        checksum += raw_table[i];
+    if ( checksum )
+    {
+        amd_iov_error("IVRS Error: "
+                "Invalid Checksum 0x%x\n", checksum);
+        return -ENODEV;
+    }
+
+    while ( table->length > (length + sizeof(*ivrs_block)) )
+    {
+        ivrs_block = (struct acpi_ivrs_block_header *) ((u8 *)table + length);
+        if ( table->length < (length + ivrs_block->length) )
+            return -ENODEV;
+        if ( ivrs_block->type == AMD_IOMMU_ACPI_IVHD_TYPE )
+            if ( amd_iommu_detect_one_acpi((void*)ivrs_block) != 0 )
+                return -ENODEV;
+        length += ivrs_block->length;
+    }
+    return 0;
+}
+
+#define UPDATE_LAST_BDF(x) do {\
+   if ((x) > last_bdf) \
+       last_bdf = (x); \
+   } while(0);
+
+static int __init get_last_bdf_ivhd(void *ivhd)
+{
+    union acpi_ivhd_device *ivhd_device;
+    u16 block_length, dev_length;
+    struct acpi_ivhd_block_header *ivhd_block;
+
+    ivhd_block = (struct acpi_ivhd_block_header *)ivhd;
+
+    if ( ivhd_block->header.length <
+         sizeof(struct acpi_ivhd_block_header) )
+    {
+        amd_iov_error("IVHD Error: Invalid Block Length!\n");
+        return -ENODEV;
+    }
+
+    block_length = sizeof(struct acpi_ivhd_block_header);
+    while ( ivhd_block->header.length >=
+            (block_length + sizeof(struct acpi_ivhd_device_header)) )
+    {
+        ivhd_device = (union acpi_ivhd_device *)
+            ((u8 *)ivhd_block + block_length);
+
+        switch ( ivhd_device->header.type )
+        {
+        case AMD_IOMMU_ACPI_IVHD_DEV_U32_PAD:
+            dev_length = sizeof(u32);
+            break;
+        case AMD_IOMMU_ACPI_IVHD_DEV_U64_PAD:
+            dev_length = sizeof(u64);
+            break;
+        case AMD_IOMMU_ACPI_IVHD_DEV_SELECT:
+            UPDATE_LAST_BDF(ivhd_device->header.dev_id);
+            dev_length = sizeof(struct acpi_ivhd_device_header);
+            break;
+        case AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_SELECT:
+            UPDATE_LAST_BDF(ivhd_device->header.dev_id);
+            dev_length = sizeof(struct acpi_ivhd_device_alias);
+            break;
+        case AMD_IOMMU_ACPI_IVHD_DEV_EXT_SELECT:
+            UPDATE_LAST_BDF(ivhd_device->header.dev_id);
+            dev_length = sizeof(struct acpi_ivhd_device_extended);
+            break;
+        case AMD_IOMMU_ACPI_IVHD_DEV_RANGE_START:
+            UPDATE_LAST_BDF(ivhd_device->range.trailer.dev_id);
+            dev_length = sizeof(struct acpi_ivhd_device_range);
+            break;
+        case AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_RANGE:
+            UPDATE_LAST_BDF(ivhd_device->alias_range.trailer.dev_id)
+            dev_length = sizeof(struct acpi_ivhd_device_alias_range);
+            break;
+        case AMD_IOMMU_ACPI_IVHD_DEV_EXT_RANGE:
+            UPDATE_LAST_BDF(ivhd_device->extended_range.trailer.dev_id)
+            dev_length = sizeof(struct acpi_ivhd_device_extended_range);
+            break;
+        default:
+            amd_iov_error("IVHD Error: Invalid Device Type!\n");
+            dev_length = 0;
+            break;
+        }
+
+        block_length += dev_length;
+        if ( !dev_length )
+            return -ENODEV;
+    }
+
+    return 0;
+}
+
+static int __init get_last_bdf_acpi(struct acpi_table_header *_table)
+{
+    struct acpi_ivrs_block_header *ivrs_block;
+    struct acpi_table_header *table = (struct acpi_table_header *)_table;
+    unsigned long length = sizeof(struct acpi_ivrs_table_header);
+
+    while ( table->length > (length + sizeof(*ivrs_block)) )
+    {
+        ivrs_block = (struct acpi_ivrs_block_header *) ((u8 *)table + length);
+        if ( table->length < (length + ivrs_block->length) )
+            return -ENODEV;
+        if ( ivrs_block->type == AMD_IOMMU_ACPI_IVHD_TYPE )
+            if ( get_last_bdf_ivhd((void*)ivrs_block) != 0 )
+                return -ENODEV;
+        length += ivrs_block->length;
+    }
+   return 0;
+}
+
+int __init amd_iommu_detect_acpi(void)
+{
+    return acpi_table_parse(AMD_IOMMU_ACPI_IVRS_SIG, detect_iommu_acpi);
+}
+
+int __init amd_iommu_get_ivrs_dev_entries(void)
+{
+    acpi_table_parse(AMD_IOMMU_ACPI_IVRS_SIG, get_last_bdf_acpi);
+    return last_bdf + 1;
+}
+
+int __init amd_iommu_update_ivrs_mapping_acpi(void)
+{
+    return acpi_table_parse(AMD_IOMMU_ACPI_IVRS_SIG, parse_ivrs_table);
+}
diff -r 7affdebb7a1e -r a39913db6e51 xen/drivers/passthrough/amd/iommu_detect.c
--- a/xen/drivers/passthrough/amd/iommu_detect.c        Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/drivers/passthrough/amd/iommu_detect.c        Thu Aug 07 11:57:34 2008 +0900
@@ -25,65 +25,10 @@
 #include <xen/pci_regs.h>
 #include <asm/amd-iommu.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
+#include <asm/hvm/svm/amd-iommu-acpi.h>
 
-static int __init valid_bridge_bus_config(
-    int bus, int dev, int func, int *sec_bus, int *sub_bus)
-{
-    int pri_bus;
-
-    pri_bus = pci_conf_read8(bus, dev, func, PCI_PRIMARY_BUS);
-    *sec_bus = pci_conf_read8(bus, dev, func, PCI_SECONDARY_BUS);
-    *sub_bus = pci_conf_read8(bus, dev, func, PCI_SUBORDINATE_BUS);
-
-    return ((pri_bus == bus) && (*sec_bus > bus) && (*sub_bus >= *sec_bus));
-}
-
-int __init get_iommu_last_downstream_bus(struct amd_iommu *iommu)
-{
-    int bus, dev, func;
-    int devfn, hdr_type;
-    int sec_bus, sub_bus;
-    int multi_func;
-
-    bus = iommu->last_downstream_bus = iommu->root_bus;
-    iommu->downstream_bus_present[bus] = 1;
-    dev = PCI_SLOT(iommu->first_devfn);
-    multi_func = PCI_FUNC(iommu->first_devfn) > 0;
-    for ( devfn = iommu->first_devfn; devfn <= iommu->last_devfn; devfn++ )
-    {
-        /* skipping to next device#? */
-        if ( dev != PCI_SLOT(devfn) )
-        {
-            dev = PCI_SLOT(devfn);
-            multi_func = 0;
-        }
-        func = PCI_FUNC(devfn);
- 
-        if ( !VALID_PCI_VENDOR_ID(pci_conf_read16(bus, dev, func,
-                                                  PCI_VENDOR_ID)) )
-            continue;
-
-        hdr_type = pci_conf_read8(bus, dev, func, PCI_HEADER_TYPE);
-        if ( func == 0 )
-            multi_func = IS_PCI_MULTI_FUNCTION(hdr_type);
-
-        if ( (func == 0 || multi_func) &&
-             IS_PCI_TYPE1_HEADER(hdr_type) )
-        {
-            if ( !valid_bridge_bus_config(bus, dev, func,
-                                          &sec_bus, &sub_bus) )
-                return -ENODEV;
-
-            if ( sub_bus > iommu->last_downstream_bus )
-                iommu->last_downstream_bus = sub_bus;
-            do {
-                iommu->downstream_bus_present[sec_bus] = 1;
-            } while ( sec_bus++ < sub_bus );
-        }
-    }
-
-    return 0;
-}
+extern struct list_head amd_iommu_head;
+unsigned short last_bdf = 0;
 
 static int __init get_iommu_msi_capabilities(u8 bus, u8 dev, u8 func,
             struct amd_iommu *iommu)
@@ -128,30 +73,10 @@ int __init get_iommu_capabilities(u8 bus
                                   struct amd_iommu *iommu)
 {
     u32 cap_header, cap_range, misc_info;
-    u64 mmio_bar;
-
-    mmio_bar = (u64)pci_conf_read32(
-        bus, dev, func, cap_ptr + PCI_CAP_MMIO_BAR_HIGH_OFFSET) << 32;
-    mmio_bar |= pci_conf_read32(bus, dev, func,
-                                cap_ptr + PCI_CAP_MMIO_BAR_LOW_OFFSET);
-    iommu->mmio_base_phys = mmio_bar & (u64)~0x3FFF;
-
-    if ( ((mmio_bar & 0x1) == 0) || (iommu->mmio_base_phys == 0) )
-    {
-        amd_iov_error("Invalid MMIO_BAR = 0x%"PRIx64"\n", mmio_bar);
-        return -ENODEV;
-    }
-
-    iommu->bdf = (bus << 8) | PCI_DEVFN(dev, func);
-    iommu->cap_offset = cap_ptr;
 
     cap_header = pci_conf_read32(bus, dev, func, cap_ptr);
     iommu->revision = get_field_from_reg_u32(
         cap_header, PCI_CAP_REV_MASK, PCI_CAP_REV_SHIFT);
-    iommu->iotlb_support = get_field_from_reg_u32(
-        cap_header, PCI_CAP_IOTLB_MASK, PCI_CAP_IOTLB_SHIFT);
-    iommu->ht_tunnel_support = get_field_from_reg_u32(
-        cap_header, PCI_CAP_HT_TUNNEL_MASK, PCI_CAP_HT_TUNNEL_SHIFT);
     iommu->pte_not_present_cached = get_field_from_reg_u32(
         cap_header, PCI_CAP_NP_CACHE_MASK, PCI_CAP_NP_CACHE_SHIFT);
 
@@ -159,96 +84,76 @@ int __init get_iommu_capabilities(u8 bus
                                 cap_ptr + PCI_CAP_RANGE_OFFSET);
     iommu->unit_id = get_field_from_reg_u32(
         cap_range, PCI_CAP_UNIT_ID_MASK, PCI_CAP_UNIT_ID_SHIFT);
-    iommu->root_bus = get_field_from_reg_u32(
-        cap_range, PCI_CAP_BUS_NUMBER_MASK, PCI_CAP_BUS_NUMBER_SHIFT);
-    iommu->first_devfn = get_field_from_reg_u32(
-        cap_range, PCI_CAP_FIRST_DEVICE_MASK, PCI_CAP_FIRST_DEVICE_SHIFT);
-    iommu->last_devfn = get_field_from_reg_u32(
-        cap_range, PCI_CAP_LAST_DEVICE_MASK, PCI_CAP_LAST_DEVICE_SHIFT);
 
     misc_info = pci_conf_read32(bus, dev, func,
                                 cap_ptr + PCI_MISC_INFO_OFFSET);
     iommu->msi_number = get_field_from_reg_u32(
         misc_info, PCI_CAP_MSI_NUMBER_MASK, PCI_CAP_MSI_NUMBER_SHIFT);
 
+    return 0;
+}
+
+int __init amd_iommu_detect_one_acpi(void *ivhd)
+{
+    struct amd_iommu *iommu;
+    u8 bus, dev, func;
+    struct acpi_ivhd_block_header *ivhd_block;
+
+    ivhd_block = (struct acpi_ivhd_block_header *)ivhd;
+
+    if ( ivhd_block->header.length < sizeof(struct acpi_ivhd_block_header) )
+    {
+        amd_iov_error("Invalid IVHD Block Length!\n");
+        return -ENODEV;
+    }
+
+    if ( !ivhd_block->header.dev_id ||
+        !ivhd_block->cap_offset || !ivhd_block->mmio_base)
+    {
+        amd_iov_error("Invalid IVHD Block!\n");
+        return -ENODEV;
+    }
+
+    iommu = (struct amd_iommu *) xmalloc(struct amd_iommu);
+    if ( !iommu )
+    {
+        amd_iov_error("Error allocating amd_iommu\n");
+        return -ENOMEM;
+    }
+    memset(iommu, 0, sizeof(struct amd_iommu));
+
+    spin_lock_init(&iommu->lock);
+
+    iommu->bdf = ivhd_block->header.dev_id;
+    iommu->cap_offset = ivhd_block->cap_offset;
+    iommu->mmio_base_phys = ivhd_block->mmio_base;
+
+    /* override IOMMU support flags */
+    iommu->coherent = get_field_from_byte(ivhd_block->header.flags,
+                        AMD_IOMMU_ACPI_COHERENT_MASK,
+                        AMD_IOMMU_ACPI_COHERENT_SHIFT);
+    iommu->iotlb_support = get_field_from_byte(ivhd_block->header.flags,
+                        AMD_IOMMU_ACPI_IOTLB_SUP_MASK,
+                        AMD_IOMMU_ACPI_IOTLB_SUP_SHIFT);
+    iommu->isochronous = get_field_from_byte(ivhd_block->header.flags,
+                        AMD_IOMMU_ACPI_ISOC_MASK,
+                        AMD_IOMMU_ACPI_ISOC_SHIFT);
+    iommu->res_pass_pw = get_field_from_byte(ivhd_block->header.flags,
+                        AMD_IOMMU_ACPI_RES_PASS_PW_MASK,
+                        AMD_IOMMU_ACPI_RES_PASS_PW_SHIFT);
+    iommu->pass_pw = get_field_from_byte(ivhd_block->header.flags,
+                        AMD_IOMMU_ACPI_PASS_PW_MASK,
+                        AMD_IOMMU_ACPI_PASS_PW_SHIFT);
+    iommu->ht_tunnel_enable = get_field_from_byte(ivhd_block->header.flags,
+                        AMD_IOMMU_ACPI_HT_TUN_ENB_MASK,
+                        AMD_IOMMU_ACPI_HT_TUN_ENB_SHIFT);
+    bus = iommu->bdf >> 8;
+    dev = PCI_SLOT(iommu->bdf & 0xFF);
+    func = PCI_FUNC(iommu->bdf & 0xFF);
+    get_iommu_capabilities(bus, dev, func, iommu->cap_offset, iommu);
     get_iommu_msi_capabilities(bus, dev, func, iommu);
+
+    list_add_tail(&iommu->list, &amd_iommu_head);
 
     return 0;
 }
-
-static int __init scan_caps_for_iommu(
-    int bus, int dev, int func,
-    iommu_detect_callback_ptr_t iommu_detect_callback)
-{
-    int cap_ptr, cap_id, cap_type;
-    u32 cap_header;
-    int count, error = 0;
-
-    count = 0;
-    cap_ptr = pci_conf_read8(bus, dev, func, PCI_CAPABILITY_LIST);
-    while ( (cap_ptr >= PCI_MIN_CAP_OFFSET) &&
-            (count < PCI_MAX_CAP_BLOCKS) &&
-            !error )
-    {
-        cap_ptr &= PCI_CAP_PTR_MASK;
-        cap_header = pci_conf_read32(bus, dev, func, cap_ptr);
-        cap_id = get_field_from_reg_u32(
-            cap_header, PCI_CAP_ID_MASK, PCI_CAP_ID_SHIFT);
-
-        if ( cap_id == PCI_CAP_ID_SECURE_DEVICE )
-        {
-            cap_type = get_field_from_reg_u32(
-                cap_header, PCI_CAP_TYPE_MASK, PCI_CAP_TYPE_SHIFT);
-            if ( cap_type == PCI_CAP_TYPE_IOMMU )
-                error = iommu_detect_callback(
-                    bus, dev, func, cap_ptr);
-        }
-
-        cap_ptr = get_field_from_reg_u32(
-            cap_header, PCI_CAP_NEXT_PTR_MASK, PCI_CAP_NEXT_PTR_SHIFT);
-        count++;
-    }
-
-    return error;
-}
-
-static int __init scan_functions_for_iommu(
-    int bus, int dev, iommu_detect_callback_ptr_t iommu_detect_callback)
-{
-    int func, hdr_type;
-    int count = 1, error = 0;
-
-    for ( func = 0;
-          (func < count) && !error &&
-              VALID_PCI_VENDOR_ID(pci_conf_read16(bus, dev, func,
-                                                  PCI_VENDOR_ID));
-          func++ )
-
-    {
-        hdr_type = pci_conf_read8(bus, dev, func, PCI_HEADER_TYPE);
-
-        if ( (func == 0) && IS_PCI_MULTI_FUNCTION(hdr_type) )
-            count = PCI_MAX_FUNC_COUNT;
-
-        if ( IS_PCI_TYPE0_HEADER(hdr_type) ||
-             IS_PCI_TYPE1_HEADER(hdr_type) )
-            error = scan_caps_for_iommu(bus, dev, func,
-                                        iommu_detect_callback);
-    }
-
-    return error;
-}
-
-
-int __init scan_for_iommu(iommu_detect_callback_ptr_t iommu_detect_callback)
-{
-    int bus, dev, error = 0;
-
-    for ( bus = 0; bus < PCI_MAX_BUS_COUNT && !error; ++bus )
-        for ( dev = 0; dev < PCI_MAX_DEV_COUNT && !error; ++dev )
-            error = scan_functions_for_iommu(bus, dev,
-                                             iommu_detect_callback);
-
-    return error;
-}
-
diff -r 7affdebb7a1e -r a39913db6e51 xen/drivers/passthrough/amd/iommu_init.c
--- a/xen/drivers/passthrough/amd/iommu_init.c  Thu Aug 07 11:47:34 2008 +0900
+++ b/xen/drivers/passthrough/amd/iommu_init.c  Thu Aug 07 11:57:34 2008 +0900
@@ -27,10 +27,20 @@
 #include <asm/hvm/svm/amd-iommu-proto.h>
 #include <asm-x86/fixmap.h>
 
-extern int nr_amd_iommus;
 static struct amd_iommu *vector_to_iommu[NR_VECTORS];
-
-int __init map_iommu_mmio_region(struct amd_iommu *iommu)
+static int nr_amd_iommus;
+static long amd_iommu_cmd_buffer_entries = IOMMU_CMD_BUFFER_DEFAULT_ENTRIES;
+static long amd_iommu_event_log_entries = IOMMU_EVENT_LOG_DEFAULT_ENTRIES;
+
+unsigned short ivrs_bdf_entries;
+struct ivrs_mappings *ivrs_mappings;
+struct list_head amd_iommu_head;
+struct table_struct device_table;
+
+extern void *int_remap_table;
+extern spinlock_t int_remap_table_lock;
+
+static int __init map_iommu_mmio_region(struct amd_iommu *iommu)
 {
     unsigned long mfn;
 
@@ -51,7 +61,7 @@ int __init map_iommu_mmio_region(struct 
     return 0;
 }
 
-void __init unmap_iommu_mmio_region(struct amd_iommu *iommu)
+static void __init unmap_iommu_mmio_region(struct amd_iommu *iommu)
 {
     if ( iommu->mmio_base )
     {
@@ -60,7 +70,7 @@ void __init unmap_iommu_mmio_region(stru
     }
 }
 
-void __init register_iommu_dev_table_in_mmio_space(struct amd_iommu *iommu)
+static void __init register_iommu_dev_table_in_mmio_space(struct amd_iommu *iommu)
 {
     u64 addr_64, addr_lo, addr_hi;
     u32 entry;
@@ -83,7 +93,7 @@ void __init register_iommu_dev_table_in_
     writel(entry, iommu->mmio_base + IOMMU_DEV_TABLE_BASE_HIGH_OFFSET);
 }
 
-void __init register_iommu_cmd_buffer_in_mmio_space(struct amd_iommu *iommu)
+static void __init register_iommu_cmd_buffer_in_mmio_space(struct amd_iommu *iommu)
 {
     u64 addr_64, addr_lo, addr_hi;
     u32 power_of2_entries;
@@ -110,7 +120,7 @@ void __init register_iommu_cmd_buffer_in
     writel(entry, iommu->mmio_base+IOMMU_CMD_BUFFER_BASE_HIGH_OFFSET);
 }
 
-void __init register_iommu_event_log_in_mmio_space(struct amd_iommu *iommu)
+static void __init register_iommu_event_log_in_mmio_space(struct amd_iommu *iommu)
 {
     u64 addr_64, addr_lo, addr_hi;
     u32 power_of2_entries;
@@ -266,12 +276,13 @@ static int amd_iommu_read_event_log(stru
     return -EFAULT;
 }
 
-static void amd_iommu_msi_data_init(struct amd_iommu *iommu, int vector)
+static void amd_iommu_msi_data_init(struct amd_iommu *iommu)
 {
     u32 msi_data;
     u8 bus = (iommu->bdf >> 8) & 0xff;
     u8 dev = PCI_SLOT(iommu->bdf & 0xff);
     u8 func = PCI_FUNC(iommu->bdf & 0xff);
+    int vector = iommu->vector;
 
     msi_data = MSI_DATA_TRIGGER_EDGE |
         MSI_DATA_LEVEL_ASSERT |
@@ -434,7 +445,6 @@ static int set_iommu_interrupt_handler(s
 static int set_iommu_interrupt_handler(struct amd_iommu *iommu)
 {
     int vector, ret;
-    unsigned long flags;
 
     vector = assign_irq_vector(AUTO_ASSIGN);
     vector_to_iommu[vector] = iommu;
@@ -450,38 +460,210 @@ static int set_iommu_interrupt_handler(s
     }
 
     irq_desc[vector].handler = &iommu_msi_type;
-    ret = request_irq(vector, amd_iommu_page_fault, 0, "dmar", iommu);
+    ret = request_irq(vector, amd_iommu_page_fault, 0, "amd_iommu", iommu);
     if ( ret )
     {
         amd_iov_error("can't request irq\n");
         return 0;
     }
 
+    return vector;
+}
+
+void __init enable_iommu(struct amd_iommu *iommu)
+{
+    unsigned long flags;
+
     spin_lock_irqsave(&iommu->lock, flags);
 
-    amd_iommu_msi_data_init (iommu, vector);
+    if ( iommu->enabled )
+    {
+        spin_unlock_irqrestore(&iommu->lock, flags); 
+        return;
+    }
+
+    iommu->dev_table.alloc_size = device_table.alloc_size;
+    iommu->dev_table.entries = device_table.entries;
+    iommu->dev_table.buffer = device_table.buffer;
+
+    register_iommu_dev_table_in_mmio_space(iommu);
+    register_iommu_cmd_buffer_in_mmio_space(iommu);
+    register_iommu_event_log_in_mmio_space(iommu);
+    register_iommu_exclusion_range(iommu);
+
+    amd_iommu_msi_data_init (iommu);
     amd_iommu_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
     amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
 
-    spin_unlock_irqrestore(&iommu->lock, flags);
-
-    return vector;
-}
-
-void __init enable_iommu(struct amd_iommu *iommu)
-{
-    unsigned long flags;
-
-    set_iommu_interrupt_handler(iommu);
-
-    spin_lock_irqsave(&iommu->lock, flags);
-
-    register_iommu_exclusion_range(iommu);
     set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_ENABLED);
     set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED);
     set_iommu_translation_control(iommu, IOMMU_CONTROL_ENABLED);
 
+    printk("AMD_IOV: IOMMU %d Enabled.\n", nr_amd_iommus );
+    nr_amd_iommus++;
+
+    iommu->enabled = 1;
     spin_unlock_irqrestore(&iommu->lock, flags);
 
-    printk("AMD_IOV: IOMMU %d Enabled.\n", nr_amd_iommus);
-}
+}
+
+static void __init deallocate_iommu_table_struct(
+    struct table_struct *table)
+{
+    if ( table->buffer )
+    {
+        free_xenheap_pages(table->buffer,
+            get_order_from_bytes(table->alloc_size));

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.