[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg



# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1209067709 21600
# Node ID dc510776dd598f3f479af749865bec225e32634d
# Parent  239b44eeb2d6d235ddee581b6e89398c80278a2f
# Parent  97da69831384f0819caeeb8b8bdff0f942b2d690
merge with xen-unstable.hg
---
 xen/arch/x86/hvm/svm/x86_32/Makefile          |    1 
 xen/arch/x86/hvm/svm/x86_32/exits.S           |  131 -------
 xen/arch/x86/hvm/svm/x86_64/Makefile          |    1 
 xen/arch/x86/hvm/svm/x86_64/exits.S           |  148 --------
 xen/arch/x86/hvm/vmx/x86_32/Makefile          |    1 
 xen/arch/x86/hvm/vmx/x86_32/exits.S           |  148 --------
 xen/arch/x86/hvm/vmx/x86_64/Makefile          |    1 
 xen/arch/x86/hvm/vmx/x86_64/exits.S           |  165 ---------
 .hgignore                                     |    1 
 Makefile                                      |    7 
 docs/misc/vtd.txt                             |    2 
 docs/src/user.tex                             |    7 
 docs/xen-api/revision-history.tex             |   10 
 docs/xen-api/xenapi-coversheet.tex            |    2 
 docs/xen-api/xenapi-datamodel.tex             |  180 ++++++++--
 extras/mini-os/Makefile                       |   29 +
 extras/mini-os/app.lds                        |   11 
 extras/mini-os/arch/ia64/minios-ia64.lds      |    5 
 extras/mini-os/arch/x86/minios-x86_32.lds     |    1 
 extras/mini-os/arch/x86/minios-x86_64.lds     |    1 
 extras/mini-os/arch/x86/mm.c                  |    7 
 extras/mini-os/fbfront.c                      |   98 +++--
 extras/mini-os/hypervisor.c                   |   15 
 extras/mini-os/include/fbfront.h              |    3 
 extras/mini-os/include/hypervisor.h           |    4 
 extras/mini-os/include/ia64/arch_mm.h         |    2 
 extras/mini-os/include/ia64/os.h              |   11 
 extras/mini-os/include/lib.h                  |    1 
 extras/mini-os/include/mm.h                   |    2 
 extras/mini-os/include/x86/os.h               |    1 
 extras/mini-os/kernel.c                       |   15 
 extras/mini-os/lib/sys.c                      |   35 ++
 extras/mini-os/main.c                         |    2 
 extras/mini-os/mm.c                           |   12 
 extras/mini-os/sched.c                        |  136 ++------
 tools/blktap/drivers/blktapctrl.c             |   83 ++++
 tools/blktap/drivers/tapdisk.h                |    2 
 tools/console/daemon/io.c                     |   18 -
 tools/console/daemon/main.c                   |   13 
 tools/examples/blktap                         |   22 -
 tools/firmware/hvmloader/Makefile             |    5 
 tools/firmware/hvmloader/acpi/build.c         |    4 
 tools/firmware/hvmloader/cacheattr.c          |   99 +++++
 tools/firmware/hvmloader/config.h             |    3 
 tools/firmware/hvmloader/hvmloader.c          |    3 
 tools/firmware/hvmloader/smp.c                |    9 
 tools/ioemu/Makefile                          |    2 
 tools/ioemu/hw/cirrus_vga.c                   |    4 
 tools/ioemu/hw/pci.c                          |   16 
 tools/ioemu/hw/vga.c                          |    6 
 tools/ioemu/hw/xen_blktap.c                   |   45 --
 tools/ioemu/hw/xenfb.c                        |  153 ++++++---
 tools/ioemu/tapdisk-ioemu.c                   |   14 
 tools/ioemu/target-i386-dm/helper2.c          |    2 
 tools/ioemu/vl.c                              |   17 -
 tools/ioemu/vl.h                              |    1 
 tools/libfsimage/ext2fs/fsys_ext2fs.c         |   61 +++
 tools/libxc/xc_hvm_build.c                    |    2 
 tools/python/xen/util/acmpolicy.py            |   27 +
 tools/python/xen/xend/XendDomain.py           |   26 +
 tools/python/xen/xend/XendDomainInfo.py       |   31 +
 tools/python/xen/xend/XendXSPolicyAdmin.py    |    1 
 tools/python/xen/xend/image.py                |   46 ++
 tools/python/xen/xm/main.py                   |   11 
 tools/tests/test_x86_emulator.c               |   48 --
 tools/tests/x86_emulate.c                     |    6 
 tools/xenmon/xenbaked.c                       |   32 +
 xen/Makefile                                  |   12 
 xen/arch/x86/Makefile                         |    2 
 xen/arch/x86/bitops.c                         |   32 +
 xen/arch/x86/cpu/mtrr/main.c                  |    7 
 xen/arch/x86/domain.c                         |   11 
 xen/arch/x86/domain_build.c                   |    4 
 xen/arch/x86/hvm/emulate.c                    |  133 +++++++-
 xen/arch/x86/hvm/hvm.c                        |  152 ++++++++-
 xen/arch/x86/hvm/mtrr.c                       |  312 ++++--------------
 xen/arch/x86/hvm/svm/Makefile                 |    4 
 xen/arch/x86/hvm/svm/entry.S                  |  178 ++++++++++
 xen/arch/x86/hvm/svm/intr.c                   |    6 
 xen/arch/x86/hvm/svm/svm.c                    |   40 --
 xen/arch/x86/hvm/vmx/Makefile                 |    4 
 xen/arch/x86/hvm/vmx/entry.S                  |  198 +++++++++++
 xen/arch/x86/hvm/vmx/intr.c                   |    6 
 xen/arch/x86/hvm/vmx/vmx.c                    |  101 ------
 xen/arch/x86/mm.c                             |   35 --
 xen/arch/x86/mm/shadow/common.c               |   94 ++---
 xen/arch/x86/mm/shadow/multi.c                |   13 
 xen/arch/x86/setup.c                          |    4 
 xen/arch/x86/smp.c                            |   27 -
 xen/arch/x86/traps.c                          |   41 +-
 xen/arch/x86/x86_emulate.c                    |   18 -
 xen/arch/x86/x86_emulate/x86_emulate.c        |  282 +++++++++++-----
 xen/arch/x86/x86_emulate/x86_emulate.h        |   54 +--
 xen/common/trace.c                            |   11 
 xen/common/xencomm.c                          |    2 
 xen/drivers/char/console.c                    |    2 
 xen/drivers/char/serial.c                     |   53 ++-
 xen/drivers/passthrough/amd/iommu_acpi.c      |  214 +++++-------
 xen/drivers/passthrough/amd/iommu_detect.c    |   62 +++
 xen/drivers/passthrough/amd/iommu_init.c      |  306 ++++++++++++++++++
 xen/drivers/passthrough/amd/iommu_map.c       |   21 -
 xen/drivers/passthrough/amd/pci_amd_iommu.c   |  210 +++++-------
 xen/drivers/passthrough/iommu.c               |   30 +
 xen/drivers/passthrough/vtd/dmar.c            |   58 ---
 xen/drivers/passthrough/vtd/iommu.c           |  432 ++++++++------------------
 xen/drivers/passthrough/vtd/utils.c           |    2 
 xen/drivers/passthrough/vtd/x86/vtd.c         |   16 
 xen/include/asm-x86/amd-iommu.h               |    6 
 xen/include/asm-x86/bitops.h                  |   52 +--
 xen/include/asm-x86/hvm/hvm.h                 |    2 
 xen/include/asm-x86/hvm/support.h             |    2 
 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h  |   37 ++
 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h |   19 -
 xen/include/asm-x86/hvm/vcpu.h                |   11 
 xen/include/asm-x86/mtrr.h                    |    8 
 xen/include/asm-x86/paging.h                  |    2 
 xen/include/public/xsm/acm.h                  |    7 
 xen/include/xen/iommu.h                       |    4 
 xen/include/xen/serial.h                      |    8 
 xen/include/xsm/acm/acm_core.h                |    1 
 xen/tools/Makefile                            |    4 
 xen/tools/figlet/figlet.c                     |   24 -
 xen/xsm/acm/acm_policy.c                      |    8 
 123 files changed, 2967 insertions(+), 2425 deletions(-)

diff -r 239b44eeb2d6 -r dc510776dd59 .hgignore
--- a/.hgignore Thu Apr 24 14:02:16 2008 -0600
+++ b/.hgignore Thu Apr 24 14:08:29 2008 -0600
@@ -243,6 +243,7 @@
 ^tools/xm-test/lib/XmTestLib/config.py$
 ^tools/xm-test/lib/XmTestReport/xmtest.py$
 ^tools/xm-test/tests/.*\.test$
+^xen/\.banner.*$
 ^xen/BLOG$
 ^xen/System.map$
 ^xen/TAGS$
diff -r 239b44eeb2d6 -r dc510776dd59 Makefile
--- a/Makefile  Thu Apr 24 14:02:16 2008 -0600
+++ b/Makefile  Thu Apr 24 14:08:29 2008 -0600
@@ -121,6 +121,13 @@ distclean:
 # Linux name for GNU distclean
 .PHONY: mrproper
 mrproper: distclean
+
+# Prepare for source tarball
+.PHONY: src-tarball
+src-tarball: distclean
+       $(MAKE) -C xen .banner
+       rm -rf xen/tools/figlet .[a-z]*
+       $(MAKE) -C xen distclean
 
 .PHONY: help
 help:
diff -r 239b44eeb2d6 -r dc510776dd59 docs/misc/vtd.txt
--- a/docs/misc/vtd.txt Thu Apr 24 14:02:16 2008 -0600
+++ b/docs/misc/vtd.txt Thu Apr 24 14:08:29 2008 -0600
@@ -21,7 +21,7 @@ 11) "hide" pci device from dom0 as follo
 
 title Xen-Fedora Core (2.6.18-xen)
         root (hd0,0)
-        kernel /boot/xen.gz com1=115200,8n1 console=com1 vtd=1
+        kernel /boot/xen.gz com1=115200,8n1 console=com1
         module /boot/vmlinuz-2.6.18.8-xen root=LABEL=/ ro console=tty0 
console=ttyS0,115200,8n1 pciback.hide=(01:00.0)(03:00.0) 
pciback.verbose_request=1 apic=debug
         module /boot/initrd-2.6.18-xen.img
 
diff -r 239b44eeb2d6 -r dc510776dd59 docs/src/user.tex
--- a/docs/src/user.tex Thu Apr 24 14:02:16 2008 -0600
+++ b/docs/src/user.tex Thu Apr 24 14:08:29 2008 -0600
@@ -4088,6 +4088,8 @@ editing \path{grub.conf}.
   a list of pages not to be allocated for use because they contain bad
   bytes. For example, if your memory tester says that byte 0x12345678
   is bad, you would place `badpage=0x12345' on Xen's command line.
+\item [ serial\_tx\_buffer=$<$size$>$ ] Size of serial transmit
+  buffers. Default is 16kB.
 \item [ com1=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$
   com2=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$ ] \mbox{}\\
   Xen supports up to two 16550-compatible serial ports.  For example:
@@ -4239,10 +4241,11 @@ In addition to the standard Linux kernel
     \begin{tabular}{l}
       `xencons=off': disable virtual console \\
       `xencons=tty': attach console to /dev/tty1 (tty0 at boot-time) \\
-      `xencons=ttyS': attach console to /dev/ttyS0
+      `xencons=ttyS': attach console to /dev/ttyS0 \\
+      `xencons=xvc': attach console to /dev/xvc0
     \end{tabular}
 \end{center}
-The default is ttyS for dom0 and tty for all other domains.
+The default is ttyS for dom0 and xvc for all other domains.
 \end{description}
 
 
diff -r 239b44eeb2d6 -r dc510776dd59 docs/xen-api/revision-history.tex
--- a/docs/xen-api/revision-history.tex Thu Apr 24 14:02:16 2008 -0600
+++ b/docs/xen-api/revision-history.tex Thu Apr 24 14:08:29 2008 -0600
@@ -37,5 +37,15 @@
     \end{flushleft}
    \end{minipage}\\
   \hline
+  1.0.5 & 17th Apr. 08 & S. Berger &
+   \begin{minipage}[t]{7cm}
+    \begin{flushleft}
+     Added undocumented fields and methods for default\_netmask and
+     default\_gateway to the Network class. Removed an unimplemented
+     method from the XSPolicy class and removed the 'optional' from
+     'oldlabel' parameters.
+    \end{flushleft}
+   \end{minipage}\\
+  \hline
  \end{tabular}
 \end{center}
diff -r 239b44eeb2d6 -r dc510776dd59 docs/xen-api/xenapi-coversheet.tex
--- a/docs/xen-api/xenapi-coversheet.tex        Thu Apr 24 14:02:16 2008 -0600
+++ b/docs/xen-api/xenapi-coversheet.tex        Thu Apr 24 14:08:29 2008 -0600
@@ -22,7 +22,7 @@
 \newcommand{\releasestatement}{Stable Release}
 
 %% Document revision
-\newcommand{\revstring}{API Revision 1.0.4}
+\newcommand{\revstring}{API Revision 1.0.5}
 
 %% Document authors
 \newcommand{\docauthors}{
diff -r 239b44eeb2d6 -r dc510776dd59 docs/xen-api/xenapi-datamodel.tex
--- a/docs/xen-api/xenapi-datamodel.tex Thu Apr 24 14:02:16 2008 -0600
+++ b/docs/xen-api/xenapi-datamodel.tex Thu Apr 24 14:08:29 2008 -0600
@@ -4467,7 +4467,7 @@ security_label, string old_label)\end{ve
 {\bf type} & {\bf name} & {\bf description} \\ \hline
 {\tt VM ref } & self & reference to the object \\ \hline
 {\tt string } & security\_label & security label for the VM \\ \hline
-{\tt string } & old\_label & Optional label value that the security label \\
+{\tt string } & old\_label & Label value that the security label \\
 & & must currently have for the change to succeed.\\ \hline
 
 \end{tabular}
@@ -7619,6 +7619,8 @@ Quals & Field & Type & Description \\
 $\mathit{RW}$ &  {\tt name/description} & string & a notes field containg 
human-readable description \\
 $\mathit{RO}_\mathit{run}$ &  {\tt VIFs} & (VIF ref) Set & list of connected 
vifs \\
 $\mathit{RO}_\mathit{run}$ &  {\tt PIFs} & (PIF ref) Set & list of connected 
pifs \\
+$\mathit{RW}$ &  {\tt default\_gateway} & string & default gateway \\
+$\mathit{RW}$ &  {\tt default\_netmask} & string & default netmask \\
 $\mathit{RW}$ &  {\tt other\_config} & (string $\rightarrow$ string) Map & 
additional configuration \\
 \hline
 \end{longtable}
@@ -7869,6 +7871,138 @@ Get the PIFs field of the given network.
 
 
 value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_default\_gateway}
+
+{\bf Overview:} 
+Get the default\_gateway field of the given network.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_default_gateway (session_id s, network ref 
self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt network ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~set\_default\_gateway}
+
+{\bf Overview:} 
+Set the default\_gateway field of the given network.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} void set_default_gateway (session_id s, network ref self, 
string value)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt network ref } & self & reference to the object \\ \hline 
+
+{\tt string } & value & New value to set \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+void
+}
+
+
+
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_default\_netmask}
+
+{\bf Overview:} 
+Get the default\_netmask field of the given network.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_default_netmask (session_id s, network ref 
self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt network ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~set\_default\_netmask}
+
+{\bf Overview:} 
+Set the default\_netmask field of the given network.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} void set_default_netmask (session_id s, network ref self, 
string value)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt network ref } & self & reference to the object \\ \hline 
+
+{\tt string } & value & New value to set \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+void
+}
+
+
+
 \vspace{0.3cm}
 \vspace{0.3cm}
 \vspace{0.3cm}
@@ -8999,7 +9133,7 @@ security_label, string old_label)\end{ve
 {\tt VIF ref } & self & reference to the object \\ \hline
 
 {\tt string } & security\_label & New value of the security label \\ \hline
-{\tt string } & old\_label & Optional label value that the security label \\
+{\tt string } & old\_label & Label value that the security label \\
 & & must currently have for the change to succeed.\\ \hline
 \end{tabular}
 
@@ -11504,7 +11638,7 @@ security_label, string old_label)\end{ve
 {\tt VDI ref } & self & reference to the object \\ \hline
 
 {\tt string } & security\_label & New value of the security label \\ \hline
-{\tt string } & old\_label & Optional label value that the security label \\
+{\tt string } & old\_label & Label value that the security label \\
 & & must currently have for the change to succeed.\\ \hline
 \end{tabular}
 
@@ -14898,46 +15032,6 @@ The label of the given resource.
 \vspace{0.3cm}
 \vspace{0.3cm}
 \vspace{0.3cm}
-\subsubsection{RPC name:~activate\_xspolicy}
-
-{\bf Overview:}
-Load the referenced policy into the hypervisor.
-
- \noindent {\bf Signature:}
-\begin{verbatim} xs_instantiationflags activate_xspolicy (session_id s, xs_ref 
xspolicy,
-xs_instantiationflags flags)\end{verbatim}
-
-
-\noindent{\bf Arguments:}
-
-
-\vspace{0.3cm}
-\begin{tabular}{|c|c|p{7cm}|}
- \hline
-{\bf type} & {\bf name} & {\bf description} \\ \hline
-{\tt xs ref } & self & reference to the object \\ \hline
-{\tt xs\_instantiationflags } & flags & flags to activate on a policy; flags
-  can only be set \\ \hline
-
-\end{tabular}
-
-\vspace{0.3cm}
-
-
- \noindent {\bf Return Type:}
-{\tt
-xs\_instantiationflags
-}
-
-
-Currently active instantiation flags.
-\vspace{0.3cm}
-
-\noindent{\bf Possible Error Codes:} {\tt SECURITY\_ERROR}
-
-\vspace{0.3cm}
-\vspace{0.3cm}
-\vspace{0.3cm}
 \subsubsection{RPC name:~can\_run}
 
 {\bf Overview:}
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/Makefile
--- a/extras/mini-os/Makefile   Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/Makefile   Thu Apr 24 14:08:29 2008 -0600
@@ -19,6 +19,7 @@ include minios.mk
 
 # Define some default flags for linking.
 LDLIBS := 
+APP_LDLIBS := 
 LDARCHLIB := -L$(TARGET_ARCH_DIR) -l$(ARCH_LIB_NAME)
 LDFLAGS_FINAL := -T $(TARGET_ARCH_DIR)/minios-$(XEN_TARGET_ARCH).lds
 
@@ -33,6 +34,7 @@ SUBDIRS := lib xenbus console
 SUBDIRS := lib xenbus console
 
 # The common mini-os objects to build.
+APP_OBJS :=
 OBJS := $(patsubst %.c,%.o,$(wildcard *.c))
 OBJS += $(patsubst %.c,%.o,$(wildcard lib/*.c))
 OBJS += $(patsubst %.c,%.o,$(wildcard xenbus/*.c))
@@ -75,28 +77,28 @@ OBJS := $(filter-out lwip%.o $(LWO), $(O
 
 ifeq ($(caml),y)
 CAMLLIB = $(shell ocamlc -where)
-OBJS += $(CAMLDIR)/caml.o
-OBJS += $(CAMLLIB)/libasmrun.a
+APP_OBJS += main-caml.o
+APP_OBJS += $(CAMLDIR)/caml.o
+APP_OBJS += $(CAMLLIB)/libasmrun.a
 CFLAGS += -I$(CAMLLIB)
-LDLIBS += -lm
-else
+APP_LDLIBS += -lm
+endif
 OBJS := $(filter-out main-caml.o, $(OBJS))
-endif
 
 ifeq ($(qemu),y)
-OBJS += $(QEMUDIR)/i386-dm-stubdom/qemu.a $(QEMUDIR)/i386-dm-stubdom/libqemu.a
+APP_OBJS += $(QEMUDIR)/i386-dm-stubdom/qemu.a 
$(QEMUDIR)/i386-dm-stubdom/libqemu.a
 CFLAGS += -DCONFIG_QEMU
 endif
 
 ifneq ($(CDIR),)
-OBJS += $(CDIR)/main.a
-LDLIBS += 
+APP_OBJS += $(CDIR)/main.a
+APP_LDLIBS += 
 endif
 
 ifeq ($(libc),y)
 LDLIBS += -L$(XEN_ROOT)/stubdom/libxc -lxenctrl -lxenguest
-LDLIBS += -lpci
-LDLIBS += -lz
+APP_LDLIBS += -lpci
+APP_LDLIBS += -lz
 LDLIBS += -lc
 endif
 
@@ -104,8 +106,11 @@ OBJS := $(filter-out daytime.o, $(OBJS))
 OBJS := $(filter-out daytime.o, $(OBJS))
 endif
 
-$(TARGET): links $(OBJS) arch_lib
-       $(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o
+app.o: $(APP_OBJS) app.lds
+       $(LD) -r -d $(LDFLAGS) $^ $(APP_LDLIBS) --undefined main -o $@
+
+$(TARGET): links $(OBJS) app.o arch_lib
+       $(LD) -r $(LDFLAGS) $(HEAD_OBJ) app.o $(OBJS) $(LDARCHLIB) $(LDLIBS) -o 
$@.o
        $(OBJCOPY) -w -G $(GLOBAL_PREFIX)* -G _start $@.o $@.o
        $(LD) $(LDFLAGS) $(LDFLAGS_FINAL) $@.o $(EXTRA_OBJS) -o $@
        gzip -f -9 -c $@ >$@.gz
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/app.lds
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/extras/mini-os/app.lds    Thu Apr 24 14:08:29 2008 -0600
@@ -0,0 +1,11 @@
+SECTIONS
+{
+        .app.bss : {
+                __app_bss_start = . ;
+                *(.bss .bss.*)
+                *(COMMON)
+                *(.lbss .lbss.*)
+                *(LARGE_COMMON)
+                __app_bss_end = . ;
+        }
+}
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/arch/ia64/minios-ia64.lds
--- a/extras/mini-os/arch/ia64/minios-ia64.lds  Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/arch/ia64/minios-ia64.lds  Thu Apr 24 14:08:29 2008 -0600
@@ -59,7 +59,10 @@ SECTIONS
   { *(.IA_64.unwind) }
 
   .bss : AT(ADDR(.bss) - (((5<<(61))+0x100000000) - (1 << 20)))
-  { *(.bss) }
+  {
+    *(.bss)
+    *(.app.bss)
+  }
 
   _end = .;
 
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/arch/x86/minios-x86_32.lds
--- a/extras/mini-os/arch/x86/minios-x86_32.lds Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/arch/x86/minios-x86_32.lds Thu Apr 24 14:08:29 2008 -0600
@@ -38,6 +38,7 @@ SECTIONS
   __bss_start = .;             /* BSS */
   .bss : {
        *(.bss)
+        *(.app.bss)
        }
   _end = . ;
 
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/arch/x86/minios-x86_64.lds
--- a/extras/mini-os/arch/x86/minios-x86_64.lds Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/arch/x86/minios-x86_64.lds Thu Apr 24 14:08:29 2008 -0600
@@ -38,6 +38,7 @@ SECTIONS
   __bss_start = .;             /* BSS */
   .bss : {
        *(.bss)
+        *(.app.bss)
        }
   _end = . ;
 
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/arch/x86/mm.c
--- a/extras/mini-os/arch/x86/mm.c      Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/arch/x86/mm.c      Thu Apr 24 14:08:29 2008 -0600
@@ -556,7 +556,6 @@ void *map_frames_ex(unsigned long *f, un
 
 static void clear_bootstrap(void)
 {
-    struct xen_memory_reservation reservation;
     xen_pfn_t mfns[] = { virt_to_mfn(&shared_info) };
     int n = sizeof(mfns)/sizeof(*mfns);
     pte_t nullpte = { };
@@ -567,11 +566,7 @@ static void clear_bootstrap(void)
     if (HYPERVISOR_update_va_mapping((unsigned long) &_text, nullpte, 
UVMF_INVLPG))
        printk("Unable to unmap first page\n");
 
-    set_xen_guest_handle(reservation.extent_start, mfns);
-    reservation.nr_extents = n;
-    reservation.extent_order = 0;
-    reservation.domid = DOMID_SELF;
-    if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != n)
+    if (free_physical_pages(mfns, n) != n)
        printk("Unable to free bootstrap pages\n");
 }
 
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/fbfront.c
--- a/extras/mini-os/fbfront.c  Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/fbfront.c  Thu Apr 24 14:08:29 2008 -0600
@@ -243,12 +243,12 @@ struct fbfront_dev {
     char *backend;
     int request_update;
 
-    char *data;
     int width;
     int height;
     int depth;
-    int line_length;
+    int stride;
     int mem_length;
+    int offset;
 };
 
 void fbfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
@@ -256,7 +256,7 @@ void fbfront_handler(evtchn_port_t port,
     wake_up(&fbfront_queue);
 }
 
-struct fbfront_dev *init_fbfront(char *nodename, void *data, int width, int 
height, int depth, int line_length, int mem_length)
+struct fbfront_dev *init_fbfront(char *nodename, unsigned long *mfns, int 
width, int height, int depth, int stride, int n)
 {
     xenbus_transaction_t xbt;
     char* err;
@@ -289,24 +289,17 @@ struct fbfront_dev *init_fbfront(char *n
     dev->width = s->width = width;
     dev->height = s->height = height;
     dev->depth = s->depth = depth;
-    dev->line_length = s->line_length = line_length;
-    dev->mem_length = s->mem_length = mem_length;
-
-    ASSERT(!((unsigned long)data & ~PAGE_MASK));
-    dev->data = data;
+    dev->stride = s->line_length = stride;
+    dev->mem_length = s->mem_length = n * PAGE_SIZE;
+    dev->offset = 0;
 
     const int max_pd = sizeof(s->pd) / sizeof(s->pd[0]);
     unsigned long mapped = 0;
 
-    for (i = 0; mapped < mem_length && i < max_pd; i++) {
+    for (i = 0; mapped < n && i < max_pd; i++) {
         unsigned long *pd = (unsigned long *) alloc_page();
-        for (j = 0; mapped < mem_length && j < PAGE_SIZE / sizeof(unsigned 
long); j++) {
-            /* Trigger CoW */
-            * ((char *)data + mapped) = 0;
-            barrier();
-            pd[j] = virtual_to_mfn((unsigned long) data + mapped);
-            mapped += PAGE_SIZE;
-        }
+        for (j = 0; mapped < n && j < PAGE_SIZE / sizeof(unsigned long); j++)
+            pd[j] = mfns[mapped++];
         for ( ; j < PAGE_SIZE / sizeof(unsigned long); j++)
             pd[j] = 0;
         s->pd[i] = virt_to_mfn(pd);
@@ -395,31 +388,11 @@ done:
     return dev;
 }
 
-void fbfront_update(struct fbfront_dev *dev, int x, int y, int width, int 
height)
+static void fbfront_out_event(struct fbfront_dev *dev, union xenfb_out_event 
*event)
 {
     struct xenfb_page *page = dev->page;
     uint32_t prod;
     DEFINE_WAIT(w);
-
-    if (dev->request_update <= 0)
-        return;
-
-    if (x < 0) {
-        width += x;
-        x = 0;
-    }
-    if (x + width > dev->width)
-        width = dev->width - x;
-
-    if (y < 0) {
-        height += y;
-        y = 0;
-    }
-    if (y + height > dev->height)
-        height = dev->height - y;
-
-    if (width <= 0 || height <= 0)
-        return;
 
     add_waiter(w, fbfront_queue);
     while (page->out_prod - page->out_cons == XENFB_OUT_RING_LEN)
@@ -428,14 +401,55 @@ void fbfront_update(struct fbfront_dev *
 
     prod = page->out_prod;
     mb(); /* ensure ring space available */
-    XENFB_OUT_RING_REF(page, prod).type = XENFB_TYPE_UPDATE;
-    XENFB_OUT_RING_REF(page, prod).update.x = x;
-    XENFB_OUT_RING_REF(page, prod).update.y = y;
-    XENFB_OUT_RING_REF(page, prod).update.width = width;
-    XENFB_OUT_RING_REF(page, prod).update.height = height;
+    XENFB_OUT_RING_REF(page, prod) = *event;
     wmb(); /* ensure ring contents visible */
     page->out_prod = prod + 1;
     notify_remote_via_evtchn(dev->evtchn);
+}
+
+void fbfront_update(struct fbfront_dev *dev, int x, int y, int width, int 
height)
+{
+    struct xenfb_update update;
+
+    if (dev->request_update <= 0)
+        return;
+
+    if (x < 0) {
+        width += x;
+        x = 0;
+    }
+    if (x + width > dev->width)
+        width = dev->width - x;
+
+    if (y < 0) {
+        height += y;
+        y = 0;
+    }
+    if (y + height > dev->height)
+        height = dev->height - y;
+
+    if (width <= 0 || height <= 0)
+        return;
+
+    update.type = XENFB_TYPE_UPDATE;
+    update.x = x;
+    update.y = y;
+    update.width = width;
+    update.height = height;
+    fbfront_out_event(dev, (union xenfb_out_event *) &update);
+}
+
+void fbfront_resize(struct fbfront_dev *dev, int width, int height, int 
stride, int depth, int offset)
+{
+    struct xenfb_resize resize;
+
+    resize.type = XENFB_TYPE_RESIZE;
+    dev->width  = resize.width = width;
+    dev->height = resize.height = height;
+    dev->stride = resize.stride = stride;
+    dev->depth  = resize.depth = depth;
+    dev->offset = resize.offset = offset;
+    fbfront_out_event(dev, (union xenfb_out_event *) &resize);
 }
 
 void shutdown_fbfront(struct fbfront_dev *dev)
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/hypervisor.c
--- a/extras/mini-os/hypervisor.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/hypervisor.c       Thu Apr 24 14:08:29 2008 -0600
@@ -66,6 +66,21 @@ void do_hypervisor_callback(struct pt_re
     in_callback = 0;
 }
 
+void force_evtchn_callback(void)
+{
+    vcpu_info_t *vcpu;
+    vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];
+    int save = vcpu->evtchn_upcall_mask;
+
+    while (vcpu->evtchn_upcall_pending) {
+        vcpu->evtchn_upcall_mask = 1;
+        barrier();
+        do_hypervisor_callback(NULL);
+        barrier();
+        vcpu->evtchn_upcall_mask = save;
+        barrier();
+    };
+}
 
 inline void mask_evtchn(u32 port)
 {
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/fbfront.h
--- a/extras/mini-os/include/fbfront.h  Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/include/fbfront.h  Thu Apr 24 14:08:29 2008 -0600
@@ -31,11 +31,12 @@ void shutdown_kbdfront(struct kbdfront_d
 void shutdown_kbdfront(struct kbdfront_dev *dev);
 
 
-struct fbfront_dev *init_fbfront(char *nodename, void *data, int width, int 
height, int depth, int line_length, int mem_length);
+struct fbfront_dev *init_fbfront(char *nodename, unsigned long *mfns, int 
width, int height, int depth, int stride, int n);
 #ifdef HAVE_LIBC
 int fbfront_open(struct fbfront_dev *dev);
 #endif
 
 void fbfront_update(struct fbfront_dev *dev, int x, int y, int width, int 
height);
+void fbfront_resize(struct fbfront_dev *dev, int width, int height, int 
stride, int depth, int offset);
 
 void shutdown_fbfront(struct fbfront_dev *dev);
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/hypervisor.h
--- a/extras/mini-os/include/hypervisor.h       Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/include/hypervisor.h       Thu Apr 24 14:08:29 2008 -0600
@@ -24,6 +24,7 @@
 #else
 #error "Unsupported architecture"
 #endif
+#include <traps.h>
 
 /*
  * a placeholder for the start of day information passed up from the hypervisor
@@ -37,7 +38,8 @@ extern union start_info_union start_info
 #define start_info (start_info_union.start_info)
 
 /* hypervisor.c */
-//void do_hypervisor_callback(struct pt_regs *regs);
+void force_evtchn_callback(void);
+void do_hypervisor_callback(struct pt_regs *regs);
 void mask_evtchn(u32 port);
 void unmask_evtchn(u32 port);
 void clear_evtchn(u32 port);
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/ia64/arch_mm.h
--- a/extras/mini-os/include/ia64/arch_mm.h     Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/include/ia64/arch_mm.h     Thu Apr 24 14:08:29 2008 -0600
@@ -38,6 +38,6 @@
 #define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, 0)
 /* TODO */
 #define map_zero(n, a) map_frames_ex(NULL, n, 0, 0, a, DOMID_SELF, 0, 0)
-#define do_map_zero(start, n) ((void)0)
+#define do_map_zero(start, n) ASSERT(n == 0)
 
 #endif /* __ARCH_MM_H__ */
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/ia64/os.h
--- a/extras/mini-os/include/ia64/os.h  Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/include/ia64/os.h  Thu Apr 24 14:08:29 2008 -0600
@@ -189,17 +189,6 @@ __synch_cmpxchg(volatile void *ptr, uint
        return ia64_cmpxchg_acq_64(ptr, old, new);
 }
 
-/*
- * Force a proper event-channel callback from Xen after clearing the
- * callback mask. We do this in a very simple manner, by making a call
- * down into Xen. The pending flag will be checked by Xen on return.
- */
-static inline void
-force_evtchn_callback(void)
-{
-       (void)HYPERVISOR_xen_version(0, NULL);
-}
-
 extern shared_info_t *HYPERVISOR_shared_info;
 
 static inline int
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/lib.h
--- a/extras/mini-os/include/lib.h      Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/include/lib.h      Thu Apr 24 14:08:29 2008 -0600
@@ -187,6 +187,7 @@ int alloc_fd(enum fd_type type);
 int alloc_fd(enum fd_type type);
 void close_all_files(void);
 extern struct thread *main_thread;
+void sparse(unsigned long data, size_t size);
 #endif
 
 #endif /* _LIB_H_ */
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/mm.h
--- a/extras/mini-os/include/mm.h       Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/include/mm.h       Thu Apr 24 14:08:29 2008 -0600
@@ -70,4 +70,6 @@ extern unsigned long heap, brk, heap_map
 extern unsigned long heap, brk, heap_mapped, heap_end;
 #endif
 
+int free_physical_pages(xen_pfn_t *mfns, int n);
+
 #endif /* _MM_H_ */
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/x86/os.h
--- a/extras/mini-os/include/x86/os.h   Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/include/x86/os.h   Thu Apr 24 14:08:29 2008 -0600
@@ -28,7 +28,6 @@ extern void do_exit(void) __attribute__(
 #include <xen/xen.h>
 
 
-#define force_evtchn_callback() ((void)HYPERVISOR_xen_version(0, 0))
 
 #define __KERNEL_CS  FLAT_KERNEL_CS
 #define __KERNEL_DS  FLAT_KERNEL_DS
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c   Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/kernel.c   Thu Apr 24 14:08:29 2008 -0600
@@ -297,9 +297,20 @@ static void fbfront_thread(void *p)
 {
     size_t line_length = WIDTH * (DEPTH / 8);
     size_t memsize = HEIGHT * line_length;
-
+    unsigned long *mfns;
+    int i, n = (memsize + PAGE_SIZE-1) / PAGE_SIZE;
+
+    memsize = n * PAGE_SIZE;
     fb = _xmalloc(memsize, PAGE_SIZE);
-    fb_dev = init_fbfront(NULL, fb, WIDTH, HEIGHT, DEPTH, line_length, 
memsize);
+    mfns = xmalloc_array(unsigned long, n);
+    for (i = 0; i < n; i++) {
+        /* trigger CoW */
+        ((char *) fb) [i * PAGE_SIZE] = 0;
+        barrier();
+        mfns[i] = virtual_to_mfn((char *) fb + i * PAGE_SIZE);
+    }
+    fb_dev = init_fbfront(NULL, mfns, WIDTH, HEIGHT, DEPTH, line_length, n);
+    xfree(mfns);
     if (!fb_dev) {
         xfree(fb);
         return;
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/lib/sys.c
--- a/extras/mini-os/lib/sys.c  Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/lib/sys.c  Thu Apr 24 14:08:29 2008 -0600
@@ -1108,6 +1108,41 @@ int munmap(void *start, size_t length)
     return 0;
 }
 
+void sparse(unsigned long data, size_t size)
+{
+    unsigned long newdata;
+    xen_pfn_t *mfns;
+    int i, n;
+
+    newdata = (data + PAGE_SIZE - 1) & PAGE_MASK;
+    if (newdata - data > size)
+        return;
+    size -= newdata - data;
+    data = newdata;
+    n = size / PAGE_SIZE;
+    size = n * PAGE_SIZE;
+
+    mfns = malloc(n * sizeof(*mfns));
+    for (i = 0; i < n; i++) {
+#ifdef LIBC_DEBUG
+        int j;
+        for (j=0; j<PAGE_SIZE; j++)
+            if (((char*)data + i * PAGE_SIZE)[j]) {
+                printk("%lx is not zero!\n", data + i * PAGE_SIZE + j);
+                exit(1);
+            }
+#endif
+        mfns[i] = virtual_to_mfn(data + i * PAGE_SIZE);
+    }
+
+    printk("sparsing %ldMB at %lx\n", size >> 20, data);
+
+    munmap((void *) data, size);
+    free_physical_pages(mfns, n);
+    do_map_zero(data, n);
+}
+
+
 /* Not supported by FS yet.  */
 unsupported_function_crash(link);
 unsupported_function(int, readlink, -1);
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/main.c
--- a/extras/mini-os/main.c     Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/main.c     Thu Apr 24 14:08:29 2008 -0600
@@ -39,6 +39,7 @@ void _fini(void)
 {
 }
 
+extern char __app_bss_start, __app_bss_end;
 static void call_main(void *p)
 {
     char *args, /**path,*/ *msg, *c;
@@ -56,6 +57,7 @@ static void call_main(void *p)
      * crashing. */
     //sleep(1);
 
+    sparse((unsigned long) &__app_bss_start, &__app_bss_end - 
&__app_bss_start);
     start_networking();
     init_fs_frontend();
 
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/mm.c
--- a/extras/mini-os/mm.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/mm.c       Thu Apr 24 14:08:29 2008 -0600
@@ -36,6 +36,7 @@
 
 #include <os.h>
 #include <hypervisor.h>
+#include <xen/memory.h>
 #include <mm.h>
 #include <types.h>
 #include <lib.h>
@@ -360,6 +361,17 @@ void free_pages(void *pointer, int order
    
 }
 
+int free_physical_pages(xen_pfn_t *mfns, int n)
+{
+    struct xen_memory_reservation reservation;
+
+    set_xen_guest_handle(reservation.extent_start, mfns);
+    reservation.nr_extents = n;
+    reservation.extent_order = 0;
+    reservation.domid = DOMID_SELF;
+    return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+}
+
 #ifdef HAVE_LIBC
 void *sbrk(ptrdiff_t increment)
 {
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/sched.c
--- a/extras/mini-os/sched.c    Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/sched.c    Thu Apr 24 14:08:29 2008 -0600
@@ -70,62 +70,15 @@ void inline print_runqueue(void)
     printk("\n");
 }
 
-/* Find the time when the next timeout expires. If this is more than
-   10 seconds from now, return 10 seconds from now. */
-static s_time_t blocking_time(void)
-{
-    struct thread *thread;
-    struct list_head *iterator;
-    s_time_t min_wakeup_time;
-    unsigned long flags;
-    local_irq_save(flags);
-    /* default-block the domain for 10 seconds: */
-    min_wakeup_time = NOW() + SECONDS(10);
-
-    /* Thread list needs to be protected */
-    list_for_each(iterator, &idle_thread->thread_list)
-    {
-        thread = list_entry(iterator, struct thread, thread_list);
-        if(!is_runnable(thread) && thread->wakeup_time != 0LL)
-        {
-            if(thread->wakeup_time < min_wakeup_time)
-            {
-                min_wakeup_time = thread->wakeup_time;
-            }
-        }
-    }
-    local_irq_restore(flags);
-    return(min_wakeup_time);
-}
-
-/* Wake up all threads with expired timeouts. */
-static void wake_expired(void)
-{
-    struct thread *thread;
-    struct list_head *iterator;
-    s_time_t now = NOW();
-    unsigned long flags;
-    local_irq_save(flags);
-    /* Thread list needs to be protected */
-    list_for_each(iterator, &idle_thread->thread_list)
-    {
-        thread = list_entry(iterator, struct thread, thread_list);
-        if(!is_runnable(thread) && thread->wakeup_time != 0LL)
-        {
-            if(thread->wakeup_time <= now)
-                wake(thread);
-        }
-    }
-    local_irq_restore(flags);
-}
-
 void schedule(void)
 {
     struct thread *prev, *next, *thread;
     struct list_head *iterator;
     unsigned long flags;
+
     prev = current;
     local_irq_save(flags); 
+
     if (in_callback) {
         printk("Must not call schedule() from a callback\n");
         BUG();
@@ -134,6 +87,45 @@ void schedule(void)
         printk("Must not call schedule() with IRQs disabled\n");
         BUG();
     }
+
+    do {
+        /* Examine all threads.
+           Find a runnable thread, but also wake up expired ones and find the
+           time when the next timeout expires, else use 10 seconds. */
+        s_time_t now = NOW();
+        s_time_t min_wakeup_time = now + SECONDS(10);
+        next = NULL;   
+        list_for_each(iterator, &idle_thread->thread_list)
+        {
+            thread = list_entry(iterator, struct thread, thread_list);
+            if (!is_runnable(thread) && thread->wakeup_time != 0LL)
+            {
+                if (thread->wakeup_time <= now)
+                    wake(thread);
+                else if (thread->wakeup_time < min_wakeup_time)
+                    min_wakeup_time = thread->wakeup_time;
+            }
+            if(is_runnable(thread)) 
+            {
+                next = thread;
+                /* Put this thread on the end of the list */
+                list_del(&thread->thread_list);
+                list_add_tail(&thread->thread_list, &idle_thread->thread_list);
+                break;
+            }
+        }
+        if (next)
+            break;
+        /* block until the next timeout expires, or for 10 secs, whichever 
comes first */
+        block_domain(min_wakeup_time);
+        /* handle pending events if any */
+        force_evtchn_callback();
+    } while(1);
+    local_irq_restore(flags);
+    /* Interrupting the switch is equivalent to having the next thread
+       interrupted at the return instruction, and therefore at a safe point. */
+    if(prev != next) switch_threads(prev, next);
+
     list_for_each(iterator, &exited_threads)
     {
         thread = list_entry(iterator, struct thread, thread_list);
@@ -144,24 +136,6 @@ void schedule(void)
             xfree(thread);
         }
     }
-    next = idle_thread;   
-    /* Thread list needs to be protected */
-    list_for_each(iterator, &idle_thread->thread_list)
-    {
-        thread = list_entry(iterator, struct thread, thread_list);
-        if(is_runnable(thread)) 
-        {
-            next = thread;
-            /* Put this thread on the end of the list */
-            list_del(&thread->thread_list);
-            list_add_tail(&thread->thread_list, &idle_thread->thread_list);
-            break;
-        }
-    }
-    local_irq_restore(flags);
-    /* Interrupting the switch is equivalent to having the next thread
-       inturrupted at the return instruction. And therefore at safe point. */
-    if(prev != next) switch_threads(prev, next);
 }
 
 struct thread* create_thread(char *name, void (*function)(void *), void *data)
@@ -267,32 +241,10 @@ void wake(struct thread *thread)
 
 void idle_thread_fn(void *unused)
 {
-    s_time_t until;
     threads_started = 1;
-    unsigned long flags;
-    struct list_head *iterator;
-    struct thread *next, *thread;
-    for(;;)
-    {
-        schedule();
-        next = NULL;
-        local_irq_save(flags);
-        list_for_each(iterator, &idle_thread->thread_list)
-        {
-            thread = list_entry(iterator, struct thread, thread_list);
-            if(is_runnable(thread)) 
-            {
-                next = thread;
-                break;
-            }
-        }
-        if (!next) {
-            /* block until the next timeout expires, or for 10 secs, whichever 
comes first */
-            until = blocking_time();
-            block_domain(until);
-        }
-        local_irq_restore(flags);
-        wake_expired();
+    while (1) {
+        block(current);
+        schedule();
     }
 }
 
diff -r 239b44eeb2d6 -r dc510776dd59 tools/blktap/drivers/blktapctrl.c
--- a/tools/blktap/drivers/blktapctrl.c Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/blktap/drivers/blktapctrl.c Thu Apr 24 14:08:29 2008 -0600
@@ -474,9 +474,8 @@ static int read_msg(int fd, int msgtype,
 
 }
 
-int launch_tapdisk(char *wrctldev, char *rdctldev)
-{
-       char *argv[] = { "tapdisk", wrctldev, rdctldev, NULL };
+static int launch_tapdisk_provider(char **argv)
+{
        pid_t child;
        
        if ((child = fork()) < 0)
@@ -490,7 +489,9 @@ int launch_tapdisk(char *wrctldev, char 
                            i != STDERR_FILENO)
                                close(i);
 
-               execvp("tapdisk", argv);
+               execvp(argv[0], argv);
+               DPRINTF("execvp failed: %d (%s)\n", errno, strerror(errno));
+               DPRINTF("PATH = %s\n", getenv("PATH"));
                _exit(1);
        } else {
                pid_t got;
@@ -498,28 +499,78 @@ int launch_tapdisk(char *wrctldev, char 
                        got = waitpid(child, NULL, 0);
                } while (got != child);
        }
+       return child;
+}
+
+static int launch_tapdisk(char *wrctldev, char *rdctldev)
+{
+       char *argv[] = { "tapdisk", wrctldev, rdctldev, NULL };
+
+       if (launch_tapdisk_provider(argv) < 0)
+               return -1;
+
        return 0;
 }
 
-/* Connect to qemu-dm */
-static int connect_qemu(blkif_t *blkif)
+static int launch_tapdisk_ioemu(void)
+{
+       char *argv[] = { "tapdisk-ioemu", NULL };
+       return launch_tapdisk_provider(argv);
+}
+
+/* 
+ * Connect to an ioemu based disk provider (qemu-dm or tapdisk-ioemu)
+ *
+ * If the domain has a device model, connect to qemu-dm through the
+ * domain specific pipe. Otherwise use a single tapdisk-ioemu instance
+ * which is represented by domid 0 and provides access for Dom0 and
+ * all DomUs without device model.
+ */
+static int connect_qemu(blkif_t *blkif, int domid)
 {
        char *rdctldev, *wrctldev;
-       
-       if (asprintf(&rdctldev, BLKTAP_CTRL_DIR "/qemu-read-%d", 
-                       blkif->domid) < 0)
-               return -1;
-
-       if (asprintf(&wrctldev, BLKTAP_CTRL_DIR "/qemu-write-%d", 
-                       blkif->domid) < 0) {
+
+       static int tapdisk_ioemu_pid = 0;
+       static int dom0_readfd = 0;
+       static int dom0_writefd = 0;
+       
+       if (asprintf(&rdctldev, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) < 0)
+               return -1;
+
+       if (asprintf(&wrctldev, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) < 0) {
                free(rdctldev);
                return -1;
        }
 
        DPRINTF("Using qemu blktap pipe: %s\n", rdctldev);
        
-       blkif->fds[READ] = open_ctrl_socket(wrctldev);
-       blkif->fds[WRITE] = open_ctrl_socket(rdctldev);
+       if (domid == 0) {
+               /*
+                * tapdisk-ioemu exits as soon as the last image is 
+                * disconnected. Check if it is still running.
+                */
+               if (tapdisk_ioemu_pid == 0 || kill(tapdisk_ioemu_pid, 0)) {
+                       /* No device model and tapdisk-ioemu doesn't run yet */
+                       DPRINTF("Launching tapdisk-ioemu\n");
+                       tapdisk_ioemu_pid = launch_tapdisk_ioemu();
+                       
+                       dom0_readfd = open_ctrl_socket(wrctldev);
+                       dom0_writefd = open_ctrl_socket(rdctldev);
+               }
+
+               DPRINTF("Using tapdisk-ioemu connection\n");
+               blkif->fds[READ] = dom0_readfd;
+               blkif->fds[WRITE] = dom0_writefd;
+       } else if (access(rdctldev, R_OK | W_OK) == 0) {
+               /* Use existing pipe to the device model */
+               DPRINTF("Using qemu-dm connection\n");
+               blkif->fds[READ] = open_ctrl_socket(wrctldev);
+               blkif->fds[WRITE] = open_ctrl_socket(rdctldev);
+       } else {
+               /* No device model => try with tapdisk-ioemu */
+               DPRINTF("No device model\n");
+               connect_qemu(blkif, 0);
+       }
        
        free(rdctldev);
        free(wrctldev);
@@ -599,7 +650,7 @@ int blktapctrl_new_blkif(blkif_t *blkif)
 
                if (!exist) {
                        if (type == DISK_TYPE_IOEMU) {
-                               if (connect_qemu(blkif))
+                               if (connect_qemu(blkif, blkif->domid))
                                        goto fail;
                        } else {
                                if (connect_tapdisk(blkif, minor))
diff -r 239b44eeb2d6 -r dc510776dd59 tools/blktap/drivers/tapdisk.h
--- a/tools/blktap/drivers/tapdisk.h    Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/blktap/drivers/tapdisk.h    Thu Apr 24 14:08:29 2008 -0600
@@ -235,7 +235,7 @@ static disk_info_t ioemu_disk = {
        DISK_TYPE_IOEMU,
        "ioemu disk",
        "ioemu",
-       0,
+       1,
 #ifdef TAPDISK
        NULL
 #endif
diff -r 239b44eeb2d6 -r dc510776dd59 tools/console/daemon/io.c
--- a/tools/console/daemon/io.c Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/console/daemon/io.c Thu Apr 24 14:08:29 2008 -0600
@@ -63,6 +63,7 @@ extern int log_time_hv;
 extern int log_time_hv;
 extern int log_time_guest;
 extern char *log_dir;
+extern int discard_overflowed_data;
 
 static int log_time_hv_needts = 1;
 static int log_time_guest_needts = 1;
@@ -201,7 +202,7 @@ static void buffer_append(struct domain 
                              dom->domid, errno, strerror(errno));
        }
 
-       if (buffer->max_capacity &&
+       if (discard_overflowed_data && buffer->max_capacity &&
            buffer->size > buffer->max_capacity) {
                /* Discard the middle of the data. */
 
@@ -228,6 +229,11 @@ static void buffer_advance(struct buffer
        if (buffer->consumed == buffer->size) {
                buffer->consumed = 0;
                buffer->size = 0;
+               if (buffer->max_capacity &&
+                   buffer->capacity > buffer->max_capacity) {
+                       buffer->data = realloc(buffer->data, 
buffer->max_capacity);
+                       buffer->capacity = buffer->max_capacity;
+               }
        }
 }
 
@@ -1005,9 +1011,13 @@ void handle_io(void)
                                    d->next_period < next_timeout)
                                        next_timeout = d->next_period;
                        } else if (d->xce_handle != -1) {
-                               int evtchn_fd = xc_evtchn_fd(d->xce_handle);
-                               FD_SET(evtchn_fd, &readfds);
-                               max_fd = MAX(evtchn_fd, max_fd);
+                               if (discard_overflowed_data ||
+                                   !d->buffer.max_capacity ||
+                                   d->buffer.size < d->buffer.max_capacity) {
+                                       int evtchn_fd = 
xc_evtchn_fd(d->xce_handle);
+                                       FD_SET(evtchn_fd, &readfds);
+                                       max_fd = MAX(evtchn_fd, max_fd);
+                               }
                        }
 
                        if (d->master_fd != -1) {
diff -r 239b44eeb2d6 -r dc510776dd59 tools/console/daemon/main.c
--- a/tools/console/daemon/main.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/console/daemon/main.c       Thu Apr 24 14:08:29 2008 -0600
@@ -38,6 +38,7 @@ int log_time_hv = 0;
 int log_time_hv = 0;
 int log_time_guest = 0;
 char *log_dir = NULL;
+int discard_overflowed_data = 1;
 
 static void handle_hup(int sig)
 {
@@ -46,7 +47,7 @@ static void handle_hup(int sig)
 
 static void usage(char *name)
 {
-       printf("Usage: %s [-h] [-V] [-v] [-i] [--log=none|guest|hv|all] 
[--log-dir=DIR] [--pid-file=PATH] [-t, --timestamp=none|guest|hv|all]\n", name);
+       printf("Usage: %s [-h] [-V] [-v] [-i] [--log=none|guest|hv|all] 
[--log-dir=DIR] [--pid-file=PATH] [-t, --timestamp=none|guest|hv|all] [-o, 
--overflow-data=discard|keep]\n", name);
 }
 
 static void version(char *name)
@@ -56,7 +57,7 @@ static void version(char *name)
 
 int main(int argc, char **argv)
 {
-       const char *sopts = "hVvit:";
+       const char *sopts = "hVvit:o:";
        struct option lopts[] = {
                { "help", 0, 0, 'h' },
                { "version", 0, 0, 'V' },
@@ -66,6 +67,7 @@ int main(int argc, char **argv)
                { "log-dir", 1, 0, 'r' },
                { "pid-file", 1, 0, 'p' },
                { "timestamp", 1, 0, 't' },
+               { "overflow-data", 1, 0, 'o'},
                { 0 },
        };
        bool is_interactive = false;
@@ -119,6 +121,13 @@ int main(int argc, char **argv)
                                log_time_hv = 0;
                        }
                        break;
+               case 'o':
+                       if (!strcmp(optarg, "keep")) {
+                               discard_overflowed_data = 0;
+                       } else if (!strcmp(optarg, "discard")) {
+                               discard_overflowed_data = 1;
+                       }
+                       break;
                case '?':
                        fprintf(stderr,
                                "Try `%s --help' for more information\n",
diff -r 239b44eeb2d6 -r dc510776dd59 tools/examples/blktap
--- a/tools/examples/blktap     Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/examples/blktap     Thu Apr 24 14:08:29 2008 -0600
@@ -54,10 +54,6 @@ check_blktap_sharing()
     echo 'ok'
 }
 
-FRONTEND_ID=$(xenstore_read "$XENBUS_PATH/frontend-id")
-FRONTEND_UUID=$(xenstore_read "/local/domain/$FRONTEND_ID/vm")
-mode=$(xenstore_read "$XENBUS_PATH/mode")
-mode=$(canonicalise_mode "$mode")
 
 t=$(xenstore_read_default "$XENBUS_PATH/type" 'MISSING')
 if [ -n "$t" ]
@@ -77,15 +73,21 @@ else
     file="$p"
 fi
 
-if [ "$mode" != '!' ] 
-then
-    result=$(check_blktap_sharing "$file" "$mode")
-    [ "$result" = 'ok' ] || ebusy "$file already in use by other domain"
-fi
-
 if [ "$command" = 'add' ]
 then
     [ -e "$file" ] || { fatal $file does not exist; }
+
+    FRONTEND_ID=$(xenstore_read "$XENBUS_PATH/frontend-id")
+    FRONTEND_UUID=$(xenstore_read "/local/domain/$FRONTEND_ID/vm")
+    mode=$(xenstore_read "$XENBUS_PATH/mode")
+    mode=$(canonicalise_mode "$mode")
+
+    if [ "$mode" != '!' ] 
+    then
+        result=$(check_blktap_sharing "$file" "$mode")
+        [ "$result" = 'ok' ] || ebusy "$file already in use by other domain"
+    fi
+
     success
 fi
 
diff -r 239b44eeb2d6 -r dc510776dd59 tools/firmware/hvmloader/Makefile
--- a/tools/firmware/hvmloader/Makefile Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/firmware/hvmloader/Makefile Thu Apr 24 14:08:29 2008 -0600
@@ -28,8 +28,9 @@ LOADADDR = 0x100000
 
 CFLAGS += $(CFLAGS_include) -I.
 
-SRCS = hvmloader.c mp_tables.c util.c smbios.c 32bitbios_support.c smp.c
-OBJS = $(patsubst %.c,%.o,$(SRCS))
+SRCS  = hvmloader.c mp_tables.c util.c smbios.c 
+SRCS += 32bitbios_support.c smp.c cacheattr.c
+OBJS  = $(patsubst %.c,%.o,$(SRCS))
 
 .PHONY: all
 all: hvmloader
diff -r 239b44eeb2d6 -r dc510776dd59 tools/firmware/hvmloader/acpi/build.c
--- a/tools/firmware/hvmloader/acpi/build.c     Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/firmware/hvmloader/acpi/build.c     Thu Apr 24 14:08:29 2008 -0600
@@ -84,8 +84,8 @@ static int construct_bios_info_table(uin
 
     bios_info->hpet_present = hpet_exists(ACPI_HPET_ADDRESS);
 
-    bios_info->pci_min = 0xf0000000;
-    bios_info->pci_len = 0x0c000000;
+    bios_info->pci_min = PCI_MEMBASE;
+    bios_info->pci_len = PCI_MEMSIZE;
 
     return align16(sizeof(*bios_info));
 }
diff -r 239b44eeb2d6 -r dc510776dd59 tools/firmware/hvmloader/cacheattr.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/firmware/hvmloader/cacheattr.c      Thu Apr 24 14:08:29 2008 -0600
@@ -0,0 +1,99 @@
+/*
+ * cacheattr.c: MTRR and PAT initialisation.
+ *
+ * Copyright (c) 2008, Citrix Systems, Inc.
+ * 
+ * Authors:
+ *    Keir Fraser <keir.fraser@xxxxxxxxxx>
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include "util.h"
+#include "config.h"
+
+#define MSR_MTRRphysBase(reg) (0x200 + 2 * (reg))
+#define MSR_MTRRphysMask(reg) (0x200 + 2 * (reg) + 1)
+#define MSR_MTRRcap          0x00fe
+#define MSR_MTRRfix64K_00000 0x0250
+#define MSR_MTRRfix16K_80000 0x0258
+#define MSR_MTRRfix16K_A0000 0x0259
+#define MSR_MTRRfix4K_C0000  0x0268
+#define MSR_MTRRfix4K_C8000  0x0269
+#define MSR_MTRRfix4K_D0000  0x026a
+#define MSR_MTRRfix4K_D8000  0x026b
+#define MSR_MTRRfix4K_E0000  0x026c
+#define MSR_MTRRfix4K_E8000  0x026d
+#define MSR_MTRRfix4K_F0000  0x026e
+#define MSR_MTRRfix4K_F8000  0x026f
+#define MSR_PAT              0x0277
+#define MSR_MTRRdefType      0x02ff
+
+void cacheattr_init(void)
+{
+    uint32_t eax, ebx, ecx, edx;
+    uint64_t mtrr_cap, mtrr_def, content, addr_mask;
+    unsigned int i, nr_var_ranges, phys_bits = 36;
+
+    /* Does the CPU support architectural MTRRs? */
+    cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
+    if ( !(edx & (1u << 12)) )
+         return;
+
+    /* Find the physical address size for this CPU. */
+    cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
+    if ( eax >= 0x80000008 )
+    {
+        cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
+        phys_bits = (uint8_t)eax;
+    }
+
+    printf("%u-bit phys ... ", phys_bits);
+
+    addr_mask = ((1ull << phys_bits) - 1) & ~((1ull << 12) - 1);
+    mtrr_cap = rdmsr(MSR_MTRRcap);
+    mtrr_def = (1u << 11) | 6; /* E, default type WB */
+
+    /* Fixed-range MTRRs supported? */
+    if ( mtrr_cap & (1u << 8) )
+    {
+        /* 0x00000-0x9ffff: Write Back (WB) */
+        content = 0x0606060606060606ull;
+        wrmsr(MSR_MTRRfix64K_00000, content);
+        wrmsr(MSR_MTRRfix16K_80000, content);
+        /* 0xa0000-0xbffff: Write Combining (WC) */
+        if ( mtrr_cap & (1u << 10) ) /* WC supported? */
+            content = 0x0101010101010101ull;
+        wrmsr(MSR_MTRRfix16K_A0000, content);
+        /* 0xc0000-0xfffff: Write Back (WB) */
+        content = 0x0606060606060606ull;
+        for ( i = 0; i < 8; i++ )
+            wrmsr(MSR_MTRRfix4K_C0000 + i, content);
+        mtrr_def |= 1u << 10; /* FE */
+        printf("fixed MTRRs ... ");
+    }
+
+    /* Variable-range MTRRs supported? */
+    nr_var_ranges = (uint8_t)mtrr_cap;
+    if ( nr_var_ranges != 0 )
+    {
+        /* A single UC range covering PCI space. */
+        wrmsr(MSR_MTRRphysBase(0), PCI_MEMBASE);
+        wrmsr(MSR_MTRRphysMask(0),
+              ((uint64_t)(int32_t)PCI_MEMBASE & addr_mask) | (1u << 11));
+        printf("var MTRRs ... ");
+    }
+
+    wrmsr(MSR_MTRRdefType, mtrr_def);
+}
diff -r 239b44eeb2d6 -r dc510776dd59 tools/firmware/hvmloader/config.h
--- a/tools/firmware/hvmloader/config.h Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/firmware/hvmloader/config.h Thu Apr 24 14:08:29 2008 -0600
@@ -10,6 +10,9 @@
 
 #define PCI_ISA_DEVFN       0x08    /* dev 1, fn 0 */
 #define PCI_ISA_IRQ_MASK    0x0c20U /* ISA IRQs 5,10,11 are PCI connected */
+
+#define PCI_MEMBASE         0xf0000000
+#define PCI_MEMSIZE         0x0c000000
 
 #define ROMBIOS_SEG            0xF000
 #define ROMBIOS_BEGIN          0x000F0000
diff -r 239b44eeb2d6 -r dc510776dd59 tools/firmware/hvmloader/hvmloader.c
--- a/tools/firmware/hvmloader/hvmloader.c      Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/firmware/hvmloader/hvmloader.c      Thu Apr 24 14:08:29 2008 -0600
@@ -96,6 +96,7 @@ asm (
     "stack:                          \n"
     "    .skip    0x4000             \n"
     "stack_top:                      \n"
+    "    .text                       \n"
     );
 
 void smp_initialise(void);
@@ -158,7 +159,7 @@ static void pci_setup(void)
     struct resource {
         uint32_t base, max;
     } *resource;
-    struct resource mem_resource = { 0xf0000000, 0xfc000000 };
+    struct resource mem_resource = { PCI_MEMBASE, PCI_MEMBASE + PCI_MEMSIZE };
     struct resource io_resource  = { 0xc000, 0x10000 };
 
     /* Create a list of device BARs in descending order of size. */
diff -r 239b44eeb2d6 -r dc510776dd59 tools/firmware/hvmloader/smp.c
--- a/tools/firmware/hvmloader/smp.c    Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/firmware/hvmloader/smp.c    Thu Apr 24 14:08:29 2008 -0600
@@ -66,12 +66,15 @@ asm (
     "stack:                          \n"
     "    .skip    0x4000             \n"
     "stack_top:                      \n"
+    "    .text                       \n"
     );
+
+extern void cacheattr_init(void);
 
 /*static*/ void ap_start(void)
 {
     printf(" - CPU%d ... ", ap_cpuid);
-
+    cacheattr_init();
     printf("done.\n");
     wmb();
     ap_callin = 1;
@@ -121,12 +124,10 @@ void smp_initialise(void)
 {
     unsigned int i, nr_cpus = get_vcpu_nr();
 
-    if ( nr_cpus <= 1 )
-        return;
-
     memcpy((void *)AP_BOOT_EIP, ap_boot_start, ap_boot_end - ap_boot_start);
 
     printf("Multiprocessor initialisation:\n");
+    ap_start();
     for ( i = 1; i < nr_cpus; i++ )
         boot_cpu(i);
 }
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/Makefile
--- a/tools/ioemu/Makefile      Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/Makefile      Thu Apr 24 14:08:29 2008 -0600
@@ -87,7 +87,7 @@ endif
 
 install: all $(if $(BUILD_DOCS),install-doc)
        mkdir -p "$(DESTDIR)$(bindir)"
-       $(INSTALL) -m 755 -s $(TOOLS) "$(DESTDIR)$(prefix)/sbin"
+       $(INSTALL) -m 755 $(TOOLS) "$(DESTDIR)$(SBINDIR)"
 #      mkdir -p "$(DESTDIR)$(datadir)"
 #      for x in bios.bin vgabios.bin vgabios-cirrus.bin ppc_rom.bin \
 #              video.x openbios-sparc32 linux_boot.bin pxe-ne2k_pci.bin \
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/hw/cirrus_vga.c
--- a/tools/ioemu/hw/cirrus_vga.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/hw/cirrus_vga.c       Thu Apr 24 14:08:29 2008 -0600
@@ -2595,6 +2595,10 @@ static void *set_vram_mapping(unsigned l
 
     memset(vram_pointer, 0, nr_extents * TARGET_PAGE_SIZE);
 
+#ifdef CONFIG_STUBDOM
+    xenfb_pv_display_start(vram_pointer);
+#endif
+
     free(extent_start);
 
     return vram_pointer;
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/hw/pci.c
--- a/tools/ioemu/hw/pci.c      Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/hw/pci.c      Thu Apr 24 14:08:29 2008 -0600
@@ -79,18 +79,30 @@ int pci_bus_num(PCIBus *s)
 
 void pci_device_save(PCIDevice *s, QEMUFile *f)
 {
-    qemu_put_be32(f, 1); /* PCI device version */
+    uint8_t irq_state = 0;
+    int i;
+    qemu_put_be32(f, 2); /* PCI device version */
     qemu_put_buffer(f, s->config, 256);
+    for (i = 0; i < 4; i++)
+        irq_state |= !!s->irq_state[i] << i;
+    qemu_put_buffer(f, &irq_state, 1);
 }
 
 int pci_device_load(PCIDevice *s, QEMUFile *f)
 {
     uint32_t version_id;
     version_id = qemu_get_be32(f);
-    if (version_id != 1)
+    if (version_id != 1 && version_id != 2)
         return -EINVAL;
     qemu_get_buffer(f, s->config, 256);
     pci_update_mappings(s);
+    if (version_id == 2) {
+        uint8_t irq_state;
+        int i;
+        qemu_get_buffer(f, &irq_state, 1);
+        for (i = 0; i < 4; i++)
+            pci_set_irq(s, i, !!(irq_state >> i));
+    }
     return 0;
 }
 
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/hw/vga.c
--- a/tools/ioemu/hw/vga.c      Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/hw/vga.c      Thu Apr 24 14:08:29 2008 -0600
@@ -2067,8 +2067,8 @@ void vga_common_init(VGAState *s, Displa
                                  & ~(TARGET_PAGE_SIZE - 1));
 
     /* Video RAM must be 128-bit aligned for SSE optimizations later */
-    s->vram_alloc = qemu_malloc(vga_ram_size + 15);
-    s->vram_ptr = (uint8_t *)((long)(s->vram_alloc + 15) & ~15L);
+    /* and page-aligned for PVFB memory sharing */
+    s->vram_ptr = s->vram_alloc = qemu_memalign(TARGET_PAGE_SIZE, 
vga_ram_size);
 
     s->vram_offset = vga_ram_offset;
     s->vram_size = vga_ram_size;
@@ -2210,7 +2210,7 @@ void *vga_update_vram(VGAState *s, void 
     }
 
     if (!vga_ram_base) {
-        vga_ram_base = qemu_malloc(vga_ram_size + TARGET_PAGE_SIZE + 1);
+        vga_ram_base = qemu_memalign(TARGET_PAGE_SIZE, vga_ram_size + 
TARGET_PAGE_SIZE + 1);
         if (!vga_ram_base) {
             fprintf(stderr, "reallocate error\n");
             return NULL;
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/hw/xen_blktap.c
--- a/tools/ioemu/hw/xen_blktap.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/hw/xen_blktap.c       Thu Apr 24 14:08:29 2008 -0600
@@ -581,17 +581,13 @@ static void handle_blktap_ctrlmsg(void* 
  */
 static int open_ctrl_socket(char *devname)
 {
-       int ret;
        int ipc_fd;
 
        if (mkdir(BLKTAP_CTRL_DIR, 0755) == 0)
                DPRINTF("Created %s directory\n", BLKTAP_CTRL_DIR);
 
-       ret = mkfifo(devname,S_IRWXU|S_IRWXG|S_IRWXO);
-       if ( (ret != 0) && (errno != EEXIST) ) {
-               DPRINTF("ERROR: pipe failed (%d)\n", errno);
+       if (access(devname, R_OK | W_OK))
                return -1;
-       }
 
        ipc_fd = open(devname,O_RDWR|O_NONBLOCK);
 
@@ -601,42 +597,6 @@ static int open_ctrl_socket(char *devnam
        }
 
        return ipc_fd;
-}
-
-/**
- * Unmaps all disks and closes their pipes
- */
-void shutdown_blktap(void)
-{
-       fd_list_entry_t *ptr;
-       struct td_state *s;
-       char *devname;
-
-       DPRINTF("Shutdown blktap\n");
-
-       /* Unmap all disks */
-       ptr = fd_start;
-       while (ptr != NULL) {
-               s = ptr->s;
-               unmap_disk(s);
-               close(ptr->tap_fd);
-               ptr = ptr->next;
-       }
-
-       /* Delete control pipes */
-       if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) >= 0) {
-               DPRINTF("Delete %s\n", devname);
-               if (unlink(devname))
-                       DPRINTF("Could not delete: %s\n", strerror(errno));
-               free(devname);
-       }
-       
-       if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) >= 0) { 
-               DPRINTF("Delete %s\n", devname);
-               if (unlink(devname))
-                       DPRINTF("Could not delete: %s\n", strerror(errno));
-               free(devname);
-       }
 }
 
 /**
@@ -679,8 +639,5 @@ int init_blktap(void)
        /* Attach a handler to the read pipe (called from qemu main loop) */
        qemu_set_fd_handler2(read_fd, NULL, &handle_blktap_ctrlmsg, NULL, NULL);
 
-       /* Register handler to clean up when the domain is destroyed */
-       atexit(&shutdown_blktap);
-
        return 0;
 }
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/hw/xenfb.c
--- a/tools/ioemu/hw/xenfb.c    Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/hw/xenfb.c    Thu Apr 24 14:08:29 2008 -0600
@@ -1235,14 +1235,10 @@ static struct semaphore kbd_sem = __SEMA
 static struct semaphore kbd_sem = __SEMAPHORE_INITIALIZER(kbd_sem, 0);
 static struct kbdfront_dev *kbd_dev;
 static char *kbd_path, *fb_path;
+static void *vga_vram, *nonshared_vram;
+static DisplayState *xenfb_ds;
 
 static unsigned char linux2scancode[KEY_MAX + 1];
-
-#define WIDTH 1024
-#define HEIGHT 768
-#define DEPTH 32
-#define LINESIZE (1280 * (DEPTH / 8))
-#define MEMSIZE (LINESIZE * HEIGHT)
 
 int xenfb_connect_vkbd(const char *path)
 {
@@ -1256,33 +1252,73 @@ int xenfb_connect_vfb(const char *path)
     return 0;
 }
 
-static void xenfb_pv_update(DisplayState *s, int x, int y, int w, int h)
-{
-    struct fbfront_dev *fb_dev = s->opaque;
+static void xenfb_pv_update(DisplayState *ds, int x, int y, int w, int h)
+{
+    struct fbfront_dev *fb_dev = ds->opaque;
+    if (!fb_dev)
+        return;
     fbfront_update(fb_dev, x, y, w, h);
 }
 
-static void xenfb_pv_resize(DisplayState *s, int w, int h, int linesize)
-{
-    struct fbfront_dev *fb_dev = s->opaque;
-    fprintf(stderr,"resize to %dx%d required\n", w, h);
-    s->width = w;
-    s->height = h;
-    /* TODO: send resize event if supported */
-    memset(s->data, 0, MEMSIZE);
-    fbfront_update(fb_dev, 0, 0, WIDTH, HEIGHT);
+static void xenfb_pv_resize(DisplayState *ds, int w, int h, int linesize)
+{
+    struct fbfront_dev *fb_dev = ds->opaque;
+    fprintf(stderr,"resize to %dx%d, %d required\n", w, h, linesize);
+    ds->width = w;
+    ds->height = h;
+    if (!linesize)
+        ds->shared_buf = 0;
+    if (!ds->shared_buf)
+        linesize = w * 4;
+    ds->linesize = linesize;
+    if (!fb_dev)
+        return;
+    if (ds->shared_buf) {
+        ds->data = NULL;
+    } else {
+        ds->data = nonshared_vram;
+        fbfront_resize(fb_dev, w, h, linesize, ds->depth, VGA_RAM_SIZE);
+    }
 }
 
 static void xenfb_pv_colourdepth(DisplayState *ds, int depth)
 {
-    /* TODO: send redepth event if supported */
+    struct fbfront_dev *fb_dev = ds->opaque;
     static int lastdepth = -1;
+    if (!depth) {
+        ds->shared_buf = 0;
+        ds->depth = 32;
+    } else {
+        ds->shared_buf = 1;
+        ds->depth = depth;
+    }
     if (depth != lastdepth) {
         fprintf(stderr,"redepth to %d required\n", depth);
         lastdepth = depth;
+    } else return;
+    if (!fb_dev)
+        return;
+    if (ds->shared_buf) {
+        ds->data = NULL;
+    } else {
+        ds->data = nonshared_vram;
+        fbfront_resize(fb_dev, ds->width, ds->height, ds->linesize, ds->depth, 
VGA_RAM_SIZE);
     }
-    /* We can't redepth for now */
-    ds->depth = DEPTH;
+}
+
+static void xenfb_pv_setdata(DisplayState *ds, void *pixels)
+{
+    struct fbfront_dev *fb_dev = ds->opaque;
+    int offset = pixels - vga_vram;
+    ds->data = pixels;
+    if (!fb_dev)
+        return;
+    fbfront_resize(fb_dev, ds->width, ds->height, ds->linesize, ds->depth, 
offset);
+}
+
+static void xenfb_pv_refresh(DisplayState *ds)
+{
+    vga_hw_update();
 }
 
 static void xenfb_kbd_handler(void *opaque)
@@ -1373,13 +1409,6 @@ static void xenfb_kbd_handler(void *opaq
     }
 }
 
-static void xenfb_pv_refresh(DisplayState *ds)
-{
-    /* always request negociation */
-    ds->depth = -1;
-    vga_hw_update();
-}
-
 static void kbdfront_thread(void *p)
 {
     int scancode, keycode;
@@ -1399,40 +1428,72 @@ static void kbdfront_thread(void *p)
 
 int xenfb_pv_display_init(DisplayState *ds)
 {
-    void *data;
+    if (!fb_path || !kbd_path)
+        return -1;
+
+    create_thread("kbdfront", kbdfront_thread, (void*) kbd_path);
+
+    xenfb_ds = ds;
+
+    ds->data = nonshared_vram = qemu_memalign(PAGE_SIZE, VGA_RAM_SIZE);
+    memset(ds->data, 0, VGA_RAM_SIZE);
+    ds->depth = 32;
+    ds->bgr = 0;
+    ds->width = 640;
+    ds->height = 400;
+    ds->linesize = 640 * 4;
+    ds->dpy_update = xenfb_pv_update;
+    ds->dpy_resize = xenfb_pv_resize;
+    ds->dpy_colourdepth = xenfb_pv_colourdepth;
+    ds->dpy_setdata = xenfb_pv_setdata;
+    ds->dpy_refresh = xenfb_pv_refresh;
+    return 0;
+}
+
+int xenfb_pv_display_start(void *data)
+{
+    DisplayState *ds = xenfb_ds;
     struct fbfront_dev *fb_dev;
     int kbd_fd;
+    int offset = 0;
+    unsigned long *mfns;
+    int n = VGA_RAM_SIZE / PAGE_SIZE;
+    int i;
 
     if (!fb_path || !kbd_path)
-        return -1;
-
-    create_thread("kbdfront", kbdfront_thread, (void*) kbd_path);
-
-    data = qemu_memalign(PAGE_SIZE, VGA_RAM_SIZE);
-    fb_dev = init_fbfront(fb_path, data, WIDTH, HEIGHT, DEPTH, LINESIZE, 
MEMSIZE);
+        return 0;
+
+    vga_vram = data;
+    mfns = malloc(2 * n * sizeof(*mfns));
+    for (i = 0; i < n; i++)
+        mfns[i] = virtual_to_mfn(vga_vram + i * PAGE_SIZE);
+    for (i = 0; i < n; i++)
+        mfns[n + i] = virtual_to_mfn(nonshared_vram + i * PAGE_SIZE);
+
+    fb_dev = init_fbfront(fb_path, mfns, ds->width, ds->height, ds->depth, 
ds->linesize, 2 * n);
+    free(mfns);
     if (!fb_dev) {
         fprintf(stderr,"can't open frame buffer\n");
         exit(1);
     }
     free(fb_path);
 
+    if (ds->shared_buf) {
+        offset = (void*) ds->data - vga_vram;
+    } else {
+        offset = VGA_RAM_SIZE;
+        ds->data = nonshared_vram;
+    }
+    if (offset)
+        fbfront_resize(fb_dev, ds->width, ds->height, ds->linesize, ds->depth, 
offset);
+
     down(&kbd_sem);
     free(kbd_path);
 
     kbd_fd = kbdfront_open(kbd_dev);
     qemu_set_fd_handler(kbd_fd, xenfb_kbd_handler, NULL, ds);
 
-    ds->data = data;
-    ds->linesize = LINESIZE;
-    ds->depth = DEPTH;
-    ds->bgr = 0;
-    ds->width = WIDTH;
-    ds->height = HEIGHT;
-    ds->dpy_update = xenfb_pv_update;
-    ds->dpy_resize = xenfb_pv_resize;
-    ds->dpy_colourdepth = xenfb_pv_colourdepth;
-    ds->dpy_refresh = xenfb_pv_refresh;
-    ds->opaque = fb_dev;
+    xenfb_ds->opaque = fb_dev;
     return 0;
 }
 #endif
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/tapdisk-ioemu.c
--- a/tools/ioemu/tapdisk-ioemu.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/tapdisk-ioemu.c       Thu Apr 24 14:08:29 2008 -0600
@@ -4,6 +4,7 @@
 #include <string.h>
 #include <stdint.h>
 #include <signal.h>
+#include <unistd.h>
 #include <sys/time.h>
 
 #include <assert.h>
@@ -15,6 +16,8 @@ extern void bdrv_init(void);
 
 extern void *qemu_mallocz(size_t size);
 extern void qemu_free(void *ptr);
+
+extern void *fd_start;
 
 int domid = 0;
 FILE* logfile;
@@ -95,12 +98,17 @@ int main(void)
     int max_fd;
     fd_set rfds;
     struct timeval tv;
+    void *old_fd_start = NULL;
 
     logfile = stderr;
     
     bdrv_init();
     qemu_aio_init();
     init_blktap();
+
+    /* Daemonize */
+    if (fork() != 0)
+       exit(0);
    
     /* 
      * Main loop: Pass events to the corrsponding handlers and check for
@@ -137,6 +145,12 @@ int main(void)
             } else 
                 pioh = &ioh->next;
         }
+
+        /* Exit when the last image has been closed */
+        if (old_fd_start != NULL && fd_start == NULL)
+            exit(0);
+
+        old_fd_start = fd_start;
     }
     return 0;
 }
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c      Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/target-i386-dm/helper2.c      Thu Apr 24 14:08:29 2008 -0600
@@ -482,7 +482,7 @@ void cpu_handle_ioreq(void *opaque)
     CPUState *env = opaque;
     ioreq_t *req = cpu_get_ioreq();
 
-    handle_buffered_io(env);
+    __handle_buffered_iopage(env);
     if (req) {
         __handle_ioreq(env, req);
 
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/vl.c  Thu Apr 24 14:08:29 2008 -0600
@@ -140,9 +140,9 @@
 #define MAX_IOPORTS 65536
 
 const char *bios_dir = CONFIG_QEMU_SHAREDIR;
-void **ioport_opaque;
-IOPortReadFunc *(*ioport_read_table)[MAX_IOPORTS];
-IOPortWriteFunc *(*ioport_write_table)[MAX_IOPORTS];
+void *ioport_opaque[MAX_IOPORTS];
+IOPortReadFunc *ioport_read_table[3][MAX_IOPORTS];
+IOPortWriteFunc *ioport_write_table[3][MAX_IOPORTS];
 /* Note: bs_table[MAX_DISKS] is a dummy block driver if none available
    to store the VM snapshots */
 BlockDriverState *bs_table[MAX_DISKS + MAX_SCSI_DISKS + 1], *fd_table[MAX_FD];
@@ -281,9 +281,6 @@ void default_ioport_writel(void *opaque,
 
 void init_ioports(void)
 {
-    ioport_opaque = calloc(MAX_IOPORTS, sizeof(*ioport_opaque));
-    ioport_read_table = calloc(3 * MAX_IOPORTS, sizeof(**ioport_read_table));
-    ioport_write_table = calloc(3 * MAX_IOPORTS, sizeof(**ioport_write_table));
 }
 
 /* size is the word size in byte */
@@ -6276,12 +6273,6 @@ void qemu_system_powerdown_request(void)
     powerdown_requested = 1;
     if (cpu_single_env)
         cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT);
-}
-
-static void qemu_sighup_handler(int signal)
-{
-    fprintf(stderr, "Received SIGHUP, terminating.\n");
-    exit(0);
 }
 
 void main_loop_wait(int timeout)
@@ -7979,7 +7970,7 @@ int main(int argc, char **argv)
 
 #ifndef CONFIG_STUBDOM
     /* Unblock SIGTERM and SIGHUP, which may have been blocked by the caller */
-    signal(SIGHUP, qemu_sighup_handler);
+    signal(SIGHUP, SIG_DFL);
     sigemptyset(&set);
     sigaddset(&set, SIGTERM);
     sigaddset(&set, SIGHUP);
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/vl.h
--- a/tools/ioemu/vl.h  Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/vl.h  Thu Apr 24 14:08:29 2008 -0600
@@ -1545,6 +1545,7 @@ char *xenstore_vm_read(int domid, char *
 
 /* xenfb.c */
 int xenfb_pv_display_init(DisplayState *ds);
+int xenfb_pv_display_start(void *vram_start);
 int xenfb_connect_vkbd(const char *path);
 int xenfb_connect_vfb(const char *path);
 
diff -r 239b44eeb2d6 -r dc510776dd59 tools/libfsimage/ext2fs/fsys_ext2fs.c
--- a/tools/libfsimage/ext2fs/fsys_ext2fs.c     Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/libfsimage/ext2fs/fsys_ext2fs.c     Thu Apr 24 14:08:29 2008 -0600
@@ -77,7 +77,52 @@ struct ext2_super_block
     __u32 s_rev_level;         /* Revision level */
     __u16 s_def_resuid;                /* Default uid for reserved blocks */
     __u16 s_def_resgid;                /* Default gid for reserved blocks */
-    __u32 s_reserved[235];     /* Padding to the end of the block */
+    /*
+     * These fields are for EXT2_DYNAMIC_REV superblocks only.
+     *
+     * Note: the difference between the compatible feature set and
+     * the incompatible feature set is that if there is a bit set
+     * in the incompatible feature set that the kernel doesn't
+     * know about, it should refuse to mount the filesystem.
+     *
+     * e2fsck's requirements are more strict; if it doesn't know
+     * about a feature in either the compatible or incompatible
+     * feature set, it must abort and not try to meddle with
+     * things it doesn't understand...
+     */
+    __u32 s_first_ino;         /* First non-reserved inode */
+    __u16 s_inode_size;                /* size of inode structure */
+    __u16 s_block_group_nr;    /* block group # of this superblock */
+    __u32 s_feature_compat;    /* compatible feature set */
+    __u32 s_feature_incompat;  /* incompatible feature set */
+    __u32 s_feature_ro_compat; /* readonly-compatible feature set */
+    __u8  s_uuid[16];          /* 128-bit uuid for volume */
+    char  s_volume_name[16];   /* volume name */
+    char  s_last_mounted[64];  /* directory where last mounted */
+    __u32 s_algorithm_usage_bitmap; /* For compression */
+    /*
+     * Performance hints.  Directory preallocation should only
+     * happen if the EXT2_FEATURE_COMPAT_DIR_PREALLOC flag is on.
+     */
+    __u8  s_prealloc_blocks;   /* Nr of blocks to try to preallocate*/
+    __u8  s_prealloc_dir_blocks;       /* Nr to preallocate for dirs */
+    __u16 s_reserved_gdt_blocks;/* Per group table for online growth */
+    /*
+     * Journaling support valid if EXT2_FEATURE_COMPAT_HAS_JOURNAL set.
+     */
+    __u8 s_journal_uuid[16];   /* uuid of journal superblock */
+    __u32 s_journal_inum;      /* inode number of journal file */
+    __u32 s_journal_dev;       /* device number of journal file */
+    __u32 s_last_orphan;       /* start of list of inodes to delete */
+    __u32 s_hash_seed[4];      /* HTREE hash seed */
+    __u8  s_def_hash_version;  /* Default hash version to use */
+    __u8  s_jnl_backup_type;   /* Default type of journal backup */
+    __u16 s_reserved_word_pad;
+    __u32 s_default_mount_opts;
+    __u32 s_first_meta_bg;     /* First metablock group */
+    __u32 s_mkfs_time;         /* When the filesystem was created */
+    __u32 s_jnl_blocks[17];    /* Backup of the journal inode */
+    __u32 s_reserved[172];     /* Padding to the end of the block */
   };
 
 struct ext2_group_desc
@@ -216,6 +261,9 @@ struct ext2_dir_entry
 #define EXT2_ADDR_PER_BLOCK(s)          (EXT2_BLOCK_SIZE(s) / sizeof (__u32))
 #define EXT2_ADDR_PER_BLOCK_BITS(s)            (log2(EXT2_ADDR_PER_BLOCK(s)))
 
+#define EXT2_INODE_SIZE(s)             (SUPERBLOCK->s_inode_size)
+#define EXT2_INODES_PER_BLOCK(s)       (EXT2_BLOCK_SIZE(s)/EXT2_INODE_SIZE(s))
+
 /* linux/ext2_fs.h */
 #define EXT2_BLOCK_SIZE_BITS(s)        ((s)->s_log_block_size + 10)
 /* kind of from ext2/super.c */
@@ -537,7 +585,7 @@ ext2fs_dir (fsi_file_t *ffi, char *dirna
       gdp = GROUP_DESC;
       ino_blk = gdp[desc].bg_inode_table +
        (((current_ino - 1) % (SUPERBLOCK->s_inodes_per_group))
-        >> log2 (EXT2_BLOCK_SIZE (SUPERBLOCK) / sizeof (struct ext2_inode)));
+        >> log2 (EXT2_INODES_PER_BLOCK (SUPERBLOCK)));
 #ifdef E2DEBUG
       printf ("inode table fsblock=%d\n", ino_blk);
 #endif /* E2DEBUG */
@@ -549,13 +597,12 @@ ext2fs_dir (fsi_file_t *ffi, char *dirna
       /* reset indirect blocks! */
       mapblock2 = mapblock1 = -1;
 
-      raw_inode = INODE +
-       ((current_ino - 1)
-        & (EXT2_BLOCK_SIZE (SUPERBLOCK) / sizeof (struct ext2_inode) - 1));
+      raw_inode = (struct ext2_inode *)((char *)INODE +
+       ((current_ino - 1) & (EXT2_INODES_PER_BLOCK (SUPERBLOCK) - 1)) *
+       EXT2_INODE_SIZE (SUPERBLOCK));
 #ifdef E2DEBUG
       printf ("ipb=%d, sizeof(inode)=%d\n",
-             (EXT2_BLOCK_SIZE (SUPERBLOCK) / sizeof (struct ext2_inode)),
-             sizeof (struct ext2_inode));
+             EXT2_INODES_PER_BLOCK (SUPERBLOCK), EXT2_INODE_SIZE (SUPERBLOCK));
       printf ("inode=%x, raw_inode=%x\n", INODE, raw_inode);
       printf ("offset into inode table block=%d\n", (int) raw_inode - (int) 
INODE);
       for (i = (unsigned char *) INODE; i <= (unsigned char *) raw_inode;
diff -r 239b44eeb2d6 -r dc510776dd59 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c        Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/libxc/xc_hvm_build.c        Thu Apr 24 14:08:29 2008 -0600
@@ -298,7 +298,7 @@ static int setup_guest(int xc_handle,
                        _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
     munmap(ident_pt, PAGE_SIZE);
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT,
-                     special_page_nr + SPECIALPAGE_IDENT_PT);
+                     (special_page_nr + SPECIALPAGE_IDENT_PT) << PAGE_SHIFT);
 
     /* Insert JMP <rel32> instruction at address 0x0 to reach entry point. */
     entry_eip = elf_uval(&elf, elf.ehdr, e_entry);
diff -r 239b44eeb2d6 -r dc510776dd59 tools/python/xen/util/acmpolicy.py
--- a/tools/python/xen/util/acmpolicy.py        Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/python/xen/util/acmpolicy.py        Thu Apr 24 14:08:29 2008 -0600
@@ -17,6 +17,7 @@
 #============================================================================
 
 import os
+import sha
 import stat
 import array
 import struct
@@ -35,7 +36,7 @@ ACM_POLICIES_DIR = security.policy_dir_p
 
 # Constants needed for generating a binary policy from its XML
 # representation
-ACM_POLICY_VERSION = 3  # Latest one
+ACM_POLICY_VERSION = 4  # Latest one
 ACM_CHWALL_VERSION = 1
 
 ACM_STE_VERSION = 1
@@ -965,6 +966,10 @@ class ACMPolicy(XSPolicy):
             return dom.toxml()
         return None
 
+    def hash(self):
+        """ Calculate a SHA1 hash of the XML policy """
+        return sha.sha(self.toxml())
+
     def save(self):
         ### Save the XML policy into a file ###
         rc = -xsconstants.XSERR_FILE_ERROR
@@ -1403,7 +1408,7 @@ class ACMPolicy(XSPolicy):
             ste_bin += "\x00"
 
         #Write binary header:
-        headerformat="!iiiiiiiiii"
+        headerformat="!iiiiiiiiii20s"
         totallen_bin = struct.calcsize(headerformat) + \
                        len(pr_bin) + len(chw_bin) + len(ste_bin)
         polref_offset = struct.calcsize(headerformat)
@@ -1425,7 +1430,8 @@ class ACMPolicy(XSPolicy):
                               primpoloffset,
                               secpolcode,
                               secpoloffset,
-                              major, minor)
+                              major, minor,
+                              self.hash().digest())
 
         all_bin = array.array('B')
         for s in [ hdr_bin, pr_bin, chw_bin, ste_bin ]:
@@ -1443,6 +1449,21 @@ class ACMPolicy(XSPolicy):
             rc = -xsconstants.XSERR_BAD_LABEL
         return rc, mapfile, all_bin.tostring()
 
+    def validate_enforced_policy_hash(self):
+        """ Verify that the policy hash embedded in the binary policy
+            that is currently enforced matches the one of the XML policy.
+        """
+        if self.hash().digest() != self.get_enforced_policy_hash():
+            raise Exception('Policy hashes do not match')
+
+    def get_enforced_policy_hash(self):
+        binpol = self.get_enforced_binary()
+        headerformat="!iiiiiiiiii20s"
+        res = struct.unpack(headerformat, binpol[:60])
+        if len(res) >= 11:
+            return res[10]
+        return None
+
     def get_enforced_binary(self):
         rc, binpol = security.hv_get_policy()
         if rc != 0:
diff -r 239b44eeb2d6 -r dc510776dd59 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/python/xen/xend/XendDomain.py       Thu Apr 24 14:08:29 2008 -0600
@@ -1622,7 +1622,31 @@ class XendDomain:
                                           vcpu)
         except Exception, ex:
             raise XendError(str(ex))
- 
+
+    def domain_reset(self, domid):
+        """Terminate domain immediately, and then create domain.
+
+        @param domid: Domain ID or Name
+        @type domid: int or string.
+        @rtype: None
+        @raise XendError: Failed to destroy or create
+        @raise XendInvalidDomain: Domain is not valid
+        """
+
+        dominfo = self.domain_lookup_nr(domid)
+        if not dominfo:
+            raise XendInvalidDomain(str(domid))
+        if dominfo and dominfo.getDomid() == DOM0_ID:
+            raise XendError("Cannot reset privileged domain %s" % domid)
+        if dominfo._stateGet() not in (DOM_STATE_RUNNING, DOM_STATE_PAUSED):
+            raise VMBadState("Domain '%s' is not started" % domid,
+                             POWER_STATE_NAMES[DOM_STATE_RUNNING],
+                             POWER_STATE_NAMES[dominfo._stateGet()])
+        try:
+            dominfo.resetDomain()
+        except Exception, ex:
+            raise XendError(str(ex))
+
 
 def instance():
     """Singleton constructor. Use this instead of the class constructor.
diff -r 239b44eeb2d6 -r dc510776dd59 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py   Thu Apr 24 14:08:29 2008 -0600
@@ -1837,6 +1837,9 @@ class XendDomainInfo:
 
         @raise: VmError for invalid devices
         """
+        if self.image:
+            self.image.prepareEnvironment()
+
         ordered_refs = self.info.ordered_device_refs()
         for dev_uuid in ordered_refs:
             devclass, config = self.info['devices'][dev_uuid]
@@ -2323,6 +2326,34 @@ class XendDomainInfo:
         self._cleanup_phantom_devs(paths)
 
 
+    def resetDomain(self):
+        log.debug("XendDomainInfo.resetDomain(%s)", str(self.domid))
+
+        old_domid = self.domid
+        prev_vm_xend = self._listRecursiveVm('xend')
+        new_dom_info = self.info
+        try:
+            self._unwatchVm()
+            self.destroy()
+
+            new_dom = None
+            try:
+                from xen.xend import XendDomain
+                new_dom_info['domid'] = None
+                new_dom = XendDomain.instance().domain_create_from_dict(
+                    new_dom_info)
+                for x in prev_vm_xend[0][1]:
+                    new_dom._writeVm('xend/%s' % x[0], x[1])
+                new_dom.waitForDevices()
+                new_dom.unpause()
+            except:
+                if new_dom:
+                    new_dom.destroy()
+                raise
+        except:
+            log.exception('Failed to reset domain %s.', str(old_domid))
+
+
     def resumeDomain(self):
         log.debug("XendDomainInfo.resumeDomain(%s)", str(self.domid))
 
diff -r 239b44eeb2d6 -r dc510776dd59 tools/python/xen/xend/XendXSPolicyAdmin.py
--- a/tools/python/xen/xend/XendXSPolicyAdmin.py        Thu Apr 24 14:02:16 
2008 -0600
+++ b/tools/python/xen/xend/XendXSPolicyAdmin.py        Thu Apr 24 14:08:29 
2008 -0600
@@ -54,6 +54,7 @@ class XSPolicyAdmin:
         try:
             self.xsobjs[ref] = ACMPolicy(name=act_pol_name, ref=ref)
             self.policies[ref] = (act_pol_name, xsconstants.ACM_POLICY_ID)
+            self.xsobjs[ref].validate_enforced_policy_hash()
         except Exception, e:
             log.error("Could not find XML representation of policy '%s': "
                       "%s" % (act_pol_name,e))
diff -r 239b44eeb2d6 -r dc510776dd59 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/python/xen/xend/image.py    Thu Apr 24 14:08:29 2008 -0600
@@ -184,6 +184,42 @@ class ImageHandler:
     def buildDomain(self):
         """Build the domain. Define in subclass."""
         raise NotImplementedError()
+
+    def prepareEnvironment(self):
+        """Prepare the environment for the execution of the domain. This
+        method is called before any devices are set up."""
+        
+        domid = self.vm.getDomid()
+       
+        # Delete left-over pipes
+        try:
+            os.unlink('/var/run/tap/qemu-read-%d' % domid)
+            os.unlink('/var/run/tap/qemu-write-%d' % domid)
+        except:
+            pass
+
+        # No device model, don't create pipes
+        if self.device_model is None:
+            return
+
+        # If we use a device model, the pipes for communication between
+        # blktapctrl and ioemu must be present before the devices are 
+        # created (blktapctrl must access them for new block devices)
+
+        # mkdir throws an exception if the path already exists
+        try:
+            os.mkdir('/var/run/tap', 0755)
+        except:
+            pass
+
+        try:
+            os.mkfifo('/var/run/tap/qemu-read-%d' % domid, 0600)
+            os.mkfifo('/var/run/tap/qemu-write-%d' % domid, 0600)
+        except OSError, e:
+            log.warn('Could not create blktap pipes for domain %d' % domid)
+            log.exception(e)
+            pass
+
 
     # Return a list of cmd line args to the device models based on the
     # xm config file
@@ -411,6 +447,12 @@ class ImageHandler:
             self.pid = None
             state = xstransact.Remove("/local/domain/0/device-model/%i"
                                       % self.vm.getDomid())
+            
+            try:
+                os.unlink('/var/run/tap/qemu-read-%d' % self.vm.getDomid())
+                os.unlink('/var/run/tap/qemu-write-%d' % self.vm.getDomid())
+            except:
+                pass
 
 
 class LinuxImageHandler(ImageHandler):
@@ -643,7 +685,9 @@ class IA64_HVM_ImageHandler(HVMImageHand
         # ROM size for guest firmware, io page, xenstore page
         # buffer io page, buffer pio page and memmap info page
         extra_pages = 1024 + 5
-        return mem_kb + extra_pages * page_kb
+        mem_kb += extra_pages * page_kb
+        # Add 8 MiB overhead for QEMU's video RAM.
+        return mem_kb + 8192
 
     def getRequiredInitialReservation(self):
         return self.vm.getMemoryTarget()
diff -r 239b44eeb2d6 -r dc510776dd59 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/python/xen/xm/main.py       Thu Apr 24 14:08:29 2008 -0600
@@ -107,6 +107,7 @@ SUBCOMMAND_HELP = {
                      'Migrate a domain to another machine.'),
     'pause'       : ('<Domain>', 'Pause execution of a domain.'),
     'reboot'      : ('<Domain> [-wa]', 'Reboot a domain.'),
+    'reset'       : ('<Domain>', 'Reset a domain.'),
     'restore'     : ('<CheckpointFile> [-p]',
                      'Restore a domain from a saved state.'),
     'save'        : ('[-c] <Domain> <CheckpointFile>',
@@ -274,6 +275,7 @@ common_commands = [
     "migrate",
     "pause",
     "reboot",
+    "reset",
     "restore",
     "resume",
     "save",
@@ -303,6 +305,7 @@ domain_commands = [
     "pause",
     "reboot",
     "rename",
+    "reset",
     "restore",
     "resume",
     "save",
@@ -1247,6 +1250,13 @@ def xm_shutdown(args):
     arg_check(args, "shutdown", 1, 4)
     from xen.xm import shutdown
     shutdown.main(["shutdown"] + args)
+
+def xm_reset(args):
+    arg_check(args, "reset", 1)
+    dom = args[0]
+
+    # TODO: XenAPI
+    server.xend.domain.reset(dom)
 
 def xm_pause(args):
     arg_check(args, "pause", 1)
@@ -2474,6 +2484,7 @@ commands = {
     "dump-core": xm_dump_core,
     "reboot": xm_reboot,
     "rename": xm_rename,
+    "reset": xm_reset,
     "restore": xm_restore,
     "resume": xm_resume,
     "save": xm_save,
diff -r 239b44eeb2d6 -r dc510776dd59 tools/tests/test_x86_emulator.c
--- a/tools/tests/test_x86_emulator.c   Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/tests/test_x86_emulator.c   Thu Apr 24 14:08:29 2008 -0600
@@ -26,14 +26,8 @@ static int read(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    unsigned long addr = offset;
-    switch ( bytes )
-    {
-    case 1: *val = *(uint8_t *)addr; break;
-    case 2: *val = *(uint16_t *)addr; break;
-    case 4: *val = *(uint32_t *)addr; break;
-    case 8: *val = *(unsigned long *)addr; break;
-    }
+    *val = 0;
+    memcpy(val, (void *)offset, bytes);
     return X86EMUL_OKAY;
 }
 
@@ -44,48 +38,19 @@ static int write(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    unsigned long addr = offset;
-    switch ( bytes )
-    {
-    case 1: *(uint8_t *)addr = (uint8_t)val; break;
-    case 2: *(uint16_t *)addr = (uint16_t)val; break;
-    case 4: *(uint32_t *)addr = (uint32_t)val; break;
-    case 8: *(unsigned long *)addr = val; break;
-    }
+    memcpy((void *)offset, &val, bytes);
     return X86EMUL_OKAY;
 }
 
 static int cmpxchg(
     unsigned int seg,
     unsigned long offset,
-    unsigned long old,
-    unsigned long new,
+    void *old,
+    void *new,
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    unsigned long addr = offset;
-    switch ( bytes )
-    {
-    case 1: *(uint8_t *)addr = (uint8_t)new; break;
-    case 2: *(uint16_t *)addr = (uint16_t)new; break;
-    case 4: *(uint32_t *)addr = (uint32_t)new; break;
-    case 8: *(unsigned long *)addr = new; break;
-    }
-    return X86EMUL_OKAY;
-}
-
-static int cmpxchg8b(
-    unsigned int seg,
-    unsigned long offset,
-    unsigned long old_lo,
-    unsigned long old_hi,
-    unsigned long new_lo,
-    unsigned long new_hi,
-    struct x86_emulate_ctxt *ctxt)
-{
-    unsigned long addr = offset;
-    ((unsigned long *)addr)[0] = new_lo;
-    ((unsigned long *)addr)[1] = new_hi;
+    memcpy((void *)offset, new, bytes);
     return X86EMUL_OKAY;
 }
 
@@ -94,7 +59,6 @@ static struct x86_emulate_ops emulops = 
     .insn_fetch = read,
     .write      = write,
     .cmpxchg    = cmpxchg,
-    .cmpxchg8b  = cmpxchg8b
 };
 
 int main(int argc, char **argv)
diff -r 239b44eeb2d6 -r dc510776dd59 tools/tests/x86_emulate.c
--- a/tools/tests/x86_emulate.c Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/tests/x86_emulate.c Thu Apr 24 14:08:29 2008 -0600
@@ -4,10 +4,4 @@
 #include <public/xen.h>
 
 #include "x86_emulate/x86_emulate.h"
-
-#define __emulate_fpu_insn(_op)                 \
-do{ rc = X86EMUL_UNHANDLEABLE;                  \
-    goto done;                                  \
-} while (0)
-
 #include "x86_emulate/x86_emulate.c"
diff -r 239b44eeb2d6 -r dc510776dd59 tools/xenmon/xenbaked.c
--- a/tools/xenmon/xenbaked.c   Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/xenmon/xenbaked.c   Thu Apr 24 14:08:29 2008 -0600
@@ -509,14 +509,36 @@ int monitor_tbufs(void)
     {
         for ( i = 0; (i < num) && !interrupted; i++ )
         {
-            while ( meta[i]->cons != meta[i]->prod )
+            unsigned long start_offset, end_offset, cons, prod;
+
+            cons = meta[i]->cons;
+            prod = meta[i]->prod;
+            xen_rmb(); /* read prod, then read item. */
+
+            if ( cons == prod )
+                continue;
+
+            start_offset = cons % data_size;
+            end_offset = prod % data_size;
+
+            if ( start_offset >= end_offset )
             {
-                xen_rmb(); /* read prod, then read item. */
+                while ( start_offset != data_size )
+                {
+                    rec_size = process_record(
+                        i, (struct t_rec *)(data[i] + start_offset));
+                    start_offset += rec_size;
+                }
+                start_offset = 0;
+            }
+            while ( start_offset != end_offset )
+            {
                 rec_size = process_record(
-                    i, (struct t_rec *)(data[i] + meta[i]->cons % data_size));
-                xen_mb(); /* read item, then update cons. */
-                meta[i]->cons += rec_size;
+                    i, (struct t_rec *)(data[i] + start_offset));
+                start_offset += rec_size;
             }
+            xen_mb(); /* read item, then update cons. */
+            meta[i]->cons = prod;
         }
 
        wait_for_event();
diff -r 239b44eeb2d6 -r dc510776dd59 xen/Makefile
--- a/xen/Makefile      Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/Makefile      Thu Apr 24 14:08:29 2008 -0600
@@ -44,6 +44,7 @@ _clean: delete-unfresh-files
        $(MAKE) -f $(BASEDIR)/Rules.mk -C arch/$(TARGET_ARCH) clean
        rm -f include/asm *.o $(TARGET)* *~ core
        rm -f include/asm-*/asm-offsets.h
+       [ -d tools/figlet ] && rm -f .banner*
 
 .PHONY: _distclean
 _distclean: clean
@@ -70,8 +71,14 @@ delete-unfresh-files:
                rm -f include/xen/compile.h; \
        fi
 
+.banner: Makefile
+       $(MAKE) -C tools
+       @tools/figlet/figlet -d tools/figlet Xen $(XEN_FULLVERSION) 2>$@2 >$@1
+       @cat $@1 $@2 >$@
+       @rm -f $@1 $@2
+
 # compile.h contains dynamic build info. Rebuilt on every 'make' invocation.
-include/xen/compile.h: include/xen/compile.h.in
+include/xen/compile.h: include/xen/compile.h.in .banner
        @sed -e 's/@@date@@/$(shell LC_ALL=C date)/g' \
            -e 's/@@time@@/$(shell LC_ALL=C date +%T)/g' \
            -e 's/@@whoami@@/$(USER)/g' \
@@ -83,7 +90,8 @@ include/xen/compile.h: include/xen/compi
            -e 's/@@extraversion@@/$(XEN_EXTRAVERSION)/g' \
            -e 's!@@changeset@@!$(shell ((hg parents --template "{date|date} 
{rev}:{node|short}" >/dev/null && hg parents --template "{date|date} 
{rev}:{node|short}") || echo "unavailable") 2>/dev/null)!g' \
            < include/xen/compile.h.in > $@.new
-       tools/figlet/figlet -d tools/figlet Xen $(XEN_FULLVERSION) >> $@.new
+       @grep \" .banner >> $@.new
+       @grep -v \" .banner
        @mv -f $@.new $@
 
 include/asm-$(TARGET_ARCH)/asm-offsets.h: arch/$(TARGET_ARCH)/asm-offsets.s
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/Makefile     Thu Apr 24 14:08:29 2008 -0600
@@ -52,6 +52,8 @@ obj-y += tboot.o
 
 obj-$(crash_debug) += gdbstub.o
 
+x86_emulate.o: x86_emulate/x86_emulate.c x86_emulate/x86_emulate.h
+
 $(TARGET): $(TARGET)-syms boot/mkelf32
        ./boot/mkelf32 $(TARGET)-syms $(TARGET) 0x100000 \
        `$(NM) -nr $(TARGET)-syms | head -n 1 | sed -e 's/^\([^ ]*\).*/0x\1/'`
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/bitops.c
--- a/xen/arch/x86/bitops.c     Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/bitops.c     Thu Apr 24 14:08:29 2008 -0600
@@ -8,17 +8,18 @@ unsigned int __find_first_bit(
     unsigned long d0, d1, res;
 
     asm volatile (
-        "   xor %%eax,%%eax\n\t" /* also ensures ZF==1 if size==0 */
+        "1: xor %%eax,%%eax\n\t" /* also ensures ZF==1 if size==0 */
         "   repe; scas"__OS"\n\t"
-        "   je 1f\n\t"
+        "   je 2f\n\t"
+        "   bsf -"STR(BITS_PER_LONG/8)"(%2),%0\n\t"
+        "   jz 1b\n\t"
         "   lea -"STR(BITS_PER_LONG/8)"(%2),%2\n\t"
-        "   bsf (%2),%0\n"
-        "1: sub %%ebx,%%edi\n\t"
+        "2: sub %%ebx,%%edi\n\t"
         "   shl $3,%%edi\n\t"
         "   add %%edi,%%eax"
         : "=&a" (res), "=&c" (d0), "=&D" (d1)
-        : "1" ((size + BITS_PER_LONG - 1) / BITS_PER_LONG),
-          "2" (addr), "b" ((int)(long)addr) : "memory" );
+        : "1" (BITS_TO_LONGS(size)), "2" (addr), "b" ((int)(long)addr)
+        : "memory" );
 
     return res;
 }
@@ -34,8 +35,7 @@ unsigned int __find_next_bit(
     if ( bit != 0 )
     {
         /* Look for a bit in the first word. */
-        asm ( "bsf %1,%%"__OP"ax"
-              : "=a" (set) : "r" (*p >> bit), "0" (BITS_PER_LONG) );
+        set = __scanbit(*p >> bit, BITS_PER_LONG - bit);
         if ( set < (BITS_PER_LONG - bit) )
             return (offset + set);
         offset += BITS_PER_LONG - bit;
@@ -56,18 +56,20 @@ unsigned int __find_first_zero_bit(
     unsigned long d0, d1, d2, res;
 
     asm volatile (
+        "1: xor %%eax,%%eax ; not %3\n\t" /* rAX == ~0ul */
         "   xor %%edx,%%edx\n\t" /* also ensures ZF==1 if size==0 */
         "   repe; scas"__OS"\n\t"
-        "   je 1f\n\t"
+        "   je 2f\n\t"
+        "   xor -"STR(BITS_PER_LONG/8)"(%2),%3\n\t"
+        "   jz 1b\n\t"
+        "   bsf %3,%0\n\t"
         "   lea -"STR(BITS_PER_LONG/8)"(%2),%2\n\t"
-        "   xor (%2),%3\n\t"
-        "   bsf %3,%0\n"
-        "1: sub %%ebx,%%edi\n\t"
+        "2: sub %%ebx,%%edi\n\t"
         "   shl $3,%%edi\n\t"
         "   add %%edi,%%edx"
         : "=&d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
-        : "1" ((size + BITS_PER_LONG - 1) / BITS_PER_LONG),
-          "2" (addr), "b" ((int)(long)addr), "3" (-1L) : "memory" );
+        : "1" (BITS_TO_LONGS(size)), "2" (addr), "b" ((int)(long)addr)
+        : "memory" );
 
     return res;
 }
@@ -83,7 +85,7 @@ unsigned int __find_next_zero_bit(
     if ( bit != 0 )
     {
         /* Look for zero in the first word. */
-        asm ( "bsf %1,%%"__OP"ax" : "=a" (set) : "r" (~(*p >> bit)) );
+        set = __scanbit(~(*p >> bit), BITS_PER_LONG - bit);
         if ( set < (BITS_PER_LONG - bit) )
             return (offset + set);
         offset += BITS_PER_LONG - bit;
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/cpu/mtrr/main.c
--- a/xen/arch/x86/cpu/mtrr/main.c      Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/cpu/mtrr/main.c      Thu Apr 24 14:08:29 2008 -0600
@@ -586,8 +586,6 @@ struct mtrr_value {
        unsigned long   lsize;
 };
 
-extern void global_init_mtrr_pat(void);
-
 /**
  * mtrr_bp_init - initialize mtrrs on the boot CPU
  *
@@ -654,11 +652,8 @@ void __init mtrr_bp_init(void)
        if (mtrr_if) {
                set_num_var_ranges();
                init_table();
-               if (use_intel()) {
+               if (use_intel())
                        get_mtrr_state();
-                       /* initialize some global data for MTRR/PAT 
virutalization */
-                       global_init_mtrr_pat();
-               }
        }
 }
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/domain.c     Thu Apr 24 14:08:29 2008 -0600
@@ -521,10 +521,10 @@ int arch_domain_create(struct domain *d,
         clear_page(d->shared_info);
         share_xen_page_with_guest(
             virt_to_page(d->shared_info), d, XENSHARE_writable);
-    }
-
-    if ( (rc = iommu_domain_init(d)) != 0 )
-        goto fail;
+
+        if ( (rc = iommu_domain_init(d)) != 0 )
+            goto fail;
+    }
 
     if ( is_hvm_domain(d) )
     {
@@ -562,7 +562,8 @@ void arch_domain_destroy(struct domain *
     if ( is_hvm_domain(d) )
         hvm_domain_destroy(d);
 
-    iommu_domain_destroy(d);
+    if ( !is_idle_domain(d) )
+        iommu_domain_destroy(d);
 
     paging_final_teardown(d);
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/domain_build.c       Thu Apr 24 14:08:29 2008 -0600
@@ -957,8 +957,8 @@ int __init construct_dom0(
     rc |= ioports_deny_access(dom0, 0x40, 0x43);
     /* PIT Channel 2 / PC Speaker Control. */
     rc |= ioports_deny_access(dom0, 0x61, 0x61);
-    /* PCI configuration spaces. */
-    rc |= ioports_deny_access(dom0, 0xcf8, 0xcff);
+    /* PCI configuration space (NB. 0xcf8 has special treatment). */
+    rc |= ioports_deny_access(dom0, 0xcfc, 0xcff);
     /* Command-line I/O ranges. */
     process_dom0_ioports_disable();
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/emulate.c
--- a/xen/arch/x86/hvm/emulate.c        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/hvm/emulate.c        Thu Apr 24 14:08:29 2008 -0600
@@ -28,6 +28,33 @@ static int hvmemul_do_io(
     ioreq_t *p = &vio->vp_ioreq;
     int rc;
 
+    /* Only retrieve the value from singleton (non-REP) reads. */
+    ASSERT((val == NULL) || ((dir == IOREQ_READ) && !value_is_ptr));
+
+    if ( is_mmio && !value_is_ptr )
+    {
+        /* Part of a multi-cycle read or write? */
+        if ( dir == IOREQ_WRITE )
+        {
+            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_write_pa;
+            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_write_bytes;
+            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
+                return X86EMUL_OKAY;
+        }
+        else
+        {
+            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_read_pa;
+            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_read_bytes;
+            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
+            {
+                *val = 0;
+                memcpy(val, &curr->arch.hvm_vcpu.mmio_large_read[addr - pa],
+                       size);
+                return X86EMUL_OKAY;
+            }
+        }
+    }
+
     switch ( curr->arch.hvm_vcpu.io_state )
     {
     case HVMIO_none:
@@ -36,8 +63,13 @@ static int hvmemul_do_io(
         curr->arch.hvm_vcpu.io_state = HVMIO_none;
         if ( val == NULL )
             return X86EMUL_UNHANDLEABLE;
-        *val = curr->arch.hvm_vcpu.io_data;
-        return X86EMUL_OKAY;
+        goto finish_access;
+    case HVMIO_dispatched:
+        /* May have to wait for previous cycle of a multi-write to complete. */
+        if ( is_mmio && !value_is_ptr && (dir == IOREQ_WRITE) &&
+             (addr == (curr->arch.hvm_vcpu.mmio_large_write_pa +
+                       curr->arch.hvm_vcpu.mmio_large_write_bytes)) )
+            return X86EMUL_RETRY;
     default:
         return X86EMUL_UNHANDLEABLE;
     }
@@ -80,8 +112,6 @@ static int hvmemul_do_io(
         *reps = p->count;
         p->state = STATE_IORESP_READY;
         hvm_io_assist();
-        if ( val != NULL )
-            *val = curr->arch.hvm_vcpu.io_data;
         curr->arch.hvm_vcpu.io_state = HVMIO_none;
         break;
     case X86EMUL_UNHANDLEABLE:
@@ -92,7 +122,43 @@ static int hvmemul_do_io(
         BUG();
     }
 
-    return rc;
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+ finish_access:
+    if ( val != NULL )
+        *val = curr->arch.hvm_vcpu.io_data;
+
+    if ( is_mmio && !value_is_ptr )
+    {
+        /* Part of a multi-cycle read or write? */
+        if ( dir == IOREQ_WRITE )
+        {
+            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_write_pa;
+            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_write_bytes;
+            if ( bytes == 0 )
+                pa = curr->arch.hvm_vcpu.mmio_large_write_pa = addr;
+            if ( addr == (pa + bytes) )
+                curr->arch.hvm_vcpu.mmio_large_write_bytes += size;
+        }
+        else
+        {
+            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_read_pa;
+            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_read_bytes;
+            if ( bytes == 0 )
+                pa = curr->arch.hvm_vcpu.mmio_large_read_pa = addr;
+            if ( (addr == (pa + bytes)) &&
+                 ((bytes + size) <
+                  sizeof(curr->arch.hvm_vcpu.mmio_large_read)) )
+            {
+                memcpy(&curr->arch.hvm_vcpu.mmio_large_read[addr - pa],
+                       val, size);
+                curr->arch.hvm_vcpu.mmio_large_read_bytes += size;
+            }
+        }
+    }
+
+    return X86EMUL_OKAY;
 }
 
 static int hvmemul_do_pio(
@@ -371,11 +437,15 @@ static int hvmemul_cmpxchg(
 static int hvmemul_cmpxchg(
     enum x86_segment seg,
     unsigned long offset,
-    unsigned long old,
-    unsigned long new,
+    void *p_old,
+    void *p_new,
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
+    unsigned long new = 0;
+    if ( bytes > sizeof(new) )
+        return X86EMUL_UNHANDLEABLE;
+    memcpy(&new, p_new, bytes);
     /* Fix this in case the guest is really relying on r-m-w atomicity. */
     return hvmemul_write(seg, offset, new, bytes, ctxt);
 }
@@ -603,7 +673,7 @@ static int hvmemul_read_msr(
 
     _regs.ecx = (uint32_t)reg;
 
-    if ( (rc = hvm_funcs.msr_read_intercept(&_regs)) != 0 )
+    if ( (rc = hvm_msr_read_intercept(&_regs)) != 0 )
         return rc;
 
     *val = ((uint64_t)(uint32_t)_regs.edx << 32) || (uint32_t)_regs.eax;
@@ -621,7 +691,7 @@ static int hvmemul_write_msr(
     _regs.eax = (uint32_t)val;
     _regs.ecx = (uint32_t)reg;
 
-    return hvm_funcs.msr_write_intercept(&_regs);
+    return hvm_msr_write_intercept(&_regs);
 }
 
 static int hvmemul_wbinvd(
@@ -674,11 +744,40 @@ static int hvmemul_inject_sw_interrupt(
     return X86EMUL_OKAY;
 }
 
-static void hvmemul_load_fpu_ctxt(
-    struct x86_emulate_ctxt *ctxt)
-{
-    if ( !current->fpu_dirtied )
+static int hvmemul_get_fpu(
+    void (*exception_callback)(void *, struct cpu_user_regs *),
+    void *exception_callback_arg,
+    enum x86_emulate_fpu_type type,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *curr = current;
+
+    switch ( type )
+    {
+    case X86EMUL_FPU_fpu:
+        break;
+    case X86EMUL_FPU_mmx:
+        if ( !cpu_has_mmx )
+            return X86EMUL_UNHANDLEABLE;
+        break;
+    default:
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    if ( !curr->fpu_dirtied )
         hvm_funcs.fpu_dirty_intercept();
+
+    curr->arch.hvm_vcpu.fpu_exception_callback = exception_callback;
+    curr->arch.hvm_vcpu.fpu_exception_callback_arg = exception_callback_arg;
+
+    return X86EMUL_OKAY;
+}
+
+static void hvmemul_put_fpu(
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *curr = current;
+    curr->arch.hvm_vcpu.fpu_exception_callback = NULL;
 }
 
 static int hvmemul_invlpg(
@@ -720,7 +819,8 @@ static struct x86_emulate_ops hvm_emulat
     .cpuid         = hvmemul_cpuid,
     .inject_hw_exception = hvmemul_inject_hw_exception,
     .inject_sw_interrupt = hvmemul_inject_sw_interrupt,
-    .load_fpu_ctxt = hvmemul_load_fpu_ctxt,
+    .get_fpu       = hvmemul_get_fpu,
+    .put_fpu       = hvmemul_put_fpu,
     .invlpg        = hvmemul_invlpg
 };
 
@@ -763,6 +863,11 @@ int hvm_emulate_one(
     hvmemul_ctxt->exn_pending = 0;
 
     rc = x86_emulate(&hvmemul_ctxt->ctxt, &hvm_emulate_ops);
+
+    if ( rc != X86EMUL_RETRY )
+        curr->arch.hvm_vcpu.mmio_large_read_bytes =
+            curr->arch.hvm_vcpu.mmio_large_write_bytes = 0;
+
     if ( rc != X86EMUL_OKAY )
         return rc;
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/hvm/hvm.c    Thu Apr 24 14:08:29 2008 -0600
@@ -494,14 +494,14 @@ static int hvm_load_cpu_ctxt(struct doma
          ((ctxt.cr0 & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG) )
     {
         gdprintk(XENLOG_ERR, "HVM restore: bad CR0 0x%"PRIx64"\n",
-                 ctxt.msr_efer);
+                 ctxt.cr0);
         return -EINVAL;
     }
 
     if ( ctxt.cr4 & HVM_CR4_GUEST_RESERVED_BITS )
     {
         gdprintk(XENLOG_ERR, "HVM restore: bad CR4 0x%"PRIx64"\n",
-                 ctxt.msr_efer);
+                 ctxt.cr4);
         return -EINVAL;
     }
 
@@ -620,8 +620,6 @@ HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_
 HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt,
                           1, HVMSR_PER_VCPU);
 
-extern int reset_vmsr(struct mtrr_state *m, u64 *p);
-
 int hvm_vcpu_initialise(struct vcpu *v)
 {
     int rc;
@@ -647,7 +645,7 @@ int hvm_vcpu_initialise(struct vcpu *v)
     spin_lock_init(&v->arch.hvm_vcpu.tm_lock);
     INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list);
 
-    rc = reset_vmsr(&v->arch.hvm_vcpu.mtrr, &v->arch.hvm_vcpu.pat_cr);
+    rc = hvm_vcpu_cacheattr_init(v);
     if ( rc != 0 )
         goto fail3;
 
@@ -681,6 +679,7 @@ int hvm_vcpu_initialise(struct vcpu *v)
 
 void hvm_vcpu_destroy(struct vcpu *v)
 {
+    hvm_vcpu_cacheattr_destroy(v);
     vlapic_destroy(v);
     hvm_funcs.vcpu_destroy(v);
 
@@ -1604,6 +1603,9 @@ void hvm_cpuid(unsigned int input, unsig
         *ebx &= 0x0000FFFFu;
         *ebx |= (current->vcpu_id * 2) << 24;
 
+        /* We always support MTRR MSRs. */
+        *edx |= bitmaskof(X86_FEATURE_MTRR);
+
         *ecx &= (bitmaskof(X86_FEATURE_XMM3) |
                  bitmaskof(X86_FEATURE_SSSE3) |
                  bitmaskof(X86_FEATURE_CX16) |
@@ -1653,6 +1655,146 @@ void hvm_cpuid(unsigned int input, unsig
 #endif
         break;
     }
+}
+
+int hvm_msr_read_intercept(struct cpu_user_regs *regs)
+{
+    uint32_t ecx = regs->ecx;
+    uint64_t msr_content = 0;
+    struct vcpu *v = current;
+    uint64_t *var_range_base, *fixed_range_base;
+    int index;
+
+    var_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.var_ranges;
+    fixed_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.fixed_ranges;
+
+    switch ( ecx )
+    {
+    case MSR_IA32_TSC:
+        msr_content = hvm_get_guest_time(v);
+        break;
+
+    case MSR_IA32_APICBASE:
+        msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
+        break;
+
+    case MSR_IA32_MCG_CAP:
+    case MSR_IA32_MCG_STATUS:
+    case MSR_IA32_MC0_STATUS:
+    case MSR_IA32_MC1_STATUS:
+    case MSR_IA32_MC2_STATUS:
+    case MSR_IA32_MC3_STATUS:
+    case MSR_IA32_MC4_STATUS:
+    case MSR_IA32_MC5_STATUS:
+        /* No point in letting the guest see real MCEs */
+        msr_content = 0;
+        break;
+
+    case MSR_IA32_CR_PAT:
+        msr_content = v->arch.hvm_vcpu.pat_cr;
+        break;
+
+    case MSR_MTRRcap:
+        msr_content = v->arch.hvm_vcpu.mtrr.mtrr_cap;
+        break;
+    case MSR_MTRRdefType:
+        msr_content = v->arch.hvm_vcpu.mtrr.def_type
+                        | (v->arch.hvm_vcpu.mtrr.enabled << 10);
+        break;
+    case MSR_MTRRfix64K_00000:
+        msr_content = fixed_range_base[0];
+        break;
+    case MSR_MTRRfix16K_80000:
+    case MSR_MTRRfix16K_A0000:
+        index = regs->ecx - MSR_MTRRfix16K_80000;
+        msr_content = fixed_range_base[index + 1];
+        break;
+    case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
+        index = regs->ecx - MSR_MTRRfix4K_C0000;
+        msr_content = fixed_range_base[index + 3];
+        break;
+    case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
+        index = regs->ecx - MSR_IA32_MTRR_PHYSBASE0;
+        msr_content = var_range_base[index];
+        break;
+
+    default:
+        return hvm_funcs.msr_read_intercept(regs);
+    }
+
+    regs->eax = (uint32_t)msr_content;
+    regs->edx = (uint32_t)(msr_content >> 32);
+    return X86EMUL_OKAY;
+}
+
+int hvm_msr_write_intercept(struct cpu_user_regs *regs)
+{
+    extern bool_t mtrr_var_range_msr_set(
+        struct mtrr_state *v, u32 msr, u64 msr_content);
+    extern bool_t mtrr_fix_range_msr_set(
+        struct mtrr_state *v, int row, u64 msr_content);
+    extern bool_t mtrr_def_type_msr_set(struct mtrr_state *v, u64 msr_content);
+    extern bool_t pat_msr_set(u64 *pat, u64 msr);
+
+    uint32_t ecx = regs->ecx;
+    uint64_t msr_content = (uint32_t)regs->eax | ((uint64_t)regs->edx << 32);
+    struct vcpu *v = current;
+    int index;
+
+    switch ( ecx )
+    {
+     case MSR_IA32_TSC:
+        hvm_set_guest_time(v, msr_content);
+        pt_reset(v);
+        break;
+
+    case MSR_IA32_APICBASE:
+        vlapic_msr_set(vcpu_vlapic(v), msr_content);
+        break;
+
+    case MSR_IA32_CR_PAT:
+        if ( !pat_msr_set(&v->arch.hvm_vcpu.pat_cr, msr_content) )
+           goto gp_fault;
+        break;
+
+    case MSR_MTRRcap:
+        goto gp_fault;
+    case MSR_MTRRdefType:
+        if ( !mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, msr_content) )
+           goto gp_fault;
+        break;
+    case MSR_MTRRfix64K_00000:
+        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, 0, msr_content) )
+            goto gp_fault;
+        break;
+    case MSR_MTRRfix16K_80000:
+    case MSR_MTRRfix16K_A0000:
+        index = regs->ecx - MSR_MTRRfix16K_80000 + 1;
+        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
+                                     index, msr_content) )
+            goto gp_fault;
+        break;
+    case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
+        index = regs->ecx - MSR_MTRRfix4K_C0000 + 3;
+        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
+                                     index, msr_content) )
+            goto gp_fault;
+        break;
+    case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
+        if ( !mtrr_var_range_msr_set(&v->arch.hvm_vcpu.mtrr,
+                                     regs->ecx, msr_content) )
+            goto gp_fault;
+        break;
+
+    default:
+        return hvm_funcs.msr_write_intercept(regs);
+    }
+
+    return X86EMUL_OKAY;
+
+gp_fault:
+    hvm_inject_exception(TRAP_gp_fault, 0, 0);
+    return X86EMUL_EXCEPTION;
 }
 
 enum hvm_intblk hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack)
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/mtrr.c
--- a/xen/arch/x86/hvm/mtrr.c   Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/hvm/mtrr.c   Thu Apr 24 14:08:29 2008 -0600
@@ -27,7 +27,6 @@
 #include <asm/hvm/support.h>
 #include <asm/hvm/cacheattr.h>
 
-/* Xen holds the native MTRR MSRs */
 extern struct mtrr_state mtrr_state;
 
 static uint64_t phys_base_msr_mask;
@@ -35,19 +34,17 @@ static uint32_t size_or_mask;
 static uint32_t size_or_mask;
 static uint32_t size_and_mask;
 
-static void init_pat_entry_tbl(uint64_t pat);
-static void init_mtrr_epat_tbl(void);
-static uint8_t get_mtrr_type(struct mtrr_state *m, paddr_t pa);
-/* get page attribute fields (PAn) from PAT MSR */
+/* Get page attribute fields (PAn) from PAT MSR. */
 #define pat_cr_2_paf(pat_cr,n)  ((((uint64_t)pat_cr) >> ((n)<<3)) & 0xff)
-/* pat entry to PTE flags (PAT, PCD, PWT bits) */
+
+/* PAT entry to PTE flags (PAT, PCD, PWT bits). */
 static uint8_t pat_entry_2_pte_flags[8] = {
     0,           _PAGE_PWT,
     _PAGE_PCD,   _PAGE_PCD | _PAGE_PWT,
     _PAGE_PAT,   _PAGE_PAT | _PAGE_PWT,
     _PAGE_PAT | _PAGE_PCD, _PAGE_PAT | _PAGE_PCD | _PAGE_PWT };
 
-/* effective mm type lookup table, according to MTRR and PAT */
+/* Effective mm type lookup table, according to MTRR and PAT. */
 static uint8_t mm_type_tbl[MTRR_NUM_TYPES][PAT_TYPE_NUMS] = {
 /********PAT(UC,WC,RS,RS,WT,WP,WB,UC-)*/
 /* RS means reserved type(2,3), and type is hardcoded here */
@@ -67,12 +64,13 @@ static uint8_t mm_type_tbl[MTRR_NUM_TYPE
             {0, 1, 2, 2, 4, 5, 6, 0}
 };
 
-/* reverse lookup table, to find a pat type according to MTRR and effective
- * memory type. This table is dynamically generated
+/*
+ * Reverse lookup table, to find a pat type according to MTRR and effective
+ * memory type. This table is dynamically generated.
  */
 static uint8_t mtrr_epat_tbl[MTRR_NUM_TYPES][MEMORY_NUM_TYPES];
 
-/* lookup table for PAT entry of a given PAT value in host pat */
+/* Lookup table for PAT entry of a given PAT value in host PAT. */
 static uint8_t pat_entry_tbl[PAT_TYPE_NUMS];
 
 static void get_mtrr_range(uint64_t base_msr, uint64_t mask_msr,
@@ -139,220 +137,63 @@ bool_t is_var_mtrr_overlapped(struct mtr
     return 0;
 }
 
-/* reserved mtrr for guest OS */
-#define RESERVED_MTRR 2
+#define MTRR_PHYSMASK_VALID_BIT  11
+#define MTRR_PHYSMASK_SHIFT      12
+
+#define MTRR_PHYSBASE_TYPE_MASK  0xff   /* lowest 8 bits */
+#define MTRR_PHYSBASE_SHIFT      12
+#define MTRR_VCNT                8
+
 #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
 #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
 bool_t mtrr_var_range_msr_set(struct mtrr_state *m, uint32_t msr,
                               uint64_t msr_content);
-bool_t mtrr_def_type_msr_set(struct mtrr_state *m, uint64_t msr_content);
 bool_t mtrr_fix_range_msr_set(struct mtrr_state *m, uint32_t row,
                               uint64_t msr_content);
-static void set_var_mtrr(uint32_t reg, struct mtrr_state *m,
-                         uint32_t base, uint32_t size,
-                         uint32_t type)
-{
-    struct mtrr_var_range *vr;
-
-    vr = &m->var_ranges[reg];
-
-    if ( size == 0 )
-    {
-        /* The invalid bit is kept in the mask, so we simply clear the
-         * relevant mask register to disable a range.
-         */
-        mtrr_var_range_msr_set(m, MTRRphysMask_MSR(reg), 0);
-    }
-    else
-    {
-        vr->base_lo = base << PAGE_SHIFT | type;
-        vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT);
-        vr->mask_lo = -size << PAGE_SHIFT | 0x800;
-        vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT);
-
-        mtrr_var_range_msr_set(m, MTRRphysBase_MSR(reg), *(uint64_t *)vr);
-        mtrr_var_range_msr_set(m, MTRRphysMask_MSR(reg),
-                               *((uint64_t *)vr + 1));
-    }
-}
-/* From Intel Vol. III Section 10.11.4, the Range Size and Base Alignment has
- * some kind of requirement:
- * 1. The range size must be 2^N byte for N >= 12 (i.e 4KB minimum).
- * 2. The base address must be 2^N aligned, where the N here is equal to
- * the N in previous requirement. So a 8K range must be 8K aligned not 4K 
aligned.
- */
-static uint32_t range_to_mtrr(uint32_t reg, struct mtrr_state *m,
-                              uint32_t range_startk, uint32_t range_sizek,
-                              uint8_t type)
-{
-    if ( !range_sizek || (reg >= ((m->mtrr_cap & 0xff) - RESERVED_MTRR)) )
-    {
-        gdprintk(XENLOG_WARNING,
-                "Failed to init var mtrr msr[%d]"
-                "range_size:%x, total available MSR:%d\n",
-                reg, range_sizek,
-                (uint32_t)((m->mtrr_cap & 0xff) - RESERVED_MTRR));
-        return reg;
-    }
-
-    while ( range_sizek )
-    {
-        uint32_t max_align, align, sizek;
-
-        max_align = (range_startk == 0) ? 32 : ffs(range_startk);
-        align = min_t(uint32_t, fls(range_sizek), max_align);
-        sizek = 1 << (align - 1);
-
-        set_var_mtrr(reg++, m, range_startk, sizek, type);
-
-        range_startk += sizek;
-        range_sizek  -= sizek;
-
-        if ( reg >= ((m->mtrr_cap & 0xff) - RESERVED_MTRR) )
-        {
-            gdprintk(XENLOG_WARNING,
-                    "Failed to init var mtrr msr[%d],"
-                    "total available MSR:%d\n",
-                    reg, (uint32_t)((m->mtrr_cap & 0xff) - RESERVED_MTRR));
-            break;
-        }
-    }
-
-    return reg;
-}
-
-static void setup_fixed_mtrrs(struct vcpu *v)
-{
-    uint64_t content;
-    int32_t i;
-    struct mtrr_state *m = &v->arch.hvm_vcpu.mtrr;
-
-    /* 1. Map (0~A0000) as WB */
-    content = 0x0606060606060606ull;
-    mtrr_fix_range_msr_set(m, 0, content);
-    mtrr_fix_range_msr_set(m, 1, content);
-    /* 2. Map VRAM(A0000~C0000) as WC */
-    content = 0x0101010101010101;
-    mtrr_fix_range_msr_set(m, 2, content);
-    /* 3. Map (C0000~100000) as UC */
-    for ( i = 3; i < 11; i++)
-        mtrr_fix_range_msr_set(m, i, 0);
-}
-
-static void setup_var_mtrrs(struct vcpu *v)
-{
-    p2m_type_t p2m;
-    uint64_t e820_mfn;
-    int8_t *p = NULL;
-    uint8_t nr = 0;
-    int32_t i;
-    uint32_t reg = 0;
-    uint64_t size = 0;
-    uint64_t addr = 0;
-    struct e820entry *e820_table;
-
-    e820_mfn = mfn_x(gfn_to_mfn(v->domain,
-                    HVM_E820_PAGE >> PAGE_SHIFT, &p2m));
-
-    p = (int8_t *)map_domain_page(e820_mfn);
-
-    nr = *(uint8_t*)(p + HVM_E820_NR_OFFSET);
-    e820_table = (struct e820entry*)(p + HVM_E820_OFFSET);
-    /* search E820 table, set MTRR for RAM */
-    for ( i = 0; i < nr; i++)
-    {
-        if ( (e820_table[i].addr >= 0x100000) &&
-             (e820_table[i].type == E820_RAM) )
-        {
-            if ( e820_table[i].addr == 0x100000 )
-            {
-                size = e820_table[i].size + 0x100000 + PAGE_SIZE * 5;
-                addr = 0;
-            }
-            else
-            {
-                /* Larger than 4G */
-                size = e820_table[i].size;
-                addr = e820_table[i].addr;
-            }
-
-            reg = range_to_mtrr(reg, &v->arch.hvm_vcpu.mtrr,
-                                addr >> PAGE_SHIFT, size >> PAGE_SHIFT,
-                                MTRR_TYPE_WRBACK);
-        }
-    }
-}
-
-void init_mtrr_in_hyper(struct vcpu *v)
-{
-    /* TODO:MTRR should be initialized in BIOS or other places.
-     * workaround to do it in here
-     */
-    if ( v->arch.hvm_vcpu.mtrr.is_initialized )
-        return;
-
-    setup_fixed_mtrrs(v);
-    setup_var_mtrrs(v);
-    /* enable mtrr */
-    mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, 0xc00);
-
-    v->arch.hvm_vcpu.mtrr.is_initialized = 1;
-}
-
-static int32_t reset_mtrr(struct mtrr_state *m)
-{
-    m->var_ranges = xmalloc_array(struct mtrr_var_range, MTRR_VCNT);
-    if ( m->var_ranges == NULL )
-        return -ENOMEM;
-    memset(m->var_ranges, 0, MTRR_VCNT * sizeof(struct mtrr_var_range));
-    memset(m->fixed_ranges, 0, sizeof(m->fixed_ranges));
-    m->enabled = 0;
-    m->def_type = 0;/*mtrr is disabled*/
-    m->mtrr_cap = (0x5<<8)|MTRR_VCNT;/*wc,fix enabled, and vcnt=8*/
-    m->overlapped = 0;
-    return 0;
-}
-
-/* init global variables for MTRR and PAT */
-void global_init_mtrr_pat(void)
+
+static int hvm_mtrr_pat_init(void)
 {
     extern uint64_t host_pat;
-    uint32_t phys_addr;
-
-    init_mtrr_epat_tbl();
-    init_pat_entry_tbl(host_pat);
-    /* Get max physical address, set some global variable */
-    if ( cpuid_eax(0x80000000) < 0x80000008 )
-        phys_addr = 36;
-    else
-        phys_addr = cpuid_eax(0x80000008);
-
-    phys_base_msr_mask = ~((((uint64_t)1) << phys_addr) - 1) | 0xf00UL;
-    phys_mask_msr_mask = ~((((uint64_t)1) << phys_addr) - 1) | 0x7ffUL;
-
-    size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
-    size_and_mask = ~size_or_mask & 0xfff00000;
-}
-
-static void init_pat_entry_tbl(uint64_t pat)
-{
-    int32_t i, j;
+    unsigned int i, j, phys_addr;
+
+    memset(&mtrr_epat_tbl, INVALID_MEM_TYPE, sizeof(mtrr_epat_tbl));
+    for ( i = 0; i < MTRR_NUM_TYPES; i++ )
+    {
+        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
+        {
+            int32_t tmp = mm_type_tbl[i][j];
+            if ( (tmp >= 0) && (tmp < MEMORY_NUM_TYPES) )
+                mtrr_epat_tbl[i][tmp] = j;
+        }
+    }
 
     memset(&pat_entry_tbl, INVALID_MEM_TYPE,
            PAT_TYPE_NUMS * sizeof(pat_entry_tbl[0]));
-
     for ( i = 0; i < PAT_TYPE_NUMS; i++ )
     {
         for ( j = 0; j < PAT_TYPE_NUMS; j++ )
         {
-            if ( pat_cr_2_paf(pat, j) == i )
+            if ( pat_cr_2_paf(host_pat, j) == i )
             {
                 pat_entry_tbl[i] = j;
                 break;
             }
         }
     }
-}
+
+    phys_addr = 36;
+    if ( cpuid_eax(0x80000000) >= 0x80000008 )
+        phys_addr = (uint8_t)cpuid_eax(0x80000008);
+
+    phys_base_msr_mask = ~((((uint64_t)1) << phys_addr) - 1) | 0xf00UL;
+    phys_mask_msr_mask = ~((((uint64_t)1) << phys_addr) - 1) | 0x7ffUL;
+
+    size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
+    size_and_mask = ~size_or_mask & 0xfff00000;
+
+    return 0;
+}
+__initcall(hvm_mtrr_pat_init);
 
 uint8_t pat_type_2_pte_flags(uint8_t pat_type)
 {
@@ -368,24 +209,35 @@ uint8_t pat_type_2_pte_flags(uint8_t pat
     return pat_entry_2_pte_flags[pat_entry_tbl[PAT_TYPE_UNCACHABLE]];
 }
 
-int32_t reset_vmsr(struct mtrr_state *m, uint64_t *pat_ptr)
-{
-    int32_t rc;
-
-    rc = reset_mtrr(m);
-    if ( rc != 0 )
-        return rc;
-
-    *pat_ptr = ((uint64_t)PAT_TYPE_WRBACK) |               /* PAT0: WB */
-               ((uint64_t)PAT_TYPE_WRTHROUGH << 8) |       /* PAT1: WT */
-               ((uint64_t)PAT_TYPE_UC_MINUS << 16) |       /* PAT2: UC- */
-               ((uint64_t)PAT_TYPE_UNCACHABLE << 24) |     /* PAT3: UC */
-               ((uint64_t)PAT_TYPE_WRBACK << 32) |         /* PAT4: WB */
-               ((uint64_t)PAT_TYPE_WRTHROUGH << 40) |      /* PAT5: WT */
-               ((uint64_t)PAT_TYPE_UC_MINUS << 48) |       /* PAT6: UC- */
-               ((uint64_t)PAT_TYPE_UNCACHABLE << 56);      /* PAT7: UC */
-
-    return 0;
+int hvm_vcpu_cacheattr_init(struct vcpu *v)
+{
+    struct mtrr_state *m = &v->arch.hvm_vcpu.mtrr;
+
+    memset(m, 0, sizeof(*m));
+
+    m->var_ranges = xmalloc_array(struct mtrr_var_range, MTRR_VCNT);
+    if ( m->var_ranges == NULL )
+        return -ENOMEM;
+    memset(m->var_ranges, 0, MTRR_VCNT * sizeof(struct mtrr_var_range));
+
+    m->mtrr_cap = (1u << 10) | (1u << 8) | MTRR_VCNT;
+
+    v->arch.hvm_vcpu.pat_cr =
+        ((uint64_t)PAT_TYPE_WRBACK) |               /* PAT0: WB */
+        ((uint64_t)PAT_TYPE_WRTHROUGH << 8) |       /* PAT1: WT */
+        ((uint64_t)PAT_TYPE_UC_MINUS << 16) |       /* PAT2: UC- */
+        ((uint64_t)PAT_TYPE_UNCACHABLE << 24) |     /* PAT3: UC */
+        ((uint64_t)PAT_TYPE_WRBACK << 32) |         /* PAT4: WB */
+        ((uint64_t)PAT_TYPE_WRTHROUGH << 40) |      /* PAT5: WT */
+        ((uint64_t)PAT_TYPE_UC_MINUS << 48) |       /* PAT6: UC- */
+        ((uint64_t)PAT_TYPE_UNCACHABLE << 56);      /* PAT7: UC */
+
+    return 0;
+}
+
+void hvm_vcpu_cacheattr_destroy(struct vcpu *v)
+{
+    xfree(v->arch.hvm_vcpu.mtrr.var_ranges);
 }
 
 /*
@@ -512,23 +364,6 @@ static uint8_t effective_mm_type(struct 
     return effective;
 }
 
-static void init_mtrr_epat_tbl(void)
-{
-    int32_t i, j;
-    /* set default value to an invalid type, just for checking conflict */
-    memset(&mtrr_epat_tbl, INVALID_MEM_TYPE, sizeof(mtrr_epat_tbl));
-
-    for ( i = 0; i < MTRR_NUM_TYPES; i++ )
-    {
-        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
-        {
-            int32_t tmp = mm_type_tbl[i][j];
-            if ( (tmp >= 0) && (tmp < MEMORY_NUM_TYPES) )
-                mtrr_epat_tbl[i][tmp] = j;
-        }
-    }
-}
-
 uint32_t get_pat_flags(struct vcpu *v,
                        uint32_t gl1e_flags,
                        paddr_t gpaddr,
@@ -856,7 +691,6 @@ static int hvm_load_mtrr_msr(struct doma
 
     mtrr_def_type_msr_set(mtrr_state, hw_mtrr.msr_mtrr_def_type);
 
-    v->arch.hvm_vcpu.mtrr.is_initialized = 1;
     return 0;
 }
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/Makefile
--- a/xen/arch/x86/hvm/svm/Makefile     Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/hvm/svm/Makefile     Thu Apr 24 14:08:29 2008 -0600
@@ -1,8 +1,6 @@ subdir-$(x86_32) += x86_32
-subdir-$(x86_32) += x86_32
-subdir-$(x86_64) += x86_64
-
 obj-y += asid.o
 obj-y += emulate.o
+obj-y += entry.o
 obj-y += intr.o
 obj-y += svm.o
 obj-y += vmcb.o
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/entry.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/svm/entry.S      Thu Apr 24 14:08:29 2008 -0600
@@ -0,0 +1,178 @@
+/*
+ * entry.S: SVM architecture-specific entry/exit handling.
+ * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
+ * Copyright (c) 2004, Intel Corporation.
+ * Copyright (c) 2008, Citrix Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <xen/softirq.h>
+#include <asm/types.h>
+#include <asm/asm_defns.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+#include <public/xen.h>
+
+#define VMRUN  .byte 0x0F,0x01,0xD8
+#define STGI   .byte 0x0F,0x01,0xDC
+#define CLGI   .byte 0x0F,0x01,0xDD
+
+#define get_current(reg)                        \
+        mov $STACK_SIZE-BYTES_PER_LONG, r(reg); \
+        or  r(sp), r(reg);                      \
+        and $~(BYTES_PER_LONG-1),r(reg);        \
+        mov (r(reg)),r(reg);
+
+#if defined(__x86_64__)
+#define r(reg) %r##reg
+#define addr_of(lbl) lbl(%rip)
+#define call_with_regs(fn)                      \
+        mov  %rsp,%rdi;                         \
+        call fn;
+#else /* defined(__i386__) */
+#define r(reg) %e##reg
+#define addr_of(lbl) lbl
+#define UREGS_rax UREGS_eax
+#define UREGS_rip UREGS_eip
+#define UREGS_rsp UREGS_esp
+#define call_with_regs(fn)                      \
+        mov  %esp,%eax;                         \
+        push %eax;                              \
+        call fn;                                \
+        add  $4,%esp;
+#endif
+
+ENTRY(svm_asm_do_resume)
+        get_current(bx)
+        CLGI
+
+        mov  VCPU_processor(r(bx)),%eax
+        shl  $IRQSTAT_shift,r(ax)
+        lea  addr_of(irq_stat),r(dx)
+        testl $~0,(r(dx),r(ax),1)
+        jnz  .Lsvm_process_softirqs
+
+        call svm_asid_handle_vmrun
+        call svm_intr_assist
+
+        cmpb $0,addr_of(tb_init_done)
+        jnz  .Lsvm_trace
+.Lsvm_trace_done:
+
+        mov  VCPU_svm_vmcb(r(bx)),r(cx)
+        mov  UREGS_rax(r(sp)),r(ax)
+        mov  r(ax),VMCB_rax(r(cx))
+        mov  UREGS_rip(r(sp)),r(ax)
+        mov  r(ax),VMCB_rip(r(cx))
+        mov  UREGS_rsp(r(sp)),r(ax)
+        mov  r(ax),VMCB_rsp(r(cx))
+        mov  UREGS_eflags(r(sp)),r(ax)
+        mov  r(ax),VMCB_rflags(r(cx))
+
+        mov  VCPU_svm_vmcb_pa(r(bx)),r(ax)
+
+#if defined(__x86_64__)
+        pop  %r15
+        pop  %r14
+        pop  %r13
+        pop  %r12
+        pop  %rbp
+        pop  %rbx
+        pop  %r11
+        pop  %r10
+        pop  %r9
+        pop  %r8
+        add  $8,%rsp /* Skip %rax: restored by VMRUN. */
+        pop  %rcx
+        pop  %rdx
+        pop  %rsi
+        pop  %rdi
+#else /* defined(__i386__) */
+        pop  %ebx
+        pop  %ecx
+        pop  %edx
+        pop  %esi
+        pop  %edi
+        pop  %ebp
+#endif
+
+        VMRUN
+
+#if defined(__x86_64__)
+        push %rdi
+        push %rsi
+        push %rdx
+        push %rcx
+        push %rax
+        push %r8
+        push %r9
+        push %r10
+        push %r11
+        push %rbx
+        push %rbp
+        push %r12
+        push %r13
+        push %r14
+        push %r15
+#else /* defined(__i386__) */
+        push %ebp
+        push %edi
+        push %esi
+        push %edx
+        push %ecx
+        push %ebx
+#endif
+
+        get_current(bx)
+        movb $0,VCPU_svm_vmcb_in_sync(r(bx))
+        mov  VCPU_svm_vmcb(r(bx)),r(cx)
+        mov  VMCB_rax(r(cx)),r(ax)
+        mov  r(ax),UREGS_rax(r(sp))
+        mov  VMCB_rip(r(cx)),r(ax)
+        mov  r(ax),UREGS_rip(r(sp))
+        mov  VMCB_rsp(r(cx)),r(ax)
+        mov  r(ax),UREGS_rsp(r(sp))
+        mov  VMCB_rflags(r(cx)),r(ax)
+        mov  r(ax),UREGS_eflags(r(sp))
+
+#ifndef NDEBUG
+        mov  $0xbeef,%ax
+        mov  %ax,UREGS_error_code(r(sp))
+        mov  %ax,UREGS_entry_vector(r(sp))
+        mov  %ax,UREGS_saved_upcall_mask(r(sp))
+        mov  %ax,UREGS_cs(r(sp))
+        mov  %ax,UREGS_ds(r(sp))
+        mov  %ax,UREGS_es(r(sp))
+        mov  %ax,UREGS_fs(r(sp))
+        mov  %ax,UREGS_gs(r(sp))
+        mov  %ax,UREGS_ss(r(sp))
+#endif
+
+        STGI
+.globl svm_stgi_label
+svm_stgi_label:
+        call_with_regs(svm_vmexit_handler)
+        jmp  svm_asm_do_resume
+
+.Lsvm_process_softirqs:
+        STGI
+        call do_softirq
+        jmp  svm_asm_do_resume
+
+.Lsvm_trace:
+        call svm_trace_vmentry
+        jmp  .Lsvm_trace_done
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/hvm/svm/intr.c       Thu Apr 24 14:08:29 2008 -0600
@@ -102,15 +102,17 @@ static void svm_dirq_assist(struct vcpu 
     struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
     struct dev_intx_gsi_link *digl;
 
-    if ( !amd_iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
+    if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
         return;
 
     for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS);
           irq < NR_IRQS;
           irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) )
     {
+        if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
+            continue;
+
         stop_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(irq)]);
-        clear_bit(irq, &hvm_irq_dpci->dirq_mask);
 
         list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list )
         {
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c        Thu Apr 24 14:08:29 2008 -0600
@@ -911,6 +911,9 @@ static void svm_cpuid_intercept(
             __clear_bit(X86_FEATURE_PAE & 31, edx);
         __clear_bit(X86_FEATURE_PSE36 & 31, edx);
 
+        /* We always support MTRR MSRs. */
+        *edx |= bitmaskof(X86_FEATURE_MTRR);
+
         /* Filter all other features according to a whitelist. */
         *ecx &= (bitmaskof(X86_FEATURE_LAHF_LM) |
                  bitmaskof(X86_FEATURE_ALTMOVCR) |
@@ -924,7 +927,9 @@ static void svm_cpuid_intercept(
                  bitmaskof(X86_FEATURE_SYSCALL) |
                  bitmaskof(X86_FEATURE_MP) |
                  bitmaskof(X86_FEATURE_MMXEXT) |
-                 bitmaskof(X86_FEATURE_FFXSR));
+                 bitmaskof(X86_FEATURE_FFXSR) |
+                 bitmaskof(X86_FEATURE_3DNOW) |
+                 bitmaskof(X86_FEATURE_3DNOWEXT));
         break;
 
     case 0x80000007:
@@ -981,14 +986,6 @@ static int svm_msr_read_intercept(struct
 
     switch ( ecx )
     {
-    case MSR_IA32_TSC:
-        msr_content = hvm_get_guest_time(v);
-        break;
-
-    case MSR_IA32_APICBASE:
-        msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
-        break;
-
     case MSR_EFER:
         msr_content = v->arch.hvm_vcpu.guest_efer;
         break;
@@ -1013,18 +1010,6 @@ static int svm_msr_read_intercept(struct
 
     case MSR_K8_VM_HSAVE_PA:
         goto gpf;
-
-    case MSR_IA32_MCG_CAP:
-    case MSR_IA32_MCG_STATUS:
-    case MSR_IA32_MC0_STATUS:
-    case MSR_IA32_MC1_STATUS:
-    case MSR_IA32_MC2_STATUS:
-    case MSR_IA32_MC3_STATUS:
-    case MSR_IA32_MC4_STATUS:
-    case MSR_IA32_MC5_STATUS:
-        /* No point in letting the guest see real MCEs */
-        msr_content = 0;
-        break;
 
     case MSR_IA32_DEBUGCTLMSR:
         msr_content = vmcb->debugctlmsr;
@@ -1083,15 +1068,6 @@ static int svm_msr_write_intercept(struc
 
     switch ( ecx )
     {
-    case MSR_IA32_TSC:
-        hvm_set_guest_time(v, msr_content);
-        pt_reset(v);
-        break;
-
-    case MSR_IA32_APICBASE:
-        vlapic_msr_set(vcpu_vlapic(v), msr_content);
-        break;
-
     case MSR_K8_VM_HSAVE_PA:
         goto gpf;
 
@@ -1152,12 +1128,12 @@ static void svm_do_msr_access(struct cpu
 
     if ( vmcb->exitinfo1 == 0 )
     {
-        rc = svm_msr_read_intercept(regs);
+        rc = hvm_msr_read_intercept(regs);
         inst_len = __get_instruction_length(v, INSTR_RDMSR, NULL);
     }
     else
     {
-        rc = svm_msr_write_intercept(regs);
+        rc = hvm_msr_write_intercept(regs);
         inst_len = __get_instruction_length(v, INSTR_WRMSR, NULL);
     }
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/x86_32/Makefile
--- a/xen/arch/x86/hvm/svm/x86_32/Makefile      Thu Apr 24 14:02:16 2008 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-obj-y += exits.o
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/x86_32/exits.S
--- a/xen/arch/x86/hvm/svm/x86_32/exits.S       Thu Apr 24 14:02:16 2008 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,131 +0,0 @@
-/*
- * exits.S: SVM architecture-specific exit handling.
- * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-
-#include <xen/config.h>
-#include <xen/errno.h>
-#include <xen/softirq.h>
-#include <asm/asm_defns.h>
-#include <asm/apicdef.h>
-#include <asm/page.h>
-#include <public/xen.h>
-
-#define GET_CURRENT(reg)         \
-        movl $STACK_SIZE-4,reg;  \
-        orl  %esp,reg;           \
-        andl $~3,reg;            \
-        movl (reg),reg;
-
-#define VMRUN  .byte 0x0F,0x01,0xD8
-#define STGI   .byte 0x0F,0x01,0xDC
-#define CLGI   .byte 0x0F,0x01,0xDD
-
-ENTRY(svm_asm_do_resume)
-        GET_CURRENT(%ebx)
-        CLGI
-
-        movl VCPU_processor(%ebx),%eax
-        shl  $IRQSTAT_shift,%eax
-        testl $~0,irq_stat(%eax,1)
-        jnz  svm_process_softirqs
-
-        call svm_asid_handle_vmrun
-        call svm_intr_assist
-
-        /* Check if the trace buffer is initialized. 
-         * Because the below condition is unlikely, we jump out of line
-         * instead of having a mostly taken branch over the unlikely code.
-         */
-        cmpb $0,tb_init_done
-        jnz  svm_trace
-svm_trace_done:
-
-        movl VCPU_svm_vmcb(%ebx),%ecx
-        movl UREGS_eax(%esp),%eax
-        movl %eax,VMCB_rax(%ecx)
-        movl UREGS_eip(%esp),%eax
-        movl %eax,VMCB_rip(%ecx)
-        movl UREGS_esp(%esp),%eax
-        movl %eax,VMCB_rsp(%ecx)
-        movl UREGS_eflags(%esp),%eax
-        movl %eax,VMCB_rflags(%ecx)
-
-        movl VCPU_svm_vmcb_pa(%ebx),%eax
-        popl %ebx
-        popl %ecx
-        popl %edx
-        popl %esi
-        popl %edi
-        popl %ebp
-
-        VMRUN
-
-        pushl %ebp
-        pushl %edi
-        pushl %esi
-        pushl %edx
-        pushl %ecx
-        pushl %ebx
-
-        GET_CURRENT(%ebx)
-        movb $0,VCPU_svm_vmcb_in_sync(%ebx)
-        movl VCPU_svm_vmcb(%ebx),%ecx
-        movl VMCB_rax(%ecx),%eax
-        movl %eax,UREGS_eax(%esp)
-        movl VMCB_rip(%ecx),%eax
-        movl %eax,UREGS_eip(%esp)
-        movl VMCB_rsp(%ecx),%eax
-        movl %eax,UREGS_esp(%esp)
-        movl VMCB_rflags(%ecx),%eax
-        movl %eax,UREGS_eflags(%esp)
-
-#ifndef NDEBUG
-        movw $0xbeef,%ax
-        movw %ax,UREGS_error_code(%esp)
-        movw %ax,UREGS_entry_vector(%esp)
-        movw %ax,UREGS_saved_upcall_mask(%esp)
-        movw %ax,UREGS_cs(%esp)
-        movw %ax,UREGS_ds(%esp)
-        movw %ax,UREGS_es(%esp)
-        movw %ax,UREGS_fs(%esp)
-        movw %ax,UREGS_gs(%esp)
-        movw %ax,UREGS_ss(%esp)
-#endif
-
-        STGI
-.globl svm_stgi_label;
-svm_stgi_label:
-        movl %esp,%eax
-        push %eax
-        call svm_vmexit_handler
-        addl $4,%esp
-        jmp  svm_asm_do_resume
-
-        ALIGN
-svm_process_softirqs:
-        STGI
-        call do_softirq
-        jmp  svm_asm_do_resume
-
-svm_trace:
-        /* Call out to C, as this is not speed critical path
-         * Note: svm_trace_vmentry will recheck the tb_init_done,
-         * but this is on the slow path, so who cares 
-         */
-        call svm_trace_vmentry
-        jmp  svm_trace_done
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/x86_64/Makefile
--- a/xen/arch/x86/hvm/svm/x86_64/Makefile      Thu Apr 24 14:02:16 2008 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-obj-y += exits.o
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/x86_64/exits.S
--- a/xen/arch/x86/hvm/svm/x86_64/exits.S       Thu Apr 24 14:02:16 2008 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,148 +0,0 @@
-/*
- * exits.S: AMD-V architecture-specific exit handling.
- * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-
-#include <xen/config.h>
-#include <xen/errno.h>
-#include <xen/softirq.h>
-#include <asm/asm_defns.h>
-#include <asm/apicdef.h>
-#include <asm/page.h>
-#include <public/xen.h>
-
-#define GET_CURRENT(reg)         \
-        movq $STACK_SIZE-8,reg;  \
-        orq  %rsp,reg;           \
-        andq $~7,reg;            \
-        movq (reg),reg;
-
-#define VMRUN  .byte 0x0F,0x01,0xD8
-#define STGI   .byte 0x0F,0x01,0xDC
-#define CLGI   .byte 0x0F,0x01,0xDD
-
-ENTRY(svm_asm_do_resume)
-        GET_CURRENT(%rbx)
-        CLGI
-
-        movl VCPU_processor(%rbx),%eax
-        shl  $IRQSTAT_shift,%rax
-        leaq irq_stat(%rip),%rdx
-        testl $~0,(%rdx,%rax,1)
-        jnz  svm_process_softirqs
-
-        call svm_asid_handle_vmrun
-        call svm_intr_assist
-
-        /* Check if the trace buffer is initialized. 
-         * Because the below condition is unlikely, we jump out of line
-         * instead of having a mostly taken branch over the unlikely code.
-         */
-        cmpb $0,tb_init_done(%rip)
-        jnz  svm_trace
-svm_trace_done:
-
-        movq VCPU_svm_vmcb(%rbx),%rcx
-        movq UREGS_rax(%rsp),%rax
-        movq %rax,VMCB_rax(%rcx)
-        movq UREGS_rip(%rsp),%rax
-        movq %rax,VMCB_rip(%rcx)
-        movq UREGS_rsp(%rsp),%rax
-        movq %rax,VMCB_rsp(%rcx)
-        movq UREGS_eflags(%rsp),%rax
-        movq %rax,VMCB_rflags(%rcx)
-
-        movq VCPU_svm_vmcb_pa(%rbx),%rax
-        popq %r15
-        popq %r14
-        popq %r13
-        popq %r12
-        popq %rbp
-        popq %rbx
-        popq %r11
-        popq %r10
-        popq %r9
-        popq %r8
-        addq $8,%rsp /* Skip %rax: restored by VMRUN. */
-        popq %rcx
-        popq %rdx
-        popq %rsi
-        popq %rdi
-
-        VMRUN
-
-        pushq %rdi
-        pushq %rsi
-        pushq %rdx
-        pushq %rcx
-        pushq %rax
-        pushq %r8
-        pushq %r9
-        pushq %r10
-        pushq %r11
-        pushq %rbx
-        pushq %rbp
-        pushq %r12
-        pushq %r13
-        pushq %r14
-        pushq %r15
-
-        GET_CURRENT(%rbx)
-        movb $0,VCPU_svm_vmcb_in_sync(%rbx)
-        movq VCPU_svm_vmcb(%rbx),%rcx
-        movq VMCB_rax(%rcx),%rax
-        movq %rax,UREGS_rax(%rsp)
-        movq VMCB_rip(%rcx),%rax
-        movq %rax,UREGS_rip(%rsp)
-        movq VMCB_rsp(%rcx),%rax
-        movq %rax,UREGS_rsp(%rsp)
-        movq VMCB_rflags(%rcx),%rax
-        movq %rax,UREGS_eflags(%rsp)
-
-#ifndef NDEBUG
-        movw $0xbeef,%ax
-        movw %ax,UREGS_error_code(%rsp)
-        movw %ax,UREGS_entry_vector(%rsp)
-        movw %ax,UREGS_saved_upcall_mask(%rsp)
-        movw %ax,UREGS_cs(%rsp)
-        movw %ax,UREGS_ds(%rsp)
-        movw %ax,UREGS_es(%rsp)
-        movw %ax,UREGS_fs(%rsp)
-        movw %ax,UREGS_gs(%rsp)
-        movw %ax,UREGS_ss(%rsp)
-#endif
-
-        STGI
-.globl svm_stgi_label;
-svm_stgi_label:
-        movq %rsp,%rdi
-        call svm_vmexit_handler
-        jmp  svm_asm_do_resume
-
-        ALIGN
-svm_process_softirqs:
-        STGI
-        call do_softirq
-        jmp  svm_asm_do_resume
-
-svm_trace:
-        /* Call out to C, as this is not speed critical path
-         * Note: svm_trace_vmentry will recheck the tb_init_done,
-         * but this is on the slow path, so who cares 
-         */
-        call svm_trace_vmentry
-        jmp  svm_trace_done
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/Makefile
--- a/xen/arch/x86/hvm/vmx/Makefile     Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/hvm/vmx/Makefile     Thu Apr 24 14:08:29 2008 -0600
@@ -1,6 +1,4 @@ subdir-$(x86_32) += x86_32
-subdir-$(x86_32) += x86_32
-subdir-$(x86_64) += x86_64
-
+obj-y += entry.o
 obj-y += intr.o
 obj-y += realmode.o
 obj-y += vmcs.o
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/entry.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/entry.S      Thu Apr 24 14:08:29 2008 -0600
@@ -0,0 +1,198 @@
+/*
+ * entry.S: VMX architecture-specific entry/exit handling.
+ * Copyright (c) 2004, Intel Corporation.
+ * Copyright (c) 2008, Citrix Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <xen/softirq.h>
+#include <asm/types.h>
+#include <asm/asm_defns.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+#include <public/xen.h>
+
+#define VMRESUME     .byte 0x0f,0x01,0xc3
+#define VMLAUNCH     .byte 0x0f,0x01,0xc2
+#define VMREAD(off)  .byte 0x0f,0x78,0x47,((off)-UREGS_rip)
+#define VMWRITE(off) .byte 0x0f,0x79,0x47,((off)-UREGS_rip)
+
+/* VMCS field encodings */
+#define GUEST_RSP    0x681c
+#define GUEST_RIP    0x681e
+#define GUEST_RFLAGS 0x6820
+
+#define get_current(reg)                        \
+        mov $STACK_SIZE-BYTES_PER_LONG, r(reg); \
+        or  r(sp), r(reg);                      \
+        and $~(BYTES_PER_LONG-1),r(reg);        \
+        mov (r(reg)),r(reg);
+
+#if defined(__x86_64__)
+#define r(reg) %r##reg
+#define addr_of(lbl) lbl(%rip)
+#define call_with_regs(fn)                      \
+        mov  %rsp,%rdi;                         \
+        call fn;
+#else /* defined(__i386__) */
+#define r(reg) %e##reg
+#define addr_of(lbl) lbl
+#define UREGS_rip UREGS_eip
+#define UREGS_rsp UREGS_esp
+#define call_with_regs(fn)                      \
+        mov  %esp,%eax;                         \
+        push %eax;                              \
+        call fn;                                \
+        add  $4,%esp;
+#endif
+
+        ALIGN
+.globl vmx_asm_vmexit_handler
+vmx_asm_vmexit_handler:
+#if defined(__x86_64__)
+        push %rdi
+        push %rsi
+        push %rdx
+        push %rcx
+        push %rax
+        push %r8
+        push %r9
+        push %r10
+        push %r11
+        push %rbx
+        push %rbp
+        push %r12
+        push %r13
+        push %r14
+        push %r15
+#else /* defined(__i386__) */
+        push %eax
+        push %ebp
+        push %edi
+        push %esi
+        push %edx
+        push %ecx
+        push %ebx
+#endif
+
+        get_current(bx)
+
+        movb $1,VCPU_vmx_launched(r(bx))
+
+        lea  UREGS_rip(r(sp)),r(di)
+        mov  $GUEST_RIP,%eax
+        /*VMREAD(UREGS_rip)*/
+        .byte 0x0f,0x78,0x07  /* vmread r(ax),(r(di)) */
+        mov  $GUEST_RSP,%eax
+        VMREAD(UREGS_rsp)
+        mov  $GUEST_RFLAGS,%eax
+        VMREAD(UREGS_eflags)
+
+        mov  %cr2,r(ax)
+        mov  r(ax),VCPU_hvm_guest_cr2(r(bx))
+
+#ifndef NDEBUG
+        mov  $0xbeef,%ax
+        mov  %ax,UREGS_error_code(r(sp))
+        mov  %ax,UREGS_entry_vector(r(sp))
+        mov  %ax,UREGS_saved_upcall_mask(r(sp))
+        mov  %ax,UREGS_cs(r(sp))
+        mov  %ax,UREGS_ds(r(sp))
+        mov  %ax,UREGS_es(r(sp))
+        mov  %ax,UREGS_fs(r(sp))
+        mov  %ax,UREGS_gs(r(sp))
+        mov  %ax,UREGS_ss(r(sp))
+#endif
+
+        call_with_regs(vmx_vmexit_handler)
+
+.globl vmx_asm_do_vmentry
+vmx_asm_do_vmentry:
+        get_current(bx)
+        cli
+
+        mov  VCPU_processor(r(bx)),%eax
+        shl  $IRQSTAT_shift,r(ax)
+        lea  addr_of(irq_stat),r(dx)
+        cmpl $0,(r(dx),r(ax),1)
+        jnz  .Lvmx_process_softirqs
+
+        call vmx_intr_assist
+
+        testb $0xff,VCPU_vmx_emul(r(bx))
+        jnz  .Lvmx_goto_realmode
+
+        mov  VCPU_hvm_guest_cr2(r(bx)),r(ax)
+        mov  r(ax),%cr2
+        call vmx_trace_vmentry
+
+        lea  UREGS_rip(r(sp)),r(di)
+        mov  $GUEST_RIP,%eax
+        /*VMWRITE(UREGS_rip)*/
+        .byte 0x0f,0x79,0x07  /* vmwrite (r(di)),r(ax) */
+        mov  $GUEST_RSP,%eax
+        VMWRITE(UREGS_rsp)
+        mov  $GUEST_RFLAGS,%eax
+        VMWRITE(UREGS_eflags)
+
+        cmpb $0,VCPU_vmx_launched(r(bx))
+#if defined(__x86_64__)
+        pop  %r15
+        pop  %r14
+        pop  %r13
+        pop  %r12
+        pop  %rbp
+        pop  %rbx
+        pop  %r11
+        pop  %r10
+        pop  %r9
+        pop  %r8
+        pop  %rax
+        pop  %rcx
+        pop  %rdx
+        pop  %rsi
+        pop  %rdi
+#else /* defined(__i386__) */
+        pop  %ebx
+        pop  %ecx
+        pop  %edx
+        pop  %esi
+        pop  %edi
+        pop  %ebp
+        pop  %eax
+#endif
+        je   .Lvmx_launch
+
+/*.Lvmx_resume:*/
+        VMRESUME
+        call vm_resume_fail
+        ud2
+
+.Lvmx_launch:
+        VMLAUNCH
+        call vm_launch_fail
+        ud2
+
+.Lvmx_goto_realmode:
+        sti
+        call_with_regs(vmx_realmode)
+        jmp  vmx_asm_do_vmentry
+
+.Lvmx_process_softirqs:
+        sti
+        call do_softirq
+        jmp  vmx_asm_do_vmentry
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/hvm/vmx/intr.c       Thu Apr 24 14:08:29 2008 -0600
@@ -111,15 +111,17 @@ static void vmx_dirq_assist(struct vcpu 
     struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
     struct dev_intx_gsi_link *digl;
 
-    if ( !vtd_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
+    if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
         return;
 
     for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS);
           irq < NR_IRQS;
           irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) )
     {
+        if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
+            continue;
+
         stop_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(irq)]);
-        clear_bit(irq, &hvm_irq_dpci->dirq_mask);
 
         list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list )
         {
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Thu Apr 24 14:08:29 2008 -0600
@@ -1622,17 +1622,11 @@ static int vmx_msr_read_intercept(struct
     u64 msr_content = 0;
     u32 ecx = regs->ecx, eax, edx;
     struct vcpu *v = current;
-    int index;
-    u64 *var_range_base = (u64*)v->arch.hvm_vcpu.mtrr.var_ranges;
-    u64 *fixed_range_base =  (u64*)v->arch.hvm_vcpu.mtrr.fixed_ranges;
 
     HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);
 
     switch ( ecx )
     {
-    case MSR_IA32_TSC:
-        msr_content = hvm_get_guest_time(v);
-        break;
     case MSR_IA32_SYSENTER_CS:
         msr_content = (u32)__vmread(GUEST_SYSENTER_CS);
         break;
@@ -1641,35 +1635,6 @@ static int vmx_msr_read_intercept(struct
         break;
     case MSR_IA32_SYSENTER_EIP:
         msr_content = __vmread(GUEST_SYSENTER_EIP);
-        break;
-    case MSR_IA32_APICBASE:
-        msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
-        break;
-    case MSR_IA32_CR_PAT:
-        msr_content = v->arch.hvm_vcpu.pat_cr;
-        break;
-    case MSR_MTRRcap:
-        msr_content = v->arch.hvm_vcpu.mtrr.mtrr_cap;
-        break;
-    case MSR_MTRRdefType:
-        msr_content = v->arch.hvm_vcpu.mtrr.def_type
-                        | (v->arch.hvm_vcpu.mtrr.enabled << 10);
-        break;
-    case MSR_MTRRfix64K_00000:
-        msr_content = fixed_range_base[0];
-        break;
-    case MSR_MTRRfix16K_80000:
-    case MSR_MTRRfix16K_A0000:
-        index = regs->ecx - MSR_MTRRfix16K_80000;
-        msr_content = fixed_range_base[index + 1];
-        break;
-    case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
-        index = regs->ecx - MSR_MTRRfix4K_C0000;
-        msr_content = fixed_range_base[index + 3];
-        break;
-    case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
-        index = regs->ecx - MSR_IA32_MTRR_PHYSBASE0;
-        msr_content = var_range_base[index];
         break;
     case MSR_IA32_DEBUGCTLMSR:
         msr_content = __vmread(GUEST_IA32_DEBUGCTL);
@@ -1679,17 +1644,6 @@ static int vmx_msr_read_intercept(struct
         break;
     case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
         goto gp_fault;
-    case MSR_IA32_MCG_CAP:
-    case MSR_IA32_MCG_STATUS:
-    case MSR_IA32_MC0_STATUS:
-    case MSR_IA32_MC1_STATUS:
-    case MSR_IA32_MC2_STATUS:
-    case MSR_IA32_MC3_STATUS:
-    case MSR_IA32_MC4_STATUS:
-    case MSR_IA32_MC5_STATUS:
-        /* No point in letting the guest see real MCEs */
-        msr_content = 0;
-        break;
     case MSR_IA32_MISC_ENABLE:
         rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
         /* Debug Trace Store is not supported. */
@@ -1729,8 +1683,8 @@ static int vmx_msr_read_intercept(struct
         goto gp_fault;
     }
 
-    regs->eax = msr_content & 0xFFFFFFFF;
-    regs->edx = msr_content >> 32;
+    regs->eax = (uint32_t)msr_content;
+    regs->edx = (uint32_t)(msr_content >> 32);
 
 done:
     hvmtrace_msr_read(v, ecx, msr_content);
@@ -1833,19 +1787,11 @@ void vmx_vlapic_msr_changed(struct vcpu 
     vmx_vmcs_exit(v);
 }
 
-extern bool_t mtrr_var_range_msr_set(struct mtrr_state *v,
-        u32 msr, u64 msr_content);
-extern bool_t mtrr_fix_range_msr_set(struct mtrr_state *v,
-        int row, u64 msr_content);
-extern bool_t mtrr_def_type_msr_set(struct mtrr_state *v, u64 msr_content);
-extern bool_t pat_msr_set(u64 *pat, u64 msr);
-
 static int vmx_msr_write_intercept(struct cpu_user_regs *regs)
 {
     u32 ecx = regs->ecx;
     u64 msr_content;
     struct vcpu *v = current;
-    int index;
 
     HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x",
                 ecx, (u32)regs->eax, (u32)regs->edx);
@@ -1856,10 +1802,6 @@ static int vmx_msr_write_intercept(struc
 
     switch ( ecx )
     {
-    case MSR_IA32_TSC:
-        hvm_set_guest_time(v, msr_content);
-        pt_reset(v);
-        break;
     case MSR_IA32_SYSENTER_CS:
         __vmwrite(GUEST_SYSENTER_CS, msr_content);
         break;
@@ -1869,41 +1811,6 @@ static int vmx_msr_write_intercept(struc
     case MSR_IA32_SYSENTER_EIP:
         __vmwrite(GUEST_SYSENTER_EIP, msr_content);
         break;
-    case MSR_IA32_APICBASE:
-        vlapic_msr_set(vcpu_vlapic(v), msr_content);
-        break;
-    case MSR_IA32_CR_PAT:
-        if ( !pat_msr_set(&v->arch.hvm_vcpu.pat_cr, msr_content) )
-           goto gp_fault;
-        break;
-    case MSR_MTRRdefType:
-        if ( !mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, msr_content) )
-           goto gp_fault;
-        break;
-    case MSR_MTRRfix64K_00000:
-        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, 0, msr_content) )
-            goto gp_fault;
-        break;
-    case MSR_MTRRfix16K_80000:
-    case MSR_MTRRfix16K_A0000:
-        index = regs->ecx - MSR_MTRRfix16K_80000 + 1;
-        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
-                                     index, msr_content) )
-            goto gp_fault;
-        break;
-    case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
-        index = regs->ecx - MSR_MTRRfix4K_C0000 + 3;
-        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
-                                     index, msr_content) )
-            goto gp_fault;
-        break;
-    case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
-        if ( !mtrr_var_range_msr_set(&v->arch.hvm_vcpu.mtrr,
-                                     regs->ecx, msr_content) )
-            goto gp_fault;
-        break;
-    case MSR_MTRRcap:
-        goto gp_fault;
     case MSR_IA32_DEBUGCTLMSR: {
         int i, rc = 0;
 
@@ -2330,12 +2237,12 @@ asmlinkage void vmx_vmexit_handler(struc
         break;
     case EXIT_REASON_MSR_READ:
         inst_len = __get_instruction_length(); /* Safe: RDMSR */
-        if ( vmx_msr_read_intercept(regs) == X86EMUL_OKAY )
+        if ( hvm_msr_read_intercept(regs) == X86EMUL_OKAY )
             __update_guest_eip(inst_len);
         break;
     case EXIT_REASON_MSR_WRITE:
         inst_len = __get_instruction_length(); /* Safe: WRMSR */
-        if ( vmx_msr_write_intercept(regs) == X86EMUL_OKAY )
+        if ( hvm_msr_write_intercept(regs) == X86EMUL_OKAY )
             __update_guest_eip(inst_len);
         break;
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/x86_32/Makefile
--- a/xen/arch/x86/hvm/vmx/x86_32/Makefile      Thu Apr 24 14:02:16 2008 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-obj-y += exits.o
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/x86_32/exits.S
--- a/xen/arch/x86/hvm/vmx/x86_32/exits.S       Thu Apr 24 14:02:16 2008 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,148 +0,0 @@
-/*
- * exits.S: VMX architecture-specific exit handling.
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-#include <xen/config.h>
-#include <xen/errno.h>
-#include <xen/softirq.h>
-#include <asm/asm_defns.h>
-#include <asm/apicdef.h>
-#include <asm/page.h>
-#include <public/xen.h>
-
-#define VMRESUME     .byte 0x0f,0x01,0xc3
-#define VMLAUNCH     .byte 0x0f,0x01,0xc2
-#define VMREAD(off)  .byte 0x0f,0x78,0x44,0x24,off
-#define VMWRITE(off) .byte 0x0f,0x79,0x44,0x24,off
-
-/* VMCS field encodings */
-#define GUEST_RSP    0x681c
-#define GUEST_RIP    0x681e
-#define GUEST_RFLAGS 0x6820
-
-#define GET_CURRENT(reg)         \
-        movl $STACK_SIZE-4, reg; \
-        orl  %esp, reg;          \
-        andl $~3,reg;            \
-        movl (reg),reg;
-
-#define HVM_SAVE_ALL_NOSEGREGS                                              \
-        pushl %eax;                                                         \
-        pushl %ebp;                                                         \
-        pushl %edi;                                                         \
-        pushl %esi;                                                         \
-        pushl %edx;                                                         \
-        pushl %ecx;                                                         \
-        pushl %ebx;
-
-#define HVM_RESTORE_ALL_NOSEGREGS               \
-        popl %ebx;                              \
-        popl %ecx;                              \
-        popl %edx;                              \
-        popl %esi;                              \
-        popl %edi;                              \
-        popl %ebp;                              \
-        popl %eax
-
-        ALIGN
-ENTRY(vmx_asm_vmexit_handler)
-        HVM_SAVE_ALL_NOSEGREGS
-        GET_CURRENT(%ebx)
-
-        movl $GUEST_RIP,%eax
-        VMREAD(UREGS_eip)
-        movl $GUEST_RSP,%eax
-        VMREAD(UREGS_esp)
-        movl $GUEST_RFLAGS,%eax
-        VMREAD(UREGS_eflags)
-
-        movl %cr2,%eax
-        movl %eax,VCPU_hvm_guest_cr2(%ebx)
-
-#ifndef NDEBUG
-        movw $0xbeef,%ax
-        movw %ax,UREGS_error_code(%esp)
-        movw %ax,UREGS_entry_vector(%esp)
-        movw %ax,UREGS_saved_upcall_mask(%esp)
-        movw %ax,UREGS_cs(%esp)
-        movw %ax,UREGS_ds(%esp)
-        movw %ax,UREGS_es(%esp)
-        movw %ax,UREGS_fs(%esp)
-        movw %ax,UREGS_gs(%esp)
-        movw %ax,UREGS_ss(%esp)
-#endif
-
-        movl %esp,%eax
-        push %eax
-        call vmx_vmexit_handler
-        addl $4,%esp
-        jmp vmx_asm_do_vmentry
-
-        ALIGN
-vmx_process_softirqs:
-        sti
-        call do_softirq
-        jmp vmx_asm_do_vmentry
-
-        ALIGN
-ENTRY(vmx_asm_do_vmentry)
-        GET_CURRENT(%ebx)
-        cli                             # tests must not race interrupts
-
-        movl VCPU_processor(%ebx),%eax
-        shl  $IRQSTAT_shift,%eax
-        cmpl $0,irq_stat(%eax,1)
-        jnz  vmx_process_softirqs
-
-        call vmx_intr_assist
-
-        testb $0xff,VCPU_vmx_emul(%ebx)
-        jnz  vmx_goto_realmode
-
-        movl VCPU_hvm_guest_cr2(%ebx),%eax
-        movl %eax,%cr2
-        call vmx_trace_vmentry
-
-        movl $GUEST_RIP,%eax
-        VMWRITE(UREGS_eip)
-        movl $GUEST_RSP,%eax
-        VMWRITE(UREGS_esp)
-        movl $GUEST_RFLAGS,%eax
-        VMWRITE(UREGS_eflags)
-
-        cmpb $0,VCPU_vmx_launched(%ebx)
-        je   vmx_launch
-
-/*vmx_resume:*/
-        HVM_RESTORE_ALL_NOSEGREGS
-        VMRESUME
-        call vm_resume_fail
-        ud2
-
-vmx_launch:
-        movb $1,VCPU_vmx_launched(%ebx)
-        HVM_RESTORE_ALL_NOSEGREGS
-        VMLAUNCH
-        call vm_launch_fail
-        ud2
-
-vmx_goto_realmode:
-        sti
-        movl %esp,%eax
-        push %eax
-        call vmx_realmode
-        addl $4,%esp
-        jmp vmx_asm_do_vmentry
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/x86_64/Makefile
--- a/xen/arch/x86/hvm/vmx/x86_64/Makefile      Thu Apr 24 14:02:16 2008 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-obj-y += exits.o
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/x86_64/exits.S
--- a/xen/arch/x86/hvm/vmx/x86_64/exits.S       Thu Apr 24 14:02:16 2008 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,165 +0,0 @@
-/*
- * exits.S: VMX architecture-specific exit handling.
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-#include <xen/config.h>
-#include <xen/errno.h>
-#include <xen/softirq.h>
-#include <asm/asm_defns.h>
-#include <asm/apicdef.h>
-#include <asm/page.h>
-#include <public/xen.h>
-
-#define VMRESUME     .byte 0x0f,0x01,0xc3
-#define VMLAUNCH     .byte 0x0f,0x01,0xc2
-#define VMREAD(off)  .byte 0x0f,0x78,0x47,((off)-UREGS_rip)
-#define VMWRITE(off) .byte 0x0f,0x79,0x47,((off)-UREGS_rip)
-
-/* VMCS field encodings */
-#define GUEST_RSP    0x681c
-#define GUEST_RIP    0x681e
-#define GUEST_RFLAGS 0x6820
-
-#define GET_CURRENT(reg)         \
-        movq $STACK_SIZE-8, reg; \
-        orq  %rsp, reg;          \
-        andq $~7,reg;            \
-        movq (reg),reg;
-
-#define HVM_SAVE_ALL_NOSEGREGS                  \
-        pushq %rdi;                             \
-        pushq %rsi;                             \
-        pushq %rdx;                             \
-        pushq %rcx;                             \
-        pushq %rax;                             \
-        pushq %r8;                              \
-        pushq %r9;                              \
-        pushq %r10;                             \
-        pushq %r11;                             \
-        pushq %rbx;                             \
-        pushq %rbp;                             \
-        pushq %r12;                             \
-        pushq %r13;                             \
-        pushq %r14;                             \
-        pushq %r15;
-
-#define HVM_RESTORE_ALL_NOSEGREGS               \
-        popq %r15;                              \
-        popq %r14;                              \
-        popq %r13;                              \
-        popq %r12;                              \
-        popq %rbp;                              \
-        popq %rbx;                              \
-        popq %r11;                              \
-        popq %r10;                              \
-        popq %r9;                               \
-        popq %r8;                               \
-        popq %rax;                              \
-        popq %rcx;                              \
-        popq %rdx;                              \
-        popq %rsi;                              \
-        popq %rdi
-
-        ALIGN
-ENTRY(vmx_asm_vmexit_handler)
-        HVM_SAVE_ALL_NOSEGREGS
-        GET_CURRENT(%rbx)
-
-        leaq UREGS_rip(%rsp),%rdi
-        movl $GUEST_RIP,%eax
-        /*VMREAD(UREGS_rip)*/
-        .byte 0x0f,0x78,0x07  /* vmread %rax,(%rdi) */
-        movl $GUEST_RSP,%eax
-        VMREAD(UREGS_rsp)
-        movl $GUEST_RFLAGS,%eax
-        VMREAD(UREGS_eflags)
-
-        movq %cr2,%rax
-        movq %rax,VCPU_hvm_guest_cr2(%rbx)
-
-#ifndef NDEBUG
-        movw $0xbeef,%ax
-        movw %ax,UREGS_error_code(%rsp)
-        movw %ax,UREGS_entry_vector(%rsp)
-        movw %ax,UREGS_saved_upcall_mask(%rsp)
-        movw %ax,UREGS_cs(%rsp)
-        movw %ax,UREGS_ds(%rsp)
-        movw %ax,UREGS_es(%rsp)
-        movw %ax,UREGS_fs(%rsp)
-        movw %ax,UREGS_gs(%rsp)
-        movw %ax,UREGS_ss(%rsp)
-#endif
-
-        movq %rsp,%rdi
-        call vmx_vmexit_handler
-        jmp vmx_asm_do_vmentry
-
-        ALIGN
-vmx_process_softirqs:
-        sti
-        call do_softirq
-        jmp vmx_asm_do_vmentry
-
-        ALIGN
-ENTRY(vmx_asm_do_vmentry)
-        GET_CURRENT(%rbx)
-        cli                             # tests must not race interrupts
-
-        movl  VCPU_processor(%rbx),%eax
-        shl   $IRQSTAT_shift,%rax
-        leaq  irq_stat(%rip),%rdx
-        cmpl  $0,(%rdx,%rax,1)
-        jnz   vmx_process_softirqs
-
-        call vmx_intr_assist
-
-        testb $0xff,VCPU_vmx_emul(%rbx)
-        jnz  vmx_goto_realmode
-
-        movq VCPU_hvm_guest_cr2(%rbx),%rax
-        movq %rax,%cr2
-        call vmx_trace_vmentry
-
-        leaq UREGS_rip(%rsp),%rdi
-        movl $GUEST_RIP,%eax
-        /*VMWRITE(UREGS_rip)*/
-        .byte 0x0f,0x79,0x07  /* vmwrite (%rdi),%rax */
-        movl $GUEST_RSP,%eax
-        VMWRITE(UREGS_rsp)
-        movl $GUEST_RFLAGS,%eax
-        VMWRITE(UREGS_eflags)
-
-        cmpb $0,VCPU_vmx_launched(%rbx)
-        je   vmx_launch
-
-/*vmx_resume:*/
-        HVM_RESTORE_ALL_NOSEGREGS
-        VMRESUME
-        call vm_resume_fail
-        ud2
-
-vmx_launch:
-        movb $1,VCPU_vmx_launched(%rbx)
-        HVM_RESTORE_ALL_NOSEGREGS
-        VMLAUNCH
-        call vm_launch_fail
-        ud2
-
-vmx_goto_realmode:
-        sti
-        movq %rsp,%rdi
-        call vmx_realmode
-        jmp vmx_asm_do_vmentry
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/mm.c Thu Apr 24 14:08:29 2008 -0600
@@ -3279,15 +3279,6 @@ long arch_memory_op(int op, XEN_GUEST_HA
         case XENMAPSPACE_shared_info:
             if ( xatp.idx == 0 )
                 mfn = virt_to_mfn(d->shared_info);
-            /* XXX: assumption here, this is called after E820 table is build
-             * need the E820 to initialize MTRR.
-             */
-            if ( is_hvm_domain(d) ) {
-                extern void init_mtrr_in_hyper(struct vcpu *);
-                struct vcpu *vs;
-                for_each_vcpu(d, vs)
-                    init_mtrr_in_hyper(vs);
-            }
             break;
         case XENMAPSPACE_grant_table:
             spin_lock(&d->grant_table->lock);
@@ -3625,29 +3616,18 @@ static int ptwr_emulated_cmpxchg(
 static int ptwr_emulated_cmpxchg(
     enum x86_segment seg,
     unsigned long offset,
-    unsigned long old,
-    unsigned long new,
+    void *p_old,
+    void *p_new,
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
+    paddr_t old = 0, new = 0;
+    if ( bytes > sizeof(paddr_t) )
+        return X86EMUL_UNHANDLEABLE;
+    memcpy(&old, p_old, bytes);
+    memcpy(&new, p_new, bytes);
     return ptwr_emulated_update(
         offset, old, new, bytes, 1,
-        container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
-}
-
-static int ptwr_emulated_cmpxchg8b(
-    enum x86_segment seg,
-    unsigned long offset,
-    unsigned long old,
-    unsigned long old_hi,
-    unsigned long new,
-    unsigned long new_hi,
-    struct x86_emulate_ctxt *ctxt)
-{
-    if ( CONFIG_PAGING_LEVELS == 2 )
-        return X86EMUL_UNHANDLEABLE;
-    return ptwr_emulated_update(
-        offset, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1,
         container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
 }
 
@@ -3656,7 +3636,6 @@ static struct x86_emulate_ops ptwr_emula
     .insn_fetch = ptwr_emulated_read,
     .write      = ptwr_emulated_write,
     .cmpxchg    = ptwr_emulated_cmpxchg,
-    .cmpxchg8b  = ptwr_emulated_cmpxchg8b
 };
 
 /* Write page fault handler: check if guest is trying to modify a PTE. */
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/mm/shadow/common.c   Thu Apr 24 14:08:29 2008 -0600
@@ -239,15 +239,15 @@ static int
 static int 
 hvm_emulate_cmpxchg(enum x86_segment seg,
                     unsigned long offset,
-                    unsigned long old,
-                    unsigned long new,
+                    void *p_old,
+                    void *p_new,
                     unsigned int bytes,
                     struct x86_emulate_ctxt *ctxt)
 {
     struct sh_emulate_ctxt *sh_ctxt =
         container_of(ctxt, struct sh_emulate_ctxt, ctxt);
     struct vcpu *v = current;
-    unsigned long addr;
+    unsigned long addr, old[2], new[2];
     int rc;
 
     if ( !is_x86_user_segment(seg) )
@@ -258,35 +258,21 @@ hvm_emulate_cmpxchg(enum x86_segment seg
     if ( rc )
         return rc;
 
-    return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
-        v, addr, old, new, bytes, sh_ctxt);
-}
-
-static int 
-hvm_emulate_cmpxchg8b(enum x86_segment seg,
-                      unsigned long offset,
-                      unsigned long old_lo,
-                      unsigned long old_hi,
-                      unsigned long new_lo,
-                      unsigned long new_hi,
-                      struct x86_emulate_ctxt *ctxt)
-{
-    struct sh_emulate_ctxt *sh_ctxt =
-        container_of(ctxt, struct sh_emulate_ctxt, ctxt);
-    struct vcpu *v = current;
-    unsigned long addr;
-    int rc;
-
-    if ( !is_x86_user_segment(seg) )
-        return X86EMUL_UNHANDLEABLE;
-
-    rc = hvm_translate_linear_addr(
-        seg, offset, 8, hvm_access_write, sh_ctxt, &addr);
-    if ( rc )
-        return rc;
-
-    return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b(
-        v, addr, old_lo, old_hi, new_lo, new_hi, sh_ctxt);
+    old[0] = new[0] = 0;
+    memcpy(old, p_old, bytes);
+    memcpy(new, p_new, bytes);
+
+    if ( bytes <= sizeof(long) )
+        return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
+            v, addr, old[0], new[0], bytes, sh_ctxt);
+
+#ifdef __i386__
+    if ( bytes == 8 )
+        return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b(
+            v, addr, old[0], old[1], new[0], new[1], sh_ctxt);
+#endif
+
+    return X86EMUL_UNHANDLEABLE;
 }
 
 static struct x86_emulate_ops hvm_shadow_emulator_ops = {
@@ -294,7 +280,6 @@ static struct x86_emulate_ops hvm_shadow
     .insn_fetch = hvm_emulate_insn_fetch,
     .write      = hvm_emulate_write,
     .cmpxchg    = hvm_emulate_cmpxchg,
-    .cmpxchg8b  = hvm_emulate_cmpxchg8b,
 };
 
 static int
@@ -338,36 +323,34 @@ static int
 static int 
 pv_emulate_cmpxchg(enum x86_segment seg,
                    unsigned long offset,
-                   unsigned long old,
-                   unsigned long new,
+                   void *p_old,
+                   void *p_new,
                    unsigned int bytes,
                    struct x86_emulate_ctxt *ctxt)
 {
     struct sh_emulate_ctxt *sh_ctxt =
         container_of(ctxt, struct sh_emulate_ctxt, ctxt);
+    unsigned long old[2], new[2];
     struct vcpu *v = current;
+
     if ( !is_x86_user_segment(seg) )
         return X86EMUL_UNHANDLEABLE;
-    return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
-        v, offset, old, new, bytes, sh_ctxt);
-}
-
-static int 
-pv_emulate_cmpxchg8b(enum x86_segment seg,
-                     unsigned long offset,
-                     unsigned long old_lo,
-                     unsigned long old_hi,
-                     unsigned long new_lo,
-                     unsigned long new_hi,
-                     struct x86_emulate_ctxt *ctxt)
-{
-    struct sh_emulate_ctxt *sh_ctxt =
-        container_of(ctxt, struct sh_emulate_ctxt, ctxt);
-    struct vcpu *v = current;
-    if ( !is_x86_user_segment(seg) )
-        return X86EMUL_UNHANDLEABLE;
-    return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b(
-        v, offset, old_lo, old_hi, new_lo, new_hi, sh_ctxt);
+
+    old[0] = new[0] = 0;
+    memcpy(old, p_old, bytes);
+    memcpy(new, p_new, bytes);
+
+    if ( bytes <= sizeof(long) )
+        return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
+            v, offset, old[0], new[0], bytes, sh_ctxt);
+
+#ifdef __i386__
+    if ( bytes == 8 )
+        return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b(
+            v, offset, old[0], old[1], new[0], new[1], sh_ctxt);
+#endif
+
+    return X86EMUL_UNHANDLEABLE;
 }
 
 static struct x86_emulate_ops pv_shadow_emulator_ops = {
@@ -375,7 +358,6 @@ static struct x86_emulate_ops pv_shadow_
     .insn_fetch = pv_emulate_read,
     .write      = pv_emulate_write,
     .cmpxchg    = pv_emulate_cmpxchg,
-    .cmpxchg8b  = pv_emulate_cmpxchg8b,
 };
 
 struct x86_emulate_ops *shadow_init_emulation(
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/mm/shadow/multi.c    Thu Apr 24 14:08:29 2008 -0600
@@ -2089,7 +2089,7 @@ static shadow_l1e_t * shadow_get_and_cre
         else 
         {
             /* Shadowing an actual guest l1 table */
-            if ( !mfn_valid(gw->l2mfn) ) return NULL; /* No guest page. */
+            if ( !mfn_valid(gw->l1mfn) ) return NULL; /* No guest page. */
             *sl1mfn = get_shadow_status(v, gw->l1mfn, SH_type_l1_shadow);
             if ( !mfn_valid(*sl1mfn) ) 
             {
@@ -4365,7 +4365,7 @@ static void emulate_unmap_dest(struct vc
     atomic_inc(&v->domain->arch.paging.shadow.gtable_dirty_version);
 }
 
-int
+static int
 sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src,
                       u32 bytes, struct sh_emulate_ctxt *sh_ctxt)
 {
@@ -4389,7 +4389,7 @@ sh_x86_emulate_write(struct vcpu *v, uns
     return X86EMUL_OKAY;
 }
 
-int
+static int
 sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr, 
                         unsigned long old, unsigned long new,
                         unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt)
@@ -4432,7 +4432,8 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u
     return rv;
 }
 
-int
+#ifdef __i386__
+static int
 sh_x86_emulate_cmpxchg8b(struct vcpu *v, unsigned long vaddr, 
                           unsigned long old_lo, unsigned long old_hi,
                           unsigned long new_lo, unsigned long new_hi,
@@ -4465,7 +4466,7 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v,
     shadow_unlock(v->domain);
     return rv;
 }
-
+#endif
 
 /**************************************************************************/
 /* Audit tools */
@@ -4738,7 +4739,9 @@ struct paging_mode sh_paging_mode = {
     .shadow.detach_old_tables      = sh_detach_old_tables,
     .shadow.x86_emulate_write      = sh_x86_emulate_write,
     .shadow.x86_emulate_cmpxchg    = sh_x86_emulate_cmpxchg,
+#ifdef __i386__
     .shadow.x86_emulate_cmpxchg8b  = sh_x86_emulate_cmpxchg8b,
+#endif
     .shadow.make_monitor_table     = sh_make_monitor_table,
     .shadow.destroy_monitor_table  = sh_destroy_monitor_table,
 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/setup.c      Thu Apr 24 14:08:29 2008 -0600
@@ -1019,10 +1019,6 @@ void __init __start_xen(unsigned long mb
         _initrd_len   = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
     }
 
-    iommu_setup();
-
-    amd_iommu_detect();
-
     /*
      * We're going to setup domain0 using the module(s) that we stashed safely
      * above our heap. The second module, if present, is an initrd ramdisk.
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/smp.c
--- a/xen/arch/x86/smp.c        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/smp.c        Thu Apr 24 14:08:29 2008 -0600
@@ -75,20 +75,10 @@ static inline int __prepare_ICR2 (unsign
     return SET_APIC_DEST_FIELD(mask);
 }
 
-static inline void check_IPI_mask(cpumask_t cpumask)
-{
-    /*
-     * Sanity, and necessary. An IPI with no target generates a send accept
-     * error with Pentium and P6 APICs.
-     */
-    ASSERT(cpus_subset(cpumask, cpu_online_map));
-    ASSERT(!cpus_empty(cpumask));
-}
-
 void apic_wait_icr_idle(void)
 {
-       while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY )
-               cpu_relax();
+    while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY )
+        cpu_relax();
 }
 
 void send_IPI_mask_flat(cpumask_t cpumask, int vector)
@@ -97,7 +87,8 @@ void send_IPI_mask_flat(cpumask_t cpumas
     unsigned long cfg;
     unsigned long flags;
 
-    check_IPI_mask(cpumask);
+    /* An IPI with no target generates a send accept error from P5/P6 APICs. */
+    WARN_ON(mask == 0);
 
     local_irq_save(flags);
 
@@ -130,17 +121,9 @@ void send_IPI_mask_phys(cpumask_t mask, 
     unsigned long cfg, flags;
     unsigned int query_cpu;
 
-    check_IPI_mask(mask);
-
-    /*
-     * Hack. The clustered APIC addressing mode doesn't allow us to send 
-     * to an arbitrary mask, so I do a unicasts to each CPU instead. This 
-     * should be modified to do 1 message per cluster ID - mbligh
-     */ 
-
     local_irq_save(flags);
 
-    for_each_cpu_mask( query_cpu, mask )
+    for_each_cpu_mask ( query_cpu, mask )
     {
         /*
          * Wait for idle.
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/traps.c      Thu Apr 24 14:08:29 2008 -0600
@@ -479,6 +479,7 @@ static inline void do_trap(
 static inline void do_trap(
     int trapnr, struct cpu_user_regs *regs, int use_error_code)
 {
+    struct vcpu *curr = current;
     unsigned long fixup;
 
     DEBUGGER_trap_entry(trapnr, regs);
@@ -494,6 +495,14 @@ static inline void do_trap(
         dprintk(XENLOG_ERR, "Trap %d: %p -> %p\n",
                 trapnr, _p(regs->eip), _p(fixup));
         regs->eip = fixup;
+        return;
+    }
+
+    if ( ((trapnr == TRAP_copro_error) || (trapnr == TRAP_simd_error)) &&
+         is_hvm_vcpu(curr) && curr->arch.hvm_vcpu.fpu_exception_callback )
+    {
+        curr->arch.hvm_vcpu.fpu_exception_callback(
+            curr->arch.hvm_vcpu.fpu_exception_callback_arg, regs);
         return;
     }
 
@@ -1399,6 +1408,13 @@ static int admin_io_okay(
     unsigned int port, unsigned int bytes,
     struct vcpu *v, struct cpu_user_regs *regs)
 {
+    /*
+     * Port 0xcf8 (CONFIG_ADDRESS) is only visible for DWORD accesses.
+     * We never permit direct access to that register.
+     */
+    if ( (port == 0xcf8) && (bytes == 4) )
+        return 0;
+
     return ioports_access_permitted(v->domain, port, port + bytes - 1);
 }
 
@@ -1431,10 +1447,10 @@ static uint32_t guest_io_read(
         {
             sub_data = pv_pit_handler(port, 0, 0);
         }
-        else if ( (port & 0xfffc) == 0xcf8 )
-        {
-            size = min(bytes, 4 - (port & 3));
-            sub_data = v->domain->arch.pci_cf8 >> ((port & 3) * 8);
+        else if ( (port == 0xcf8) && (bytes == 4) )
+        {
+            size = 4;
+            sub_data = v->domain->arch.pci_cf8;
         }
         else if ( ((port & 0xfffc) == 0xcfc) && IS_PRIV(v->domain) )
         {
@@ -1489,19 +1505,10 @@ static void guest_io_write(
         {
             pv_pit_handler(port, (uint8_t)data, 1);
         }
-        else if ( (port & 0xfffc) == 0xcf8 )
-        {
-            size = min(bytes, 4 - (port & 3));
-            if ( size == 4 )
-            {
-                v->domain->arch.pci_cf8 = data;
-            }
-            else
-            {
-                uint32_t mask = ((1u << (size * 8)) - 1) << ((port & 3) * 8);
-                v->domain->arch.pci_cf8 &= ~mask;
-                v->domain->arch.pci_cf8 |= (data << ((port & 3) * 8)) & mask;
-            }
+        else if ( (port == 0xcf8) && (bytes == 4) )
+        {
+            size = 4;
+            v->domain->arch.pci_cf8 = data;
         }
         else if ( ((port & 0xfffc) == 0xcfc) && IS_PRIV(v->domain) )
         {
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/x86_emulate.c        Thu Apr 24 14:08:29 2008 -0600
@@ -11,23 +11,7 @@
 
 #include <asm/x86_emulate.h>
 
+/* Avoid namespace pollution. */
 #undef cmpxchg
 
-#define __emulate_fpu_insn(_op)                 \
-do{ int _exn;                                   \
-    asm volatile (                              \
-        "1: " _op "\n"                          \
-        "2: \n"                                 \
-        ".section .fixup,\"ax\"\n"              \
-        "3: mov $1,%0\n"                        \
-        "   jmp 2b\n"                           \
-        ".previous\n"                           \
-        ".section __ex_table,\"a\"\n"           \
-        "   "__FIXUP_ALIGN"\n"                  \
-        "   "__FIXUP_WORD" 1b,3b\n"             \
-        ".previous"                             \
-        : "=r" (_exn) : "0" (0) );              \
-    generate_exception_if(_exn, EXC_MF, -1);    \
-} while (0)
-
 #include "x86_emulate/x86_emulate.c"
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/x86_emulate/x86_emulate.c
--- a/xen/arch/x86/x86_emulate/x86_emulate.c    Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c    Thu Apr 24 14:08:29 2008 -0600
@@ -195,9 +195,9 @@ static uint8_t twobyte_table[256] = {
     /* 0x50 - 0x5F */
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     /* 0x60 - 0x6F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM,
     /* 0x70 - 0x7F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM,
     /* 0x80 - 0x87 */
     ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
     ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
@@ -546,6 +546,62 @@ do {                                    
                      ? (uint16_t)_regs.eip : (uint32_t)_regs.eip);      \
 } while (0)
 
+struct fpu_insn_ctxt {
+    uint8_t insn_bytes;
+    uint8_t exn_raised;
+};
+
+static void fpu_handle_exception(void *_fic, struct cpu_user_regs *regs)
+{
+    struct fpu_insn_ctxt *fic = _fic;
+    fic->exn_raised = 1;
+    regs->eip += fic->insn_bytes;
+}
+
+#define get_fpu(_type, _fic)                                    \
+do{ (_fic)->exn_raised = 0;                                     \
+    fail_if(ops->get_fpu == NULL);                              \
+    rc = ops->get_fpu(fpu_handle_exception, _fic, _type, ctxt); \
+    if ( rc ) goto done;                                        \
+} while (0)
+#define put_fpu(_fic)                                           \
+do{                                                             \
+    if ( ops->put_fpu != NULL )                                 \
+        ops->put_fpu(ctxt);                                     \
+    generate_exception_if((_fic)->exn_raised, EXC_MF, -1);      \
+} while (0)
+
+#define emulate_fpu_insn(_op)                           \
+do{ struct fpu_insn_ctxt fic;                           \
+    get_fpu(X86EMUL_FPU_fpu, &fic);                     \
+    asm volatile (                                      \
+        "movb $2f-1f,%0 \n"                             \
+        "1: " _op "     \n"                             \
+        "2:             \n"                             \
+        : "=m" (fic.insn_bytes) : : "memory" );         \
+    put_fpu(&fic);                                      \
+} while (0)
+
+#define emulate_fpu_insn_memdst(_op, _arg)              \
+do{ struct fpu_insn_ctxt fic;                           \
+    get_fpu(X86EMUL_FPU_fpu, &fic);                     \
+    asm volatile (                                      \
+        "movb $2f-1f,%0 \n"                             \
+        "1: " _op " %1  \n"                             \
+        "2:             \n"                             \
+        : "=m" (fic.insn_bytes), "=m" (_arg)            \
+        : : "memory" );                                 \
+    put_fpu(&fic);                                      \
+} while (0)
+
+#define emulate_fpu_insn_stub(_bytes...)                                \
+do{ uint8_t stub[] = { _bytes, 0xc3 };                                  \
+    struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 };        \
+    get_fpu(X86EMUL_FPU_fpu, &fic);                                     \
+    (*(void(*)(void))stub)();                                           \
+    put_fpu(&fic);                                                      \
+} while (0)
+
 static unsigned long __get_rep_prefix(
     struct cpu_user_regs *int_regs,
     struct cpu_user_regs *ext_regs,
@@ -851,6 +907,7 @@ protmode_load_seg(
     struct { uint32_t a, b; } desc;
     unsigned long val;
     uint8_t dpl, rpl, cpl;
+    uint32_t new_desc_b;
     int rc, fault_type = EXC_TS;
 
     /* NULL selector? */
@@ -933,10 +990,11 @@ protmode_load_seg(
         }
 
         /* Ensure Accessed flag is set. */
+        new_desc_b = desc.b | 0x100;
         rc = ((desc.b & 0x100) ? X86EMUL_OKAY : 
               ops->cmpxchg(
-                  x86_seg_none, desctab.base + (sel & 0xfff8) + 4, desc.b,
-                  desc.b | 0x100, 4, ctxt));
+                  x86_seg_none, desctab.base + (sel & 0xfff8) + 4,
+                  &desc.b, &new_desc_b, 4, ctxt));
     } while ( rc == X86EMUL_CMPXCHG_FAILED );
 
     if ( rc )
@@ -2036,8 +2094,8 @@ x86_emulate(
             /* nothing to do */;
         else if ( lock_prefix )
             rc = ops->cmpxchg(
-                dst.mem.seg, dst.mem.off, dst.orig_val,
-                dst.val, dst.bytes, ctxt);
+                dst.mem.seg, dst.mem.off, &dst.orig_val,
+                &dst.val, dst.bytes, ctxt);
         else
             rc = ops->write(
                 dst.mem.seg, dst.mem.off, dst.val, dst.bytes, ctxt);
@@ -2399,9 +2457,7 @@ x86_emulate(
     }
 
     case 0x9b:  /* wait/fwait */
-        fail_if(ops->load_fpu_ctxt == NULL);
-        ops->load_fpu_ctxt(ctxt);
-        __emulate_fpu_insn("fwait");
+        emulate_fpu_insn("fwait");
         break;
 
     case 0x9c: /* pushf */
@@ -2721,77 +2777,89 @@ x86_emulate(
     }
 
     case 0xd9: /* FPU 0xd9 */
-        fail_if(ops->load_fpu_ctxt == NULL);
-        ops->load_fpu_ctxt(ctxt);
         switch ( modrm )
         {
-        case 0xc0: __emulate_fpu_insn(".byte 0xd9,0xc0"); break;
-        case 0xc1: __emulate_fpu_insn(".byte 0xd9,0xc1"); break;
-        case 0xc2: __emulate_fpu_insn(".byte 0xd9,0xc2"); break;
-        case 0xc3: __emulate_fpu_insn(".byte 0xd9,0xc3"); break;
-        case 0xc4: __emulate_fpu_insn(".byte 0xd9,0xc4"); break;
-        case 0xc5: __emulate_fpu_insn(".byte 0xd9,0xc5"); break;
-        case 0xc6: __emulate_fpu_insn(".byte 0xd9,0xc6"); break;
-        case 0xc7: __emulate_fpu_insn(".byte 0xd9,0xc7"); break;
-        case 0xe0: __emulate_fpu_insn(".byte 0xd9,0xe0"); break;
-        case 0xe8: __emulate_fpu_insn(".byte 0xd9,0xe8"); break;
-        case 0xee: __emulate_fpu_insn(".byte 0xd9,0xee"); break;
+        case 0xc0 ... 0xc7: /* fld %stN */
+        case 0xc8 ... 0xcf: /* fxch %stN */
+        case 0xd0: /* fnop */
+        case 0xe0: /* fchs */
+        case 0xe1: /* fabs */
+        case 0xe4: /* ftst */
+        case 0xe5: /* fxam */
+        case 0xe8: /* fld1 */
+        case 0xe9: /* fldl2t */
+        case 0xea: /* fldl2e */
+        case 0xeb: /* fldpi */
+        case 0xec: /* fldlg2 */
+        case 0xed: /* fldln2 */
+        case 0xee: /* fldz */
+        case 0xf0: /* f2xm1 */
+        case 0xf1: /* fyl2x */
+        case 0xf2: /* fptan */
+        case 0xf3: /* fpatan */
+        case 0xf4: /* fxtract */
+        case 0xf5: /* fprem1 */
+        case 0xf6: /* fdecstp */
+        case 0xf7: /* fincstp */
+        case 0xf8: /* fprem */
+        case 0xf9: /* fyl2xp1 */
+        case 0xfa: /* fsqrt */
+        case 0xfb: /* fsincos */
+        case 0xfc: /* frndint */
+        case 0xfd: /* fscale */
+        case 0xfe: /* fsin */
+        case 0xff: /* fcos */
+            emulate_fpu_insn_stub(0xd9, modrm);
+            break;
         default:
             fail_if((modrm_reg & 7) != 7);
             fail_if(modrm >= 0xc0);
             /* fnstcw m2byte */
             ea.bytes = 2;
             dst = ea;
-            asm volatile ( "fnstcw %0" : "=m" (dst.val) );
+            emulate_fpu_insn_memdst("fnstcw", dst.val);
         }
         break;
 
     case 0xdb: /* FPU 0xdb */
-        fail_if(ops->load_fpu_ctxt == NULL);
-        ops->load_fpu_ctxt(ctxt);
         fail_if(modrm != 0xe3);
         /* fninit */
-        asm volatile ( "fninit" );
+        emulate_fpu_insn("fninit");
         break;
 
     case 0xdd: /* FPU 0xdd */
-        fail_if(ops->load_fpu_ctxt == NULL);
-        ops->load_fpu_ctxt(ctxt);
         fail_if((modrm_reg & 7) != 7);
         fail_if(modrm >= 0xc0);
         /* fnstsw m2byte */
         ea.bytes = 2;
         dst = ea;
-        asm volatile ( "fnstsw %0" : "=m" (dst.val) );
+        emulate_fpu_insn_memdst("fnstsw", dst.val);
         break;
 
     case 0xde: /* FPU 0xde */
-        fail_if(ops->load_fpu_ctxt == NULL);
-        ops->load_fpu_ctxt(ctxt);
         switch ( modrm )
         {
-        case 0xd9: __emulate_fpu_insn(".byte 0xde,0xd9"); break;
-        case 0xf8: __emulate_fpu_insn(".byte 0xde,0xf8"); break;
-        case 0xf9: __emulate_fpu_insn(".byte 0xde,0xf9"); break;
-        case 0xfa: __emulate_fpu_insn(".byte 0xde,0xfa"); break;
-        case 0xfb: __emulate_fpu_insn(".byte 0xde,0xfb"); break;
-        case 0xfc: __emulate_fpu_insn(".byte 0xde,0xfc"); break;
-        case 0xfd: __emulate_fpu_insn(".byte 0xde,0xfd"); break;
-        case 0xfe: __emulate_fpu_insn(".byte 0xde,0xfe"); break;
-        case 0xff: __emulate_fpu_insn(".byte 0xde,0xff"); break;
-        default: goto cannot_emulate;
+        case 0xc0 ... 0xc7: /* faddp %stN */
+        case 0xc8 ... 0xcf: /* fmulp %stN */
+        case 0xd9: /* fcompp */
+        case 0xe0 ... 0xe7: /* fsubrp %stN */
+        case 0xe8 ... 0xef: /* fsubp %stN */
+        case 0xf0 ... 0xf7: /* fdivrp %stN */
+        case 0xf8 ... 0xff: /* fdivp %stN */
+            emulate_fpu_insn_stub(0xde, modrm);
+            break;
+        default:
+            goto cannot_emulate;
         }
         break;
 
     case 0xdf: /* FPU 0xdf */
-        fail_if(ops->load_fpu_ctxt == NULL);
-        ops->load_fpu_ctxt(ctxt);
         fail_if(modrm != 0xe0);
         /* fnstsw %ax */
         dst.bytes = 2;
         dst.type = OP_REG;
         dst.reg = (unsigned long *)&_regs.eax;
-        asm volatile ( "fnstsw %0" : "=m" (dst.val) );
+        emulate_fpu_insn_memdst("fnstsw", dst.val);
         break;
 
     case 0xe0 ... 0xe2: /* loop{,z,nz} */ {
@@ -2975,6 +3043,7 @@ x86_emulate(
 
     case 0xa3: bt: /* bt */
         emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags);
+        dst.type = OP_NONE;
         break;
 
     case 0xa4: /* shld imm8,r,r/m */
@@ -3067,7 +3136,11 @@ x86_emulate(
               : "=r" (dst.val), "=q" (zf)
               : "r" (src.val), "1" (0) );
         _regs.eflags &= ~EFLG_ZF;
-        _regs.eflags |= zf ? EFLG_ZF : 0;
+        if ( zf )
+        {
+            _regs.eflags |= EFLG_ZF;
+            dst.type = OP_NONE;
+        }
         break;
     }
 
@@ -3077,7 +3150,11 @@ x86_emulate(
               : "=r" (dst.val), "=q" (zf)
               : "r" (src.val), "1" (0) );
         _regs.eflags &= ~EFLG_ZF;
-        _regs.eflags |= zf ? EFLG_ZF : 0;
+        if ( zf )
+        {
+            _regs.eflags |= EFLG_ZF;
+            dst.type = OP_NONE;
+        }
         break;
     }
 
@@ -3310,6 +3387,44 @@ x86_emulate(
         break;
     }
 
+    case 0x6f: /* movq mm/m64,mm */ {
+        uint8_t stub[] = { 0x0f, 0x6f, modrm, 0xc3 };
+        struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 };
+        uint64_t val;
+        if ( ea.type == OP_MEM )
+        {
+            unsigned long lval, hval;
+            if ( (rc = ops->read(ea.mem.seg, ea.mem.off+0, &lval, 4, ctxt)) ||
+                 (rc = ops->read(ea.mem.seg, ea.mem.off+4, &hval, 4, ctxt)) )
+                goto done;
+            val = ((uint64_t)hval << 32) | (uint32_t)lval;
+            stub[2] = modrm & 0x38; /* movq (%eax),%mmN */
+        }
+        get_fpu(X86EMUL_FPU_mmx, &fic);
+        asm volatile ( "call *%0" : : "r" (stub), "a" (&val) : "memory" );
+        put_fpu(&fic);
+        break;
+    }
+
+    case 0x7f: /* movq mm,mm/m64 */ {
+        uint8_t stub[] = { 0x0f, 0x7f, modrm, 0xc3 };
+        struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 };
+        uint64_t val;
+        if ( ea.type == OP_MEM )
+            stub[2] = modrm & 0x38; /* movq %mmN,(%eax) */
+        get_fpu(X86EMUL_FPU_mmx, &fic);
+        asm volatile ( "call *%0" : : "r" (stub), "a" (&val) : "memory" );
+        put_fpu(&fic);
+        if ( ea.type == OP_MEM )
+        {
+            unsigned long lval = (uint32_t)val, hval = (uint32_t)(val >> 32);
+            if ( (rc = ops->write(ea.mem.seg, ea.mem.off+0, lval, 4, ctxt)) ||
+                 (rc = ops->write(ea.mem.seg, ea.mem.off+4, hval, 4, ctxt)) )
+                goto done;
+        }
+        break;
+    }
+
     case 0x80 ... 0x8f: /* jcc (near) */ {
         int rel = (((op_bytes == 2) && !mode_64bit())
                    ? (int32_t)insn_fetch_type(int16_t)
@@ -3346,60 +3461,49 @@ x86_emulate(
         src.val = x86_seg_gs;
         goto pop_seg;
 
-    case 0xc7: /* Grp9 (cmpxchg8b) */
-#if defined(__i386__)
-    {
-        unsigned long old_lo, old_hi;
+    case 0xc7: /* Grp9 (cmpxchg8b/cmpxchg16b) */ {
+        unsigned long old[2], exp[2], new[2];
+        unsigned int i;
+
         generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1);
         generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
-        if ( (rc = ops->read(ea.mem.seg, ea.mem.off+0, &old_lo, 4, ctxt)) ||
-             (rc = ops->read(ea.mem.seg, ea.mem.off+4, &old_hi, 4, ctxt)) )
-            goto done;
-        if ( (old_lo != _regs.eax) || (old_hi != _regs.edx) )
-        {
-            _regs.eax = old_lo;
-            _regs.edx = old_hi;
+        op_bytes *= 2;
+
+        /* Get actual old value. */
+        for ( i = 0; i < (op_bytes/sizeof(long)); i++ )
+            if ( (rc = ops->read(ea.mem.seg, ea.mem.off + i*sizeof(long),
+                                 &old[i], sizeof(long), ctxt)) != 0 )
+                goto done;
+
+        /* Get expected and proposed values. */
+        if ( op_bytes == 8 )
+        {
+            ((uint32_t *)exp)[0] = _regs.eax; ((uint32_t *)exp)[1] = _regs.edx;
+            ((uint32_t *)new)[0] = _regs.ebx; ((uint32_t *)new)[1] = _regs.ecx;
+        }
+        else
+        {
+            exp[0] = _regs.eax; exp[1] = _regs.edx;
+            new[0] = _regs.ebx; new[1] = _regs.ecx;
+        }
+
+        if ( memcmp(old, exp, op_bytes) )
+        {
+            /* Expected != actual: store actual to rDX:rAX and clear ZF. */
+            _regs.eax = (op_bytes == 8) ? ((uint32_t *)old)[0] : old[0];
+            _regs.edx = (op_bytes == 8) ? ((uint32_t *)old)[1] : old[1];
             _regs.eflags &= ~EFLG_ZF;
         }
-        else if ( ops->cmpxchg8b == NULL )
-        {
-            rc = X86EMUL_UNHANDLEABLE;
-            goto done;
-        }
         else
         {
-            if ( (rc = ops->cmpxchg8b(ea.mem.seg, ea.mem.off, old_lo, old_hi,
-                                      _regs.ebx, _regs.ecx, ctxt)) != 0 )
+            /* Expected == actual: attempt atomic cmpxchg and set ZF. */
+            if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old,
+                                    new, op_bytes, ctxt)) != 0 )
                 goto done;
             _regs.eflags |= EFLG_ZF;
         }
         break;
     }
-#elif defined(__x86_64__)
-    {
-        unsigned long old, new;
-        generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1);
-        generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
-        if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &old, 8, ctxt)) != 0 )
-            goto done;
-        if ( ((uint32_t)(old>>0) != (uint32_t)_regs.eax) ||
-             ((uint32_t)(old>>32) != (uint32_t)_regs.edx) )
-        {
-            _regs.eax = (uint32_t)(old>>0);
-            _regs.edx = (uint32_t)(old>>32);
-            _regs.eflags &= ~EFLG_ZF;
-        }
-        else
-        {
-            new = (_regs.ecx<<32)|(uint32_t)_regs.ebx;
-            if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old,
-                                    new, 8, ctxt)) != 0 )
-                goto done;
-            _regs.eflags |= EFLG_ZF;
-        }
-        break;
-    }
-#endif
 
     case 0xc8 ... 0xcf: /* bswap */
         dst.type = OP_REG;
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/x86_emulate/x86_emulate.h
--- a/xen/arch/x86/x86_emulate/x86_emulate.h    Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h    Thu Apr 24 14:08:29 2008 -0600
@@ -95,6 +95,12 @@ struct segment_register {
  /* (cmpxchg accessor): CMPXCHG failed. Maps to X86EMUL_RETRY in caller. */
 #define X86EMUL_CMPXCHG_FAILED 3
 
+/* FPU sub-types which may be requested via ->get_fpu(). */
+enum x86_emulate_fpu_type {
+    X86EMUL_FPU_fpu, /* Standard FPU coprocessor instruction set */
+    X86EMUL_FPU_mmx  /* MMX instruction set (%mm0-%mm7) */
+};
+
 /*
  * These operations represent the instruction emulator's interface to memory.
  * 
@@ -104,8 +110,7 @@ struct segment_register {
  *     some out-of-band mechanism, unknown to the emulator. The memop signals
  *     failure by returning X86EMUL_EXCEPTION to the emulator, which will
  *     then immediately bail.
- *  2. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only
- *     cmpxchg8b_emulated need support 8-byte accesses.
+ *  2. Valid access sizes are 1, 2, 4 and 8 (x86/64 only) bytes.
  *  3. The emulator cannot handle 64-bit mode emulation on an x86/32 system.
  */
 struct x86_emulate_ops
@@ -153,34 +158,16 @@ struct x86_emulate_ops
 
     /*
      * cmpxchg: Emulate an atomic (LOCKed) CMPXCHG operation.
-     *  @old:   [IN ] Value expected to be current at @addr.
-     *  @new:   [IN ] Value to write to @addr.
+     *  @p_old: [IN ] Pointer to value expected to be current at @addr.
+     *  @p_new: [IN ] Pointer to value to write to @addr.
+     *  @bytes: [IN ] Operation size (up to 8 (x86/32) or 16 (x86/64) bytes).
      */
     int (*cmpxchg)(
         enum x86_segment seg,
         unsigned long offset,
-        unsigned long old,
-        unsigned long new,
-        unsigned int bytes,
-        struct x86_emulate_ctxt *ctxt);
-
-    /*
-     * cmpxchg8b: Emulate an atomic (LOCKed) CMPXCHG8B operation.
-     *  @old:   [IN ] Value expected to be current at @addr.
-     *  @new:   [IN ] Value to write to @addr.
-     * NOTES:
-     *  1. This function is only ever called when emulating a real CMPXCHG8B.
-     *  2. This function is *never* called on x86/64 systems.
-     *  2. Not defining this function (i.e., specifying NULL) is equivalent
-     *     to defining a function that always returns X86EMUL_UNHANDLEABLE.
-     */
-    int (*cmpxchg8b)(
-        enum x86_segment seg,
-        unsigned long offset,
-        unsigned long old_lo,
-        unsigned long old_hi,
-        unsigned long new_lo,
-        unsigned long new_hi,
+        void *p_old,
+        void *p_new,
+        unsigned int bytes,
         struct x86_emulate_ctxt *ctxt);
 
     /*
@@ -342,8 +329,19 @@ struct x86_emulate_ops
         uint8_t insn_len,
         struct x86_emulate_ctxt *ctxt);
 
-    /* load_fpu_ctxt: Load emulated environment's FPU state onto processor. */
-    void (*load_fpu_ctxt)(
+    /*
+     * get_fpu: Load emulated environment's FPU state onto processor.
+     *  @exn_callback: On any FPU or SIMD exception, pass control to
+     *                 (*exception_callback)(exception_callback_arg, regs).
+     */
+    int (*get_fpu)(
+        void (*exception_callback)(void *, struct cpu_user_regs *),
+        void *exception_callback_arg,
+        enum x86_emulate_fpu_type type,
+        struct x86_emulate_ctxt *ctxt);
+
+    /* put_fpu: Relinquish the FPU. Unhook from FPU/SIMD exception handlers. */
+    void (*put_fpu)(
         struct x86_emulate_ctxt *ctxt);
 
     /* invlpg: Invalidate paging structures which map addressed byte. */
diff -r 239b44eeb2d6 -r dc510776dd59 xen/common/trace.c
--- a/xen/common/trace.c        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/common/trace.c        Thu Apr 24 14:08:29 2008 -0600
@@ -374,6 +374,15 @@ static inline int insert_lost_records(st
                            (unsigned char *)&ed);
 }
 
+/*
+ * Notification is performed in qtasklet to avoid deadlocks with contexts
+ * which __trace_var() may be called from (e.g., scheduler critical regions).
+ */
+static void trace_notify_dom0(unsigned long unused)
+{
+    send_guest_global_virq(dom0, VIRQ_TBUF);
+}
+static DECLARE_TASKLET(trace_notify_dom0_tasklet, trace_notify_dom0, 0);
 
 /**
  * trace - Enters a trace tuple into the trace buffer for the current CPU.
@@ -506,7 +515,7 @@ void __trace_var(u32 event, int cycles, 
     /* Notify trace buffer consumer that we've crossed the high water mark. */
     if ( started_below_highwater &&
          (calc_unconsumed_bytes(buf) >= t_buf_highwater) )
-        send_guest_global_virq(dom0, VIRQ_TBUF);
+        tasklet_schedule(&trace_notify_dom0_tasklet);
 }
 
 /*
diff -r 239b44eeb2d6 -r dc510776dd59 xen/common/xencomm.c
--- a/xen/common/xencomm.c      Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/common/xencomm.c      Thu Apr 24 14:08:29 2008 -0600
@@ -323,7 +323,7 @@ xencomm_copy_chunk_to(
                (unsigned long)xencomm_vaddr(paddr, page));
 
     memcpy(xencomm_vaddr(paddr, page), (void *)from, len);
-    xencomm_mark_dirty(xencomm_vaddr(paddr, page), len);
+    xencomm_mark_dirty((unsigned long)xencomm_vaddr(paddr, page), len);
     put_page(page);
 
     return 0;
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/char/console.c        Thu Apr 24 14:08:29 2008 -0600
@@ -322,7 +322,7 @@ static long guest_console_write(XEN_GUES
 
     while ( count > 0 )
     {
-        while ( serial_tx_space(sercon_handle) < (SERIAL_TXBUFSZ / 2) )
+        while ( serial_tx_space(sercon_handle) < (serial_txbufsz / 2) )
         {
             if ( hypercall_preempt_check() )
                 break;
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/char/serial.c
--- a/xen/drivers/char/serial.c Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/char/serial.c Thu Apr 24 14:08:29 2008 -0600
@@ -15,6 +15,19 @@
 #include <xen/mm.h>
 #include <xen/serial.h>
 
+/* Never drop characters, even if the async transmit buffer fills. */
+/* #define SERIAL_NEVER_DROP_CHARS 1 */
+
+unsigned int serial_txbufsz = 16384;
+static void __init parse_serial_tx_buffer(const char *s)
+{
+    serial_txbufsz = max((unsigned int)parse_size_and_unit(s, NULL), 512u);
+}
+custom_param("serial_tx_buffer", parse_serial_tx_buffer);
+
+#define mask_serial_rxbuf_idx(_i) ((_i)&(serial_rxbufsz-1))
+#define mask_serial_txbuf_idx(_i) ((_i)&(serial_txbufsz-1))
+
 static struct serial_port com[2] = {
     { .rx_lock = SPIN_LOCK_UNLOCKED, .tx_lock = SPIN_LOCK_UNLOCKED }, 
     { .rx_lock = SPIN_LOCK_UNLOCKED, .tx_lock = SPIN_LOCK_UNLOCKED }
@@ -36,8 +49,8 @@ void serial_rx_interrupt(struct serial_p
             fn = port->rx_hi;
         else if ( !(c & 0x80) && (port->rx_lo != NULL) )
             fn = port->rx_lo;
-        else if ( (port->rxbufp - port->rxbufc) != SERIAL_RXBUFSZ )
-            port->rxbuf[MASK_SERIAL_RXBUF_IDX(port->rxbufp++)] = c;            
+        else if ( (port->rxbufp - port->rxbufc) != serial_rxbufsz )
+            port->rxbuf[mask_serial_rxbuf_idx(port->rxbufp++)] = c;            
     }
 
     spin_unlock_irqrestore(&port->rx_lock, flags);
@@ -72,7 +85,7 @@ void serial_tx_interrupt(struct serial_p
             if ( port->txbufc == port->txbufp )
                 break;
             port->driver->putc(
-                port, port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufc++)]);
+                port, port->txbuf[mask_serial_txbuf_idx(port->txbufc++)]);
         }
     }
 
@@ -81,22 +94,24 @@ void serial_tx_interrupt(struct serial_p
 
 static void __serial_putc(struct serial_port *port, char c)
 {
-    int i;
-
     if ( (port->txbuf != NULL) && !port->sync )
     {
         /* Interrupt-driven (asynchronous) transmitter. */
-        if ( (port->txbufp - port->txbufc) == SERIAL_TXBUFSZ )
-        {
-            /* Buffer is full: we spin, but could alternatively drop chars. */
+#ifdef SERIAL_NEVER_DROP_CHARS
+        if ( (port->txbufp - port->txbufc) == serial_txbufsz )
+        {
+            /* Buffer is full: we spin waiting for space to appear. */
+            int i;
             while ( !port->driver->tx_empty(port) )
                 cpu_relax();
             for ( i = 0; i < port->tx_fifo_size; i++ )
                 port->driver->putc(
-                    port, port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufc++)]);
-            port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufp++)] = c;
-        }
-        else if ( ((port->txbufp - port->txbufc) == 0) &&
+                    port, port->txbuf[mask_serial_txbuf_idx(port->txbufc++)]);
+            port->txbuf[mask_serial_txbuf_idx(port->txbufp++)] = c;
+            return;
+        }
+#endif
+        if ( ((port->txbufp - port->txbufc) == 0) &&
                   port->driver->tx_empty(port) )
         {
             /* Buffer and UART FIFO are both empty. */
@@ -105,7 +120,7 @@ static void __serial_putc(struct serial_
         else
         {
             /* Normal case: buffer the character. */
-            port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufp++)] = c;
+            port->txbuf[mask_serial_txbuf_idx(port->txbufp++)] = c;
         }
     }
     else if ( port->driver->tx_empty )
@@ -200,7 +215,7 @@ char serial_getc(int handle)
             
             if ( port->rxbufp != port->rxbufc )
             {
-                c = port->rxbuf[MASK_SERIAL_RXBUF_IDX(port->rxbufc++)];
+                c = port->rxbuf[mask_serial_rxbuf_idx(port->rxbufc++)];
                 spin_unlock_irqrestore(&port->rx_lock, flags);
                 break;
             }
@@ -336,7 +351,7 @@ void serial_start_sync(int handle)
             while ( !port->driver->tx_empty(port) )
                 cpu_relax();
             port->driver->putc(
-                port, port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufc++)]);
+                port, port->txbuf[mask_serial_txbuf_idx(port->txbufc++)]);
         }
     }
 
@@ -364,9 +379,9 @@ int serial_tx_space(int handle)
 {
     struct serial_port *port;
     if ( handle == -1 )
-        return SERIAL_TXBUFSZ;
-    port = &com[handle & SERHND_IDX];
-    return SERIAL_TXBUFSZ - (port->txbufp - port->txbufc);
+        return serial_txbufsz;
+    port = &com[handle & SERHND_IDX];
+    return serial_txbufsz - (port->txbufp - port->txbufc);
 }
 
 void __devinit serial_init_preirq(void)
@@ -431,7 +446,7 @@ void serial_async_transmit(struct serial
     BUG_ON(!port->driver->tx_empty);
     if ( port->txbuf == NULL )
         port->txbuf = alloc_xenheap_pages(
-            get_order_from_bytes(SERIAL_TXBUFSZ));
+            get_order_from_bytes(serial_txbufsz));
 }
 
 /*
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/amd/iommu_acpi.c
--- a/xen/drivers/passthrough/amd/iommu_acpi.c  Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c  Thu Apr 24 14:08:29 2008 -0600
@@ -139,7 +139,7 @@ static int __init register_exclusion_ran
     iommu = find_iommu_for_device(bus, devfn);
     if ( !iommu )
     {
-        dprintk(XENLOG_ERR, "IVMD Error: No IOMMU for Dev_Id 0x%x!\n", bdf);
+        amd_iov_error("IVMD Error: No IOMMU for Dev_Id 0x%x!\n", bdf);
         return -ENODEV;
     }
     req = ivrs_mappings[bdf].dte_requestor_id;
@@ -221,7 +221,7 @@ static int __init parse_ivmd_device_sele
     bdf = ivmd_block->header.dev_id;
     if ( bdf >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR, "IVMD Error: Invalid Dev_Id 0x%x\n", bdf);
+        amd_iov_error("IVMD Error: Invalid Dev_Id 0x%x\n", bdf);
         return -ENODEV;
     }
 
@@ -238,21 +238,18 @@ static int __init parse_ivmd_device_rang
     first_bdf = ivmd_block->header.dev_id;
     if ( first_bdf >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR, "IVMD Error: "
-                "Invalid Range_First Dev_Id 0x%x\n", first_bdf);
+        amd_iov_error(
+            "IVMD Error: Invalid Range_First Dev_Id 0x%x\n", first_bdf);
         return -ENODEV;
     }
 
     last_bdf = ivmd_block->last_dev_id;
     if ( (last_bdf >= ivrs_bdf_entries) || (last_bdf <= first_bdf) )
     {
-        dprintk(XENLOG_ERR, "IVMD Error: "
-                "Invalid Range_Last Dev_Id 0x%x\n", last_bdf);
-        return -ENODEV;
-    }
-
-    dprintk(XENLOG_ERR, " Dev_Id Range: 0x%x -> 0x%x\n",
-            first_bdf, last_bdf);
+        amd_iov_error(
+            "IVMD Error: Invalid Range_Last Dev_Id 0x%x\n", last_bdf);
+        return -ENODEV;
+    }
 
     for ( bdf = first_bdf, error = 0; (bdf <= last_bdf) && !error; bdf++ )
         error = register_exclusion_range_for_device(
@@ -272,8 +269,7 @@ static int __init parse_ivmd_device_iomm
                                     ivmd_block->cap_offset);
     if ( !iommu )
     {
-        dprintk(XENLOG_ERR,
-                "IVMD Error: No IOMMU for Dev_Id 0x%x  Cap 0x%x\n",
+        amd_iov_error("IVMD Error: No IOMMU for Dev_Id 0x%x  Cap 0x%x\n",
                 ivmd_block->header.dev_id, ivmd_block->cap_offset);
         return -ENODEV;
     }
@@ -290,7 +286,7 @@ static int __init parse_ivmd_block(struc
     if ( ivmd_block->header.length <
          sizeof(struct acpi_ivmd_block_header) )
     {
-        dprintk(XENLOG_ERR, "IVMD Error: Invalid Block Length!\n");
+        amd_iov_error("IVMD Error: Invalid Block Length!\n");
         return -ENODEV;
     }
 
@@ -299,10 +295,9 @@ static int __init parse_ivmd_block(struc
     base = start_addr & PAGE_MASK;
     limit = (start_addr + mem_length - 1) & PAGE_MASK;
 
-    dprintk(XENLOG_INFO, "IVMD Block: Type 0x%x\n",
-            ivmd_block->header.type);
-    dprintk(XENLOG_INFO, " Start_Addr_Phys 0x%lx\n", start_addr);
-    dprintk(XENLOG_INFO, " Mem_Length 0x%lx\n", mem_length);
+    amd_iov_info("IVMD Block: Type 0x%x\n",ivmd_block->header.type);
+    amd_iov_info(" Start_Addr_Phys 0x%lx\n", start_addr);
+    amd_iov_info(" Mem_Length 0x%lx\n", mem_length);
 
     if ( get_field_from_byte(ivmd_block->header.flags,
                              AMD_IOMMU_ACPI_EXCLUSION_RANGE_MASK,
@@ -321,7 +316,7 @@ static int __init parse_ivmd_block(struc
     }
     else
     {
-        dprintk(KERN_ERR, "IVMD Error: Invalid Flag Field!\n");
+        amd_iov_error("IVMD Error: Invalid Flag Field!\n");
         return -ENODEV;
     }
 
@@ -344,7 +339,7 @@ static int __init parse_ivmd_block(struc
                                        base, limit, iw, ir);
 
     default:
-        dprintk(XENLOG_ERR, "IVMD Error: Invalid Block Type!\n");
+        amd_iov_error("IVMD Error: Invalid Block Type!\n");
         return -ENODEV;
     }
 }
@@ -354,7 +349,7 @@ static u16 __init parse_ivhd_device_padd
 {
     if ( header_length < (block_length + pad_length) )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: Invalid Device_Entry Length!\n");
+        amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n");
         return 0;
     }
 
@@ -369,8 +364,7 @@ static u16 __init parse_ivhd_device_sele
     bdf = ivhd_device->header.dev_id;
     if ( bdf >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Device_Entry Dev_Id 0x%x\n", bdf);
+        amd_iov_error("IVHD Error: Invalid Device_Entry Dev_Id 0x%x\n", bdf);
         return 0;
     }
 
@@ -393,14 +387,14 @@ static u16 __init parse_ivhd_device_rang
     dev_length = sizeof(struct acpi_ivhd_device_range);
     if ( header_length < (block_length + dev_length) )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: Invalid Device_Entry Length!\n");
+        amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n");
         return 0;
     }
 
     if ( ivhd_device->range.trailer.type !=
          AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
+        amd_iov_error("IVHD Error: "
                 "Invalid Range: End_Type 0x%x\n",
                 ivhd_device->range.trailer.type);
         return 0;
@@ -409,21 +403,20 @@ static u16 __init parse_ivhd_device_rang
     first_bdf = ivhd_device->header.dev_id;
     if ( first_bdf >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Range: First Dev_Id 0x%x\n", first_bdf);
+        amd_iov_error(
+            "IVHD Error: Invalid Range: First Dev_Id 0x%x\n", first_bdf);
         return 0;
     }
 
     last_bdf = ivhd_device->range.trailer.dev_id;
     if ( (last_bdf >= ivrs_bdf_entries) || (last_bdf <= first_bdf) )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
-        return 0;
-    }
-
-    dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n",
-            first_bdf, last_bdf);
+        amd_iov_error(
+            "IVHD Error: Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
+        return 0;
+    }
+
+    amd_iov_info(" Dev_Id Range: 0x%x -> 0x%x\n", first_bdf, last_bdf);
 
     /* override flags for range of devices */
     sys_mgt = get_field_from_byte(ivhd_device->header.flags,
@@ -444,28 +437,25 @@ static u16 __init parse_ivhd_device_alia
     dev_length = sizeof(struct acpi_ivhd_device_alias);
     if ( header_length < (block_length + dev_length) )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Device_Entry Length!\n");
+        amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n");
         return 0;
     }
 
     bdf = ivhd_device->header.dev_id;
     if ( bdf >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Device_Entry Dev_Id 0x%x\n", bdf);
+        amd_iov_error("IVHD Error: Invalid Device_Entry Dev_Id 0x%x\n", bdf);
         return 0;
     }
 
     alias_id = ivhd_device->alias.dev_id;
     if ( alias_id >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Alias Dev_Id 0x%x\n", alias_id);
-        return 0;
-    }
-
-    dprintk(XENLOG_INFO, " Dev_Id Alias: 0x%x\n", alias_id);
+        amd_iov_error("IVHD Error: Invalid Alias Dev_Id 0x%x\n", alias_id);
+        return 0;
+    }
+
+    amd_iov_info(" Dev_Id Alias: 0x%x\n", alias_id);
 
     /* override requestor_id and flags for device */
     ivrs_mappings[bdf].dte_requestor_id = alias_id;
@@ -490,15 +480,14 @@ static u16 __init parse_ivhd_device_alia
     dev_length = sizeof(struct acpi_ivhd_device_alias_range);
     if ( header_length < (block_length + dev_length) )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Device_Entry Length!\n");
+        amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n");
         return 0;
     }
 
     if ( ivhd_device->alias_range.trailer.type !=
          AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
+        amd_iov_error("IVHD Error: "
                 "Invalid Range: End_Type 0x%x\n",
                 ivhd_device->alias_range.trailer.type);
         return 0;
@@ -507,30 +496,28 @@ static u16 __init parse_ivhd_device_alia
     first_bdf = ivhd_device->header.dev_id;
     if ( first_bdf >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR,"IVHD Error: "
-                "Invalid Range: First Dev_Id 0x%x\n", first_bdf);
+        amd_iov_error(
+            "IVHD Error: Invalid Range: First Dev_Id 0x%x\n", first_bdf);
         return 0;
     }
 
     last_bdf = ivhd_device->alias_range.trailer.dev_id;
     if ( last_bdf >= ivrs_bdf_entries || last_bdf <= first_bdf )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
+        amd_iov_error(
+            "IVHD Error: Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
         return 0;
     }
 
     alias_id = ivhd_device->alias_range.alias.dev_id;
     if ( alias_id >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Alias Dev_Id 0x%x\n", alias_id);
-        return 0;
-    }
-
-    dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n",
-            first_bdf, last_bdf);
-    dprintk(XENLOG_INFO, " Dev_Id Alias: 0x%x\n", alias_id);
+        amd_iov_error("IVHD Error: Invalid Alias Dev_Id 0x%x\n", alias_id);
+        return 0;
+    }
+
+    amd_iov_info(" Dev_Id Range: 0x%x -> 0x%x\n", first_bdf, last_bdf);
+    amd_iov_info(" Dev_Id Alias: 0x%x\n", alias_id);
 
     /* override requestor_id and flags for range of devices */
     sys_mgt = get_field_from_byte(ivhd_device->header.flags,
@@ -555,16 +542,14 @@ static u16 __init parse_ivhd_device_exte
     dev_length = sizeof(struct acpi_ivhd_device_extended);
     if ( header_length < (block_length + dev_length) )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Device_Entry Length!\n");
+        amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n");
         return 0;
     }
 
     bdf = ivhd_device->header.dev_id;
     if ( bdf >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Device_Entry Dev_Id 0x%x\n", bdf);
+        amd_iov_error("IVHD Error: Invalid Device_Entry Dev_Id 0x%x\n", bdf);
         return 0;
     }
 
@@ -587,15 +572,14 @@ static u16 __init parse_ivhd_device_exte
     dev_length = sizeof(struct acpi_ivhd_device_extended_range);
     if ( header_length < (block_length + dev_length) )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Device_Entry Length!\n");
+        amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n");
         return 0;
     }
 
     if ( ivhd_device->extended_range.trailer.type !=
          AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
+        amd_iov_error("IVHD Error: "
                 "Invalid Range: End_Type 0x%x\n",
                 ivhd_device->extended_range.trailer.type);
         return 0;
@@ -604,20 +588,20 @@ static u16 __init parse_ivhd_device_exte
     first_bdf = ivhd_device->header.dev_id;
     if ( first_bdf >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Range: First Dev_Id 0x%x\n", first_bdf);
+        amd_iov_error(
+            "IVHD Error: Invalid Range: First Dev_Id 0x%x\n", first_bdf);
         return 0;
     }
 
     last_bdf = ivhd_device->extended_range.trailer.dev_id;
     if ( (last_bdf >= ivrs_bdf_entries) || (last_bdf <= first_bdf) )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
-        return 0;
-    }
-
-    dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n",
+        amd_iov_error(
+            "IVHD Error: Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
+        return 0;
+    }
+
+    amd_iov_info(" Dev_Id Range: 0x%x -> 0x%x\n",
             first_bdf, last_bdf);
 
     /* override flags for range of devices */
@@ -639,7 +623,7 @@ static int __init parse_ivhd_block(struc
     if ( ivhd_block->header.length <
          sizeof(struct acpi_ivhd_block_header) )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: Invalid Block Length!\n");
+        amd_iov_error("IVHD Error: Invalid Block Length!\n");
         return -ENODEV;
     }
 
@@ -647,21 +631,16 @@ static int __init parse_ivhd_block(struc
                                     ivhd_block->cap_offset);
     if ( !iommu )
     {
-        dprintk(XENLOG_ERR,
-                "IVHD Error: No IOMMU for Dev_Id 0x%x  Cap 0x%x\n",
+        amd_iov_error("IVHD Error: No IOMMU for Dev_Id 0x%x  Cap 0x%x\n",
                 ivhd_block->header.dev_id, ivhd_block->cap_offset);
         return -ENODEV;
     }
 
-    dprintk(XENLOG_INFO, "IVHD Block:\n");
-    dprintk(XENLOG_INFO, " Cap_Offset 0x%x\n",
-            ivhd_block->cap_offset);
-    dprintk(XENLOG_INFO, " MMIO_BAR_Phys 0x%lx\n",
-            (unsigned long)ivhd_block->mmio_base);
-    dprintk(XENLOG_INFO, " PCI_Segment 0x%x\n",
-            ivhd_block->pci_segment);
-    dprintk(XENLOG_INFO, " IOMMU_Info 0x%x\n",
-            ivhd_block->iommu_info);
+    amd_iov_info("IVHD Block:\n");
+    amd_iov_info(" Cap_Offset 0x%x\n", ivhd_block->cap_offset);
+    amd_iov_info(" MMIO_BAR_Phys 0x%"PRIx64"\n",ivhd_block->mmio_base);
+    amd_iov_info( " PCI_Segment 0x%x\n", ivhd_block->pci_segment);
+    amd_iov_info( " IOMMU_Info 0x%x\n", ivhd_block->iommu_info);
 
     /* override IOMMU support flags */
     iommu->coherent = get_field_from_byte(ivhd_block->header.flags,
@@ -692,13 +671,10 @@ static int __init parse_ivhd_block(struc
         ivhd_device = (union acpi_ivhd_device *)
             ((u8 *)ivhd_block + block_length);
 
-        dprintk(XENLOG_INFO, "IVHD Device Entry:\n");
-        dprintk(XENLOG_INFO, " Type 0x%x\n",
-                ivhd_device->header.type);
-        dprintk(XENLOG_INFO, " Dev_Id 0x%x\n",
-                ivhd_device->header.dev_id);
-        dprintk(XENLOG_INFO, " Flags 0x%x\n",
-                ivhd_device->header.flags);
+        amd_iov_info( "IVHD Device Entry:\n");
+        amd_iov_info( " Type 0x%x\n", ivhd_device->header.type);
+        amd_iov_info( " Dev_Id 0x%x\n", ivhd_device->header.dev_id);
+        amd_iov_info( " Flags 0x%x\n", ivhd_device->header.flags);
 
         switch ( ivhd_device->header.type )
         {
@@ -741,8 +717,7 @@ static int __init parse_ivhd_block(struc
                 ivhd_block->header.length, block_length);
             break;
         default:
-            dprintk(XENLOG_ERR, "IVHD Error: "
-                    "Invalid Device Type!\n");
+            amd_iov_error("IVHD Error: Invalid Device Type!\n");
             dev_length = 0;
             break;
         }
@@ -774,46 +749,49 @@ static int __init parse_ivrs_block(struc
         return parse_ivmd_block(ivmd_block);
 
     default:
-        dprintk(XENLOG_ERR, "IVRS Error: Invalid Block Type!\n");
+        amd_iov_error("IVRS Error: Invalid Block Type!\n");
         return -ENODEV;
     }
 
     return 0;
 }
 
-void __init dump_acpi_table_header(struct acpi_table_header *table)
-{
+static void __init dump_acpi_table_header(struct acpi_table_header *table)
+{
+#ifdef AMD_IOV_DEBUG
     int i;
 
-    printk(XENLOG_INFO "AMD IOMMU: ACPI Table:\n");
-    printk(XENLOG_INFO " Signature ");
+    amd_iov_info("ACPI Table:\n");
+    amd_iov_info(" Signature ");
     for ( i = 0; i < ACPI_NAME_SIZE; i++ )
         printk("%c", table->signature[i]);
     printk("\n");
 
-    printk(" Length 0x%x\n", table->length);
-    printk(" Revision 0x%x\n", table->revision);
-    printk(" CheckSum 0x%x\n", table->checksum);
-
-    printk(" OEM_Id ");
+    amd_iov_info(" Length 0x%x\n", table->length);
+    amd_iov_info(" Revision 0x%x\n", table->revision);
+    amd_iov_info(" CheckSum 0x%x\n", table->checksum);
+
+    amd_iov_info(" OEM_Id ");
     for ( i = 0; i < ACPI_OEM_ID_SIZE; i++ )
         printk("%c", table->oem_id[i]);
     printk("\n");
 
-    printk(" OEM_Table_Id ");
+    amd_iov_info(" OEM_Table_Id ");
     for ( i = 0; i < ACPI_OEM_TABLE_ID_SIZE; i++ )
         printk("%c", table->oem_table_id[i]);
     printk("\n");
 
-    printk(" OEM_Revision 0x%x\n", table->oem_revision);
-
-    printk(" Creator_Id ");
+    amd_iov_info(" OEM_Revision 0x%x\n", table->oem_revision);
+
+    amd_iov_info(" Creator_Id ");
     for ( i = 0; i < ACPI_NAME_SIZE; i++ )
         printk("%c", table->asl_compiler_id[i]);
     printk("\n");
 
-    printk(" Creator_Revision 0x%x\n",
+    amd_iov_info(" Creator_Revision 0x%x\n",
            table->asl_compiler_revision);
+#endif
+
 }
 
 int __init parse_ivrs_table(unsigned long phys_addr, unsigned long size)
@@ -827,9 +805,7 @@ int __init parse_ivrs_table(unsigned lon
 
     BUG_ON(!table);
 
-#if 0
     dump_acpi_table_header(table);
-#endif
 
     /* validate checksum: sum of entire table == 0 */
     checksum = 0;
@@ -838,7 +814,7 @@ int __init parse_ivrs_table(unsigned lon
         checksum += raw_table[i];
     if ( checksum )
     {
-        dprintk(XENLOG_ERR, "IVRS Error: "
+        amd_iov_error("IVRS Error: "
                 "Invalid Checksum 0x%x\n", checksum);
         return -ENODEV;
     }
@@ -850,15 +826,15 @@ int __init parse_ivrs_table(unsigned lon
         ivrs_block = (struct acpi_ivrs_block_header *)
             ((u8 *)table + length);
 
-        dprintk(XENLOG_INFO, "IVRS Block:\n");
-        dprintk(XENLOG_INFO, " Type 0x%x\n", ivrs_block->type);
-        dprintk(XENLOG_INFO, " Flags 0x%x\n", ivrs_block->flags);
-        dprintk(XENLOG_INFO, " Length 0x%x\n", ivrs_block->length);
-        dprintk(XENLOG_INFO, " Dev_Id 0x%x\n", ivrs_block->dev_id);
+        amd_iov_info("IVRS Block:\n");
+        amd_iov_info(" Type 0x%x\n", ivrs_block->type);
+        amd_iov_info(" Flags 0x%x\n", ivrs_block->flags);
+        amd_iov_info(" Length 0x%x\n", ivrs_block->length);
+        amd_iov_info(" Dev_Id 0x%x\n", ivrs_block->dev_id);
 
         if ( table->length < (length + ivrs_block->length) )
         {
-            dprintk(XENLOG_ERR, "IVRS Error: "
+            amd_iov_error("IVRS Error: "
                     "Table Length Exceeded: 0x%x -> 0x%lx\n",
                     table->length,
                     (length + ivrs_block->length));
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/amd/iommu_detect.c
--- a/xen/drivers/passthrough/amd/iommu_detect.c        Thu Apr 24 14:02:16 
2008 -0600
+++ b/xen/drivers/passthrough/amd/iommu_detect.c        Thu Apr 24 14:08:29 
2008 -0600
@@ -85,6 +85,45 @@ int __init get_iommu_last_downstream_bus
     return 0;
 }
 
+static int __init get_iommu_msi_capabilities(u8 bus, u8 dev, u8 func,
+            struct amd_iommu *iommu)
+{
+    int cap_ptr, cap_id;
+    u32 cap_header;
+    u16 control;
+    int count = 0;
+
+    cap_ptr = pci_conf_read8(bus, dev, func,
+            PCI_CAPABILITY_LIST);
+
+    while ( cap_ptr >= PCI_MIN_CAP_OFFSET &&
+        count < PCI_MAX_CAP_BLOCKS )
+    {
+        cap_ptr &= PCI_CAP_PTR_MASK;
+        cap_header = pci_conf_read32(bus, dev, func, cap_ptr);
+        cap_id = get_field_from_reg_u32(cap_header,
+                PCI_CAP_ID_MASK, PCI_CAP_ID_SHIFT);
+
+        if ( cap_id == PCI_CAP_ID_MSI )
+        {
+            iommu->msi_cap = cap_ptr;
+            break;
+        }
+        cap_ptr = get_field_from_reg_u32(cap_header,
+                PCI_CAP_NEXT_PTR_MASK, PCI_CAP_NEXT_PTR_SHIFT);
+        count++;
+    }
+
+    if ( !iommu->msi_cap )
+        return -ENODEV;
+
+    amd_iov_info("Found MSI capability block \n");
+    control = pci_conf_read16(bus, dev, func,
+            iommu->msi_cap + PCI_MSI_FLAGS);
+    iommu->maskbit = control & PCI_MSI_FLAGS_MASKBIT;
+    return 0;
+}
+
 int __init get_iommu_capabilities(u8 bus, u8 dev, u8 func, u8 cap_ptr,
                                   struct amd_iommu *iommu)
 {
@@ -99,8 +138,7 @@ int __init get_iommu_capabilities(u8 bus
 
     if ( ((mmio_bar & 0x1) == 0) || (iommu->mmio_base_phys == 0) )
     {
-        dprintk(XENLOG_ERR ,
-                "AMD IOMMU: Invalid MMIO_BAR = 0x%"PRIx64"\n", mmio_bar);
+        amd_iov_error("Invalid MMIO_BAR = 0x%"PRIx64"\n", mmio_bar);
         return -ENODEV;
     }
 
@@ -133,6 +171,8 @@ int __init get_iommu_capabilities(u8 bus
     iommu->msi_number = get_field_from_reg_u32(
         misc_info, PCI_CAP_MSI_NUMBER_MASK, PCI_CAP_MSI_NUMBER_SHIFT);
 
+    get_iommu_msi_capabilities(bus, dev, func, iommu);
+
     return 0;
 }
 
@@ -176,24 +216,24 @@ static int __init scan_functions_for_iom
     int bus, int dev, iommu_detect_callback_ptr_t iommu_detect_callback)
 {
     int func, hdr_type;
-    int count, error = 0;
-
-    func = 0;
-    count = 1;
-    while ( VALID_PCI_VENDOR_ID(pci_conf_read16(bus, dev, func,
-                                                PCI_VENDOR_ID)) &&
-            !error && (func < count) )
+    int count = 1, error = 0;
+
+    for ( func = 0;
+          (func < count) && !error &&
+              VALID_PCI_VENDOR_ID(pci_conf_read16(bus, dev, func,
+                                                  PCI_VENDOR_ID));
+          func++ )
+
     {
         hdr_type = pci_conf_read8(bus, dev, func, PCI_HEADER_TYPE);
 
-        if ( func == 0 && IS_PCI_MULTI_FUNCTION(hdr_type) )
+        if ( (func == 0) && IS_PCI_MULTI_FUNCTION(hdr_type) )
             count = PCI_MAX_FUNC_COUNT;
 
         if ( IS_PCI_TYPE0_HEADER(hdr_type) ||
              IS_PCI_TYPE1_HEADER(hdr_type) )
             error = scan_caps_for_iommu(bus, dev, func,
                                         iommu_detect_callback);
-        func++;
     }
 
     return error;
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/amd/iommu_init.c
--- a/xen/drivers/passthrough/amd/iommu_init.c  Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/passthrough/amd/iommu_init.c  Thu Apr 24 14:08:29 2008 -0600
@@ -27,6 +27,7 @@
 #include "../pci_regs.h"
 
 extern int nr_amd_iommus;
+static struct amd_iommu *vector_to_iommu[NR_VECTORS];
 
 int __init map_iommu_mmio_region(struct amd_iommu *iommu)
 {
@@ -34,8 +35,7 @@ int __init map_iommu_mmio_region(struct 
 
     if ( nr_amd_iommus > MAX_AMD_IOMMUS )
     {
-        gdprintk(XENLOG_ERR,
-                 "IOMMU: nr_amd_iommus %d > MAX_IOMMUS\n", nr_amd_iommus);
+        amd_iov_error("nr_amd_iommus %d > MAX_IOMMUS\n", nr_amd_iommus);
         return -ENOMEM;
     }
 
@@ -109,6 +109,33 @@ void __init register_iommu_cmd_buffer_in
     writel(entry, iommu->mmio_base+IOMMU_CMD_BUFFER_BASE_HIGH_OFFSET);
 }
 
+void __init register_iommu_event_log_in_mmio_space(struct amd_iommu *iommu)
+{
+    u64 addr_64, addr_lo, addr_hi;
+    u32 power_of2_entries;
+    u32 entry;
+
+    addr_64 = (u64)virt_to_maddr(iommu->event_log.buffer);
+    addr_lo = addr_64 & DMA_32BIT_MASK;
+    addr_hi = addr_64 >> 32;
+
+    set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0,
+                         IOMMU_EVENT_LOG_BASE_LOW_MASK,
+                         IOMMU_EVENT_LOG_BASE_LOW_SHIFT, &entry);
+    writel(entry, iommu->mmio_base + IOMMU_EVENT_LOG_BASE_LOW_OFFSET);
+
+    power_of2_entries = get_order_from_bytes(iommu->event_log.alloc_size) +
+                        IOMMU_EVENT_LOG_POWER_OF2_ENTRIES_PER_PAGE;
+
+    set_field_in_reg_u32((u32)addr_hi, 0,
+                        IOMMU_EVENT_LOG_BASE_HIGH_MASK,
+                        IOMMU_EVENT_LOG_BASE_HIGH_SHIFT, &entry);
+    set_field_in_reg_u32(power_of2_entries, entry,
+                        IOMMU_EVENT_LOG_LENGTH_MASK,
+                        IOMMU_EVENT_LOG_LENGTH_SHIFT, &entry);
+    writel(entry, iommu->mmio_base+IOMMU_EVENT_LOG_BASE_HIGH_OFFSET);
+}
+
 static void __init set_iommu_translation_control(struct amd_iommu *iommu,
                                                  int enable)
 {
@@ -179,10 +206,281 @@ static void __init register_iommu_exclus
     writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_BASE_LOW_OFFSET);
 }
 
+static void __init set_iommu_event_log_control(struct amd_iommu *iommu,
+            int enable)
+{
+    u32 entry;
+
+    entry = readl(iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+    set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED :
+                         IOMMU_CONTROL_DISABLED, entry,
+                         IOMMU_CONTROL_EVENT_LOG_ENABLE_MASK,
+                         IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT, &entry);
+    writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+
+    set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED :
+                         IOMMU_CONTROL_DISABLED, entry,
+                         IOMMU_CONTROL_EVENT_LOG_INT_MASK,
+                         IOMMU_CONTROL_EVENT_LOG_INT_SHIFT, &entry);
+    writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+
+    set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED :
+                         IOMMU_CONTROL_DISABLED, entry,
+                         IOMMU_CONTROL_COMP_WAIT_INT_MASK,
+                         IOMMU_CONTROL_COMP_WAIT_INT_SHIFT, &entry);
+    writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+}
+
+static int amd_iommu_read_event_log(struct amd_iommu *iommu, u32 event[])
+{
+    u32 tail, head, *event_log;
+    int i;
+
+     BUG_ON( !iommu || !event );
+
+    /* make sure there's an entry in the log */
+    tail = get_field_from_reg_u32(
+                readl(iommu->mmio_base + IOMMU_EVENT_LOG_TAIL_OFFSET),
+                IOMMU_EVENT_LOG_TAIL_MASK,
+                IOMMU_EVENT_LOG_TAIL_SHIFT);
+    if ( tail != iommu->event_log_head )
+    {
+        /* read event log entry */
+        event_log = (u32 *)(iommu->event_log.buffer +
+                                        (iommu->event_log_head *
+                                        IOMMU_EVENT_LOG_ENTRY_SIZE));
+        for ( i = 0; i < IOMMU_EVENT_LOG_U32_PER_ENTRY; i++ )
+            event[i] = event_log[i];
+        if ( ++iommu->event_log_head == iommu->event_log.entries )
+            iommu->event_log_head = 0;
+
+        /* update head pointer */
+        set_field_in_reg_u32(iommu->event_log_head, 0,
+                             IOMMU_EVENT_LOG_HEAD_MASK,
+                             IOMMU_EVENT_LOG_HEAD_SHIFT, &head);
+        writel(head, iommu->mmio_base + IOMMU_EVENT_LOG_HEAD_OFFSET);
+        return 0;
+    }
+
+    return -EFAULT;
+}
+
+static void amd_iommu_msi_data_init(struct amd_iommu *iommu, int vector)
+{
+    u32 msi_data;
+    u8 bus = (iommu->bdf >> 8) & 0xff;
+    u8 dev = PCI_SLOT(iommu->bdf & 0xff);
+    u8 func = PCI_FUNC(iommu->bdf & 0xff);
+
+    msi_data = MSI_DATA_TRIGGER_EDGE |
+        MSI_DATA_LEVEL_ASSERT |
+        MSI_DATA_DELIVERY_FIXED |
+        MSI_DATA_VECTOR(vector);
+
+    pci_conf_write32(bus, dev, func,
+        iommu->msi_cap + PCI_MSI_DATA_64, msi_data);
+}
+
+static void amd_iommu_msi_addr_init(struct amd_iommu *iommu, int phy_cpu)
+{
+
+    int bus = (iommu->bdf >> 8) & 0xff;
+    int dev = PCI_SLOT(iommu->bdf & 0xff);
+    int func = PCI_FUNC(iommu->bdf & 0xff);
+
+    u32 address_hi = 0;
+    u32 address_lo = MSI_ADDR_HEADER |
+            MSI_ADDR_DESTMODE_PHYS |
+            MSI_ADDR_REDIRECTION_CPU |
+            MSI_ADDR_DESTID_CPU(phy_cpu);
+
+    pci_conf_write32(bus, dev, func,
+        iommu->msi_cap + PCI_MSI_ADDRESS_LO, address_lo);
+    pci_conf_write32(bus, dev, func,
+        iommu->msi_cap + PCI_MSI_ADDRESS_HI, address_hi);
+}
+
+static void amd_iommu_msi_enable(struct amd_iommu *iommu, int flag)
+{
+    u16 control;
+    int bus = (iommu->bdf >> 8) & 0xff;
+    int dev = PCI_SLOT(iommu->bdf & 0xff);
+    int func = PCI_FUNC(iommu->bdf & 0xff);
+
+    control = pci_conf_read16(bus, dev, func,
+        iommu->msi_cap + PCI_MSI_FLAGS);
+    control &= ~(1);
+    if ( flag )
+        control |= flag;
+    pci_conf_write16(bus, dev, func,
+        iommu->msi_cap + PCI_MSI_FLAGS, control);
+}
+
+static void iommu_msi_unmask(unsigned int vector)
+{
+    unsigned long flags;
+    struct amd_iommu *iommu = vector_to_iommu[vector];
+
+    /* FIXME: do not support mask bits at the moment */
+    if ( iommu->maskbit )
+        return;
+
+    spin_lock_irqsave(&iommu->lock, flags);
+    amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
+    spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static void iommu_msi_mask(unsigned int vector)
+{
+    unsigned long flags;
+    struct amd_iommu *iommu = vector_to_iommu[vector];
+
+    /* FIXME: do not support mask bits at the moment */
+    if ( iommu->maskbit )
+        return;
+
+    spin_lock_irqsave(&iommu->lock, flags);
+    amd_iommu_msi_enable(iommu, IOMMU_CONTROL_DISABLED);
+    spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static unsigned int iommu_msi_startup(unsigned int vector)
+{
+    iommu_msi_unmask(vector);
+    return 0;
+}
+
+static void iommu_msi_end(unsigned int vector)
+{
+    iommu_msi_unmask(vector);
+    ack_APIC_irq();
+}
+
+static void iommu_msi_set_affinity(unsigned int vector, cpumask_t dest)
+{
+    struct amd_iommu *iommu = vector_to_iommu[vector];
+    amd_iommu_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest)));
+}
+
+static struct hw_interrupt_type iommu_msi_type = {
+    .typename = "AMD_IOV_MSI",
+    .startup = iommu_msi_startup,
+    .shutdown = iommu_msi_mask,
+    .enable = iommu_msi_unmask,
+    .disable = iommu_msi_mask,
+    .ack = iommu_msi_mask,
+    .end = iommu_msi_end,
+    .set_affinity = iommu_msi_set_affinity,
+};
+
+static void parse_event_log_entry(u32 entry[])
+{
+    u16 domain_id, device_id;
+    u32 code;
+    u64 *addr;
+    char * event_str[] = {"ILLEGAL_DEV_TABLE_ENTRY",
+                                         "IO_PAGE_FALT",
+                                         "DEV_TABLE_HW_ERROR",
+                                         "PAGE_TABLE_HW_ERROR",
+                                         "ILLEGAL_COMMAND_ERROR",
+                                         "COMMAND_HW_ERROR",
+                                         "IOTLB_INV_TIMEOUT",
+                                         "INVALID_DEV_REQUEST"};
+
+    code = get_field_from_reg_u32(entry[1],
+                                           IOMMU_EVENT_CODE_MASK,
+                                           IOMMU_EVENT_CODE_SHIFT);
+
+    if ( (code > IOMMU_EVENT_INVALID_DEV_REQUEST)
+        || (code < IOMMU_EVENT_ILLEGAL_DEV_TABLE_ENTRY) )
+    {
+        amd_iov_error("Invalid event log entry!\n");
+        return;
+    }
+
+    if ( code == IOMMU_EVENT_IO_PAGE_FALT )
+    {
+        device_id = get_field_from_reg_u32(entry[0],
+                                           IOMMU_EVENT_DEVICE_ID_MASK,
+                                           IOMMU_EVENT_DEVICE_ID_SHIFT);
+        domain_id = get_field_from_reg_u32(entry[1],
+                                           IOMMU_EVENT_DOMAIN_ID_MASK,
+                                           IOMMU_EVENT_DOMAIN_ID_SHIFT);
+        addr= (u64*) (entry + 2);
+        printk(XENLOG_ERR "AMD_IOV: "
+            "%s: domain:%d, device id:0x%x, fault address:0x%"PRIx64"\n",
+            event_str[code-1], domain_id, device_id, *addr);
+    }
+}
+
+static void amd_iommu_page_fault(int vector, void *dev_id,
+                             struct cpu_user_regs *regs)
+{
+    u32  event[4];
+    unsigned long flags;
+    int ret = 0;
+    struct amd_iommu *iommu = dev_id;
+
+    spin_lock_irqsave(&iommu->lock, flags);
+    ret = amd_iommu_read_event_log(iommu, event);
+    spin_unlock_irqrestore(&iommu->lock, flags);
+
+    if ( ret != 0 )
+        return;
+    parse_event_log_entry(event);
+}
+
+static int set_iommu_interrupt_handler(struct amd_iommu *iommu)
+{
+    int vector, ret;
+    unsigned long flags;
+
+    vector = assign_irq_vector(AUTO_ASSIGN);
+    vector_to_iommu[vector] = iommu;
+
+    /* make irq == vector */
+    irq_vector[vector] = vector;
+    vector_irq[vector] = vector;
+
+    if ( !vector )
+    {
+        amd_iov_error("no vectors\n");
+        return 0;
+    }
+
+    irq_desc[vector].handler = &iommu_msi_type;
+    ret = request_irq(vector, amd_iommu_page_fault, 0, "dmar", iommu);
+    if ( ret )
+    {
+        amd_iov_error("can't request irq\n");
+        return 0;
+    }
+
+    spin_lock_irqsave(&iommu->lock, flags);
+
+    amd_iommu_msi_data_init (iommu, vector);
+    amd_iommu_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
+    amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
+
+    spin_unlock_irqrestore(&iommu->lock, flags);
+
+    return vector;
+}
+
 void __init enable_iommu(struct amd_iommu *iommu)
 {
+    unsigned long flags;
+
+    set_iommu_interrupt_handler(iommu);
+
+    spin_lock_irqsave(&iommu->lock, flags);
+
     register_iommu_exclusion_range(iommu);
     set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_ENABLED);
+    set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED);
     set_iommu_translation_control(iommu, IOMMU_CONTROL_ENABLED);
-    printk("AMD IOMMU %d: Enabled\n", nr_amd_iommus);
-}
+
+    spin_unlock_irqrestore(&iommu->lock, flags);
+
+    printk("AMD_IOV: IOMMU %d Enabled.\n", nr_amd_iommus);
+}
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/amd/iommu_map.c
--- a/xen/drivers/passthrough/amd/iommu_map.c   Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/passthrough/amd/iommu_map.c   Thu Apr 24 14:08:29 2008 -0600
@@ -154,8 +154,7 @@ void flush_command_buffer(struct amd_iom
         }
         else
         {
-            dprintk(XENLOG_WARNING, "AMD IOMMU: Warning:"
-                    " ComWaitInt bit did not assert!\n");
+            amd_iov_warning("Warning: ComWaitInt bit did not assert!\n");
         }
     }
 }
@@ -402,10 +401,9 @@ int amd_iommu_map_page(struct domain *d,
     pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn);
     if ( pte == NULL )
     {
-        dprintk(XENLOG_ERR,
-                "AMD IOMMU: Invalid IO pagetable entry gfn = %lx\n", gfn);
+        amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
         spin_unlock_irqrestore(&hd->mapping_lock, flags);
-        return -EIO;
+        return -EFAULT;
     }
 
     set_page_table_entry_present((u32 *)pte, maddr, iw, ir);
@@ -439,10 +437,9 @@ int amd_iommu_unmap_page(struct domain *
     pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn);
     if ( pte == NULL )
     {
-        dprintk(XENLOG_ERR,
-                "AMD IOMMU: Invalid IO pagetable entry gfn = %lx\n", gfn);
+        amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
         spin_unlock_irqrestore(&hd->mapping_lock, flags);
-        return -EIO;
+        return -EFAULT;
     }
 
     /* mark PTE as 'page not present' */
@@ -479,9 +476,8 @@ int amd_iommu_reserve_domain_unity_map(
             hd->root_table, hd->paging_mode, phys_addr >> PAGE_SHIFT);
         if ( pte == NULL )
         {
-            dprintk(XENLOG_ERR,
-                    "AMD IOMMU: Invalid IO pagetable entry "
-                    "phys_addr = %lx\n", phys_addr);
+            amd_iov_error(
+            "Invalid IO pagetable entry phys_addr = %lx\n", phys_addr);
             spin_unlock_irqrestore(&hd->mapping_lock, flags);
             return -EFAULT;
         }
@@ -528,8 +524,7 @@ int amd_iommu_sync_p2m(struct domain *d)
         pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn);
         if ( pte == NULL )
         {
-            dprintk(XENLOG_ERR,
-                    "AMD IOMMU: Invalid IO pagetable entry gfn = %lx\n", gfn);
+            amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
             spin_unlock_irqrestore(&hd->mapping_lock, flags);
             return -EFAULT;
         }
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c       Thu Apr 24 14:02:16 
2008 -0600
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c       Thu Apr 24 14:08:29 
2008 -0600
@@ -29,16 +29,11 @@ struct list_head amd_iommu_head;
 struct list_head amd_iommu_head;
 long amd_iommu_poll_comp_wait = COMPLETION_WAIT_DEFAULT_POLLING_COUNT;
 static long amd_iommu_cmd_buffer_entries = IOMMU_CMD_BUFFER_DEFAULT_ENTRIES;
-int nr_amd_iommus = 0;
-
-unsigned short ivrs_bdf_entries = 0;
-struct ivrs_mappings *ivrs_mappings = NULL;
-
-/* will set if amd-iommu HW is found */
-int amd_iommu_enabled = 0;
-
-static int enable_amd_iommu = 0;
-boolean_param("enable_amd_iommu", enable_amd_iommu);
+static long amd_iommu_event_log_entries = IOMMU_EVENT_LOG_DEFAULT_ENTRIES;
+int nr_amd_iommus;
+
+unsigned short ivrs_bdf_entries;
+struct ivrs_mappings *ivrs_mappings;
 
 static void deallocate_domain_page_tables(struct hvm_iommu *hd)
 {
@@ -73,25 +68,8 @@ static void __init deallocate_iommu_reso
 static void __init deallocate_iommu_resources(struct amd_iommu *iommu)
 {
     deallocate_iommu_table_struct(&iommu->dev_table);
-    deallocate_iommu_table_struct(&iommu->cmd_buffer);;
-}
-
-static void __init detect_cleanup(void)
-{
-    struct amd_iommu *iommu, *next;
-
-    list_for_each_entry_safe ( iommu, next, &amd_iommu_head, list )
-    {
-        list_del(&iommu->list);
-        deallocate_iommu_resources(iommu);
-        xfree(iommu);
-    }
-
-    if ( ivrs_mappings )
-    {
-        xfree(ivrs_mappings);
-        ivrs_mappings = NULL;
-    }
+    deallocate_iommu_table_struct(&iommu->cmd_buffer);
+    deallocate_iommu_table_struct(&iommu->event_log);
 }
 
 static int __init allocate_iommu_table_struct(struct table_struct *table,
@@ -102,7 +80,7 @@ static int __init allocate_iommu_table_s
 
     if ( !table->buffer )
     {
-        dprintk(XENLOG_ERR, "AMD IOMMU: Error allocating %s\n", name);
+        amd_iov_error("Error allocating %s\n", name);
         return -ENOMEM;
     }
 
@@ -139,6 +117,20 @@ static int __init allocate_iommu_resourc
                                      "Command Buffer") != 0 )
         goto error_out;
 
+    /* allocate 'event log' in power of 2 increments of 4K */
+    iommu->event_log_head = 0;
+    iommu->event_log.alloc_size =
+        PAGE_SIZE << get_order_from_bytes(
+            PAGE_ALIGN(amd_iommu_event_log_entries *
+                        IOMMU_EVENT_LOG_ENTRY_SIZE));
+
+    iommu->event_log.entries =
+        iommu->event_log.alloc_size / IOMMU_EVENT_LOG_ENTRY_SIZE;
+
+    if ( allocate_iommu_table_struct(&iommu->event_log,
+                                     "Event Log") != 0 )
+        goto error_out;
+
     return 0;
 
  error_out:
@@ -153,7 +145,7 @@ int iommu_detect_callback(u8 bus, u8 dev
     iommu = (struct amd_iommu *) xmalloc(struct amd_iommu);
     if ( !iommu )
     {
-        dprintk(XENLOG_ERR, "AMD IOMMU: Error allocating amd_iommu\n");
+        amd_iov_error("Error allocating amd_iommu\n");
         return -ENOMEM;
     }
     memset(iommu, 0, sizeof(struct amd_iommu));
@@ -203,6 +195,7 @@ static int __init amd_iommu_init(void)
             goto error_out;
         register_iommu_dev_table_in_mmio_space(iommu);
         register_iommu_cmd_buffer_in_mmio_space(iommu);
+        register_iommu_event_log_in_mmio_space(iommu);
 
         spin_unlock_irqrestore(&iommu->lock, flags);
     }
@@ -220,18 +213,14 @@ static int __init amd_iommu_init(void)
     }
 
     if ( acpi_table_parse(ACPI_IVRS, parse_ivrs_table) != 0 )
-        dprintk(XENLOG_INFO, "AMD IOMMU: Did not find IVRS table!\n");
+        amd_iov_error("Did not find IVRS table!\n");
 
     for_each_amd_iommu ( iommu )
     {
-        spin_lock_irqsave(&iommu->lock, flags);
         /* enable IOMMU translation services */
         enable_iommu(iommu);
         nr_amd_iommus++;
-        spin_unlock_irqrestore(&iommu->lock, flags);
-    }
-
-    amd_iommu_enabled = 1;
+    }
 
     return 0;
 
@@ -262,7 +251,7 @@ struct amd_iommu *find_iommu_for_device(
     return NULL;
 }
 
-void amd_iommu_setup_domain_device(
+static void amd_iommu_setup_domain_device(
     struct domain *domain, struct amd_iommu *iommu, int bdf)
 {
     void *dte;
@@ -288,12 +277,12 @@ void amd_iommu_setup_domain_device(
         sys_mgt = ivrs_mappings[req_id].dte_sys_mgt_enable;
         dev_ex = ivrs_mappings[req_id].dte_allow_exclusion;
         amd_iommu_set_dev_table_entry((u32 *)dte, root_ptr,
-                                      req_id, sys_mgt, dev_ex,
+                                      hd->domain_id, sys_mgt, dev_ex,
                                       hd->paging_mode);
 
         invalidate_dev_table_entry(iommu, req_id);
         flush_command_buffer(iommu);
-        dprintk(XENLOG_INFO, "AMD IOMMU: Set DTE req_id:%x, "
+        amd_iov_info("Enable DTE:0x%x, "
                 "root_ptr:%"PRIx64", domain_id:%d, paging_mode:%d\n",
                 req_id, root_ptr, hd->domain_id, hd->paging_mode);
 
@@ -301,9 +290,9 @@ void amd_iommu_setup_domain_device(
     }
 }
 
-void __init amd_iommu_setup_dom0_devices(void)
-{
-    struct hvm_iommu *hd = domain_hvm_iommu(dom0);
+static void amd_iommu_setup_dom0_devices(struct domain *d)
+{
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
     struct amd_iommu *iommu;
     struct pci_dev *pdev;
     int bus, dev, func;
@@ -333,80 +322,72 @@ void __init amd_iommu_setup_dom0_devices
                     find_iommu_for_device(bus, pdev->devfn) : NULL;
 
                 if ( iommu )
-                    amd_iommu_setup_domain_device(dom0, iommu, bdf);
+                    amd_iommu_setup_domain_device(d, iommu, bdf);
             }
         }
     }
 }
 
-int amd_iommu_detect(void)
-{
-    unsigned long i;
+int amd_iov_detect(void)
+{
     int last_bus;
-    struct amd_iommu *iommu;
-
-    if ( !enable_amd_iommu )
-    {
-        printk("AMD IOMMU: Disabled\n");
-        return 0;
-    }
+    struct amd_iommu *iommu, *next;
 
     INIT_LIST_HEAD(&amd_iommu_head);
 
     if ( scan_for_iommu(iommu_detect_callback) != 0 )
     {
-        dprintk(XENLOG_ERR, "AMD IOMMU: Error detection\n");
+        amd_iov_error("Error detection\n");
         goto error_out;
     }
 
     if ( !iommu_found() )
     {
-        printk("AMD IOMMU: Not found!\n");
-        return 0;
-    }
-    else
-    {
-        /* allocate 'ivrs mappings' table */
-        /* note: the table has entries to accomodate all IOMMUs */
-        last_bus = 0;
-        for_each_amd_iommu ( iommu )
-            if ( iommu->last_downstream_bus > last_bus )
-                last_bus = iommu->last_downstream_bus;
-
-        ivrs_bdf_entries = (last_bus + 1) *
-            IOMMU_DEV_TABLE_ENTRIES_PER_BUS;
-        ivrs_mappings = xmalloc_array( struct ivrs_mappings, ivrs_bdf_entries);
-
-        if ( !ivrs_mappings )
-        {
-            dprintk(XENLOG_ERR, "AMD IOMMU:"
-                    " Error allocating IVRS DevMappings table\n");
-            goto error_out;
-        }
-        memset(ivrs_mappings, 0,
-               ivrs_bdf_entries * sizeof(struct ivrs_mappings));
-    }
+        printk("AMD_IOV: IOMMU not found!\n");
+        goto error_out;
+    }
+
+    /* allocate 'ivrs mappings' table */
+    /* note: the table has entries to accommodate all IOMMUs */
+    last_bus = 0;
+    for_each_amd_iommu ( iommu )
+        if ( iommu->last_downstream_bus > last_bus )
+            last_bus = iommu->last_downstream_bus;
+
+    ivrs_bdf_entries = (last_bus + 1) *
+        IOMMU_DEV_TABLE_ENTRIES_PER_BUS;
+    ivrs_mappings = xmalloc_array( struct ivrs_mappings, ivrs_bdf_entries);
+    if ( ivrs_mappings == NULL )
+    {
+        amd_iov_error("Error allocating IVRS DevMappings table\n");
+        goto error_out;
+    }
+    memset(ivrs_mappings, 0,
+           ivrs_bdf_entries * sizeof(struct ivrs_mappings));
 
     if ( amd_iommu_init() != 0 )
     {
-        dprintk(XENLOG_ERR, "AMD IOMMU: Error initialization\n");
-        goto error_out;
-    }
-
-    if ( iommu_domain_init(dom0) != 0 )
-        goto error_out;
-
-    /* setup 1:1 page table for dom0 */
-    for ( i = 0; i < max_page; i++ )
-        amd_iommu_map_page(dom0, i, i);
-
-    amd_iommu_setup_dom0_devices();
+        amd_iov_error("Error initialization\n");
+        goto error_out;
+    }
+
     return 0;
 
  error_out:
-    detect_cleanup();
+    list_for_each_entry_safe ( iommu, next, &amd_iommu_head, list )
+    {
+        list_del(&iommu->list);
+        deallocate_iommu_resources(iommu);
+        xfree(iommu);
+    }
+
+    if ( ivrs_mappings )
+    {
+        xfree(ivrs_mappings);
+        ivrs_mappings = NULL;
+    }
+
     return -ENODEV;
-
 }
 
 static int allocate_domain_resources(struct hvm_iommu *hd)
@@ -447,12 +428,10 @@ static int get_paging_mode(unsigned long
             return -ENOMEM;
     }
 
-    dprintk(XENLOG_INFO, "AMD IOMMU: paging mode = %d\n", level);
-
     return level;
 }
 
-int amd_iommu_domain_init(struct domain *domain)
+static int amd_iommu_domain_init(struct domain *domain)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
 
@@ -463,10 +442,18 @@ int amd_iommu_domain_init(struct domain 
         return -ENOMEM;
     }
 
-    if ( is_hvm_domain(domain) )
-        hd->paging_mode = IOMMU_PAGE_TABLE_LEVEL_4;
-    else
-        hd->paging_mode = get_paging_mode(max_page);
+    hd->paging_mode = is_hvm_domain(domain)?
+        IOMMU_PAGE_TABLE_LEVEL_4 : get_paging_mode(max_page);
+
+    if ( domain->domain_id == 0 )
+    {
+        unsigned long i; 
+        /* setup 1:1 page table for dom0 */
+        for ( i = 0; i < max_page; i++ )
+            amd_iommu_map_page(domain, i, i);
+
+        amd_iommu_setup_dom0_devices(domain);
+    }
 
     hd->domain_id = domain->domain_id;
 
@@ -490,7 +477,7 @@ static void amd_iommu_disable_domain_dev
         memset (dte, 0, IOMMU_DEV_TABLE_ENTRY_SIZE);
         invalidate_dev_table_entry(iommu, req_id);
         flush_command_buffer(iommu);
-        dprintk(XENLOG_INFO , "AMD IOMMU: disable DTE 0x%x,"
+        amd_iov_info("Disable DTE:0x%x,"
                 " domain_id:%d, paging_mode:%d\n",
                 req_id,  domain_hvm_iommu(domain)->domain_id,
                 domain_hvm_iommu(domain)->paging_mode);
@@ -525,7 +512,7 @@ static int reassign_device( struct domai
 
         if ( !iommu )
         {
-            gdprintk(XENLOG_ERR , "AMD IOMMU: fail to find iommu."
+            amd_iov_error("Fail to find iommu."
                      " %x:%x.%x cannot be assigned to domain %d\n", 
                      bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id);
             return -ENODEV;
@@ -540,8 +527,7 @@ static int reassign_device( struct domai
         spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
 
         amd_iommu_setup_domain_device(target, iommu, bdf);
-        gdprintk(XENLOG_INFO ,
-                 "AMD IOMMU: reassign %x:%x.%x domain %d -> domain %d\n",
+        amd_iov_info("reassign %x:%x.%x domain %d -> domain %d\n",
                  bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                  source->domain_id, target->domain_id);
 
@@ -550,7 +536,7 @@ static int reassign_device( struct domai
     return 0;
 }
 
-int amd_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
+static int amd_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
 {
     int bdf = (bus << 8) | devfn;
     int req_id = ivrs_mappings[bdf].dte_requestor_id;
@@ -580,8 +566,7 @@ static void release_domain_devices(struc
     {
         pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list);
         pdev_flr(pdev->bus, pdev->devfn);
-        gdprintk(XENLOG_INFO ,
-                 "AMD IOMMU: release devices %x:%x.%x\n",
+        amd_iov_info("release domain %d devices %x:%x.%x\n", d->domain_id,
                  pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
         reassign_device(d, dom0, pdev->bus, pdev->devfn);
     }
@@ -637,16 +622,13 @@ static void deallocate_iommu_page_tables
     hd ->root_table = NULL;
 }
 
-void amd_iommu_domain_destroy(struct domain *d)
-{
-    if ( !amd_iommu_enabled )
-        return;
-
+static void amd_iommu_domain_destroy(struct domain *d)
+{
     deallocate_iommu_page_tables(d);
     release_domain_devices(d);
 }
 
-void amd_iommu_return_device(
+static void amd_iommu_return_device(
     struct domain *s, struct domain *t, u8 bus, u8 devfn)
 {
     pdev_flr(bus, devfn);
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c   Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/passthrough/iommu.c   Thu Apr 24 14:08:29 2008 -0600
@@ -18,6 +18,11 @@
 
 extern struct iommu_ops intel_iommu_ops;
 extern struct iommu_ops amd_iommu_ops;
+int intel_vtd_setup(void);
+int amd_iov_detect(void);
+
+int iommu_enabled = 1;
+boolean_param("iommu", iommu_enabled);
 
 int iommu_domain_init(struct domain *domain)
 {
@@ -134,3 +139,28 @@ void deassign_device(struct domain *d, u
 
     return hd->platform_ops->reassign_device(d, dom0, bus, devfn);
 }
+
+static int iommu_setup(void)
+{
+    int rc = -ENODEV;
+
+    if ( !iommu_enabled )
+        goto out;
+
+    switch ( boot_cpu_data.x86_vendor )
+    {
+    case X86_VENDOR_INTEL:
+        rc = intel_vtd_setup();
+        break;
+    case X86_VENDOR_AMD:
+        rc = amd_iov_detect();
+        break;
+    }
+
+    iommu_enabled = (rc == 0);
+
+ out:
+    printk("I/O virtualisation %sabled\n", iommu_enabled ? "en" : "dis");
+    return rc;
+}
+__initcall(iommu_setup);
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/vtd/dmar.c
--- a/xen/drivers/passthrough/vtd/dmar.c        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/passthrough/vtd/dmar.c        Thu Apr 24 14:08:29 2008 -0600
@@ -30,8 +30,7 @@
 #include "dmar.h"
 #include "../pci_regs.h"
 
-int vtd_enabled;
-boolean_param("vtd", vtd_enabled);
+int vtd_enabled = 1;
 
 #undef PREFIX
 #define PREFIX VTDPREFIX "ACPI DMAR:"
@@ -79,14 +78,9 @@ struct acpi_drhd_unit * ioapic_to_drhd(u
 struct acpi_drhd_unit * ioapic_to_drhd(unsigned int apic_id)
 {
     struct acpi_drhd_unit *drhd;
-    list_for_each_entry( drhd, &acpi_drhd_units, list ) {
-        if ( acpi_ioapic_device_match(&drhd->ioapic_list, apic_id) ) {
-            dprintk(XENLOG_INFO VTDPREFIX,
-                    "ioapic_to_drhd: drhd->address = %lx\n",
-                    drhd->address);
+    list_for_each_entry( drhd, &acpi_drhd_units, list )
+        if ( acpi_ioapic_device_match(&drhd->ioapic_list, apic_id) )
             return drhd;
-        }
-    }
     return NULL;
 }
 
@@ -94,15 +88,9 @@ struct iommu * ioapic_to_iommu(unsigned 
 {
     struct acpi_drhd_unit *drhd;
 
-    list_for_each_entry( drhd, &acpi_drhd_units, list ) {
-        if ( acpi_ioapic_device_match(&drhd->ioapic_list, apic_id) ) {
-            dprintk(XENLOG_INFO VTDPREFIX,
-                    "ioapic_to_iommu: drhd->address = %lx\n",
-                    drhd->address);
+    list_for_each_entry( drhd, &acpi_drhd_units, list )
+        if ( acpi_ioapic_device_match(&drhd->ioapic_list, apic_id) )
             return drhd->iommu;
-        }
-    }
-    dprintk(XENLOG_INFO VTDPREFIX, "returning NULL\n");
     return NULL;
 }
 
@@ -150,21 +138,11 @@ struct acpi_drhd_unit * acpi_find_matche
 
         if ( acpi_pci_device_match(drhd->devices,
                                    drhd->devices_cnt, dev) )
-        {
-            dprintk(XENLOG_INFO VTDPREFIX, 
-                    "acpi_find_matched_drhd_unit: drhd->address = %lx\n",
-                    drhd->address);
             return drhd;
-        }
     }
 
     if ( include_all_drhd )
-    {
-        dprintk(XENLOG_INFO VTDPREFIX, 
-                "acpi_find_matched_drhd_unit:include_all_drhd->addr = %lx\n",
-                include_all_drhd->address);
         return include_all_drhd;
-    }
 
     return NULL;
 }
@@ -174,11 +152,9 @@ struct acpi_rmrr_unit * acpi_find_matche
     struct acpi_rmrr_unit *rmrr;
 
     list_for_each_entry ( rmrr, &acpi_rmrr_units, list )
-    {
         if ( acpi_pci_device_match(rmrr->devices,
                                    rmrr->devices_cnt, dev) )
             return rmrr;
-    }
 
     return NULL;
 }
@@ -199,11 +175,7 @@ struct acpi_atsr_unit * acpi_find_matche
     }
 
     if ( all_ports_atsru )
-    {
-        dprintk(XENLOG_INFO VTDPREFIX,
-                "acpi_find_matched_atsr_unit: all_ports_atsru\n");
         return all_ports_atsru;;
-    }
 
     return NULL;
 }
@@ -604,22 +576,24 @@ int acpi_dmar_init(void)
 {
     int rc;
 
-    if ( !vtd_enabled )
-        return -ENODEV;
+    rc = -ENODEV;
+    if ( !iommu_enabled )
+        goto fail;
 
     if ( (rc = vtd_hw_check()) != 0 )
-        return rc;
+        goto fail;
 
     acpi_table_parse(ACPI_DMAR, acpi_parse_dmar);
 
+    rc = -ENODEV;
     if ( list_empty(&acpi_drhd_units) )
-    {
-        dprintk(XENLOG_ERR VTDPREFIX, "No DMAR devices found\n");
-        vtd_enabled = 0;
-        return -ENODEV;
-    }
+        goto fail;
 
     printk("Intel VT-d has been enabled\n");
 
     return 0;
-}
+
+ fail:
+    vtd_enabled = 0;
+    return -ENODEV;
+}
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/passthrough/vtd/iommu.c       Thu Apr 24 14:08:29 2008 -0600
@@ -41,6 +41,9 @@ static int domid_bitmap_size;           
 static int domid_bitmap_size;           /* domain id bitmap size in bits */
 static unsigned long *domid_bitmap;     /* iommu domain id bitmap */
 
+static void setup_dom0_devices(struct domain *d);
+static void setup_dom0_rmrr(struct domain *d);
+
 #define DID_FIELD_WIDTH 16
 #define DID_HIGH_OFFSET 8
 static void context_set_domain_id(struct context_entry *context,
@@ -78,17 +81,12 @@ static struct intel_iommu *alloc_intel_i
     struct intel_iommu *intel;
 
     intel = xmalloc(struct intel_iommu);
-    if ( !intel )
-    {
-        gdprintk(XENLOG_ERR VTDPREFIX,
-                 "Allocate intel_iommu failed.\n");
+    if ( intel == NULL )
         return NULL;
-    }
     memset(intel, 0, sizeof(struct intel_iommu));
 
     spin_lock_init(&intel->qi_ctrl.qinval_lock);
     spin_lock_init(&intel->qi_ctrl.qinval_poll_lock);
-
     spin_lock_init(&intel->ir_ctrl.iremap_lock);
 
     return intel;
@@ -96,68 +94,22 @@ static struct intel_iommu *alloc_intel_i
 
 static void free_intel_iommu(struct intel_iommu *intel)
 {
-    if ( intel )
-    {
-        xfree(intel);
-        intel = NULL;
-    }
+    xfree(intel);
 }
 
 struct qi_ctrl *iommu_qi_ctrl(struct iommu *iommu)
 {
-    if ( !iommu )
-        return NULL;
-
-    if ( !iommu->intel )
-    {
-        iommu->intel = alloc_intel_iommu();
-        if ( !iommu->intel )
-        {
-            dprintk(XENLOG_ERR VTDPREFIX,
-                    "iommu_qi_ctrl: Allocate iommu->intel failed.\n");
-            return NULL;
-        }
-    }
-
-    return &(iommu->intel->qi_ctrl);
+    return iommu ? &iommu->intel->qi_ctrl : NULL;
 }
 
 struct ir_ctrl *iommu_ir_ctrl(struct iommu *iommu)
 {
-    if ( !iommu )
-        return NULL;
-
-    if ( !iommu->intel )
-    {
-        iommu->intel = alloc_intel_iommu();
-        if ( !iommu->intel )
-        {
-            dprintk(XENLOG_ERR VTDPREFIX,
-                    "iommu_ir_ctrl: Allocate iommu->intel failed.\n");
-            return NULL;
-        }
-    }
-
-    return &(iommu->intel->ir_ctrl);
+    return iommu ? &iommu->intel->ir_ctrl : NULL;
 }
 
 struct iommu_flush *iommu_get_flush(struct iommu *iommu)
 {
-    if ( !iommu )
-        return NULL;
-
-    if ( !iommu->intel )
-    {
-        iommu->intel = alloc_intel_iommu();
-        if ( !iommu->intel )
-        {
-            dprintk(XENLOG_ERR VTDPREFIX,
-                    "iommu_get_flush: Allocate iommu->intel failed.\n");
-            return NULL;
-        }
-    }
-
-    return &(iommu->intel->flush);
+    return iommu ? &iommu->intel->flush : NULL;
 }
 
 unsigned int clflush_size;
@@ -276,11 +228,7 @@ static u64 addr_to_dma_page_maddr(struct
             dma_set_pte_addr(*pte, maddr);
             vaddr = map_vtd_domain_page(maddr);
             if ( !vaddr )
-            {
-                unmap_vtd_domain_page(parent);
-                spin_unlock_irqrestore(&hd->mapping_lock, flags);
-                return 0;
-            }
+                break;
 
             /*
              * high level table always sets r/w, last level
@@ -294,14 +242,9 @@ static u64 addr_to_dma_page_maddr(struct
         {
             vaddr = map_vtd_domain_page(pte->val);
             if ( !vaddr )
-            {
-                unmap_vtd_domain_page(parent);
-                spin_unlock_irqrestore(&hd->mapping_lock, flags);
-                return 0;
-            }
+                break;
         }
 
-        unmap_vtd_domain_page(parent);
         if ( level == 2 )
         {
             pte_maddr = pte->val & PAGE_MASK_4K;
@@ -309,11 +252,13 @@ static u64 addr_to_dma_page_maddr(struct
             break;
         }
 
+        unmap_vtd_domain_page(parent);
         parent = (struct dma_pte *)vaddr;
         vaddr = NULL;
         level--;
     }
 
+    unmap_vtd_domain_page(parent);
     spin_unlock_irqrestore(&hd->mapping_lock, flags);
     return pte_maddr;
 }
@@ -688,7 +633,7 @@ void dma_pte_free_pagetable(struct domai
     struct dma_pte *page, *pte;
     int total = agaw_to_level(hd->agaw);
     int level;
-    u32 tmp;
+    u64 tmp;
     u64 pg_maddr;
 
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
@@ -709,7 +654,10 @@ void dma_pte_free_pagetable(struct domai
         {
             pg_maddr = dma_addr_level_page_maddr(domain, tmp, level);
             if ( pg_maddr == 0 )
-                return;
+            {
+                tmp += level_size(level);
+                continue;
+            }
             page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
             pte = page + address_level_offset(tmp, level);
             dma_clear_pte(*pte);
@@ -730,18 +678,11 @@ void dma_pte_free_pagetable(struct domai
     }
 }
 
-/* iommu handling */
 static int iommu_set_root_entry(struct iommu *iommu)
 {
     u32 cmd, sts;
     unsigned long flags;
-
-    if ( iommu == NULL )
-    {
-        gdprintk(XENLOG_ERR VTDPREFIX,
-                 "iommu_set_root_entry: iommu == NULL\n");
-        return -EINVAL;
-    }
+    s_time_t start_time;
 
     if ( iommu->root_maddr != 0 )
     {
@@ -760,11 +701,14 @@ static int iommu_set_root_entry(struct i
     dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
 
     /* Make sure hardware complete it */
+    start_time = NOW();
     for ( ; ; )
     {
         sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
         if ( sts & DMA_GSTS_RTPS )
             break;
+        if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
+            panic("DMAR hardware is malfunctional, please disable IOMMU\n");
         cpu_relax();
     }
 
@@ -777,6 +721,7 @@ static int iommu_enable_translation(stru
 {
     u32 sts;
     unsigned long flags;
+    s_time_t start_time;
 
     dprintk(XENLOG_INFO VTDPREFIX,
             "iommu_enable_translation: iommu->reg = %p\n", iommu->reg);
@@ -784,11 +729,14 @@ static int iommu_enable_translation(stru
     iommu->gcmd |= DMA_GCMD_TE;
     dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
     /* Make sure hardware complete it */
+    start_time = NOW();
     for ( ; ; )
     {
         sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
         if ( sts & DMA_GSTS_TES )
             break;
+        if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
+            panic("DMAR hardware is malfunctional, please disable IOMMU\n");
         cpu_relax();
     }
 
@@ -802,17 +750,21 @@ int iommu_disable_translation(struct iom
 {
     u32 sts;
     unsigned long flags;
+    s_time_t start_time;
 
     spin_lock_irqsave(&iommu->register_lock, flags);
     iommu->gcmd &= ~ DMA_GCMD_TE;
     dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
 
     /* Make sure hardware complete it */
+    start_time = NOW();
     for ( ; ; )
     {
         sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
         if ( !(sts & DMA_GSTS_TES) )
             break;
+        if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
+            panic("DMAR hardware is malfunctional, please disable IOMMU\n");
         cpu_relax();
     }
     spin_unlock_irqrestore(&iommu->register_lock, flags);
@@ -1039,69 +991,64 @@ int iommu_set_interrupt(struct iommu *io
     return vector;
 }
 
-struct iommu *iommu_alloc(void *hw_data)
-{
-    struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data;
+static int iommu_alloc(struct acpi_drhd_unit *drhd)
+{
     struct iommu *iommu;
 
     if ( nr_iommus > MAX_IOMMUS )
     {
         gdprintk(XENLOG_ERR VTDPREFIX,
                  "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus);
-        return NULL;
+        return -ENOMEM;
     }
 
     iommu = xmalloc(struct iommu);
-    if ( !iommu )
-        return NULL;
+    if ( iommu == NULL )
+        return -ENOMEM;
     memset(iommu, 0, sizeof(struct iommu));
 
+    iommu->intel = alloc_intel_iommu();
+    if ( iommu->intel == NULL )
+    {
+        xfree(iommu);
+        return -ENOMEM;
+    }
+
     set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
-    iommu->reg = (void *) fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
-
-    printk("iommu_alloc: iommu->reg = %p drhd->address = %lx\n",
-           iommu->reg, drhd->address);
-
+    iommu->reg = (void *)fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
     nr_iommus++;
-
-    if ( !iommu->reg )
-    {
-        printk(KERN_ERR VTDPREFIX "IOMMU: can't mapping the region\n");
-        goto error;
-    }
 
     iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
     iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
 
-    printk("iommu_alloc: cap = %"PRIx64"\n",iommu->cap);
-    printk("iommu_alloc: ecap = %"PRIx64"\n", iommu->ecap);
-
     spin_lock_init(&iommu->lock);
     spin_lock_init(&iommu->register_lock);
 
-    iommu->intel = alloc_intel_iommu();
-
     drhd->iommu = iommu;
-    return iommu;
- error:
-    xfree(iommu);
-    return NULL;
-}
-
-static void free_iommu(struct iommu *iommu)
-{
-    if ( !iommu )
+    return 0;
+}
+
+static void iommu_free(struct acpi_drhd_unit *drhd)
+{
+    struct iommu *iommu = drhd->iommu;
+
+    if ( iommu == NULL )
         return;
+
     if ( iommu->root_maddr != 0 )
     {
         free_pgtable_maddr(iommu->root_maddr);
         iommu->root_maddr = 0;
     }
+
     if ( iommu->reg )
         iounmap(iommu->reg);
+
     free_intel_iommu(iommu->intel);
     free_irq(iommu->vector);
     xfree(iommu);
+
+    drhd->iommu = NULL;
 }
 
 #define guestwidth_to_adjustwidth(gaw) ({       \
@@ -1111,22 +1058,21 @@ static void free_iommu(struct iommu *iom
         agaw = 64;                              \
     agaw; })
 
-int intel_iommu_domain_init(struct domain *domain)
-{
-    struct hvm_iommu *hd = domain_hvm_iommu(domain);
+static int intel_iommu_domain_init(struct domain *d)
+{
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
     struct iommu *iommu = NULL;
     int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
-    int adjust_width, agaw;
+    int i, adjust_width, agaw;
     unsigned long sagaw;
     struct acpi_drhd_unit *drhd;
 
-    if ( !vtd_enabled || list_empty(&acpi_drhd_units) )
-        return 0;
-
-    for_each_drhd_unit ( drhd )
-        iommu = drhd->iommu ? : iommu_alloc(drhd);
-
-    /* calculate AGAW */
+    INIT_LIST_HEAD(&hd->pdev_list);
+
+    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
+    iommu = drhd->iommu;
+
+    /* Calculate AGAW. */
     if ( guest_width > cap_mgaw(iommu->cap) )
         guest_width = cap_mgaw(iommu->cap);
     adjust_width = guestwidth_to_adjustwidth(guest_width);
@@ -1142,6 +1088,26 @@ int intel_iommu_domain_init(struct domai
             return -ENODEV;
     }
     hd->agaw = agaw;
+
+    if ( d->domain_id == 0 )
+    {
+        /* Set up 1:1 page table for dom0. */
+        for ( i = 0; i < max_page; i++ )
+            iommu_map_page(d, i, i);
+
+        setup_dom0_devices(d);
+        setup_dom0_rmrr(d);
+
+        iommu_flush_all();
+
+        for_each_drhd_unit ( drhd )
+        {
+            iommu = drhd->iommu;
+            if ( iommu_enable_translation(iommu) )
+                return -EIO;
+        }
+    }
+
     return 0;
 }
 
@@ -1153,28 +1119,15 @@ static int domain_context_mapping_one(
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct context_entry *context, *context_entries;
     unsigned long flags;
-    int ret = 0;
     u64 maddr;
 
     maddr = bus_to_context_maddr(iommu, bus);
     context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
     context = &context_entries[devfn];
-    if ( !context )
+
+    if ( context_present(*context) )
     {
         unmap_vtd_domain_page(context_entries);
-        gdprintk(XENLOG_ERR VTDPREFIX,
-                 "domain_context_mapping_one:context == NULL:"
-                 "bdf = %x:%x:%x\n",
-                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
-        return -ENOMEM;
-    }
-
-    if ( context_present(*context) )
-    {
-        unmap_vtd_domain_page(context_entries);
-        gdprintk(XENLOG_WARNING VTDPREFIX,
-                 "domain_context_mapping_one:context present:bdf=%x:%x:%x\n",
-                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
         return 0;
     }
 
@@ -1203,12 +1156,6 @@ static int domain_context_mapping_one(
     context_set_present(*context);
     iommu_flush_cache_entry(iommu, context);
 
-    gdprintk(XENLOG_INFO VTDPREFIX,
-             "domain_context_mapping_one-%x:%x:%x-*context=%"PRIx64":%"PRIx64
-             " hd->pgd_maddr=%"PRIx64"\n",
-             bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
-             context->hi, context->lo, hd->pgd_maddr);
-
     unmap_vtd_domain_page(context_entries);
 
     if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
@@ -1218,7 +1165,8 @@ static int domain_context_mapping_one(
     else
         iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
     spin_unlock_irqrestore(&iommu->lock, flags);
-    return ret;
+
+    return 0;
 }
 
 static int __pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap)
@@ -1377,28 +1325,12 @@ static int domain_context_unmap_one(
     maddr = bus_to_context_maddr(iommu, bus);
     context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
     context = &context_entries[devfn];
-    if ( !context )
+
+    if ( !context_present(*context) )
     {
         unmap_vtd_domain_page(context_entries);
-        gdprintk(XENLOG_ERR VTDPREFIX,
-                 "domain_context_unmap_one-%x:%x:%x- context == NULL:return\n",
-                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
-        return -ENOMEM;
-    }
-
-    if ( !context_present(*context) )
-    {
-        unmap_vtd_domain_page(context_entries);
-        gdprintk(XENLOG_WARNING VTDPREFIX,
-                 "domain_context_unmap_one-%x:%x:%x- "
-                 "context NOT present:return\n",
-                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
         return 0;
     }
-
-    gdprintk(XENLOG_INFO VTDPREFIX,
-             "domain_context_unmap_one: bdf = %x:%x:%x\n",
-             bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 
     spin_lock_irqsave(&iommu->lock, flags);
     context_clear_present(*context);
@@ -1431,24 +1363,12 @@ static int domain_context_unmap(
         sub_bus = pci_conf_read8(
             pdev->bus, PCI_SLOT(pdev->devfn),
             PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
-
-        gdprintk(XENLOG_INFO VTDPREFIX,
-                 "domain_context_unmap:BRIDGE:%x:%x:%x "
-                 "sec_bus=%x sub_bus=%x\n",
-                 pdev->bus, PCI_SLOT(pdev->devfn),
-                 PCI_FUNC(pdev->devfn), sec_bus, sub_bus);
         break;
     case DEV_TYPE_PCIe_ENDPOINT:
-        gdprintk(XENLOG_INFO VTDPREFIX,
-                 "domain_context_unmap:PCIe : bdf = %x:%x:%x\n",
-                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
         ret = domain_context_unmap_one(domain, iommu,
                                        (u8)(pdev->bus), (u8)(pdev->devfn));
         break;
     case DEV_TYPE_PCI:
-        gdprintk(XENLOG_INFO VTDPREFIX,
-                 "domain_context_unmap:PCI: bdf = %x:%x:%x\n",
-                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
         if ( pdev->bus == 0 )
             ret = domain_context_unmap_one(
                 domain, iommu,
@@ -1502,35 +1422,29 @@ void reassign_device_ownership(
     int status;
     unsigned long flags;
 
-    gdprintk(XENLOG_INFO VTDPREFIX,
-             "reassign_device-%x:%x:%x- source = %d target = %d\n",
-             bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
-             source->domain_id, target->domain_id);
-
     pdev_flr(bus, devfn);
 
     for_each_pdev( source, pdev )
-    {
-        if ( (pdev->bus != bus) || (pdev->devfn != devfn) )
-            continue;
-
-        drhd = acpi_find_matched_drhd_unit(pdev);
-        iommu = drhd->iommu;
-        domain_context_unmap(source, iommu, pdev);
-
-        /* Move pci device from the source domain to target domain. */
-        spin_lock_irqsave(&source_hd->iommu_list_lock, flags);
-        spin_lock_irqsave(&target_hd->iommu_list_lock, flags);
-        list_move(&pdev->list, &target_hd->pdev_list);
-        spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
-        spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
-
-        status = domain_context_mapping(target, iommu, pdev);
-        if ( status != 0 )
-            gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
-
-        break;
-    }
+        if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
+            goto found;
+
+    return;
+
+ found:
+    drhd = acpi_find_matched_drhd_unit(pdev);
+    iommu = drhd->iommu;
+    domain_context_unmap(source, iommu, pdev);
+
+    /* Move pci device from the source domain to target domain. */
+    spin_lock_irqsave(&source_hd->iommu_list_lock, flags);
+    spin_lock_irqsave(&target_hd->iommu_list_lock, flags);
+    list_move(&pdev->list, &target_hd->pdev_list);
+    spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
+    spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
+
+    status = domain_context_mapping(target, iommu, pdev);
+    if ( status != 0 )
+        gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
 }
 
 void return_devices_to_dom0(struct domain *d)
@@ -1541,9 +1455,6 @@ void return_devices_to_dom0(struct domai
     while ( !list_empty(&hd->pdev_list) )
     {
         pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list);
-        dprintk(XENLOG_INFO VTDPREFIX,
-                "return_devices_to_dom0: bdf = %x:%x:%x\n",
-                pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
         reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn);
     }
 
@@ -1600,7 +1511,7 @@ int intel_iommu_map_page(
         return 0;
 #endif
 
-    pg_maddr = addr_to_dma_page_maddr(d, gfn << PAGE_SHIFT_4K);
+    pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K);
     if ( pg_maddr == 0 )
         return -ENOMEM;
     page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
@@ -1643,11 +1554,11 @@ int intel_iommu_unmap_page(struct domain
 }
 
 int iommu_page_mapping(struct domain *domain, paddr_t iova,
-                       void *hpa, size_t size, int prot)
+                       paddr_t hpa, size_t size, int prot)
 {
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
-    unsigned long start_pfn, end_pfn;
+    u64 start_pfn, end_pfn;
     struct dma_pte *page = NULL, *pte = NULL;
     int index;
     u64 pg_maddr;
@@ -1657,9 +1568,8 @@ int iommu_page_mapping(struct domain *do
     if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
         return -EINVAL;
     iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
-    start_pfn = (unsigned long)(((unsigned long) hpa) >> PAGE_SHIFT_4K);
-    end_pfn = (unsigned long)
-        ((PAGE_ALIGN_4K(((unsigned long)hpa) + size)) >> PAGE_SHIFT_4K);
+    start_pfn = hpa >> PAGE_SHIFT_4K;
+    end_pfn = (PAGE_ALIGN_4K(hpa + size)) >> PAGE_SHIFT_4K;
     index = 0;
     while ( start_pfn < end_pfn )
     {
@@ -1668,7 +1578,7 @@ int iommu_page_mapping(struct domain *do
             return -ENOMEM;
         page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
         pte = page + (start_pfn & LEVEL_MASK);
-        dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
+        dma_set_pte_addr(*pte, (paddr_t)start_pfn << PAGE_SHIFT_4K);
         dma_set_pte_prot(*pte, prot);
         iommu_flush_cache_entry(iommu, pte);
         unmap_vtd_domain_page(page);
@@ -1727,7 +1637,7 @@ static int iommu_prepare_rmrr_dev(
     /* page table init */
     size = rmrr->end_address - rmrr->base_address + 1;
     ret = iommu_page_mapping(d, rmrr->base_address,
-                             (void *)rmrr->base_address, size,
+                             rmrr->base_address, size,
                              DMA_PTE_READ|DMA_PTE_WRITE);
     if ( ret )
         return ret;
@@ -1743,37 +1653,15 @@ static int iommu_prepare_rmrr_dev(
     return ret;
 }
 
-void __init setup_dom0_devices(void)
-{
-    struct hvm_iommu *hd  = domain_hvm_iommu(dom0);
+static void setup_dom0_devices(struct domain *d)
+{
+    struct hvm_iommu *hd;
     struct acpi_drhd_unit *drhd;
     struct pci_dev *pdev;
     int bus, dev, func, ret;
     u32 l;
 
-#ifdef DEBUG_VTD_CONTEXT_ENTRY
-    for ( bus = 0; bus < 256; bus++ )
-    {
-        for ( dev = 0; dev < 32; dev++ )
-        { 
-            for ( func = 0; func < 8; func++ )
-            {
-                struct context_entry *context;
-                struct pci_dev device;
-
-                device.bus = bus; 
-                device.devfn = PCI_DEVFN(dev, func); 
-                drhd = acpi_find_matched_drhd_unit(&device);
-                context = device_to_context_entry(drhd->iommu,
-                                                  bus, PCI_DEVFN(dev, func));
-                if ( (context->lo != 0) || (context->hi != 0) )
-                    dprintk(XENLOG_INFO VTDPREFIX,
-                            "setup_dom0_devices-%x:%x:%x- context not 0\n",
-                            bus, dev, func);
-            }
-        }    
-    }        
-#endif
+    hd = domain_hvm_iommu(d);
 
     for ( bus = 0; bus < 256; bus++ )
     {
@@ -1792,18 +1680,13 @@ void __init setup_dom0_devices(void)
                 list_add_tail(&pdev->list, &hd->pdev_list);
 
                 drhd = acpi_find_matched_drhd_unit(pdev);
-                ret = domain_context_mapping(dom0, drhd->iommu, pdev);
+                ret = domain_context_mapping(d, drhd->iommu, pdev);
                 if ( ret != 0 )
                     gdprintk(XENLOG_ERR VTDPREFIX,
                              "domain_context_mapping failed\n");
             }
         }
     }
-
-    for_each_pdev ( dom0, pdev )
-        dprintk(XENLOG_INFO VTDPREFIX,
-                "setup_dom0_devices: bdf = %x:%x:%x\n",
-                pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 }
 
 void clear_fault_bits(struct iommu *iommu)
@@ -1850,13 +1733,6 @@ static int init_vtd_hw(void)
         flush->context = flush_context_reg;
         flush->iotlb = flush_iotlb_reg;
     }
-    return 0;
-}
-
-static int init_vtd2_hw(void)
-{
-    struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
 
     for_each_drhd_unit ( drhd )
     {
@@ -1873,52 +1749,38 @@ static int init_vtd2_hw(void)
             dprintk(XENLOG_ERR VTDPREFIX,
                     "Interrupt Remapping hardware not found\n");
     }
-    return 0;
-}
-
-static int enable_vtd_translation(void)
-{
-    struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
-
-    for_each_drhd_unit ( drhd )
-    {
-        iommu = drhd->iommu;
-        if ( iommu_enable_translation(iommu) )
-            return -EIO;
-    }
-    return 0;
-}
-
-static void setup_dom0_rmrr(void)
+
+    return 0;
+}
+
+static void setup_dom0_rmrr(struct domain *d)
 {
     struct acpi_rmrr_unit *rmrr;
     struct pci_dev *pdev;
     int ret;
 
     for_each_rmrr_device ( rmrr, pdev )
-        ret = iommu_prepare_rmrr_dev(dom0, rmrr, pdev);
+        ret = iommu_prepare_rmrr_dev(d, rmrr, pdev);
         if ( ret )
             gdprintk(XENLOG_ERR VTDPREFIX,
                      "IOMMU: mapping reserved region failed\n");
     end_for_each_rmrr_device ( rmrr, pdev )
 }
 
-int iommu_setup(void)
-{
-    struct hvm_iommu *hd  = domain_hvm_iommu(dom0);
+int intel_vtd_setup(void)
+{
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
-    unsigned long i;
 
     if ( !vtd_enabled )
-        return 0;
+        return -ENODEV;
 
     spin_lock_init(&domid_bitmap_lock);
-    INIT_LIST_HEAD(&hd->pdev_list);
-
-    /* setup clflush size */
     clflush_size = get_clflush_size();
+
+    for_each_drhd_unit ( drhd )
+        if ( iommu_alloc(drhd) != 0 )
+            goto error;
 
     /* Allocate IO page directory page for the domain. */
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
@@ -1933,27 +1795,15 @@ int iommu_setup(void)
     memset(domid_bitmap, 0, domid_bitmap_size / 8);
     set_bit(0, domid_bitmap);
 
-    /* setup 1:1 page table for dom0 */
-    for ( i = 0; i < max_page; i++ )
-        iommu_map_page(dom0, i, i);
-
     init_vtd_hw();
-    setup_dom0_devices();
-    setup_dom0_rmrr();
-    iommu_flush_all();
-    enable_vtd_translation();
-    init_vtd2_hw();
 
     return 0;
 
  error:
-    printk("iommu_setup() failed\n");
     for_each_drhd_unit ( drhd )
-    {
-        iommu = drhd->iommu;
-        free_iommu(iommu);
-    }
-    return -EIO;
+        iommu_free(drhd);
+    vtd_enabled = 0;
+    return -ENOMEM;
 }
 
 /*
@@ -1979,10 +1829,6 @@ int intel_iommu_assign_device(struct dom
 
     if ( list_empty(&acpi_drhd_units) )
         return ret;
-
-    gdprintk(XENLOG_INFO VTDPREFIX,
-             "assign_device: bus = %x dev = %x func = %x\n",
-             bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 
     reassign_device_ownership(dom0, d, bus, devfn);
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/vtd/utils.c
--- a/xen/drivers/passthrough/vtd/utils.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/passthrough/vtd/utils.c       Thu Apr 24 14:08:29 2008 -0600
@@ -60,10 +60,10 @@ int vtd_hw_check(void)
             dprintk(XENLOG_WARNING VTDPREFIX,
                     "***  vendor = %x device = %x revision = %x\n",
                     vendor, device, revision);
-            vtd_enabled = 0;
             return -ENODEV;
         }
     }
+
     return 0;
 }
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/vtd/x86/vtd.c
--- a/xen/drivers/passthrough/vtd/x86/vtd.c     Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c     Thu Apr 24 14:08:29 2008 -0600
@@ -114,8 +114,6 @@ void hvm_dpci_isairq_eoi(struct domain *
                 if ( --dpci->mirq[i].pending == 0 )
                 {
                     spin_unlock(&dpci->dirq_lock);
-                    gdprintk(XENLOG_INFO VTDPREFIX,
-                             "hvm_dpci_isairq_eoi:: mirq = %x\n", i);
                     stop_timer(&dpci->hvm_timer[irq_to_vector(i)]);
                     pirq_guest_eoi(d, i);
                 }
@@ -130,8 +128,6 @@ void iommu_set_pgd(struct domain *d)
 {
     struct hvm_iommu *hd  = domain_hvm_iommu(d);
     unsigned long p2m_table;
-    int level = agaw_to_level(hd->agaw);
-    l3_pgentry_t *l3e;
 
     p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
 
@@ -153,12 +149,12 @@ void iommu_set_pgd(struct domain *d)
                 return;
             }
             pgd_mfn = _mfn(dma_pte_addr(*dpte) >> PAGE_SHIFT_4K);
-            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
             unmap_domain_page(dpte);
             break;
         case VTD_PAGE_TABLE_LEVEL_4:
             pgd_mfn = _mfn(p2m_table);
-            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
             break;
         default:
             gdprintk(XENLOG_ERR VTDPREFIX,
@@ -173,6 +169,8 @@ void iommu_set_pgd(struct domain *d)
         int i;
         u64 pmd_maddr;
         unsigned long flags;
+        l3_pgentry_t *l3e;
+        int level = agaw_to_level(hd->agaw);
 
         spin_lock_irqsave(&hd->mapping_lock, flags);
         hd->pgd_maddr = alloc_pgtable_maddr();
@@ -236,6 +234,8 @@ void iommu_set_pgd(struct domain *d)
 
 #elif CONFIG_PAGING_LEVELS == 4
         mfn_t pgd_mfn;
+        l3_pgentry_t *l3e;
+        int level = agaw_to_level(hd->agaw);
 
         switch ( level )
         {
@@ -250,12 +250,12 @@ void iommu_set_pgd(struct domain *d)
             }
 
             pgd_mfn = _mfn(l3e_get_pfn(*l3e));
-            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
             unmap_domain_page(l3e);
             break;
         case VTD_PAGE_TABLE_LEVEL_4:
             pgd_mfn = _mfn(p2m_table);
-            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
             break;
         default:
             gdprintk(XENLOG_ERR VTDPREFIX,
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/amd-iommu.h
--- a/xen/include/asm-x86/amd-iommu.h   Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/asm-x86/amd-iommu.h   Thu Apr 24 14:08:29 2008 -0600
@@ -28,10 +28,9 @@
 
 #define iommu_found()           (!list_empty(&amd_iommu_head))
 
-extern int amd_iommu_enabled;
 extern struct list_head amd_iommu_head;
 
-extern int __init amd_iommu_detect(void);
+extern int __init amd_iov_detect(void);
 
 struct table_struct {
     void *buffer;
@@ -79,6 +78,9 @@ struct amd_iommu {
     int exclusion_allow_all;
     uint64_t exclusion_base;
     uint64_t exclusion_limit;
+
+    int msi_cap;
+    int maskbit;
 };
 
 struct ivrs_mappings {
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/bitops.h
--- a/xen/include/asm-x86/bitops.h      Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/asm-x86/bitops.h      Thu Apr 24 14:08:29 2008 -0600
@@ -331,10 +331,9 @@ extern unsigned int __find_next_zero_bit
 extern unsigned int __find_next_zero_bit(
     const unsigned long *addr, unsigned int size, unsigned int offset);
 
-/* return index of first bit set in val or BITS_PER_LONG when no bit is set */
-static inline unsigned int __scanbit(unsigned long val)
-{
-    asm ( "bsf %1,%0" : "=r" (val) : "r" (val), "0" (BITS_PER_LONG) );
+static inline unsigned int __scanbit(unsigned long val, unsigned long max)
+{
+    asm ( "bsf %1,%0 ; cmovz %2,%0" : "=&r" (val) : "r" (val), "r" (max) );
     return (unsigned int)val;
 }
 
@@ -346,9 +345,9 @@ static inline unsigned int __scanbit(uns
  * Returns the bit-number of the first set bit, not the number of the byte
  * containing a bit.
  */
-#define find_first_bit(addr,size) \
-((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
-  (__scanbit(*(const unsigned long *)addr)) : \
+#define find_first_bit(addr,size)                               \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?       \
+  (__scanbit(*(const unsigned long *)addr, size)) :             \
   __find_first_bit(addr,size)))
 
 /**
@@ -357,9 +356,9 @@ static inline unsigned int __scanbit(uns
  * @offset: The bitnumber to start searching at
  * @size: The maximum size to search
  */
-#define find_next_bit(addr,size,off) \
-((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
-  ((off) + (__scanbit((*(const unsigned long *)addr) >> (off)))) : \
+#define find_next_bit(addr,size,off)                                     \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?                \
+  ((off) + (__scanbit((*(const unsigned long *)addr) >> (off), size))) : \
   __find_next_bit(addr,size,off)))
 
 /**
@@ -370,9 +369,9 @@ static inline unsigned int __scanbit(uns
  * Returns the bit-number of the first zero bit, not the number of the byte
  * containing a bit.
  */
-#define find_first_zero_bit(addr,size) \
-((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
-  (__scanbit(~*(const unsigned long *)addr)) : \
+#define find_first_zero_bit(addr,size)                          \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?       \
+  (__scanbit(~*(const unsigned long *)addr, size)) :            \
   __find_first_zero_bit(addr,size)))
 
 /**
@@ -381,9 +380,9 @@ static inline unsigned int __scanbit(uns
  * @offset: The bitnumber to start searching at
  * @size: The maximum size to search
  */
-#define find_next_zero_bit(addr,size,off) \
-((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
-  ((off)+(__scanbit(~(((*(const unsigned long *)addr)) >> (off))))) : \
+#define find_next_zero_bit(addr,size,off)                                   \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?                   \
+  ((off)+(__scanbit(~(((*(const unsigned long *)addr)) >> (off)), size))) : \
   __find_next_zero_bit(addr,size,off)))
 
 
@@ -391,8 +390,7 @@ static inline unsigned int __scanbit(uns
  * find_first_set_bit - find the first set bit in @word
  * @word: the word to search
  * 
- * Returns the bit-number of the first set bit. If no bits are set then the
- * result is undefined.
+ * Returns the bit-number of the first set bit. The input must *not* be zero.
  */
 static inline unsigned int find_first_set_bit(unsigned long word)
 {
@@ -401,26 +399,10 @@ static inline unsigned int find_first_se
 }
 
 /**
- * ffz - find first zero in word.
- * @word: The word to search
- *
- * Undefined if no zero exists, so code should check against ~0UL first.
- */
-static inline unsigned long ffz(unsigned long word)
-{
-    asm ( "bsf %1,%0"
-          :"=r" (word)
-          :"r" (~word));
-    return word;
-}
-
-/**
  * ffs - find first bit set
  * @x: the word to search
  *
- * This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
+ * This is defined the same way as the libc and compiler builtin ffs routines.
  */
 static inline int ffs(unsigned long x)
 {
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h     Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/asm-x86/hvm/hvm.h     Thu Apr 24 14:08:29 2008 -0600
@@ -139,6 +139,8 @@ int hvm_vcpu_initialise(struct vcpu *v);
 int hvm_vcpu_initialise(struct vcpu *v);
 void hvm_vcpu_destroy(struct vcpu *v);
 void hvm_vcpu_down(struct vcpu *v);
+int hvm_vcpu_cacheattr_init(struct vcpu *v);
+void hvm_vcpu_cacheattr_destroy(struct vcpu *v);
 
 void hvm_send_assist_req(struct vcpu *v);
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/asm-x86/hvm/support.h Thu Apr 24 14:08:29 2008 -0600
@@ -130,5 +130,7 @@ int hvm_set_cr0(unsigned long value);
 int hvm_set_cr0(unsigned long value);
 int hvm_set_cr3(unsigned long value);
 int hvm_set_cr4(unsigned long value);
+int hvm_msr_read_intercept(struct cpu_user_regs *regs);
+int hvm_msr_write_intercept(struct cpu_user_regs *regs);
 
 #endif /* __ASM_X86_HVM_SUPPORT_H__ */
diff -r 239b44eeb2d6 -r dc510776dd59 
xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h      Thu Apr 24 14:02:16 
2008 -0600
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h      Thu Apr 24 14:08:29 
2008 -0600
@@ -35,6 +35,9 @@
 /* IOMMU Command Buffer entries: in power of 2 increments, minimum of 256 */
 #define IOMMU_CMD_BUFFER_DEFAULT_ENTRIES       512
 
+/* IOMMU Event Log entries: in power of 2 increments, minimum of 256 */
+#define IOMMU_EVENT_LOG_DEFAULT_ENTRIES     512
+
 #define BITMAP_ENTRIES_PER_BYTE                8
 
 #define PTE_PER_TABLE_SHIFT            9
@@ -303,6 +306,11 @@
 #define IOMMU_EVENT_COMMAND_HW_ERROR           0x6
 #define IOMMU_EVENT_IOTLB_INV_TIMEOUT          0x7
 #define IOMMU_EVENT_INVALID_DEV_REQUEST                0x8
+
+#define IOMMU_EVENT_DOMAIN_ID_MASK           0x0000FFFF
+#define IOMMU_EVENT_DOMAIN_ID_SHIFT          0
+#define IOMMU_EVENT_DEVICE_ID_MASK           0x0000FFFF
+#define IOMMU_EVENT_DEVICE_ID_SHIFT          0
 
 /* Control Register */
 #define IOMMU_CONTROL_MMIO_OFFSET                      0x18
@@ -427,4 +435,33 @@
 #define IOMMU_IO_READ_ENABLED           1
 #define HACK_BIOS_SETTINGS                  0
 
+/* MSI interrupt */
+#define MSI_DATA_VECTOR_SHIFT       0
+#define MSI_DATA_VECTOR(v)      (((u8)v) << MSI_DATA_VECTOR_SHIFT)
+
+#define MSI_DATA_DELIVERY_SHIFT     8
+#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_SHIFT)
+#define MSI_DATA_DELIVERY_LOWPRI    (1 << MSI_DATA_DELIVERY_SHIFT)
+
+#define MSI_DATA_LEVEL_SHIFT        14
+#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT)
+#define MSI_DATA_LEVEL_ASSERT   (1 << MSI_DATA_LEVEL_SHIFT)
+
+#define MSI_DATA_TRIGGER_SHIFT      15
+#define MSI_DATA_TRIGGER_EDGE   (0 << MSI_DATA_TRIGGER_SHIFT)
+#define  MSI_DATA_TRIGGER_LEVEL  (1 << MSI_DATA_TRIGGER_SHIFT)
+
+#define MSI_TARGET_CPU_SHIFT        12
+#define MSI_ADDR_HEADER         0xfee00000
+#define MSI_ADDR_DESTID_MASK        0xfff0000f
+#define MSI_ADDR_DESTID_CPU(cpu)    ((cpu) << MSI_TARGET_CPU_SHIFT)
+
+#define MSI_ADDR_DESTMODE_SHIFT     2
+#define MSI_ADDR_DESTMODE_PHYS  (0 << MSI_ADDR_DESTMODE_SHIFT)
+#define MSI_ADDR_DESTMODE_LOGIC (1 << MSI_ADDR_DESTMODE_SHIFT)
+
+#define MSI_ADDR_REDIRECTION_SHIFT  3
+#define MSI_ADDR_REDIRECTION_CPU    (0 << MSI_ADDR_REDIRECTION_SHIFT)
+#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT)
+
 #endif /* _ASM_X86_64_AMD_IOMMU_DEFS_H */
diff -r 239b44eeb2d6 -r dc510776dd59 
xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h     Thu Apr 24 14:02:16 
2008 -0600
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h     Thu Apr 24 14:08:29 
2008 -0600
@@ -35,6 +35,19 @@
 #define DMA_32BIT_MASK  0x00000000ffffffffULL
 #define PAGE_ALIGN(addr)    (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
 
+#ifdef AMD_IOV_DEBUG
+#define amd_iov_info(fmt, args...) \
+    printk(XENLOG_INFO "AMD_IOV: " fmt, ## args)
+#define amd_iov_warning(fmt, args...) \
+    printk(XENLOG_WARNING "AMD_IOV: " fmt, ## args)
+#define amd_iov_error(fmt, args...) \
+    printk(XENLOG_ERR "AMD_IOV: %s:%d: " fmt, __FILE__ , __LINE__ , ## args)
+#else
+#define amd_iov_info(fmt, args...)
+#define amd_iov_warning(fmt, args...)
+#define amd_iov_error(fmt, args...)
+#endif
+
 typedef int (*iommu_detect_callback_ptr_t)(
     u8 bus, u8 dev, u8 func, u8 cap_ptr);
 
@@ -49,6 +62,7 @@ void __init unmap_iommu_mmio_region(stru
 void __init unmap_iommu_mmio_region(struct amd_iommu *iommu);
 void __init register_iommu_dev_table_in_mmio_space(struct amd_iommu *iommu);
 void __init register_iommu_cmd_buffer_in_mmio_space(struct amd_iommu *iommu);
+void __init register_iommu_event_log_in_mmio_space(struct amd_iommu *iommu);
 void __init enable_iommu(struct amd_iommu *iommu);
 
 /* mapping functions */
@@ -69,11 +83,6 @@ void invalidate_dev_table_entry(struct a
 /* send cmd to iommu */
 int send_iommu_command(struct amd_iommu *iommu, u32 cmd[]);
 void flush_command_buffer(struct amd_iommu *iommu);
-
-/* iommu domain funtions */
-int amd_iommu_domain_init(struct domain *domain);
-void amd_iommu_setup_domain_device(struct domain *domain,
-    struct amd_iommu *iommu, int bdf);
 
 /* find iommu for bdf */
 struct amd_iommu *find_iommu_for_device(int bus, int devfn);
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h    Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/asm-x86/hvm/vcpu.h    Thu Apr 24 14:08:29 2008 -0600
@@ -83,7 +83,16 @@ struct hvm_vcpu {
      */
     unsigned long       mmio_gva;
     unsigned long       mmio_gpfn;
+    /* Callback into x86_emulate when emulating FPU/MMX/XMM instructions. */
+    void (*fpu_exception_callback)(void *, struct cpu_user_regs *);
+    void *fpu_exception_callback_arg;
+    /* We may read up to m128 as a number of device-model transactions. */
+    paddr_t mmio_large_read_pa;
+    uint8_t mmio_large_read[16];
+    unsigned int mmio_large_read_bytes;
+    /* We may write up to m128 as a number of device-model transactions. */
+    paddr_t mmio_large_write_pa;
+    unsigned int mmio_large_write_bytes;
 };
 
 #endif /* __ASM_X86_HVM_VCPU_H__ */
-
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/mtrr.h
--- a/xen/include/asm-x86/mtrr.h        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/asm-x86/mtrr.h        Thu Apr 24 14:08:29 2008 -0600
@@ -11,13 +11,6 @@
 #define MTRR_TYPE_WRBACK     6
 #define MTRR_NUM_TYPES       7
 #define MEMORY_NUM_TYPES     MTRR_NUM_TYPES
-
-#define MTRR_PHYSMASK_VALID_BIT  11
-#define MTRR_PHYSMASK_SHIFT      12
-
-#define MTRR_PHYSBASE_TYPE_MASK  0xff   /* lowest 8 bits */
-#define MTRR_PHYSBASE_SHIFT      12
-#define MTRR_VCNT            8
 
 #define NORMAL_CACHE_MODE          0
 #define NO_FILL_CACHE_MODE         2
@@ -58,7 +51,6 @@ struct mtrr_state {
        u64       mtrr_cap;
        /* ranges in var MSRs are overlapped or not:0(no overlapped) */
        bool_t    overlapped;
-       bool_t    is_initialized;
 };
 
 extern void mtrr_save_fixed_ranges(void *);
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/paging.h
--- a/xen/include/asm-x86/paging.h      Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/asm-x86/paging.h      Thu Apr 24 14:08:29 2008 -0600
@@ -83,12 +83,14 @@ struct shadow_paging_mode {
                                             unsigned long new,
                                             unsigned int bytes,
                                             struct sh_emulate_ctxt *sh_ctxt);
+#ifdef __i386__
     int           (*x86_emulate_cmpxchg8b )(struct vcpu *v, unsigned long va,
                                             unsigned long old_lo, 
                                             unsigned long old_hi, 
                                             unsigned long new_lo,
                                             unsigned long new_hi,
                                             struct sh_emulate_ctxt *sh_ctxt);
+#endif
     mfn_t         (*make_monitor_table    )(struct vcpu *v);
     void          (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn);
     int           (*guess_wrmap           )(struct vcpu *v, 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/public/xsm/acm.h
--- a/xen/include/public/xsm/acm.h      Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/public/xsm/acm.h      Thu Apr 24 14:08:29 2008 -0600
@@ -91,7 +91,7 @@
  * whenever the interpretation of the related
  * policy's data structure changes
  */
-#define ACM_POLICY_VERSION 3
+#define ACM_POLICY_VERSION 4
 #define ACM_CHWALL_VERSION 1
 #define ACM_STE_VERSION  1
 
@@ -131,6 +131,10 @@ typedef uint16_t domaintype_t;
 /* high-16 = version, low-16 = check magic */
 #define ACM_MAGIC  0x0001debc
 
+/* size of the SHA1 hash identifying the XML policy from which the
+   binary policy was created */
+#define ACM_SHA1_HASH_SIZE    20
+
 /* each offset in bytes from start of the struct they
  * are part of */
 
@@ -160,6 +164,7 @@ struct acm_policy_buffer {
     uint32_t secondary_policy_code;
     uint32_t secondary_buffer_offset;
     struct acm_policy_version xml_pol_version; /* add in V3 */
+    uint8_t xml_policy_hash[ACM_SHA1_HASH_SIZE]; /* added in V4 */
 };
 
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h   Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/xen/iommu.h   Thu Apr 24 14:08:29 2008 -0600
@@ -27,9 +27,8 @@
 #include <public/domctl.h>
 
 extern int vtd_enabled;
-extern int amd_iommu_enabled;
+extern int iommu_enabled;
 
-#define iommu_enabled ( amd_iommu_enabled || vtd_enabled )
 #define domain_hvm_iommu(d)     (&d->arch.hvm_domain.hvm_iommu)
 #define domain_vmx_iommu(d)     (&d->arch.hvm_domain.hvm_iommu.vmx_iommu)
 
@@ -72,7 +71,6 @@ struct iommu {
     struct intel_iommu *intel;
 };
 
-int iommu_setup(void);
 int iommu_domain_init(struct domain *d);
 void iommu_domain_destroy(struct domain *d);
 int device_assigned(u8 bus, u8 devfn);
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/xen/serial.h
--- a/xen/include/xen/serial.h  Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/xen/serial.h  Thu Apr 24 14:08:29 2008 -0600
@@ -16,12 +16,10 @@ void serial_set_rx_handler(int handle, s
 void serial_set_rx_handler(int handle, serial_rx_fn fn);
 
 /* Number of characters we buffer for a polling receiver. */
-#define SERIAL_RXBUFSZ 32
-#define MASK_SERIAL_RXBUF_IDX(_i) ((_i)&(SERIAL_RXBUFSZ-1))
+#define serial_rxbufsz 32
 
 /* Number of characters we buffer for an interrupt-driven transmitter. */
-#define SERIAL_TXBUFSZ 16384
-#define MASK_SERIAL_TXBUF_IDX(_i) ((_i)&(SERIAL_TXBUFSZ-1))
+extern unsigned int serial_txbufsz;
 
 struct uart_driver;
 
@@ -39,7 +37,7 @@ struct serial_port {
     /* Receiver callback functions (asynchronous receivers). */
     serial_rx_fn        rx_lo, rx_hi, rx;
     /* Receive data buffer (polling receivers). */
-    char                rxbuf[SERIAL_RXBUFSZ];
+    char                rxbuf[serial_rxbufsz];
     unsigned int        rxbufp, rxbufc;
     /* Serial I/O is concurrency-safe. */
     spinlock_t          rx_lock, tx_lock;
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/xsm/acm/acm_core.h
--- a/xen/include/xsm/acm/acm_core.h    Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/xsm/acm/acm_core.h    Thu Apr 24 14:08:29 2008 -0600
@@ -34,6 +34,7 @@ struct acm_binary_policy {
     u16 primary_policy_code;
     u16 secondary_policy_code;
     struct acm_policy_version xml_pol_version;
+    u8 xml_policy_hash[ACM_SHA1_HASH_SIZE];
 };
 
 struct chwall_binary_policy {
diff -r 239b44eeb2d6 -r dc510776dd59 xen/tools/Makefile
--- a/xen/tools/Makefile        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/tools/Makefile        Thu Apr 24 14:08:29 2008 -0600
@@ -4,12 +4,12 @@ include $(XEN_ROOT)/Config.mk
 
 .PHONY: default
 default:
-       $(MAKE) -C figlet
+       [ -d figlet ] && $(MAKE) -C figlet
        $(MAKE) symbols
 
 .PHONY: clean
 clean:
-       $(MAKE) -C figlet clean
+       [ -d figlet ] && $(MAKE) -C figlet clean
        rm -f *.o symbols
 
 symbols: symbols.c
diff -r 239b44eeb2d6 -r dc510776dd59 xen/tools/figlet/figlet.c
--- a/xen/tools/figlet/figlet.c Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/tools/figlet/figlet.c Thu Apr 24 14:08:29 2008 -0600
@@ -1488,18 +1488,7 @@ static void myputchar(unsigned char c)
 
     putc(c, stderr);
 
-    if ( nr_chars == 0 )
-        putchar('"');
-
-    putchar('\\');
-    putchar('0' + ((c>>6)&7));
-    putchar('0' + ((c>>3)&7));
-    putchar('0' + ((c>>0)&7));
-
-    if ( c == '\n' )
-        startline = 1;
-
-    if ( ++nr_chars == 18 ) 
+    if ( nr_chars == 18 ) 
     {
         nr_chars = 0;
         putchar('"');
@@ -1507,6 +1496,17 @@ static void myputchar(unsigned char c)
         putchar('\\');
         putchar('\n');
     }
+
+    if ( nr_chars++ == 0 )
+        putchar('"');
+
+    putchar('\\');
+    putchar('0' + ((c>>6)&7));
+    putchar('0' + ((c>>3)&7));
+    putchar('0' + ((c>>0)&7));
+
+    if ( c == '\n' )
+        startline = 1;
 }
 
 void putstring(string)
diff -r 239b44eeb2d6 -r dc510776dd59 xen/xsm/acm/acm_policy.c
--- a/xen/xsm/acm/acm_policy.c  Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/xsm/acm/acm_policy.c  Thu Apr 24 14:08:29 2008 -0600
@@ -156,6 +156,10 @@ _acm_update_policy(void *buf, u32 buf_si
            &pol->xml_pol_version,
            sizeof(acm_bin_pol.xml_pol_version));
 
+    memcpy(&acm_bin_pol.xml_policy_hash,
+           pol->xml_policy_hash,
+           sizeof(acm_bin_pol.xml_policy_hash));
+
     if ( acm_primary_ops->is_default_policy() &&
          acm_secondary_ops->is_default_policy() )
         require_update = 0;
@@ -257,6 +261,10 @@ acm_get_policy(XEN_GUEST_HANDLE_64(void)
     memcpy(&bin_pol->xml_pol_version,
            &acm_bin_pol.xml_pol_version,
            sizeof(struct acm_policy_version));
+
+    memcpy(&bin_pol->xml_policy_hash,
+           &acm_bin_pol.xml_policy_hash,
+           sizeof(acm_bin_pol.xml_policy_hash));
 
     ret = acm_dump_policy_reference(
                policy_buffer + be32_to_cpu(bin_pol->policy_reference_offset),

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.