[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg



# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1224643615 -32400
# Node ID 46d7e12c4c919bab07af4b7097526dd06b824bea
# Parent  6583186e5989d7e7e0bff126cd20a9cacb7c1613
# Parent  d2f7243fc571ea78cbf3fe33e723aa8f30111daa
merge with xen-unstable.hg
---
 xen/include/asm-ia64/xenspinlock.h                         |   30 
 docs/xen-api/coversheet.tex                                |    1 
 docs/xen-api/revision-history.tex                          |    9 
 docs/xen-api/xenapi-coversheet.tex                         |    4 
 docs/xen-api/xenapi-datamodel-graph.dot                    |   11 
 docs/xen-api/xenapi-datamodel.tex                          | 1160 +++++++++++++
 tools/blktap/drivers/block-qcow.c                          |   18 
 tools/blktap/drivers/block-qcow2.c                         |   62 
 tools/libxc/xc_domain.c                                    |   12 
 tools/libxc/xenctrl.h                                      |    3 
 tools/python/xen/lowlevel/xc/xc.c                          |   21 
 tools/python/xen/util/pci.py                               |   21 
 tools/python/xen/util/utils.py                             |   26 
 tools/python/xen/util/vscsi_util.py                        |  219 +-
 tools/python/xen/xend/XendAPI.py                           |   20 
 tools/python/xen/xend/XendConfig.py                        |  202 +-
 tools/python/xen/xend/XendDSCSI.py                         |  174 +
 tools/python/xen/xend/XendDomainInfo.py                    |  137 +
 tools/python/xen/xend/XendNode.py                          |   39 
 tools/python/xen/xend/XendPSCSI.py                         |  143 +
 tools/python/xen/xend/server/vscsiif.py                    |   18 
 tools/python/xen/xm/create.dtd                             |    5 
 tools/python/xen/xm/create.py                              |   18 
 tools/python/xen/xm/main.py                                |  140 +
 tools/python/xen/xm/xenapi_create.py                       |   59 
 tools/xentrace/formats                                     |    4 
 unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c |   14 
 xen/arch/ia64/vmx/vmmu.c                                   |    2 
 xen/arch/x86/acpi/cpu_idle.c                               |   15 
 xen/arch/x86/cpu/amd.c                                     |    4 
 xen/arch/x86/domain.c                                      |    7 
 xen/arch/x86/domctl.c                                      |   15 
 xen/arch/x86/hvm/svm/emulate.c                             |   31 
 xen/arch/x86/hvm/svm/intr.c                                |   57 
 xen/arch/x86/hvm/viridian.c                                |    3 
 xen/arch/x86/hvm/vmx/intr.c                                |   57 
 xen/arch/x86/hvm/vmx/vmx.c                                 |   20 
 xen/arch/x86/irq.c                                         |   55 
 xen/arch/x86/mm/hap/hap.c                                  |    7 
 xen/arch/x86/mm/shadow/private.h                           |   63 
 xen/arch/x86/msi.c                                         |    1 
 xen/arch/x86/nmi.c                                         |   25 
 xen/arch/x86/platform_hypercall.c                          |   22 
 xen/arch/x86/smpboot.c                                     |   13 
 xen/arch/x86/traps.c                                       |    6 
 xen/arch/x86/x86_32/xen.lds.S                              |    1 
 xen/arch/x86/x86_64/Makefile                               |    2 
 xen/arch/x86/x86_64/cpu_idle.c                             |    2 
 xen/arch/x86/x86_64/cpufreq.c                              |   91 +
 xen/arch/x86/x86_64/mm.c                                   |    2 
 xen/arch/x86/x86_64/platform_hypercall.c                   |    4 
 xen/arch/x86/x86_64/xen.lds.S                              |    1 
 xen/arch/x86/x86_emulate/x86_emulate.c                     |    9 
 xen/common/Makefile                                        |    3 
 xen/common/kernel.c                                        |    3 
 xen/common/schedule.c                                      |    4 
 xen/common/spinlock.c                                      |  154 +
 xen/common/timer.c                                         |   56 
 xen/common/xmalloc_tlsf.c                                  |  599 ++++++
 xen/drivers/Makefile                                       |    2 
 xen/drivers/acpi/pmstat.c                                  |    4 
 xen/drivers/char/ns16550.c                                 |   19 
 xen/drivers/cpufreq/cpufreq.c                              |   35 
 xen/drivers/cpufreq/utility.c                              |   37 
 xen/drivers/passthrough/Makefile                           |    1 
 xen/drivers/passthrough/io.c                               |   61 
 xen/drivers/passthrough/iommu.c                            |    2 
 xen/drivers/passthrough/pci.c                              |    5 
 xen/drivers/passthrough/vtd/Makefile                       |    1 
 xen/drivers/passthrough/vtd/ia64/Makefile                  |    1 
 xen/drivers/passthrough/vtd/ia64/vtd.c                     |  112 +
 xen/drivers/passthrough/vtd/intremap.c                     |   54 
 xen/drivers/passthrough/vtd/iommu.c                        |  125 -
 xen/drivers/passthrough/vtd/qinval.c                       |    6 
 xen/drivers/passthrough/vtd/utils.c                        |    5 
 xen/drivers/passthrough/vtd/x86/vtd.c                      |    8 
 xen/include/acpi/cpufreq/processor_perf.h                  |    3 
 xen/include/asm-ia64/linux-xen/asm/spinlock.h              |   49 
 xen/include/asm-x86/domain.h                               |    2 
 xen/include/asm-x86/hvm/hvm.h                              |    7 
 xen/include/asm-x86/hvm/irq.h                              |   78 
 xen/include/asm-x86/hvm/svm/vmcb.h                         |    4 
 xen/include/asm-x86/io_apic.h                              |    2 
 xen/include/asm-x86/msi.h                                  |    9 
 xen/include/asm-x86/rwlock.h                               |   32 
 xen/include/asm-x86/spinlock.h                             |   95 -
 xen/include/public/domctl.h                                |    4 
 xen/include/public/trace.h                                 |   10 
 xen/include/xen/hvm/irq.h                                  |   99 +
 xen/include/xen/spinlock.h                                 |  156 -
 xen/include/xen/xmalloc.h                                  |   81 
 xen/include/xlat.lst                                       |    4 
 92 files changed, 4067 insertions(+), 950 deletions(-)

diff -r 6583186e5989 -r 46d7e12c4c91 docs/xen-api/coversheet.tex
--- a/docs/xen-api/coversheet.tex       Wed Oct 22 11:38:22 2008 +0900
+++ b/docs/xen-api/coversheet.tex       Wed Oct 22 11:46:55 2008 +0900
@@ -51,6 +51,7 @@ Mike Day, IBM & Daniel Veillard, Red Hat
 Mike Day, IBM & Daniel Veillard, Red Hat \\
 Jim Fehlig, Novell & Tom Wilkie, University of Cambridge \\
 Jon Harrop, XenSource & Yosuke Iwamatsu, NEC \\
+Masaki Kanno, FUJITSU \\
 \end{tabular}
 \end{large}
 
diff -r 6583186e5989 -r 46d7e12c4c91 docs/xen-api/revision-history.tex
--- a/docs/xen-api/revision-history.tex Wed Oct 22 11:38:22 2008 +0900
+++ b/docs/xen-api/revision-history.tex Wed Oct 22 11:46:55 2008 +0900
@@ -56,5 +56,14 @@
     \end{flushleft}
    \end{minipage}\\
   \hline
+  1.0.7 & 20th Oct. 08 & M. Kanno &
+   \begin{minipage}[t]{7cm}
+    \begin{flushleft}
+     Added definitions of new classes DSCSI and PSCSI. Updated the table
+     and the diagram representing relationships between classes.
+     Added host.PSCSIs and VM.DSCSIs fields.
+    \end{flushleft}
+   \end{minipage}\\
+  \hline
  \end{tabular}
 \end{center}
diff -r 6583186e5989 -r 46d7e12c4c91 docs/xen-api/xenapi-coversheet.tex
--- a/docs/xen-api/xenapi-coversheet.tex        Wed Oct 22 11:38:22 2008 +0900
+++ b/docs/xen-api/xenapi-coversheet.tex        Wed Oct 22 11:46:55 2008 +0900
@@ -17,12 +17,12 @@
 \newcommand{\coversheetlogo}{xen.eps}
 
 %% Document date
-\newcommand{\datestring}{24th July 2008}
+\newcommand{\datestring}{20th October 2008}
 
 \newcommand{\releasestatement}{Stable Release}
 
 %% Document revision
-\newcommand{\revstring}{API Revision 1.0.6}
+\newcommand{\revstring}{API Revision 1.0.7}
 
 %% Document authors
 \newcommand{\docauthors}{
diff -r 6583186e5989 -r 46d7e12c4c91 docs/xen-api/xenapi-datamodel-graph.dot
--- a/docs/xen-api/xenapi-datamodel-graph.dot   Wed Oct 22 11:38:22 2008 +0900
+++ b/docs/xen-api/xenapi-datamodel-graph.dot   Wed Oct 22 11:46:55 2008 +0900
@@ -12,9 +12,11 @@ digraph "Xen-API Class Diagram" {
 digraph "Xen-API Class Diagram" {
 fontname="Verdana";
 
-node [ shape=box ]; session VM host network VIF PIF SR VDI VBD PBD user XSPolicy ACMPolicy;
-node [shape=ellipse]; PIF_metrics VIF_metrics VM_metrics VBD_metrics PBD_metrics VM_guest_metrics host_metrics;
-node [shape=box]; DPCI PPCI host_cpu console VTPM
+node [ shape=box ]; session VM host network VIF PIF SR VDI VBD PBD user;
+node [ shape=box ]; XSPolicy ACMPolicy DPCI PPCI host_cpu console VTPM;
+node [ shape=box ]; DSCSI PSCSI;
+node [ shape=ellipse ]; VM_metrics VM_guest_metrics host_metrics;
+node [ shape=ellipse ]; PIF_metrics VIF_metrics VBD_metrics PBD_metrics;
 session -> host [ arrowhead="none" ]
 session -> user [ arrowhead="none" ]
 VM -> VM_metrics [ arrowhead="none" ]
@@ -41,4 +43,7 @@ DPCI -> VM [ arrowhead="none", arrowtail
 DPCI -> VM [ arrowhead="none", arrowtail="crow" ]
 DPCI -> PPCI [ arrowhead="none" ]
 PPCI -> host [ arrowhead="none", arrowtail="crow" ]
+DSCSI -> VM [ arrowhead="none", arrowtail="crow" ]
+DSCSI -> PSCSI [ arrowhead="none" ]
+PSCSI -> host [ arrowhead="none", arrowtail="crow" ]
 }
diff -r 6583186e5989 -r 46d7e12c4c91 docs/xen-api/xenapi-datamodel.tex
--- a/docs/xen-api/xenapi-datamodel.tex Wed Oct 22 11:38:22 2008 +0900
+++ b/docs/xen-api/xenapi-datamodel.tex Wed Oct 22 11:46:55 2008 +0900
@@ -46,6 +46,8 @@ Name & Description \\
 {\tt console} & A console \\
 {\tt DPCI} & A pass-through PCI device \\
 {\tt PPCI} & A physical PCI device \\
+{\tt DSCSI} & A half-virtualized SCSI device \\
+{\tt PSCSI} & A physical SCSI device \\
 {\tt user} & A user of the system \\
 {\tt debug} & A basic class for testing \\
 {\tt XSPolicy} & A class for handling Xen Security Policies \\
@@ -74,6 +76,8 @@ console.VM & VM.consoles & one-to-many\\
 console.VM & VM.consoles & one-to-many\\
 DPCI.VM & VM.DPCIs & one-to-many\\
 PPCI.host & host.PPCIs & one-to-many\\
+DSCSI.VM & VM.DSCSIs & one-to-many\\
+PSCSI.host & host.PSCSIs & one-to-many\\
 host.resident\_VMs & VM.resident\_on & many-to-one\\
 host.host\_CPUs & host\_cpu.host & many-to-one\\
 \hline
@@ -1407,6 +1411,7 @@ Quals & Field & Type & Description \\
 $\mathit{RO}_\mathit{run}$ &  {\tt crash\_dumps} & (crashdump ref) Set & crash dumps associated with this VM \\
 $\mathit{RO}_\mathit{run}$ &  {\tt VTPMs} & (VTPM ref) Set & virtual TPMs \\
 $\mathit{RO}_\mathit{run}$ &  {\tt DPCIs} & (DPCI ref) Set & pass-through PCI devices \\
+$\mathit{RO}_\mathit{run}$ &  {\tt DSCSIs} & (DSCSI ref) Set & half-virtualized SCSI devices \\
 $\mathit{RW}$ &  {\tt PV/bootloader} & string & name of or path to bootloader \\
 $\mathit{RW}$ &  {\tt PV/kernel} & string & path to the kernel \\
 $\mathit{RW}$ &  {\tt PV/ramdisk} & string & path to the initrd \\
@@ -3443,6 +3448,38 @@ Get the DPCIs field of the given VM.
  \noindent {\bf Return Type:} 
 {\tt 
 (DPCI ref) Set
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_DSCSIs}
+
+{\bf Overview:} 
+Get the DSCSIs field of the given VM.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} ((DSCSI ref) Set) get_DSCSIs (session_id s, VM ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt VM ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+(DSCSI ref) Set
 }
 
 
@@ -5518,6 +5555,7 @@ Quals & Field & Type & Description \\
 $\mathit{RW}$ &  {\tt crash\_dump\_sr} & SR ref & The SR in which VDIs for crash dumps are created \\
 $\mathit{RO}_\mathit{run}$ &  {\tt PBDs} & (PBD ref) Set & physical blockdevices \\
 $\mathit{RO}_\mathit{run}$ &  {\tt PPCIs} & (PPCI ref) Set & physical PCI devices \\
+$\mathit{RO}_\mathit{run}$ &  {\tt PSCSIs} & (PSCSI ref) Set & physical SCSI devices \\
 $\mathit{RO}_\mathit{run}$ &  {\tt host\_CPUs} & (host\_cpu ref) Set & The physical CPUs on this host \\
 $\mathit{RO}_\mathit{run}$ &  {\tt metrics} & host\_metrics ref & metrics associated with this host \\
 \hline
@@ -6837,6 +6875,38 @@ Get the PPCIs field of the given host.
  \noindent {\bf Return Type:} 
 {\tt 
 (PPCI ref) Set
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_PSCSIs}
+
+{\bf Overview:} 
+Get the PSCSIs field of the given host.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} ((PSCSI ref) Set) get_PSCSIs (session_id s, host ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt host ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+(PSCSI ref) Set
 }
 
 
@@ -15723,6 +15793,1096 @@ all fields from the object
 
 \vspace{1cm}
 \newpage
+\section{Class: DSCSI}
+\subsection{Fields for class: DSCSI}
+\begin{longtable}{|lllp{0.38\textwidth}|}
+\hline
+\multicolumn{1}{|l}{Name} & \multicolumn{3}{l|}{\bf DSCSI} \\
+\multicolumn{1}{|l}{Description} & \multicolumn{3}{l|}{\parbox{11cm}{\em A
+half-virtualized SCSI device.}} \\
+\hline
+Quals & Field & Type & Description \\
+\hline
+$\mathit{RO}_\mathit{run}$ &  {\tt uuid} & string & unique identifier/object reference \\
+$\mathit{RO}_\mathit{inst}$ &  {\tt VM} & VM ref & the virtual machine \\
+$\mathit{RO}_\mathit{inst}$ &  {\tt PSCSI} & PSCSI ref & the physical SCSI device \\
+$\mathit{RO}_\mathit{run}$ &  {\tt virtual\_host} & int & the virtual host number \\
+$\mathit{RO}_\mathit{run}$ &  {\tt virtual\_channel} & int & the virtual channel number \\
+$\mathit{RO}_\mathit{run}$ &  {\tt virtual\_target} & int & the virtual target number \\
+$\mathit{RO}_\mathit{run}$ &  {\tt virtual\_lun} & int & the virtual logical unit number \\
+$\mathit{RO}_\mathit{inst}$ &  {\tt virtual\_HCTL} & string & the virtual HCTL \\
+$\mathit{RO}_\mathit{run}$ &  {\tt runtime\_properties} & (string $\rightarrow$ string) Map & Device runtime properties \\
+\hline
+\end{longtable}
+\subsection{RPCs associated with class: DSCSI}
+\subsubsection{RPC name:~get\_all}
+
+{\bf Overview:} 
+Return a list of all the DSCSIs known to the system.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} ((DSCSI ref) Set) get_all (session_id s)\end{verbatim}
+
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+(DSCSI ref) Set
+}
+
+
+references to all objects
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_uuid}
+
+{\bf Overview:} 
+Get the uuid field of the given DSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_uuid (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_VM}
+
+{\bf Overview:} 
+Get the VM field of the given DSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} (VM ref) get_VM (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+VM ref
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_PSCSI}
+
+{\bf Overview:} 
+Get the PSCSI field of the given DSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} (PSCSI ref) get_PSCSI (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+PSCSI ref
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_host}
+
+{\bf Overview:} 
+Get the virtual\_host field of the given DSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_virtual_host (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_channel}
+
+{\bf Overview:} 
+Get the virtual\_channel field of the given DSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_virtual_channel (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_target}
+
+{\bf Overview:} 
+Get the virtual\_target field of the given DSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_virtual_target (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_lun}
+
+{\bf Overview:} 
+Get the virtual\_lun field of the given DSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_virtual_lun (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_HCTL}
+
+{\bf Overview:} 
+Get the virtual\_HCTL field of the given DSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_virtual_HCTL (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_runtime\_properties}
+
+{\bf Overview:} 
+Get the runtime\_properties field of the given DSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} ((string -> string) Map) get_runtime_properties (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+(string $\rightarrow$ string) Map
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~create}
+
+{\bf Overview:} 
+Create a new DSCSI instance, and return its handle.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} (DSCSI ref) create (session_id s, DSCSI record args)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI record } & args & All constructor arguments \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+DSCSI ref
+}
+
+
+reference to the newly created object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~destroy}
+
+{\bf Overview:} 
+Destroy the specified DSCSI instance.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} void destroy (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+void
+}
+
+
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_by\_uuid}
+
+{\bf Overview:} 
+Get a reference to the DSCSI instance with the specified UUID.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} (DSCSI ref) get_by_uuid (session_id s, string uuid)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt string } & uuid & UUID of object to return \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+DSCSI ref
+}
+
+
+reference to the object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_record}
+
+{\bf Overview:} 
+Get a record containing the current state of the given DSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} (DSCSI record) get_record (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+DSCSI record
+}
+
+
+all fields from the object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+
+\vspace{1cm}
+\newpage
+\section{Class: PSCSI}
+\subsection{Fields for class: PSCSI}
+\begin{longtable}{|lllp{0.38\textwidth}|}
+\hline
+\multicolumn{1}{|l}{Name} & \multicolumn{3}{l|}{\bf PSCSI} \\
+\multicolumn{1}{|l}{Description} & \multicolumn{3}{l|}{\parbox{11cm}{\em A
+physical SCSI device.}} \\
+\hline
+Quals & Field & Type & Description \\
+\hline
+$\mathit{RO}_\mathit{run}$ &  {\tt uuid} & string & unique identifier/object reference \\
+$\mathit{RO}_\mathit{run}$ &  {\tt host} & host ref &  the physical machine to which this PSCSI is connected \\
+$\mathit{RO}_\mathit{run}$ &  {\tt physical\_host} & int & the physical host number \\
+$\mathit{RO}_\mathit{run}$ &  {\tt physical\_channel} & int & the physical channel number \\
+$\mathit{RO}_\mathit{run}$ &  {\tt physical\_target} & int & the physical target number \\
+$\mathit{RO}_\mathit{run}$ &  {\tt physical\_lun} & int & the physical logical unit number \\
+$\mathit{RO}_\mathit{run}$ &  {\tt physical\_HCTL} & string & the physical HCTL \\
+$\mathit{RO}_\mathit{run}$ &  {\tt vendor\_name} & string & the vendor name \\
+$\mathit{RO}_\mathit{run}$ &  {\tt model} & string & the model \\
+$\mathit{RO}_\mathit{run}$ &  {\tt type\_id} & int & the SCSI type ID \\
+$\mathit{RO}_\mathit{run}$ &  {\tt type} & string &  the SCSI type \\
+$\mathit{RO}_\mathit{run}$ &  {\tt dev\_name} & string & the SCSI device name (e.g. sda or st0) \\
+$\mathit{RO}_\mathit{run}$ &  {\tt sg\_name} & string & the SCSI generic device name (e.g. sg0) \\
+$\mathit{RO}_\mathit{run}$ &  {\tt revision} & string & the revision \\
+$\mathit{RO}_\mathit{run}$ &  {\tt scsi\_id} & string & the SCSI ID \\
+$\mathit{RO}_\mathit{run}$ &  {\tt scsi\_level} & int & the SCSI level \\
+\hline
+\end{longtable}
+\subsection{RPCs associated with class: PSCSI}
+\subsubsection{RPC name:~get\_all}
+
+{\bf Overview:} 
+Return a list of all the PSCSIs known to the system.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} ((PSCSI ref) Set) get_all (session_id s)\end{verbatim}
+
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+(PSCSI ref) Set
+}
+
+
+references to all objects
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_uuid}
+
+{\bf Overview:} 
+Get the uuid field of the given PSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_uuid (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_host}
+
+{\bf Overview:} 
+Get the host field of the given PSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} (host ref) get_host (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+host ref
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_physical\_host}
+
+{\bf Overview:} 
+Get the physical\_host field of the given PSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_physical_host (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_physical\_channel}
+
+{\bf Overview:} 
+Get the physical\_channel field of the given PSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_physical_channel (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_physical\_target}
+
+{\bf Overview:} 
+Get the physical\_target field of the given PSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_physical_target (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_physical\_lun}
+
+{\bf Overview:} 
+Get the physical\_lun field of the given PSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_physical_lun (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_physical\_HCTL}
+
+{\bf Overview:} 
+Get the physical\_HCTL field of the given PSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_physical_HCTL (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_vendor\_name}
+
+{\bf Overview:} 
+Get the vendor\_name field of the given PSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_vendor_name (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_model}
+
+{\bf Overview:} 
+Get the model field of the given PSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_model (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_type\_id}
+
+{\bf Overview:} 
+Get the type\_id field of the given PSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_type_id (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_type}
+
+{\bf Overview:} 
+Get the type field of the given PSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_type (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_dev\_name}
+
+{\bf Overview:} 
+Get the dev\_name field of the given PSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_dev_name (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_sg\_name}
+
+{\bf Overview:} 
+Get the sg\_name field of the given PSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_sg_name (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_revision}
+
+{\bf Overview:} 
+Get the revision field of the given PSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_revision (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_scsi\_id}
+
+{\bf Overview:} 
+Get the scsi\_id field of the given PSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_scsi_id (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_scsi\_level}
+
+{\bf Overview:} 
+Get the scsi\_level field of the given PSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} int get_scsi_level (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_by\_uuid}
+
+{\bf Overview:} 
+Get a reference to the PSCSI instance with the specified UUID.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} (PSCSI ref) get_by_uuid (session_id s, string uuid)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt string } & uuid & UUID of object to return \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+PSCSI ref
+}
+
+
+reference to the object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_record}
+
+{\bf Overview:} 
+Get a record containing the current state of the given PSCSI.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} (PSCSI record) get_record (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+PSCSI record
+}
+
+
+all fields from the object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+
+\vspace{1cm}
+\newpage
 \section{Class: user}
 \subsection{Fields for class: user}
 \begin{longtable}{|lllp{0.38\textwidth}|}
diff -r 6583186e5989 -r 46d7e12c4c91 tools/blktap/drivers/block-qcow.c
--- a/tools/blktap/drivers/block-qcow.c Wed Oct 22 11:38:22 2008 +0900
+++ b/tools/blktap/drivers/block-qcow.c Wed Oct 22 11:46:55 2008 +0900
@@ -734,8 +734,8 @@ static int tdqcow_open (struct disk_driv
 
        DPRINTF("QCOW: Opening %s\n",name);
 
-       o_flags = O_DIRECT | O_LARGEFILE | 
-               ((flags == TD_RDONLY) ? O_RDONLY : O_RDWR);
+       /* Since we don't handle O_DIRECT correctly, don't use it */
+       o_flags = O_LARGEFILE | ((flags == TD_RDONLY) ? O_RDONLY : O_RDWR);
        fd = open(name, o_flags);
        if (fd < 0) {
                DPRINTF("Unable to open %s (%d)\n",name,0 - errno);
@@ -1385,7 +1385,7 @@ static int tdqcow_get_parent_id(struct d
        filename[len]  = '\0';
 
        id->name       = strdup(filename);
-       id->drivertype = DISK_TYPE_QCOW;
+       id->drivertype = DISK_TYPE_AIO;
        err            = 0;
  out:
        free(buf);
@@ -1397,17 +1397,15 @@ static int tdqcow_validate_parent(struct
 {
        struct stat stats;
        uint64_t psize, csize;
-       struct tdqcow_state *c = (struct tdqcow_state *)child->private;
-       struct tdqcow_state *p = (struct tdqcow_state *)parent->private;
-       
-       if (stat(p->name, &stats))
+       
+       if (stat(parent->name, &stats))
                return -EINVAL;
-       if (get_filesize(p->name, &psize, &stats))
+       if (get_filesize(parent->name, &psize, &stats))
                return -EINVAL;
 
-       if (stat(c->name, &stats))
+       if (stat(child->name, &stats))
                return -EINVAL;
-       if (get_filesize(c->name, &csize, &stats))
+       if (get_filesize(child->name, &csize, &stats))
                return -EINVAL;
 
        if (csize != psize)
diff -r 6583186e5989 -r 46d7e12c4c91 tools/blktap/drivers/block-qcow2.c
--- a/tools/blktap/drivers/block-qcow2.c        Wed Oct 22 11:38:22 2008 +0900
+++ b/tools/blktap/drivers/block-qcow2.c        Wed Oct 22 11:46:55 2008 +0900
@@ -34,6 +34,7 @@
 #include "tapdisk.h"
 #include "tapaio.h"
 #include "bswap.h"
+#include "blk.h"
 
 #define USE_AIO
 
@@ -1902,6 +1903,42 @@ repeat:
 
 #endif 
 
+static int get_filesize(char *filename, uint64_t *size, struct stat *st)
+{
+       int fd;
+       QCowHeader header;
+
+       /*Set to the backing file size*/
+       fd = open(filename, O_RDONLY);
+       if (fd < 0)
+               return -1;
+       if (read(fd, &header, sizeof(header)) < sizeof(header)) {
+               close(fd);
+               return -1;
+       }
+       close(fd);
+       
+       be32_to_cpus(&header.magic);
+       be32_to_cpus(&header.version);
+       be64_to_cpus(&header.size);
+       if (header.magic == QCOW_MAGIC && header.version == QCOW_VERSION) {
+               *size = header.size >> SECTOR_SHIFT;
+               return 0;
+       }
+
+       if(S_ISBLK(st->st_mode)) {
+               fd = open(filename, O_RDONLY);
+               if (fd < 0)
+                       return -1;
+               if (blk_getimagesize(fd, size) != 0) {
+                       close(fd);
+                       return -1;
+               }
+               close(fd);
+       } else *size = (st->st_size >> SECTOR_SHIFT);   
+       return 0;
+}
+
 /**
  * @return 
  *        0 if parent id successfully retrieved;
@@ -1916,7 +1953,7 @@ static int qcow_get_parent_id(struct dis
                return TD_NO_PARENT;
 
        id->name = strdup(s->backing_file);
-       id->drivertype = DISK_TYPE_QCOW2;
+       id->drivertype = DISK_TYPE_AIO;
 
        return 0;
 }
@@ -1924,15 +1961,22 @@ static int qcow_validate_parent(struct d
 static int qcow_validate_parent(struct disk_driver *child, 
                struct disk_driver *parent, td_flag_t flags)
 {
-       struct BDRVQcowState *cs = (struct BDRVQcowState*) child->private;
-       struct BDRVQcowState *ps = (struct BDRVQcowState*) parent->private;
-
-       if (ps->total_sectors != cs->total_sectors) {
-               DPRINTF("qcow_validate_parent(): %#"PRIx64" != %#"PRIx64"\n",
-                       ps->total_sectors, cs->total_sectors);
+       struct stat stats;
+       uint64_t psize, csize;
+       
+       if (stat(parent->name, &stats))
                return -EINVAL;
-       }
-       
+       if (get_filesize(parent->name, &psize, &stats))
+               return -EINVAL;
+
+       if (stat(child->name, &stats))
+               return -EINVAL;
+       if (get_filesize(child->name, &csize, &stats))
+               return -EINVAL;
+
+       if (csize != psize)
+               return -EINVAL;
+
        return 0;
 }
 
diff -r 6583186e5989 -r 46d7e12c4c91 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Wed Oct 22 11:38:22 2008 +0900
+++ b/tools/libxc/xc_domain.c   Wed Oct 22 11:46:55 2008 +0900
@@ -1049,6 +1049,18 @@ int xc_domain_get_machine_address_size(i
     return rc == 0 ? domctl.u.address_size.size : rc;
 }
 
+int xc_domain_suppress_spurious_page_faults(int xc, uint32_t domid)
+{
+    DECLARE_DOMCTL;
+
+    memset(&domctl, 0, sizeof(domctl));
+    domctl.domain = domid;
+    domctl.cmd    = XEN_DOMCTL_suppress_spurious_page_faults;
+
+    return do_domctl(xc, &domctl);
+
+}
+
 /*
  * Local variables:
  * mode: C
diff -r 6583186e5989 -r 46d7e12c4c91 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Wed Oct 22 11:38:22 2008 +0900
+++ b/tools/libxc/xenctrl.h     Wed Oct 22 11:46:55 2008 +0900
@@ -1103,6 +1103,9 @@ int xc_domain_get_machine_address_size(i
 int xc_domain_get_machine_address_size(int handle,
                                       uint32_t domid);
 
+int xc_domain_suppress_spurious_page_faults(int handle,
+                                         uint32_t domid);
+
 /* Set the target domain */
 int xc_domain_set_target(int xc_handle,
                          uint32_t domid,
diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Wed Oct 22 11:38:22 2008 +0900
+++ b/tools/python/xen/lowlevel/xc/xc.c Wed Oct 22 11:46:55 2008 +0900
@@ -859,6 +859,21 @@ static PyObject *pyxc_dom_set_machine_ad
     return zero;
 }
 
+static PyObject *pyxc_dom_suppress_spurious_page_faults(XcObject *self,
+                                                     PyObject *args,
+                                                     PyObject *kwds)
+{
+    uint32_t dom;
+
+    if (!PyArg_ParseTuple(args, "i", &dom))
+       return NULL;
+
+    if (xc_domain_suppress_spurious_page_faults(self->xc_handle, dom) != 0)
+       return pyxc_error_to_exception();
+
+    Py_INCREF(zero);
+    return zero;
+}
 #endif /* __i386__ || __x86_64__ */
 
 static PyObject *pyxc_hvm_build(XcObject *self,
@@ -1911,6 +1926,12 @@ static PyMethodDef pyxc_methods[] = {
       "Set maximum machine address size for this domain.\n"
       " dom [int]: Identifier of domain.\n"
       " width [int]: Maximum machine address width.\n" },
+
+    { "domain_suppress_spurious_page_faults",
+      (PyCFunction)pyxc_dom_suppress_spurious_page_faults,
+      METH_VARARGS, "\n"
+      "Do not propagate spurious page faults to this guest.\n"
+      " dom [int]: Identifier of domain.\n" },
 #endif
 
     { NULL, NULL, 0, NULL }
diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/util/pci.py
--- a/tools/python/xen/util/pci.py      Wed Oct 22 11:38:22 2008 +0900
+++ b/tools/python/xen/util/pci.py      Wed Oct 22 11:46:55 2008 +0900
@@ -12,8 +12,8 @@ import types
 import types
 import struct
 import time
-
-PROC_MNT_PATH = '/proc/mounts'
+from xen.util import utils
+
 PROC_PCI_PATH = '/proc/bus/pci/devices'
 PROC_PCI_NUM_RESOURCES = 7
 
@@ -97,9 +97,6 @@ MSIX_SIZE_MASK = 0x7ff
 # Global variable to store information from lspci
 lspci_info = None
 
-# Global variable to store the sysfs mount point
-sysfs_mnt_point = None
-
 #Calculate PAGE_SHIFT: number of bits to shift an address to get the page number
 PAGE_SIZE = resource.getpagesize()
 PAGE_SHIFT = 0
@@ -141,20 +138,8 @@ def parse_pci_name(pci_name_string):
  
 
 def find_sysfs_mnt():
-    global sysfs_mnt_point
-    if not sysfs_mnt_point is None:
-        return sysfs_mnt_point
-
     try:
-        mounts_file = open(PROC_MNT_PATH,'r')
-
-        for line in mounts_file:
-            sline = line.split()
-            if len(sline)<3:
-                continue
-            if sline[2]=='sysfs':
-                sysfs_mnt_point= sline[1]
-                return sysfs_mnt_point
+        return utils.find_sysfs_mount()
     except IOError, (errno, strerr):
         raise PciDeviceParseError(('Failed to locate sysfs mount: %s: %s (%d)'%
             (PROC_PCI_PATH, strerr, errno)))
diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/util/utils.py
--- a/tools/python/xen/util/utils.py    Wed Oct 22 11:38:22 2008 +0900
+++ b/tools/python/xen/util/utils.py    Wed Oct 22 11:46:55 2008 +0900
@@ -48,3 +48,29 @@ def daemonize(prog, args, stdin_tmpfile=
     os.waitpid(pid, 0)
     return daemon_pid
 
+# Global variable to store the sysfs mount point
+sysfs_mount_point = None
+
+PROC_MOUNTS_PATH = '/proc/mounts'
+
+def find_sysfs_mount():
+    global sysfs_mount_point
+
+    if not sysfs_mount_point is None:
+        return sysfs_mount_point
+
+    try:
+        mounts_file = open(PROC_MOUNTS_PATH, 'r')
+
+        for line in mounts_file:
+            sline = line.split()
+            if len(sline) < 3:
+                continue
+            if sline[2] == 'sysfs':
+                sysfs_mount_point= sline[1]
+                return sysfs_mount_point
+    except IOError, (errno, strerr):
+        raise
+
+    return None
+
diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/util/vscsi_util.py
--- a/tools/python/xen/util/vscsi_util.py       Wed Oct 22 11:38:22 2008 +0900
+++ b/tools/python/xen/util/vscsi_util.py       Wed Oct 22 11:46:55 2008 +0900
@@ -23,32 +23,40 @@
 """Support for VSCSI Devices.
 """
 import os
+import os.path
 import sys
 import re
 import string
-
-def _vscsi_hctl_block(name, scsi_devices):
-    """ block-device name is convert into hctl. (e.g., '/dev/sda',
-    '0:0:0:0')"""
+from xen.util import utils
+
+SYSFS_SCSI_PATH = "/bus/scsi/devices"
+SYSFS_SCSI_DEV_VENDOR_PATH = '/vendor'
+SYSFS_SCSI_DEV_MODEL_PATH = '/model'
+SYSFS_SCSI_DEV_TYPEID_PATH = '/type'
+SYSFS_SCSI_DEV_REVISION_PATH = '/rev'
+SYSFS_SCSI_DEV_SCSILEVEL_PATH = '/scsi_level'
+
+def _vscsi_get_devname_by(name, scsi_devices):
+    """A device name is gotten by the HCTL.
+    (e.g., '0:0:0:0' to '/dev/sda')
+    """
+
     try:
         search = re.compile(r'' + name + '$', re.DOTALL)
     except Exception, e:
         raise VmError("vscsi: invalid expression. " + str(e))
-    chk = 0
-    for hctl, block, sg, scsi_id in scsi_devices:
+
+    for hctl, devname, sg, scsi_id in scsi_devices:
         if search.match(hctl):
-            chk = 1
-            break
-
-    if chk:
-        return (hctl, block)
-    else:
-        return (None, None)
-
-
-def _vscsi_block_scsiid_to_hctl(phyname, scsi_devices):
-    """ block-device name is convert into hctl. (e.g., '/dev/sda',
-    '0:0:0:0')"""
+            return (hctl, devname)
+
+    return (None, None)
+
+
+def _vscsi_get_hctl_by(phyname, scsi_devices):
+    """An HCTL is gotten by the device name or the scsi_id.
+    (e.g., '/dev/sda' to '0:0:0:0')
+    """
     
     if re.match('/dev/sd[a-z]+([1-9]|1[0-5])?$', phyname):
         # sd driver
@@ -63,71 +71,148 @@ def _vscsi_block_scsiid_to_hctl(phyname,
         # scsi_id -gu
         name = phyname
 
-    chk = 0
-    for hctl, block, sg, scsi_id in scsi_devices:
-        if block == name:
-            chk = 1
-            break
-        elif sg == name:
-            chk = 1
-            break
-        elif scsi_id == name:
-            chk = 1
-            break
-
-    if chk:
-        return (hctl, block)
-    else:
-        return (None, None)
+    for hctl, devname, sg, scsi_id in scsi_devices:
+        if name in [devname, sg, scsi_id]:
+            return (hctl, devname)
+
+    return (None, None)
 
 
 def vscsi_get_scsidevices():
     """ get all scsi devices"""
 
-    SERCH_SCSI_PATH = "/sys/bus/scsi/devices"
     devices = []
-
-    for dirpath, dirnames, files in os.walk(SERCH_SCSI_PATH):
+    sysfs_mnt = utils.find_sysfs_mount() 
+
+    for dirpath, dirnames, files in os.walk(sysfs_mnt + SYSFS_SCSI_PATH):
         for hctl in dirnames:
             paths = os.path.join(dirpath, hctl)
-            block = "-"
+            devname = None
+            sg = None
+            scsi_id = None
             for f in os.listdir(paths):
-                if re.match('^block', f):
-                    os.chdir(os.path.join(paths, f))
-                    block = os.path.basename(os.getcwd())
-                elif re.match('^tape', f):
-                    os.chdir(os.path.join(paths, f))
-                    block = os.path.basename(os.getcwd())
-                elif re.match('^scsi_changer', f):
-                    os.chdir(os.path.join(paths, f))
-                    block = os.path.basename(os.getcwd())
-                elif re.match('^onstream_tape', f):
-                    os.chdir(os.path.join(paths, f))
-                    block = os.path.basename(os.getcwd())
+                realpath = os.path.realpath(os.path.join(paths, f))
+                if  re.match('^block', f) or \
+                    re.match('^tape', f) or \
+                    re.match('^scsi_changer', f) or \
+                    re.match('^onstream_tape', f):
+                    devname = os.path.basename(realpath)
 
                 if re.match('^scsi_generic', f):
-                    os.chdir(os.path.join(paths, f))
-                    sg = os.path.basename(os.getcwd())
+                    sg = os.path.basename(realpath)
                     lines = os.popen('/sbin/scsi_id -gu -s /class/scsi_generic/' + sg).read().split()
-                    if len(lines) == 0:
-                        scsi_id = '-'
-                    else:
+                    if len(lines):
                         scsi_id = lines[0]
 
-            devices.append([hctl, block, sg, scsi_id])
+            devices.append([hctl, devname, sg, scsi_id])
 
     return devices
 
 
-def vscsi_search_hctl_and_block(device):
-
-    scsi_devices = vscsi_get_scsidevices()
-
-    tmp = device.split(':')
-    if len(tmp) == 4:
-        (hctl, block) = _vscsi_hctl_block(device, scsi_devices)
+def vscsi_get_hctl_and_devname_by(target, scsi_devices = None):
+    if scsi_devices is None:
+        scsi_devices = vscsi_get_scsidevices()
+
+    if len(target.split(':')) == 4:
+        return _vscsi_get_devname_by(target, scsi_devices)
     else:
-        (hctl, block) = _vscsi_block_scsiid_to_hctl(device, scsi_devices)
-
-    return (hctl, block)
-
+        return _vscsi_get_hctl_by(target, scsi_devices)
+
+
+def get_scsi_vendor(pHCTL):
+    try:
+        sysfs_mnt = utils.find_sysfs_mount() 
+        sysfs_scsi_dev_path = \
+            os.path.join(sysfs_mnt + SYSFS_SCSI_PATH, pHCTL)
+        scsi_vendor = \
+            os.popen('cat ' + sysfs_scsi_dev_path + \
+                              SYSFS_SCSI_DEV_VENDOR_PATH).read()
+        return scsi_vendor.splitlines()[0]
+    except:
+        return None
+
+def get_scsi_model(pHCTL):
+    try:
+        sysfs_mnt = utils.find_sysfs_mount() 
+        sysfs_scsi_dev_path = \
+            os.path.join(sysfs_mnt + SYSFS_SCSI_PATH, pHCTL)
+        scsi_model = \
+            os.popen('cat ' + sysfs_scsi_dev_path + \
+                              SYSFS_SCSI_DEV_MODEL_PATH).read()
+        return scsi_model.splitlines()[0]
+    except:
+        return None
+
+def get_scsi_typeid(pHCTL):
+    try:
+        sysfs_mnt = utils.find_sysfs_mount() 
+        sysfs_scsi_dev_path = \
+            os.path.join(sysfs_mnt + SYSFS_SCSI_PATH, pHCTL)
+        scsi_typeid = \
+            os.popen('cat ' + sysfs_scsi_dev_path + \
+                              SYSFS_SCSI_DEV_TYPEID_PATH).read()
+        return int(scsi_typeid.splitlines()[0])
+    except:
+        return None
+
+def get_scsi_revision(pHCTL):
+    try:
+        sysfs_mnt = utils.find_sysfs_mount() 
+        sysfs_scsi_dev_path = \
+            os.path.join(sysfs_mnt + SYSFS_SCSI_PATH, pHCTL)
+        scsi_revision = \
+            os.popen('cat ' + sysfs_scsi_dev_path + \
+                              SYSFS_SCSI_DEV_REVISION_PATH).read()
+        return scsi_revision.splitlines()[0]
+    except:
+        return None
+
+def get_scsi_scsilevel(pHCTL):
+    try:
+        sysfs_mnt = utils.find_sysfs_mount() 
+        sysfs_scsi_dev_path = \
+            os.path.join(sysfs_mnt + SYSFS_SCSI_PATH, pHCTL)
+        scsi_scsilevel = \
+            os.popen('cat ' + sysfs_scsi_dev_path + \
+                              SYSFS_SCSI_DEV_SCSILEVEL_PATH).read()
+        return int(scsi_scsilevel.splitlines()[0])
+    except:
+        return None
+
+def get_all_scsi_devices():
+
+    scsi_devs = []
+
+    for scsi_info in vscsi_get_scsidevices():
+        scsi_dev = {
+            'physical_HCTL': scsi_info[0],
+            'dev_name': None,
+            'sg_name': scsi_info[2],
+            'scsi_id': None
+        }
+        if scsi_info[1] is not None:
+            scsi_dev['dev_name'] = scsi_info[1] 
+        if scsi_info[3] is not None:
+            scsi_dev['scsi_id'] = scsi_info[3] 
+
+        scsi_dev['vendor_name'] = \
+            get_scsi_vendor(scsi_dev['physical_HCTL'])
+        scsi_dev['model'] = \
+            get_scsi_model(scsi_dev['physical_HCTL'])
+        scsi_dev['type_id'] = \
+            get_scsi_typeid(scsi_dev['physical_HCTL'])
+        scsi_dev['revision'] = \
+            get_scsi_revision(scsi_dev['physical_HCTL'])
+        scsi_dev['scsi_level'] = \
+            get_scsi_scsilevel(scsi_dev['physical_HCTL'])
+
+        try:
+            lsscsi_info = os.popen('lsscsi ' + scsi_dev['physical_HCTL']).read().split()
+            scsi_dev['type'] = lsscsi_info[1]
+        except:
+            scsi_dev['type'] = None
+
+        scsi_devs.append(scsi_dev)
+
+    return scsi_devs
+
diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xend/XendAPI.py
--- a/tools/python/xen/xend/XendAPI.py  Wed Oct 22 11:38:22 2008 +0900
+++ b/tools/python/xen/xend/XendAPI.py  Wed Oct 22 11:46:55 2008 +0900
@@ -42,6 +42,8 @@ from XendPBD import XendPBD
 from XendPBD import XendPBD
 from XendPPCI import XendPPCI
 from XendDPCI import XendDPCI
+from XendPSCSI import XendPSCSI
+from XendDSCSI import XendDSCSI
 from XendXSPolicy import XendXSPolicy, XendACMPolicy
 
 from XendAPIConstants import *
@@ -480,7 +482,9 @@ classes = {
     'PBD'          : valid_object("PBD"),
     'PIF_metrics'  : valid_object("PIF_metrics"),
     'PPCI'         : valid_object("PPCI"),
-    'DPCI'         : valid_object("DPCI")
+    'DPCI'         : valid_object("DPCI"),
+    'PSCSI'        : valid_object("PSCSI"),
+    'DSCSI'        : valid_object("DSCSI")
 }
 
 autoplug_classes = {
@@ -491,6 +495,8 @@ autoplug_classes = {
     'PIF_metrics' : XendPIFMetrics,
     'PPCI'        : XendPPCI,
     'DPCI'        : XendDPCI,
+    'PSCSI'       : XendPSCSI,
+    'DSCSI'       : XendDSCSI,
     'XSPolicy'    : XendXSPolicy,
     'ACMPolicy'   : XendACMPolicy,
 }
@@ -881,6 +887,7 @@ class XendAPI(object):
                     'PBDs',
                     'PIFs',
                     'PPCIs',
+                    'PSCSIs',
                     'host_CPUs',
                     'cpu_configuration',
                     'metrics',
@@ -961,6 +968,8 @@ class XendAPI(object):
         return xen_api_success(XendNode.instance().get_PIF_refs())
     def host_get_PPCIs(self, session, ref):
         return xen_api_success(XendNode.instance().get_PPCI_refs())
+    def host_get_PSCSIs(self, session, ref):
+        return xen_api_success(XendNode.instance().get_PSCSI_refs())
     def host_get_host_CPUs(self, session, host_ref):
         return xen_api_success(XendNode.instance().get_host_cpu_refs())
     def host_get_metrics(self, _, ref):
@@ -1037,7 +1046,8 @@ class XendAPI(object):
                   'logging': {},
                   'PIFs': XendPIF.get_all(),
                   'PBDs': XendPBD.get_all(),
-                  'PPCIs': XendPPCI.get_all()}
+                  'PPCIs': XendPPCI.get_all(),
+                  'PSCSIs': XendPSCSI.get_all()}
         return xen_api_success(record)
 
     # class methods
@@ -1158,6 +1168,7 @@ class XendAPI(object):
                   'VBDs',
                   'VTPMs',
                   'DPCIs',
+                  'DSCSIs',
                   'tools_version',
                   'domid',
                   'is_control_domain',
@@ -1304,6 +1315,10 @@ class XendAPI(object):
         dom = XendDomain.instance().get_vm_by_uuid(vm_ref)
         return xen_api_success(dom.get_dpcis())
     
+    def VM_get_DSCSIs(self, session, vm_ref):
+        dom = XendDomain.instance().get_vm_by_uuid(vm_ref)
+        return xen_api_success(dom.get_dscsis())
+
     def VM_get_tools_version(self, session, vm_ref):
         dom = XendDomain.instance().get_vm_by_uuid(vm_ref)
         return dom.get_tools_version()
@@ -1684,6 +1699,7 @@ class XendAPI(object):
             'VBDs': xeninfo.get_vbds(),
             'VTPMs': xeninfo.get_vtpms(),
             'DPCIs': xeninfo.get_dpcis(),
+            'DSCSIs': xeninfo.get_dscsis(),
             'PV_bootloader': xeninfo.info.get('PV_bootloader'),
             'PV_kernel': xeninfo.info.get('PV_kernel'),
             'PV_ramdisk': xeninfo.info.get('PV_ramdisk'),
diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py       Wed Oct 22 11:38:22 2008 +0900
+++ b/tools/python/xen/xend/XendConfig.py       Wed Oct 22 11:46:55 2008 +0900
@@ -26,6 +26,8 @@ from xen.xend import XendAPIStore
 from xen.xend import XendAPIStore
 from xen.xend.XendPPCI import XendPPCI
 from xen.xend.XendDPCI import XendDPCI
+from xen.xend.XendPSCSI import XendPSCSI
+from xen.xend.XendDSCSI import XendDSCSI
 from xen.xend.XendError import VmError
 from xen.xend.XendDevices import XendDevices
 from xen.xend.PrettyPrint import prettyprintstring
@@ -210,6 +212,7 @@ XENAPI_CFG_TYPES = {
     'cpuid' : dict,
     'cpuid_check' : dict,
     'machine_address_size': int,
+    'suppress_spurious_page_faults': bool0,
 }
 
 # List of legacy configuration keys that have no equivalent in the
@@ -781,8 +784,8 @@ class XendConfig(dict):
         log.debug('_sxp_to_xapi(%s)' % scrub_password(sxp_cfg))
 
         # _parse_sxp() below will call device_add() and construct devices.
-        # Some devices (currently only pci) may require VM's uuid, so
-        # setup self['uuid'] beforehand.
+        # Some devices may require VM's uuid, so setup self['uuid']
+        # beforehand.
         self['uuid'] = sxp.child_value(sxp_cfg, 'uuid', uuid.createString())
 
         cfg = self._parse_sxp(sxp_cfg)
@@ -1221,29 +1224,28 @@ class XendConfig(dict):
             dev_type = sxp.name(config)
             dev_info = {}
 
-            if dev_type == 'pci' or dev_type == 'vscsi':
+            if dev_type == 'pci':
                 pci_devs_uuid = sxp.child_value(config, 'uuid',
                                                 uuid.createString())
 
                 pci_dict = self.pci_convert_sxp_to_dict(config)
                 pci_devs = pci_dict['devs']
 
-                if dev_type != 'vscsi':
-                    # create XenAPI DPCI objects.
-                    for pci_dev in pci_devs:
-                        dpci_uuid = pci_dev.get('uuid')
-                        ppci_uuid = XendPPCI.get_by_sbdf(pci_dev['domain'],
-                                                        pci_dev['bus'],
-                                                        pci_dev['slot'],
-                                                        pci_dev['func'])
-                        if ppci_uuid is None:
-                            continue
-                        dpci_record = {
-                            'VM': self['uuid'],
-                            'PPCI': ppci_uuid,
-                            'hotplug_slot': pci_dev.get('vslot', 0)
-                        }
-                        XendDPCI(dpci_uuid, dpci_record)
+                # create XenAPI DPCI objects.
+                for pci_dev in pci_devs:
+                    dpci_uuid = pci_dev.get('uuid')
+                    ppci_uuid = XendPPCI.get_by_sbdf(pci_dev['domain'],
+                                                     pci_dev['bus'],
+                                                     pci_dev['slot'],
+                                                     pci_dev['func'])
+                    if ppci_uuid is None:
+                        continue
+                    dpci_record = {
+                        'VM': self['uuid'],
+                        'PPCI': ppci_uuid,
+                        'hotplug_slot': pci_dev.get('vslot', 0)
+                    }
+                    XendDPCI(dpci_uuid, dpci_record)
 
                 target['devices'][pci_devs_uuid] = (dev_type,
                                                     {'devs': pci_devs,
@@ -1252,6 +1254,30 @@ class XendConfig(dict):
                 log.debug("XendConfig: reading device: %s" % pci_devs)
 
                 return pci_devs_uuid
+
+            if dev_type == 'vscsi':
+                vscsi_devs_uuid = sxp.child_value(config, 'uuid',
+                                                  uuid.createString())
+                vscsi_dict = self.vscsi_convert_sxp_to_dict(config)
+                vscsi_devs = vscsi_dict['devs']
+
+                # create XenAPI DSCSI objects.
+                for vscsi_dev in vscsi_devs:
+                    dscsi_uuid = vscsi_dev.get('uuid')
+                    pscsi_uuid = XendPSCSI.get_by_HCTL(vscsi_dev['p-dev'])
+                    if pscsi_uuid is None:
+                        continue
+                    dscsi_record = {
+                        'VM': self['uuid'],
+                        'PSCSI': pscsi_uuid,
+                        'virtual_HCTL': vscsi_dev.get('v-dev')
+                    }
+                    XendDSCSI(dscsi_uuid, dscsi_record)
+
+                target['devices'][vscsi_devs_uuid] = \
+                    (dev_type, {'devs': vscsi_devs, 'uuid': vscsi_devs_uuid} )
+                log.debug("XendConfig: reading device: %s" % vscsi_devs)
+                return vscsi_devs_uuid
 
             for opt_val in config[1:]:
                 try:
@@ -1558,6 +1584,86 @@ class XendConfig(dict):
 
         return dev_config
 
+    def vscsi_convert_sxp_to_dict(self, dev_sxp):
+        """Convert vscsi device sxp to dict
+        @param dev_sxp: device configuration
+        @type  dev_sxp: SXP object (parsed config)
+        @return: dev_config
+        @rtype: dictionary
+        """
+        # Parsing the device SXP's. In most cases, the SXP looks
+        # like this:
+        #
+        # [device, [vif, [mac, xx:xx:xx:xx:xx:xx], [ip 1.3.4.5]]]
+        #
+        # However, for SCSI devices it looks like this:
+        #
+        # [device,
+        #   [vscsi,
+        #     [dev,
+        #       [devid, 0], [p-devname, sdb], [p-dev, 1:0:0:1],
+        #       [v-dev, 0:0:0:0], [state, Initialising]
+        #     ],
+        #     [dev,
+        #       [devid, 0], [p-devname, sdc], [p-dev, 1:0:0:2],
+        #       [v-dev, 0:0:0:1], [state, Initialising]
+        #     ]
+        #   ],
+        #   [vscsi,
+        #     [dev,
+        #       [devid, 1], [p-devname, sdg], [p-dev, 2:0:0:0],
+        #       [v-dev, 1:0:0:0], [state, Initialising]
+        #     ],
+        #     [dev,
+        #       [devid, 1], [p-devname, sdh], [p-dev, 2:0:0:1],
+        #       [v-dev, 1:0:0:1], [state, Initialising]
+        #     ]
+        #   ]
+        # ]
+        #
+        # It seems the reasoning for this difference is because
+        # vscsiif.py needs all the SCSI device configurations with 
+        # same host number at the same time when creating the devices.
+
+        # For SCSI device hotplug support, the SXP of SCSI devices is
+        # extended like this:
+        #
+        # [device,
+        #   [vscsi,
+        #     [dev,
+        #       [devid, 0], [p-devname, sdd], [p-dev, 1:0:0:3],
+        #       [v-dev, 0:0:0:2], [state, Initialising]
+        #     ]
+        #   ]
+        # ]
+        #
+        # state 'Initialising' indicates that the device is being attached,
+        # while state 'Closing' indicates that the device is being detached.
+        #
+        # The Dict looks like this:
+        #
+        # { devs: [ {devid: 0, p-devname: sdd, p-dev: 1:0:0:3,
+        #            v-dev: 0:0:0:2, state: Initialising} ] }
+
+        dev_config = {}
+
+        vscsi_devs = []
+        for vscsi_dev in sxp.children(dev_sxp, 'dev'):
+            vscsi_dev_info = {}
+            for opt_val in vscsi_dev[1:]:
+                try:
+                    opt, val = opt_val
+                    vscsi_dev_info[opt] = val
+                except TypeError:
+                    pass
+            # append uuid for each vscsi device.
+            vscsi_uuid = vscsi_dev_info.get('uuid', uuid.createString())
+            vscsi_dev_info['uuid'] = vscsi_uuid
+            vscsi_devs.append(vscsi_dev_info)
+        dev_config['devs'] = vscsi_devs 
+
+        return dev_config
+
     def console_add(self, protocol, location, other_config = {}):
         dev_uuid = uuid.createString()
         if protocol == 'vt100':
@@ -1631,7 +1737,7 @@ class XendConfig(dict):
 
             dev_type, dev_info = self['devices'][dev_uuid]
 
-            if dev_type == 'pci' or dev_type == 'vscsi': # Special case for pci
+            if dev_type == 'pci': # Special case for pci
                 pci_dict = self.pci_convert_sxp_to_dict(config)
                 pci_devs = pci_dict['devs']
 
@@ -1639,26 +1745,50 @@ class XendConfig(dict):
                 for dpci_uuid in XendDPCI.get_by_VM(self['uuid']):
                     XendAPIStore.deregister(dpci_uuid, "DPCI")
 
-                if dev_type != 'vscsi':
-                    # create XenAPI DPCI objects.
-                    for pci_dev in pci_devs:
-                        dpci_uuid = pci_dev.get('uuid')
-                        ppci_uuid = XendPPCI.get_by_sbdf(pci_dev['domain'],
-                                                         pci_dev['bus'],
-                                                         pci_dev['slot'],
-                                                         pci_dev['func'])
-                        if ppci_uuid is None:
-                            continue
-                        dpci_record = {
-                            'VM': self['uuid'],
-                            'PPCI': ppci_uuid,
-                            'hotplug_slot': pci_dev.get('vslot', 0)
-                        }
-                        XendDPCI(dpci_uuid, dpci_record)
+                # create XenAPI DPCI objects.
+                for pci_dev in pci_devs:
+                    dpci_uuid = pci_dev.get('uuid')
+                    ppci_uuid = XendPPCI.get_by_sbdf(pci_dev['domain'],
+                                                     pci_dev['bus'],
+                                                     pci_dev['slot'],
+                                                     pci_dev['func'])
+                    if ppci_uuid is None:
+                        continue
+                    dpci_record = {
+                        'VM': self['uuid'],
+                        'PPCI': ppci_uuid,
+                        'hotplug_slot': pci_dev.get('vslot', 0)
+                    }
+                    XendDPCI(dpci_uuid, dpci_record)
 
                 self['devices'][dev_uuid] = (dev_type,
                                              {'devs': pci_devs,
                                               'uuid': dev_uuid})
+                return True
+                
+            if dev_type == 'vscsi': # Special case for vscsi
+                vscsi_dict = self.vscsi_convert_sxp_to_dict(config)
+                vscsi_devs = vscsi_dict['devs']
+
+                # destroy existing XenAPI DSCSI objects
+                for dscsi_uuid in XendDSCSI.get_by_VM(self['uuid']):
+                    XendAPIStore.deregister(dscsi_uuid, "DSCSI")
+
+                # create XenAPI DSCSI objects.
+                for vscsi_dev in vscsi_devs:
+                    dscsi_uuid = vscsi_dev.get('uuid')
+                    pscsi_uuid = XendPSCSI.get_by_HCTL(vscsi_dev['p-dev'])
+                    if pscsi_uuid is None:
+                        continue
+                    dscsi_record = {
+                        'VM': self['uuid'],
+                        'PSCSI': pscsi_uuid,
+                        'virtual_HCTL': vscsi_dev.get('v-dev')
+                    }
+                    XendDSCSI(dscsi_uuid, dscsi_record)
+
+                self['devices'][dev_uuid] = \
+                    (dev_type, {'devs': vscsi_devs, 'uuid': dev_uuid} )
                 return True
                 
             for opt_val in config[1:]:
diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xend/XendDSCSI.py
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/xend/XendDSCSI.py        Wed Oct 22 11:46:55 2008 +0900
@@ -0,0 +1,174 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright FUJITSU LIMITED 2008
+#       Masaki Kanno <kanno.masaki@xxxxxxxxxxxxxx>
+#============================================================================
+
+from xen.xend.XendBase import XendBase
+from xen.xend.XendPSCSI import XendPSCSI
+from xen.xend import XendAPIStore
+from xen.xend import sxp
+from xen.xend import uuid as genuuid
+
+import XendDomain, XendNode
+
+from XendError import *
+from XendTask import XendTask
+from XendLogging import log
+
+class XendDSCSI(XendBase):
+    """Representation of a half-virtualized SCSI device."""
+
+    def getClass(self):
+        return "DSCSI"
+
+    def getAttrRO(self):
+        attrRO = ['VM',
+                  'PSCSI',
+                  'virtual_host',
+                  'virtual_channel',
+                  'virtual_target',
+                  'virtual_lun',
+                  'virtual_HCTL',
+                  'runtime_properties']
+        return XendBase.getAttrRO() + attrRO
+
+    def getAttrRW(self):
+        attrRW = []
+        return XendBase.getAttrRW() + attrRW
+
+    def getAttrInst(self):
+        attrInst = ['VM',
+                    'PSCSI',
+                    'virtual_HCTL']
+        return XendBase.getAttrInst() + attrInst
+
+    def getMethods(self):
+        methods = ['destroy']
+        return XendBase.getMethods() + methods
+
+    def getFuncs(self):
+        funcs = ['create']
+        return XendBase.getFuncs() + funcs
+
+    getClass    = classmethod(getClass)
+    getAttrRO   = classmethod(getAttrRO)
+    getAttrRW   = classmethod(getAttrRW)
+    getAttrInst = classmethod(getAttrInst)
+    getMethods  = classmethod(getMethods)
+    getFuncs    = classmethod(getFuncs)
+ 
+    def create(self, dscsi_struct):
+
+        # Check if VM is valid
+        xendom = XendDomain.instance()
+        if not xendom.is_valid_vm(dscsi_struct['VM']):
+            raise InvalidHandleError('VM', dscsi_struct['VM'])
+        dom = xendom.get_vm_by_uuid(dscsi_struct['VM'])
+
+        # Check if PSCSI is valid
+        xennode = XendNode.instance()
+        pscsi_uuid = xennode.get_pscsi_by_uuid(dscsi_struct['PSCSI'])
+        if not pscsi_uuid:
+            raise InvalidHandleError('PSCSI', dscsi_struct['PSCSI'])
+
+        # Assign PSCSI to VM
+        try:
+            dscsi_ref = XendTask.log_progress(0, 100, \
+                                              dom.create_dscsi, \
+                                              dscsi_struct)
+        except XendError, e:
+            log.exception("Error in create_dscsi")
+            raise
+
+        return dscsi_ref
+
+    create = classmethod(create)
+
+    def get_by_VM(cls, VM_ref):
+        result = []
+        for dscsi in XendAPIStore.get_all("DSCSI"):
+            if dscsi.get_VM() == VM_ref:
+                result.append(dscsi.get_uuid())
+        return result
+
+    get_by_VM = classmethod(get_by_VM)
+
+    def __init__(self, uuid, record):
+        XendBase.__init__(self, uuid, record)
+        v_hctl = self.virtual_HCTL.split(':')
+        self.virtual_host = int(v_hctl[0])
+        self.virtual_channel = int(v_hctl[1])
+        self.virtual_target = int(v_hctl[2])
+        self.virtual_lun = int(v_hctl[3])
+
+    def get_VM(self):
+        return self.VM
+
+    def get_PSCSI(self):
+        return self.PSCSI
+
+    def get_virtual_host(self):
+        return self.virtual_host
+
+    def get_virtual_channel(self):
+        return self.virtual_channel
+
+    def get_virtual_target(self):
+        return self.virtual_target
+
+    def get_virtual_lun(self):
+        return self.virtual_lun
+
+    def get_virtual_HCTL(self):
+        return self.virtual_HCTL
+
+    def get_runtime_properties(self):
+        xendom = XendDomain.instance()
+        dominfo = xendom.get_vm_by_uuid(self.VM)
+
+        try:
+            device_dict = {}
+            for device_sxp in dominfo.getDeviceSxprs('vscsi'):
+                target_dev = None
+                for dev in device_sxp[1][0][1]:
+                    vdev = sxp.child_value(dev, 'v-dev')
+                    if vdev == self.virtual_HCTL:
+                        target_dev = dev
+                        break
+                if target_dev is None:
+                    continue
+
+                dev_dict = {}
+                for info in target_dev[1:]:
+                    dev_dict[info[0]] = info[1]
+                device_dict['dev'] = dev_dict
+                for info in device_sxp[1][1:]:
+                    device_dict[info[0]] = info[1]
+
+            return device_dict
+        except Exception, exn:
+            log.exception(exn)
+            return {}
+
+    def destroy(self):
+        xendom = XendDomain.instance()
+        dom = xendom.get_vm_by_uuid(self.get_VM())
+        if not dom:
+            raise InvalidHandleError("VM", self.get_VM())
+        XendTask.log_progress(0, 100, \
+                              dom.destroy_dscsi, \
+                              self.get_uuid())
+
diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Wed Oct 22 11:38:22 2008 +0900
+++ b/tools/python/xen/xend/XendDomainInfo.py   Wed Oct 22 11:46:55 2008 +0900
@@ -55,9 +55,11 @@ from xen.xend.XendAPIConstants import *
 
 from xen.xend.XendVMMetrics import XendVMMetrics
 
+from xen.xend import XendAPIStore
 from xen.xend.XendPPCI import XendPPCI
 from xen.xend.XendDPCI import XendDPCI
-from xen.xend import XendAPIStore
+from xen.xend.XendPSCSI import XendPSCSI
+from xen.xend.XendDSCSI import XendDSCSI
 
 MIGRATE_TIMEOUT = 30.0
 BOOTLOADER_LOOPBACK_DEVICE = '/dev/xvdp'
@@ -663,6 +665,9 @@ class XendDomainInfo:
                 if dev_type == 'pci':
                     for dev in dev_config_dict['devs']:
                         XendAPIStore.deregister(dev['uuid'], 'DPCI')
+                elif dev_type == 'vscsi':  # chained so 'pci' does not fall into the else below
+                    for dev in dev_config_dict['devs']:
+                        XendAPIStore.deregister(dev['uuid'], 'DSCSI')
                 elif dev_type == 'tap':
                     self.info['vbd_refs'].remove(dev_uuid)
                 else:
@@ -786,12 +791,11 @@ class XendDomainInfo:
         if dev_class != 'vscsi':
             return False
 
-        dev_config = self.info.pci_convert_sxp_to_dict(dev_sxp)
+        dev_config = self.info.vscsi_convert_sxp_to_dict(dev_sxp)
         dev = dev_config['devs'][0]
-        req_devid = sxp.child_value(dev_sxp, 'devid')
-        req_devid = int(req_devid)
+        req_devid = int(dev['devid'])
         existing_dev_info = self._getDeviceInfo_vscsi(req_devid, dev['v-dev'])
-        state = sxp.child_value(dev_sxp, 'state')
+        state = dev['state']
 
         if state == 'Initialising':
             # new create
@@ -1502,23 +1506,18 @@ class XendDomainInfo:
         return self.info['VCPUs_max']
 
     def setVCpuCount(self, vcpus):
-        if vcpus <= 0:
-            raise XendError('Invalid VCPUs')
+        def vcpus_valid(n):
+            if vcpus <= 0:
+                raise XendError('Zero or less VCPUs is invalid')
+            if self.domid >= 0 and vcpus > self.info['VCPUs_max']:
+                raise XendError('Cannot set vcpus greater than max vcpus on running domain')
+        vcpus_valid(vcpus)
         
         self.info['vcpu_avail'] = (1 << vcpus) - 1
         if self.domid >= 0:
             self.storeVm('vcpu_avail', self.info['vcpu_avail'])
-            # update dom differently depending on whether we are adjusting
-            # vcpu number up or down, otherwise _vcpuDomDetails does not
-            # disable the vcpus
-            if self.info['VCPUs_max'] > vcpus:
-                # decreasing
-                self._writeDom(self._vcpuDomDetails())
-                self.info['VCPUs_live'] = vcpus
-            else:
-                # same or increasing
-                self.info['VCPUs_live'] = vcpus
-                self._writeDom(self._vcpuDomDetails())
+            self._writeDom(self._vcpuDomDetails())
+            self.info['VCPUs_live'] = vcpus
         else:
             if self.info['VCPUs_max'] > vcpus:
                 # decreasing
@@ -1528,7 +1527,7 @@ class XendDomainInfo:
                 for c in range(self.info['VCPUs_max'], vcpus):
                     self.info['cpus'].append(list())
             self.info['VCPUs_max'] = vcpus
-            xen.xend.XendDomain.instance().managed_config_save(self)
+        xen.xend.XendDomain.instance().managed_config_save(self)
         log.info("Set VCPU count on domain %s to %d", self.info['name_label'],
                  vcpus)
 
@@ -2241,6 +2240,10 @@ class XendDomainInfo:
             if self.info.has_key('machine_address_size'):
                 log.debug("_initDomain: setting maximum machine address size %d" % self.info['machine_address_size'])
                 xc.domain_set_machine_address_size(self.domid, self.info['machine_address_size'])
+
+            if self.info.has_key('suppress_spurious_page_faults') and self.info['suppress_spurious_page_faults']:
+                log.debug("_initDomain: suppressing spurious page faults")
+                xc.domain_suppress_spurious_page_faults(self.domid)
                 
             self._createChannels()
 
@@ -3233,6 +3236,9 @@ class XendDomainInfo:
     def get_dpcis(self):
         return XendDPCI.get_by_VM(self.info.get('uuid'))
 
+    def get_dscsis(self):
+        return XendDSCSI.get_by_VM(self.info.get('uuid'))
+
     def create_vbd(self, xenapi_vbd, vdi_image_path):
         """Create a VBD using a VDI from XendStorageRepository.
 
@@ -3412,6 +3418,60 @@ class XendDomainInfo:
                 raise XendError('Failed to create device')
 
         return dpci_uuid
+
+    def create_dscsi(self, xenapi_dscsi):
+        """Create scsi device from the passed struct in Xen API format.
+
+        @param xenapi_dscsi: DSCSI struct from Xen API
+        @rtype: string
+        @return: UUID
+        """
+
+        dscsi_uuid = uuid.createString()
+
+        # Convert xenapi to sxp
+        pscsi = XendAPIStore.get(xenapi_dscsi.get('PSCSI'), 'PSCSI')
+        devid = int(xenapi_dscsi.get('virtual_HCTL').split(':')[0])
+        target_vscsi_sxp = \
+            ['vscsi', 
+                ['dev',
+                    ['devid', devid],
+                    ['p-devname', pscsi.get_dev_name()],
+                    ['p-dev', pscsi.get_physical_HCTL()],
+                    ['v-dev', xenapi_dscsi.get('virtual_HCTL')],
+                    ['state', 'Initialising'],
+                    ['uuid', dscsi_uuid]
+                ]
+            ]
+
+        if self._stateGet() != XEN_API_VM_POWER_STATE_RUNNING:
+
+            cur_vscsi_sxp = self._getDeviceInfo_vscsi(devid, None)
+
+            if cur_vscsi_sxp is None:
+                dev_uuid = self.info.device_add('vscsi', cfg_sxp = target_vscsi_sxp)
+                if not dev_uuid:
+                    raise XendError('Failed to create device')
+
+            else:
+                new_vscsi_sxp = ['vscsi']
+                for existing_dev in sxp.children(cur_vscsi_sxp, 'dev'):
+                    new_vscsi_sxp.append(existing_dev)
+                new_vscsi_sxp.append(sxp.child0(target_vscsi_sxp, 'dev'))
+
+                dev_uuid = sxp.child_value(cur_vscsi_sxp, 'uuid')
+                self.info.device_update(dev_uuid, new_vscsi_sxp)
+
+            xen.xend.XendDomain.instance().managed_config_save(self)
+
+        else:
+            try:
+                self.device_configure(target_vscsi_sxp)
+
+            except Exception, exn:
+                raise XendError('Failed to create device')
+
+        return dscsi_uuid
 
 
     def destroy_device_by_uuid(self, dev_type, dev_uuid):
@@ -3480,6 +3540,41 @@ class XendDomainInfo:
             except Exception, exn:
                 raise XendError('Failed to destroy device')
 
+    def destroy_dscsi(self, dev_uuid):
+        dscsi = XendAPIStore.get(dev_uuid, 'DSCSI')
+        devid = dscsi.get_virtual_host()
+        vHCTL = dscsi.get_virtual_HCTL()
+        cur_vscsi_sxp = self._getDeviceInfo_vscsi(devid, None)
+        dev_uuid = sxp.child_value(cur_vscsi_sxp, 'uuid')
+
+        target_dev = None
+        new_vscsi_sxp = ['vscsi']
+        for dev in sxp.children(cur_vscsi_sxp, 'dev'):
+            if vHCTL == sxp.child_value(dev, 'v-dev'):
+                target_dev = dev
+            else:
+                new_vscsi_sxp.append(dev)
+
+        if target_dev is None:
+            raise XendError('Failed to destroy device')
+
+        target_dev.append(['state', 'Closing'])
+        target_vscsi_sxp = ['vscsi', target_dev]
+
+        if self._stateGet() != XEN_API_VM_POWER_STATE_RUNNING:
+
+            self.info.device_update(dev_uuid, new_vscsi_sxp)
+            if len(sxp.children(new_vscsi_sxp, 'dev')) == 0:
+                del self.info['devices'][dev_uuid]
+            xen.xend.XendDomain.instance().managed_config_save(self)
+
+        else:
+            try:
+                self.device_configure(target_vscsi_sxp)
+
+            except Exception, exn:
+                raise XendError('Failed to destroy device')
+
     def destroy_xapi_instances(self):
         """Destroy Xen-API instances stored in XendAPIStore.
         """
@@ -3504,6 +3599,10 @@ class XendDomainInfo:
         for dpci_uuid in XendDPCI.get_by_VM(self.info.get('uuid')):
             XendAPIStore.deregister(dpci_uuid, "DPCI")
             
+        # Destroy DSCSI instances.
+        for dscsi_uuid in XendDSCSI.get_by_VM(self.info.get('uuid')):
+            XendAPIStore.deregister(dscsi_uuid, "DSCSI")
+            
     def has_device(self, dev_class, dev_uuid):
         return (dev_uuid in self.info['%s_refs' % dev_class.lower()])
 
diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Wed Oct 22 11:38:22 2008 +0900
+++ b/tools/python/xen/xend/XendNode.py Wed Oct 22 11:46:55 2008 +0900
@@ -22,6 +22,7 @@ import xen.lowlevel.xc
 
 from xen.util import Brctl
 from xen.util import pci as PciUtil
+from xen.util import vscsi_util
 from xen.xend import XendAPIStore
 from xen.xend import osdep
 
@@ -38,7 +39,8 @@ from XendStateStore import XendStateStor
 from XendStateStore import XendStateStore
 from XendMonitor import XendMonitor
 from XendPPCI import XendPPCI
-     
+from XendPSCSI import XendPSCSI
+
 class XendNode:
     """XendNode - Represents a Domain 0 Host."""
     
@@ -53,6 +55,7 @@ class XendNode:
         * network
         * Storage Repository
         * PPCI
+        * PSCSI
         """
         
         self.xc = xen.lowlevel.xc.xc()
@@ -269,6 +272,24 @@ class XendNode:
             XendPPCI(ppci_uuid, ppci_record)
 
 
+        # Initialise PSCSIs
+        saved_pscsis = self.state_store.load_state('pscsi')
+        saved_pscsi_table = {}
+        if saved_pscsis:
+            for pscsi_uuid, pscsi_record in saved_pscsis.items():
+                try:
+                    saved_pscsi_table[pscsi_record['scsi_id']] = pscsi_uuid
+                except KeyError:
+                    pass
+
+        for pscsi_record in vscsi_util.get_all_scsi_devices():
+            if pscsi_record['scsi_id']:
+                # If saved uuid exists, use it. Otherwise create one.
+                pscsi_uuid = saved_pscsi_table.get(pscsi_record['scsi_id'],
+                                                   uuid.createString())
+                XendPSCSI(pscsi_uuid, pscsi_record)
+
+
 ##    def network_destroy(self, net_uuid):
  ##       del self.networks[net_uuid]
   ##      self.save_networks()
@@ -317,6 +338,15 @@ class XendNode:
     def get_ppci_by_uuid(self, ppci_uuid):
         if ppci_uuid in self.get_PPCI_refs():
             return ppci_uuid
+        return None
+
+
+    def get_PSCSI_refs(self):
+        return XendPSCSI.get_all()
+
+    def get_pscsi_by_uuid(self, pscsi_uuid):
+        if pscsi_uuid in self.get_PSCSI_refs():
+            return pscsi_uuid
         return None
 
 
@@ -333,6 +363,7 @@ class XendNode:
         self.save_PBDs()
         self.save_SRs()
         self.save_PPCIs()
+        self.save_PSCSIs()
 
     def save_PIFs(self):
         pif_records = dict([(pif_uuid, XendAPIStore.get(
@@ -362,6 +393,12 @@ class XendNode:
                                  ppci_uuid, "PPCI").get_record())
                             for ppci_uuid in XendPPCI.get_all()])
         self.state_store.save_state('ppci', ppci_records)
+
+    def save_PSCSIs(self):
+        pscsi_records = dict([(pscsi_uuid, XendAPIStore.get(
+                                  pscsi_uuid, "PSCSI").get_record())
+                            for pscsi_uuid in XendPSCSI.get_all()])
+        self.state_store.save_state('pscsi', pscsi_records)
 
     def shutdown(self):
         return 0
diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xend/XendPSCSI.py
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/xend/XendPSCSI.py        Wed Oct 22 11:46:55 2008 +0900
@@ -0,0 +1,143 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright FUJITSU LIMITED 2008
+#       Masaki Kanno <kanno.masaki@xxxxxxxxxxxxxx>
+#============================================================================
+
+from xen.xend.XendBase import XendBase
+from xen.xend.XendBase import XendAPIStore
+from xen.xend import uuid as genuuid
+
+class XendPSCSI(XendBase):
+    """Representation of a physical SCSI device."""
+
+    def getClass(self):
+        return "PSCSI"
+
+    def getAttrRO(self):
+        attrRO = ['host',
+                  'physical_host',
+                  'physical_channel',
+                  'physical_target',
+                  'physical_lun',
+                  'physical_HCTL',
+                  'vendor_name',
+                  'model',
+                  'type_id',
+                  'type',
+                  'dev_name',
+                  'sg_name',
+                  'revision',
+                  'scsi_id',
+                  'scsi_level']
+        return XendBase.getAttrRO() + attrRO
+
+    def getAttrRW(self):
+        attrRW = []
+        return XendBase.getAttrRW() + attrRW
+
+    def getAttrInst(self):
+        attrInst = []
+        return XendBase.getAttrInst() + attrInst
+
+    def getMethods(self):
+        methods = []
+        return XendBase.getMethods() + methods
+
+    def getFuncs(self):
+        funcs = []
+        return XendBase.getFuncs() + funcs
+
+    getClass    = classmethod(getClass)
+    getAttrRO   = classmethod(getAttrRO)
+    getAttrRW   = classmethod(getAttrRW)
+    getAttrInst = classmethod(getAttrInst)
+    getMethods  = classmethod(getMethods)
+    getFuncs    = classmethod(getFuncs)
+ 
+    def get_by_HCTL(self, physical_HCTL):
+        for pscsi in XendAPIStore.get_all("PSCSI"):
+            if pscsi.get_physical_HCTL() == physical_HCTL:
+                return pscsi.get_uuid()
+        return None
+
+    get_by_HCTL = classmethod(get_by_HCTL)
+
+    def __init__(self, uuid, record):
+        self.physical_HCTL = record['physical_HCTL']
+        self.vendor_name = record['vendor_name']
+        self.model = record['model']
+        self.type_id = record['type_id']
+        self.type = record['type']
+        self.dev_name = record['dev_name']
+        self.sg_name = record['sg_name']
+        self.revision = record['revision']
+        self.scsi_id = record['scsi_id']
+        self.scsi_level = record['scsi_level']
+
+        p_hctl = self.physical_HCTL.split(':')
+        self.physical_host = int(p_hctl[0])
+        self.physical_channel = int(p_hctl[1])
+        self.physical_target = int(p_hctl[2])
+        self.physical_lun = int(p_hctl[3])
+
+        XendBase.__init__(self, uuid, record)
+
+    def get_host(self):
+        from xen.xend import XendNode
+        return XendNode.instance().get_uuid()
+
+    def get_physical_host(self):
+        return self.physical_host
+
+    def get_physical_channel(self):
+        return self.physical_channel
+
+    def get_physical_target(self):
+        return self.physical_target
+
+    def get_physical_lun(self):
+        return self.physical_lun
+
+    def get_physical_HCTL(self):
+        return self.physical_HCTL
+
+    def get_vendor_name(self):
+        return self.vendor_name
+
+    def get_model(self):
+        return self.model
+
+    def get_type_id(self):
+        return self.type_id
+
+    def get_type(self):
+        return self.type
+
+    def get_dev_name(self):
+        return self.dev_name
+
+    def get_sg_name(self):
+        return self.sg_name
+
+    def get_revision(self):
+        return self.revision
+
+    def get_scsi_id(self):
+        return self.scsi_id
+
+    def get_scsi_level(self):
+        return self.scsi_level
+
diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xend/server/vscsiif.py
--- a/tools/python/xen/xend/server/vscsiif.py   Wed Oct 22 11:38:22 2008 +0900
+++ b/tools/python/xen/xend/server/vscsiif.py   Wed Oct 22 11:46:55 2008 +0900
@@ -125,10 +125,10 @@ class VSCSIController(DevController):
             state = self.readBackend(devid, devpath + '/state')
             localdevid = self.readBackend(devid, devpath + '/devid')
             dev_dict = {'p-dev': pdev,
-                            'p-devname': pdevname,
-                            'v-dev': pdevname,
-                            'state': state,
-                            'devid': localdevid }
+                        'p-devname': pdevname,
+                        'v-dev': vdev,
+                        'state': state,
+                        'devid': localdevid }
             vscsi_devs.append(dev_dict)
 
         config['devs'] = vscsi_devs
@@ -168,17 +168,17 @@ class VSCSIController(DevController):
         (devid, back, front) = self.getDeviceDetails(config)
         devid = int(devid)
         vscsi_config = config['devs'][0]
-        states = config.get('states', [])
+        state = vscsi_config.get('state', '')
         driver_state = self.readBackend(devid, 'state')
         if str(xenbusState['Connected']) != driver_state:
             raise VmError("Driver status is not connected")
 
         uuid = self.readBackend(devid, 'uuid')
-        if states[0] == 'Initialising':
+        if state == 'Initialising':
             back['uuid'] = uuid
             self.writeBackend(devid, back)
 
-        elif states[0] == 'Closing':
+        elif state == 'Closing':
             found = False
             devs = self.readBackendList(devid, "vscsi-devs")
             vscsipath = "vscsi-devs/"
@@ -197,8 +197,8 @@ class VSCSIController(DevController):
                 raise VmError("Device %s not connected" % vdev)
 
         else:
-            raise XendError('Error configuring device invalid state %s'
-                                % state)
+            raise XendError("Error configuring device invalid "
+                            "state '%s'" % state)
 
         self.writeBackend(devid, 'state', str(xenbusState['Reconfiguring']))
         return self.readBackend(devid, 'uuid')
diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xm/create.dtd
--- a/tools/python/xen/xm/create.dtd    Wed Oct 22 11:38:22 2008 +0900
+++ b/tools/python/xen/xm/create.dtd    Wed Oct 22 11:46:55 2008 +0900
@@ -40,6 +40,7 @@
                  vif*,
                  vtpm*,
                  pci*,
+                 vscsi*,
                  console*,
                  platform*,
                  vcpu_param*,
@@ -87,6 +88,10 @@
                  slot            CDATA #REQUIRED
                  func            CDATA #REQUIRED
                  vslt            CDATA #IMPLIED>
+
+<!ELEMENT vscsi  EMPTY>
+<!ATTLIST vscsi  p-dev           CDATA #REQUIRED
+                 v-dev           CDATA #REQUIRED>
 
 <!ELEMENT console (other_config*)>
 <!ATTLIST console protocol       (vt100|rfb|rdp) #REQUIRED>
diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Wed Oct 22 11:38:22 2008 +0900
+++ b/tools/python/xen/xm/create.py     Wed Oct 22 11:46:55 2008 +0900
@@ -583,6 +583,10 @@ gopts.var('machine_address_size', val='B
           fn=set_int, default=None,
           use="""Maximum machine address size""")
 
+gopts.var('suppress_spurious_page_faults', val='yes|no',
+          fn=set_bool, default=None,
+          use="""Do not inject spurious page faults into this guest""")
+
 def err(msg):
     """Print an error to stderr and exit.
     """
@@ -634,6 +638,9 @@ def configure_image(vals):
     if vals.machine_address_size:
         config_image.append(['machine_address_size', vals.machine_address_size])
 
+    if vals.suppress_spurious_page_faults:
+        config_image.append(['suppress_spurious_page_faults', vals.suppress_spurious_page_faults])
+
     return config_image
     
 def configure_disks(config_devs, vals):
@@ -696,11 +703,8 @@ def configure_vscsis(config_devs, vals):
 
     scsi_devices = vscsi_util.vscsi_get_scsidevices()
     for (p_dev, v_dev, backend) in vals.vscsi:
-        tmp = p_dev.split(':')
-        if len(tmp) == 4:
-            (p_hctl, block) = vscsi_util._vscsi_hctl_block(p_dev, scsi_devices)
-        else:
-            (p_hctl, block) = vscsi_util._vscsi_block_scsiid_to_hctl(p_dev, scsi_devices)
+        (p_hctl, devname) = \
+            vscsi_util.vscsi_get_hctl_and_devname_by(p_dev, scsi_devices)
 
         if p_hctl == None:
             raise ValueError("Cannot find device \"%s\"" % p_dev)
@@ -716,7 +720,7 @@ def configure_vscsis(config_devs, vals):
                         ['state', 'Initialising'], \
                         ['devid', devid], \
                         ['p-dev', p_hctl], \
-                        ['p-devname', block], \
+                        ['p-devname', devname], \
                         ['v-dev', v_dev] ])
 
         if vscsi_lookup_devid(devidlist, devid) == 0:
@@ -887,7 +891,7 @@ def make_config(vals):
                    'restart', 'on_poweroff',
                    'on_reboot', 'on_crash', 'vcpus', 'vcpu_avail', 'features',
                    'on_xend_start', 'on_xend_stop', 'target', 'cpuid',
-                   'cpuid_check', 'machine_address_size'])
+                   'cpuid_check', 'machine_address_size', 'suppress_spurious_page_faults'])
 
     if vals.uuid is not None:
         config.append(['uuid', vals.uuid])
diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Wed Oct 22 11:38:22 2008 +0900
+++ b/tools/python/xen/xm/main.py       Wed Oct 22 11:46:55 2008 +0900
@@ -2235,12 +2235,34 @@ def vscsi_convert_sxp_to_dict(dev_sxp):
     return dev_dict
 
 def xm_scsi_list(args):
-    xenapi_unsupported()
     (use_long, params) = arg_check_for_resource_list(args, "scsi-list")
 
     dom = params[0]
 
-    devs = server.xend.domain.getDeviceSxprs(dom, 'vscsi')
+    devs = []
+    if serverType == SERVER_XEN_API:
+
+        dscsi_refs = server.xenapi.VM.get_DSCSIs(get_single_vm(dom))
+        dscsi_properties = \
+            map(server.xenapi.DSCSI.get_runtime_properties, dscsi_refs)
+        dscsi_dict = {}
+        for dscsi_property in dscsi_properties:
+            devid = int(dscsi_property['dev']['devid'])
+            try:
+                dscsi_sxp = dscsi_dict[devid]
+            except:
+                dscsi_sxp = [['devs', []]]
+                for key, value in dscsi_property.items():
+                    if key != 'dev':
+                        dscsi_sxp.append([key, value])
+            dev_sxp = ['dev']
+            dev_sxp.extend(map2sxp(dscsi_property['dev']))
+            dscsi_sxp[0][1].append(dev_sxp)
+            dscsi_dict[devid] = dscsi_sxp
+        devs = map2sxp(dscsi_dict)
+
+    else:
+        devs = server.xend.domain.getDeviceSxprs(dom, 'vscsi')
 
     if use_long:
         map(PrettyPrint.prettyprint, devs)
@@ -2464,37 +2486,60 @@ def xm_pci_attach(args):
     else:
         server.xend.domain.device_configure(dom, pci)
 
+def parse_scsi_configuration(p_scsi, v_hctl, state):
+    v = v_hctl.split(':')
+    if len(v) != 4:
+        raise OptionError("Invalid argument: %s" % v_hctl)
+
+    p_hctl = None
+    devname = None
+    if p_scsi is not None:
+        (p_hctl, devname) = \
+            vscsi_util.vscsi_get_hctl_and_devname_by(p_scsi)
+        if p_hctl is None:
+            raise OptionError("Cannot find device '%s'" % p_scsi)
+
+    scsi = ['vscsi']
+    scsi.append(['dev', \
+                 ['state', state], \
+                 ['devid', int(v[0])], \
+                 ['p-dev', p_hctl], \
+                 ['p-devname', devname], \
+                 ['v-dev', v_hctl] \
+               ])
+
+    return scsi
+
 def xm_scsi_attach(args):
-    xenapi_unsupported()
-
     arg_check(args, 'scsi-attach', 3, 4)
-    p_devname = args[1]
-    v_dev = args[2]
-
-    v_hctl = v_dev.split(':')
-    if len(v_hctl) != 4:
-        raise OptionError("Invalid argument: %s" % v_dev)
-
-    (p_hctl, block) = vscsi_util.vscsi_search_hctl_and_block(p_devname)
-
-    if p_hctl == None:
-        raise OptionError("Cannot find device \"%s\"" % p_devname)
-
     dom = args[0]
-    vscsi = ['vscsi']
-    vscsi.append(['dev', \
-                ['state', 'Initialising'], \
-                ['devid', v_hctl[0]], \
-                ['p-dev', p_hctl], \
-                ['p-devname', block], \
-                ['v-dev', v_dev] ])
-
-    if len(args) == 4:
-        vscsi.append(['backend', args[3]])
-
-    vscsi.append(['state', 'Initialising'])
-    vscsi.append(['devid', v_hctl[0]])
-    server.xend.domain.device_configure(dom, vscsi)
+    p_scsi = args[1]
+    v_hctl = args[2]
+    scsi = parse_scsi_configuration(p_scsi, v_hctl, 'Initialising')
+
+    if serverType == SERVER_XEN_API:
+
+        scsi_dev = sxp.children(scsi, 'dev')[0]
+        p_hctl = sxp.child_value(scsi_dev, 'p-dev')
+        target_ref = None
+        for pscsi_ref in server.xenapi.PSCSI.get_all():
+            if p_hctl == server.xenapi.PSCSI.get_physical_HCTL(pscsi_ref):
+                target_ref = pscsi_ref
+                break
+        if target_ref is None:
+            raise OptionError("Cannot find device '%s'" % p_scsi)
+
+        dscsi_record = {
+            "VM":           get_single_vm(dom),
+            "PSCSI":        target_ref,
+            "virtual_HCTL": v_hctl
+        }
+        server.xenapi.DSCSI.create(dscsi_record)
+
+    else:
+        if len(args) == 4:
+            scsi.append(['backend', args[3]])
+        server.xend.domain.device_configure(dom, scsi)
 
 def detach(args, deviceClass):
     rm_cfg = True
@@ -2587,26 +2632,25 @@ def xm_pci_detach(args):
         server.xend.domain.device_configure(dom, pci)
 
 def xm_scsi_detach(args):
-    xenapi_unsupported()
     arg_check(args, 'scsi-detach', 2)
-
-    v_dev = args[1]
-    v_hctl = v_dev.split(':')
-    if len(v_hctl) != 4:
-        raise OptionError("Invalid argument: %s" % v_dev)
-
     dom = args[0]
-    vscsi = ['vscsi']
-    vscsi.append(['dev', \
-                ['state', 'Closing'], \
-                ['devid', v_hctl[0]], \
-                ['p-dev', ''], \
-                ['p-devname', ''], \
-                ['v-dev', v_dev] ])
-
-    vscsi.append(['state', 'Closing'])
-    vscsi.append(['devid', v_hctl[0]])
-    server.xend.domain.device_configure(dom, vscsi)
+    v_hctl = args[1]
+    scsi = parse_scsi_configuration(None, v_hctl, 'Closing')
+
+    if serverType == SERVER_XEN_API:
+
+        target_ref = None
+        for dscsi_ref in server.xenapi.VM.get_DSCSIs(get_single_vm(dom)):
+            if v_hctl == server.xenapi.DSCSI.get_virtual_HCTL(dscsi_ref):
+                target_ref = dscsi_ref
+                break
+        if target_ref is None:
+            raise OptionError("Device %s not assigned" % v_hctl)
+
+        server.xenapi.DSCSI.destroy(target_ref)
+
+    else:
+        server.xend.domain.device_configure(dom, scsi)
 
 def xm_vnet_list(args):
     xenapi_unsupported()
diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xm/xenapi_create.py
--- a/tools/python/xen/xm/xenapi_create.py      Wed Oct 22 11:38:22 2008 +0900
+++ b/tools/python/xen/xm/xenapi_create.py      Wed Oct 22 11:46:55 2008 +0900
@@ -375,6 +375,12 @@ class xenapi_create:
 
             self.create_pcis(vm_ref, pcis)
 
+            # Now create scsis
+
+            scsis = vm.getElementsByTagName("vscsi")
+
+            self.create_scsis(vm_ref, scsis)
+
             return vm_ref
         except:
             server.xenapi.VM.destroy(vm_ref)
@@ -532,6 +538,33 @@ class xenapi_create:
 
         return server.xenapi.DPCI.create(dpci_record)
 
+    def create_scsis(self, vm_ref, scsis):
+        log(DEBUG, "create_scsis")
+        return map(lambda scsi: self.create_scsi(vm_ref, scsi), scsis)
+
+    def create_scsi(self, vm_ref, scsi):
+        log(DEBUG, "create_scsi")
+
+        target_ref = None
+        for pscsi_ref in server.xenapi.PSCSI.get_all():
+            if scsi.attributes["p-dev"].value == server.xenapi.PSCSI.get_physical_HCTL(pscsi_ref):
+                target_ref = pscsi_ref
+                break
+        if target_ref is None:
+            log(DEBUG, "create_scsi: scsi device not found")
+            return None
+
+        dscsi_record = {
+            "VM":
+                vm_ref,
+            "PSCSI":
+                target_ref,
+            "virtual_HCTL":
+                scsi.attributes["v-dev"].value
+        }
+
+        return server.xenapi.DSCSI.create(dscsi_record)
+
 def get_child_by_name(exp, childname, default = None):
     try:
         return [child for child in sxp.children(exp)
@@ -562,6 +595,9 @@ class sxp2xml:
 
         pcis_sxp = map(lambda x: x[1], [device for device in devices
                                         if device[1][0] == "pci"])
+
+        scsis_sxp = map(lambda x: x[1], [device for device in devices
+                                         if device[1][0] == "vscsi"])
 
         # Create XML Document
         
@@ -704,6 +740,12 @@ class sxp2xml:
 
         map(vm.appendChild, pcis)
 
+        # And now the scsis
+
+        scsis = self.extract_scsis(scsis_sxp, document)
+
+        map(vm.appendChild, scsis)
+
         # Last but not least the consoles...
 
         consoles = self.extract_consoles(image, document)
@@ -893,6 +935,23 @@ class sxp2xml:
                 pcis.append(pci)
 
         return pcis
+
+    def extract_scsis(self, scsis_sxp, document):
+
+        scsis = []
+
+        for scsi_sxp in scsis_sxp:
+            for dev_sxp in sxp.children(scsi_sxp, "dev"):
+                scsi = document.createElement("vscsi")
+
+                scsi.attributes["p-dev"] \
+                    = get_child_by_name(dev_sxp, "p-dev")
+                scsi.attributes["v-dev"] \
+                    = get_child_by_name(dev_sxp, "v-dev")
+
+                scsis.append(scsi)
+
+        return scsis
 
     def mk_other_config(self, key, value, document):
         other_config = document.createElement("other_config")
diff -r 6583186e5989 -r 46d7e12c4c91 tools/xentrace/formats
--- a/tools/xentrace/formats    Wed Oct 22 11:38:22 2008 +0900
+++ b/tools/xentrace/formats    Wed Oct 22 11:46:55 2008 +0900
@@ -116,3 +116,7 @@ 0x0040f10e  CPU%(cpu)d  %(tsc)d (+%(relt
 0x0040f10e  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_resync_full       [ gfn = 0x%(1)16x ]
 0x0040f00f  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_resync_only       [ gfn = 0x%(1)08x ]
 0x0040f10f  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_resync_only       [ gfn = 0x%(1)16x ]
+
+0x00801001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  cpu_freq_change [ %(1)dMHz -> %(2)dMHz ]
+0x00802001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  cpu_idle_entry  [ C0 -> C%(1)d ]
+0x00802002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  cpu_idle_exit   [ C%(1)d -> C0 ]
diff -r 6583186e5989 -r 46d7e12c4c91 unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c
--- a/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c        Wed Oct 22 11:38:22 2008 +0900
+++ b/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c        Wed Oct 22 11:46:55 2008 +0900
@@ -10,12 +10,6 @@ struct ap_suspend_info {
        int      do_spin;
        atomic_t nr_spinning;
 };
-
-/*
- * Use a rwlock to protect the hypercall page from being executed in AP context
- * while the BSP is re-initializing it after restore.
- */
-static DEFINE_RWLOCK(suspend_lock);
 
 #ifdef CONFIG_SMP
 
@@ -33,12 +27,8 @@ static void ap_suspend(void *_info)
        atomic_inc(&info->nr_spinning);
        mb();
 
-       while (info->do_spin) {
+       while (info->do_spin)
                cpu_relax();
-               read_lock(&suspend_lock);
-               HYPERVISOR_yield();
-               read_unlock(&suspend_lock);
-       }
 
        mb();
        atomic_dec(&info->nr_spinning);
@@ -61,9 +51,7 @@ static int bp_suspend(void)
        suspend_cancelled = HYPERVISOR_suspend(0);
 
        if (!suspend_cancelled) {
-               write_lock(&suspend_lock);
                platform_pci_resume();
-               write_unlock(&suspend_lock);
                gnttab_resume();
                irq_resume();
        }
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/ia64/vmx/vmmu.c
--- a/xen/arch/ia64/vmx/vmmu.c  Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/ia64/vmx/vmmu.c  Wed Oct 22 11:46:55 2008 +0900
@@ -446,7 +446,7 @@ IA64FAULT vmx_vcpu_ptc_ga(VCPU *vcpu, u6
         do {
             cpu = v->processor;
             if (cpu != current->processor) {
-                spin_unlock_wait(&per_cpu(schedule_data, cpu).schedule_lock);
+                spin_barrier(&per_cpu(schedule_data, cpu).schedule_lock);
                 /* Flush VHPT on remote processors. */
                 smp_call_function_single(cpu, &ptc_ga_remote_func,
                                          &args, 0, 1);
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c      Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/acpi/cpu_idle.c      Wed Oct 22 11:46:55 2008 +0900
@@ -40,6 +40,7 @@
 #include <xen/guest_access.h>
 #include <xen/keyhandler.h>
 #include <xen/cpuidle.h>
+#include <xen/trace.h>
 #include <asm/cache.h>
 #include <asm/io.h>
 #include <asm/hpet.h>
@@ -251,6 +252,9 @@ static void acpi_processor_idle(void)
     switch ( cx->type )
     {
     case ACPI_STATE_C1:
+        /* Trace cpu idle entry */
+        TRACE_1D(TRC_PM_IDLE_ENTRY, 1);
+
         /*
          * Invoke C1.
          * Use the appropriate idle routine, the one that would
@@ -261,6 +265,9 @@ static void acpi_processor_idle(void)
         else 
             acpi_safe_halt();
 
+        /* Trace cpu idle exit */
+        TRACE_1D(TRC_PM_IDLE_EXIT, 1);
+
         /*
          * TBD: Can't get time duration while in C1, as resumes
          *      go to an ISR rather than here.  Need to instrument
@@ -272,12 +279,16 @@ static void acpi_processor_idle(void)
     case ACPI_STATE_C2:
         if ( local_apic_timer_c2_ok )
         {
+            /* Trace cpu idle entry */
+            TRACE_1D(TRC_PM_IDLE_ENTRY, 2);
             /* Get start time (ticks) */
             t1 = inl(pmtmr_ioport);
             /* Invoke C2 */
             acpi_idle_do_entry(cx);
             /* Get end time (ticks) */
             t2 = inl(pmtmr_ioport);
+            /* Trace cpu idle exit */
+            TRACE_1D(TRC_PM_IDLE_EXIT, 2);
 
             /* Re-enable interrupts */
             local_irq_enable();
@@ -316,6 +327,8 @@ static void acpi_processor_idle(void)
             ACPI_FLUSH_CPU_CACHE();
         }
 
+        /* Trace cpu idle entry */
+        TRACE_1D(TRC_PM_IDLE_ENTRY, cx - &power->states[0]);
         /*
          * Before invoking C3, be aware that TSC/APIC timer may be 
          * stopped by H/W. Without carefully handling of TSC/APIC stop issues,
@@ -335,6 +348,8 @@ static void acpi_processor_idle(void)
 
         /* recovering TSC */
         cstate_restore_tsc();
+        /* Trace cpu idle exit */
+        TRACE_1D(TRC_PM_IDLE_EXIT, cx - &power->states[0]);
 
         if ( power->flags.bm_check && power->flags.bm_control )
         {
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c    Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/cpu/amd.c    Wed Oct 22 11:46:55 2008 +0900
@@ -37,8 +37,8 @@ integer_param("cpuid_mask_ecx", opt_cpui
 integer_param("cpuid_mask_ecx", opt_cpuid_mask_ecx);
 integer_param("cpuid_mask_edx", opt_cpuid_mask_edx);
 static unsigned int opt_cpuid_mask_ext_ecx, opt_cpuid_mask_ext_edx;
-integer_param("cpuid_mask_ecx", opt_cpuid_mask_ext_ecx);
-integer_param("cpuid_mask_edx", opt_cpuid_mask_ext_edx);
+integer_param("cpuid_mask_ext_ecx", opt_cpuid_mask_ext_ecx);
+integer_param("cpuid_mask_ext_edx", opt_cpuid_mask_ext_edx);
 
 static inline void wrmsr_amd(unsigned int index, unsigned int lo, 
                unsigned int hi)
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/domain.c     Wed Oct 22 11:46:55 2008 +0900
@@ -575,7 +575,10 @@ int arch_set_info_guest(
     v->arch.guest_context.user_regs.eflags |= 2;
 
     if ( is_hvm_vcpu(v) )
+    {
+        hvm_set_info_guest(v);
         goto out;
+    }
 
     /* Only CR0.TS is modifiable by guest or admin. */
     v->arch.guest_context.ctrlreg[0] &= X86_CR0_TS;
@@ -1252,10 +1255,10 @@ void context_switch(struct vcpu *prev, s
             flush_tlb_mask(next->vcpu_dirty_cpumask);
     }
 
-    local_irq_disable();
-
     if ( is_hvm_vcpu(prev) && !list_empty(&prev->arch.hvm_vcpu.tm_list) )
         pt_save_timer(prev);
+
+    local_irq_disable();
 
     set_current(next);
 
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/domctl.c     Wed Oct 22 11:46:55 2008 +0900
@@ -1028,6 +1028,21 @@ long arch_do_domctl(
     }
     break;
 
+    case XEN_DOMCTL_suppress_spurious_page_faults:
+    {
+        struct domain *d;
+
+        ret = -ESRCH;
+        d = rcu_lock_domain_by_id(domctl->domain);
+        if ( d != NULL )
+        {
+            d->arch.suppress_spurious_page_faults = 1;
+            rcu_unlock_domain(d);
+            ret = 0;
+        }
+    }
+    break;
+
     default:
         ret = -ENOSYS;
         break;
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/hvm/svm/emulate.c
--- a/xen/arch/x86/hvm/svm/emulate.c    Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/hvm/svm/emulate.c    Wed Oct 22 11:46:55 2008 +0900
@@ -61,6 +61,34 @@ static unsigned long svm_rip2pointer(str
     return p;
 }
 
+static unsigned long svm_nextrip_insn_length(struct vcpu *v)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    if ( !cpu_has_svm_nrips || (vmcb->nextrip <= vmcb->rip) )
+        return 0;
+
+#ifndef NDEBUG
+    switch ( vmcb->exitcode )
+    {
+    case VMEXIT_CR0_READ... VMEXIT_DR15_WRITE:
+        /* faults due to instruction intercepts */
+        /* (exitcodes 84-95) are reserved */
+    case VMEXIT_IDTR_READ ... VMEXIT_TR_WRITE:
+    case VMEXIT_RDTSC ... VMEXIT_MSR:
+    case VMEXIT_VMRUN ...  VMEXIT_MWAIT_CONDITIONAL:
+        /* ...and the rest of the #VMEXITs */
+    case VMEXIT_CR0_SEL_WRITE:
+    case VMEXIT_EXCEPTION_BP:
+        break;
+    default:
+        BUG();
+    }
+#endif
+
+    return vmcb->nextrip - vmcb->rip;
+}
+
 /* First byte: Length. Following bytes: Opcode bytes. */
 #define MAKE_INSTR(nm, ...) static const u8 OPCODE_##nm[] = { __VA_ARGS__ }
 MAKE_INSTR(INVD,   2, 0x0f, 0x08);
@@ -118,6 +146,9 @@ int __get_instruction_length_from_list(s
     unsigned long fetch_addr;
     unsigned int fetch_len;
 
+    if ( (inst_len = svm_nextrip_insn_length(v)) != 0 )
+        return inst_len;
+
     /* Fetch up to the next page break; we'll fetch from the next page
      * later if we have to. */
     fetch_addr = svm_rip2pointer(v);
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c       Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/hvm/svm/intr.c       Wed Oct 22 11:46:55 2008 +0900
@@ -100,61 +100,6 @@ static void enable_intr_window(struct vc
     vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR;
 }
 
-extern int vmsi_deliver(struct domain *d, int pirq);
-static int hvm_pci_msi_assert(struct domain *d, int pirq)
-{
-    return vmsi_deliver(d, pirq);
-}
-
-static void svm_dirq_assist(struct vcpu *v)
-{
-    unsigned int irq;
-    uint32_t device, intx;
-    struct domain *d = v->domain;
-    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
-    struct dev_intx_gsi_link *digl;
-
-    if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
-        return;
-
-    for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS);
-          irq < NR_IRQS;
-          irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) )
-    {
-        if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
-            continue;
-
-        spin_lock(&d->event_lock);
-        if ( test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[irq].flags) )
-        {
-            hvm_pci_msi_assert(d, irq);
-            spin_unlock(&d->event_lock);
-            continue;
-        }
-
-        stop_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)]);
-
-        list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list )
-        {
-            device = digl->device;
-            intx = digl->intx;
-            hvm_pci_intx_assert(d, device, intx);
-            hvm_irq_dpci->mirq[irq].pending++;
-        }
-
-        /*
-         * Set a timer to see if the guest can finish the interrupt or not. For
-         * example, the guest OS may unmask the PIC during boot, before the
-         * guest driver is loaded. hvm_pci_intx_assert() may succeed, but the
-         * guest will never deal with the irq, then the physical interrupt line
-         * will never be deasserted.
-         */
-        set_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)],
-                  NOW() + PT_IRQ_TIME_OUT);
-        spin_unlock(&d->event_lock);
-    }
-}
-
 asmlinkage void svm_intr_assist(void) 
 {
     struct vcpu *v = current;
@@ -163,7 +108,7 @@ asmlinkage void svm_intr_assist(void)
 
     /* Crank the handle on interrupt state. */
     pt_update_irq(v);
-    svm_dirq_assist(v);
+    hvm_dirq_assist(v);
 
     do {
         intack = hvm_vcpu_has_pending_irq(v);
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/hvm/viridian.c
--- a/xen/arch/x86/hvm/viridian.c       Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/hvm/viridian.c       Wed Oct 22 11:46:55 2008 +0900
@@ -244,7 +244,6 @@ int rdmsr_viridian_regs(uint32_t idx, ui
 
 int viridian_hypercall(struct cpu_user_regs *regs)
 {
-    struct domain *d = current->domain;
     int mode = hvm_guest_x86_mode(current);
     unsigned long input_params_gpa, output_params_gpa;
     uint16_t status = HV_STATUS_SUCCESS;
@@ -271,7 +270,7 @@ int viridian_hypercall(struct cpu_user_r
         };
     } output = { 0 };
 
-    ASSERT(is_viridian_domain(d));
+    ASSERT(is_viridian_domain(current->domain));
 
     switch ( mode )
     {
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c       Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/hvm/vmx/intr.c       Wed Oct 22 11:46:55 2008 +0900
@@ -103,61 +103,6 @@ static void enable_intr_window(struct vc
     }
 }
 
-extern int vmsi_deliver(struct domain *d, int pirq);
-static int hvm_pci_msi_assert(struct domain *d, int pirq)
-{
-    return vmsi_deliver(d, pirq);
-}
-
-static void vmx_dirq_assist(struct vcpu *v)
-{
-    unsigned int irq;
-    uint32_t device, intx;
-    struct domain *d = v->domain;
-    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
-    struct dev_intx_gsi_link *digl;
-
-    if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
-        return;
-
-    for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS);
-          irq < NR_IRQS;
-          irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) )
-    {
-        if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
-            continue;
-
-        spin_lock(&d->event_lock);
-        if ( test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[irq].flags) )
-        {
-            hvm_pci_msi_assert(d, irq);
-            spin_unlock(&d->event_lock);
-            continue;
-        }
-
-        stop_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)]);
-
-        list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list )
-        {
-            device = digl->device;
-            intx = digl->intx;
-            hvm_pci_intx_assert(d, device, intx);
-            hvm_irq_dpci->mirq[irq].pending++;
-        }
-
-        /*
-         * Set a timer to see if the guest can finish the interrupt or not. For
-         * example, the guest OS may unmask the PIC during boot, before the
-         * guest driver is loaded. hvm_pci_intx_assert() may succeed, but the
-         * guest will never deal with the irq, then the physical interrupt line
-         * will never be deasserted.
-         */
-        set_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)],
-                  NOW() + PT_IRQ_TIME_OUT);
-        spin_unlock(&d->event_lock);
-    }
-}
-
 asmlinkage void vmx_intr_assist(void)
 {
     struct hvm_intack intack;
@@ -167,7 +112,7 @@ asmlinkage void vmx_intr_assist(void)
 
     /* Crank the handle on interrupt state. */
     pt_update_irq(v);
-    vmx_dirq_assist(v);
+    hvm_dirq_assist(v);
 
     do {
         intack = hvm_vcpu_has_pending_irq(v);
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Wed Oct 22 11:46:55 2008 +0900
@@ -1184,6 +1184,13 @@ static void vmx_set_uc_mode(struct vcpu 
     vpid_sync_all();
 }
 
+static void vmx_set_info_guest(struct vcpu *v)
+{
+    vmx_vmcs_enter(v);
+    __vmwrite(GUEST_DR7, v->arch.guest_context.debugreg[7]);
+    vmx_vmcs_exit(v);
+}
+
 static struct hvm_function_table vmx_function_table = {
     .name                 = "VMX",
     .domain_initialise    = vmx_domain_initialise,
@@ -1214,7 +1221,8 @@ static struct hvm_function_table vmx_fun
     .msr_read_intercept   = vmx_msr_read_intercept,
     .msr_write_intercept  = vmx_msr_write_intercept,
     .invlpg_intercept     = vmx_invlpg_intercept,
-    .set_uc_mode          = vmx_set_uc_mode
+    .set_uc_mode          = vmx_set_uc_mode,
+    .set_info_guest       = vmx_set_info_guest
 };
 
 static unsigned long *vpid_bitmap;
@@ -2048,8 +2056,12 @@ asmlinkage void vmx_vmexit_handler(struc
 
     perfc_incra(vmexits, exit_reason);
 
-    if ( exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT )
-        local_irq_enable();
+    /* Handle the interrupt we missed before allowing any more in. */
+    if ( exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT )
+        vmx_do_extint(regs);
+
+    /* Now enable interrupts so it's safe to take locks. */
+    local_irq_enable();
 
     if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
         return vmx_failed_vmentry(exit_reason, regs);
@@ -2177,7 +2189,7 @@ asmlinkage void vmx_vmexit_handler(struc
         break;
     }
     case EXIT_REASON_EXTERNAL_INTERRUPT:
-        vmx_do_extint(regs);
+        /* Already handled above. */
         break;
     case EXIT_REASON_TRIPLE_FAULT:
         hvm_triple_fault();
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/irq.c        Wed Oct 22 11:46:55 2008 +0900
@@ -510,7 +510,7 @@ int pirq_guest_bind(struct vcpu *v, int 
 {
     unsigned int        vector;
     irq_desc_t         *desc;
-    irq_guest_action_t *action;
+    irq_guest_action_t *action, *newaction = NULL;
     int                 rc = 0;
     cpumask_t           cpumask = CPU_MASK_NONE;
 
@@ -520,7 +520,10 @@ int pirq_guest_bind(struct vcpu *v, int 
  retry:
     desc = domain_spin_lock_irq_desc(v->domain, irq, NULL);
     if ( desc == NULL )
-        return -EINVAL;
+    {
+        rc = -EINVAL;
+        goto out;
+    }
 
     action = (irq_guest_action_t *)desc->action;
     vector = desc - irq_desc;
@@ -533,18 +536,24 @@ int pirq_guest_bind(struct vcpu *v, int 
                     "Cannot bind IRQ %d to guest. In use by '%s'.\n",
                     irq, desc->action->name);
             rc = -EBUSY;
-            goto out;
+            goto unlock_out;
         }
 
-        action = xmalloc(irq_guest_action_t);
-        if ( (desc->action = (struct irqaction *)action) == NULL )
+        if ( newaction == NULL )
         {
+            spin_unlock_irq(&desc->lock);
+            if ( (newaction = xmalloc(irq_guest_action_t)) != NULL )
+                goto retry;
             gdprintk(XENLOG_INFO,
-                    "Cannot bind IRQ %d to guest. Out of memory.\n",
-                    irq);
+                     "Cannot bind IRQ %d to guest. Out of memory.\n",
+                     irq);
             rc = -ENOMEM;
             goto out;
         }
+
+        action = newaction;
+        desc->action = (struct irqaction *)action;
+        newaction = NULL;
 
         action->nr_guests   = 0;
         action->in_flight   = 0;
@@ -568,7 +577,7 @@ int pirq_guest_bind(struct vcpu *v, int 
                "Will not share with others.\n",
                 irq);
         rc = -EBUSY;
-        goto out;
+        goto unlock_out;
     }
     else if ( action->nr_guests == 0 )
     {
@@ -588,17 +597,21 @@ int pirq_guest_bind(struct vcpu *v, int 
         gdprintk(XENLOG_INFO, "Cannot bind IRQ %d to guest. "
                "Already at max share.\n", irq);
         rc = -EBUSY;
-        goto out;
+        goto unlock_out;
     }
 
     action->guest[action->nr_guests++] = v->domain;
 
+ unlock_out:
+    spin_unlock_irq(&desc->lock);
  out:
-    spin_unlock_irq(&desc->lock);
+    if ( newaction != NULL )
+        xfree(newaction);
     return rc;
 }
 
-static void __pirq_guest_unbind(struct domain *d, int irq, irq_desc_t *desc)
+static irq_guest_action_t *__pirq_guest_unbind(
+    struct domain *d, int irq, irq_desc_t *desc)
 {
     unsigned int        vector;
     irq_guest_action_t *action;
@@ -644,7 +657,7 @@ static void __pirq_guest_unbind(struct d
     BUG_ON(test_bit(irq, d->pirq_mask));
 
     if ( action->nr_guests != 0 )
-        return;
+        return NULL;
 
     BUG_ON(action->in_flight != 0);
 
@@ -672,15 +685,18 @@ static void __pirq_guest_unbind(struct d
     BUG_ON(!cpus_empty(action->cpu_eoi_map));
 
     desc->action = NULL;
-    xfree(action);
     desc->status &= ~IRQ_GUEST;
     desc->status &= ~IRQ_INPROGRESS;
     kill_timer(&irq_guest_eoi_timer[vector]);
     desc->handler->shutdown(vector);
+
+    /* Caller frees the old guest descriptor block. */
+    return action;
 }
 
 void pirq_guest_unbind(struct domain *d, int irq)
 {
+    irq_guest_action_t *oldaction = NULL;
     irq_desc_t *desc;
     int vector;
 
@@ -699,16 +715,19 @@ void pirq_guest_unbind(struct domain *d,
     }
     else
     {
-        __pirq_guest_unbind(d, irq, desc);
+        oldaction = __pirq_guest_unbind(d, irq, desc);
     }
 
     spin_unlock_irq(&desc->lock);
+
+    if ( oldaction != NULL )
+        xfree(oldaction);
 }
 
 int pirq_guest_force_unbind(struct domain *d, int irq)
 {
     irq_desc_t *desc;
-    irq_guest_action_t *action;
+    irq_guest_action_t *action, *oldaction = NULL;
     int i, bound = 0;
 
     WARN_ON(!spin_is_locked(&d->event_lock));
@@ -727,10 +746,14 @@ int pirq_guest_force_unbind(struct domai
         goto out;
 
     bound = 1;
-    __pirq_guest_unbind(d, irq, desc);
+    oldaction = __pirq_guest_unbind(d, irq, desc);
 
  out:
     spin_unlock_irq(&desc->lock);
+
+    if ( oldaction != NULL )
+        xfree(oldaction);
+
     return bound;
 }
 
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/mm/hap/hap.c Wed Oct 22 11:46:55 2008 +0900
@@ -639,9 +639,16 @@ hap_write_p2m_entry(struct vcpu *v, unsi
 hap_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p,
                     mfn_t table_mfn, l1_pgentry_t new, unsigned int level)
 {
+    uint32_t old_flags;
+
     hap_lock(v->domain);
 
+    old_flags = l1e_get_flags(*p);
     safe_write_pte(p, new);
+    if ( (old_flags & _PAGE_PRESENT)
+         && (level == 1 || (level == 2 && (old_flags & _PAGE_PSE))) )
+             flush_tlb_mask(v->domain->domain_dirty_cpumask);
+
 #if CONFIG_PAGING_LEVELS == 3
     /* install P2M in monitor table for PAE Xen */
     if ( level == 3 )
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/mm/shadow/private.h  Wed Oct 22 11:46:55 2008 +0900
@@ -227,32 +227,40 @@ struct shadow_page_info
 struct shadow_page_info
 {
     union {
-        /* When in use, guest page we're a shadow of */
-        unsigned long backpointer;
-        /* When free, order of the freelist we're on */
-        unsigned int order;
-    };
-    union {
-        /* When in use, next shadow in this hash chain */
-        struct shadow_page_info *next_shadow;
-        /* When free, TLB flush time when freed */
-        u32 tlbflush_timestamp;
-    };
-    struct {
-        unsigned int type:5;      /* What kind of shadow is this? */
-        unsigned int pinned:1;    /* Is the shadow pinned? */
-        unsigned int count:26;    /* Reference count */
-        u32 mbz;                  /* Must be zero: this is where the owner 
-                                   * field lives in a non-shadow page */
-    } __attribute__((packed));
-    union {
-        /* For unused shadow pages, a list of pages of this order; 
-         * for pinnable shadows, if pinned, a list of other pinned shadows
-         * (see sh_type_is_pinnable() below for the definition of 
-         * "pinnable" shadow types). */
-        struct list_head list;
-        /* For non-pinnable shadows, a higher entry that points at us */
-        paddr_t up;
+        /* Ensures that shadow_page_info is same size as page_info. */
+        struct page_info page_info;
+
+        struct {
+            union {
+                /* When in use, guest page we're a shadow of */
+                unsigned long backpointer;
+                /* When free, order of the freelist we're on */
+                unsigned int order;
+            };
+            union {
+                /* When in use, next shadow in this hash chain */
+                struct shadow_page_info *next_shadow;
+                /* When free, TLB flush time when freed */
+                u32 tlbflush_timestamp;
+            };
+            struct {
+                unsigned int type:5;   /* What kind of shadow is this? */
+                unsigned int pinned:1; /* Is the shadow pinned? */
+                unsigned int count:26; /* Reference count */
+                u32 mbz;               /* Must be zero: this is where the
+                                        * owner field lives in page_info */
+            } __attribute__((packed));
+            union {
+                /* For unused shadow pages, a list of pages of this order; for 
+                 * pinnable shadows, if pinned, a list of other pinned shadows
+                 * (see sh_type_is_pinnable() below for the definition of 
+                 * "pinnable" shadow types). */
+                struct list_head list;
+                /* For non-pinnable shadows, a higher entry that points
+                 * at us. */
+                paddr_t up;
+            };
+        };
     };
 };
 
@@ -261,7 +269,8 @@ struct shadow_page_info
  * Also, the mbz field must line up with the owner field of normal 
  * pages, so they look properly like anonymous/xen pages. */
 static inline void shadow_check_page_struct_offsets(void) {
-    BUILD_BUG_ON(sizeof (struct shadow_page_info) > sizeof (struct page_info));
+    BUILD_BUG_ON(sizeof (struct shadow_page_info) !=
+                 sizeof (struct page_info));
     BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz) !=
                  offsetof(struct page_info, u.inuse._domain));
 };
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c        Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/msi.c        Wed Oct 22 11:46:55 2008 +0900
@@ -364,6 +364,7 @@ static struct msi_desc* alloc_msi_entry(
 
     INIT_LIST_HEAD(&entry->list);
     entry->dev = NULL;
+    entry->remap_index = -1;
 
     return entry;
 }
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/nmi.c
--- a/xen/arch/x86/nmi.c        Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/nmi.c        Wed Oct 22 11:46:55 2008 +0900
@@ -72,8 +72,8 @@ int nmi_active;
 #define P6_EVNTSEL_INT         (1 << 20)
 #define P6_EVNTSEL_OS          (1 << 17)
 #define P6_EVNTSEL_USR         (1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
-#define P6_NMI_EVENT           P6_EVENT_CPU_CLOCKS_NOT_HALTED
+#define P6_EVENT_CPU_CLOCKS_NOT_HALTED  0x79
+#define CORE_EVENT_CPU_CLOCKS_NOT_HALTED 0x3c
 
 #define P4_ESCR_EVENT_SELECT(N)        ((N)<<25)
 #define P4_CCCR_OVF_PMI0       (1<<26)
@@ -122,10 +122,17 @@ int __init check_nmi_watchdog (void)
 
     printk("\n");
 
-    /* now that we know it works we can reduce NMI frequency to
-       something more reasonable; makes a difference in some configs */
+    /*
+     * Now that we know it works we can reduce NMI frequency to
+     * something more reasonable; makes a difference in some configs.
+     * There's a limit to how slow we can go because writing the perfctr
+     * MSRs only sets the low 32 bits, with the top 8 bits sign-extended
+     * from those, so it's not possible to set up a delay larger than
+     * 2^31 cycles and smaller than (2^40 - 2^31) cycles. 
+     * (Intel SDM, section 18.22.2)
+     */
     if ( nmi_watchdog == NMI_LOCAL_APIC )
-        nmi_hz = 1;
+        nmi_hz = max(1ul, cpu_khz >> 20);
 
     return 0;
 }
@@ -248,7 +255,7 @@ static void __pminit setup_k7_watchdog(v
     wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
 }
 
-static void __pminit setup_p6_watchdog(void)
+static void __pminit setup_p6_watchdog(unsigned counter)
 {
     unsigned int evntsel;
 
@@ -260,7 +267,7 @@ static void __pminit setup_p6_watchdog(v
     evntsel = P6_EVNTSEL_INT
         | P6_EVNTSEL_OS
         | P6_EVNTSEL_USR
-        | P6_NMI_EVENT;
+        | counter;
 
     wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
     write_watchdog_counter("P6_PERFCTR0");
@@ -326,7 +333,9 @@ void __pminit setup_apic_nmi_watchdog(vo
     case X86_VENDOR_INTEL:
         switch (boot_cpu_data.x86) {
         case 6:
-            setup_p6_watchdog();
+            setup_p6_watchdog((boot_cpu_data.x86_model < 14) 
+                              ? P6_EVENT_CPU_CLOCKS_NOT_HALTED
+                              : CORE_EVENT_CPU_CLOCKS_NOT_HALTED);
             break;
         case 15:
             if (!setup_p4_watchdog())
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/platform_hypercall.c Wed Oct 22 11:46:55 2008 +0900
@@ -53,15 +53,6 @@ static long cpu_frequency_change_helper(
     return cpu_frequency_change(this_cpu(freq));
 }
 
-int xenpf_copy_px_states(struct processor_performance *pxpt,
-        struct xen_processor_performance *dom0_px_info)
-{
-    if (!pxpt || !dom0_px_info)
-        return -EINVAL;
-    return  copy_from_compat(pxpt->states, dom0_px_info->states, 
-                    dom0_px_info->state_count);
-}
-
 ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op)
 {
     ret_t ret = 0;
@@ -372,12 +363,13 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
         switch ( op->u.set_pminfo.type )
         {
         case XEN_PM_PX:
-        {
-
-            ret = set_px_pminfo(op->u.set_pminfo.id,
-                                &op->u.set_pminfo.perf);
-            break;
-        }
+            if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
+            {
+                ret = -ENOSYS;
+                break;
+            }
+            ret = set_px_pminfo(op->u.set_pminfo.id, &op->u.set_pminfo.perf);
+            break;
  
         case XEN_PM_CX:
             if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CX) )
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/smpboot.c    Wed Oct 22 11:46:55 2008 +0900
@@ -473,13 +473,6 @@ static void construct_percpu_idt(unsigne
 {
        unsigned char idt_load[10];
 
-       /* If IDT table exists since last hotplug, reuse it */
-       if (!idt_tables[cpu]) {
-               idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
-               memcpy(idt_tables[cpu], idt_table,
-                               IDT_ENTRIES*sizeof(idt_entry_t));
-       }
-
        *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*sizeof(idt_entry_t))-1;
        *(unsigned long  *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
        __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
@@ -908,6 +901,12 @@ static int __devinit do_boot_cpu(int api
        }
 #endif
 
+       if (!idt_tables[cpu]) {
+               idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
+               memcpy(idt_tables[cpu], idt_table,
+                      IDT_ENTRIES*sizeof(idt_entry_t));
+       }
+
        /*
         * This grunge runs the startup process for
         * the targeted processor.
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/traps.c      Wed Oct 22 11:46:55 2008 +0900
@@ -710,7 +710,7 @@ static void pv_cpuid(struct cpu_user_reg
     if ( current->domain->domain_id != 0 )
     {
         if ( !cpuid_hypervisor_leaves(a, &a, &b, &c, &d) )
-            domain_cpuid(current->domain, a, b, &a, &b, &c, &d);
+            domain_cpuid(current->domain, a, c, &a, &b, &c, &d);
         goto out;
     }
 
@@ -1241,6 +1241,10 @@ asmlinkage void do_page_fault(struct cpu
               "Faulting linear address: %p\n",
               regs->error_code, _p(addr));
     }
+
+    if ( unlikely(current->domain->arch.suppress_spurious_page_faults
+                  && spurious_page_fault(addr, regs)) )
+        return;
 
     propagate_page_fault(addr, regs->error_code);
 }
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/x86_32/xen.lds.S
--- a/xen/arch/x86/x86_32/xen.lds.S     Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/x86_32/xen.lds.S     Wed Oct 22 11:46:55 2008 +0900
@@ -26,7 +26,6 @@ SECTIONS
        *(.fixup)
        *(.gnu.warning)
        } :text =0x9090
-  .text.lock : { *(.text.lock) } :text /* out-of-line lock text */
 
   _etext = .;                  /* End of text section */
 
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/x86_64/Makefile
--- a/xen/arch/x86/x86_64/Makefile      Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/x86_64/Makefile      Wed Oct 22 11:46:55 2008 +0900
@@ -13,6 +13,7 @@ obj-$(CONFIG_COMPAT) += physdev.o
 obj-$(CONFIG_COMPAT) += physdev.o
 obj-$(CONFIG_COMPAT) += platform_hypercall.o
 obj-$(CONFIG_COMPAT) += cpu_idle.o
+obj-$(CONFIG_COMPAT) += cpufreq.o
 
 ifeq ($(CONFIG_COMPAT),y)
 # extra dependencies
@@ -24,4 +25,5 @@ sysctl.o:     ../sysctl.c
 sysctl.o:      ../sysctl.c
 traps.o:       compat/traps.c
 cpu_idle.o:    ../acpi/cpu_idle.c
+cpufreq.o:     ../../../drivers/cpufreq/cpufreq.c
 endif
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/x86_64/cpu_idle.c
--- a/xen/arch/x86/x86_64/cpu_idle.c    Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/x86_64/cpu_idle.c    Wed Oct 22 11:46:55 2008 +0900
@@ -44,7 +44,7 @@ DEFINE_XEN_GUEST_HANDLE(compat_processor
     xlat_page_current = xlat_page_start; \
 } while (0)
 
-static void *xlat_malloc(unsigned long *xlat_page_current, size_t size)
+void *xlat_malloc(unsigned long *xlat_page_current, size_t size)
 {
     void *ret;
 
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/x86_64/cpufreq.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/x86_64/cpufreq.c     Wed Oct 22 11:46:55 2008 +0900
@@ -0,0 +1,91 @@
+/******************************************************************************
+ * cpufreq.c -- adapt 32b compat guest to 64b hypervisor.
+ *
+ *  Copyright (C) 2008, Liu Jinsong <jinsong.liu@xxxxxxxxx>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/xmalloc.h>
+#include <xen/guest_access.h>
+#include <compat/platform.h>
+
+DEFINE_XEN_GUEST_HANDLE(compat_processor_px_t);
+
+#define xlat_page_start ((unsigned long)COMPAT_ARG_XLAT_VIRT_BASE)
+
+#define xlat_malloc_init(xlat_page_current)    do { \
+    xlat_page_current = xlat_page_start; \
+} while (0)
+
+extern void *xlat_malloc(unsigned long *xlat_page_current, size_t size);
+
+#define xlat_malloc_array(_p, _t, _c) ((_t *) xlat_malloc(&_p, sizeof(_t) * _c))
+
+extern int 
+set_px_pminfo(uint32_t cpu, struct xen_processor_performance *perf);
+/* Translate compat (32-bit guest) Px info to native form; call set_px_pminfo(). */
+int
+compat_set_px_pminfo(uint32_t cpu, struct compat_processor_performance *perf)
+{
+    struct xen_processor_performance *xen_perf;
+    unsigned long xlat_page_current;
+
+    xlat_malloc_init(xlat_page_current);
+
+    xen_perf = xlat_malloc_array(xlat_page_current,
+                                  struct xen_processor_performance, 1);
+    if ( unlikely(xen_perf == NULL) )
+       return -EFAULT;
+    /* 'states' handle hook; presumably expanded by the XLAT macro used below. */
+#define XLAT_processor_performance_HNDL_states(_d_, _s_) do { \
+    xen_processor_px_t *xen_states = NULL; \
+\
+    if ( likely((_s_)->state_count > 0) ) \
+    { \
+        XEN_GUEST_HANDLE(compat_processor_px_t) states; \
+        compat_processor_px_t state; \
+        int i; \
+\
+        xen_states = xlat_malloc_array(xlat_page_current, \
+                               xen_processor_px_t, (_s_)->state_count); \
+        if ( unlikely(xen_states == NULL) ) \
+            return -EFAULT; \
+\
+        if ( unlikely(!compat_handle_okay((_s_)->states, \
+                                (_s_)->state_count)) ) \
+            return -EFAULT; \
+        guest_from_compat_handle(states, (_s_)->states); \
+\
+        for ( i = 0; i < (_s_)->state_count; i++ ) \
+        { \
+           if ( unlikely(copy_from_guest_offset(&state, states, i, 1)) ) \
+               return -EFAULT; \
+           XLAT_processor_px(&xen_states[i], &state); \
+        } \
+    } \
+\
+    set_xen_guest_handle((_d_)->states, xen_states); \
+} while (0)
+    XLAT_processor_performance(xen_perf, perf);
+#undef XLAT_processor_performance_HNDL_states
+
+    return set_px_pminfo(cpu, xen_perf);
+}
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c  Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/x86_64/mm.c  Wed Oct 22 11:46:55 2008 +0900
@@ -252,8 +252,6 @@ void __init subarch_init_memory(void)
     BUILD_BUG_ON(offsetof(struct page_info, u.inuse._domain) != 
                  (offsetof(struct page_info, count_info) + sizeof(u32)));
     BUILD_BUG_ON((offsetof(struct page_info, count_info) & 7) != 0);
-    BUILD_BUG_ON(sizeof(struct page_info) !=
-                 (32 + BITS_TO_LONGS(NR_CPUS)*sizeof(long)));
 
     /* M2P table is mappable read-only by privileged domains. */
     for ( v  = RDWR_MPT_VIRT_START;
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/x86_64/platform_hypercall.c
--- a/xen/arch/x86/x86_64/platform_hypercall.c  Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/x86_64/platform_hypercall.c  Wed Oct 22 11:46:55 2008 +0900
@@ -11,13 +11,13 @@ DEFINE_XEN_GUEST_HANDLE(compat_platform_
 #define xen_platform_op_t   compat_platform_op_t
 #define do_platform_op(x)   compat_platform_op(_##x)
 
-#define xenpf_copy_px_states compat_xenpf_copy_px_states
-
 #define xen_processor_px    compat_processor_px
 #define xen_processor_px_t  compat_processor_px_t
 #define xen_processor_performance    compat_processor_performance
 #define xen_processor_performance_t  compat_processor_performance_t
 #define xenpf_set_processor_pminfo   compat_pf_set_processor_pminfo
+
+#define set_px_pminfo          compat_set_px_pminfo
 
 #define xen_processor_power     compat_processor_power
 #define xen_processor_power_t   compat_processor_power_t
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/x86_64/xen.lds.S
--- a/xen/arch/x86/x86_64/xen.lds.S     Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/x86_64/xen.lds.S     Wed Oct 22 11:46:55 2008 +0900
@@ -24,7 +24,6 @@ SECTIONS
        *(.fixup)
        *(.gnu.warning)
        } :text = 0x9090
-  .text.lock : { *(.text.lock) } :text /* out-of-line lock text */
 
   _etext = .;                  /* End of text section */
 
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/x86_emulate/x86_emulate.c
--- a/xen/arch/x86/x86_emulate/x86_emulate.c    Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c    Wed Oct 22 11:46:55 2008 +0900
@@ -236,7 +236,8 @@ static uint8_t twobyte_table[256] = {
     DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
     ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
     /* 0xC0 - 0xC7 */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, 0,
+    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+    0, DstMem|SrcReg|ModRM|Mov,
     0, 0, 0, ImplicitOps|ModRM,
     /* 0xC8 - 0xCF */
     ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
@@ -3910,6 +3911,12 @@ x86_emulate(
         }
         goto add;
 
+    case 0xc3: /* movnti */
+        /* Ignore the non-temporal hint for now. */
+        generate_exception_if(dst.bytes <= 2, EXC_UD, -1);
+        dst.val = src.val;
+        break;
+
     case 0xc7: /* Grp9 (cmpxchg8b/cmpxchg16b) */ {
         unsigned long old[2], exp[2], new[2];
         unsigned int i;
diff -r 6583186e5989 -r 46d7e12c4c91 xen/common/Makefile
--- a/xen/common/Makefile       Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/common/Makefile       Wed Oct 22 11:46:55 2008 +0900
@@ -16,6 +16,7 @@ obj-y += schedule.o
 obj-y += schedule.o
 obj-y += shutdown.o
 obj-y += softirq.o
+obj-y += spinlock.o
 obj-y += stop_machine.o
 obj-y += string.o
 obj-y += symbols.o
@@ -25,7 +26,7 @@ obj-y += trace.o
 obj-y += trace.o
 obj-y += version.o
 obj-y += vsprintf.o
-obj-y += xmalloc.o
+obj-y += xmalloc_tlsf.o
 obj-y += rcupdate.o
 
 obj-$(perfc)       += perfc.o
diff -r 6583186e5989 -r 46d7e12c4c91 xen/common/kernel.c
--- a/xen/common/kernel.c       Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/common/kernel.c       Wed Oct 22 11:46:55 2008 +0900
@@ -75,8 +75,7 @@ void cmdline_parse(char *cmdline)
                 strlcpy(param->var, optval, param->len);
                 break;
             case OPT_UINT:
-                *(unsigned int *)param->var =
-                    simple_strtol(optval, (const char **)&optval, 0);
+                *(unsigned int *)param->var = simple_strtol(optval, NULL, 0);
                 break;
             case OPT_BOOL:
             case OPT_INVBOOL:
diff -r 6583186e5989 -r 46d7e12c4c91 xen/common/schedule.c
--- a/xen/common/schedule.c     Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/common/schedule.c     Wed Oct 22 11:46:55 2008 +0900
@@ -455,6 +455,10 @@ static long do_poll(struct sched_poll *s
         goto out;
 #endif
 
+    rc = 0;
+    if ( local_events_need_delivery() )
+        goto out;
+
     for ( i = 0; i < sched_poll->nr_ports; i++ )
     {
         rc = -EFAULT;
diff -r 6583186e5989 -r 46d7e12c4c91 xen/common/spinlock.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/common/spinlock.c     Wed Oct 22 11:46:55 2008 +0900
@@ -0,0 +1,154 @@
+#include <xen/config.h>
+#include <xen/smp.h>
+#include <xen/spinlock.h>
+
+void _spin_lock(spinlock_t *lock)
+{
+    _raw_spin_lock(&lock->raw);
+}
+
+void _spin_lock_irq(spinlock_t *lock)
+{
+    local_irq_disable();
+    _raw_spin_lock(&lock->raw);
+}
+
+unsigned long _spin_lock_irqsave(spinlock_t *lock)
+{
+    unsigned long flags;
+    local_irq_save(flags);
+    _raw_spin_lock(&lock->raw);
+    return flags;
+}
+
+void _spin_unlock(spinlock_t *lock)
+{
+    _raw_spin_unlock(&lock->raw);
+}
+
+void _spin_unlock_irq(spinlock_t *lock)
+{
+    _raw_spin_unlock(&lock->raw);
+    local_irq_enable();
+}
+
+void _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
+{
+    _raw_spin_unlock(&lock->raw);
+    local_irq_restore(flags);
+}
+
+int _spin_is_locked(spinlock_t *lock)
+{
+    return _raw_spin_is_locked(&lock->raw);
+}
+
+int _spin_trylock(spinlock_t *lock)
+{
+    return _raw_spin_trylock(&lock->raw);
+}
+
+void _spin_barrier(spinlock_t *lock)
+{
+    do { mb(); } while ( _raw_spin_is_locked(&lock->raw) );
+    mb();
+}
+
+void _spin_lock_recursive(spinlock_t *lock)
+{
+    int cpu = smp_processor_id();
+
+    /* Don't allow overflow of recurse_cpu field. */
+    BUILD_BUG_ON(NR_CPUS > 0xfffu);
+
+    if ( likely(lock->recurse_cpu != cpu) )
+    {
+        spin_lock(lock);
+        lock->recurse_cpu = cpu;
+    }
+
+    /* We support only fairly shallow recursion, else the counter overflows. */
+    ASSERT(lock->recurse_cnt < 0xfu);
+    lock->recurse_cnt++;
+}
+
+void _spin_unlock_recursive(spinlock_t *lock)
+{
+    if ( likely(--lock->recurse_cnt == 0) )
+    {
+        lock->recurse_cpu = 0xfffu;
+        spin_unlock(lock);
+    }
+}
+
+void _read_lock(rwlock_t *lock)
+{
+    _raw_read_lock(&lock->raw);
+}
+
+void _read_lock_irq(rwlock_t *lock)
+{
+    local_irq_disable();
+    _raw_read_lock(&lock->raw);
+}
+
+unsigned long _read_lock_irqsave(rwlock_t *lock)
+{
+    unsigned long flags;
+    local_irq_save(flags);
+    _raw_read_lock(&lock->raw);
+    return flags;
+}
+
+void _read_unlock(rwlock_t *lock)
+{
+    _raw_read_unlock(&lock->raw);
+}
+
+void _read_unlock_irq(rwlock_t *lock)
+{
+    _raw_read_unlock(&lock->raw);
+    local_irq_enable();
+}
+
+void _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+{
+    _raw_read_unlock(&lock->raw);
+    local_irq_restore(flags);
+}
+
+void _write_lock(rwlock_t *lock)
+{
+    _raw_write_lock(&lock->raw);
+}
+
+void _write_lock_irq(rwlock_t *lock)
+{
+    local_irq_disable();
+    _raw_write_lock(&lock->raw);
+}
+
+unsigned long _write_lock_irqsave(rwlock_t *lock)
+{
+    unsigned long flags;
+    local_irq_save(flags);
+    _raw_write_lock(&lock->raw);
+    return flags;
+}
+
+void _write_unlock(rwlock_t *lock)
+{
+    _raw_write_unlock(&lock->raw);
+}
+
+void _write_unlock_irq(rwlock_t *lock)
+{
+    _raw_write_unlock(&lock->raw);
+    local_irq_enable();
+}
+
+void _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+{
+    _raw_write_unlock(&lock->raw);
+    local_irq_restore(flags);
+}
diff -r 6583186e5989 -r 46d7e12c4c91 xen/common/timer.c
--- a/xen/common/timer.c        Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/common/timer.c        Wed Oct 22 11:46:55 2008 +0900
@@ -114,34 +114,19 @@ static int remove_from_heap(struct timer
 
 
 /* Add new entry @t to @heap. Return TRUE if new top of heap. */
-static int add_to_heap(struct timer ***pheap, struct timer *t)
-{
-    struct timer **heap = *pheap;
+static int add_to_heap(struct timer **heap, struct timer *t)
+{
     int sz = GET_HEAP_SIZE(heap);
 
-    /* Copy the heap if it is full. */
+    /* Fail if the heap is full. */
     if ( unlikely(sz == GET_HEAP_LIMIT(heap)) )
-    {
-        /* old_limit == (2^n)-1; new_limit == (2^(n+4))-1 */
-        int old_limit = GET_HEAP_LIMIT(heap);
-        int new_limit = ((old_limit + 1) << 4) - 1;
-        if ( in_irq() )
-            goto out;
-        heap = xmalloc_array(struct timer *, new_limit + 1);
-        if ( heap == NULL )
-            goto out;
-        memcpy(heap, *pheap, (old_limit + 1) * sizeof(*heap));
-        SET_HEAP_LIMIT(heap, new_limit);
-        if ( old_limit != 0 )
-            xfree(*pheap);
-        *pheap = heap;
-    }
+        return 0;
 
     SET_HEAP_SIZE(heap, ++sz);
     heap[sz] = t;
     t->heap_offset = sz;
     up_heap(heap, sz);
- out:
+
     return (t->heap_offset == 1);
 }
 
@@ -210,7 +195,7 @@ static int add_entry(struct timers *time
     /* Try to add to heap. t->heap_offset indicates whether we succeed. */
     t->heap_offset = 0;
     t->status = TIMER_STATUS_in_heap;
-    rc = add_to_heap(&timers->heap, t);
+    rc = add_to_heap(timers->heap, t);
     if ( t->heap_offset != 0 )
         return rc;
 
@@ -368,6 +353,27 @@ static void timer_softirq_action(void)
     void          *data;
 
     ts = &this_cpu(timers);
+    heap = ts->heap;
+
+    /* If we are using overflow linked list, try to allocate a larger heap. */
+    if ( unlikely(ts->list != NULL) )
+    {
+        /* old_limit == (2^n)-1; new_limit == (2^(n+4))-1 */
+        int old_limit = GET_HEAP_LIMIT(heap);
+        int new_limit = ((old_limit + 1) << 4) - 1;
+        struct timer **newheap = xmalloc_array(struct timer *, new_limit + 1);
+        if ( newheap != NULL )
+        {
+            spin_lock_irq(&ts->lock);
+            memcpy(newheap, heap, (old_limit + 1) * sizeof(*heap));
+            SET_HEAP_LIMIT(newheap, new_limit);
+            ts->heap = newheap;
+            spin_unlock_irq(&ts->lock);
+            if ( old_limit != 0 )
+                xfree(heap);
+            heap = newheap;
+        }
+    }
 
     spin_lock_irq(&ts->lock);
 
@@ -380,9 +386,8 @@ static void timer_softirq_action(void)
         t->status = TIMER_STATUS_inactive;
         add_entry(ts, t);
     }
-    
-    heap = ts->heap;
-    now  = NOW();
+
+    now = NOW();
 
     while ( (GET_HEAP_SIZE(heap) != 0) &&
             ((t = heap[1])->expires < (now + TIMER_SLOP)) )
@@ -397,9 +402,6 @@ static void timer_softirq_action(void)
         spin_unlock_irq(&ts->lock);
         (*fn)(data);
         spin_lock_irq(&ts->lock);
-
-        /* Heap may have grown while the lock was released. */
-        heap = ts->heap;
     }
 
     deadline = GET_HEAP_SIZE(heap) ? heap[1]->expires : 0;
diff -r 6583186e5989 -r 46d7e12c4c91 xen/common/xmalloc_tlsf.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/common/xmalloc_tlsf.c Wed Oct 22 11:46:55 2008 +0900
@@ -0,0 +1,599 @@
+/*
+ * Two Levels Segregate Fit memory allocator (TLSF)
+ * Version 2.3.2
+ *
+ * Written by Miguel Masmano Tello <mimastel@xxxxxxxxxxxxx>
+ *
+ * Thanks to Ismael Ripoll for his suggestions and reviews
+ *
+ * Copyright (C) 2007, 2006, 2005, 2004
+ *
+ * This code is released using a dual license strategy: GPL/LGPL
+ * You can choose the licence that better fits your requirements.
+ *
+ * Released under the terms of the GNU General Public License Version 2.0
+ * Released under the terms of the GNU Lesser General Public License 
+ * Version 2.1
+ *
+ * This is kernel port of TLSF allocator.
+ * Original code can be found at: http://rtportal.upv.es/rtmalloc/
+ * Adapted for Linux by Nitin Gupta (nitingupta910@xxxxxxxxx)
+ * (http://code.google.com/p/compcache/source/browse/trunk/sub-projects
+ *  /allocators/tlsf-kmod r229 dated Aug 27, 2008
+ * Adapted for Xen by Dan Magenheimer (dan.magenheimer@xxxxxxxxxx)
+ */
+
+#include <xen/config.h>
+#include <xen/irq.h>
+#include <xen/mm.h>
+#include <asm/time.h>
+
+#define MAX_POOL_NAME_LEN       16
+
+/* Some IMPORTANT TLSF parameters */
+#define MEM_ALIGN       (sizeof(void *) * 2)
+#define MEM_ALIGN_MASK  (~(MEM_ALIGN - 1))
+
+#define MAX_FLI         (30)
+#define MAX_LOG2_SLI    (5)
+#define MAX_SLI         (1 << MAX_LOG2_SLI)
+
+#define FLI_OFFSET      (6)
+/* tlsf structure just will manage blocks bigger than 128 bytes */
+#define SMALL_BLOCK     (128)
+#define REAL_FLI        (MAX_FLI - FLI_OFFSET)
+#define MIN_BLOCK_SIZE  (sizeof(struct free_ptr))
+#define BHDR_OVERHEAD   (sizeof(struct bhdr) - MIN_BLOCK_SIZE)
+
+#define PTR_MASK        (sizeof(void *) - 1)
+#define BLOCK_SIZE_MASK (0xFFFFFFFF - PTR_MASK)
+
+#define GET_NEXT_BLOCK(addr, r) ((struct bhdr *) \
+                                ((char *)(addr) + (r)))
+#define ROUNDUP_SIZE(r)         (((r) + MEM_ALIGN - 1) & MEM_ALIGN_MASK)
+#define ROUNDDOWN_SIZE(r)       ((r) & MEM_ALIGN_MASK)
+#define ROUNDUP_PAGE(r)         (((r) + PAGE_SIZE - 1) & PAGE_MASK)
+
+#define BLOCK_STATE     (0x1)
+#define PREV_STATE      (0x2)
+
+/* bit 0 of the block size */
+#define FREE_BLOCK      (0x1)
+#define USED_BLOCK      (0x0)
+
+/* bit 1 of the block size */
+#define PREV_FREE       (0x2)
+#define PREV_USED       (0x0)
+
+static spinlock_t pool_list_lock;
+static struct list_head pool_list_head;
+
+struct free_ptr {
+    struct bhdr *prev;
+    struct bhdr *next;
+};
+
+struct bhdr {
+    /* All blocks in a region are linked in order of physical address */
+    struct bhdr *prev_hdr;
+    /*
+     * The size is stored in bytes
+     *  bit 0: block is free, if set
+     *  bit 1: previous block is free, if set
+     */
+    u32 size;
+    /* Free blocks in individual freelists are linked */
+    union {
+        struct free_ptr free_ptr;
+        u8 buffer[sizeof(struct free_ptr)];
+    } ptr;
+};
+
+struct xmem_pool {
+    /* First level bitmap (REAL_FLI bits) */
+    u32 fl_bitmap;
+
+    /* Second level bitmap */
+    u32 sl_bitmap[REAL_FLI];
+
+    /* Free lists */
+    struct bhdr *matrix[REAL_FLI][MAX_SLI];
+
+    spinlock_t lock;
+
+    unsigned long init_size;
+    unsigned long max_size;
+    unsigned long grow_size;
+
+    /* Basic stats */
+    unsigned long used_size;
+    unsigned long num_regions;
+
+    /* User provided functions for expanding/shrinking pool */
+    xmem_pool_get_memory *get_mem;
+    xmem_pool_put_memory *put_mem;
+
+    struct list_head list;
+
+    void *init_region;
+    char name[MAX_POOL_NAME_LEN];
+};
+
+/*
+ * Helping functions
+ */
+
+/**
+ * Returns indexes (fl, sl) for size *r; rounds *r up when >= SMALL_BLOCK
+ */
+static inline void MAPPING_SEARCH(unsigned long *r, int *fl, int *sl)
+{
+    int t;
+
+    if ( *r < SMALL_BLOCK )
+    {
+        *fl = 0;
+        *sl = *r / (SMALL_BLOCK / MAX_SLI);
+    }
+    else
+    {
+        t = (1 << (fls(*r) - 1 - MAX_LOG2_SLI)) - 1;
+        *r = *r + t;
+        *fl = fls(*r) - 1;
+        *sl = (*r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI;
+        *fl -= FLI_OFFSET;
+        /*if ((*fl -= FLI_OFFSET) < 0) // FL will be always >0!
+         *fl = *sl = 0;
+         */
+        *r &= ~t;
+    }
+}
+
+/**
+ * Returns indexes (fl, sl) of the free list that holds blocks of size r,
+ * e.g. for inserting a free block (see ADD_REGION). Unlike MAPPING_SEARCH,
+ * r is passed by value and is not rounded.
+ */
+static inline void MAPPING_INSERT(unsigned long r, int *fl, int *sl)
+{
+    if ( r < SMALL_BLOCK )
+    {
+        *fl = 0;
+        *sl = r / (SMALL_BLOCK / MAX_SLI);
+    }
+    else
+    {
+        *fl = fls(r) - 1;
+        *sl = (r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI;
+        *fl -= FLI_OFFSET;
+    }
+}
+
+/**
+ * Returns the head of the first free list at or above (*fl, *sl) that the
+ * bitmaps mark non-empty, updating *fl and *sl to it; NULL if there is none.
+ */
+static inline struct bhdr *FIND_SUITABLE_BLOCK(struct xmem_pool *p, int *fl,
+                                               int *sl)
+{
+    u32 tmp = p->sl_bitmap[*fl] & (~0u << *sl); /* unsigned: no UB on shift */
+    struct bhdr *b = NULL;
+
+    if ( tmp )
+    {
+        *sl = ffs(tmp) - 1;
+        b = p->matrix[*fl][*sl];
+    }
+    else
+    {
+        *fl = ffs(p->fl_bitmap & (~0u << (*fl + 1))) - 1;
+        if ( likely(*fl > 0) ) /* *fl is -1 when no higher level is set */
+        {
+            *sl = ffs(p->sl_bitmap[*fl]) - 1;
+            b = p->matrix[*fl][*sl];
+        }
+    }
+
+    return b;
+}
+
+/**
+ * Remove first free block(b) from free list with indexes (fl, sl).
+ */
+static inline void EXTRACT_BLOCK_HDR(struct bhdr *b, struct xmem_pool *p, int fl,
+                                     int sl)
+{
+    p->matrix[fl][sl] = b->ptr.free_ptr.next;
+    if ( p->matrix[fl][sl] )
+    {
+        p->matrix[fl][sl]->ptr.free_ptr.prev = NULL;
+    }
+    else
+    {
+        clear_bit(sl, &p->sl_bitmap[fl]);
+        if ( !p->sl_bitmap[fl] )
+            clear_bit(fl, &p->fl_bitmap);
+    }
+    b->ptr.free_ptr = (struct free_ptr) {NULL, NULL};
+}
+
+/**
+ * Removes block(b) from free list with indexes (fl, sl)
+ */
+static inline void EXTRACT_BLOCK(struct bhdr *b, struct xmem_pool *p, int fl,
+                                 int sl)
+{
+    if ( b->ptr.free_ptr.next )
+        b->ptr.free_ptr.next->ptr.free_ptr.prev =
+            b->ptr.free_ptr.prev;
+    if ( b->ptr.free_ptr.prev )
+        b->ptr.free_ptr.prev->ptr.free_ptr.next =
+            b->ptr.free_ptr.next;
+    if ( p->matrix[fl][sl] == b )
+    {
+        p->matrix[fl][sl] = b->ptr.free_ptr.next;
+        if ( !p->matrix[fl][sl] )
+        {
+            clear_bit(sl, &p->sl_bitmap[fl]);
+            if ( !p->sl_bitmap[fl] )
+                clear_bit (fl, &p->fl_bitmap);
+        }
+    }
+    b->ptr.free_ptr = (struct free_ptr) {NULL, NULL};
+}
+
+/**
+ * Insert block(b) in free list with indexes (fl, sl)
+ */
+static inline void INSERT_BLOCK(struct bhdr *b, struct xmem_pool *p, int fl, int sl)
+{
+    b->ptr.free_ptr = (struct free_ptr) {NULL, p->matrix[fl][sl]};
+    if ( p->matrix[fl][sl] )
+        p->matrix[fl][sl]->ptr.free_ptr.prev = b;
+    p->matrix[fl][sl] = b;
+    set_bit(sl, &p->sl_bitmap[fl]);
+    set_bit(fl, &p->fl_bitmap);
+}
+
+/**
+ * Hand a new region to the pool. A region is a virtually contiguous chunk
+ * of memory; a pool is a collection of such regions. The region is laid
+ * out as one free block followed by a zero-sized sentinel block.
+ */
+static inline void ADD_REGION(void *region, unsigned long region_size,
+                              struct xmem_pool *pool)
+{
+    struct bhdr *first = (struct bhdr *)(region);
+    struct bhdr *sentinel;
+    int fl, sl;
+
+    /* The free block excludes its own header and the sentinel's header. */
+    first->prev_hdr = NULL;
+    first->size = ROUNDDOWN_SIZE(region_size - 2 * BHDR_OVERHEAD)
+        | FREE_BLOCK | PREV_USED;
+    MAPPING_INSERT(first->size & BLOCK_SIZE_MASK, &fl, &sl);
+    INSERT_BLOCK(first, pool, fl, sl);
+
+    /* The sentinel block: allows us to know when we're in the last block */
+    sentinel = GET_NEXT_BLOCK(first->ptr.buffer,
+                              first->size & BLOCK_SIZE_MASK);
+    sentinel->prev_hdr = first;
+    sentinel->size = 0 | USED_BLOCK | PREV_FREE;
+
+    /* Only the sentinel header counts as "used" in a fresh region. */
+    pool->used_size += BHDR_OVERHEAD;
+    pool->num_regions++;
+}
+
+/*
+ * TLSF pool-based allocator start.
+ */
+
+/*
+ * Create a pool named 'name' that obtains regions through get_mem and
+ * releases them through put_mem. The three sizes are rounded up to page
+ * granularity; max_size == 0 is accepted by the sanity check below.
+ * Returns the new pool, or NULL if either the pool structure or the
+ * initial region cannot be allocated.
+ */
+struct xmem_pool *xmem_pool_create(
+    const char *name,
+    xmem_pool_get_memory get_mem,
+    xmem_pool_put_memory put_mem,
+    unsigned long init_size,
+    unsigned long max_size,
+    unsigned long grow_size)
+{
+    struct xmem_pool *pool;
+    void *region;
+    int pool_bytes, pool_order;
+
+    BUG_ON(max_size && (max_size < init_size));
+
+    /* The pool structure itself lives in xenheap pages. */
+    pool_bytes = ROUNDUP_SIZE(sizeof(*pool));
+    pool_order = get_order_from_bytes(pool_bytes);
+    pool = (void *)alloc_xenheap_pages(pool_order);
+    if ( pool == NULL )
+        return NULL;
+    memset(pool, 0, pool_bytes);
+
+    /* Round to next page boundary */
+    init_size = ROUNDUP_PAGE(init_size);
+    max_size = ROUNDUP_PAGE(max_size);
+    grow_size = ROUNDUP_PAGE(grow_size);
+
+    /* pool global overhead not included in used size */
+    pool->used_size = 0;
+
+    pool->get_mem = get_mem;
+    pool->put_mem = put_mem;
+    pool->init_size = init_size;
+    pool->max_size = max_size;
+    pool->grow_size = grow_size;
+    strlcpy(pool->name, name, sizeof(pool->name));
+
+    /* Without an initial region the pool is useless: undo and fail. */
+    region = get_mem(init_size);
+    if ( region == NULL )
+    {
+        free_xenheap_pages(pool, pool_order);
+        return NULL;
+    }
+    ADD_REGION(region, init_size, pool);
+    pool->init_region = region;
+
+    spin_lock_init(&pool->lock);
+
+    /* Publish the pool on the global list. */
+    spin_lock(&pool_list_lock);
+    list_add_tail(&pool->list, &pool_list_head);
+    spin_unlock(&pool_list_lock);
+
+    return pool;
+}
+
+/* Report the pool's used_size counter (read without taking the pool lock). */
+unsigned long xmem_pool_get_used_size(struct xmem_pool *pool)
+{
+    unsigned long in_use = pool->used_size;
+
+    return in_use;
+}
+
+/*
+ * Total footprint of the pool: the rounded-up control structure, the
+ * initial region, and one grow_size for every region beyond the first.
+ */
+unsigned long xmem_pool_get_total_size(struct xmem_pool *pool)
+{
+    return ROUNDUP_SIZE(sizeof(*pool))
+        + pool->init_size
+        + (pool->num_regions - 1) * pool->grow_size;
+}
+
+/*
+ * Tear down a pool created by xmem_pool_create(). A NULL pool is ignored.
+ * If nothing but the initial sentinel is accounted as used, the initial
+ * region is handed back through put_mem; any remaining usage is reported
+ * as a leak. The pool is then unlinked from the global list and freed.
+ */
+void xmem_pool_destroy(struct xmem_pool *pool)
+{
+    int pool_bytes, pool_order;
+
+    if ( pool == NULL )
+        return;
+
+    /* User is destroying without ever allocating from this pool */
+    if ( xmem_pool_get_used_size(pool) == BHDR_OVERHEAD )
+    {
+        pool->put_mem(pool->init_region);
+        pool->used_size -= BHDR_OVERHEAD;
+    }
+
+    /* Check for memory leaks in this pool */
+    if ( xmem_pool_get_used_size(pool) )
+        printk("memory leak in pool: %s (%p). "
+               "%lu bytes still in use.\n",
+               pool->name, pool, xmem_pool_get_used_size(pool));
+
+    spin_lock(&pool_list_lock);
+    list_del_init(&pool->list);
+    spin_unlock(&pool_list_lock);
+
+    /*
+     * The pool structure was obtained with alloc_xenheap_pages(pool_order)
+     * in xmem_pool_create(), not through get_mem, so it must be released
+     * with the matching order rather than through the put_mem callback
+     * (which only knows how to release regions).
+     */
+    pool_bytes = ROUNDUP_SIZE(sizeof(*pool));
+    pool_order = get_order_from_bytes(pool_bytes);
+    free_xenheap_pages(pool, pool_order);
+}
+
+/*
+ * Allocate at least 'size' bytes from 'pool'.
+ *
+ * The request is raised to MIN_BLOCK_SIZE or rounded with ROUNDUP_SIZE.
+ * If no free block fits, the pool is grown by one region of grow_size
+ * (subject to max_size) and the search is retried. Returns a pointer to
+ * the block's buffer, or NULL if the request cannot be satisfied.
+ */
+void *xmem_pool_alloc(unsigned long size, struct xmem_pool *pool)
+{
+    struct bhdr *b, *b2, *next_b, *region;
+    int fl, sl;
+    unsigned long tmp_size;
+
+    size = (size < MIN_BLOCK_SIZE) ? MIN_BLOCK_SIZE : ROUNDUP_SIZE(size);
+    /* Rounding up the requested size and calculating fl and sl */
+
+    spin_lock(&pool->lock);
+ retry_find:
+    MAPPING_SEARCH(&size, &fl, &sl);
+
+    /* Searching a free block */
+    if ( !(b = FIND_SUITABLE_BLOCK(pool, &fl, &sl)) )
+    {
+        /* Not found */
+        /* A fresh region's single free block would still be too small. */
+        if ( size > (pool->grow_size - 2 * BHDR_OVERHEAD) )
+            goto out_locked;
+        /* Growing by one more region would exceed the configured cap. */
+        if ( pool->max_size && (pool->init_size +
+                                pool->num_regions * pool->grow_size
+                                > pool->max_size) )
+            goto out_locked;
+        /* The pool lock is not held across the get_mem callback. */
+        spin_unlock(&pool->lock);
+        if ( (region = pool->get_mem(pool->grow_size)) == NULL )
+            goto out;
+        spin_lock(&pool->lock);
+        ADD_REGION(region, pool->grow_size, pool);
+        goto retry_find;
+    }
+    EXTRACT_BLOCK_HDR(b, pool, fl, sl);
+
+    /*-- found: */
+    next_b = GET_NEXT_BLOCK(b->ptr.buffer, b->size & BLOCK_SIZE_MASK);
+    /* Should the block be split? */
+    tmp_size = (b->size & BLOCK_SIZE_MASK) - size;
+    if ( tmp_size >= sizeof(struct bhdr) )
+    {
+        /* The tail becomes a new free block b2 placed right after b. */
+        tmp_size -= BHDR_OVERHEAD;
+        b2 = GET_NEXT_BLOCK(b->ptr.buffer, size);
+
+        b2->size = tmp_size | FREE_BLOCK | PREV_USED;
+        b2->prev_hdr = b;
+
+        next_b->prev_hdr = b2;
+
+        MAPPING_INSERT(tmp_size, &fl, &sl);
+        INSERT_BLOCK(b2, pool, fl, sl);
+
+        /* Shrink b to the request, keeping only its PREV_STATE bits. */
+        b->size = size | (b->size & PREV_STATE);
+    }
+    else
+    {
+        /* Too little left over to split: hand out the whole block. */
+        next_b->size &= (~PREV_FREE);
+        b->size &= (~FREE_BLOCK); /* Now it's used */
+    }
+
+    pool->used_size += (b->size & BLOCK_SIZE_MASK) + BHDR_OVERHEAD;
+
+    spin_unlock(&pool->lock);
+    return (void *)b->ptr.buffer;
+
+    /* Failed alloc */
+ out_locked:
+    spin_unlock(&pool->lock);
+
+ out:
+    return NULL;
+}
+
+/*
+ * Return 'ptr' (a buffer previously handed out from 'pool') to the pool.
+ * A NULL ptr is ignored. The block is merged with a free successor and/or
+ * free predecessor; if the merged block then covers its whole region, the
+ * region is released through put_mem instead of being re-inserted.
+ */
+void xmem_pool_free(void *ptr, struct xmem_pool *pool)
+{
+    struct bhdr *b, *tmp_b;
+    int fl = 0, sl = 0;
+
+    if ( unlikely(ptr == NULL) )
+        return;
+
+    /* The block header sits BHDR_OVERHEAD bytes before the buffer. */
+    b = (struct bhdr *)((char *) ptr - BHDR_OVERHEAD);
+
+    spin_lock(&pool->lock);
+    b->size |= FREE_BLOCK;
+    pool->used_size -= (b->size & BLOCK_SIZE_MASK) + BHDR_OVERHEAD;
+    b->ptr.free_ptr = (struct free_ptr) { NULL, NULL};
+    /* Merge with the following block if it is free. */
+    tmp_b = GET_NEXT_BLOCK(b->ptr.buffer, b->size & BLOCK_SIZE_MASK);
+    if ( tmp_b->size & FREE_BLOCK )
+    {
+        MAPPING_INSERT(tmp_b->size & BLOCK_SIZE_MASK, &fl, &sl);
+        EXTRACT_BLOCK(tmp_b, pool, fl, sl);
+        b->size += (tmp_b->size & BLOCK_SIZE_MASK) + BHDR_OVERHEAD;
+    }
+    /* Merge into the preceding block if it is free. */
+    if ( b->size & PREV_FREE )
+    {
+        tmp_b = b->prev_hdr;
+        MAPPING_INSERT(tmp_b->size & BLOCK_SIZE_MASK, &fl, &sl);
+        EXTRACT_BLOCK(tmp_b, pool, fl, sl);
+        tmp_b->size += (b->size & BLOCK_SIZE_MASK) + BHDR_OVERHEAD;
+        b = tmp_b;
+    }
+    /* Re-point the block after the (possibly merged) b back at it. */
+    tmp_b = GET_NEXT_BLOCK(b->ptr.buffer, b->size & BLOCK_SIZE_MASK);
+    tmp_b->prev_hdr = b;
+
+    MAPPING_INSERT(b->size & BLOCK_SIZE_MASK, &fl, &sl);
+
+    /*
+     * b has no predecessor and is followed by a zero-sized block (the
+     * sentinel written by ADD_REGION): the whole region is free, so give
+     * it back. Note that put_mem is called with the pool lock held.
+     */
+    if ( (b->prev_hdr == NULL) && ((tmp_b->size & BLOCK_SIZE_MASK) == 0) )
+    {
+        pool->put_mem(b);
+        pool->num_regions--;
+        pool->used_size -= BHDR_OVERHEAD; /* sentinel block header */
+        goto out;
+    }
+
+    INSERT_BLOCK(b, pool, fl, sl);
+
+    tmp_b->size |= PREV_FREE;
+    tmp_b->prev_hdr = b;
+ out:
+    spin_unlock(&pool->lock);
+}
+
+/*
+ * Glue for xmalloc().
+ */
+
+static struct xmem_pool *xenpool;
+
+/* get_mem callback of the xmalloc pool: regions are single xenheap pages. */
+static void *xmalloc_pool_get(unsigned long size)
+{
+    void *page;
+
+    ASSERT(size == PAGE_SIZE);
+    page = alloc_xenheap_pages(0);
+
+    return page;
+}
+
+/* put_mem callback of the xmalloc pool: release one order-0 xenheap page. */
+static void xmalloc_pool_put(void *p)
+{
+    free_xenheap_pages(p, 0);
+}
+
+/*
+ * Serve a large request straight from the xenheap. A block header is
+ * placed at the start of the pages and records the total allocation size
+ * in bytes; the caller receives the buffer that follows the header.
+ * Returns NULL if the pages cannot be allocated.
+ */
+static void *xmalloc_whole_pages(unsigned long size)
+{
+    unsigned int order = get_order_from_bytes(size + BHDR_OVERHEAD);
+    struct bhdr *hdr = alloc_xenheap_pages(order);
+
+    if ( hdr != NULL )
+    {
+        hdr->size = (1 << (order + PAGE_SHIFT));
+        return (void *)hdr->ptr.buffer;
+    }
+
+    return NULL;
+}
+
+/*
+ * Set up the global pool list and create the pool that backs xmalloc().
+ * The pool starts with one page, grows a page at a time and has no size
+ * cap (max_size of 0). Failure to create it is fatal.
+ */
+static void tlsf_init(void)
+{
+    spin_lock_init(&pool_list_lock);
+    INIT_LIST_HEAD(&pool_list_head);
+
+    xenpool = xmem_pool_create("xmalloc",
+                               xmalloc_pool_get, xmalloc_pool_put,
+                               PAGE_SIZE, 0, PAGE_SIZE);
+    BUG_ON(!xenpool);
+}
+
+/*
+ * xmalloc()
+ */
+
+/*
+ * Allocate 'size' bytes aligned to 'align' (which must be a power of two;
+ * values below MEM_ALIGN are raised to MEM_ALIGN). Must not be called in
+ * IRQ context. Requests close to a page or larger go to whole xenheap
+ * pages; everything else is served from the TLSF pool, which is created
+ * on first use.
+ */
+void *_xmalloc(unsigned long size, unsigned long align)
+{
+    void *p;
+    u32 pad;
+
+    ASSERT(!in_irq());
+
+    ASSERT((align & (align - 1)) == 0);
+    if ( align < MEM_ALIGN )
+        align = MEM_ALIGN;
+    /* Reserve room for the worst-case alignment padding added below. */
+    size += align - MEM_ALIGN;
+
+    if ( !xenpool )
+        tlsf_init();
+
+    if ( size >= (PAGE_SIZE - (2*BHDR_OVERHEAD)) )
+        p = xmalloc_whole_pages(size);
+    else
+        p = xmem_pool_alloc(size, xenpool);
+
+    /* Add alignment padding. */
+    /* (A NULL p yields pad == 0, so a failed allocation falls through.) */
+    if ( (pad = -(long)p & (align - 1)) != 0 )
+    {
+        char *q = (char *)p + pad;
+        struct bhdr *b = (struct bhdr *)(q - BHDR_OVERHEAD);
+        ASSERT(q > (char *)p);
+        /*
+         * Write a pseudo-header just before the aligned pointer: the pad
+         * length with bit 0 set, which xfree() uses to find the real start.
+         */
+        b->size = pad | 1;
+        p = q;
+    }
+
+    ASSERT(((unsigned long)p & (align - 1)) == 0);
+    return p;
+}
+
+/*
+ * Free memory obtained from _xmalloc(). NULL is ignored. Must not be
+ * called in IRQ context.
+ */
+void xfree(void *p)
+{
+    struct bhdr *b;
+
+    ASSERT(!in_irq());
+
+    if ( p == NULL )
+        return;
+
+    /* Strip alignment padding. */
+    b = (struct bhdr *)((char *) p - BHDR_OVERHEAD);
+    if ( b->size & 1 )
+    {
+        /* Bit 0 set: pseudo-header from _xmalloc(); the rest is the pad. */
+        p = (char *)p - (b->size & ~1u);
+        b = (struct bhdr *)((char *)p - BHDR_OVERHEAD);
+        ASSERT(!(b->size & 1));
+    }
+
+    /* Same size threshold as _xmalloc() uses to pick whole pages vs. pool. */
+    if ( b->size >= (PAGE_SIZE - (2*BHDR_OVERHEAD)) )
+        free_xenheap_pages((void *)b, get_order_from_bytes(b->size));
+    else
+        xmem_pool_free(p, xenpool);
+}
diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/Makefile
--- a/xen/drivers/Makefile      Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/drivers/Makefile      Wed Oct 22 11:46:55 2008 +0900
@@ -1,6 +1,6 @@ subdir-y += char
 subdir-y += char
 subdir-y += cpufreq
 subdir-y += pci
-subdir-$(x86) += passthrough
+subdir-y += passthrough
 subdir-$(HAS_ACPI) += acpi
 subdir-$(HAS_VGA) += video
diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/acpi/pmstat.c
--- a/xen/drivers/acpi/pmstat.c Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/drivers/acpi/pmstat.c Wed Oct 22 11:46:55 2008 +0900
@@ -52,7 +52,7 @@ int do_get_pm_info(struct xen_sysctl_get
     int ret = 0;
     const struct processor_pminfo *pmpt;
 
-    if ( (op->cpuid >= NR_CPUS) || !cpu_online(op->cpuid) )
+    if ( !op || (op->cpuid >= NR_CPUS) || !cpu_online(op->cpuid) )
         return -EINVAL;
     pmpt = processor_pminfo[op->cpuid];
 
@@ -87,7 +87,7 @@ int do_get_pm_info(struct xen_sysctl_get
         uint64_t tmp_idle_ns;
         struct pm_px *pxpt = cpufreq_statistic_data[op->cpuid];
 
-        if ( !pxpt )
+        if ( !pxpt || !pxpt->u.pt || !pxpt->u.trans_pt )
             return -ENODATA;
 
         total_idle_ns = get_cpu_idle_time(op->cpuid);
diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/char/ns16550.c
--- a/xen/drivers/char/ns16550.c        Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/drivers/char/ns16550.c        Wed Oct 22 11:46:55 2008 +0900
@@ -18,17 +18,19 @@
 #include <asm/io.h>
 
 /*
- * Configure serial port with a string <baud>,DPS,<io-base>,<irq>.
+ * Configure serial port with a string:
+ *   <baud>[/<clock_hz>][,DPS[,<io-base>[,<irq>]]].
  * The tail of the string can be omitted if platform defaults are sufficient.
  * If the baud rate is pre-configured, perhaps by a bootloader, then 'auto'
- * can be specified in place of a numeric baud rate.
+ * can be specified in place of a numeric baud rate. Polled mode is specified
+ * by requesting irq 0.
  */
 static char opt_com1[30] = "", opt_com2[30] = "";
 string_param("com1", opt_com1);
 string_param("com2", opt_com2);
 
 static struct ns16550 {
-    int baud, data_bits, parity, stop_bits, irq;
+    int baud, clock_hz, data_bits, parity, stop_bits, irq;
     unsigned long io_base;   /* I/O port or memory-mapped I/O address. */
     char *remapped_io_base;  /* Remapped virtual address of mmap I/O.  */ 
     /* UART with IRQ line: interrupt-driven I/O. */
@@ -192,7 +194,7 @@ static void __devinit ns16550_init_preir
     if ( uart->baud != BAUD_AUTO )
     {
         /* Baud rate specified: program it into the divisor latch. */
-        divisor = UART_CLOCK_HZ / (uart->baud * 16);
+        divisor = uart->clock_hz / (uart->baud << 4);
         ns_write_reg(uart, DLL, (char)divisor);
         ns_write_reg(uart, DLM, (char)(divisor >> 8));
     }
@@ -201,7 +203,7 @@ static void __devinit ns16550_init_preir
         /* Baud rate already set: read it out from the divisor latch. */
         divisor  = ns_read_reg(uart, DLL);
         divisor |= ns_read_reg(uart, DLM) << 8;
-        uart->baud = UART_CLOCK_HZ / (divisor * 16);
+        uart->baud = uart->clock_hz / (divisor << 4);
     }
     ns_write_reg(uart, LCR, lcr);
 
@@ -354,6 +356,12 @@ static void __init ns16550_parse_port_co
     }
     else if ( (baud = simple_strtoul(conf, &conf, 10)) != 0 )
         uart->baud = baud;
+
+    if ( *conf == '/')
+    {
+        conf++;
+        uart->clock_hz = simple_strtoul(conf, &conf, 0) << 4;
+    }
 
     if ( *conf != ',' )
         goto config_parsed;
@@ -408,6 +416,7 @@ void __init ns16550_init(int index, stru
     uart->baud      = (defaults->baud ? :
                        console_has((index == 0) ? "com1" : "com2")
                        ? BAUD_AUTO : 0);
+    uart->clock_hz  = UART_CLOCK_HZ;
     uart->data_bits = defaults->data_bits;
     uart->parity    = parse_parity_char(defaults->parity);
     uart->stop_bits = defaults->stop_bits;
diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/cpufreq/cpufreq.c
--- a/xen/drivers/cpufreq/cpufreq.c     Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/drivers/cpufreq/cpufreq.c     Wed Oct 22 11:46:55 2008 +0900
@@ -34,6 +34,7 @@
 #include <xen/sched.h>
 #include <xen/timer.h>
 #include <xen/xmalloc.h>
+#include <xen/guest_access.h>
 #include <xen/domain.h>
 #include <asm/bug.h>
 #include <asm/io.h>
@@ -185,10 +186,18 @@ int cpufreq_del_cpu(unsigned int cpu)
     return 0;
 }
 
+/* Log every field of one _PCT register descriptor at KERN_INFO level. */
+static void print_PCT(struct xen_pct_register *ptr)
+{
+    printk(KERN_INFO "\t_PCT: descriptor=%d, length=%d, space_id=%d, "
+            "bit_width=%d, bit_offset=%d, reserved=%d, address=%"PRId64"\n",
+            ptr->descriptor,
+            ptr->length,
+            ptr->space_id,
+            ptr->bit_width,
+            ptr->bit_offset,
+            ptr->reserved,
+            ptr->address);
+}
+
 static void print_PSS(struct xen_processor_px *ptr, int count)
 {
     int i;
-    printk(KERN_INFO "\t_PSS:\n");
+    printk(KERN_INFO "\t_PSS: state_count=%d\n", count);
     for (i=0; i<count; i++){
         printk(KERN_INFO "\tState%d: %"PRId64"MHz %"PRId64"mW %"PRId64"us "
                "%"PRId64"us 0x%"PRIx64" 0x%"PRIx64"\n",
@@ -211,20 +220,19 @@ static void print_PSD( struct xen_psd_pa
             ptr->num_processors);
 }
 
+/* Log the _PPC platform limit at KERN_INFO level. */
+static void print_PPC(unsigned int platform_limit)
+{
+    /* %u, not %d: the argument is an unsigned int. */
+    printk(KERN_INFO "\t_PPC: %u\n", platform_limit);
+}
+
 int set_px_pminfo(uint32_t acpi_id, struct xen_processor_performance 
*dom0_px_info)
 {
     int ret=0, cpuid;
     struct processor_pminfo *pmpt;
     struct processor_performance *pxpt;
 
-    if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
-    {
-        ret = -ENOSYS;
-        goto out;
-    }
-
     cpuid = get_cpu_id(acpi_id);
-    if ( cpuid < 0 )
+    if ( cpuid < 0 || !dom0_px_info)
     {
         ret = -EINVAL;
         goto out;
@@ -256,6 +264,8 @@ int set_px_pminfo(uint32_t acpi_id, stru
         memcpy ((void *)&pxpt->status_register,
                 (void *)&dom0_px_info->status_register,
                 sizeof(struct xen_pct_register));
+        print_PCT(&pxpt->control_register);
+        print_PCT(&pxpt->status_register);
     }
     if ( dom0_px_info->flags & XEN_PX_PSS ) 
     {
@@ -265,12 +275,8 @@ int set_px_pminfo(uint32_t acpi_id, stru
             ret = -ENOMEM;
             goto out;
         }
-        if ( xenpf_copy_px_states(pxpt, dom0_px_info) )
-        {
-            xfree(pxpt->states);
-            ret = -EFAULT;
-            goto out;
-        }
+        copy_from_guest(pxpt->states, dom0_px_info->states, 
+                                      dom0_px_info->state_count);
         pxpt->state_count = dom0_px_info->state_count;
         print_PSS(pxpt->states,pxpt->state_count);
     }
@@ -285,6 +291,7 @@ int set_px_pminfo(uint32_t acpi_id, stru
     if ( dom0_px_info->flags & XEN_PX_PPC )
     {
         pxpt->platform_limit = dom0_px_info->platform_limit;
+        print_PPC(pxpt->platform_limit);
 
         if ( pxpt->init == XEN_PX_INIT )
         {
diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/cpufreq/utility.c
--- a/xen/drivers/cpufreq/utility.c     Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/drivers/cpufreq/utility.c     Wed Oct 22 11:46:55 2008 +0900
@@ -27,6 +27,7 @@
 #include <xen/types.h>
 #include <xen/sched.h>
 #include <xen/timer.h>
+#include <xen/trace.h>
 #include <asm/config.h>
 #include <acpi/cpufreq/cpufreq.h>
 #include <public/sysctl.h>
@@ -72,27 +73,30 @@ int cpufreq_statistic_init(unsigned int 
     struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
     const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
 
+    if ( !pmpt )
+        return -EINVAL;
+
+    if ( pxpt )
+        return 0;
+
     count = pmpt->perf.state_count;
 
-    if ( !pmpt )
-        return -EINVAL;
-
+    pxpt = xmalloc(struct pm_px);
     if ( !pxpt )
-    {
-        pxpt = xmalloc(struct pm_px);
-        if ( !pxpt )
-            return -ENOMEM;
-        memset(pxpt, 0, sizeof(*pxpt));
-        cpufreq_statistic_data[cpuid] = pxpt;
-    }
+        return -ENOMEM;
+    memset(pxpt, 0, sizeof(*pxpt));
+    cpufreq_statistic_data[cpuid] = pxpt;
 
     pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count);
-    if (!pxpt->u.trans_pt)
+    if (!pxpt->u.trans_pt) {
+        xfree(pxpt);
         return -ENOMEM;
+    }
 
     pxpt->u.pt = xmalloc_array(struct pm_px_val, count);
     if (!pxpt->u.pt) {
         xfree(pxpt->u.trans_pt);
+        xfree(pxpt);
         return -ENOMEM;
     }
 
@@ -119,7 +123,8 @@ void cpufreq_statistic_exit(unsigned int
         return;
     xfree(pxpt->u.trans_pt);
     xfree(pxpt->u.pt);
-    memset(pxpt, 0, sizeof(struct pm_px));
+    xfree(pxpt);
+    cpufreq_statistic_data[cpuid] = NULL;
 }
 
 void cpufreq_statistic_reset(unsigned int cpuid)
@@ -128,7 +133,7 @@ void cpufreq_statistic_reset(unsigned in
     struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
     const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
 
-    if ( !pxpt || !pmpt )
+    if ( !pmpt || !pxpt || !pxpt->u.pt || !pxpt->u.trans_pt )
         return;
 
     count = pmpt->perf.state_count;
@@ -293,7 +298,13 @@ int __cpufreq_driver_target(struct cpufr
     int retval = -EINVAL;
 
     if (cpu_online(policy->cpu) && cpufreq_driver->target)
+    {
+        unsigned int prev_freq = policy->cur;
+
         retval = cpufreq_driver->target(policy, target_freq, relation);
+        if ( retval == 0 )
+            TRACE_2D(TRC_PM_FREQ_CHANGE, prev_freq/1000, policy->cur/1000);
+    }
 
     return retval;
 }
diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/Makefile
--- a/xen/drivers/passthrough/Makefile  Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/drivers/passthrough/Makefile  Wed Oct 22 11:46:55 2008 +0900
@@ -1,4 +1,5 @@ subdir-$(x86) += vtd
 subdir-$(x86) += vtd
+subdir-$(ia64) += vtd
 subdir-$(x86) += amd
 
 obj-y += iommu.o
diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/io.c
--- a/xen/drivers/passthrough/io.c      Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/drivers/passthrough/io.c      Wed Oct 22 11:46:55 2008 +0900
@@ -20,6 +20,9 @@
 
 #include <xen/event.h>
 #include <xen/iommu.h>
+#include <asm/hvm/irq.h>
+#include <asm/hvm/iommu.h>
+#include <xen/hvm/irq.h>
 
 static void pt_irq_time_out(void *data)
 {
@@ -245,6 +248,7 @@ int hvm_do_IRQ_dpci(struct domain *d, un
     return 1;
 }
 
+#ifdef SUPPORT_MSI_REMAPPING
 void hvm_dpci_msi_eoi(struct domain *d, int vector)
 {
     struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
@@ -278,6 +282,63 @@ void hvm_dpci_msi_eoi(struct domain *d, 
     spin_unlock(&d->event_lock);
 }
 
+extern int vmsi_deliver(struct domain *d, int pirq);
+/* Deliver the MSI bound to 'pirq' to domain 'd' via vmsi_deliver(). */
+static int hvm_pci_msi_assert(struct domain *d, int pirq)
+{
+    int rc = vmsi_deliver(d, pirq);
+
+    return rc;
+}
+#endif
+
+/*
+ * Forward pending passthrough interrupts, recorded in the domain's
+ * dirq_mask, to the guest. Does nothing unless the IOMMU is enabled, the
+ * caller is vcpu 0 and the domain has dpci state.
+ */
+void hvm_dirq_assist(struct vcpu *v)
+{
+    unsigned int irq;
+    uint32_t device, intx;
+    struct domain *d = v->domain;
+    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
+    struct dev_intx_gsi_link *digl;
+
+    if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
+        return;
+
+    /* Walk every bit currently set in dirq_mask. */
+    for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS);
+          irq < NR_IRQS;
+          irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) )
+    {
+        /* Claim the irq; skip it if the bit was already cleared. */
+        if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
+            continue;
+
+        spin_lock(&d->event_lock);
+#ifdef SUPPORT_MSI_REMAPPING
+        /* MSI-flagged irqs are delivered directly; no INTx or timer work. */
+        if ( test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[irq].flags) )
+        {
+            hvm_pci_msi_assert(d, irq);
+            spin_unlock(&d->event_lock);
+            continue;
+        }
+#endif
+        /* Stop the timeout timer for this irq; it is re-armed below. */
+        stop_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)]);
+
+        /* Assert INTx for every (device, intx) linked to this irq. */
+        list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list )
+        {
+            device = digl->device;
+            intx = digl->intx;
+            hvm_pci_intx_assert(d, device, intx);
+            hvm_irq_dpci->mirq[irq].pending++;
+        }
+
+        /*
+         * Set a timer to see if the guest can finish the interrupt or not. For
+         * example, the guest OS may unmask the PIC during boot, before the
+         * guest driver is loaded. hvm_pci_intx_assert() may succeed, but the
+         * guest will never deal with the irq, then the physical interrupt line
+         * will never be deasserted.
+         */
+        set_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)],
+                  NOW() + PT_IRQ_TIME_OUT);
+        spin_unlock(&d->event_lock);
+    }
+}
+
 void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
                   union vioapic_redir_entry *ent)
 {
diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c   Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/drivers/passthrough/iommu.c   Wed Oct 22 11:46:55 2008 +0900
@@ -19,8 +19,6 @@
 #include <xen/paging.h>
 #include <xen/guest_access.h>
 
-extern struct iommu_ops intel_iommu_ops;
-extern struct iommu_ops amd_iommu_ops;
 static void parse_iommu_param(char *s);
 static int iommu_populate_page_table(struct domain *d);
 int intel_vtd_setup(void);
diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/pci.c
--- a/xen/drivers/passthrough/pci.c     Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/drivers/passthrough/pci.c     Wed Oct 22 11:46:55 2008 +0900
@@ -21,6 +21,8 @@
 #include <xen/list.h>
 #include <xen/prefetch.h>
 #include <xen/iommu.h>
+#include <asm/hvm/iommu.h>
+#include <asm/hvm/irq.h>
 #include <xen/delay.h>
 #include <xen/keyhandler.h>
 
@@ -207,6 +209,7 @@ void pci_release_devices(struct domain *
     }
 }
 
+#ifdef SUPPORT_MSI_REMAPPING
 static void dump_pci_devices(unsigned char ch)
 {
     struct pci_dev *pdev;
@@ -236,7 +239,7 @@ static int __init setup_dump_pcidevs(voi
     return 0;
 }
 __initcall(setup_dump_pcidevs);
-
+#endif
 
 
 /*
diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/vtd/Makefile
--- a/xen/drivers/passthrough/vtd/Makefile      Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/drivers/passthrough/vtd/Makefile      Wed Oct 22 11:46:55 2008 +0900
@@ -1,4 +1,5 @@ subdir-$(x86) += x86
 subdir-$(x86) += x86
+subdir-$(ia64) += ia64
 
 obj-y += iommu.o
 obj-y += dmar.o
diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/vtd/ia64/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/passthrough/vtd/ia64/Makefile Wed Oct 22 11:46:55 2008 +0900
@@ -0,0 +1,1 @@
+obj-y += vtd.o
diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/vtd/ia64/vtd.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/passthrough/vtd/ia64/vtd.c    Wed Oct 22 11:46:55 2008 +0900
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx>
+ * Copyright (C) Weidong Han <weidong.han@xxxxxxxxx>
+ */
+
+#include <xen/sched.h>
+#include <xen/domain_page.h>
+#include <xen/iommu.h>
+#include <asm/xensystem.h>
+#include <asm/sal.h>
+#include "../iommu.h"
+#include "../dmar.h"
+#include "../vtd.h"
+
+
+int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
+/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
+u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
+
+/*
+ * Map the page containing machine address 'maddr' and return a pointer to
+ * the 4K-aligned sub-page of it that 'maddr' falls in.
+ */
+void *map_vtd_domain_page(u64 maddr)
+{
+    u64 page_va = (u64)map_domain_page(maddr >> PAGE_SHIFT);
+    u64 sub_page = maddr & (PAGE_SIZE - PAGE_SIZE_4K);
+
+    return (void *)(page_va | sub_page);
+}
+
+/* Undo a mapping: forwards 'va' unchanged to unmap_domain_page(). */
+void unmap_vtd_domain_page(void *va)
+{
+    unmap_domain_page(va);
+}
+
+/*
+ * Allocate a zeroed page-table page and return its machine address.
+ * The page is cleared and flushed with iommu_flush_cache_page() before
+ * being unmapped. Returns 0 if the page cannot be allocated or mapped.
+ */
+u64 alloc_pgtable_maddr(void)
+{
+    struct page_info *pg;
+    u64 *vaddr;
+
+    /* Do not pass a failed allocation on to page_to_mfn(). */
+    pg = alloc_domheap_page(NULL, 0);
+    if ( !pg )
+        return 0;
+
+    vaddr = map_domain_page(page_to_mfn(pg));
+    if ( !vaddr )
+    {
+        /* Do not leak the page when it cannot be mapped. */
+        free_domheap_page(pg);
+        return 0;
+    }
+    memset(vaddr, 0, PAGE_SIZE);
+
+    iommu_flush_cache_page(vaddr);
+    unmap_domain_page(vaddr);
+
+    return page_to_maddr(pg);
+}
+
+/* Free a page-table page given its machine address; 0 is ignored. */
+void free_pgtable_maddr(u64 maddr)
+{
+    if ( maddr == 0 )
+        return;
+
+    free_domheap_page(maddr_to_page(maddr));
+}
+
+/* Cache line size used by the VT-d code on ia64: L1_CACHE_BYTES. */
+unsigned int get_cache_line_size(void)
+{
+    unsigned int line_bytes = L1_CACHE_BYTES;
+
+    return line_bytes;
+}
+
+/*
+ * Flush the cache line containing 'addr'.
+ * NOTE(review): presumably ia64_fc() issues the flush and the following
+ * sync/serialize calls make it visible before returning -- confirm against
+ * the ia64 intrinsics' documentation. The call order is kept as written.
+ */
+void cacheline_flush(char * addr)
+{
+    ia64_fc(addr);
+    ia64_sync_i();
+    ia64_srlz_i();
+}
+
+/*
+ * Flush caches through the SAL cache-flush service.
+ * NOTE(review): the argument 3 presumably selects both instruction and
+ * data caches -- confirm against the SAL specification.
+ */
+void flush_all_cache(void)
+{
+    ia64_sal_cache_flush(3);
+}
+
+/*
+ * Translate machine address 'maddr' into a pointer by adding
+ * __IA64_UNCACHED_OFFSET. 'nr_iommus' is not used on ia64.
+ */
+void * map_to_nocache_virt(int nr_iommus, u64 maddr)
+{
+    u64 uncached_va = maddr + __IA64_UNCACHED_OFFSET;
+
+    return (void *)uncached_va;
+}
+
+/* Return the domain's dpci state, or NULL when 'domain' is NULL. */
+struct hvm_irq_dpci *domain_get_irq_dpci(struct domain *domain)
+{
+    return domain ? domain->arch.hvm_domain.irq.dpci : NULL;
+}
+
+/*
+ * Attach 'dpci' to 'domain'. Returns 1 on success, 0 (and changes
+ * nothing) when either argument is NULL.
+ */
+int domain_set_irq_dpci(struct domain *domain, struct hvm_irq_dpci *dpci)
+{
+    if ( domain && dpci )
+    {
+        domain->arch.hvm_domain.irq.dpci = dpci;
+        return 1;
+    }
+
+    return 0;
+}
+
+/* Intentionally empty on ia64: both arguments are ignored. */
+void hvm_dpci_isairq_eoi(struct domain *d, unsigned int isairq)
+{
+    /* dummy */
+}
diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/vtd/intremap.c
--- a/xen/drivers/passthrough/vtd/intremap.c    Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/drivers/passthrough/vtd/intremap.c    Wed Oct 22 11:46:55 2008 +0900
@@ -21,6 +21,7 @@
 #include <xen/irq.h>
 #include <xen/sched.h>
 #include <xen/iommu.h>
+#include <asm/hvm/iommu.h>
 #include <xen/time.h>
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
@@ -128,7 +129,13 @@ static int ioapic_rte_to_remap_entry(str
     memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
 
     if ( rte_upper )
+    {
+#if defined(__i386__) || defined(__x86_64__)
         new_ire.lo.dst = (value >> 24) << 8;
+#else /* __ia64__ */
+        new_ire.lo.dst = value >> 16;
+#endif
+    }
     else
     {
         *(((u32 *)&new_rte) + 0) = value;
@@ -179,7 +186,7 @@ unsigned int io_apic_read_remap_rte(
     struct IO_xAPIC_route_entry old_rte = { 0 };
     struct IO_APIC_route_remap_entry *remap_rte;
     int rte_upper = (reg & 1) ? 1 : 0;
-    struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid);
+    struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
 
     if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 ||
@@ -224,7 +231,7 @@ void io_apic_write_remap_rte(
     struct IO_xAPIC_route_entry old_rte = { 0 };
     struct IO_APIC_route_remap_entry *remap_rte;
     unsigned int rte_upper = (reg & 1) ? 1 : 0;
-    struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid);
+    struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
     int saved_mask;
 
@@ -253,7 +260,7 @@ void io_apic_write_remap_rte(
     *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0);
     remap_rte->mask = saved_mask;
 
-    if ( ioapic_rte_to_remap_entry(iommu, mp_ioapics[apic].mpc_apicid,
+    if ( ioapic_rte_to_remap_entry(iommu, IO_APIC_ID(apic),
                                    &old_rte, rte_upper, value) )
     {
         *IO_APIC_BASE(apic) = rte_upper ? (reg + 1) : reg;
@@ -328,7 +335,8 @@ static int remap_entry_to_msi_msg(
 }
 
 static int msi_msg_to_remap_entry(
-    struct iommu *iommu, struct pci_dev *pdev, struct msi_msg *msg)
+    struct iommu *iommu, struct pci_dev *pdev,
+    struct msi_desc *msi_desc, struct msi_msg *msg)
 {
     struct iremap_entry *iremap_entry = NULL, *iremap_entries;
     struct iremap_entry new_ire;
@@ -336,32 +344,18 @@ static int msi_msg_to_remap_entry(
     unsigned int index;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
-    int i = 0;
 
     remap_rte = (struct msi_msg_remap_entry *) msg;
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
 
-    iremap_entries =
-        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
-
-    /* If the entry for a PCI device has been there, use the old entry,
-     * Or, assign a new entry for it.
-     */
-    for ( i = 0; i <= ir_ctrl->iremap_index; i++ )
-    {
-        iremap_entry = &iremap_entries[i];
-        if ( iremap_entry->hi.sid ==
-             ((pdev->bus << 8) | pdev->devfn) )
-           break;
-    }
-
-    if ( i > ir_ctrl->iremap_index )
-    {
-       ir_ctrl->iremap_index++;
+    if ( msi_desc->remap_index < 0 )
+    {
+        ir_ctrl->iremap_index++;
         index = ir_ctrl->iremap_index;
+        msi_desc->remap_index = index;
     }
     else
-        index = i;
+        index = msi_desc->remap_index;
 
     if ( index > IREMAP_ENTRY_NR - 1 )
     {
@@ -369,11 +363,13 @@ static int msi_msg_to_remap_entry(
                 "%s: intremap index (%d) is larger than"
                 " the maximum index (%ld)!\n",
                 __func__, index, IREMAP_ENTRY_NR - 1);
-        unmap_vtd_domain_page(iremap_entries);
+        msi_desc->remap_index = -1;
         spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
         return -EFAULT;
     }
 
+    iremap_entries =
+        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
     iremap_entry = &iremap_entries[index];
     memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
 
@@ -450,7 +446,7 @@ void msi_msg_write_remap_rte(
     if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 )
         return;
 
-    msi_msg_to_remap_entry(iommu, pdev, msg);
+    msi_msg_to_remap_entry(iommu, pdev, msi_desc, msg);
 }
 #elif defined(__ia64__)
 void msi_msg_read_remap_rte(
@@ -482,7 +478,7 @@ int intremap_setup(struct iommu *iommu)
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
                     "Cannot allocate memory for ir_ctrl->iremap_maddr\n");
-            return -ENODEV;
+            return -ENOMEM;
         }
         ir_ctrl->iremap_index = -1;
     }
@@ -490,10 +486,10 @@ int intremap_setup(struct iommu *iommu)
 #if defined(ENABLED_EXTENDED_INTERRUPT_SUPPORT)
     /* set extended interrupt mode bit */
     ir_ctrl->iremap_maddr |=
-            ecap_ext_intr(iommu->ecap) ? (1 << IRTA_REG_EIMI_SHIFT) : 0;
+            ecap_ext_intr(iommu->ecap) ? (1 << IRTA_REG_EIME_SHIFT) : 0;
 #endif
-    /* size field = 256 entries per 4K page = 8 - 1 */
-    ir_ctrl->iremap_maddr |= 7;
+    /* set size of the interrupt remapping table */ 
+    ir_ctrl->iremap_maddr |= IRTA_REG_TABLE_SIZE;
     dmar_writeq(iommu->reg, DMAR_IRTA_REG, ir_ctrl->iremap_maddr);
 
     /* set SIRTP */
diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/drivers/passthrough/vtd/iommu.c       Wed Oct 22 11:46:55 2008 +0900
@@ -24,6 +24,7 @@
 #include <xen/xmalloc.h>
 #include <xen/domain_page.h>
 #include <xen/iommu.h>
+#include <asm/hvm/iommu.h>
 #include <xen/numa.h>
 #include <xen/time.h>
 #include <xen/pci.h>
@@ -218,10 +219,10 @@ static u64 addr_to_dma_page_maddr(struct
             if ( !alloc )
                 break;
             maddr = alloc_pgtable_maddr();
+            if ( !maddr )
+                break;
             dma_set_pte_addr(*pte, maddr);
             vaddr = map_vtd_domain_page(maddr);
-            if ( !vaddr )
-                break;
 
             /*
              * high level table always sets r/w, last level
@@ -234,8 +235,6 @@ static u64 addr_to_dma_page_maddr(struct
         else
         {
             vaddr = map_vtd_domain_page(pte->val);
-            if ( !vaddr )
-                break;
         }
 
         if ( level == 2 )
@@ -567,26 +566,6 @@ static void dma_pte_clear_one(struct dom
     }
 
     unmap_vtd_domain_page(page);
-}
-
-/* clear last level pte, a tlb flush should be followed */
-static void dma_pte_clear_range(struct domain *domain, u64 start, u64 end)
-{
-    struct hvm_iommu *hd = domain_hvm_iommu(domain);
-    int addr_width = agaw_to_width(hd->agaw);
-
-    start &= (((u64)1) << addr_width) - 1;
-    end &= (((u64)1) << addr_width) - 1;
-    /* in case it's partial page */
-    start = PAGE_ALIGN_4K(start);
-    end &= PAGE_MASK_4K;
-
-    /* we don't need lock here, nobody else touches the iova range */
-    while ( start < end )
-    {
-        dma_pte_clear_one(domain, start);
-        start += PAGE_SIZE_4K;
-    }
 }
 
 static void iommu_free_pagetable(u64 pt_maddr, int level)
@@ -877,6 +856,7 @@ static void dma_msi_data_init(struct iom
     spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
 
+#ifdef SUPPORT_MSI_REMAPPING
 static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu)
 {
     u64 msi_address;
@@ -893,6 +873,12 @@ static void dma_msi_addr_init(struct iom
     dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32));
     spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
+#else
+static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu)
+{
+    /* ia64: TODO */
+}
+#endif
 
 static void dma_msi_set_affinity(unsigned int vector, cpumask_t dest)
 {
@@ -1024,7 +1010,7 @@ static int intel_iommu_domain_init(struc
 {
     struct hvm_iommu *hd = domain_hvm_iommu(d);
     struct iommu *iommu = NULL;
-    u64 i;
+    u64 i, j, tmp;
     struct acpi_drhd_unit *drhd;
 
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
@@ -1043,11 +1029,13 @@ static int intel_iommu_domain_init(struc
          */
         for ( i = 0; i < max_page; i++ )
         {
-            if ( xen_in_range(i << PAGE_SHIFT_4K, (i + 1) << PAGE_SHIFT_4K) ||
-                 tboot_in_range(i << PAGE_SHIFT_4K, (i + 1) << PAGE_SHIFT_4K) )
+            if ( xen_in_range(i << PAGE_SHIFT, (i + 1) << PAGE_SHIFT) ||
+                 tboot_in_range(i << PAGE_SHIFT, (i + 1) << PAGE_SHIFT) )
                 continue;
 
-            iommu_map_page(d, i, i);
+            tmp = 1 << (PAGE_SHIFT - PAGE_SHIFT_4K);
+            for ( j = 0; j < tmp; j++ )
+                iommu_map_page(d, (i*tmp+j), (i*tmp+j));
         }
 
         setup_dom0_devices(d);
@@ -1511,75 +1499,26 @@ int intel_iommu_unmap_page(struct domain
     return 0;
 }
 
-int iommu_page_mapping(struct domain *domain, paddr_t iova,
-                       paddr_t hpa, size_t size, int prot)
-{
-    struct hvm_iommu *hd = domain_hvm_iommu(domain);
-    struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
-    u64 start_pfn, end_pfn;
-    struct dma_pte *page = NULL, *pte = NULL;
-    int index;
-    u64 pg_maddr;
-
-    if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
-        return -EINVAL;
-
-    iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
-    start_pfn = hpa >> PAGE_SHIFT_4K;
-    end_pfn = (PAGE_ALIGN_4K(hpa + size)) >> PAGE_SHIFT_4K;
-    index = 0;
-    while ( start_pfn < end_pfn )
-    {
-        pg_maddr = addr_to_dma_page_maddr(domain, iova + PAGE_SIZE_4K*index, 1);
-        if ( pg_maddr == 0 )
-            return -ENOMEM;
-        page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
-        pte = page + (start_pfn & LEVEL_MASK);
-        dma_set_pte_addr(*pte, (paddr_t)start_pfn << PAGE_SHIFT_4K);
-        dma_set_pte_prot(*pte, prot);
-        iommu_flush_cache_entry(pte);
-        unmap_vtd_domain_page(page);
-        start_pfn++;
-        index++;
-    }
-
-    if ( index > 0 )
-    {
-        for_each_drhd_unit ( drhd )
-        {
-            iommu = drhd->iommu;
-            if ( test_bit(iommu->index, &hd->iommu_bitmap) )
-                if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
-                                           iova, index, 1))
-                    iommu_flush_write_buffer(iommu);
-        }
-    }
-
-    return 0;
-}
-
-int iommu_page_unmapping(struct domain *domain, paddr_t addr, size_t size)
-{
-    dma_pte_clear_range(domain, addr, addr + size);
-
-    return 0;
-}
-
 static int iommu_prepare_rmrr_dev(struct domain *d,
                                   struct acpi_rmrr_unit *rmrr,
                                   u8 bus, u8 devfn)
 {
-    u64 size;
-    int ret;
-
-    /* page table init */
-    size = rmrr->end_address - rmrr->base_address + 1;
-    ret = iommu_page_mapping(d, rmrr->base_address,
-                             rmrr->base_address, size,
-                             DMA_PTE_READ|DMA_PTE_WRITE);
-    if ( ret )
-        return ret;
+    int ret = 0;
+    u64 base, end;
+    unsigned long base_pfn, end_pfn;
+
+    ASSERT(rmrr->base_address < rmrr->end_address);
+    
+    base = rmrr->base_address & PAGE_MASK_4K;
+    base_pfn = base >> PAGE_SHIFT_4K;
+    end = PAGE_ALIGN_4K(rmrr->end_address);
+    end_pfn = end >> PAGE_SHIFT_4K;
+
+    while ( base_pfn < end_pfn )
+    {
+        intel_iommu_map_page(d, base_pfn, base_pfn);
+        base_pfn++;
+    }
 
     if ( domain_context_mapped(bus, devfn) == 0 )
         ret = domain_context_mapping(d, bus, devfn);
diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/vtd/qinval.c
--- a/xen/drivers/passthrough/vtd/qinval.c      Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/drivers/passthrough/vtd/qinval.c      Wed Oct 22 11:46:55 2008 +0900
@@ -428,7 +428,11 @@ int qinval_setup(struct iommu *iommu)
     {
         qi_ctrl->qinval_maddr = alloc_pgtable_maddr();
         if ( qi_ctrl->qinval_maddr == 0 )
-            panic("Cannot allocate memory for qi_ctrl->qinval_maddr\n");
+        {
+            dprintk(XENLOG_WARNING VTDPREFIX,
+                    "Cannot allocate memory for qi_ctrl->qinval_maddr\n");
+            return -ENOMEM;
+        }
         flush->context = flush_context_qi;
         flush->iotlb = flush_iotlb_qi;
     }
diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/vtd/utils.c
--- a/xen/drivers/passthrough/vtd/utils.c       Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/drivers/passthrough/vtd/utils.c       Wed Oct 22 11:46:55 2008 +0900
@@ -204,6 +204,7 @@ void print_vtd_entries(struct iommu *iom
 
 void dump_iommu_info(unsigned char key)
 {
+#if defined(__i386__) || defined(__x86_64__)
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     int i;
@@ -305,6 +306,10 @@ void dump_iommu_info(unsigned char key)
             }
         }
     }
+#else
+    printk("%s: not implemnted on IA64 for now.\n", __func__);
+    /* ia64: TODO */
+#endif
 }
 
 /*
diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/vtd/x86/vtd.c
--- a/xen/drivers/passthrough/vtd/x86/vtd.c     Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c     Wed Oct 22 11:46:55 2008 +0900
@@ -41,17 +41,19 @@ u64 alloc_pgtable_maddr(void)
 {
     struct page_info *pg;
     u64 *vaddr;
+    unsigned long mfn;
 
     pg = alloc_domheap_page(NULL, 0);
-    vaddr = map_domain_page(page_to_mfn(pg));
-    if ( !vaddr )
+    if ( !pg )
         return 0;
+    mfn = page_to_mfn(pg);
+    vaddr = map_domain_page(mfn);
     memset(vaddr, 0, PAGE_SIZE);
 
     iommu_flush_cache_page(vaddr);
     unmap_domain_page(vaddr);
 
-    return page_to_maddr(pg);
+    return (u64)mfn << PAGE_SHIFT_4K;
 }
 
 void free_pgtable_maddr(u64 maddr)
diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/acpi/cpufreq/processor_perf.h
--- a/xen/include/acpi/cpufreq/processor_perf.h Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/include/acpi/cpufreq/processor_perf.h Wed Oct 22 11:46:55 2008 +0900
@@ -60,8 +60,5 @@ struct pm_px {
 
 extern struct pm_px *cpufreq_statistic_data[NR_CPUS];
 
-int xenpf_copy_px_states(struct processor_performance *pxpt,
-        struct xen_processor_performance *dom0_px_info);
-
 int cpufreq_cpu_init(unsigned int cpuid);
 #endif /* __XEN_PROCESSOR_PM_H__ */
diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-ia64/linux-xen/asm/spinlock.h
--- a/xen/include/asm-ia64/linux-xen/asm/spinlock.h     Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/include/asm-ia64/linux-xen/asm/spinlock.h     Wed Oct 22 11:46:55 2008 +0900
@@ -27,25 +27,16 @@ typedef struct {
 #ifdef DEBUG_SPINLOCK
        void *locker;
 #endif
+} raw_spinlock_t;
+
 #ifdef XEN
-       unsigned char recurse_cpu;
-       unsigned char recurse_cnt;
-#endif
-} spinlock_t;
-
-#ifdef XEN
-#ifdef DEBUG_SPINLOCK
-#define SPIN_LOCK_UNLOCKED     /*(spinlock_t)*/ { 0, NULL, -1, 0 }
-#else
-#define SPIN_LOCK_UNLOCKED     /*(spinlock_t)*/ { 0, -1, 0 }
-#endif
-static inline void spin_lock_init(spinlock_t *lock)
-{
-       *lock = ((spinlock_t)SPIN_LOCK_UNLOCKED);
-}
-#else
-#define SPIN_LOCK_UNLOCKED                     /*(spinlock_t)*/ { 0 }
-#define spin_lock_init(x)                      ((x)->lock = 0)
+#ifdef DEBUG_SPINLOCK
+#define _RAW_SPIN_LOCK_UNLOCKED        /*(raw_spinlock_t)*/ { 0, NULL }
+#else
+#define _RAW_SPIN_LOCK_UNLOCKED        /*(raw_spinlock_t)*/ { 0 }
+#endif
+#else
+#define _RAW_SPIN_LOCK_UNLOCKED        /*(raw_spinlock_t)*/ { 0 }
 #endif
 
 #ifdef ASM_SUPPORTED
@@ -59,7 +50,7 @@ static inline void spin_lock_init(spinlo
 #define IA64_SPINLOCK_CLOBBERS "ar.ccv", "ar.pfs", "p14", "p15", "r27", "r28", "r29", "r30", "b6", "memory"
 
 static inline void
-_raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
+_raw_spin_lock_flags (raw_spinlock_t *lock, unsigned long flags)
 {
        register volatile unsigned int *ptr asm ("r31") = &lock->lock;
 
@@ -136,10 +127,9 @@ do {                                                                                       \
 } while (0)
 #endif /* !ASM_SUPPORTED */
 
-#define spin_is_locked(x)      ((x)->lock != 0)
-#define _raw_spin_unlock(x)    do { barrier(); ((spinlock_t *) x)->lock = 0; } while (0)
+#define _raw_spin_is_locked(x) ((x)->lock != 0)
+#define _raw_spin_unlock(x)    do { barrier(); (x)->lock = 0; } while (0)
 #define _raw_spin_trylock(x)   (cmpxchg_acq(&(x)->lock, 0, 1) == 0)
-#define spin_unlock_wait(x)    do { barrier(); } while ((x)->lock)
 
 typedef struct {
        volatile unsigned int read_counter      : 31;
@@ -147,16 +137,12 @@ typedef struct {
 #ifdef CONFIG_PREEMPT
        unsigned int break_lock;
 #endif
-} rwlock_t;
-#define RW_LOCK_UNLOCKED /*(rwlock_t)*/ { 0, 0 }
-
-#define rwlock_init(x)         do { *(x) = (rwlock_t) RW_LOCK_UNLOCKED; } while(0)
-#define read_can_lock(rw)      (*(volatile int *)(rw) >= 0)
-#define write_can_lock(rw)     (*(volatile int *)(rw) == 0)
+} raw_rwlock_t;
+#define _RAW_RW_LOCK_UNLOCKED /*(raw_rwlock_t)*/ { 0, 0 }
 
 #define _raw_read_lock(rw)                                                             \
 do {                                                                                   \
-       rwlock_t *__read_lock_ptr = (rw);                                               \
+       raw_rwlock_t *__read_lock_ptr = (rw);                                           \
                                                                                        \
        while (unlikely(ia64_fetchadd(1, (int *) __read_lock_ptr, acq) < 0)) {          \
                ia64_fetchadd(-1, (int *) __read_lock_ptr, rel);                        \
@@ -167,7 +153,7 @@ do {                                                                                        \
 
 #define _raw_read_unlock(rw)                                   \
 do {                                                           \
-       rwlock_t *__read_lock_ptr = (rw);                       \
+       raw_rwlock_t *__read_lock_ptr = (rw);                   \
        ia64_fetchadd(-1, (int *) __read_lock_ptr, rel);        \
 } while (0)
 
@@ -230,7 +216,4 @@ do {                                                                                \
        clear_bit(31, (x));                                                             \
 })
 
-#ifdef XEN
-#include <asm/xenspinlock.h>
-#endif
 #endif /*  _ASM_IA64_SPINLOCK_H */
diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-ia64/xenspinlock.h
--- a/xen/include/asm-ia64/xenspinlock.h        Wed Oct 22 11:38:22 2008 +0900
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-#ifndef _ASM_IA64_XENSPINLOCK_H
-#define _ASM_IA64_XENSPINLOCK_H
-
-/*
- * spin_[un]lock_recursive(): Use these forms when the lock can (safely!) be
- * reentered recursively on the same CPU. All critical regions that may form
- * part of a recursively-nested set must be protected by these forms. If there
- * are any critical regions that cannot form part of such a set, they can use
- * standard spin_[un]lock().
- */
-#define _raw_spin_lock_recursive(_lock)            \
-    do {                                           \
-        int cpu = smp_processor_id();              \
-        if ( likely((_lock)->recurse_cpu != cpu) ) \
-        {                                          \
-            spin_lock(_lock);                      \
-            (_lock)->recurse_cpu = cpu;            \
-        }                                          \
-        (_lock)->recurse_cnt++;                    \
-    } while ( 0 )
-
-#define _raw_spin_unlock_recursive(_lock)          \
-    do {                                           \
-        if ( likely(--(_lock)->recurse_cnt == 0) ) \
-        {                                          \
-            (_lock)->recurse_cpu = -1;             \
-            spin_unlock(_lock);                    \
-        }                                          \
-    } while ( 0 )
-#endif /*  _ASM_IA64_XENSPINLOCK_H */
diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/include/asm-x86/domain.h      Wed Oct 22 11:46:55 2008 +0900
@@ -250,6 +250,8 @@ struct arch_domain
     bool_t is_32bit_pv;
     /* Is shared-info page in 32-bit format? */
     bool_t has_32bit_shinfo;
+    /* Domain cannot handle spurious page faults? */
+    bool_t suppress_spurious_page_faults;
 
     /* Continuable domain_relinquish_resources(). */
     enum {
diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h     Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/include/asm-x86/hvm/hvm.h     Wed Oct 22 11:46:55 2008 +0900
@@ -128,6 +128,7 @@ struct hvm_function_table {
     int (*msr_write_intercept)(struct cpu_user_regs *regs);
     void (*invlpg_intercept)(unsigned long vaddr);
     void (*set_uc_mode)(struct vcpu *v);
+    void (*set_info_guest)(struct vcpu *v);
 };
 
 extern struct hvm_function_table hvm_funcs;
@@ -314,4 +315,10 @@ int hvm_virtual_to_linear_addr(
     unsigned int addr_size,
     unsigned long *linear_addr);
 
+static inline void hvm_set_info_guest(struct vcpu *v)
+{
+    if ( hvm_funcs.set_info_guest )
+        return hvm_funcs.set_info_guest(v);
+}
+
 #endif /* __ASM_X86_HVM_HVM_H__ */
diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-x86/hvm/irq.h
--- a/xen/include/asm-x86/hvm/irq.h     Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/include/asm-x86/hvm/irq.h     Wed Oct 22 11:46:55 2008 +0900
@@ -22,62 +22,11 @@
 #ifndef __ASM_X86_HVM_IRQ_H__
 #define __ASM_X86_HVM_IRQ_H__
 
-#include <xen/types.h>
-#include <xen/spinlock.h>
-#include <asm/irq.h>
 #include <asm/pirq.h>
+#include <xen/hvm/irq.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/vpic.h>
 #include <asm/hvm/vioapic.h>
-#include <public/hvm/save.h>
-
-struct dev_intx_gsi_link {
-    struct list_head list;
-    uint8_t device;
-    uint8_t intx;
-    uint8_t gsi;
-    uint8_t link;
-};
-
-#define _HVM_IRQ_DPCI_MSI  0x1
-
-struct hvm_gmsi_info {
-    uint32_t gvec;
-    uint32_t gflags;
-};
-
-struct hvm_mirq_dpci_mapping {
-    uint32_t flags;
-    int pending;
-    struct list_head digl_list;
-    struct domain *dom;
-    struct hvm_gmsi_info gmsi;
-};
-
-struct hvm_girq_dpci_mapping {
-    uint8_t valid;
-    uint8_t device;
-    uint8_t intx;
-    uint8_t machine_gsi;
-};
-
-#define NR_ISAIRQS  16
-#define NR_LINK     4
-/* Protected by domain's event_lock */
-struct hvm_irq_dpci {
-    /* Machine IRQ to guest device/intx mapping. */
-    DECLARE_BITMAP(mapping, NR_PIRQS);
-    struct hvm_mirq_dpci_mapping mirq[NR_IRQS];
-    /* Guest IRQ to guest device/intx mapping. */
-    struct hvm_girq_dpci_mapping girq[NR_IRQS];
-    uint8_t msi_gvec_pirq[NR_VECTORS];
-    DECLARE_BITMAP(dirq_mask, NR_IRQS);
-    /* Record of mapped ISA IRQs */
-    DECLARE_BITMAP(isairq_map, NR_ISAIRQS);
-    /* Record of mapped Links */
-    uint8_t link_cnt[NR_LINK];
-    struct timer hvm_timer[NR_IRQS];
-};
 
 struct hvm_irq {
     /*
@@ -149,27 +98,16 @@ struct hvm_irq {
 
 #define hvm_isa_irq_to_gsi(isa_irq) ((isa_irq) ? : 2)
 
-/* Modify state of a PCI INTx wire. */
-void hvm_pci_intx_assert(
-    struct domain *d, unsigned int device, unsigned int intx);
-void hvm_pci_intx_deassert(
-    struct domain *d, unsigned int device, unsigned int intx);
-
-/* Modify state of an ISA device's IRQ wire. */
-void hvm_isa_irq_assert(
-    struct domain *d, unsigned int isa_irq);
-void hvm_isa_irq_deassert(
-    struct domain *d, unsigned int isa_irq);
-
-void hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq);
-
-void hvm_maybe_deassert_evtchn_irq(void);
-void hvm_assert_evtchn_irq(struct vcpu *v);
-void hvm_set_callback_via(struct domain *d, uint64_t via);
-
 /* Check/Acknowledge next pending interrupt. */
 struct hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v);
 struct hvm_intack hvm_vcpu_ack_pending_irq(struct vcpu *v,
                                            struct hvm_intack intack);
 
+/*
+ * Currently IA64 Xen doesn't support MSI. So for x86, we define this macro
+ * to control the conditional compilation of some MSI-related functions.
+ * This macro will be removed once IA64 has MSI support.
+ */
+#define SUPPORT_MSI_REMAPPING 1
+
 #endif /* __ASM_X86_HVM_IRQ_H__ */
diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-x86/hvm/svm/vmcb.h
--- a/xen/include/asm-x86/hvm/svm/vmcb.h        Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h        Wed Oct 22 11:46:55 2008 +0900
@@ -393,7 +393,9 @@ struct vmcb_struct {
     eventinj_t  eventinj;       /* offset 0xA8 */
     u64 h_cr3;                  /* offset 0xB0 */
     lbrctrl_t lbr_control;      /* offset 0xB8 */
-    u64 res09[104];             /* offset 0xC0 pad to save area */
+    u64 res09;                  /* offset 0xC0 */
+    u64 nextrip;                /* offset 0xC8 */
+    u64 res10a[102];            /* offset 0xD0 pad to save area */
 
     svm_segment_register_t es;      /* offset 1024 */
     svm_segment_register_t cs;
diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-x86/io_apic.h
--- a/xen/include/asm-x86/io_apic.h     Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/include/asm-x86/io_apic.h     Wed Oct 22 11:46:55 2008 +0900
@@ -19,6 +19,8 @@
 #define IO_APIC_BASE(idx) \
                ((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \
                + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK)))
+
+#define IO_APIC_ID(idx) (mp_ioapics[idx].mpc_apicid)
 
 /*
  * The structure of the IO-APIC:
diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-x86/msi.h
--- a/xen/include/asm-x86/msi.h Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/include/asm-x86/msi.h Wed Oct 22 11:46:55 2008 +0900
@@ -90,10 +90,11 @@ struct msi_desc {
 
        void __iomem *mask_base;
        struct pci_dev *dev;
-    int vector;
-
-       /* Last set MSI message */
-       struct msi_msg msg;
+       int vector;
+
+       struct msi_msg msg;             /* Last set MSI message */
+
+       int remap_index;                /* index in interrupt remapping table */
 };
 
 /*
diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-x86/rwlock.h
--- a/xen/include/asm-x86/rwlock.h      Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/include/asm-x86/rwlock.h      Wed Oct 22 11:46:55 2008 +0900
@@ -22,25 +22,19 @@
 
 #define __build_read_lock_ptr(rw, helper)   \
        asm volatile(LOCK "subl $1,(%0)\n\t" \
-                    "js 2f\n" \
+                    "jns 1f\n\t" \
+                    "call " helper "\n\t" \
                     "1:\n" \
-                    ".section .text.lock,\"ax\"\n" \
-                    "2:\tcall " helper "\n\t" \
-                    "jmp 1b\n" \
-                    ".previous" \
                     ::"a" (rw) : "memory")
 
 #define __build_read_lock_const(rw, helper)   \
        asm volatile(LOCK "subl $1,%0\n\t" \
-                    "js 2f\n" \
-                    "1:\n" \
-                    ".section .text.lock,\"ax\"\n" \
-                    "2:\tpush %%"__OP"ax\n\t" \
+                    "jns 1f\n\t" \
+                    "push %%"__OP"ax\n\t" \
                     "lea %0,%%"__OP"ax\n\t" \
                     "call " helper "\n\t" \
                     "pop %%"__OP"ax\n\t" \
-                    "jmp 1b\n" \
-                    ".previous" \
+                    "1:\n" \
                     :"=m" (*(volatile int *)rw) : : "memory")
 
 #define __build_read_lock(rw, helper)  do { \
@@ -52,25 +46,19 @@
 
 #define __build_write_lock_ptr(rw, helper) \
        asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
-                    "jnz 2f\n" \
+                    "jz 1f\n\t" \
+                    "call " helper "\n\t" \
                     "1:\n" \
-                    ".section .text.lock,\"ax\"\n" \
-                    "2:\tcall " helper "\n\t" \
-                    "jmp 1b\n" \
-                    ".previous" \
                     ::"a" (rw) : "memory")
 
 #define __build_write_lock_const(rw, helper) \
        asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
-                    "jnz 2f\n" \
-                    "1:\n" \
-                    ".section .text.lock,\"ax\"\n" \
-                    "2:\tpush %%"__OP"ax\n\t" \
+                    "jz 1f\n\t" \
+                    "push %%"__OP"ax\n\t" \
                     "lea %0,%%"__OP"ax\n\t" \
                     "call " helper "\n\t" \
                     "pop %%"__OP"ax\n\t" \
-                    "jmp 1b\n" \
-                    ".previous" \
+                    "1:\n" \
                     :"=m" (*(volatile int *)rw) : : "memory")
 
 #define __build_write_lock(rw, helper) do { \
diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-x86/spinlock.h
--- a/xen/include/asm-x86/spinlock.h    Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/include/asm-x86/spinlock.h    Wed Oct 22 11:46:55 2008 +0900
@@ -8,104 +8,71 @@
 
 typedef struct {
     volatile s16 lock;
-    s8 recurse_cpu;
-    u8 recurse_cnt;
-} spinlock_t;
+} raw_spinlock_t;
 
-#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { 1, -1, 0 }
+#define _RAW_SPIN_LOCK_UNLOCKED /*(raw_spinlock_t)*/ { 1 }
 
-#define spin_lock_init(x)      do { *(x) = (spinlock_t) SPIN_LOCK_UNLOCKED; } while(0)
-#define spin_is_locked(x)      (*(volatile char *)(&(x)->lock) <= 0)
+#define _raw_spin_is_locked(x)                  \
+    (*(volatile char *)(&(x)->lock) <= 0)
 
-static inline void _raw_spin_lock(spinlock_t *lock)
+static always_inline void _raw_spin_lock(raw_spinlock_t *lock)
 {
-    __asm__ __volatile__ (
-        "1:  lock; decb %0         \n"
-        "    js 2f                 \n"
-        ".section .text.lock,\"ax\"\n"
+    asm volatile (
+        "1:  lock; decw %0         \n"
+        "    jns 3f                \n"
         "2:  rep; nop              \n"
-        "    cmpb $0,%0            \n"
+        "    cmpw $0,%0            \n"
         "    jle 2b                \n"
         "    jmp 1b                \n"
-        ".previous"
+        "3:"
         : "=m" (lock->lock) : : "memory" );
 }
 
-static inline void _raw_spin_unlock(spinlock_t *lock)
+static always_inline void _raw_spin_unlock(raw_spinlock_t *lock)
 {
-    ASSERT(spin_is_locked(lock));
-    __asm__ __volatile__ (
-       "movb $1,%0" 
+    ASSERT(_raw_spin_is_locked(lock));
+    asm volatile (
+        "movw $1,%0" 
         : "=m" (lock->lock) : : "memory" );
 }
 
-static inline int _raw_spin_trylock(spinlock_t *lock)
+static always_inline int _raw_spin_trylock(raw_spinlock_t *lock)
 {
-    char oldval;
-    __asm__ __volatile__(
-        "xchgb %b0,%1"
-        :"=q" (oldval), "=m" (lock->lock)
-        :"0" (0) : "memory");
-    return oldval > 0;
+    s16 oldval;
+    asm volatile (
+        "xchgw %w0,%1"
+        :"=r" (oldval), "=m" (lock->lock)
+        :"0" (0) : "memory" );
+    return (oldval > 0);
 }
-
-/*
- * spin_[un]lock_recursive(): Use these forms when the lock can (safely!) be
- * reentered recursively on the same CPU. All critical regions that may form
- * part of a recursively-nested set must be protected by these forms. If there
- * are any critical regions that cannot form part of such a set, they can use
- * standard spin_[un]lock().
- */
-#define _raw_spin_lock_recursive(_lock)            \
-    do {                                           \
-        int cpu = smp_processor_id();              \
-        if ( likely((_lock)->recurse_cpu != cpu) ) \
-        {                                          \
-            spin_lock(_lock);                      \
-            (_lock)->recurse_cpu = cpu;            \
-        }                                          \
-        (_lock)->recurse_cnt++;                    \
-    } while ( 0 )
-
-#define _raw_spin_unlock_recursive(_lock)          \
-    do {                                           \
-        if ( likely(--(_lock)->recurse_cnt == 0) ) \
-        {                                          \
-            (_lock)->recurse_cpu = -1;             \
-            spin_unlock(_lock);                    \
-        }                                          \
-    } while ( 0 )
-
 
 typedef struct {
     volatile unsigned int lock;
-} rwlock_t;
+} raw_rwlock_t;
 
-#define RW_LOCK_UNLOCKED /*(rwlock_t)*/ { RW_LOCK_BIAS }
-
-#define rwlock_init(x) do { *(x) = (rwlock_t) RW_LOCK_UNLOCKED; } while(0)
+#define _RAW_RW_LOCK_UNLOCKED /*(raw_rwlock_t)*/ { RW_LOCK_BIAS }
 
 /*
  * On x86, we implement read-write locks as a 32-bit counter
  * with the high bit (sign) being the "contended" bit.
  */
-static inline void _raw_read_lock(rwlock_t *rw)
+static always_inline void _raw_read_lock(raw_rwlock_t *rw)
 {
     __build_read_lock(rw, "__read_lock_failed");
 }
 
-static inline void _raw_write_lock(rwlock_t *rw)
+static always_inline void _raw_write_lock(raw_rwlock_t *rw)
 {
     __build_write_lock(rw, "__write_lock_failed");
 }
 
-#define _raw_read_unlock(rw)                       \
-    __asm__ __volatile__ (                         \
-        "lock ; incl %0" :                         \
+#define _raw_read_unlock(rw)                    \
+    asm volatile (                              \
+        "lock ; incl %0" :                      \
         "=m" ((rw)->lock) : : "memory" )
-#define _raw_write_unlock(rw)                      \
-    __asm__ __volatile__ (                         \
-        "lock ; addl $" RW_LOCK_BIAS_STR ",%0" :   \
+#define _raw_write_unlock(rw)                           \
+    asm volatile (                                      \
+        "lock ; addl $" RW_LOCK_BIAS_STR ",%0" :        \
         "=m" ((rw)->lock) : : "memory" )
 
 #endif /* __ASM_SPINLOCK_H */
diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h       Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/include/public/domctl.h       Wed Oct 22 11:46:55 2008 +0900
@@ -614,6 +614,10 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_subsc
 #define XEN_DOMCTL_set_machine_address_size  51
 #define XEN_DOMCTL_get_machine_address_size  52
 
+/*
+ * Do not inject spurious page faults into this domain.
+ */
+#define XEN_DOMCTL_suppress_spurious_page_faults 53
 
 struct xen_domctl {
     uint32_t cmd;
diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/public/trace.h
--- a/xen/include/public/trace.h        Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/include/public/trace.h        Wed Oct 22 11:46:55 2008 +0900
@@ -38,6 +38,7 @@
 #define TRC_MEM      0x0010f000    /* Xen memory trace         */
 #define TRC_PV       0x0020f000    /* Xen PV traces            */
 #define TRC_SHADOW   0x0040f000    /* Xen shadow tracing       */
+#define TRC_PM       0x0080f000    /* Xen power management trace */
 #define TRC_ALL      0x0ffff000
 #define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff)
 #define TRC_HD_CYCLE_FLAG (1UL<<31)
@@ -146,6 +147,15 @@
 #define TRC_HVM_LMSW            (TRC_HVM_HANDLER + 0x19)
 #define TRC_HVM_LMSW64          (TRC_HVM_HANDLER + TRC_64_FLAG + 0x19)
 
+/* trace subclasses for power management */
+#define TRC_PM_FREQ     0x00801000      /* xen cpu freq events */
+#define TRC_PM_IDLE     0x00802000      /* xen cpu idle events */
+
+/* trace events for per class */
+#define TRC_PM_FREQ_CHANGE      (TRC_PM_FREQ + 0x01)
+#define TRC_PM_IDLE_ENTRY       (TRC_PM_IDLE + 0x01)
+#define TRC_PM_IDLE_EXIT        (TRC_PM_IDLE + 0x02)
+
 /* This structure represents a single trace buffer record. */
 struct t_rec {
     uint32_t event:28;
diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/xen/hvm/irq.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/xen/hvm/irq.h Wed Oct 22 11:46:55 2008 +0900
@@ -0,0 +1,99 @@
+/******************************************************************************
+ * irq.h
+ * 
+ * Interrupt distribution and delivery logic.
+ * 
+ * Copyright (c) 2006, K A Fraser, XenSource Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#ifndef __XEN_HVM_IRQ_H__
+#define __XEN_HVM_IRQ_H__
+
+#include <xen/types.h>
+#include <xen/spinlock.h>
+#include <asm/irq.h>
+#include <public/hvm/save.h>
+
+struct dev_intx_gsi_link {
+    struct list_head list;
+    uint8_t device;
+    uint8_t intx;
+    uint8_t gsi;
+    uint8_t link;
+};
+
+#define _HVM_IRQ_DPCI_MSI  0x1
+
+struct hvm_gmsi_info {
+    uint32_t gvec;
+    uint32_t gflags;
+};
+
+struct hvm_mirq_dpci_mapping {
+    uint32_t flags;
+    int pending;
+    struct list_head digl_list;
+    struct domain *dom;
+    struct hvm_gmsi_info gmsi;
+};
+
+struct hvm_girq_dpci_mapping {
+    uint8_t valid;
+    uint8_t device;
+    uint8_t intx;
+    uint8_t machine_gsi;
+};
+
+#define NR_ISAIRQS  16
+#define NR_LINK     4
+
+/* Protected by domain's event_lock */
+struct hvm_irq_dpci {
+    /* Machine IRQ to guest device/intx mapping. */
+    DECLARE_BITMAP(mapping, NR_PIRQS);
+    struct hvm_mirq_dpci_mapping mirq[NR_IRQS];
+    /* Guest IRQ to guest device/intx mapping. */
+    struct hvm_girq_dpci_mapping girq[NR_IRQS];
+    uint8_t msi_gvec_pirq[NR_VECTORS];
+    DECLARE_BITMAP(dirq_mask, NR_IRQS);
+    /* Record of mapped ISA IRQs */
+    DECLARE_BITMAP(isairq_map, NR_ISAIRQS);
+    /* Record of mapped Links */
+    uint8_t link_cnt[NR_LINK];
+    struct timer hvm_timer[NR_IRQS];
+};
+
+/* Modify state of a PCI INTx wire. */
+void hvm_pci_intx_assert(
+    struct domain *d, unsigned int device, unsigned int intx);
+void hvm_pci_intx_deassert(
+    struct domain *d, unsigned int device, unsigned int intx);
+
+/* Modify state of an ISA device's IRQ wire. */
+void hvm_isa_irq_assert(
+    struct domain *d, unsigned int isa_irq);
+void hvm_isa_irq_deassert(
+    struct domain *d, unsigned int isa_irq);
+
+void hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq);
+
+void hvm_maybe_deassert_evtchn_irq(void);
+void hvm_assert_evtchn_irq(struct vcpu *v);
+void hvm_set_callback_via(struct domain *d, uint64_t via);
+
+void hvm_dirq_assist(struct vcpu *v);
+
+#endif /* __XEN_HVM_IRQ_H__ */
diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/xen/spinlock.h
--- a/xen/include/xen/spinlock.h        Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/include/xen/spinlock.h        Wed Oct 22 11:46:55 2008 +0900
@@ -3,93 +3,95 @@
 
 #include <xen/config.h>
 #include <asm/system.h>
-
-#define spin_lock_irqsave(lock, flags) \
-    do { local_irq_save(flags); spin_lock(lock); } while ( 0 )
-#define spin_lock_irq(lock) \
-    do { local_irq_disable(); spin_lock(lock); } while ( 0 )
-
-#define read_lock_irqsave(lock, flags) \
-    do { local_irq_save(flags); read_lock(lock); } while ( 0 )
-#define read_lock_irq(lock) \
-    do { local_irq_disable(); read_lock(lock); } while ( 0 )
-
-#define write_lock_irqsave(lock, flags) \
-    do { local_irq_save(flags); write_lock(lock); } while ( 0 )
-#define write_lock_irq(lock) \
-    do { local_irq_disable(); write_lock(lock); } while ( 0 )
-
-#define spin_unlock_irqrestore(lock, flags) \
-    do { spin_unlock(lock); local_irq_restore(flags); } while ( 0 )
-#define spin_unlock_irq(lock) \
-    do { spin_unlock(lock); local_irq_enable(); } while ( 0 )
-
-#define read_unlock_irqrestore(lock, flags) \
-    do { read_unlock(lock); local_irq_restore(flags); } while ( 0 )
-#define read_unlock_irq(lock) \
-    do { read_unlock(lock); local_irq_enable(); } while ( 0 )
-
-#define write_unlock_irqrestore(lock, flags) \
-    do { write_unlock(lock); local_irq_restore(flags); } while ( 0 )
-#define write_unlock_irq(lock) \
-    do { write_unlock(lock); local_irq_enable(); } while ( 0 )
-
-#ifdef CONFIG_SMP
-
 #include <asm/spinlock.h>
 
-#else
+typedef struct {
+    raw_spinlock_t raw;
+    u16 recurse_cpu:12;
+    u16 recurse_cnt:4;
+} spinlock_t;
 
-#if (__GNUC__ > 2)
-typedef struct { } spinlock_t;
-#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { }
-#else
-typedef struct { int gcc_is_buggy; } spinlock_t;
-#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { 0 }
-#endif
+#define SPIN_LOCK_UNLOCKED { _RAW_SPIN_LOCK_UNLOCKED, 0xfffu, 0 }
+#define DEFINE_SPINLOCK(l) spinlock_t l = SPIN_LOCK_UNLOCKED
+#define spin_lock_init(l) (*(l) = (spinlock_t)SPIN_LOCK_UNLOCKED)
 
-#define spin_lock_init(lock)             do { } while(0)
-#define spin_is_locked(lock)             (0)
-#define _raw_spin_lock(lock)             (void)(lock)
-#define _raw_spin_trylock(lock)          ({1; })
-#define _raw_spin_unlock(lock)           do { } while(0)
-#define _raw_spin_lock_recursive(lock)   do { } while(0)
-#define _raw_spin_unlock_recursive(lock) do { } while(0)
+typedef struct {
+    raw_rwlock_t raw;
+} rwlock_t;
 
-#if (__GNUC__ > 2)
-typedef struct { } rwlock_t;
-#define RW_LOCK_UNLOCKED /*(rwlock_t)*/ { }
-#else
-typedef struct { int gcc_is_buggy; } rwlock_t;
-#define RW_LOCK_UNLOCKED /*(rwlock_t)*/ { 0 }
-#endif
+#define RW_LOCK_UNLOCKED { _RAW_RW_LOCK_UNLOCKED }
+#define DEFINE_RWLOCK(l) rwlock_t l = RW_LOCK_UNLOCKED
+#define rwlock_init(l) (*(l) = (rwlock_t)RW_LOCK_UNLOCKED)
 
-#define rwlock_init(lock)            do { } while(0)
-#define _raw_read_lock(lock)         (void)(lock) /* Not "unused variable". */
-#define _raw_read_unlock(lock)       do { } while(0)
-#define _raw_write_lock(lock)        (void)(lock) /* Not "unused variable". */
-#define _raw_write_unlock(lock)      do { } while(0)
+void _spin_lock(spinlock_t *lock);
+void _spin_lock_irq(spinlock_t *lock);
+unsigned long _spin_lock_irqsave(spinlock_t *lock);
 
-#endif
+void _spin_unlock(spinlock_t *lock);
+void _spin_unlock_irq(spinlock_t *lock);
+void _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags);
 
-#define spin_lock(_lock)             _raw_spin_lock(_lock)
-#define spin_trylock(_lock)          _raw_spin_trylock(_lock)
-#define spin_unlock(_lock)           _raw_spin_unlock(_lock)
-#define spin_lock_recursive(_lock)   _raw_spin_lock_recursive(_lock)
-#define spin_unlock_recursive(_lock) _raw_spin_unlock_recursive(_lock)
-#define read_lock(_lock)             _raw_read_lock(_lock)
-#define read_unlock(_lock)           _raw_read_unlock(_lock)
-#define write_lock(_lock)            _raw_write_lock(_lock)
-#define write_unlock(_lock)          _raw_write_unlock(_lock)
+int _spin_is_locked(spinlock_t *lock);
+int _spin_trylock(spinlock_t *lock);
+void _spin_barrier(spinlock_t *lock);
+
+void _spin_lock_recursive(spinlock_t *lock);
+void _spin_unlock_recursive(spinlock_t *lock);
+
+void _read_lock(rwlock_t *lock);
+void _read_lock_irq(rwlock_t *lock);
+unsigned long _read_lock_irqsave(rwlock_t *lock);
+
+void _read_unlock(rwlock_t *lock);
+void _read_unlock_irq(rwlock_t *lock);
+void _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags);
+
+void _write_lock(rwlock_t *lock);
+void _write_lock_irq(rwlock_t *lock);
+unsigned long _write_lock_irqsave(rwlock_t *lock);
+
+void _write_unlock(rwlock_t *lock);
+void _write_unlock_irq(rwlock_t *lock);
+void _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags);
+
+#define spin_lock(l)                  _spin_lock(l)
+#define spin_lock_irq(l)              _spin_lock_irq(l)
+#define spin_lock_irqsave(l, f)       ((f) = _spin_lock_irqsave(l))
+
+#define spin_unlock(l)                _spin_unlock(l)
+#define spin_unlock_irq(l)            _spin_unlock_irq(l)
+#define spin_unlock_irqrestore(l, f)  _spin_unlock_irqrestore(l, f)
+
+#define spin_is_locked(l)             _raw_spin_is_locked(&(l)->raw)
+#define spin_trylock(l)               _spin_trylock(l)
 
 /* Ensure a lock is quiescent between two critical operations. */
-static inline void spin_barrier(spinlock_t *lock)
-{
-    do { mb(); } while ( spin_is_locked(lock) );
-    mb();
-}
+#define spin_barrier(l)               _spin_barrier(l)
 
-#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
-#define DEFINE_RWLOCK(x) rwlock_t x = RW_LOCK_UNLOCKED
+/*
+ * spin_[un]lock_recursive(): Use these forms when the lock can (safely!) be
+ * reentered recursively on the same CPU. All critical regions that may form
+ * part of a recursively-nested set must be protected by these forms. If there
+ * are any critical regions that cannot form part of such a set, they can use
+ * standard spin_[un]lock().
+ */
+#define spin_lock_recursive(l)        _spin_lock_recursive(l)
+#define spin_unlock_recursive(l)      _spin_unlock_recursive(l)
+
+#define read_lock(l)                  _read_lock(l)
+#define read_lock_irq(l)              _read_lock_irq(l)
+#define read_lock_irqsave(l, f)       ((f) = _read_lock_irqsave(l))
+
+#define read_unlock(l)                _read_unlock(l)
+#define read_unlock_irq(l)            _read_unlock_irq(l)
+#define read_unlock_irqrestore(l, f)  _read_unlock_irqrestore(l, f)
+
+#define write_lock(l)                 _write_lock(l)
+#define write_lock_irq(l)             _write_lock_irq(l)
+#define write_lock_irqsave(l, f)      ((f) = _write_lock_irqsave(l))
+
+#define write_unlock(l)               _write_unlock(l)
+#define write_unlock_irq(l)           _write_unlock_irq(l)
+#define write_unlock_irqrestore(l, f) _write_unlock_irqrestore(l, f)
 
 #endif /* __SPINLOCK_H__ */
diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/xen/xmalloc.h
--- a/xen/include/xen/xmalloc.h Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/include/xen/xmalloc.h Wed Oct 22 11:46:55 2008 +0900
@@ -1,12 +1,17 @@
 
 #ifndef __XMALLOC_H__
 #define __XMALLOC_H__
+
+/*
+ * Xen malloc/free-style interface.
+ */
 
 /* Allocate space for typed object. */
 #define xmalloc(_type) ((_type *)_xmalloc(sizeof(_type), __alignof__(_type)))
 
 /* Allocate space for array of typed objects. */
-#define xmalloc_array(_type, _num) ((_type *)_xmalloc_array(sizeof(_type), __alignof__(_type), _num))
+#define xmalloc_array(_type, _num) \
+    ((_type *)_xmalloc_array(sizeof(_type), __alignof__(_type), _num))
 
 /* Allocate untyped storage. */
 #define xmalloc_bytes(_bytes) (_xmalloc(_bytes, SMP_CACHE_BYTES))
@@ -15,8 +20,9 @@ extern void xfree(void *);
 extern void xfree(void *);
 
 /* Underlying functions */
-extern void *_xmalloc(size_t size, size_t align);
-static inline void *_xmalloc_array(size_t size, size_t align, size_t num)
+extern void *_xmalloc(unsigned long size, unsigned long align);
+static inline void *_xmalloc_array(
+    unsigned long size, unsigned long align, unsigned long num)
 {
        /* Check for overflow. */
        if (size && num > UINT_MAX / size)
@@ -24,4 +30,73 @@ static inline void *_xmalloc_array(size_
        return _xmalloc(size * num, align);
 }
 
+/*
+ * Pooled allocator interface.
+ */
+
+struct xmem_pool;
+
+typedef void *(xmem_pool_get_memory)(unsigned long bytes);
+typedef void (xmem_pool_put_memory)(void *ptr);
+
+/**
+ * xmem_pool_create - create dynamic memory pool
+ * @name: name of the pool
+ * @get_mem: callback function used to expand pool
+ * @put_mem: callback function used to shrink pool
+ * @init_size: initial pool size (in bytes)
+ * @max_size: maximum pool size (in bytes) - set this as 0 for no limit
+ * @grow_size: amount of memory (in bytes) added to pool whenever required
+ *
+ * All size values are rounded up to next page boundary.
+ */
+struct xmem_pool *xmem_pool_create(
+    const char *name,
+    xmem_pool_get_memory get_mem,
+    xmem_pool_put_memory put_mem,
+    unsigned long init_size,
+    unsigned long max_size,
+    unsigned long grow_size);
+
+/**
+ * xmem_pool_destroy - cleanup given pool
+ * @pool: Pool to be destroyed
+ *
+ * Data structures associated with pool are freed.
+ * All memory allocated from pool must be freed before
+ * destroying it.
+ */
+void xmem_pool_destroy(struct xmem_pool *pool);
+
+/**
+ * xmem_pool_alloc - allocate memory from given pool
+ * @size: no. of bytes
+ * @pool: pool to allocate from
+ */
+void *xmem_pool_alloc(unsigned long size, struct xmem_pool *pool);
+
+/**
+ * xmem_pool_free - free memory from given pool
+ * @ptr: address of memory to be freed
+ * @pool: pool to free from
+ */
+void xmem_pool_free(void *ptr, struct xmem_pool *pool);
+
+/**
+ * xmem_pool_get_used_size - get memory currently used by given pool
+ *
+ * Used memory includes stored data + metadata + internal fragmentation
+ */
+unsigned long xmem_pool_get_used_size(struct xmem_pool *pool);
+
+/**
+ * xmem_pool_get_total_size - get total memory currently allocated for pool
+ *
+ * This is the total memory currently allocated for this pool which includes
+ * used size + free size.
+ *
+ * (Total - Used) is good indicator of memory efficiency of allocator.
+ */
+unsigned long xmem_pool_get_total_size(struct xmem_pool *pool);
+
 #endif /* __XMALLOC_H__ */
diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/xlat.lst
--- a/xen/include/xlat.lst      Wed Oct 22 11:38:22 2008 +0900
+++ b/xen/include/xlat.lst      Wed Oct 22 11:46:55 2008 +0900
@@ -55,3 +55,7 @@
 !      processor_cx                    platform.h
 !      processor_flags                 platform.h
 !      processor_power                 platform.h
+!      pct_register                    platform.h
+!      processor_px                    platform.h
+!      psd_package                     platform.h
+!      processor_performance           platform.h

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.