[Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1224643615 -32400
# Node ID 46d7e12c4c919bab07af4b7097526dd06b824bea
# Parent  6583186e5989d7e7e0bff126cd20a9cacb7c1613
# Parent  d2f7243fc571ea78cbf3fe33e723aa8f30111daa
merge with xen-unstable.hg
---
 xen/include/asm-ia64/xenspinlock.h | 30
 docs/xen-api/coversheet.tex | 1
 docs/xen-api/revision-history.tex | 9
 docs/xen-api/xenapi-coversheet.tex | 4
 docs/xen-api/xenapi-datamodel-graph.dot | 11
 docs/xen-api/xenapi-datamodel.tex | 1160 +++++++++++++
 tools/blktap/drivers/block-qcow.c | 18
 tools/blktap/drivers/block-qcow2.c | 62
 tools/libxc/xc_domain.c | 12
 tools/libxc/xenctrl.h | 3
 tools/python/xen/lowlevel/xc/xc.c | 21
 tools/python/xen/util/pci.py | 21
 tools/python/xen/util/utils.py | 26
 tools/python/xen/util/vscsi_util.py | 219 +-
 tools/python/xen/xend/XendAPI.py | 20
 tools/python/xen/xend/XendConfig.py | 202 +-
 tools/python/xen/xend/XendDSCSI.py | 174 +
 tools/python/xen/xend/XendDomainInfo.py | 137 +
 tools/python/xen/xend/XendNode.py | 39
 tools/python/xen/xend/XendPSCSI.py | 143 +
 tools/python/xen/xend/server/vscsiif.py | 18
 tools/python/xen/xm/create.dtd | 5
 tools/python/xen/xm/create.py | 18
 tools/python/xen/xm/main.py | 140 +
 tools/python/xen/xm/xenapi_create.py | 59
 tools/xentrace/formats | 4
 unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c | 14
 xen/arch/ia64/vmx/vmmu.c | 2
 xen/arch/x86/acpi/cpu_idle.c | 15
 xen/arch/x86/cpu/amd.c | 4
 xen/arch/x86/domain.c | 7
 xen/arch/x86/domctl.c | 15
 xen/arch/x86/hvm/svm/emulate.c | 31
 xen/arch/x86/hvm/svm/intr.c | 57
 xen/arch/x86/hvm/viridian.c | 3
 xen/arch/x86/hvm/vmx/intr.c | 57
 xen/arch/x86/hvm/vmx/vmx.c | 20
 xen/arch/x86/irq.c | 55
 xen/arch/x86/mm/hap/hap.c | 7
 xen/arch/x86/mm/shadow/private.h | 63
 xen/arch/x86/msi.c | 1
 xen/arch/x86/nmi.c | 25
 xen/arch/x86/platform_hypercall.c | 22
 xen/arch/x86/smpboot.c | 13
 xen/arch/x86/traps.c | 6
 xen/arch/x86/x86_32/xen.lds.S | 1
 xen/arch/x86/x86_64/Makefile | 2
 xen/arch/x86/x86_64/cpu_idle.c | 2
 xen/arch/x86/x86_64/cpufreq.c | 91 +
 xen/arch/x86/x86_64/mm.c | 2
 xen/arch/x86/x86_64/platform_hypercall.c | 4
 xen/arch/x86/x86_64/xen.lds.S | 1
 xen/arch/x86/x86_emulate/x86_emulate.c | 9
 xen/common/Makefile | 3
 xen/common/kernel.c | 3
 xen/common/schedule.c | 4
 xen/common/spinlock.c | 154 +
 xen/common/timer.c | 56
 xen/common/xmalloc_tlsf.c | 599 ++++++
 xen/drivers/Makefile | 2
 xen/drivers/acpi/pmstat.c | 4
 xen/drivers/char/ns16550.c | 19
 xen/drivers/cpufreq/cpufreq.c | 35
 xen/drivers/cpufreq/utility.c | 37
 xen/drivers/passthrough/Makefile | 1
 xen/drivers/passthrough/io.c | 61
 xen/drivers/passthrough/iommu.c | 2
 xen/drivers/passthrough/pci.c | 5
 xen/drivers/passthrough/vtd/Makefile | 1
 xen/drivers/passthrough/vtd/ia64/Makefile | 1
 xen/drivers/passthrough/vtd/ia64/vtd.c | 112 +
 xen/drivers/passthrough/vtd/intremap.c | 54
 xen/drivers/passthrough/vtd/iommu.c | 125 -
 xen/drivers/passthrough/vtd/qinval.c | 6
 xen/drivers/passthrough/vtd/utils.c | 5
 xen/drivers/passthrough/vtd/x86/vtd.c | 8
 xen/include/acpi/cpufreq/processor_perf.h | 3
 xen/include/asm-ia64/linux-xen/asm/spinlock.h | 49
 xen/include/asm-x86/domain.h | 2
 xen/include/asm-x86/hvm/hvm.h | 7
 xen/include/asm-x86/hvm/irq.h | 78
 xen/include/asm-x86/hvm/svm/vmcb.h | 4
 xen/include/asm-x86/io_apic.h | 2
 xen/include/asm-x86/msi.h | 9
 xen/include/asm-x86/rwlock.h | 32
 xen/include/asm-x86/spinlock.h | 95 -
 xen/include/public/domctl.h | 4
 xen/include/public/trace.h | 10
 xen/include/xen/hvm/irq.h | 99 +
 xen/include/xen/spinlock.h | 156 -
 xen/include/xen/xmalloc.h | 81
 xen/include/xlat.lst | 4
92 files changed, 4067 insertions(+), 950 deletions(-) diff -r 6583186e5989 -r 46d7e12c4c91 docs/xen-api/coversheet.tex --- a/docs/xen-api/coversheet.tex Wed Oct 22 11:38:22 2008 +0900 +++ b/docs/xen-api/coversheet.tex Wed Oct 22 11:46:55 2008 +0900 @@ -51,6 +51,7 @@ Mike Day, IBM & Daniel Veillard, Red Hat Mike Day, IBM & Daniel Veillard, Red Hat \\ Jim Fehlig, Novell & Tom Wilkie, University of Cambridge \\ Jon Harrop, XenSource & Yosuke Iwamatsu, NEC \\ +Masaki Kanno, FUJITSU \\ \end{tabular} \end{large} diff -r 6583186e5989 -r 46d7e12c4c91 docs/xen-api/revision-history.tex --- a/docs/xen-api/revision-history.tex Wed Oct 22 11:38:22 2008 +0900 +++ b/docs/xen-api/revision-history.tex Wed Oct 22 11:46:55 2008 +0900 @@ -56,5 +56,14 @@ \end{flushleft} \end{minipage}\\ \hline + 1.0.7 & 20th Oct. 08 & M. Kanno & + \begin{minipage}[t]{7cm} + \begin{flushleft} + Added definitions of new classes DSCSI and PSCSI. Updated the table + and the diagram representing relationships between classes. + Added host.PSCSIs and VM.DSCSIs fields. + \end{flushleft} + \end{minipage}\\ + \hline \end{tabular} \end{center} diff -r 6583186e5989 -r 46d7e12c4c91 docs/xen-api/xenapi-coversheet.tex --- a/docs/xen-api/xenapi-coversheet.tex Wed Oct 22 11:38:22 2008 +0900 +++ b/docs/xen-api/xenapi-coversheet.tex Wed Oct 22 11:46:55 2008 +0900 @@ -17,12 +17,12 @@ \newcommand{\coversheetlogo}{xen.eps} %% Document date -\newcommand{\datestring}{24th July 2008} +\newcommand{\datestring}{20th October 2008} \newcommand{\releasestatement}{Stable Release} %% Document revision -\newcommand{\revstring}{API Revision 1.0.6} +\newcommand{\revstring}{API Revision 1.0.7} %% Document authors \newcommand{\docauthors}{ diff -r 6583186e5989 -r 46d7e12c4c91 docs/xen-api/xenapi-datamodel-graph.dot --- a/docs/xen-api/xenapi-datamodel-graph.dot Wed Oct 22 11:38:22 2008 +0900 +++ b/docs/xen-api/xenapi-datamodel-graph.dot Wed Oct 22 11:46:55 2008 +0900 @@ -12,9 +12,11 @@ digraph "Xen-API Class Diagram" { digraph "Xen-API Class Diagram" { fontname="Verdana"; -node [ shape=box ]; session VM host network VIF PIF SR VDI VBD PBD user XSPolicy ACMPolicy; -node [shape=ellipse]; PIF_metrics VIF_metrics VM_metrics VBD_metrics PBD_metrics VM_guest_metrics host_metrics; -node [shape=box]; DPCI PPCI host_cpu console VTPM +node [ shape=box ]; session VM host network VIF PIF SR VDI VBD PBD user; +node [ shape=box ]; XSPolicy ACMPolicy DPCI PPCI host_cpu console VTPM; +node [ shape=box ]; DSCSI PSCSI; +node [ shape=ellipse ]; VM_metrics VM_guest_metrics host_metrics; +node [ shape=ellipse ]; PIF_metrics VIF_metrics VBD_metrics PBD_metrics; session -> host [ arrowhead="none" ] session -> user [ arrowhead="none" ] VM -> VM_metrics [ arrowhead="none" ] @@ -41,4 +43,7 @@ DPCI -> VM [ arrowhead="none", arrowtail DPCI -> VM [ arrowhead="none", arrowtail="crow" ] DPCI -> PPCI [ arrowhead="none" ] PPCI -> host [ arrowhead="none", arrowtail="crow" ] +DSCSI -> VM [ arrowhead="none", arrowtail="crow" ] +DSCSI -> PSCSI [ arrowhead="none" ] +PSCSI -> host [ arrowhead="none", arrowtail="crow" ] } diff -r 6583186e5989 -r 46d7e12c4c91 docs/xen-api/xenapi-datamodel.tex --- a/docs/xen-api/xenapi-datamodel.tex Wed Oct 22 11:38:22 2008 +0900 +++ b/docs/xen-api/xenapi-datamodel.tex Wed Oct 22 11:46:55 2008 +0900 @@ -46,6 +46,8 @@ Name & Description \\ {\tt console} & A console \\ {\tt DPCI} & A pass-through PCI device \\ {\tt PPCI} & A physical PCI device \\ +{\tt DSCSI} & A half-virtualized SCSI device \\ +{\tt PSCSI} & A physical SCSI device \\ {\tt user} & A user of the system \\ 
{\tt debug} & A basic class for testing \\ {\tt XSPolicy} & A class for handling Xen Security Policies \\ @@ -74,6 +76,8 @@ console.VM & VM.consoles & one-to-many\\ console.VM & VM.consoles & one-to-many\\ DPCI.VM & VM.DPCIs & one-to-many\\ PPCI.host & host.PPCIs & one-to-many\\ +DSCSI.VM & VM.DSCSIs & one-to-many\\ +PSCSI.host & host.PSCSIs & one-to-many\\ host.resident\_VMs & VM.resident\_on & many-to-one\\ host.host\_CPUs & host\_cpu.host & many-to-one\\ \hline @@ -1407,6 +1411,7 @@ Quals & Field & Type & Description \\ $\mathit{RO}_\mathit{run}$ & {\tt crash\_dumps} & (crashdump ref) Set & crash dumps associated with this VM \\ $\mathit{RO}_\mathit{run}$ & {\tt VTPMs} & (VTPM ref) Set & virtual TPMs \\ $\mathit{RO}_\mathit{run}$ & {\tt DPCIs} & (DPCI ref) Set & pass-through PCI devices \\ +$\mathit{RO}_\mathit{run}$ & {\tt DSCSIs} & (DSCSI ref) Set & half-virtualized SCSI devices \\ $\mathit{RW}$ & {\tt PV/bootloader} & string & name of or path to bootloader \\ $\mathit{RW}$ & {\tt PV/kernel} & string & path to the kernel \\ $\mathit{RW}$ & {\tt PV/ramdisk} & string & path to the initrd \\ @@ -3443,6 +3448,38 @@ Get the DPCIs field of the given VM. \noindent {\bf Return Type:} {\tt (DPCI ref) Set +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_DSCSIs} + +{\bf Overview:} +Get the DSCSIs field of the given VM. + + \noindent {\bf Signature:} +\begin{verbatim} ((DSCSI ref) Set) get_DSCSIs (session_id s, VM ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt VM ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +(DSCSI ref) Set } @@ -5518,6 +5555,7 @@ Quals & Field & Type & Description \\ $\mathit{RW}$ & {\tt crash\_dump\_sr} & SR ref & The SR in which VDIs for crash dumps are created \\ $\mathit{RO}_\mathit{run}$ & {\tt PBDs} & (PBD ref) Set & physical blockdevices \\ $\mathit{RO}_\mathit{run}$ & {\tt PPCIs} & (PPCI ref) Set & physical PCI devices \\ +$\mathit{RO}_\mathit{run}$ & {\tt PSCSIs} & (PSCSI ref) Set & physical SCSI devices \\ $\mathit{RO}_\mathit{run}$ & {\tt host\_CPUs} & (host\_cpu ref) Set & The physical CPUs on this host \\ $\mathit{RO}_\mathit{run}$ & {\tt metrics} & host\_metrics ref & metrics associated with this host \\ \hline @@ -6837,6 +6875,38 @@ Get the PPCIs field of the given host. \noindent {\bf Return Type:} {\tt (PPCI ref) Set +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_PSCSIs} + +{\bf Overview:} +Get the PSCSIs field of the given host. 
+ + \noindent {\bf Signature:} +\begin{verbatim} ((PSCSI ref) Set) get_PSCSIs (session_id s, host ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt host ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +(PSCSI ref) Set } @@ -15723,6 +15793,1096 @@ all fields from the object \vspace{1cm} \newpage +\section{Class: DSCSI} +\subsection{Fields for class: DSCSI} +\begin{longtable}{|lllp{0.38\textwidth}|} +\hline +\multicolumn{1}{|l}{Name} & \multicolumn{3}{l|}{\bf DSCSI} \\ +\multicolumn{1}{|l}{Description} & \multicolumn{3}{l|}{\parbox{11cm}{\em A +half-virtualized SCSI device.}} \\ +\hline +Quals & Field & Type & Description \\ +\hline +$\mathit{RO}_\mathit{run}$ & {\tt uuid} & string & unique identifier/object reference \\ +$\mathit{RO}_\mathit{inst}$ & {\tt VM} & VM ref & the virtual machine \\ +$\mathit{RO}_\mathit{inst}$ & {\tt PSCSI} & PSCSI ref & the physical SCSI device \\ +$\mathit{RO}_\mathit{run}$ & {\tt virtual\_host} & int & the virtual host number \\ +$\mathit{RO}_\mathit{run}$ & {\tt virtual\_channel} & int & the virtual channel number \\ +$\mathit{RO}_\mathit{run}$ & {\tt virtual\_target} & int & the virtual target number \\ +$\mathit{RO}_\mathit{run}$ & {\tt virtual\_lun} & int & the virtual logical unit number \\ +$\mathit{RO}_\mathit{inst}$ & {\tt virtual\_HCTL} & string & the virtual HCTL \\ +$\mathit{RO}_\mathit{run}$ & {\tt runtime\_properties} & (string $\rightarrow$ string) Map & Device runtime properties \\ +\hline +\end{longtable} +\subsection{RPCs associated with class: DSCSI} +\subsubsection{RPC name:~get\_all} + +{\bf Overview:} +Return a list of all the DSCSIs known to the system. + + \noindent {\bf Signature:} +\begin{verbatim} ((DSCSI ref) Set) get_all (session_id s)\end{verbatim} + + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +(DSCSI ref) Set +} + + +references to all objects +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_uuid} + +{\bf Overview:} +Get the uuid field of the given DSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} string get_uuid (session_id s, DSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_VM} + +{\bf Overview:} +Get the VM field of the given DSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} (VM ref) get_VM (session_id s, DSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +VM ref +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_PSCSI} + +{\bf Overview:} +Get the PSCSI field of the given DSCSI. 
+ + \noindent {\bf Signature:} +\begin{verbatim} (PSCSI ref) get_PSCSI (session_id s, DSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +PSCSI ref +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_virtual\_host} + +{\bf Overview:} +Get the virtual\_host field of the given DSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_virtual_host (session_id s, DSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_virtual\_channel} + +{\bf Overview:} +Get the virtual\_channel field of the given DSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_virtual_channel (session_id s, DSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_virtual\_target} + +{\bf Overview:} +Get the virtual\_target field of the given DSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_virtual_target (session_id s, DSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_virtual\_lun} + +{\bf Overview:} +Get the virtual\_lun field of the given DSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_virtual_lun (session_id s, DSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_virtual\_HCTL} + +{\bf Overview:} +Get the virtual\_HCTL field of the given DSCSI. 
+ + \noindent {\bf Signature:} +\begin{verbatim} string get_virtual_HCTL (session_id s, DSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_runtime\_properties} + +{\bf Overview:} +Get the runtime\_properties field of the given DSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} ((string -> string) Map) get_runtime_properties (session_id s, DSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +(string $\rightarrow$ string) Map +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~create} + +{\bf Overview:} +Create a new DSCSI instance, and return its handle. + + \noindent {\bf Signature:} +\begin{verbatim} (DSCSI ref) create (session_id s, DSCSI record args)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DSCSI record } & args & All constructor arguments \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +DSCSI ref +} + + +reference to the newly created object +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~destroy} + +{\bf Overview:} +Destroy the specified DSCSI instance. + + \noindent {\bf Signature:} +\begin{verbatim} void destroy (session_id s, DSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +void +} + + +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_by\_uuid} + +{\bf Overview:} +Get a reference to the DSCSI instance with the specified UUID. + + \noindent {\bf Signature:} +\begin{verbatim} (DSCSI ref) get_by_uuid (session_id s, string uuid)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt string } & uuid & UUID of object to return \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +DSCSI ref +} + + +reference to the object +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_record} + +{\bf Overview:} +Get a record containing the current state of the given DSCSI. 
+ + \noindent {\bf Signature:} +\begin{verbatim} (DSCSI record) get_record (session_id s, DSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt DSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +DSCSI record +} + + +all fields from the object +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} + +\vspace{1cm} +\newpage +\section{Class: PSCSI} +\subsection{Fields for class: PSCSI} +\begin{longtable}{|lllp{0.38\textwidth}|} +\hline +\multicolumn{1}{|l}{Name} & \multicolumn{3}{l|}{\bf PSCSI} \\ +\multicolumn{1}{|l}{Description} & \multicolumn{3}{l|}{\parbox{11cm}{\em A +physical SCSI device.}} \\ +\hline +Quals & Field & Type & Description \\ +\hline +$\mathit{RO}_\mathit{run}$ & {\tt uuid} & string & unique identifier/object reference \\ +$\mathit{RO}_\mathit{run}$ & {\tt host} & host ref & the physical machine to which this PSCSI is connected \\ +$\mathit{RO}_\mathit{run}$ & {\tt physical\_host} & int & the physical host number \\ +$\mathit{RO}_\mathit{run}$ & {\tt physical\_channel} & int & the physical channel number \\ +$\mathit{RO}_\mathit{run}$ & {\tt physical\_target} & int & the physical target number \\ +$\mathit{RO}_\mathit{run}$ & {\tt physical\_lun} & int & the physical logical unit number \\ +$\mathit{RO}_\mathit{run}$ & {\tt physical\_HCTL} & string & the physical HCTL \\ +$\mathit{RO}_\mathit{run}$ & {\tt vendor\_name} & string & the vendor name \\ +$\mathit{RO}_\mathit{run}$ & {\tt model} & string & the model \\ +$\mathit{RO}_\mathit{run}$ & {\tt type\_id} & int & the SCSI type ID \\ +$\mathit{RO}_\mathit{run}$ & {\tt type} & string & the SCSI type \\ +$\mathit{RO}_\mathit{run}$ & {\tt dev\_name} & string & the SCSI device name (e.g. sda or st0) \\ +$\mathit{RO}_\mathit{run}$ & {\tt sg\_name} & string & the SCSI generic device name (e.g. sg0) \\ +$\mathit{RO}_\mathit{run}$ & {\tt revision} & string & the revision \\ +$\mathit{RO}_\mathit{run}$ & {\tt scsi\_id} & string & the SCSI ID \\ +$\mathit{RO}_\mathit{run}$ & {\tt scsi\_level} & int & the SCSI level \\ +\hline +\end{longtable} +\subsection{RPCs associated with class: PSCSI} +\subsubsection{RPC name:~get\_all} + +{\bf Overview:} +Return a list of all the PSCSIs known to the system. + + \noindent {\bf Signature:} +\begin{verbatim} ((PSCSI ref) Set) get_all (session_id s)\end{verbatim} + + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +(PSCSI ref) Set +} + + +references to all objects +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_uuid} + +{\bf Overview:} +Get the uuid field of the given PSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} string get_uuid (session_id s, PSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_host} + +{\bf Overview:} +Get the host field of the given PSCSI. 
+ + \noindent {\bf Signature:} +\begin{verbatim} (host ref) get_host (session_id s, PSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +host ref +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_physical\_host} + +{\bf Overview:} +Get the physical\_host field of the given PSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_physical_host (session_id s, PSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_physical\_channel} + +{\bf Overview:} +Get the physical\_channel field of the given PSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_physical_channel (session_id s, PSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_physical\_target} + +{\bf Overview:} +Get the physical\_target field of the given PSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_physical_target (session_id s, PSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_physical\_lun} + +{\bf Overview:} +Get the physical\_lun field of the given PSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_physical_lun (session_id s, PSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_physical\_HCTL} + +{\bf Overview:} +Get the physical\_HCTL field of the given PSCSI. 
+ + \noindent {\bf Signature:} +\begin{verbatim} string get_physical_HCTL (session_id s, PSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_vendor\_name} + +{\bf Overview:} +Get the vendor\_name field of the given PSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} string get_vendor_name (session_id s, PSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_model} + +{\bf Overview:} +Get the model field of the given PSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} string get_model (session_id s, PSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_type\_id} + +{\bf Overview:} +Get the type\_id field of the given PSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_type_id (session_id s, PSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_type} + +{\bf Overview:} +Get the type field of the given PSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} string get_type (session_id s, PSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_dev\_name} + +{\bf Overview:} +Get the dev\_name field of the given PSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} string get_dev_name (session_id s, PSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_sg\_name} + +{\bf Overview:} +Get the sg\_name field of the given PSCSI. 
+ + \noindent {\bf Signature:} +\begin{verbatim} string get_sg_name (session_id s, PSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_revision} + +{\bf Overview:} +Get the revision field of the given PSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} string get_revision (session_id s, PSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_scsi\_id} + +{\bf Overview:} +Get the scsi\_id field of the given PSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} string get_scsi_id (session_id s, PSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_scsi\_level} + +{\bf Overview:} +Get the scsi\_level field of the given PSCSI. + + \noindent {\bf Signature:} +\begin{verbatim} int get_scsi_level (session_id s, PSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +int +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_by\_uuid} + +{\bf Overview:} +Get a reference to the PSCSI instance with the specified UUID. + + \noindent {\bf Signature:} +\begin{verbatim} (PSCSI ref) get_by_uuid (session_id s, string uuid)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt string } & uuid & UUID of object to return \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +PSCSI ref +} + + +reference to the object +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_record} + +{\bf Overview:} +Get a record containing the current state of the given PSCSI. 
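The DSCSI and PSCSI calls documented in this patch are ordinary Xen-API RPCs, so they can be driven from any XML-RPC client. The following is a minimal sketch, not part of the changeset: it assumes a xend with its Xen-API (XML-RPC) server enabled, a reachable endpoint URL, valid credentials, and the usual {'Status': ..., 'Value': ...} response wrapping; only host.get_PSCSIs, PSCSI.get_record, VM.get_DSCSIs and DSCSI.get_record come from the data model itself.

    # Minimal Xen-API client sketch; the endpoint, credentials and the
    # 'Value' unwrapping are assumptions, the RPC names come from the
    # data model documented in this patch.
    import xmlrpclib

    XAPI_URL = 'http://localhost:9363/'   # hypothetical xend Xen-API endpoint
    server = xmlrpclib.ServerProxy(XAPI_URL)

    session = server.session.login_with_password('user', 'password')['Value']
    try:
        # Physical SCSI devices seen by each host.
        for host_ref in server.host.get_all(session)['Value']:
            for pscsi_ref in server.host.get_PSCSIs(session, host_ref)['Value']:
                pscsi = server.PSCSI.get_record(session, pscsi_ref)['Value']
                print pscsi['physical_HCTL'], pscsi['vendor_name'], pscsi['model']

        # Half-virtualized SCSI devices assigned to each VM.
        for vm_ref in server.VM.get_all(session)['Value']:
            for dscsi_ref in server.VM.get_DSCSIs(session, vm_ref)['Value']:
                dscsi = server.DSCSI.get_record(session, dscsi_ref)['Value']
                print dscsi['virtual_HCTL'], '->', dscsi['PSCSI']
    finally:
        server.session.logout(session)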
+ + \noindent {\bf Signature:} +\begin{verbatim} (PSCSI record) get_record (session_id s, PSCSI ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt PSCSI ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +PSCSI record +} + + +all fields from the object +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} + +\vspace{1cm} +\newpage \section{Class: user} \subsection{Fields for class: user} \begin{longtable}{|lllp{0.38\textwidth}|} diff -r 6583186e5989 -r 46d7e12c4c91 tools/blktap/drivers/block-qcow.c --- a/tools/blktap/drivers/block-qcow.c Wed Oct 22 11:38:22 2008 +0900 +++ b/tools/blktap/drivers/block-qcow.c Wed Oct 22 11:46:55 2008 +0900 @@ -734,8 +734,8 @@ static int tdqcow_open (struct disk_driv DPRINTF("QCOW: Opening %s\n",name); - o_flags = O_DIRECT | O_LARGEFILE | - ((flags == TD_RDONLY) ? O_RDONLY : O_RDWR); + /* Since we don't handle O_DIRECT correctly, don't use it */ + o_flags = O_LARGEFILE | ((flags == TD_RDONLY) ? O_RDONLY : O_RDWR); fd = open(name, o_flags); if (fd < 0) { DPRINTF("Unable to open %s (%d)\n",name,0 - errno); @@ -1385,7 +1385,7 @@ static int tdqcow_get_parent_id(struct d filename[len] = '\0'; id->name = strdup(filename); - id->drivertype = DISK_TYPE_QCOW; + id->drivertype = DISK_TYPE_AIO; err = 0; out: free(buf); @@ -1397,17 +1397,15 @@ static int tdqcow_validate_parent(struct { struct stat stats; uint64_t psize, csize; - struct tdqcow_state *c = (struct tdqcow_state *)child->private; - struct tdqcow_state *p = (struct tdqcow_state *)parent->private; - - if (stat(p->name, &stats)) + + if (stat(parent->name, &stats)) return -EINVAL; - if (get_filesize(p->name, &psize, &stats)) + if (get_filesize(parent->name, &psize, &stats)) return -EINVAL; - if (stat(c->name, &stats)) + if (stat(child->name, &stats)) return -EINVAL; - if (get_filesize(c->name, &csize, &stats)) + if (get_filesize(child->name, &csize, &stats)) return -EINVAL; if (csize != psize) diff -r 6583186e5989 -r 46d7e12c4c91 tools/blktap/drivers/block-qcow2.c --- a/tools/blktap/drivers/block-qcow2.c Wed Oct 22 11:38:22 2008 +0900 +++ b/tools/blktap/drivers/block-qcow2.c Wed Oct 22 11:46:55 2008 +0900 @@ -34,6 +34,7 @@ #include "tapdisk.h" #include "tapaio.h" #include "bswap.h" +#include "blk.h" #define USE_AIO @@ -1902,6 +1903,42 @@ repeat: #endif +static int get_filesize(char *filename, uint64_t *size, struct stat *st) +{ + int fd; + QCowHeader header; + + /*Set to the backing file size*/ + fd = open(filename, O_RDONLY); + if (fd < 0) + return -1; + if (read(fd, &header, sizeof(header)) < sizeof(header)) { + close(fd); + return -1; + } + close(fd); + + be32_to_cpus(&header.magic); + be32_to_cpus(&header.version); + be64_to_cpus(&header.size); + if (header.magic == QCOW_MAGIC && header.version == QCOW_VERSION) { + *size = header.size >> SECTOR_SHIFT; + return 0; + } + + if(S_ISBLK(st->st_mode)) { + fd = open(filename, O_RDONLY); + if (fd < 0) + return -1; + if (blk_getimagesize(fd, size) != 0) { + close(fd); + return -1; + } + close(fd); + } else *size = (st->st_size >> SECTOR_SHIFT); + return 0; +} + /** * @return * 0 if parent id successfully retrieved; @@ -1916,7 +1953,7 @@ static int qcow_get_parent_id(struct dis return TD_NO_PARENT; id->name = strdup(s->backing_file); - id->drivertype = DISK_TYPE_QCOW2; + id->drivertype = DISK_TYPE_AIO; return 0; } @@ -1924,15 +1961,22 @@ static int qcow_validate_parent(struct d 
static int qcow_validate_parent(struct disk_driver *child, struct disk_driver *parent, td_flag_t flags) { - struct BDRVQcowState *cs = (struct BDRVQcowState*) child->private; - struct BDRVQcowState *ps = (struct BDRVQcowState*) parent->private; - - if (ps->total_sectors != cs->total_sectors) { - DPRINTF("qcow_validate_parent(): %#"PRIx64" != %#"PRIx64"\n", - ps->total_sectors, cs->total_sectors); + struct stat stats; + uint64_t psize, csize; + + if (stat(parent->name, &stats)) return -EINVAL; - } - + if (get_filesize(parent->name, &psize, &stats)) + return -EINVAL; + + if (stat(child->name, &stats)) + return -EINVAL; + if (get_filesize(child->name, &csize, &stats)) + return -EINVAL; + + if (csize != psize) + return -EINVAL; + return 0; } diff -r 6583186e5989 -r 46d7e12c4c91 tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Wed Oct 22 11:38:22 2008 +0900 +++ b/tools/libxc/xc_domain.c Wed Oct 22 11:46:55 2008 +0900 @@ -1049,6 +1049,18 @@ int xc_domain_get_machine_address_size(i return rc == 0 ? domctl.u.address_size.size : rc; } +int xc_domain_suppress_spurious_page_faults(int xc, uint32_t domid) +{ + DECLARE_DOMCTL; + + memset(&domctl, 0, sizeof(domctl)); + domctl.domain = domid; + domctl.cmd = XEN_DOMCTL_suppress_spurious_page_faults; + + return do_domctl(xc, &domctl); + +} + /* * Local variables: * mode: C diff -r 6583186e5989 -r 46d7e12c4c91 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Wed Oct 22 11:38:22 2008 +0900 +++ b/tools/libxc/xenctrl.h Wed Oct 22 11:46:55 2008 +0900 @@ -1103,6 +1103,9 @@ int xc_domain_get_machine_address_size(i int xc_domain_get_machine_address_size(int handle, uint32_t domid); +int xc_domain_suppress_spurious_page_faults(int handle, + uint32_t domid); + /* Set the target domain */ int xc_domain_set_target(int xc_handle, uint32_t domid, diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Wed Oct 22 11:38:22 2008 +0900 +++ b/tools/python/xen/lowlevel/xc/xc.c Wed Oct 22 11:46:55 2008 +0900 @@ -859,6 +859,21 @@ static PyObject *pyxc_dom_set_machine_ad return zero; } +static PyObject *pyxc_dom_suppress_spurious_page_faults(XcObject *self, + PyObject *args, + PyObject *kwds) +{ + uint32_t dom; + + if (!PyArg_ParseTuple(args, "i", &dom)) + return NULL; + + if (xc_domain_suppress_spurious_page_faults(self->xc_handle, dom) != 0) + return pyxc_error_to_exception(); + + Py_INCREF(zero); + return zero; +} #endif /* __i386__ || __x86_64__ */ static PyObject *pyxc_hvm_build(XcObject *self, @@ -1911,6 +1926,12 @@ static PyMethodDef pyxc_methods[] = { "Set maximum machine address size for this domain.\n" " dom [int]: Identifier of domain.\n" " width [int]: Maximum machine address width.\n" }, + + { "domain_suppress_spurious_page_faults", + (PyCFunction)pyxc_dom_suppress_spurious_page_faults, + METH_VARARGS, "\n" + "Do not propagate spurious page faults to this guest.\n" + " dom [int]: Identifier of domain.\n" }, #endif { NULL, NULL, 0, NULL } diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/util/pci.py --- a/tools/python/xen/util/pci.py Wed Oct 22 11:38:22 2008 +0900 +++ b/tools/python/xen/util/pci.py Wed Oct 22 11:46:55 2008 +0900 @@ -12,8 +12,8 @@ import types import types import struct import time - -PROC_MNT_PATH = '/proc/mounts' +from xen.util import utils + PROC_PCI_PATH = '/proc/bus/pci/devices' PROC_PCI_NUM_RESOURCES = 7 @@ -97,9 +97,6 @@ MSIX_SIZE_MASK = 0x7ff # Global variable to store information from lspci lspci_info = None -# Global variable to store the sysfs mount point -sysfs_mnt_point = 
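The xc_domain_suppress_spurious_page_faults() domctl wrapper and its Python binding added above are compiled only on x86. A short usage sketch follows; only the domain_suppress_spurious_page_faults() method name and its single domain-id argument come from this patch, while the handle creation and the domid value are ordinary (and here hypothetical) xen.lowlevel.xc usage.

    # Sketch: ask Xen not to propagate spurious page faults to one guest.
    from xen.lowlevel import xc

    xc_handle = xc.xc()          # hypervisor control interface handle
    domid = 1                    # hypothetical domain id

    # Returns 0 on success; raises an exception on failure
    # (via pyxc_error_to_exception, as in the other wrappers).
    xc_handle.domain_suppress_spurious_page_faults(domid)

In xend itself this is presumably driven by the new 'suppress_spurious_page_faults' configuration key registered in XendConfig.py later in this patch.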
None - #Calculate PAGE_SHIFT: number of bits to shift an address to get the page number PAGE_SIZE = resource.getpagesize() PAGE_SHIFT = 0 @@ -141,20 +138,8 @@ def parse_pci_name(pci_name_string): def find_sysfs_mnt(): - global sysfs_mnt_point - if not sysfs_mnt_point is None: - return sysfs_mnt_point - try: - mounts_file = open(PROC_MNT_PATH,'r') - - for line in mounts_file: - sline = line.split() - if len(sline)<3: - continue - if sline[2]=='sysfs': - sysfs_mnt_point= sline[1] - return sysfs_mnt_point + return utils.find_sysfs_mount() except IOError, (errno, strerr): raise PciDeviceParseError(('Failed to locate sysfs mount: %s: %s (%d)'% (PROC_PCI_PATH, strerr, errno))) diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/util/utils.py --- a/tools/python/xen/util/utils.py Wed Oct 22 11:38:22 2008 +0900 +++ b/tools/python/xen/util/utils.py Wed Oct 22 11:46:55 2008 +0900 @@ -48,3 +48,29 @@ def daemonize(prog, args, stdin_tmpfile= os.waitpid(pid, 0) return daemon_pid +# Global variable to store the sysfs mount point +sysfs_mount_point = None + +PROC_MOUNTS_PATH = '/proc/mounts' + +def find_sysfs_mount(): + global sysfs_mount_point + + if not sysfs_mount_point is None: + return sysfs_mount_point + + try: + mounts_file = open(PROC_MOUNTS_PATH, 'r') + + for line in mounts_file: + sline = line.split() + if len(sline) < 3: + continue + if sline[2] == 'sysfs': + sysfs_mount_point= sline[1] + return sysfs_mount_point + except IOError, (errno, strerr): + raise + + return None + diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/util/vscsi_util.py --- a/tools/python/xen/util/vscsi_util.py Wed Oct 22 11:38:22 2008 +0900 +++ b/tools/python/xen/util/vscsi_util.py Wed Oct 22 11:46:55 2008 +0900 @@ -23,32 +23,40 @@ """Support for VSCSI Devices. """ import os +import os.path import sys import re import string - -def _vscsi_hctl_block(name, scsi_devices): - """ block-device name is convert into hctl. (e.g., '/dev/sda', - '0:0:0:0')""" +from xen.util import utils + +SYSFS_SCSI_PATH = "/bus/scsi/devices" +SYSFS_SCSI_DEV_VENDOR_PATH = '/vendor' +SYSFS_SCSI_DEV_MODEL_PATH = '/model' +SYSFS_SCSI_DEV_TYPEID_PATH = '/type' +SYSFS_SCSI_DEV_REVISION_PATH = '/rev' +SYSFS_SCSI_DEV_SCSILEVEL_PATH = '/scsi_level' + +def _vscsi_get_devname_by(name, scsi_devices): + """A device name is gotten by the HCTL. + (e.g., '0:0:0:0' to '/dev/sda') + """ + try: search = re.compile(r'' + name + '$', re.DOTALL) except Exception, e: raise VmError("vscsi: invalid expression. " + str(e)) - chk = 0 - for hctl, block, sg, scsi_id in scsi_devices: + + for hctl, devname, sg, scsi_id in scsi_devices: if search.match(hctl): - chk = 1 - break - - if chk: - return (hctl, block) - else: - return (None, None) - - -def _vscsi_block_scsiid_to_hctl(phyname, scsi_devices): - """ block-device name is convert into hctl. (e.g., '/dev/sda', - '0:0:0:0')""" + return (hctl, devname) + + return (None, None) + + +def _vscsi_get_hctl_by(phyname, scsi_devices): + """An HCTL is gotten by the device name or the scsi_id. 
+ (e.g., '/dev/sda' to '0:0:0:0') + """ if re.match('/dev/sd[a-z]+([1-9]|1[0-5])?$', phyname): # sd driver @@ -63,71 +71,148 @@ def _vscsi_block_scsiid_to_hctl(phyname, # scsi_id -gu name = phyname - chk = 0 - for hctl, block, sg, scsi_id in scsi_devices: - if block == name: - chk = 1 - break - elif sg == name: - chk = 1 - break - elif scsi_id == name: - chk = 1 - break - - if chk: - return (hctl, block) - else: - return (None, None) + for hctl, devname, sg, scsi_id in scsi_devices: + if name in [devname, sg, scsi_id]: + return (hctl, devname) + + return (None, None) def vscsi_get_scsidevices(): """ get all scsi devices""" - SERCH_SCSI_PATH = "/sys/bus/scsi/devices" devices = [] - - for dirpath, dirnames, files in os.walk(SERCH_SCSI_PATH): + sysfs_mnt = utils.find_sysfs_mount() + + for dirpath, dirnames, files in os.walk(sysfs_mnt + SYSFS_SCSI_PATH): for hctl in dirnames: paths = os.path.join(dirpath, hctl) - block = "-" + devname = None + sg = None + scsi_id = None for f in os.listdir(paths): - if re.match('^block', f): - os.chdir(os.path.join(paths, f)) - block = os.path.basename(os.getcwd()) - elif re.match('^tape', f): - os.chdir(os.path.join(paths, f)) - block = os.path.basename(os.getcwd()) - elif re.match('^scsi_changer', f): - os.chdir(os.path.join(paths, f)) - block = os.path.basename(os.getcwd()) - elif re.match('^onstream_tape', f): - os.chdir(os.path.join(paths, f)) - block = os.path.basename(os.getcwd()) + realpath = os.path.realpath(os.path.join(paths, f)) + if re.match('^block', f) or \ + re.match('^tape', f) or \ + re.match('^scsi_changer', f) or \ + re.match('^onstream_tape', f): + devname = os.path.basename(realpath) if re.match('^scsi_generic', f): - os.chdir(os.path.join(paths, f)) - sg = os.path.basename(os.getcwd()) + sg = os.path.basename(realpath) lines = os.popen('/sbin/scsi_id -gu -s /class/scsi_generic/' + sg).read().split() - if len(lines) == 0: - scsi_id = '-' - else: + if len(lines): scsi_id = lines[0] - devices.append([hctl, block, sg, scsi_id]) + devices.append([hctl, devname, sg, scsi_id]) return devices -def vscsi_search_hctl_and_block(device): - - scsi_devices = vscsi_get_scsidevices() - - tmp = device.split(':') - if len(tmp) == 4: - (hctl, block) = _vscsi_hctl_block(device, scsi_devices) +def vscsi_get_hctl_and_devname_by(target, scsi_devices = None): + if scsi_devices is None: + scsi_devices = vscsi_get_scsidevices() + + if len(target.split(':')) == 4: + return _vscsi_get_devname_by(target, scsi_devices) else: - (hctl, block) = _vscsi_block_scsiid_to_hctl(device, scsi_devices) - - return (hctl, block) - + return _vscsi_get_hctl_by(target, scsi_devices) + + +def get_scsi_vendor(pHCTL): + try: + sysfs_mnt = utils.find_sysfs_mount() + sysfs_scsi_dev_path = \ + os.path.join(sysfs_mnt + SYSFS_SCSI_PATH, pHCTL) + scsi_vendor = \ + os.popen('cat ' + sysfs_scsi_dev_path + \ + SYSFS_SCSI_DEV_VENDOR_PATH).read() + return scsi_vendor.splitlines()[0] + except: + return None + +def get_scsi_model(pHCTL): + try: + sysfs_mnt = utils.find_sysfs_mount() + sysfs_scsi_dev_path = \ + os.path.join(sysfs_mnt + SYSFS_SCSI_PATH, pHCTL) + scsi_model = \ + os.popen('cat ' + sysfs_scsi_dev_path + \ + SYSFS_SCSI_DEV_MODEL_PATH).read() + return scsi_model.splitlines()[0] + except: + return None + +def get_scsi_typeid(pHCTL): + try: + sysfs_mnt = utils.find_sysfs_mount() + sysfs_scsi_dev_path = \ + os.path.join(sysfs_mnt + SYSFS_SCSI_PATH, pHCTL) + scsi_typeid = \ + os.popen('cat ' + sysfs_scsi_dev_path + \ + SYSFS_SCSI_DEV_TYPEID_PATH).read() + return 
int(scsi_typeid.splitlines()[0]) + except: + return None + +def get_scsi_revision(pHCTL): + try: + sysfs_mnt = utils.find_sysfs_mount() + sysfs_scsi_dev_path = \ + os.path.join(sysfs_mnt + SYSFS_SCSI_PATH, pHCTL) + scsi_revision = \ + os.popen('cat ' + sysfs_scsi_dev_path + \ + SYSFS_SCSI_DEV_REVISION_PATH).read() + return scsi_revision.splitlines()[0] + except: + return None + +def get_scsi_scsilevel(pHCTL): + try: + sysfs_mnt = utils.find_sysfs_mount() + sysfs_scsi_dev_path = \ + os.path.join(sysfs_mnt + SYSFS_SCSI_PATH, pHCTL) + scsi_scsilevel = \ + os.popen('cat ' + sysfs_scsi_dev_path + \ + SYSFS_SCSI_DEV_SCSILEVEL_PATH).read() + return int(scsi_scsilevel.splitlines()[0]) + except: + return None + +def get_all_scsi_devices(): + + scsi_devs = [] + + for scsi_info in vscsi_get_scsidevices(): + scsi_dev = { + 'physical_HCTL': scsi_info[0], + 'dev_name': None, + 'sg_name': scsi_info[2], + 'scsi_id': None + } + if scsi_info[1] is not None: + scsi_dev['dev_name'] = scsi_info[1] + if scsi_info[3] is not None: + scsi_dev['scsi_id'] = scsi_info[3] + + scsi_dev['vendor_name'] = \ + get_scsi_vendor(scsi_dev['physical_HCTL']) + scsi_dev['model'] = \ + get_scsi_model(scsi_dev['physical_HCTL']) + scsi_dev['type_id'] = \ + get_scsi_typeid(scsi_dev['physical_HCTL']) + scsi_dev['revision'] = \ + get_scsi_revision(scsi_dev['physical_HCTL']) + scsi_dev['scsi_level'] = \ + get_scsi_scsilevel(scsi_dev['physical_HCTL']) + + try: + lsscsi_info = os.popen('lsscsi ' + scsi_dev['physical_HCTL']).read().split() + scsi_dev['type'] = lsscsi_info[1] + except: + scsi_dev['type'] = None + + scsi_devs.append(scsi_dev) + + return scsi_devs + diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xend/XendAPI.py --- a/tools/python/xen/xend/XendAPI.py Wed Oct 22 11:38:22 2008 +0900 +++ b/tools/python/xen/xend/XendAPI.py Wed Oct 22 11:46:55 2008 +0900 @@ -42,6 +42,8 @@ from XendPBD import XendPBD from XendPBD import XendPBD from XendPPCI import XendPPCI from XendDPCI import XendDPCI +from XendPSCSI import XendPSCSI +from XendDSCSI import XendDSCSI from XendXSPolicy import XendXSPolicy, XendACMPolicy from XendAPIConstants import * @@ -480,7 +482,9 @@ classes = { 'PBD' : valid_object("PBD"), 'PIF_metrics' : valid_object("PIF_metrics"), 'PPCI' : valid_object("PPCI"), - 'DPCI' : valid_object("DPCI") + 'DPCI' : valid_object("DPCI"), + 'PSCSI' : valid_object("PSCSI"), + 'DSCSI' : valid_object("DSCSI") } autoplug_classes = { @@ -491,6 +495,8 @@ autoplug_classes = { 'PIF_metrics' : XendPIFMetrics, 'PPCI' : XendPPCI, 'DPCI' : XendDPCI, + 'PSCSI' : XendPSCSI, + 'DSCSI' : XendDSCSI, 'XSPolicy' : XendXSPolicy, 'ACMPolicy' : XendACMPolicy, } @@ -881,6 +887,7 @@ class XendAPI(object): 'PBDs', 'PIFs', 'PPCIs', + 'PSCSIs', 'host_CPUs', 'cpu_configuration', 'metrics', @@ -961,6 +968,8 @@ class XendAPI(object): return xen_api_success(XendNode.instance().get_PIF_refs()) def host_get_PPCIs(self, session, ref): return xen_api_success(XendNode.instance().get_PPCI_refs()) + def host_get_PSCSIs(self, session, ref): + return xen_api_success(XendNode.instance().get_PSCSI_refs()) def host_get_host_CPUs(self, session, host_ref): return xen_api_success(XendNode.instance().get_host_cpu_refs()) def host_get_metrics(self, _, ref): @@ -1037,7 +1046,8 @@ class XendAPI(object): 'logging': {}, 'PIFs': XendPIF.get_all(), 'PBDs': XendPBD.get_all(), - 'PPCIs': XendPPCI.get_all()} + 'PPCIs': XendPPCI.get_all(), + 'PSCSIs': XendPSCSI.get_all()} return xen_api_success(record) # class methods @@ -1158,6 +1168,7 @@ class XendAPI(object): 'VBDs', 'VTPMs', 'DPCIs', + 
'DSCSIs', 'tools_version', 'domid', 'is_control_domain', @@ -1304,6 +1315,10 @@ class XendAPI(object): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) return xen_api_success(dom.get_dpcis()) + def VM_get_DSCSIs(self, session, vm_ref): + dom = XendDomain.instance().get_vm_by_uuid(vm_ref) + return xen_api_success(dom.get_dscsis()) + def VM_get_tools_version(self, session, vm_ref): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) return dom.get_tools_version() @@ -1684,6 +1699,7 @@ class XendAPI(object): 'VBDs': xeninfo.get_vbds(), 'VTPMs': xeninfo.get_vtpms(), 'DPCIs': xeninfo.get_dpcis(), + 'DSCSIs': xeninfo.get_dscsis(), 'PV_bootloader': xeninfo.info.get('PV_bootloader'), 'PV_kernel': xeninfo.info.get('PV_kernel'), 'PV_ramdisk': xeninfo.info.get('PV_ramdisk'), diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xend/XendConfig.py --- a/tools/python/xen/xend/XendConfig.py Wed Oct 22 11:38:22 2008 +0900 +++ b/tools/python/xen/xend/XendConfig.py Wed Oct 22 11:46:55 2008 +0900 @@ -26,6 +26,8 @@ from xen.xend import XendAPIStore from xen.xend import XendAPIStore from xen.xend.XendPPCI import XendPPCI from xen.xend.XendDPCI import XendDPCI +from xen.xend.XendPSCSI import XendPSCSI +from xen.xend.XendDSCSI import XendDSCSI from xen.xend.XendError import VmError from xen.xend.XendDevices import XendDevices from xen.xend.PrettyPrint import prettyprintstring @@ -210,6 +212,7 @@ XENAPI_CFG_TYPES = { 'cpuid' : dict, 'cpuid_check' : dict, 'machine_address_size': int, + 'suppress_spurious_page_faults': bool0, } # List of legacy configuration keys that have no equivalent in the @@ -781,8 +784,8 @@ class XendConfig(dict): log.debug('_sxp_to_xapi(%s)' % scrub_password(sxp_cfg)) # _parse_sxp() below will call device_add() and construct devices. - # Some devices (currently only pci) may require VM's uuid, so - # setup self['uuid'] beforehand. + # Some devices may require VM's uuid, so setup self['uuid'] + # beforehand. self['uuid'] = sxp.child_value(sxp_cfg, 'uuid', uuid.createString()) cfg = self._parse_sxp(sxp_cfg) @@ -1221,29 +1224,28 @@ class XendConfig(dict): dev_type = sxp.name(config) dev_info = {} - if dev_type == 'pci' or dev_type == 'vscsi': + if dev_type == 'pci': pci_devs_uuid = sxp.child_value(config, 'uuid', uuid.createString()) pci_dict = self.pci_convert_sxp_to_dict(config) pci_devs = pci_dict['devs'] - if dev_type != 'vscsi': - # create XenAPI DPCI objects. - for pci_dev in pci_devs: - dpci_uuid = pci_dev.get('uuid') - ppci_uuid = XendPPCI.get_by_sbdf(pci_dev['domain'], - pci_dev['bus'], - pci_dev['slot'], - pci_dev['func']) - if ppci_uuid is None: - continue - dpci_record = { - 'VM': self['uuid'], - 'PPCI': ppci_uuid, - 'hotplug_slot': pci_dev.get('vslot', 0) - } - XendDPCI(dpci_uuid, dpci_record) + # create XenAPI DPCI objects. + for pci_dev in pci_devs: + dpci_uuid = pci_dev.get('uuid') + ppci_uuid = XendPPCI.get_by_sbdf(pci_dev['domain'], + pci_dev['bus'], + pci_dev['slot'], + pci_dev['func']) + if ppci_uuid is None: + continue + dpci_record = { + 'VM': self['uuid'], + 'PPCI': ppci_uuid, + 'hotplug_slot': pci_dev.get('vslot', 0) + } + XendDPCI(dpci_uuid, dpci_record) target['devices'][pci_devs_uuid] = (dev_type, {'devs': pci_devs, @@ -1252,6 +1254,30 @@ class XendConfig(dict): log.debug("XendConfig: reading device: %s" % pci_devs) return pci_devs_uuid + + if dev_type == 'vscsi': + vscsi_devs_uuid = sxp.child_value(config, 'uuid', + uuid.createString()) + vscsi_dict = self.vscsi_convert_sxp_to_dict(config) + vscsi_devs = vscsi_dict['devs'] + + # create XenAPI DSCSI objects. 
+ for vscsi_dev in vscsi_devs: + dscsi_uuid = vscsi_dev.get('uuid') + pscsi_uuid = XendPSCSI.get_by_HCTL(vscsi_dev['p-dev']) + if pscsi_uuid is None: + continue + dscsi_record = { + 'VM': self['uuid'], + 'PSCSI': pscsi_uuid, + 'virtual_HCTL': vscsi_dev.get('v-dev') + } + XendDSCSI(dscsi_uuid, dscsi_record) + + target['devices'][vscsi_devs_uuid] = \ + (dev_type, {'devs': vscsi_devs, 'uuid': vscsi_devs_uuid} ) + log.debug("XendConfig: reading device: %s" % vscsi_devs) + return vscsi_devs_uuid for opt_val in config[1:]: try: @@ -1558,6 +1584,86 @@ class XendConfig(dict): return dev_config + def vscsi_convert_sxp_to_dict(self, dev_sxp): + """Convert vscsi device sxp to dict + @param dev_sxp: device configuration + @type dev_sxp: SXP object (parsed config) + @return: dev_config + @rtype: dictionary + """ + # Parsing the device SXP's. In most cases, the SXP looks + # like this: + # + # [device, [vif, [mac, xx:xx:xx:xx:xx:xx], [ip 1.3.4.5]]] + # + # However, for SCSI devices it looks like this: + # + # [device, + # [vscsi, + # [dev, + # [devid, 0], [p-devname, sdb], [p-dev, 1:0:0:1], + # [v-dev, 0:0:0:0], [state, Initialising] + # ], + # [dev, + # [devid, 0], [p-devname, sdc], [p-dev, 1:0:0:2], + # [v-dev, 0:0:0:1], [satet, Initialising] + # ] + # ], + # [vscsi, + # [dev, + # [devid, 1], [p-devname, sdg], [p-dev, 2:0:0:0], + # [v-dev, 1:0:0:0], [state, Initialising] + # ], + # [dev, + # [devid, 1], [p-devname, sdh], [p-dev, 2:0:0:1], + # [v-dev, 1:0:0:1], [satet, Initialising] + # ] + # ] + # ] + # + # It seems the reasoning for this difference is because + # vscsiif.py needs all the SCSI device configurations with + # same host number at the same time when creating the devices. + + # For SCSI device hotplug support, the SXP of SCSI devices is + # extendend like this: + # + # [device, + # [vscsi, + # [dev, + # [devid, 0], [p-devname, sdd], [p-dev, 1:0:0:3], + # [v-dev, 0:0:0:2], [state, Initialising] + # ] + # ] + # ] + # + # state 'Initialising' indicates that the device is being attached, + # while state 'Closing' indicates that the device is being detached. + # + # The Dict looks like this: + # + # { devs: [ {devid: 0, p-devname: sdd, p-dev: 1:0:0:3, + # v-dev: 0:0:0:2, state: Initialising} ] } + + dev_config = {} + + vscsi_devs = [] + for vscsi_dev in sxp.children(dev_sxp, 'dev'): + vscsi_dev_info = {} + for opt_val in vscsi_dev[1:]: + try: + opt, val = opt_val + vscsi_dev_info[opt] = val + except TypeError: + pass + # append uuid for each vscsi device. + vscsi_uuid = vscsi_dev_info.get('uuid', uuid.createString()) + vscsi_dev_info['uuid'] = vscsi_uuid + vscsi_devs.append(vscsi_dev_info) + dev_config['devs'] = vscsi_devs + + return dev_config + def console_add(self, protocol, location, other_config = {}): dev_uuid = uuid.createString() if protocol == 'vt100': @@ -1631,7 +1737,7 @@ class XendConfig(dict): dev_type, dev_info = self['devices'][dev_uuid] - if dev_type == 'pci' or dev_type == 'vscsi': # Special case for pci + if dev_type == 'pci': # Special case for pci pci_dict = self.pci_convert_sxp_to_dict(config) pci_devs = pci_dict['devs'] @@ -1639,26 +1745,50 @@ class XendConfig(dict): for dpci_uuid in XendDPCI.get_by_VM(self['uuid']): XendAPIStore.deregister(dpci_uuid, "DPCI") - if dev_type != 'vscsi': - # create XenAPI DPCI objects. 
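The comment block inside vscsi_convert_sxp_to_dict() above pins down the expected SXP layout for vscsi devices. As an illustration only (the real conversion is a XendConfig method and also injects a uuid per device, which is omitted here), the same extraction can be sketched with the xen.xend.sxp helpers on a hand-written fragment; the device values below are invented.

    # Standalone sketch of the SXP walk used by vscsi_convert_sxp_to_dict();
    # the 'vscsi' fragment and its values are invented for illustration.
    from xen.xend import sxp

    config = ['vscsi',
              ['dev',
               ['devid', '0'], ['p-devname', 'sdb'], ['p-dev', '1:0:0:1'],
               ['v-dev', '0:0:0:0'], ['state', 'Initialising']]]

    devs = []
    for dev in sxp.children(config, 'dev'):
        info = {}
        for opt_val in dev[1:]:
            try:
                opt, val = opt_val
                info[opt] = val
            except (TypeError, ValueError):
                pass
        devs.append(info)

    print {'devs': devs}
    # expected content (key order may differ):
    # {'devs': [{'devid': '0', 'p-devname': 'sdb', 'p-dev': '1:0:0:1',
    #            'v-dev': '0:0:0:0', 'state': 'Initialising'}]}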
- for pci_dev in pci_devs: - dpci_uuid = pci_dev.get('uuid') - ppci_uuid = XendPPCI.get_by_sbdf(pci_dev['domain'], - pci_dev['bus'], - pci_dev['slot'], - pci_dev['func']) - if ppci_uuid is None: - continue - dpci_record = { - 'VM': self['uuid'], - 'PPCI': ppci_uuid, - 'hotplug_slot': pci_dev.get('vslot', 0) - } - XendDPCI(dpci_uuid, dpci_record) + # create XenAPI DPCI objects. + for pci_dev in pci_devs: + dpci_uuid = pci_dev.get('uuid') + ppci_uuid = XendPPCI.get_by_sbdf(pci_dev['domain'], + pci_dev['bus'], + pci_dev['slot'], + pci_dev['func']) + if ppci_uuid is None: + continue + dpci_record = { + 'VM': self['uuid'], + 'PPCI': ppci_uuid, + 'hotplug_slot': pci_dev.get('vslot', 0) + } + XendDPCI(dpci_uuid, dpci_record) self['devices'][dev_uuid] = (dev_type, {'devs': pci_devs, 'uuid': dev_uuid}) + return True + + if dev_type == 'vscsi': # Special case for vscsi + vscsi_dict = self.vscsi_convert_sxp_to_dict(config) + vscsi_devs = vscsi_dict['devs'] + + # destroy existing XenAPI DSCSI objects + for dscsi_uuid in XendDSCSI.get_by_VM(self['uuid']): + XendAPIStore.deregister(dscsi_uuid, "DSCSI") + + # create XenAPI DSCSI objects. + for vscsi_dev in vscsi_devs: + dscsi_uuid = vscsi_dev.get('uuid') + pscsi_uuid = XendPSCSI.get_by_HCTL(vscsi_dev['p-dev']) + if pscsi_uuid is None: + continue + dscsi_record = { + 'VM': self['uuid'], + 'PSCSI': pscsi_uuid, + 'virtual_HCTL': vscsi_dev.get('v-dev') + } + XendDSCSI(dscsi_uuid, dscsi_record) + + self['devices'][dev_uuid] = \ + (dev_type, {'devs': vscsi_devs, 'uuid': dev_uuid} ) return True for opt_val in config[1:]: diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xend/XendDSCSI.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/python/xen/xend/XendDSCSI.py Wed Oct 22 11:46:55 2008 +0900 @@ -0,0 +1,174 @@ +#============================================================================ +# This library is free software; you can redistribute it and/or +# modify it under the terms of version 2.1 of the GNU Lesser General Public +# License as published by the Free Software Foundation. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +#============================================================================ +# Copyright FUJITSU LIMITED 2008 +# Masaki Kanno <kanno.masaki@xxxxxxxxxxxxxx> +#============================================================================ + +from xen.xend.XendBase import XendBase +from xen.xend.XendPSCSI import XendPSCSI +from xen.xend import XendAPIStore +from xen.xend import sxp +from xen.xend import uuid as genuuid + +import XendDomain, XendNode + +from XendError import * +from XendTask import XendTask +from XendLogging import log + +class XendDSCSI(XendBase): + """Representation of a half-virtualized SCSI device.""" + + def getClass(self): + return "DSCSI" + + def getAttrRO(self): + attrRO = ['VM', + 'PSCSI', + 'virtual_host', + 'virtual_channel', + 'virtual_target', + 'virtual_lun', + 'virtual_HCTL', + 'runtime_properties'] + return XendBase.getAttrRO() + attrRO + + def getAttrRW(self): + attrRW = [] + return XendBase.getAttrRW() + attrRW + + def getAttrInst(self): + attrInst = ['VM', + 'PSCSI', + 'virtual_HCTL'] + return XendBase.getAttrInst() + attrInst + + def getMethods(self): + methods = ['destroy'] + return XendBase.getMethods() + methods + + def getFuncs(self): + funcs = ['create'] + return XendBase.getFuncs() + funcs + + getClass = classmethod(getClass) + getAttrRO = classmethod(getAttrRO) + getAttrRW = classmethod(getAttrRW) + getAttrInst = classmethod(getAttrInst) + getMethods = classmethod(getMethods) + getFuncs = classmethod(getFuncs) + + def create(self, dscsi_struct): + + # Check if VM is valid + xendom = XendDomain.instance() + if not xendom.is_valid_vm(dscsi_struct['VM']): + raise InvalidHandleError('VM', dscsi_struct['VM']) + dom = xendom.get_vm_by_uuid(dscsi_struct['VM']) + + # Check if PSCSI is valid + xennode = XendNode.instance() + pscsi_uuid = xennode.get_pscsi_by_uuid(dscsi_struct['PSCSI']) + if not pscsi_uuid: + raise InvalidHandleError('PSCSI', dscsi_struct['PSCSI']) + + # Assign PSCSI to VM + try: + dscsi_ref = XendTask.log_progress(0, 100, \ + dom.create_dscsi, \ + dscsi_struct) + except XendError, e: + log.exception("Error in create_dscsi") + raise + + return dscsi_ref + + create = classmethod(create) + + def get_by_VM(cls, VM_ref): + result = [] + for dscsi in XendAPIStore.get_all("DSCSI"): + if dscsi.get_VM() == VM_ref: + result.append(dscsi.get_uuid()) + return result + + get_by_VM = classmethod(get_by_VM) + + def __init__(self, uuid, record): + XendBase.__init__(self, uuid, record) + v_hctl = self.virtual_HCTL.split(':') + self.virtual_host = int(v_hctl[0]) + self.virtual_channel = int(v_hctl[1]) + self.virtual_target = int(v_hctl[2]) + self.virtual_lun = int(v_hctl[3]) + + def get_VM(self): + return self.VM + + def get_PSCSI(self): + return self.PSCSI + + def get_virtual_host(self): + return self.virtual_host + + def get_virtual_channel(self): + return self.virtual_channel + + def get_virtual_target(self): + return self.virtual_target + + def get_virtual_lun(self): + return self.virtual_lun + + def get_virtual_HCTL(self): + return self.virtual_HCTL + + def get_runtime_properties(self): + xendom = XendDomain.instance() + dominfo = xendom.get_vm_by_uuid(self.VM) + + try: + device_dict = {} + for device_sxp in dominfo.getDeviceSxprs('vscsi'): + target_dev = None + for dev in device_sxp[1][0][1]: + vdev = sxp.child_value(dev, 'v-dev') + if vdev 
== self.virtual_HCTL: + target_dev = dev + break + if target_dev is None: + continue + + dev_dict = {} + for info in target_dev[1:]: + dev_dict[info[0]] = info[1] + device_dict['dev'] = dev_dict + for info in device_sxp[1][1:]: + device_dict[info[0]] = info[1] + + return device_dict + except Exception, exn: + log.exception(exn) + return {} + + def destroy(self): + xendom = XendDomain.instance() + dom = xendom.get_vm_by_uuid(self.get_VM()) + if not dom: + raise InvalidHandleError("VM", self.get_VM()) + XendTask.log_progress(0, 100, \ + dom.destroy_dscsi, \ + self.get_uuid()) + diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Wed Oct 22 11:38:22 2008 +0900 +++ b/tools/python/xen/xend/XendDomainInfo.py Wed Oct 22 11:46:55 2008 +0900 @@ -55,9 +55,11 @@ from xen.xend.XendAPIConstants import * from xen.xend.XendVMMetrics import XendVMMetrics +from xen.xend import XendAPIStore from xen.xend.XendPPCI import XendPPCI from xen.xend.XendDPCI import XendDPCI -from xen.xend import XendAPIStore +from xen.xend.XendPSCSI import XendPSCSI +from xen.xend.XendDSCSI import XendDSCSI MIGRATE_TIMEOUT = 30.0 BOOTLOADER_LOOPBACK_DEVICE = '/dev/xvdp' @@ -663,6 +665,9 @@ class XendDomainInfo: if dev_type == 'pci': for dev in dev_config_dict['devs']: XendAPIStore.deregister(dev['uuid'], 'DPCI') + if dev_type == 'vscsi': + for dev in dev_config_dict['devs']: + XendAPIStore.deregister(dev['uuid'], 'DSCSI') elif dev_type == 'tap': self.info['vbd_refs'].remove(dev_uuid) else: @@ -786,12 +791,11 @@ class XendDomainInfo: if dev_class != 'vscsi': return False - dev_config = self.info.pci_convert_sxp_to_dict(dev_sxp) + dev_config = self.info.vscsi_convert_sxp_to_dict(dev_sxp) dev = dev_config['devs'][0] - req_devid = sxp.child_value(dev_sxp, 'devid') - req_devid = int(req_devid) + req_devid = int(dev['devid']) existing_dev_info = self._getDeviceInfo_vscsi(req_devid, dev['v-dev']) - state = sxp.child_value(dev_sxp, 'state') + state = dev['state'] if state == 'Initialising': # new create @@ -1502,23 +1506,18 @@ class XendDomainInfo: return self.info['VCPUs_max'] def setVCpuCount(self, vcpus): - if vcpus <= 0: - raise XendError('Invalid VCPUs') + def vcpus_valid(n): + if vcpus <= 0: + raise XendError('Zero or less VCPUs is invalid') + if self.domid >= 0 and vcpus > self.info['VCPUs_max']: + raise XendError('Cannot set vcpus greater than max vcpus on running domain') + vcpus_valid(vcpus) self.info['vcpu_avail'] = (1 << vcpus) - 1 if self.domid >= 0: self.storeVm('vcpu_avail', self.info['vcpu_avail']) - # update dom differently depending on whether we are adjusting - # vcpu number up or down, otherwise _vcpuDomDetails does not - # disable the vcpus - if self.info['VCPUs_max'] > vcpus: - # decreasing - self._writeDom(self._vcpuDomDetails()) - self.info['VCPUs_live'] = vcpus - else: - # same or increasing - self.info['VCPUs_live'] = vcpus - self._writeDom(self._vcpuDomDetails()) + self._writeDom(self._vcpuDomDetails()) + self.info['VCPUs_live'] = vcpus else: if self.info['VCPUs_max'] > vcpus: # decreasing @@ -1528,7 +1527,7 @@ class XendDomainInfo: for c in range(self.info['VCPUs_max'], vcpus): self.info['cpus'].append(list()) self.info['VCPUs_max'] = vcpus - xen.xend.XendDomain.instance().managed_config_save(self) + xen.xend.XendDomain.instance().managed_config_save(self) log.info("Set VCPU count on domain %s to %d", self.info['name_label'], vcpus) @@ -2241,6 +2240,10 @@ class XendDomainInfo: if self.info.has_key('machine_address_size'): 
log.debug("_initDomain: setting maximum machine address size %d" % self.info['machine_address_size']) xc.domain_set_machine_address_size(self.domid, self.info['machine_address_size']) + + if self.info.has_key('suppress_spurious_page_faults') and self.info['suppress_spurious_page_faults']: + log.debug("_initDomain: suppressing spurious page faults") + xc.domain_suppress_spurious_page_faults(self.domid) self._createChannels() @@ -3233,6 +3236,9 @@ class XendDomainInfo: def get_dpcis(self): return XendDPCI.get_by_VM(self.info.get('uuid')) + def get_dscsis(self): + return XendDSCSI.get_by_VM(self.info.get('uuid')) + def create_vbd(self, xenapi_vbd, vdi_image_path): """Create a VBD using a VDI from XendStorageRepository. @@ -3412,6 +3418,60 @@ class XendDomainInfo: raise XendError('Failed to create device') return dpci_uuid + + def create_dscsi(self, xenapi_dscsi): + """Create scsi device from the passed struct in Xen API format. + + @param xenapi_dscsi: DSCSI struct from Xen API + @rtype: string + @return: UUID + """ + + dscsi_uuid = uuid.createString() + + # Convert xenapi to sxp + pscsi = XendAPIStore.get(xenapi_dscsi.get('PSCSI'), 'PSCSI') + devid = int(xenapi_dscsi.get('virtual_HCTL').split(':')[0]) + target_vscsi_sxp = \ + ['vscsi', + ['dev', + ['devid', devid], + ['p-devname', pscsi.get_dev_name()], + ['p-dev', pscsi.get_physical_HCTL()], + ['v-dev', xenapi_dscsi.get('virtual_HCTL')], + ['state', 'Initialising'], + ['uuid', dscsi_uuid] + ] + ] + + if self._stateGet() != XEN_API_VM_POWER_STATE_RUNNING: + + cur_vscsi_sxp = self._getDeviceInfo_vscsi(devid, None) + + if cur_vscsi_sxp is None: + dev_uuid = self.info.device_add('vscsi', cfg_sxp = target_vscsi_sxp) + if not dev_uuid: + raise XendError('Failed to create device') + + else: + new_vscsi_sxp = ['vscsi'] + for existing_dev in sxp.children(cur_vscsi_sxp, 'dev'): + new_vscsi_sxp.append(existing_dev) + new_vscsi_sxp.append(sxp.child0(target_vscsi_sxp, 'dev')) + + dev_uuid = sxp.child_value(cur_vscsi_sxp, 'uuid') + self.info.device_update(dev_uuid, new_vscsi_sxp) + + xen.xend.XendDomain.instance().managed_config_save(self) + + else: + try: + self.device_configure(target_vscsi_sxp) + + except Exception, exn: + raise XendError('Failed to create device') + + return dscsi_uuid def destroy_device_by_uuid(self, dev_type, dev_uuid): @@ -3480,6 +3540,41 @@ class XendDomainInfo: except Exception, exn: raise XendError('Failed to destroy device') + def destroy_dscsi(self, dev_uuid): + dscsi = XendAPIStore.get(dev_uuid, 'DSCSI') + devid = dscsi.get_virtual_host() + vHCTL = dscsi.get_virtual_HCTL() + cur_vscsi_sxp = self._getDeviceInfo_vscsi(devid, None) + dev_uuid = sxp.child_value(cur_vscsi_sxp, 'uuid') + + target_dev = None + new_vscsi_sxp = ['vscsi'] + for dev in sxp.children(cur_vscsi_sxp, 'dev'): + if vHCTL == sxp.child_value(dev, 'v-dev'): + target_dev = dev + else: + new_vscsi_sxp.append(dev) + + if target_dev is None: + raise XendError('Failed to destroy device') + + target_dev.append(['state', 'Closing']) + target_vscsi_sxp = ['vscsi', target_dev] + + if self._stateGet() != XEN_API_VM_POWER_STATE_RUNNING: + + self.info.device_update(dev_uuid, new_vscsi_sxp) + if len(sxp.children(new_vscsi_sxp, 'dev')) == 0: + del self.info['devices'][dev_uuid] + xen.xend.XendDomain.instance().managed_config_save(self) + + else: + try: + self.device_configure(target_vscsi_sxp) + + except Exception, exn: + raise XendError('Failed to destroy device') + def destroy_xapi_instances(self): """Destroy Xen-API instances stored in XendAPIStore. 
""" @@ -3504,6 +3599,10 @@ class XendDomainInfo: for dpci_uuid in XendDPCI.get_by_VM(self.info.get('uuid')): XendAPIStore.deregister(dpci_uuid, "DPCI") + # Destroy DSCSI instances. + for dscsi_uuid in XendDSCSI.get_by_VM(self.info.get('uuid')): + XendAPIStore.deregister(dscsi_uuid, "DSCSI") + def has_device(self, dev_class, dev_uuid): return (dev_uuid in self.info['%s_refs' % dev_class.lower()]) diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xend/XendNode.py --- a/tools/python/xen/xend/XendNode.py Wed Oct 22 11:38:22 2008 +0900 +++ b/tools/python/xen/xend/XendNode.py Wed Oct 22 11:46:55 2008 +0900 @@ -22,6 +22,7 @@ import xen.lowlevel.xc from xen.util import Brctl from xen.util import pci as PciUtil +from xen.util import vscsi_util from xen.xend import XendAPIStore from xen.xend import osdep @@ -38,7 +39,8 @@ from XendStateStore import XendStateStor from XendStateStore import XendStateStore from XendMonitor import XendMonitor from XendPPCI import XendPPCI - +from XendPSCSI import XendPSCSI + class XendNode: """XendNode - Represents a Domain 0 Host.""" @@ -53,6 +55,7 @@ class XendNode: * network * Storage Repository * PPCI + * PSCSI """ self.xc = xen.lowlevel.xc.xc() @@ -269,6 +272,24 @@ class XendNode: XendPPCI(ppci_uuid, ppci_record) + # Initialise PSCSIs + saved_pscsis = self.state_store.load_state('pscsi') + saved_pscsi_table = {} + if saved_pscsis: + for pscsi_uuid, pscsi_record in saved_pscsis.items(): + try: + saved_pscsi_table[pscsi_record['scsi_id']] = pscsi_uuid + except KeyError: + pass + + for pscsi_record in vscsi_util.get_all_scsi_devices(): + if pscsi_record['scsi_id']: + # If saved uuid exists, use it. Otherwise create one. + pscsi_uuid = saved_pscsi_table.get(pscsi_record['scsi_id'], + uuid.createString()) + XendPSCSI(pscsi_uuid, pscsi_record) + + ## def network_destroy(self, net_uuid): ## del self.networks[net_uuid] ## self.save_networks() @@ -317,6 +338,15 @@ class XendNode: def get_ppci_by_uuid(self, ppci_uuid): if ppci_uuid in self.get_PPCI_refs(): return ppci_uuid + return None + + + def get_PSCSI_refs(self): + return XendPSCSI.get_all() + + def get_pscsi_by_uuid(self, pscsi_uuid): + if pscsi_uuid in self.get_PSCSI_refs(): + return pscsi_uuid return None @@ -333,6 +363,7 @@ class XendNode: self.save_PBDs() self.save_SRs() self.save_PPCIs() + self.save_PSCSIs() def save_PIFs(self): pif_records = dict([(pif_uuid, XendAPIStore.get( @@ -362,6 +393,12 @@ class XendNode: ppci_uuid, "PPCI").get_record()) for ppci_uuid in XendPPCI.get_all()]) self.state_store.save_state('ppci', ppci_records) + + def save_PSCSIs(self): + pscsi_records = dict([(pscsi_uuid, XendAPIStore.get( + pscsi_uuid, "PSCSI").get_record()) + for pscsi_uuid in XendPSCSI.get_all()]) + self.state_store.save_state('pscsi', pscsi_records) def shutdown(self): return 0 diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xend/XendPSCSI.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/python/xen/xend/XendPSCSI.py Wed Oct 22 11:46:55 2008 +0900 @@ -0,0 +1,143 @@ +#============================================================================ +# This library is free software; you can redistribute it and/or +# modify it under the terms of version 2.1 of the GNU Lesser General Public +# License as published by the Free Software Foundation. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +#============================================================================ +# Copyright FUJITSU LIMITED 2008 +# Masaki Kanno <kanno.masaki@xxxxxxxxxxxxxx> +#============================================================================ + +from xen.xend.XendBase import XendBase +from xen.xend.XendBase import XendAPIStore +from xen.xend import uuid as genuuid + +class XendPSCSI(XendBase): + """Representation of a physical SCSI device.""" + + def getClass(self): + return "PSCSI" + + def getAttrRO(self): + attrRO = ['host', + 'physical_host', + 'physical_channel', + 'physical_target', + 'physical_lun', + 'physical_HCTL', + 'vendor_name', + 'model', + 'type_id', + 'type', + 'dev_name', + 'sg_name', + 'revision', + 'scsi_id', + 'scsi_level'] + return XendBase.getAttrRO() + attrRO + + def getAttrRW(self): + attrRW = [] + return XendBase.getAttrRW() + attrRW + + def getAttrInst(self): + attrInst = [] + return XendBase.getAttrInst() + attrInst + + def getMethods(self): + methods = [] + return XendBase.getMethods() + methods + + def getFuncs(self): + funcs = [] + return XendBase.getFuncs() + funcs + + getClass = classmethod(getClass) + getAttrRO = classmethod(getAttrRO) + getAttrRW = classmethod(getAttrRW) + getAttrInst = classmethod(getAttrInst) + getMethods = classmethod(getMethods) + getFuncs = classmethod(getFuncs) + + def get_by_HCTL(self, physical_HCTL): + for pscsi in XendAPIStore.get_all("PSCSI"): + if pscsi.get_physical_HCTL() == physical_HCTL: + return pscsi.get_uuid() + return None + + get_by_HCTL = classmethod(get_by_HCTL) + + def __init__(self, uuid, record): + self.physical_HCTL = record['physical_HCTL'] + self.vendor_name = record['vendor_name'] + self.model = record['model'] + self.type_id = record['type_id'] + self.type = record['type'] + self.dev_name = record['dev_name'] + self.sg_name = record['sg_name'] + self.revision = record['revision'] + self.scsi_id = record['scsi_id'] + self.scsi_level = record['scsi_level'] + + p_hctl = self.physical_HCTL.split(':') + self.physical_host = int(p_hctl[0]) + self.physical_channel = int(p_hctl[1]) + self.physical_target = int(p_hctl[2]) + self.physical_lun = int(p_hctl[3]) + + XendBase.__init__(self, uuid, record) + + def get_host(self): + from xen.xend import XendNode + return XendNode.instance().get_uuid() + + def get_physical_host(self): + return self.physical_host + + def get_physical_channel(self): + return self.physical_channel + + def get_physical_target(self): + return self.physical_target + + def get_physical_lun(self): + return self.physical_lun + + def get_physical_HCTL(self): + return self.physical_HCTL + + def get_vendor_name(self): + return self.vendor_name + + def get_model(self): + return self.model + + def get_type_id(self): + return self.type_id + + def get_type(self): + return self.type + + def get_dev_name(self): + return self.dev_name + + def get_sg_name(self): + return self.sg_name + + def get_revision(self): + return self.revision + + def get_scsi_id(self): + return self.scsi_id + + def get_scsi_level(self): + return self.scsi_level + diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xend/server/vscsiif.py --- a/tools/python/xen/xend/server/vscsiif.py Wed Oct 22 11:38:22 2008 +0900 +++ b/tools/python/xen/xend/server/vscsiif.py Wed Oct 22 11:46:55 2008 +0900 @@ -125,10 +125,10 @@ class 
VSCSIController(DevController): state = self.readBackend(devid, devpath + '/state') localdevid = self.readBackend(devid, devpath + '/devid') dev_dict = {'p-dev': pdev, - 'p-devname': pdevname, - 'v-dev': pdevname, - 'state': state, - 'devid': localdevid } + 'p-devname': pdevname, + 'v-dev': vdev, + 'state': state, + 'devid': localdevid } vscsi_devs.append(dev_dict) config['devs'] = vscsi_devs @@ -168,17 +168,17 @@ class VSCSIController(DevController): (devid, back, front) = self.getDeviceDetails(config) devid = int(devid) vscsi_config = config['devs'][0] - states = config.get('states', []) + state = vscsi_config.get('state', '') driver_state = self.readBackend(devid, 'state') if str(xenbusState['Connected']) != driver_state: raise VmError("Driver status is not connected") uuid = self.readBackend(devid, 'uuid') - if states[0] == 'Initialising': + if state == 'Initialising': back['uuid'] = uuid self.writeBackend(devid, back) - elif states[0] == 'Closing': + elif state == 'Closing': found = False devs = self.readBackendList(devid, "vscsi-devs") vscsipath = "vscsi-devs/" @@ -197,8 +197,8 @@ class VSCSIController(DevController): raise VmError("Device %s not connected" % vdev) else: - raise XendError('Error configuring device invalid state %s' - % state) + raise XendError("Error configuring device invalid " + "state '%s'" % state) self.writeBackend(devid, 'state', str(xenbusState['Reconfiguring'])) return self.readBackend(devid, 'uuid') diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xm/create.dtd --- a/tools/python/xen/xm/create.dtd Wed Oct 22 11:38:22 2008 +0900 +++ b/tools/python/xen/xm/create.dtd Wed Oct 22 11:46:55 2008 +0900 @@ -40,6 +40,7 @@ vif*, vtpm*, pci*, + vscsi*, console*, platform*, vcpu_param*, @@ -87,6 +88,10 @@ slot CDATA #REQUIRED func CDATA #REQUIRED vslt CDATA #IMPLIED> + +<!ELEMENT vscsi EMPTY> +<!ATTLIST vscsi p-dev CDATA #REQUIRED + v-dev CDATA #REQUIRED> <!ELEMENT console (other_config*)> <!ATTLIST console protocol (vt100|rfb|rdp) #REQUIRED> diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Wed Oct 22 11:38:22 2008 +0900 +++ b/tools/python/xen/xm/create.py Wed Oct 22 11:46:55 2008 +0900 @@ -583,6 +583,10 @@ gopts.var('machine_address_size', val='B fn=set_int, default=None, use="""Maximum machine address size""") +gopts.var('suppress_spurious_page_faults', val='yes|no', + fn=set_bool, default=None, + use="""Do not inject spurious page faults into this guest""") + def err(msg): """Print an error to stderr and exit. 
""" @@ -634,6 +638,9 @@ def configure_image(vals): if vals.machine_address_size: config_image.append(['machine_address_size', vals.machine_address_size]) + if vals.suppress_spurious_page_faults: + config_image.append(['suppress_spurious_page_faults', vals.suppress_spurious_page_faults]) + return config_image def configure_disks(config_devs, vals): @@ -696,11 +703,8 @@ def configure_vscsis(config_devs, vals): scsi_devices = vscsi_util.vscsi_get_scsidevices() for (p_dev, v_dev, backend) in vals.vscsi: - tmp = p_dev.split(':') - if len(tmp) == 4: - (p_hctl, block) = vscsi_util._vscsi_hctl_block(p_dev, scsi_devices) - else: - (p_hctl, block) = vscsi_util._vscsi_block_scsiid_to_hctl(p_dev, scsi_devices) + (p_hctl, devname) = \ + vscsi_util.vscsi_get_hctl_and_devname_by(p_dev, scsi_devices) if p_hctl == None: raise ValueError("Cannot find device \"%s\"" % p_dev) @@ -716,7 +720,7 @@ def configure_vscsis(config_devs, vals): ['state', 'Initialising'], \ ['devid', devid], \ ['p-dev', p_hctl], \ - ['p-devname', block], \ + ['p-devname', devname], \ ['v-dev', v_dev] ]) if vscsi_lookup_devid(devidlist, devid) == 0: @@ -887,7 +891,7 @@ def make_config(vals): 'restart', 'on_poweroff', 'on_reboot', 'on_crash', 'vcpus', 'vcpu_avail', 'features', 'on_xend_start', 'on_xend_stop', 'target', 'cpuid', - 'cpuid_check', 'machine_address_size']) + 'cpuid_check', 'machine_address_size', 'suppress_spurious_page_faults']) if vals.uuid is not None: config.append(['uuid', vals.uuid]) diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Wed Oct 22 11:38:22 2008 +0900 +++ b/tools/python/xen/xm/main.py Wed Oct 22 11:46:55 2008 +0900 @@ -2235,12 +2235,34 @@ def vscsi_convert_sxp_to_dict(dev_sxp): return dev_dict def xm_scsi_list(args): - xenapi_unsupported() (use_long, params) = arg_check_for_resource_list(args, "scsi-list") dom = params[0] - devs = server.xend.domain.getDeviceSxprs(dom, 'vscsi') + devs = [] + if serverType == SERVER_XEN_API: + + dscsi_refs = server.xenapi.VM.get_DSCSIs(get_single_vm(dom)) + dscsi_properties = \ + map(server.xenapi.DSCSI.get_runtime_properties, dscsi_refs) + dscsi_dict = {} + for dscsi_property in dscsi_properties: + devid = int(dscsi_property['dev']['devid']) + try: + dscsi_sxp = dscsi_dict[devid] + except: + dscsi_sxp = [['devs', []]] + for key, value in dscsi_property.items(): + if key != 'dev': + dscsi_sxp.append([key, value]) + dev_sxp = ['dev'] + dev_sxp.extend(map2sxp(dscsi_property['dev'])) + dscsi_sxp[0][1].append(dev_sxp) + dscsi_dict[devid] = dscsi_sxp + devs = map2sxp(dscsi_dict) + + else: + devs = server.xend.domain.getDeviceSxprs(dom, 'vscsi') if use_long: map(PrettyPrint.prettyprint, devs) @@ -2464,37 +2486,60 @@ def xm_pci_attach(args): else: server.xend.domain.device_configure(dom, pci) +def parse_scsi_configuration(p_scsi, v_hctl, state): + v = v_hctl.split(':') + if len(v) != 4: + raise OptionError("Invalid argument: %s" % v_hctl) + + p_hctl = None + devname = None + if p_scsi is not None: + (p_hctl, devname) = \ + vscsi_util.vscsi_get_hctl_and_devname_by(p_scsi) + if p_hctl is None: + raise OptionError("Cannot find device '%s'" % p_scsi) + + scsi = ['vscsi'] + scsi.append(['dev', \ + ['state', state], \ + ['devid', int(v[0])], \ + ['p-dev', p_hctl], \ + ['p-devname', devname], \ + ['v-dev', v_hctl] \ + ]) + + return scsi + def xm_scsi_attach(args): - xenapi_unsupported() - arg_check(args, 'scsi-attach', 3, 4) - p_devname = args[1] - v_dev = args[2] - - v_hctl = v_dev.split(':') - if len(v_hctl) != 4: - raise 
OptionError("Invalid argument: %s" % v_dev) - - (p_hctl, block) = vscsi_util.vscsi_search_hctl_and_block(p_devname) - - if p_hctl == None: - raise OptionError("Cannot find device \"%s\"" % p_devname) - dom = args[0] - vscsi = ['vscsi'] - vscsi.append(['dev', \ - ['state', 'Initialising'], \ - ['devid', v_hctl[0]], \ - ['p-dev', p_hctl], \ - ['p-devname', block], \ - ['v-dev', v_dev] ]) - - if len(args) == 4: - vscsi.append(['backend', args[3]]) - - vscsi.append(['state', 'Initialising']) - vscsi.append(['devid', v_hctl[0]]) - server.xend.domain.device_configure(dom, vscsi) + p_scsi = args[1] + v_hctl = args[2] + scsi = parse_scsi_configuration(p_scsi, v_hctl, 'Initialising') + + if serverType == SERVER_XEN_API: + + scsi_dev = sxp.children(scsi, 'dev')[0] + p_hctl = sxp.child_value(scsi_dev, 'p-dev') + target_ref = None + for pscsi_ref in server.xenapi.PSCSI.get_all(): + if p_hctl == server.xenapi.PSCSI.get_physical_HCTL(pscsi_ref): + target_ref = pscsi_ref + break + if target_ref is None: + raise OptionError("Cannot find device '%s'" % p_scsi) + + dscsi_record = { + "VM": get_single_vm(dom), + "PSCSI": target_ref, + "virtual_HCTL": v_hctl + } + server.xenapi.DSCSI.create(dscsi_record) + + else: + if len(args) == 4: + scsi.append(['backend', args[3]]) + server.xend.domain.device_configure(dom, scsi) def detach(args, deviceClass): rm_cfg = True @@ -2587,26 +2632,25 @@ def xm_pci_detach(args): server.xend.domain.device_configure(dom, pci) def xm_scsi_detach(args): - xenapi_unsupported() arg_check(args, 'scsi-detach', 2) - - v_dev = args[1] - v_hctl = v_dev.split(':') - if len(v_hctl) != 4: - raise OptionError("Invalid argument: %s" % v_dev) - dom = args[0] - vscsi = ['vscsi'] - vscsi.append(['dev', \ - ['state', 'Closing'], \ - ['devid', v_hctl[0]], \ - ['p-dev', ''], \ - ['p-devname', ''], \ - ['v-dev', v_dev] ]) - - vscsi.append(['state', 'Closing']) - vscsi.append(['devid', v_hctl[0]]) - server.xend.domain.device_configure(dom, vscsi) + v_hctl = args[1] + scsi = parse_scsi_configuration(None, v_hctl, 'Closing') + + if serverType == SERVER_XEN_API: + + target_ref = None + for dscsi_ref in server.xenapi.VM.get_DSCSIs(get_single_vm(dom)): + if v_hctl == server.xenapi.DSCSI.get_virtual_HCTL(dscsi_ref): + target_ref = dscsi_ref + break + if target_ref is None: + raise OptionError("Device %s not assigned" % v_hctl) + + server.xenapi.DSCSI.destroy(target_ref) + + else: + server.xend.domain.device_configure(dom, scsi) def xm_vnet_list(args): xenapi_unsupported() diff -r 6583186e5989 -r 46d7e12c4c91 tools/python/xen/xm/xenapi_create.py --- a/tools/python/xen/xm/xenapi_create.py Wed Oct 22 11:38:22 2008 +0900 +++ b/tools/python/xen/xm/xenapi_create.py Wed Oct 22 11:46:55 2008 +0900 @@ -375,6 +375,12 @@ class xenapi_create: self.create_pcis(vm_ref, pcis) + # Now create scsis + + scsis = vm.getElementsByTagName("vscsi") + + self.create_scsis(vm_ref, scsis) + return vm_ref except: server.xenapi.VM.destroy(vm_ref) @@ -532,6 +538,33 @@ class xenapi_create: return server.xenapi.DPCI.create(dpci_record) + def create_scsis(self, vm_ref, scsis): + log(DEBUG, "create_scsis") + return map(lambda scsi: self.create_scsi(vm_ref, scsi), scsis) + + def create_scsi(self, vm_ref, scsi): + log(DEBUG, "create_scsi") + + target_ref = None + for pscsi_ref in server.xenapi.PSCSI.get_all(): + if scsi.attributes["p-dev"].value == server.xenapi.PSCSI.get_physical_HCTL(pscsi_ref): + target_ref = pscsi_ref + break + if target_ref is None: + log(DEBUG, "create_scsi: scsi device not found") + return None + + dscsi_record = { + 
"VM": + vm_ref, + "PSCSI": + target_ref, + "virtual_HCTL": + scsi.attributes["v-dev"].value + } + + return server.xenapi.DSCSI.create(dscsi_record) + def get_child_by_name(exp, childname, default = None): try: return [child for child in sxp.children(exp) @@ -562,6 +595,9 @@ class sxp2xml: pcis_sxp = map(lambda x: x[1], [device for device in devices if device[1][0] == "pci"]) + + scsis_sxp = map(lambda x: x[1], [device for device in devices + if device[1][0] == "vscsi"]) # Create XML Document @@ -704,6 +740,12 @@ class sxp2xml: map(vm.appendChild, pcis) + # And now the scsis + + scsis = self.extract_scsis(scsis_sxp, document) + + map(vm.appendChild, scsis) + # Last but not least the consoles... consoles = self.extract_consoles(image, document) @@ -893,6 +935,23 @@ class sxp2xml: pcis.append(pci) return pcis + + def extract_scsis(self, scsis_sxp, document): + + scsis = [] + + for scsi_sxp in scsis_sxp: + for dev_sxp in sxp.children(scsi_sxp, "dev"): + scsi = document.createElement("vscsi") + + scsi.attributes["p-dev"] \ + = get_child_by_name(dev_sxp, "p-dev") + scsi.attributes["v-dev"] \ + = get_child_by_name(dev_sxp, "v-dev") + + scsis.append(scsi) + + return scsis def mk_other_config(self, key, value, document): other_config = document.createElement("other_config") diff -r 6583186e5989 -r 46d7e12c4c91 tools/xentrace/formats --- a/tools/xentrace/formats Wed Oct 22 11:38:22 2008 +0900 +++ b/tools/xentrace/formats Wed Oct 22 11:46:55 2008 +0900 @@ -116,3 +116,7 @@ 0x0040f10e CPU%(cpu)d %(tsc)d (+%(relt 0x0040f10e CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_emulate_resync_full [ gfn = 0x%(1)16x ] 0x0040f00f CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_emulate_resync_only [ gfn = 0x%(1)08x ] 0x0040f10f CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_emulate_resync_only [ gfn = 0x%(1)16x ] + +0x00801001 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) cpu_freq_change [ %(1)dMHz -> %(2)dMHz ] +0x00802001 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) cpu_idle_entry [ C0 -> C%(1)d ] +0x00802002 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) cpu_idle_exit [ C%(1)d -> C0 ] diff -r 6583186e5989 -r 46d7e12c4c91 unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c --- a/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c Wed Oct 22 11:38:22 2008 +0900 +++ b/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c Wed Oct 22 11:46:55 2008 +0900 @@ -10,12 +10,6 @@ struct ap_suspend_info { int do_spin; atomic_t nr_spinning; }; - -/* - * Use a rwlock to protect the hypercall page from being executed in AP context - * while the BSP is re-initializing it after restore. 
- */ -static DEFINE_RWLOCK(suspend_lock); #ifdef CONFIG_SMP @@ -33,12 +27,8 @@ static void ap_suspend(void *_info) atomic_inc(&info->nr_spinning); mb(); - while (info->do_spin) { + while (info->do_spin) cpu_relax(); - read_lock(&suspend_lock); - HYPERVISOR_yield(); - read_unlock(&suspend_lock); - } mb(); atomic_dec(&info->nr_spinning); @@ -61,9 +51,7 @@ static int bp_suspend(void) suspend_cancelled = HYPERVISOR_suspend(0); if (!suspend_cancelled) { - write_lock(&suspend_lock); platform_pci_resume(); - write_unlock(&suspend_lock); gnttab_resume(); irq_resume(); } diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/ia64/vmx/vmmu.c --- a/xen/arch/ia64/vmx/vmmu.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/ia64/vmx/vmmu.c Wed Oct 22 11:46:55 2008 +0900 @@ -446,7 +446,7 @@ IA64FAULT vmx_vcpu_ptc_ga(VCPU *vcpu, u6 do { cpu = v->processor; if (cpu != current->processor) { - spin_unlock_wait(&per_cpu(schedule_data, cpu).schedule_lock); + spin_barrier(&per_cpu(schedule_data, cpu).schedule_lock); /* Flush VHPT on remote processors. */ smp_call_function_single(cpu, &ptc_ga_remote_func, &args, 0, 1); diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/acpi/cpu_idle.c --- a/xen/arch/x86/acpi/cpu_idle.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/acpi/cpu_idle.c Wed Oct 22 11:46:55 2008 +0900 @@ -40,6 +40,7 @@ #include <xen/guest_access.h> #include <xen/keyhandler.h> #include <xen/cpuidle.h> +#include <xen/trace.h> #include <asm/cache.h> #include <asm/io.h> #include <asm/hpet.h> @@ -251,6 +252,9 @@ static void acpi_processor_idle(void) switch ( cx->type ) { case ACPI_STATE_C1: + /* Trace cpu idle entry */ + TRACE_1D(TRC_PM_IDLE_ENTRY, 1); + /* * Invoke C1. * Use the appropriate idle routine, the one that would @@ -261,6 +265,9 @@ static void acpi_processor_idle(void) else acpi_safe_halt(); + /* Trace cpu idle exit */ + TRACE_1D(TRC_PM_IDLE_EXIT, 1); + /* * TBD: Can't get time duration while in C1, as resumes * go to an ISR rather than here. Need to instrument @@ -272,12 +279,16 @@ static void acpi_processor_idle(void) case ACPI_STATE_C2: if ( local_apic_timer_c2_ok ) { + /* Trace cpu idle entry */ + TRACE_1D(TRC_PM_IDLE_ENTRY, 2); /* Get start time (ticks) */ t1 = inl(pmtmr_ioport); /* Invoke C2 */ acpi_idle_do_entry(cx); /* Get end time (ticks) */ t2 = inl(pmtmr_ioport); + /* Trace cpu idle exit */ + TRACE_1D(TRC_PM_IDLE_EXIT, 2); /* Re-enable interrupts */ local_irq_enable(); @@ -316,6 +327,8 @@ static void acpi_processor_idle(void) ACPI_FLUSH_CPU_CACHE(); } + /* Trace cpu idle entry */ + TRACE_1D(TRC_PM_IDLE_ENTRY, cx - &power->states[0]); /* * Before invoking C3, be aware that TSC/APIC timer may be * stopped by H/W. 
Without carefully handling of TSC/APIC stop issues, @@ -335,6 +348,8 @@ static void acpi_processor_idle(void) /* recovering TSC */ cstate_restore_tsc(); + /* Trace cpu idle exit */ + TRACE_1D(TRC_PM_IDLE_EXIT, cx - &power->states[0]); if ( power->flags.bm_check && power->flags.bm_control ) { diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/cpu/amd.c --- a/xen/arch/x86/cpu/amd.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/cpu/amd.c Wed Oct 22 11:46:55 2008 +0900 @@ -37,8 +37,8 @@ integer_param("cpuid_mask_ecx", opt_cpui integer_param("cpuid_mask_ecx", opt_cpuid_mask_ecx); integer_param("cpuid_mask_edx", opt_cpuid_mask_edx); static unsigned int opt_cpuid_mask_ext_ecx, opt_cpuid_mask_ext_edx; -integer_param("cpuid_mask_ecx", opt_cpuid_mask_ext_ecx); -integer_param("cpuid_mask_edx", opt_cpuid_mask_ext_edx); +integer_param("cpuid_mask_ext_ecx", opt_cpuid_mask_ext_ecx); +integer_param("cpuid_mask_ext_edx", opt_cpuid_mask_ext_edx); static inline void wrmsr_amd(unsigned int index, unsigned int lo, unsigned int hi) diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/domain.c Wed Oct 22 11:46:55 2008 +0900 @@ -575,7 +575,10 @@ int arch_set_info_guest( v->arch.guest_context.user_regs.eflags |= 2; if ( is_hvm_vcpu(v) ) + { + hvm_set_info_guest(v); goto out; + } /* Only CR0.TS is modifiable by guest or admin. */ v->arch.guest_context.ctrlreg[0] &= X86_CR0_TS; @@ -1252,10 +1255,10 @@ void context_switch(struct vcpu *prev, s flush_tlb_mask(next->vcpu_dirty_cpumask); } - local_irq_disable(); - if ( is_hvm_vcpu(prev) && !list_empty(&prev->arch.hvm_vcpu.tm_list) ) pt_save_timer(prev); + + local_irq_disable(); set_current(next); diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/domctl.c --- a/xen/arch/x86/domctl.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/domctl.c Wed Oct 22 11:46:55 2008 +0900 @@ -1028,6 +1028,21 @@ long arch_do_domctl( } break; + case XEN_DOMCTL_suppress_spurious_page_faults: + { + struct domain *d; + + ret = -ESRCH; + d = rcu_lock_domain_by_id(domctl->domain); + if ( d != NULL ) + { + d->arch.suppress_spurious_page_faults = 1; + rcu_unlock_domain(d); + ret = 0; + } + } + break; + default: ret = -ENOSYS; break; diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/hvm/svm/emulate.c --- a/xen/arch/x86/hvm/svm/emulate.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/hvm/svm/emulate.c Wed Oct 22 11:46:55 2008 +0900 @@ -61,6 +61,34 @@ static unsigned long svm_rip2pointer(str return p; } +static unsigned long svm_nextrip_insn_length(struct vcpu *v) +{ + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; + + if ( !cpu_has_svm_nrips || (vmcb->nextrip <= vmcb->rip) ) + return 0; + +#ifndef NDEBUG + switch ( vmcb->exitcode ) + { + case VMEXIT_CR0_READ... VMEXIT_DR15_WRITE: + /* faults due to instruction intercepts */ + /* (exitcodes 84-95) are reserved */ + case VMEXIT_IDTR_READ ... VMEXIT_TR_WRITE: + case VMEXIT_RDTSC ... VMEXIT_MSR: + case VMEXIT_VMRUN ... VMEXIT_MWAIT_CONDITIONAL: + /* ...and the rest of the #VMEXITs */ + case VMEXIT_CR0_SEL_WRITE: + case VMEXIT_EXCEPTION_BP: + break; + default: + BUG(); + } +#endif + + return vmcb->nextrip - vmcb->rip; +} + /* First byte: Length. Following bytes: Opcode bytes. */ #define MAKE_INSTR(nm, ...) 
static const u8 OPCODE_##nm[] = { __VA_ARGS__ } MAKE_INSTR(INVD, 2, 0x0f, 0x08); @@ -118,6 +146,9 @@ int __get_instruction_length_from_list(s unsigned long fetch_addr; unsigned int fetch_len; + if ( (inst_len = svm_nextrip_insn_length(v)) != 0 ) + return inst_len; + /* Fetch up to the next page break; we'll fetch from the next page * later if we have to. */ fetch_addr = svm_rip2pointer(v); diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/hvm/svm/intr.c --- a/xen/arch/x86/hvm/svm/intr.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/hvm/svm/intr.c Wed Oct 22 11:46:55 2008 +0900 @@ -100,61 +100,6 @@ static void enable_intr_window(struct vc vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR; } -extern int vmsi_deliver(struct domain *d, int pirq); -static int hvm_pci_msi_assert(struct domain *d, int pirq) -{ - return vmsi_deliver(d, pirq); -} - -static void svm_dirq_assist(struct vcpu *v) -{ - unsigned int irq; - uint32_t device, intx; - struct domain *d = v->domain; - struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; - struct dev_intx_gsi_link *digl; - - if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) ) - return; - - for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS); - irq < NR_IRQS; - irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) ) - { - if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) ) - continue; - - spin_lock(&d->event_lock); - if ( test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[irq].flags) ) - { - hvm_pci_msi_assert(d, irq); - spin_unlock(&d->event_lock); - continue; - } - - stop_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)]); - - list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list ) - { - device = digl->device; - intx = digl->intx; - hvm_pci_intx_assert(d, device, intx); - hvm_irq_dpci->mirq[irq].pending++; - } - - /* - * Set a timer to see if the guest can finish the interrupt or not. For - * example, the guest OS may unmask the PIC during boot, before the - * guest driver is loaded. hvm_pci_intx_assert() may succeed, but the - * guest will never deal with the irq, then the physical interrupt line - * will never be deasserted. - */ - set_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)], - NOW() + PT_IRQ_TIME_OUT); - spin_unlock(&d->event_lock); - } -} - asmlinkage void svm_intr_assist(void) { struct vcpu *v = current; @@ -163,7 +108,7 @@ asmlinkage void svm_intr_assist(void) /* Crank the handle on interrupt state. 
*/ pt_update_irq(v); - svm_dirq_assist(v); + hvm_dirq_assist(v); do { intack = hvm_vcpu_has_pending_irq(v); diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/hvm/viridian.c --- a/xen/arch/x86/hvm/viridian.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/hvm/viridian.c Wed Oct 22 11:46:55 2008 +0900 @@ -244,7 +244,6 @@ int rdmsr_viridian_regs(uint32_t idx, ui int viridian_hypercall(struct cpu_user_regs *regs) { - struct domain *d = current->domain; int mode = hvm_guest_x86_mode(current); unsigned long input_params_gpa, output_params_gpa; uint16_t status = HV_STATUS_SUCCESS; @@ -271,7 +270,7 @@ int viridian_hypercall(struct cpu_user_r }; } output = { 0 }; - ASSERT(is_viridian_domain(d)); + ASSERT(is_viridian_domain(current->domain)); switch ( mode ) { diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/hvm/vmx/intr.c --- a/xen/arch/x86/hvm/vmx/intr.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/hvm/vmx/intr.c Wed Oct 22 11:46:55 2008 +0900 @@ -103,61 +103,6 @@ static void enable_intr_window(struct vc } } -extern int vmsi_deliver(struct domain *d, int pirq); -static int hvm_pci_msi_assert(struct domain *d, int pirq) -{ - return vmsi_deliver(d, pirq); -} - -static void vmx_dirq_assist(struct vcpu *v) -{ - unsigned int irq; - uint32_t device, intx; - struct domain *d = v->domain; - struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; - struct dev_intx_gsi_link *digl; - - if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) ) - return; - - for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS); - irq < NR_IRQS; - irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) ) - { - if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) ) - continue; - - spin_lock(&d->event_lock); - if ( test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[irq].flags) ) - { - hvm_pci_msi_assert(d, irq); - spin_unlock(&d->event_lock); - continue; - } - - stop_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)]); - - list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list ) - { - device = digl->device; - intx = digl->intx; - hvm_pci_intx_assert(d, device, intx); - hvm_irq_dpci->mirq[irq].pending++; - } - - /* - * Set a timer to see if the guest can finish the interrupt or not. For - * example, the guest OS may unmask the PIC during boot, before the - * guest driver is loaded. hvm_pci_intx_assert() may succeed, but the - * guest will never deal with the irq, then the physical interrupt line - * will never be deasserted. - */ - set_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)], - NOW() + PT_IRQ_TIME_OUT); - spin_unlock(&d->event_lock); - } -} - asmlinkage void vmx_intr_assist(void) { struct hvm_intack intack; @@ -167,7 +112,7 @@ asmlinkage void vmx_intr_assist(void) /* Crank the handle on interrupt state. 
*/ pt_update_irq(v); - vmx_dirq_assist(v); + hvm_dirq_assist(v); do { intack = hvm_vcpu_has_pending_irq(v); diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/hvm/vmx/vmx.c Wed Oct 22 11:46:55 2008 +0900 @@ -1184,6 +1184,13 @@ static void vmx_set_uc_mode(struct vcpu vpid_sync_all(); } +static void vmx_set_info_guest(struct vcpu *v) +{ + vmx_vmcs_enter(v); + __vmwrite(GUEST_DR7, v->arch.guest_context.debugreg[7]); + vmx_vmcs_exit(v); +} + static struct hvm_function_table vmx_function_table = { .name = "VMX", .domain_initialise = vmx_domain_initialise, @@ -1214,7 +1221,8 @@ static struct hvm_function_table vmx_fun .msr_read_intercept = vmx_msr_read_intercept, .msr_write_intercept = vmx_msr_write_intercept, .invlpg_intercept = vmx_invlpg_intercept, - .set_uc_mode = vmx_set_uc_mode + .set_uc_mode = vmx_set_uc_mode, + .set_info_guest = vmx_set_info_guest }; static unsigned long *vpid_bitmap; @@ -2048,8 +2056,12 @@ asmlinkage void vmx_vmexit_handler(struc perfc_incra(vmexits, exit_reason); - if ( exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT ) - local_irq_enable(); + /* Handle the interrupt we missed before allowing any more in. */ + if ( exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT ) + vmx_do_extint(regs); + + /* Now enable interrupts so it's safe to take locks. */ + local_irq_enable(); if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) ) return vmx_failed_vmentry(exit_reason, regs); @@ -2177,7 +2189,7 @@ asmlinkage void vmx_vmexit_handler(struc break; } case EXIT_REASON_EXTERNAL_INTERRUPT: - vmx_do_extint(regs); + /* Already handled above. */ break; case EXIT_REASON_TRIPLE_FAULT: hvm_triple_fault(); diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/irq.c --- a/xen/arch/x86/irq.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/irq.c Wed Oct 22 11:46:55 2008 +0900 @@ -510,7 +510,7 @@ int pirq_guest_bind(struct vcpu *v, int { unsigned int vector; irq_desc_t *desc; - irq_guest_action_t *action; + irq_guest_action_t *action, *newaction = NULL; int rc = 0; cpumask_t cpumask = CPU_MASK_NONE; @@ -520,7 +520,10 @@ int pirq_guest_bind(struct vcpu *v, int retry: desc = domain_spin_lock_irq_desc(v->domain, irq, NULL); if ( desc == NULL ) - return -EINVAL; + { + rc = -EINVAL; + goto out; + } action = (irq_guest_action_t *)desc->action; vector = desc - irq_desc; @@ -533,18 +536,24 @@ int pirq_guest_bind(struct vcpu *v, int "Cannot bind IRQ %d to guest. In use by '%s'.\n", irq, desc->action->name); rc = -EBUSY; - goto out; + goto unlock_out; } - action = xmalloc(irq_guest_action_t); - if ( (desc->action = (struct irqaction *)action) == NULL ) + if ( newaction == NULL ) { + spin_unlock_irq(&desc->lock); + if ( (newaction = xmalloc(irq_guest_action_t)) != NULL ) + goto retry; gdprintk(XENLOG_INFO, - "Cannot bind IRQ %d to guest. Out of memory.\n", - irq); + "Cannot bind IRQ %d to guest. Out of memory.\n", + irq); rc = -ENOMEM; goto out; } + + action = newaction; + desc->action = (struct irqaction *)action; + newaction = NULL; action->nr_guests = 0; action->in_flight = 0; @@ -568,7 +577,7 @@ int pirq_guest_bind(struct vcpu *v, int "Will not share with others.\n", irq); rc = -EBUSY; - goto out; + goto unlock_out; } else if ( action->nr_guests == 0 ) { @@ -588,17 +597,21 @@ int pirq_guest_bind(struct vcpu *v, int gdprintk(XENLOG_INFO, "Cannot bind IRQ %d to guest. 
" "Already at max share.\n", irq); rc = -EBUSY; - goto out; + goto unlock_out; } action->guest[action->nr_guests++] = v->domain; + unlock_out: + spin_unlock_irq(&desc->lock); out: - spin_unlock_irq(&desc->lock); + if ( newaction != NULL ) + xfree(newaction); return rc; } -static void __pirq_guest_unbind(struct domain *d, int irq, irq_desc_t *desc) +static irq_guest_action_t *__pirq_guest_unbind( + struct domain *d, int irq, irq_desc_t *desc) { unsigned int vector; irq_guest_action_t *action; @@ -644,7 +657,7 @@ static void __pirq_guest_unbind(struct d BUG_ON(test_bit(irq, d->pirq_mask)); if ( action->nr_guests != 0 ) - return; + return NULL; BUG_ON(action->in_flight != 0); @@ -672,15 +685,18 @@ static void __pirq_guest_unbind(struct d BUG_ON(!cpus_empty(action->cpu_eoi_map)); desc->action = NULL; - xfree(action); desc->status &= ~IRQ_GUEST; desc->status &= ~IRQ_INPROGRESS; kill_timer(&irq_guest_eoi_timer[vector]); desc->handler->shutdown(vector); + + /* Caller frees the old guest descriptor block. */ + return action; } void pirq_guest_unbind(struct domain *d, int irq) { + irq_guest_action_t *oldaction = NULL; irq_desc_t *desc; int vector; @@ -699,16 +715,19 @@ void pirq_guest_unbind(struct domain *d, } else { - __pirq_guest_unbind(d, irq, desc); + oldaction = __pirq_guest_unbind(d, irq, desc); } spin_unlock_irq(&desc->lock); + + if ( oldaction != NULL ) + xfree(oldaction); } int pirq_guest_force_unbind(struct domain *d, int irq) { irq_desc_t *desc; - irq_guest_action_t *action; + irq_guest_action_t *action, *oldaction = NULL; int i, bound = 0; WARN_ON(!spin_is_locked(&d->event_lock)); @@ -727,10 +746,14 @@ int pirq_guest_force_unbind(struct domai goto out; bound = 1; - __pirq_guest_unbind(d, irq, desc); + oldaction = __pirq_guest_unbind(d, irq, desc); out: spin_unlock_irq(&desc->lock); + + if ( oldaction != NULL ) + xfree(oldaction); + return bound; } diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/mm/hap/hap.c --- a/xen/arch/x86/mm/hap/hap.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/mm/hap/hap.c Wed Oct 22 11:46:55 2008 +0900 @@ -639,9 +639,16 @@ hap_write_p2m_entry(struct vcpu *v, unsi hap_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p, mfn_t table_mfn, l1_pgentry_t new, unsigned int level) { + uint32_t old_flags; + hap_lock(v->domain); + old_flags = l1e_get_flags(*p); safe_write_pte(p, new); + if ( (old_flags & _PAGE_PRESENT) + && (level == 1 || (level == 2 && (old_flags & _PAGE_PSE))) ) + flush_tlb_mask(v->domain->domain_dirty_cpumask); + #if CONFIG_PAGING_LEVELS == 3 /* install P2M in monitor table for PAE Xen */ if ( level == 3 ) diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/mm/shadow/private.h --- a/xen/arch/x86/mm/shadow/private.h Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/mm/shadow/private.h Wed Oct 22 11:46:55 2008 +0900 @@ -227,32 +227,40 @@ struct shadow_page_info struct shadow_page_info { union { - /* When in use, guest page we're a shadow of */ - unsigned long backpointer; - /* When free, order of the freelist we're on */ - unsigned int order; - }; - union { - /* When in use, next shadow in this hash chain */ - struct shadow_page_info *next_shadow; - /* When free, TLB flush time when freed */ - u32 tlbflush_timestamp; - }; - struct { - unsigned int type:5; /* What kind of shadow is this? */ - unsigned int pinned:1; /* Is the shadow pinned? 
*/ - unsigned int count:26; /* Reference count */ - u32 mbz; /* Must be zero: this is where the owner - * field lives in a non-shadow page */ - } __attribute__((packed)); - union { - /* For unused shadow pages, a list of pages of this order; - * for pinnable shadows, if pinned, a list of other pinned shadows - * (see sh_type_is_pinnable() below for the definition of - * "pinnable" shadow types). */ - struct list_head list; - /* For non-pinnable shadows, a higher entry that points at us */ - paddr_t up; + /* Ensures that shadow_page_info is same size as page_info. */ + struct page_info page_info; + + struct { + union { + /* When in use, guest page we're a shadow of */ + unsigned long backpointer; + /* When free, order of the freelist we're on */ + unsigned int order; + }; + union { + /* When in use, next shadow in this hash chain */ + struct shadow_page_info *next_shadow; + /* When free, TLB flush time when freed */ + u32 tlbflush_timestamp; + }; + struct { + unsigned int type:5; /* What kind of shadow is this? */ + unsigned int pinned:1; /* Is the shadow pinned? */ + unsigned int count:26; /* Reference count */ + u32 mbz; /* Must be zero: this is where the + * owner field lives in page_info */ + } __attribute__((packed)); + union { + /* For unused shadow pages, a list of pages of this order; for + * pinnable shadows, if pinned, a list of other pinned shadows + * (see sh_type_is_pinnable() below for the definition of + * "pinnable" shadow types). */ + struct list_head list; + /* For non-pinnable shadows, a higher entry that points + * at us. */ + paddr_t up; + }; + }; }; }; @@ -261,7 +269,8 @@ struct shadow_page_info * Also, the mbz field must line up with the owner field of normal * pages, so they look properly like anonymous/xen pages. */ static inline void shadow_check_page_struct_offsets(void) { - BUILD_BUG_ON(sizeof (struct shadow_page_info) > sizeof (struct page_info)); + BUILD_BUG_ON(sizeof (struct shadow_page_info) != + sizeof (struct page_info)); BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz) != offsetof(struct page_info, u.inuse._domain)); }; diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/msi.c --- a/xen/arch/x86/msi.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/msi.c Wed Oct 22 11:46:55 2008 +0900 @@ -364,6 +364,7 @@ static struct msi_desc* alloc_msi_entry( INIT_LIST_HEAD(&entry->list); entry->dev = NULL; + entry->remap_index = -1; return entry; } diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/nmi.c --- a/xen/arch/x86/nmi.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/nmi.c Wed Oct 22 11:46:55 2008 +0900 @@ -72,8 +72,8 @@ int nmi_active; #define P6_EVNTSEL_INT (1 << 20) #define P6_EVNTSEL_OS (1 << 17) #define P6_EVNTSEL_USR (1 << 16) -#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 -#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED +#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 +#define CORE_EVENT_CPU_CLOCKS_NOT_HALTED 0x3c #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) #define P4_CCCR_OVF_PMI0 (1<<26) @@ -122,10 +122,17 @@ int __init check_nmi_watchdog (void) printk("\n"); - /* now that we know it works we can reduce NMI frequency to - something more reasonable; makes a difference in some configs */ + /* + * Now that we know it works we can reduce NMI frequency to + * something more reasonable; makes a difference in some configs. 
+ * There's a limit to how slow we can go because writing the perfctr + * MSRs only sets the low 32 bits, with the top 8 bits sign-extended + * from those, so it's not possible to set up a delay larger than + * 2^31 cycles and smaller than (2^40 - 2^31) cycles. + * (Intel SDM, section 18.22.2) + */ if ( nmi_watchdog == NMI_LOCAL_APIC ) - nmi_hz = 1; + nmi_hz = max(1ul, cpu_khz >> 20); return 0; } @@ -248,7 +255,7 @@ static void __pminit setup_k7_watchdog(v wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); } -static void __pminit setup_p6_watchdog(void) +static void __pminit setup_p6_watchdog(unsigned counter) { unsigned int evntsel; @@ -260,7 +267,7 @@ static void __pminit setup_p6_watchdog(v evntsel = P6_EVNTSEL_INT | P6_EVNTSEL_OS | P6_EVNTSEL_USR - | P6_NMI_EVENT; + | counter; wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); write_watchdog_counter("P6_PERFCTR0"); @@ -326,7 +333,9 @@ void __pminit setup_apic_nmi_watchdog(vo case X86_VENDOR_INTEL: switch (boot_cpu_data.x86) { case 6: - setup_p6_watchdog(); + setup_p6_watchdog((boot_cpu_data.x86_model < 14) + ? P6_EVENT_CPU_CLOCKS_NOT_HALTED + : CORE_EVENT_CPU_CLOCKS_NOT_HALTED); break; case 15: if (!setup_p4_watchdog()) diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/platform_hypercall.c --- a/xen/arch/x86/platform_hypercall.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/platform_hypercall.c Wed Oct 22 11:46:55 2008 +0900 @@ -53,15 +53,6 @@ static long cpu_frequency_change_helper( return cpu_frequency_change(this_cpu(freq)); } -int xenpf_copy_px_states(struct processor_performance *pxpt, - struct xen_processor_performance *dom0_px_info) -{ - if (!pxpt || !dom0_px_info) - return -EINVAL; - return copy_from_compat(pxpt->states, dom0_px_info->states, - dom0_px_info->state_count); -} - ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op) { ret_t ret = 0; @@ -372,12 +363,13 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe switch ( op->u.set_pminfo.type ) { case XEN_PM_PX: - { - - ret = set_px_pminfo(op->u.set_pminfo.id, - &op->u.set_pminfo.perf); - break; - } + if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) ) + { + ret = -ENOSYS; + break; + } + ret = set_px_pminfo(op->u.set_pminfo.id, &op->u.set_pminfo.perf); + break; case XEN_PM_CX: if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CX) ) diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/smpboot.c --- a/xen/arch/x86/smpboot.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/smpboot.c Wed Oct 22 11:46:55 2008 +0900 @@ -473,13 +473,6 @@ static void construct_percpu_idt(unsigne { unsigned char idt_load[10]; - /* If IDT table exists since last hotplug, reuse it */ - if (!idt_tables[cpu]) { - idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES); - memcpy(idt_tables[cpu], idt_table, - IDT_ENTRIES*sizeof(idt_entry_t)); - } - *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*sizeof(idt_entry_t))-1; *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu]; __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) ); @@ -908,6 +901,12 @@ static int __devinit do_boot_cpu(int api } #endif + if (!idt_tables[cpu]) { + idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES); + memcpy(idt_tables[cpu], idt_table, + IDT_ENTRIES*sizeof(idt_entry_t)); + } + /* * This grunge runs the startup process for * the targeted processor. 
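The comment and code above replace the fixed nmi_hz = 1 with nmi_hz = max(1, cpu_khz >> 20), so the watchdog rate scales with CPU speed while the perfctr reload interval (roughly cpu_khz * 1000 / nmi_hz unhalted-clock events between NMIs) stays below the 2^31 limit that the sign-extended 32-bit counter write imposes. A quick numeric check of that bound, assuming a 3 GHz CPU for illustration:

    # Arithmetic check of the new watchdog rate.  The per-NMI counter
    # period must stay below 2**31; cpu_khz here is an assumed value.
    cpu_khz = 3000000                      # 3 GHz CPU
    nmi_hz = max(1, cpu_khz >> 20)         # -> 2 NMIs per second
    period = cpu_khz * 1000 // nmi_hz      # clocks between NMIs
    print("%d %d %s" % (nmi_hz, period, period < 2**31))
    # 2 1500000000 True  (a fixed nmi_hz of 1 would give 3000000000,
    # which no longer fits)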
diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/traps.c Wed Oct 22 11:46:55 2008 +0900 @@ -710,7 +710,7 @@ static void pv_cpuid(struct cpu_user_reg if ( current->domain->domain_id != 0 ) { if ( !cpuid_hypervisor_leaves(a, &a, &b, &c, &d) ) - domain_cpuid(current->domain, a, b, &a, &b, &c, &d); + domain_cpuid(current->domain, a, c, &a, &b, &c, &d); goto out; } @@ -1241,6 +1241,10 @@ asmlinkage void do_page_fault(struct cpu "Faulting linear address: %p\n", regs->error_code, _p(addr)); } + + if ( unlikely(current->domain->arch.suppress_spurious_page_faults + && spurious_page_fault(addr, regs)) ) + return; propagate_page_fault(addr, regs->error_code); } diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/x86_32/xen.lds.S --- a/xen/arch/x86/x86_32/xen.lds.S Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/x86_32/xen.lds.S Wed Oct 22 11:46:55 2008 +0900 @@ -26,7 +26,6 @@ SECTIONS *(.fixup) *(.gnu.warning) } :text =0x9090 - .text.lock : { *(.text.lock) } :text /* out-of-line lock text */ _etext = .; /* End of text section */ diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/x86_64/Makefile --- a/xen/arch/x86/x86_64/Makefile Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/x86_64/Makefile Wed Oct 22 11:46:55 2008 +0900 @@ -13,6 +13,7 @@ obj-$(CONFIG_COMPAT) += physdev.o obj-$(CONFIG_COMPAT) += physdev.o obj-$(CONFIG_COMPAT) += platform_hypercall.o obj-$(CONFIG_COMPAT) += cpu_idle.o +obj-$(CONFIG_COMPAT) += cpufreq.o ifeq ($(CONFIG_COMPAT),y) # extra dependencies @@ -24,4 +25,5 @@ sysctl.o: ../sysctl.c sysctl.o: ../sysctl.c traps.o: compat/traps.c cpu_idle.o: ../acpi/cpu_idle.c +cpufreq.o: ../../../drivers/cpufreq/cpufreq.c endif diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/x86_64/cpu_idle.c --- a/xen/arch/x86/x86_64/cpu_idle.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/x86_64/cpu_idle.c Wed Oct 22 11:46:55 2008 +0900 @@ -44,7 +44,7 @@ DEFINE_XEN_GUEST_HANDLE(compat_processor xlat_page_current = xlat_page_start; \ } while (0) -static void *xlat_malloc(unsigned long *xlat_page_current, size_t size) +void *xlat_malloc(unsigned long *xlat_page_current, size_t size) { void *ret; diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/x86_64/cpufreq.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/x86_64/cpufreq.c Wed Oct 22 11:46:55 2008 +0900 @@ -0,0 +1,91 @@ +/****************************************************************************** + * cpufreq.c -- adapt 32b compat guest to 64b hypervisor. + * + * Copyright (C) 2008, Liu Jinsong <jinsong.liu@xxxxxxxxx> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
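The new x86_64/cpufreq.c shim whose header begins above adapts a 32-bit dom0's XEN_PM_PX data to the 64-bit hypervisor; in its body (continued below) xlat_malloc() and xlat_malloc_array() hand out pieces of the compat argument translation area in simple bump-allocator fashion. A conceptual Python stand-in for that allocation pattern follows; the area size, allocation sizes, and 16-byte alignment are invented for the sketch:

    # Conceptual stand-in for xlat_malloc()/xlat_malloc_array(): a bump
    # allocator over one fixed-size area, failing once it is exhausted.
    # All sizes and the alignment are illustrative only.
    PAGE_SIZE = 4096

    class XlatArea(object):
        def __init__(self):
            self.current = 0
        def malloc(self, size, align=16):
            start = (self.current + align - 1) & ~(align - 1)
            if start + size > PAGE_SIZE:
                return None            # translation area exhausted
            self.current = start + size
            return start               # offset standing in for a pointer

    area = XlatArea()
    perf = area.malloc(64)             # one xen_processor_performance
    states = area.malloc(16 * 24)      # an array of Px state entries
    print((perf, states))              # (0, 64)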
+ * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <xen/config.h> +#include <xen/types.h> +#include <xen/xmalloc.h> +#include <xen/guest_access.h> +#include <compat/platform.h> + +DEFINE_XEN_GUEST_HANDLE(compat_processor_px_t); + +#define xlat_page_start ((unsigned long)COMPAT_ARG_XLAT_VIRT_BASE) + +#define xlat_malloc_init(xlat_page_current) do { \ + xlat_page_current = xlat_page_start; \ +} while (0) + +extern void *xlat_malloc(unsigned long *xlat_page_current, size_t size); + +#define xlat_malloc_array(_p, _t, _c) ((_t *) xlat_malloc(&_p, sizeof(_t) * _c)) + +extern int +set_px_pminfo(uint32_t cpu, struct xen_processor_performance *perf); + +int +compat_set_px_pminfo(uint32_t cpu, struct compat_processor_performance *perf) +{ + struct xen_processor_performance *xen_perf; + unsigned long xlat_page_current; + + xlat_malloc_init(xlat_page_current); + + xen_perf = xlat_malloc_array(xlat_page_current, + struct xen_processor_performance, 1); + if ( unlikely(xen_perf == NULL) ) + return -EFAULT; + +#define XLAT_processor_performance_HNDL_states(_d_, _s_) do { \ + xen_processor_px_t *xen_states = NULL; \ +\ + if ( likely((_s_)->state_count > 0) ) \ + { \ + XEN_GUEST_HANDLE(compat_processor_px_t) states; \ + compat_processor_px_t state; \ + int i; \ +\ + xen_states = xlat_malloc_array(xlat_page_current, \ + xen_processor_px_t, (_s_)->state_count); \ + if ( unlikely(xen_states == NULL) ) \ + return -EFAULT; \ +\ + if ( unlikely(!compat_handle_okay((_s_)->states, \ + (_s_)->state_count)) ) \ + return -EFAULT; \ + guest_from_compat_handle(states, (_s_)->states); \ +\ + for ( i = 0; i < _s_->state_count; i++ ) \ + { \ + if ( unlikely(copy_from_guest_offset(&state, states, i, 1)) ) \ + return -EFAULT; \ + XLAT_processor_px(&xen_states[i], &state); \ + } \ + } \ +\ + set_xen_guest_handle((_d_)->states, xen_states); \ +} while (0) + XLAT_processor_performance(xen_perf, perf); +#undef XLAT_processor_performance_HNDL_states + + return set_px_pminfo(cpu, xen_perf); +} diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/x86_64/mm.c --- a/xen/arch/x86/x86_64/mm.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/x86_64/mm.c Wed Oct 22 11:46:55 2008 +0900 @@ -252,8 +252,6 @@ void __init subarch_init_memory(void) BUILD_BUG_ON(offsetof(struct page_info, u.inuse._domain) != (offsetof(struct page_info, count_info) + sizeof(u32))); BUILD_BUG_ON((offsetof(struct page_info, count_info) & 7) != 0); - BUILD_BUG_ON(sizeof(struct page_info) != - (32 + BITS_TO_LONGS(NR_CPUS)*sizeof(long))); /* M2P table is mappable read-only by privileged domains. 
*/ for ( v = RDWR_MPT_VIRT_START; diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/x86_64/platform_hypercall.c --- a/xen/arch/x86/x86_64/platform_hypercall.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/x86_64/platform_hypercall.c Wed Oct 22 11:46:55 2008 +0900 @@ -11,13 +11,13 @@ DEFINE_XEN_GUEST_HANDLE(compat_platform_ #define xen_platform_op_t compat_platform_op_t #define do_platform_op(x) compat_platform_op(_##x) -#define xenpf_copy_px_states compat_xenpf_copy_px_states - #define xen_processor_px compat_processor_px #define xen_processor_px_t compat_processor_px_t #define xen_processor_performance compat_processor_performance #define xen_processor_performance_t compat_processor_performance_t #define xenpf_set_processor_pminfo compat_pf_set_processor_pminfo + +#define set_px_pminfo compat_set_px_pminfo #define xen_processor_power compat_processor_power #define xen_processor_power_t compat_processor_power_t diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/x86_64/xen.lds.S --- a/xen/arch/x86/x86_64/xen.lds.S Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/x86_64/xen.lds.S Wed Oct 22 11:46:55 2008 +0900 @@ -24,7 +24,6 @@ SECTIONS *(.fixup) *(.gnu.warning) } :text = 0x9090 - .text.lock : { *(.text.lock) } :text /* out-of-line lock text */ _etext = .; /* End of text section */ diff -r 6583186e5989 -r 46d7e12c4c91 xen/arch/x86/x86_emulate/x86_emulate.c --- a/xen/arch/x86/x86_emulate/x86_emulate.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/arch/x86/x86_emulate/x86_emulate.c Wed Oct 22 11:46:55 2008 +0900 @@ -236,7 +236,8 @@ static uint8_t twobyte_table[256] = { DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov, /* 0xC0 - 0xC7 */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, 0, + ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, + 0, DstMem|SrcReg|ModRM|Mov, 0, 0, 0, ImplicitOps|ModRM, /* 0xC8 - 0xCF */ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, @@ -3910,6 +3911,12 @@ x86_emulate( } goto add; + case 0xc3: /* movnti */ + /* Ignore the non-temporal hint for now. 
*/ + generate_exception_if(dst.bytes <= 2, EXC_UD, -1); + dst.val = src.val; + break; + case 0xc7: /* Grp9 (cmpxchg8b/cmpxchg16b) */ { unsigned long old[2], exp[2], new[2]; unsigned int i; diff -r 6583186e5989 -r 46d7e12c4c91 xen/common/Makefile --- a/xen/common/Makefile Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/common/Makefile Wed Oct 22 11:46:55 2008 +0900 @@ -16,6 +16,7 @@ obj-y += schedule.o obj-y += schedule.o obj-y += shutdown.o obj-y += softirq.o +obj-y += spinlock.o obj-y += stop_machine.o obj-y += string.o obj-y += symbols.o @@ -25,7 +26,7 @@ obj-y += trace.o obj-y += trace.o obj-y += version.o obj-y += vsprintf.o -obj-y += xmalloc.o +obj-y += xmalloc_tlsf.o obj-y += rcupdate.o obj-$(perfc) += perfc.o diff -r 6583186e5989 -r 46d7e12c4c91 xen/common/kernel.c --- a/xen/common/kernel.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/common/kernel.c Wed Oct 22 11:46:55 2008 +0900 @@ -75,8 +75,7 @@ void cmdline_parse(char *cmdline) strlcpy(param->var, optval, param->len); break; case OPT_UINT: - *(unsigned int *)param->var = - simple_strtol(optval, (const char **)&optval, 0); + *(unsigned int *)param->var = simple_strtol(optval, NULL, 0); break; case OPT_BOOL: case OPT_INVBOOL: diff -r 6583186e5989 -r 46d7e12c4c91 xen/common/schedule.c --- a/xen/common/schedule.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/common/schedule.c Wed Oct 22 11:46:55 2008 +0900 @@ -455,6 +455,10 @@ static long do_poll(struct sched_poll *s goto out; #endif + rc = 0; + if ( local_events_need_delivery() ) + goto out; + for ( i = 0; i < sched_poll->nr_ports; i++ ) { rc = -EFAULT; diff -r 6583186e5989 -r 46d7e12c4c91 xen/common/spinlock.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/common/spinlock.c Wed Oct 22 11:46:55 2008 +0900 @@ -0,0 +1,154 @@ +#include <xen/config.h> +#include <xen/smp.h> +#include <xen/spinlock.h> + +void _spin_lock(spinlock_t *lock) +{ + _raw_spin_lock(&lock->raw); +} + +void _spin_lock_irq(spinlock_t *lock) +{ + local_irq_disable(); + _raw_spin_lock(&lock->raw); +} + +unsigned long _spin_lock_irqsave(spinlock_t *lock) +{ + unsigned long flags; + local_irq_save(flags); + _raw_spin_lock(&lock->raw); + return flags; +} + +void _spin_unlock(spinlock_t *lock) +{ + _raw_spin_unlock(&lock->raw); +} + +void _spin_unlock_irq(spinlock_t *lock) +{ + _raw_spin_unlock(&lock->raw); + local_irq_enable(); +} + +void _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) +{ + _raw_spin_unlock(&lock->raw); + local_irq_restore(flags); +} + +int _spin_is_locked(spinlock_t *lock) +{ + return _raw_spin_is_locked(&lock->raw); +} + +int _spin_trylock(spinlock_t *lock) +{ + return _raw_spin_trylock(&lock->raw); +} + +void _spin_barrier(spinlock_t *lock) +{ + do { mb(); } while ( _raw_spin_is_locked(&lock->raw) ); + mb(); +} + +void _spin_lock_recursive(spinlock_t *lock) +{ + int cpu = smp_processor_id(); + + /* Don't allow overflow of recurse_cpu field. */ + BUILD_BUG_ON(NR_CPUS > 0xfffu); + + if ( likely(lock->recurse_cpu != cpu) ) + { + spin_lock(lock); + lock->recurse_cpu = cpu; + } + + /* We support only fairly shallow recursion, else the counter overflows. 
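 * [Illustrative note, not part of the changeset: the 0xfu bound below and the
 *  BUILD_BUG_ON(NR_CPUS > 0xfffu) above suggest recurse_cnt and recurse_cpu
 *  are narrow bitfields, with 0xfff doubling as the "no owner" value that the
 *  unlock path restores. As with the ia64-specific versions this changeset
 *  removes, the recursive forms are only for locks that may safely be
 *  re-taken on the same CPU; everything else should use plain spin_lock().]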
*/ + ASSERT(lock->recurse_cnt < 0xfu); + lock->recurse_cnt++; +} + +void _spin_unlock_recursive(spinlock_t *lock) +{ + if ( likely(--lock->recurse_cnt == 0) ) + { + lock->recurse_cpu = 0xfffu; + spin_unlock(lock); + } +} + +void _read_lock(rwlock_t *lock) +{ + _raw_read_lock(&lock->raw); +} + +void _read_lock_irq(rwlock_t *lock) +{ + local_irq_disable(); + _raw_read_lock(&lock->raw); +} + +unsigned long _read_lock_irqsave(rwlock_t *lock) +{ + unsigned long flags; + local_irq_save(flags); + _raw_read_lock(&lock->raw); + return flags; +} + +void _read_unlock(rwlock_t *lock) +{ + _raw_read_unlock(&lock->raw); +} + +void _read_unlock_irq(rwlock_t *lock) +{ + _raw_read_unlock(&lock->raw); + local_irq_enable(); +} + +void _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) +{ + _raw_read_unlock(&lock->raw); + local_irq_restore(flags); +} + +void _write_lock(rwlock_t *lock) +{ + _raw_write_lock(&lock->raw); +} + +void _write_lock_irq(rwlock_t *lock) +{ + local_irq_disable(); + _raw_write_lock(&lock->raw); +} + +unsigned long _write_lock_irqsave(rwlock_t *lock) +{ + unsigned long flags; + local_irq_save(flags); + _raw_write_lock(&lock->raw); + return flags; +} + +void _write_unlock(rwlock_t *lock) +{ + _raw_write_unlock(&lock->raw); +} + +void _write_unlock_irq(rwlock_t *lock) +{ + _raw_write_unlock(&lock->raw); + local_irq_enable(); +} + +void _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) +{ + _raw_write_unlock(&lock->raw); + local_irq_restore(flags); +} diff -r 6583186e5989 -r 46d7e12c4c91 xen/common/timer.c --- a/xen/common/timer.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/common/timer.c Wed Oct 22 11:46:55 2008 +0900 @@ -114,34 +114,19 @@ static int remove_from_heap(struct timer /* Add new entry @t to @heap. Return TRUE if new top of heap. */ -static int add_to_heap(struct timer ***pheap, struct timer *t) -{ - struct timer **heap = *pheap; +static int add_to_heap(struct timer **heap, struct timer *t) +{ int sz = GET_HEAP_SIZE(heap); - /* Copy the heap if it is full. */ + /* Fail if the heap is full. */ if ( unlikely(sz == GET_HEAP_LIMIT(heap)) ) - { - /* old_limit == (2^n)-1; new_limit == (2^(n+4))-1 */ - int old_limit = GET_HEAP_LIMIT(heap); - int new_limit = ((old_limit + 1) << 4) - 1; - if ( in_irq() ) - goto out; - heap = xmalloc_array(struct timer *, new_limit + 1); - if ( heap == NULL ) - goto out; - memcpy(heap, *pheap, (old_limit + 1) * sizeof(*heap)); - SET_HEAP_LIMIT(heap, new_limit); - if ( old_limit != 0 ) - xfree(*pheap); - *pheap = heap; - } + return 0; SET_HEAP_SIZE(heap, ++sz); heap[sz] = t; t->heap_offset = sz; up_heap(heap, sz); - out: + return (t->heap_offset == 1); } @@ -210,7 +195,7 @@ static int add_entry(struct timers *time /* Try to add to heap. t->heap_offset indicates whether we succeed. */ t->heap_offset = 0; t->status = TIMER_STATUS_in_heap; - rc = add_to_heap(&timers->heap, t); + rc = add_to_heap(timers->heap, t); if ( t->heap_offset != 0 ) return rc; @@ -368,6 +353,27 @@ static void timer_softirq_action(void) void *data; ts = &this_cpu(timers); + heap = ts->heap; + + /* If we are using overflow linked list, try to allocate a larger heap. 
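 * [Illustrative note, not part of the changeset: add_to_heap() above no
 *  longer grows the heap itself -- it now simply fails when the heap is full
 *  and the timer falls back to the per-CPU overflow list (ts->list) -- so the
 *  heap is instead enlarged here, in softirq context, where the allocation is
 *  safe and the lock only needs to be held while the old entries are copied
 *  across.]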
*/ + if ( unlikely(ts->list != NULL) ) + { + /* old_limit == (2^n)-1; new_limit == (2^(n+4))-1 */ + int old_limit = GET_HEAP_LIMIT(heap); + int new_limit = ((old_limit + 1) << 4) - 1; + struct timer **newheap = xmalloc_array(struct timer *, new_limit + 1); + if ( newheap != NULL ) + { + spin_lock_irq(&ts->lock); + memcpy(newheap, heap, (old_limit + 1) * sizeof(*heap)); + SET_HEAP_LIMIT(newheap, new_limit); + ts->heap = newheap; + spin_unlock_irq(&ts->lock); + if ( old_limit != 0 ) + xfree(heap); + heap = newheap; + } + } spin_lock_irq(&ts->lock); @@ -380,9 +386,8 @@ static void timer_softirq_action(void) t->status = TIMER_STATUS_inactive; add_entry(ts, t); } - - heap = ts->heap; - now = NOW(); + + now = NOW(); while ( (GET_HEAP_SIZE(heap) != 0) && ((t = heap[1])->expires < (now + TIMER_SLOP)) ) @@ -397,9 +402,6 @@ static void timer_softirq_action(void) spin_unlock_irq(&ts->lock); (*fn)(data); spin_lock_irq(&ts->lock); - - /* Heap may have grown while the lock was released. */ - heap = ts->heap; } deadline = GET_HEAP_SIZE(heap) ? heap[1]->expires : 0; diff -r 6583186e5989 -r 46d7e12c4c91 xen/common/xmalloc_tlsf.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/common/xmalloc_tlsf.c Wed Oct 22 11:46:55 2008 +0900 @@ -0,0 +1,599 @@ +/* + * Two Levels Segregate Fit memory allocator (TLSF) + * Version 2.3.2 + * + * Written by Miguel Masmano Tello <mimastel@xxxxxxxxxxxxx> + * + * Thanks to Ismael Ripoll for his suggestions and reviews + * + * Copyright (C) 2007, 2006, 2005, 2004 + * + * This code is released using a dual license strategy: GPL/LGPL + * You can choose the licence that better fits your requirements. + * + * Released under the terms of the GNU General Public License Version 2.0 + * Released under the terms of the GNU Lesser General Public License + * Version 2.1 + * + * This is kernel port of TLSF allocator. 
+ * Original code can be found at: http://rtportal.upv.es/rtmalloc/ + * Adapted for Linux by Nitin Gupta (nitingupta910@xxxxxxxxx) + * (http://code.google.com/p/compcache/source/browse/trunk/sub-projects + * /allocators/tlsf-kmod r229 dated Aug 27, 2008 + * Adapted for Xen by Dan Magenheimer (dan.magenheimer@xxxxxxxxxx) + */ + +#include <xen/config.h> +#include <xen/irq.h> +#include <xen/mm.h> +#include <asm/time.h> + +#define MAX_POOL_NAME_LEN 16 + +/* Some IMPORTANT TLSF parameters */ +#define MEM_ALIGN (sizeof(void *) * 2) +#define MEM_ALIGN_MASK (~(MEM_ALIGN - 1)) + +#define MAX_FLI (30) +#define MAX_LOG2_SLI (5) +#define MAX_SLI (1 << MAX_LOG2_SLI) + +#define FLI_OFFSET (6) +/* tlsf structure just will manage blocks bigger than 128 bytes */ +#define SMALL_BLOCK (128) +#define REAL_FLI (MAX_FLI - FLI_OFFSET) +#define MIN_BLOCK_SIZE (sizeof(struct free_ptr)) +#define BHDR_OVERHEAD (sizeof(struct bhdr) - MIN_BLOCK_SIZE) + +#define PTR_MASK (sizeof(void *) - 1) +#define BLOCK_SIZE_MASK (0xFFFFFFFF - PTR_MASK) + +#define GET_NEXT_BLOCK(addr, r) ((struct bhdr *) \ + ((char *)(addr) + (r))) +#define ROUNDUP_SIZE(r) (((r) + MEM_ALIGN - 1) & MEM_ALIGN_MASK) +#define ROUNDDOWN_SIZE(r) ((r) & MEM_ALIGN_MASK) +#define ROUNDUP_PAGE(r) (((r) + PAGE_SIZE - 1) & PAGE_MASK) + +#define BLOCK_STATE (0x1) +#define PREV_STATE (0x2) + +/* bit 0 of the block size */ +#define FREE_BLOCK (0x1) +#define USED_BLOCK (0x0) + +/* bit 1 of the block size */ +#define PREV_FREE (0x2) +#define PREV_USED (0x0) + +static spinlock_t pool_list_lock; +static struct list_head pool_list_head; + +struct free_ptr { + struct bhdr *prev; + struct bhdr *next; +}; + +struct bhdr { + /* All blocks in a region are linked in order of physical address */ + struct bhdr *prev_hdr; + /* + * The size is stored in bytes + * bit 0: block is free, if set + * bit 1: previous block is free, if set + */ + u32 size; + /* Free blocks in individual freelists are linked */ + union { + struct free_ptr free_ptr; + u8 buffer[sizeof(struct free_ptr)]; + } ptr; +}; + +struct xmem_pool { + /* First level bitmap (REAL_FLI bits) */ + u32 fl_bitmap; + + /* Second level bitmap */ + u32 sl_bitmap[REAL_FLI]; + + /* Free lists */ + struct bhdr *matrix[REAL_FLI][MAX_SLI]; + + spinlock_t lock; + + unsigned long init_size; + unsigned long max_size; + unsigned long grow_size; + + /* Basic stats */ + unsigned long used_size; + unsigned long num_regions; + + /* User provided functions for expanding/shrinking pool */ + xmem_pool_get_memory *get_mem; + xmem_pool_put_memory *put_mem; + + struct list_head list; + + void *init_region; + char name[MAX_POOL_NAME_LEN]; +}; + +/* + * Helping functions + */ + +/** + * Returns indexes (fl, sl) of the list used to serve request of size r + */ +static inline void MAPPING_SEARCH(unsigned long *r, int *fl, int *sl) +{ + int t; + + if ( *r < SMALL_BLOCK ) + { + *fl = 0; + *sl = *r / (SMALL_BLOCK / MAX_SLI); + } + else + { + t = (1 << (fls(*r) - 1 - MAX_LOG2_SLI)) - 1; + *r = *r + t; + *fl = fls(*r) - 1; + *sl = (*r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI; + *fl -= FLI_OFFSET; + /*if ((*fl -= FLI_OFFSET) < 0) // FL will be always >0! + *fl = *sl = 0; + */ + *r &= ~t; + } +} + +/** + * Returns indexes (fl, sl) which is used as starting point to search + * for a block of size r. It also rounds up requested size(r) to the + * next list. 
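 * [Worked example, for illustration only, using the formulas below with
 *  MAX_LOG2_SLI = 5 and FLI_OFFSET = 6: a size of r = 500 gives fls(500) = 9,
 *  so fl = 8 and sl = (500 >> 3) - 32 = 30; after subtracting FLI_OFFSET the
 *  entry lands in matrix[2][30]. Sizes below SMALL_BLOCK (128) instead index
 *  the first row directly, in steps of 128/32 = 4 bytes.]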
+ */ +static inline void MAPPING_INSERT(unsigned long r, int *fl, int *sl) +{ + if ( r < SMALL_BLOCK ) + { + *fl = 0; + *sl = r / (SMALL_BLOCK / MAX_SLI); + } + else + { + *fl = fls(r) - 1; + *sl = (r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI; + *fl -= FLI_OFFSET; + } +} + +/** + * Returns first block from a list that hold blocks larger than or + * equal to the one pointed by the indexes (fl, sl) + */ +static inline struct bhdr *FIND_SUITABLE_BLOCK(struct xmem_pool *p, int *fl, + int *sl) +{ + u32 tmp = p->sl_bitmap[*fl] & (~0 << *sl); + struct bhdr *b = NULL; + + if ( tmp ) + { + *sl = ffs(tmp) - 1; + b = p->matrix[*fl][*sl]; + } + else + { + *fl = ffs(p->fl_bitmap & (~0 << (*fl + 1))) - 1; + if ( likely(*fl > 0) ) + { + *sl = ffs(p->sl_bitmap[*fl]) - 1; + b = p->matrix[*fl][*sl]; + } + } + + return b; +} + +/** + * Remove first free block(b) from free list with indexes (fl, sl). + */ +static inline void EXTRACT_BLOCK_HDR(struct bhdr *b, struct xmem_pool *p, int fl, + int sl) +{ + p->matrix[fl][sl] = b->ptr.free_ptr.next; + if ( p->matrix[fl][sl] ) + { + p->matrix[fl][sl]->ptr.free_ptr.prev = NULL; + } + else + { + clear_bit(sl, &p->sl_bitmap[fl]); + if ( !p->sl_bitmap[fl] ) + clear_bit(fl, &p->fl_bitmap); + } + b->ptr.free_ptr = (struct free_ptr) {NULL, NULL}; +} + +/** + * Removes block(b) from free list with indexes (fl, sl) + */ +static inline void EXTRACT_BLOCK(struct bhdr *b, struct xmem_pool *p, int fl, + int sl) +{ + if ( b->ptr.free_ptr.next ) + b->ptr.free_ptr.next->ptr.free_ptr.prev = + b->ptr.free_ptr.prev; + if ( b->ptr.free_ptr.prev ) + b->ptr.free_ptr.prev->ptr.free_ptr.next = + b->ptr.free_ptr.next; + if ( p->matrix[fl][sl] == b ) + { + p->matrix[fl][sl] = b->ptr.free_ptr.next; + if ( !p->matrix[fl][sl] ) + { + clear_bit(sl, &p->sl_bitmap[fl]); + if ( !p->sl_bitmap[fl] ) + clear_bit (fl, &p->fl_bitmap); + } + } + b->ptr.free_ptr = (struct free_ptr) {NULL, NULL}; +} + +/** + * Insert block(b) in free list with indexes (fl, sl) + */ +static inline void INSERT_BLOCK(struct bhdr *b, struct xmem_pool *p, int fl, int sl) +{ + b->ptr.free_ptr = (struct free_ptr) {NULL, p->matrix[fl][sl]}; + if ( p->matrix[fl][sl] ) + p->matrix[fl][sl]->ptr.free_ptr.prev = b; + p->matrix[fl][sl] = b; + set_bit(sl, &p->sl_bitmap[fl]); + set_bit(fl, &p->fl_bitmap); +} + +/** + * Region is a virtually contiguous memory region and Pool is + * collection of such regions + */ +static inline void ADD_REGION(void *region, unsigned long region_size, + struct xmem_pool *pool) +{ + int fl, sl; + struct bhdr *b, *lb; + + b = (struct bhdr *)(region); + b->prev_hdr = NULL; + b->size = ROUNDDOWN_SIZE(region_size - 2 * BHDR_OVERHEAD) + | FREE_BLOCK | PREV_USED; + MAPPING_INSERT(b->size & BLOCK_SIZE_MASK, &fl, &sl); + INSERT_BLOCK(b, pool, fl, sl); + /* The sentinel block: allows us to know when we're in the last block */ + lb = GET_NEXT_BLOCK(b->ptr.buffer, b->size & BLOCK_SIZE_MASK); + lb->prev_hdr = b; + lb->size = 0 | USED_BLOCK | PREV_FREE; + pool->used_size += BHDR_OVERHEAD; /* only sentinel block is "used" */ + pool->num_regions++; +} + +/* + * TLSF pool-based allocator start. 
+ */ + +struct xmem_pool *xmem_pool_create( + const char *name, + xmem_pool_get_memory get_mem, + xmem_pool_put_memory put_mem, + unsigned long init_size, + unsigned long max_size, + unsigned long grow_size) +{ + struct xmem_pool *pool; + void *region; + int pool_bytes, pool_order; + + BUG_ON(max_size && (max_size < init_size)); + + pool_bytes = ROUNDUP_SIZE(sizeof(*pool)); + pool_order = get_order_from_bytes(pool_bytes); + + pool = (void *)alloc_xenheap_pages(pool_order); + if ( pool == NULL ) + return NULL; + memset(pool, 0, pool_bytes); + + /* Round to next page boundary */ + init_size = ROUNDUP_PAGE(init_size); + max_size = ROUNDUP_PAGE(max_size); + grow_size = ROUNDUP_PAGE(grow_size); + + /* pool global overhead not included in used size */ + pool->used_size = 0; + + pool->init_size = init_size; + pool->max_size = max_size; + pool->grow_size = grow_size; + pool->get_mem = get_mem; + pool->put_mem = put_mem; + strlcpy(pool->name, name, sizeof(pool->name)); + region = get_mem(init_size); + if ( region == NULL ) + goto out_region; + ADD_REGION(region, init_size, pool); + pool->init_region = region; + + spin_lock_init(&pool->lock); + + spin_lock(&pool_list_lock); + list_add_tail(&pool->list, &pool_list_head); + spin_unlock(&pool_list_lock); + + return pool; + + out_region: + free_xenheap_pages(pool, pool_order); + return NULL; +} + +unsigned long xmem_pool_get_used_size(struct xmem_pool *pool) +{ + return pool->used_size; +} + +unsigned long xmem_pool_get_total_size(struct xmem_pool *pool) +{ + unsigned long total; + total = ROUNDUP_SIZE(sizeof(*pool)) + + pool->init_size + + (pool->num_regions - 1) * pool->grow_size; + return total; +} + +void xmem_pool_destroy(struct xmem_pool *pool) +{ + if ( pool == NULL ) + return; + + /* User is destroying without ever allocating from this pool */ + if ( xmem_pool_get_used_size(pool) == BHDR_OVERHEAD ) + { + pool->put_mem(pool->init_region); + pool->used_size -= BHDR_OVERHEAD; + } + + /* Check for memory leaks in this pool */ + if ( xmem_pool_get_used_size(pool) ) + printk("memory leak in pool: %s (%p). " + "%lu bytes still in use.\n", + pool->name, pool, xmem_pool_get_used_size(pool)); + + spin_lock(&pool_list_lock); + list_del_init(&pool->list); + spin_unlock(&pool_list_lock); + pool->put_mem(pool); +} + +void *xmem_pool_alloc(unsigned long size, struct xmem_pool *pool) +{ + struct bhdr *b, *b2, *next_b, *region; + int fl, sl; + unsigned long tmp_size; + + size = (size < MIN_BLOCK_SIZE) ? MIN_BLOCK_SIZE : ROUNDUP_SIZE(size); + /* Rounding up the requested size and calculating fl and sl */ + + spin_lock(&pool->lock); + retry_find: + MAPPING_SEARCH(&size, &fl, &sl); + + /* Searching a free block */ + if ( !(b = FIND_SUITABLE_BLOCK(pool, &fl, &sl)) ) + { + /* Not found */ + if ( size > (pool->grow_size - 2 * BHDR_OVERHEAD) ) + goto out_locked; + if ( pool->max_size && (pool->init_size + + pool->num_regions * pool->grow_size + > pool->max_size) ) + goto out_locked; + spin_unlock(&pool->lock); + if ( (region = pool->get_mem(pool->grow_size)) == NULL ) + goto out; + spin_lock(&pool->lock); + ADD_REGION(region, pool->grow_size, pool); + goto retry_find; + } + EXTRACT_BLOCK_HDR(b, pool, fl, sl); + + /*-- found: */ + next_b = GET_NEXT_BLOCK(b->ptr.buffer, b->size & BLOCK_SIZE_MASK); + /* Should the block be split? 
*/ + tmp_size = (b->size & BLOCK_SIZE_MASK) - size; + if ( tmp_size >= sizeof(struct bhdr) ) + { + tmp_size -= BHDR_OVERHEAD; + b2 = GET_NEXT_BLOCK(b->ptr.buffer, size); + + b2->size = tmp_size | FREE_BLOCK | PREV_USED; + b2->prev_hdr = b; + + next_b->prev_hdr = b2; + + MAPPING_INSERT(tmp_size, &fl, &sl); + INSERT_BLOCK(b2, pool, fl, sl); + + b->size = size | (b->size & PREV_STATE); + } + else + { + next_b->size &= (~PREV_FREE); + b->size &= (~FREE_BLOCK); /* Now it's used */ + } + + pool->used_size += (b->size & BLOCK_SIZE_MASK) + BHDR_OVERHEAD; + + spin_unlock(&pool->lock); + return (void *)b->ptr.buffer; + + /* Failed alloc */ + out_locked: + spin_unlock(&pool->lock); + + out: + return NULL; +} + +void xmem_pool_free(void *ptr, struct xmem_pool *pool) +{ + struct bhdr *b, *tmp_b; + int fl = 0, sl = 0; + + if ( unlikely(ptr == NULL) ) + return; + + b = (struct bhdr *)((char *) ptr - BHDR_OVERHEAD); + + spin_lock(&pool->lock); + b->size |= FREE_BLOCK; + pool->used_size -= (b->size & BLOCK_SIZE_MASK) + BHDR_OVERHEAD; + b->ptr.free_ptr = (struct free_ptr) { NULL, NULL}; + tmp_b = GET_NEXT_BLOCK(b->ptr.buffer, b->size & BLOCK_SIZE_MASK); + if ( tmp_b->size & FREE_BLOCK ) + { + MAPPING_INSERT(tmp_b->size & BLOCK_SIZE_MASK, &fl, &sl); + EXTRACT_BLOCK(tmp_b, pool, fl, sl); + b->size += (tmp_b->size & BLOCK_SIZE_MASK) + BHDR_OVERHEAD; + } + if ( b->size & PREV_FREE ) + { + tmp_b = b->prev_hdr; + MAPPING_INSERT(tmp_b->size & BLOCK_SIZE_MASK, &fl, &sl); + EXTRACT_BLOCK(tmp_b, pool, fl, sl); + tmp_b->size += (b->size & BLOCK_SIZE_MASK) + BHDR_OVERHEAD; + b = tmp_b; + } + tmp_b = GET_NEXT_BLOCK(b->ptr.buffer, b->size & BLOCK_SIZE_MASK); + tmp_b->prev_hdr = b; + + MAPPING_INSERT(b->size & BLOCK_SIZE_MASK, &fl, &sl); + + if ( (b->prev_hdr == NULL) && ((tmp_b->size & BLOCK_SIZE_MASK) == 0) ) + { + pool->put_mem(b); + pool->num_regions--; + pool->used_size -= BHDR_OVERHEAD; /* sentinel block header */ + goto out; + } + + INSERT_BLOCK(b, pool, fl, sl); + + tmp_b->size |= PREV_FREE; + tmp_b->prev_hdr = b; + out: + spin_unlock(&pool->lock); +} + +/* + * Glue for xmalloc(). + */ + +static struct xmem_pool *xenpool; + +static void *xmalloc_pool_get(unsigned long size) +{ + ASSERT(size == PAGE_SIZE); + return alloc_xenheap_pages(0); +} + +static void xmalloc_pool_put(void *p) +{ + free_xenheap_pages(p,0); +} + +static void *xmalloc_whole_pages(unsigned long size) +{ + struct bhdr *b; + unsigned int pageorder = get_order_from_bytes(size + BHDR_OVERHEAD); + + b = alloc_xenheap_pages(pageorder); + if ( b == NULL ) + return NULL; + + b->size = (1 << (pageorder + PAGE_SHIFT)); + return (void *)b->ptr.buffer; +} + +static void tlsf_init(void) +{ + INIT_LIST_HEAD(&pool_list_head); + spin_lock_init(&pool_list_lock); + xenpool = xmem_pool_create( + "xmalloc", xmalloc_pool_get, xmalloc_pool_put, + PAGE_SIZE, 0, PAGE_SIZE); + BUG_ON(!xenpool); +} + +/* + * xmalloc() + */ + +void *_xmalloc(unsigned long size, unsigned long align) +{ + void *p; + u32 pad; + + ASSERT(!in_irq()); + + ASSERT((align & (align - 1)) == 0); + if ( align < MEM_ALIGN ) + align = MEM_ALIGN; + size += align - MEM_ALIGN; + + if ( !xenpool ) + tlsf_init(); + + if ( size >= (PAGE_SIZE - (2*BHDR_OVERHEAD)) ) + p = xmalloc_whole_pages(size); + else + p = xmem_pool_alloc(size, xenpool); + + /* Add alignment padding. 
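 * [Illustrative note, not part of the changeset: -(long)p & (align - 1) is
 *  the number of bytes needed to round p up to the next multiple of align
 *  (zero if p is already aligned). The padding is recorded, with bit 0 set
 *  as a marker, in a bogus block header placed just below the returned
 *  pointer, so that xfree() further down can recognise it and step back to
 *  the real allocation.]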
*/ + if ( (pad = -(long)p & (align - 1)) != 0 ) + { + char *q = (char *)p + pad; + struct bhdr *b = (struct bhdr *)(q - BHDR_OVERHEAD); + ASSERT(q > (char *)p); + b->size = pad | 1; + p = q; + } + + ASSERT(((unsigned long)p & (align - 1)) == 0); + return p; +} + +void xfree(void *p) +{ + struct bhdr *b; + + ASSERT(!in_irq()); + + if ( p == NULL ) + return; + + /* Strip alignment padding. */ + b = (struct bhdr *)((char *) p - BHDR_OVERHEAD); + if ( b->size & 1 ) + { + p = (char *)p - (b->size & ~1u); + b = (struct bhdr *)((char *)p - BHDR_OVERHEAD); + ASSERT(!(b->size & 1)); + } + + if ( b->size >= (PAGE_SIZE - (2*BHDR_OVERHEAD)) ) + free_xenheap_pages((void *)b, get_order_from_bytes(b->size)); + else + xmem_pool_free(p, xenpool); +} diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/Makefile --- a/xen/drivers/Makefile Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/drivers/Makefile Wed Oct 22 11:46:55 2008 +0900 @@ -1,6 +1,6 @@ subdir-y += char subdir-y += char subdir-y += cpufreq subdir-y += pci -subdir-$(x86) += passthrough +subdir-y += passthrough subdir-$(HAS_ACPI) += acpi subdir-$(HAS_VGA) += video diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/acpi/pmstat.c --- a/xen/drivers/acpi/pmstat.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/drivers/acpi/pmstat.c Wed Oct 22 11:46:55 2008 +0900 @@ -52,7 +52,7 @@ int do_get_pm_info(struct xen_sysctl_get int ret = 0; const struct processor_pminfo *pmpt; - if ( (op->cpuid >= NR_CPUS) || !cpu_online(op->cpuid) ) + if ( !op || (op->cpuid >= NR_CPUS) || !cpu_online(op->cpuid) ) return -EINVAL; pmpt = processor_pminfo[op->cpuid]; @@ -87,7 +87,7 @@ int do_get_pm_info(struct xen_sysctl_get uint64_t tmp_idle_ns; struct pm_px *pxpt = cpufreq_statistic_data[op->cpuid]; - if ( !pxpt ) + if ( !pxpt || !pxpt->u.pt || !pxpt->u.trans_pt ) return -ENODATA; total_idle_ns = get_cpu_idle_time(op->cpuid); diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/char/ns16550.c --- a/xen/drivers/char/ns16550.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/drivers/char/ns16550.c Wed Oct 22 11:46:55 2008 +0900 @@ -18,17 +18,19 @@ #include <asm/io.h> /* - * Configure serial port with a string <baud>,DPS,<io-base>,<irq>. + * Configure serial port with a string: + * <baud>[/<clock_hz>][,DPS[,<io-base>[,<irq>]]]. * The tail of the string can be omitted if platform defaults are sufficient. * If the baud rate is pre-configured, perhaps by a bootloader, then 'auto' - * can be specified in place of a numeric baud rate. + * can be specified in place of a numeric baud rate. Polled mode is specified + * by requesting irq 0. */ static char opt_com1[30] = "", opt_com2[30] = ""; string_param("com1", opt_com1); string_param("com2", opt_com2); static struct ns16550 { - int baud, data_bits, parity, stop_bits, irq; + int baud, clock_hz, data_bits, parity, stop_bits, irq; unsigned long io_base; /* I/O port or memory-mapped I/O address. */ char *remapped_io_base; /* Remapped virtual address of mmap I/O. */ /* UART with IRQ line: interrupt-driven I/O. */ @@ -192,7 +194,7 @@ static void __devinit ns16550_init_preir if ( uart->baud != BAUD_AUTO ) { /* Baud rate specified: program it into the divisor latch. */ - divisor = UART_CLOCK_HZ / (uart->baud * 16); + divisor = uart->clock_hz / (uart->baud << 4); ns_write_reg(uart, DLL, (char)divisor); ns_write_reg(uart, DLM, (char)(divisor >> 8)); } @@ -201,7 +203,7 @@ static void __devinit ns16550_init_preir /* Baud rate already set: read it out from the divisor latch. 
*/ divisor = ns_read_reg(uart, DLL); divisor |= ns_read_reg(uart, DLM) << 8; - uart->baud = UART_CLOCK_HZ / (divisor * 16); + uart->baud = uart->clock_hz / (divisor << 4); } ns_write_reg(uart, LCR, lcr); @@ -354,6 +356,12 @@ static void __init ns16550_parse_port_co } else if ( (baud = simple_strtoul(conf, &conf, 10)) != 0 ) uart->baud = baud; + + if ( *conf == '/') + { + conf++; + uart->clock_hz = simple_strtoul(conf, &conf, 0) << 4; + } if ( *conf != ',' ) goto config_parsed; @@ -408,6 +416,7 @@ void __init ns16550_init(int index, stru uart->baud = (defaults->baud ? : console_has((index == 0) ? "com1" : "com2") ? BAUD_AUTO : 0); + uart->clock_hz = UART_CLOCK_HZ; uart->data_bits = defaults->data_bits; uart->parity = parse_parity_char(defaults->parity); uart->stop_bits = defaults->stop_bits; diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/cpufreq/cpufreq.c --- a/xen/drivers/cpufreq/cpufreq.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/drivers/cpufreq/cpufreq.c Wed Oct 22 11:46:55 2008 +0900 @@ -34,6 +34,7 @@ #include <xen/sched.h> #include <xen/timer.h> #include <xen/xmalloc.h> +#include <xen/guest_access.h> #include <xen/domain.h> #include <asm/bug.h> #include <asm/io.h> @@ -185,10 +186,18 @@ int cpufreq_del_cpu(unsigned int cpu) return 0; } +static void print_PCT(struct xen_pct_register *ptr) +{ + printk(KERN_INFO "\t_PCT: descriptor=%d, length=%d, space_id=%d, " + "bit_width=%d, bit_offset=%d, reserved=%d, address=%"PRId64"\n", + ptr->descriptor, ptr->length, ptr->space_id, ptr->bit_width, + ptr->bit_offset, ptr->reserved, ptr->address); +} + static void print_PSS(struct xen_processor_px *ptr, int count) { int i; - printk(KERN_INFO "\t_PSS:\n"); + printk(KERN_INFO "\t_PSS: state_count=%d\n", count); for (i=0; i<count; i++){ printk(KERN_INFO "\tState%d: %"PRId64"MHz %"PRId64"mW %"PRId64"us " "%"PRId64"us 0x%"PRIx64" 0x%"PRIx64"\n", @@ -211,20 +220,19 @@ static void print_PSD( struct xen_psd_pa ptr->num_processors); } +static void print_PPC(unsigned int platform_limit) +{ + printk(KERN_INFO "\t_PPC: %d\n", platform_limit); +} + int set_px_pminfo(uint32_t acpi_id, struct xen_processor_performance *dom0_px_info) { int ret=0, cpuid; struct processor_pminfo *pmpt; struct processor_performance *pxpt; - if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) ) - { - ret = -ENOSYS; - goto out; - } - cpuid = get_cpu_id(acpi_id); - if ( cpuid < 0 ) + if ( cpuid < 0 || !dom0_px_info) { ret = -EINVAL; goto out; @@ -256,6 +264,8 @@ int set_px_pminfo(uint32_t acpi_id, stru memcpy ((void *)&pxpt->status_register, (void *)&dom0_px_info->status_register, sizeof(struct xen_pct_register)); + print_PCT(&pxpt->control_register); + print_PCT(&pxpt->status_register); } if ( dom0_px_info->flags & XEN_PX_PSS ) { @@ -265,12 +275,8 @@ int set_px_pminfo(uint32_t acpi_id, stru ret = -ENOMEM; goto out; } - if ( xenpf_copy_px_states(pxpt, dom0_px_info) ) - { - xfree(pxpt->states); - ret = -EFAULT; - goto out; - } + copy_from_guest(pxpt->states, dom0_px_info->states, + dom0_px_info->state_count); pxpt->state_count = dom0_px_info->state_count; print_PSS(pxpt->states,pxpt->state_count); } @@ -285,6 +291,7 @@ int set_px_pminfo(uint32_t acpi_id, stru if ( dom0_px_info->flags & XEN_PX_PPC ) { pxpt->platform_limit = dom0_px_info->platform_limit; + print_PPC(pxpt->platform_limit); if ( pxpt->init == XEN_PX_INIT ) { diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/cpufreq/utility.c --- a/xen/drivers/cpufreq/utility.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/drivers/cpufreq/utility.c Wed Oct 22 11:46:55 2008 +0900 @@ -27,6 +27,7 @@ 
#include <xen/types.h> #include <xen/sched.h> #include <xen/timer.h> +#include <xen/trace.h> #include <asm/config.h> #include <acpi/cpufreq/cpufreq.h> #include <public/sysctl.h> @@ -72,27 +73,30 @@ int cpufreq_statistic_init(unsigned int struct pm_px *pxpt = cpufreq_statistic_data[cpuid]; const struct processor_pminfo *pmpt = processor_pminfo[cpuid]; + if ( !pmpt ) + return -EINVAL; + + if ( pxpt ) + return 0; + count = pmpt->perf.state_count; - if ( !pmpt ) - return -EINVAL; - + pxpt = xmalloc(struct pm_px); if ( !pxpt ) - { - pxpt = xmalloc(struct pm_px); - if ( !pxpt ) - return -ENOMEM; - memset(pxpt, 0, sizeof(*pxpt)); - cpufreq_statistic_data[cpuid] = pxpt; - } + return -ENOMEM; + memset(pxpt, 0, sizeof(*pxpt)); + cpufreq_statistic_data[cpuid] = pxpt; pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count); - if (!pxpt->u.trans_pt) + if (!pxpt->u.trans_pt) { + xfree(pxpt); return -ENOMEM; + } pxpt->u.pt = xmalloc_array(struct pm_px_val, count); if (!pxpt->u.pt) { xfree(pxpt->u.trans_pt); + xfree(pxpt); return -ENOMEM; } @@ -119,7 +123,8 @@ void cpufreq_statistic_exit(unsigned int return; xfree(pxpt->u.trans_pt); xfree(pxpt->u.pt); - memset(pxpt, 0, sizeof(struct pm_px)); + xfree(pxpt); + cpufreq_statistic_data[cpuid] = NULL; } void cpufreq_statistic_reset(unsigned int cpuid) @@ -128,7 +133,7 @@ void cpufreq_statistic_reset(unsigned in struct pm_px *pxpt = cpufreq_statistic_data[cpuid]; const struct processor_pminfo *pmpt = processor_pminfo[cpuid]; - if ( !pxpt || !pmpt ) + if ( !pmpt || !pxpt || !pxpt->u.pt || !pxpt->u.trans_pt ) return; count = pmpt->perf.state_count; @@ -293,7 +298,13 @@ int __cpufreq_driver_target(struct cpufr int retval = -EINVAL; if (cpu_online(policy->cpu) && cpufreq_driver->target) + { + unsigned int prev_freq = policy->cur; + retval = cpufreq_driver->target(policy, target_freq, relation); + if ( retval == 0 ) + TRACE_2D(TRC_PM_FREQ_CHANGE, prev_freq/1000, policy->cur/1000); + } return retval; } diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/Makefile --- a/xen/drivers/passthrough/Makefile Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/drivers/passthrough/Makefile Wed Oct 22 11:46:55 2008 +0900 @@ -1,4 +1,5 @@ subdir-$(x86) += vtd subdir-$(x86) += vtd +subdir-$(ia64) += vtd subdir-$(x86) += amd obj-y += iommu.o diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/io.c --- a/xen/drivers/passthrough/io.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/drivers/passthrough/io.c Wed Oct 22 11:46:55 2008 +0900 @@ -20,6 +20,9 @@ #include <xen/event.h> #include <xen/iommu.h> +#include <asm/hvm/irq.h> +#include <asm/hvm/iommu.h> +#include <xen/hvm/irq.h> static void pt_irq_time_out(void *data) { @@ -245,6 +248,7 @@ int hvm_do_IRQ_dpci(struct domain *d, un return 1; } +#ifdef SUPPORT_MSI_REMAPPING void hvm_dpci_msi_eoi(struct domain *d, int vector) { struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; @@ -278,6 +282,63 @@ void hvm_dpci_msi_eoi(struct domain *d, spin_unlock(&d->event_lock); } +extern int vmsi_deliver(struct domain *d, int pirq); +static int hvm_pci_msi_assert(struct domain *d, int pirq) +{ + return vmsi_deliver(d, pirq); +} +#endif + +void hvm_dirq_assist(struct vcpu *v) +{ + unsigned int irq; + uint32_t device, intx; + struct domain *d = v->domain; + struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci; + struct dev_intx_gsi_link *digl; + + if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) ) + return; + + for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS); + irq < NR_IRQS; + irq = 
find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) ) + { + if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) ) + continue; + + spin_lock(&d->event_lock); +#ifdef SUPPORT_MSI_REMAPPING + if ( test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[irq].flags) ) + { + hvm_pci_msi_assert(d, irq); + spin_unlock(&d->event_lock); + continue; + } +#endif + stop_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)]); + + list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list ) + { + device = digl->device; + intx = digl->intx; + hvm_pci_intx_assert(d, device, intx); + hvm_irq_dpci->mirq[irq].pending++; + } + + /* + * Set a timer to see if the guest can finish the interrupt or not. For + * example, the guest OS may unmask the PIC during boot, before the + * guest driver is loaded. hvm_pci_intx_assert() may succeed, but the + * guest will never deal with the irq, then the physical interrupt line + * will never be deasserted. + */ + set_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)], + NOW() + PT_IRQ_TIME_OUT); + spin_unlock(&d->event_lock); + } +} + void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi, union vioapic_redir_entry *ent) { diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/iommu.c --- a/xen/drivers/passthrough/iommu.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/drivers/passthrough/iommu.c Wed Oct 22 11:46:55 2008 +0900 @@ -19,8 +19,6 @@ #include <xen/paging.h> #include <xen/guest_access.h> -extern struct iommu_ops intel_iommu_ops; -extern struct iommu_ops amd_iommu_ops; static void parse_iommu_param(char *s); static int iommu_populate_page_table(struct domain *d); int intel_vtd_setup(void); diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/pci.c --- a/xen/drivers/passthrough/pci.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/drivers/passthrough/pci.c Wed Oct 22 11:46:55 2008 +0900 @@ -21,6 +21,8 @@ #include <xen/list.h> #include <xen/prefetch.h> #include <xen/iommu.h> +#include <asm/hvm/iommu.h> +#include <asm/hvm/irq.h> #include <xen/delay.h> #include <xen/keyhandler.h> @@ -207,6 +209,7 @@ void pci_release_devices(struct domain * } } +#ifdef SUPPORT_MSI_REMAPPING static void dump_pci_devices(unsigned char ch) { struct pci_dev *pdev; @@ -236,7 +239,7 @@ static int __init setup_dump_pcidevs(voi return 0; } __initcall(setup_dump_pcidevs); - +#endif /* diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/vtd/Makefile --- a/xen/drivers/passthrough/vtd/Makefile Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/drivers/passthrough/vtd/Makefile Wed Oct 22 11:46:55 2008 +0900 @@ -1,4 +1,5 @@ subdir-$(x86) += x86 subdir-$(x86) += x86 +subdir-$(ia64) += ia64 obj-y += iommu.o obj-y += dmar.o diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/vtd/ia64/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/drivers/passthrough/vtd/ia64/Makefile Wed Oct 22 11:46:55 2008 +0900 @@ -0,0 +1,1 @@ +obj-y += vtd.o diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/vtd/ia64/vtd.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/drivers/passthrough/vtd/ia64/vtd.c Wed Oct 22 11:46:55 2008 +0900 @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) Allen Kay <allen.m.kay@xxxxxxxxx> + * Copyright (C) Weidong Han <weidong.han@xxxxxxxxx> + */ + +#include <xen/sched.h> +#include <xen/domain_page.h> +#include <xen/iommu.h> +#include <asm/xensystem.h> +#include <asm/sal.h> +#include "../iommu.h" +#include "../dmar.h" +#include "../vtd.h" + + +int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; +/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ +u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; + +void *map_vtd_domain_page(u64 maddr) +{ + return (void *)((u64)map_domain_page(maddr >> PAGE_SHIFT) | + (maddr & (PAGE_SIZE - PAGE_SIZE_4K))); +} + +void unmap_vtd_domain_page(void *va) +{ + unmap_domain_page(va); +} + +/* Allocate page table, return its machine address */ +u64 alloc_pgtable_maddr(void) +{ + struct page_info *pg; + u64 *vaddr; + + pg = alloc_domheap_page(NULL, 0); + vaddr = map_domain_page(page_to_mfn(pg)); + if ( !vaddr ) + return 0; + memset(vaddr, 0, PAGE_SIZE); + + iommu_flush_cache_page(vaddr); + unmap_domain_page(vaddr); + + return page_to_maddr(pg); +} + +void free_pgtable_maddr(u64 maddr) +{ + if ( maddr != 0 ) + free_domheap_page(maddr_to_page(maddr)); +} + +unsigned int get_cache_line_size(void) +{ + return L1_CACHE_BYTES; +} + +void cacheline_flush(char * addr) +{ + ia64_fc(addr); + ia64_sync_i(); + ia64_srlz_i(); +} + +void flush_all_cache() +{ + ia64_sal_cache_flush(3); +} + +void * map_to_nocache_virt(int nr_iommus, u64 maddr) +{ + return (void *) ( maddr + __IA64_UNCACHED_OFFSET); +} + +struct hvm_irq_dpci *domain_get_irq_dpci(struct domain *domain) +{ + if ( !domain ) + return NULL; + + return domain->arch.hvm_domain.irq.dpci; +} + +int domain_set_irq_dpci(struct domain *domain, struct hvm_irq_dpci *dpci) +{ + if ( !domain || !dpci ) + return 0; + + domain->arch.hvm_domain.irq.dpci = dpci; + return 1; +} + +void hvm_dpci_isairq_eoi(struct domain *d, unsigned int isairq) +{ + /* dummy */ +} diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/vtd/intremap.c --- a/xen/drivers/passthrough/vtd/intremap.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/drivers/passthrough/vtd/intremap.c Wed Oct 22 11:46:55 2008 +0900 @@ -21,6 +21,7 @@ #include <xen/irq.h> #include <xen/sched.h> #include <xen/iommu.h> +#include <asm/hvm/iommu.h> #include <xen/time.h> #include <xen/pci.h> #include <xen/pci_regs.h> @@ -128,7 +129,13 @@ static int ioapic_rte_to_remap_entry(str memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry)); if ( rte_upper ) + { +#if defined(__i386__) || defined(__x86_64__) new_ire.lo.dst = (value >> 24) << 8; +#else /* __ia64__ */ + new_ire.lo.dst = value >> 16; +#endif + } else { *(((u32 *)&new_rte) + 0) = value; @@ -179,7 +186,7 @@ unsigned int io_apic_read_remap_rte( struct IO_xAPIC_route_entry old_rte = { 0 }; struct IO_APIC_route_remap_entry *remap_rte; int rte_upper = (reg & 1) ? 
1 : 0; - struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid); + struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic)); struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu); if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 || @@ -224,7 +231,7 @@ void io_apic_write_remap_rte( struct IO_xAPIC_route_entry old_rte = { 0 }; struct IO_APIC_route_remap_entry *remap_rte; unsigned int rte_upper = (reg & 1) ? 1 : 0; - struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid); + struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic)); struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu); int saved_mask; @@ -253,7 +260,7 @@ void io_apic_write_remap_rte( *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0); remap_rte->mask = saved_mask; - if ( ioapic_rte_to_remap_entry(iommu, mp_ioapics[apic].mpc_apicid, + if ( ioapic_rte_to_remap_entry(iommu, IO_APIC_ID(apic), &old_rte, rte_upper, value) ) { *IO_APIC_BASE(apic) = rte_upper ? (reg + 1) : reg; @@ -328,7 +335,8 @@ static int remap_entry_to_msi_msg( } static int msi_msg_to_remap_entry( - struct iommu *iommu, struct pci_dev *pdev, struct msi_msg *msg) + struct iommu *iommu, struct pci_dev *pdev, + struct msi_desc *msi_desc, struct msi_msg *msg) { struct iremap_entry *iremap_entry = NULL, *iremap_entries; struct iremap_entry new_ire; @@ -336,32 +344,18 @@ static int msi_msg_to_remap_entry( unsigned int index; unsigned long flags; struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu); - int i = 0; remap_rte = (struct msi_msg_remap_entry *) msg; spin_lock_irqsave(&ir_ctrl->iremap_lock, flags); - iremap_entries = - (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr); - - /* If the entry for a PCI device has been there, use the old entry, - * Or, assign a new entry for it. - */ - for ( i = 0; i <= ir_ctrl->iremap_index; i++ ) - { - iremap_entry = &iremap_entries[i]; - if ( iremap_entry->hi.sid == - ((pdev->bus << 8) | pdev->devfn) ) - break; - } - - if ( i > ir_ctrl->iremap_index ) - { - ir_ctrl->iremap_index++; + if ( msi_desc->remap_index < 0 ) + { + ir_ctrl->iremap_index++; index = ir_ctrl->iremap_index; + msi_desc->remap_index = index; } else - index = i; + index = msi_desc->remap_index; if ( index > IREMAP_ENTRY_NR - 1 ) { @@ -369,11 +363,13 @@ static int msi_msg_to_remap_entry( "%s: intremap index (%d) is larger than" " the maximum index (%ld)!\n", __func__, index, IREMAP_ENTRY_NR - 1); - unmap_vtd_domain_page(iremap_entries); + msi_desc->remap_index = -1; spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags); return -EFAULT; } + iremap_entries = + (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr); iremap_entry = &iremap_entries[index]; memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry)); @@ -450,7 +446,7 @@ void msi_msg_write_remap_rte( if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 ) return; - msi_msg_to_remap_entry(iommu, pdev, msg); + msi_msg_to_remap_entry(iommu, pdev, msi_desc, msg); } #elif defined(__ia64__) void msi_msg_read_remap_rte( @@ -482,7 +478,7 @@ int intremap_setup(struct iommu *iommu) { dprintk(XENLOG_WARNING VTDPREFIX, "Cannot allocate memory for ir_ctrl->iremap_maddr\n"); - return -ENODEV; + return -ENOMEM; } ir_ctrl->iremap_index = -1; } @@ -490,10 +486,10 @@ int intremap_setup(struct iommu *iommu) #if defined(ENABLED_EXTENDED_INTERRUPT_SUPPORT) /* set extended interrupt mode bit */ ir_ctrl->iremap_maddr |= - ecap_ext_intr(iommu->ecap) ? (1 << IRTA_REG_EIMI_SHIFT) : 0; + ecap_ext_intr(iommu->ecap) ? 
(1 << IRTA_REG_EIME_SHIFT) : 0; #endif - /* size field = 256 entries per 4K page = 8 - 1 */ - ir_ctrl->iremap_maddr |= 7; + /* set size of the interrupt remapping table */ + ir_ctrl->iremap_maddr |= IRTA_REG_TABLE_SIZE; dmar_writeq(iommu->reg, DMAR_IRTA_REG, ir_ctrl->iremap_maddr); /* set SIRTP */ diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/drivers/passthrough/vtd/iommu.c Wed Oct 22 11:46:55 2008 +0900 @@ -24,6 +24,7 @@ #include <xen/xmalloc.h> #include <xen/domain_page.h> #include <xen/iommu.h> +#include <asm/hvm/iommu.h> #include <xen/numa.h> #include <xen/time.h> #include <xen/pci.h> @@ -218,10 +219,10 @@ static u64 addr_to_dma_page_maddr(struct if ( !alloc ) break; maddr = alloc_pgtable_maddr(); + if ( !maddr ) + break; dma_set_pte_addr(*pte, maddr); vaddr = map_vtd_domain_page(maddr); - if ( !vaddr ) - break; /* * high level table always sets r/w, last level @@ -234,8 +235,6 @@ static u64 addr_to_dma_page_maddr(struct else { vaddr = map_vtd_domain_page(pte->val); - if ( !vaddr ) - break; } if ( level == 2 ) @@ -567,26 +566,6 @@ static void dma_pte_clear_one(struct dom } unmap_vtd_domain_page(page); -} - -/* clear last level pte, a tlb flush should be followed */ -static void dma_pte_clear_range(struct domain *domain, u64 start, u64 end) -{ - struct hvm_iommu *hd = domain_hvm_iommu(domain); - int addr_width = agaw_to_width(hd->agaw); - - start &= (((u64)1) << addr_width) - 1; - end &= (((u64)1) << addr_width) - 1; - /* in case it's partial page */ - start = PAGE_ALIGN_4K(start); - end &= PAGE_MASK_4K; - - /* we don't need lock here, nobody else touches the iova range */ - while ( start < end ) - { - dma_pte_clear_one(domain, start); - start += PAGE_SIZE_4K; - } } static void iommu_free_pagetable(u64 pt_maddr, int level) @@ -877,6 +856,7 @@ static void dma_msi_data_init(struct iom spin_unlock_irqrestore(&iommu->register_lock, flags); } +#ifdef SUPPORT_MSI_REMAPPING static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu) { u64 msi_address; @@ -893,6 +873,12 @@ static void dma_msi_addr_init(struct iom dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32)); spin_unlock_irqrestore(&iommu->register_lock, flags); } +#else +static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu) +{ + /* ia64: TODO */ +} +#endif static void dma_msi_set_affinity(unsigned int vector, cpumask_t dest) { @@ -1024,7 +1010,7 @@ static int intel_iommu_domain_init(struc { struct hvm_iommu *hd = domain_hvm_iommu(d); struct iommu *iommu = NULL; - u64 i; + u64 i, j, tmp; struct acpi_drhd_unit *drhd; drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); @@ -1043,11 +1029,13 @@ static int intel_iommu_domain_init(struc */ for ( i = 0; i < max_page; i++ ) { - if ( xen_in_range(i << PAGE_SHIFT_4K, (i + 1) << PAGE_SHIFT_4K) || - tboot_in_range(i << PAGE_SHIFT_4K, (i + 1) << PAGE_SHIFT_4K) ) + if ( xen_in_range(i << PAGE_SHIFT, (i + 1) << PAGE_SHIFT) || + tboot_in_range(i << PAGE_SHIFT, (i + 1) << PAGE_SHIFT) ) continue; - iommu_map_page(d, i, i); + tmp = 1 << (PAGE_SHIFT - PAGE_SHIFT_4K); + for ( j = 0; j < tmp; j++ ) + iommu_map_page(d, (i*tmp+j), (i*tmp+j)); } setup_dom0_devices(d); @@ -1511,75 +1499,26 @@ int intel_iommu_unmap_page(struct domain return 0; } -int iommu_page_mapping(struct domain *domain, paddr_t iova, - paddr_t hpa, size_t size, int prot) -{ - struct hvm_iommu *hd = domain_hvm_iommu(domain); - struct acpi_drhd_unit *drhd; - struct iommu *iommu; - u64 start_pfn, 
end_pfn; - struct dma_pte *page = NULL, *pte = NULL; - int index; - u64 pg_maddr; - - if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 ) - return -EINVAL; - - iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K; - start_pfn = hpa >> PAGE_SHIFT_4K; - end_pfn = (PAGE_ALIGN_4K(hpa + size)) >> PAGE_SHIFT_4K; - index = 0; - while ( start_pfn < end_pfn ) - { - pg_maddr = addr_to_dma_page_maddr(domain, iova + PAGE_SIZE_4K*index, 1); - if ( pg_maddr == 0 ) - return -ENOMEM; - page = (struct dma_pte *)map_vtd_domain_page(pg_maddr); - pte = page + (start_pfn & LEVEL_MASK); - dma_set_pte_addr(*pte, (paddr_t)start_pfn << PAGE_SHIFT_4K); - dma_set_pte_prot(*pte, prot); - iommu_flush_cache_entry(pte); - unmap_vtd_domain_page(page); - start_pfn++; - index++; - } - - if ( index > 0 ) - { - for_each_drhd_unit ( drhd ) - { - iommu = drhd->iommu; - if ( test_bit(iommu->index, &hd->iommu_bitmap) ) - if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain), - iova, index, 1)) - iommu_flush_write_buffer(iommu); - } - } - - return 0; -} - -int iommu_page_unmapping(struct domain *domain, paddr_t addr, size_t size) -{ - dma_pte_clear_range(domain, addr, addr + size); - - return 0; -} - static int iommu_prepare_rmrr_dev(struct domain *d, struct acpi_rmrr_unit *rmrr, u8 bus, u8 devfn) { - u64 size; - int ret; - - /* page table init */ - size = rmrr->end_address - rmrr->base_address + 1; - ret = iommu_page_mapping(d, rmrr->base_address, - rmrr->base_address, size, - DMA_PTE_READ|DMA_PTE_WRITE); - if ( ret ) - return ret; + int ret = 0; + u64 base, end; + unsigned long base_pfn, end_pfn; + + ASSERT(rmrr->base_address < rmrr->end_address); + + base = rmrr->base_address & PAGE_MASK_4K; + base_pfn = base >> PAGE_SHIFT_4K; + end = PAGE_ALIGN_4K(rmrr->end_address); + end_pfn = end >> PAGE_SHIFT_4K; + + while ( base_pfn < end_pfn ) + { + intel_iommu_map_page(d, base_pfn, base_pfn); + base_pfn++; + } if ( domain_context_mapped(bus, devfn) == 0 ) ret = domain_context_mapping(d, bus, devfn); diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/vtd/qinval.c --- a/xen/drivers/passthrough/vtd/qinval.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/drivers/passthrough/vtd/qinval.c Wed Oct 22 11:46:55 2008 +0900 @@ -428,7 +428,11 @@ int qinval_setup(struct iommu *iommu) { qi_ctrl->qinval_maddr = alloc_pgtable_maddr(); if ( qi_ctrl->qinval_maddr == 0 ) - panic("Cannot allocate memory for qi_ctrl->qinval_maddr\n"); + { + dprintk(XENLOG_WARNING VTDPREFIX, + "Cannot allocate memory for qi_ctrl->qinval_maddr\n"); + return -ENOMEM; + } flush->context = flush_context_qi; flush->iotlb = flush_iotlb_qi; } diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/vtd/utils.c --- a/xen/drivers/passthrough/vtd/utils.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/drivers/passthrough/vtd/utils.c Wed Oct 22 11:46:55 2008 +0900 @@ -204,6 +204,7 @@ void print_vtd_entries(struct iommu *iom void dump_iommu_info(unsigned char key) { +#if defined(__i386__) || defined(__x86_64__) struct acpi_drhd_unit *drhd; struct iommu *iommu; int i; @@ -305,6 +306,10 @@ void dump_iommu_info(unsigned char key) } } } +#else + printk("%s: not implemnted on IA64 for now.\n", __func__); + /* ia64: TODO */ +#endif } /* diff -r 6583186e5989 -r 46d7e12c4c91 xen/drivers/passthrough/vtd/x86/vtd.c --- a/xen/drivers/passthrough/vtd/x86/vtd.c Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/drivers/passthrough/vtd/x86/vtd.c Wed Oct 22 11:46:55 2008 +0900 @@ -41,17 +41,19 @@ u64 alloc_pgtable_maddr(void) { struct page_info *pg; u64 *vaddr; + unsigned long mfn; pg = 
alloc_domheap_page(NULL, 0); - vaddr = map_domain_page(page_to_mfn(pg)); - if ( !vaddr ) + if ( !pg ) return 0; + mfn = page_to_mfn(pg); + vaddr = map_domain_page(mfn); memset(vaddr, 0, PAGE_SIZE); iommu_flush_cache_page(vaddr); unmap_domain_page(vaddr); - return page_to_maddr(pg); + return (u64)mfn << PAGE_SHIFT_4K; } void free_pgtable_maddr(u64 maddr) diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/acpi/cpufreq/processor_perf.h --- a/xen/include/acpi/cpufreq/processor_perf.h Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/include/acpi/cpufreq/processor_perf.h Wed Oct 22 11:46:55 2008 +0900 @@ -60,8 +60,5 @@ struct pm_px { extern struct pm_px *cpufreq_statistic_data[NR_CPUS]; -int xenpf_copy_px_states(struct processor_performance *pxpt, - struct xen_processor_performance *dom0_px_info); - int cpufreq_cpu_init(unsigned int cpuid); #endif /* __XEN_PROCESSOR_PM_H__ */ diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-ia64/linux-xen/asm/spinlock.h --- a/xen/include/asm-ia64/linux-xen/asm/spinlock.h Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/include/asm-ia64/linux-xen/asm/spinlock.h Wed Oct 22 11:46:55 2008 +0900 @@ -27,25 +27,16 @@ typedef struct { #ifdef DEBUG_SPINLOCK void *locker; #endif +} raw_spinlock_t; + #ifdef XEN - unsigned char recurse_cpu; - unsigned char recurse_cnt; -#endif -} spinlock_t; - -#ifdef XEN -#ifdef DEBUG_SPINLOCK -#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { 0, NULL, -1, 0 } -#else -#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { 0, -1, 0 } -#endif -static inline void spin_lock_init(spinlock_t *lock) -{ - *lock = ((spinlock_t)SPIN_LOCK_UNLOCKED); -} -#else -#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { 0 } -#define spin_lock_init(x) ((x)->lock = 0) +#ifdef DEBUG_SPINLOCK +#define _RAW_SPIN_LOCK_UNLOCKED /*(raw_spinlock_t)*/ { 0, NULL } +#else +#define _RAW_SPIN_LOCK_UNLOCKED /*(raw_spinlock_t)*/ { 0 } +#endif +#else +#define _RAW_SPIN_LOCK_UNLOCKED /*(raw_spinlock_t)*/ { 0 } #endif #ifdef ASM_SUPPORTED @@ -59,7 +50,7 @@ static inline void spin_lock_init(spinlo #define IA64_SPINLOCK_CLOBBERS "ar.ccv", "ar.pfs", "p14", "p15", "r27", "r28", "r29", "r30", "b6", "memory" static inline void -_raw_spin_lock_flags (spinlock_t *lock, unsigned long flags) +_raw_spin_lock_flags (raw_spinlock_t *lock, unsigned long flags) { register volatile unsigned int *ptr asm ("r31") = &lock->lock; @@ -136,10 +127,9 @@ do { \ } while (0) #endif /* !ASM_SUPPORTED */ -#define spin_is_locked(x) ((x)->lock != 0) -#define _raw_spin_unlock(x) do { barrier(); ((spinlock_t *) x)->lock = 0; } while (0) +#define _raw_spin_is_locked(x) ((x)->lock != 0) +#define _raw_spin_unlock(x) do { barrier(); (x)->lock = 0; } while (0) #define _raw_spin_trylock(x) (cmpxchg_acq(&(x)->lock, 0, 1) == 0) -#define spin_unlock_wait(x) do { barrier(); } while ((x)->lock) typedef struct { volatile unsigned int read_counter : 31; @@ -147,16 +137,12 @@ typedef struct { #ifdef CONFIG_PREEMPT unsigned int break_lock; #endif -} rwlock_t; -#define RW_LOCK_UNLOCKED /*(rwlock_t)*/ { 0, 0 } - -#define rwlock_init(x) do { *(x) = (rwlock_t) RW_LOCK_UNLOCKED; } while(0) -#define read_can_lock(rw) (*(volatile int *)(rw) >= 0) -#define write_can_lock(rw) (*(volatile int *)(rw) == 0) +} raw_rwlock_t; +#define _RAW_RW_LOCK_UNLOCKED /*(raw_rwlock_t)*/ { 0, 0 } #define _raw_read_lock(rw) \ do { \ - rwlock_t *__read_lock_ptr = (rw); \ + raw_rwlock_t *__read_lock_ptr = (rw); \ \ while (unlikely(ia64_fetchadd(1, (int *) __read_lock_ptr, acq) < 0)) { \ ia64_fetchadd(-1, (int *) __read_lock_ptr, rel); \ @@ -167,7 +153,7 @@ do { \ #define 
_raw_read_unlock(rw) \ do { \ - rwlock_t *__read_lock_ptr = (rw); \ + raw_rwlock_t *__read_lock_ptr = (rw); \ ia64_fetchadd(-1, (int *) __read_lock_ptr, rel); \ } while (0) @@ -230,7 +216,4 @@ do { \ clear_bit(31, (x)); \ }) -#ifdef XEN -#include <asm/xenspinlock.h> -#endif #endif /* _ASM_IA64_SPINLOCK_H */ diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-ia64/xenspinlock.h --- a/xen/include/asm-ia64/xenspinlock.h Wed Oct 22 11:38:22 2008 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,30 +0,0 @@ -#ifndef _ASM_IA64_XENSPINLOCK_H -#define _ASM_IA64_XENSPINLOCK_H - -/* - * spin_[un]lock_recursive(): Use these forms when the lock can (safely!) be - * reentered recursively on the same CPU. All critical regions that may form - * part of a recursively-nested set must be protected by these forms. If there - * are any critical regions that cannot form part of such a set, they can use - * standard spin_[un]lock(). - */ -#define _raw_spin_lock_recursive(_lock) \ - do { \ - int cpu = smp_processor_id(); \ - if ( likely((_lock)->recurse_cpu != cpu) ) \ - { \ - spin_lock(_lock); \ - (_lock)->recurse_cpu = cpu; \ - } \ - (_lock)->recurse_cnt++; \ - } while ( 0 ) - -#define _raw_spin_unlock_recursive(_lock) \ - do { \ - if ( likely(--(_lock)->recurse_cnt == 0) ) \ - { \ - (_lock)->recurse_cpu = -1; \ - spin_unlock(_lock); \ - } \ - } while ( 0 ) -#endif /* _ASM_IA64_XENSPINLOCK_H */ diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-x86/domain.h --- a/xen/include/asm-x86/domain.h Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/include/asm-x86/domain.h Wed Oct 22 11:46:55 2008 +0900 @@ -250,6 +250,8 @@ struct arch_domain bool_t is_32bit_pv; /* Is shared-info page in 32-bit format? */ bool_t has_32bit_shinfo; + /* Domain cannot handle spurious page faults? */ + bool_t suppress_spurious_page_faults; /* Continuable domain_relinquish_resources(). 
*/ enum { diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/include/asm-x86/hvm/hvm.h Wed Oct 22 11:46:55 2008 +0900 @@ -128,6 +128,7 @@ struct hvm_function_table { int (*msr_write_intercept)(struct cpu_user_regs *regs); void (*invlpg_intercept)(unsigned long vaddr); void (*set_uc_mode)(struct vcpu *v); + void (*set_info_guest)(struct vcpu *v); }; extern struct hvm_function_table hvm_funcs; @@ -314,4 +315,10 @@ int hvm_virtual_to_linear_addr( unsigned int addr_size, unsigned long *linear_addr); +static inline void hvm_set_info_guest(struct vcpu *v) +{ + if ( hvm_funcs.set_info_guest ) + return hvm_funcs.set_info_guest(v); +} + #endif /* __ASM_X86_HVM_HVM_H__ */ diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-x86/hvm/irq.h --- a/xen/include/asm-x86/hvm/irq.h Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/include/asm-x86/hvm/irq.h Wed Oct 22 11:46:55 2008 +0900 @@ -22,62 +22,11 @@ #ifndef __ASM_X86_HVM_IRQ_H__ #define __ASM_X86_HVM_IRQ_H__ -#include <xen/types.h> -#include <xen/spinlock.h> -#include <asm/irq.h> #include <asm/pirq.h> +#include <xen/hvm/irq.h> #include <asm/hvm/hvm.h> #include <asm/hvm/vpic.h> #include <asm/hvm/vioapic.h> -#include <public/hvm/save.h> - -struct dev_intx_gsi_link { - struct list_head list; - uint8_t device; - uint8_t intx; - uint8_t gsi; - uint8_t link; -}; - -#define _HVM_IRQ_DPCI_MSI 0x1 - -struct hvm_gmsi_info { - uint32_t gvec; - uint32_t gflags; -}; - -struct hvm_mirq_dpci_mapping { - uint32_t flags; - int pending; - struct list_head digl_list; - struct domain *dom; - struct hvm_gmsi_info gmsi; -}; - -struct hvm_girq_dpci_mapping { - uint8_t valid; - uint8_t device; - uint8_t intx; - uint8_t machine_gsi; -}; - -#define NR_ISAIRQS 16 -#define NR_LINK 4 -/* Protected by domain's event_lock */ -struct hvm_irq_dpci { - /* Machine IRQ to guest device/intx mapping. */ - DECLARE_BITMAP(mapping, NR_PIRQS); - struct hvm_mirq_dpci_mapping mirq[NR_IRQS]; - /* Guest IRQ to guest device/intx mapping. */ - struct hvm_girq_dpci_mapping girq[NR_IRQS]; - uint8_t msi_gvec_pirq[NR_VECTORS]; - DECLARE_BITMAP(dirq_mask, NR_IRQS); - /* Record of mapped ISA IRQs */ - DECLARE_BITMAP(isairq_map, NR_ISAIRQS); - /* Record of mapped Links */ - uint8_t link_cnt[NR_LINK]; - struct timer hvm_timer[NR_IRQS]; -}; struct hvm_irq { /* @@ -149,27 +98,16 @@ struct hvm_irq { #define hvm_isa_irq_to_gsi(isa_irq) ((isa_irq) ? : 2) -/* Modify state of a PCI INTx wire. */ -void hvm_pci_intx_assert( - struct domain *d, unsigned int device, unsigned int intx); -void hvm_pci_intx_deassert( - struct domain *d, unsigned int device, unsigned int intx); - -/* Modify state of an ISA device's IRQ wire. */ -void hvm_isa_irq_assert( - struct domain *d, unsigned int isa_irq); -void hvm_isa_irq_deassert( - struct domain *d, unsigned int isa_irq); - -void hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq); - -void hvm_maybe_deassert_evtchn_irq(void); -void hvm_assert_evtchn_irq(struct vcpu *v); -void hvm_set_callback_via(struct domain *d, uint64_t via); - /* Check/Acknowledge next pending interrupt. */ struct hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v); struct hvm_intack hvm_vcpu_ack_pending_irq(struct vcpu *v, struct hvm_intack intack); +/* + * Currently IA64 Xen doesn't support MSI. So for x86, we define this macro + * to control the conditional compilation of some MSI-related functions. + * This macro will be removed once IA64 has MSI support. 
+ */ +#define SUPPORT_MSI_REMAPPING 1 + #endif /* __ASM_X86_HVM_IRQ_H__ */ diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-x86/hvm/svm/vmcb.h --- a/xen/include/asm-x86/hvm/svm/vmcb.h Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/include/asm-x86/hvm/svm/vmcb.h Wed Oct 22 11:46:55 2008 +0900 @@ -393,7 +393,9 @@ struct vmcb_struct { eventinj_t eventinj; /* offset 0xA8 */ u64 h_cr3; /* offset 0xB0 */ lbrctrl_t lbr_control; /* offset 0xB8 */ - u64 res09[104]; /* offset 0xC0 pad to save area */ + u64 res09; /* offset 0xC0 */ + u64 nextrip; /* offset 0xC8 */ + u64 res10a[102]; /* offset 0xD0 pad to save area */ svm_segment_register_t es; /* offset 1024 */ svm_segment_register_t cs; diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-x86/io_apic.h --- a/xen/include/asm-x86/io_apic.h Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/include/asm-x86/io_apic.h Wed Oct 22 11:46:55 2008 +0900 @@ -19,6 +19,8 @@ #define IO_APIC_BASE(idx) \ ((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \ + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK))) + +#define IO_APIC_ID(idx) (mp_ioapics[idx].mpc_apicid) /* * The structure of the IO-APIC: diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-x86/msi.h --- a/xen/include/asm-x86/msi.h Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/include/asm-x86/msi.h Wed Oct 22 11:46:55 2008 +0900 @@ -90,10 +90,11 @@ struct msi_desc { void __iomem *mask_base; struct pci_dev *dev; - int vector; - - /* Last set MSI message */ - struct msi_msg msg; + int vector; + + struct msi_msg msg; /* Last set MSI message */ + + int remap_index; /* index in interrupt remapping table */ }; /* diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-x86/rwlock.h --- a/xen/include/asm-x86/rwlock.h Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/include/asm-x86/rwlock.h Wed Oct 22 11:46:55 2008 +0900 @@ -22,25 +22,19 @@ #define __build_read_lock_ptr(rw, helper) \ asm volatile(LOCK "subl $1,(%0)\n\t" \ - "js 2f\n" \ + "jns 1f\n\t" \ + "call " helper "\n\t" \ "1:\n" \ - ".section .text.lock,\"ax\"\n" \ - "2:\tcall " helper "\n\t" \ - "jmp 1b\n" \ - ".previous" \ ::"a" (rw) : "memory") #define __build_read_lock_const(rw, helper) \ asm volatile(LOCK "subl $1,%0\n\t" \ - "js 2f\n" \ - "1:\n" \ - ".section .text.lock,\"ax\"\n" \ - "2:\tpush %%"__OP"ax\n\t" \ + "jns 1f\n\t" \ + "push %%"__OP"ax\n\t" \ "lea %0,%%"__OP"ax\n\t" \ "call " helper "\n\t" \ "pop %%"__OP"ax\n\t" \ - "jmp 1b\n" \ - ".previous" \ + "1:\n" \ :"=m" (*(volatile int *)rw) : : "memory") #define __build_read_lock(rw, helper) do { \ @@ -52,25 +46,19 @@ #define __build_write_lock_ptr(rw, helper) \ asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ - "jnz 2f\n" \ + "jz 1f\n\t" \ + "call " helper "\n\t" \ "1:\n" \ - ".section .text.lock,\"ax\"\n" \ - "2:\tcall " helper "\n\t" \ - "jmp 1b\n" \ - ".previous" \ ::"a" (rw) : "memory") #define __build_write_lock_const(rw, helper) \ asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ - "jnz 2f\n" \ - "1:\n" \ - ".section .text.lock,\"ax\"\n" \ - "2:\tpush %%"__OP"ax\n\t" \ + "jz 1f\n\t" \ + "push %%"__OP"ax\n\t" \ "lea %0,%%"__OP"ax\n\t" \ "call " helper "\n\t" \ "pop %%"__OP"ax\n\t" \ - "jmp 1b\n" \ - ".previous" \ + "1:\n" \ :"=m" (*(volatile int *)rw) : : "memory") #define __build_write_lock(rw, helper) do { \ diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/asm-x86/spinlock.h --- a/xen/include/asm-x86/spinlock.h Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/include/asm-x86/spinlock.h Wed Oct 22 11:46:55 2008 +0900 @@ -8,104 +8,71 @@ typedef struct { volatile s16 lock; - s8 recurse_cpu; - u8 recurse_cnt; -} 
spinlock_t; +} raw_spinlock_t; -#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { 1, -1, 0 } +#define _RAW_SPIN_LOCK_UNLOCKED /*(raw_spinlock_t)*/ { 1 } -#define spin_lock_init(x) do { *(x) = (spinlock_t) SPIN_LOCK_UNLOCKED; } while(0) -#define spin_is_locked(x) (*(volatile char *)(&(x)->lock) <= 0) +#define _raw_spin_is_locked(x) \ + (*(volatile char *)(&(x)->lock) <= 0) -static inline void _raw_spin_lock(spinlock_t *lock) +static always_inline void _raw_spin_lock(raw_spinlock_t *lock) { - __asm__ __volatile__ ( - "1: lock; decb %0 \n" - " js 2f \n" - ".section .text.lock,\"ax\"\n" + asm volatile ( + "1: lock; decw %0 \n" + " jns 3f \n" "2: rep; nop \n" - " cmpb $0,%0 \n" + " cmpw $0,%0 \n" " jle 2b \n" " jmp 1b \n" - ".previous" + "3:" : "=m" (lock->lock) : : "memory" ); } -static inline void _raw_spin_unlock(spinlock_t *lock) +static always_inline void _raw_spin_unlock(raw_spinlock_t *lock) { - ASSERT(spin_is_locked(lock)); - __asm__ __volatile__ ( - "movb $1,%0" + ASSERT(_raw_spin_is_locked(lock)); + asm volatile ( + "movw $1,%0" : "=m" (lock->lock) : : "memory" ); } -static inline int _raw_spin_trylock(spinlock_t *lock) +static always_inline int _raw_spin_trylock(raw_spinlock_t *lock) { - char oldval; - __asm__ __volatile__( - "xchgb %b0,%1" - :"=q" (oldval), "=m" (lock->lock) - :"0" (0) : "memory"); - return oldval > 0; + s16 oldval; + asm volatile ( + "xchgw %w0,%1" + :"=r" (oldval), "=m" (lock->lock) + :"0" (0) : "memory" ); + return (oldval > 0); } - -/* - * spin_[un]lock_recursive(): Use these forms when the lock can (safely!) be - * reentered recursively on the same CPU. All critical regions that may form - * part of a recursively-nested set must be protected by these forms. If there - * are any critical regions that cannot form part of such a set, they can use - * standard spin_[un]lock(). - */ -#define _raw_spin_lock_recursive(_lock) \ - do { \ - int cpu = smp_processor_id(); \ - if ( likely((_lock)->recurse_cpu != cpu) ) \ - { \ - spin_lock(_lock); \ - (_lock)->recurse_cpu = cpu; \ - } \ - (_lock)->recurse_cnt++; \ - } while ( 0 ) - -#define _raw_spin_unlock_recursive(_lock) \ - do { \ - if ( likely(--(_lock)->recurse_cnt == 0) ) \ - { \ - (_lock)->recurse_cpu = -1; \ - spin_unlock(_lock); \ - } \ - } while ( 0 ) - typedef struct { volatile unsigned int lock; -} rwlock_t; +} raw_rwlock_t; -#define RW_LOCK_UNLOCKED /*(rwlock_t)*/ { RW_LOCK_BIAS } - -#define rwlock_init(x) do { *(x) = (rwlock_t) RW_LOCK_UNLOCKED; } while(0) +#define _RAW_RW_LOCK_UNLOCKED /*(raw_rwlock_t)*/ { RW_LOCK_BIAS } /* * On x86, we implement read-write locks as a 32-bit counter * with the high bit (sign) being the "contended" bit. 
*/ -static inline void _raw_read_lock(rwlock_t *rw) +static always_inline void _raw_read_lock(raw_rwlock_t *rw) { __build_read_lock(rw, "__read_lock_failed"); } -static inline void _raw_write_lock(rwlock_t *rw) +static always_inline void _raw_write_lock(raw_rwlock_t *rw) { __build_write_lock(rw, "__write_lock_failed"); } -#define _raw_read_unlock(rw) \ - __asm__ __volatile__ ( \ - "lock ; incl %0" : \ +#define _raw_read_unlock(rw) \ + asm volatile ( \ + "lock ; incl %0" : \ "=m" ((rw)->lock) : : "memory" ) -#define _raw_write_unlock(rw) \ - __asm__ __volatile__ ( \ - "lock ; addl $" RW_LOCK_BIAS_STR ",%0" : \ +#define _raw_write_unlock(rw) \ + asm volatile ( \ + "lock ; addl $" RW_LOCK_BIAS_STR ",%0" : \ "=m" ((rw)->lock) : : "memory" ) #endif /* __ASM_SPINLOCK_H */ diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/public/domctl.h --- a/xen/include/public/domctl.h Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/include/public/domctl.h Wed Oct 22 11:46:55 2008 +0900 @@ -614,6 +614,10 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_subsc #define XEN_DOMCTL_set_machine_address_size 51 #define XEN_DOMCTL_get_machine_address_size 52 +/* + * Do not inject spurious page faults into this domain. + */ +#define XEN_DOMCTL_suppress_spurious_page_faults 53 struct xen_domctl { uint32_t cmd; diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/public/trace.h --- a/xen/include/public/trace.h Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/include/public/trace.h Wed Oct 22 11:46:55 2008 +0900 @@ -38,6 +38,7 @@ #define TRC_MEM 0x0010f000 /* Xen memory trace */ #define TRC_PV 0x0020f000 /* Xen PV traces */ #define TRC_SHADOW 0x0040f000 /* Xen shadow tracing */ +#define TRC_PM 0x0080f000 /* Xen power management trace */ #define TRC_ALL 0x0ffff000 #define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff) #define TRC_HD_CYCLE_FLAG (1UL<<31) @@ -146,6 +147,15 @@ #define TRC_HVM_LMSW (TRC_HVM_HANDLER + 0x19) #define TRC_HVM_LMSW64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x19) +/* trace subclasses for power management */ +#define TRC_PM_FREQ 0x00801000 /* xen cpu freq events */ +#define TRC_PM_IDLE 0x00802000 /* xen cpu idle events */ + +/* trace events for per class */ +#define TRC_PM_FREQ_CHANGE (TRC_PM_FREQ + 0x01) +#define TRC_PM_IDLE_ENTRY (TRC_PM_IDLE + 0x01) +#define TRC_PM_IDLE_EXIT (TRC_PM_IDLE + 0x02) + /* This structure represents a single trace buffer record. */ struct t_rec { uint32_t event:28; diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/xen/hvm/irq.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/include/xen/hvm/irq.h Wed Oct 22 11:46:55 2008 +0900 @@ -0,0 +1,99 @@ +/****************************************************************************** + * irq.h + * + * Interrupt distribution and delivery logic. + * + * Copyright (c) 2006, K A Fraser, XenSource Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ */ + +#ifndef __XEN_HVM_IRQ_H__ +#define __XEN_HVM_IRQ_H__ + +#include <xen/types.h> +#include <xen/spinlock.h> +#include <asm/irq.h> +#include <public/hvm/save.h> + +struct dev_intx_gsi_link { + struct list_head list; + uint8_t device; + uint8_t intx; + uint8_t gsi; + uint8_t link; +}; + +#define _HVM_IRQ_DPCI_MSI 0x1 + +struct hvm_gmsi_info { + uint32_t gvec; + uint32_t gflags; +}; + +struct hvm_mirq_dpci_mapping { + uint32_t flags; + int pending; + struct list_head digl_list; + struct domain *dom; + struct hvm_gmsi_info gmsi; +}; + +struct hvm_girq_dpci_mapping { + uint8_t valid; + uint8_t device; + uint8_t intx; + uint8_t machine_gsi; +}; + +#define NR_ISAIRQS 16 +#define NR_LINK 4 + +/* Protected by domain's event_lock */ +struct hvm_irq_dpci { + /* Machine IRQ to guest device/intx mapping. */ + DECLARE_BITMAP(mapping, NR_PIRQS); + struct hvm_mirq_dpci_mapping mirq[NR_IRQS]; + /* Guest IRQ to guest device/intx mapping. */ + struct hvm_girq_dpci_mapping girq[NR_IRQS]; + uint8_t msi_gvec_pirq[NR_VECTORS]; + DECLARE_BITMAP(dirq_mask, NR_IRQS); + /* Record of mapped ISA IRQs */ + DECLARE_BITMAP(isairq_map, NR_ISAIRQS); + /* Record of mapped Links */ + uint8_t link_cnt[NR_LINK]; + struct timer hvm_timer[NR_IRQS]; +}; + +/* Modify state of a PCI INTx wire. */ +void hvm_pci_intx_assert( + struct domain *d, unsigned int device, unsigned int intx); +void hvm_pci_intx_deassert( + struct domain *d, unsigned int device, unsigned int intx); + +/* Modify state of an ISA device's IRQ wire. */ +void hvm_isa_irq_assert( + struct domain *d, unsigned int isa_irq); +void hvm_isa_irq_deassert( + struct domain *d, unsigned int isa_irq); + +void hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq); + +void hvm_maybe_deassert_evtchn_irq(void); +void hvm_assert_evtchn_irq(struct vcpu *v); +void hvm_set_callback_via(struct domain *d, uint64_t via); + +void hvm_dirq_assist(struct vcpu *v); + +#endif /* __XEN_HVM_IRQ_H__ */ diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/xen/spinlock.h --- a/xen/include/xen/spinlock.h Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/include/xen/spinlock.h Wed Oct 22 11:46:55 2008 +0900 @@ -3,93 +3,95 @@ #include <xen/config.h> #include <asm/system.h> - -#define spin_lock_irqsave(lock, flags) \ - do { local_irq_save(flags); spin_lock(lock); } while ( 0 ) -#define spin_lock_irq(lock) \ - do { local_irq_disable(); spin_lock(lock); } while ( 0 ) - -#define read_lock_irqsave(lock, flags) \ - do { local_irq_save(flags); read_lock(lock); } while ( 0 ) -#define read_lock_irq(lock) \ - do { local_irq_disable(); read_lock(lock); } while ( 0 ) - -#define write_lock_irqsave(lock, flags) \ - do { local_irq_save(flags); write_lock(lock); } while ( 0 ) -#define write_lock_irq(lock) \ - do { local_irq_disable(); write_lock(lock); } while ( 0 ) - -#define spin_unlock_irqrestore(lock, flags) \ - do { spin_unlock(lock); local_irq_restore(flags); } while ( 0 ) -#define spin_unlock_irq(lock) \ - do { spin_unlock(lock); local_irq_enable(); } while ( 0 ) - -#define read_unlock_irqrestore(lock, flags) \ - do { read_unlock(lock); local_irq_restore(flags); } while ( 0 ) -#define read_unlock_irq(lock) \ - do { read_unlock(lock); local_irq_enable(); } while ( 0 ) - -#define write_unlock_irqrestore(lock, flags) \ - do { write_unlock(lock); local_irq_restore(flags); } while ( 0 ) -#define write_unlock_irq(lock) \ - do { write_unlock(lock); local_irq_enable(); } while ( 0 ) - -#ifdef CONFIG_SMP - #include <asm/spinlock.h> -#else +typedef struct { + raw_spinlock_t raw; + u16 recurse_cpu:12; + u16 
recurse_cnt:4; +} spinlock_t; -#if (__GNUC__ > 2) -typedef struct { } spinlock_t; -#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { } -#else -typedef struct { int gcc_is_buggy; } spinlock_t; -#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { 0 } -#endif +#define SPIN_LOCK_UNLOCKED { _RAW_SPIN_LOCK_UNLOCKED, 0xfffu, 0 } +#define DEFINE_SPINLOCK(l) spinlock_t l = SPIN_LOCK_UNLOCKED +#define spin_lock_init(l) (*(l) = (spinlock_t)SPIN_LOCK_UNLOCKED) -#define spin_lock_init(lock) do { } while(0) -#define spin_is_locked(lock) (0) -#define _raw_spin_lock(lock) (void)(lock) -#define _raw_spin_trylock(lock) ({1; }) -#define _raw_spin_unlock(lock) do { } while(0) -#define _raw_spin_lock_recursive(lock) do { } while(0) -#define _raw_spin_unlock_recursive(lock) do { } while(0) +typedef struct { + raw_rwlock_t raw; +} rwlock_t; -#if (__GNUC__ > 2) -typedef struct { } rwlock_t; -#define RW_LOCK_UNLOCKED /*(rwlock_t)*/ { } -#else -typedef struct { int gcc_is_buggy; } rwlock_t; -#define RW_LOCK_UNLOCKED /*(rwlock_t)*/ { 0 } -#endif +#define RW_LOCK_UNLOCKED { _RAW_RW_LOCK_UNLOCKED } +#define DEFINE_RWLOCK(l) rwlock_t l = RW_LOCK_UNLOCKED +#define rwlock_init(l) (*(l) = (rwlock_t)RW_LOCK_UNLOCKED) -#define rwlock_init(lock) do { } while(0) -#define _raw_read_lock(lock) (void)(lock) /* Not "unused variable". */ -#define _raw_read_unlock(lock) do { } while(0) -#define _raw_write_lock(lock) (void)(lock) /* Not "unused variable". */ -#define _raw_write_unlock(lock) do { } while(0) +void _spin_lock(spinlock_t *lock); +void _spin_lock_irq(spinlock_t *lock); +unsigned long _spin_lock_irqsave(spinlock_t *lock); -#endif +void _spin_unlock(spinlock_t *lock); +void _spin_unlock_irq(spinlock_t *lock); +void _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags); -#define spin_lock(_lock) _raw_spin_lock(_lock) -#define spin_trylock(_lock) _raw_spin_trylock(_lock) -#define spin_unlock(_lock) _raw_spin_unlock(_lock) -#define spin_lock_recursive(_lock) _raw_spin_lock_recursive(_lock) -#define spin_unlock_recursive(_lock) _raw_spin_unlock_recursive(_lock) -#define read_lock(_lock) _raw_read_lock(_lock) -#define read_unlock(_lock) _raw_read_unlock(_lock) -#define write_lock(_lock) _raw_write_lock(_lock) -#define write_unlock(_lock) _raw_write_unlock(_lock) +int _spin_is_locked(spinlock_t *lock); +int _spin_trylock(spinlock_t *lock); +void _spin_barrier(spinlock_t *lock); + +void _spin_lock_recursive(spinlock_t *lock); +void _spin_unlock_recursive(spinlock_t *lock); + +void _read_lock(rwlock_t *lock); +void _read_lock_irq(rwlock_t *lock); +unsigned long _read_lock_irqsave(rwlock_t *lock); + +void _read_unlock(rwlock_t *lock); +void _read_unlock_irq(rwlock_t *lock); +void _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags); + +void _write_lock(rwlock_t *lock); +void _write_lock_irq(rwlock_t *lock); +unsigned long _write_lock_irqsave(rwlock_t *lock); + +void _write_unlock(rwlock_t *lock); +void _write_unlock_irq(rwlock_t *lock); +void _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags); + +#define spin_lock(l) _spin_lock(l) +#define spin_lock_irq(l) _spin_lock_irq(l) +#define spin_lock_irqsave(l, f) ((f) = _spin_lock_irqsave(l)) + +#define spin_unlock(l) _spin_unlock(l) +#define spin_unlock_irq(l) _spin_unlock_irq(l) +#define spin_unlock_irqrestore(l, f) _spin_unlock_irqrestore(l, f) + +#define spin_is_locked(l) _raw_spin_is_locked(&(l)->raw) +#define spin_trylock(l) _spin_trylock(l) /* Ensure a lock is quiescent between two critical operations. 
*/ -static inline void spin_barrier(spinlock_t *lock) -{ - do { mb(); } while ( spin_is_locked(lock) ); - mb(); -} +#define spin_barrier(l) _spin_barrier(l) -#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED -#define DEFINE_RWLOCK(x) rwlock_t x = RW_LOCK_UNLOCKED +/* + * spin_[un]lock_recursive(): Use these forms when the lock can (safely!) be + * reentered recursively on the same CPU. All critical regions that may form + * part of a recursively-nested set must be protected by these forms. If there + * are any critical regions that cannot form part of such a set, they can use + * standard spin_[un]lock(). + */ +#define spin_lock_recursive(l) _spin_lock_recursive(l) +#define spin_unlock_recursive(l) _spin_unlock_recursive(l) + +#define read_lock(l) _read_lock(l) +#define read_lock_irq(l) _read_lock_irq(l) +#define read_lock_irqsave(l, f) ((f) = _read_lock_irqsave(l)) + +#define read_unlock(l) _read_unlock(l) +#define read_unlock_irq(l) _read_unlock_irq(l) +#define read_unlock_irqrestore(l, f) _read_unlock_irqrestore(l, f) + +#define write_lock(l) _write_lock(l) +#define write_lock_irq(l) _write_lock_irq(l) +#define write_lock_irqsave(l, f) ((f) = _write_lock_irqsave(l)) + +#define write_unlock(l) _write_unlock(l) +#define write_unlock_irq(l) _write_unlock_irq(l) +#define write_unlock_irqrestore(l, f) _write_unlock_irqrestore(l, f) #endif /* __SPINLOCK_H__ */ diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/xen/xmalloc.h --- a/xen/include/xen/xmalloc.h Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/include/xen/xmalloc.h Wed Oct 22 11:46:55 2008 +0900 @@ -1,12 +1,17 @@ #ifndef __XMALLOC_H__ #define __XMALLOC_H__ + +/* + * Xen malloc/free-style interface. + */ /* Allocate space for typed object. */ #define xmalloc(_type) ((_type *)_xmalloc(sizeof(_type), __alignof__(_type))) /* Allocate space for array of typed objects. */ -#define xmalloc_array(_type, _num) ((_type *)_xmalloc_array(sizeof(_type), __alignof__(_type), _num)) +#define xmalloc_array(_type, _num) \ + ((_type *)_xmalloc_array(sizeof(_type), __alignof__(_type), _num)) /* Allocate untyped storage. */ #define xmalloc_bytes(_bytes) (_xmalloc(_bytes, SMP_CACHE_BYTES)) @@ -15,8 +20,9 @@ extern void xfree(void *); extern void xfree(void *); /* Underlying functions */ -extern void *_xmalloc(size_t size, size_t align); -static inline void *_xmalloc_array(size_t size, size_t align, size_t num) +extern void *_xmalloc(unsigned long size, unsigned long align); +static inline void *_xmalloc_array( + unsigned long size, unsigned long align, unsigned long num) { /* Check for overflow. */ if (size && num > UINT_MAX / size) @@ -24,4 +30,73 @@ static inline void *_xmalloc_array(size_ return _xmalloc(size * num, align); } +/* + * Pooled allocator interface. + */ + +struct xmem_pool; + +typedef void *(xmem_pool_get_memory)(unsigned long bytes); +typedef void (xmem_pool_put_memory)(void *ptr); + +/** + * xmem_pool_create - create dynamic memory pool + * @name: name of the pool + * @get_mem: callback function used to expand pool + * @put_mem: callback function used to shrink pool + * @init_size: inital pool size (in bytes) + * @max_size: maximum pool size (in bytes) - set this as 0 for no limit + * @grow_size: amount of memory (in bytes) added to pool whenever required + * + * All size values are rounded up to next page boundary. 
+ */ +struct xmem_pool *xmem_pool_create( + const char *name, + xmem_pool_get_memory get_mem, + xmem_pool_put_memory put_mem, + unsigned long init_size, + unsigned long max_size, + unsigned long grow_size); + +/** + * xmem_pool_destroy - cleanup given pool + * @mem_pool: Pool to be destroyed + * + * Data structures associated with pool are freed. + * All memory allocated from pool must be freed before + * destorying it. + */ +void xmem_pool_destroy(struct xmem_pool *pool); + +/** + * xmem_pool_alloc - allocate memory from given pool + * @size: no. of bytes + * @mem_pool: pool to allocate from + */ +void *xmem_pool_alloc(unsigned long size, struct xmem_pool *pool); + +/** + * xmem_pool_free - free memory from given pool + * @ptr: address of memory to be freed + * @mem_pool: pool to free from + */ +void xmem_pool_free(void *ptr, struct xmem_pool *pool); + +/** + * xmem_pool_get_used_size - get memory currently used by given pool + * + * Used memory includes stored data + metadata + internal fragmentation + */ +unsigned long xmem_pool_get_used_size(struct xmem_pool *pool); + +/** + * xmem_pool_get_total_size - get total memory currently allocated for pool + * + * This is the total memory currently allocated for this pool which includes + * used size + free size. + * + * (Total - Used) is good indicator of memory efficiency of allocator. + */ +unsigned long xmem_pool_get_total_size(struct xmem_pool *pool); + #endif /* __XMALLOC_H__ */ diff -r 6583186e5989 -r 46d7e12c4c91 xen/include/xlat.lst --- a/xen/include/xlat.lst Wed Oct 22 11:38:22 2008 +0900 +++ b/xen/include/xlat.lst Wed Oct 22 11:46:55 2008 +0900 @@ -55,3 +55,7 @@ ! processor_cx platform.h ! processor_flags platform.h ! processor_power platform.h +! pct_register platform.h +! processor_px platform.h +! psd_package platform.h +! processor_performance platform.h _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
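[Editorial note, not part of the changeset] The hunks above replace the per-architecture spinlock definitions with a common layer in xen/include/xen/spinlock.h, where spinlock_t wraps an arch-provided raw_spinlock_t plus recursion bookkeeping (recurse_cpu/recurse_cnt), and callers go through the new _spin_*/ _read_*/ _write_* entry points in xen/common/spinlock.c. The sketch below shows how a caller is expected to use that interface; it is a minimal illustration only, the lock name and the function it protects are invented for the example, and it assumes the declarations added by this patch.

    #include <xen/spinlock.h>

    /* Illustrative lock; DEFINE_SPINLOCK() comes from the new common header. */
    static DEFINE_SPINLOCK(example_lock);

    static void example_critical_sections(void)
    {
        unsigned long flags;

        /* Plain form: protect a short critical region. */
        spin_lock(&example_lock);
        /* ... update state guarded by example_lock ... */
        spin_unlock(&example_lock);

        /* IRQ-safe form: flags receives the saved interrupt state. */
        spin_lock_irqsave(&example_lock, flags);
        /* ... state that may also be touched from an interrupt path ... */
        spin_unlock_irqrestore(&example_lock, flags);

        /*
         * Recursive form: only for regions that may (safely) be re-entered
         * on the same CPU, per the comment in xen/include/xen/spinlock.h.
         */
        spin_lock_recursive(&example_lock);
        spin_unlock_recursive(&example_lock);
    }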
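[Editorial note, not part of the changeset] The new xen/include/xen/xmalloc.h additionally declares a pooled allocator interface (backed by the xmalloc_tlsf.c file added elsewhere in this changeset). Below is a hedged usage sketch of that interface as declared in the hunk: the pool name, the callback functions and the choice of backing the pool with single xenheap pages are all assumptions made for illustration (the single-page callbacks only make sense when grow_size is PAGE_SIZE), not code taken from the patch.

    #include <xen/config.h>
    #include <xen/lib.h>
    #include <xen/mm.h>
    #include <xen/errno.h>
    #include <xen/xmalloc.h>

    /*
     * Illustrative backing callbacks: hand single Xen heap pages to the
     * pool. This only works because the pool below is created with
     * grow_size == PAGE_SIZE; a real user may need a different backend.
     */
    static void *example_pool_get(unsigned long bytes)
    {
        ASSERT(bytes == PAGE_SIZE);
        return alloc_xenheap_page();
    }

    static void example_pool_put(void *ptr)
    {
        free_xenheap_page(ptr);
    }

    static struct xmem_pool *example_pool;

    /* Create the pool: one page initially, grow by a page, no size cap. */
    static int example_pool_init(void)
    {
        example_pool = xmem_pool_create("example", example_pool_get,
                                        example_pool_put, PAGE_SIZE,
                                        0, PAGE_SIZE);
        return (example_pool != NULL) ? 0 : -ENOMEM;
    }

    /* Allocate and release a small object from the pool. */
    static void example_pool_use(void)
    {
        void *obj = xmem_pool_alloc(128, example_pool);

        if ( obj != NULL )
            xmem_pool_free(obj, example_pool);
    }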