[Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1175875710 21600
# Node ID eb3e430242ac8732d665b0a1f929bc324bc9fcdd
# Parent 9ec7dadc98ba1eadca8fe9c38ae6d6dd41d6d29d
# Parent ef33477324f669037a1b35b719fa0a01e98ec2f3
merge with xen-unstable.hg
---
 tools/libxc/xc_hvm_restore.c | 360 ---
 tools/libxc/xc_linux_restore.c | 962 --------
 .hgignore | 1
 docs/Docs.mk | 1
 docs/xen-api/Makefile | 3
 docs/xen-api/coversheet.tex | 24
 docs/xen-api/presentation.tex | 2
 docs/xen-api/vm-lifecycle.tex | 2
 docs/xen-api/wire-protocol.tex | 2
 docs/xen-api/xenapi-coversheet.tex | 16
 docs/xen-api/xenapi-datamodel-graph.dot | 55
 docs/xen-api/xenapi-datamodel.tex | 261 --
 docs/xen-api/xenapi.tex | 10
 extras/mini-os/xenbus/xenbus.c | 2
 linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S | 8
 linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c | 2
 linux-2.6-xen-sparse/arch/i386/mach-xen/setup.c | 4
 linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c | 12
 linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S | 61
 linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S | 11
 linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c | 4
 linux-2.6-xen-sparse/drivers/xen/Kconfig | 13
 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c | 38
 linux-2.6-xen-sparse/drivers/xen/balloon/sysfs.c | 6
 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c | 5
 linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c | 1
 linux-2.6-xen-sparse/drivers/xen/core/reboot.c | 4
 linux-2.6-xen-sparse/drivers/xen/core/xen_proc.c | 5
 linux-2.6-xen-sparse/drivers/xen/fbfront/xenfb.c | 1
 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 6
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h | 8
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h | 16
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/maddr.h | 2
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h | 14
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h | 32
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h | 60
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h | 107
 linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h | 2
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h | 8
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/maddr.h | 2
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h | 12
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h | 163 -
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/setup_arch_post.h | 4
 linux-2.6-xen-sparse/include/xen/hvm.h | 1
 linux-2.6-xen-sparse/include/xen/hypercall.h | 24
 tools/ioemu/hw/xen_platform.c | 8
 tools/ioemu/target-i386-dm/exec-dm.c | 10
 tools/ioemu/target-i386-dm/helper2.c | 70
 tools/ioemu/vl.c | 84
 tools/ioemu/xenstore.c | 14
 tools/libxc/Makefile | 4
 tools/libxc/ia64/xc_ia64_linux_restore.c | 25
 tools/libxc/xc_domain_restore.c | 1086 ++++++++++
 tools/libxc/xc_hvm_save.c | 407 +--
 tools/libxc/xenguest.h | 27
 tools/libxc/xg_private.c | 15
 tools/libxen/include/xen_host.h | 7
 tools/libxen/include/xen_sr.h | 36
 tools/libxen/include/xen_vdi.h | 15
 tools/libxen/src/xen_common.c | 45
 tools/libxen/src/xen_host.c | 12
 tools/libxen/src/xen_sr.c | 105
 tools/libxen/src/xen_vdi.c | 33
 tools/libxen/test/test_bindings.c | 43
 tools/python/xen/xend/XendAPI.py | 132 -
 tools/python/xen/xend/XendCheckpoint.py | 21
 tools/python/xen/xend/XendLocalStorageRepo.py | 17
 tools/python/xen/xend/XendNode.py | 39
 tools/python/xen/xend/XendPBD.py | 79
 tools/python/xen/xend/XendQCoWStorageRepo.py | 23
 tools/python/xen/xend/XendStateStore.py | 2
tools/python/xen/xend/XendStorageRepository.py | 13 tools/python/xen/xend/XendTask.py | 10 tools/python/xen/xend/image.py | 9 tools/python/xen/xm/main.py | 2 tools/xcutils/xc_restore.c | 34 unmodified_drivers/linux-2.6/Makefile | 1 unmodified_drivers/linux-2.6/balloon/Kbuild | 9 unmodified_drivers/linux-2.6/balloon/Makefile | 3 unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h | 22 unmodified_drivers/linux-2.6/mkbuildtree | 3 unmodified_drivers/linux-2.6/overrides.mk | 3 unmodified_drivers/linux-2.6/platform-pci/platform-pci.c | 10 unmodified_drivers/linux-2.6/platform-pci/xen_support.c | 9 xen/arch/x86/hvm/hvm.c | 113 - xen/arch/x86/hvm/io.c | 227 +- xen/arch/x86/hvm/irq.c | 15 xen/arch/x86/hvm/platform.c | 32 xen/arch/x86/hvm/svm/svm.c | 143 - xen/arch/x86/hvm/svm/vmcb.c | 8 xen/arch/x86/hvm/vmx/vmx.c | 18 xen/common/domain.c | 8 xen/common/memory.c | 11 xen/common/schedule.c | 4 xen/drivers/char/ns16550.c | 2 xen/include/asm-ia64/xentypes.h | 5 xen/include/asm-powerpc/types.h | 4 xen/include/asm-x86/event.h | 7 xen/include/asm-x86/hvm/hvm.h | 14 xen/include/asm-x86/hvm/io.h | 3 xen/include/asm-x86/hvm/support.h | 5 xen/include/asm-x86/types.h | 4 xen/include/public/acm.h | 16 xen/include/public/foreign/Makefile | 2 xen/include/public/hvm/hvm_op.h | 8 xen/include/public/hvm/ioreq.h | 1 xen/include/xen/types.h | 2 107 files changed, 2671 insertions(+), 2805 deletions(-) diff -r 9ec7dadc98ba -r eb3e430242ac .hgignore --- a/.hgignore Fri Apr 06 10:06:30 2007 -0600 +++ b/.hgignore Fri Apr 06 10:08:30 2007 -0600 @@ -14,6 +14,7 @@ .*\.orig$ .*\.rej$ .*/a\.out$ +.*/Modules\.symvers$ .*/cscope\..*$ ^cscope.*$ ^[^/]*\.bz2$ diff -r 9ec7dadc98ba -r eb3e430242ac docs/Docs.mk --- a/docs/Docs.mk Fri Apr 06 10:06:30 2007 -0600 +++ b/docs/Docs.mk Fri Apr 06 10:08:30 2007 -0600 @@ -6,6 +6,7 @@ DOXYGEN := doxygen DOXYGEN := doxygen POD2MAN := pod2man DOT := dot +NEATO := neato pkgdocdir := /usr/share/doc/xen mandir := /usr/share/man diff -r 9ec7dadc98ba -r eb3e430242ac docs/xen-api/Makefile --- a/docs/xen-api/Makefile Fri Apr 06 10:06:30 2007 -0600 +++ b/docs/xen-api/Makefile Fri Apr 06 10:08:30 2007 -0600 @@ -37,6 +37,9 @@ xenapi.dvi: $(TEX) $(EPS) $(EPSDOT) %.eps: %.dot $(DOT) -Tps $< >$@ +xenapi-datamodel-graph.eps: xenapi-datamodel-graph.dot + $(NEATO) -Goverlap=false -Tps $< >$@ + .PHONY: clean clean: rm -f *.pdf *.ps *.dvi *.aux *.log $(EPSDOT) diff -r 9ec7dadc98ba -r eb3e430242ac docs/xen-api/coversheet.tex --- a/docs/xen-api/coversheet.tex Fri Apr 06 10:06:30 2007 -0600 +++ b/docs/xen-api/coversheet.tex Fri Apr 06 10:08:30 2007 -0600 @@ -1,5 +1,5 @@ % -% Copyright (c) 2006 XenSource, Inc. +% Copyright (c) 2006-2007 XenSource, Inc. 
% % Permission is granted to copy, distribute and/or modify this document under % the terms of the GNU Free Documentation License, Version 1.2 or any later @@ -20,7 +20,7 @@ \begin{center} \resizebox{8cm}{!}{\includegraphics{\coversheetlogo}} -\vspace{3cm} +\vspace{2cm} \begin{Huge} \doctitle{} @@ -37,9 +37,27 @@ Date: \datestring{} \begin{tabular}{rl} \docauthors{} \end{tabular} - \end{Large} \end{center} +\vspace{.5cm} +\begin{large} +\textbf{Contributors:} +\begin{multicols}{2}{ +Stefan Berger, IBM \\ +Daniel Berrang\'e, Red Hat \\ +Gareth Bestor, IBM \\ +Hollis Blanchard, IBM \\ +Mike Day, IBM \\ +Jim Fehlig, Novell \\ +Jon Harrop, XenSource \\ +Vincent Hanquez, XenSource \\ +John Levon, Sun Microsystems \\ +Jon Ludlam, XenSource \\ +Alastair Tse, XenSource \\ +Daniel Veillard, Red Hat \\ +Tom Wilkie, University of Cambridge} +\end{multicols} +\end{large} \vfill diff -r 9ec7dadc98ba -r eb3e430242ac docs/xen-api/presentation.tex --- a/docs/xen-api/presentation.tex Fri Apr 06 10:06:30 2007 -0600 +++ b/docs/xen-api/presentation.tex Fri Apr 06 10:08:30 2007 -0600 @@ -1,5 +1,5 @@ % -% Copyright (c) 2006 XenSource, Inc. +% Copyright (c) 2006-2007 XenSource, Inc. % % Permission is granted to copy, distribute and/or modify this document under % the terms of the GNU Free Documentation License, Version 1.2 or any later diff -r 9ec7dadc98ba -r eb3e430242ac docs/xen-api/vm-lifecycle.tex --- a/docs/xen-api/vm-lifecycle.tex Fri Apr 06 10:06:30 2007 -0600 +++ b/docs/xen-api/vm-lifecycle.tex Fri Apr 06 10:08:30 2007 -0600 @@ -1,5 +1,5 @@ % -% Copyright (c) 2006 XenSource, Inc. +% Copyright (c) 2006-2007 XenSource, Inc. % % Permission is granted to copy, distribute and/or modify this document under % the terms of the GNU Free Documentation License, Version 1.2 or any later diff -r 9ec7dadc98ba -r eb3e430242ac docs/xen-api/wire-protocol.tex --- a/docs/xen-api/wire-protocol.tex Fri Apr 06 10:06:30 2007 -0600 +++ b/docs/xen-api/wire-protocol.tex Fri Apr 06 10:08:30 2007 -0600 @@ -1,5 +1,5 @@ % -% Copyright (c) 2006 XenSource, Inc. +% Copyright (c) 2006-2007 XenSource, Inc. % % Permission is granted to copy, distribute and/or modify this document under % the terms of the GNU Free Documentation License, Version 1.2 or any later diff -r 9ec7dadc98ba -r eb3e430242ac docs/xen-api/xenapi-coversheet.tex --- a/docs/xen-api/xenapi-coversheet.tex Fri Apr 06 10:06:30 2007 -0600 +++ b/docs/xen-api/xenapi-coversheet.tex Fri Apr 06 10:08:30 2007 -0600 @@ -1,5 +1,5 @@ % -% Copyright (c) 2006 XenSource, Inc. +% Copyright (c) 2006-2007 XenSource, Inc. 
% % Permission is granted to copy, distribute and/or modify this document under % the terms of the GNU Free Documentation License, Version 1.2 or any later @@ -12,26 +12,24 @@ % %% Document title -\newcommand{\doctitle}{Xen Management API Draft} +\newcommand{\doctitle}{Xen Management API} \newcommand{\coversheetlogo}{xen.eps} %% Document date -\newcommand{\datestring}{25th August 2006} +\newcommand{\datestring}{5th April 2007} -\newcommand{\releasestatement}{Open Preview Release\\Comments are welcome!} +\newcommand{\releasestatement}{Candidate for Release\\Comments are welcome!} %% Document revision -\newcommand{\revstring}{API Revision 0.4.3 (Draft for discussion)} +\newcommand{\revstring}{API Revision 0.9.0} %% Document authors \newcommand{\docauthors}{ Ewan Mellor: & {\tt ewan@xxxxxxxxxxxxx} \\ Richard Sharp: & {\tt richard.sharp@xxxxxxxxxxxxx} \\ -David Scott: & {\tt david.scott@xxxxxxxxxxxxx} \\ -Jon Harrop: & {\tt jon.harrop@xxxxxxxxxxxxx} -} -\newcommand{\legalnotice}{Copyright \copyright{} 2006 XenSource, Inc.\\ \\ +David Scott: & {\tt david.scott@xxxxxxxxxxxxx}} +\newcommand{\legalnotice}{Copyright \copyright{} 2006-2007 XenSource, Inc.\\ \\ Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.2 or any later version published by the Free Software Foundation; with no Invariant Sections, diff -r 9ec7dadc98ba -r eb3e430242ac docs/xen-api/xenapi-datamodel-graph.dot --- a/docs/xen-api/xenapi-datamodel-graph.dot Fri Apr 06 10:06:30 2007 -0600 +++ b/docs/xen-api/xenapi-datamodel-graph.dot Fri Apr 06 10:08:30 2007 -0600 @@ -1,18 +1,39 @@ digraph g{ -digraph g{ -node [ shape=box ]; session [ URL="session.html" ] task [ URL="task.html" ] VM [ URL="VM.html" ] host [ URL="host.html" ] host_cpu [ URL="host_cpu.html" ] network [ URL="network.html" ] VIF [ URL="VIF.html" ] PIF [ URL="PIF.html" ] SR [ URL="SR.html" ] VDI [ URL="VDI.html" ] VBD [ URL="VBD.html" ] VTPM [ URL="VTPM.html" ] console [ URL="console.html" ] user [ URL="user.html" ] debug [ URL="debug.html" ]; -session -> host [ label="this_host(1)" ] -session -> user [ label="this_user(1)" ] -host -> VM [ color="blue", arrowhead="crow", arrowtail="none" ] -host -> host_cpu [ color="blue", arrowhead="crow", arrowtail="none" ] -VIF -> VM [ color="blue", arrowhead="none", arrowtail="crow" ] -VIF -> network [ color="blue", arrowhead="none", arrowtail="crow" ] -PIF -> host [ color="blue", arrowhead="none", arrowtail="crow" ] -PIF -> network [ color="blue", arrowhead="none", arrowtail="crow" ] -SR -> VDI [ color="blue", arrowhead="crow", arrowtail="none" ] -VDI -> VBD [ color="blue", arrowhead="crow", arrowtail="none" ] -VDI -> VDI [ color="blue", arrowhead="none", arrowtail="crow" ] -VBD -> VM [ color="blue", arrowhead="none", arrowtail="crow" ] -VTPM -> VM [ label="backend(1)" ] -VTPM -> VM [ color="blue", arrowhead="none", arrowtail="crow" ] -console -> VM [ color="blue", arrowhead="none", arrowtail="crow" ] +# +# Copyright (c) 2006-2007 XenSource, Inc. +# +# Permission is granted to copy, distribute and/or modify this document under +# the terms of the GNU Free Documentation License, Version 1.2 or any later +# version published by the Free Software Foundation; with no Invariant +# Sections, no Front-Cover Texts and no Back-Cover Texts. A copy of the +# license is included in the section entitled +# "GNU Free Documentation License" or the file fdl.tex. 
+# + +digraph "Xen-API Class Diagram" { +fontname="Verdana"; + +node [ shape=box ]; session VM host network VIF PIF SR VDI VBD PBD user; +node [shape=ellipse]; PIF_metrics VIF_metrics VM_metrics VBD_metrics PBD_metrics VM_guest_metrics host_metrics; +node [shape=box]; host_cpu console +session -> host [ arrowhead="none" ] +session -> user [ arrowhead="none" ] +VM -> VM_metrics [ arrowhead="none" ] +VM -> VM_guest_metrics [ arrowhead="none" ] +VM -> console [ arrowhead="crow" ] +host -> PBD [ arrowhead="crow", arrowtail="none" ] +host -> host_metrics [ arrowhead="none" ] +host -> host_cpu [ arrowhead="none" ] +VIF -> VM [ arrowhead="none", arrowtail="crow" ] +VIF -> network [ arrowhead="none", arrowtail="crow" ] +VIF -> VIF_metrics [ arrowhead="none" ] +PIF -> host [ arrowhead="none", arrowtail="crow" ] +PIF -> network [ arrowhead="none", arrowtail="crow" ] +PIF -> PIF_metrics [ arrowhead="none" ] +SR -> PBD [ arrowhead="crow", arrowtail="none" ] +PBD -> PBD_metrics [ arrowhead="none" ] +SR -> VDI [ arrowhead="crow", arrowtail="none" ] +VDI -> VBD [ arrowhead="crow", arrowtail="none" ] +VBD -> VM [ arrowhead="none", arrowtail="crow" ] +VTPM -> VM [ arrowhead="none", arrowtail="crow" ] +VBD -> VBD_metrics [ arrowhead="none" ] } diff -r 9ec7dadc98ba -r eb3e430242ac docs/xen-api/xenapi-datamodel.tex --- a/docs/xen-api/xenapi-datamodel.tex Fri Apr 06 10:06:30 2007 -0600 +++ b/docs/xen-api/xenapi-datamodel.tex Fri Apr 06 10:08:30 2007 -0600 @@ -1,5 +1,5 @@ % -% Copyright (c) 2006 XenSource, Inc. +% Copyright (c) 2006-2007 XenSource, Inc. % % Permission is granted to copy, distribute and/or modify this document under % the terms of the GNU Free Documentation License, Version 1.2 or any later @@ -512,7 +512,6 @@ Quals & Field & Type & Description \\ $\mathit{RO}_\mathit{run}$ & {\tt progress} & int & if the task is still pending, this field contains the estimated percentage complete (0-100). If task has completed (successfully or unsuccessfully) this should be 100. \\ $\mathit{RO}_\mathit{run}$ & {\tt type} & string & if the task has completed successfully, this field contains the type of the encoded result (i.e. name of the class whose reference is in the result field). Undefined otherwise. \\ $\mathit{RO}_\mathit{run}$ & {\tt result} & string & if the task has completed successfully, this field contains the result value (either Void or an object reference). Undefined otherwise. \\ -$\mathit{RO}_\mathit{run}$ & {\tt error\_code} & int & if the task has failed, this field contains the error code. Undefined otherwise. \\ $\mathit{RO}_\mathit{run}$ & {\tt error\_info} & string Set & if the task has failed, this field contains the set of associated error strings. Undefined otherwise. \\ $\mathit{RO}_\mathit{run}$ & {\tt allowed\_operations} & (task\_allowed\_operations) Set & Operations allowed on this task \\ \hline @@ -826,38 +825,6 @@ Get the result field of the given task. \noindent {\bf Return Type:} {\tt string -} - - -value of the field -\vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} -\subsubsection{RPC name:~get\_error\_code} - -{\bf Overview:} -Get the error\_code field of the given task. 
- - \noindent {\bf Signature:} -\begin{verbatim} int get_error_code (session_id s, task ref self)\end{verbatim} - - -\noindent{\bf Arguments:} - - -\vspace{0.3cm} -\begin{tabular}{|c|c|p{7cm}|} - \hline -{\bf type} & {\bf name} & {\bf description} \\ \hline -{\tt task ref } & self & reference to the object \\ \hline - -\end{tabular} - -\vspace{0.3cm} - - \noindent {\bf Return Type:} -{\tt -int } @@ -4952,6 +4919,27 @@ dmesg string \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} +\subsubsection{RPC name:~list\_methods} + +{\bf Overview:} +List all supported methods. + + \noindent {\bf Signature:} +\begin{verbatim} (string Set) list_methods (session_id s)\end{verbatim} + + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string Set +} + + +The name of every supported method. +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} \subsubsection{RPC name:~get\_all} {\bf Overview:} @@ -9190,51 +9178,35 @@ Quals & Field & Type & Description \\ $\mathit{RO}_\mathit{run}$ & {\tt physical\_utilisation} & int & physical space currently utilised on this storage repository (in bytes). Note that for sparse disk formats, physical\_utilisation may be less than virtual\_allocation \\ $\mathit{RO}_\mathit{ins}$ & {\tt physical\_size} & int & total physical size of the repository (in bytes) \\ $\mathit{RO}_\mathit{ins}$ & {\tt type} & string & type of the storage repository \\ -$\mathit{RO}_\mathit{ins}$ & {\tt location} & string & a string that uniquely determines the location of the storage repository; the format of this string depends on the repository's type \\ +$\mathit{RO}_\mathit{ins}$ & {\tt content\_type} & string & the type of the SR's content, if required (e.g. ISOs) \\ \hline \end{longtable} \subsection{RPCs associated with class: SR} -\subsubsection{RPC name:~clone} - -{\bf Overview:} -Take an exact copy of the Storage Repository; - the cloned storage repository has the same type as its parent - - \noindent {\bf Signature:} -\begin{verbatim} (SR ref) clone (session_id s, SR ref sr, string loc, string name)\end{verbatim} - - -\noindent{\bf Arguments:} - - -\vspace{0.3cm} -\begin{tabular}{|c|c|p{7cm}|} - \hline -{\bf type} & {\bf name} & {\bf description} \\ \hline -{\tt SR ref } & sr & The Storage Repository to clone \\ \hline - -{\tt string } & loc & The location string that defines where the new storage repository will be located \\ \hline - -{\tt string } & name & The name of the new storage repository \\ \hline - -\end{tabular} - -\vspace{0.3cm} - - \noindent {\bf Return Type:} -{\tt -SR ref -} - - -The ID of the newly created Storage Repository. +\subsubsection{RPC name:~get\_supported\_types} + +{\bf Overview:} +Return a set of all the SR types supported by the system. + + \noindent {\bf Signature:} +\begin{verbatim} (string Set) get_supported_types (session_id s)\end{verbatim} + + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string Set +} + + +the supported SR types \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} \subsubsection{RPC name:~get\_all} {\bf Overview:} -Return a list of all the Storage Repositories known to the system +Return a list of all the SRs known to the system. 
\noindent {\bf Signature:} \begin{verbatim} ((SR ref) Set) get_all (session_id s)\end{verbatim} @@ -9248,7 +9220,7 @@ Return a list of all the Storage Reposit } -A list of all the IDs of all the Storage Repositories +references to all objects \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} @@ -9608,13 +9580,13 @@ value of the field \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~get\_location} - -{\bf Overview:} -Get the location field of the given SR. - - \noindent {\bf Signature:} -\begin{verbatim} string get_location (session_id s, SR ref self)\end{verbatim} +\subsubsection{RPC name:~get\_content\_type} + +{\bf Overview:} +Get the content\_type field of the given SR. + + \noindent {\bf Signature:} +\begin{verbatim} string get_content_type (session_id s, SR ref self)\end{verbatim} \noindent{\bf Arguments:} @@ -9637,70 +9609,6 @@ string value of the field -\vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} -\subsubsection{RPC name:~create} - -{\bf Overview:} -Create a new SR instance, and return its handle. - - \noindent {\bf Signature:} -\begin{verbatim} (SR ref) create (session_id s, SR record args)\end{verbatim} - - -\noindent{\bf Arguments:} - - -\vspace{0.3cm} -\begin{tabular}{|c|c|p{7cm}|} - \hline -{\bf type} & {\bf name} & {\bf description} \\ \hline -{\tt SR record } & args & All constructor arguments \\ \hline - -\end{tabular} - -\vspace{0.3cm} - - \noindent {\bf Return Type:} -{\tt -SR ref -} - - -reference to the newly created object -\vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} -\subsubsection{RPC name:~destroy} - -{\bf Overview:} -Destroy the specified SR instance. - - \noindent {\bf Signature:} -\begin{verbatim} void destroy (session_id s, SR ref self)\end{verbatim} - - -\noindent{\bf Arguments:} - - -\vspace{0.3cm} -\begin{tabular}{|c|c|p{7cm}|} - \hline -{\bf type} & {\bf name} & {\bf description} \\ \hline -{\tt SR ref } & self & reference to the object \\ \hline - -\end{tabular} - -\vspace{0.3cm} - - \noindent {\bf Return Type:} -{\tt -void -} - - - \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} @@ -9828,73 +9736,6 @@ Quals & Field & Type & Description \\ \hline \end{longtable} \subsection{RPCs associated with class: VDI} -\subsubsection{RPC name:~snapshot} - -{\bf Overview:} -Take an exact copy of the VDI; the snapshot lives in the same Storage -Repository as its parent. - - \noindent {\bf Signature:} -\begin{verbatim} (VDI ref) snapshot (session_id s, VDI ref vdi)\end{verbatim} - - -\noindent{\bf Arguments:} - - -\vspace{0.3cm} -\begin{tabular}{|c|c|p{7cm}|} - \hline -{\bf type} & {\bf name} & {\bf description} \\ \hline -{\tt VDI ref } & vdi & The VDI to snapshot \\ \hline - -\end{tabular} - -\vspace{0.3cm} - - \noindent {\bf Return Type:} -{\tt -VDI ref -} - - -The ID of the newly created VDI. -\vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} -\subsubsection{RPC name:~resize} - -{\bf Overview:} -Resize the vdi to the size. 
- - \noindent {\bf Signature:} -\begin{verbatim} void resize (session_id s, VDI ref vdi, int size)\end{verbatim} - - -\noindent{\bf Arguments:} - - -\vspace{0.3cm} -\begin{tabular}{|c|c|p{7cm}|} - \hline -{\bf type} & {\bf name} & {\bf description} \\ \hline -{\tt VDI ref } & vdi & The VDI to resize \\ \hline - -{\tt int } & size & The new size of the VDI \\ \hline - -\end{tabular} - -\vspace{0.3cm} - - \noindent {\bf Return Type:} -{\tt -void -} - - - -\vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} \subsubsection{RPC name:~get\_all} {\bf Overview:} @@ -11988,7 +11829,7 @@ Quals & Field & Type & Description \\ $\mathit{RO}_\mathit{run}$ & {\tt uuid} & string & unique identifier/object reference \\ $\mathit{RO}_\mathit{ins}$ & {\tt host} & host ref & physical machine on which the pbd is available \\ $\mathit{RO}_\mathit{ins}$ & {\tt SR} & SR ref & the storage repository that the pbd realises \\ -$\mathit{RO}_\mathit{ins}$ & {\tt device\_config} & (string $\rightarrow$ string) Map & a config string that is provided to the host's SR-backend-driver \\ +$\mathit{RO}_\mathit{ins}$ & {\tt device\_config} & (string $\rightarrow$ string) Map & a config string to string map that is provided to the host's SR-backend-driver \\ $\mathit{RO}_\mathit{run}$ & {\tt currently\_attached} & bool & is the SR currently attached on this host? \\ \hline \end{longtable} diff -r 9ec7dadc98ba -r eb3e430242ac docs/xen-api/xenapi.tex --- a/docs/xen-api/xenapi.tex Fri Apr 06 10:06:30 2007 -0600 +++ b/docs/xen-api/xenapi.tex Fri Apr 06 10:08:30 2007 -0600 @@ -1,5 +1,5 @@ % -% Copyright (c) 2006 XenSource, Inc. +% Copyright (c) 2006-2007 XenSource, Inc. % % Permission is granted to copy, distribute and/or modify this document under % the terms of the GNU Free Documentation License, Version 1.2 or any later @@ -17,6 +17,7 @@ \usepackage{graphics} \usepackage{longtable} \usepackage{fancyhdr} +\usepackage{multicol} \setlength\topskip{0cm} \setlength\topmargin{0cm} @@ -40,17 +41,10 @@ remotely configuring and controlling vir remotely configuring and controlling virtualised guests running on a Xen-enabled host. 
-~ - -{\bf \large This document is an early draft for discussion purposes only.} - -~ - \input{presentation} \include{wire-protocol} \include{vm-lifecycle} -\include{todo} \include{xenapi-datamodel} \include{fdl} diff -r 9ec7dadc98ba -r eb3e430242ac extras/mini-os/xenbus/xenbus.c --- a/extras/mini-os/xenbus/xenbus.c Fri Apr 06 10:06:30 2007 -0600 +++ b/extras/mini-os/xenbus/xenbus.c Fri Apr 06 10:08:30 2007 -0600 @@ -178,6 +178,7 @@ static void release_xenbus_id(int id) { BUG_ON(!req_info[id].in_use); spin_lock(&req_lock); + req_info[id].in_use = 0; nr_live_reqs--; req_info[id].in_use = 0; if (nr_live_reqs == NR_REQS - 1) @@ -335,6 +336,7 @@ xenbus_msg_reply(int type, xb_write(type, id, trans, io, nr_reqs); schedule(); + remove_waiter(w); wake(current); rep = req_info[id].reply; diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S --- a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Fri Apr 06 10:08:30 2007 -0600 @@ -142,7 +142,7 @@ ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* 0xf0 - unused */ .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 /* * __xen_guest information */ @@ -180,18 +180,18 @@ ENTRY(cpu_gdt_table) #endif .ascii ",LOADER=generic" .byte 0 -#endif /* CONFIG_XEN_COMPAT_030002 */ +#endif /* CONFIG_XEN_COMPAT <= 0x030002 */ ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "linux") ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "2.6") ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0") ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long, __PAGE_OFFSET) -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, __PAGE_OFFSET) #else ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, 0) -#endif /* !CONFIG_XEN_COMPAT_030002 */ +#endif ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long, startup_32) ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long, hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long, HYPERVISOR_VIRT_START) diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c Fri Apr 06 10:08:30 2007 -0600 @@ -1010,7 +1010,7 @@ static void stop_hz_timer(void) singleshot.timeout_abs_ns = jiffies_to_st(j); singleshot.flags = 0; rc = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &singleshot); -#ifdef CONFIG_XEN_COMPAT_030004 +#if CONFIG_XEN_COMPAT <= 0x030004 if (rc) { BUG_ON(rc != -ENOSYS); rc = HYPERVISOR_set_timer_op(singleshot.timeout_abs_ns); diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/arch/i386/mach-xen/setup.c --- a/linux-2.6-xen-sparse/arch/i386/mach-xen/setup.c Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/mach-xen/setup.c Fri Apr 06 10:08:30 2007 -0600 @@ -114,7 +114,7 @@ void __init machine_specific_arch_setup( ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event); if (ret == 0) ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe); -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 if (ret == -ENOSYS) ret = HYPERVISOR_set_callbacks( event.address.cs, event.address.eip, @@ -123,7 +123,7 @@ void __init machine_specific_arch_setup( BUG_ON(ret); ret = HYPERVISOR_callback_op(CALLBACKOP_register, &nmi_cb); -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 if (ret == -ENOSYS) { 
static struct xennmi_callback __initdata cb = { .handler_address = (unsigned long)nmi diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c --- a/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Fri Apr 06 10:08:30 2007 -0600 @@ -303,7 +303,7 @@ int xen_create_contiguous_region( set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, INVALID_P2M_ENTRY); } - if (HYPERVISOR_multicall(cr_mcl, i)) + if (HYPERVISOR_multicall_check(cr_mcl, i, NULL)) BUG(); /* 2. Get a new contiguous memory extent. */ @@ -312,7 +312,7 @@ int xen_create_contiguous_region( success = (exchange.nr_exchanged == (1UL << order)); BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0))); BUG_ON(success && (rc != 0)); -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 if (unlikely(rc == -ENOSYS)) { /* Compatibility when XENMEM_exchange is unsupported. */ if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, @@ -342,7 +342,7 @@ int xen_create_contiguous_region( cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order ? UVMF_TLB_FLUSH|UVMF_ALL : UVMF_INVLPG|UVMF_ALL; - if (HYPERVISOR_multicall(cr_mcl, i)) + if (HYPERVISOR_multicall_check(cr_mcl, i, NULL)) BUG(); if (success) @@ -400,7 +400,7 @@ void xen_destroy_contiguous_region(unsig INVALID_P2M_ENTRY); out_frames[i] = (__pa(vstart) >> PAGE_SHIFT) + i; } - if (HYPERVISOR_multicall(cr_mcl, i)) + if (HYPERVISOR_multicall_check(cr_mcl, i, NULL)) BUG(); /* 3. Do the exchange for non-contiguous MFNs. */ @@ -408,7 +408,7 @@ void xen_destroy_contiguous_region(unsig success = (exchange.nr_exchanged == 1); BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0))); BUG_ON(success && (rc != 0)); -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 if (unlikely(rc == -ENOSYS)) { /* Compatibility when XENMEM_exchange is unsupported. */ if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, @@ -432,7 +432,7 @@ void xen_destroy_contiguous_region(unsig cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order ? UVMF_TLB_FLUSH|UVMF_ALL : UVMF_INVLPG|UVMF_ALL; - if (HYPERVISOR_multicall(cr_mcl, i)) + if (HYPERVISOR_multicall_check(cr_mcl, i, NULL)) BUG(); balloon_unlock(flags); diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S Fri Apr 06 10:08:30 2007 -0600 @@ -201,6 +201,19 @@ END(ret_from_fork) END(ret_from_fork) /* + * initial frame state for interrupts and exceptions + */ + .macro _frame ref + CFI_STARTPROC simple + CFI_DEF_CFA rsp,SS+8-\ref + /*CFI_REL_OFFSET ss,SS-\ref*/ + CFI_REL_OFFSET rsp,RSP-\ref + /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ + /*CFI_REL_OFFSET cs,CS-\ref*/ + CFI_REL_OFFSET rip,RIP-\ref + .endm + +/* * System call entry. Upto 6 arguments in registers are supported. 
* * SYSCALL does not save anything on the stack and does not change the @@ -232,10 +245,7 @@ END(ret_from_fork) */ ENTRY(system_call) - CFI_STARTPROC simple - CFI_DEF_CFA rsp,PDA_STACKOFFSET - CFI_REGISTER rip,rcx - /*CFI_REGISTER rflags,r11*/ + _frame (RIP-0x10) SAVE_ARGS -8,0 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) /* @@ -273,9 +283,7 @@ sysret_check: */ TRACE_IRQS_ON XEN_UNBLOCK_EVENTS(%rsi) - CFI_REGISTER rip,rcx RESTORE_ARGS 0,8,0 - /*CFI_REGISTER rflags,r11*/ HYPERVISOR_IRET VGCF_IN_SYSCALL /* Handle reschedules */ @@ -505,24 +513,16 @@ ENTRY(stub_rt_sigreturn) CFI_ENDPROC END(stub_rt_sigreturn) -/* - * initial frame state for interrupts and exceptions - */ - .macro _frame ref - CFI_STARTPROC simple - CFI_DEF_CFA rsp,SS+8-\ref - /*CFI_REL_OFFSET ss,SS-\ref*/ - CFI_REL_OFFSET rsp,RSP-\ref - /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ - /*CFI_REL_OFFSET cs,CS-\ref*/ - CFI_REL_OFFSET rip,RIP-\ref - .endm - /* initial frame state for interrupts (and exceptions without error code) */ -#define INTR_FRAME _frame RIP +#define INTR_FRAME _frame (RIP-0x10); \ + CFI_REL_OFFSET rcx,0; \ + CFI_REL_OFFSET r11,8 + /* initial frame state for exceptions with error code (and interrupts with vector already pushed) */ -#define XCPT_FRAME _frame ORIG_RAX +#define XCPT_FRAME _frame (RIP-0x18); \ + CFI_REL_OFFSET rcx,0; \ + CFI_REL_OFFSET r11,8 /* * Interrupt exit. @@ -599,8 +599,9 @@ retint_kernel: #endif CFI_ENDPROC -END(common_interrupt) - +END(retint_check) + +#ifndef CONFIG_XEN /* * APIC interrupts. */ @@ -613,7 +614,6 @@ END(common_interrupt) CFI_ENDPROC .endm -#ifndef CONFIG_XEN ENTRY(thermal_interrupt) apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt END(thermal_interrupt) @@ -668,12 +668,16 @@ END(spurious_interrupt) .macro zeroentry sym INTR_FRAME movq (%rsp),%rcx + CFI_RESTORE rcx movq 8(%rsp),%r11 + CFI_RESTORE r11 addq $0x10,%rsp /* skip rcx and r11 */ + CFI_ADJUST_CFA_OFFSET -0x10 pushq $0 /* push error code/oldrax */ CFI_ADJUST_CFA_OFFSET 8 pushq %rax /* push real oldrax to the rdi slot */ CFI_ADJUST_CFA_OFFSET 8 + CFI_REL_OFFSET rax,0 leaq \sym(%rip),%rax jmp error_entry CFI_ENDPROC @@ -682,10 +686,14 @@ END(spurious_interrupt) .macro errorentry sym XCPT_FRAME movq (%rsp),%rcx + CFI_RESTORE rcx movq 8(%rsp),%r11 + CFI_RESTORE r11 addq $0x10,%rsp /* rsp points to the error code */ + CFI_ADJUST_CFA_OFFSET -0x10 pushq %rax CFI_ADJUST_CFA_OFFSET 8 + CFI_REL_OFFSET rax,0 leaq \sym(%rip),%rax jmp error_entry CFI_ENDPROC @@ -799,6 +807,7 @@ paranoid_schedule\trace: */ ENTRY(error_entry) _frame RDI + CFI_REL_OFFSET rax,0 /* rdi slot contains rax, oldrax contains error code */ cld subq $14*8,%rsp @@ -806,6 +815,7 @@ ENTRY(error_entry) movq %rsi,13*8(%rsp) CFI_REL_OFFSET rsi,RSI movq 14*8(%rsp),%rsi /* load rax from rdi slot */ + CFI_REGISTER rax,rsi movq %rdx,12*8(%rsp) CFI_REL_OFFSET rdx,RDX movq %rcx,11*8(%rsp) @@ -839,6 +849,7 @@ ENTRY(error_entry) #endif error_call_handler: movq %rdi, RDI(%rsp) + CFI_REL_OFFSET rdi,RDI movq %rsp,%rdi movq ORIG_RAX(%rsp),%rsi # get error code movq $-1,ORIG_RAX(%rsp) @@ -1186,7 +1197,7 @@ END(int3) ENTRY(overflow) zeroentry do_overflow -END(debug) +END(overflow) ENTRY(bounds) zeroentry do_bounds diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S Fri Apr 06 10:08:30 2007 -0600 @@ -33,9 +33,6 @@ startup_64: startup_64: ENTRY(_start) movq $(init_thread_union+THREAD_SIZE-8),%rsp - /* zero 
EFLAGS after setting rsp */ - pushq $0 - popfq /* rsi is pointer to startup info structure. pass it to C */ @@ -155,7 +152,7 @@ ENTRY(empty_zero_page) ENTRY(empty_zero_page) .skip PAGE_SIZE -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 /* * __xen_guest information */ @@ -187,17 +184,17 @@ ENTRY(empty_zero_page) .ascii "|supervisor_mode_kernel" .ascii ",LOADER=generic" .byte 0 -#endif /* CONFIG_XEN_COMPAT_030002 */ +#endif /* CONFIG_XEN_COMPAT <= 0x030002 */ ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "linux") ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "2.6") ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0") ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .quad, __START_KERNEL_map) -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad, __START_KERNEL_map) #else ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad, 0) -#endif /* !CONFIG_XEN_COMPAT_030002 */ +#endif ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .quad, startup_64) ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .quad, hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad, _PAGE_PRESENT,_PAGE_PRESENT) diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Fri Apr 06 10:08:30 2007 -0600 @@ -54,7 +54,7 @@ struct dma_mapping_ops* dma_ops; struct dma_mapping_ops* dma_ops; EXPORT_SYMBOL(dma_ops); -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 unsigned int __kernel_page_user; EXPORT_SYMBOL(__kernel_page_user); #endif @@ -551,7 +551,7 @@ void __init xen_init_pt(void) addr = page[pud_index(__START_KERNEL_map)]; addr_to_page(addr, page); -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 /* On Xen 3.0.2 and older we may need to explicitly specify _PAGE_USER in kernel PTEs. We check that here. 
*/ if (HYPERVISOR_xen_version(XENVER_version, NULL) <= 0x30000) { diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/drivers/xen/Kconfig --- a/linux-2.6-xen-sparse/drivers/xen/Kconfig Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig Fri Apr 06 10:08:30 2007 -0600 @@ -235,13 +235,12 @@ choice endchoice -config XEN_COMPAT_030002 - bool - default XEN_COMPAT_030002_AND_LATER - -config XEN_COMPAT_030004 - bool - default XEN_COMPAT_030002_AND_LATER || XEN_COMPAT_030004_AND_LATER +config XEN_COMPAT + hex + default 0xffffff if XEN_COMPAT_LATEST_ONLY + default 0x030004 if XEN_COMPAT_030004_AND_LATER + default 0x030002 if XEN_COMPAT_030002_AND_LATER + default 0 endmenu diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Fri Apr 06 10:08:30 2007 -0600 @@ -48,13 +48,20 @@ #include <asm/hypervisor.h> #include <xen/balloon.h> #include <xen/interface/memory.h> +#include <asm/maddr.h> +#include <asm/page.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> #include <asm/uaccess.h> #include <asm/tlb.h> +#include <linux/highmem.h> #include <linux/list.h> #include <xen/xenbus.h> #include "common.h" + +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include <xen/platform-compat.h> +#endif #ifdef CONFIG_PROC_FS static struct proc_dir_entry *balloon_pde; @@ -217,6 +224,7 @@ static int increase_reservation(unsigned set_phys_to_machine(pfn, frame_list[i]); +#ifdef CONFIG_XEN /* Link back into the page tables if not highmem. */ if (pfn < max_low_pfn) { int ret; @@ -226,6 +234,7 @@ static int increase_reservation(unsigned 0); BUG_ON(ret); } +#endif /* Relinquish the page back to the allocator. */ ClearPageReserved(page); @@ -271,9 +280,11 @@ static int decrease_reservation(unsigned if (!PageHighMem(page)) { v = phys_to_virt(pfn << PAGE_SHIFT); scrub_pages(v, 1); +#ifdef CONFIG_XEN ret = HYPERVISOR_update_va_mapping( (unsigned long)v, __pte_ma(0), 0); BUG_ON(ret); +#endif } #ifdef CONFIG_XEN_SCRUB_PAGES else { @@ -284,9 +295,11 @@ static int decrease_reservation(unsigned #endif } +#ifdef CONFIG_XEN /* Ensure that ballooned highmem pages don't have kmaps. */ kmap_flush_unused(); flush_tlb_all(); +#endif balloon_lock(flags); @@ -446,7 +459,7 @@ static struct notifier_block xenstore_no static int __init balloon_init(void) { -#ifdef CONFIG_X86 +#if defined(CONFIG_X86) && defined(CONFIG_XEN) unsigned long pfn; struct page *page; #endif @@ -456,8 +469,12 @@ static int __init balloon_init(void) IPRINTK("Initialising balloon driver.\n"); +#ifdef CONFIG_XEN bs.current_pages = min(xen_start_info->nr_pages, max_pfn); totalram_pages = bs.current_pages; +#else + bs.current_pages = totalram_pages; +#endif bs.target_pages = bs.current_pages; bs.balloon_low = 0; bs.balloon_high = 0; @@ -479,7 +496,7 @@ static int __init balloon_init(void) #endif balloon_sysfs_init(); -#ifdef CONFIG_X86 +#if defined(CONFIG_X86) && defined(CONFIG_XEN) /* Initialise the balloon with excess memory space. 
*/ for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { page = pfn_to_page(pfn); @@ -498,6 +515,14 @@ static int __init balloon_init(void) subsys_initcall(balloon_init); +static void balloon_exit(void) +{ + /* XXX - release balloon here */ + return; +} + +module_exit(balloon_exit); + void balloon_update_driver_allowance(long delta) { unsigned long flags; @@ -507,6 +532,7 @@ void balloon_update_driver_allowance(lon balloon_unlock(flags); } +#ifdef CONFIG_XEN static int dealloc_pte_fn( pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) { @@ -524,6 +550,7 @@ static int dealloc_pte_fn( BUG_ON(ret != 1); return 0; } +#endif struct page **alloc_empty_pages_and_pagevec(int nr_pages) { @@ -559,8 +586,13 @@ struct page **alloc_empty_pages_and_page if (ret == 1) ret = 0; /* success */ } else { +#ifdef CONFIG_XEN ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE, dealloc_pte_fn, NULL); +#else + /* Cannot handle non-auto translate mode. */ + ret = 1; +#endif } if (ret != 0) { @@ -576,7 +608,9 @@ struct page **alloc_empty_pages_and_page out: schedule_work(&balloon_worker); +#ifdef CONFIG_XEN flush_tlb_all(); +#endif return pagevec; err: diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/drivers/xen/balloon/sysfs.c --- a/linux-2.6-xen-sparse/drivers/xen/balloon/sysfs.c Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/sysfs.c Fri Apr 06 10:08:30 2007 -0600 @@ -29,9 +29,15 @@ */ #include <linux/capability.h> +#include <linux/errno.h> #include <linux/stat.h> +#include <linux/string.h> #include <linux/sysdev.h> #include "common.h" + +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include <xen/platform-compat.h> +#endif #define BALLOON_CLASS_NAME "memory" diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Apr 06 10:08:30 2007 -0600 @@ -661,9 +661,10 @@ void do_blkif_request(request_queue_t *r if (RING_FULL(&info->ring)) goto wait; - DPRINTK("do_blk_req %p: cmd %p, sec %lx, " + DPRINTK("do_blk_req %p: cmd %p, sec %llx, " "(%u/%li) buffer:%p [%s]\n", - req, req->cmd, req->sector, req->current_nr_sectors, + req, req->cmd, (long long)req->sector, + req->current_nr_sectors, req->nr_sectors, req->buffer, rq_data_dir(req) ? 
"write" : "read"); diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c --- a/linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c Fri Apr 06 10:08:30 2007 -0600 @@ -7,7 +7,6 @@ #include <xen/interface/kexec.h> #include <linux/mm.h> #include <linux/bootmem.h> -#include <asm/hypercall.h> extern void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, struct kimage *image); diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/drivers/xen/core/reboot.c --- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Fri Apr 06 10:08:30 2007 -0600 @@ -8,6 +8,10 @@ #include <asm/hypervisor.h> #include <xen/xenbus.h> #include <linux/kthread.h> + +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include <xen/platform-compat.h> +#endif MODULE_LICENSE("Dual BSD/GPL"); diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/drivers/xen/core/xen_proc.c --- a/linux-2.6-xen-sparse/drivers/xen/core/xen_proc.c Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/core/xen_proc.c Fri Apr 06 10:08:30 2007 -0600 @@ -1,4 +1,5 @@ +#include <linux/module.h> #include <linux/proc_fs.h> #include <xen/xen_proc.h> @@ -12,7 +13,11 @@ struct proc_dir_entry *create_xen_proc_e return create_proc_entry(name, mode, xen_base); } +EXPORT_SYMBOL_GPL(create_xen_proc_entry); + void remove_xen_proc_entry(const char *name) { remove_proc_entry(name, xen_base); } + +EXPORT_SYMBOL_GPL(remove_xen_proc_entry); diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/drivers/xen/fbfront/xenfb.c --- a/linux-2.6-xen-sparse/drivers/xen/fbfront/xenfb.c Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/fbfront/xenfb.c Fri Apr 06 10:08:30 2007 -0600 @@ -24,6 +24,7 @@ #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/mm.h> +#include <linux/mutex.h> #include <asm/hypervisor.h> #include <xen/evtchn.h> #include <xen/interface/io/fbif.h> diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Apr 06 10:08:30 2007 -0600 @@ -1511,7 +1511,7 @@ static void netif_release_rx_bufs(struct struct sk_buff *skb; unsigned long mfn; int xfer = 0, noxfer = 0, unused = 0; - int id, ref; + int id, ref, rc; if (np->copying_receiver) { WPRINTK("%s: fix me for copying receiver.\n", __FUNCTION__); @@ -1579,7 +1579,9 @@ static void netif_release_rx_bufs(struct mcl->args[2] = 0; mcl->args[3] = DOMID_SELF; mcl++; - HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl); + rc = HYPERVISOR_multicall_check( + np->rx_mcl, mcl - np->rx_mcl, NULL); + BUG_ON(rc); } } diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h Fri Apr 06 10:08:30 2007 -0600 @@ -238,7 +238,7 @@ HYPERVISOR_memory_op( static inline int HYPERVISOR_multicall( - void *call_list, int nr_calls) + multicall_entry_t *call_list, int nr_calls) { return _hypercall2(int, multicall, call_list, nr_calls); } @@ -261,7 +261,7 @@ HYPERVISOR_event_channel_op( { int rc = _hypercall2(int, event_channel_op, cmd, arg); -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 
0x030002 if (unlikely(rc == -ENOSYS)) { struct evtchn_op op; op.cmd = cmd; @@ -301,7 +301,7 @@ HYPERVISOR_physdev_op( { int rc = _hypercall2(int, physdev_op, cmd, arg); -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 if (unlikely(rc == -ENOSYS)) { struct physdev_op op; op.cmd = cmd; @@ -358,7 +358,7 @@ HYPERVISOR_suspend( int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown, &sched_shutdown, srec); -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 if (rc == -ENOSYS) rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown, SHUTDOWN_suspend, srec); diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h Fri Apr 06 10:08:30 2007 -0600 @@ -122,7 +122,13 @@ void xen_destroy_contiguous_region( /* Turn jiffies into Xen system time. */ u64 jiffies_to_st(unsigned long jiffies); -#include <asm/hypercall.h> +#ifdef CONFIG_XEN_SCRUB_PAGES +#define scrub_pages(_p,_n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT) +#else +#define scrub_pages(_p,_n) ((void)0) +#endif + +#include <xen/hypercall.h> #if defined(CONFIG_X86_64) #define MULTI_UVMFLAGS_INDEX 2 @@ -140,7 +146,7 @@ HYPERVISOR_yield( { int rc = HYPERVISOR_sched_op(SCHEDOP_yield, NULL); -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 if (rc == -ENOSYS) rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0); #endif @@ -154,7 +160,7 @@ HYPERVISOR_block( { int rc = HYPERVISOR_sched_op(SCHEDOP_block, NULL); -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 if (rc == -ENOSYS) rc = HYPERVISOR_sched_op_compat(SCHEDOP_block, 0); #endif @@ -172,7 +178,7 @@ HYPERVISOR_shutdown( int rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 if (rc == -ENOSYS) rc = HYPERVISOR_sched_op_compat(SCHEDOP_shutdown, reason); #endif @@ -192,7 +198,7 @@ HYPERVISOR_poll( set_xen_guest_handle(sched_poll.ports, ports); rc = HYPERVISOR_sched_op(SCHEDOP_poll, &sched_poll); -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 if (rc == -ENOSYS) rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0); #endif diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/maddr.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/maddr.h Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/maddr.h Fri Apr 06 10:08:30 2007 -0600 @@ -158,7 +158,7 @@ static inline paddr_t pte_machine_to_phy #define pfn_to_mfn(pfn) (pfn) #define mfn_to_pfn(mfn) (mfn) #define mfn_to_local_pfn(mfn) (mfn) -#define set_phys_to_machine(pfn, mfn) BUG_ON((pfn) != (mfn)) +#define set_phys_to_machine(pfn, mfn) ((void)0) #define phys_to_machine_mapping_valid(pfn) (1) #define phys_to_machine(phys) ((maddr_t)(phys)) #define machine_to_phys(mach) ((paddr_t)(mach)) diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Fri Apr 06 10:08:30 2007 -0600 @@ -44,12 +44,6 @@ foreign; \ }) #define HAVE_ARCH_FREE_PAGE - -#ifdef CONFIG_XEN_SCRUB_PAGES -#define scrub_pages(_p,_n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT) -#else -#define scrub_pages(_p,_n) ((void)0) -#endif #ifdef CONFIG_X86_USE_3DNOW @@ 
-108,7 +102,7 @@ static inline unsigned long long pmd_val static inline unsigned long long pmd_val(pmd_t x) { unsigned long long ret = x.pmd; -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 if (ret) ret = pte_machine_to_phys(ret) | _PAGE_PRESENT; #else if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); @@ -140,7 +134,7 @@ static inline unsigned long pgd_val(pgd_ static inline unsigned long pgd_val(pgd_t x) { unsigned long ret = x.pgd; -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 if (ret) ret = machine_to_phys(ret) | _PAGE_PRESENT; #else if (ret & _PAGE_PRESENT) ret = machine_to_phys(ret); @@ -203,10 +197,10 @@ extern int page_is_ram(unsigned long pag #endif #define __KERNEL_START (__PAGE_OFFSET + __PHYSICAL_START) -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 #undef LOAD_OFFSET #define LOAD_OFFSET 0 -#endif /* CONFIG_XEN_COMPAT_030002 */ +#endif #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h Fri Apr 06 10:08:30 2007 -0600 @@ -36,8 +36,37 @@ #define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -#define ptep_get_and_clear(mm,addr,xp) __pte_ma(xchg(&(xp)->pte_low, 0)) +#define pte_none(x) (!(x).pte_low) + +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + pte_t pte = *ptep; + if (!pte_none(pte)) { + if (mm != &init_mm) + pte = __pte_ma(xchg(&ptep->pte_low, 0)); + else + HYPERVISOR_update_va_mapping(addr, __pte(0), 0); + } + return pte; +} + +#define ptep_clear_flush(vma, addr, ptep) \ +({ \ + pte_t *__ptep = (ptep); \ + pte_t __res = *__ptep; \ + if (!pte_none(__res) && \ + ((vma)->vm_mm != current->mm || \ + HYPERVISOR_update_va_mapping(addr, __pte(0), \ + (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \ + UVMF_INVLPG|UVMF_MULTI))) { \ + __ptep->pte_low = 0; \ + flush_tlb_page(vma, addr); \ + } \ + __res; \ +}) + #define pte_same(a, b) ((a).pte_low == (b).pte_low) + #define __pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT) #define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? 
\ __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte))) @@ -46,7 +75,6 @@ #define pte_page(_pte) pfn_to_page(pte_pfn(_pte)) -#define pte_none(x) (!(x).pte_low) #define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) #define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h Fri Apr 06 10:08:30 2007 -0600 @@ -99,6 +99,11 @@ static inline void pud_clear (pud_t * pu #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ pmd_index(address)) +static inline int pte_none(pte_t pte) +{ + return !(pte.pte_low | pte.pte_high); +} + /* * For PTEs and PDEs, we must clear the P-bit first when clearing a page table * entry, so clear the bottom half first and enforce ordering with a compiler @@ -106,24 +111,50 @@ static inline void pud_clear (pud_t * pu */ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - ptep->pte_low = 0; - smp_wmb(); - ptep->pte_high = 0; + if ((mm != current->mm && mm != &init_mm) + || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { + ptep->pte_low = 0; + smp_wmb(); + ptep->pte_high = 0; + } } #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_t res; - - /* xchg acts as a barrier before the setting of the high bits */ - res.pte_low = xchg(&ptep->pte_low, 0); - res.pte_high = ptep->pte_high; - ptep->pte_high = 0; - - return res; -} + pte_t pte = *ptep; + if (!pte_none(pte)) { + if (mm != &init_mm) { + uint64_t val = pte_val_ma(pte); + if (__cmpxchg64(ptep, val, 0) != val) { + /* xchg acts as a barrier before the setting of the high bits */ + pte.pte_low = xchg(&ptep->pte_low, 0); + pte.pte_high = ptep->pte_high; + ptep->pte_high = 0; + } + } else + HYPERVISOR_update_va_mapping(addr, __pte(0), 0); + } + return pte; +} + +#define ptep_clear_flush(vma, addr, ptep) \ +({ \ + pte_t *__ptep = (ptep); \ + pte_t __res = *__ptep; \ + if (!pte_none(__res) && \ + ((vma)->vm_mm != current->mm || \ + HYPERVISOR_update_va_mapping(addr, __pte(0), \ + (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \ + UVMF_INVLPG|UVMF_MULTI))) { \ + __ptep->pte_low = 0; \ + smp_wmb(); \ + __ptep->pte_high = 0; \ + flush_tlb_page(vma, addr); \ + } \ + __res; \ +}) static inline int pte_same(pte_t a, pte_t b) { @@ -131,11 +162,6 @@ static inline int pte_same(pte_t a, pte_ } #define pte_page(x) pfn_to_page(pte_pfn(x)) - -static inline int pte_none(pte_t pte) -{ - return !pte.pte_low && !pte.pte_high; -} #define __pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \ ((_pte).pte_high << (32-PAGE_SHIFT))) diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h Fri Apr 06 10:08:30 2007 -0600 @@ -210,9 +210,13 @@ extern unsigned long pg0[]; /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */ #define pmd_none(x) (!(unsigned long)pmd_val(x)) +#ifdef CONFIG_XEN_COMPAT_030002 /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t. can temporarily clear it. 
*/ #define pmd_present(x) (pmd_val(x)) +#else +#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) +#endif #define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT)) @@ -252,36 +256,47 @@ static inline pte_t pte_mkhuge(pte_t pte # include <asm/pgtable-2level.h> #endif -static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) -{ - if (!pte_dirty(*ptep)) - return 0; - return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); -} - -static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) -{ - if (!pte_young(*ptep)) - return 0; - return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low); -} - -static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) -{ - pte_t pte; - if (full) { - pte = *ptep; - pte_clear(mm, addr, ptep); - } else { - pte = ptep_get_and_clear(mm, addr, ptep); - } - return pte; -} +#define ptep_test_and_clear_dirty(vma, addr, ptep) \ +({ \ + pte_t __pte = *(ptep); \ + int __ret = pte_dirty(__pte); \ + if (__ret) { \ + __pte = pte_mkclean(__pte); \ + if ((vma)->vm_mm != current->mm || \ + HYPERVISOR_update_va_mapping(addr, __pte, 0)) \ + (ptep)->pte_low = __pte.pte_low; \ + } \ + __ret; \ +}) + +#define ptep_test_and_clear_young(vma, addr, ptep) \ +({ \ + pte_t __pte = *(ptep); \ + int __ret = pte_young(__pte); \ + if (__ret) \ + __pte = pte_mkold(__pte); \ + if ((vma)->vm_mm != current->mm || \ + HYPERVISOR_update_va_mapping(addr, __pte, 0)) \ + (ptep)->pte_low = __pte.pte_low; \ + __ret; \ +}) + +#define ptep_get_and_clear_full(mm, addr, ptep, full) \ + ((full) ? ({ \ + pte_t __res = *(ptep); \ + if (test_bit(PG_pinned, &virt_to_page((mm)->pgd)->flags)) \ + xen_l1_entry_update(ptep, __pte(0)); \ + else \ + *(ptep) = __pte(0); \ + __res; \ + }) : \ + ptep_get_and_clear(mm, addr, ptep)) static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if (pte_write(*ptep)) - clear_bit(_PAGE_BIT_RW, &ptep->pte_low); + pte_t pte = *ptep; + if (pte_write(pte)) + set_pte_at(mm, addr, ptep, pte_wrprotect(pte)); } /* @@ -418,6 +433,20 @@ extern void noexec_setup(const char *str #define pte_unmap_nested(pte) do { } while (0) #endif +#define __HAVE_ARCH_PTEP_ESTABLISH +#define ptep_establish(vma, address, ptep, pteval) \ + do { \ + if ( likely((vma)->vm_mm == current->mm) ) { \ + BUG_ON(HYPERVISOR_update_va_mapping(address, \ + pteval, \ + (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \ + UVMF_INVLPG|UVMF_MULTI)); \ + } else { \ + xen_l1_entry_update(ptep, pteval); \ + flush_tlb_page(vma, address); \ + } \ + } while (0) + /* * The i386 doesn't have any external MMU info: the kernel page * tables contain all the necessary information. 
@@ -430,26 +459,11 @@ extern void noexec_setup(const char *str */ #define update_mmu_cache(vma,address,pte) do { } while (0) #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \ - do { \ - if (__dirty) { \ - if ( likely((__vma)->vm_mm == current->mm) ) { \ - BUG_ON(HYPERVISOR_update_va_mapping(__address, \ - __entry, \ - (unsigned long)(__vma)->vm_mm->cpu_vm_mask.bits| \ - UVMF_INVLPG|UVMF_MULTI)); \ - } else { \ - xen_l1_entry_update(__ptep, __entry); \ - flush_tlb_page(__vma, __address); \ - } \ - } \ +#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ + do { \ + if (dirty) \ + ptep_establish(vma, address, ptep, entry); \ } while (0) - -#define __HAVE_ARCH_PTEP_ESTABLISH -#define ptep_establish(__vma, __address, __ptep, __entry) \ -do { \ - ptep_set_access_flags(__vma, __address, __ptep, __entry, 1); \ -} while (0) #include <xen/features.h> void make_lowmem_page_readonly(void *va, unsigned int feature); @@ -508,6 +522,7 @@ direct_remap_pfn_range(vma,from,pfn,size #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY #define __HAVE_ARCH_PTEP_GET_AND_CLEAR #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH #define __HAVE_ARCH_PTEP_SET_WRPROTECT #define __HAVE_ARCH_PTE_SAME #include <asm-generic/pgtable.h> diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Fri Apr 06 10:08:30 2007 -0600 @@ -55,7 +55,7 @@ extern int running_on_xen; #include <xen/interface/event_channel.h> #include <xen/interface/physdev.h> #include <xen/interface/sched.h> -#include <asm/hypercall.h> +#include <xen/hypercall.h> #include <asm/ptrace.h> #include <asm/page.h> diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h Fri Apr 06 10:08:30 2007 -0600 @@ -241,7 +241,7 @@ HYPERVISOR_memory_op( static inline int HYPERVISOR_multicall( - void *call_list, int nr_calls) + multicall_entry_t *call_list, int nr_calls) { return _hypercall2(int, multicall, call_list, nr_calls); } @@ -259,7 +259,7 @@ HYPERVISOR_event_channel_op( { int rc = _hypercall2(int, event_channel_op, cmd, arg); -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 if (unlikely(rc == -ENOSYS)) { struct evtchn_op op; op.cmd = cmd; @@ -299,7 +299,7 @@ HYPERVISOR_physdev_op( { int rc = _hypercall2(int, physdev_op, cmd, arg); -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 if (unlikely(rc == -ENOSYS)) { struct physdev_op op; op.cmd = cmd; @@ -359,7 +359,7 @@ HYPERVISOR_suspend( int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown, &sched_shutdown, srec); -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 if (rc == -ENOSYS) rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown, SHUTDOWN_suspend, srec); diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/maddr.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/maddr.h Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/maddr.h Fri Apr 06 10:08:30 2007 -0600 @@ -140,7 +140,7 @@ static inline paddr_t pte_machine_to_phy #define pfn_to_mfn(pfn) (pfn) #define mfn_to_pfn(mfn) (mfn) 
#define mfn_to_local_pfn(mfn) (mfn) -#define set_phys_to_machine(pfn, mfn) BUG_ON((pfn) != (mfn)) +#define set_phys_to_machine(pfn, mfn) ((void)0) #define phys_to_machine_mapping_valid(pfn) (1) #define phys_to_machine(phys) ((maddr_t)(phys)) #define machine_to_phys(mach) ((paddr_t)(mach)) diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h Fri Apr 06 10:08:30 2007 -0600 @@ -23,12 +23,6 @@ foreign; \ }) #define HAVE_ARCH_FREE_PAGE - -#ifdef CONFIG_XEN_SCRUB_PAGES -#define scrub_pages(_p,_n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT) -#else -#define scrub_pages(_p,_n) ((void)0) -#endif /* PAGE_SHIFT determines the page size */ #define PAGE_SHIFT 12 @@ -110,7 +104,7 @@ static inline unsigned long pmd_val(pmd_ static inline unsigned long pmd_val(pmd_t x) { unsigned long ret = x.pmd; -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 if (ret) ret = pte_machine_to_phys(ret) | _PAGE_PRESENT; #else if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret); @@ -172,10 +166,10 @@ static inline pgd_t __pgd(unsigned long #define __PAGE_OFFSET 0xffff880000000000 #endif /* !__ASSEMBLY__ */ -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 #undef LOAD_OFFSET #define LOAD_OFFSET 0 -#endif /* CONFIG_XEN_COMPAT_030002 */ +#endif /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h Fri Apr 06 10:08:30 2007 -0600 @@ -93,11 +93,6 @@ extern unsigned long empty_zero_page[PAG #define pgd_none(x) (!pgd_val(x)) #define pud_none(x) (!pud_val(x)) -#define set_pte_batched(pteptr, pteval) \ - queue_l1_entry_update(pteptr, (pteval)) - -extern inline int pud_present(pud_t pud) { return !pud_none(pud); } - static inline void set_pte(pte_t *dst, pte_t val) { *dst = val; @@ -122,41 +117,6 @@ static inline void pgd_clear (pgd_t * pg #define pud_page(pud) \ ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK)) - -/* - * A note on implementation of this atomic 'get-and-clear' operation. - * This is actually very simple because Xen Linux can only run on a single - * processor. Therefore, we cannot race other processors setting the 'accessed' - * or 'dirty' bits on a page-table entry. - * Even if pages are shared between domains, that is not a problem because - * each domain will have separate page tables, with their own versions of - * accessed & dirty state. 
- */ -#define ptep_get_and_clear(mm,addr,xp) __pte_ma(xchg(&(xp)->pte, 0)) - -#if 0 -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp) -{ - pte_t pte = *xp; - if (pte.pte) - set_pte(xp, __pte_ma(0)); - return pte; -} -#endif - -struct mm_struct; - -static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) -{ - pte_t pte; - if (full) { - pte = *ptep; - *ptep = __pte(0); - } else { - pte = ptep_get_and_clear(mm, addr, ptep); - } - return pte; -} #define pte_same(a, b) ((a).pte == (b).pte) @@ -205,7 +165,7 @@ static inline pte_t ptep_get_and_clear_f #define _PAGE_PROTNONE 0x080 /* If not present */ #define _PAGE_NX (1UL<<_PAGE_BIT_NX) -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 extern unsigned int __kernel_page_user; #else #define __kernel_page_user 0 @@ -318,6 +278,46 @@ static inline pte_t pfn_pte(unsigned lon return __pte(pte); } +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + pte_t pte = *ptep; + if (!pte_none(pte)) { + if (mm != &init_mm) + pte = __pte_ma(xchg(&ptep->pte, 0)); + else + HYPERVISOR_update_va_mapping(addr, __pte(0), 0); + } + return pte; +} + +static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) +{ + if (full) { + pte_t pte = *ptep; + if (mm->context.pinned) + xen_l1_entry_update(ptep, __pte(0)); + else + *ptep = __pte(0); + return pte; + } + return ptep_get_and_clear(mm, addr, ptep); +} + +#define ptep_clear_flush(vma, addr, ptep) \ +({ \ + pte_t *__ptep = (ptep); \ + pte_t __res = *__ptep; \ + if (!pte_none(__res) && \ + ((vma)->vm_mm != current->mm || \ + HYPERVISOR_update_va_mapping(addr, __pte(0), \ + (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \ + UVMF_INVLPG|UVMF_MULTI))) { \ + __ptep->pte = 0; \ + flush_tlb_page(vma, addr); \ + } \ + __res; \ +}) + /* * The following only work if pte_present() is true. * Undefined behaviour if not.. @@ -346,31 +346,29 @@ static inline pte_t pte_mkwrite(pte_t pt static inline pte_t pte_mkwrite(pte_t pte) { __pte_val(pte) |= _PAGE_RW; return pte; } static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; } -struct vm_area_struct; - -static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) -{ - pte_t pte = *ptep; - int ret = pte_dirty(pte); - if (ret) - set_pte(ptep, pte_mkclean(pte)); - return ret; -} - -static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) -{ - pte_t pte = *ptep; - int ret = pte_young(pte); - if (ret) - set_pte(ptep, pte_mkold(pte)); - return ret; -} +#define ptep_test_and_clear_dirty(vma, addr, ptep) \ +({ \ + pte_t __pte = *(ptep); \ + int __ret = pte_dirty(__pte); \ + if (__ret) \ + set_pte_at((vma)->vm_mm, addr, ptep, pte_mkclean(__pte)); \ + __ret; \ +}) + +#define ptep_test_and_clear_young(vma, addr, ptep) \ +({ \ + pte_t __pte = *(ptep); \ + int __ret = pte_young(__pte); \ + if (__ret) \ + set_pte_at((vma)->vm_mm, addr, ptep, pte_mkold(__pte)); \ + __ret; \ +}) static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t pte = *ptep; if (pte_write(pte)) - set_pte(ptep, pte_wrprotect(pte)); + set_pte_at(mm, addr, ptep, pte_wrprotect(pte)); } /* @@ -403,6 +401,7 @@ static inline int pmd_large(pmd_t pte) { /* to find an entry in a page-table-directory. 
*/ #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) #define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address)) +#define pud_present(pud) (pud_val(pud) & _PAGE_PRESENT) /* PMD - Level 2 access */ #define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK)) @@ -412,9 +411,13 @@ static inline int pmd_large(pmd_t pte) { #define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \ pmd_index(address)) #define pmd_none(x) (!pmd_val(x)) +#ifdef CONFIG_XEN_COMPAT_030002 /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t. can temporarily clear it. */ #define pmd_present(x) (pmd_val(x)) +#else +#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) +#endif #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) #define pmd_bad(x) ((pmd_val(x) & ~(PTE_MASK | _PAGE_USER | _PAGE_PRESENT)) \ != (_KERNPG_TABLE & ~(_PAGE_USER | _PAGE_PRESENT))) @@ -468,25 +471,34 @@ static inline pte_t pte_modify(pte_t pte #define update_mmu_cache(vma,address,pte) do { } while (0) +/* + * Rules for using ptep_establish: the pte MUST be a user pte, and + * must be a present->present transition. + */ +#define __HAVE_ARCH_PTEP_ESTABLISH +#define ptep_establish(vma, address, ptep, pteval) \ + do { \ + if ( likely((vma)->vm_mm == current->mm) ) { \ + BUG_ON(HYPERVISOR_update_va_mapping(address, \ + pteval, \ + (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \ + UVMF_INVLPG|UVMF_MULTI)); \ + } else { \ + xen_l1_entry_update(ptep, pteval); \ + flush_tlb_page(vma, address); \ + } \ + } while (0) + /* We only update the dirty/accessed state if we set * the dirty bit by hand in the kernel, since the hardware * will do the accessed bit for us, and we don't want to * race with other CPU's that might be updating the dirty * bit at the same time. 
*/ #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \ - do { \ - if (__dirty) { \ - if ( likely((__vma)->vm_mm == current->mm) ) { \ - BUG_ON(HYPERVISOR_update_va_mapping(__address, \ - __entry, \ - (unsigned long)(__vma)->vm_mm->cpu_vm_mask.bits| \ - UVMF_INVLPG|UVMF_MULTI)); \ - } else { \ - xen_l1_entry_update(__ptep, __entry); \ - flush_tlb_page(__vma, __address); \ - } \ - } \ +#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ + do { \ + if (dirty) \ + ptep_establish(vma, address, ptep, entry); \ } while (0) /* Encode and de-code a swap entry */ @@ -506,6 +518,8 @@ extern int kern_addr_valid(unsigned long #define DOMID_LOCAL (0xFFFFU) +struct vm_area_struct; + int direct_remap_pfn_range(struct vm_area_struct *vma, unsigned long address, unsigned long mfn, @@ -551,6 +565,7 @@ int touch_pte_range(struct mm_struct *mm #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY #define __HAVE_ARCH_PTEP_GET_AND_CLEAR #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH #define __HAVE_ARCH_PTEP_SET_WRPROTECT #define __HAVE_ARCH_PTE_SAME #include <asm-generic/pgtable.h> diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/setup_arch_post.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/setup_arch_post.h Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/setup_arch_post.h Fri Apr 06 10:08:30 2007 -0600 @@ -39,7 +39,7 @@ static void __init machine_specific_arch ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe); if (ret == 0) ret = HYPERVISOR_callback_op(CALLBACKOP_register, &syscall); -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 if (ret == -ENOSYS) ret = HYPERVISOR_set_callbacks( event.address, @@ -50,7 +50,7 @@ static void __init machine_specific_arch #ifdef CONFIG_X86_LOCAL_APIC ret = HYPERVISOR_callback_op(CALLBACKOP_register, &nmi_cb); -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 if (ret == -ENOSYS) { static struct xennmi_callback __initdata cb = { .handler_address = (unsigned long)nmi diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/include/xen/hvm.h --- a/linux-2.6-xen-sparse/include/xen/hvm.h Fri Apr 06 10:06:30 2007 -0600 +++ b/linux-2.6-xen-sparse/include/xen/hvm.h Fri Apr 06 10:08:30 2007 -0600 @@ -3,7 +3,6 @@ #define XEN_HVM_H__ #include <xen/interface/hvm/params.h> -#include <asm/hypercall.h> static inline unsigned long hvm_get_parameter(int idx) { diff -r 9ec7dadc98ba -r eb3e430242ac linux-2.6-xen-sparse/include/xen/hypercall.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/include/xen/hypercall.h Fri Apr 06 10:08:30 2007 -0600 @@ -0,0 +1,24 @@ +#ifndef __XEN_HYPERCALL_H__ +#define __XEN_HYPERCALL_H__ + +#include <asm/hypercall.h> + +static inline int +HYPERVISOR_multicall_check( + multicall_entry_t *call_list, int nr_calls, + const unsigned long *rc_list) +{ + int rc = HYPERVISOR_multicall(call_list, nr_calls); + + if (unlikely(rc < 0)) + return rc; + BUG_ON(rc); + + for ( ; nr_calls > 0; --nr_calls, ++call_list) + if (unlikely(call_list->result != (rc_list ? 
*rc_list++ : 0))) + return nr_calls; + + return 0; +} + +#endif /* __XEN_HYPERCALL_H__ */ diff -r 9ec7dadc98ba -r eb3e430242ac tools/ioemu/hw/xen_platform.c --- a/tools/ioemu/hw/xen_platform.c Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/ioemu/hw/xen_platform.c Fri Apr 06 10:08:30 2007 -0600 @@ -29,16 +29,10 @@ extern FILE *logfile; -static void platform_ioport_write(void *opaque, uint32_t addr, uint32_t val) -{ - if (val == 0) - qemu_invalidate_map_cache(); -} - static void platform_ioport_map(PCIDevice *pci_dev, int region_num, uint32_t addr, uint32_t size, int type) { - register_ioport_write(addr, 1, 1, platform_ioport_write, NULL); + /* nothing yet */ } static uint32_t platform_mmio_read(void *opaque, target_phys_addr_t addr) diff -r 9ec7dadc98ba -r eb3e430242ac tools/ioemu/target-i386-dm/exec-dm.c --- a/tools/ioemu/target-i386-dm/exec-dm.c Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/ioemu/target-i386-dm/exec-dm.c Fri Apr 06 10:08:30 2007 -0600 @@ -431,14 +431,8 @@ static inline int paddr_is_ram(target_ph { /* Is this guest physical address RAM-backed? */ #if defined(CONFIG_DM) && (defined(__i386__) || defined(__x86_64__)) - if (ram_size <= HVM_BELOW_4G_RAM_END) - /* RAM is contiguous */ - return (addr < ram_size); - else - /* There is RAM below and above the MMIO hole */ - return ((addr < HVM_BELOW_4G_MMIO_START) || - ((addr >= HVM_BELOW_4G_MMIO_START + HVM_BELOW_4G_MMIO_LENGTH) - && (addr < ram_size + HVM_BELOW_4G_MMIO_LENGTH))); + return ((addr < HVM_BELOW_4G_MMIO_START) || + (addr >= HVM_BELOW_4G_MMIO_START + HVM_BELOW_4G_MMIO_LENGTH)); #else return (addr < ram_size); #endif diff -r 9ec7dadc98ba -r eb3e430242ac tools/ioemu/target-i386-dm/helper2.c --- a/tools/ioemu/target-i386-dm/helper2.c Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/ioemu/target-i386-dm/helper2.c Fri Apr 06 10:08:30 2007 -0600 @@ -135,9 +135,6 @@ void cpu_reset(CPUX86State *env) { int xcHandle; int sts; - - /* pause domain first, to avoid repeated reboot request*/ - xc_domain_pause(xc_handle, domid); xcHandle = xc_interface_open(); if (xcHandle < 0) @@ -509,8 +506,11 @@ void __handle_ioreq(CPUState *env, ioreq cpu_ioreq_xchg(env, req); break; case IOREQ_TYPE_TIMEOFFSET: - cpu_ioreq_timeoffset(env, req); - break; + cpu_ioreq_timeoffset(env, req); + break; + case IOREQ_TYPE_INVALIDATE: + qemu_invalidate_map_cache(); + break; default: hw_error("Invalid ioreq type 0x%x\n", req->type); } @@ -597,6 +597,7 @@ int main_loop(void) extern int suspend_requested; CPUState *env = cpu_single_env; int evtchn_fd = xc_evtchn_fd(xce_handle); + char qemu_file[20]; buffered_io_timer = qemu_new_timer(rt_clock, handle_buffered_io, cpu_single_env); @@ -604,52 +605,23 @@ int main_loop(void) qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, env); - while (1) { - if (vm_running) { - if (shutdown_requested) - break; - if (reset_requested) { - qemu_system_reset(); - reset_requested = 0; - } - if (suspend_requested) { - fprintf(logfile, "device model received suspend signal!\n"); - break; - } - } - + while (!(vm_running && suspend_requested)) /* Wait up to 10 msec. 
*/ main_loop_wait(10); - } - if (!suspend_requested) - destroy_hvm_domain(); - else { - char qemu_file[20]; - ioreq_t *req; - int rc; - - sprintf(qemu_file, "/tmp/xen.qemu-dm.%d", domid); - xc_domain_pause(xc_handle, domid); - - /* Pull all outstanding ioreqs through the system */ - handle_buffered_io(env); - main_loop_wait(1); /* For the select() on events */ - - /* Stop the IDE thread */ - ide_stop_dma_thread(); - - /* Make sure that all outstanding IO responses are handled too */ - if ( xc_hvm_drain_io(xc_handle, domid) != 0 ) - { - fprintf(stderr, "error clearing ioreq rings (%s)\n", - strerror(errno)); - return -1; - } - - /* Save the device state */ - if (qemu_savevm(qemu_file) < 0) - fprintf(stderr, "qemu save fail.\n"); - } + + fprintf(logfile, "device model received suspend signal!\n"); + + /* Pull all outstanding ioreqs through the system */ + handle_buffered_io(env); + main_loop_wait(1); /* For the select() on events */ + + /* Stop the IDE thread */ + ide_stop_dma_thread(); + + /* Save the device state */ + sprintf(qemu_file, "/tmp/xen.qemu-dm.%d", domid); + if (qemu_savevm(qemu_file) < 0) + fprintf(stderr, "qemu save fail.\n"); return 0; } diff -r 9ec7dadc98ba -r eb3e430242ac tools/ioemu/vl.c --- a/tools/ioemu/vl.c Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/ioemu/vl.c Fri Apr 06 10:08:30 2007 -0600 @@ -88,6 +88,7 @@ #include "exec-all.h" +#include <xen/hvm/params.h> #define DEFAULT_NETWORK_SCRIPT "/etc/xen/qemu-ifup" #define DEFAULT_BRIDGE "xenbr0" @@ -5886,7 +5887,8 @@ int set_mm_mapping(int xc_handle, uint32 void suspend(int sig) { - fprintf(logfile, "suspend sig handler called with requested=%d!\n", suspend_requested); + fprintf(logfile, "suspend sig handler called with requested=%d!\n", + suspend_requested); if (sig != SIGUSR1) fprintf(logfile, "suspend signal dismatch, get sig=%d!\n", sig); suspend_requested = 1; @@ -5900,32 +5902,28 @@ static unsigned long last_address_index static unsigned long last_address_index = ~0UL; static uint8_t *last_address_vaddr; -static int qemu_map_cache_init(unsigned long nr_pages) -{ - unsigned long max_pages = MAX_MCACHE_SIZE >> PAGE_SHIFT; - int i; - - if (nr_pages < max_pages) - max_pages = nr_pages; - - nr_buckets = max_pages + (1UL << (MCACHE_BUCKET_SHIFT - PAGE_SHIFT)) - 1; - nr_buckets >>= (MCACHE_BUCKET_SHIFT - PAGE_SHIFT); +static int qemu_map_cache_init(void) +{ + unsigned long size; + + nr_buckets = (((MAX_MCACHE_SIZE >> PAGE_SHIFT) + + (1UL << (MCACHE_BUCKET_SHIFT - PAGE_SHIFT)) - 1) >> + (MCACHE_BUCKET_SHIFT - PAGE_SHIFT)); fprintf(logfile, "qemu_map_cache_init nr_buckets = %lx\n", nr_buckets); - mapcache_entry = malloc(nr_buckets * sizeof(struct map_cache)); - if (mapcache_entry == NULL) { + /* + * Use mmap() directly: lets us allocate a big hash table with no up-front + * cost in storage space. The OS will allocate memory only for the buckets + * that we actually use. All others will contain all zeroes. + */ + size = nr_buckets * sizeof(struct map_cache); + size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); + mapcache_entry = mmap(NULL, size, PROT_READ|PROT_WRITE, + MAP_SHARED|MAP_ANONYMOUS, 0, 0); + if (mapcache_entry == MAP_FAILED) { errno = ENOMEM; return -1; } - - memset(mapcache_entry, 0, nr_buckets * sizeof(struct map_cache)); - - /* - * To avoid ENOMEM from xc_map_foreign_batch() at runtime, we - * pre-fill all the map caches in advance. 
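The new qemu_map_cache_init() above leans on a property of anonymous mmap(): the mapping reads back as zeroes and the kernel only backs a page with real memory once it is first touched, so a bucket table sized for the whole MAX_MCACHE_SIZE range costs almost nothing up front. A minimal stand-alone sketch of the same idea, with an illustrative bucket layout and table size rather than the ones used by the patch:

    #include <stdio.h>
    #include <sys/mman.h>

    struct bucket { unsigned long paddr_index; void *vaddr; };  /* illustrative layout */

    int main(void)
    {
        size_t nr_buckets = 1UL << 20;              /* big table, mostly unused */
        size_t size = nr_buckets * sizeof(struct bucket);
        size_t page = 4096;

        size = (size + page - 1) & ~(page - 1);     /* round up to a page boundary */

        /* Anonymous mapping: zero-filled, physical pages allocated lazily. */
        struct bucket *tab = mmap(NULL, size, PROT_READ | PROT_WRITE,
                                  MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        if (tab == MAP_FAILED)
            return 1;

        tab[42].paddr_index = 42;                   /* only this bucket gets backed */
        printf("bucket 0 still reads as zero: %lu\n", tab[0].paddr_index);

        munmap(tab, size);
        return 0;
    }

The same trade-off shows up in the hunk that follows: the pre-fill loop is dropped, so map-cache buckets are now populated on first use rather than at start of day.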
- */ - for (i = 0; i < nr_buckets; i++) - (void)qemu_map_cache(((target_phys_addr_t)i) << MCACHE_BUCKET_SHIFT); return 0; } @@ -6038,11 +6036,14 @@ int main(int argc, char **argv) QEMUMachine *machine; char usb_devices[MAX_USB_CMDLINE][128]; int usb_devices_index; - unsigned long nr_pages, tmp_nr_pages, shared_page_nr; - xen_pfn_t *page_array; + unsigned long ioreq_pfn; extern void *shared_page; extern void *buffered_io_page; +#ifdef __ia64__ + unsigned long nr_pages; + xen_pfn_t *page_array; extern void *buffered_pio_page; +#endif char qemu_dm_logfilename[64]; @@ -6592,47 +6593,36 @@ int main(int argc, char **argv) xc_handle = xc_interface_open(); - nr_pages = ram_size/PAGE_SIZE; - tmp_nr_pages = nr_pages; - #if defined(__i386__) || defined(__x86_64__) - if (ram_size > HVM_BELOW_4G_RAM_END) { - tmp_nr_pages += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT; - shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1; - } else - shared_page_nr = nr_pages - 1; -#endif - -#if defined(__i386__) || defined(__x86_64__) - - if ( qemu_map_cache_init(tmp_nr_pages) ) - { + + if (qemu_map_cache_init()) { fprintf(logfile, "qemu_map_cache_init returned: error %d\n", errno); exit(-1); } + xc_get_hvm_param(xc_handle, domid, HVM_PARAM_IOREQ_PFN, &ioreq_pfn); + fprintf(logfile, "shared page at pfn %lx\n", ioreq_pfn); shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, - PROT_READ|PROT_WRITE, shared_page_nr); + PROT_READ|PROT_WRITE, ioreq_pfn); if (shared_page == NULL) { fprintf(logfile, "map shared IO page returned error %d\n", errno); exit(-1); } - fprintf(logfile, "shared page at pfn:%lx\n", shared_page_nr); - + xc_get_hvm_param(xc_handle, domid, HVM_PARAM_BUFIOREQ_PFN, &ioreq_pfn); + fprintf(logfile, "buffered io page at pfn %lx\n", ioreq_pfn); buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, - PROT_READ|PROT_WRITE, - shared_page_nr - 2); + PROT_READ|PROT_WRITE, ioreq_pfn); if (buffered_io_page == NULL) { fprintf(logfile, "map buffered IO page returned error %d\n", errno); exit(-1); } - fprintf(logfile, "buffered io page at pfn:%lx\n", shared_page_nr - 2); - #elif defined(__ia64__) - page_array = (xen_pfn_t *)malloc(tmp_nr_pages * sizeof(xen_pfn_t)); + nr_pages = ram_size/PAGE_SIZE; + + page_array = (xen_pfn_t *)malloc(nr_pages * sizeof(xen_pfn_t)); if (page_array == NULL) { fprintf(logfile, "malloc returned error %d\n", errno); exit(-1); @@ -6650,7 +6640,7 @@ int main(int argc, char **argv) PROT_READ|PROT_WRITE, BUFFER_PIO_PAGE_START >> PAGE_SHIFT); - for (i = 0; i < tmp_nr_pages; i++) + for (i = 0; i < nr_pages; i++) page_array[i] = i; /* VTI will not use memory between 3G~4G, so we just pass a legal pfn diff -r 9ec7dadc98ba -r eb3e430242ac tools/ioemu/xenstore.c --- a/tools/ioemu/xenstore.c Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/ioemu/xenstore.c Fri Apr 06 10:08:30 2007 -0600 @@ -248,12 +248,8 @@ void xenstore_process_logdirty_event(voi key = (key_t) strtoull(key_terminated, NULL, 16); /* Figure out how bit the log-dirty bitmaps are */ - logdirty_bitmap_size = ((phys_ram_size + 0x20 - - (vga_ram_size + bios_size)) - >> (TARGET_PAGE_BITS)); /* nr of bits in map*/ - if (logdirty_bitmap_size > HVM_BELOW_4G_MMIO_START >> TARGET_PAGE_BITS) - logdirty_bitmap_size += - HVM_BELOW_4G_MMIO_LENGTH >> TARGET_PAGE_BITS; /* still bits */ + logdirty_bitmap_size = xc_memory_op(xc_handle, + XENMEM_maximum_gpfn, &domid) + 1; logdirty_bitmap_size = ((logdirty_bitmap_size + HOST_LONG_BITS - 1) / HOST_LONG_BITS); /* longs */ logdirty_bitmap_size *= sizeof (unsigned long); /* bytes */ @@ 
-272,7 +268,7 @@ void xenstore_process_logdirty_event(voi /* Double-check that the bitmaps are the size we expect */ if (logdirty_bitmap_size != *(uint32_t *)seg) { - fprintf(logfile, "Log-dirty: got %lu, calc %lu\n", + fprintf(logfile, "Log-dirty: got %u, calc %lu\n", *(uint32_t *)seg, logdirty_bitmap_size); return; } @@ -304,7 +300,7 @@ void xenstore_process_logdirty_event(voi fprintf(logfile, "Log-dirty: bad next-active entry: %s\n", act); exit(1); } - logdirty_bitmap = seg + i * logdirty_bitmap_size; + logdirty_bitmap = (unsigned long *)(seg + i * logdirty_bitmap_size); /* Ack that we've switched */ xs_write(xsh, XBT_NULL, active_path, act, len); @@ -612,7 +608,7 @@ int xenstore_vm_write(int domid, char *k path = xs_get_domain_path(xsh, domid); if (path == NULL) { - fprintf(logfile, "xs_get_domain_path(%d): error\n"); + fprintf(logfile, "xs_get_domain_path: error\n"); goto out; } diff -r 9ec7dadc98ba -r eb3e430242ac tools/libxc/Makefile --- a/tools/libxc/Makefile Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/libxc/Makefile Fri Apr 06 10:08:30 2007 -0600 @@ -26,8 +26,8 @@ CTRL_SRCS-$(CONFIG_X86_Linux) += xc_ptra GUEST_SRCS-y := GUEST_SRCS-y += xg_private.c -GUEST_SRCS-$(CONFIG_MIGRATE) += xc_linux_restore.c xc_linux_save.c -GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_restore.c xc_hvm_save.c +GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_linux_save.c +GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_save.c # symlink libelf from xen/common/libelf/ LIBELF_SRCS := libelf-tools.c libelf-loader.c diff -r 9ec7dadc98ba -r eb3e430242ac tools/libxc/ia64/xc_ia64_linux_restore.c --- a/tools/libxc/ia64/xc_ia64_linux_restore.c Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/libxc/ia64/xc_ia64_linux_restore.c Fri Apr 06 10:08:30 2007 -0600 @@ -20,9 +20,6 @@ static unsigned long p2m_size; /* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */ static unsigned long nr_pfns; -/* largest possible value of nr_pfns (i.e. domain's maximum memory size) */ -static unsigned long max_nr_pfns; - static ssize_t read_exact(int fd, void *buf, size_t count) { @@ -62,10 +59,10 @@ read_page(int xc_handle, int io_fd, uint } int -xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, - unsigned long p2msize, unsigned long maxnrpfns, +xc_domain_restore(int xc_handle, int io_fd, uint32_t dom, unsigned int store_evtchn, unsigned long *store_mfn, - unsigned int console_evtchn, unsigned long *console_mfn) + unsigned int console_evtchn, unsigned long *console_mfn, + unsigned int hvm, unsigned int pae) { DECLARE_DOMCTL; int rc = 1, i; @@ -85,12 +82,19 @@ xc_linux_restore(int xc_handle, int io_f /* A temporary mapping of the guest's start_info page. */ start_info_t *start_info; - p2m_size = p2msize; - max_nr_pfns = maxnrpfns; + if (hvm) { + ERROR("HVM Restore is unsupported"); + goto out; + } /* For info only */ nr_pfns = 0; + if ( !read_exact(io_fd, &p2m_size, sizeof(unsigned long)) ) + { + ERROR("read: p2m_size"); + goto out; + } DPRINTF("xc_linux_restore start: p2m_size = %lx\n", p2m_size); if (!read_exact(io_fd, &ver, sizeof(unsigned long))) { @@ -106,11 +110,6 @@ xc_linux_restore(int xc_handle, int io_f /* needed for build domctl, but might as well do early */ ERROR("Unable to mlock ctxt"); return 1; - } - - if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_nr_pfns)) != 0) { - errno = ENOMEM; - goto out; } /* Get pages. 
*/ diff -r 9ec7dadc98ba -r eb3e430242ac tools/libxc/xc_domain_restore.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/xc_domain_restore.c Fri Apr 06 10:08:30 2007 -0600 @@ -0,0 +1,1086 @@ +/****************************************************************************** + * xc_domain_restore.c + * + * Restore the state of a guest session. + * + * Copyright (c) 2003, K A Fraser. + * Copyright (c) 2006, Intel Corporation + * Copyright (c) 2007, XenSource Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <stdlib.h> +#include <unistd.h> + +#include "xg_private.h" +#include "xg_save_restore.h" +#include "xc_dom.h" + +#include <xen/hvm/ioreq.h> +#include <xen/hvm/params.h> + +/* max mfn of the current host machine */ +static unsigned long max_mfn; + +/* virtual starting address of the hypervisor */ +static unsigned long hvirt_start; + +/* #levels of page tables used by the current guest */ +static unsigned int pt_levels; + +/* number of pfns this guest has (i.e. number of entries in the P2M) */ +static unsigned long p2m_size; + +/* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */ +static unsigned long nr_pfns; + +/* Live mapping of the table mapping each PFN to its current MFN. */ +static xen_pfn_t *live_p2m = NULL; + +/* A table mapping each PFN to its new MFN. */ +static xen_pfn_t *p2m = NULL; + +/* A table of P2M mappings in the current region */ +static xen_pfn_t *p2m_batch = NULL; + +static ssize_t +read_exact(int fd, void *buf, size_t count) +{ + int r = 0, s; + unsigned char *b = buf; + + while (r < count) { + s = read(fd, &b[r], count - r); + if ((s == -1) && (errno == EINTR)) + continue; + if (s <= 0) { + break; + } + r += s; + } + + return (r == count) ? 1 : 0; +} + +/* +** In the state file (or during transfer), all page-table pages are +** converted into a 'canonical' form where references to actual mfns +** are replaced with references to the corresponding pfns. +** This function inverts that operation, replacing the pfn values with +** the (now known) appropriate mfn values. +*/ +static int uncanonicalize_pagetable(int xc_handle, uint32_t dom, + unsigned long type, void *page) +{ + int i, pte_last; + unsigned long pfn; + uint64_t pte; + int nr_mfns = 0; + + pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8); + + /* First pass: work out how many (if any) MFNs we need to alloc */ + for(i = 0; i < pte_last; i++) { + + if(pt_levels == 2) + pte = ((uint32_t *)page)[i]; + else + pte = ((uint64_t *)page)[i]; + + /* XXX SMH: below needs fixing for PROT_NONE etc */ + if(!(pte & _PAGE_PRESENT)) + continue; + + pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86; + + if(pfn >= p2m_size) { + /* This "page table page" is probably not one; bail. 
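The "canonical" form described above simply means that the frame-number field of every present PTE in a saved page-table page holds a PFN rather than an MFN; the second pass below swaps the MFN back in while leaving the flag bits alone. A rough stand-alone illustration of that single-entry rewrite, where ADDR_MASK, the p2m[] contents and the helper name are simplified stand-ins rather than the real MADDR_MASK_X86/MFN_MASK_X86 definitions:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT    12
    #define PAGE_PRESENT  0x1ULL
    /* Stand-in for MADDR_MASK_X86: the PTE bits that carry the frame number. */
    #define ADDR_MASK     (0xFFFFFFFFFULL << PAGE_SHIFT)

    /* Toy pfn-to-mfn table for the illustration. */
    static const uint64_t p2m[] = { 0x1a3, 0x2b4, 0x0c5 };

    /* Rewrite one saved (canonical) PTE so it points at a machine frame again. */
    static uint64_t uncanonicalize_pte(uint64_t pte)
    {
        if (!(pte & PAGE_PRESENT))
            return pte;                     /* non-present entries are left alone */

        uint64_t pfn   = (pte & ADDR_MASK) >> PAGE_SHIFT;
        uint64_t flags = pte & ~ADDR_MASK;  /* keep the _PAGE_* flag bits */

        return flags | (p2m[pfn] << PAGE_SHIFT);
    }

    int main(void)
    {
        uint64_t saved = (2ULL << PAGE_SHIFT) | 0x67;   /* PFN 2, typical low flags */
        printf("canonical %#llx -> machine %#llx\n",
               (unsigned long long)saved,
               (unsigned long long)uncanonicalize_pte(saved));
        return 0;
    }

The real function does the same thing in bulk, with the extra first pass so that any PFN seen here that has no machine frame yet can be given one in a single batched allocation call first.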
*/ + ERROR("Frame number in type %lu page table is out of range: " + "i=%d pfn=0x%lx p2m_size=%lu", + type >> 28, i, pfn, p2m_size); + return 0; + } + + if(p2m[pfn] == INVALID_P2M_ENTRY) { + /* Have a 'valid' PFN without a matching MFN - need to alloc */ + p2m_batch[nr_mfns++] = pfn; + } + } + + + /* Allocate the requistite number of mfns */ + if (nr_mfns && xc_domain_memory_populate_physmap( + xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) { + ERROR("Failed to allocate memory for batch.!\n"); + errno = ENOMEM; + return 0; + } + + /* Second pass: uncanonicalize each present PTE */ + nr_mfns = 0; + for(i = 0; i < pte_last; i++) { + + if(pt_levels == 2) + pte = ((uint32_t *)page)[i]; + else + pte = ((uint64_t *)page)[i]; + + /* XXX SMH: below needs fixing for PROT_NONE etc */ + if(!(pte & _PAGE_PRESENT)) + continue; + + pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86; + + if(p2m[pfn] == INVALID_P2M_ENTRY) + p2m[pfn] = p2m_batch[nr_mfns++]; + + pte &= ~MADDR_MASK_X86; + pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT; + + if(pt_levels == 2) + ((uint32_t *)page)[i] = (uint32_t)pte; + else + ((uint64_t *)page)[i] = (uint64_t)pte; + } + + return 1; +} + + +/* Load the p2m frame list, plus potential extended info chunk */ +static xen_pfn_t * load_p2m_frame_list(int io_fd, int *pae_extended_cr3) +{ + xen_pfn_t *p2m_frame_list; + vcpu_guest_context_t ctxt; + + if (!(p2m_frame_list = malloc(P2M_FL_SIZE))) { + ERROR("Couldn't allocate p2m_frame_list array"); + return NULL; + } + + /* Read first entry of P2M list, or extended-info signature (~0UL). */ + if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) { + ERROR("read extended-info signature failed"); + return NULL; + } + + if (p2m_frame_list[0] == ~0UL) { + uint32_t tot_bytes; + + /* Next 4 bytes: total size of following extended info. */ + if (!read_exact(io_fd, &tot_bytes, sizeof(tot_bytes))) { + ERROR("read extended-info size failed"); + return NULL; + } + + while (tot_bytes) { + uint32_t chunk_bytes; + char chunk_sig[4]; + + /* 4-character chunk signature + 4-byte remaining chunk size. */ + if (!read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) || + !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes))) { + ERROR("read extended-info chunk signature failed"); + return NULL; + } + tot_bytes -= 8; + + /* VCPU context structure? */ + if (!strncmp(chunk_sig, "vcpu", 4)) { + if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) { + ERROR("read extended-info vcpu context failed"); + return NULL; + } + tot_bytes -= sizeof(struct vcpu_guest_context); + chunk_bytes -= sizeof(struct vcpu_guest_context); + + if (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3)) + *pae_extended_cr3 = 1; + } + + /* Any remaining bytes of this chunk: read and discard. */ + while (chunk_bytes) { + unsigned long sz = chunk_bytes; + if ( sz > P2M_FL_SIZE ) + sz = P2M_FL_SIZE; + if (!read_exact(io_fd, p2m_frame_list, sz)) { + ERROR("read-and-discard extended-info chunk bytes failed"); + return NULL; + } + chunk_bytes -= sz; + tot_bytes -= sz; + } + } + + /* Now read the real first entry of P2M list. */ + if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) { + ERROR("read first entry of p2m_frame_list failed"); + return NULL; + } + } + + /* First entry is already read into the p2m array. 
*/ + if (!read_exact(io_fd, &p2m_frame_list[1], P2M_FL_SIZE - sizeof(long))) { + ERROR("read p2m_frame_list failed"); + return NULL; + } + + return p2m_frame_list; +} + + + +int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom, + unsigned int store_evtchn, unsigned long *store_mfn, + unsigned int console_evtchn, unsigned long *console_mfn, + unsigned int hvm, unsigned int pae) +{ + DECLARE_DOMCTL; + int rc = 1, i, j, n, m, pae_extended_cr3 = 0; + unsigned long mfn, pfn; + unsigned int prev_pc, this_pc; + int verify = 0; + int nraces = 0; + + /* The new domain's shared-info frame number. */ + unsigned long shared_info_frame; + unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */ + shared_info_t *shared_info = (shared_info_t *)shared_info_page; + + /* A copy of the CPU context of the guest. */ + vcpu_guest_context_t ctxt; + + /* A table containing the type of each PFN (/not/ MFN!). */ + unsigned long *pfn_type = NULL; + + /* A table of MFNs to map in the current region */ + xen_pfn_t *region_mfn = NULL; + + /* Types of the pfns in the current region */ + unsigned long region_pfn_type[MAX_BATCH_SIZE]; + + /* A temporary mapping, and a copy, of one frame of guest memory. */ + unsigned long *page = NULL; + + /* A copy of the pfn-to-mfn table frame list. */ + xen_pfn_t *p2m_frame_list = NULL; + + /* A temporary mapping of the guest's start_info page. */ + start_info_t *start_info; + + /* Our mapping of the current region (batch) */ + char *region_base; + + xc_mmu_t *mmu = NULL; + + /* used by debug verify code */ + unsigned long buf[PAGE_SIZE/sizeof(unsigned long)]; + + struct mmuext_op pin[MAX_PIN_BATCH]; + unsigned int nr_pins; + + uint64_t vcpumap = 1ULL; + unsigned int max_vcpu_id = 0; + int new_ctxt_format = 0; + + /* Magic frames in HVM guests: ioreqs and xenstore comms. */ + uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */ + + /* Buffer for holding HVM context */ + uint8_t *hvm_buf = NULL; + + /* For info only */ + nr_pfns = 0; + + if ( !read_exact(io_fd, &p2m_size, sizeof(unsigned long)) ) + { + ERROR("read: p2m_size"); + goto out; + } + DPRINTF("xc_domain_restore start: p2m_size = %lx\n", p2m_size); + + if ( !hvm ) + { + /* + * XXX For now, 32bit dom0's can only save/restore 32bit domUs + * on 64bit hypervisors. + */ + memset(&domctl, 0, sizeof(domctl)); + domctl.domain = dom; + domctl.cmd = XEN_DOMCTL_set_address_size; + domctl.u.address_size.size = sizeof(unsigned long) * 8; + rc = do_domctl(xc_handle, &domctl); + if ( rc != 0 ) { + ERROR("Unable to set guest address size."); + goto out; + } + rc = 1; + } + + if(!get_platform_info(xc_handle, dom, + &max_mfn, &hvirt_start, &pt_levels)) { + ERROR("Unable to get platform info."); + return 1; + } + + if (lock_pages(&ctxt, sizeof(ctxt))) { + /* needed for build domctl, but might as well do early */ + ERROR("Unable to lock ctxt"); + return 1; + } + + /* Load the p2m frame list, plus potential extended info chunk */ + if ( !hvm ) + { + p2m_frame_list = load_p2m_frame_list(io_fd, &pae_extended_cr3); + if ( !p2m_frame_list ) + goto out; + } + + /* We want zeroed memory so use calloc rather than malloc. 
*/ + p2m = calloc(p2m_size, sizeof(xen_pfn_t)); + pfn_type = calloc(p2m_size, sizeof(unsigned long)); + region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t)); + p2m_batch = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t)); + + if ((p2m == NULL) || (pfn_type == NULL) || + (region_mfn == NULL) || (p2m_batch == NULL)) { + ERROR("memory alloc failed"); + errno = ENOMEM; + goto out; + } + + if (lock_pages(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) { + ERROR("Could not lock region_mfn"); + goto out; + } + + if (lock_pages(p2m_batch, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) { + ERROR("Could not lock p2m_batch"); + goto out; + } + + /* Get the domain's shared-info frame. */ + domctl.cmd = XEN_DOMCTL_getdomaininfo; + domctl.domain = (domid_t)dom; + if (xc_domctl(xc_handle, &domctl) < 0) { + ERROR("Could not get information on new domain"); + goto out; + } + shared_info_frame = domctl.u.getdomaininfo.shared_info_frame; + + /* Mark all PFNs as invalid; we allocate on demand */ + for ( pfn = 0; pfn < p2m_size; pfn++ ) + p2m[pfn] = INVALID_P2M_ENTRY; + + if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) { + ERROR("Could not initialise for MMU updates"); + goto out; + } + + DPRINTF("Reloading memory pages: 0%%\n"); + + /* + * Now simply read each saved frame into its new machine frame. + * We uncanonicalise page tables as we go. + */ + prev_pc = 0; + + n = m = 0; + while (1) { + + int j, nr_mfns = 0; + + this_pc = (n * 100) / p2m_size; + if ( (this_pc - prev_pc) >= 5 ) + { + PPRINTF("\b\b\b\b%3d%%", this_pc); + prev_pc = this_pc; + } + + if (!read_exact(io_fd, &j, sizeof(int))) { + ERROR("Error when reading batch size"); + goto out; + } + + PPRINTF("batch %d\n",j); + + if (j == -1) { + verify = 1; + DPRINTF("Entering page verify mode\n"); + continue; + } + + if (j == -2) { + new_ctxt_format = 1; + if (!read_exact(io_fd, &max_vcpu_id, sizeof(int)) || + (max_vcpu_id >= 64) || + !read_exact(io_fd, &vcpumap, sizeof(uint64_t))) { + ERROR("Error when reading max_vcpu_id"); + goto out; + } + continue; + } + + if (j == 0) + break; /* our work here is done */ + + if (j > MAX_BATCH_SIZE) { + ERROR("Max batch size exceeded. 
Giving up."); + goto out; + } + + if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) { + ERROR("Error when reading region pfn types"); + goto out; + } + + /* First pass for this batch: work out how much memory to alloc */ + nr_mfns = 0; + for ( i = 0; i < j; i++ ) + { + unsigned long pfn, pagetype; + pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; + pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; + + if ( (pagetype != XEN_DOMCTL_PFINFO_XTAB) && + (p2m[pfn] == INVALID_P2M_ENTRY) ) + { + /* Have a live PFN which hasn't had an MFN allocated */ + p2m_batch[nr_mfns++] = pfn; + } + } + + + /* Now allocate a bunch of mfns for this batch */ + if (nr_mfns && xc_domain_memory_populate_physmap( + xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) { + ERROR("Failed to allocate memory for batch.!\n"); + errno = ENOMEM; + goto out; + } + + /* Second pass for this batch: update p2m[] and region_mfn[] */ + nr_mfns = 0; + for ( i = 0; i < j; i++ ) + { + unsigned long pfn, pagetype; + pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; + pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; + + if ( pagetype == XEN_DOMCTL_PFINFO_XTAB) + region_mfn[i] = ~0UL; /* map will fail but we don't care */ + else + { + if (p2m[pfn] == INVALID_P2M_ENTRY) { + /* We just allocated a new mfn above; update p2m */ + p2m[pfn] = p2m_batch[nr_mfns++]; + nr_pfns++; + } + + /* setup region_mfn[] for batch map. + * For HVM guests, this interface takes PFNs, not MFNs */ + region_mfn[i] = hvm ? pfn : p2m[pfn]; + } + } + + /* Map relevant mfns */ + region_base = xc_map_foreign_batch( + xc_handle, dom, PROT_WRITE, region_mfn, j); + + if ( region_base == NULL ) + { + ERROR("map batch failed"); + goto out; + } + + for ( i = 0; i < j; i++ ) + { + void *page; + unsigned long pagetype; + + pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; + pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; + + if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ) + /* a bogus/unmapped page: skip it */ + continue; + + if ( pfn > p2m_size ) + { + ERROR("pfn out of range"); + goto out; + } + + pfn_type[pfn] = pagetype; + + mfn = p2m[pfn]; + + /* In verify mode, we use a copy; otherwise we work in place */ + page = verify ? (void *)buf : (region_base + i*PAGE_SIZE); + + if (!read_exact(io_fd, page, PAGE_SIZE)) { + ERROR("Error when reading page (type was %lx)", pagetype); + goto out; + } + + pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK; + + if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) && + (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) ) + { + /* + ** A page table page - need to 'uncanonicalize' it, i.e. + ** replace all the references to pfns with the corresponding + ** mfns for the new domain. + ** + ** On PAE we need to ensure that PGDs are in MFNs < 4G, and + ** so we may need to update the p2m after the main loop. + ** Hence we defer canonicalization of L1s until then. + */ + if ((pt_levels != 3) || + pae_extended_cr3 || + (pagetype != XEN_DOMCTL_PFINFO_L1TAB)) { + + if (!uncanonicalize_pagetable(xc_handle, dom, + pagetype, page)) { + /* + ** Failing to uncanonicalize a page table can be ok + ** under live migration since the pages type may have + ** changed by now (and we'll get an update later). 
+ */ + DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n", + pagetype >> 28, pfn, mfn); + nraces++; + continue; + } + } + } + else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB ) + { + ERROR("Bogus page type %lx page table is out of range: " + "i=%d p2m_size=%lu", pagetype, i, p2m_size); + goto out; + + } + + + if (verify) { + + int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE); + + if (res) { + + int v; + + DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx " + "actualcs=%08lx\n", pfn, pfn_type[pfn], + csum_page(region_base + i*PAGE_SIZE), + csum_page(buf)); + + for (v = 0; v < 4; v++) { + + unsigned long *p = (unsigned long *) + (region_base + i*PAGE_SIZE); + if (buf[v] != p[v]) + DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]); + } + } + } + + if (!hvm + && xc_add_mmu_update(xc_handle, mmu, + (((unsigned long long)mfn) << PAGE_SHIFT) + | MMU_MACHPHYS_UPDATE, pfn)) { + ERROR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn); + goto out; + } + } /* end of 'batch' for loop */ + + munmap(region_base, j*PAGE_SIZE); + n+= j; /* crude stats */ + + /* + * Discard cache for portion of file read so far up to last + * page boundary every 16MB or so. + */ + m += j; + if ( m > MAX_PAGECACHE_USAGE ) + { + discard_file_cache(io_fd, 0 /* no flush */); + m = 0; + } + } + + /* + * Ensure we flush all machphys updates before potential PAE-specific + * reallocations below. + */ + if (!hvm && xc_finish_mmu_updates(xc_handle, mmu)) { + ERROR("Error doing finish_mmu_updates()"); + goto out; + } + + DPRINTF("Received all pages (%d races)\n", nraces); + + if ( hvm ) + { + uint32_t rec_len; + + /* Set HVM-specific parameters */ + if ( !read_exact(io_fd, magic_pfns, sizeof(magic_pfns)) ) + { + ERROR("error reading magic page addresses"); + goto out; + } + + /* These comms pages need to be zeroed at the start of day */ + if ( xc_clear_domain_page(xc_handle, dom, magic_pfns[0]) || + xc_clear_domain_page(xc_handle, dom, magic_pfns[1]) || + xc_clear_domain_page(xc_handle, dom, magic_pfns[2]) ) + { + ERROR("error zeroing magic pages"); + goto out; + } + + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, magic_pfns[0]); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1]); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, magic_pfns[2]); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn); + *store_mfn = magic_pfns[2]; + + /* Read vcpu contexts */ + for (i = 0; i <= max_vcpu_id; i++) + { + if (!(vcpumap & (1ULL << i))) + continue; + + if ( !read_exact(io_fd, &(ctxt), sizeof(ctxt)) ) + { + ERROR("error read vcpu context.\n"); + goto out; + } + + if ( (rc = xc_vcpu_setcontext(xc_handle, dom, i, &ctxt)) ) + { + ERROR("Could not set vcpu context, rc=%d", rc); + goto out; + } + rc = 1; + } + + /* Read HVM context */ + if ( !read_exact(io_fd, &rec_len, sizeof(uint32_t)) ) + { + ERROR("error read hvm context size!\n"); + goto out; + } + + hvm_buf = malloc(rec_len); + if ( hvm_buf == NULL ) + { + ERROR("memory alloc for hvm context buffer failed"); + errno = ENOMEM; + goto out; + } + + if ( !read_exact(io_fd, hvm_buf, rec_len) ) + { + ERROR("error loading the HVM context"); + goto out; + } + + rc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_len); + if ( rc ) + ERROR("error setting the HVM context"); + + goto out; + } + + /* Non-HVM guests only from here on */ + + if ((pt_levels == 3) && !pae_extended_cr3) { + + /* + ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. 
This + ** is a little awkward and involves (a) finding all such PGDs and + ** replacing them with 'lowmem' versions; (b) upating the p2m[] + ** with the new info; and (c) canonicalizing all the L1s using the + ** (potentially updated) p2m[]. + ** + ** This is relatively slow (and currently involves two passes through + ** the pfn_type[] array), but at least seems to be correct. May wish + ** to consider more complex approaches to optimize this later. + */ + + int j, k; + + /* First pass: find all L3TABs current in > 4G mfns and get new mfns */ + for ( i = 0; i < p2m_size; i++ ) + { + if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) == + XEN_DOMCTL_PFINFO_L3TAB) && + (p2m[i] > 0xfffffUL) ) + { + unsigned long new_mfn; + uint64_t l3ptes[4]; + uint64_t *l3tab; + + l3tab = (uint64_t *) + xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ, p2m[i]); + + for(j = 0; j < 4; j++) + l3ptes[j] = l3tab[j]; + + munmap(l3tab, PAGE_SIZE); + + if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) { + ERROR("Couldn't get a page below 4GB :-("); + goto out; + } + + p2m[i] = new_mfn; + if (xc_add_mmu_update(xc_handle, mmu, + (((unsigned long long)new_mfn) + << PAGE_SHIFT) | + MMU_MACHPHYS_UPDATE, i)) { + ERROR("Couldn't m2p on PAE root pgdir"); + goto out; + } + + l3tab = (uint64_t *) + xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ | PROT_WRITE, p2m[i]); + + for(j = 0; j < 4; j++) + l3tab[j] = l3ptes[j]; + + munmap(l3tab, PAGE_SIZE); + + } + } + + /* Second pass: find all L1TABs and uncanonicalize them */ + j = 0; + + for ( i = 0; i < p2m_size; i++ ) + { + if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) == + XEN_DOMCTL_PFINFO_L1TAB) ) + { + region_mfn[j] = p2m[i]; + j++; + } + + if(i == (p2m_size-1) || j == MAX_BATCH_SIZE) { + + if (!(region_base = xc_map_foreign_batch( + xc_handle, dom, PROT_READ | PROT_WRITE, + region_mfn, j))) { + ERROR("map batch failed"); + goto out; + } + + for(k = 0; k < j; k++) { + if(!uncanonicalize_pagetable(xc_handle, dom, + XEN_DOMCTL_PFINFO_L1TAB, + region_base + k*PAGE_SIZE)) { + ERROR("failed uncanonicalize pt!"); + goto out; + } + } + + munmap(region_base, j*PAGE_SIZE); + j = 0; + } + } + + if (xc_finish_mmu_updates(xc_handle, mmu)) { + ERROR("Error doing finish_mmu_updates()"); + goto out; + } + } + + /* + * Pin page tables. Do this after writing to them as otherwise Xen + * will barf when doing the type-checking. + */ + nr_pins = 0; + for ( i = 0; i < p2m_size; i++ ) + { + if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 ) + continue; + + switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK ) + { + case XEN_DOMCTL_PFINFO_L1TAB: + pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE; + break; + + case XEN_DOMCTL_PFINFO_L2TAB: + pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE; + break; + + case XEN_DOMCTL_PFINFO_L3TAB: + pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE; + break; + + case XEN_DOMCTL_PFINFO_L4TAB: + pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE; + break; + + default: + continue; + } + + pin[nr_pins].arg1.mfn = p2m[i]; + nr_pins++; + + /* Batch full? Then flush. */ + if (nr_pins == MAX_PIN_BATCH) { + if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) { + ERROR("Failed to pin batch of %d page tables", nr_pins); + goto out; + } + nr_pins = 0; + } + } + + /* Flush final partial batch. 
*/ + if ((nr_pins != 0) && (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0)) { + ERROR("Failed to pin batch of %d page tables", nr_pins); + goto out; + } + + DPRINTF("\b\b\b\b100%%\n"); + DPRINTF("Memory reloaded (%ld pages)\n", nr_pfns); + + /* Get the list of PFNs that are not in the psuedo-phys map */ + { + unsigned int count; + unsigned long *pfntab; + int nr_frees, rc; + + if (!read_exact(io_fd, &count, sizeof(count))) { + ERROR("Error when reading pfn count"); + goto out; + } + + if(!(pfntab = malloc(sizeof(unsigned long) * count))) { + ERROR("Out of memory"); + goto out; + } + + if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) { + ERROR("Error when reading pfntab"); + goto out; + } + + nr_frees = 0; + for (i = 0; i < count; i++) { + + unsigned long pfn = pfntab[i]; + + if(p2m[pfn] != INVALID_P2M_ENTRY) { + /* pfn is not in physmap now, but was at some point during + the save/migration process - need to free it */ + pfntab[nr_frees++] = p2m[pfn]; + p2m[pfn] = INVALID_P2M_ENTRY; // not in pseudo-physical map + } + } + + if (nr_frees > 0) { + + struct xen_memory_reservation reservation = { + .nr_extents = nr_frees, + .extent_order = 0, + .domid = dom + }; + set_xen_guest_handle(reservation.extent_start, pfntab); + + if ((rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation, + &reservation)) != nr_frees) { + ERROR("Could not decrease reservation : %d", rc); + goto out; + } else + DPRINTF("Decreased reservation by %d pages\n", count); + } + } + + for (i = 0; i <= max_vcpu_id; i++) { + if (!(vcpumap & (1ULL << i))) + continue; + + if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) { + ERROR("Error when reading ctxt %d", i); + goto out; + } + + if ( !new_ctxt_format ) + ctxt.flags |= VGCF_online; + + if (i == 0) { + /* + * Uncanonicalise the suspend-record frame number and poke + * resume record. + */ + pfn = ctxt.user_regs.edx; + if ((pfn >= p2m_size) || + (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) { + ERROR("Suspend record frame number is bad"); + goto out; + } + ctxt.user_regs.edx = mfn = p2m[pfn]; + start_info = xc_map_foreign_range( + xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn); + start_info->nr_pages = p2m_size; + start_info->shared_info = shared_info_frame << PAGE_SHIFT; + start_info->flags = 0; + *store_mfn = start_info->store_mfn = p2m[start_info->store_mfn]; + start_info->store_evtchn = store_evtchn; + start_info->console.domU.mfn = p2m[start_info->console.domU.mfn]; + start_info->console.domU.evtchn = console_evtchn; + *console_mfn = start_info->console.domU.mfn; + munmap(start_info, PAGE_SIZE); + } + + /* Uncanonicalise each GDT frame number. */ + if (ctxt.gdt_ents > 8192) { + ERROR("GDT entry count out of range"); + goto out; + } + + for (j = 0; (512*j) < ctxt.gdt_ents; j++) { + pfn = ctxt.gdt_frames[j]; + if ((pfn >= p2m_size) || + (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) { + ERROR("GDT frame number is bad"); + goto out; + } + ctxt.gdt_frames[j] = p2m[pfn]; + } + + /* Uncanonicalise the page table base pointer. */ + pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]); + + if (pfn >= p2m_size) { + ERROR("PT base is bad: pfn=%lu p2m_size=%lu type=%08lx", + pfn, p2m_size, pfn_type[pfn]); + goto out; + } + + if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) != + ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) { + ERROR("PT base is bad. 
pfn=%lu nr=%lu type=%08lx %08lx", + pfn, p2m_size, pfn_type[pfn], + (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT); + goto out; + } + + ctxt.ctrlreg[3] = xen_pfn_to_cr3(p2m[pfn]); + + /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */ + if ( (pt_levels == 4) && ctxt.ctrlreg[1] ) + { + pfn = xen_cr3_to_pfn(ctxt.ctrlreg[1]); + + if (pfn >= p2m_size) { + ERROR("User PT base is bad: pfn=%lu p2m_size=%lu type=%08lx", + pfn, p2m_size, pfn_type[pfn]); + goto out; + } + + if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) != + ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) { + ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx", + pfn, p2m_size, pfn_type[pfn], + (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT); + goto out; + } + + ctxt.ctrlreg[1] = xen_pfn_to_cr3(p2m[pfn]); + } + + domctl.cmd = XEN_DOMCTL_setvcpucontext; + domctl.domain = (domid_t)dom; + domctl.u.vcpucontext.vcpu = i; + set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt); + rc = xc_domctl(xc_handle, &domctl); + if (rc != 0) { + ERROR("Couldn't build vcpu%d", i); + goto out; + } + rc = 1; + } + + if (!read_exact(io_fd, shared_info_page, PAGE_SIZE)) { + ERROR("Error when reading shared info page"); + goto out; + } + + /* clear any pending events and the selector */ + memset(&(shared_info->evtchn_pending[0]), 0, + sizeof (shared_info->evtchn_pending)); + for ( i = 0; i < MAX_VIRT_CPUS; i++ ) + shared_info->vcpu_info[i].evtchn_pending_sel = 0; + + /* Copy saved contents of shared-info page. No checking needed. */ + page = xc_map_foreign_range( + xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame); + memcpy(page, shared_info, PAGE_SIZE); + munmap(page, PAGE_SIZE); + + /* Uncanonicalise the pfn-to-mfn table frame-number list. */ + for (i = 0; i < P2M_FL_ENTRIES; i++) { + pfn = p2m_frame_list[i]; + if ((pfn >= p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) { + ERROR("PFN-to-MFN frame number is bad"); + goto out; + } + + p2m_frame_list[i] = p2m[pfn]; + } + + /* Copy the P2M we've constructed to the 'live' P2M */ + if (!(live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_WRITE, + p2m_frame_list, P2M_FL_ENTRIES))) { + ERROR("Couldn't map p2m table"); + goto out; + } + + memcpy(live_p2m, p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT)); + munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT)); + + DPRINTF("Domain ready to be built.\n"); + rc = 0; + + out: + if ( (rc != 0) && (dom != 0) ) + xc_domain_destroy(xc_handle, dom); + free(mmu); + free(p2m); + free(pfn_type); + free(hvm_buf); + + /* discard cache for save file */ + discard_file_cache(io_fd, 1 /*flush*/); + + DPRINTF("Restore exit with rc=%d\n", rc); + + return rc; +} diff -r 9ec7dadc98ba -r eb3e430242ac tools/libxc/xc_hvm_restore.c --- a/tools/libxc/xc_hvm_restore.c Fri Apr 06 10:06:30 2007 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,360 +0,0 @@ -/****************************************************************************** - * xc_hvm_restore.c - * - * Restore the state of a HVM guest. - * - * Copyright (c) 2003, K A Fraser. - * Copyright (c) 2006 Intel Corperation - * rewriten for hvm guest by Zhai Edwin <edwin.zhai@xxxxxxxxx> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -#include <stdlib.h> -#include <unistd.h> - -#include "xg_private.h" -#include "xg_save_restore.h" - -#include <xen/hvm/ioreq.h> -#include <xen/hvm/params.h> -#include <xen/hvm/e820.h> - -/* max mfn of the whole machine */ -static unsigned long max_mfn; - -/* virtual starting address of the hypervisor */ -static unsigned long hvirt_start; - -/* #levels of page tables used by the currrent guest */ -static unsigned int pt_levels; - -/* A list of PFNs that exist, used when allocating memory to the guest */ -static xen_pfn_t *pfns = NULL; - -static ssize_t -read_exact(int fd, void *buf, size_t count) -{ - int r = 0, s; - unsigned char *b = buf; - - while (r < count) { - s = read(fd, &b[r], count - r); - if ((s == -1) && (errno == EINTR)) - continue; - if (s <= 0) { - break; - } - r += s; - } - - return (r == count) ? 1 : 0; -} - -int xc_hvm_restore(int xc_handle, int io_fd, - uint32_t dom, unsigned long max_pfn, - unsigned int store_evtchn, unsigned long *store_mfn, - unsigned int pae, unsigned int apic) -{ - DECLARE_DOMCTL; - - /* A copy of the CPU context of the guest. */ - vcpu_guest_context_t ctxt; - - char *region_base; - - unsigned long buf[PAGE_SIZE/sizeof(unsigned long)]; - - xc_dominfo_t info; - unsigned int rc = 1, n, i; - uint32_t rec_len, nr_vcpus; - uint8_t *hvm_buf = NULL; - unsigned long long v_end; - unsigned long shared_page_nr; - - unsigned long pfn; - unsigned int prev_pc, this_pc; - int verify = 0; - - /* Types of the pfns in the current region */ - unsigned long region_pfn_type[MAX_BATCH_SIZE]; - - /* The size of an array big enough to contain all guest pfns */ - unsigned long pfn_array_size = max_pfn + 1; - - /* Number of pages of memory the guest has. *Not* the same as max_pfn. */ - unsigned long nr_pages = max_pfn; - /* MMIO hole doesn't contain RAM */ - if ( nr_pages >= HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT ) - nr_pages -= HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT; - /* VGA hole doesn't contain RAM */ - nr_pages -= 0x20; - - /* XXX: Unlikely to be true, but matches previous behaviour. 
:( */ - v_end = (nr_pages + 0x20) << PAGE_SHIFT; - - DPRINTF("xc_hvm_restore:dom=%d, nr_pages=0x%lx, store_evtchn=%d, " - "*store_mfn=%ld, pae=%u, apic=%u.\n", - dom, nr_pages, store_evtchn, *store_mfn, pae, apic); - - if(!get_platform_info(xc_handle, dom, - &max_mfn, &hvirt_start, &pt_levels)) { - ERROR("Unable to get platform info."); - return 1; - } - - DPRINTF("xc_hvm_restore start: nr_pages = %lx, max_pfn = %lx, " - "max_mfn = %lx, hvirt_start=%lx, pt_levels=%d\n", - nr_pages, max_pfn, max_mfn, hvirt_start, pt_levels); - - if (mlock(&ctxt, sizeof(ctxt))) { - /* needed for build dom0 op, but might as well do early */ - ERROR("Unable to mlock ctxt"); - return 1; - } - - - pfns = malloc(pfn_array_size * sizeof(xen_pfn_t)); - if (pfns == NULL) { - ERROR("memory alloc failed"); - errno = ENOMEM; - goto out; - } - - if(xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(nr_pages)) != 0) { - errno = ENOMEM; - goto out; - } - - for ( i = 0; i < pfn_array_size; i++ ) - pfns[i] = i; - for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < pfn_array_size; i++ ) - pfns[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT; - - /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */ - rc = xc_domain_memory_populate_physmap( - xc_handle, dom, (nr_pages > 0xa0) ? 0xa0 : nr_pages, - 0, 0, &pfns[0x00]); - if ( (rc == 0) && (nr_pages > 0xc0) ) - rc = xc_domain_memory_populate_physmap( - xc_handle, dom, nr_pages - 0xa0, 0, 0, &pfns[0xc0]); - if ( rc != 0 ) - { - PERROR("Could not allocate memory for HVM guest.\n"); - goto out; - } - - - /**********XXXXXXXXXXXXXXXX******************/ - if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) { - ERROR("Could not get domain info"); - return 1; - } - - domctl.cmd = XEN_DOMCTL_getdomaininfo; - domctl.domain = (domid_t)dom; - if (xc_domctl(xc_handle, &domctl) < 0) { - ERROR("Could not get information on new domain"); - goto out; - } - - prev_pc = 0; - - n = 0; - while (1) { - - int j; - - this_pc = (n * 100) / nr_pages; - if ( (this_pc - prev_pc) >= 5 ) - { - PPRINTF("\b\b\b\b%3d%%", this_pc); - prev_pc = this_pc; - } - - if (!read_exact(io_fd, &j, sizeof(int))) { - ERROR("HVM restore Error when reading batch size"); - goto out; - } - - PPRINTF("batch %d\n",j); - - if (j == -1) { - verify = 1; - DPRINTF("Entering page verify mode\n"); - continue; - } - - if (j == 0) - break; /* our work here is done */ - - if (j > MAX_BATCH_SIZE) { - ERROR("Max batch size exceeded. Giving up."); - goto out; - } - - if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) { - ERROR("Error when reading region pfn types"); - goto out; - } - - region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_WRITE, region_pfn_type, j); - - for ( i = 0; i < j; i++ ) - { - void *page; - - pfn = region_pfn_type[i]; - if ( pfn & XEN_DOMCTL_PFINFO_LTAB_MASK ) - continue; - - if ( pfn > max_pfn ) - { - ERROR("pfn out of range"); - goto out; - } - - if ( pfn >= 0xa0 && pfn < 0xc0) { - ERROR("hvm restore:pfn in vga hole"); - goto out; - } - - - /* In verify mode, we use a copy; otherwise we work in place */ - page = verify ? 
(void *)buf : (region_base + i*PAGE_SIZE); - - if (!read_exact(io_fd, page, PAGE_SIZE)) { - ERROR("Error when reading page (%x)", i); - goto out; - } - - if (verify) { - - int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE); - - if (res) { - - int v; - - DPRINTF("************** pfn=%lx gotcs=%08lx " - "actualcs=%08lx\n", pfn, - csum_page(region_base + i*PAGE_SIZE), - csum_page(buf)); - - for (v = 0; v < 4; v++) { - - unsigned long *p = (unsigned long *) - (region_base + i*PAGE_SIZE); - if (buf[v] != p[v]) - DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]); - } - } - } - - } /* end of 'batch' for loop */ - munmap(region_base, j*PAGE_SIZE); - n+= j; /* crude stats */ - - }/*while 1*/ - - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae); - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn); - - if ( v_end > HVM_BELOW_4G_RAM_END ) - shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1; - else - shared_page_nr = (v_end >> PAGE_SHIFT) - 1; - - /* Ensure we clear these pages */ - if ( xc_clear_domain_page(xc_handle, dom, shared_page_nr) || - xc_clear_domain_page(xc_handle, dom, shared_page_nr-1) || - xc_clear_domain_page(xc_handle, dom, shared_page_nr-2) ) { - rc = -1; - goto out; - } - - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1); - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2); - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr); - - /* caculate the store_mfn , wrong val cause hang when introduceDomain */ - *store_mfn = (v_end >> PAGE_SHIFT) - 2; - DPRINTF("hvm restore: calculate new store_mfn=0x%lx, v_end=0x%llx.\n", - *store_mfn, v_end); - - if (!read_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) { - ERROR("error read nr vcpu !\n"); - goto out; - } - DPRINTF("hvm restore:get nr_vcpus=%d.\n", nr_vcpus); - - for (i =0; i < nr_vcpus; i++) { - if (!read_exact(io_fd, &rec_len, sizeof(uint32_t))) { - ERROR("error read vcpu context size!\n"); - goto out; - } - if (rec_len != sizeof(ctxt)) { - ERROR("vcpu context size dismatch!\n"); - goto out; - } - - if (!read_exact(io_fd, &(ctxt), sizeof(ctxt))) { - ERROR("error read vcpu context.\n"); - goto out; - } - - if ( (rc = xc_vcpu_setcontext(xc_handle, dom, i, &ctxt)) ) { - ERROR("Could not set vcpu context, rc=%d", rc); - goto out; - } - } - - /* restore hvm context including pic/pit/shpage */ - if (!read_exact(io_fd, &rec_len, sizeof(uint32_t))) { - ERROR("error read hvm context size!\n"); - goto out; - } - - hvm_buf = malloc(rec_len); - if (hvm_buf == NULL) { - ERROR("memory alloc for hvm context buffer failed"); - errno = ENOMEM; - goto out; - } - - if (!read_exact(io_fd, hvm_buf, rec_len)) { - ERROR("error read hvm buffer!\n"); - goto out; - } - - if (( rc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_len))) { - ERROR("error set hvm buffer!\n"); - goto out; - } - - rc = 0; - goto out; - - out: - if ( (rc != 0) && (dom != 0) ) - xc_domain_destroy(xc_handle, dom); - free(pfns); - free(hvm_buf); - - DPRINTF("Restore exit with rc=%d\n", rc); - - return rc; -} diff -r 9ec7dadc98ba -r eb3e430242ac tools/libxc/xc_hvm_save.c --- a/tools/libxc/xc_hvm_save.c Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/libxc/xc_hvm_save.c Fri Apr 06 10:08:30 2007 -0600 @@ -45,36 +45,10 @@ #define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */ #define DEF_MAX_FACTOR 3 /* never send more than 3x nr_pfns */ -/* max mfn of the whole machine */ -static unsigned long max_mfn; - -/* virtual starting address of the hypervisor */ -static unsigned long 
hvirt_start; - -/* #levels of page tables used by the current guest */ -static unsigned int pt_levels; - /* Shared-memory bitmaps for getting log-dirty bits from qemu */ static unsigned long *qemu_bitmaps[2]; static int qemu_active; static int qemu_non_active; - -int xc_hvm_drain_io(int handle, domid_t dom) -{ - DECLARE_HYPERCALL; - xen_hvm_drain_io_t arg; - int rc; - - hypercall.op = __HYPERVISOR_hvm_op; - hypercall.arg[0] = HVMOP_drain_io; - hypercall.arg[1] = (unsigned long)&arg; - arg.domid = dom; - if ( lock_pages(&arg, sizeof(arg)) != 0 ) - return -1; - rc = do_xen_hypercall(handle, &hypercall); - unlock_pages(&arg, sizeof(arg)); - return rc; -} /* ** During (live) save/migrate, we maintain a number of bitmaps to track @@ -291,9 +265,8 @@ int xc_hvm_save(int xc_handle, int io_fd xc_dominfo_t info; int rc = 1, i, j, last_iter, iter = 0; - int live = (flags & XCFLAGS_LIVE); - int debug = (flags & XCFLAGS_DEBUG); - int stdvga = (flags & XCFLAGS_STDVGA); + int live = !!(flags & XCFLAGS_LIVE); + int debug = !!(flags & XCFLAGS_DEBUG); int sent_last_iter, skip_this_iter; /* The highest guest-physical frame number used by the current guest */ @@ -302,8 +275,8 @@ int xc_hvm_save(int xc_handle, int io_fd /* The size of an array big enough to contain all guest pfns */ unsigned long pfn_array_size; - /* Other magic frames: ioreqs and xenstore comms */ - unsigned long ioreq_pfn, bufioreq_pfn, store_pfn; + /* Magic frames: ioreqs and xenstore comms. */ + uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */ /* A copy of the CPU context of the guest. */ vcpu_guest_context_t ctxt; @@ -330,122 +303,97 @@ int xc_hvm_save(int xc_handle, int io_fd xc_shadow_op_stats_t stats; - unsigned long total_sent = 0; + unsigned long total_sent = 0; + + uint64_t vcpumap = 1ULL; DPRINTF("xc_hvm_save: dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, " "live=%d, debug=%d.\n", dom, max_iters, max_factor, flags, live, debug); /* If no explicit control parameters given, use defaults */ - if(!max_iters) - max_iters = DEF_MAX_ITERS; - if(!max_factor) - max_factor = DEF_MAX_FACTOR; + max_iters = max_iters ? : DEF_MAX_ITERS; + max_factor = max_factor ? 
: DEF_MAX_FACTOR; initialize_mbit_rate(); - if(!get_platform_info(xc_handle, dom, - &max_mfn, &hvirt_start, &pt_levels)) { - ERROR("HVM:Unable to get platform info."); + if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 ) + { + ERROR("HVM: Could not get domain info"); return 1; } - - if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) { - ERROR("HVM:Could not get domain info"); + nr_vcpus = info.nr_online_vcpus; + + if ( mlock(&ctxt, sizeof(ctxt)) ) + { + ERROR("HVM: Unable to mlock ctxt"); return 1; } - nr_vcpus = info.nr_online_vcpus; - - if (mlock(&ctxt, sizeof(ctxt))) { - ERROR("HVM:Unable to mlock ctxt"); - return 1; - } /* Only have to worry about vcpu 0 even for SMP */ - if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) { - ERROR("HVM:Could not get vcpu context"); - goto out; - } - - /* cheesy sanity check */ - if ((info.max_memkb >> (PAGE_SHIFT - 10)) > max_mfn) { - ERROR("Invalid HVM state record -- pfn count out of range: %lu", - (info.max_memkb >> (PAGE_SHIFT - 10))); - goto out; - } - - if ( xc_get_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, &store_pfn) - || xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, &ioreq_pfn) - || xc_get_hvm_param(xc_handle, dom, - HVM_PARAM_BUFIOREQ_PFN, &bufioreq_pfn) ) - { - ERROR("HVM: Could not read magic PFN parameters"); - goto out; - } - DPRINTF("saved hvm domain info:max_memkb=0x%lx, max_mfn=0x%lx, " - "nr_pages=0x%lx\n", info.max_memkb, max_mfn, info.nr_pages); - - if (live) { - - if (xc_shadow_control(xc_handle, dom, - XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, - NULL, 0, NULL, 0, NULL) < 0) { + if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) ) + { + ERROR("HVM: Could not get vcpu context"); + goto out; + } + + DPRINTF("saved hvm domain info: max_memkb=0x%lx, nr_pages=0x%lx\n", + info.max_memkb, info.nr_pages); + + if ( live ) + { + /* Live suspend. Enable log-dirty mode. */ + if ( xc_shadow_control(xc_handle, dom, + XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, + NULL, 0, NULL, 0, NULL) < 0 ) + { ERROR("Couldn't enable shadow mode"); goto out; } - last_iter = 0; - DPRINTF("hvm domain live migration debug start: logdirty enable.\n"); - } else { - /* This is a non-live suspend. Issue the call back to get the - domain suspended */ - - last_iter = 1; - - /* suspend hvm domain */ - if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt)) { + } + else + { + /* This is a non-live suspend. Suspend the domain .*/ + if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt) ) + { ERROR("HVM Domain appears not to have suspended"); goto out; } } - DPRINTF("after 1st handle hvm domain nr_pages=0x%lx, " + last_iter = !live; + + max_pfn = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom); + + DPRINTF("after 1st handle hvm domain max_pfn=0x%lx, " "max_memkb=0x%lx, live=%d.\n", - info.nr_pages, info.max_memkb, live); - - /* Calculate the highest PFN of "normal" memory: - * HVM memory is sequential except for the VGA and MMIO holes. */ - max_pfn = info.nr_pages - 1; - /* If the domain has a Cirrus framebuffer and we haven't already - * suspended qemu-dm, it will have 8MB of framebuffer memory - * still allocated, which we don't want to copy: qemu will save it - * for us later */ - if ( live && !stdvga ) - max_pfn -= 0x800; - /* Skip the VGA hole from 0xa0000 to 0xc0000 */ - max_pfn += 0x20; - /* Skip the MMIO hole: 256MB just below 4GB */ - if ( max_pfn >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) ) - max_pfn += (HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT); + max_pfn, info.max_memkb, live); /* Size of any array that covers 0 ... 
max_pfn */ pfn_array_size = max_pfn + 1; + if ( !write_exact(io_fd, &pfn_array_size, sizeof(unsigned long)) ) + { + ERROR("Error when writing to state file (1)"); + goto out; + } + /* pretend we sent all the pages last iteration */ sent_last_iter = pfn_array_size; /* calculate the power of 2 order of pfn_array_size, e.g. 15->4 16->4 17->5 */ - for (i = pfn_array_size-1, order_nr = 0; i ; i >>= 1, order_nr++) + for ( i = pfn_array_size-1, order_nr = 0; i ; i >>= 1, order_nr++ ) continue; /* Setup to_send / to_fix and to_skip bitmaps */ to_send = malloc(BITMAP_SIZE); to_skip = malloc(BITMAP_SIZE); - - if (live) { + if ( live ) + { /* Get qemu-dm logging dirty pages too */ void *seg = init_qemu_maps(dom, BITMAP_SIZE); qemu_bitmaps[0] = seg; @@ -462,44 +410,40 @@ int xc_hvm_save(int xc_handle, int io_fd } hvm_buf = malloc(hvm_buf_size); - if (!to_send ||!to_skip ||!hvm_buf) { + if ( !to_send || !to_skip || !hvm_buf ) + { ERROR("Couldn't allocate memory"); goto out; } memset(to_send, 0xff, BITMAP_SIZE); - if (lock_pages(to_send, BITMAP_SIZE)) { + if ( lock_pages(to_send, BITMAP_SIZE) ) + { ERROR("Unable to lock to_send"); return 1; } /* (to fix is local only) */ - if (lock_pages(to_skip, BITMAP_SIZE)) { + if ( lock_pages(to_skip, BITMAP_SIZE) ) + { ERROR("Unable to lock to_skip"); return 1; } analysis_phase(xc_handle, dom, pfn_array_size, to_skip, 0); - /* We want zeroed memory so use calloc rather than malloc. */ pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch)); - - if (pfn_batch == NULL) { + if ( pfn_batch == NULL ) + { ERROR("failed to alloc memory for pfn_batch array"); errno = ENOMEM; goto out; } - /* Start writing out the saved-domain record. */ - if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) { - ERROR("write: max_pfn"); - goto out; - } - - while(1) { - + for ( ; ; ) + { unsigned int prev_pc, sent_this_iter, N, batch; iter++; @@ -510,51 +454,56 @@ int xc_hvm_save(int xc_handle, int io_fd DPRINTF("Saving HVM domain memory pages: iter %d 0%%", iter); - while( N < pfn_array_size ){ - + while ( N < pfn_array_size ) + { unsigned int this_pc = (N * 100) / pfn_array_size; int rc; - if ((this_pc - prev_pc) >= 5) { + if ( (this_pc - prev_pc) >= 5 ) + { DPRINTF("\b\b\b\b%3d%%", this_pc); prev_pc = this_pc; } - /* slightly wasteful to peek the whole array evey time, - but this is fast enough for the moment. */ - if (!last_iter && (rc = xc_shadow_control( + if ( !last_iter ) + { + /* Slightly wasteful to peek the whole array evey time, + but this is fast enough for the moment. */ + rc = xc_shadow_control( xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, - pfn_array_size, NULL, 0, NULL)) != pfn_array_size) { - ERROR("Error peeking HVM shadow bitmap"); - goto out; - } - + pfn_array_size, NULL, 0, NULL); + if ( rc != pfn_array_size ) + { + ERROR("Error peeking HVM shadow bitmap"); + goto out; + } + } /* load pfn_batch[] with the mfn of all the pages we're doing in this batch. 
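An illustrative restatement (editorial sketch, not part of this changeset) of the per-page decision made in the batching loop below: a frame is written this iteration only if the log-dirty bitmap marks it dirty and either it has not been re-dirtied since the PEEK into to_skip, or this is the final iteration. The helper name is hypothetical; test_bit() is the bitmap primitive already used in this loop.

static inline int send_this_iter(unsigned long n, int last_iter,
                                 unsigned long *to_send,
                                 unsigned long *to_skip)
{
    if (!test_bit(n, to_send))
        return 0;                    // not dirty: nothing to send
    if (last_iter)
        return 1;                    // final pass sends every dirty frame
    return !test_bit(n, to_skip);    // re-dirtied since the peek: defer it
}
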
*/ - for (batch = 0; batch < MAX_BATCH_SIZE && N < pfn_array_size; N++){ - + for ( batch = 0; + (batch < MAX_BATCH_SIZE) && (N < pfn_array_size); + N++ ) + { int n = permute(N, pfn_array_size, order_nr); - if (0&&debug) { + if ( 0 && debug ) DPRINTF("%d pfn= %08lx %d \n", iter, (unsigned long)n, test_bit(n, to_send)); - } - - if (!last_iter && test_bit(n, to_send)&& test_bit(n, to_skip)) + + if ( !last_iter && + test_bit(n, to_send) && + test_bit(n, to_skip) ) skip_this_iter++; /* stats keeping */ - if (!((test_bit(n, to_send) && !test_bit(n, to_skip)) || - (test_bit(n, to_send) && last_iter))) + if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) || + (test_bit(n, to_send) && last_iter)) ) continue; /* Skip PFNs that aren't really there */ - if ((n >= 0xa0 && n < 0xc0) /* VGA hole */ - || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) - && n < (1ULL << 32) >> PAGE_SHIFT) /* 4G MMIO hole */ - || n == store_pfn - || n == ioreq_pfn - || n == bufioreq_pfn) + if ( (n >= 0xa0 && n < 0xc0) /* VGA hole */ + || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) && + n < (1ULL << 32) >> PAGE_SHIFT) /* 4G MMIO hole */ ) continue; /* @@ -568,24 +517,27 @@ int xc_hvm_save(int xc_handle, int io_fd batch++; } - if (batch == 0) + if ( batch == 0 ) goto skip; /* vanishingly unlikely... */ - /* map_foreign use pfns now !*/ - if ((region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_READ, pfn_batch, batch)) == 0) { + region_base = xc_map_foreign_batch( + xc_handle, dom, PROT_READ, pfn_batch, batch); + if ( region_base == 0 ) + { ERROR("map batch failed"); goto out; } /* write num of pfns */ - if(!write_exact(io_fd, &batch, sizeof(unsigned int))) { + if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) ) + { ERROR("Error when writing to state file (2)"); goto out; } /* write all the pfns */ - if(!write_exact(io_fd, pfn_batch, sizeof(unsigned long)*batch)) { + if ( !write_exact(io_fd, pfn_batch, sizeof(unsigned long)*batch) ) + { ERROR("Error when writing to state file (3)"); goto out; } @@ -615,21 +567,23 @@ int xc_hvm_save(int xc_handle, int io_fd DPRINTF("\r %d: sent %d, skipped %d, ", iter, sent_this_iter, skip_this_iter ); - if (last_iter) { + if ( last_iter ) + { print_stats( xc_handle, dom, sent_this_iter, &stats, 1); - DPRINTF("Total pages sent= %ld (%.2fx)\n", total_sent, ((float)total_sent)/pfn_array_size ); } - if (last_iter && debug){ + if ( last_iter && debug ) + { int minusone = -1; memset(to_send, 0xff, BITMAP_SIZE); debug = 0; DPRINTF("Entering debug resend-all mode\n"); /* send "-1" to put receiver into debug mode */ - if(!write_exact(io_fd, &minusone, sizeof(int))) { + if ( !write_exact(io_fd, &minusone, sizeof(int)) ) + { ERROR("Error when writing to state file (6)"); goto out; } @@ -637,22 +591,22 @@ int xc_hvm_save(int xc_handle, int io_fd continue; } - if (last_iter) break; - - if (live) { - - - if( - ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || - (iter >= max_iters) || - (sent_this_iter+skip_this_iter < 50) || - (total_sent > pfn_array_size*max_factor) ) { - + if ( last_iter ) + break; + + if ( live ) + { + if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || + (iter >= max_iters) || + (sent_this_iter+skip_this_iter < 50) || + (total_sent > pfn_array_size*max_factor) ) + { DPRINTF("Start last iteration for HVM domain\n"); last_iter = 1; - if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info, - &ctxt)) { + if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, + &ctxt)) + { ERROR("Domain appears not to have suspended"); goto out; } @@ -662,25 +616,30 
@@ int xc_hvm_save(int xc_handle, int io_fd (unsigned long)ctxt.user_regs.edx); } - if (xc_shadow_control(xc_handle, dom, - XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, - pfn_array_size, NULL, - 0, &stats) != pfn_array_size) { + if ( xc_shadow_control(xc_handle, dom, + XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, + pfn_array_size, NULL, + 0, &stats) != pfn_array_size ) + { ERROR("Error flushing shadow PT"); goto out; } /* Pull in the dirty bits from qemu too */ - if (!last_iter) { + if ( !last_iter ) + { qemu_active = qemu_non_active; qemu_non_active = qemu_active ? 0 : 1; qemu_flip_buffer(dom, qemu_active); - for (j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++) { + for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ ) + { to_send[j] |= qemu_bitmaps[qemu_non_active][j]; qemu_bitmaps[qemu_non_active][j] = 0; } - } else { - for (j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++) + } + else + { + for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ ) to_send[j] |= qemu_bitmaps[qemu_active][j]; } @@ -688,61 +647,96 @@ int xc_hvm_save(int xc_handle, int io_fd print_stats(xc_handle, dom, sent_this_iter, &stats, 1); } - - } /* end of while 1 */ DPRINTF("All HVM memory is saved\n"); + + { + struct { + int minustwo; + int max_vcpu_id; + uint64_t vcpumap; + } chunk = { -2, info.max_vcpu_id }; + + if (info.max_vcpu_id >= 64) { + ERROR("Too many VCPUS in guest!"); + goto out; + } + + for (i = 1; i <= info.max_vcpu_id; i++) { + xc_vcpuinfo_t vinfo; + if ((xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) && + vinfo.online) + vcpumap |= 1ULL << i; + } + + chunk.vcpumap = vcpumap; + if(!write_exact(io_fd, &chunk, sizeof(chunk))) { + ERROR("Error when writing to state file (errno %d)", errno); + goto out; + } + } /* Zero terminate */ i = 0; - if (!write_exact(io_fd, &i, sizeof(int))) { + if ( !write_exact(io_fd, &i, sizeof(int)) ) + { ERROR("Error when writing to state file (6)"); goto out; } - - /* save vcpu/vmcs context */ - if (!write_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) { - ERROR("error write nr vcpus"); - goto out; - } - - /*XXX: need a online map to exclude down cpu */ - for (i = 0; i < nr_vcpus; i++) { - - if (xc_vcpu_getcontext(xc_handle, dom, i, &ctxt)) { + /* Save magic-page locations. 
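For orientation (an editorial sketch, not code from this changeset): after the memory iterations the writer emits, in order, the -2 vcpu-map chunk above, a zero batch terminator, the three magic frame numbers set up just below (ioreq, buffered ioreq, xenstore), one vcpu_guest_context_t per online VCPU, and finally a 32-bit length plus the HVM context blob. A consumer of the stream would presumably read the magic frames back in that same order, e.g. using the read_exact() helper these files already rely on:

uint64_t magic[3];   // [0] ioreq pfn, [1] buffered-ioreq pfn, [2] xenstore pfn
if (!read_exact(io_fd, magic, sizeof(magic)))
    goto out;        // framing error: stream is truncated
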
*/ + memset(magic_pfns, 0, sizeof(magic_pfns)); + xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, + (unsigned long *)&magic_pfns[0]); + xc_get_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, + (unsigned long *)&magic_pfns[1]); + xc_get_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, + (unsigned long *)&magic_pfns[2]); + if ( !write_exact(io_fd, magic_pfns, sizeof(magic_pfns)) ) + { + ERROR("Error when writing to state file (7)"); + goto out; + } + + /* save vcpu/vmcs contexts */ + for ( i = 0; i < nr_vcpus; i++ ) + { + if (!(vcpumap & (1ULL << i))) + continue; + + if ( xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) ) + { ERROR("HVM:Could not get vcpu context"); goto out; } - rec_size = sizeof(ctxt); - DPRINTF("write %d vcpucontext of total %d.\n", i, nr_vcpus); - if (!write_exact(io_fd, &rec_size, sizeof(uint32_t))) { - ERROR("error write vcpu ctxt size"); + DPRINTF("write vcpu %d context.\n", i); + if ( !write_exact(io_fd, &(ctxt), sizeof(ctxt)) ) + { + ERROR("write vcpu context failed!\n"); goto out; } - - if (!write_exact(io_fd, &(ctxt), sizeof(ctxt)) ) { - ERROR("write vmcs failed!\n"); - goto out; - } } if ( (rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf, - hvm_buf_size)) == -1) { + hvm_buf_size)) == -1 ) + { ERROR("HVM:Could not get hvm buffer"); goto out; } - if (!write_exact(io_fd, &rec_size, sizeof(uint32_t))) { + if ( !write_exact(io_fd, &rec_size, sizeof(uint32_t)) ) + { ERROR("error write hvm buffer size"); goto out; } - if ( !write_exact(io_fd, hvm_buf, rec_size) ) { + if ( !write_exact(io_fd, hvm_buf, rec_size) ) + { ERROR("write HVM info failed!\n"); + goto out; } /* Success! */ @@ -750,12 +744,11 @@ int xc_hvm_save(int xc_handle, int io_fd out: - if (live) { - if(xc_shadow_control(xc_handle, dom, - XEN_DOMCTL_SHADOW_OP_OFF, - NULL, 0, NULL, 0, NULL) < 0) { + if ( live ) + { + if ( xc_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_OFF, + NULL, 0, NULL, 0, NULL) < 0 ) DPRINTF("Warning - couldn't disable shadow mode"); - } } free(hvm_buf); diff -r 9ec7dadc98ba -r eb3e430242ac tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Fri Apr 06 10:06:30 2007 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,962 +0,0 @@ -/****************************************************************************** - * xc_linux_restore.c - * - * Restore the state of a Linux session. - * - * Copyright (c) 2003, K A Fraser. - */ - -#include <stdlib.h> -#include <unistd.h> - -#include "xg_private.h" -#include "xg_save_restore.h" -#include "xc_dom.h" - -/* max mfn of the current host machine */ -static unsigned long max_mfn; - -/* virtual starting address of the hypervisor */ -static unsigned long hvirt_start; - -/* #levels of page tables used by the current guest */ -static unsigned int pt_levels; - -/* number of pfns this guest has (i.e. number of entries in the P2M) */ -static unsigned long p2m_size; - -/* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */ -static unsigned long nr_pfns; - -/* largest possible value of nr_pfns (i.e. domain's maximum memory size) */ -static unsigned long max_nr_pfns; - -/* Live mapping of the table mapping each PFN to its current MFN. */ -static xen_pfn_t *live_p2m = NULL; - -/* A table mapping each PFN to its new MFN. 
*/ -static xen_pfn_t *p2m = NULL; - -/* A table of P2M mappings in the current region */ -static xen_pfn_t *p2m_batch = NULL; - -static ssize_t -read_exact(int fd, void *buf, size_t count) -{ - int r = 0, s; - unsigned char *b = buf; - - while (r < count) { - s = read(fd, &b[r], count - r); - if ((s == -1) && (errno == EINTR)) - continue; - if (s <= 0) { - break; - } - r += s; - } - - return (r == count) ? 1 : 0; -} - -/* -** In the state file (or during transfer), all page-table pages are -** converted into a 'canonical' form where references to actual mfns -** are replaced with references to the corresponding pfns. -** This function inverts that operation, replacing the pfn values with -** the (now known) appropriate mfn values. -*/ -static int uncanonicalize_pagetable(int xc_handle, uint32_t dom, - unsigned long type, void *page) -{ - int i, pte_last; - unsigned long pfn; - uint64_t pte; - int nr_mfns = 0; - - pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8); - - /* First pass: work out how many (if any) MFNs we need to alloc */ - for(i = 0; i < pte_last; i++) { - - if(pt_levels == 2) - pte = ((uint32_t *)page)[i]; - else - pte = ((uint64_t *)page)[i]; - - /* XXX SMH: below needs fixing for PROT_NONE etc */ - if(!(pte & _PAGE_PRESENT)) - continue; - - pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86; - - if(pfn >= p2m_size) { - /* This "page table page" is probably not one; bail. */ - ERROR("Frame number in type %lu page table is out of range: " - "i=%d pfn=0x%lx p2m_size=%lu", - type >> 28, i, pfn, p2m_size); - return 0; - } - - if(p2m[pfn] == INVALID_P2M_ENTRY) { - /* Have a 'valid' PFN without a matching MFN - need to alloc */ - p2m_batch[nr_mfns++] = pfn; - } - } - - - /* Alllocate the requistite number of mfns */ - if (nr_mfns && xc_domain_memory_populate_physmap( - xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) { - ERROR("Failed to allocate memory for batch.!\n"); - errno = ENOMEM; - return 0; - } - - /* Second pass: uncanonicalize each present PTE */ - nr_mfns = 0; - for(i = 0; i < pte_last; i++) { - - if(pt_levels == 2) - pte = ((uint32_t *)page)[i]; - else - pte = ((uint64_t *)page)[i]; - - /* XXX SMH: below needs fixing for PROT_NONE etc */ - if(!(pte & _PAGE_PRESENT)) - continue; - - pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86; - - if(p2m[pfn] == INVALID_P2M_ENTRY) - p2m[pfn] = p2m_batch[nr_mfns++]; - - pte &= ~MADDR_MASK_X86; - pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT; - - if(pt_levels == 2) - ((uint32_t *)page)[i] = (uint32_t)pte; - else - ((uint64_t *)page)[i] = (uint64_t)pte; - } - - return 1; -} - - -int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, - unsigned long p2msize, unsigned long maxnrpfns, - unsigned int store_evtchn, unsigned long *store_mfn, - unsigned int console_evtchn, unsigned long *console_mfn) -{ - DECLARE_DOMCTL; - int rc = 1, i, j, n, m, pae_extended_cr3 = 0; - unsigned long mfn, pfn; - unsigned int prev_pc, this_pc; - int verify = 0; - int nraces = 0; - - /* The new domain's shared-info frame number. */ - unsigned long shared_info_frame; - unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */ - shared_info_t *shared_info = (shared_info_t *)shared_info_page; - - /* A copy of the CPU context of the guest. */ - vcpu_guest_context_t ctxt; - - /* A table containing the type of each PFN (/not/ MFN!). 
*/ - unsigned long *pfn_type = NULL; - - /* A table of MFNs to map in the current region */ - xen_pfn_t *region_mfn = NULL; - - /* Types of the pfns in the current region */ - unsigned long region_pfn_type[MAX_BATCH_SIZE]; - - /* A temporary mapping, and a copy, of one frame of guest memory. */ - unsigned long *page = NULL; - - /* A copy of the pfn-to-mfn table frame list. */ - xen_pfn_t *p2m_frame_list = NULL; - - /* A temporary mapping of the guest's start_info page. */ - start_info_t *start_info; - - /* Our mapping of the current region (batch) */ - char *region_base; - - xc_mmu_t *mmu = NULL; - - /* used by debug verify code */ - unsigned long buf[PAGE_SIZE/sizeof(unsigned long)]; - - struct mmuext_op pin[MAX_PIN_BATCH]; - unsigned int nr_pins; - - uint64_t vcpumap = 1ULL; - unsigned int max_vcpu_id = 0; - int new_ctxt_format = 0; - - p2m_size = p2msize; - max_nr_pfns = maxnrpfns; - - /* For info only */ - nr_pfns = 0; - - DPRINTF("xc_linux_restore start: p2m_size = %lx\n", p2m_size); - - /* - * XXX For now, 32bit dom0's can only save/restore 32bit domUs - * on 64bit hypervisors. - */ - memset(&domctl, 0, sizeof(domctl)); - domctl.domain = dom; - domctl.cmd = XEN_DOMCTL_set_address_size; - domctl.u.address_size.size = sizeof(unsigned long) * 8; - rc = do_domctl(xc_handle, &domctl); - if ( rc != 0 ) { - ERROR("Unable to set guest address size."); - goto out; - } - - if(!get_platform_info(xc_handle, dom, - &max_mfn, &hvirt_start, &pt_levels)) { - ERROR("Unable to get platform info."); - return 1; - } - - if (lock_pages(&ctxt, sizeof(ctxt))) { - /* needed for build domctl, but might as well do early */ - ERROR("Unable to lock ctxt"); - return 1; - } - - if (!(p2m_frame_list = malloc(P2M_FL_SIZE))) { - ERROR("Couldn't allocate p2m_frame_list array"); - goto out; - } - - /* Read first entry of P2M list, or extended-info signature (~0UL). */ - if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) { - ERROR("read extended-info signature failed"); - goto out; - } - - if (p2m_frame_list[0] == ~0UL) { - uint32_t tot_bytes; - - /* Next 4 bytes: total size of following extended info. */ - if (!read_exact(io_fd, &tot_bytes, sizeof(tot_bytes))) { - ERROR("read extended-info size failed"); - goto out; - } - - while (tot_bytes) { - uint32_t chunk_bytes; - char chunk_sig[4]; - - /* 4-character chunk signature + 4-byte remaining chunk size. */ - if (!read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) || - !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes))) { - ERROR("read extended-info chunk signature failed"); - goto out; - } - tot_bytes -= 8; - - /* VCPU context structure? */ - if (!strncmp(chunk_sig, "vcpu", 4)) { - if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) { - ERROR("read extended-info vcpu context failed"); - goto out; - } - tot_bytes -= sizeof(struct vcpu_guest_context); - chunk_bytes -= sizeof(struct vcpu_guest_context); - - if (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3)) - pae_extended_cr3 = 1; - } - - /* Any remaining bytes of this chunk: read and discard. */ - while (chunk_bytes) { - unsigned long sz = chunk_bytes; - if ( sz > P2M_FL_SIZE ) - sz = P2M_FL_SIZE; - if (!read_exact(io_fd, p2m_frame_list, sz)) { - ERROR("read-and-discard extended-info chunk bytes failed"); - goto out; - } - chunk_bytes -= sz; - tot_bytes -= sz; - } - } - - /* Now read the real first entry of P2M list. */ - if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) { - ERROR("read first entry of p2m_frame_list failed"); - goto out; - } - } - - /* First entry is already read into the p2m array. 
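As an editorial aside (not part of this changeset): the optional extended-info block consumed just above is framed as a ~0UL signature in the first p2m-frame slot, a 32-bit total size, and then a sequence of tagged chunks, each a 4-character signature plus a 32-bit payload length; only the "vcpu" chunk is interpreted here, and any other payload is read and discarded. Conceptually (the code reads the two header fields with separate read_exact() calls rather than as a packed struct):

struct ext_info_chunk {
    char     sig[4];   // e.g. "vcpu" => a vcpu_guest_context_t payload follows
    uint32_t len;      // payload bytes following this header
};
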
*/ - if (!read_exact(io_fd, &p2m_frame_list[1], P2M_FL_SIZE - sizeof(long))) { - ERROR("read p2m_frame_list failed"); - goto out; - } - - /* We want zeroed memory so use calloc rather than malloc. */ - p2m = calloc(p2m_size, sizeof(xen_pfn_t)); - pfn_type = calloc(p2m_size, sizeof(unsigned long)); - region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t)); - p2m_batch = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t)); - - if ((p2m == NULL) || (pfn_type == NULL) || - (region_mfn == NULL) || (p2m_batch == NULL)) { - ERROR("memory alloc failed"); - errno = ENOMEM; - goto out; - } - - if (lock_pages(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) { - ERROR("Could not lock region_mfn"); - goto out; - } - - if (lock_pages(p2m_batch, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) { - ERROR("Could not lock p2m_batch"); - goto out; - } - - /* Get the domain's shared-info frame. */ - domctl.cmd = XEN_DOMCTL_getdomaininfo; - domctl.domain = (domid_t)dom; - if (xc_domctl(xc_handle, &domctl) < 0) { - ERROR("Could not get information on new domain"); - goto out; - } - shared_info_frame = domctl.u.getdomaininfo.shared_info_frame; - - if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_nr_pfns)) != 0) { - errno = ENOMEM; - goto out; - } - - /* Mark all PFNs as invalid; we allocate on demand */ - for ( pfn = 0; pfn < p2m_size; pfn++ ) - p2m[pfn] = INVALID_P2M_ENTRY; - - if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) { - ERROR("Could not initialise for MMU updates"); - goto out; - } - - DPRINTF("Reloading memory pages: 0%%\n"); - - /* - * Now simply read each saved frame into its new machine frame. - * We uncanonicalise page tables as we go. - */ - prev_pc = 0; - - n = m = 0; - while (1) { - - int j, nr_mfns = 0; - - this_pc = (n * 100) / p2m_size; - if ( (this_pc - prev_pc) >= 5 ) - { - PPRINTF("\b\b\b\b%3d%%", this_pc); - prev_pc = this_pc; - } - - if (!read_exact(io_fd, &j, sizeof(int))) { - ERROR("Error when reading batch size"); - goto out; - } - - PPRINTF("batch %d\n",j); - - if (j == -1) { - verify = 1; - DPRINTF("Entering page verify mode\n"); - continue; - } - - if (j == -2) { - new_ctxt_format = 1; - if (!read_exact(io_fd, &max_vcpu_id, sizeof(int)) || - (max_vcpu_id >= 64) || - !read_exact(io_fd, &vcpumap, sizeof(uint64_t))) { - ERROR("Error when reading max_vcpu_id"); - goto out; - } - continue; - } - - if (j == 0) - break; /* our work here is done */ - - if (j > MAX_BATCH_SIZE) { - ERROR("Max batch size exceeded. 
Giving up."); - goto out; - } - - if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) { - ERROR("Error when reading region pfn types"); - goto out; - } - - /* First pass for this batch: work out how much memory to alloc */ - nr_mfns = 0; - for ( i = 0; i < j; i++ ) - { - unsigned long pfn, pagetype; - pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; - pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; - - if ( (pagetype != XEN_DOMCTL_PFINFO_XTAB) && - (p2m[pfn] == INVALID_P2M_ENTRY) ) - { - /* Have a live PFN which hasn't had an MFN allocated */ - p2m_batch[nr_mfns++] = pfn; - } - } - - - /* Now allocate a bunch of mfns for this batch */ - if (nr_mfns && xc_domain_memory_populate_physmap( - xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) { - ERROR("Failed to allocate memory for batch.!\n"); - errno = ENOMEM; - goto out; - } - - /* Second pass for this batch: update p2m[] and region_mfn[] */ - nr_mfns = 0; - for ( i = 0; i < j; i++ ) - { - unsigned long pfn, pagetype; - pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; - pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; - - if ( pagetype == XEN_DOMCTL_PFINFO_XTAB) - region_mfn[i] = ~0UL; /* map will fail but we don't care */ - else - { - if (p2m[pfn] == INVALID_P2M_ENTRY) { - /* We just allocated a new mfn above; update p2m */ - p2m[pfn] = p2m_batch[nr_mfns++]; - nr_pfns++; - } - - /* setup region_mfn[] for batch map */ - region_mfn[i] = p2m[pfn]; - } - } - - /* Map relevant mfns */ - region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_WRITE, region_mfn, j); - - if ( region_base == NULL ) - { - ERROR("map batch failed"); - goto out; - } - - for ( i = 0; i < j; i++ ) - { - void *page; - unsigned long pagetype; - - pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; - pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; - - if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ) - /* a bogus/unmapped page: skip it */ - continue; - - if ( pfn > p2m_size ) - { - ERROR("pfn out of range"); - goto out; - } - - pfn_type[pfn] = pagetype; - - mfn = p2m[pfn]; - - /* In verify mode, we use a copy; otherwise we work in place */ - page = verify ? (void *)buf : (region_base + i*PAGE_SIZE); - - if (!read_exact(io_fd, page, PAGE_SIZE)) { - ERROR("Error when reading page (type was %lx)", pagetype); - goto out; - } - - pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK; - - if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) && - (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) ) - { - /* - ** A page table page - need to 'uncanonicalize' it, i.e. - ** replace all the references to pfns with the corresponding - ** mfns for the new domain. - ** - ** On PAE we need to ensure that PGDs are in MFNs < 4G, and - ** so we may need to update the p2m after the main loop. - ** Hence we defer canonicalization of L1s until then. - */ - if ((pt_levels != 3) || - pae_extended_cr3 || - (pagetype != XEN_DOMCTL_PFINFO_L1TAB)) { - - if (!uncanonicalize_pagetable(xc_handle, dom, - pagetype, page)) { - /* - ** Failing to uncanonicalize a page table can be ok - ** under live migration since the pages type may have - ** changed by now (and we'll get an update later). 
- */ - DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n", - pagetype >> 28, pfn, mfn); - nraces++; - continue; - } - } - } - else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB ) - { - ERROR("Bogus page type %lx page table is out of range: " - "i=%d p2m_size=%lu", pagetype, i, p2m_size); - goto out; - - } - - - if (verify) { - - int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE); - - if (res) { - - int v; - - DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx " - "actualcs=%08lx\n", pfn, pfn_type[pfn], - csum_page(region_base + i*PAGE_SIZE), - csum_page(buf)); - - for (v = 0; v < 4; v++) { - - unsigned long *p = (unsigned long *) - (region_base + i*PAGE_SIZE); - if (buf[v] != p[v]) - DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]); - } - } - } - - if (xc_add_mmu_update(xc_handle, mmu, - (((unsigned long long)mfn) << PAGE_SHIFT) - | MMU_MACHPHYS_UPDATE, pfn)) { - ERROR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn); - goto out; - } - } /* end of 'batch' for loop */ - - munmap(region_base, j*PAGE_SIZE); - n+= j; /* crude stats */ - - /* - * Discard cache for portion of file read so far up to last - * page boundary every 16MB or so. - */ - m += j; - if ( m > MAX_PAGECACHE_USAGE ) - { - discard_file_cache(io_fd, 0 /* no flush */); - m = 0; - } - } - - /* - * Ensure we flush all machphys updates before potential PAE-specific - * reallocations below. - */ - if (xc_finish_mmu_updates(xc_handle, mmu)) { - ERROR("Error doing finish_mmu_updates()"); - goto out; - } - - DPRINTF("Received all pages (%d races)\n", nraces); - - if ((pt_levels == 3) && !pae_extended_cr3) { - - /* - ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This - ** is a little awkward and involves (a) finding all such PGDs and - ** replacing them with 'lowmem' versions; (b) upating the p2m[] - ** with the new info; and (c) canonicalizing all the L1s using the - ** (potentially updated) p2m[]. - ** - ** This is relatively slow (and currently involves two passes through - ** the pfn_type[] array), but at least seems to be correct. May wish - ** to consider more complex approaches to optimize this later. 
- */ - - int j, k; - - /* First pass: find all L3TABs current in > 4G mfns and get new mfns */ - for ( i = 0; i < p2m_size; i++ ) - { - if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) == - XEN_DOMCTL_PFINFO_L3TAB) && - (p2m[i] > 0xfffffUL) ) - { - unsigned long new_mfn; - uint64_t l3ptes[4]; - uint64_t *l3tab; - - l3tab = (uint64_t *) - xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, - PROT_READ, p2m[i]); - - for(j = 0; j < 4; j++) - l3ptes[j] = l3tab[j]; - - munmap(l3tab, PAGE_SIZE); - - if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) { - ERROR("Couldn't get a page below 4GB :-("); - goto out; - } - - p2m[i] = new_mfn; - if (xc_add_mmu_update(xc_handle, mmu, - (((unsigned long long)new_mfn) - << PAGE_SHIFT) | - MMU_MACHPHYS_UPDATE, i)) { - ERROR("Couldn't m2p on PAE root pgdir"); - goto out; - } - - l3tab = (uint64_t *) - xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, - PROT_READ | PROT_WRITE, p2m[i]); - - for(j = 0; j < 4; j++) - l3tab[j] = l3ptes[j]; - - munmap(l3tab, PAGE_SIZE); - - } - } - - /* Second pass: find all L1TABs and uncanonicalize them */ - j = 0; - - for ( i = 0; i < p2m_size; i++ ) - { - if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) == - XEN_DOMCTL_PFINFO_L1TAB) ) - { - region_mfn[j] = p2m[i]; - j++; - } - - if(i == (p2m_size-1) || j == MAX_BATCH_SIZE) { - - if (!(region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_READ | PROT_WRITE, - region_mfn, j))) { - ERROR("map batch failed"); - goto out; - } - - for(k = 0; k < j; k++) { - if(!uncanonicalize_pagetable(xc_handle, dom, - XEN_DOMCTL_PFINFO_L1TAB, - region_base + k*PAGE_SIZE)) { - ERROR("failed uncanonicalize pt!"); - goto out; - } - } - - munmap(region_base, j*PAGE_SIZE); - j = 0; - } - } - - if (xc_finish_mmu_updates(xc_handle, mmu)) { - ERROR("Error doing finish_mmu_updates()"); - goto out; - } - } - - /* - * Pin page tables. Do this after writing to them as otherwise Xen - * will barf when doing the type-checking. - */ - nr_pins = 0; - for ( i = 0; i < p2m_size; i++ ) - { - if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 ) - continue; - - switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK ) - { - case XEN_DOMCTL_PFINFO_L1TAB: - pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE; - break; - - case XEN_DOMCTL_PFINFO_L2TAB: - pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE; - break; - - case XEN_DOMCTL_PFINFO_L3TAB: - pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE; - break; - - case XEN_DOMCTL_PFINFO_L4TAB: - pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE; - break; - - default: - continue; - } - - pin[nr_pins].arg1.mfn = p2m[i]; - nr_pins++; - - /* Batch full? Then flush. */ - if (nr_pins == MAX_PIN_BATCH) { - if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) { - ERROR("Failed to pin batch of %d page tables", nr_pins); - goto out; - } - nr_pins = 0; - } - } - - /* Flush final partial batch. 
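The pinning loop above queues operations into pin[] and flushes whenever MAX_PIN_BATCH are pending; whatever remains after the scan is flushed here. A compact restatement of that flush step (editorial sketch, hypothetical helper name; xc_mmuext_op() is the call used in the surrounding code):

static int flush_pin_batch(int xc_handle, uint32_t dom,
                           struct mmuext_op *pin, unsigned int *nr_pins)
{
    if (*nr_pins == 0)
        return 0;                                   // nothing queued
    if (xc_mmuext_op(xc_handle, pin, *nr_pins, dom) < 0)
        return -1;                                  // hypervisor rejected the batch
    *nr_pins = 0;                                   // start a fresh batch
    return 0;
}
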
*/ - if ((nr_pins != 0) && (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0)) { - ERROR("Failed to pin batch of %d page tables", nr_pins); - goto out; - } - - DPRINTF("\b\b\b\b100%%\n"); - DPRINTF("Memory reloaded (%ld pages of max %ld)\n", nr_pfns, max_nr_pfns); - - /* Get the list of PFNs that are not in the psuedo-phys map */ - { - unsigned int count; - unsigned long *pfntab; - int nr_frees, rc; - - if (!read_exact(io_fd, &count, sizeof(count))) { - ERROR("Error when reading pfn count"); - goto out; - } - - if(!(pfntab = malloc(sizeof(unsigned long) * count))) { - ERROR("Out of memory"); - goto out; - } - - if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) { - ERROR("Error when reading pfntab"); - goto out; - } - - nr_frees = 0; - for (i = 0; i < count; i++) { - - unsigned long pfn = pfntab[i]; - - if(p2m[pfn] != INVALID_P2M_ENTRY) { - /* pfn is not in physmap now, but was at some point during - the save/migration process - need to free it */ - pfntab[nr_frees++] = p2m[pfn]; - p2m[pfn] = INVALID_P2M_ENTRY; // not in pseudo-physical map - } - } - - if (nr_frees > 0) { - - struct xen_memory_reservation reservation = { - .nr_extents = nr_frees, - .extent_order = 0, - .domid = dom - }; - set_xen_guest_handle(reservation.extent_start, pfntab); - - if ((rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation, - &reservation)) != nr_frees) { - ERROR("Could not decrease reservation : %d", rc); - goto out; - } else - DPRINTF("Decreased reservation by %d pages\n", count); - } - } - - for (i = 0; i <= max_vcpu_id; i++) { - if (!(vcpumap & (1ULL << i))) - continue; - - if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) { - ERROR("Error when reading ctxt %d", i); - goto out; - } - - if ( !new_ctxt_format ) - ctxt.flags |= VGCF_online; - - if (i == 0) { - /* - * Uncanonicalise the suspend-record frame number and poke - * resume record. - */ - pfn = ctxt.user_regs.edx; - if ((pfn >= p2m_size) || - (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) { - ERROR("Suspend record frame number is bad"); - goto out; - } - ctxt.user_regs.edx = mfn = p2m[pfn]; - start_info = xc_map_foreign_range( - xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn); - start_info->nr_pages = p2m_size; - start_info->shared_info = shared_info_frame << PAGE_SHIFT; - start_info->flags = 0; - *store_mfn = start_info->store_mfn = p2m[start_info->store_mfn]; - start_info->store_evtchn = store_evtchn; - start_info->console.domU.mfn = p2m[start_info->console.domU.mfn]; - start_info->console.domU.evtchn = console_evtchn; - *console_mfn = start_info->console.domU.mfn; - munmap(start_info, PAGE_SIZE); - } - - /* Uncanonicalise each GDT frame number. */ - if (ctxt.gdt_ents > 8192) { - ERROR("GDT entry count out of range"); - goto out; - } - - for (j = 0; (512*j) < ctxt.gdt_ents; j++) { - pfn = ctxt.gdt_frames[j]; - if ((pfn >= p2m_size) || - (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) { - ERROR("GDT frame number is bad"); - goto out; - } - ctxt.gdt_frames[j] = p2m[pfn]; - } - - /* Uncanonicalise the page table base pointer. */ - pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]); - - if (pfn >= p2m_size) { - ERROR("PT base is bad: pfn=%lu p2m_size=%lu type=%08lx", - pfn, p2m_size, pfn_type[pfn]); - goto out; - } - - if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) != - ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) { - ERROR("PT base is bad. 
pfn=%lu nr=%lu type=%08lx %08lx", - pfn, p2m_size, pfn_type[pfn], - (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT); - goto out; - } - - ctxt.ctrlreg[3] = xen_pfn_to_cr3(p2m[pfn]); - - /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */ - if ( (pt_levels == 4) && ctxt.ctrlreg[1] ) - { - pfn = xen_cr3_to_pfn(ctxt.ctrlreg[1]); - - if (pfn >= p2m_size) { - ERROR("User PT base is bad: pfn=%lu p2m_size=%lu type=%08lx", - pfn, p2m_size, pfn_type[pfn]); - goto out; - } - - if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) != - ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) { - ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx", - pfn, p2m_size, pfn_type[pfn], - (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT); - goto out; - } - - ctxt.ctrlreg[1] = xen_pfn_to_cr3(p2m[pfn]); - } - - domctl.cmd = XEN_DOMCTL_setvcpucontext; - domctl.domain = (domid_t)dom; - domctl.u.vcpucontext.vcpu = i; - set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt); - rc = xc_domctl(xc_handle, &domctl); - if (rc != 0) { - ERROR("Couldn't build vcpu%d", i); - goto out; - } - } - - if (!read_exact(io_fd, shared_info_page, PAGE_SIZE)) { - ERROR("Error when reading shared info page"); - goto out; - } - - /* clear any pending events and the selector */ - memset(&(shared_info->evtchn_pending[0]), 0, - sizeof (shared_info->evtchn_pending)); - for ( i = 0; i < MAX_VIRT_CPUS; i++ ) - shared_info->vcpu_info[i].evtchn_pending_sel = 0; - - /* Copy saved contents of shared-info page. No checking needed. */ - page = xc_map_foreign_range( - xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame); - memcpy(page, shared_info, PAGE_SIZE); - munmap(page, PAGE_SIZE); - - /* Uncanonicalise the pfn-to-mfn table frame-number list. */ - for (i = 0; i < P2M_FL_ENTRIES; i++) { - pfn = p2m_frame_list[i]; - if ((pfn >= p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) { - ERROR("PFN-to-MFN frame number is bad"); - goto out; - } - - p2m_frame_list[i] = p2m[pfn]; - } - - /* Copy the P2M we've constructed to the 'live' P2M */ - if (!(live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_WRITE, - p2m_frame_list, P2M_FL_ENTRIES))) { - ERROR("Couldn't map p2m table"); - goto out; - } - - memcpy(live_p2m, p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT)); - munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT)); - - DPRINTF("Domain ready to be built.\n"); - - out: - if ( (rc != 0) && (dom != 0) ) - xc_domain_destroy(xc_handle, dom); - free(mmu); - free(p2m); - free(pfn_type); - - /* discard cache for save file */ - discard_file_cache(io_fd, 1 /*flush*/); - - DPRINTF("Restore exit with rc=%d\n", rc); - - return rc; -} diff -r 9ec7dadc98ba -r eb3e430242ac tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/libxc/xenguest.h Fri Apr 06 10:08:30 2007 -0600 @@ -38,32 +38,21 @@ int xc_hvm_save(int xc_handle, int io_fd void (*qemu_flip_buffer)(int, int)); /** - * This function will restore a saved domain running Linux. + * This function will restore a saved domain. * * @parm xc_handle a handle to an open hypervisor interface * @parm fd the file descriptor to restore a domain from * @parm dom the id of the domain - * @parm p2m_size number of pages the guest has (i.e. 
number entries in P2M) - * @parm max_nr_pfns domains maximum real memory allocation, in pages * @parm store_evtchn the store event channel for this domain to use * @parm store_mfn returned with the mfn of the store page + * @parm hvm non-zero if this is a HVM restore + * @parm pae non-zero if this HVM domain has PAE support enabled * @return 0 on success, -1 on failure */ -int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, - unsigned long p2m_size, unsigned long max_nr_pfns, - unsigned int store_evtchn, unsigned long *store_mfn, - unsigned int console_evtchn, unsigned long *console_mfn); - -/** - * This function will restore a saved hvm domain running unmodified guest. - * - * @parm store_mfn pass mem size & returned with the mfn of the store page - * @return 0 on success, -1 on failure - */ -int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom, - unsigned long max_pfn, unsigned int store_evtchn, - unsigned long *store_mfn, - unsigned int pae, unsigned int apic); +int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom, + unsigned int store_evtchn, unsigned long *store_mfn, + unsigned int console_evtchn, unsigned long *console_mfn, + unsigned int hvm, unsigned int pae); /** * This function will create a domain for a paravirtualized Linux @@ -159,8 +148,6 @@ int xc_get_hvm_param( int xc_get_hvm_param( int handle, domid_t dom, int param, unsigned long *value); -int xc_hvm_drain_io(int handle, domid_t dom); - /* PowerPC specific. */ int xc_prose_build(int xc_handle, uint32_t domid, diff -r 9ec7dadc98ba -r eb3e430242ac tools/libxc/xg_private.c --- a/tools/libxc/xg_private.c Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/libxc/xg_private.c Fri Apr 06 10:08:30 2007 -0600 @@ -209,16 +209,6 @@ __attribute__((weak)) return -1; } -__attribute__((weak)) - int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom, - unsigned long max_pfn, unsigned int store_evtchn, - unsigned long *store_mfn, - unsigned int pae, unsigned int apic) -{ - errno = ENOSYS; - return -1; -} - __attribute__((weak)) int xc_get_hvm_param( int handle, domid_t dom, int param, unsigned long *value) { @@ -227,11 +217,6 @@ __attribute__((weak)) int xc_get_hvm_par __attribute__((weak)) int xc_set_hvm_param( int handle, domid_t dom, int param, unsigned long value) -{ - return -ENOSYS; -} - -__attribute__((weak)) int xc_hvm_drain_io(int handle, domid_t dom) { return -ENOSYS; } diff -r 9ec7dadc98ba -r eb3e430242ac tools/libxen/include/xen_host.h --- a/tools/libxen/include/xen_host.h Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/libxen/include/xen_host.h Fri Apr 06 10:08:30 2007 -0600 @@ -436,6 +436,13 @@ xen_host_dmesg(xen_session *session, cha /** + * List all supported methods. + */ +extern bool +xen_host_list_methods(xen_session *session, struct xen_string_set **result); + + +/** * Return a list of all the hosts known to the system. */ extern bool diff -r 9ec7dadc98ba -r eb3e430242ac tools/libxen/include/xen_sr.h --- a/tools/libxen/include/xen_sr.h Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/libxen/include/xen_sr.h Fri Apr 06 10:08:30 2007 -0600 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, XenSource Inc. + * Copyright (c) 2006-2007, XenSource Inc. 
* * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -22,6 +22,7 @@ #include "xen_common.h" #include "xen_pbd_decl.h" #include "xen_sr_decl.h" +#include "xen_string_set.h" #include "xen_vdi_decl.h" @@ -72,7 +73,7 @@ typedef struct xen_sr_record int64_t physical_utilisation; int64_t physical_size; char *type; - char *location; + char *content_type; } xen_sr_record; /** @@ -169,20 +170,6 @@ xen_sr_get_by_uuid(xen_session *session, /** - * Create a new SR instance, and return its handle. - */ -extern bool -xen_sr_create(xen_session *session, xen_sr *result, xen_sr_record *record); - - -/** - * Destroy the specified SR instance. - */ -extern bool -xen_sr_destroy(xen_session *session, xen_sr sr); - - -/** * Get all the SR instances with the given label. */ extern bool @@ -253,10 +240,10 @@ xen_sr_get_type(xen_session *session, ch /** - * Get the location field of the given SR. - */ -extern bool -xen_sr_get_location(xen_session *session, char **result, xen_sr sr); + * Get the content_type field of the given SR. + */ +extern bool +xen_sr_get_content_type(xen_session *session, char **result, xen_sr sr); /** @@ -274,11 +261,10 @@ xen_sr_set_name_description(xen_session /** - * Take an exact copy of the Storage Repository; the cloned storage - * repository has the same type as its parent - */ -extern bool -xen_sr_clone(xen_session *session, xen_sr *result, xen_sr sr, char *loc, char *name); + * Return a set of all the SR types supported by the system. + */ +extern bool +xen_sr_get_supported_types(xen_session *session, struct xen_string_set **result); /** diff -r 9ec7dadc98ba -r eb3e430242ac tools/libxen/include/xen_vdi.h --- a/tools/libxen/include/xen_vdi.h Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/libxen/include/xen_vdi.h Fri Apr 06 10:08:30 2007 -0600 @@ -338,21 +338,6 @@ xen_vdi_remove_from_other_config(xen_ses /** - * Take an exact copy of the VDI; the snapshot lives in the same - * Storage Repository as its parent. - */ -extern bool -xen_vdi_snapshot(xen_session *session, xen_vdi *result, xen_vdi vdi); - - -/** - * Resize the vdi to the size. - */ -extern bool -xen_vdi_resize(xen_session *session, xen_vdi vdi, int64_t size); - - -/** * Return a list of all the VDIs known to the system. 
*/ extern bool diff -r 9ec7dadc98ba -r eb3e430242ac tools/libxen/src/xen_common.c --- a/tools/libxen/src/xen_common.c Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/libxen/src/xen_common.c Fri Apr 06 10:08:30 2007 -0600 @@ -102,6 +102,9 @@ add_struct_value(const struct abstract_t const char *), const char *, xmlNode *); +static xmlNode * +add_container(xmlNode *parent, const char *name); + static void call_raw(xen_session *, const char *, abstract_value [], int, const abstract_type *, void *); @@ -1290,6 +1293,48 @@ make_body_add_type(enum abstract_typenam } break; + case MAP: + { + const struct struct_member *member = v->type->members; + arbitrary_map *map_val = v->u.struct_val; + xmlNode *param_node = add_param_struct(params_node); + for (size_t i = 0; i < map_val->size; i++) { + enum abstract_typename typename_key = member[0].type->typename; + enum abstract_typename typename_val = member[1].type->typename; + int offset_key = member[0].offset; + int offset_val = member[1].offset; + int struct_size = v->type->struct_size; + + switch (typename_key) { + case STRING: { + char **addr = (void *)(map_val + 1) + + (i * struct_size) + + offset_key; + char *key = *addr; + + switch (typename_val) { + case STRING: { + char *val; + addr = (void *)(map_val + 1) + + (i * struct_size) + + offset_val; + val = *addr; + add_struct_member(param_node, key, "string", val); + break; + } + default: + assert(false); + } + break; + } + default: + assert(false); + } + } + } + break; + + default: assert(false); } diff -r 9ec7dadc98ba -r eb3e430242ac tools/libxen/src/xen_host.c --- a/tools/libxen/src/xen_host.c Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/libxen/src/xen_host.c Fri Apr 06 10:08:30 2007 -0600 @@ -740,6 +740,18 @@ xen_host_dmesg(xen_session *session, cha bool +xen_host_list_methods(xen_session *session, struct xen_string_set **result) +{ + + abstract_type result_type = abstract_type_string_set; + + *result = NULL; + xen_call_(session, "host.list_methods", NULL, 0, &result_type, result); + return session->ok; +} + + +bool xen_host_get_all(xen_session *session, struct xen_host_set **result) { diff -r 9ec7dadc98ba -r eb3e430242ac tools/libxen/src/xen_sr.c --- a/tools/libxen/src/xen_sr.c Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/libxen/src/xen_sr.c Fri Apr 06 10:08:30 2007 -0600 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, XenSource Inc. + * Copyright (c) 2006-2007, XenSource Inc. 
* * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -65,9 +65,9 @@ static const struct_member xen_sr_record { .key = "type", .type = &abstract_type_string, .offset = offsetof(xen_sr_record, type) }, - { .key = "location", - .type = &abstract_type_string, - .offset = offsetof(xen_sr_record, location) } + { .key = "content_type", + .type = &abstract_type_string, + .offset = offsetof(xen_sr_record, content_type) } }; const abstract_type xen_sr_record_abstract_type_ = @@ -94,7 +94,7 @@ xen_sr_record_free(xen_sr_record *record xen_vdi_record_opt_set_free(record->vdis); xen_pbd_record_opt_set_free(record->pbds); free(record->type); - free(record->location); + free(record->content_type); free(record); } @@ -140,37 +140,6 @@ xen_sr_get_by_uuid(xen_session *session, bool -xen_sr_create(xen_session *session, xen_sr *result, xen_sr_record *record) -{ - abstract_value param_values[] = - { - { .type = &xen_sr_record_abstract_type_, - .u.struct_val = record } - }; - - abstract_type result_type = abstract_type_string; - - *result = NULL; - XEN_CALL_("SR.create"); - return session->ok; -} - - -bool -xen_sr_destroy(xen_session *session, xen_sr sr) -{ - abstract_value param_values[] = - { - { .type = &abstract_type_string, - .u.string_val = sr } - }; - - xen_call_(session, "SR.destroy", param_values, 1, NULL, NULL); - return session->ok; -} - - -bool xen_sr_get_by_name_label(xen_session *session, struct xen_sr_set **result, char *label) { abstract_value param_values[] = @@ -239,6 +208,23 @@ xen_sr_get_vdis(xen_session *session, st bool +xen_sr_get_pbds(xen_session *session, struct xen_pbd_set **result, xen_sr sr) +{ + abstract_value param_values[] = + { + { .type = &abstract_type_string, + .u.string_val = sr } + }; + + abstract_type result_type = abstract_type_string_set; + + *result = NULL; + XEN_CALL_("SR.get_PBDs"); + return session->ok; +} + + +bool xen_sr_get_virtual_allocation(xen_session *session, int64_t *result, xen_sr sr) { abstract_value param_values[] = @@ -304,18 +290,18 @@ xen_sr_get_type(xen_session *session, ch bool -xen_sr_get_location(xen_session *session, char **result, xen_sr sr) -{ - abstract_value param_values[] = - { - { .type = &abstract_type_string, - .u.string_val = sr } - }; - - abstract_type result_type = abstract_type_string; - - *result = NULL; - XEN_CALL_("SR.get_location"); +xen_sr_get_content_type(xen_session *session, char **result, xen_sr sr) +{ + abstract_value param_values[] = + { + { .type = &abstract_type_string, + .u.string_val = sr } + }; + + abstract_type result_type = abstract_type_string; + + *result = NULL; + XEN_CALL_("SR.get_content_type"); return session->ok; } @@ -353,22 +339,13 @@ xen_sr_set_name_description(xen_session bool -xen_sr_clone(xen_session *session, xen_sr *result, xen_sr sr, char *loc, char *name) -{ - abstract_value param_values[] = - { - { .type = &abstract_type_string, - .u.string_val = sr }, - { .type = &abstract_type_string, - .u.string_val = loc }, - { .type = &abstract_type_string, - .u.string_val = name } - }; - - abstract_type result_type = abstract_type_string; - - *result = NULL; - XEN_CALL_("SR.clone"); +xen_sr_get_supported_types(xen_session *session, struct xen_string_set **result) +{ + + abstract_type result_type = abstract_type_string_set; + + *result = NULL; + xen_call_(session, "SR.get_supported_types", NULL, 0, &result_type, result); return session->ok; } diff -r 9ec7dadc98ba -r eb3e430242ac tools/libxen/src/xen_vdi.c --- a/tools/libxen/src/xen_vdi.c 
Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/libxen/src/xen_vdi.c Fri Apr 06 10:08:30 2007 -0600 @@ -508,39 +508,6 @@ xen_vdi_remove_from_other_config(xen_ses bool -xen_vdi_snapshot(xen_session *session, xen_vdi *result, xen_vdi vdi) -{ - abstract_value param_values[] = - { - { .type = &abstract_type_string, - .u.string_val = vdi } - }; - - abstract_type result_type = abstract_type_string; - - *result = NULL; - XEN_CALL_("VDI.snapshot"); - return session->ok; -} - - -bool -xen_vdi_resize(xen_session *session, xen_vdi vdi, int64_t size) -{ - abstract_value param_values[] = - { - { .type = &abstract_type_string, - .u.string_val = vdi }, - { .type = &abstract_type_int, - .u.int_val = size } - }; - - xen_call_(session, "VDI.resize", param_values, 2, NULL, NULL); - return session->ok; -} - - -bool xen_vdi_get_all(xen_session *session, struct xen_vdi_set **result) { diff -r 9ec7dadc98ba -r eb3e430242ac tools/libxen/test/test_bindings.c --- a/tools/libxen/test/test_bindings.c Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/libxen/test/test_bindings.c Fri Apr 06 10:08:30 2007 -0600 @@ -64,6 +64,7 @@ typedef struct static xen_vm create_new_vm(xen_session *session, bool hvm); static void print_session_info(xen_session *session); +static void print_methods(xen_session *session); static void print_vm_power_state(xen_session *session, xen_vm vm); static void print_vm_metrics(xen_session *session, xen_vm vm); @@ -159,6 +160,14 @@ int main(int argc, char **argv) xen_session_login_with_password(call_func, NULL, username, password); print_session_info(session); + if (!session->ok) + { + /* Error has been logged, just clean up. */ + CLEANUP; + return 1; + } + + print_methods(session); if (!session->ok) { /* Error has been logged, just clean up. */ @@ -644,6 +653,40 @@ static void print_session_info(xen_sessi } +static int pstrcmp(const void *p1, const void *p2) +{ + return strcmp(*(char **)p1, *(char **)p2); +} + + +/** + * Print the list of supported methods. + */ +static void print_methods(xen_session *session) +{ + xen_string_set *methods; + + if (!xen_host_list_methods(session, &methods)) + { + print_error(session); + goto done; + } + + printf("%zd.\n", methods->size); + qsort(methods->contents, methods->size, sizeof(char *), pstrcmp); + + printf("Supported methods:\n"); + for (size_t i = 0; i < methods->size; i++) + { + printf(" %s\n", methods->contents[i]); + } + fflush(stdout); + +done: + xen_string_set_free(methods); +} + + /** * Print the metrics for the given VM. 
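print_methods() above sorts the string set returned by host.list_methods with qsort() and the pstrcmp() helper. The double indirection matters: qsort() hands the comparator pointers to the array elements, and each element is itself a char *, so the comparator must dereference once before calling strcmp(). A standalone version of the same pattern (the method names are just sample data):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* qsort() passes pointers to the elements; the elements are char *,
     * so p1 and p2 are really pointers to char *. */
    static int pstrcmp(const void *p1, const void *p2)
    {
        return strcmp(*(char *const *)p1, *(char *const *)p2);
    }

    int main(void)
    {
        char *methods[] = { "host.list_methods", "SR.get_supported_types",
                            "VM.start", "PBD.create" };
        size_t n = sizeof(methods) / sizeof(methods[0]);

        qsort(methods, n, sizeof(char *), pstrcmp);
        for (size_t i = 0; i < n; i++)
            printf("  %s\n", methods[i]);
        return 0;
    }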
*/ diff -r 9ec7dadc98ba -r eb3e430242ac tools/python/xen/xend/XendAPI.py --- a/tools/python/xen/xend/XendAPI.py Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/python/xen/xend/XendAPI.py Fri Apr 06 10:08:30 2007 -0600 @@ -26,20 +26,22 @@ import time import time import xmlrpclib -from xen.xend import XendDomain, XendDomainInfo, XendNode, XendDmesg -from xen.xend import XendLogging, XendTaskManager - -from xen.xend.XendAPIVersion import * -from xen.xend.XendAuthSessions import instance as auth_manager -from xen.xend.XendError import * -from xen.xend.XendClient import ERROR_INVALID_DOMAIN -from xen.xend.XendLogging import log -from xen.xend.XendNetwork import XendNetwork -from xen.xend.XendTask import XendTask -from xen.xend.XendPIFMetrics import XendPIFMetrics -from xen.xend.XendVMMetrics import XendVMMetrics - -from xen.xend.XendAPIConstants import * +import XendDomain, XendDomainInfo, XendNode, XendDmesg +import XendLogging, XendTaskManager + +from XendAPIVersion import * +from XendAuthSessions import instance as auth_manager +from XendError import * +from XendClient import ERROR_INVALID_DOMAIN +from XendLogging import log +from XendNetwork import XendNetwork +from XendTask import XendTask +from XendPIFMetrics import XendPIFMetrics +from XendVMMetrics import XendVMMetrics + +import XendPBD + +from XendAPIConstants import * from xen.util.xmlrpclib2 import stringify from xen.util.blkif import blkdev_name_to_number @@ -394,6 +396,17 @@ def valid_sr(func): _check_ref(lambda r: XendNode.instance().is_valid_sr, 'SR', func, *args, **kwargs) +def valid_pbd(func): + """Decorator to verify if pbd_ref is valid before calling + method. + + @param func: function with params: (self, session, pbd_ref) + @rtype: callable object + """ + return lambda *args, **kwargs: \ + _check_ref(lambda r: r in XendPBD.get_all_refs(), + 'PBD', func, *args, **kwargs) + def valid_pif(func): """Decorator to verify if pif_ref is valid before calling method. 
@@ -479,6 +492,7 @@ classes = { 'VTPM' : valid_vtpm, 'console' : valid_console, 'SR' : valid_sr, + 'PBD' : valid_pbd, 'PIF' : valid_pif, 'PIF_metrics' : valid_pif_metrics, 'task' : valid_task, @@ -488,6 +502,7 @@ autoplug_classes = { autoplug_classes = { 'network' : XendNetwork, 'VM_metrics' : XendVMMetrics, + 'PBD' : XendPBD.XendPBD, 'PIF_metrics' : XendPIFMetrics, } @@ -774,7 +789,6 @@ class XendAPI(object): 'progress', 'type', 'result', - 'error_code', 'error_info', 'allowed_operations', 'session' @@ -809,10 +823,6 @@ class XendAPI(object): task = XendTaskManager.get_task(task_ref) return xen_api_success(task.result) - def task_get_error_code(self, session, task_ref): - task = XendTaskManager.get_task(task_ref) - return xen_api_success(task.error_code) - def task_get_error_info(self, session, task_ref): task = XendTaskManager.get_task(task_ref) return xen_api_success(task.error_info) @@ -843,6 +853,7 @@ class XendAPI(object): host_attr_ro = ['software_version', 'resident_VMs', + 'PBDs', 'PIFs', 'host_CPUs', 'cpu_configuration', @@ -870,7 +881,8 @@ class XendAPI(object): ('get_log', 'String'), ('send_debug_keys', None)] - host_funcs = [('get_by_name_label', 'Set(host)')] + host_funcs = [('get_by_name_label', None), + ('list_methods', None)] # attributes def host_get_name_label(self, session, host_ref): @@ -913,6 +925,8 @@ class XendAPI(object): return xen_api_success(XendNode.instance().xen_version()) def host_get_resident_VMs(self, session, host_ref): return xen_api_success(XendDomain.instance().get_domain_refs()) + def host_get_PBDs(self, _, ref): + return xen_api_success(XendPBD.get_all_refs()) def host_get_PIFs(self, session, ref): return xen_api_success(XendNode.instance().get_PIF_refs()) def host_get_host_CPUs(self, session, host_ref): @@ -925,8 +939,6 @@ class XendAPI(object): return xen_api_success(['pygrub']) def host_get_sched_policy(self, _, host_ref): return xen_api_success(XendNode.instance().get_vcpus_policy()) - def host_set_sched_policy(self, _, host_ref, policy): - return xen_api_todo() def host_get_cpu_configuration(self, _, host_ref): return xen_api_success(XendNode.instance().get_cpu_configuration()) @@ -992,6 +1004,12 @@ class XendAPI(object): return xen_api_success((XendNode.instance().uuid,)) return xen_api_success([]) + def host_list_methods(self, _): + def _funcs(): + return [getattr(XendAPI, x) for x in XendAPI.__dict__] + + return xen_api_success([x.api for x in _funcs() + if hasattr(x, 'api')]) # Xen API: Class host_CPU # ---------------------------------------------------------------- @@ -2061,8 +2079,8 @@ class XendAPI(object): vif_ref = dom.create_vif(vif_struct) xendom.managed_config_save(dom) return xen_api_success(vif_ref) - except XendError: - return xen_api_error(XEND_ERROR_TODO) + except XendError, exn: + return xen_api_error(['INTERNAL_ERROR', str(exn)]) def VIF_destroy(self, session, vif_ref): xendom = XendDomain.instance() @@ -2169,7 +2187,7 @@ class XendAPI(object): 'other_config'] VDI_attr_inst = VDI_attr_ro + VDI_attr_rw - VDI_methods = [('snapshot', 'VDI'), ('destroy', None)] + VDI_methods = [('destroy', None)] VDI_funcs = [('create', 'VDI'), ('get_by_name_label', 'Set(VDI)')] @@ -2233,8 +2251,6 @@ class XendAPI(object): return xen_api_success_void() # Object Methods - def VDI_snapshot(self, session, vdi_ref): - return xen_api_todo() def VDI_destroy(self, session, vdi_ref): sr = XendNode.instance().get_sr_containing_vdi(vdi_ref) @@ -2349,8 +2365,8 @@ class XendAPI(object): vtpm_ref = dom.create_vtpm(vtpm_struct) xendom.managed_config_save(dom) 
return xen_api_success(vtpm_ref) - except XendError: - return xen_api_error(XEND_ERROR_TODO) + except XendError, exn: + return xen_api_error(['INTERNAL_ERROR', str(exn)]) else: return xen_api_error(['HANDLE_INVALID', 'VM', vtpm_struct['VM']]) @@ -2424,8 +2440,8 @@ class XendAPI(object): console_ref = dom.create_console(console_struct) xendom.managed_config_save(dom) return xen_api_success(console_ref) - except XendError, e: - return xen_api_error([XEND_ERROR_TODO, str(e)]) + except XendError, exn: + return xen_api_error(['INTERNAL_ERROR', str(exn)]) # Xen API: Class SR # ---------------------------------------------------------------- @@ -2434,18 +2450,17 @@ class XendAPI(object): 'physical_utilisation', 'physical_size', 'type', - 'location'] + 'content_type'] SR_attr_rw = ['name_label', 'name_description'] SR_attr_inst = ['physical_size', 'type', - 'location', 'name_label', 'name_description'] - SR_methods = [('clone', 'SR'), ('destroy', None)] + SR_methods = [] SR_funcs = [('get_by_name_label', 'Set(SR)'), ('get_by_uuid', 'SR')] @@ -2456,15 +2471,10 @@ class XendAPI(object): def SR_get_by_name_label(self, session, label): return xen_api_success(XendNode.instance().get_sr_by_name(label)) - def SR_create(self, session): - return xen_api_error(XEND_ERROR_UNSUPPORTED) + def SR_get_supported_types(self, _): + return xen_api_success(['local', 'qcow_file']) # Class Methods - def SR_clone(self, session, sr_ref): - return xen_api_error(XEND_ERROR_UNSUPPORTED) - - def SR_destroy(self, session, sr_ref): - return xen_api_error(XEND_ERROR_UNSUPPORTED) def SR_get_record(self, session, sr_ref): sr = XendNode.instance().get_sr(sr_ref) @@ -2497,8 +2507,8 @@ class XendAPI(object): def SR_get_type(self, _, ref): return self._get_SR_attr(ref, 'type') - def SR_get_location(self, _, ref): - return self._get_SR_attr(ref, 'location') + def SR_get_content_type(self, _, ref): + return self._get_SR_attr(ref, 'content_type') def SR_get_name_label(self, _, ref): return self._get_SR_attr(ref, 'name_label') @@ -2519,6 +2529,33 @@ class XendAPI(object): sr.name_description = value XendNode.instance().save() return xen_api_success_void() + + + # Xen API: Class PBD + # ---------------------------------------------------------------- + + PBD_attr_ro = ['host', + 'SR', + 'device_config', + 'currently_attached'] + PBD_attr_rw = [] + PBD_methods = [('destroy', None)] + PBD_funcs = [('create', None)] + + def PBD_get_all(self, _): + return xen_api_success(XendPBD.get_all_refs()) + + def _PBD_get(self, _, ref): + return XendPBD.get(ref) + + def PBD_create(self, _, record): + if 'uuid' in record: + return xen_api_error(['VALUE_NOT_SUPPORTED', + 'uuid', record['uuid'], + 'You may not specify a UUID on creation']) + new_uuid = XendPBD.XendPBD(record).get_uuid() + XendNode.instance().save() + return xen_api_success(new_uuid) # Xen API: Class event @@ -2572,15 +2609,6 @@ class XendAPI(object): def debug_get_record(self, session, debug_ref): return xen_api_success({'uuid': debug_ref}) - - - def list_all_methods(self, _): - def _funcs(): - return [getattr(XendAPI, x) for x in XendAPI.__dict__] - - return xen_api_success([x.api for x in _funcs() - if hasattr(x, 'api')]) - list_all_methods.api = '_UNSUPPORTED_list_all_methods' class XendAPIAsyncProxy: diff -r 9ec7dadc98ba -r eb3e430242ac tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/python/xen/xend/XendCheckpoint.py Fri Apr 06 10:08:30 2007 -0600 @@ -91,7 +91,7 @@ def save(fd, dominfo, network, live, dst 
# more information. cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd), str(dominfo.getDomid()), "0", "0", - str(int(live) | (int(hvm) << 2) | (int(stdvga) << 3)) ] + str(int(live) | (int(hvm) << 2)) ] log.debug("[xc_save]: %s", string.join(cmd)) def saveInputHandler(line, tochild): @@ -187,7 +187,6 @@ def restore(xd, fd, dominfo = None, paus assert console_port nr_pfns = (dominfo.getMemoryTarget() + 3) / 4 - max_nr_pfns = (dominfo.getMemoryMaximum() + 3) / 4 # if hvm, pass mem size to calculate the store_mfn image_cfg = dominfo.info.get('image', {}) @@ -202,27 +201,21 @@ def restore(xd, fd, dominfo = None, paus pae = 0 try: - l = read_exact(fd, sizeof_unsigned_long, - "not a valid guest state file: pfn count read") - p2m_size = unpack("L", l)[0] # native sizeof long - - if p2m_size > 16*1024*1024: # XXX - raise XendError( - "not a valid guest state file: pfn count out of range") - shadow = dominfo.info['shadow_memory'] - log.debug("restore:shadow=0x%x, _static_max=0x%x, _static_min=0x%x, " - "p2m_size=0x%x.", dominfo.info['shadow_memory'], + log.debug("restore:shadow=0x%x, _static_max=0x%x, _static_min=0x%x, ", + dominfo.info['shadow_memory'], dominfo.info['memory_static_max'], - dominfo.info['memory_static_min'], p2m_size) + dominfo.info['memory_static_min']) balloon.free(xc.pages_to_kib(nr_pfns) + shadow * 1024) shadow_cur = xc.shadow_mem_control(dominfo.getDomid(), shadow) dominfo.info['shadow_memory'] = shadow_cur + xc.domain_setmaxmem(dominfo.getDomid(), dominfo.getMemoryMaximum()) + cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE), - fd, dominfo.getDomid(), p2m_size, max_nr_pfns, + fd, dominfo.getDomid(), store_port, console_port, int(is_hvm), pae, apic]) log.debug("[xc_restore]: %s", string.join(cmd)) diff -r 9ec7dadc98ba -r eb3e430242ac tools/python/xen/xend/XendLocalStorageRepo.py --- a/tools/python/xen/xend/XendLocalStorageRepo.py Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/python/xen/xend/XendLocalStorageRepo.py Fri Apr 06 10:08:30 2007 -0600 @@ -30,13 +30,13 @@ import struct import struct from xen.util import mkdir -from xen.xend import uuid -from xen.xend.XendError import XendError -from xen.xend.XendVDI import * -from xen.xend.XendTask import XendTask -from xen.xend.XendStorageRepository import XendStorageRepository -from xen.xend.XendStateStore import XendStateStore -from xen.xend.XendOptions import instance as xendoptions +import uuid +from XendError import XendError +from XendVDI import * +from XendTask import XendTask +from XendStorageRepository import XendStorageRepository +from XendStateStore import XendStateStore +from XendOptions import instance as xendoptions MB = 1024 * 1024 @@ -58,8 +58,7 @@ class XendLocalStorageRepo(XendStorageRe """ XendStorageRepository.__init__(self, sr_uuid, sr_type, - name_label, name_description, - '/') + name_label, name_description) self.state = XendStateStore(xendoptions().get_xend_state_path() + '/local_sr') diff -r 9ec7dadc98ba -r eb3e430242ac tools/python/xen/xend/XendNode.py --- a/tools/python/xen/xend/XendNode.py Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/python/xen/xend/XendNode.py Fri Apr 06 10:08:30 2007 -0600 @@ -22,17 +22,18 @@ import xen.lowlevel.xc from xen.util import Brctl -from xen.xend import uuid, arch -from xen.xend.XendError import * -from xen.xend.XendOptions import instance as xendoptions -from xen.xend.XendQCoWStorageRepo import XendQCoWStorageRepo -from xen.xend.XendLocalStorageRepo import XendLocalStorageRepo -from xen.xend.XendLogging import log -from xen.xend.XendPIF import * -from xen.xend.XendPIFMetrics import 
XendPIFMetrics -from xen.xend.XendNetwork import * -from xen.xend.XendStateStore import XendStateStore -from xen.xend.XendMonitor import XendMonitor +import uuid, arch +import XendPBD +from XendError import * +from XendOptions import instance as xendoptions +from XendQCoWStorageRepo import XendQCoWStorageRepo +from XendLocalStorageRepo import XendLocalStorageRepo +from XendLogging import log +from XendPIF import * +from XendPIFMetrics import XendPIFMetrics +from XendNetwork import * +from XendStateStore import XendStateStore +from XendMonitor import XendMonitor class XendNode: """XendNode - Represents a Domain 0 Host.""" @@ -193,13 +194,14 @@ class XendNode: saved_srs = self.state_store.load_state('sr') if saved_srs: for sr_uuid, sr_cfg in saved_srs.items(): + log.error("SAved SRS %s %s", sr_uuid, sr_cfg['type']) if sr_cfg['type'] == 'qcow_file': self.srs[sr_uuid] = XendQCoWStorageRepo(sr_uuid) - elif sr_cfg['type'] == 'local_image': + elif sr_cfg['type'] == 'local': self.srs[sr_uuid] = XendLocalStorageRepo(sr_uuid) # Create missing SRs if they don't exist - if not self.get_sr_by_type('local_image'): + if not self.get_sr_by_type('local'): image_sr_uuid = uuid.createString() self.srs[image_sr_uuid] = XendLocalStorageRepo(image_sr_uuid) @@ -207,6 +209,11 @@ class XendNode: qcow_sr_uuid = uuid.createString() self.srs[qcow_sr_uuid] = XendQCoWStorageRepo(qcow_sr_uuid) + saved_pbds = self.state_store.load_state('pbd') + if saved_pbds: + for pbd_uuid, pbd_cfg in saved_pbds.items(): + pbd_cfg['uuid'] = pbd_uuid + XendPBD.XendPBD(pbd_cfg) def network_create(self, record, persist = True, net_uuid = None): @@ -280,6 +287,7 @@ class XendNode: self.state_store.save_state('cpu', self.cpus) self.save_PIFs() self.save_networks() + self.save_PBDs() self.save_SRs() def save_PIFs(self): @@ -291,6 +299,11 @@ class XendNode: net_records = dict([(k, v.get_record_internal(False)) for k, v in self.networks.items()]) self.state_store.save_state('network', net_records) + + def save_PBDs(self): + pbd_records = dict([(v.get_uuid(), v.get_record()) + for v in XendPBD.get_all()]) + self.state_store.save_state('pbd', pbd_records) def save_SRs(self): sr_records = dict([(k, v.get_record(transient = False)) diff -r 9ec7dadc98ba -r eb3e430242ac tools/python/xen/xend/XendPBD.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/python/xen/xend/XendPBD.py Fri Apr 06 10:08:30 2007 -0600 @@ -0,0 +1,79 @@ +#============================================================================ +# This library is free software; you can redistribute it and/or +# modify it under the terms of version 2.1 of the GNU Lesser General Public +# License as published by the Free Software Foundation. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +#============================================================================ +# Copyright (c) 2007 Xensource Inc. 
+#============================================================================ + + +import uuid +from XendLogging import log + + +attr_inst = ['uuid', + 'host', + 'SR', + 'device_config'] +attr_ro = attr_inst + ['currently_attached'] + + +_all = {} + + +def get(ref): + return _all[ref] + + +def get_all(): + return _all.values() + + +def get_all_refs(): + return _all.keys() + + +def get_by_SR(sr_ref): + return [k for (k, v) in _all.items() if v.get_SR() == sr_ref] + + +class XendPBD: + """Physical block devices.""" + + def __init__(self, record): + if 'uuid' not in record: + record['uuid'] = uuid.createString() + + import XendAPI + for v in attr_inst: + setattr(self, v, record[v]) + self.currently_attached = True + _all[record['uuid']] = self + + + def destroy(self): + if self.uuid in _all: + del _all[self.uuid] + + + def get_record(self): + import XendAPI + result = {} + for v in attr_ro: + result[v] = getattr(self, v) + return result + + +for v in attr_ro: + def f(v_): + setattr(XendPBD, 'get_' + v_, lambda s: getattr(s, v_)) + f(v) diff -r 9ec7dadc98ba -r eb3e430242ac tools/python/xen/xend/XendQCoWStorageRepo.py --- a/tools/python/xen/xend/XendQCoWStorageRepo.py Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/python/xen/xend/XendQCoWStorageRepo.py Fri Apr 06 10:08:30 2007 -0600 @@ -29,12 +29,13 @@ import struct import struct from xen.util import mkdir -from xen.xend import uuid -from xen.xend.XendError import XendError -from xen.xend.XendVDI import * -from xen.xend.XendTask import XendTask -from xen.xend.XendStorageRepository import XendStorageRepository -from xen.xend.XendOptions import instance as xendoptions +import uuid +import XendPBD +from XendError import XendError +from XendVDI import * +from XendTask import XendTask +from XendStorageRepository import XendStorageRepository +from XendOptions import instance as xendoptions XEND_STORAGE_NO_MAXIMUM = sys.maxint XEND_STORAGE_QCOW_FILENAME = "%s.qcow" @@ -72,7 +73,6 @@ class XendQCoWStorageRepo(XendStorageRep sr_type = "qcow_file", name_label = "QCoW", name_description = "Xend QCoW Storage Repository", - location = xendoptions().get_xend_storage_path(), storage_max = XEND_STORAGE_NO_MAXIMUM): """ @keyword storage_max: Maximum disk space to use in bytes. 
@@ -85,9 +85,9 @@ class XendQCoWStorageRepo(XendStorageRep """ XendStorageRepository.__init__(self, sr_uuid, sr_type, name_label, - name_description, location, - storage_max) + name_description, storage_max) self.storage_free = 0 + self.location = xendoptions().get_xend_storage_path() self._refresh() def get_record(self, transient = True): @@ -98,8 +98,9 @@ class XendQCoWStorageRepo(XendStorageRep 'physical_utilisation': self.physical_utilisation, 'physical_size': self.physical_size, 'type': self.type, - 'location': self.location, - 'VDIs': self.images.keys()} + 'content_type': self.content_type, + 'VDIs': self.images.keys(), + 'PBDs': XendPBD.get_by_SR(self.uuid)} if self.physical_size == XEND_STORAGE_NO_MAXIMUM: stfs = os.statvfs(self.location) diff -r 9ec7dadc98ba -r eb3e430242ac tools/python/xen/xend/XendStateStore.py --- a/tools/python/xen/xend/XendStateStore.py Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/python/xen/xend/XendStateStore.py Fri Apr 06 10:08:30 2007 -0600 @@ -204,7 +204,7 @@ class XendStateStore: if type(val) == dict: for val_uuid in val.keys(): val_node = doc.createElement(key) - if key == 'other_config': + if key in ['other_config', 'device_config']: val_node.setAttribute('key', str(val_uuid)) val_node.setAttribute('value', str(val[val_uuid])) else: diff -r 9ec7dadc98ba -r eb3e430242ac tools/python/xen/xend/XendStorageRepository.py --- a/tools/python/xen/xend/XendStorageRepository.py Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/python/xen/xend/XendStorageRepository.py Fri Apr 06 10:08:30 2007 -0600 @@ -22,8 +22,9 @@ import threading import threading import sys -from xen.xend.XendError import XendError -from xen.xend.XendVDI import * +from XendError import XendError +from XendVDI import * +import XendPBD XEND_STORAGE_NO_MAXIMUM = sys.maxint @@ -34,7 +35,6 @@ class XendStorageRepository: sr_type = "unknown", name_label = 'Unknown', name_description = 'Not Implemented', - location = '', storage_max = XEND_STORAGE_NO_MAXIMUM): """ @keyword storage_max: Maximum disk space to use in bytes. @@ -49,7 +49,6 @@ class XendStorageRepository: # XenAPI Parameters self.uuid = uuid self.type = sr_type - self.location = location self.name_label = name_label self.name_description = name_description self.images = {} @@ -57,6 +56,7 @@ class XendStorageRepository: self.physical_size = storage_max self.physical_utilisation = 0 self.virtual_allocation = 0 + self.content_type = '' self.lock = threading.RLock() @@ -68,9 +68,10 @@ class XendStorageRepository: 'physical_utilisation': self.physical_utilisation, 'physical_size': self.physical_size, 'type': self.type, - 'location': self.location, + 'content_type': self.content_type, 'VDIs': self.images.keys()} - + if not transient: + retval ['PBDs'] = XendPBD.get_by_SR(self.uuid) return retval diff -r 9ec7dadc98ba -r eb3e430242ac tools/python/xen/xend/XendTask.py --- a/tools/python/xen/xend/XendTask.py Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/python/xen/xend/XendTask.py Fri Apr 06 10:08:30 2007 -0600 @@ -24,7 +24,7 @@ class XendTask(threading.Thread): """Represents a Asynchronous Task used by Xen API. Basically proxies the callable object in a thread and returns the - results via self.{type,result,error_code,error_info}. + results via self.{type,result,error_info}. @cvar task_progress: Thread local storage for progress tracking. It is a dict indexed by thread_id. 
Note that the @@ -71,7 +71,6 @@ class XendTask(threading.Thread): self.uuid = uuid self.result = None - self.error_code = '' self.error_info = [] self.name_label = label or func.__name__ @@ -118,13 +117,11 @@ class XendTask(threading.Thread): self.result = result['Value'] self.set_status(XEN_API_TASK_STATUS_TYPE[1]) else: - self.error_code = result['ErrorDescription'][0] - self.error_info = result['ErrorDescription'][1:] + self.error_info = result['ErrorDescription'] self.set_status(XEN_API_TASK_STATUS_TYPE[2]) except Exception, e: log.exception('Error running Async Task') - self.error_code = 'INTERNAL ERROR' - self.error_info = [str(e)] + self.error_info = ['INTERNAL ERROR', str(e)] self.set_status(XEN_API_TASK_STATUS_TYPE[2]) self.task_progress_lock.acquire() @@ -144,7 +141,6 @@ class XendTask(threading.Thread): 'progress': self.get_progress(), 'type': self.type, 'result': self.result, - 'error_code': self.error_code, 'error_info': self.error_info, 'allowed_operations': {}, 'session': self.session, diff -r 9ec7dadc98ba -r eb3e430242ac tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/python/xen/xend/image.py Fri Apr 06 10:08:30 2007 -0600 @@ -426,11 +426,14 @@ class HVMImageHandler(ImageHandler): # Execute device model. #todo: Error handling args = [self.device_model] - args = args + ([ "-d", "%d" % self.vm.getDomid(), - "-m", "%s" % (self.getRequiredInitialReservation() / 1024)]) + args = args + ([ "-d", "%d" % self.vm.getDomid() ]) + if arch.type == "ia64": + args = args + ([ "-m", "%s" % + (self.getRequiredInitialReservation() / 1024) ]) args = args + self.dmargs if restore: - args = args + ([ "-loadvm", "/tmp/xen.qemu-dm.%d" % self.vm.getDomid() ]) + args = args + ([ "-loadvm", "/tmp/xen.qemu-dm.%d" % + self.vm.getDomid() ]) env = dict(os.environ) if self.display: env['DISPLAY'] = self.display diff -r 9ec7dadc98ba -r eb3e430242ac tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/python/xen/xm/main.py Fri Apr 06 10:08:30 2007 -0600 @@ -591,7 +591,7 @@ class Shell(cmd.Cmd): self.prompt = "xm> " if serverType == SERVER_XEN_API: try: - res = server.xenapi._UNSUPPORTED_list_all_methods() + res = server.xenapi.host.list_methods() for f in res: setattr(Shell, 'do_' + f + ' ', self.default) except: diff -r 9ec7dadc98ba -r eb3e430242ac tools/xcutils/xc_restore.c --- a/tools/xcutils/xc_restore.c Fri Apr 06 10:06:30 2007 -0600 +++ b/tools/xcutils/xc_restore.c Fri Apr 06 10:08:30 2007 -0600 @@ -21,37 +21,31 @@ main(int argc, char **argv) unsigned int xc_fd, io_fd, domid, store_evtchn, console_evtchn; unsigned int hvm, pae, apic; int ret; - unsigned long p2m_size, max_nr_pfns, store_mfn, console_mfn; + unsigned long store_mfn, console_mfn; - if (argc != 10) - errx(1, "usage: %s iofd domid p2m_size max_nr_pfns store_evtchn " + if ( argc != 8 ) + errx(1, "usage: %s iofd domid store_evtchn " "console_evtchn hvm pae apic", argv[0]); xc_fd = xc_interface_open(); - if (xc_fd < 0) + if ( xc_fd < 0 ) errx(1, "failed to open control interface"); io_fd = atoi(argv[1]); domid = atoi(argv[2]); - p2m_size = atoi(argv[3]); - max_nr_pfns = atoi(argv[4]); - store_evtchn = atoi(argv[5]); - console_evtchn = atoi(argv[6]); - hvm = atoi(argv[7]); - pae = atoi(argv[8]); - apic = atoi(argv[9]); + store_evtchn = atoi(argv[3]); + console_evtchn = atoi(argv[4]); + hvm = atoi(argv[5]); + pae = atoi(argv[6]); + apic = atoi(argv[7]); - if (hvm) { - ret = xc_hvm_restore(xc_fd, io_fd, domid, max_nr_pfns, 
store_evtchn, - &store_mfn, pae, apic); - } else - ret = xc_linux_restore(xc_fd, io_fd, domid, p2m_size, - max_nr_pfns, store_evtchn, &store_mfn, - console_evtchn, &console_mfn); + ret = xc_domain_restore(xc_fd, io_fd, domid, store_evtchn, &store_mfn, + console_evtchn, &console_mfn, hvm, pae); - if (ret == 0) { + if ( ret == 0 ) + { printf("store-mfn %li\n", store_mfn); - if (!hvm) + if ( !hvm ) printf("console-mfn %li\n", console_mfn); fflush(stdout); } diff -r 9ec7dadc98ba -r eb3e430242ac unmodified_drivers/linux-2.6/Makefile --- a/unmodified_drivers/linux-2.6/Makefile Fri Apr 06 10:06:30 2007 -0600 +++ b/unmodified_drivers/linux-2.6/Makefile Fri Apr 06 10:08:30 2007 -0600 @@ -2,6 +2,7 @@ include $(M)/overrides.mk obj-m += platform-pci/ obj-m += xenbus/ +obj-m += balloon/ obj-m += blkfront/ obj-m += netfront/ obj-m += util/ diff -r 9ec7dadc98ba -r eb3e430242ac unmodified_drivers/linux-2.6/balloon/Kbuild --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/unmodified_drivers/linux-2.6/balloon/Kbuild Fri Apr 06 10:08:30 2007 -0600 @@ -0,0 +1,9 @@ +include $(M)/overrides.mk + +obj-m = xen-balloon.o + +EXTRA_CFLAGS += -I$(M)/platform-pci + +xen-balloon-objs = +xen-balloon-objs += balloon.o +xen-balloon-objs += sysfs.o diff -r 9ec7dadc98ba -r eb3e430242ac unmodified_drivers/linux-2.6/balloon/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/unmodified_drivers/linux-2.6/balloon/Makefile Fri Apr 06 10:08:30 2007 -0600 @@ -0,0 +1,3 @@ +ifneq ($(KERNELRELEASE),) +include $(src)/Kbuild +endif diff -r 9ec7dadc98ba -r eb3e430242ac unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h --- a/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h Fri Apr 06 10:06:30 2007 -0600 +++ b/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h Fri Apr 06 10:08:30 2007 -0600 @@ -25,6 +25,10 @@ #define NET_IP_ALIGN 0 #endif +#if defined(_LINUX_SKBUFF_H) && !defined(CHECKSUM_HW) +#define CHECKSUM_HW CHECKSUM_PARTIAL +#endif + #if defined(_LINUX_ERR_H) && !defined(IS_ERR_VALUE) #define IS_ERR_VALUE(x) unlikely((x) > (unsigned long)-1000L) #endif @@ -36,7 +40,7 @@ /* Some kernels have this typedef backported so we cannot reliably * detect based on version number, hence we forcibly #define it. */ -#if defined(__LINUX_TYPES_H) || defined(__LINUX_GFP_H) +#if defined(__LINUX_TYPES_H) || defined(__LINUX_GFP_H) || defined(_LINUX_KERNEL_H) #define gfp_t unsigned #endif @@ -45,6 +49,14 @@ #define atomic_notifier_chain_register(chain,nb) notifier_chain_register(chain,nb) #define atomic_notifier_chain_unregister(chain,nb) notifier_chain_unregister(chain,nb) #define atomic_notifier_call_chain(chain,val,v) notifier_call_chain(chain,val,v) +#endif + +#if defined(_LINUX_MM_H) && defined set_page_count +#define init_page_count(page) set_page_count(page, 1) +#endif + +#if defined(__LINUX_GFP_H) && !defined __GFP_NOMEMALLOC +#define __GFP_NOMEMALLOC 0 #endif #if defined(_LINUX_FS_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9) @@ -71,9 +83,17 @@ void *kzalloc(size_t size, int flags); #define end_that_request_last(req, uptodate) end_that_request_last(req) #endif +#if defined(_LINUX_CAPABILITY_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) +#define capable(cap) (1) +#endif + #if defined(_LINUX_KERNEL_H) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) extern char *kasprintf(gfp_t gfp, const char *fmt, ...) 
__attribute__ ((format (printf, 2, 3))); +#endif + +#if defined(_LINUX_SYSRQ_H) && LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) +#define handle_sysrq(x,y,z) handle_sysrq(x,y) #endif /* diff -r 9ec7dadc98ba -r eb3e430242ac unmodified_drivers/linux-2.6/mkbuildtree --- a/unmodified_drivers/linux-2.6/mkbuildtree Fri Apr 06 10:06:30 2007 -0600 +++ b/unmodified_drivers/linux-2.6/mkbuildtree Fri Apr 06 10:08:30 2007 -0600 @@ -15,6 +15,9 @@ XL=$C/../../linux-2.6-xen-sparse for d in $(find ${XL}/drivers/xen/ -maxdepth 1 -type d | sed -e 1d); do if ! echo $d | egrep -q back; then + lndir $d $(basename $d) > /dev/null 2>&1 + fi + if ! echo $d | egrep -q ball; then lndir $d $(basename $d) > /dev/null 2>&1 fi done diff -r 9ec7dadc98ba -r eb3e430242ac unmodified_drivers/linux-2.6/overrides.mk --- a/unmodified_drivers/linux-2.6/overrides.mk Fri Apr 06 10:06:30 2007 -0600 +++ b/unmodified_drivers/linux-2.6/overrides.mk Fri Apr 06 10:08:30 2007 -0600 @@ -4,7 +4,8 @@ # # (i.e. we need the native config for things like -mregparm, but # a Xen kernel to find the right headers) -EXTRA_CFLAGS += -D__XEN_INTERFACE_VERSION__=0x00030202 +EXTRA_CFLAGS += -D__XEN_INTERFACE_VERSION__=0x00030205 +EXTRA_CFLAGS += -DCONFIG_XEN_COMPAT=0xffffff EXTRA_CFLAGS += -I$(M)/include -I$(M)/compat-include -DHAVE_XEN_PLATFORM_COMPAT_H ifeq ($(ARCH),ia64) EXTRA_CFLAGS += -DCONFIG_VMX_GUEST diff -r 9ec7dadc98ba -r eb3e430242ac unmodified_drivers/linux-2.6/platform-pci/platform-pci.c --- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c Fri Apr 06 10:06:30 2007 -0600 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c Fri Apr 06 10:08:30 2007 -0600 @@ -208,14 +208,6 @@ static uint64_t get_callback_via(struct ((uint64_t)(pin - 1) & 3)); } -/* Invalidate foreign mappings (e.g., in qemu-based device model). */ -static uint16_t invlmap_port; -void xen_invalidate_foreign_mappings(void) -{ - outb(0, invlmap_port); -} -EXPORT_SYMBOL(xen_invalidate_foreign_mappings); - static int __devinit platform_pci_init(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -239,8 +231,6 @@ static int __devinit platform_pci_init(s printk(KERN_WARNING DRV_NAME ":no resources found\n"); return -ENOENT; } - - invlmap_port = ioaddr; if (request_mem_region(mmio_addr, mmio_len, DRV_NAME) == NULL) { diff -r 9ec7dadc98ba -r eb3e430242ac unmodified_drivers/linux-2.6/platform-pci/xen_support.c --- a/unmodified_drivers/linux-2.6/platform-pci/xen_support.c Fri Apr 06 10:06:30 2007 -0600 +++ b/unmodified_drivers/linux-2.6/platform-pci/xen_support.c Fri Apr 06 10:08:30 2007 -0600 @@ -59,12 +59,3 @@ void xen_machphys_update(unsigned long m } EXPORT_SYMBOL(xen_machphys_update); -void balloon_update_driver_allowance(long delta) -{ -} -EXPORT_SYMBOL(balloon_update_driver_allowance); - -void balloon_release_driver_page(struct page *page) -{ -} -EXPORT_SYMBOL(balloon_release_driver_page); diff -r 9ec7dadc98ba -r eb3e430242ac xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/arch/x86/hvm/hvm.c Fri Apr 06 10:08:30 2007 -0600 @@ -131,7 +131,7 @@ void hvm_do_resume(struct vcpu *v) switch ( p->state ) { case STATE_IORESP_READY: /* IORESP_READY -> NONE */ - hvm_io_assist(v); + hvm_io_assist(); break; case STATE_IOREQ_READY: /* IOREQ_{READY,INPROCESS} -> IORESP_READY */ case STATE_IOREQ_INPROCESS: @@ -144,48 +144,6 @@ void hvm_do_resume(struct vcpu *v) domain_crash_synchronous(); } } -} - -/* Called from the tools when saving a domain to make sure the io - * request-response ring is entirely empty. 
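The platform-compat.h additions above keep the shared driver sources building against a range of kernel versions purely with the preprocessor; the handle_sysrq() shim, for instance, maps the drivers' three-argument calls onto the two-argument function found in kernels newer than 2.6.18. It works because a function-like macro is never re-expanded inside its own replacement text, so it can share a name with the real function. A minimal stand-alone illustration using dummy names rather than the kernel API:

    #include <stdio.h>

    /* The "new" two-argument function. */
    static void report(const char *subsys, int code)
    {
        printf("%s: %d\n", subsys, code);
    }

    /* Compat shim: call sites written against an older three-argument
     * interface keep compiling; the extra argument is dropped.  The name
     * inside the replacement is not expanded again, so the call still
     * reaches the function above. */
    #define report(subsys, code, extra) report(subsys, code)

    int main(void)
    {
        report("balloon", 42, NULL);   /* old-style, three-argument call */
        return 0;
    }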
*/ -static int hvmop_drain_io( - XEN_GUEST_HANDLE(xen_hvm_drain_io_t) uop) -{ - struct xen_hvm_drain_io op; - struct domain *d; - struct vcpu *v; - ioreq_t *p; - int rc; - - if ( copy_from_guest(&op, uop, 1) ) - return -EFAULT; - - if ( !IS_PRIV(current->domain) ) - return -EPERM; - - d = rcu_lock_domain_by_id(op.domid); - if ( d == NULL ) - return -ESRCH; - - rc = -EINVAL; - /* Can't do this to yourself, or to a domain without an ioreq ring */ - if ( d == current->domain || !is_hvm_domain(d) || get_sp(d) == NULL ) - goto out; - - rc = 0; - - domain_pause(d); /* It's not safe to do this to running vcpus */ - for_each_vcpu(d, v) - { - p = &get_vio(v->domain, v->vcpu_id)->vp_ioreq; - if ( p->state == STATE_IORESP_READY ) - hvm_io_assist(v); - } - domain_unpause(d); - - out: - rcu_unlock_domain(d); - return rc; } int hvm_domain_initialise(struct domain *d) @@ -563,19 +521,13 @@ static hvm_hypercall_t *hvm_hypercall_ta HYPERCALL(hvm_op) }; -void hvm_do_hypercall(struct cpu_user_regs *pregs) -{ - if ( unlikely(ring_3(pregs)) ) - { - pregs->eax = -EPERM; - return; - } - +static void __hvm_do_hypercall(struct cpu_user_regs *pregs) +{ if ( (pregs->eax >= NR_hypercalls) || !hvm_hypercall_table[pregs->eax] ) { - gdprintk(XENLOG_WARNING, "HVM vcpu %d:%d did a bad hypercall %d.\n", - current->domain->domain_id, current->vcpu_id, - pregs->eax); + if ( pregs->eax != __HYPERVISOR_grant_table_op ) + gdprintk(XENLOG_WARNING, "HVM vcpu %d:%d bad hypercall %d.\n", + current->domain->domain_id, current->vcpu_id, pregs->eax); pregs->eax = -ENOSYS; return; } @@ -641,20 +593,14 @@ static hvm_hypercall_t *hvm_hypercall32_ HYPERCALL(event_channel_op) }; -void hvm_do_hypercall(struct cpu_user_regs *pregs) -{ - if ( unlikely(ring_3(pregs)) ) - { - pregs->rax = -EPERM; - return; - } - +static void __hvm_do_hypercall(struct cpu_user_regs *pregs) +{ pregs->rax = (uint32_t)pregs->eax; /* mask in case compat32 caller */ if ( (pregs->rax >= NR_hypercalls) || !hvm_hypercall64_table[pregs->rax] ) { - gdprintk(XENLOG_WARNING, "HVM vcpu %d:%d did a bad hypercall %ld.\n", - current->domain->domain_id, current->vcpu_id, - pregs->rax); + if ( pregs->rax != __HYPERVISOR_grant_table_op ) + gdprintk(XENLOG_WARNING, "HVM vcpu %d:%d bad hypercall %ld.\n", + current->domain->domain_id, current->vcpu_id, pregs->rax); pregs->rax = -ENOSYS; return; } @@ -678,6 +624,37 @@ void hvm_do_hypercall(struct cpu_user_re } #endif /* defined(__x86_64__) */ + +int hvm_do_hypercall(struct cpu_user_regs *pregs) +{ + int flush, preempted; + unsigned long old_eip; + + if ( unlikely(ring_3(pregs)) ) + { + pregs->eax = -EPERM; + return 0; + } + + /* + * NB. In future flush only on decrease_reservation. + * For now we also need to flush when pages are added, as qemu-dm is not + * yet capable of faulting pages into an existing valid mapcache bucket. + */ + flush = ((uint32_t)pregs->eax == __HYPERVISOR_memory_op); + + /* Check for preemption: RIP will be modified from this dummy value. */ + old_eip = pregs->eip; + pregs->eip = 0xF0F0F0FF; + + __hvm_do_hypercall(pregs); + + preempted = (pregs->eip != 0xF0F0F0FF); + pregs->eip = old_eip; + + return (preempted ? HVM_HCALL_preempted : + flush ? 
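hvm_do_hypercall() above now tells its callers two things: whether the hypercall was preempted, detected by seeding the guest RIP with a dummy value and checking whether the continuation logic changed it, and whether qemu-dm's mapcache must be invalidated (any memory_op, for now), which supersedes the I/O-port based xen_invalidate_foreign_mappings() mechanism removed from platform-pci.c earlier in this patch. A self-contained sketch of the preemption-detection idea, with a toy register block standing in for struct cpu_user_regs:

    #include <stdio.h>
    #include <stdbool.h>

    struct regs { unsigned long eip; unsigned long eax; };

    #define DUMMY_EIP 0xF0F0F0FFUL

    /* Toy hypercall: when it wants to be restarted it rewrites EIP, as the
     * real continuation machinery does; otherwise it leaves EIP alone. */
    static void toy_hypercall(struct regs *r, bool wants_restart)
    {
        if (wants_restart)
            r->eip = 0x2000;            /* anything other than DUMMY_EIP */
        r->eax = 0;                     /* hypercall return value */
    }

    static bool run_and_check(struct regs *r, bool wants_restart)
    {
        unsigned long old_eip = r->eip;
        bool preempted;

        r->eip = DUMMY_EIP;                 /* seed the marker value      */
        toy_hypercall(r, wants_restart);
        preempted = (r->eip != DUMMY_EIP);  /* any change means preempted */
        r->eip = old_eip;                   /* hand the real EIP back     */

        return preempted;
    }

    int main(void)
    {
        struct regs r = { 0x1000, 0 };
        printf("completed:  preempted=%d\n", run_and_check(&r, false));
        printf("restarted:  preempted=%d\n", run_and_check(&r, true));
        return 0;
    }

The SVM and VMX VMCALL exit handlers changed later in this patch rely on the returned value: they only advance the guest past the VMCALL instruction when the call was not preempted, and send the mapcache invalidation request when HVM_HCALL_invalidate comes back.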
HVM_HCALL_invalidate : HVM_HCALL_completed); +} void hvm_update_guest_cr3(struct vcpu *v, unsigned long guest_cr3) { @@ -963,12 +940,6 @@ long do_hvm_op(unsigned long op, XEN_GUE guest_handle_cast(arg, xen_hvm_set_pci_link_route_t)); break; - case HVMOP_drain_io: - rc = hvmop_drain_io( - guest_handle_cast(arg, xen_hvm_drain_io_t)); - break; - - default: { gdprintk(XENLOG_WARNING, "Bad HVM op %ld.\n", op); diff -r 9ec7dadc98ba -r eb3e430242ac xen/arch/x86/hvm/io.c --- a/xen/arch/x86/hvm/io.c Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/arch/x86/hvm/io.c Fri Apr 06 10:08:30 2007 -0600 @@ -289,8 +289,12 @@ static void set_reg_value (int size, int long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs); -static inline void set_eflags_CF(int size, unsigned long v1, - unsigned long v2, struct cpu_user_regs *regs) +static inline void set_eflags_CF(int size, + unsigned int instr, + unsigned long result, + unsigned long src, + unsigned long dst, + struct cpu_user_regs *regs) { unsigned long mask; @@ -300,14 +304,28 @@ static inline void set_eflags_CF(int siz mask = ~0UL >> (8 * (sizeof(mask) - size)); - if ((v1 & mask) > (v2 & mask)) - regs->eflags |= X86_EFLAGS_CF; + if ( instr == INSTR_ADD ) + { + /* CF=1 <==> result is less than the augend and addend) */ + if ( (result & mask) < (dst & mask) ) + { + ASSERT((result & mask) < (src & mask)); + regs->eflags |= X86_EFLAGS_CF; + } + } else - regs->eflags &= ~X86_EFLAGS_CF; -} - -static inline void set_eflags_OF(int size, unsigned long v1, - unsigned long v2, unsigned long v3, + { + ASSERT( instr == INSTR_CMP || instr == INSTR_SUB ); + if ( (src & mask) > (dst & mask) ) + regs->eflags |= X86_EFLAGS_CF; + } +} + +static inline void set_eflags_OF(int size, + unsigned int instr, + unsigned long result, + unsigned long src, + unsigned long dst, struct cpu_user_regs *regs) { unsigned long mask; @@ -316,21 +334,32 @@ static inline void set_eflags_OF(int siz size = BYTE; ASSERT((size <= sizeof(mask)) && (size > 0)); - mask = ~0UL >> (8 * (sizeof(mask) - size)); - - if ((v3 ^ v2) & (v3 ^ v1) & mask) - regs->eflags |= X86_EFLAGS_OF; -} - -static inline void set_eflags_AF(int size, unsigned long v1, - unsigned long v2, unsigned long v3, + mask = 1UL << ((8*size) - 1); + + if ( instr == INSTR_ADD ) + { + if ((src ^ result) & (dst ^ result) & mask); + regs->eflags |= X86_EFLAGS_OF; + } + else + { + ASSERT(instr == INSTR_CMP || instr == INSTR_SUB); + if ((dst ^ src) & (dst ^ result) & mask) + regs->eflags |= X86_EFLAGS_OF; + } +} + +static inline void set_eflags_AF(int size, + unsigned long result, + unsigned long src, + unsigned long dst, struct cpu_user_regs *regs) { - if ((v1 ^ v2 ^ v3) & 0x10) + if ((result ^ src ^ dst) & 0x10) regs->eflags |= X86_EFLAGS_AF; } -static inline void set_eflags_ZF(int size, unsigned long v1, +static inline void set_eflags_ZF(int size, unsigned long result, struct cpu_user_regs *regs) { unsigned long mask; @@ -341,11 +370,11 @@ static inline void set_eflags_ZF(int siz mask = ~0UL >> (8 * (sizeof(mask) - size)); - if ((v1 & mask) == 0) + if ((result & mask) == 0) regs->eflags |= X86_EFLAGS_ZF; } -static inline void set_eflags_SF(int size, unsigned long v1, +static inline void set_eflags_SF(int size, unsigned long result, struct cpu_user_regs *regs) { unsigned long mask; @@ -354,9 +383,9 @@ static inline void set_eflags_SF(int siz size = BYTE; ASSERT((size <= sizeof(mask)) && (size > 0)); - mask = ~0UL >> (8 * (sizeof(mask) - size)); - - if (v1 & mask) + mask = 1UL << ((8*size) - 1); + + if (result & mask) 
regs->eflags |= X86_EFLAGS_SF; } @@ -379,10 +408,10 @@ static char parity_table[256] = { 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1 }; -static inline void set_eflags_PF(int size, unsigned long v1, +static inline void set_eflags_PF(int size, unsigned long result, struct cpu_user_regs *regs) { - if (parity_table[v1 & 0xFF]) + if (parity_table[result & 0xFF]) regs->eflags |= X86_EFLAGS_PF; } @@ -454,7 +483,7 @@ static void hvm_mmio_assist(struct cpu_u { int sign = p->df ? -1 : 1; int size = -1, index = -1; - unsigned long value = 0, diff = 0; + unsigned long value = 0, result = 0; unsigned long src, dst; src = mmio_opp->operand[0]; @@ -575,31 +604,15 @@ static void hvm_mmio_assist(struct cpu_u if (src & REGISTER) { index = operand_index(src); value = get_reg_value(size, index, 0, regs); - diff = (unsigned long) p->data & value; + result = (unsigned long) p->data & value; } else if (src & IMMEDIATE) { value = mmio_opp->immediate; - diff = (unsigned long) p->data & value; + result = (unsigned long) p->data & value; } else if (src & MEMORY) { index = operand_index(dst); value = get_reg_value(size, index, 0, regs); - diff = (unsigned long) p->data & value; - set_reg_value(size, index, 0, regs, diff); - } - break; - - case INSTR_ADD: - if (src & REGISTER) { - index = operand_index(src); - value = get_reg_value(size, index, 0, regs); - diff = (unsigned long) p->data + value; - } else if (src & IMMEDIATE) { - value = mmio_opp->immediate; - diff = (unsigned long) p->data + value; - } else if (src & MEMORY) { - index = operand_index(dst); - value = get_reg_value(size, index, 0, regs); - diff = (unsigned long) p->data + value; - set_reg_value(size, index, 0, regs, diff); + result = (unsigned long) p->data & value; + set_reg_value(size, index, 0, regs, result); } /* @@ -609,24 +622,55 @@ static void hvm_mmio_assist(struct cpu_u */ regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF| X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF); - set_eflags_ZF(size, diff, regs); - set_eflags_SF(size, diff, regs); - set_eflags_PF(size, diff, regs); + set_eflags_ZF(size, result, regs); + set_eflags_SF(size, result, regs); + set_eflags_PF(size, result, regs); + break; + + case INSTR_ADD: + if (src & REGISTER) { + index = operand_index(src); + value = get_reg_value(size, index, 0, regs); + result = (unsigned long) p->data + value; + } else if (src & IMMEDIATE) { + value = mmio_opp->immediate; + result = (unsigned long) p->data + value; + } else if (src & MEMORY) { + index = operand_index(dst); + value = get_reg_value(size, index, 0, regs); + result = (unsigned long) p->data + value; + set_reg_value(size, index, 0, regs, result); + } + + /* + * The CF, OF, SF, ZF, AF, and PF flags are set according + * to the result + */ + regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF| + X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF); + set_eflags_CF(size, mmio_opp->instr, result, value, + (unsigned long) p->data, regs); + set_eflags_OF(size, mmio_opp->instr, result, value, + (unsigned long) p->data, regs); + set_eflags_AF(size, result, value, (unsigned long) p->data, regs); + set_eflags_ZF(size, result, regs); + set_eflags_SF(size, result, regs); + set_eflags_PF(size, result, regs); break; case INSTR_OR: if (src & REGISTER) { index = operand_index(src); value = get_reg_value(size, index, 0, regs); - diff = (unsigned long) p->data | value; + result = (unsigned long) p->data | value; } else if (src & IMMEDIATE) { value = mmio_opp->immediate; - diff = (unsigned long) p->data | value; + result = (unsigned long) p->data | value; } else if 
(src & MEMORY) { index = operand_index(dst); value = get_reg_value(size, index, 0, regs); - diff = (unsigned long) p->data | value; - set_reg_value(size, index, 0, regs, diff); + result = (unsigned long) p->data | value; + set_reg_value(size, index, 0, regs, result); } /* @@ -636,24 +680,24 @@ static void hvm_mmio_assist(struct cpu_u */ regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF| X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF); - set_eflags_ZF(size, diff, regs); - set_eflags_SF(size, diff, regs); - set_eflags_PF(size, diff, regs); + set_eflags_ZF(size, result, regs); + set_eflags_SF(size, result, regs); + set_eflags_PF(size, result, regs); break; case INSTR_XOR: if (src & REGISTER) { index = operand_index(src); value = get_reg_value(size, index, 0, regs); - diff = (unsigned long) p->data ^ value; + result = (unsigned long) p->data ^ value; } else if (src & IMMEDIATE) { value = mmio_opp->immediate; - diff = (unsigned long) p->data ^ value; + result = (unsigned long) p->data ^ value; } else if (src & MEMORY) { index = operand_index(dst); value = get_reg_value(size, index, 0, regs); - diff = (unsigned long) p->data ^ value; - set_reg_value(size, index, 0, regs, diff); + result = (unsigned long) p->data ^ value; + set_reg_value(size, index, 0, regs, result); } /* @@ -663,9 +707,9 @@ static void hvm_mmio_assist(struct cpu_u */ regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF| X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF); - set_eflags_ZF(size, diff, regs); - set_eflags_SF(size, diff, regs); - set_eflags_PF(size, diff, regs); + set_eflags_ZF(size, result, regs); + set_eflags_SF(size, result, regs); + set_eflags_PF(size, result, regs); break; case INSTR_CMP: @@ -673,16 +717,16 @@ static void hvm_mmio_assist(struct cpu_u if (src & REGISTER) { index = operand_index(src); value = get_reg_value(size, index, 0, regs); - diff = (unsigned long) p->data - value; + result = (unsigned long) p->data - value; } else if (src & IMMEDIATE) { value = mmio_opp->immediate; - diff = (unsigned long) p->data - value; + result = (unsigned long) p->data - value; } else if (src & MEMORY) { index = operand_index(dst); value = get_reg_value(size, index, 0, regs); - diff = value - (unsigned long) p->data; + result = value - (unsigned long) p->data; if ( mmio_opp->instr == INSTR_SUB ) - set_reg_value(size, index, 0, regs, diff); + set_reg_value(size, index, 0, regs, result); } /* @@ -691,12 +735,24 @@ static void hvm_mmio_assist(struct cpu_u */ regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF| X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF); - set_eflags_CF(size, value, (unsigned long) p->data, regs); - set_eflags_OF(size, diff, value, (unsigned long) p->data, regs); - set_eflags_AF(size, diff, value, (unsigned long) p->data, regs); - set_eflags_ZF(size, diff, regs); - set_eflags_SF(size, diff, regs); - set_eflags_PF(size, diff, regs); + if ( src & (REGISTER | IMMEDIATE) ) + { + set_eflags_CF(size, mmio_opp->instr, result, value, + (unsigned long) p->data, regs); + set_eflags_OF(size, mmio_opp->instr, result, value, + (unsigned long) p->data, regs); + } + else + { + set_eflags_CF(size, mmio_opp->instr, result, + (unsigned long) p->data, value, regs); + set_eflags_OF(size, mmio_opp->instr, result, + (unsigned long) p->data, value, regs); + } + set_eflags_AF(size, result, value, (unsigned long) p->data, regs); + set_eflags_ZF(size, result, regs); + set_eflags_SF(size, result, regs); + set_eflags_PF(size, result, regs); break; case INSTR_TEST: @@ -709,16 +765,16 @@ static void hvm_mmio_assist(struct cpu_u index = 
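The reworked flag helpers above take the emulated instruction into account: for ADD the carry flag is set when the truncated result is below either addend, for CMP/SUB it is set when the subtrahend is larger than the minuend, and the overflow flag comes from the sign bits of the operands and the result. A self-contained sketch of the same calculations on 8-bit operands (boolean outputs instead of EFLAGS bits, and the test values are just examples):

    #include <stdio.h>
    #include <stdbool.h>

    /* Flags for an 8-bit ADD: result = dst + src, truncated to 8 bits. */
    static void add_flags(unsigned long dst, unsigned long src, bool *cf, bool *of)
    {
        unsigned long mask = 0xFF, sign = 0x80;
        unsigned long result = (dst + src) & mask;

        /* Carry: the truncated sum wrapped, so it is below either operand. */
        *cf = (result < (dst & mask));
        /* Overflow: both operands share a sign that the result does not. */
        *of = ((src ^ result) & (dst ^ result) & sign) != 0;
    }

    /* Flags for an 8-bit CMP/SUB: result = dst - src. */
    static void sub_flags(unsigned long dst, unsigned long src, bool *cf, bool *of)
    {
        unsigned long mask = 0xFF, sign = 0x80;
        unsigned long result = (dst - src) & mask;

        /* Borrow: the subtrahend is larger than the minuend. */
        *cf = ((src & mask) > (dst & mask));
        /* Overflow: operand signs differ and the result's sign differs from dst. */
        *of = ((dst ^ src) & (dst ^ result) & sign) != 0;
    }

    int main(void)
    {
        bool cf, of;

        add_flags(0x80, 0x80, &cf, &of);   /* -128 + -128 carries and overflows */
        printf("add 0x80+0x80: CF=%d OF=%d\n", cf, of);

        sub_flags(0x10, 0x20, &cf, &of);   /* 16 - 32 borrows, no signed overflow */
        printf("sub 0x10-0x20: CF=%d OF=%d\n", cf, of);
        return 0;
    }

Passing the operands to the helpers in the right order is what the separate REGISTER/IMMEDIATE and MEMORY branches in the INSTR_CMP case are about: the minuend and subtrahend swap depending on which side of the instruction the MMIO data sits.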
operand_index(dst); value = get_reg_value(size, index, 0, regs); } - diff = (unsigned long) p->data & value; + result = (unsigned long) p->data & value; /* * Sets the SF, ZF, and PF status flags. CF and OF are set to 0 */ regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF| X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF); - set_eflags_ZF(size, diff, regs); - set_eflags_SF(size, diff, regs); - set_eflags_PF(size, diff, regs); + set_eflags_ZF(size, result, regs); + set_eflags_SF(size, result, regs); + set_eflags_PF(size, result, regs); break; case INSTR_BT: @@ -764,13 +820,14 @@ static void hvm_mmio_assist(struct cpu_u } } -void hvm_io_assist(struct vcpu *v) +void hvm_io_assist(void) { vcpu_iodata_t *vio; ioreq_t *p; struct cpu_user_regs *regs; struct hvm_io_op *io_opp; unsigned long gmfn; + struct vcpu *v = current; struct domain *d = v->domain; io_opp = &v->arch.hvm_vcpu.io_op; @@ -788,10 +845,17 @@ void hvm_io_assist(struct vcpu *v) p->state = STATE_IOREQ_NONE; - if ( p->type == IOREQ_TYPE_PIO ) + switch ( p->type ) + { + case IOREQ_TYPE_INVALIDATE: + goto out; + case IOREQ_TYPE_PIO: hvm_pio_assist(regs, p, io_opp); - else + break; + default: hvm_mmio_assist(regs, p, io_opp); + break; + } /* Copy register changes back into current guest state. */ hvm_load_cpu_guest_regs(v, regs); @@ -804,6 +868,7 @@ void hvm_io_assist(struct vcpu *v) mark_dirty(d, gmfn); } + out: vcpu_end_shutdown_deferral(v); } diff -r 9ec7dadc98ba -r eb3e430242ac xen/arch/x86/hvm/irq.c --- a/xen/arch/x86/hvm/irq.c Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/arch/x86/hvm/irq.c Fri Apr 06 10:08:30 2007 -0600 @@ -335,6 +335,21 @@ int is_isa_irq_masked(struct vcpu *v, in return ((v->domain->arch.hvm_domain.vpic[isa_irq >> 3].imr & (1 << (isa_irq & 7))) && domain_vioapic(v->domain)->redirtbl[gsi].fields.mask); +} + +/* + * TODO: 1. Should not need special treatment of event-channel events. + * 2. Should take notice of interrupt shadows (or clear them). + */ +int hvm_local_events_need_delivery(struct vcpu *v) +{ + int pending; + + pending = (vcpu_info(v, evtchn_upcall_pending) || cpu_has_pending_irq(v)); + if ( unlikely(pending) ) + pending = hvm_interrupts_enabled(v); + + return pending; } #if 0 /* Keep for debugging */ diff -r 9ec7dadc98ba -r eb3e430242ac xen/arch/x86/hvm/platform.c --- a/xen/arch/x86/hvm/platform.c Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/arch/x86/hvm/platform.c Fri Apr 06 10:08:30 2007 -0600 @@ -865,7 +865,7 @@ void send_pio_req(unsigned long port, un if ( hvm_portio_intercept(p) ) { p->state = STATE_IORESP_READY; - hvm_io_assist(v); + hvm_io_assist(); return; } @@ -914,7 +914,7 @@ static void send_mmio_req(unsigned char if ( hvm_mmio_intercept(p) || hvm_buffered_io_intercept(p) ) { p->state = STATE_IORESP_READY; - hvm_io_assist(v); + hvm_io_assist(); return; } @@ -939,6 +939,34 @@ void send_timeoffset_req(unsigned long t if ( !hvm_buffered_io_send(p) ) printk("Unsuccessful timeoffset update\n"); +} + +/* Ask ioemu mapcache to invalidate mappings. 
*/ +void send_invalidate_req(void) +{ + struct vcpu *v = current; + vcpu_iodata_t *vio; + ioreq_t *p; + + vio = get_vio(v->domain, v->vcpu_id); + if ( vio == NULL ) + { + printk("bad shared page: %lx\n", (unsigned long) vio); + domain_crash_synchronous(); + } + + p = &vio->vp_ioreq; + if ( p->state != STATE_IOREQ_NONE ) + printk("WARNING: send invalidate req with something " + "already pending (%d)?\n", p->state); + + p->type = IOREQ_TYPE_INVALIDATE; + p->size = 4; + p->dir = IOREQ_WRITE; + p->data = ~0UL; /* flush all */ + p->io_count++; + + hvm_send_assist_req(v); } static void mmio_operands(int type, unsigned long gpa, diff -r 9ec7dadc98ba -r eb3e430242ac xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/arch/x86/hvm/svm/svm.c Fri Apr 06 10:08:30 2007 -0600 @@ -131,66 +131,6 @@ static void svm_store_cpu_guest_regs( } } - -static inline int long_mode_do_msr_read(struct cpu_user_regs *regs) -{ - u64 msr_content = 0; - struct vcpu *v = current; - struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - - switch ((u32)regs->ecx) - { - case MSR_EFER: - msr_content = v->arch.hvm_svm.cpu_shadow_efer; - break; - -#ifdef __x86_64__ - case MSR_FS_BASE: - msr_content = vmcb->fs.base; - goto check_long_mode; - - case MSR_GS_BASE: - msr_content = vmcb->gs.base; - goto check_long_mode; - - case MSR_SHADOW_GS_BASE: - msr_content = vmcb->kerngsbase; - check_long_mode: - if ( !svm_long_mode_enabled(v) ) - { - svm_inject_exception(v, TRAP_gp_fault, 1, 0); - return 0; - } - break; -#endif - - case MSR_STAR: - msr_content = vmcb->star; - break; - - case MSR_LSTAR: - msr_content = vmcb->lstar; - break; - - case MSR_CSTAR: - msr_content = vmcb->cstar; - break; - - case MSR_SYSCALL_MASK: - msr_content = vmcb->sfmask; - break; - default: - return 0; - } - - HVM_DBG_LOG(DBG_LEVEL_2, "msr_content: %"PRIx64"\n", - msr_content); - - regs->eax = (u32)(msr_content >> 0); - regs->edx = (u32)(msr_content >> 32); - return 1; -} - static inline int long_mode_do_msr_write(struct cpu_user_regs *regs) { u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32); @@ -242,52 +182,12 @@ static inline int long_mode_do_msr_write break; -#ifdef __x86_64__ - case MSR_FS_BASE: - case MSR_GS_BASE: - case MSR_SHADOW_GS_BASE: - if ( !svm_long_mode_enabled(v) ) - goto gp_fault; - - if ( !is_canonical_address(msr_content) ) - goto uncanonical_address; - - if ( ecx == MSR_FS_BASE ) - vmcb->fs.base = msr_content; - else if ( ecx == MSR_GS_BASE ) - vmcb->gs.base = msr_content; - else - vmcb->kerngsbase = msr_content; - break; -#endif - - case MSR_STAR: - vmcb->star = msr_content; - break; - - case MSR_LSTAR: - case MSR_CSTAR: - if ( !is_canonical_address(msr_content) ) - goto uncanonical_address; - - if ( ecx == MSR_LSTAR ) - vmcb->lstar = msr_content; - else - vmcb->cstar = msr_content; - break; - - case MSR_SYSCALL_MASK: - vmcb->sfmask = msr_content; - break; - default: return 0; } return 1; - uncanonical_address: - HVM_DBG_LOG(DBG_LEVEL_1, "Not cano address of msr write %x\n", ecx); gp_fault: svm_inject_exception(v, TRAP_gp_fault, 1, 0); return 0; @@ -596,6 +496,12 @@ static int svm_realmode(struct vcpu *v) unsigned long eflags = v->arch.hvm_svm.vmcb->rflags; return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE); +} + +static int svm_interrupts_enabled(struct vcpu *v) +{ + unsigned long eflags = v->arch.hvm_svm.vmcb->rflags; + return !irq_masked(eflags); } static int svm_guest_x86_mode(struct vcpu *v) @@ -900,6 +806,7 @@ static struct hvm_function_table svm_fun .paging_enabled = 
svm_paging_enabled, .long_mode_enabled = svm_long_mode_enabled, .pae_enabled = svm_pae_enabled, + .interrupts_enabled = svm_interrupts_enabled, .guest_x86_mode = svm_guest_x86_mode, .get_guest_ctrl_reg = svm_get_ctrl_reg, .get_segment_base = svm_get_segment_base, @@ -2013,22 +1920,14 @@ static inline void svm_do_msr_access( case MSR_IA32_TIME_STAMP_COUNTER: msr_content = hvm_get_guest_time(v); break; - case MSR_IA32_SYSENTER_CS: - msr_content = vmcb->sysenter_cs; - break; - case MSR_IA32_SYSENTER_ESP: - msr_content = vmcb->sysenter_esp; - break; - case MSR_IA32_SYSENTER_EIP: - msr_content = vmcb->sysenter_eip; - break; case MSR_IA32_APICBASE: msr_content = vcpu_vlapic(v)->hw.apic_base_msr; break; + case MSR_EFER: + msr_content = v->arch.hvm_svm.cpu_shadow_efer; + break; + default: - if (long_mode_do_msr_read(regs)) - goto done; - if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) || rdmsr_safe(ecx, eax, edx) == 0 ) { @@ -2060,15 +1959,6 @@ static inline void svm_do_msr_access( case MSR_IA32_TIME_STAMP_COUNTER: hvm_set_guest_time(v, msr_content); pt_reset(v); - break; - case MSR_IA32_SYSENTER_CS: - vmcb->sysenter_cs = msr_content; - break; - case MSR_IA32_SYSENTER_ESP: - vmcb->sysenter_esp = msr_content; - break; - case MSR_IA32_SYSENTER_EIP: - vmcb->sysenter_eip = msr_content; break; case MSR_IA32_APICBASE: vlapic_msr_set(vcpu_vlapic(v), msr_content); @@ -2276,7 +2166,7 @@ asmlinkage void svm_vmexit_handler(struc unsigned long eip; struct vcpu *v = current; struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - int inst_len; + int inst_len, rc; exit_reason = vmcb->exitcode; save_svm_cpu_user_regs(v, regs); @@ -2385,8 +2275,13 @@ asmlinkage void svm_vmexit_handler(struc inst_len = __get_instruction_length(v, INSTR_VMCALL, NULL); ASSERT(inst_len > 0); HVMTRACE_1D(VMMCALL, v, regs->eax); - __update_guest_eip(vmcb, inst_len); - hvm_do_hypercall(regs); + rc = hvm_do_hypercall(regs); + if ( rc != HVM_HCALL_preempted ) + { + __update_guest_eip(vmcb, inst_len); + if ( rc == HVM_HCALL_invalidate ) + send_invalidate_req(); + } break; case VMEXIT_CR0_READ: diff -r 9ec7dadc98ba -r eb3e430242ac xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/arch/x86/hvm/svm/vmcb.c Fri Apr 06 10:08:30 2007 -0600 @@ -141,6 +141,14 @@ static int construct_vmcb(struct vcpu *v disable_intercept_for_msr((char *)arch_svm->msrpm, MSR_FS_BASE); disable_intercept_for_msr((char *)arch_svm->msrpm, MSR_GS_BASE); + disable_intercept_for_msr((char *)arch_svm->msrpm, MSR_SHADOW_GS_BASE); + disable_intercept_for_msr((char *)arch_svm->msrpm, MSR_CSTAR); + disable_intercept_for_msr((char *)arch_svm->msrpm, MSR_LSTAR); + disable_intercept_for_msr((char *)arch_svm->msrpm, MSR_STAR); + disable_intercept_for_msr((char *)arch_svm->msrpm, MSR_SYSCALL_MASK); + disable_intercept_for_msr((char *)arch_svm->msrpm, MSR_IA32_SYSENTER_CS); + disable_intercept_for_msr((char *)arch_svm->msrpm, MSR_IA32_SYSENTER_ESP); + disable_intercept_for_msr((char *)arch_svm->msrpm, MSR_IA32_SYSENTER_EIP); vmcb->msrpm_base_pa = (u64)virt_to_maddr(arch_svm->msrpm); vmcb->iopm_base_pa = (u64)virt_to_maddr(hvm_io_bitmap); diff -r 9ec7dadc98ba -r eb3e430242ac xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/arch/x86/hvm/vmx/vmx.c Fri Apr 06 10:08:30 2007 -0600 @@ -957,6 +957,13 @@ static int vmx_pae_enabled(struct vcpu * return (vmx_paging_enabled(v) && (cr4 & X86_CR4_PAE)); } +static int vmx_interrupts_enabled(struct vcpu *v) +{ + unsigned long eflags = 
__vmread(GUEST_RFLAGS); + return !irq_masked(eflags); +} + + static void vmx_update_host_cr3(struct vcpu *v) { ASSERT( (v == current) || !vcpu_runnable(v) ); @@ -1030,6 +1037,7 @@ static struct hvm_function_table vmx_fun .paging_enabled = vmx_paging_enabled, .long_mode_enabled = vmx_long_mode_enabled, .pae_enabled = vmx_pae_enabled, + .interrupts_enabled = vmx_interrupts_enabled, .guest_x86_mode = vmx_guest_x86_mode, .get_guest_ctrl_reg = vmx_get_ctrl_reg, .get_segment_base = vmx_get_segment_base, @@ -2618,10 +2626,16 @@ asmlinkage void vmx_vmexit_handler(struc } case EXIT_REASON_VMCALL: { + int rc; HVMTRACE_1D(VMMCALL, v, regs->eax); inst_len = __get_instruction_length(); /* Safe: VMCALL */ - __update_guest_eip(inst_len); - hvm_do_hypercall(regs); + rc = hvm_do_hypercall(regs); + if ( rc != HVM_HCALL_preempted ) + { + __update_guest_eip(inst_len); + if ( rc == HVM_HCALL_invalidate ) + send_invalidate_req(); + } break; } case EXIT_REASON_CR_ACCESS: diff -r 9ec7dadc98ba -r eb3e430242ac xen/common/domain.c --- a/xen/common/domain.c Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/common/domain.c Fri Apr 06 10:08:30 2007 -0600 @@ -307,7 +307,7 @@ void domain_kill(struct domain *d) domain_pause(d); /* Already dying? Then bail. */ - if ( xchg(&d->is_dying, 1) ) + if ( test_and_set_bool(d->is_dying) ) { domain_unpause(d); return; @@ -453,7 +453,7 @@ void domain_pause_for_debugger(void) struct vcpu *v; atomic_inc(&d->pause_count); - if ( xchg(&d->is_paused_by_controller, 1) ) + if ( test_and_set_bool(d->is_paused_by_controller) ) domain_unpause(d); /* race-free atomic_dec(&d->pause_count) */ for_each_vcpu ( d, v ) @@ -553,13 +553,13 @@ void domain_pause_by_systemcontroller(st void domain_pause_by_systemcontroller(struct domain *d) { domain_pause(d); - if ( xchg(&d->is_paused_by_controller, 1) ) + if ( test_and_set_bool(d->is_paused_by_controller) ) domain_unpause(d); } void domain_unpause_by_systemcontroller(struct domain *d) { - if ( xchg(&d->is_paused_by_controller, 0) ) + if ( test_and_clear_bool(d->is_paused_by_controller) ) domain_unpause(d); } diff -r 9ec7dadc98ba -r eb3e430242ac xen/common/memory.c --- a/xen/common/memory.c Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/common/memory.c Fri Apr 06 10:08:30 2007 -0600 @@ -173,17 +173,6 @@ int guest_remove_page(struct domain *d, if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); - if ( unlikely((page->count_info & PGC_count_mask) != 1) ) - { - shadow_drop_references(d, page); - /* We'll make this a guest-visible error in future, so take heed! */ - if ( (page->count_info & PGC_count_mask) != 1 ) - gdprintk(XENLOG_INFO, "Dom%d freeing in-use page %lx " - "(pseudophys %lx): count=%lx type=%lx\n", - d->domain_id, mfn, get_gpfn_from_mfn(mfn), - (unsigned long)page->count_info, page->u.inuse.type_info); - } - guest_physmap_remove_page(d, gmfn, mfn); put_page(page); diff -r 9ec7dadc98ba -r eb3e430242ac xen/common/schedule.c --- a/xen/common/schedule.c Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/common/schedule.c Fri Apr 06 10:08:30 2007 -0600 @@ -461,7 +461,11 @@ ret_t do_sched_op(int cmd, XEN_GUEST_HAN if ( d == NULL ) break; + /* domain_pause() prevens any further execution in guest context. 
*/ + domain_pause(d); domain_shutdown(d, (u8)sched_remote_shutdown.reason); + domain_unpause(d); + rcu_unlock_domain(d); ret = 0; diff -r 9ec7dadc98ba -r eb3e430242ac xen/drivers/char/ns16550.c --- a/xen/drivers/char/ns16550.c Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/drivers/char/ns16550.c Fri Apr 06 10:08:30 2007 -0600 @@ -239,7 +239,7 @@ static void ns16550_init_postirq(struct uart->irqaction.name = "ns16550"; uart->irqaction.dev_id = port; if ( (rc = setup_irq(uart->irq, &uart->irqaction)) != 0 ) - printk("ERROR: Failed to allocate na16550 IRQ %d\n", uart->irq); + printk("ERROR: Failed to allocate ns16550 IRQ %d\n", uart->irq); /* Master interrupt enable; also keep DTR/RTS asserted. */ ns_write_reg(uart, MCR, MCR_OUT2 | MCR_DTR | MCR_RTS); diff -r 9ec7dadc98ba -r eb3e430242ac xen/include/asm-ia64/xentypes.h --- a/xen/include/asm-ia64/xentypes.h Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/include/asm-ia64/xentypes.h Fri Apr 06 10:08:30 2007 -0600 @@ -5,6 +5,11 @@ typedef unsigned long ssize_t; typedef unsigned long ssize_t; typedef unsigned long size_t; typedef long long loff_t; + +typedef char bool_t; +#define test_and_set_bool(b) xchg(&(b), 1) +#define test_and_clear_bool(b) xchg(&(b), 0) + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_IA64_XENTYPES_H */ diff -r 9ec7dadc98ba -r eb3e430242ac xen/include/asm-powerpc/types.h --- a/xen/include/asm-powerpc/types.h Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/include/asm-powerpc/types.h Fri Apr 06 10:08:30 2007 -0600 @@ -70,5 +70,9 @@ typedef u64 dma64_addr_t; typedef unsigned short xmem_bufctl_t; +typedef int bool_t; +#define test_and_set_bool(b) xchg(&(b), 1) +#define test_and_clear_bool(b) xchg(&(b), 0) + #endif /* __ASSEMBLY__ */ #endif diff -r 9ec7dadc98ba -r eb3e430242ac xen/include/asm-x86/event.h --- a/xen/include/asm-x86/event.h Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/include/asm-x86/event.h Fri Apr 06 10:08:30 2007 -0600 @@ -35,12 +35,13 @@ static inline void vcpu_mark_events_pend vcpu_kick(v); } +int hvm_local_events_need_delivery(struct vcpu *v); static inline int local_events_need_delivery(void) { struct vcpu *v = current; - return ((vcpu_info(v, evtchn_upcall_pending) && - !vcpu_info(v, evtchn_upcall_mask)) || - (is_hvm_vcpu(v) && cpu_has_pending_irq(v))); + return (is_hvm_vcpu(v) ? 
hvm_local_events_need_delivery(v) : + (vcpu_info(v, evtchn_upcall_pending) && + !vcpu_info(v, evtchn_upcall_mask))); } static inline int local_event_delivery_is_enabled(void) diff -r 9ec7dadc98ba -r eb3e430242ac xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/include/asm-x86/hvm/hvm.h Fri Apr 06 10:08:30 2007 -0600 @@ -93,13 +93,15 @@ struct hvm_function_table { * 1) determine whether paging is enabled, * 2) determine whether long mode is enabled, * 3) determine whether PAE paging is enabled, - * 4) determine the mode the guest is running in, - * 5) return the current guest control-register value - * 6) return the current guest segment descriptor base + * 4) determine whether interrupts are enabled or not, + * 5) determine the mode the guest is running in, + * 6) return the current guest control-register value + * 7) return the current guest segment descriptor base */ int (*paging_enabled)(struct vcpu *v); int (*long_mode_enabled)(struct vcpu *v); int (*pae_enabled)(struct vcpu *v); + int (*interrupts_enabled)(struct vcpu *v); int (*guest_x86_mode)(struct vcpu *v); unsigned long (*get_guest_ctrl_reg)(struct vcpu *v, unsigned int num); unsigned long (*get_segment_base)(struct vcpu *v, enum x86_segment seg); @@ -190,6 +192,12 @@ hvm_pae_enabled(struct vcpu *v) } static inline int +hvm_interrupts_enabled(struct vcpu *v) +{ + return hvm_funcs.interrupts_enabled(v); +} + +static inline int hvm_guest_x86_mode(struct vcpu *v) { return hvm_funcs.guest_x86_mode(v); diff -r 9ec7dadc98ba -r eb3e430242ac xen/include/asm-x86/hvm/io.h --- a/xen/include/asm-x86/hvm/io.h Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/include/asm-x86/hvm/io.h Fri Apr 06 10:08:30 2007 -0600 @@ -147,9 +147,10 @@ extern void send_pio_req(unsigned long p extern void send_pio_req(unsigned long port, unsigned long count, int size, paddr_t value, int dir, int df, int value_is_ptr); void send_timeoffset_req(unsigned long timeoff); +void send_invalidate_req(void); extern void handle_mmio(unsigned long gpa); extern void hvm_interrupt_post(struct vcpu *v, int vector, int type); -extern void hvm_io_assist(struct vcpu *v); +extern void hvm_io_assist(void); #endif /* __ASM_X86_HVM_IO_H__ */ diff -r 9ec7dadc98ba -r eb3e430242ac xen/include/asm-x86/hvm/support.h --- a/xen/include/asm-x86/hvm/support.h Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/include/asm-x86/hvm/support.h Fri Apr 06 10:08:30 2007 -0600 @@ -228,7 +228,10 @@ void hvm_print_line(struct vcpu *v, cons void hvm_print_line(struct vcpu *v, const char c); void hlt_timer_fn(void *data); -void hvm_do_hypercall(struct cpu_user_regs *pregs); +#define HVM_HCALL_completed 0 /* hypercall completed - no further action */ +#define HVM_HCALL_preempted 1 /* hypercall preempted - re-execute VMCALL */ +#define HVM_HCALL_invalidate 2 /* invalidate ioemu-dm memory cache */ +int hvm_do_hypercall(struct cpu_user_regs *pregs); void hvm_hlt(unsigned long rflags); void hvm_triple_fault(void); diff -r 9ec7dadc98ba -r eb3e430242ac xen/include/asm-x86/types.h --- a/xen/include/asm-x86/types.h Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/include/asm-x86/types.h Fri Apr 06 10:08:30 2007 -0600 @@ -52,6 +52,10 @@ typedef unsigned long paddr_t; typedef unsigned long size_t; +typedef char bool_t; +#define test_and_set_bool(b) xchg(&(b), 1) +#define test_and_clear_bool(b) xchg(&(b), 0) + #endif /* __ASSEMBLY__ */ #if defined(__i386__) diff -r 9ec7dadc98ba -r eb3e430242ac xen/include/public/acm.h --- a/xen/include/public/acm.h Fri Apr 06 10:06:30 2007 -0600 +++ 
b/xen/include/public/acm.h Fri Apr 06 10:08:30 2007 -0600 @@ -124,7 +124,7 @@ struct acm_policy_version { uint32_t major; uint32_t minor; -} __attribute__((packed)); +}; /* each buffer consists of all policy information for @@ -145,12 +145,12 @@ struct acm_policy_buffer { uint32_t secondary_policy_code; uint32_t secondary_buffer_offset; struct acm_policy_version xml_pol_version; /* add in V3 */ -} __attribute__((packed)); +}; struct acm_policy_reference_buffer { uint32_t len; -} __attribute__((packed)); +}; struct acm_chwall_policy_buffer { uint32_t policy_version; /* ACM_CHWALL_VERSION */ @@ -162,7 +162,7 @@ struct acm_chwall_policy_buffer { uint32_t chwall_conflict_sets_offset; uint32_t chwall_running_types_offset; uint32_t chwall_conflict_aggregate_offset; -} __attribute__((packed)); +}; struct acm_ste_policy_buffer { uint32_t policy_version; /* ACM_STE_VERSION */ @@ -170,7 +170,7 @@ struct acm_ste_policy_buffer { uint32_t ste_max_types; uint32_t ste_max_ssidrefs; uint32_t ste_ssid_offset; -} __attribute__((packed)); +}; struct acm_stats_buffer { uint32_t magic; @@ -179,7 +179,7 @@ struct acm_stats_buffer { uint32_t primary_stats_offset; uint32_t secondary_policy_code; uint32_t secondary_stats_offset; -} __attribute__((packed)); +}; struct acm_ste_stats_buffer { uint32_t ec_eval_count; @@ -188,7 +188,7 @@ struct acm_ste_stats_buffer { uint32_t gt_denied_count; uint32_t ec_cachehit_count; uint32_t gt_cachehit_count; -} __attribute__((packed)); +}; struct acm_ssid_buffer { uint32_t len; @@ -200,7 +200,7 @@ struct acm_ssid_buffer { uint32_t secondary_policy_code; uint32_t secondary_max_types; uint32_t secondary_types_offset; -} __attribute__((packed)); +}; #endif diff -r 9ec7dadc98ba -r eb3e430242ac xen/include/public/foreign/Makefile --- a/xen/include/public/foreign/Makefile Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/include/public/foreign/Makefile Fri Apr 06 10:08:30 2007 -0600 @@ -13,7 +13,7 @@ clean: rm -f checker checker.c $(XEN_TARGET_ARCH).size rm -f *.pyc *.o *~ -ifeq ($(CROSS_COMPILE),) +ifeq ($(CROSS_COMPILE)$(XEN_TARGET_ARCH),$(XEN_COMPILE_ARCH)) check-headers: checker ./checker > $(XEN_TARGET_ARCH).size diff -u reference.size $(XEN_TARGET_ARCH).size diff -r 9ec7dadc98ba -r eb3e430242ac xen/include/public/hvm/hvm_op.h --- a/xen/include/public/hvm/hvm_op.h Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/include/public/hvm/hvm_op.h Fri Apr 06 10:08:30 2007 -0600 @@ -70,12 +70,4 @@ typedef struct xen_hvm_set_pci_link_rout typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t); -/* Drain all outstanding qemu-dm IO responses from a domain's ioreq ring. 
*/ -#define HVMOP_drain_io 5 -struct xen_hvm_drain_io { - domid_t domid; -}; -typedef struct xen_hvm_drain_io xen_hvm_drain_io_t; -DEFINE_XEN_GUEST_HANDLE(xen_hvm_drain_io_t); - #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ diff -r 9ec7dadc98ba -r eb3e430242ac xen/include/public/hvm/ioreq.h --- a/xen/include/public/hvm/ioreq.h Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/include/public/hvm/ioreq.h Fri Apr 06 10:08:30 2007 -0600 @@ -40,6 +40,7 @@ #define IOREQ_TYPE_XCHG 5 #define IOREQ_TYPE_ADD 6 #define IOREQ_TYPE_TIMEOFFSET 7 +#define IOREQ_TYPE_INVALIDATE 8 /* mapcache */ /* * VMExit dispatcher should cooperate with instruction decoder to diff -r 9ec7dadc98ba -r eb3e430242ac xen/include/xen/types.h --- a/xen/include/xen/types.h Fri Apr 06 10:06:30 2007 -0600 +++ b/xen/include/xen/types.h Fri Apr 06 10:08:30 2007 -0600 @@ -19,8 +19,6 @@ #define LONG_MAX ((long)(~0UL>>1)) #define LONG_MIN (-LONG_MAX - 1) #define ULONG_MAX (~0UL) - -typedef char bool_t; /* bsd */ typedef unsigned char u_char; _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog