[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] Merge with xenppc-unstable-merge.hg



# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Date 1176459266 -3600
# Node ID f92a79e39da872c5632a8490ebb97e2e1fcbca28
# Parent  5bda20f0723daea6c4390eaa77f7860ec0cd67a7
# Parent  fdbbc6aa2cbf230fbe0341a04d78dc1d55fb3244
Merge with xenppc-unstable-merge.hg
---
 tools/libxc/xc_hvm_save.c                                         |  755 ----
 tools/libxc/xc_linux_save.c                                       | 1414 --------
 README                                                            |    4 
 docs/xen-api/xenapi-datamodel.tex                                 |  173 +
 linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c               |    2 
 linux-2.6-xen-sparse/arch/ia64/kernel/setup.c                     |    4 
 linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c                   |    3 
 linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c                    |   36 
 linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S                     |    6 
 linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S                       |   63 
 linux-2.6-xen-sparse/drivers/xen/core/reboot.c                    |    3 
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h      |    2 
 linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h                |    2 
 linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h                |    1 
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h    |    2 
 tools/examples/network-bridge                                     |   19 
 tools/examples/xmexample.hvm                                      |    6 
 tools/examples/xmexample.vti                                      |    5 
 tools/ioemu/hw/pc.c                                               |   12 
 tools/ioemu/monitor.c                                             |    2 
 tools/ioemu/vl.c                                                  |   10 
 tools/ioemu/vl.h                                                  |    5 
 tools/ioemu/xenstore.c                                            |  220 -
 tools/libfsimage/fat/fat.h                                        |   14 
 tools/libxc/Makefile                                              |    4 
 tools/libxc/ia64/xc_ia64_linux_save.c                             |    6 
 tools/libxc/xc_domain.c                                           |   39 
 tools/libxc/xc_domain_restore.c                                   |   41 
 tools/libxc/xc_domain_save.c                                      | 1587 ++++++++++
 tools/libxc/xc_hvm_build.c                                        |   41 
 tools/libxc/xc_resume.c                                           |   65 
 tools/libxc/xenctrl.h                                             |    3 
 tools/libxc/xenguest.h                                            |   24 
 tools/libxc/xg_private.c                                          |   23 
 tools/libxen/include/xen_host_cpu.h                               |    8 
 tools/libxen/include/xen_vm.h                                     |   22 
 tools/libxen/include/xen_vm_metrics.h                             |    9 
 tools/libxen/src/xen_host_cpu.c                                   |   21 
 tools/libxen/src/xen_vm.c                                         |   50 
 tools/libxen/src/xen_vm_metrics.c                                 |   21 
 tools/pygrub/src/LiloConf.py                                      |  147 
 tools/pygrub/src/pygrub                                           |   32 
 tools/python/README.XendConfig                                    |    1 
 tools/python/README.sxpcfg                                        |    1 
 tools/python/xen/xend/XendConfig.py                               |    2 
 tools/python/xen/xend/XendDomainInfo.py                           |    1 
 tools/python/xen/xend/XendNode.py                                 |    2 
 tools/python/xen/xend/image.py                                    |   95 
 tools/python/xen/xm/create.dtd                                    |    2 
 tools/python/xen/xm/create.py                                     |    4 
 tools/python/xen/xm/main.py                                       |   75 
 tools/python/xen/xm/xenapi_create.py                              |   30 
 tools/xcutils/xc_save.c                                           |    9 
 tools/xm-test/lib/XmTestLib/NetConfig.py                          |    6 
 unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h |    5 
 unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c        |   34 
 xen/arch/ia64/xen/hyperprivop.S                                   |   14 
 xen/arch/ia64/xen/mm.c                                            |    2 
 xen/arch/x86/hvm/hvm.c                                            |   76 
 xen/arch/x86/hvm/svm/svm.c                                        |   19 
 xen/arch/x86/hvm/vmx/vmx.c                                        |   21 
 xen/include/public/hvm/save.h                                     |   35 
 62 files changed, 2718 insertions(+), 2622 deletions(-)

diff -r 5bda20f0723d -r f92a79e39da8 README
--- a/README    Thu Apr 12 16:37:32 2007 -0500
+++ b/README    Fri Apr 13 11:14:26 2007 +0100
@@ -199,3 +199,7 @@ Xend (the Xen daemon) has the following 
     * For optional PAM support, PyPAM:
           URL:    http://www.pangalactic.org/PyPAM/
           Debian: python-pam
+
+    * For optional XenAPI support in XM, PyXML:
+          URL:    http://pyxml.sourceforge.net
+          YUM:    PyXML
diff -r 5bda20f0723d -r f92a79e39da8 docs/xen-api/xenapi-datamodel.tex
--- a/docs/xen-api/xenapi-datamodel.tex Thu Apr 12 16:37:32 2007 -0500
+++ b/docs/xen-api/xenapi-datamodel.tex Fri Apr 13 11:14:26 2007 +0100
@@ -1558,6 +1558,111 @@ void
 \vspace{0.3cm}
 \vspace{0.3cm}
 \vspace{0.3cm}
+\subsubsection{RPC name:~add\_to\_VCPUs\_params\_live}
+
+{\bf Overview:} 
+Add the given key-value pair to VM.VCPUs\_params, and apply that value on
+the running VM.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} void add_to_VCPUs_params_live (session_id s, VM ref self, string key, string value)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt VM ref } & self & The VM \\ \hline 
+
+{\tt string } & key & The key \\ \hline 
+
+{\tt string } & value & The value \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+void
+}
+
+
+
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~set\_memory\_dynamic\_max\_live}
+
+{\bf Overview:} 
+Set memory\_dynamic\_max in database and on running VM.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} void set_memory_dynamic_max_live (session_id s, VM ref self, int max)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt VM ref } & self & The VM \\ \hline 
+
+{\tt int } & max & The memory\_dynamic\_max value \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+void
+}
+
+
+
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~set\_memory\_dynamic\_min\_live}
+
+{\bf Overview:} 
+Set memory\_dynamic\_min in database and on running VM.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} void set_memory_dynamic_min_live (session_id s, VM ref self, int min)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt VM ref } & self & The VM \\ \hline 
+
+{\tt int } & min & The memory\_dynamic\_min value \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+void
+}
+
+
+
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
 \subsubsection{RPC name:~send\_sysrq}
 
 {\bf Overview:} 
@@ -4184,6 +4289,7 @@ Quals & Field & Type & Description \\
 $\mathit{RO}_\mathit{run}$ &  {\tt VCPUs/utilisation} & (int $\rightarrow$ float) Map & Utilisation for all of guest's current VCPUs \\
 $\mathit{RO}_\mathit{run}$ &  {\tt VCPUs/CPU} & (int $\rightarrow$ int) Map & VCPU to PCPU map \\
 $\mathit{RO}_\mathit{run}$ &  {\tt VCPUs/params} & (string $\rightarrow$ string) Map & The live equivalent to VM.VCPUs\_params \\
+$\mathit{RO}_\mathit{run}$ &  {\tt state} & string Set & The state of the guest, eg blocked, dying etc \\
 $\mathit{RO}_\mathit{run}$ &  {\tt start\_time} & datetime & Time at which this VM was last booted \\
 $\mathit{RO}_\mathit{run}$ &  {\tt last\_updated} & datetime & Time at which this information was last updated \\
 \hline
@@ -4395,6 +4501,38 @@ Get the VCPUs/params field of the given 
  \noindent {\bf Return Type:} 
 {\tt 
 (string $\rightarrow$ string) Map
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_state}
+
+{\bf Overview:} 
+Get the state field of the given VM\_metrics.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} (string Set) get_state (session_id s, VM_metrics ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt VM\_metrics ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string Set
 }
 
 
@@ -6601,7 +6739,8 @@ Quals & Field & Type & Description \\
 $\mathit{RO}_\mathit{run}$ &  {\tt speed} & int & the speed of the physical CPU \\
 $\mathit{RO}_\mathit{run}$ &  {\tt modelname} & string & the model name of the physical CPU \\
 $\mathit{RO}_\mathit{run}$ &  {\tt stepping} & string & the stepping of the physical CPU \\
-$\mathit{RO}_\mathit{run}$ &  {\tt flags} & string & the flags of the physical CPU \\
+$\mathit{RO}_\mathit{run}$ &  {\tt flags} & string & the flags of the physical CPU (a decoded version of the features field) \\
+$\mathit{RO}_\mathit{run}$ &  {\tt features} & string & the physical CPU feature bitmap \\
 $\mathit{RO}_\mathit{run}$ &  {\tt utilisation} & float & the current CPU utilisation \\
 \hline
 \end{longtable}
@@ -6858,6 +6997,38 @@ Get the flags field of the given host\_c
 
  \noindent {\bf Signature:} 
 \begin{verbatim} string get_flags (session_id s, host_cpu ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt host\_cpu ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_features}
+
+{\bf Overview:} 
+Get the features field of the given host\_cpu.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_features (session_id s, host_cpu ref self)\end{verbatim}
 
 
 \noindent{\bf Arguments:}
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c       Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c       Fri Apr 13 11:14:26 2007 +0100
@@ -290,5 +290,7 @@ void foo(void)
        DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum);
        DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]);
        DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]);
+       DEFINE_MAPPED_REG_OFS(XSI_B0NATS_OFS, vbnat);
+       DEFINE_MAPPED_REG_OFS(XSI_B1NATS_OFS, vnat);    
 #endif /* CONFIG_XEN */
 }
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c     Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c     Fri Apr 13 11:14:26 2007 +0100
@@ -594,6 +594,10 @@ setup_arch (char **cmdline_p)
 
 
        /* enable IA-64 Machine Check Abort Handling unless disabled */
+#ifdef CONFIG_XEN
+       if (is_running_on_xen() && !is_initial_xendomain())
+               nomca = 1;
+#endif
        if (!nomca)
                ia64_mca_init();
 
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c   Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c   Fri Apr 13 11:14:26 2007 +0100
@@ -852,6 +852,9 @@ time_resume(void)
 
        /* Just trigger a tick.  */
        ia64_cpu_local_tick();
+
+       /* Time interpolator remembers the last timer status.  Forget it */
+       time_interpolator_reset();
 }
 
 ///////////////////////////////////////////////////////////////////////////
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c    Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c    Fri Apr 13 11:14:26 2007 +0100
@@ -418,3 +418,39 @@ xencomm_mini_hypercall_perfmon_op(unsign
        return xencomm_arch_hypercall_perfmon_op(cmd, desc, count);
 }
 EXPORT_SYMBOL_GPL(xencomm_mini_hypercall_perfmon_op);
+
+int
+xencomm_mini_hypercall_sched_op(int cmd, void *arg)
+{
+       int rc, nbr_area = 2;
+       struct xencomm_mini xc_area[2];
+       struct xencomm_handle *desc;
+       unsigned int argsize;
+
+       switch (cmd) {
+       case SCHEDOP_yield:
+       case SCHEDOP_block:
+               argsize = 0;
+               break;
+       case SCHEDOP_shutdown:
+               argsize = sizeof(sched_shutdown_t);
+               break;
+       case SCHEDOP_poll:
+               argsize = sizeof(sched_poll_t);
+               break;
+       case SCHEDOP_remote_shutdown:
+               argsize = sizeof(sched_remote_shutdown_t);
+               break;
+
+       default:
+               printk("%s: unknown sched op %d\n", __func__, cmd);
+               return -ENOSYS;
+       }
+
+       rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc);
+       if (rc)
+               return rc;
+
+       return xencomm_arch_hypercall_sched_op(cmd, desc);
+}
+EXPORT_SYMBOL_GPL(xencomm_mini_hypercall_sched_op);
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S     Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S     Fri Apr 13 11:14:26 2007 +0100
@@ -614,6 +614,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 #ifdef CONFIG_XEN
        ;;
        // r16-r31 all now hold bank1 values
+       mov r15=ar.unat
        movl r2=XSI_BANK1_R16
        movl r3=XSI_BANK1_R16+8
        ;;
@@ -641,6 +642,11 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 .mem.offset 0,0; st8.spill [r2]=r30,16
 .mem.offset 8,0; st8.spill [r3]=r31,16
        ;;
+       mov r3=ar.unat
+       movl r2=XSI_B1NAT
+       ;;
+       st8 [r2]=r3
+       mov ar.unat=r15
        movl r2=XSI_BANKNUM;;
        st4 [r2]=r0;
 #else
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S       Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S       Fri Apr 13 11:14:26 2007 +0100
@@ -2013,33 +2013,6 @@ END(ia32_interrupt)
        DBG_FAULT(66)
        FAULT(66)
 
-#ifdef CONFIG_XEN
-       /*
-        * There is no particular reason for this code to be here, other than that
-        * there happens to be space here that would go unused otherwise.  If this
-        * fault ever gets "unreserved", simply moved the following code to a more
-        * suitable spot...
-        */
-
-GLOBAL_ENTRY(xen_bsw1)
-       /* FIXME: THIS CODE IS NOT NaT SAFE! */
-       movl r30=XSI_BANKNUM;
-       mov r31=1;;
-       st4 [r30]=r31;
-       movl r30=XSI_BANK1_R16;
-       movl r31=XSI_BANK1_R16+8;;
-       ld8 r16=[r30],16; ld8 r17=[r31],16;;
-       ld8 r18=[r30],16; ld8 r19=[r31],16;;
-       ld8 r20=[r30],16; ld8 r21=[r31],16;;
-       ld8 r22=[r30],16; ld8 r23=[r31],16;;
-       ld8 r24=[r30],16; ld8 r25=[r31],16;;
-       ld8 r26=[r30],16; ld8 r27=[r31],16;;
-       ld8 r28=[r30],16; ld8 r29=[r31],16;;
-       ld8 r30=[r30]; ld8 r31=[r31];;
-       br.ret.sptk.many b0
-END(xen_bsw1)
-#endif
-
        .org ia64_ivt+0x7f00
 /////////////////////////////////////////////////////////////////////////////////////////
 // 0x7f00 Entry 67 (size 16 bundles) Reserved
@@ -2167,4 +2140,38 @@ 1:
        (p6) br.spnt.few 1b     // call evtchn_do_upcall again.
        br.sptk.many ia64_leave_kernel   
 END(xen_event_callback)
-#endif
+
+
+       /*
+        * There is no particular reason for this code to be here, other than that
+        * there happens to be space here that would go unused otherwise.  If this
+        * fault ever gets "unreserved", simply moved the following code to a more
+        * suitable spot...
+        */
+
+GLOBAL_ENTRY(xen_bsw1)
+       /* FIXME: THIS CODE IS NOT NaT SAFE! */
+       mov r14=ar.unat
+       movl r30=XSI_B1NAT
+       ;;
+       ld8 r30=[r30];;
+       mov ar.unat=r30
+       movl r30=XSI_BANKNUM;
+       mov r31=1;;
+       st4 [r30]=r31;
+       movl r30=XSI_BANK1_R16;
+       movl r31=XSI_BANK1_R16+8;;
+       ld8.fill r16=[r30],16; ld8.fill r17=[r31],16;;
+       ld8.fill r18=[r30],16; ld8.fill r19=[r31],16;;
+       ld8.fill r20=[r30],16; ld8.fill r21=[r31],16;;
+       ld8.fill r22=[r30],16; ld8.fill r23=[r31],16;;
+       ld8.fill r24=[r30],16; ld8.fill r25=[r31],16;;
+       ld8.fill r26=[r30],16; ld8.fill r27=[r31],16;;
+       ld8.fill r28=[r30],16; ld8.fill r29=[r31],16;;
+       ld8.fill r30=[r30]; ld8.fill r31=[r31];;
+       mov ar.unat=r14
+       br.ret.sptk.many b0
+END(xen_bsw1)
+
+   
+#endif
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/drivers/xen/core/reboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c    Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c    Fri Apr 13 11:14:26 2007 +0100
@@ -118,6 +118,7 @@ static void shutdown_handler(struct xenb
        err = xenbus_transaction_start(&xbt);
        if (err)
                return;
+
        str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
        /* Ignore read errors and empty reads. */
        if (XENBUS_IS_ERR_READ(str)) {
@@ -206,14 +207,12 @@ static int setup_shutdown_watcher(void)
                printk(KERN_ERR "Failed to set shutdown watcher\n");
                return err;
        }
-       xenbus_write(XBT_NIL, "control", "feature-reboot", "1");
 
        err = register_xenbus_watch(&sysrq_watch);
        if (err) {
                printk(KERN_ERR "Failed to set sysrq watcher\n");
                return err;
        }
-       xenbus_write(XBT_NIL, "control", "feature-sysrq", "1");
 
        return 0;
 }
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h      Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h      Fri Apr 13 11:14:26 2007 +0100
@@ -210,7 +210,7 @@ extern unsigned long pg0[];
 
 /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
 #define pmd_none(x)    (!(unsigned long)pmd_val(x))
-#ifdef CONFIG_XEN_COMPAT_030002
+#if CONFIG_XEN_COMPAT <= 0x030002
 /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
    can temporarily clear it. */
 #define pmd_present(x) (pmd_val(x))
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h        Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h        Fri Apr 13 11:14:26 2007 +0100
@@ -64,7 +64,6 @@ extern start_info_t *xen_start_info;
 
 void force_evtchn_callback(void);
 
-#ifndef CONFIG_VMX_GUEST
 /* Turn jiffies into Xen system time. XXX Implement me. */
 #define jiffies_to_st(j)       0
 
@@ -116,6 +115,7 @@ HYPERVISOR_poll(
        return rc;
 }
 
+#ifndef CONFIG_VMX_GUEST
 // for drivers/xen/privcmd/privcmd.c
 #define machine_to_phys_mapping 0
 struct vm_area_struct;
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h        Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h        Fri Apr 13 11:14:26 2007 +0100
@@ -57,6 +57,7 @@
 #define XSI_PSR_IC             (XSI_BASE + XSI_PSR_IC_OFS)
 #define XSI_IPSR               (XSI_BASE + XSI_IPSR_OFS)
 #define XSI_IIP                        (XSI_BASE + XSI_IIP_OFS)
+#define XSI_B1NAT              (XSI_BASE + XSI_B1NATS_OFS)
 #define XSI_BANK1_R16          (XSI_BASE + XSI_BANK1_R16_OFS)
 #define XSI_BANKNUM            (XSI_BASE + XSI_BANKNUM_OFS)
 #define XSI_IHA                        (XSI_BASE + XSI_IHA_OFS)
diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h    Thu Apr 12 16:37:32 2007 -0500
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h    Fri Apr 13 11:14:26 2007 +0100
@@ -411,7 +411,7 @@ static inline int pmd_large(pmd_t pte) {
 #define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
                                   pmd_index(address))
 #define pmd_none(x)    (!pmd_val(x))
-#ifdef CONFIG_XEN_COMPAT_030002
+#if CONFIG_XEN_COMPAT <= 0x030002
 /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
    can temporarily clear it. */
 #define pmd_present(x) (pmd_val(x))
diff -r 5bda20f0723d -r f92a79e39da8 tools/examples/network-bridge
--- a/tools/examples/network-bridge     Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/examples/network-bridge     Fri Apr 13 11:14:26 2007 +0100
@@ -183,12 +183,12 @@ op_start () {
        return
     fi
 
-    if ! link_exists "$vdev"; then
-        if link_exists "$pdev"; then
-            # The device is already up.
-            return
-        else
-            echo "
+    if link_exists "$pdev"; then
+       # The device is already up.
+       return
+    fi
+    if link_exists veth0 && ! link_exists "$vdev"; then
+       echo "
 Link $vdev is missing.
 This may be because you have reached the limit of the number of interfaces
 that the loopback driver supports.  If the loopback driver is a module, you
@@ -196,8 +196,7 @@ driver is compiled statically into the k
 driver is compiled statically into the kernel, then you may set the parameter
 using netloop.nloopbacks=<N> on the domain 0 kernel command line.
 " >&2
-            exit 1
-        fi
+       exit 1
     fi
 
     create_bridge ${bridge}
@@ -224,9 +223,13 @@ using netloop.nloopbacks=<N> on the doma
        add_to_bridge2 ${bridge} ${pdev}
        do_ifup ${netdev}
     else
+       ip link set ${bridge} arp on
+       ip link set ${bridge} multicast on
        # old style without ${vdev}
        transfer_addrs  ${netdev} ${bridge}
        transfer_routes ${netdev} ${bridge}
+       # Attach the real interface to the bridge.
+       add_to_bridge ${bridge} ${netdev}
     fi
 
     if [ ${antispoof} = 'yes' ] ; then
diff -r 5bda20f0723d -r f92a79e39da8 tools/examples/xmexample.hvm
--- a/tools/examples/xmexample.hvm      Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/examples/xmexample.hvm      Fri Apr 13 11:14:26 2007 +0100
@@ -170,6 +170,12 @@ serial='pty'
 
 
 #-----------------------------------------------------------------------------
+#   Qemu Monitor, default is disable
+#   Use ctrl-alt-2 to connect
+#monitor=1
+
+
+#-----------------------------------------------------------------------------
 #   enable sound card support, [sb16|es1370|all|..,..], default none
 #soundhw='sb16'
 
diff -r 5bda20f0723d -r f92a79e39da8 tools/examples/xmexample.vti
--- a/tools/examples/xmexample.vti      Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/examples/xmexample.vti      Fri Apr 13 11:14:26 2007 +0100
@@ -113,6 +113,11 @@ serial='pty'
 serial='pty'
 
 #-----------------------------------------------------------------------------
+#   Qemu Monitor, default is disable
+#   Use ctrl-alt-2 to connect
+#monitor=1
+
+#-----------------------------------------------------------------------------
 #   enable sound card support, [sb16|es1370|all|..,..], default none
 #soundhw='sb16'
 
diff -r 5bda20f0723d -r f92a79e39da8 tools/ioemu/hw/pc.c
--- a/tools/ioemu/hw/pc.c       Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/ioemu/hw/pc.c       Fri Apr 13 11:14:26 2007 +0100
@@ -902,7 +902,6 @@ static void pc_init1(uint64_t ram_size, 
     if (pci_enabled && acpi_enabled) {
         piix4_pm_init(pci_bus, piix3_devfn + 3);
     }
-#endif /* !CONFIG_DM */
 
 #if 0
     /* ??? Need to figure out some way for the user to
@@ -921,6 +920,17 @@ static void pc_init1(uint64_t ram_size, 
         lsi_scsi_attach(scsi, bdrv, -1);
     }
 #endif
+#else
+    if (pci_enabled) {
+        void *scsi;
+
+        scsi = lsi_scsi_init(pci_bus, -1);
+        for (i = 0; i < MAX_SCSI_DISKS ; i++) {
+            if (bs_table[i + MAX_DISKS]) 
+                lsi_scsi_attach(scsi, bs_table[i + MAX_DISKS], -1);
+        }
+    }
+#endif /* !CONFIG_DM */
     /* must be done after all PCI devices are instanciated */
     /* XXX: should be done in the Bochs BIOS */
     if (pci_enabled) {
diff -r 5bda20f0723d -r f92a79e39da8 tools/ioemu/monitor.c
--- a/tools/ioemu/monitor.c     Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/ioemu/monitor.c     Fri Apr 13 11:14:26 2007 +0100
@@ -180,7 +180,7 @@ static void do_commit(void)
 {
     int i;
 
-    for (i = 0; i < MAX_DISKS; i++) {
+    for (i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) {
         if (bs_table[i]) {
             bdrv_commit(bs_table[i]);
         }
diff -r 5bda20f0723d -r f92a79e39da8 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/ioemu/vl.c  Fri Apr 13 11:14:26 2007 +0100
@@ -116,7 +116,7 @@ void *ioport_opaque[MAX_IOPORTS];
 void *ioport_opaque[MAX_IOPORTS];
 IOPortReadFunc *ioport_read_table[3][MAX_IOPORTS];
 IOPortWriteFunc *ioport_write_table[3][MAX_IOPORTS];
-BlockDriverState *bs_table[MAX_DISKS], *fd_table[MAX_FD];
+BlockDriverState *bs_table[MAX_DISKS + MAX_SCSI_DISKS], *fd_table[MAX_FD];
 int vga_ram_size;
 int bios_size;
 static DisplayState display_state;
@@ -1396,7 +1396,7 @@ static void stdio_received_byte(int ch)
         case 's': 
             {
                 int i;
-                for (i = 0; i < MAX_DISKS; i++) {
+                for (i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) {
                     if (bs_table[i])
                         bdrv_commit(bs_table[i]);
                 }
@@ -6057,7 +6057,7 @@ int main(int argc, char **argv)
     int snapshot, linux_boot;
     const char *initrd_filename;
 #ifndef CONFIG_DM
-    const char *hd_filename[MAX_DISKS];
+    const char *hd_filename[MAX_DISKS + MAX_SCSI_DISKS];
 #endif /* !CONFIG_DM */
     const char *fd_filename[MAX_FD];
     const char *kernel_filename, *kernel_cmdline;
@@ -6126,7 +6126,7 @@ int main(int argc, char **argv)
     for(i = 0; i < MAX_FD; i++)
         fd_filename[i] = NULL;
 #ifndef CONFIG_DM
-    for(i = 0; i < MAX_DISKS; i++)
+    for(i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++)
         hd_filename[i] = NULL;
 #endif /* !CONFIG_DM */
     ram_size = DEFAULT_RAM_SIZE * 1024 * 1024;
@@ -6724,7 +6724,7 @@ int main(int argc, char **argv)
     }
 
     /* open the virtual block devices */
-    for(i = 0; i < MAX_DISKS; i++) {
+    for(i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) {
         if (hd_filename[i]) {
             if (!bs_table[i]) {
                 char buf[64];
diff -r 5bda20f0723d -r f92a79e39da8 tools/ioemu/vl.h
--- a/tools/ioemu/vl.h  Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/ioemu/vl.h  Fri Apr 13 11:14:26 2007 +0100
@@ -818,8 +818,9 @@ int vnc_start_viewer(int port);
 
 /* ide.c */
 #define MAX_DISKS 4
-
-extern BlockDriverState *bs_table[MAX_DISKS];
+#define MAX_SCSI_DISKS 7
+
+extern BlockDriverState *bs_table[MAX_DISKS + MAX_SCSI_DISKS];
 
 void isa_ide_init(int iobase, int iobase2, int irq,
                   BlockDriverState *hd0, BlockDriverState *hd1);
diff -r 5bda20f0723d -r f92a79e39da8 tools/ioemu/xenstore.c
--- a/tools/ioemu/xenstore.c    Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/ioemu/xenstore.c    Fri Apr 13 11:14:26 2007 +0100
@@ -18,7 +18,7 @@
 #include <fcntl.h>
 
 static struct xs_handle *xsh = NULL;
-static char *media_filename[MAX_DISKS];
+static char *media_filename[MAX_DISKS + MAX_SCSI_DISKS];
 static QEMUTimer *insert_timer = NULL;
 
 #define UWAIT_MAX (30*1000000) /* thirty seconds */
@@ -30,11 +30,11 @@ static int pasprintf(char **buf, const c
     int ret = 0;
 
     if (*buf)
-       free(*buf);
+        free(*buf);
     va_start(ap, fmt);
     if (vasprintf(buf, fmt, ap) == -1) {
-       buf = NULL;
-       ret = -1;
+        buf = NULL;
+        ret = -1;
     }
     va_end(ap);
     return ret;
@@ -44,12 +44,12 @@ static void insert_media(void *opaque)
 {
     int i;
 
-    for (i = 0; i < MAX_DISKS; i++) {
-       if (media_filename[i] && bs_table[i]) {
-           do_change(bs_table[i]->device_name, media_filename[i]);
-           free(media_filename[i]);
-           media_filename[i] = NULL;
-       }
+    for (i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) {
+        if (media_filename[i] && bs_table[i]) {
+            do_change(bs_table[i]->device_name, media_filename[i]);
+            free(media_filename[i]);
+            media_filename[i] = NULL;
+        }
     }
 }
 
@@ -57,7 +57,7 @@ void xenstore_check_new_media_present(in
 {
 
     if (insert_timer == NULL)
-       insert_timer = qemu_new_timer(rt_clock, insert_media, NULL);
+        insert_timer = qemu_new_timer(rt_clock, insert_media, NULL);
     qemu_mod_timer(insert_timer, qemu_get_clock(rt_clock) + timeout);
 }
 
@@ -82,17 +82,17 @@ void xenstore_parse_domain_config(int do
     char **e = NULL;
     char *buf = NULL, *path;
     char *fpath = NULL, *bpath = NULL,
-         *dev = NULL, *params = NULL, *type = NULL;
-    int i;
+        *dev = NULL, *params = NULL, *type = NULL;
+    int i, is_scsi;
     unsigned int len, num, hd_index;
 
-    for(i = 0; i < MAX_DISKS; i++)
+    for(i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++)
         media_filename[i] = NULL;
 
     xsh = xs_daemon_open();
     if (xsh == NULL) {
-       fprintf(logfile, "Could not contact xenstore for domain config\n");
-       return;
+        fprintf(logfile, "Could not contact xenstore for domain config\n");
+        return;
     }
 
     path = xs_get_domain_path(xsh, domid);
@@ -102,59 +102,60 @@ void xenstore_parse_domain_config(int do
     }
 
     if (pasprintf(&buf, "%s/device/vbd", path) == -1)
-       goto out;
+        goto out;
 
     e = xs_directory(xsh, XBT_NULL, buf, &num);
     if (e == NULL)
-       goto out;
+        goto out;
 
     for (i = 0; i < num; i++) {
-       /* read the backend path */
-       if (pasprintf(&buf, "%s/device/vbd/%s/backend", path, e[i]) == -1)
-           continue;
-       free(bpath);
+        /* read the backend path */
+        if (pasprintf(&buf, "%s/device/vbd/%s/backend", path, e[i]) == -1)
+            continue;
+        free(bpath);
         bpath = xs_read(xsh, XBT_NULL, buf, &len);
-       if (bpath == NULL)
-           continue;
-       /* read the name of the device */
-       if (pasprintf(&buf, "%s/dev", bpath) == -1)
-           continue;
-       free(dev);
-       dev = xs_read(xsh, XBT_NULL, buf, &len);
-       if (dev == NULL)
-           continue;
-       if (strncmp(dev, "hd", 2) || strlen(dev) != 3)
-           continue;
-       hd_index = dev[2] - 'a';
-       if (hd_index >= MAX_DISKS)
-           continue;
-       /* read the type of the device */
-       if (pasprintf(&buf, "%s/device/vbd/%s/device-type", path, e[i]) == -1)
-           continue;
-       free(type);
-       type = xs_read(xsh, XBT_NULL, buf, &len);
-       if (pasprintf(&buf, "%s/params", bpath) == -1)
-           continue;
-       free(params);
-       params = xs_read(xsh, XBT_NULL, buf, &len);
-       if (params == NULL)
-           continue;
+        if (bpath == NULL)
+            continue;
+        /* read the name of the device */
+        if (pasprintf(&buf, "%s/dev", bpath) == -1)
+            continue;
+        free(dev);
+        dev = xs_read(xsh, XBT_NULL, buf, &len);
+        if (dev == NULL)
+            continue;
+        is_scsi = !strncmp(dev, "sd", 2);
+        if ((strncmp(dev, "hd", 2) && !is_scsi) || strlen(dev) != 3 )
+            continue;
+        hd_index = dev[2] - 'a';
+        if (hd_index >= (is_scsi ? MAX_SCSI_DISKS : MAX_DISKS))
+            continue;
+        /* read the type of the device */
+        if (pasprintf(&buf, "%s/device/vbd/%s/device-type", path, e[i]) == -1)
+            continue;
+        free(type);
+        type = xs_read(xsh, XBT_NULL, buf, &len);
+        if (pasprintf(&buf, "%s/params", bpath) == -1)
+            continue;
+        free(params);
+        params = xs_read(xsh, XBT_NULL, buf, &len);
+        if (params == NULL)
+            continue;
         /* 
          * check if device has a phantom vbd; the phantom is hooked
          * to the frontend device (for ease of cleanup), so lookup 
          * the frontend device, and see if there is a phantom_vbd
          * if there is, we will use resolution as the filename
          */
-       if (pasprintf(&buf, "%s/device/vbd/%s/phantom_vbd", path, e[i]) == -1)
-           continue;
-       free(fpath);
+        if (pasprintf(&buf, "%s/device/vbd/%s/phantom_vbd", path, e[i]) == -1)
+            continue;
+        free(fpath);
         fpath = xs_read(xsh, XBT_NULL, buf, &len);
-       if (fpath) {
-           if (pasprintf(&buf, "%s/dev", fpath) == -1)
-               continue;
-           free(params);
+        if (fpath) {
+            if (pasprintf(&buf, "%s/dev", fpath) == -1)
+                continue;
+            free(params);
             params = xs_read(xsh, XBT_NULL, buf , &len);
-           if (params) {
+            if (params) {
                 /* 
                  * wait for device, on timeout silently fail because we will 
                  * fail to open below
@@ -163,19 +164,20 @@ void xenstore_parse_domain_config(int do
             }
         }
 
-       bs_table[hd_index] = bdrv_new(dev);
-       /* check if it is a cdrom */
-       if (type && !strcmp(type, "cdrom")) {
-           bdrv_set_type_hint(bs_table[hd_index], BDRV_TYPE_CDROM);
-           if (pasprintf(&buf, "%s/params", bpath) != -1)
-               xs_watch(xsh, buf, dev);
-       }
-       /* open device now if media present */
-       if (params[0]) {
-            if (bdrv_open(bs_table[hd_index], params, 0 /* snapshot */) < 0)
+        bs_table[hd_index + (is_scsi ? MAX_DISKS : 0)] = bdrv_new(dev);
+        /* check if it is a cdrom */
+        if (type && !strcmp(type, "cdrom")) {
+            bdrv_set_type_hint(bs_table[hd_index], BDRV_TYPE_CDROM);
+            if (pasprintf(&buf, "%s/params", bpath) != -1)
+                xs_watch(xsh, buf, dev);
+        }
+        /* open device now if media present */
+        if (params[0]) {
+            if (bdrv_open(bs_table[hd_index + (is_scsi ? MAX_DISKS : 0)],
+                          params, 0 /* snapshot */) < 0)
                 fprintf(stderr, "qemu: could not open hard disk image '%s'\n",
                         params);
-       }
+        }
     }
 
     /* Set a watch for log-dirty requests from the migration tools */
@@ -199,7 +201,7 @@ int xenstore_fd(void)
 int xenstore_fd(void)
 {
     if (xsh)
-       return xs_fileno(xsh);
+        return xs_fileno(xsh);
     return -1;
 }
 
@@ -316,7 +318,7 @@ void xenstore_process_event(void *opaque
 
     vec = xs_read_watch(xsh, &num);
     if (!vec)
-       return;
+        return;
 
     if (!strcmp(vec[XS_WATCH_TOKEN], "logdirty")) {
         xenstore_process_logdirty_event();
@@ -324,23 +326,23 @@ void xenstore_process_event(void *opaque
     }
 
     if (strncmp(vec[XS_WATCH_TOKEN], "hd", 2) ||
-       strlen(vec[XS_WATCH_TOKEN]) != 3)
-       goto out;
+        strlen(vec[XS_WATCH_TOKEN]) != 3)
+        goto out;
     hd_index = vec[XS_WATCH_TOKEN][2] - 'a';
     image = xs_read(xsh, XBT_NULL, vec[XS_WATCH_PATH], &len);
     if (image == NULL || !strcmp(image, bs_table[hd_index]->filename))
-       goto out;               /* gone or identical */
+        goto out;  /* gone or identical */
 
     do_eject(0, vec[XS_WATCH_TOKEN]);
     bs_table[hd_index]->filename[0] = 0;
     if (media_filename[hd_index]) {
-       free(media_filename[hd_index]);
-       media_filename[hd_index] = NULL;
+        free(media_filename[hd_index]);
+        media_filename[hd_index] = NULL;
     }
 
     if (image[0]) {
-       media_filename[hd_index] = strdup(image);
-       xenstore_check_new_media_present(5000);
+        media_filename[hd_index] = strdup(image);
+        xenstore_check_new_media_present(5000);
     }
 
  out:
@@ -354,7 +356,7 @@ void xenstore_write_vncport(int display)
     char *portstr = NULL;
 
     if (xsh == NULL)
-       return;
+        return;
 
     path = xs_get_domain_path(xsh, domid);
     if (path == NULL) {
@@ -363,10 +365,10 @@ void xenstore_write_vncport(int display)
     }
 
     if (pasprintf(&buf, "%s/console/vnc-port", path) == -1)
-       goto out;
+        goto out;
 
     if (pasprintf(&portstr, "%d", 5900 + display) == -1)
-       goto out;
+        goto out;
 
     if (xs_write(xsh, XBT_NULL, buf, portstr, strlen(portstr)) == 0)
         fprintf(logfile, "xs_write() vncport failed\n");
@@ -383,41 +385,41 @@ int xenstore_read_vncpasswd(int domid)
     unsigned int i, len, rc = 0;
 
     if (xsh == NULL) {
-       return -1;
+        return -1;
     }
 
     path = xs_get_domain_path(xsh, domid);
     if (path == NULL) {
-       fprintf(logfile, "xs_get_domain_path() error. domid %d.\n", domid);
-       return -1;
+        fprintf(logfile, "xs_get_domain_path() error. domid %d.\n", domid);
+        return -1;
     }
 
     pasprintf(&buf, "%s/vm", path);
     uuid = xs_read(xsh, XBT_NULL, buf, &len);
     if (uuid == NULL) {
-       fprintf(logfile, "xs_read(): uuid get error. %s.\n", buf);
-       free(path);
-       return -1;
+        fprintf(logfile, "xs_read(): uuid get error. %s.\n", buf);
+        free(path);
+        return -1;
     }
 
     pasprintf(&buf, "%s/vncpasswd", uuid);
     passwd = xs_read(xsh, XBT_NULL, buf, &len);
     if (passwd == NULL) {
-       fprintf(logfile, "xs_read(): vncpasswd get error. %s.\n", buf);
-       free(uuid);
-       free(path);
-       return rc;
+        fprintf(logfile, "xs_read(): vncpasswd get error. %s.\n", buf);
+        free(uuid);
+        free(path);
+        return rc;
     }
 
     for (i=0; i<len && i<63; i++) {
-       vncpasswd[i] = passwd[i];
-       passwd[i] = '\0';
+        vncpasswd[i] = passwd[i];
+        passwd[i] = '\0';
     }
     vncpasswd[len] = '\0';
     pasprintf(&buf, "%s/vncpasswd", uuid);
     if (xs_write(xsh, XBT_NULL, buf, passwd, len) == 0) {
-       fprintf(logfile, "xs_write() vncpasswd failed.\n");
-       rc = -1;
+        fprintf(logfile, "xs_write() vncpasswd failed.\n");
+        rc = -1;
     }
 
     free(passwd);
@@ -443,7 +445,7 @@ char **xenstore_domain_get_devices(struc
         goto out;
 
     if (pasprintf(&buf, "%s/device/%s", path,devtype) == -1)
-       goto out;
+        goto out;
 
     e = xs_directory(handle, XBT_NULL, buf, num);
 
@@ -496,13 +498,13 @@ char *xenstore_backend_read_variable(str
 
     buf = get_device_variable_path(devtype, inst, var);
     if (NULL == buf)
-       goto out;
+        goto out;
 
     value = xs_read(handle, XBT_NULL, buf, &len);
 
     free(buf);
 
-out:
+ out:
     return value;
 }
 
@@ -569,27 +571,27 @@ char *xenstore_vm_read(int domid, char *
     char *buf = NULL, *path = NULL, *value = NULL;
 
     if (xsh == NULL)
-       goto out;
+        goto out;
 
     path = xs_get_domain_path(xsh, domid);
     if (path == NULL) {
-       fprintf(logfile, "xs_get_domain_path(%d): error\n", domid);
-       goto out;
+        fprintf(logfile, "xs_get_domain_path(%d): error\n", domid);
+        goto out;
     }
 
     pasprintf(&buf, "%s/vm", path);
     free(path);
     path = xs_read(xsh, XBT_NULL, buf, NULL);
     if (path == NULL) {
-       fprintf(logfile, "xs_read(%s): read error\n", buf);
-       goto out;
+        fprintf(logfile, "xs_read(%s): read error\n", buf);
+        goto out;
     }
 
     pasprintf(&buf, "%s/%s", path, key);
     value = xs_read(xsh, XBT_NULL, buf, len);
     if (value == NULL) {
-       fprintf(logfile, "xs_read(%s): read error\n", buf);
-       goto out;
+        fprintf(logfile, "xs_read(%s): read error\n", buf);
+        goto out;
     }
 
  out:
@@ -604,27 +606,27 @@ int xenstore_vm_write(int domid, char *k
     int rc = -1;
 
     if (xsh == NULL)
-       goto out;
+        goto out;
 
     path = xs_get_domain_path(xsh, domid);
     if (path == NULL) {
-       fprintf(logfile, "xs_get_domain_path: error\n");
-       goto out;
+        fprintf(logfile, "xs_get_domain_path: error\n");
+        goto out;
     }
 
     pasprintf(&buf, "%s/vm", path);
     free(path);
     path = xs_read(xsh, XBT_NULL, buf, NULL);
     if (path == NULL) {
-       fprintf(logfile, "xs_read(%s): read error\n", buf);
-       goto out;
+        fprintf(logfile, "xs_read(%s): read error\n", buf);
+        goto out;
     }
 
     pasprintf(&buf, "%s/%s", path, key);
     rc = xs_write(xsh, XBT_NULL, buf, value, strlen(value));
     if (rc) {
-       fprintf(logfile, "xs_write(%s, %s): write error\n", buf, key);
-       goto out;
+        fprintf(logfile, "xs_write(%s, %s): write error\n", buf, key);
+        goto out;
     }
 
  out:
diff -r 5bda20f0723d -r f92a79e39da8 tools/libfsimage/fat/fat.h
--- a/tools/libfsimage/fat/fat.h        Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libfsimage/fat/fat.h        Fri Apr 13 11:14:26 2007 +0100
@@ -84,17 +84,17 @@ struct fat_bpb {
 #define FAT_DIRENTRY_LENGTH       32
 
 #define FAT_DIRENTRY_ATTRIB(entry) \
-  (*((unsigned char *) (entry+11)))
+  (*((__u8 *) (entry+11)))
 #define FAT_DIRENTRY_VALID(entry) \
-  ( ((*((unsigned char *) entry)) != 0) \
-    && ((*((unsigned char *) entry)) != 0xE5) \
+  ( ((*((__u8 *) entry)) != 0) \
+    && ((*((__u8 *) entry)) != 0xE5) \
     && !(FAT_DIRENTRY_ATTRIB(entry) & FAT_ATTRIB_NOT_OK_MASK) )
 #define FAT_DIRENTRY_FIRST_CLUSTER(entry) \
-  ((*((unsigned short *) (entry+26)))+(*((unsigned short *) (entry+20)) << 16))
+  ((*((__u16 *) (entry+26)))+(*((__u16 *) (entry+20)) << 16))
 #define FAT_DIRENTRY_FILELENGTH(entry) \
-  (*((unsigned long *) (entry+28)))
+  (*((__u32 *) (entry+28)))
 
 #define FAT_LONGDIR_ID(entry) \
-  (*((unsigned char *) (entry)))
+  (*((__u8 *) (entry)))
 #define FAT_LONGDIR_ALIASCHECKSUM(entry) \
-  (*((unsigned char *) (entry+13)))
+  (*((__u8 *) (entry+13)))
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/Makefile
--- a/tools/libxc/Makefile      Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxc/Makefile      Fri Apr 13 11:14:26 2007 +0100
@@ -26,8 +26,8 @@ CTRL_SRCS-$(CONFIG_X86_Linux) += xc_ptra
 
 GUEST_SRCS-y :=
 GUEST_SRCS-y += xg_private.c
-GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_linux_save.c
-GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_save.c
+GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_domain_save.c
+GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c
 
 # symlink libelf from xen/common/libelf/
 LIBELF_SRCS := libelf-tools.c libelf-loader.c
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/ia64/xc_ia64_linux_save.c
--- a/tools/libxc/ia64/xc_ia64_linux_save.c     Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxc/ia64/xc_ia64_linux_save.c     Fri Apr 13 11:14:26 2007 +0100
@@ -134,8 +134,10 @@ retry:
 }
 
 int
-xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
-              uint32_t max_factor, uint32_t flags, int (*suspend)(int))
+xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
+               uint32_t max_factor, uint32_t flags, int (*suspend)(int),
+               int hvm, void *(*init_qemu_maps)(int, unsigned),
+               void (*qemu_flip_buffer)(int, int))
 {
     DECLARE_DOMCTL;
     xc_dominfo_t info;
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxc/xc_domain.c   Fri Apr 13 11:14:26 2007 +0100
@@ -8,6 +8,7 @@
 
 #include "xc_private.h"
 #include <xen/memory.h>
+#include <xen/hvm/hvm_op.h>
 
 int xc_domain_create(int xc_handle,
                      uint32_t ssidref,
@@ -655,6 +656,44 @@ int xc_domain_send_trigger(int xc_handle
     domctl.u.sendtrigger.vcpu = vcpu;
 
     return do_domctl(xc_handle, &domctl);
+}
+
+int xc_set_hvm_param(int handle, domid_t dom, int param, unsigned long value)
+{
+    DECLARE_HYPERCALL;
+    xen_hvm_param_t arg;
+    int rc;
+
+    hypercall.op     = __HYPERVISOR_hvm_op;
+    hypercall.arg[0] = HVMOP_set_param;
+    hypercall.arg[1] = (unsigned long)&arg;
+    arg.domid = dom;
+    arg.index = param;
+    arg.value = value;
+    if ( lock_pages(&arg, sizeof(arg)) != 0 )
+        return -1;
+    rc = do_xen_hypercall(handle, &hypercall);
+    unlock_pages(&arg, sizeof(arg));
+    return rc;
+}
+
+int xc_get_hvm_param(int handle, domid_t dom, int param, unsigned long *value)
+{
+    DECLARE_HYPERCALL;
+    xen_hvm_param_t arg;
+    int rc;
+
+    hypercall.op     = __HYPERVISOR_hvm_op;
+    hypercall.arg[0] = HVMOP_get_param;
+    hypercall.arg[1] = (unsigned long)&arg;
+    arg.domid = dom;
+    arg.index = param;
+    if ( lock_pages(&arg, sizeof(arg)) != 0 )
+        return -1;
+    rc = do_xen_hypercall(handle, &hypercall);
+    unlock_pages(&arg, sizeof(arg));
+    *value = arg.value;
+    return rc;
 }
 
 /*
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c   Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxc/xc_domain_restore.c   Fri Apr 13 11:14:26 2007 +0100
@@ -688,33 +688,22 @@ int xc_domain_restore(int xc_handle, int
             ERROR("error zeroing magic pages");
             goto out;
         }
-        
-        xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, magic_pfns[0]);
-        xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1]);
-        xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, magic_pfns[2]);
-        xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
-        xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
+                
+        if ( (rc = xc_set_hvm_param(xc_handle, dom, 
+                                    HVM_PARAM_IOREQ_PFN, magic_pfns[0]))
+             || (rc = xc_set_hvm_param(xc_handle, dom, 
+                                       HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1]))
+             || (rc = xc_set_hvm_param(xc_handle, dom, 
+                                       HVM_PARAM_STORE_PFN, magic_pfns[2]))
+             || (rc = xc_set_hvm_param(xc_handle, dom, 
+                                       HVM_PARAM_PAE_ENABLED, pae))
+             || (rc = xc_set_hvm_param(xc_handle, dom, 
+                                       HVM_PARAM_STORE_EVTCHN, store_evtchn)) )
+        {
+            ERROR("error setting HVM params: %i", rc);
+            goto out;
+        }
         *store_mfn = magic_pfns[2];
-
-        /* Read vcpu contexts */
-        for ( i = 0; i <= max_vcpu_id; i++ )
-        {
-            if ( !(vcpumap & (1ULL << i)) )
-                continue;
-
-            if ( !read_exact(io_fd, &(ctxt), sizeof(ctxt)) )
-            {
-                ERROR("error read vcpu context.\n");
-                goto out;
-            }
-            
-            if ( (rc = xc_vcpu_setcontext(xc_handle, dom, i, &ctxt)) )
-            {
-                ERROR("Could not set vcpu context, rc=%d", rc);
-                goto out;
-            }
-            rc = 1;
-        }
 
         /* Read HVM context */
         if ( !read_exact(io_fd, &rec_len, sizeof(uint32_t)) )
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_domain_save.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_domain_save.c      Fri Apr 13 11:14:26 2007 +0100
@@ -0,0 +1,1587 @@
+/******************************************************************************
+ * xc_linux_save.c
+ *
+ * Save the state of a running Linux session.
+ *
+ * Copyright (c) 2003, K A Fraser.
+ */
+
+#include <inttypes.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#include "xc_private.h"
+#include "xc_dom.h"
+#include "xg_private.h"
+#include "xg_save_restore.h"
+
+#include <xen/hvm/params.h>
+#include <xen/hvm/e820.h>
+
+/*
+** Default values for important tuning parameters. Can override by passing
+** non-zero replacement values to xc_domain_save().
+**
+** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
+**
+*/
+#define DEF_MAX_ITERS   29   /* limit us to 30 times round loop   */
+#define DEF_MAX_FACTOR   3   /* never send more than 3x p2m_size  */
+
+/* max mfn of the whole machine */
+static unsigned long max_mfn;
+
+/* virtual starting address of the hypervisor */
+static unsigned long hvirt_start;
+
+/* #levels of page tables used by the current guest */
+static unsigned int pt_levels;
+
+/* HVM: shared-memory bitmaps for getting log-dirty bits from qemu-dm */
+static unsigned long *qemu_bitmaps[2];
+static int qemu_active;
+static int qemu_non_active;
+
+/* number of pfns this guest has (i.e. number of entries in the P2M) */
+static unsigned long p2m_size;
+
+/* Live mapping of the table mapping each PFN to its current MFN. */
+static xen_pfn_t *live_p2m = NULL;
+
+/* Live mapping of system MFN to PFN table. */
+static xen_pfn_t *live_m2p = NULL;
+static unsigned long m2p_mfn0;
+
+/* grep fodder: machine_to_phys */
+
+#define mfn_to_pfn(_mfn) live_m2p[(_mfn)]
+
+/*
+ * Returns TRUE if the given machine frame number has a unique mapping
+ * in the guest's pseudophysical map.
+ */
+#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn)          \
+    (((_mfn) < (max_mfn)) &&                    \
+     ((mfn_to_pfn(_mfn) < (p2m_size)) &&        \
+      (live_p2m[mfn_to_pfn(_mfn)] == (_mfn))))
+
+/* Returns TRUE if MFN is successfully converted to a PFN. */
+#define translate_mfn_to_pfn(_pmfn)                             \
+({                                                              \
+    unsigned long mfn = *(_pmfn);                               \
+    int _res = 1;                                               \
+    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )                       \
+        _res = 0;                                               \
+    else                                                        \
+        *(_pmfn) = mfn_to_pfn(mfn);                             \
+    _res;                                                       \
+})
+
+/*
+** During (live) save/migrate, we maintain a number of bitmaps to track
+** which pages we have to send, to fixup, and to skip.
+*/
+
+#define BITS_PER_LONG (sizeof(unsigned long) * 8)
+#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+#define BITMAP_SIZE   (BITS_TO_LONGS(p2m_size) * sizeof(unsigned long))
+
+#define BITMAP_ENTRY(_nr,_bmap) \
+   ((volatile unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
+
+#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
+
+static inline int test_bit (int nr, volatile void * addr)
+{
+    return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
+}
+
+static inline void clear_bit (int nr, volatile void * addr)
+{
+    BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
+}
+
+static inline void set_bit ( int nr, volatile void * addr)
+{
+    BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
+}
+
+/* Returns the hamming weight (i.e. the number of bits set) in a N-bit word */
+static inline unsigned int hweight32(unsigned int w)
+{
+    unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
+    res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
+    res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
+    res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
+    return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
+}
+
+static inline int count_bits ( int nr, volatile void *addr)
+{
+    int i, count = 0;
+    volatile unsigned long *p = (volatile unsigned long *)addr;
+    /* We know that the array is padded to unsigned long. */
+    for ( i = 0; i < (nr / (sizeof(unsigned long)*8)); i++, p++ )
+        count += hweight32(*p);
+    return count;
+}
+
+static inline int permute( int i, int nr, int order_nr  )
+{
+    /* Need a simple permutation function so that we scan pages in a
+       pseudo random order, enabling us to get a better estimate of
+       the domain's page dirtying rate as we go (there are often
+       contiguous ranges of pfns that have similar behaviour, and we
+       want to mix them up. */
+
+    /* e.g. nr->oder 15->4 16->4 17->5 */
+    /* 512MB domain, 128k pages, order 17 */
+
+    /*
+      QPONMLKJIHGFEDCBA
+             QPONMLKJIH
+      GFEDCBA
+     */
+
+    /*
+      QPONMLKJIHGFEDCBA
+                  EDCBA
+             QPONM
+      LKJIHGF
+      */
+
+    do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); }
+    while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */
+
+    return i;
+}
+
+static uint64_t tv_to_us(struct timeval *new)
+{
+    return (new->tv_sec * 1000000) + new->tv_usec;
+}
+
+static uint64_t llgettimeofday(void)
+{
+    struct timeval now;
+    gettimeofday(&now, NULL);
+    return tv_to_us(&now);
+}
+
+static uint64_t tv_delta(struct timeval *new, struct timeval *old)
+{
+    return (((new->tv_sec - old->tv_sec)*1000000) +
+            (new->tv_usec - old->tv_usec));
+}
+
+static int noncached_write(int fd, int live, void *buffer, int len) 
+{
+    static int write_count = 0;
+
+    int rc = write(fd,buffer,len);
+
+    write_count += len;
+    if ( write_count >= (MAX_PAGECACHE_USAGE * PAGE_SIZE) )
+    {
+        /* Time to discard cache - dont care if this fails */
+        discard_file_cache(fd, 0 /* no flush */);
+        write_count = 0;
+    }
+
+    return rc;
+}
+
+#ifdef ADAPTIVE_SAVE
+
+/*
+** We control the rate at which we transmit (or save) to minimize impact
+** on running domains (including the target if we're doing live migrate).
+*/
+
+#define MAX_MBIT_RATE    500      /* maximum transmit rate for migrate */
+#define START_MBIT_RATE  100      /* initial transmit rate for migrate */
+
+/* Scaling factor to convert between a rate (in Mb/s) and time (in usecs) */
+#define RATE_TO_BTU      781250
+
+/* Amount in bytes we allow ourselves to send in a burst */
+#define BURST_BUDGET (100*1024)
+
+/* We keep track of the current and previous transmission rate */
+static int mbit_rate, ombit_rate = 0;
+
+/* Have we reached the maximum transmission rate? */
+#define RATE_IS_MAX() (mbit_rate == MAX_MBIT_RATE)
+
+static inline void initialize_mbit_rate()
+{
+    mbit_rate = START_MBIT_RATE;
+}
+
+static int ratewrite(int io_fd, int live, void *buf, int n)
+{
+    static int budget = 0;
+    static int burst_time_us = -1;
+    static struct timeval last_put = { 0 };
+    struct timeval now;
+    struct timespec delay;
+    long long delta;
+
+    if ( START_MBIT_RATE == 0 )
+        return noncached_write(io_fd, live, buf, n);
+
+    budget -= n;
+    if ( budget < 0 )
+    {
+        if ( mbit_rate != ombit_rate )
+        {
+            burst_time_us = RATE_TO_BTU / mbit_rate;
+            ombit_rate = mbit_rate;
+            DPRINTF("rate limit: %d mbit/s burst budget %d slot time %d\n",
+                    mbit_rate, BURST_BUDGET, burst_time_us);
+        }
+        if ( last_put.tv_sec == 0 )
+        {
+            budget += BURST_BUDGET;
+            gettimeofday(&last_put, NULL);
+        }
+        else
+        {
+            while ( budget < 0 )
+            {
+                gettimeofday(&now, NULL);
+                delta = tv_delta(&now, &last_put);
+                while ( delta > burst_time_us )
+                {
+                    budget += BURST_BUDGET;
+                    last_put.tv_usec += burst_time_us;
+                    if ( last_put.tv_usec > 1000000 )
+                    {
+                        last_put.tv_usec -= 1000000;
+                        last_put.tv_sec++;
+                    }
+                    delta -= burst_time_us;
+                }
+                if ( budget > 0 )
+                    break;
+                delay.tv_sec = 0;
+                delay.tv_nsec = 1000 * (burst_time_us - delta);
+                while ( delay.tv_nsec > 0 )
+                    if ( nanosleep(&delay, &delay) == 0 )
+                        break;
+            }
+        }
+    }
+    return noncached_write(io_fd, live, buf, n);
+}
+
+#else /* ! ADAPTIVE SAVE */
+
+#define RATE_IS_MAX() (0)
+#define ratewrite(_io_fd, _live, _buf, _n) noncached_write((_io_fd), (_live), (_buf), (_n))
+#define initialize_mbit_rate()
+
+#endif
+
+static inline ssize_t write_exact(int fd, void *buf, size_t count)
+{
+    return (write(fd, buf, count) == count);
+}
+
+static int print_stats(int xc_handle, uint32_t domid, int pages_sent,
+                       xc_shadow_op_stats_t *stats, int print)
+{
+    static struct timeval wall_last;
+    static long long      d0_cpu_last;
+    static long long      d1_cpu_last;
+
+    struct timeval        wall_now;
+    long long             wall_delta;
+    long long             d0_cpu_now, d0_cpu_delta;
+    long long             d1_cpu_now, d1_cpu_delta;
+
+    gettimeofday(&wall_now, NULL);
+
+    d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000;
+    d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000;
+
+    if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
+        DPRINTF("ARRHHH!!\n");
+
+    wall_delta = tv_delta(&wall_now,&wall_last)/1000;
+    if ( wall_delta == 0 )
+        wall_delta = 1;
+
+    d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000;
+    d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
+
+    if ( print )
+        DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
+                "dirtied %dMb/s %" PRId32 " pages\n",
+                wall_delta,
+                (int)((d0_cpu_delta*100)/wall_delta),
+                (int)((d1_cpu_delta*100)/wall_delta),
+                (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
+                (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
+                stats->dirty_count);
+
+#ifdef ADAPTIVE_SAVE
+    if ( ((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate )
+    {
+        mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8)))
+            + 50;
+        if ( mbit_rate > MAX_MBIT_RATE )
+            mbit_rate = MAX_MBIT_RATE;
+    }
+#endif
+
+    d0_cpu_last = d0_cpu_now;
+    d1_cpu_last = d1_cpu_now;
+    wall_last   = wall_now;
+
+    return 0;
+}
+
+
+static int analysis_phase(int xc_handle, uint32_t domid, int p2m_size,
+                          unsigned long *arr, int runs)
+{
+    long long start, now;
+    xc_shadow_op_stats_t stats;
+    int j;
+
+    start = llgettimeofday();
+
+    for ( j = 0; j < runs; j++ )
+    {
+        int i;
+
+        xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
+                          arr, p2m_size, NULL, 0, NULL);
+        DPRINTF("#Flush\n");
+        for ( i = 0; i < 40; i++ )
+        {
+            usleep(50000);
+            now = llgettimeofday();
+            xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK,
+                              NULL, 0, NULL, 0, &stats);
+            DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n",
+                    ((now-start)+500)/1000,
+                    stats.fault_count, stats.dirty_count);
+        }
+    }
+
+    return -1;
+}
+
+
+static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
+                             int dom, xc_dominfo_t *info)
+{
+    int i = 0;
+
+    if ( !(*suspend)(dom) )
+    {
+        ERROR("Suspend request failed");
+        return -1;
+    }
+
+ retry:
+
+    if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 )
+    {
+        ERROR("Could not get domain info");
+        return -1;
+    }
+
+    if ( info->dying )
+    {
+        ERROR("domain is dying");
+        return -1;
+    }
+
+    if ( info->crashed )
+    {
+        ERROR("domain has crashed");
+        return -1;
+    }
+
+    if ( info->shutdown )
+    {
+        switch ( info->shutdown_reason )
+        {
+        case SHUTDOWN_poweroff:
+        case SHUTDOWN_reboot:
+            ERROR("domain has shut down");
+            return -1;
+        case SHUTDOWN_suspend:
+            return 0;
+        case SHUTDOWN_crash:
+            ERROR("domain has crashed");
+            return -1;
+        }
+    }
+
+    if ( info->paused )
+    {
+        /* Try unpausing domain, wait, and retest. */
+        xc_domain_unpause( xc_handle, dom );
+        ERROR("Domain was paused. Wait and re-test.");
+        usleep(10000); /* 10ms */
+        goto retry;
+    }
+
+    if ( ++i < 100 )
+    {
+        ERROR("Retry suspend domain");
+        usleep(10000); /* 10ms */
+        goto retry;
+    }
+
+    ERROR("Unable to suspend domain.");
+
+    return -1;
+}
+
+/*
+** Map the top-level page of MFNs from the guest. The guest might not have
+** finished resuming from a previous restore operation, so we wait a while for
+** it to update the MFN to a reasonable value.
+*/
+static void *map_frame_list_list(int xc_handle, uint32_t dom,
+                                 shared_info_t *shinfo)
+{
+    int count = 100;
+    void *p;
+
+    while ( count-- && (shinfo->arch.pfn_to_mfn_frame_list_list == 0) )
+        usleep(10000);
+
+    if ( shinfo->arch.pfn_to_mfn_frame_list_list == 0 )
+    {
+        ERROR("Timed out waiting for frame list updated.");
+        return NULL;
+    }
+
+    p = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ,
+                             shinfo->arch.pfn_to_mfn_frame_list_list);
+    if ( p == NULL )
+        ERROR("Couldn't map p2m_frame_list_list (errno %d)", errno);
+
+    return p;
+}
+
+/*
+** During transfer (or in the state file), all page-table pages must be
+** converted into a 'canonical' form where references to actual mfns
+** are replaced with references to the corresponding pfns.
+**
+** This function performs the appropriate conversion, taking into account
+** which entries do not require canonicalization (in particular, those
+** entries which map the virtual address reserved for the hypervisor).
+*/
+static int canonicalize_pagetable(unsigned long type, unsigned long pfn,
+                           const void *spage, void *dpage)
+{
+    /* 2-level page tables use 4-byte PTEs; everything else uses 8 bytes. */
+    const int wide = (pt_levels != 2);
+    const int nr_ptes = PAGE_SIZE / (wide ? 8 : 4);
+    int idx, xen_lo = nr_ptes, xen_hi = nr_ptes, raced = 0;
+    uint64_t pte;
+
+    /*
+    ** Slots [xen_lo, xen_hi) hold reserved hypervisor mappings and are
+    ** blanked in the output. The range starts out empty; where it really
+    ** lies depends on the page-table type and the number of paging levels.
+    */
+    switch ( pt_levels )
+    {
+    case 2:
+        if ( type == XEN_DOMCTL_PFINFO_L2TAB )
+            xen_lo = (hvirt_start >> L2_PAGETABLE_SHIFT);
+        break;
+    case 3:
+        if ( type == XEN_DOMCTL_PFINFO_L3TAB )
+            xen_lo = L3_PAGETABLE_ENTRIES_PAE;
+        else if ( type == XEN_DOMCTL_PFINFO_L2TAB )
+        {
+            /*
+            ** In PAE only the L2 mapping the top 1GB contains Xen mappings.
+            ** It is recognised by the guest linear mapping which Xen always
+            ** ensures is present in that L2; guests must ensure that this
+            ** check fails for every other L2.
+            */
+            const uint64_t *l2 = spage;
+            int slot = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
+
+            if ( ((l2[slot] >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 )
+            {
+                /* The hypervisor area begins right at hvirt_start. */
+                xen_lo = slot;
+            }
+            else if ( hvirt_start != 0xf5800000 )
+            {
+                /* An older L2, from before the hole was shrunk. */
+                slot = (0xf5800000 >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
+                if ( ((l2[slot] >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 )
+                    xen_lo = slot;
+            }
+        }
+        break;
+    case 4:
+        if ( type == XEN_DOMCTL_PFINFO_L4TAB )
+        {
+            /*
+            ** XXX SMH: these ought to be computed from hvirt_start (which we
+            ** have) and hvirt_end (which we don't).
+            */
+            xen_lo = 256;
+            xen_hi = 272;
+        }
+        break;
+    }
+
+    /* Walk the table, canonicalising one PTE per slot. */
+    for ( idx = 0; idx < nr_ptes; idx++ )
+    {
+        if ( wide )
+            pte = ((const uint64_t *)spage)[idx];
+        else
+            pte = ((const uint32_t *)spage)[idx];
+
+        if ( (idx >= xen_lo) && (idx < xen_hi) )
+            pte = 0;
+
+        if ( pte & _PAGE_PRESENT )
+        {
+            unsigned long mfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
+            unsigned long gpfn;
+
+            if ( MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
+                gpfn = mfn_to_pfn(mfn);
+            else
+            {
+                /* Stale type info -- quite feasible under live migration.
+                   Zap the frame (the page is retransmitted later) and tell
+                   the caller about the race, which is fatal if !live. */
+                gpfn  = 0;
+                raced = 1;
+            }
+
+            pte &= ~MADDR_MASK_X86;
+            pte |= (uint64_t)gpfn << PAGE_SHIFT;
+
+            /*
+             * PAE guest L3Es can carry these flags when running on a 64bit
+             * hypervisor; strip them to avoid any surprise at restore time.
+             */
+            if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L3TAB) )
+                pte &= ~(_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED);
+        }
+
+        if ( wide )
+            ((uint64_t *)dpage)[idx] = pte;
+        else
+            ((uint32_t *)dpage)[idx] = pte;
+    }
+
+    return raced;
+}
+
+/* Map the M2P table covering max_mfn (protection 'prot'); m2p_mfn0 is set to
+ * its first chunk's MFN. Returns NULL, with nothing left behind, on failure. */
+static xen_pfn_t *xc_map_m2p(int xc_handle, unsigned long max_mfn, int prot)
+{
+    struct xen_machphys_mfn_list xmml;
+    privcmd_mmap_entry_t *entries = NULL;
+    unsigned long m2p_size   = M2P_SIZE(max_mfn);
+    unsigned long m2p_chunks = M2P_CHUNKS(max_mfn), i;
+    xen_pfn_t *m2p = MAP_FAILED, *extent_start;
+    int rc, ok = 0;
+
+    xmml.max_extents = m2p_chunks;
+    if ( !(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t))) )
+    {
+        ERROR("failed to allocate space for m2p mfns");
+        return NULL;
+    }
+    set_xen_guest_handle(xmml.extent_start, extent_start);
+
+    if ( xc_memory_op(xc_handle, XENMEM_machphys_mfn_list, &xmml) ||
+         (xmml.nr_extents != m2p_chunks) )
+    {
+        ERROR("xc_get_m2p_mfns");
+        goto out;
+    }
+
+    if ( (m2p = mmap(NULL, m2p_size, prot,
+                     MAP_SHARED, xc_handle, 0)) == MAP_FAILED )
+    {
+        ERROR("failed to mmap m2p");
+        goto out;
+    }
+
+    if ( !(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t))) )
+    {
+        ERROR("failed to allocate space for mmap entries");
+        goto out;
+    }
+
+    for ( i = 0; i < m2p_chunks; i++ )
+    {
+        entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE));
+        entries[i].mfn = extent_start[i];
+        entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT;
+    }
+
+    if ( (rc = xc_map_foreign_ranges(xc_handle, DOMID_XEN,
+                                     entries, m2p_chunks)) < 0 )
+    {
+        ERROR("xc_mmap_foreign_ranges failed (rc = %d)", rc);
+        goto out;
+    }
+
+    m2p_mfn0 = entries[0].mfn;
+    ok = 1;
+
+ out:
+    if ( !ok && (m2p != MAP_FAILED) )
+        munmap(m2p, m2p_size);
+    free(extent_start);
+    free(entries);
+    return ok ? m2p : NULL;
+}
+
+
+/* Map the guest's live p2m table and write its canonicalised frame list to
+ * io_fd. Returns the p2m mapping (left in place), or NULL on any failure. */
+static xen_pfn_t *map_and_save_p2m_table(int xc_handle,
+                                         int io_fd,
+                                         uint32_t dom,
+                                         unsigned long p2m_size,
+                                         shared_info_t *live_shinfo)
+{
+    vcpu_guest_context_t ctxt;
+
+    /* Live (read-only) mappings: the frame-list-list page, the frame list it
+       points at, and finally the p2m table itself. */
+    xen_pfn_t *fll_map = NULL;
+    xen_pfn_t *fl_map  = NULL;
+    xen_pfn_t *table   = NULL;
+
+    /* Private copy of the frame list, canonicalised before being written. */
+    xen_pfn_t *fl_copy = NULL;
+
+    int idx, ok = 0;
+
+    fll_map = map_frame_list_list(xc_handle, dom, live_shinfo);
+    if ( fll_map == NULL )
+        goto out;
+
+    fl_map = xc_map_foreign_batch(xc_handle, dom, PROT_READ,
+                                  fll_map, P2M_FLL_ENTRIES);
+    if ( fl_map == NULL )
+    {
+        ERROR("Couldn't map p2m_frame_list");
+        goto out;
+    }
+
+    /*
+     * Map every frame of the pfn->mfn table. For migrate to succeed the
+     * guest must keep using the same frames for it; it is not clear why it
+     * would want to change them, and from a safety POV we are OK anyhow.
+     */
+    table = xc_map_foreign_batch(xc_handle, dom, PROT_READ,
+                                 fl_map, P2M_FL_ENTRIES);
+    if ( table == NULL )
+    {
+        ERROR("Couldn't map p2m table");
+        goto out;
+    }
+    live_p2m = table; /* the translation macros go through live_p2m */
+
+    /* Snapshot the live frame list so that it can be rewritten locally. */
+    fl_copy = malloc(P2M_FL_SIZE);
+    if ( fl_copy == NULL )
+    {
+        ERROR("Couldn't allocate p2m_frame_list array");
+        goto out;
+    }
+    memcpy(fl_copy, fl_map, P2M_FL_SIZE);
+
+    /* Canonicalise the snapshot: one frame-list entry per fpp pfns. */
+    for ( idx = 0; idx < p2m_size; idx += fpp )
+    {
+        if ( translate_mfn_to_pfn(&fl_copy[idx/fpp]) )
+            continue;
+
+        ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys");
+        ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, idx, idx/fpp,
+              (uint64_t)fl_copy[idx/fpp]);
+        goto out;
+    }
+
+    if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) != 0 )
+    {
+        ERROR("Could not get vcpu context");
+        goto out;
+    }
+
+    /*
+     * A PAE guest that understands extended CR3 (PDPTs above 4GB) gets an
+     * extended-info structure written ahead of the frame list; it lets the
+     * restore code turn off its slow paths.
+     */
+    if ( (pt_levels == 3) &&
+         (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3)) )
+    {
+        unsigned long signature = ~0UL;
+        uint32_t chunk_sz = sizeof(struct vcpu_guest_context);
+        uint32_t tot_sz   = sizeof(struct vcpu_guest_context) + 8;
+        char chunk_sig[]  = "vcpu";
+        int wrote;
+
+        /* Record layout: signature, total size, "vcpu", chunk size, ctxt. */
+        wrote = write_exact(io_fd, &signature, sizeof(signature)) &&
+                write_exact(io_fd, &tot_sz,    sizeof(tot_sz)) &&
+                write_exact(io_fd, &chunk_sig, 4) &&
+                write_exact(io_fd, &chunk_sz,  sizeof(chunk_sz)) &&
+                write_exact(io_fd, &ctxt,      sizeof(ctxt));
+        if ( !wrote )
+        {
+            ERROR("write: extended info");
+            goto out;
+        }
+    }
+
+    if ( !write_exact(io_fd, fl_copy, P2M_FL_SIZE) )
+    {
+        ERROR("write: p2m_frame_list");
+        goto out;
+    }
+
+    ok = 1;
+
+ out:
+    /* The table mapping survives only on success; the rest is scratch. */
+    if ( !ok && (table != NULL) )
+        munmap(table, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
+
+    if ( fll_map != NULL )
+        munmap(fll_map, PAGE_SIZE);
+
+    if ( fl_map != NULL )
+        munmap(fl_map, P2M_FLL_ENTRIES * PAGE_SIZE);
+
+    free(fl_copy);
+
+    return ok ? table : NULL;
+}
+
+
+
+/*
+ * Save the memory and CPU state of domain 'dom' to 'io_fd'. With XCFLAGS_LIVE
+ * pages are sent in log-dirty rounds and 'suspend' runs before the last one.
+ * Zero max_iters/max_factor select the defaults. Returns 0 on success, else 1.
+ */
+int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
+                   uint32_t max_factor, uint32_t flags, int (*suspend)(int),
+                   int hvm, void *(*init_qemu_maps)(int, unsigned),
+                   void (*qemu_flip_buffer)(int, int))
+{
+    xc_dominfo_t info;
+
+    int rc = 1, i, j, last_iter, iter = 0;
+    int live  = (flags & XCFLAGS_LIVE);
+    int debug = (flags & XCFLAGS_DEBUG);
+    int race = 0, sent_last_iter, skip_this_iter;
+
+    /* The new domain's shared-info frame number. */
+    unsigned long shared_info_frame;
+
+    /* A copy of the CPU context of the guest. */
+    vcpu_guest_context_t ctxt;
+
+    /* A table containing the type of each PFN (/not/ MFN!). */
+    unsigned long *pfn_type = NULL;
+    unsigned long *pfn_batch = NULL;
+
+    /* A copy of one frame of guest memory. */
+    char page[PAGE_SIZE];
+
+    /* Live mapping of shared info structure */
+    shared_info_t *live_shinfo = NULL;
+
+    /* base of the region in which domain memory is mapped */
+    unsigned char *region_base = NULL;
+
+    /* power of 2 order of p2m_size */
+    int order_nr;
+
+    /* bitmap of pages:
+       - that should be sent this iteration (unless later marked as skip);
+       - to skip this iteration because already dirty;
+       - to fixup by sending at the end if not already resent; */
+    unsigned long *to_send = NULL, *to_skip = NULL, *to_fix = NULL;
+
+    xc_shadow_op_stats_t stats;
+
+    unsigned long needed_to_fix = 0;
+    unsigned long total_sent    = 0;
+
+    uint64_t vcpumap = 1ULL;
+
+    /* HVM: a buffer for holding HVM context */
+    uint32_t hvm_buf_size = 0;
+    uint8_t *hvm_buf = NULL;
+
+    /* HVM: magic frames for ioreqs and xenstore comms. */
+    uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */
+
+    /* If no explicit control parameters given, use defaults */
+    max_iters  = max_iters  ? : DEF_MAX_ITERS;
+    max_factor = max_factor ? : DEF_MAX_FACTOR;
+
+    initialize_mbit_rate();
+
+    if ( !get_platform_info(xc_handle, dom,
+                            &max_mfn, &hvirt_start, &pt_levels) )
+    {
+        ERROR("Unable to get platform info.");
+        return 1;
+    }
+
+    if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 )
+    {
+        ERROR("Could not get domain info");
+        return 1;
+    }
+
+    shared_info_frame = info.shared_info_frame;
+
+    /* Map the shared info frame */
+    if ( !hvm )
+    {
+        live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                           PROT_READ, shared_info_frame);
+        if ( !live_shinfo )
+        {
+            ERROR("Couldn't map live_shinfo");
+            goto out;
+        }
+    }
+
+    /* Get the size of the P2M table */
+    p2m_size = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom);
+
+    /* Domain is still running at this point */
+    if ( live )
+    {
+        /* Live suspend. Enable log-dirty mode. */
+        if ( xc_shadow_control(xc_handle, dom,
+                               XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
+                               NULL, 0, NULL, 0, NULL) < 0 )
+        {
+            ERROR("Couldn't enable shadow mode");
+            goto out;
+        }
+
+        if ( hvm )
+        {
+            /* Get qemu-dm logging dirty pages too */
+            void *seg = init_qemu_maps(dom, BITMAP_SIZE);
+            qemu_bitmaps[0] = seg;
+            qemu_bitmaps[1] = seg + BITMAP_SIZE;
+            qemu_active = 0;
+            qemu_non_active = 1;
+        }
+    }
+    else
+    {
+        /* This is a non-live suspend. Suspend the domain .*/
+        if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info) )
+        {
+            ERROR("Domain appears not to have suspended");
+            goto out;
+        }
+    }
+
+    last_iter = !live;
+
+    /* pretend we sent all the pages last iteration */
+    sent_last_iter = p2m_size;
+
+    /* calculate the power of 2 order of p2m_size, e.g.
+       15->4 16->4 17->5 */
+    for ( i = p2m_size-1, order_nr = 0; i ; i >>= 1, order_nr++ )
+        continue;
+
+    /* Setup to_send / to_fix and to_skip bitmaps */
+    to_send = malloc(BITMAP_SIZE);
+    to_fix  = calloc(1, BITMAP_SIZE);
+    to_skip = malloc(BITMAP_SIZE);
+
+    if ( !to_send || !to_fix || !to_skip )
+    {
+        ERROR("Couldn't allocate to_send array");
+        goto out;
+    }
+
+    memset(to_send, 0xff, BITMAP_SIZE);
+
+    if ( lock_pages(to_send, BITMAP_SIZE) )
+    {
+        ERROR("Unable to lock to_send");
+        goto out; /* not a bare return: free bitmaps, leave log-dirty mode */
+    }
+
+    /* (to fix is local only) */
+    if ( lock_pages(to_skip, BITMAP_SIZE) )
+    {
+        ERROR("Unable to lock to_skip");
+        goto out; /* not a bare return: free bitmaps, leave log-dirty mode */
+    }
+
+    if ( hvm )
+    {
+        /* Need another buffer for HVM context */
+        hvm_buf_size = xc_domain_hvm_getcontext(xc_handle, dom, 0, 0);
+        if ( hvm_buf_size == -1 )
+        {
+            ERROR("Couldn't get HVM context size from Xen");
+            goto out;
+        }
+        hvm_buf = malloc(hvm_buf_size);
+        if ( !hvm_buf )
+        {
+            ERROR("Couldn't allocate memory");
+            goto out;
+        }
+    }
+
+    analysis_phase(xc_handle, dom, p2m_size, to_skip, 0);
+
+    /* We want zeroed memory so use calloc rather than malloc. */
+    pfn_type   = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type));
+    pfn_batch  = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch));
+    if ( (pfn_type == NULL) || (pfn_batch == NULL) )
+    {
+        ERROR("failed to alloc memory for pfn_type and/or pfn_batch arrays");
+        errno = ENOMEM;
+        goto out;
+    }
+
+    if ( lock_pages(pfn_type, MAX_BATCH_SIZE * sizeof(*pfn_type)) )
+    {
+        ERROR("Unable to lock");
+        goto out;
+    }
+
+    /* Setup the mfn_to_pfn table mapping */
+    if ( !(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ)) )
+    {
+        ERROR("Failed to map live M2P table");
+        goto out;
+    }
+
+    /* Start writing out the saved-domain record. */
+    if ( !write_exact(io_fd, &p2m_size, sizeof(unsigned long)) )
+    {
+        ERROR("write: p2m_size");
+        goto out;
+    }
+
+    if ( !hvm )
+    {
+        int err = 0;
+        unsigned long mfn;
+
+        /* Map the P2M table, and write the list of P2M frames */
+        live_p2m = map_and_save_p2m_table(xc_handle, io_fd, dom,
+                                          p2m_size, live_shinfo);
+        if ( live_p2m == NULL )
+        {
+            ERROR("Failed to map/save the p2m frame list");
+            goto out;
+        }
+
+        /* Quick belt and braces sanity check. */
+        for ( i = 0; i < p2m_size; i++ )
+        {
+            mfn = live_p2m[i];
+            if( (mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i) )
+            {
+                DPRINTF("i=0x%x mfn=%lx live_m2p=%lx\n", i,
+                        mfn, mfn_to_pfn(mfn));
+                err++;
+            }
+        }
+        DPRINTF("Had %d unexplained entries in p2m table\n", err);
+    }
+
+    print_stats(xc_handle, dom, 0, &stats, 0);
+
+    /* Now write out each data page, canonicalising page tables as we go... */
+    for ( ; ; )
+    {
+        unsigned int prev_pc, sent_this_iter, N, batch;
+
+        iter++;
+        sent_this_iter = 0;
+        skip_this_iter = 0;
+        prev_pc = 0;
+        N = 0;
+
+        DPRINTF("Saving memory pages: iter %d   0%%", iter);
+
+        while ( N < p2m_size )
+        {
+            unsigned int this_pc = (N * 100) / p2m_size;
+            int rc;
+
+            if ( (this_pc - prev_pc) >= 5 )
+            {
+                DPRINTF("\b\b\b\b%3d%%", this_pc);
+                prev_pc = this_pc;
+            }
+
+            if ( !last_iter )
+            {
+                /* Slightly wasteful to peek the whole array every time,
+                   but this is fast enough for the moment. */
+                rc = xc_shadow_control(
+                    xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip,
+                    p2m_size, NULL, 0, NULL);
+                if ( rc != p2m_size )
+                {
+                    ERROR("Error peeking shadow bitmap");
+                    goto out;
+                }
+            }
+
+            /* load pfn_type[] with the mfn of all the pages we're doing in
+               this batch. */
+            for  ( batch = 0;
+                   (batch < MAX_BATCH_SIZE) && (N < p2m_size);
+                   N++ )
+            {
+                int n = permute(N, p2m_size, order_nr);
+
+                if ( debug )
+                    DPRINTF("%d pfn= %08lx mfn= %08lx %d  [mfn]= %08lx\n",
+                            iter, (unsigned long)n, hvm ? 0 : live_p2m[n],
+                            test_bit(n, to_send),
+                            hvm ? 0 : mfn_to_pfn(live_p2m[n]&0xFFFFF));
+
+                if ( !last_iter &&
+                     test_bit(n, to_send) &&
+                     test_bit(n, to_skip) )
+                    skip_this_iter++; /* stats keeping */
+
+                if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
+                       (test_bit(n, to_send) && last_iter) ||
+                       (test_bit(n, to_fix)  && last_iter)) )
+                    continue;
+
+                /* Skip PFNs that aren't really there */
+                if ( hvm && ((n >= 0xa0 && n < 0xc0) /* VGA hole */
+                             || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT)
+                                 && n < (1ULL<<32) >> PAGE_SHIFT)) /* MMIO */ )
+                    continue;
+
+                /*
+                ** we get here if:
+                **  1. page is marked to_send & hasn't already been re-dirtied
+                **  2. (ignore to_skip in last iteration)
+                **  3. add in pages that still need fixup (net bufs)
+                */
+
+                pfn_batch[batch] = n;
+
+                /* Hypercall interfaces operate in PFNs for HVM guests
+                * and MFNs for PV guests */
+                if ( hvm )
+                    pfn_type[batch] = n;
+                else
+                    pfn_type[batch] = live_p2m[n];
+                if ( !is_mapped(pfn_type[batch]) )
+                {
+                    /*
+                    ** not currently in psuedo-physical map -- set bit
+                    ** in to_fix since we must send this page in last_iter
+                    ** unless its sent sooner anyhow, or it never enters
+                    ** pseudo-physical map (e.g. for ballooned down doms)
+                    */
+                    set_bit(n, to_fix);
+                    continue;
+                }
+
+                if ( last_iter &&
+                     test_bit(n, to_fix) &&
+                     !test_bit(n, to_send) )
+                {
+                    needed_to_fix++;
+                    DPRINTF("Fix! iter %d, pfn %x. mfn %lx\n",
+                            iter, n, pfn_type[batch]);
+                }
+
+                clear_bit(n, to_fix);
+
+                batch++;
+            }
+
+            if ( batch == 0 )
+                goto skip; /* vanishingly unlikely... */
+
+            region_base = xc_map_foreign_batch(
+                xc_handle, dom, PROT_READ, pfn_type, batch);
+            if ( region_base == NULL )
+            {
+                ERROR("map batch failed");
+                goto out;
+            }
+
+            if ( !hvm )
+            {
+                /* Get page types */
+                for ( j = 0; j < batch; j++ )
+                    ((uint32_t *)pfn_type)[j] = pfn_type[j];
+                if ( xc_get_pfn_type_batch(xc_handle, dom, batch,
+                                           (uint32_t *)pfn_type) )
+                {
+                    ERROR("get_pfn_type_batch failed");
+                    goto out;
+                }
+                for ( j = batch-1; j >= 0; j-- )
+                    pfn_type[j] = ((uint32_t *)pfn_type)[j];
+
+                for ( j = 0; j < batch; j++ )
+                {
+                    if ( (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) ==
+                         XEN_DOMCTL_PFINFO_XTAB )
+                    {
+                        DPRINTF("type fail: page %i mfn %08lx\n",
+                                j, pfn_type[j]);
+                        continue;
+                    }
+
+                    if ( debug )
+                        DPRINTF("%d pfn= %08lx mfn= %08lx [mfn]= %08lx"
+                                " sum= %08lx\n",
+                                iter,
+                                (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) |
+                                pfn_batch[j],
+                                pfn_type[j],
+                                mfn_to_pfn(pfn_type[j] &
+                                           ~XEN_DOMCTL_PFINFO_LTAB_MASK),
+                                csum_page(region_base + (PAGE_SIZE*j)));
+
+                    /* canonicalise mfn->pfn */
+                    pfn_type[j] = (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) |
+                        pfn_batch[j];
+                }
+            }
+
+            if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) )
+            {
+                ERROR("Error when writing to state file (2) (errno %d)",
+                      errno);
+                goto out;
+            }
+
+            if ( !write_exact(io_fd, pfn_type, sizeof(unsigned long)*batch) )
+            {
+                ERROR("Error when writing to state file (3) (errno %d)",
+                      errno);
+                goto out;
+            }
+
+            /* entering this loop, pfn_type is now in pfns (Not mfns) */
+            for ( j = 0; j < batch; j++ )
+            {
+                unsigned long pfn, pagetype;
+                void *spage = (char *)region_base + (PAGE_SIZE*j);
+
+                pfn      = pfn_type[j] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
+                pagetype = pfn_type[j] &  XEN_DOMCTL_PFINFO_LTAB_MASK;
+
+                /* write out pages in batch */
+                if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
+                    continue;
+
+                pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
+
+                if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) &&
+                     (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) )
+                {
+                    /* We have a pagetable page: need to rewrite it. */
+                    race =
+                        canonicalize_pagetable(pagetype, pfn, spage, page);
+
+                    if ( race && !live )
+                    {
+                        ERROR("Fatal PT race (pfn %lx, type %08lx)", pfn,
+                              pagetype);
+                        goto out;
+                    }
+
+                    if ( ratewrite(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE )
+                    {
+                        ERROR("Error when writing to state file (4)"
+                              " (errno %d)", errno);
+                        goto out;
+                    }
+                }
+                else
+                {
+                    /* We have a normal page: just write it directly. */
+                    if ( ratewrite(io_fd, live, spage, PAGE_SIZE) !=
+                         PAGE_SIZE )
+                    {
+                        ERROR("Error when writing to state file (5)"
+                              " (errno %d)", errno);
+                        goto out;
+                    }
+                }
+            } /* end of the write out for this batch */
+
+            sent_this_iter += batch;
+
+            munmap(region_base, batch*PAGE_SIZE);
+
+        } /* end of this while loop for this iteration */
+
+      skip:
+
+        total_sent += sent_this_iter;
+
+        DPRINTF("\r %d: sent %d, skipped %d, ",
+                iter, sent_this_iter, skip_this_iter );
+
+        if ( last_iter )
+        {
+            print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
+
+            DPRINTF("Total pages sent= %ld (%.2fx)\n",
+                    total_sent, ((float)total_sent)/p2m_size );
+            DPRINTF("(of which %ld were fixups)\n", needed_to_fix  );
+        }
+
+        if ( last_iter && debug )
+        {
+            int minusone = -1;
+            memset(to_send, 0xff, BITMAP_SIZE);
+            debug = 0;
+            DPRINTF("Entering debug resend-all mode\n");
+
+            /* send "-1" to put receiver into debug mode */
+            if ( !write_exact(io_fd, &minusone, sizeof(int)) )
+            {
+                ERROR("Error when writing to state file (6) (errno %d)",
+                      errno);
+                goto out;
+            }
+
+            continue;
+        }
+
+        if ( last_iter )
+            break;
+
+        if ( live )
+        {
+            if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
+                 (iter >= max_iters) ||
+                 (sent_this_iter+skip_this_iter < 50) ||
+                 (total_sent > p2m_size*max_factor) )
+            {
+                DPRINTF("Start last iteration\n");
+                last_iter = 1;
+
+                if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info) )
+                {
+                    ERROR("Domain appears not to have suspended");
+                    goto out;
+                }
+
+                DPRINTF("SUSPEND shinfo %08lx\n", info.shared_info_frame);
+            }
+
+            if ( xc_shadow_control(xc_handle, dom,
+                                   XEN_DOMCTL_SHADOW_OP_CLEAN, to_send,
+                                   p2m_size, NULL, 0, &stats) != p2m_size )
+            {
+                ERROR("Error flushing shadow PT");
+                goto out;
+            }
+
+            if ( hvm )
+            {
+                /* Pull in the dirty bits from qemu-dm too */
+                if ( !last_iter )
+                {
+                    qemu_active = qemu_non_active;
+                    qemu_non_active = qemu_active ? 0 : 1;
+                    qemu_flip_buffer(dom, qemu_active);
+                    for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ )
+                    {
+                        to_send[j] |= qemu_bitmaps[qemu_non_active][j];
+                        qemu_bitmaps[qemu_non_active][j] = 0;
+                    }
+                }
+                else
+                {
+                    for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ )
+                        to_send[j] |= qemu_bitmaps[qemu_active][j];
+                }
+            }
+
+            sent_last_iter = sent_this_iter;
+
+            print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
+
+        }
+    } /* end of infinite for loop */
+
+    DPRINTF("All memory is saved\n");
+
+    {
+        struct {
+            int minustwo;
+            int max_vcpu_id;
+            uint64_t vcpumap;
+        } chunk = { -2, info.max_vcpu_id };
+
+        if ( info.max_vcpu_id >= 64 )
+        {
+            ERROR("Too many VCPUS in guest!");
+            goto out;
+        }
+
+        for ( i = 1; i <= info.max_vcpu_id; i++ )
+        {
+            xc_vcpuinfo_t vinfo;
+            if ( (xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) &&
+                 vinfo.online )
+                vcpumap |= 1ULL << i;
+        }
+
+        chunk.vcpumap = vcpumap;
+        if ( !write_exact(io_fd, &chunk, sizeof(chunk)) )
+        {
+            ERROR("Error when writing to state file (errno %d)", errno);
+            goto out;
+        }
+    }
+
+    /* Zero terminate */
+    i = 0;
+    if ( !write_exact(io_fd, &i, sizeof(int)) )
+    {
+        ERROR("Error when writing to state file (6') (errno %d)", errno);
+        goto out;
+    }
+
+    if ( hvm )
+    {
+        uint32_t rec_size;
+
+        /* Save magic-page locations. */
+        memset(magic_pfns, 0, sizeof(magic_pfns));
+        xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN,
+                         (unsigned long *)&magic_pfns[0]);
+        xc_get_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN,
+                         (unsigned long *)&magic_pfns[1]);
+        xc_get_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN,
+                         (unsigned long *)&magic_pfns[2]);
+        if ( !write_exact(io_fd, magic_pfns, sizeof(magic_pfns)) )
+        {
+            ERROR("Error when writing to state file (7)");
+            goto out;
+        }
+
+        /* Get HVM context from Xen and save it too */
+        if ( (rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf,
+                                                  hvm_buf_size)) == -1 )
+        {
+            ERROR("HVM:Could not get hvm buffer");
+            goto out;
+        }
+
+        if ( !write_exact(io_fd, &rec_size, sizeof(uint32_t)) )
+        {
+            ERROR("error write hvm buffer size");
+            goto out;
+        }
+
+        if ( !write_exact(io_fd, hvm_buf, rec_size) )
+        {
+            ERROR("write HVM info failed!\n");
+            goto out;
+        }
+
+        /* HVM guests are done now */
+        rc = 0;
+        goto out;
+    }
+
+    /* PV guests only from now on */
+
+    /* Send through a list of all the PFNs that were not in map at the close */
+    {
+        unsigned int i,j;
+        unsigned long pfntab[1024];
+
+        for ( i = 0, j = 0; i < p2m_size; i++ )
+        {
+            if ( !is_mapped(live_p2m[i]) )
+                j++;
+        }
+
+        if ( !write_exact(io_fd, &j, sizeof(unsigned int)) )
+        {
+            ERROR("Error when writing to state file (6a) (errno %d)", errno);
+            goto out;
+        }
+
+        for ( i = 0, j = 0; i < p2m_size; )
+        {
+            if ( !is_mapped(live_p2m[i]) )
+                pfntab[j++] = i;
+
+            i++;
+            if ( (j == 1024) || (i == p2m_size) )
+            {
+                if ( !write_exact(io_fd, &pfntab, sizeof(unsigned long)*j) )
+                {
+                    ERROR("Error when writing to state file (6b) (errno %d)",
+                          errno);
+                    goto out;
+                }
+                j = 0;
+            }
+        }
+    }
+
+    if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) )
+    {
+        ERROR("Could not get vcpu context");
+        goto out;
+    }
+
+    /* Canonicalise the suspend-record frame number. */
+    if ( !translate_mfn_to_pfn(&ctxt.user_regs.edx) )
+    {
+        ERROR("Suspend record is not in range of pseudophys map");
+        goto out;
+    }
+
+    for ( i = 0; i <= info.max_vcpu_id; i++ )
+    {
+        if ( !(vcpumap & (1ULL << i)) )
+            continue;
+
+        if ( (i != 0) && xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) )
+        {
+            ERROR("No context for VCPU%d", i);
+            goto out;
+        }
+
+        /* Canonicalise each GDT frame number. */
+        for ( j = 0; (512*j) < ctxt.gdt_ents; j++ )
+        {
+            if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[j]) )
+            {
+                ERROR("GDT frame is not in range of pseudophys map");
+                goto out;
+            }
+        }
+
+        /* Canonicalise the page table base pointer. */
+        if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[3])) )
+        {
+            ERROR("PT base is not in range of pseudophys map");
+            goto out;
+        }
+        ctxt.ctrlreg[3] =
+            xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3])));
+
+        /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
+        if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
+        {
+            if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[1])) )
+            {
+                ERROR("PT base is not in range of pseudophys map");
+                goto out;
+            }
+            /* Least-significant bit means 'valid PFN'. */
+            ctxt.ctrlreg[1] = 1 |
+                xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[1])));
+        }
+
+        if ( !write_exact(io_fd, &ctxt, sizeof(ctxt)) )
+        {
+            ERROR("Error when writing to state file (1) (errno %d)", errno);
+            goto out;
+        }
+    }
+
+    /*
+     * Reset the MFN to be a known-invalid value. See map_frame_list_list().
+     */
+    memcpy(page, live_shinfo, PAGE_SIZE);
+    ((shared_info_t *)page)->arch.pfn_to_mfn_frame_list_list = 0;
+    if ( !write_exact(io_fd, page, PAGE_SIZE) )
+    {
+        ERROR("Error when writing to state file (1) (errno %d)", errno);
+        goto out;
+    }
+
+    /* Success! */
+    rc = 0;
+
+ out:
+    if ( live )
+    {
+        if ( xc_shadow_control(xc_handle, dom,
+                               XEN_DOMCTL_SHADOW_OP_OFF,
+                               NULL, 0, NULL, 0, NULL) < 0 )
+            DPRINTF("Warning - couldn't disable shadow mode");
+    }
+
+    /* Flush last write and discard cache for file. */
+    discard_file_cache(io_fd, 1 /* flush */);
+
+    if ( live_shinfo )
+        munmap(live_shinfo, PAGE_SIZE);
+
+    if ( live_p2m )
+        munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
+
+    if ( live_m2p )
+        munmap(live_m2p, M2P_SIZE(max_mfn));
+
+    free(pfn_type);
+    free(pfn_batch);
+    free(to_send);
+    free(to_fix);
+    free(to_skip);
+    free(hvm_buf);
+
+    DPRINTF("Save exit rc=%d\n",rc);
+
+    return !!rc;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c        Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxc/xc_hvm_build.c        Fri Apr 13 11:14:26 2007 +0100
@@ -28,47 +28,6 @@ typedef union
     vcpu_guest_context_x86_32_t c32;   
     vcpu_guest_context_t c;
 } vcpu_guest_context_either_t;
-
-
-int xc_set_hvm_param(
-    int handle, domid_t dom, int param, unsigned long value)
-{
-    DECLARE_HYPERCALL;
-    xen_hvm_param_t arg;
-    int rc;
-
-    hypercall.op     = __HYPERVISOR_hvm_op;
-    hypercall.arg[0] = HVMOP_set_param;
-    hypercall.arg[1] = (unsigned long)&arg;
-    arg.domid = dom;
-    arg.index = param;
-    arg.value = value;
-    if ( lock_pages(&arg, sizeof(arg)) != 0 )
-        return -1;
-    rc = do_xen_hypercall(handle, &hypercall);
-    unlock_pages(&arg, sizeof(arg));
-    return rc;
-}
-
-int xc_get_hvm_param(
-    int handle, domid_t dom, int param, unsigned long *value)
-{
-    DECLARE_HYPERCALL;
-    xen_hvm_param_t arg;
-    int rc;
-
-    hypercall.op     = __HYPERVISOR_hvm_op;
-    hypercall.arg[0] = HVMOP_get_param;
-    hypercall.arg[1] = (unsigned long)&arg;
-    arg.domid = dom;
-    arg.index = param;
-    if ( lock_pages(&arg, sizeof(arg)) != 0 )
-        return -1;
-    rc = do_xen_hypercall(handle, &hypercall);
-    unlock_pages(&arg, sizeof(arg));
-    *value = arg.value;
-    return rc;
-}
 
 static void build_e820map(void *e820_page, unsigned long long mem_size)
 {
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_hvm_save.c
--- a/tools/libxc/xc_hvm_save.c Thu Apr 12 16:37:32 2007 -0500
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,755 +0,0 @@
-/******************************************************************************
- * xc_hvm_save.c
- *
- * Save the state of a running HVM guest.
- *
- * Copyright (c) 2003, K A Fraser.
- * Copyright (c) 2006 Intel Corperation
- * rewriten for hvm guest by Zhai Edwin <edwin.zhai@xxxxxxxxx>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <inttypes.h>
-#include <time.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/time.h>
-
-#include "xc_private.h"
-#include "xg_private.h"
-#include "xg_save_restore.h"
-
-#include <xen/hvm/e820.h>
-#include <xen/hvm/params.h>
-
-/*
-** Default values for important tuning parameters. Can override by passing
-** non-zero replacement values to xc_hvm_save().
-**
-** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
-**
-*/
-#define DEF_MAX_ITERS   29   /* limit us to 30 times round loop   */
-#define DEF_MAX_FACTOR   3   /* never send more than 3x nr_pfns   */
-
-/* Shared-memory bitmaps for getting log-dirty bits from qemu */
-static unsigned long *qemu_bitmaps[2];
-static int qemu_active;
-static int qemu_non_active;
-
-/*
-** During (live) save/migrate, we maintain a number of bitmaps to track
-** which pages we have to send, to fixup, and to skip.
-*/
-
-#define BITS_PER_LONG (sizeof(unsigned long) * 8)
-#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
-#define BITMAP_SIZE   (BITS_TO_LONGS(pfn_array_size) * sizeof(unsigned long))
-
-#define BITMAP_ENTRY(_nr,_bmap) \
-   ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
-
-#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
-
-static inline int test_bit (int nr, volatile void * addr)
-{
-    return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
-}
-
-static inline void clear_bit (int nr, volatile void * addr)
-{
-    BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
-}
-
-static inline int permute( int i, int nr, int order_nr  )
-{
-    /* Need a simple permutation function so that we scan pages in a
-       pseudo random order, enabling us to get a better estimate of
-       the domain's page dirtying rate as we go (there are often
-       contiguous ranges of pfns that have similar behaviour, and we
-       want to mix them up. */
-
-    /* e.g. nr->oder 15->4 16->4 17->5 */
-    /* 512MB domain, 128k pages, order 17 */
-
-    /*
-      QPONMLKJIHGFEDCBA
-             QPONMLKJIH
-      GFEDCBA
-     */
-
-    /*
-      QPONMLKJIHGFEDCBA
-                  EDCBA
-             QPONM
-      LKJIHGF
-      */
-
-    do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); }
-    while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */
-
-    return i;
-}
-
-
-static uint64_t tv_to_us(struct timeval *new)
-{
-    return (new->tv_sec * 1000000) + new->tv_usec;
-}
-
-static uint64_t llgettimeofday(void)
-{
-    struct timeval now;
-    gettimeofday(&now, NULL);
-    return tv_to_us(&now);
-}
-
-static uint64_t tv_delta(struct timeval *new, struct timeval *old)
-{
-    return (((new->tv_sec - old->tv_sec)*1000000) +
-            (new->tv_usec - old->tv_usec));
-}
-
-
-#define RATE_IS_MAX() (0)
-#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n))
-#define initialize_mbit_rate()
-
-static inline ssize_t write_exact(int fd, void *buf, size_t count)
-{
-    return (write(fd, buf, count) == count);
-}
-
-static int print_stats(int xc_handle, uint32_t domid, int pages_sent,
-                       xc_shadow_op_stats_t *stats, int print)
-{
-    static struct timeval wall_last;
-    static long long      d0_cpu_last;
-    static long long      d1_cpu_last;
-
-    struct timeval        wall_now;
-    long long             wall_delta;
-    long long             d0_cpu_now, d0_cpu_delta;
-    long long             d1_cpu_now, d1_cpu_delta;
-
-    gettimeofday(&wall_now, NULL);
-
-    d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000;
-    d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000;
-
-    if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
-        DPRINTF("ARRHHH!!\n");
-
-    wall_delta = tv_delta(&wall_now,&wall_last)/1000;
-    if ( wall_delta == 0 )
-        wall_delta = 1;
-
-    d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000;
-    d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
-
-    if ( print )
-        DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
-                "dirtied %dMb/s %" PRId32 " pages\n",
-                wall_delta,
-                (int)((d0_cpu_delta*100)/wall_delta),
-                (int)((d1_cpu_delta*100)/wall_delta),
-                (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
-                (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
-                stats->dirty_count);
-
-    d0_cpu_last = d0_cpu_now;
-    d1_cpu_last = d1_cpu_now;
-    wall_last   = wall_now;
-
-    return 0;
-}
-
-static int analysis_phase(int xc_handle, uint32_t domid, int pfn_array_size,
-                          unsigned long *arr, int runs)
-{
-    long long start, now;
-    xc_shadow_op_stats_t stats;
-    int j;
-
-    start = llgettimeofday();
-
-    for ( j = 0; j < runs; j++ )
-    {
-        int i;
-
-        xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
-                          arr, pfn_array_size, NULL, 0, NULL);
-        DPRINTF("#Flush\n");
-        for ( i = 0; i < 40; i++ )
-        {
-            usleep(50000);
-            now = llgettimeofday();
-            xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK,
-                              NULL, 0, NULL, 0, &stats);
-            DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n",
-                    ((now-start)+500)/1000,
-                    stats.fault_count, stats.dirty_count);
-        }
-    }
-
-    return -1;
-}
-
-static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
-                             int dom, xc_dominfo_t *info,
-                             vcpu_guest_context_t *ctxt)
-{
-    int i = 0;
-
-    if ( !(*suspend)(dom) )
-    {
-        ERROR("Suspend request failed");
-        return -1;
-    }
-
- retry:
-
-    if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 )
-    {
-        ERROR("Could not get domain info");
-        return -1;
-    }
-
-    if ( xc_vcpu_getcontext(xc_handle, dom, 0, ctxt) )
-        ERROR("Could not get vcpu context");
-
-    if ( info->shutdown && (info->shutdown_reason == SHUTDOWN_suspend) )
-        return 0; /* success */
-
-    if ( info->paused )
-    {
-        /* Try unpausing domain, wait, and retest. */
-        xc_domain_unpause( xc_handle, dom );
-        ERROR("Domain was paused. Wait and re-test.");
-        usleep(10000);  /* 10ms */
-        goto retry;
-    }
-
-    if ( ++i < 100 )
-    {
-        ERROR("Retry suspend domain.");
-        usleep(10000); /* 10ms */
-        goto retry;
-    }
-
-    ERROR("Unable to suspend domain.");
-
-    return -1;
-}
-
-int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
-                uint32_t max_factor, uint32_t flags, int (*suspend)(int),
-                void *(*init_qemu_maps)(int, unsigned), 
-                void (*qemu_flip_buffer)(int, int))
-{
-    xc_dominfo_t info;
-
-    int rc = 1, i, j, last_iter, iter = 0;
-    int live  = !!(flags & XCFLAGS_LIVE);
-    int debug = !!(flags & XCFLAGS_DEBUG);
-    int sent_last_iter, skip_this_iter;
-
-    /* The highest guest-physical frame number used by the current guest */
-    unsigned long max_pfn;
-
-    /* The size of an array big enough to contain all guest pfns */
-    unsigned long pfn_array_size;
-
-    /* Magic frames: ioreqs and xenstore comms. */
-    uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */
-
-    /* A copy of the CPU context of the guest. */
-    vcpu_guest_context_t ctxt;
-
-    /* A table containg the PFNs (/not/ MFN!) to map. */
-    xen_pfn_t *pfn_batch = NULL;
-
-    /* A copy of hvm domain context buffer*/
-    uint32_t hvm_buf_size;
-    uint8_t *hvm_buf = NULL;
-
-    /* base of the region in which domain memory is mapped */
-    unsigned char *region_base = NULL;
-
-    uint32_t rec_size, nr_vcpus;
-
-    /* power of 2 order of pfn_array_size */
-    int order_nr;
-
-    /* bitmap of pages:
-       - that should be sent this iteration (unless later marked as skip);
-       - to skip this iteration because already dirty; */
-    unsigned long *to_send = NULL, *to_skip = NULL;
-
-    xc_shadow_op_stats_t stats;
-
-    unsigned long total_sent = 0;
-
-    uint64_t vcpumap = 1ULL;
-
-    DPRINTF("xc_hvm_save: dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, "
-            "live=%d, debug=%d.\n", dom, max_iters, max_factor, flags,
-            live, debug);
-    
-    /* If no explicit control parameters given, use defaults */
-    max_iters  = max_iters  ? : DEF_MAX_ITERS;
-    max_factor = max_factor ? : DEF_MAX_FACTOR;
-
-    initialize_mbit_rate();
-
-    if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 )
-    {
-        ERROR("HVM: Could not get domain info");
-        return 1;
-    }
-    nr_vcpus = info.nr_online_vcpus;
-
-    if ( mlock(&ctxt, sizeof(ctxt)) )
-    {
-        ERROR("HVM: Unable to mlock ctxt");
-        return 1;
-    }
-
-    /* Only have to worry about vcpu 0 even for SMP */
-    if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) )
-    {
-        ERROR("HVM: Could not get vcpu context");
-        goto out;
-    }
-
-    DPRINTF("saved hvm domain info: max_memkb=0x%lx, nr_pages=0x%lx\n",
-            info.max_memkb, info.nr_pages); 
-
-    if ( live )
-    {
-        /* Live suspend. Enable log-dirty mode. */
-        if ( xc_shadow_control(xc_handle, dom,
-                               XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
-                               NULL, 0, NULL, 0, NULL) < 0 )
-        {
-            ERROR("Couldn't enable shadow mode");
-            goto out;
-        }
-    }
-    else
-    {
-        /* This is a non-live suspend. Suspend the domain .*/
-        if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt) )
-        {
-            ERROR("HVM Domain appears not to have suspended");
-            goto out;
-        }
-    }
-
-    last_iter = !live;
-
-    max_pfn = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom);
-
-    DPRINTF("after 1st handle hvm domain max_pfn=0x%lx, "
-            "max_memkb=0x%lx, live=%d.\n",
-            max_pfn, info.max_memkb, live);
-
-    /* Size of any array that covers 0 ... max_pfn */
-    pfn_array_size = max_pfn + 1;
-    if ( !write_exact(io_fd, &pfn_array_size, sizeof(unsigned long)) )
-    {
-        ERROR("Error when writing to state file (1)");
-        goto out;
-    }
-
-    /* pretend we sent all the pages last iteration */
-    sent_last_iter = pfn_array_size;
-
-    /* calculate the power of 2 order of pfn_array_size, e.g.
-       15->4 16->4 17->5 */
-    for ( i = pfn_array_size-1, order_nr = 0; i ; i >>= 1, order_nr++ )
-        continue;
-
-    /* Setup to_send / to_fix and to_skip bitmaps */
-    to_send = malloc(BITMAP_SIZE);
-    to_skip = malloc(BITMAP_SIZE);
-
-    if ( live )
-    {
-        /* Get qemu-dm logging dirty pages too */
-        void *seg = init_qemu_maps(dom, BITMAP_SIZE);
-        qemu_bitmaps[0] = seg;
-        qemu_bitmaps[1] = seg + BITMAP_SIZE;
-        qemu_active = 0;
-        qemu_non_active = 1;
-    }
-
-    hvm_buf_size = xc_domain_hvm_getcontext(xc_handle, dom, 0, 0);
-    if ( hvm_buf_size == -1 )
-    {
-        ERROR("Couldn't get HVM context size from Xen");
-        goto out;
-    }
-    hvm_buf = malloc(hvm_buf_size);
-
-    if ( !to_send || !to_skip || !hvm_buf )
-    {
-        ERROR("Couldn't allocate memory");
-        goto out;
-    }
-
-    memset(to_send, 0xff, BITMAP_SIZE);
-
-    if ( lock_pages(to_send, BITMAP_SIZE) )
-    {
-        ERROR("Unable to lock to_send");
-        return 1;
-    }
-
-    /* (to fix is local only) */
-    if ( lock_pages(to_skip, BITMAP_SIZE) )
-    {
-        ERROR("Unable to lock to_skip");
-        return 1;
-    }
-
-    analysis_phase(xc_handle, dom, pfn_array_size, to_skip, 0);
-
-    /* We want zeroed memory so use calloc rather than malloc. */
-    pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch));
-    if ( pfn_batch == NULL )
-    {
-        ERROR("failed to alloc memory for pfn_batch array");
-        errno = ENOMEM;
-        goto out;
-    }
-
-    for ( ; ; )
-    {
-        unsigned int prev_pc, sent_this_iter, N, batch;
-
-        iter++;
-        sent_this_iter = 0;
-        skip_this_iter = 0;
-        prev_pc = 0;
-        N=0;
-
-        DPRINTF("Saving memory pages: iter %d   0%%", iter);
-
-        while ( N < pfn_array_size )
-        {
-            unsigned int this_pc = (N * 100) / pfn_array_size;
-            int rc;
-
-            if ( (this_pc - prev_pc) >= 5 )
-            {
-                DPRINTF("\b\b\b\b%3d%%", this_pc);
-                prev_pc = this_pc;
-            }
-
-            if ( !last_iter )
-            {
-                /* Slightly wasteful to peek the whole array evey time,
-                   but this is fast enough for the moment. */
-                rc = xc_shadow_control(
-                    xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, 
-                    pfn_array_size, NULL, 0, NULL);
-                if ( rc != pfn_array_size )
-                {
-                    ERROR("Error peeking shadow bitmap");
-                    goto out;
-                }
-            }
-
-            /* load pfn_batch[] with the mfn of all the pages we're doing in
-               this batch. */
-            for ( batch = 0;
-                  (batch < MAX_BATCH_SIZE) && (N < pfn_array_size);
-                  N++ )
-            {
-                int n = permute(N, pfn_array_size, order_nr);
-
-                if ( 0 && debug )
-                    DPRINTF("%d pfn= %08lx %d \n",
-                            iter, (unsigned long)n, test_bit(n, to_send));
-
-                if ( !last_iter &&
-                     test_bit(n, to_send) &&
-                     test_bit(n, to_skip) )
-                    skip_this_iter++; /* stats keeping */
-
-                if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
-                       (test_bit(n, to_send) && last_iter)) )
-                    continue;
-
-                /* Skip PFNs that aren't really there */
-                if ( (n >= 0xa0 && n < 0xc0) /* VGA hole */
-                     || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) &&
-                         n < (1ULL << 32) >> PAGE_SHIFT) /* 4G MMIO hole */ )
-                    continue;
-
-                /*
-                ** we get here if:
-                **  1. page is marked to_send & hasn't already been re-dirtied
-                **  2. (ignore to_skip in last iteration)
-                */
-
-                pfn_batch[batch] = n;
-
-                batch++;
-            }
-
-            if ( batch == 0 )
-                goto skip; /* vanishingly unlikely... */
-
-            region_base = xc_map_foreign_batch(
-                xc_handle, dom, PROT_READ, pfn_batch, batch);
-            if ( region_base == 0 )
-            {
-                ERROR("map batch failed");
-                goto out;
-            }
-
-            /* write num of pfns */
-            if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) )
-            {
-                ERROR("Error when writing to state file (2)");
-                goto out;
-            }
-
-            /* write all the pfns */
-            if ( !write_exact(io_fd, pfn_batch, sizeof(unsigned long)*batch) )
-            {
-                ERROR("Error when writing to state file (3)");
-                goto out;
-            }
-
-            for ( j = 0; j < batch; j++ )
-            {
-                if ( pfn_batch[j] & XEN_DOMCTL_PFINFO_LTAB_MASK )
-                    continue;
-                if ( ratewrite(io_fd, region_base + j*PAGE_SIZE,
-                               PAGE_SIZE) != PAGE_SIZE )
-                {
-                    ERROR("ERROR when writing to state file (4)");
-                    goto out;
-                }
-            }
-
-            sent_this_iter += batch;
-
-            munmap(region_base, batch*PAGE_SIZE);
-
-        } /* end of this while loop for this iteration */
-
-      skip:
-
-        total_sent += sent_this_iter;
-
-        DPRINTF("\r %d: sent %d, skipped %d, ",
-                iter, sent_this_iter, skip_this_iter );
-
-        if ( last_iter )
-        {
-            print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
-            DPRINTF("Total pages sent= %ld (%.2fx)\n",
-                    total_sent, ((float)total_sent)/pfn_array_size );
-        }
-
-        if ( last_iter && debug )
-        {
-            int minusone = -1;
-            memset(to_send, 0xff, BITMAP_SIZE);
-            debug = 0;
-            DPRINTF("Entering debug resend-all mode\n");
-
-            /* send "-1" to put receiver into debug mode */
-            if ( !write_exact(io_fd, &minusone, sizeof(int)) )
-            {
-                ERROR("Error when writing to state file (6)");
-                goto out;
-            }
-
-            continue;
-        }
-
-        if ( last_iter )
-            break;
-
-        if ( live )
-        {
-            if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
-                 (iter >= max_iters) ||
-                 (sent_this_iter+skip_this_iter < 50) ||
-                 (total_sent > pfn_array_size*max_factor) )
-            {
-                DPRINTF("Start last iteration for HVM domain\n");
-                last_iter = 1;
-
-                if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info,
-                                       &ctxt))
-                {
-                    ERROR("Domain appears not to have suspended");
-                    goto out;
-                }
-
-                DPRINTF("SUSPEND eip %08lx edx %08lx\n",
-                        (unsigned long)ctxt.user_regs.eip,
-                        (unsigned long)ctxt.user_regs.edx);
-            }
-
-            if ( xc_shadow_control(xc_handle, dom, 
-                                   XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, 
-                                   pfn_array_size, NULL, 
-                                   0, &stats) != pfn_array_size )
-            {
-                ERROR("Error flushing shadow PT");
-                goto out;
-            }
-
-            /* Pull in the dirty bits from qemu too */
-            if ( !last_iter )
-            {
-                qemu_active = qemu_non_active;
-                qemu_non_active = qemu_active ? 0 : 1;
-                qemu_flip_buffer(dom, qemu_active);
-                for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ )
-                {
-                    to_send[j] |= qemu_bitmaps[qemu_non_active][j];
-                    qemu_bitmaps[qemu_non_active][j] = 0;
-                }
-            }
-            else
-            {
-                for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ )
-                    to_send[j] |= qemu_bitmaps[qemu_active][j];
-            }
-
-            sent_last_iter = sent_this_iter;
-
-            print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
-        }
-    } /* end of while 1 */
-
-
-    DPRINTF("All HVM memory is saved\n");
-
-    {
-        struct {
-            int minustwo;
-            int max_vcpu_id;
-            uint64_t vcpumap;
-        } chunk = { -2, info.max_vcpu_id };
-
-        if (info.max_vcpu_id >= 64) {
-            ERROR("Too many VCPUS in guest!");
-            goto out;
-        }
-
-        for (i = 1; i <= info.max_vcpu_id; i++) {
-            xc_vcpuinfo_t vinfo;
-            if ((xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) &&
-                vinfo.online)
-                vcpumap |= 1ULL << i;
-        }
-
-        chunk.vcpumap = vcpumap;
-        if(!write_exact(io_fd, &chunk, sizeof(chunk))) {
-            ERROR("Error when writing to state file (errno %d)", errno);
-            goto out;
-        }
-    }
-
-    /* Zero terminate */
-    i = 0;
-    if ( !write_exact(io_fd, &i, sizeof(int)) )
-    {
-        ERROR("Error when writing to state file (6)");
-        goto out;
-    }
-
-    /* Save magic-page locations. */
-    memset(magic_pfns, 0, sizeof(magic_pfns));
-    xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN,
-                     (unsigned long *)&magic_pfns[0]);
-    xc_get_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN,
-                     (unsigned long *)&magic_pfns[1]);
-    xc_get_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN,
-                     (unsigned long *)&magic_pfns[2]);
-    if ( !write_exact(io_fd, magic_pfns, sizeof(magic_pfns)) )
-    {
-        ERROR("Error when writing to state file (7)");
-        goto out;
-    }
-
-    /* save vcpu/vmcs contexts */
-    for ( i = 0; i < nr_vcpus; i++ )
-    {
-        if ( !(vcpumap & (1ULL << i)) )
-            continue;
-
-        if ( xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) )
-        {
-            ERROR("HVM:Could not get vcpu context");
-            goto out;
-        }
-
-        DPRINTF("write vcpu %d context.\n", i); 
-        if ( !write_exact(io_fd, &(ctxt), sizeof(ctxt)) )
-        {
-            ERROR("write vcpu context failed!\n");
-            goto out;
-        }
-    }
-
-    if ( (rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf, 
-                                              hvm_buf_size)) == -1 )
-    {
-        ERROR("HVM:Could not get hvm buffer");
-        goto out;
-    }
-
-    if ( !write_exact(io_fd, &rec_size, sizeof(uint32_t)) )
-    {
-        ERROR("error write hvm buffer size");
-        goto out;
-    }
-
-    if ( !write_exact(io_fd, hvm_buf, rec_size) )
-    {
-        ERROR("write HVM info failed!\n");
-        goto out;
-    }
-
-    /* Success! */
-    rc = 0;
-
- out:
-
-    if ( live )
-    {
-        if ( xc_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_OFF,
-                               NULL, 0, NULL, 0, NULL) < 0 )
-            DPRINTF("Warning - couldn't disable shadow mode");
-    }
-
-    free(hvm_buf);
-    free(pfn_batch);
-    free(to_send);
-    free(to_skip);
-
-    return !!rc;
-}
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c       Thu Apr 12 16:37:32 2007 -0500
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,1414 +0,0 @@
-/******************************************************************************
- * xc_linux_save.c
- *
- * Save the state of a running Linux session.
- *
- * Copyright (c) 2003, K A Fraser.
- */
-
-#include <inttypes.h>
-#include <time.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/time.h>
-
-#include "xc_private.h"
-#include "xc_dom.h"
-#include "xg_private.h"
-#include "xg_save_restore.h"
-
-/*
-** Default values for important tuning parameters. Can override by passing
-** non-zero replacement values to xc_linux_save().
-**
-** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
-**
-*/
-#define DEF_MAX_ITERS   29   /* limit us to 30 times round loop   */
-#define DEF_MAX_FACTOR   3   /* never send more than 3x p2m_size  */
-
-/* max mfn of the whole machine */
-static unsigned long max_mfn;
-
-/* virtual starting address of the hypervisor */
-static unsigned long hvirt_start;
-
-/* #levels of page tables used by the current guest */
-static unsigned int pt_levels;
-
-/* number of pfns this guest has (i.e. number of entries in the P2M) */
-static unsigned long p2m_size;
-
-/* Live mapping of the table mapping each PFN to its current MFN. */
-static xen_pfn_t *live_p2m = NULL;
-
-/* Live mapping of system MFN to PFN table. */
-static xen_pfn_t *live_m2p = NULL;
-static unsigned long m2p_mfn0;
-
-/* grep fodder: machine_to_phys */
-
-#define mfn_to_pfn(_mfn) live_m2p[(_mfn)]
-
-/*
- * Returns TRUE if the given machine frame number has a unique mapping
- * in the guest's pseudophysical map.
- */
-#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn)          \
-    (((_mfn) < (max_mfn)) &&                    \
-     ((mfn_to_pfn(_mfn) < (p2m_size)) &&        \
-      (live_p2m[mfn_to_pfn(_mfn)] == (_mfn))))
-
-/* Returns TRUE if MFN is successfully converted to a PFN. */
-#define translate_mfn_to_pfn(_pmfn)                             \
-({                                                              \
-    unsigned long mfn = *(_pmfn);                               \
-    int _res = 1;                                               \
-    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )                       \
-        _res = 0;                                               \
-    else                                                        \
-        *(_pmfn) = mfn_to_pfn(mfn);                             \
-    _res;                                                       \
-})
-
-/*
-** During (live) save/migrate, we maintain a number of bitmaps to track
-** which pages we have to send, to fixup, and to skip.
-*/
-
-#define BITS_PER_LONG (sizeof(unsigned long) * 8)
-#define BITMAP_SIZE   ((p2m_size + BITS_PER_LONG - 1) / 8)
-
-#define BITMAP_ENTRY(_nr,_bmap) \
-   ((volatile unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
-
-#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
-
-static inline int test_bit (int nr, volatile void * addr)
-{
-    return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
-}
-
-static inline void clear_bit (int nr, volatile void * addr)
-{
-    BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
-}
-
-static inline void set_bit ( int nr, volatile void * addr)
-{
-    BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
-}
-
-/* Returns the hamming weight (i.e. the number of bits set) in a N-bit word */
-static inline unsigned int hweight32(unsigned int w)
-{
-    unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
-    res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
-    res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
-    res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
-    return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
-}
-
-static inline int count_bits ( int nr, volatile void *addr)
-{
-    int i, count = 0;
-    volatile unsigned long *p = (volatile unsigned long *)addr;
-    /* We know that the array is padded to unsigned long. */
-    for ( i = 0; i < (nr / (sizeof(unsigned long)*8)); i++, p++ )
-        count += hweight32(*p);
-    return count;
-}
-
-static inline int permute( int i, int nr, int order_nr  )
-{
-    /* Need a simple permutation function so that we scan pages in a
-       pseudo random order, enabling us to get a better estimate of
-       the domain's page dirtying rate as we go (there are often
-       contiguous ranges of pfns that have similar behaviour, and we
-       want to mix them up. */
-
-    /* e.g. nr->oder 15->4 16->4 17->5 */
-    /* 512MB domain, 128k pages, order 17 */
-
-    /*
-      QPONMLKJIHGFEDCBA
-             QPONMLKJIH
-      GFEDCBA
-     */
-
-    /*
-      QPONMLKJIHGFEDCBA
-                  EDCBA
-             QPONM
-      LKJIHGF
-      */
-
-    do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); }
-    while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */
-
-    return i;
-}
-
-static uint64_t tv_to_us(struct timeval *new)
-{
-    return (new->tv_sec * 1000000) + new->tv_usec;
-}
-
-static uint64_t llgettimeofday(void)
-{
-    struct timeval now;
-    gettimeofday(&now, NULL);
-    return tv_to_us(&now);
-}
-
-static uint64_t tv_delta(struct timeval *new, struct timeval *old)
-{
-    return (((new->tv_sec - old->tv_sec)*1000000) +
-            (new->tv_usec - old->tv_usec));
-}
-
-static int noncached_write(int fd, int live, void *buffer, int len) 
-{
-    static int write_count = 0;
-
-    int rc = write(fd,buffer,len);
-
-    write_count += len;
-    if ( write_count >= (MAX_PAGECACHE_USAGE * PAGE_SIZE) )
-    {
-        /* Time to discard cache - dont care if this fails */
-        discard_file_cache(fd, 0 /* no flush */);
-        write_count = 0;
-    }
-
-    return rc;
-}
-
-#ifdef ADAPTIVE_SAVE
-
-/*
-** We control the rate at which we transmit (or save) to minimize impact
-** on running domains (including the target if we're doing live migrate).
-*/
-
-#define MAX_MBIT_RATE    500      /* maximum transmit rate for migrate */
-#define START_MBIT_RATE  100      /* initial transmit rate for migrate */
-
-/* Scaling factor to convert between a rate (in Mb/s) and time (in usecs) */
-#define RATE_TO_BTU      781250
-
-/* Amount in bytes we allow ourselves to send in a burst */
-#define BURST_BUDGET (100*1024)
-
-/* We keep track of the current and previous transmission rate */
-static int mbit_rate, ombit_rate = 0;
-
-/* Have we reached the maximum transmission rate? */
-#define RATE_IS_MAX() (mbit_rate == MAX_MBIT_RATE)
-
-static inline void initialize_mbit_rate()
-{
-    mbit_rate = START_MBIT_RATE;
-}
-
-static int ratewrite(int io_fd, int live, void *buf, int n)
-{
-    static int budget = 0;
-    static int burst_time_us = -1;
-    static struct timeval last_put = { 0 };
-    struct timeval now;
-    struct timespec delay;
-    long long delta;
-
-    if ( START_MBIT_RATE == 0 )
-        return noncached_write(io_fd, live, buf, n);
-
-    budget -= n;
-    if ( budget < 0 )
-    {
-        if ( mbit_rate != ombit_rate )
-        {
-            burst_time_us = RATE_TO_BTU / mbit_rate;
-            ombit_rate = mbit_rate;
-            DPRINTF("rate limit: %d mbit/s burst budget %d slot time %d\n",
-                    mbit_rate, BURST_BUDGET, burst_time_us);
-        }
-        if ( last_put.tv_sec == 0 )
-        {
-            budget += BURST_BUDGET;
-            gettimeofday(&last_put, NULL);
-        }
-        else
-        {
-            while ( budget < 0 )
-            {
-                gettimeofday(&now, NULL);
-                delta = tv_delta(&now, &last_put);
-                while ( delta > burst_time_us )
-                {
-                    budget += BURST_BUDGET;
-                    last_put.tv_usec += burst_time_us;
-                    if ( last_put.tv_usec > 1000000 
-                    {
-                        last_put.tv_usec -= 1000000;
-                        last_put.tv_sec++;
-                    }
-                    delta -= burst_time_us;
-                }
-                if ( budget > 0 )
-                    break;
-                delay.tv_sec = 0;
-                delay.tv_nsec = 1000 * (burst_time_us - delta);
-                while ( delay.tv_nsec > 0 )
-                    if ( nanosleep(&delay, &delay) == 0 )
-                        break;
-            }
-        }
-    }
-    return noncached_write(io_fd, live, buf, n);
-}
-
-#else /* ! ADAPTIVE SAVE */
-
-#define RATE_IS_MAX() (0)
-#define ratewrite(_io_fd, _live, _buf, _n) noncached_write((_io_fd), (_live), (_buf), (_n))
-#define initialize_mbit_rate()
-
-#endif
-
-static inline ssize_t write_exact(int fd, void *buf, size_t count)
-{
-    return (write(fd, buf, count) == count);
-}
-
-static int print_stats(int xc_handle, uint32_t domid, int pages_sent,
-                       xc_shadow_op_stats_t *stats, int print)
-{
-    static struct timeval wall_last;
-    static long long      d0_cpu_last;
-    static long long      d1_cpu_last;
-
-    struct timeval        wall_now;
-    long long             wall_delta;
-    long long             d0_cpu_now, d0_cpu_delta;
-    long long             d1_cpu_now, d1_cpu_delta;
-
-    gettimeofday(&wall_now, NULL);
-
-    d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000;
-    d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000;
-
-    if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
-        DPRINTF("ARRHHH!!\n");
-
-    wall_delta = tv_delta(&wall_now,&wall_last)/1000;
-    if ( wall_delta == 0 )
-        wall_delta = 1;
-
-    d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000;
-    d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
-
-    if ( print )
-        DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
-                "dirtied %dMb/s %" PRId32 " pages\n",
-                wall_delta,
-                (int)((d0_cpu_delta*100)/wall_delta),
-                (int)((d1_cpu_delta*100)/wall_delta),
-                (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
-                (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
-                stats->dirty_count);
-
-#ifdef ADAPTIVE_SAVE
-    if ( ((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate )
-    {
-        mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8)))
-            + 50;
-        if ( mbit_rate > MAX_MBIT_RATE )
-            mbit_rate = MAX_MBIT_RATE;
-    }
-#endif
-
-    d0_cpu_last = d0_cpu_now;
-    d1_cpu_last = d1_cpu_now;
-    wall_last   = wall_now;
-
-    return 0;
-}
-
-
-static int analysis_phase(int xc_handle, uint32_t domid, int p2m_size,
-                          unsigned long *arr, int runs)
-{
-    long long start, now;
-    xc_shadow_op_stats_t stats;
-    int j;
-
-    start = llgettimeofday();
-
-    for ( j = 0; j < runs; j++ )
-    {
-        int i;
-
-        xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
-                          arr, p2m_size, NULL, 0, NULL);
-        DPRINTF("#Flush\n");
-        for ( i = 0; i < 40; i++ )
-        {
-            usleep(50000);
-            now = llgettimeofday();
-            xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK,
-                              NULL, 0, NULL, 0, &stats);
-            DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n",
-                    ((now-start)+500)/1000,
-                    stats.fault_count, stats.dirty_count);
-        }
-    }
-
-    return -1;
-}
-
-
-static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
-                             int dom, xc_dominfo_t *info,
-                             vcpu_guest_context_t *ctxt)
-{
-    int i = 0;
-
-    if ( !(*suspend)(dom) )
-    {
-        ERROR("Suspend request failed");
-        return -1;
-    }
-
- retry:
-
-    if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 )
-    {
-        ERROR("Could not get domain info");
-        return -1;
-    }
-
-    if ( xc_vcpu_getcontext(xc_handle, dom, 0, ctxt) )
-        ERROR("Could not get vcpu context");
-
-
-    if ( info->dying )
-    {
-        ERROR("domain is dying");
-        return -1;
-    }
-
-    if ( info->crashed )
-    {
-        ERROR("domain has crashed");
-        return -1;
-    }
-
-    if ( info->shutdown )
-    {
-        switch ( info->shutdown_reason )
-        {
-        case SHUTDOWN_poweroff:
-        case SHUTDOWN_reboot:
-            ERROR("domain has shut down");
-            return -1;
-        case SHUTDOWN_suspend:
-            return 0;
-        case SHUTDOWN_crash:
-            ERROR("domain has crashed");
-            return -1;
-        }
-    }
-
-    if ( info->paused )
-    {
-        /* Try unpausing domain, wait, and retest. */
-        xc_domain_unpause( xc_handle, dom );
-        ERROR("Domain was paused. Wait and re-test.");
-        usleep(10000); /* 10ms */
-        goto retry;
-    }
-
-    if ( ++i < 100 )
-    {
-        ERROR("Retry suspend domain");
-        usleep(10000); /* 10ms */
-        goto retry;
-    }
-
-    ERROR("Unable to suspend domain.");
-
-    return -1;
-}
-
-/*
-** Map the top-level page of MFNs from the guest. The guest might not have
-** finished resuming from a previous restore operation, so we wait a while for
-** it to update the MFN to a reasonable value.
-*/
-static void *map_frame_list_list(int xc_handle, uint32_t dom,
-                                 shared_info_t *shinfo)
-{
-    int count = 100;
-    void *p;
-
-    while ( count-- && (shinfo->arch.pfn_to_mfn_frame_list_list == 0) )
-        usleep(10000);
-
-    if ( shinfo->arch.pfn_to_mfn_frame_list_list == 0 )
-    {
-        ERROR("Timed out waiting for frame list updated.");
-        return NULL;
-    }
-
-    p = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ,
-                             shinfo->arch.pfn_to_mfn_frame_list_list);
-    if ( p == NULL )
-        ERROR("Couldn't map p2m_frame_list_list (errno %d)", errno);
-
-    return p;
-}
-
-/*
-** During transfer (or in the state file), all page-table pages must be
-** converted into a 'canonical' form where references to actual mfns
-** are replaced with references to the corresponding pfns.
-**
-** This function performs the appropriate conversion, taking into account
-** which entries do not require canonicalization (in particular, those
-** entries which map the virtual address reserved for the hypervisor).
-*/
-static int canonicalize_pagetable(unsigned long type, unsigned long pfn,
-                           const void *spage, void *dpage)
-{
-
-    int i, pte_last, xen_start, xen_end, race = 0; 
-    uint64_t pte;
-
-    /*
-    ** We need to determine which entries in this page table hold
-    ** reserved hypervisor mappings. This depends on the current
-    ** page table type as well as the number of paging levels.
-    */
-    xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2) ? 4 : 8);
-
-    if ( (pt_levels == 2) && (type == XEN_DOMCTL_PFINFO_L2TAB) )
-        xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT);
-
-    if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L3TAB) )
-        xen_start = L3_PAGETABLE_ENTRIES_PAE;
-
-    /*
-    ** in PAE only the L2 mapping the top 1GB contains Xen mappings.
-    ** We can spot this by looking for the guest linear mapping which
-    ** Xen always ensures is present in that L2. Guests must ensure
-    ** that this check will fail for other L2s.
-    */
-    if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L2TAB) )
-    {
-        int hstart;
-        uint64_t he;
-
-        hstart = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
-        he = ((const uint64_t *) spage)[hstart];
-
-        if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 )
-        {
-            /* hvirt starts with xen stuff... */
-            xen_start = hstart;
-        }
-        else if ( hvirt_start != 0xf5800000 )
-        {
-            /* old L2s from before hole was shrunk... */
-            hstart = (0xf5800000 >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
-            he = ((const uint64_t *) spage)[hstart];
-            if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 )
-                xen_start = hstart;
-        }
-    }
-
-    if ( (pt_levels == 4) && (type == XEN_DOMCTL_PFINFO_L4TAB) )
-    {
-        /*
-        ** XXX SMH: should compute these from hvirt_start (which we have)
-        ** and hvirt_end (which we don't)
-        */
-        xen_start = 256;
-        xen_end   = 272;
-    }
-
-    /* Now iterate through the page table, canonicalizing each PTE */
-    for (i = 0; i < pte_last; i++ )
-    {
-        unsigned long pfn, mfn;
-
-        if ( pt_levels == 2 )
-            pte = ((const uint32_t*)spage)[i];
-        else
-            pte = ((const uint64_t*)spage)[i];
-
-        if ( (i >= xen_start) && (i < xen_end) )
-            pte = 0;
-
-        if ( pte & _PAGE_PRESENT )
-        {
-            mfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
-            if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
-            {
-                /* This will happen if the type info is stale which
-                   is quite feasible under live migration */
-                pfn  = 0;  /* zap it - we'll retransmit this page later */
-                race = 1;  /* inform the caller of race; fatal if !live */ 
-            }
-            else
-                pfn = mfn_to_pfn(mfn);
-
-            pte &= ~MADDR_MASK_X86;
-            pte |= (uint64_t)pfn << PAGE_SHIFT;
-
-            /*
-             * PAE guest L3Es can contain these flags when running on
-             * a 64bit hypervisor. We zap these here to avoid any
-             * surprise at restore time...
-             */
-            if ( (pt_levels == 3) &&
-                 (type == XEN_DOMCTL_PFINFO_L3TAB) &&
-                 (pte & (_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED)) )
-                pte &= ~(_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED);
-        }
-
-        if ( pt_levels == 2 )
-            ((uint32_t*)dpage)[i] = pte;
-        else
-            ((uint64_t*)dpage)[i] = pte;
-    }
-
-    return race;
-}
-
-static xen_pfn_t *xc_map_m2p(int xc_handle,
-                                 unsigned long max_mfn,
-                                 int prot)
-{
-    struct xen_machphys_mfn_list xmml;
-    privcmd_mmap_entry_t *entries;
-    unsigned long m2p_chunks, m2p_size;
-    xen_pfn_t *m2p;
-    xen_pfn_t *extent_start;
-    int i, rc;
-
-    m2p_size   = M2P_SIZE(max_mfn);
-    m2p_chunks = M2P_CHUNKS(max_mfn);
-
-    xmml.max_extents = m2p_chunks;
-    if ( !(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t))) )
-    {
-        ERROR("failed to allocate space for m2p mfns");
-        return NULL;
-    }
-    set_xen_guest_handle(xmml.extent_start, extent_start);
-
-    if ( xc_memory_op(xc_handle, XENMEM_machphys_mfn_list, &xmml) ||
-         (xmml.nr_extents != m2p_chunks) )
-    {
-        ERROR("xc_get_m2p_mfns");
-        return NULL;
-    }
-
-    if ( (m2p = mmap(NULL, m2p_size, prot,
-                     MAP_SHARED, xc_handle, 0)) == MAP_FAILED )
-    {
-        ERROR("failed to mmap m2p");
-        return NULL;
-    }
-
-    if ( !(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t))) )
-    {
-        ERROR("failed to allocate space for mmap entries");
-        return NULL;
-    }
-
-    for ( i = 0; i < m2p_chunks; i++ )
-    {
-        entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE));
-        entries[i].mfn = extent_start[i];
-        entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT;
-    }
-
-    if ( (rc = xc_map_foreign_ranges(xc_handle, DOMID_XEN,
-                                     entries, m2p_chunks)) < 0 )
-    {
-        ERROR("xc_mmap_foreign_ranges failed (rc = %d)", rc);
-        return NULL;
-    }
-
-    m2p_mfn0 = entries[0].mfn;
-
-    free(extent_start);
-    free(entries);
-
-    return m2p;
-}
-
-int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
-                  uint32_t max_factor, uint32_t flags, int (*suspend)(int))
-{
-    xc_dominfo_t info;
-
-    int rc = 1, i, j, last_iter, iter = 0;
-    int live  = (flags & XCFLAGS_LIVE);
-    int debug = (flags & XCFLAGS_DEBUG);
-    int race = 0, sent_last_iter, skip_this_iter;
-
-    /* The new domain's shared-info frame number. */
-    unsigned long shared_info_frame;
-
-    /* A copy of the CPU context of the guest. */
-    vcpu_guest_context_t ctxt;
-
-    /* A table containg the type of each PFN (/not/ MFN!). */
-    unsigned long *pfn_type = NULL;
-    unsigned long *pfn_batch = NULL;
-
-    /* A temporary mapping, and a copy, of one frame of guest memory. */
-    char page[PAGE_SIZE];
-
-    /* Double and single indirect references to the live P2M table */
-    xen_pfn_t *live_p2m_frame_list_list = NULL;
-    xen_pfn_t *live_p2m_frame_list = NULL;
-
-    /* A copy of the pfn-to-mfn table frame list. */
-    xen_pfn_t *p2m_frame_list = NULL;
-
-    /* Live mapping of shared info structure */
-    shared_info_t *live_shinfo = NULL;
-
-    /* base of the region in which domain memory is mapped */
-    unsigned char *region_base = NULL;
-
-    /* power of 2 order of p2m_size */
-    int order_nr;
-
-    /* bitmap of pages:
-       - that should be sent this iteration (unless later marked as skip);
-       - to skip this iteration because already dirty;
-       - to fixup by sending at the end if not already resent; */
-    unsigned long *to_send = NULL, *to_skip = NULL, *to_fix = NULL;
-
-    xc_shadow_op_stats_t stats;
-
-    unsigned long needed_to_fix = 0;
-    unsigned long total_sent    = 0;
-
-    uint64_t vcpumap = 1ULL;
-
-    /* If no explicit control parameters given, use defaults */
-    max_iters  = max_iters  ? : DEF_MAX_ITERS;
-    max_factor = max_factor ? : DEF_MAX_FACTOR;
-
-    initialize_mbit_rate();
-
-    if ( !get_platform_info(xc_handle, dom,
-                            &max_mfn, &hvirt_start, &pt_levels) )
-    {
-        ERROR("Unable to get platform info.");
-        return 1;
-    }
-
-    if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 )
-    {
-        ERROR("Could not get domain info");
-        return 1;
-    }
-
-    if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) )
-    {
-        ERROR("Could not get vcpu context");
-        goto out;
-    }
-    shared_info_frame = info.shared_info_frame;
-
-    /* Map the shared info frame */
-    if ( !(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                              PROT_READ, shared_info_frame)) )
-    {
-        ERROR("Couldn't map live_shinfo");
-        goto out;
-    }
-
-    p2m_size = live_shinfo->arch.max_pfn;
-
-    live_p2m_frame_list_list = map_frame_list_list(xc_handle, dom,
-                                                   live_shinfo);
-    if ( !live_p2m_frame_list_list )
-        goto out;
-
-    live_p2m_frame_list =
-        xc_map_foreign_batch(xc_handle, dom, PROT_READ,
-                             live_p2m_frame_list_list,
-                             P2M_FLL_ENTRIES);
-    if ( !live_p2m_frame_list )
-    {
-        ERROR("Couldn't map p2m_frame_list");
-        goto out;
-    }
-
-    /* Map all the frames of the pfn->mfn table. For migrate to succeed,
-       the guest must not change which frames are used for this purpose.
-       (its not clear why it would want to change them, and we'll be OK
-       from a safety POV anyhow. */
-
-    live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_READ,
-                                    live_p2m_frame_list,
-                                    P2M_FL_ENTRIES);
-    if ( !live_p2m )
-    {
-        ERROR("Couldn't map p2m table");
-        goto out;
-    }
-
-    /* Setup the mfn_to_pfn table mapping */
-    if ( !(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ)) )
-    {
-        ERROR("Failed to map live M2P table");
-        goto out;
-    }
-
-
-    /* Get a local copy of the live_P2M_frame_list */
-    if ( !(p2m_frame_list = malloc(P2M_FL_SIZE)) )
-    {
-        ERROR("Couldn't allocate p2m_frame_list array");
-        goto out;
-    }
-    memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE);
-
-    /* Canonicalise the pfn-to-mfn table frame-number list. */
-    for ( i = 0; i < p2m_size; i += fpp )
-    {
-        if ( !translate_mfn_to_pfn(&p2m_frame_list[i/fpp]) )
-        {
-            ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys");
-            ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp,
-                  (uint64_t)p2m_frame_list[i/fpp]);
-            goto out;
-        }
-    }
-
-    /* Domain is still running at this point */
-    if ( live )
-    {
-        /* Live suspend. Enable log-dirty mode. */
-        if ( xc_shadow_control(xc_handle, dom,
-                               XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
-                               NULL, 0, NULL, 0, NULL) < 0 )
-        {
-            ERROR("Couldn't enable shadow mode");
-            goto out;
-        }
-    }
-    else
-    {
-        /* This is a non-live suspend. Suspend the domain .*/
-        if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt) )
-        {
-            ERROR("Domain appears not to have suspended");
-            goto out;
-        }
-    }
-
-    last_iter = !live;
-
-    /* pretend we sent all the pages last iteration */
-    sent_last_iter = p2m_size;
-
-    /* calculate the power of 2 order of p2m_size, e.g.
-       15->4 16->4 17->5 */
-    for ( i = p2m_size-1, order_nr = 0; i ; i >>= 1, order_nr++ )
-        continue;
-
-    /* Setup to_send / to_fix and to_skip bitmaps */
-    to_send = malloc(BITMAP_SIZE);
-    to_fix  = calloc(1, BITMAP_SIZE);
-    to_skip = malloc(BITMAP_SIZE);
-
-    if ( !to_send || !to_fix || !to_skip )
-    {
-        ERROR("Couldn't allocate to_send array");
-        goto out;
-    }
-
-    memset(to_send, 0xff, BITMAP_SIZE);
-
-    if ( lock_pages(to_send, BITMAP_SIZE) )
-    {
-        ERROR("Unable to lock to_send");
-        return 1;
-    }
-
-    /* (to fix is local only) */
-    if ( lock_pages(to_skip, BITMAP_SIZE) )
-    {
-        ERROR("Unable to lock to_skip");
-        return 1;
-    }
-
-    analysis_phase(xc_handle, dom, p2m_size, to_skip, 0);
-
-    /* We want zeroed memory so use calloc rather than malloc. */
-    pfn_type   = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type));
-    pfn_batch  = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch));
-    if ( (pfn_type == NULL) || (pfn_batch == NULL) )
-    {
-        ERROR("failed to alloc memory for pfn_type and/or pfn_batch arrays");
-        errno = ENOMEM;
-        goto out;
-    }
-
-    if ( lock_pages(pfn_type, MAX_BATCH_SIZE * sizeof(*pfn_type)) )
-    {
-        ERROR("Unable to lock");
-        goto out;
-    }
-
-    /*
-     * Quick belt and braces sanity check.
-     */
-    {
-        int err=0;
-        unsigned long mfn;
-        for ( i = 0; i < p2m_size; i++ )
-        {
-            mfn = live_p2m[i];
-            if( (mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i) )
-            {
-                DPRINTF("i=0x%x mfn=%lx live_m2p=%lx\n", i,
-                        mfn, mfn_to_pfn(mfn));
-                err++;
-            }
-        }
-        DPRINTF("Had %d unexplained entries in p2m table\n", err);
-    }
-
-    /* Start writing out the saved-domain record. */
-    if ( !write_exact(io_fd, &p2m_size, sizeof(unsigned long)) )
-    {
-        ERROR("write: p2m_size");
-        goto out;
-    }
-
-    /*
-     * Write an extended-info structure to inform the restore code that
-     * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off
-     * slow paths in the restore code.
-     */
-    if ( (pt_levels == 3) &&
-         (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3)) )
-    {
-        unsigned long signature = ~0UL;
-        uint32_t tot_sz   = sizeof(struct vcpu_guest_context) + 8;
-        uint32_t chunk_sz = sizeof(struct vcpu_guest_context);
-        char chunk_sig[]  = "vcpu";
-        if ( !write_exact(io_fd, &signature, sizeof(signature)) ||
-             !write_exact(io_fd, &tot_sz,    sizeof(tot_sz)) ||
-             !write_exact(io_fd, &chunk_sig, 4) ||
-             !write_exact(io_fd, &chunk_sz,  sizeof(chunk_sz)) ||
-             !write_exact(io_fd, &ctxt,      sizeof(ctxt)) )
-        {
-            ERROR("write: extended info");
-            goto out;
-        }
-    }
-
-    if ( !write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE) )
-    {
-        ERROR("write: p2m_frame_list");
-        goto out;
-    }
-
-    print_stats(xc_handle, dom, 0, &stats, 0);
-
-    /* Now write out each data page, canonicalising page tables as we go... */
-    for ( ; ; )
-    {
-        unsigned int prev_pc, sent_this_iter, N, batch;
-
-        iter++;
-        sent_this_iter = 0;
-        skip_this_iter = 0;
-        prev_pc = 0;
-        N = 0;
-
-        DPRINTF("Saving memory pages: iter %d   0%%", iter);
-
-        while ( N < p2m_size )
-        {
-            unsigned int this_pc = (N * 100) / p2m_size;
-            int rc;
-
-            if ( (this_pc - prev_pc) >= 5 )
-            {
-                DPRINTF("\b\b\b\b%3d%%", this_pc);
-                prev_pc = this_pc;
-            }
-
-            if ( !last_iter )
-            {
-                /* Slightly wasteful to peek the whole array evey time,
-                   but this is fast enough for the moment. */
-                rc = xc_shadow_control(
-                    xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, 
-                    p2m_size, NULL, 0, NULL);
-                if ( rc != p2m_size )
-                {
-                    ERROR("Error peeking shadow bitmap");
-                    goto out;
-                }
-            }
-
-            /* load pfn_type[] with the mfn of all the pages we're doing in
-               this batch. */
-            for  ( batch = 0;
-                   (batch < MAX_BATCH_SIZE) && (N < p2m_size);
-                   N++ )
-            {
-                int n = permute(N, p2m_size, order_nr);
-
-                if ( debug )
-                    DPRINTF("%d pfn= %08lx mfn= %08lx %d  [mfn]= %08lx\n",
-                            iter, (unsigned long)n, live_p2m[n],
-                            test_bit(n, to_send),
-                            mfn_to_pfn(live_p2m[n]&0xFFFFF));
-
-                if ( !last_iter &&
-                     test_bit(n, to_send) &&
-                     test_bit(n, to_skip) )
-                    skip_this_iter++; /* stats keeping */
-
-                if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
-                       (test_bit(n, to_send) && last_iter) ||
-                       (test_bit(n, to_fix)  && last_iter)) )
-                    continue;
-
-                /*
-                ** we get here if:
-                **  1. page is marked to_send & hasn't already been re-dirtied
-                **  2. (ignore to_skip in last iteration)
-                **  3. add in pages that still need fixup (net bufs)
-                */
-
-                pfn_batch[batch] = n;
-                pfn_type[batch]  = live_p2m[n];
-
-                if ( !is_mapped(pfn_type[batch]) )
-                {
-                    /*
-                    ** not currently in psuedo-physical map -- set bit
-                    ** in to_fix since we must send this page in last_iter
-                    ** unless its sent sooner anyhow, or it never enters
-                    ** pseudo-physical map (e.g. for ballooned down domains)
-                    */
-                    set_bit(n, to_fix);
-                    continue;
-                }
-
-                if ( last_iter &&
-                     test_bit(n, to_fix) &&
-                     !test_bit(n, to_send) )
-                {
-                    needed_to_fix++;
-                    DPRINTF("Fix! iter %d, pfn %x. mfn %lx\n",
-                            iter, n, pfn_type[batch]);
-                }
-
-                clear_bit(n, to_fix);
-
-                batch++;
-            }
-
-            if ( batch == 0 )
-                goto skip; /* vanishingly unlikely... */
-
-            region_base = xc_map_foreign_batch(
-                xc_handle, dom, PROT_READ, pfn_type, batch);
-            if ( region_base == NULL )
-            {
-                ERROR("map batch failed");
-                goto out;
-            }
-
-            for ( j = 0; j < batch; j++ )
-                ((uint32_t *)pfn_type)[j] = pfn_type[j];
-            if ( xc_get_pfn_type_batch(xc_handle, dom, batch,
-                                       (uint32_t *)pfn_type) )
-            {
-                ERROR("get_pfn_type_batch failed");
-                goto out;
-            }
-            for ( j = batch-1; j >= 0; j-- )
-                pfn_type[j] = ((uint32_t *)pfn_type)[j];
-
-            for ( j = 0; j < batch; j++ )
-            {
-
-                if ( (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) ==
-                     XEN_DOMCTL_PFINFO_XTAB )
-                {
-                    DPRINTF("type fail: page %i mfn %08lx\n", j, pfn_type[j]);
-                    continue;
-                }
-
-                if ( debug )
-                    DPRINTF("%d pfn= %08lx mfn= %08lx [mfn]= %08lx"
-                            " sum= %08lx\n",
-                            iter,
-                            (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) |
-                            pfn_batch[j],
-                            pfn_type[j],
-                            mfn_to_pfn(pfn_type[j] &
-                                       ~XEN_DOMCTL_PFINFO_LTAB_MASK),
-                            csum_page(region_base + (PAGE_SIZE*j)));
-
-                /* canonicalise mfn->pfn */
-                pfn_type[j] = (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) |
-                    pfn_batch[j];
-            }
-
-            if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) )
-            {
-                ERROR("Error when writing to state file (2) (errno %d)",
-                      errno);
-                goto out;
-            }
-
-            if ( !write_exact(io_fd, pfn_type, sizeof(unsigned long)*j) )
-            {
-                ERROR("Error when writing to state file (3) (errno %d)",
-                      errno);
-                goto out;
-            }
-
-            /* entering this loop, pfn_type is now in pfns (Not mfns) */
-            for ( j = 0; j < batch; j++ )
-            {
-                unsigned long pfn, pagetype;
-                void *spage = (char *)region_base + (PAGE_SIZE*j);
-
-                pfn      = pfn_type[j] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
-                pagetype = pfn_type[j] &  XEN_DOMCTL_PFINFO_LTAB_MASK;
-
-                /* write out pages in batch */
-                if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
-                    continue;
-
-                pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
-
-                if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) &&
-                     (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) )
-                {
-                    /* We have a pagetable page: need to rewrite it. */
-                    race = 
-                        canonicalize_pagetable(pagetype, pfn, spage, page); 
-
-                    if ( race && !live )
-                    {
-                        ERROR("Fatal PT race (pfn %lx, type %08lx)", pfn,
-                              pagetype);
-                        goto out;
-                    }
-
-                    if ( ratewrite(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE )
-                    {
-                        ERROR("Error when writing to state file (4)"
-                              " (errno %d)", errno);
-                        goto out;
-                    }
-                }
-                else
-                {
-                    /* We have a normal page: just write it directly. */
-                    if ( ratewrite(io_fd, live, spage, PAGE_SIZE) !=
-                         PAGE_SIZE )
-                    {
-                        ERROR("Error when writing to state file (5)"
-                              " (errno %d)", errno);
-                        goto out;
-                    }
-                }
-            } /* end of the write out for this batch */
-
-            sent_this_iter += batch;
-
-            munmap(region_base, batch*PAGE_SIZE);
-
-        } /* end of this while loop for this iteration */
-
-      skip:
-
-        total_sent += sent_this_iter;
-
-        DPRINTF("\r %d: sent %d, skipped %d, ",
-                iter, sent_this_iter, skip_this_iter );
-
-        if ( last_iter )
-        {
-            print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
-
-            DPRINTF("Total pages sent= %ld (%.2fx)\n",
-                    total_sent, ((float)total_sent)/p2m_size );
-            DPRINTF("(of which %ld were fixups)\n", needed_to_fix  );
-        }
-
-        if ( last_iter && debug )
-        {
-            int minusone = -1;
-            memset(to_send, 0xff, BITMAP_SIZE);
-            debug = 0;
-            DPRINTF("Entering debug resend-all mode\n");
-
-            /* send "-1" to put receiver into debug mode */
-            if ( !write_exact(io_fd, &minusone, sizeof(int)) )
-            {
-                ERROR("Error when writing to state file (6) (errno %d)",
-                      errno);
-                goto out;
-            }
-
-            continue;
-        }
-
-        if ( last_iter )
-            break;
-
-        if ( live )
-        {
-            if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
-                 (iter >= max_iters) ||
-                 (sent_this_iter+skip_this_iter < 50) ||
-                 (total_sent > p2m_size*max_factor) )
-            {
-                DPRINTF("Start last iteration\n");
-                last_iter = 1;
-
-                if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info,
-                                       &ctxt) )
-                {
-                    ERROR("Domain appears not to have suspended");
-                    goto out;
-                }
-
-                DPRINTF("SUSPEND shinfo %08lx eip %08lx edx %08lx\n",
-                        info.shared_info_frame,
-                        (unsigned long)ctxt.user_regs.eip,
-                        (unsigned long)ctxt.user_regs.edx);
-            }
-
-            if ( xc_shadow_control(xc_handle, dom, 
-                                   XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, 
-                                   p2m_size, NULL, 0, &stats) != p2m_size )
-            {
-                ERROR("Error flushing shadow PT");
-                goto out;
-            }
-
-            sent_last_iter = sent_this_iter;
-
-            print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
-
-        }
-    } /* end of infinite for loop */
-
-    DPRINTF("All memory is saved\n");
-
-    {
-        struct {
-            int minustwo;
-            int max_vcpu_id;
-            uint64_t vcpumap;
-        } chunk = { -2, info.max_vcpu_id };
-
-        if ( info.max_vcpu_id >= 64 )
-        {
-            ERROR("Too many VCPUS in guest!");
-            goto out;
-        }
-
-        for ( i = 1; i <= info.max_vcpu_id; i++ )
-        {
-            xc_vcpuinfo_t vinfo;
-            if ( (xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) &&
-                 vinfo.online )
-                vcpumap |= 1ULL << i;
-        }
-
-        chunk.vcpumap = vcpumap;
-        if ( !write_exact(io_fd, &chunk, sizeof(chunk)) )
-        {
-            ERROR("Error when writing to state file (errno %d)", errno);
-            goto out;
-        }
-    }
-
-    /* Zero terminate */
-    i = 0;
-    if ( !write_exact(io_fd, &i, sizeof(int)) )
-    {
-        ERROR("Error when writing to state file (6') (errno %d)", errno);
-        goto out;
-    }
-
-    /* Send through a list of all the PFNs that were not in map at the close */
-    {
-        unsigned int i,j;
-        unsigned long pfntab[1024];
-
-        for ( i = 0, j = 0; i < p2m_size; i++ )
-        {
-            if ( !is_mapped(live_p2m[i]) )
-                j++;
-        }
-
-        if ( !write_exact(io_fd, &j, sizeof(unsigned int)) )
-        {
-            ERROR("Error when writing to state file (6a) (errno %d)", errno);
-            goto out;
-        }
-
-        for ( i = 0, j = 0; i < p2m_size; )
-        {
-            if ( !is_mapped(live_p2m[i]) )
-                pfntab[j++] = i;
-
-            i++;
-            if ( (j == 1024) || (i == p2m_size) )
-            {
-                if ( !write_exact(io_fd, &pfntab, sizeof(unsigned long)*j) )
-                {
-                    ERROR("Error when writing to state file (6b) (errno %d)",
-                          errno);
-                    goto out;
-                }
-                j = 0;
-            }
-        }
-    }
-
-    /* Canonicalise the suspend-record frame number. */
-    if ( !translate_mfn_to_pfn(&ctxt.user_regs.edx) )
-    {
-        ERROR("Suspend record is not in range of pseudophys map");
-        goto out;
-    }
-
-    for ( i = 0; i <= info.max_vcpu_id; i++ )
-    {
-        if ( !(vcpumap & (1ULL << i)) )
-            continue;
-
-        if ( (i != 0) && xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) )
-        {
-            ERROR("No context for VCPU%d", i);
-            goto out;
-        }
-
-        /* Canonicalise each GDT frame number. */
-        for ( j = 0; (512*j) < ctxt.gdt_ents; j++ )
-        {
-            if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[j]) )
-            {
-                ERROR("GDT frame is not in range of pseudophys map");
-                goto out;
-            }
-        }
-
-        /* Canonicalise the page table base pointer. */
-        if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[3])) )
-        {
-            ERROR("PT base is not in range of pseudophys map");
-            goto out;
-        }
-        ctxt.ctrlreg[3] = 
-            xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3])));
-
-        /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
-        if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
-        {
-            if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[1])) )
-            {
-                ERROR("PT base is not in range of pseudophys map");
-                goto out;
-            }
-            /* Least-significant bit means 'valid PFN'. */
-            ctxt.ctrlreg[1] = 1 |
-                xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[1])));
-        }
-
-        if ( !write_exact(io_fd, &ctxt, sizeof(ctxt)) )
-        {
-            ERROR("Error when writing to state file (1) (errno %d)", errno);
-            goto out;
-        }
-    }
-
-    /*
-     * Reset the MFN to be a known-invalid value. See map_frame_list_list().
-     */
-    memcpy(page, live_shinfo, PAGE_SIZE);
-    ((shared_info_t *)page)->arch.pfn_to_mfn_frame_list_list = 0;
-    if ( !write_exact(io_fd, page, PAGE_SIZE) )
-    {
-        ERROR("Error when writing to state file (1) (errno %d)", errno);
-        goto out;
-    }
-
-    /* Success! */
-    rc = 0;
-
- out:
-
-    if ( live )
-    {
-        if ( xc_shadow_control(xc_handle, dom, 
-                               XEN_DOMCTL_SHADOW_OP_OFF,
-                               NULL, 0, NULL, 0, NULL) < 0 )
-            DPRINTF("Warning - couldn't disable shadow mode");
-    }
-
-    /* Flush last write and discard cache for file. */
-    discard_file_cache(io_fd, 1 /* flush */);
-
-    if ( live_shinfo )
-        munmap(live_shinfo, PAGE_SIZE);
-
-    if ( live_p2m_frame_list_list )
-        munmap(live_p2m_frame_list_list, PAGE_SIZE);
-
-    if ( live_p2m_frame_list )
-        munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
-
-    if ( live_p2m )
-        munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
-
-    if ( live_m2p )
-        munmap(live_m2p, M2P_SIZE(max_mfn));
-
-    free(pfn_type);
-    free(pfn_batch);
-    free(to_send);
-    free(to_fix);
-    free(to_skip);
-
-    DPRINTF("Save exit rc=%d\n",rc);
-
-    return !!rc;
-}
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_resume.c
--- a/tools/libxc/xc_resume.c   Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxc/xc_resume.c   Fri Apr 13 11:14:26 2007 +0100
@@ -3,24 +3,71 @@
 #include "xg_save_restore.h"
 
 #if defined(__i386__) || defined(__x86_64__)
+
+#include <xen/foreign/x86_32.h>
+#include <xen/foreign/x86_64.h>
+#include <xen/hvm/params.h>
+
+/* Need to provide the right flavour of vcpu context for Xen */
+typedef union
+{
+    vcpu_guest_context_x86_64_t c64;
+    vcpu_guest_context_x86_32_t c32;   
+    vcpu_guest_context_t c;
+} vcpu_guest_context_either_t;
+
 static int modify_returncode(int xc_handle, uint32_t domid)
 {
-    vcpu_guest_context_t ctxt;
+    vcpu_guest_context_either_t ctxt;
+    xc_dominfo_t info;
+    xen_capabilities_info_t caps;
     int rc;
 
-    if ( (rc = xc_vcpu_getcontext(xc_handle, domid, 0, &ctxt)) != 0 )
-        return rc;
-    ctxt.user_regs.eax = 1;
-    if ( (rc = xc_vcpu_setcontext(xc_handle, domid, 0, &ctxt)) != 0 )
+    if ( xc_domain_getinfo(xc_handle, domid, 1, &info) != 1 )
+    {
+        PERROR("Could not get domain info");
+        return -1;
+    }
+
+    /* HVM guests without PV drivers do not have a return code to modify. */
+    if ( info.hvm )
+    {
+        unsigned long irq = 0;
+        xc_get_hvm_param(xc_handle, domid, HVM_PARAM_CALLBACK_IRQ, &irq);
+        if ( !irq )
+            return 0;
+    }
+
+    if ( xc_version(xc_handle, XENVER_capabilities, &caps) != 0 )
+    {
+        PERROR("Could not get Xen capabilities");
+        return -1;
+    }
+
+    if ( (rc = xc_vcpu_getcontext(xc_handle, domid, 0, &ctxt.c)) != 0 )
+        return rc;
+
+    if ( !info.hvm )
+        ctxt.c.user_regs.eax = 1;
+    else if ( strstr(caps, "x86_64") )
+        ctxt.c64.user_regs.eax = 1;
+    else
+        ctxt.c32.user_regs.eax = 1;
+
+    if ( (rc = xc_vcpu_setcontext(xc_handle, domid, 0, &ctxt.c)) != 0 )
         return rc;
 
     return 0;
 }
+
 #else
+
 static int modify_returncode(int xc_handle, uint32_t domid)
 {
     return 0;
-}
+
+}
+
 #endif
 
 static int xc_domain_resume_cooperative(int xc_handle, uint32_t domid)
@@ -65,6 +112,12 @@ static int xc_domain_resume_any(int xc_h
      * (x86 only) Rewrite store_mfn and console_mfn back to MFN (from PFN).
      */
 #if defined(__i386__) || defined(__x86_64__)
+    if ( info.hvm )
+    {
+        ERROR("Cannot resume uncooperative HVM guests");
+        return rc;
+    }
+
     /* Map the shared info frame */
     shinfo = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
                                   PROT_READ, info.shared_info_frame);
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxc/xenctrl.h     Fri Apr 13 11:14:26 2007 +0100
@@ -840,6 +840,9 @@ const char *xc_error_code_to_desc(int co
  */
 xc_error_handler xc_set_error_handler(xc_error_handler handler);
 
+int xc_set_hvm_param(int handle, domid_t dom, int param, unsigned long value);
+int xc_get_hvm_param(int handle, domid_t dom, int param, unsigned long *value);
+
 /* PowerPC specific. */
 int xc_alloc_real_mode_area(int xc_handle,
                             uint32_t domid,
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h    Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxc/xenguest.h    Fri Apr 13 11:14:26 2007 +0100
@@ -16,26 +16,19 @@
 
 
 /**
- * This function will save a domain running Linux.
+ * This function will save a running domain.
  *
  * @parm xc_handle a handle to an open hypervisor interface
  * @parm fd the file descriptor to save a domain to
  * @parm dom the id of the domain
  * @return 0 on success, -1 on failure
  */
-int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
-                  uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
-                  int (*suspend)(int domid));
+int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
+                   uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
+                   int (*suspend)(int domid), int hvm,
+                   void *(*init_qemu_maps)(int, unsigned),  /* HVM only */
+                   void (*qemu_flip_buffer)(int, int));     /* HVM only */
 
-/**
- * This function will save a hvm domain running unmodified guest.
- * @return 0 on success, -1 on failure
- */
-int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
-                uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
-                int (*suspend)(int domid),  
-                void *(*init_qemu_maps)(int, unsigned), 
-                void (*qemu_flip_buffer)(int, int));
 
 /**
  * This function will restore a saved domain.
@@ -143,11 +136,6 @@ int xc_hvm_build_mem(int xc_handle,
                      const char *image_buffer,
                      unsigned long image_size);
 
-int xc_set_hvm_param(
-    int handle, domid_t dom, int param, unsigned long value);
-int xc_get_hvm_param(
-    int handle, domid_t dom, int param, unsigned long *value);
-
 /* PowerPC specific. */
 int xc_prose_build(int xc_handle,
                    uint32_t domid,
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xg_private.c
--- a/tools/libxc/xg_private.c  Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxc/xg_private.c  Fri Apr 13 11:14:26 2007 +0100
@@ -196,29 +196,6 @@ __attribute__((weak))
 {
     errno = ENOSYS;
     return -1;
-}
-
-__attribute__((weak)) 
-    int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
-                    uint32_t max_factor, uint32_t flags,
-                    int (*suspend)(int domid), 
-                    void *(*init_qemu_maps)(int, unsigned), 
-                    void (*qemu_flip_buffer)(int, int))
-{
-    errno = ENOSYS;
-    return -1;
-}
-
-__attribute__((weak)) int xc_get_hvm_param(
-    int handle, domid_t dom, int param, unsigned long *value)
-{
-    return -ENOSYS;
-}
-
-__attribute__((weak)) int xc_set_hvm_param(
-    int handle, domid_t dom, int param, unsigned long value)
-{
-    return -ENOSYS;
 }
 
 /*
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxen/include/xen_host_cpu.h
--- a/tools/libxen/include/xen_host_cpu.h       Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxen/include/xen_host_cpu.h       Fri Apr 13 11:14:26 2007 +0100
@@ -70,6 +70,7 @@ typedef struct xen_host_cpu_record
     char *modelname;
     char *stepping;
     char *flags;
+    char *features;
     double utilisation;
 } xen_host_cpu_record;
 
@@ -223,6 +224,13 @@ xen_host_cpu_get_flags(xen_session *sess
 
 
 /**
+ * Get the features field of the given host_cpu.
+ */
+extern bool
+xen_host_cpu_get_features(xen_session *session, char **result, xen_host_cpu host_cpu);
+
+
+/**
  * Get the utilisation field of the given host_cpu.
  */
 extern bool
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxen/include/xen_vm.h
--- a/tools/libxen/include/xen_vm.h     Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxen/include/xen_vm.h     Fri Apr 13 11:14:26 2007 +0100
@@ -838,6 +838,28 @@ xen_vm_set_vcpus_number_live(xen_session
 
 
 /**
+ * Add the given key-value pair to VM.VCPUs_params, and apply that
+ * value on the running VM.
+ */
+extern bool
+xen_vm_add_to_vcpus_params_live(xen_session *session, xen_vm self, char *key, char *value);
+
+
+/**
+ * Set memory_dynamic_max in database and on running VM.
+ */
+extern bool
+xen_vm_set_memory_dynamic_max_live(xen_session *session, xen_vm self, int64_t max);
+
+
+/**
+ * Set memory_dynamic_min in database and on running VM.
+ */
+extern bool
+xen_vm_set_memory_dynamic_min_live(xen_session *session, xen_vm self, int64_t min);
+
+
+/**
  * Send the given key as a sysrq to this VM.  The key is specified as a
  * single character (a String of length 1).  This can only be called when the
  * specified VM is in the Running state.
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxen/include/xen_vm_metrics.h
--- a/tools/libxen/include/xen_vm_metrics.h     Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxen/include/xen_vm_metrics.h     Fri Apr 13 11:14:26 2007 +0100
@@ -22,6 +22,7 @@
 #include "xen_common.h"
 #include "xen_int_float_map.h"
 #include "xen_int_int_map.h"
+#include "xen_string_set.h"
 #include "xen_string_string_map.h"
 #include "xen_vm_metrics_decl.h"
 
@@ -70,6 +71,7 @@ typedef struct xen_vm_metrics_record
     xen_int_float_map *vcpus_utilisation;
     xen_int_int_map *vcpus_cpu;
     xen_string_string_map *vcpus_params;
+    struct xen_string_set *state;
     time_t start_time;
     time_t last_updated;
 } xen_vm_metrics_record;
@@ -210,6 +212,13 @@ xen_vm_metrics_get_vcpus_params(xen_sess
 
 
 /**
+ * Get the state field of the given VM_metrics.
+ */
+extern bool
+xen_vm_metrics_get_state(xen_session *session, struct xen_string_set **result, xen_vm_metrics vm_metrics);
+
+
+/**
  * Get the start_time field of the given VM_metrics.
  */
 extern bool
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxen/src/xen_host_cpu.c
--- a/tools/libxen/src/xen_host_cpu.c   Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxen/src/xen_host_cpu.c   Fri Apr 13 11:14:26 2007 +0100
@@ -61,6 +61,9 @@ static const struct_member xen_host_cpu_
         { .key = "flags",
           .type = &abstract_type_string,
           .offset = offsetof(xen_host_cpu_record, flags) },
+        { .key = "features",
+          .type = &abstract_type_string,
+          .offset = offsetof(xen_host_cpu_record, features) },
         { .key = "utilisation",
           .type = &abstract_type_float,
           .offset = offsetof(xen_host_cpu_record, utilisation) }
@@ -90,6 +93,7 @@ xen_host_cpu_record_free(xen_host_cpu_re
     free(record->modelname);
     free(record->stepping);
     free(record->flags);
+    free(record->features);
     free(record);
 }
 
@@ -252,6 +256,23 @@ xen_host_cpu_get_flags(xen_session *sess
 
 
 bool
+xen_host_cpu_get_features(xen_session *session, char **result, xen_host_cpu host_cpu)
+{
+    abstract_value param_values[] =
+        {
+            { .type = &abstract_type_string,
+              .u.string_val = host_cpu }
+        };
+
+    abstract_type result_type = abstract_type_string;
+
+    *result = NULL;
+    XEN_CALL_("host_cpu.get_features");
+    return session->ok;
+}
+
+
+bool
 xen_host_cpu_get_utilisation(xen_session *session, double *result, xen_host_cpu host_cpu)
 {
     abstract_value param_values[] =
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxen/src/xen_vm.c
--- a/tools/libxen/src/xen_vm.c Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxen/src/xen_vm.c Fri Apr 13 11:14:26 2007 +0100
@@ -1610,6 +1610,56 @@ xen_vm_set_vcpus_number_live(xen_session
 
 
 bool
+xen_vm_add_to_vcpus_params_live(xen_session *session, xen_vm self, char *key, char *value)
+{
+    abstract_value param_values[] =
+        {
+            { .type = &abstract_type_string,
+              .u.string_val = self },
+            { .type = &abstract_type_string,
+              .u.string_val = key },
+            { .type = &abstract_type_string,
+              .u.string_val = value }
+        };
+
+    xen_call_(session, "VM.add_to_VCPUs_params_live", param_values, 3, NULL, NULL);
+    return session->ok;
+}
+
+
+bool
+xen_vm_set_memory_dynamic_max_live(xen_session *session, xen_vm self, int64_t max)
+{
+    abstract_value param_values[] =
+        {
+            { .type = &abstract_type_string,
+              .u.string_val = self },
+            { .type = &abstract_type_int,
+              .u.int_val = max }
+        };
+
+    xen_call_(session, "VM.set_memory_dynamic_max_live", param_values, 2, NULL, NULL);
+    return session->ok;
+}
+
+
+bool
+xen_vm_set_memory_dynamic_min_live(xen_session *session, xen_vm self, int64_t min)
+{
+    abstract_value param_values[] =
+        {
+            { .type = &abstract_type_string,
+              .u.string_val = self },
+            { .type = &abstract_type_int,
+              .u.int_val = min }
+        };
+
+    xen_call_(session, "VM.set_memory_dynamic_min_live", param_values, 2, NULL, NULL);
+    return session->ok;
+}
+
+
+bool
 xen_vm_send_sysrq(xen_session *session, xen_vm vm, char *key)
 {
     abstract_value param_values[] =
diff -r 5bda20f0723d -r f92a79e39da8 tools/libxen/src/xen_vm_metrics.c
--- a/tools/libxen/src/xen_vm_metrics.c Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/libxen/src/xen_vm_metrics.c Fri Apr 13 11:14:26 2007 +0100
@@ -57,6 +57,9 @@ static const struct_member xen_vm_metric
         { .key = "VCPUs_params",
           .type = &abstract_type_string_string_map,
           .offset = offsetof(xen_vm_metrics_record, vcpus_params) },
+        { .key = "state",
+          .type = &abstract_type_string_set,
+          .offset = offsetof(xen_vm_metrics_record, state) },
         { .key = "start_time",
           .type = &abstract_type_datetime,
           .offset = offsetof(xen_vm_metrics_record, start_time) },
@@ -87,6 +90,7 @@ xen_vm_metrics_record_free(xen_vm_metric
     xen_int_float_map_free(record->vcpus_utilisation);
     xen_int_int_map_free(record->vcpus_cpu);
     xen_string_string_map_free(record->vcpus_params);
+    xen_string_set_free(record->state);
     free(record);
 }
 
@@ -215,6 +219,23 @@ xen_vm_metrics_get_vcpus_params(xen_sess
 
 
 bool
+xen_vm_metrics_get_state(xen_session *session, struct xen_string_set **result, xen_vm_metrics vm_metrics)
+{
+    abstract_value param_values[] =
+        {
+            { .type = &abstract_type_string,
+              .u.string_val = vm_metrics }
+        };
+
+    abstract_type result_type = abstract_type_string_set;
+
+    *result = NULL;
+    XEN_CALL_("VM_metrics.get_state");
+    return session->ok;
+}
+
+
+bool
 xen_vm_metrics_get_start_time(xen_session *session, time_t *result, xen_vm_metrics vm_metrics)
 {
     abstract_value param_values[] =
diff -r 5bda20f0723d -r f92a79e39da8 tools/pygrub/src/LiloConf.py
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/pygrub/src/LiloConf.py      Fri Apr 13 11:14:26 2007 +0100
@@ -0,0 +1,147 @@
+#
+#LiloConf.py
+#
+
+import sys, re, os
+import logging
+import GrubConf
+
+class LiloImage(object):
+    def __init__(self, lines, path):
+        self.reset(lines, path)
+    def __repr__(self):
+        return ("title: %s\n"
+                "  root: %s\n"
+                "  kernel: %s\n"
+                "  args: %s\n"
+                "  initrd: %s\n" %(self.title, self.root, self.kernel,
+                                   self.args, self.initrd))
+    def reset(self, lines, path):
+        self._root = self._initrd = self._kernel = self._args = None
+        self.args = None # stays None unless an "append" directive sets it
+        self.title = ""
+        self.lines = []
+        self.path = path
+        map(self.set_from_line, lines)
+        self.root = "" # dummy
+
+    def set_from_line(self, line, replace = None):
+        (com, arg) = GrubConf.grub_exact_split(line, 2)
+        if self.commands.has_key(com):
+            if self.commands[com] is not None:
+                # NOTE(review): exec runs text taken from the config file as Python code
+                exec("%s = r\'%s\'" %(self.commands[com], re.sub('^"(.+)"$', r"\1", arg.strip())))
+            else:
+                logging.info("Ignored image directive %s" %(com,))
+        else:
+            logging.warning("Unknown image directive %s" %(com,))
+
+        # now put the line in the list of lines
+        if replace is None:
+            self.lines.append(line)
+        else:
+            self.lines.pop(replace)
+            self.lines.insert(replace, line)
+
+    def set_kernel(self, val):
+        self._kernel = (None, self.path + "/" + val)
+    def get_kernel(self):
+        return self._kernel
+    kernel = property(get_kernel, set_kernel)
+
+    def set_initrd(self, val):
+        self._initrd = (None, self.path + "/" + val)
+    def get_initrd(self):
+        return self._initrd
+    initrd = property(get_initrd, set_initrd)
+
+    # set up command handlers
+    commands = { "label": "self.title",
+                 "root": "self.root",
+                 "rootnoverify": "self.root",
+                 "image": "self.kernel",
+                 "initrd": "self.initrd",
+                 "append": "self.args",
+                 "read-only": None,
+                 "chainloader": None,
+                 "module": None}
+
+class LiloConfigFile(object):
+    def __init__(self, fn = None):
+        self.filename = fn
+        self.images = []
+        self.timeout = -1
+        self._default = 0
+
+        if fn is not None:
+            self.parse()
+
+    def parse(self, buf = None):
+        if buf is None:
+            if self.filename is None:
+                raise ValueError, "No config file defined to parse!"
+
+            f = open(self.filename, 'r')
+            lines = f.readlines()
+            f.close()
+        else:
+            lines = buf.split("\n")
+
+        path = os.path.dirname(self.filename)
+        img = []
+        for l in lines:
+            l = l.strip()
+            # skip blank lines
+            if len(l) == 0:
+                continue
+            # skip comments
+            if l.startswith('#'):
+                continue
+            # new image
+            if l.startswith("image"):
+                if len(img) > 0:
+                    self.add_image(LiloImage(img, path))
+                img = [l]
+                continue
+
+            if len(img) > 0:
+                img.append(l)
+                continue
+
+            (com, arg) = GrubConf.grub_exact_split(l, 2)
+            if self.commands.has_key(com):
+                if self.commands[com] is not None:
+                    exec("%s = r\"%s\"" %(self.commands[com], arg.strip()))
+                else:
+                    logging.info("Ignored directive %s" %(com,))
+            else:
+                logging.warning("Unknown directive %s" %(com,))
+
+        if len(img) > 0:
+            self.add_image(LiloImage(img, path))
+
+    def add_image(self, image):
+        self.images.append(image)
+
+    def _get_default(self):
+        for i in range(len(self.images)): # every image, including the last
+            if self.images[i].title == self._default:
+                return i
+        return 0
+    def _set_default(self, val):
+        self._default = val
+    default = property(_get_default, _set_default)
+
+    commands = { "default": "self.default",
+                 "timeout": "self.timeout",
+                 "prompt": None,
+                 "relocatable": None,
+                 }
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2: # compare the argument count, not the list itself
+        raise RuntimeError, "Need a lilo.conf to read"
+    g = LiloConfigFile(sys.argv[1])
+    for i in g.images:
+        print i #, i.title, i.root, i.kernel, i.args, i.initrd
+    print g.default
diff -r 5bda20f0723d -r f92a79e39da8 tools/pygrub/src/pygrub
--- a/tools/pygrub/src/pygrub   Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/pygrub/src/pygrub   Fri Apr 13 11:14:26 2007 +0100
@@ -16,6 +16,7 @@ import os, sys, string, struct, tempfile
 import os, sys, string, struct, tempfile, re
 import copy
 import logging
+import platform
 
 import curses, _curses, curses.wrapper, curses.textpad, curses.ascii
 import getopt
@@ -24,6 +25,7 @@ sys.path = [ '/usr/lib/python' ] + sys.p
 
 import fsimage
 import grub.GrubConf
+import grub.LiloConf
 
 PYGRUB_VER = 0.5
 
@@ -58,6 +60,13 @@ def get_active_partition(file):
         # active partition has 0x80 as the first byte
         if struct.unpack("<c", buf[poff:poff+1]) == ('\x80',):
             return buf[poff:poff+16]
+
+        # type=0xee: GUID partition table
+        # XXX assume the first partition is active
+        if struct.unpack("<c", buf[poff+4:poff+5]) == ('\xee',):
+            os.lseek(fd, 0x400, 0)
+            buf = os.read(fd, 512)
+            return buf[24:40] # XXX buf[32:40]
 
     # if there's not a partition marked as active, fall back to
     # the first partition
@@ -346,7 +355,13 @@ class Grub:
         if not os.access(fn, os.R_OK):
             raise RuntimeError, "Unable to access %s" %(fn,)
 
-        self.cf = grub.GrubConf.GrubConfigFile()
+        if platform.machine() == 'ia64':
+            self.cf = grub.LiloConf.LiloConfigFile()
+            file_list = ("/efi/redhat/elilo.conf",)
+        else:
+            self.cf = grub.GrubConf.GrubConfigFile()
+            file_list = ("/boot/grub/menu.lst", "/boot/grub/grub.conf",
+                         "/grub/menu.lst", "/grub/grub.conf")
 
         if not fs:
             # set the config file and parse it
@@ -354,18 +369,15 @@ class Grub:
             self.cf.parse()
             return
 
-        grubfile = None
-        for f in ("/boot/grub/menu.lst", "/boot/grub/grub.conf",
-                  "/grub/menu.lst", "/grub/grub.conf"):
+        for f in file_list:
             if fs.file_exists(f):
-                grubfile = f
-                break
-        if grubfile is None:
-            raise RuntimeError, "we couldn't find grub config file in the image provided."
-        f = fs.open_file(grubfile)
+                self.cf.filename = f
+                break
+        if self.cf.filename is None:
+            raise RuntimeError, "couldn't find bootloader config file in the image provided."
+        f = fs.open_file(self.cf.filename)
         buf = f.read()
         del f
-        # then parse the grub config
         self.cf.parse(buf)
 
     def run(self):
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/README.XendConfig
--- a/tools/python/README.XendConfig    Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/README.XendConfig    Fri Apr 13 11:14:26 2007 +0100
@@ -115,6 +115,7 @@ otherConfig
                                 image.nographic
                                 image.vnc
                                 image.sdl
+                                image.monitor
                                 image.vncdisplay
                                 image.vncunused
                                 image.hvm.device_model
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/README.sxpcfg
--- a/tools/python/README.sxpcfg        Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/README.sxpcfg        Fri Apr 13 11:14:26 2007 +0100
@@ -63,6 +63,7 @@ image
   - fdb
   - soundhw
   - localtime
+  - monitor
   - serial
   - stdvga
   - isa
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py       Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/xen/xend/XendConfig.py       Fri Apr 13 11:14:26 2007 +0100
@@ -117,7 +117,7 @@ LEGACY_CFG_TO_XENAPI_CFG = reverse_dict(
 
 # Platform configuration keys.
 XENAPI_PLATFORM_CFG = [ 'acpi', 'apic', 'boot', 'device_model', 'display', 
-                        'fda', 'fdb', 'keymap', 'isa', 'localtime',
+                        'fda', 'fdb', 'keymap', 'isa', 'localtime', 'monitor', 
                         'nographic', 'pae', 'rtc_timeoffset', 'serial', 'sdl',
                         'soundhw','stdvga', 'usb', 'usbdevice', 'vnc',
                         'vncconsole', 'vncdisplay', 'vnclisten',
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/xen/xend/XendDomainInfo.py   Fri Apr 13 11:14:26 2007 +0100
@@ -1601,7 +1601,6 @@ class XendDomainInfo:
             self.image = image.create(self, self.info)
             if self.image:
                 self.image.createDeviceModel(True)
-                self.image.register_shutdown_watch()
         self._storeDomDetails()
         self._registerWatches()
         self.refreshShutdown()
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/xen/xend/XendNode.py Fri Apr 13 11:14:26 2007 +0100
@@ -603,7 +603,7 @@ class XendNode:
         return [[k, info[k]] for k in ITEM_ORDER]
 
     def xendinfo(self):
-        return [['xend_config_format', 3]]
+        return [['xend_config_format', 4]]
 
     #
     # utilisation tracking
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/xen/xend/image.py    Fri Apr 13 11:14:26 2007 +0100
@@ -284,17 +284,16 @@ class HVMImageHandler(ImageHandler):
         log.debug("acpi           = %d", self.acpi)
         log.debug("apic           = %d", self.apic)
 
-        self.register_shutdown_watch()
-        self.register_reboot_feature_watch()
-
-        return xc.hvm_build(domid          = self.vm.getDomid(),
-                            image          = self.kernel,
-                            store_evtchn   = store_evtchn,
-                            memsize        = mem_mb,
-                            vcpus          = self.vm.getVCpuCount(),
-                            pae            = self.pae,
-                            acpi           = self.acpi,
-                            apic           = self.apic)
+        rc = xc.hvm_build(domid          = self.vm.getDomid(),
+                          image          = self.kernel,
+                          store_evtchn   = store_evtchn,
+                          memsize        = mem_mb,
+                          vcpus          = self.vm.getVCpuCount(),
+                          pae            = self.pae,
+                          acpi           = self.acpi,
+                          apic           = self.apic)
+        rc['notes'] = { 'SUSPEND_CANCEL': 1 }
+        return rc
 
     # Return a list of cmd line args to the device models based on the
     # xm config file
@@ -418,6 +417,8 @@ class HVMImageHandler(ImageHandler):
         else:
             ret.append('-nographic')
 
+        if int(vmConfig['platform'].get('monitor', 0)) != 0:
+            ret.extend(['-monitor', 'vc']) # option and value are separate args
         return ret
 
     def createDeviceModel(self, restore = False):
@@ -448,13 +449,9 @@ class HVMImageHandler(ImageHandler):
         log.info("device model pid: %d", self.pid)
 
     def recreate(self):
-        self.register_shutdown_watch()
-        self.register_reboot_feature_watch()
         self.pid = self.vm.gatherDom(('image/device-model-pid', int))
 
     def destroy(self, suspend = False):
-        self.unregister_shutdown_watch()
-        self.unregister_reboot_feature_watch();
         if self.pid:
             try:
                 sig = signal.SIGKILL
@@ -473,74 +470,6 @@ class HVMImageHandler(ImageHandler):
                 pass
             self.pid = None
 
-    def register_shutdown_watch(self):
-        """ add xen store watch on control/shutdown """
-        self.shutdownWatch = xswatch(self.vm.dompath + "/control/shutdown",
-                                     self.hvm_shutdown)
-        log.debug("hvm shutdown watch registered")
-
-    def unregister_shutdown_watch(self):
-        """Remove the watch on the control/shutdown, if any. Nothrow
-        guarantee."""
-
-        try:
-            if self.shutdownWatch:
-                self.shutdownWatch.unwatch()
-        except:
-            log.exception("Unwatching hvm shutdown watch failed.")
-        self.shutdownWatch = None
-        log.debug("hvm shutdown watch unregistered")
-
-    def hvm_shutdown(self, _):
-        """ watch call back on node control/shutdown,
-            if node changed, this function will be called
-        """
-        xd = xen.xend.XendDomain.instance()
-        try:
-            vm = xd.domain_lookup( self.vm.getDomid() )
-        except XendError:
-            # domain isn't registered, no need to clean it up.
-            return False
-
-        reason = vm.getShutdownReason()
-        log.debug("hvm_shutdown fired, shutdown reason=%s", reason)
-        if reason in REVERSE_DOMAIN_SHUTDOWN_REASONS:
-            vm.info['shutdown'] = 1
-            vm.info['shutdown_reason'] = \
-                REVERSE_DOMAIN_SHUTDOWN_REASONS[reason]
-            vm.refreshShutdown(vm.info)
-
-        return True # Keep watching
-
-    def register_reboot_feature_watch(self):
-        """ add xen store watch on control/feature-reboot """
-        self.rebootFeatureWatch = xswatch(self.vm.dompath + 
"/control/feature-reboot", \
-                                         self.hvm_reboot_feature)
-        log.debug("hvm reboot feature watch registered")
-
-    def unregister_reboot_feature_watch(self):
-        """Remove the watch on the control/feature-reboot, if any. Nothrow
-        guarantee."""
-
-        try:
-            if self.rebootFeatureWatch:
-                self.rebootFeatureWatch.unwatch()
-        except:
-            log.exception("Unwatching hvm reboot feature watch failed.")
-        self.rebootFeatureWatch = None
-        log.debug("hvm reboot feature watch unregistered")
-
-    def hvm_reboot_feature(self, _):
-        """ watch call back on node control/feature-reboot,
-            if node changed, this function will be called
-        """
-        status = self.vm.readDom('control/feature-reboot')
-        log.debug("hvm_reboot_feature fired, module status=%s", status)
-        if status == '1':
-            self.unregister_shutdown_watch()
-
-        return True # Keep watching
-
 
 class IA64_HVM_ImageHandler(HVMImageHandler):
 
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xm/create.dtd
--- a/tools/python/xen/xm/create.dtd    Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/xen/xm/create.dtd    Fri Apr 13 11:14:26 2007 +0100
@@ -95,7 +95,7 @@
                  src             %URI; #REQUIRED
                  type            %VDI_TYPE; #REQUIRED
                  size            CDATA #REQUIRED
-                 shareable       CDATA #REQUIRED
+                 sharable        CDATA #REQUIRED
                  read_only       CDATA #REQUIRED>
 
 <!ELEMENT name   (label, 
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/xen/xm/create.py     Fri Apr 13 11:14:26 2007 +0100
@@ -420,6 +420,10 @@ gopts.var('serial', val='FILE',
 gopts.var('serial', val='FILE',
           fn=set_value, default='',
           use="Path to serial or pty or vc")
+
+gopts.var('monitor', val='no|yes',
+          fn=set_bool, default=0,
+          use="""Should the device model use monitor?""")
 
 gopts.var('localtime', val='no|yes',
           fn=set_bool, default=0,
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/xen/xm/main.py       Fri Apr 13 11:14:26 2007 +0100
@@ -1544,34 +1544,59 @@ def xm_info(args):
 
         host_metrics_record = server.xenapi.host_metrics.get_record(host_record["metrics"])
 
+        def getVal(keys, default=""):
+            data = host_record
+            for key in keys:
+                if key in data:
+                    data = data[key]
+                else:
+                    return default
+            return data
+
+        def getCpuMhz():
+            cpu_speeds = [int(host_cpu_record["speed"])
+                          for host_cpu_record in host_cpu_records
+                          if "speed" in host_cpu_record]
+            if len(cpu_speeds) > 0:
+                return sum(cpu_speeds) / len(cpu_speeds)
+            else:
+                return 0
+
+        getCpuMhz()
+
+        def getCpuFeatures():
+            if len(host_cpu_records) > 0:
+                return host_cpu_records[0].get("features", "")
+            else:
+                return ""
+                
         info = {
-            "host":              host_record["name_label"],
-            "release":           host_record["software_version"]["release"],
-            "version":           host_record["software_version"]["version"],
-            "machine":           host_record["software_version"]["machine"],
-            "nr_cpus":           len(host_record["host_CPUs"]),
-            "nr_nodes":          host_record["cpu_configuration"]["nr_nodes"],
-            "sockets_per_node":  
host_record["cpu_configuration"]["sockets_per_node"],
-            "cores_per_socket":  
host_record["cpu_configuration"]["cores_per_socket"],
-            "threads_per_core":  
host_record["cpu_configuration"]["threads_per_core"],
-            "cpu_mhz":           sum([int(host_cpu_record["speed"]) for 
host_cpu_record in host_cpu_records])
-                                   / len(host_cpu_records),
-            "hw_caps":           host_cpu_records[0]["features"],
+            "host":              getVal(["name_label"]),
+            "release":           getVal(["software_version", "release"]),
+            "version":           getVal(["software_version", "version"]),
+            "machine":           getVal(["software_version", "machine"]),
+            "nr_cpus":           len(getVal(["host_CPUs"], [])),
+            "nr_nodes":          getVal(["cpu_configuration", "nr_nodes"]),
+            "sockets_per_node":  getVal(["cpu_configuration", "sockets_per_node"]),
+            "cores_per_socket":  getVal(["cpu_configuration", "cores_per_socket"]),
+            "threads_per_core":  getVal(["cpu_configuration", "threads_per_core"]),
+            "cpu_mhz":           getCpuMhz(),
+            "hw_caps":           getCpuFeatures(),
             "total_memory":      int(host_metrics_record["memory_total"])/1024/1024,
             "free_memory":       int(host_metrics_record["memory_free"])/1024/1024,
-            "xen_major":         host_record["software_version"]["xen_major"],
-            "xen_minor":         host_record["software_version"]["xen_minor"],
-            "xen_extra":         host_record["software_version"]["xen_extra"],
-            "xen_caps":          " ".join(host_record["capabilities"]),
-            "xen_scheduler":     host_record["sched_policy"],
-            "xen_pagesize":      host_record["other_config"]["xen_pagesize"],
-            "platform_params":   
host_record["other_config"]["platform_params"],
-            "xen_changeset":     
host_record["software_version"]["xen_changeset"],
-            "cc_compiler":       
host_record["software_version"]["cc_compiler"],
-            "cc_compile_by":     
host_record["software_version"]["cc_compile_by"],
-            "cc_compile_domain": 
host_record["software_version"]["cc_compile_domain"],
-            "cc_compile_date":   
host_record["software_version"]["cc_compile_date"],
-            
"xend_config_format":host_record["software_version"]["xend_config_format"]      
                          
+            "xen_major":         getVal(["software_version", "xen_major"]),
+            "xen_minor":         getVal(["software_version", "xen_minor"]),
+            "xen_extra":         getVal(["software_version", "xen_extra"]),
+            "xen_caps":          " ".join(getVal(["capabilities"], [])),
+            "xen_scheduler":     getVal(["sched_policy"]),
+            "xen_pagesize":      getVal(["other_config", "xen_pagesize"]),
+            "platform_params":   getVal(["other_config", "platform_params"]),
+            "xen_changeset":     getVal(["software_version", "xen_changeset"]),
+            "cc_compiler":       getVal(["software_version", "cc_compiler"]),
+            "cc_compile_by":     getVal(["software_version", "cc_compile_by"]),
+            "cc_compile_domain": getVal(["software_version", "cc_compile_domain"]),
+            "cc_compile_date":   getVal(["software_version", "cc_compile_date"]),
+            "xend_config_format":getVal(["software_version", 
"xend_config_format"])                                
         }
 
         sorted = info.items()
diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xm/xenapi_create.py
--- a/tools/python/xen/xm/xenapi_create.py      Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/python/xen/xm/xenapi_create.py      Fri Apr 13 11:14:26 2007 +0100
@@ -48,7 +48,7 @@ def get_name_description(node):
 
 def get_text_in_child_node(node, child):
     tag_node = node.getElementsByTagName(child)[0]
-    return tag_node.nodeValue
+    return " ".join([child.nodeValue for child in tag_node.childNodes])
 
 def get_child_node_attribute(node, child, attribute):
     tag_node = node.getElementsByTagName(child)[0]
@@ -212,8 +212,8 @@ class xenapi_create:
             "SR":               self.DEFAULT_STORAGE_REPOSITORY,  
             "virtual_size":     vdi.attributes["size"].value,
             "type":             vdi.attributes["type"].value,
-            "shareable":        vdi.attributes["shareable"].value,
-            "read_only":        vdi.attributes["read_only"].value,
+            "sharable":         bool(vdi.attributes["sharable"].value),
+            "read_only":        bool(vdi.attributes["read_only"].value),
             "other_config":     {"location":
                 vdi.attributes["src"].value}
             }
@@ -264,7 +264,23 @@ class xenapi_create:
             "platform":
                 get_child_nodes_as_dict(vm, "platform", "key", "value"),
             "other_config":
-                get_child_nodes_as_dict(vm, "other_config", "key", "value")
+                get_child_nodes_as_dict(vm, "other_config", "key", "value"),
+            "PV_bootloader":
+                "",
+            "PV_kernel":
+                "",
+            "PV_ramdisk":
+                "",
+            "PV_args":
+                "",
+            "PV_bootloader_args":
+                "",
+            "HVM_boot_policy":
+                "",
+            "HVM_boot_params":
+                {},
+            "PCI_bus":
+               ""
             }
 
         if len(vm.getElementsByTagName("pv")) > 0:
@@ -494,7 +510,7 @@ class sxp2xml:
         # Make version tag
 
         version = document.createElement("version")
-        version.appendChild(document.createTextNode("1.0"))
+        version.appendChild(document.createTextNode("0"))
         vm.appendChild(version)
         
         # Make pv or hvm tag
@@ -629,10 +645,10 @@ class sxp2xml:
         vdi.attributes["src"] = src
         vdi.attributes["read_only"] \
             = (get_child_by_name(vbd_sxp, "mode") != "w") \
-               and "true" or "false"
+               and "True" or "False"
         vdi.attributes["size"] = '-1'
         vdi.attributes["type"] = "system"
-        vdi.attributes["shareable"] = "false"
+        vdi.attributes["sharable"] = "False"
         vdi.attributes["name"] = name
 
         vdi.appendChild(self.make_name_tag(name, document))
diff -r 5bda20f0723d -r f92a79e39da8 tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c   Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/xcutils/xc_save.c   Fri Apr 13 11:14:26 2007 +0100
@@ -174,12 +174,9 @@ main(int argc, char **argv)
     max_f = atoi(argv[4]);
     flags = atoi(argv[5]);
 
-    if (flags & XCFLAGS_HVM)
-        ret = xc_hvm_save(xc_fd, io_fd, domid, maxit, max_f, flags, 
-                          &suspend, &init_qemu_maps, &qemu_flip_buffer);
-    else 
-        ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags, 
-                            &suspend);
+    ret = xc_domain_save(xc_fd, io_fd, domid, maxit, max_f, flags, 
+                         &suspend, !!(flags & XCFLAGS_HVM),
+                         &init_qemu_maps, &qemu_flip_buffer);
 
     xc_interface_close(xc_fd);
 
diff -r 5bda20f0723d -r f92a79e39da8 tools/xm-test/lib/XmTestLib/NetConfig.py
--- a/tools/xm-test/lib/XmTestLib/NetConfig.py  Thu Apr 12 16:37:32 2007 -0500
+++ b/tools/xm-test/lib/XmTestLib/NetConfig.py  Fri Apr 13 11:14:26 2007 +0100
@@ -44,7 +44,11 @@ def getXendNetConfig():
     if not xconfig:
         xconfig = "/etc/xen/xend-config.sxp"
 
-    configfile = open(xconfig, 'r')
+    try:
+        configfile = open(xconfig, 'r')
+    except:
+        return "bridge"
+    
     S = configfile.read()
     pin = Parser()
     pin.input(S)
diff -r 5bda20f0723d -r f92a79e39da8 unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h
--- a/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h Thu Apr 12 16:37:32 2007 -0500
+++ b/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h Fri Apr 13 11:14:26 2007 +0100
@@ -2,8 +2,8 @@
 #define COMPAT_INCLUDE_XEN_PLATFORM_COMPAT_H
 
 #include <linux/version.h>
-
 #include <linux/spinlock.h>
+#include <asm/maddr.h>
 
 #if defined(__LINUX_COMPILER_H) && !defined(__always_inline)
 #define __always_inline inline
@@ -98,8 +98,6 @@ extern char *kasprintf(gfp_t gfp, const 
 
 #if defined(_PAGE_PRESENT) && !defined(_PAGE_NX)
 #define _PAGE_NX 0
-#endif
-
 /*
  * This variable at present is referenced by netfront, but only in code that
  * is dead when running in hvm guests. To detect potential active uses of it
@@ -107,5 +105,6 @@ extern char *kasprintf(gfp_t gfp, const 
  * mappings created with it will fault when accessed.
  */
 #define __supported_pte_mask ((maddr_t)0)
+#endif
 
 #endif
diff -r 5bda20f0723d -r f92a79e39da8 unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c
--- a/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c        Thu Apr 12 16:37:32 2007 -0500
+++ b/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c        Fri Apr 13 11:14:26 2007 +0100
@@ -6,21 +6,32 @@
 #include "platform-pci.h"
 #include <asm/hypervisor.h>
 
+struct ap_suspend_info {
+       int      do_spin;
+       atomic_t nr_spinning;
+};
+
 /*
  * Spinning prevents, for example, APs touching grant table entries while
  * the shared grant table is not mapped into the address space imemdiately
  * after resume.
  */
-static void ap_suspend(void *_ap_spin)
+static void ap_suspend(void *_info)
 {
-       int *ap_spin = _ap_spin;
+       struct ap_suspend_info *info = _info;
 
        BUG_ON(!irqs_disabled());
 
-       while (*ap_spin) {
+       atomic_inc(&info->nr_spinning);
+       mb();
+
+       while (info->do_spin) {
                cpu_relax();
                HYPERVISOR_yield();
        }
+
+       mb();
+       atomic_dec(&info->nr_spinning);
 }
 
 static int bp_suspend(void)
@@ -42,7 +53,8 @@ static int bp_suspend(void)
 
 int __xen_suspend(int fast_suspend)
 {
-       int err, suspend_cancelled, ap_spin;
+       int err, suspend_cancelled, nr_cpus;
+       struct ap_suspend_info info;
 
        xenbus_suspend();
 
@@ -51,22 +63,30 @@ int __xen_suspend(int fast_suspend)
        /* Prevent any races with evtchn_interrupt() handler. */
        disable_irq(xen_platform_pdev->irq);
 
-       ap_spin = 1;
+       info.do_spin = 1;
+       atomic_set(&info.nr_spinning, 0);
        smp_mb();
 
-       err = smp_call_function(ap_suspend, &ap_spin, 0, 0);
+       nr_cpus = num_online_cpus() - 1;
+
+       err = smp_call_function(ap_suspend, &info, 0, 0);
        if (err < 0) {
                preempt_enable();
                xenbus_suspend_cancel();
                return err;
        }
 
+       while (atomic_read(&info.nr_spinning) != nr_cpus)
+               cpu_relax();
+
        local_irq_disable();
        suspend_cancelled = bp_suspend();
        local_irq_enable();
 
        smp_mb();
-       ap_spin = 0;
+       info.do_spin = 0;
+       while (atomic_read(&info.nr_spinning) != 0)
+               cpu_relax();
 
        enable_irq(xen_platform_pdev->irq);
 
diff -r 5bda20f0723d -r f92a79e39da8 xen/arch/ia64/xen/hyperprivop.S
--- a/xen/arch/ia64/xen/hyperprivop.S   Thu Apr 12 16:37:32 2007 -0500
+++ b/xen/arch/ia64/xen/hyperprivop.S   Fri Apr 13 11:14:26 2007 +0100
@@ -304,6 +304,8 @@ ENTRY(hyper_ssm_i)
        ;;
        adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18;
        adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18;;
+       // temporarily save ar.unat
+       mov r28=ar.unat   
        bsw.1;;
        // FIXME?: ar.unat is not really handled correctly,
        // but may not matter if the OS is NaT-clean
@@ -324,6 +326,12 @@ ENTRY(hyper_ssm_i)
        .mem.offset 0,0; st8.spill [r2]=r30,16;
        .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
        bsw.0 ;;
+       mov r27=ar.unat
+       adds r26=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18 ;;
+       //save bank1 ar.unat
+       st8 [r26]=r27
+       //restore ar.unat
+       mov ar.unat=r28
        mov r2=r30
        mov r3=r29
        adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
@@ -1518,8 +1526,10 @@ ENTRY(hyper_get_psr)
        adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld8 r20=[r20];;
        ld1 r21=[r20];;
-       dep r8=r21,r8,IA64_PSR_I_BIT,1
-       ;;
+       cmp.eq p8,p9=r0,r21
+       ;;
+(p8)   dep r8=-1,r8,IA64_PSR_I_BIT,1
+(p9)   dep r8=0,r8,IA64_PSR_I_BIT,1
        // set vpsr.dfh
        adds r20=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
        ld1 r21=[r20];;
diff -r 5bda20f0723d -r f92a79e39da8 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c    Thu Apr 12 16:37:32 2007 -0500
+++ b/xen/arch/ia64/xen/mm.c    Fri Apr 13 11:14:26 2007 +0100
@@ -673,7 +673,7 @@ unsigned long lookup_domain_mpa(struct d
     } else if (mpaddr - IO_PORTS_PADDR < IO_PORTS_SIZE) {
         /* Log I/O port probing, but complain less loudly about it */
         gdprintk(XENLOG_INFO, "vcpu %d iip 0x%016lx: bad I/O port access "
-                 "0x%lx\n ", current->vcpu_id, PSCB(current, iip),
+                 "0x%lx\n", current->vcpu_id, PSCB(current, iip),
                  IO_SPACE_SPARSE_DECODING(mpaddr - IO_PORTS_PADDR));
     } else {
         gdprintk(XENLOG_WARNING, "vcpu %d iip 0x%016lx: bad mpa 0x%lx "
diff -r 5bda20f0723d -r f92a79e39da8 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Thu Apr 12 16:37:32 2007 -0500
+++ b/xen/arch/x86/hvm/hvm.c    Fri Apr 13 11:14:26 2007 +0100
@@ -191,6 +191,7 @@ static int hvm_save_cpu_ctxt(struct doma
 {
     struct vcpu *v;
     struct hvm_hw_cpu ctxt;
+    struct vcpu_guest_context *vc;
 
     for_each_vcpu(d, v)
     {
@@ -199,7 +200,40 @@ static int hvm_save_cpu_ctxt(struct doma
         if ( test_bit(_VPF_down, &v->pause_flags) ) 
             continue;
 
+        /* Architecture-specific vmcs/vmcb bits */
         hvm_funcs.save_cpu_ctxt(v, &ctxt);
+
+        /* Other vcpu register state */
+        vc = &v->arch.guest_context;
+        if ( vc->flags & VGCF_i387_valid )
+            memcpy(ctxt.fpu_regs, &vc->fpu_ctxt, sizeof(ctxt.fpu_regs));
+        else 
+            memset(ctxt.fpu_regs, 0, sizeof(ctxt.fpu_regs));
+        ctxt.rax = vc->user_regs.eax;
+        ctxt.rbx = vc->user_regs.ebx;
+        ctxt.rcx = vc->user_regs.ecx;
+        ctxt.rdx = vc->user_regs.edx;
+        ctxt.rbp = vc->user_regs.ebp;
+        ctxt.rsi = vc->user_regs.esi;
+        ctxt.rdi = vc->user_regs.edi;
+        /* %rsp handled by arch-specific call above */
+#ifdef __x86_64__        
+        ctxt.r8  = vc->user_regs.r8;
+        ctxt.r9  = vc->user_regs.r9;
+        ctxt.r10 = vc->user_regs.r10;
+        ctxt.r11 = vc->user_regs.r11;
+        ctxt.r12 = vc->user_regs.r12;
+        ctxt.r13 = vc->user_regs.r13;
+        ctxt.r14 = vc->user_regs.r14;
+        ctxt.r15 = vc->user_regs.r15;
+#endif
+        ctxt.dr0 = vc->debugreg[0];
+        ctxt.dr1 = vc->debugreg[1];
+        ctxt.dr2 = vc->debugreg[2];
+        ctxt.dr3 = vc->debugreg[3];
+        ctxt.dr6 = vc->debugreg[6];
+        ctxt.dr7 = vc->debugreg[7];
+
         if ( hvm_save_entry(CPU, v->vcpu_id, h, &ctxt) != 0 )
             return 1; 
     }
@@ -208,9 +242,10 @@ static int hvm_save_cpu_ctxt(struct doma
 
 static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
 {
-    int vcpuid;
+    int vcpuid, rc;
     struct vcpu *v;
     struct hvm_hw_cpu ctxt;
+    struct vcpu_guest_context *vc;
 
     /* Which vcpu is this? */
     vcpuid = hvm_load_instance(h);
@@ -219,12 +254,51 @@ static int hvm_load_cpu_ctxt(struct doma
         gdprintk(XENLOG_ERR, "HVM restore: domain has no vcpu %u\n", vcpuid);
         return -EINVAL;
     }
+    vc = &v->arch.guest_context;
+
+    /* Need to init this vcpu before loading its contents */
+    LOCK_BIGLOCK(d);
+    if ( !v->is_initialised )
+        if ( (rc = boot_vcpu(d, vcpuid, vc)) != 0 )
+            return rc;
+    UNLOCK_BIGLOCK(d);
 
     if ( hvm_load_entry(CPU, h, &ctxt) != 0 ) 
         return -EINVAL;
 
+    /* Architecture-specific vmcs/vmcb bits */
     if ( hvm_funcs.load_cpu_ctxt(v, &ctxt) < 0 )
         return -EINVAL;
+
+    /* Other vcpu register state */
+    memcpy(&vc->fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
+    vc->user_regs.eax = ctxt.rax;
+    vc->user_regs.ebx = ctxt.rbx;
+    vc->user_regs.ecx = ctxt.rcx;
+    vc->user_regs.edx = ctxt.rdx;
+    vc->user_regs.ebp = ctxt.rbp;
+    vc->user_regs.esi = ctxt.rsi;
+    vc->user_regs.edi = ctxt.rdi;
+    vc->user_regs.esp = ctxt.rsp;
+#ifdef __x86_64__
+    vc->user_regs.r8  = ctxt.r8; 
+    vc->user_regs.r9  = ctxt.r9; 
+    vc->user_regs.r10 = ctxt.r10;
+    vc->user_regs.r11 = ctxt.r11;
+    vc->user_regs.r12 = ctxt.r12;
+    vc->user_regs.r13 = ctxt.r13;
+    vc->user_regs.r14 = ctxt.r14;
+    vc->user_regs.r15 = ctxt.r15;
+#endif
+    vc->debugreg[0] = ctxt.dr0;
+    vc->debugreg[1] = ctxt.dr1;
+    vc->debugreg[2] = ctxt.dr2;
+    vc->debugreg[3] = ctxt.dr3;
+    vc->debugreg[6] = ctxt.dr6;
+    vc->debugreg[7] = ctxt.dr7;
+
+    vc->flags = VGCF_i387_valid | VGCF_online;
+    v->fpu_initialised = 1;
 
     /* Auxiliary processors should be woken immediately. */
     if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
diff -r 5bda20f0723d -r f92a79e39da8 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Thu Apr 12 16:37:32 2007 -0500
+++ b/xen/arch/x86/hvm/svm/svm.c        Fri Apr 13 11:14:26 2007 +0100
@@ -233,7 +233,7 @@ int svm_vmcb_save(struct vcpu *v, struct
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
-    c->eip = vmcb->rip;
+    c->rip = vmcb->rip;
 
 #ifdef HVM_DEBUG_SUSPEND
     printk("%s: eip=0x%"PRIx64".\n", 
@@ -241,10 +241,11 @@ int svm_vmcb_save(struct vcpu *v, struct
            inst_len, c->eip);
 #endif
 
-    c->esp = vmcb->rsp;
-    c->eflags = vmcb->rflags;
+    c->rsp = vmcb->rsp;
+    c->rflags = vmcb->rflags;
 
     c->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
+    c->cr2 = v->arch.hvm_svm.cpu_cr2;
     c->cr3 = v->arch.hvm_svm.cpu_cr3;
     c->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
 
@@ -315,14 +316,16 @@ int svm_vmcb_restore(struct vcpu *v, str
     unsigned long mfn, old_base_mfn;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
-    vmcb->rip    = c->eip;
-    vmcb->rsp    = c->esp;
-    vmcb->rflags = c->eflags;
+    vmcb->rip    = c->rip;
+    vmcb->rsp    = c->rsp;
+    vmcb->rflags = c->rflags;
 
     v->arch.hvm_svm.cpu_shadow_cr0 = c->cr0;
     vmcb->cr0 = c->cr0 | X86_CR0_WP | X86_CR0_ET;
     if ( !paging_mode_hap(v->domain) ) 
         vmcb->cr0 |= X86_CR0_PG;
+
+    v->arch.hvm_svm.cpu_cr2 = c->cr2;
 
 #ifdef HVM_DEBUG_SUSPEND
     printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
@@ -421,6 +424,9 @@ int svm_vmcb_restore(struct vcpu *v, str
     vmcb->sysenter_esp = c->sysenter_esp;
     vmcb->sysenter_eip = c->sysenter_eip;
 
+    vmcb->dr6 = c->dr6;
+    vmcb->dr7 = c->dr7;
+
     paging_update_paging_modes(v);
     return 0;
  
@@ -440,6 +446,7 @@ void svm_save_cpu_state(struct vcpu *v, 
     data->msr_cstar        = vmcb->cstar;
     data->msr_syscall_mask = vmcb->sfmask;
     data->msr_efer         = v->arch.hvm_svm.cpu_shadow_efer;
+    data->msr_flags        = -1ULL;
 
     data->tsc = hvm_get_guest_time(v);
 }
diff -r 5bda20f0723d -r f92a79e39da8 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Thu Apr 12 16:37:32 2007 -0500
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Fri Apr 13 11:14:26 2007 +0100
@@ -370,11 +370,12 @@ static inline void __restore_debug_regis
 
 int vmx_vmcs_save(struct vcpu *v, struct hvm_hw_cpu *c)
 {    
-    c->eip = __vmread(GUEST_RIP);
-    c->esp = __vmread(GUEST_RSP);
-    c->eflags = __vmread(GUEST_RFLAGS);
+    c->rip = __vmread(GUEST_RIP);
+    c->rsp = __vmread(GUEST_RSP);
+    c->rflags = __vmread(GUEST_RFLAGS);
 
     c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
+    c->cr2 = v->arch.hvm_vmx.cpu_cr2;
     c->cr3 = v->arch.hvm_vmx.cpu_cr3;
     c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
 
@@ -444,12 +445,14 @@ int vmx_vmcs_restore(struct vcpu *v, str
 
     vmx_vmcs_enter(v);
 
-    __vmwrite(GUEST_RIP, c->eip);
-    __vmwrite(GUEST_RSP, c->esp);
-    __vmwrite(GUEST_RFLAGS, c->eflags);
+    __vmwrite(GUEST_RIP, c->rip);
+    __vmwrite(GUEST_RSP, c->rsp);
+    __vmwrite(GUEST_RFLAGS, c->rflags);
 
     v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
     __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
+
+    v->arch.hvm_vmx.cpu_cr2 = c->cr2;
 
 #ifdef HVM_DEBUG_SUSPEND
     printk("vmx_vmcs_restore: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
@@ -555,6 +558,8 @@ int vmx_vmcs_restore(struct vcpu *v, str
     __vmwrite(GUEST_SYSENTER_ESP, c->sysenter_esp);
     __vmwrite(GUEST_SYSENTER_EIP, c->sysenter_eip);
 
+    __vmwrite(GUEST_DR7, c->dr7);
+
     vmx_vmcs_exit(v);
 
     paging_update_paging_modes(v);
@@ -590,7 +595,7 @@ void vmx_save_cpu_state(struct vcpu *v, 
     data->shadow_gs = guest_state->shadow_gs;
 
     /* save msrs */
-    data->flags = guest_flags;
+    data->msr_flags        = guest_flags;
     data->msr_lstar        = guest_state->msrs[VMX_INDEX_MSR_LSTAR];
     data->msr_star         = guest_state->msrs[VMX_INDEX_MSR_STAR];
     data->msr_cstar        = guest_state->msrs[VMX_INDEX_MSR_CSTAR];
@@ -607,7 +612,7 @@ void vmx_load_cpu_state(struct vcpu *v, 
     struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
 
     /* restore msrs */
-    guest_state->flags = data->flags;
+    guest_state->flags = data->msr_flags;
     guest_state->msrs[VMX_INDEX_MSR_LSTAR]        = data->msr_lstar;
     guest_state->msrs[VMX_INDEX_MSR_STAR]         = data->msr_star;
     guest_state->msrs[VMX_INDEX_MSR_CSTAR]        = data->msr_cstar;
diff -r 5bda20f0723d -r f92a79e39da8 xen/include/public/hvm/save.h
--- a/xen/include/public/hvm/save.h     Thu Apr 12 16:37:32 2007 -0500
+++ b/xen/include/public/hvm/save.h     Fri Apr 13 11:14:26 2007 +0100
@@ -87,12 +87,39 @@ DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct 
  */
 
 struct hvm_hw_cpu {
-    uint64_t eip;
-    uint64_t esp;
-    uint64_t eflags;
+    uint8_t  fpu_regs[512];
+
+    uint64_t rax;
+    uint64_t rbx;
+    uint64_t rcx;
+    uint64_t rdx;
+    uint64_t rbp;
+    uint64_t rsi;
+    uint64_t rdi;
+    uint64_t rsp;
+    uint64_t r8;
+    uint64_t r9;
+    uint64_t r10;
+    uint64_t r11;
+    uint64_t r12;
+    uint64_t r13;
+    uint64_t r14;
+    uint64_t r15;
+
+    uint64_t rip;
+    uint64_t rflags;
+
     uint64_t cr0;
+    uint64_t cr2;
     uint64_t cr3;
     uint64_t cr4;
+
+    uint64_t dr0;
+    uint64_t dr1;
+    uint64_t dr2;
+    uint64_t dr3;
+    uint64_t dr6;
+    uint64_t dr7;    
 
     uint32_t cs_sel;
     uint32_t ds_sel;
@@ -142,9 +169,9 @@ struct hvm_hw_cpu {
 
     /* msr for em64t */
     uint64_t shadow_gs;
-    uint64_t flags;
 
     /* msr content saved/restored. */
+    uint64_t msr_flags;
     uint64_t msr_lstar;
     uint64_t msr_star;
     uint64_t msr_cstar;

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.