[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] Merge with xenppc-unstable-merge.hg
# HG changeset patch # User kfraser@xxxxxxxxxxxxxxxxxxxxx # Date 1176459266 -3600 # Node ID f92a79e39da872c5632a8490ebb97e2e1fcbca28 # Parent 5bda20f0723daea6c4390eaa77f7860ec0cd67a7 # Parent fdbbc6aa2cbf230fbe0341a04d78dc1d55fb3244 Merge with xenppc-unstable-merge.hg --- tools/libxc/xc_hvm_save.c | 755 ---- tools/libxc/xc_linux_save.c | 1414 -------- README | 4 docs/xen-api/xenapi-datamodel.tex | 173 + linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c | 2 linux-2.6-xen-sparse/arch/ia64/kernel/setup.c | 4 linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c | 3 linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c | 36 linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S | 6 linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S | 63 linux-2.6-xen-sparse/drivers/xen/core/reboot.c | 3 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h | 2 linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h | 2 linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h | 1 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h | 2 tools/examples/network-bridge | 19 tools/examples/xmexample.hvm | 6 tools/examples/xmexample.vti | 5 tools/ioemu/hw/pc.c | 12 tools/ioemu/monitor.c | 2 tools/ioemu/vl.c | 10 tools/ioemu/vl.h | 5 tools/ioemu/xenstore.c | 220 - tools/libfsimage/fat/fat.h | 14 tools/libxc/Makefile | 4 tools/libxc/ia64/xc_ia64_linux_save.c | 6 tools/libxc/xc_domain.c | 39 tools/libxc/xc_domain_restore.c | 41 tools/libxc/xc_domain_save.c | 1587 ++++++++++ tools/libxc/xc_hvm_build.c | 41 tools/libxc/xc_resume.c | 65 tools/libxc/xenctrl.h | 3 tools/libxc/xenguest.h | 24 tools/libxc/xg_private.c | 23 tools/libxen/include/xen_host_cpu.h | 8 tools/libxen/include/xen_vm.h | 22 tools/libxen/include/xen_vm_metrics.h | 9 tools/libxen/src/xen_host_cpu.c | 21 tools/libxen/src/xen_vm.c | 50 tools/libxen/src/xen_vm_metrics.c | 21 tools/pygrub/src/LiloConf.py | 147 tools/pygrub/src/pygrub | 32 tools/python/README.XendConfig | 1 tools/python/README.sxpcfg | 1 tools/python/xen/xend/XendConfig.py | 2 tools/python/xen/xend/XendDomainInfo.py | 1 tools/python/xen/xend/XendNode.py | 2 tools/python/xen/xend/image.py | 95 tools/python/xen/xm/create.dtd | 2 tools/python/xen/xm/create.py | 4 tools/python/xen/xm/main.py | 75 tools/python/xen/xm/xenapi_create.py | 30 tools/xcutils/xc_save.c | 9 tools/xm-test/lib/XmTestLib/NetConfig.py | 6 unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h | 5 unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c | 34 xen/arch/ia64/xen/hyperprivop.S | 14 xen/arch/ia64/xen/mm.c | 2 xen/arch/x86/hvm/hvm.c | 76 xen/arch/x86/hvm/svm/svm.c | 19 xen/arch/x86/hvm/vmx/vmx.c | 21 xen/include/public/hvm/save.h | 35 62 files changed, 2718 insertions(+), 2622 deletions(-) diff -r 5bda20f0723d -r f92a79e39da8 README --- a/README Thu Apr 12 16:37:32 2007 -0500 +++ b/README Fri Apr 13 11:14:26 2007 +0100 @@ -199,3 +199,7 @@ Xend (the Xen daemon) has the following * For optional PAM support, PyPAM: URL: http://www.pangalactic.org/PyPAM/ Debian: python-pam + + * For optional XenAPI support in XM, PyXML: + URL: http://pyxml.sourceforge.net + YUM: PyXML diff -r 5bda20f0723d -r f92a79e39da8 docs/xen-api/xenapi-datamodel.tex --- a/docs/xen-api/xenapi-datamodel.tex Thu Apr 12 16:37:32 2007 -0500 +++ b/docs/xen-api/xenapi-datamodel.tex Fri Apr 13 11:14:26 2007 +0100 @@ -1558,6 +1558,111 @@ void \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} +\subsubsection{RPC name:~add\_to\_VCPUs\_params\_live} + +{\bf Overview:} +Add the given key-value pair to VM.VCPUs\_params, and apply that value on +the running VM. + + \noindent {\bf Signature:} +\begin{verbatim} void add_to_VCPUs_params_live (session_id s, VM ref self, string key, string value)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt VM ref } & self & The VM \\ \hline + +{\tt string } & key & The key \\ \hline + +{\tt string } & value & The value \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +void +} + + + +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~set\_memory\_dynamic\_max\_live} + +{\bf Overview:} +Set memory\_dynamic\_max in database and on running VM. + + \noindent {\bf Signature:} +\begin{verbatim} void set_memory_dynamic_max_live (session_id s, VM ref self, int max)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt VM ref } & self & The VM \\ \hline + +{\tt int } & max & The memory\_dynamic\_max value \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +void +} + + + +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~set\_memory\_dynamic\_min\_live} + +{\bf Overview:} +Set memory\_dynamic\_min in database and on running VM. + + \noindent {\bf Signature:} +\begin{verbatim} void set_memory_dynamic_min_live (session_id s, VM ref self, int min)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt VM ref } & self & The VM \\ \hline + +{\tt int } & min & The memory\_dynamic\_min value \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +void +} + + + +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} \subsubsection{RPC name:~send\_sysrq} {\bf Overview:} @@ -4184,6 +4289,7 @@ Quals & Field & Type & Description \\ $\mathit{RO}_\mathit{run}$ & {\tt VCPUs/utilisation} & (int $\rightarrow$ float) Map & Utilisation for all of guest's current VCPUs \\ $\mathit{RO}_\mathit{run}$ & {\tt VCPUs/CPU} & (int $\rightarrow$ int) Map & VCPU to PCPU map \\ $\mathit{RO}_\mathit{run}$ & {\tt VCPUs/params} & (string $\rightarrow$ string) Map & The live equivalent to VM.VCPUs\_params \\ +$\mathit{RO}_\mathit{run}$ & {\tt state} & string Set & The state of the guest, eg blocked, dying etc \\ $\mathit{RO}_\mathit{run}$ & {\tt start\_time} & datetime & Time at which this VM was last booted \\ $\mathit{RO}_\mathit{run}$ & {\tt last\_updated} & datetime & Time at which this information was last updated \\ \hline @@ -4395,6 +4501,38 @@ Get the VCPUs/params field of the given \noindent {\bf Return Type:} {\tt (string $\rightarrow$ string) Map +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_state} + +{\bf Overview:} +Get the state field of the given VM\_metrics. + + \noindent {\bf Signature:} +\begin{verbatim} (string Set) get_state (session_id s, VM_metrics ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt VM\_metrics ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string Set } @@ -6601,7 +6739,8 @@ Quals & Field & Type & Description \\ $\mathit{RO}_\mathit{run}$ & {\tt speed} & int & the speed of the physical CPU \\ $\mathit{RO}_\mathit{run}$ & {\tt modelname} & string & the model name of the physical CPU \\ $\mathit{RO}_\mathit{run}$ & {\tt stepping} & string & the stepping of the physical CPU \\ -$\mathit{RO}_\mathit{run}$ & {\tt flags} & string & the flags of the physical CPU \\ +$\mathit{RO}_\mathit{run}$ & {\tt flags} & string & the flags of the physical CPU (a decoded version of the features field) \\ +$\mathit{RO}_\mathit{run}$ & {\tt features} & string & the physical CPU feature bitmap \\ $\mathit{RO}_\mathit{run}$ & {\tt utilisation} & float & the current CPU utilisation \\ \hline \end{longtable} @@ -6858,6 +6997,38 @@ Get the flags field of the given host\_c \noindent {\bf Signature:} \begin{verbatim} string get_flags (session_id s, host_cpu ref self)\end{verbatim} + + +\noindent{\bf Arguments:} + + +\vspace{0.3cm} +\begin{tabular}{|c|c|p{7cm}|} + \hline +{\bf type} & {\bf name} & {\bf description} \\ \hline +{\tt host\_cpu ref } & self & reference to the object \\ \hline + +\end{tabular} + +\vspace{0.3cm} + + \noindent {\bf Return Type:} +{\tt +string +} + + +value of the field +\vspace{0.3cm} +\vspace{0.3cm} +\vspace{0.3cm} +\subsubsection{RPC name:~get\_features} + +{\bf Overview:} +Get the features field of the given host\_cpu. + + \noindent {\bf Signature:} +\begin{verbatim} string get_features (session_id s, host_cpu ref self)\end{verbatim} \noindent{\bf Arguments:} diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c --- a/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c Thu Apr 12 16:37:32 2007 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c Fri Apr 13 11:14:26 2007 +0100 @@ -290,5 +290,7 @@ void foo(void) DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum); DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]); DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]); + DEFINE_MAPPED_REG_OFS(XSI_B0NATS_OFS, vbnat); + DEFINE_MAPPED_REG_OFS(XSI_B1NATS_OFS, vnat); #endif /* CONFIG_XEN */ } diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/arch/ia64/kernel/setup.c --- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c Thu Apr 12 16:37:32 2007 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c Fri Apr 13 11:14:26 2007 +0100 @@ -594,6 +594,10 @@ setup_arch (char **cmdline_p) /* enable IA-64 Machine Check Abort Handling unless disabled */ +#ifdef CONFIG_XEN + if (is_running_on_xen() && !is_initial_xendomain()) + nomca = 1; +#endif if (!nomca) ia64_mca_init(); diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c --- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c Thu Apr 12 16:37:32 2007 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c Fri Apr 13 11:14:26 2007 +0100 @@ -852,6 +852,9 @@ time_resume(void) /* Just trigger a tick. */ ia64_cpu_local_tick(); + + /* Time interpolator remembers the last timer status. Forget it */ + time_interpolator_reset(); } /////////////////////////////////////////////////////////////////////////// diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c --- a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c Thu Apr 12 16:37:32 2007 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c Fri Apr 13 11:14:26 2007 +0100 @@ -418,3 +418,39 @@ xencomm_mini_hypercall_perfmon_op(unsign return xencomm_arch_hypercall_perfmon_op(cmd, desc, count); } EXPORT_SYMBOL_GPL(xencomm_mini_hypercall_perfmon_op); + +int +xencomm_mini_hypercall_sched_op(int cmd, void *arg) +{ + int rc, nbr_area = 2; + struct xencomm_mini xc_area[2]; + struct xencomm_handle *desc; + unsigned int argsize; + + switch (cmd) { + case SCHEDOP_yield: + case SCHEDOP_block: + argsize = 0; + break; + case SCHEDOP_shutdown: + argsize = sizeof(sched_shutdown_t); + break; + case SCHEDOP_poll: + argsize = sizeof(sched_poll_t); + break; + case SCHEDOP_remote_shutdown: + argsize = sizeof(sched_remote_shutdown_t); + break; + + default: + printk("%s: unknown sched op %d\n", __func__, cmd); + return -ENOSYS; + } + + rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc); + if (rc) + return rc; + + return xencomm_arch_hypercall_sched_op(cmd, desc); +} +EXPORT_SYMBOL_GPL(xencomm_mini_hypercall_sched_op); diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S Thu Apr 12 16:37:32 2007 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S Fri Apr 13 11:14:26 2007 +0100 @@ -614,6 +614,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) #ifdef CONFIG_XEN ;; // r16-r31 all now hold bank1 values + mov r15=ar.unat movl r2=XSI_BANK1_R16 movl r3=XSI_BANK1_R16+8 ;; @@ -641,6 +642,11 @@ GLOBAL_ENTRY(ia64_leave_kernel) .mem.offset 0,0; st8.spill [r2]=r30,16 .mem.offset 8,0; st8.spill [r3]=r31,16 ;; + mov r3=ar.unat + movl r2=XSI_B1NAT + ;; + st8 [r2]=r3 + mov ar.unat=r15 movl r2=XSI_BANKNUM;; st4 [r2]=r0; #else diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S Thu Apr 12 16:37:32 2007 -0500 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S Fri Apr 13 11:14:26 2007 +0100 @@ -2013,33 +2013,6 @@ END(ia32_interrupt) DBG_FAULT(66) FAULT(66) -#ifdef CONFIG_XEN - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - */ - -GLOBAL_ENTRY(xen_bsw1) - /* FIXME: THIS CODE IS NOT NaT SAFE! */ - movl r30=XSI_BANKNUM; - mov r31=1;; - st4 [r30]=r31; - movl r30=XSI_BANK1_R16; - movl r31=XSI_BANK1_R16+8;; - ld8 r16=[r30],16; ld8 r17=[r31],16;; - ld8 r18=[r30],16; ld8 r19=[r31],16;; - ld8 r20=[r30],16; ld8 r21=[r31],16;; - ld8 r22=[r30],16; ld8 r23=[r31],16;; - ld8 r24=[r30],16; ld8 r25=[r31],16;; - ld8 r26=[r30],16; ld8 r27=[r31],16;; - ld8 r28=[r30],16; ld8 r29=[r31],16;; - ld8 r30=[r30]; ld8 r31=[r31];; - br.ret.sptk.many b0 -END(xen_bsw1) -#endif - .org ia64_ivt+0x7f00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x7f00 Entry 67 (size 16 bundles) Reserved @@ -2167,4 +2140,38 @@ 1: (p6) br.spnt.few 1b // call evtchn_do_upcall again. br.sptk.many ia64_leave_kernel END(xen_event_callback) -#endif + + + /* + * There is no particular reason for this code to be here, other than that + * there happens to be space here that would go unused otherwise. If this + * fault ever gets "unreserved", simply moved the following code to a more + * suitable spot... + */ + +GLOBAL_ENTRY(xen_bsw1) + /* FIXME: THIS CODE IS NOT NaT SAFE! */ + mov r14=ar.unat + movl r30=XSI_B1NAT + ;; + ld8 r30=[r30];; + mov ar.unat=r30 + movl r30=XSI_BANKNUM; + mov r31=1;; + st4 [r30]=r31; + movl r30=XSI_BANK1_R16; + movl r31=XSI_BANK1_R16+8;; + ld8.fill r16=[r30],16; ld8.fill r17=[r31],16;; + ld8.fill r18=[r30],16; ld8.fill r19=[r31],16;; + ld8.fill r20=[r30],16; ld8.fill r21=[r31],16;; + ld8.fill r22=[r30],16; ld8.fill r23=[r31],16;; + ld8.fill r24=[r30],16; ld8.fill r25=[r31],16;; + ld8.fill r26=[r30],16; ld8.fill r27=[r31],16;; + ld8.fill r28=[r30],16; ld8.fill r29=[r31],16;; + ld8.fill r30=[r30]; ld8.fill r31=[r31];; + mov ar.unat=r14 + br.ret.sptk.many b0 +END(xen_bsw1) + + +#endif diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/drivers/xen/core/reboot.c --- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Thu Apr 12 16:37:32 2007 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Fri Apr 13 11:14:26 2007 +0100 @@ -118,6 +118,7 @@ static void shutdown_handler(struct xenb err = xenbus_transaction_start(&xbt); if (err) return; + str = (char *)xenbus_read(xbt, "control", "shutdown", NULL); /* Ignore read errors and empty reads. */ if (XENBUS_IS_ERR_READ(str)) { @@ -206,14 +207,12 @@ static int setup_shutdown_watcher(void) printk(KERN_ERR "Failed to set shutdown watcher\n"); return err; } - xenbus_write(XBT_NIL, "control", "feature-reboot", "1"); err = register_xenbus_watch(&sysrq_watch); if (err) { printk(KERN_ERR "Failed to set sysrq watcher\n"); return err; } - xenbus_write(XBT_NIL, "control", "feature-sysrq", "1"); return 0; } diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h Thu Apr 12 16:37:32 2007 -0500 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h Fri Apr 13 11:14:26 2007 +0100 @@ -210,7 +210,7 @@ extern unsigned long pg0[]; /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */ #define pmd_none(x) (!(unsigned long)pmd_val(x)) -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t. can temporarily clear it. */ #define pmd_present(x) (pmd_val(x)) diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Thu Apr 12 16:37:32 2007 -0500 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Fri Apr 13 11:14:26 2007 +0100 @@ -64,7 +64,6 @@ extern start_info_t *xen_start_info; void force_evtchn_callback(void); -#ifndef CONFIG_VMX_GUEST /* Turn jiffies into Xen system time. XXX Implement me. */ #define jiffies_to_st(j) 0 @@ -116,6 +115,7 @@ HYPERVISOR_poll( return rc; } +#ifndef CONFIG_VMX_GUEST // for drivers/xen/privcmd/privcmd.c #define machine_to_phys_mapping 0 struct vm_area_struct; diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h --- a/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h Thu Apr 12 16:37:32 2007 -0500 +++ b/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h Fri Apr 13 11:14:26 2007 +0100 @@ -57,6 +57,7 @@ #define XSI_PSR_IC (XSI_BASE + XSI_PSR_IC_OFS) #define XSI_IPSR (XSI_BASE + XSI_IPSR_OFS) #define XSI_IIP (XSI_BASE + XSI_IIP_OFS) +#define XSI_B1NAT (XSI_BASE + XSI_B1NATS_OFS) #define XSI_BANK1_R16 (XSI_BASE + XSI_BANK1_R16_OFS) #define XSI_BANKNUM (XSI_BASE + XSI_BANKNUM_OFS) #define XSI_IHA (XSI_BASE + XSI_IHA_OFS) diff -r 5bda20f0723d -r f92a79e39da8 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h Thu Apr 12 16:37:32 2007 -0500 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h Fri Apr 13 11:14:26 2007 +0100 @@ -411,7 +411,7 @@ static inline int pmd_large(pmd_t pte) { #define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \ pmd_index(address)) #define pmd_none(x) (!pmd_val(x)) -#ifdef CONFIG_XEN_COMPAT_030002 +#if CONFIG_XEN_COMPAT <= 0x030002 /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t. can temporarily clear it. */ #define pmd_present(x) (pmd_val(x)) diff -r 5bda20f0723d -r f92a79e39da8 tools/examples/network-bridge --- a/tools/examples/network-bridge Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/examples/network-bridge Fri Apr 13 11:14:26 2007 +0100 @@ -183,12 +183,12 @@ op_start () { return fi - if ! link_exists "$vdev"; then - if link_exists "$pdev"; then - # The device is already up. - return - else - echo " + if link_exists "$pdev"; then + # The device is already up. + return + fi + if link_exists veth0 && ! link_exists "$vdev"; then + echo " Link $vdev is missing. This may be because you have reached the limit of the number of interfaces that the loopback driver supports. If the loopback driver is a module, you @@ -196,8 +196,7 @@ driver is compiled statically into the k driver is compiled statically into the kernel, then you may set the parameter using netloop.nloopbacks=<N> on the domain 0 kernel command line. " >&2 - exit 1 - fi + exit 1 fi create_bridge ${bridge} @@ -224,9 +223,13 @@ using netloop.nloopbacks=<N> on the doma add_to_bridge2 ${bridge} ${pdev} do_ifup ${netdev} else + ip link set ${bridge} arp on + ip link set ${bridge} multicast on # old style without ${vdev} transfer_addrs ${netdev} ${bridge} transfer_routes ${netdev} ${bridge} + # Attach the real interface to the bridge. + add_to_bridge ${bridge} ${netdev} fi if [ ${antispoof} = 'yes' ] ; then diff -r 5bda20f0723d -r f92a79e39da8 tools/examples/xmexample.hvm --- a/tools/examples/xmexample.hvm Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/examples/xmexample.hvm Fri Apr 13 11:14:26 2007 +0100 @@ -170,6 +170,12 @@ serial='pty' #----------------------------------------------------------------------------- +# Qemu Monitor, default is disable +# Use ctrl-alt-2 to connect +#monitor=1 + + +#----------------------------------------------------------------------------- # enable sound card support, [sb16|es1370|all|..,..], default none #soundhw='sb16' diff -r 5bda20f0723d -r f92a79e39da8 tools/examples/xmexample.vti --- a/tools/examples/xmexample.vti Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/examples/xmexample.vti Fri Apr 13 11:14:26 2007 +0100 @@ -113,6 +113,11 @@ serial='pty' serial='pty' #----------------------------------------------------------------------------- +# Qemu Monitor, default is disable +# Use ctrl-alt-2 to connect +#monitor=1 + +#----------------------------------------------------------------------------- # enable sound card support, [sb16|es1370|all|..,..], default none #soundhw='sb16' diff -r 5bda20f0723d -r f92a79e39da8 tools/ioemu/hw/pc.c --- a/tools/ioemu/hw/pc.c Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/ioemu/hw/pc.c Fri Apr 13 11:14:26 2007 +0100 @@ -902,7 +902,6 @@ static void pc_init1(uint64_t ram_size, if (pci_enabled && acpi_enabled) { piix4_pm_init(pci_bus, piix3_devfn + 3); } -#endif /* !CONFIG_DM */ #if 0 /* ??? Need to figure out some way for the user to @@ -921,6 +920,17 @@ static void pc_init1(uint64_t ram_size, lsi_scsi_attach(scsi, bdrv, -1); } #endif +#else + if (pci_enabled) { + void *scsi; + + scsi = lsi_scsi_init(pci_bus, -1); + for (i = 0; i < MAX_SCSI_DISKS ; i++) { + if (bs_table[i + MAX_DISKS]) + lsi_scsi_attach(scsi, bs_table[i + MAX_DISKS], -1); + } + } +#endif /* !CONFIG_DM */ /* must be done after all PCI devices are instanciated */ /* XXX: should be done in the Bochs BIOS */ if (pci_enabled) { diff -r 5bda20f0723d -r f92a79e39da8 tools/ioemu/monitor.c --- a/tools/ioemu/monitor.c Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/ioemu/monitor.c Fri Apr 13 11:14:26 2007 +0100 @@ -180,7 +180,7 @@ static void do_commit(void) { int i; - for (i = 0; i < MAX_DISKS; i++) { + for (i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) { if (bs_table[i]) { bdrv_commit(bs_table[i]); } diff -r 5bda20f0723d -r f92a79e39da8 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/ioemu/vl.c Fri Apr 13 11:14:26 2007 +0100 @@ -116,7 +116,7 @@ void *ioport_opaque[MAX_IOPORTS]; void *ioport_opaque[MAX_IOPORTS]; IOPortReadFunc *ioport_read_table[3][MAX_IOPORTS]; IOPortWriteFunc *ioport_write_table[3][MAX_IOPORTS]; -BlockDriverState *bs_table[MAX_DISKS], *fd_table[MAX_FD]; +BlockDriverState *bs_table[MAX_DISKS + MAX_SCSI_DISKS], *fd_table[MAX_FD]; int vga_ram_size; int bios_size; static DisplayState display_state; @@ -1396,7 +1396,7 @@ static void stdio_received_byte(int ch) case 's': { int i; - for (i = 0; i < MAX_DISKS; i++) { + for (i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) { if (bs_table[i]) bdrv_commit(bs_table[i]); } @@ -6057,7 +6057,7 @@ int main(int argc, char **argv) int snapshot, linux_boot; const char *initrd_filename; #ifndef CONFIG_DM - const char *hd_filename[MAX_DISKS]; + const char *hd_filename[MAX_DISKS + MAX_SCSI_DISKS]; #endif /* !CONFIG_DM */ const char *fd_filename[MAX_FD]; const char *kernel_filename, *kernel_cmdline; @@ -6126,7 +6126,7 @@ int main(int argc, char **argv) for(i = 0; i < MAX_FD; i++) fd_filename[i] = NULL; #ifndef CONFIG_DM - for(i = 0; i < MAX_DISKS; i++) + for(i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) hd_filename[i] = NULL; #endif /* !CONFIG_DM */ ram_size = DEFAULT_RAM_SIZE * 1024 * 1024; @@ -6724,7 +6724,7 @@ int main(int argc, char **argv) } /* open the virtual block devices */ - for(i = 0; i < MAX_DISKS; i++) { + for(i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) { if (hd_filename[i]) { if (!bs_table[i]) { char buf[64]; diff -r 5bda20f0723d -r f92a79e39da8 tools/ioemu/vl.h --- a/tools/ioemu/vl.h Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/ioemu/vl.h Fri Apr 13 11:14:26 2007 +0100 @@ -818,8 +818,9 @@ int vnc_start_viewer(int port); /* ide.c */ #define MAX_DISKS 4 - -extern BlockDriverState *bs_table[MAX_DISKS]; +#define MAX_SCSI_DISKS 7 + +extern BlockDriverState *bs_table[MAX_DISKS + MAX_SCSI_DISKS]; void isa_ide_init(int iobase, int iobase2, int irq, BlockDriverState *hd0, BlockDriverState *hd1); diff -r 5bda20f0723d -r f92a79e39da8 tools/ioemu/xenstore.c --- a/tools/ioemu/xenstore.c Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/ioemu/xenstore.c Fri Apr 13 11:14:26 2007 +0100 @@ -18,7 +18,7 @@ #include <fcntl.h> static struct xs_handle *xsh = NULL; -static char *media_filename[MAX_DISKS]; +static char *media_filename[MAX_DISKS + MAX_SCSI_DISKS]; static QEMUTimer *insert_timer = NULL; #define UWAIT_MAX (30*1000000) /* thirty seconds */ @@ -30,11 +30,11 @@ static int pasprintf(char **buf, const c int ret = 0; if (*buf) - free(*buf); + free(*buf); va_start(ap, fmt); if (vasprintf(buf, fmt, ap) == -1) { - buf = NULL; - ret = -1; + buf = NULL; + ret = -1; } va_end(ap); return ret; @@ -44,12 +44,12 @@ static void insert_media(void *opaque) { int i; - for (i = 0; i < MAX_DISKS; i++) { - if (media_filename[i] && bs_table[i]) { - do_change(bs_table[i]->device_name, media_filename[i]); - free(media_filename[i]); - media_filename[i] = NULL; - } + for (i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) { + if (media_filename[i] && bs_table[i]) { + do_change(bs_table[i]->device_name, media_filename[i]); + free(media_filename[i]); + media_filename[i] = NULL; + } } } @@ -57,7 +57,7 @@ void xenstore_check_new_media_present(in { if (insert_timer == NULL) - insert_timer = qemu_new_timer(rt_clock, insert_media, NULL); + insert_timer = qemu_new_timer(rt_clock, insert_media, NULL); qemu_mod_timer(insert_timer, qemu_get_clock(rt_clock) + timeout); } @@ -82,17 +82,17 @@ void xenstore_parse_domain_config(int do char **e = NULL; char *buf = NULL, *path; char *fpath = NULL, *bpath = NULL, - *dev = NULL, *params = NULL, *type = NULL; - int i; + *dev = NULL, *params = NULL, *type = NULL; + int i, is_scsi; unsigned int len, num, hd_index; - for(i = 0; i < MAX_DISKS; i++) + for(i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) media_filename[i] = NULL; xsh = xs_daemon_open(); if (xsh == NULL) { - fprintf(logfile, "Could not contact xenstore for domain config\n"); - return; + fprintf(logfile, "Could not contact xenstore for domain config\n"); + return; } path = xs_get_domain_path(xsh, domid); @@ -102,59 +102,60 @@ void xenstore_parse_domain_config(int do } if (pasprintf(&buf, "%s/device/vbd", path) == -1) - goto out; + goto out; e = xs_directory(xsh, XBT_NULL, buf, &num); if (e == NULL) - goto out; + goto out; for (i = 0; i < num; i++) { - /* read the backend path */ - if (pasprintf(&buf, "%s/device/vbd/%s/backend", path, e[i]) == -1) - continue; - free(bpath); + /* read the backend path */ + if (pasprintf(&buf, "%s/device/vbd/%s/backend", path, e[i]) == -1) + continue; + free(bpath); bpath = xs_read(xsh, XBT_NULL, buf, &len); - if (bpath == NULL) - continue; - /* read the name of the device */ - if (pasprintf(&buf, "%s/dev", bpath) == -1) - continue; - free(dev); - dev = xs_read(xsh, XBT_NULL, buf, &len); - if (dev == NULL) - continue; - if (strncmp(dev, "hd", 2) || strlen(dev) != 3) - continue; - hd_index = dev[2] - 'a'; - if (hd_index >= MAX_DISKS) - continue; - /* read the type of the device */ - if (pasprintf(&buf, "%s/device/vbd/%s/device-type", path, e[i]) == -1) - continue; - free(type); - type = xs_read(xsh, XBT_NULL, buf, &len); - if (pasprintf(&buf, "%s/params", bpath) == -1) - continue; - free(params); - params = xs_read(xsh, XBT_NULL, buf, &len); - if (params == NULL) - continue; + if (bpath == NULL) + continue; + /* read the name of the device */ + if (pasprintf(&buf, "%s/dev", bpath) == -1) + continue; + free(dev); + dev = xs_read(xsh, XBT_NULL, buf, &len); + if (dev == NULL) + continue; + is_scsi = !strncmp(dev, "sd", 2); + if ((strncmp(dev, "hd", 2) && !is_scsi) || strlen(dev) != 3 ) + continue; + hd_index = dev[2] - 'a'; + if (hd_index >= (is_scsi ? MAX_SCSI_DISKS : MAX_DISKS)) + continue; + /* read the type of the device */ + if (pasprintf(&buf, "%s/device/vbd/%s/device-type", path, e[i]) == -1) + continue; + free(type); + type = xs_read(xsh, XBT_NULL, buf, &len); + if (pasprintf(&buf, "%s/params", bpath) == -1) + continue; + free(params); + params = xs_read(xsh, XBT_NULL, buf, &len); + if (params == NULL) + continue; /* * check if device has a phantom vbd; the phantom is hooked * to the frontend device (for ease of cleanup), so lookup * the frontend device, and see if there is a phantom_vbd * if there is, we will use resolution as the filename */ - if (pasprintf(&buf, "%s/device/vbd/%s/phantom_vbd", path, e[i]) == -1) - continue; - free(fpath); + if (pasprintf(&buf, "%s/device/vbd/%s/phantom_vbd", path, e[i]) == -1) + continue; + free(fpath); fpath = xs_read(xsh, XBT_NULL, buf, &len); - if (fpath) { - if (pasprintf(&buf, "%s/dev", fpath) == -1) - continue; - free(params); + if (fpath) { + if (pasprintf(&buf, "%s/dev", fpath) == -1) + continue; + free(params); params = xs_read(xsh, XBT_NULL, buf , &len); - if (params) { + if (params) { /* * wait for device, on timeout silently fail because we will * fail to open below @@ -163,19 +164,20 @@ void xenstore_parse_domain_config(int do } } - bs_table[hd_index] = bdrv_new(dev); - /* check if it is a cdrom */ - if (type && !strcmp(type, "cdrom")) { - bdrv_set_type_hint(bs_table[hd_index], BDRV_TYPE_CDROM); - if (pasprintf(&buf, "%s/params", bpath) != -1) - xs_watch(xsh, buf, dev); - } - /* open device now if media present */ - if (params[0]) { - if (bdrv_open(bs_table[hd_index], params, 0 /* snapshot */) < 0) + bs_table[hd_index + (is_scsi ? MAX_DISKS : 0)] = bdrv_new(dev); + /* check if it is a cdrom */ + if (type && !strcmp(type, "cdrom")) { + bdrv_set_type_hint(bs_table[hd_index], BDRV_TYPE_CDROM); + if (pasprintf(&buf, "%s/params", bpath) != -1) + xs_watch(xsh, buf, dev); + } + /* open device now if media present */ + if (params[0]) { + if (bdrv_open(bs_table[hd_index + (is_scsi ? MAX_DISKS : 0)], + params, 0 /* snapshot */) < 0) fprintf(stderr, "qemu: could not open hard disk image '%s'\n", params); - } + } } /* Set a watch for log-dirty requests from the migration tools */ @@ -199,7 +201,7 @@ int xenstore_fd(void) int xenstore_fd(void) { if (xsh) - return xs_fileno(xsh); + return xs_fileno(xsh); return -1; } @@ -316,7 +318,7 @@ void xenstore_process_event(void *opaque vec = xs_read_watch(xsh, &num); if (!vec) - return; + return; if (!strcmp(vec[XS_WATCH_TOKEN], "logdirty")) { xenstore_process_logdirty_event(); @@ -324,23 +326,23 @@ void xenstore_process_event(void *opaque } if (strncmp(vec[XS_WATCH_TOKEN], "hd", 2) || - strlen(vec[XS_WATCH_TOKEN]) != 3) - goto out; + strlen(vec[XS_WATCH_TOKEN]) != 3) + goto out; hd_index = vec[XS_WATCH_TOKEN][2] - 'a'; image = xs_read(xsh, XBT_NULL, vec[XS_WATCH_PATH], &len); if (image == NULL || !strcmp(image, bs_table[hd_index]->filename)) - goto out; /* gone or identical */ + goto out; /* gone or identical */ do_eject(0, vec[XS_WATCH_TOKEN]); bs_table[hd_index]->filename[0] = 0; if (media_filename[hd_index]) { - free(media_filename[hd_index]); - media_filename[hd_index] = NULL; + free(media_filename[hd_index]); + media_filename[hd_index] = NULL; } if (image[0]) { - media_filename[hd_index] = strdup(image); - xenstore_check_new_media_present(5000); + media_filename[hd_index] = strdup(image); + xenstore_check_new_media_present(5000); } out: @@ -354,7 +356,7 @@ void xenstore_write_vncport(int display) char *portstr = NULL; if (xsh == NULL) - return; + return; path = xs_get_domain_path(xsh, domid); if (path == NULL) { @@ -363,10 +365,10 @@ void xenstore_write_vncport(int display) } if (pasprintf(&buf, "%s/console/vnc-port", path) == -1) - goto out; + goto out; if (pasprintf(&portstr, "%d", 5900 + display) == -1) - goto out; + goto out; if (xs_write(xsh, XBT_NULL, buf, portstr, strlen(portstr)) == 0) fprintf(logfile, "xs_write() vncport failed\n"); @@ -383,41 +385,41 @@ int xenstore_read_vncpasswd(int domid) unsigned int i, len, rc = 0; if (xsh == NULL) { - return -1; + return -1; } path = xs_get_domain_path(xsh, domid); if (path == NULL) { - fprintf(logfile, "xs_get_domain_path() error. domid %d.\n", domid); - return -1; + fprintf(logfile, "xs_get_domain_path() error. domid %d.\n", domid); + return -1; } pasprintf(&buf, "%s/vm", path); uuid = xs_read(xsh, XBT_NULL, buf, &len); if (uuid == NULL) { - fprintf(logfile, "xs_read(): uuid get error. %s.\n", buf); - free(path); - return -1; + fprintf(logfile, "xs_read(): uuid get error. %s.\n", buf); + free(path); + return -1; } pasprintf(&buf, "%s/vncpasswd", uuid); passwd = xs_read(xsh, XBT_NULL, buf, &len); if (passwd == NULL) { - fprintf(logfile, "xs_read(): vncpasswd get error. %s.\n", buf); - free(uuid); - free(path); - return rc; + fprintf(logfile, "xs_read(): vncpasswd get error. %s.\n", buf); + free(uuid); + free(path); + return rc; } for (i=0; i<len && i<63; i++) { - vncpasswd[i] = passwd[i]; - passwd[i] = '\0'; + vncpasswd[i] = passwd[i]; + passwd[i] = '\0'; } vncpasswd[len] = '\0'; pasprintf(&buf, "%s/vncpasswd", uuid); if (xs_write(xsh, XBT_NULL, buf, passwd, len) == 0) { - fprintf(logfile, "xs_write() vncpasswd failed.\n"); - rc = -1; + fprintf(logfile, "xs_write() vncpasswd failed.\n"); + rc = -1; } free(passwd); @@ -443,7 +445,7 @@ char **xenstore_domain_get_devices(struc goto out; if (pasprintf(&buf, "%s/device/%s", path,devtype) == -1) - goto out; + goto out; e = xs_directory(handle, XBT_NULL, buf, num); @@ -496,13 +498,13 @@ char *xenstore_backend_read_variable(str buf = get_device_variable_path(devtype, inst, var); if (NULL == buf) - goto out; + goto out; value = xs_read(handle, XBT_NULL, buf, &len); free(buf); -out: + out: return value; } @@ -569,27 +571,27 @@ char *xenstore_vm_read(int domid, char * char *buf = NULL, *path = NULL, *value = NULL; if (xsh == NULL) - goto out; + goto out; path = xs_get_domain_path(xsh, domid); if (path == NULL) { - fprintf(logfile, "xs_get_domain_path(%d): error\n", domid); - goto out; + fprintf(logfile, "xs_get_domain_path(%d): error\n", domid); + goto out; } pasprintf(&buf, "%s/vm", path); free(path); path = xs_read(xsh, XBT_NULL, buf, NULL); if (path == NULL) { - fprintf(logfile, "xs_read(%s): read error\n", buf); - goto out; + fprintf(logfile, "xs_read(%s): read error\n", buf); + goto out; } pasprintf(&buf, "%s/%s", path, key); value = xs_read(xsh, XBT_NULL, buf, len); if (value == NULL) { - fprintf(logfile, "xs_read(%s): read error\n", buf); - goto out; + fprintf(logfile, "xs_read(%s): read error\n", buf); + goto out; } out: @@ -604,27 +606,27 @@ int xenstore_vm_write(int domid, char *k int rc = -1; if (xsh == NULL) - goto out; + goto out; path = xs_get_domain_path(xsh, domid); if (path == NULL) { - fprintf(logfile, "xs_get_domain_path: error\n"); - goto out; + fprintf(logfile, "xs_get_domain_path: error\n"); + goto out; } pasprintf(&buf, "%s/vm", path); free(path); path = xs_read(xsh, XBT_NULL, buf, NULL); if (path == NULL) { - fprintf(logfile, "xs_read(%s): read error\n", buf); - goto out; + fprintf(logfile, "xs_read(%s): read error\n", buf); + goto out; } pasprintf(&buf, "%s/%s", path, key); rc = xs_write(xsh, XBT_NULL, buf, value, strlen(value)); if (rc) { - fprintf(logfile, "xs_write(%s, %s): write error\n", buf, key); - goto out; + fprintf(logfile, "xs_write(%s, %s): write error\n", buf, key); + goto out; } out: diff -r 5bda20f0723d -r f92a79e39da8 tools/libfsimage/fat/fat.h --- a/tools/libfsimage/fat/fat.h Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/libfsimage/fat/fat.h Fri Apr 13 11:14:26 2007 +0100 @@ -84,17 +84,17 @@ struct fat_bpb { #define FAT_DIRENTRY_LENGTH 32 #define FAT_DIRENTRY_ATTRIB(entry) \ - (*((unsigned char *) (entry+11))) + (*((__u8 *) (entry+11))) #define FAT_DIRENTRY_VALID(entry) \ - ( ((*((unsigned char *) entry)) != 0) \ - && ((*((unsigned char *) entry)) != 0xE5) \ + ( ((*((__u8 *) entry)) != 0) \ + && ((*((__u8 *) entry)) != 0xE5) \ && !(FAT_DIRENTRY_ATTRIB(entry) & FAT_ATTRIB_NOT_OK_MASK) ) #define FAT_DIRENTRY_FIRST_CLUSTER(entry) \ - ((*((unsigned short *) (entry+26)))+(*((unsigned short *) (entry+20)) << 16)) + ((*((__u16 *) (entry+26)))+(*((__u16 *) (entry+20)) << 16)) #define FAT_DIRENTRY_FILELENGTH(entry) \ - (*((unsigned long *) (entry+28))) + (*((__u32 *) (entry+28))) #define FAT_LONGDIR_ID(entry) \ - (*((unsigned char *) (entry))) + (*((__u8 *) (entry))) #define FAT_LONGDIR_ALIASCHECKSUM(entry) \ - (*((unsigned char *) (entry+13))) + (*((__u8 *) (entry+13))) diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/Makefile --- a/tools/libxc/Makefile Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/libxc/Makefile Fri Apr 13 11:14:26 2007 +0100 @@ -26,8 +26,8 @@ CTRL_SRCS-$(CONFIG_X86_Linux) += xc_ptra GUEST_SRCS-y := GUEST_SRCS-y += xg_private.c -GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_linux_save.c -GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_save.c +GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_domain_save.c +GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c # symlink libelf from xen/common/libelf/ LIBELF_SRCS := libelf-tools.c libelf-loader.c diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/ia64/xc_ia64_linux_save.c --- a/tools/libxc/ia64/xc_ia64_linux_save.c Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/libxc/ia64/xc_ia64_linux_save.c Fri Apr 13 11:14:26 2007 +0100 @@ -134,8 +134,10 @@ retry: } int -xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, - uint32_t max_factor, uint32_t flags, int (*suspend)(int)) +xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, + uint32_t max_factor, uint32_t flags, int (*suspend)(int), + int hvm, void *(*init_qemu_maps)(int, unsigned), + void (*qemu_flip_buffer)(int, int)) { DECLARE_DOMCTL; xc_dominfo_t info; diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/libxc/xc_domain.c Fri Apr 13 11:14:26 2007 +0100 @@ -8,6 +8,7 @@ #include "xc_private.h" #include <xen/memory.h> +#include <xen/hvm/hvm_op.h> int xc_domain_create(int xc_handle, uint32_t ssidref, @@ -655,6 +656,44 @@ int xc_domain_send_trigger(int xc_handle domctl.u.sendtrigger.vcpu = vcpu; return do_domctl(xc_handle, &domctl); +} + +int xc_set_hvm_param(int handle, domid_t dom, int param, unsigned long value) +{ + DECLARE_HYPERCALL; + xen_hvm_param_t arg; + int rc; + + hypercall.op = __HYPERVISOR_hvm_op; + hypercall.arg[0] = HVMOP_set_param; + hypercall.arg[1] = (unsigned long)&arg; + arg.domid = dom; + arg.index = param; + arg.value = value; + if ( lock_pages(&arg, sizeof(arg)) != 0 ) + return -1; + rc = do_xen_hypercall(handle, &hypercall); + unlock_pages(&arg, sizeof(arg)); + return rc; +} + +int xc_get_hvm_param(int handle, domid_t dom, int param, unsigned long *value) +{ + DECLARE_HYPERCALL; + xen_hvm_param_t arg; + int rc; + + hypercall.op = __HYPERVISOR_hvm_op; + hypercall.arg[0] = HVMOP_get_param; + hypercall.arg[1] = (unsigned long)&arg; + arg.domid = dom; + arg.index = param; + if ( lock_pages(&arg, sizeof(arg)) != 0 ) + return -1; + rc = do_xen_hypercall(handle, &hypercall); + unlock_pages(&arg, sizeof(arg)); + *value = arg.value; + return rc; } /* diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_domain_restore.c --- a/tools/libxc/xc_domain_restore.c Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/libxc/xc_domain_restore.c Fri Apr 13 11:14:26 2007 +0100 @@ -688,33 +688,22 @@ int xc_domain_restore(int xc_handle, int ERROR("error zeroing magic pages"); goto out; } - - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, magic_pfns[0]); - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1]); - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, magic_pfns[2]); - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae); - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn); + + if ( (rc = xc_set_hvm_param(xc_handle, dom, + HVM_PARAM_IOREQ_PFN, magic_pfns[0])) + || (rc = xc_set_hvm_param(xc_handle, dom, + HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1])) + || (rc = xc_set_hvm_param(xc_handle, dom, + HVM_PARAM_STORE_PFN, magic_pfns[2])) + || (rc = xc_set_hvm_param(xc_handle, dom, + HVM_PARAM_PAE_ENABLED, pae)) + || (rc = xc_set_hvm_param(xc_handle, dom, + HVM_PARAM_STORE_EVTCHN, store_evtchn)) ) + { + ERROR("error setting HVM params: %i", rc); + goto out; + } *store_mfn = magic_pfns[2]; - - /* Read vcpu contexts */ - for ( i = 0; i <= max_vcpu_id; i++ ) - { - if ( !(vcpumap & (1ULL << i)) ) - continue; - - if ( !read_exact(io_fd, &(ctxt), sizeof(ctxt)) ) - { - ERROR("error read vcpu context.\n"); - goto out; - } - - if ( (rc = xc_vcpu_setcontext(xc_handle, dom, i, &ctxt)) ) - { - ERROR("Could not set vcpu context, rc=%d", rc); - goto out; - } - rc = 1; - } /* Read HVM context */ if ( !read_exact(io_fd, &rec_len, sizeof(uint32_t)) ) diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_domain_save.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/xc_domain_save.c Fri Apr 13 11:14:26 2007 +0100 @@ -0,0 +1,1587 @@ +/****************************************************************************** + * xc_linux_save.c + * + * Save the state of a running Linux session. + * + * Copyright (c) 2003, K A Fraser. + */ + +#include <inttypes.h> +#include <time.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/time.h> + +#include "xc_private.h" +#include "xc_dom.h" +#include "xg_private.h" +#include "xg_save_restore.h" + +#include <xen/hvm/params.h> +#include <xen/hvm/e820.h> + +/* +** Default values for important tuning parameters. Can override by passing +** non-zero replacement values to xc_domain_save(). +** +** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too. +** +*/ +#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */ +#define DEF_MAX_FACTOR 3 /* never send more than 3x p2m_size */ + +/* max mfn of the whole machine */ +static unsigned long max_mfn; + +/* virtual starting address of the hypervisor */ +static unsigned long hvirt_start; + +/* #levels of page tables used by the current guest */ +static unsigned int pt_levels; + +/* HVM: shared-memory bitmaps for getting log-dirty bits from qemu-dm */ +static unsigned long *qemu_bitmaps[2]; +static int qemu_active; +static int qemu_non_active; + +/* number of pfns this guest has (i.e. number of entries in the P2M) */ +static unsigned long p2m_size; + +/* Live mapping of the table mapping each PFN to its current MFN. */ +static xen_pfn_t *live_p2m = NULL; + +/* Live mapping of system MFN to PFN table. */ +static xen_pfn_t *live_m2p = NULL; +static unsigned long m2p_mfn0; + +/* grep fodder: machine_to_phys */ + +#define mfn_to_pfn(_mfn) live_m2p[(_mfn)] + +/* + * Returns TRUE if the given machine frame number has a unique mapping + * in the guest's pseudophysical map. + */ +#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \ + (((_mfn) < (max_mfn)) && \ + ((mfn_to_pfn(_mfn) < (p2m_size)) && \ + (live_p2m[mfn_to_pfn(_mfn)] == (_mfn)))) + +/* Returns TRUE if MFN is successfully converted to a PFN. */ +#define translate_mfn_to_pfn(_pmfn) \ +({ \ + unsigned long mfn = *(_pmfn); \ + int _res = 1; \ + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) \ + _res = 0; \ + else \ + *(_pmfn) = mfn_to_pfn(mfn); \ + _res; \ +}) + +/* +** During (live) save/migrate, we maintain a number of bitmaps to track +** which pages we have to send, to fixup, and to skip. +*/ + +#define BITS_PER_LONG (sizeof(unsigned long) * 8) +#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) +#define BITMAP_SIZE (BITS_TO_LONGS(p2m_size) * sizeof(unsigned long)) + +#define BITMAP_ENTRY(_nr,_bmap) \ + ((volatile unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG] + +#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG) + +static inline int test_bit (int nr, volatile void * addr) +{ + return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1; +} + +static inline void clear_bit (int nr, volatile void * addr) +{ + BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr)); +} + +static inline void set_bit ( int nr, volatile void * addr) +{ + BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr)); +} + +/* Returns the hamming weight (i.e. the number of bits set) in a N-bit word */ +static inline unsigned int hweight32(unsigned int w) +{ + unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555); + res = (res & 0x33333333) + ((res >> 2) & 0x33333333); + res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F); + res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF); + return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF); +} + +static inline int count_bits ( int nr, volatile void *addr) +{ + int i, count = 0; + volatile unsigned long *p = (volatile unsigned long *)addr; + /* We know that the array is padded to unsigned long. */ + for ( i = 0; i < (nr / (sizeof(unsigned long)*8)); i++, p++ ) + count += hweight32(*p); + return count; +} + +static inline int permute( int i, int nr, int order_nr ) +{ + /* Need a simple permutation function so that we scan pages in a + pseudo random order, enabling us to get a better estimate of + the domain's page dirtying rate as we go (there are often + contiguous ranges of pfns that have similar behaviour, and we + want to mix them up. */ + + /* e.g. nr->oder 15->4 16->4 17->5 */ + /* 512MB domain, 128k pages, order 17 */ + + /* + QPONMLKJIHGFEDCBA + QPONMLKJIH + GFEDCBA + */ + + /* + QPONMLKJIHGFEDCBA + EDCBA + QPONM + LKJIHGF + */ + + do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); } + while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */ + + return i; +} + +static uint64_t tv_to_us(struct timeval *new) +{ + return (new->tv_sec * 1000000) + new->tv_usec; +} + +static uint64_t llgettimeofday(void) +{ + struct timeval now; + gettimeofday(&now, NULL); + return tv_to_us(&now); +} + +static uint64_t tv_delta(struct timeval *new, struct timeval *old) +{ + return (((new->tv_sec - old->tv_sec)*1000000) + + (new->tv_usec - old->tv_usec)); +} + +static int noncached_write(int fd, int live, void *buffer, int len) +{ + static int write_count = 0; + + int rc = write(fd,buffer,len); + + write_count += len; + if ( write_count >= (MAX_PAGECACHE_USAGE * PAGE_SIZE) ) + { + /* Time to discard cache - dont care if this fails */ + discard_file_cache(fd, 0 /* no flush */); + write_count = 0; + } + + return rc; +} + +#ifdef ADAPTIVE_SAVE + +/* +** We control the rate at which we transmit (or save) to minimize impact +** on running domains (including the target if we're doing live migrate). +*/ + +#define MAX_MBIT_RATE 500 /* maximum transmit rate for migrate */ +#define START_MBIT_RATE 100 /* initial transmit rate for migrate */ + +/* Scaling factor to convert between a rate (in Mb/s) and time (in usecs) */ +#define RATE_TO_BTU 781250 + +/* Amount in bytes we allow ourselves to send in a burst */ +#define BURST_BUDGET (100*1024) + +/* We keep track of the current and previous transmission rate */ +static int mbit_rate, ombit_rate = 0; + +/* Have we reached the maximum transmission rate? */ +#define RATE_IS_MAX() (mbit_rate == MAX_MBIT_RATE) + +static inline void initialize_mbit_rate() +{ + mbit_rate = START_MBIT_RATE; +} + +static int ratewrite(int io_fd, int live, void *buf, int n) +{ + static int budget = 0; + static int burst_time_us = -1; + static struct timeval last_put = { 0 }; + struct timeval now; + struct timespec delay; + long long delta; + + if ( START_MBIT_RATE == 0 ) + return noncached_write(io_fd, live, buf, n); + + budget -= n; + if ( budget < 0 ) + { + if ( mbit_rate != ombit_rate ) + { + burst_time_us = RATE_TO_BTU / mbit_rate; + ombit_rate = mbit_rate; + DPRINTF("rate limit: %d mbit/s burst budget %d slot time %d\n", + mbit_rate, BURST_BUDGET, burst_time_us); + } + if ( last_put.tv_sec == 0 ) + { + budget += BURST_BUDGET; + gettimeofday(&last_put, NULL); + } + else + { + while ( budget < 0 ) + { + gettimeofday(&now, NULL); + delta = tv_delta(&now, &last_put); + while ( delta > burst_time_us ) + { + budget += BURST_BUDGET; + last_put.tv_usec += burst_time_us; + if ( last_put.tv_usec > 1000000 + { + last_put.tv_usec -= 1000000; + last_put.tv_sec++; + } + delta -= burst_time_us; + } + if ( budget > 0 ) + break; + delay.tv_sec = 0; + delay.tv_nsec = 1000 * (burst_time_us - delta); + while ( delay.tv_nsec > 0 ) + if ( nanosleep(&delay, &delay) == 0 ) + break; + } + } + } + return noncached_write(io_fd, live, buf, n); +} + +#else /* ! ADAPTIVE SAVE */ + +#define RATE_IS_MAX() (0) +#define ratewrite(_io_fd, _live, _buf, _n) noncached_write((_io_fd), (_live), (_buf), (_n)) +#define initialize_mbit_rate() + +#endif + +static inline ssize_t write_exact(int fd, void *buf, size_t count) +{ + return (write(fd, buf, count) == count); +} + +static int print_stats(int xc_handle, uint32_t domid, int pages_sent, + xc_shadow_op_stats_t *stats, int print) +{ + static struct timeval wall_last; + static long long d0_cpu_last; + static long long d1_cpu_last; + + struct timeval wall_now; + long long wall_delta; + long long d0_cpu_now, d0_cpu_delta; + long long d1_cpu_now, d1_cpu_delta; + + gettimeofday(&wall_now, NULL); + + d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000; + d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000; + + if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) ) + DPRINTF("ARRHHH!!\n"); + + wall_delta = tv_delta(&wall_now,&wall_last)/1000; + if ( wall_delta == 0 ) + wall_delta = 1; + + d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000; + d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000; + + if ( print ) + DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, " + "dirtied %dMb/s %" PRId32 " pages\n", + wall_delta, + (int)((d0_cpu_delta*100)/wall_delta), + (int)((d1_cpu_delta*100)/wall_delta), + (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))), + (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))), + stats->dirty_count); + +#ifdef ADAPTIVE_SAVE + if ( ((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate ) + { + mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) + + 50; + if ( mbit_rate > MAX_MBIT_RATE ) + mbit_rate = MAX_MBIT_RATE; + } +#endif + + d0_cpu_last = d0_cpu_now; + d1_cpu_last = d1_cpu_now; + wall_last = wall_now; + + return 0; +} + + +static int analysis_phase(int xc_handle, uint32_t domid, int p2m_size, + unsigned long *arr, int runs) +{ + long long start, now; + xc_shadow_op_stats_t stats; + int j; + + start = llgettimeofday(); + + for ( j = 0; j < runs; j++ ) + { + int i; + + xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN, + arr, p2m_size, NULL, 0, NULL); + DPRINTF("#Flush\n"); + for ( i = 0; i < 40; i++ ) + { + usleep(50000); + now = llgettimeofday(); + xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK, + NULL, 0, NULL, 0, &stats); + DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n", + ((now-start)+500)/1000, + stats.fault_count, stats.dirty_count); + } + } + + return -1; +} + + +static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd, + int dom, xc_dominfo_t *info) +{ + int i = 0; + + if ( !(*suspend)(dom) ) + { + ERROR("Suspend request failed"); + return -1; + } + + retry: + + if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 ) + { + ERROR("Could not get domain info"); + return -1; + } + + if ( info->dying ) + { + ERROR("domain is dying"); + return -1; + } + + if ( info->crashed ) + { + ERROR("domain has crashed"); + return -1; + } + + if ( info->shutdown ) + { + switch ( info->shutdown_reason ) + { + case SHUTDOWN_poweroff: + case SHUTDOWN_reboot: + ERROR("domain has shut down"); + return -1; + case SHUTDOWN_suspend: + return 0; + case SHUTDOWN_crash: + ERROR("domain has crashed"); + return -1; + } + } + + if ( info->paused ) + { + /* Try unpausing domain, wait, and retest. */ + xc_domain_unpause( xc_handle, dom ); + ERROR("Domain was paused. Wait and re-test."); + usleep(10000); /* 10ms */ + goto retry; + } + + if ( ++i < 100 ) + { + ERROR("Retry suspend domain"); + usleep(10000); /* 10ms */ + goto retry; + } + + ERROR("Unable to suspend domain."); + + return -1; +} + +/* +** Map the top-level page of MFNs from the guest. The guest might not have +** finished resuming from a previous restore operation, so we wait a while for +** it to update the MFN to a reasonable value. +*/ +static void *map_frame_list_list(int xc_handle, uint32_t dom, + shared_info_t *shinfo) +{ + int count = 100; + void *p; + + while ( count-- && (shinfo->arch.pfn_to_mfn_frame_list_list == 0) ) + usleep(10000); + + if ( shinfo->arch.pfn_to_mfn_frame_list_list == 0 ) + { + ERROR("Timed out waiting for frame list updated."); + return NULL; + } + + p = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, + shinfo->arch.pfn_to_mfn_frame_list_list); + if ( p == NULL ) + ERROR("Couldn't map p2m_frame_list_list (errno %d)", errno); + + return p; +} + +/* +** During transfer (or in the state file), all page-table pages must be +** converted into a 'canonical' form where references to actual mfns +** are replaced with references to the corresponding pfns. +** +** This function performs the appropriate conversion, taking into account +** which entries do not require canonicalization (in particular, those +** entries which map the virtual address reserved for the hypervisor). +*/ +static int canonicalize_pagetable(unsigned long type, unsigned long pfn, + const void *spage, void *dpage) +{ + + int i, pte_last, xen_start, xen_end, race = 0; + uint64_t pte; + + /* + ** We need to determine which entries in this page table hold + ** reserved hypervisor mappings. This depends on the current + ** page table type as well as the number of paging levels. + */ + xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2) ? 4 : 8); + + if ( (pt_levels == 2) && (type == XEN_DOMCTL_PFINFO_L2TAB) ) + xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT); + + if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L3TAB) ) + xen_start = L3_PAGETABLE_ENTRIES_PAE; + + /* + ** in PAE only the L2 mapping the top 1GB contains Xen mappings. + ** We can spot this by looking for the guest linear mapping which + ** Xen always ensures is present in that L2. Guests must ensure + ** that this check will fail for other L2s. + */ + if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L2TAB) ) + { + int hstart; + uint64_t he; + + hstart = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff; + he = ((const uint64_t *) spage)[hstart]; + + if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 ) + { + /* hvirt starts with xen stuff... */ + xen_start = hstart; + } + else if ( hvirt_start != 0xf5800000 ) + { + /* old L2s from before hole was shrunk... */ + hstart = (0xf5800000 >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff; + he = ((const uint64_t *) spage)[hstart]; + if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 ) + xen_start = hstart; + } + } + + if ( (pt_levels == 4) && (type == XEN_DOMCTL_PFINFO_L4TAB) ) + { + /* + ** XXX SMH: should compute these from hvirt_start (which we have) + ** and hvirt_end (which we don't) + */ + xen_start = 256; + xen_end = 272; + } + + /* Now iterate through the page table, canonicalizing each PTE */ + for (i = 0; i < pte_last; i++ ) + { + unsigned long pfn, mfn; + + if ( pt_levels == 2 ) + pte = ((const uint32_t*)spage)[i]; + else + pte = ((const uint64_t*)spage)[i]; + + if ( (i >= xen_start) && (i < xen_end) ) + pte = 0; + + if ( pte & _PAGE_PRESENT ) + { + mfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86; + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) + { + /* This will happen if the type info is stale which + is quite feasible under live migration */ + pfn = 0; /* zap it - we'll retransmit this page later */ + race = 1; /* inform the caller of race; fatal if !live */ + } + else + pfn = mfn_to_pfn(mfn); + + pte &= ~MADDR_MASK_X86; + pte |= (uint64_t)pfn << PAGE_SHIFT; + + /* + * PAE guest L3Es can contain these flags when running on + * a 64bit hypervisor. We zap these here to avoid any + * surprise at restore time... + */ + if ( (pt_levels == 3) && + (type == XEN_DOMCTL_PFINFO_L3TAB) && + (pte & (_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED)) ) + pte &= ~(_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); + } + + if ( pt_levels == 2 ) + ((uint32_t*)dpage)[i] = pte; + else + ((uint64_t*)dpage)[i] = pte; + } + + return race; +} + +static xen_pfn_t *xc_map_m2p(int xc_handle, + unsigned long max_mfn, + int prot) +{ + struct xen_machphys_mfn_list xmml; + privcmd_mmap_entry_t *entries; + unsigned long m2p_chunks, m2p_size; + xen_pfn_t *m2p; + xen_pfn_t *extent_start; + int i, rc; + + m2p_size = M2P_SIZE(max_mfn); + m2p_chunks = M2P_CHUNKS(max_mfn); + + xmml.max_extents = m2p_chunks; + if ( !(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t))) ) + { + ERROR("failed to allocate space for m2p mfns"); + return NULL; + } + set_xen_guest_handle(xmml.extent_start, extent_start); + + if ( xc_memory_op(xc_handle, XENMEM_machphys_mfn_list, &xmml) || + (xmml.nr_extents != m2p_chunks) ) + { + ERROR("xc_get_m2p_mfns"); + return NULL; + } + + if ( (m2p = mmap(NULL, m2p_size, prot, + MAP_SHARED, xc_handle, 0)) == MAP_FAILED ) + { + ERROR("failed to mmap m2p"); + return NULL; + } + + if ( !(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t))) ) + { + ERROR("failed to allocate space for mmap entries"); + return NULL; + } + + for ( i = 0; i < m2p_chunks; i++ ) + { + entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE)); + entries[i].mfn = extent_start[i]; + entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT; + } + + if ( (rc = xc_map_foreign_ranges(xc_handle, DOMID_XEN, + entries, m2p_chunks)) < 0 ) + { + ERROR("xc_mmap_foreign_ranges failed (rc = %d)", rc); + return NULL; + } + + m2p_mfn0 = entries[0].mfn; + + free(extent_start); + free(entries); + + return m2p; +} + + +static xen_pfn_t *map_and_save_p2m_table(int xc_handle, + int io_fd, + uint32_t dom, + unsigned long p2m_size, + shared_info_t *live_shinfo) +{ + vcpu_guest_context_t ctxt; + + /* Double and single indirect references to the live P2M table */ + xen_pfn_t *live_p2m_frame_list_list = NULL; + xen_pfn_t *live_p2m_frame_list = NULL; + + /* A copy of the pfn-to-mfn table frame list. */ + xen_pfn_t *p2m_frame_list = NULL; + + /* The mapping of the live p2m table itself */ + xen_pfn_t *p2m = NULL; + + int i, success = 0; + + live_p2m_frame_list_list = map_frame_list_list(xc_handle, dom, + live_shinfo); + if ( !live_p2m_frame_list_list ) + goto out; + + live_p2m_frame_list = + xc_map_foreign_batch(xc_handle, dom, PROT_READ, + live_p2m_frame_list_list, + P2M_FLL_ENTRIES); + if ( !live_p2m_frame_list ) + { + ERROR("Couldn't map p2m_frame_list"); + goto out; + } + + + /* Map all the frames of the pfn->mfn table. For migrate to succeed, + the guest must not change which frames are used for this purpose. + (its not clear why it would want to change them, and we'll be OK + from a safety POV anyhow. */ + + p2m = xc_map_foreign_batch(xc_handle, dom, PROT_READ, + live_p2m_frame_list, + P2M_FL_ENTRIES); + if ( !p2m ) + { + ERROR("Couldn't map p2m table"); + goto out; + } + live_p2m = p2m; /* So that translation macros will work */ + + /* Get a local copy of the live_P2M_frame_list */ + if ( !(p2m_frame_list = malloc(P2M_FL_SIZE)) ) + { + ERROR("Couldn't allocate p2m_frame_list array"); + goto out; + } + memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE); + + /* Canonicalise the pfn-to-mfn table frame-number list. */ + for ( i = 0; i < p2m_size; i += fpp ) + { + if ( !translate_mfn_to_pfn(&p2m_frame_list[i/fpp]) ) + { + ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys"); + ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp, + (uint64_t)p2m_frame_list[i/fpp]); + goto out; + } + } + + if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) ) + { + ERROR("Could not get vcpu context"); + goto out; + } + + /* + * Write an extended-info structure to inform the restore code that + * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off + * slow paths in the restore code. + */ + if ( (pt_levels == 3) && + (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3)) ) + { + unsigned long signature = ~0UL; + uint32_t tot_sz = sizeof(struct vcpu_guest_context) + 8; + uint32_t chunk_sz = sizeof(struct vcpu_guest_context); + char chunk_sig[] = "vcpu"; + if ( !write_exact(io_fd, &signature, sizeof(signature)) || + !write_exact(io_fd, &tot_sz, sizeof(tot_sz)) || + !write_exact(io_fd, &chunk_sig, 4) || + !write_exact(io_fd, &chunk_sz, sizeof(chunk_sz)) || + !write_exact(io_fd, &ctxt, sizeof(ctxt)) ) + { + ERROR("write: extended info"); + goto out; + } + } + + if ( !write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE) ) + { + ERROR("write: p2m_frame_list"); + goto out; + } + + success = 1; + + out: + + if ( !success && p2m ) + munmap(p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT)); + + if ( live_p2m_frame_list_list ) + munmap(live_p2m_frame_list_list, PAGE_SIZE); + + if ( live_p2m_frame_list ) + munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE); + + if ( p2m_frame_list ) + free(p2m_frame_list); + + return success ? p2m : NULL; +} + + + +int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, + uint32_t max_factor, uint32_t flags, int (*suspend)(int), + int hvm, void *(*init_qemu_maps)(int, unsigned), + void (*qemu_flip_buffer)(int, int)) +{ + xc_dominfo_t info; + + int rc = 1, i, j, last_iter, iter = 0; + int live = (flags & XCFLAGS_LIVE); + int debug = (flags & XCFLAGS_DEBUG); + int race = 0, sent_last_iter, skip_this_iter; + + /* The new domain's shared-info frame number. */ + unsigned long shared_info_frame; + + /* A copy of the CPU context of the guest. */ + vcpu_guest_context_t ctxt; + + /* A table containing the type of each PFN (/not/ MFN!). */ + unsigned long *pfn_type = NULL; + unsigned long *pfn_batch = NULL; + + /* A copy of one frame of guest memory. */ + char page[PAGE_SIZE]; + + /* Live mapping of shared info structure */ + shared_info_t *live_shinfo = NULL; + + /* base of the region in which domain memory is mapped */ + unsigned char *region_base = NULL; + + /* power of 2 order of p2m_size */ + int order_nr; + + /* bitmap of pages: + - that should be sent this iteration (unless later marked as skip); + - to skip this iteration because already dirty; + - to fixup by sending at the end if not already resent; */ + unsigned long *to_send = NULL, *to_skip = NULL, *to_fix = NULL; + + xc_shadow_op_stats_t stats; + + unsigned long needed_to_fix = 0; + unsigned long total_sent = 0; + + uint64_t vcpumap = 1ULL; + + /* HVM: a buffer for holding HVM context */ + uint32_t hvm_buf_size = 0; + uint8_t *hvm_buf = NULL; + + /* HVM: magic frames for ioreqs and xenstore comms. */ + uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */ + + /* If no explicit control parameters given, use defaults */ + max_iters = max_iters ? : DEF_MAX_ITERS; + max_factor = max_factor ? : DEF_MAX_FACTOR; + + initialize_mbit_rate(); + + if ( !get_platform_info(xc_handle, dom, + &max_mfn, &hvirt_start, &pt_levels) ) + { + ERROR("Unable to get platform info."); + return 1; + } + + if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 ) + { + ERROR("Could not get domain info"); + return 1; + } + + shared_info_frame = info.shared_info_frame; + + /* Map the shared info frame */ + if ( !hvm ) + { + live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ, shared_info_frame); + if ( !live_shinfo ) + { + ERROR("Couldn't map live_shinfo"); + goto out; + } + } + + /* Get the size of the P2M table */ + p2m_size = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom); + + /* Domain is still running at this point */ + if ( live ) + { + /* Live suspend. Enable log-dirty mode. */ + if ( xc_shadow_control(xc_handle, dom, + XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, + NULL, 0, NULL, 0, NULL) < 0 ) + { + ERROR("Couldn't enable shadow mode"); + goto out; + } + + if ( hvm ) + { + /* Get qemu-dm logging dirty pages too */ + void *seg = init_qemu_maps(dom, BITMAP_SIZE); + qemu_bitmaps[0] = seg; + qemu_bitmaps[1] = seg + BITMAP_SIZE; + qemu_active = 0; + qemu_non_active = 1; + } + } + else + { + /* This is a non-live suspend. Suspend the domain .*/ + if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info) ) + { + ERROR("Domain appears not to have suspended"); + goto out; + } + } + + last_iter = !live; + + /* pretend we sent all the pages last iteration */ + sent_last_iter = p2m_size; + + /* calculate the power of 2 order of p2m_size, e.g. + 15->4 16->4 17->5 */ + for ( i = p2m_size-1, order_nr = 0; i ; i >>= 1, order_nr++ ) + continue; + + /* Setup to_send / to_fix and to_skip bitmaps */ + to_send = malloc(BITMAP_SIZE); + to_fix = calloc(1, BITMAP_SIZE); + to_skip = malloc(BITMAP_SIZE); + + if ( !to_send || !to_fix || !to_skip ) + { + ERROR("Couldn't allocate to_send array"); + goto out; + } + + memset(to_send, 0xff, BITMAP_SIZE); + + if ( lock_pages(to_send, BITMAP_SIZE) ) + { + ERROR("Unable to lock to_send"); + return 1; + } + + /* (to fix is local only) */ + if ( lock_pages(to_skip, BITMAP_SIZE) ) + { + ERROR("Unable to lock to_skip"); + return 1; + } + + if ( hvm ) + { + /* Need another buffer for HVM context */ + hvm_buf_size = xc_domain_hvm_getcontext(xc_handle, dom, 0, 0); + if ( hvm_buf_size == -1 ) + { + ERROR("Couldn't get HVM context size from Xen"); + goto out; + } + hvm_buf = malloc(hvm_buf_size); + if ( !hvm_buf ) + { + ERROR("Couldn't allocate memory"); + goto out; + } + } + + analysis_phase(xc_handle, dom, p2m_size, to_skip, 0); + + /* We want zeroed memory so use calloc rather than malloc. */ + pfn_type = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type)); + pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch)); + if ( (pfn_type == NULL) || (pfn_batch == NULL) ) + { + ERROR("failed to alloc memory for pfn_type and/or pfn_batch arrays"); + errno = ENOMEM; + goto out; + } + + if ( lock_pages(pfn_type, MAX_BATCH_SIZE * sizeof(*pfn_type)) ) + { + ERROR("Unable to lock"); + goto out; + } + + /* Setup the mfn_to_pfn table mapping */ + if ( !(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ)) ) + { + ERROR("Failed to map live M2P table"); + goto out; + } + + /* Start writing out the saved-domain record. */ + if ( !write_exact(io_fd, &p2m_size, sizeof(unsigned long)) ) + { + ERROR("write: p2m_size"); + goto out; + } + + if ( !hvm ) + { + int err = 0; + unsigned long mfn; + + /* Map the P2M table, and write the list of P2M frames */ + live_p2m = map_and_save_p2m_table(xc_handle, io_fd, dom, + p2m_size, live_shinfo); + if ( live_p2m == NULL ) + { + ERROR("Failed to map/save the p2m frame list"); + goto out; + } + + /* + * Quick belt and braces sanity check. + */ + + for ( i = 0; i < p2m_size; i++ ) + { + mfn = live_p2m[i]; + if( (mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i) ) + { + DPRINTF("i=0x%x mfn=%lx live_m2p=%lx\n", i, + mfn, mfn_to_pfn(mfn)); + err++; + } + } + DPRINTF("Had %d unexplained entries in p2m table\n", err); + } + + print_stats(xc_handle, dom, 0, &stats, 0); + + /* Now write out each data page, canonicalising page tables as we go... */ + for ( ; ; ) + { + unsigned int prev_pc, sent_this_iter, N, batch; + + iter++; + sent_this_iter = 0; + skip_this_iter = 0; + prev_pc = 0; + N = 0; + + DPRINTF("Saving memory pages: iter %d 0%%", iter); + + while ( N < p2m_size ) + { + unsigned int this_pc = (N * 100) / p2m_size; + int rc; + + if ( (this_pc - prev_pc) >= 5 ) + { + DPRINTF("\b\b\b\b%3d%%", this_pc); + prev_pc = this_pc; + } + + if ( !last_iter ) + { + /* Slightly wasteful to peek the whole array evey time, + but this is fast enough for the moment. */ + rc = xc_shadow_control( + xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, + p2m_size, NULL, 0, NULL); + if ( rc != p2m_size ) + { + ERROR("Error peeking shadow bitmap"); + goto out; + } + } + + /* load pfn_type[] with the mfn of all the pages we're doing in + this batch. */ + for ( batch = 0; + (batch < MAX_BATCH_SIZE) && (N < p2m_size); + N++ ) + { + int n = permute(N, p2m_size, order_nr); + + if ( debug ) + DPRINTF("%d pfn= %08lx mfn= %08lx %d [mfn]= %08lx\n", + iter, (unsigned long)n, hvm ? 0 : live_p2m[n], + test_bit(n, to_send), + hvm ? 0 : mfn_to_pfn(live_p2m[n]&0xFFFFF)); + + if ( !last_iter && + test_bit(n, to_send) && + test_bit(n, to_skip) ) + skip_this_iter++; /* stats keeping */ + + if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) || + (test_bit(n, to_send) && last_iter) || + (test_bit(n, to_fix) && last_iter)) ) + continue; + + /* Skip PFNs that aren't really there */ + if ( hvm && ((n >= 0xa0 && n < 0xc0) /* VGA hole */ + || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) + && n < (1ULL<<32) >> PAGE_SHIFT)) /* MMIO */ ) + continue; + + /* + ** we get here if: + ** 1. page is marked to_send & hasn't already been re-dirtied + ** 2. (ignore to_skip in last iteration) + ** 3. add in pages that still need fixup (net bufs) + */ + + pfn_batch[batch] = n; + + /* Hypercall interfaces operate in PFNs for HVM guests + * and MFNs for PV guests */ + if ( hvm ) + pfn_type[batch] = n; + else + pfn_type[batch] = live_p2m[n]; + + if ( !is_mapped(pfn_type[batch]) ) + { + /* + ** not currently in psuedo-physical map -- set bit + ** in to_fix since we must send this page in last_iter + ** unless its sent sooner anyhow, or it never enters + ** pseudo-physical map (e.g. for ballooned down doms) + */ + set_bit(n, to_fix); + continue; + } + + if ( last_iter && + test_bit(n, to_fix) && + !test_bit(n, to_send) ) + { + needed_to_fix++; + DPRINTF("Fix! iter %d, pfn %x. mfn %lx\n", + iter, n, pfn_type[batch]); + } + + clear_bit(n, to_fix); + + batch++; + } + + if ( batch == 0 ) + goto skip; /* vanishingly unlikely... */ + + region_base = xc_map_foreign_batch( + xc_handle, dom, PROT_READ, pfn_type, batch); + if ( region_base == NULL ) + { + ERROR("map batch failed"); + goto out; + } + + if ( !hvm ) + { + /* Get page types */ + for ( j = 0; j < batch; j++ ) + ((uint32_t *)pfn_type)[j] = pfn_type[j]; + if ( xc_get_pfn_type_batch(xc_handle, dom, batch, + (uint32_t *)pfn_type) ) + { + ERROR("get_pfn_type_batch failed"); + goto out; + } + for ( j = batch-1; j >= 0; j-- ) + pfn_type[j] = ((uint32_t *)pfn_type)[j]; + + for ( j = 0; j < batch; j++ ) + { + + if ( (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) == + XEN_DOMCTL_PFINFO_XTAB ) + { + DPRINTF("type fail: page %i mfn %08lx\n", + j, pfn_type[j]); + continue; + } + + if ( debug ) + DPRINTF("%d pfn= %08lx mfn= %08lx [mfn]= %08lx" + " sum= %08lx\n", + iter, + (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) | + pfn_batch[j], + pfn_type[j], + mfn_to_pfn(pfn_type[j] & + ~XEN_DOMCTL_PFINFO_LTAB_MASK), + csum_page(region_base + (PAGE_SIZE*j))); + + /* canonicalise mfn->pfn */ + pfn_type[j] = (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) | + pfn_batch[j]; + } + } + + if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) ) + { + ERROR("Error when writing to state file (2) (errno %d)", + errno); + goto out; + } + + if ( !write_exact(io_fd, pfn_type, sizeof(unsigned long)*batch) ) + { + ERROR("Error when writing to state file (3) (errno %d)", + errno); + goto out; + } + + /* entering this loop, pfn_type is now in pfns (Not mfns) */ + for ( j = 0; j < batch; j++ ) + { + unsigned long pfn, pagetype; + void *spage = (char *)region_base + (PAGE_SIZE*j); + + pfn = pfn_type[j] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; + pagetype = pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK; + + /* write out pages in batch */ + if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ) + continue; + + pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK; + + if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) && + (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) ) + { + /* We have a pagetable page: need to rewrite it. */ + race = + canonicalize_pagetable(pagetype, pfn, spage, page); + + if ( race && !live ) + { + ERROR("Fatal PT race (pfn %lx, type %08lx)", pfn, + pagetype); + goto out; + } + + if ( ratewrite(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE ) + { + ERROR("Error when writing to state file (4)" + " (errno %d)", errno); + goto out; + } + } + else + { + /* We have a normal page: just write it directly. */ + if ( ratewrite(io_fd, live, spage, PAGE_SIZE) != + PAGE_SIZE ) + { + ERROR("Error when writing to state file (5)" + " (errno %d)", errno); + goto out; + } + } + } /* end of the write out for this batch */ + + sent_this_iter += batch; + + munmap(region_base, batch*PAGE_SIZE); + + } /* end of this while loop for this iteration */ + + skip: + + total_sent += sent_this_iter; + + DPRINTF("\r %d: sent %d, skipped %d, ", + iter, sent_this_iter, skip_this_iter ); + + if ( last_iter ) + { + print_stats( xc_handle, dom, sent_this_iter, &stats, 1); + + DPRINTF("Total pages sent= %ld (%.2fx)\n", + total_sent, ((float)total_sent)/p2m_size ); + DPRINTF("(of which %ld were fixups)\n", needed_to_fix ); + } + + if ( last_iter && debug ) + { + int minusone = -1; + memset(to_send, 0xff, BITMAP_SIZE); + debug = 0; + DPRINTF("Entering debug resend-all mode\n"); + + /* send "-1" to put receiver into debug mode */ + if ( !write_exact(io_fd, &minusone, sizeof(int)) ) + { + ERROR("Error when writing to state file (6) (errno %d)", + errno); + goto out; + } + + continue; + } + + if ( last_iter ) + break; + + if ( live ) + { + if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || + (iter >= max_iters) || + (sent_this_iter+skip_this_iter < 50) || + (total_sent > p2m_size*max_factor) ) + { + DPRINTF("Start last iteration\n"); + last_iter = 1; + + if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info) ) + { + ERROR("Domain appears not to have suspended"); + goto out; + } + + DPRINTF("SUSPEND shinfo %08lx\n", info.shared_info_frame); + } + + if ( xc_shadow_control(xc_handle, dom, + XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, + p2m_size, NULL, 0, &stats) != p2m_size ) + { + ERROR("Error flushing shadow PT"); + goto out; + } + + if ( hvm ) + { + /* Pull in the dirty bits from qemu-dm too */ + if ( !last_iter ) + { + qemu_active = qemu_non_active; + qemu_non_active = qemu_active ? 0 : 1; + qemu_flip_buffer(dom, qemu_active); + for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ ) + { + to_send[j] |= qemu_bitmaps[qemu_non_active][j]; + qemu_bitmaps[qemu_non_active][j] = 0; + } + } + else + { + for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ ) + to_send[j] |= qemu_bitmaps[qemu_active][j]; + } + } + + sent_last_iter = sent_this_iter; + + print_stats(xc_handle, dom, sent_this_iter, &stats, 1); + + } + } /* end of infinite for loop */ + + DPRINTF("All memory is saved\n"); + + { + struct { + int minustwo; + int max_vcpu_id; + uint64_t vcpumap; + } chunk = { -2, info.max_vcpu_id }; + + if ( info.max_vcpu_id >= 64 ) + { + ERROR("Too many VCPUS in guest!"); + goto out; + } + + for ( i = 1; i <= info.max_vcpu_id; i++ ) + { + xc_vcpuinfo_t vinfo; + if ( (xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) && + vinfo.online ) + vcpumap |= 1ULL << i; + } + + chunk.vcpumap = vcpumap; + if ( !write_exact(io_fd, &chunk, sizeof(chunk)) ) + { + ERROR("Error when writing to state file (errno %d)", errno); + goto out; + } + } + + /* Zero terminate */ + i = 0; + if ( !write_exact(io_fd, &i, sizeof(int)) ) + { + ERROR("Error when writing to state file (6') (errno %d)", errno); + goto out; + } + + if ( hvm ) + { + uint32_t rec_size; + + /* Save magic-page locations. */ + memset(magic_pfns, 0, sizeof(magic_pfns)); + xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, + (unsigned long *)&magic_pfns[0]); + xc_get_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, + (unsigned long *)&magic_pfns[1]); + xc_get_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, + (unsigned long *)&magic_pfns[2]); + if ( !write_exact(io_fd, magic_pfns, sizeof(magic_pfns)) ) + { + ERROR("Error when writing to state file (7)"); + goto out; + } + + /* Get HVM context from Xen and save it too */ + if ( (rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf, + hvm_buf_size)) == -1 ) + { + ERROR("HVM:Could not get hvm buffer"); + goto out; + } + + if ( !write_exact(io_fd, &rec_size, sizeof(uint32_t)) ) + { + ERROR("error write hvm buffer size"); + goto out; + } + + if ( !write_exact(io_fd, hvm_buf, rec_size) ) + { + ERROR("write HVM info failed!\n"); + goto out; + } + + /* HVM guests are done now */ + rc = 0; + goto out; + } + + /* PV guests only from now on */ + + /* Send through a list of all the PFNs that were not in map at the close */ + { + unsigned int i,j; + unsigned long pfntab[1024]; + + for ( i = 0, j = 0; i < p2m_size; i++ ) + { + if ( !is_mapped(live_p2m[i]) ) + j++; + } + + if ( !write_exact(io_fd, &j, sizeof(unsigned int)) ) + { + ERROR("Error when writing to state file (6a) (errno %d)", errno); + goto out; + } + + for ( i = 0, j = 0; i < p2m_size; ) + { + if ( !is_mapped(live_p2m[i]) ) + pfntab[j++] = i; + + i++; + if ( (j == 1024) || (i == p2m_size) ) + { + if ( !write_exact(io_fd, &pfntab, sizeof(unsigned long)*j) ) + { + ERROR("Error when writing to state file (6b) (errno %d)", + errno); + goto out; + } + j = 0; + } + } + } + + if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) ) + { + ERROR("Could not get vcpu context"); + goto out; + } + + /* Canonicalise the suspend-record frame number. */ + if ( !translate_mfn_to_pfn(&ctxt.user_regs.edx) ) + { + ERROR("Suspend record is not in range of pseudophys map"); + goto out; + } + + for ( i = 0; i <= info.max_vcpu_id; i++ ) + { + if ( !(vcpumap & (1ULL << i)) ) + continue; + + if ( (i != 0) && xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) ) + { + ERROR("No context for VCPU%d", i); + goto out; + } + + /* Canonicalise each GDT frame number. */ + for ( j = 0; (512*j) < ctxt.gdt_ents; j++ ) + { + if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[j]) ) + { + ERROR("GDT frame is not in range of pseudophys map"); + goto out; + } + } + + /* Canonicalise the page table base pointer. */ + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[3])) ) + { + ERROR("PT base is not in range of pseudophys map"); + goto out; + } + ctxt.ctrlreg[3] = + xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3]))); + + /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */ + if ( (pt_levels == 4) && ctxt.ctrlreg[1] ) + { + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[1])) ) + { + ERROR("PT base is not in range of pseudophys map"); + goto out; + } + /* Least-significant bit means 'valid PFN'. */ + ctxt.ctrlreg[1] = 1 | + xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[1]))); + } + + if ( !write_exact(io_fd, &ctxt, sizeof(ctxt)) ) + { + ERROR("Error when writing to state file (1) (errno %d)", errno); + goto out; + } + } + + /* + * Reset the MFN to be a known-invalid value. See map_frame_list_list(). + */ + memcpy(page, live_shinfo, PAGE_SIZE); + ((shared_info_t *)page)->arch.pfn_to_mfn_frame_list_list = 0; + if ( !write_exact(io_fd, page, PAGE_SIZE) ) + { + ERROR("Error when writing to state file (1) (errno %d)", errno); + goto out; + } + + /* Success! */ + rc = 0; + + out: + + if ( live ) + { + if ( xc_shadow_control(xc_handle, dom, + XEN_DOMCTL_SHADOW_OP_OFF, + NULL, 0, NULL, 0, NULL) < 0 ) + DPRINTF("Warning - couldn't disable shadow mode"); + } + + /* Flush last write and discard cache for file. */ + discard_file_cache(io_fd, 1 /* flush */); + + if ( live_shinfo ) + munmap(live_shinfo, PAGE_SIZE); + + if ( live_p2m ) + munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT)); + + if ( live_m2p ) + munmap(live_m2p, M2P_SIZE(max_mfn)); + + free(pfn_type); + free(pfn_batch); + free(to_send); + free(to_fix); + free(to_skip); + + DPRINTF("Save exit rc=%d\n",rc); + + return !!rc; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_hvm_build.c --- a/tools/libxc/xc_hvm_build.c Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/libxc/xc_hvm_build.c Fri Apr 13 11:14:26 2007 +0100 @@ -28,47 +28,6 @@ typedef union vcpu_guest_context_x86_32_t c32; vcpu_guest_context_t c; } vcpu_guest_context_either_t; - - -int xc_set_hvm_param( - int handle, domid_t dom, int param, unsigned long value) -{ - DECLARE_HYPERCALL; - xen_hvm_param_t arg; - int rc; - - hypercall.op = __HYPERVISOR_hvm_op; - hypercall.arg[0] = HVMOP_set_param; - hypercall.arg[1] = (unsigned long)&arg; - arg.domid = dom; - arg.index = param; - arg.value = value; - if ( lock_pages(&arg, sizeof(arg)) != 0 ) - return -1; - rc = do_xen_hypercall(handle, &hypercall); - unlock_pages(&arg, sizeof(arg)); - return rc; -} - -int xc_get_hvm_param( - int handle, domid_t dom, int param, unsigned long *value) -{ - DECLARE_HYPERCALL; - xen_hvm_param_t arg; - int rc; - - hypercall.op = __HYPERVISOR_hvm_op; - hypercall.arg[0] = HVMOP_get_param; - hypercall.arg[1] = (unsigned long)&arg; - arg.domid = dom; - arg.index = param; - if ( lock_pages(&arg, sizeof(arg)) != 0 ) - return -1; - rc = do_xen_hypercall(handle, &hypercall); - unlock_pages(&arg, sizeof(arg)); - *value = arg.value; - return rc; -} static void build_e820map(void *e820_page, unsigned long long mem_size) { diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_hvm_save.c --- a/tools/libxc/xc_hvm_save.c Thu Apr 12 16:37:32 2007 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,755 +0,0 @@ -/****************************************************************************** - * xc_hvm_save.c - * - * Save the state of a running HVM guest. - * - * Copyright (c) 2003, K A Fraser. - * Copyright (c) 2006 Intel Corperation - * rewriten for hvm guest by Zhai Edwin <edwin.zhai@xxxxxxxxx> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -#include <inttypes.h> -#include <time.h> -#include <stdlib.h> -#include <unistd.h> -#include <sys/time.h> - -#include "xc_private.h" -#include "xg_private.h" -#include "xg_save_restore.h" - -#include <xen/hvm/e820.h> -#include <xen/hvm/params.h> - -/* -** Default values for important tuning parameters. Can override by passing -** non-zero replacement values to xc_hvm_save(). -** -** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too. -** -*/ -#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */ -#define DEF_MAX_FACTOR 3 /* never send more than 3x nr_pfns */ - -/* Shared-memory bitmaps for getting log-dirty bits from qemu */ -static unsigned long *qemu_bitmaps[2]; -static int qemu_active; -static int qemu_non_active; - -/* -** During (live) save/migrate, we maintain a number of bitmaps to track -** which pages we have to send, to fixup, and to skip. -*/ - -#define BITS_PER_LONG (sizeof(unsigned long) * 8) -#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) -#define BITMAP_SIZE (BITS_TO_LONGS(pfn_array_size) * sizeof(unsigned long)) - -#define BITMAP_ENTRY(_nr,_bmap) \ - ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG] - -#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG) - -static inline int test_bit (int nr, volatile void * addr) -{ - return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1; -} - -static inline void clear_bit (int nr, volatile void * addr) -{ - BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr)); -} - -static inline int permute( int i, int nr, int order_nr ) -{ - /* Need a simple permutation function so that we scan pages in a - pseudo random order, enabling us to get a better estimate of - the domain's page dirtying rate as we go (there are often - contiguous ranges of pfns that have similar behaviour, and we - want to mix them up. */ - - /* e.g. nr->oder 15->4 16->4 17->5 */ - /* 512MB domain, 128k pages, order 17 */ - - /* - QPONMLKJIHGFEDCBA - QPONMLKJIH - GFEDCBA - */ - - /* - QPONMLKJIHGFEDCBA - EDCBA - QPONM - LKJIHGF - */ - - do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); } - while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */ - - return i; -} - - -static uint64_t tv_to_us(struct timeval *new) -{ - return (new->tv_sec * 1000000) + new->tv_usec; -} - -static uint64_t llgettimeofday(void) -{ - struct timeval now; - gettimeofday(&now, NULL); - return tv_to_us(&now); -} - -static uint64_t tv_delta(struct timeval *new, struct timeval *old) -{ - return (((new->tv_sec - old->tv_sec)*1000000) + - (new->tv_usec - old->tv_usec)); -} - - -#define RATE_IS_MAX() (0) -#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n)) -#define initialize_mbit_rate() - -static inline ssize_t write_exact(int fd, void *buf, size_t count) -{ - return (write(fd, buf, count) == count); -} - -static int print_stats(int xc_handle, uint32_t domid, int pages_sent, - xc_shadow_op_stats_t *stats, int print) -{ - static struct timeval wall_last; - static long long d0_cpu_last; - static long long d1_cpu_last; - - struct timeval wall_now; - long long wall_delta; - long long d0_cpu_now, d0_cpu_delta; - long long d1_cpu_now, d1_cpu_delta; - - gettimeofday(&wall_now, NULL); - - d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000; - d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000; - - if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) ) - DPRINTF("ARRHHH!!\n"); - - wall_delta = tv_delta(&wall_now,&wall_last)/1000; - if ( wall_delta == 0 ) - wall_delta = 1; - - d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000; - d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000; - - if ( print ) - DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, " - "dirtied %dMb/s %" PRId32 " pages\n", - wall_delta, - (int)((d0_cpu_delta*100)/wall_delta), - (int)((d1_cpu_delta*100)/wall_delta), - (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))), - (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))), - stats->dirty_count); - - d0_cpu_last = d0_cpu_now; - d1_cpu_last = d1_cpu_now; - wall_last = wall_now; - - return 0; -} - -static int analysis_phase(int xc_handle, uint32_t domid, int pfn_array_size, - unsigned long *arr, int runs) -{ - long long start, now; - xc_shadow_op_stats_t stats; - int j; - - start = llgettimeofday(); - - for ( j = 0; j < runs; j++ ) - { - int i; - - xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN, - arr, pfn_array_size, NULL, 0, NULL); - DPRINTF("#Flush\n"); - for ( i = 0; i < 40; i++ ) - { - usleep(50000); - now = llgettimeofday(); - xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK, - NULL, 0, NULL, 0, &stats); - DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n", - ((now-start)+500)/1000, - stats.fault_count, stats.dirty_count); - } - } - - return -1; -} - -static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd, - int dom, xc_dominfo_t *info, - vcpu_guest_context_t *ctxt) -{ - int i = 0; - - if ( !(*suspend)(dom) ) - { - ERROR("Suspend request failed"); - return -1; - } - - retry: - - if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 ) - { - ERROR("Could not get domain info"); - return -1; - } - - if ( xc_vcpu_getcontext(xc_handle, dom, 0, ctxt) ) - ERROR("Could not get vcpu context"); - - if ( info->shutdown && (info->shutdown_reason == SHUTDOWN_suspend) ) - return 0; /* success */ - - if ( info->paused ) - { - /* Try unpausing domain, wait, and retest. */ - xc_domain_unpause( xc_handle, dom ); - ERROR("Domain was paused. Wait and re-test."); - usleep(10000); /* 10ms */ - goto retry; - } - - if ( ++i < 100 ) - { - ERROR("Retry suspend domain."); - usleep(10000); /* 10ms */ - goto retry; - } - - ERROR("Unable to suspend domain."); - - return -1; -} - -int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, - uint32_t max_factor, uint32_t flags, int (*suspend)(int), - void *(*init_qemu_maps)(int, unsigned), - void (*qemu_flip_buffer)(int, int)) -{ - xc_dominfo_t info; - - int rc = 1, i, j, last_iter, iter = 0; - int live = !!(flags & XCFLAGS_LIVE); - int debug = !!(flags & XCFLAGS_DEBUG); - int sent_last_iter, skip_this_iter; - - /* The highest guest-physical frame number used by the current guest */ - unsigned long max_pfn; - - /* The size of an array big enough to contain all guest pfns */ - unsigned long pfn_array_size; - - /* Magic frames: ioreqs and xenstore comms. */ - uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */ - - /* A copy of the CPU context of the guest. */ - vcpu_guest_context_t ctxt; - - /* A table containg the PFNs (/not/ MFN!) to map. */ - xen_pfn_t *pfn_batch = NULL; - - /* A copy of hvm domain context buffer*/ - uint32_t hvm_buf_size; - uint8_t *hvm_buf = NULL; - - /* base of the region in which domain memory is mapped */ - unsigned char *region_base = NULL; - - uint32_t rec_size, nr_vcpus; - - /* power of 2 order of pfn_array_size */ - int order_nr; - - /* bitmap of pages: - - that should be sent this iteration (unless later marked as skip); - - to skip this iteration because already dirty; */ - unsigned long *to_send = NULL, *to_skip = NULL; - - xc_shadow_op_stats_t stats; - - unsigned long total_sent = 0; - - uint64_t vcpumap = 1ULL; - - DPRINTF("xc_hvm_save: dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, " - "live=%d, debug=%d.\n", dom, max_iters, max_factor, flags, - live, debug); - - /* If no explicit control parameters given, use defaults */ - max_iters = max_iters ? : DEF_MAX_ITERS; - max_factor = max_factor ? : DEF_MAX_FACTOR; - - initialize_mbit_rate(); - - if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 ) - { - ERROR("HVM: Could not get domain info"); - return 1; - } - nr_vcpus = info.nr_online_vcpus; - - if ( mlock(&ctxt, sizeof(ctxt)) ) - { - ERROR("HVM: Unable to mlock ctxt"); - return 1; - } - - /* Only have to worry about vcpu 0 even for SMP */ - if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) ) - { - ERROR("HVM: Could not get vcpu context"); - goto out; - } - - DPRINTF("saved hvm domain info: max_memkb=0x%lx, nr_pages=0x%lx\n", - info.max_memkb, info.nr_pages); - - if ( live ) - { - /* Live suspend. Enable log-dirty mode. */ - if ( xc_shadow_control(xc_handle, dom, - XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, - NULL, 0, NULL, 0, NULL) < 0 ) - { - ERROR("Couldn't enable shadow mode"); - goto out; - } - } - else - { - /* This is a non-live suspend. Suspend the domain .*/ - if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt) ) - { - ERROR("HVM Domain appears not to have suspended"); - goto out; - } - } - - last_iter = !live; - - max_pfn = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom); - - DPRINTF("after 1st handle hvm domain max_pfn=0x%lx, " - "max_memkb=0x%lx, live=%d.\n", - max_pfn, info.max_memkb, live); - - /* Size of any array that covers 0 ... max_pfn */ - pfn_array_size = max_pfn + 1; - if ( !write_exact(io_fd, &pfn_array_size, sizeof(unsigned long)) ) - { - ERROR("Error when writing to state file (1)"); - goto out; - } - - /* pretend we sent all the pages last iteration */ - sent_last_iter = pfn_array_size; - - /* calculate the power of 2 order of pfn_array_size, e.g. - 15->4 16->4 17->5 */ - for ( i = pfn_array_size-1, order_nr = 0; i ; i >>= 1, order_nr++ ) - continue; - - /* Setup to_send / to_fix and to_skip bitmaps */ - to_send = malloc(BITMAP_SIZE); - to_skip = malloc(BITMAP_SIZE); - - if ( live ) - { - /* Get qemu-dm logging dirty pages too */ - void *seg = init_qemu_maps(dom, BITMAP_SIZE); - qemu_bitmaps[0] = seg; - qemu_bitmaps[1] = seg + BITMAP_SIZE; - qemu_active = 0; - qemu_non_active = 1; - } - - hvm_buf_size = xc_domain_hvm_getcontext(xc_handle, dom, 0, 0); - if ( hvm_buf_size == -1 ) - { - ERROR("Couldn't get HVM context size from Xen"); - goto out; - } - hvm_buf = malloc(hvm_buf_size); - - if ( !to_send || !to_skip || !hvm_buf ) - { - ERROR("Couldn't allocate memory"); - goto out; - } - - memset(to_send, 0xff, BITMAP_SIZE); - - if ( lock_pages(to_send, BITMAP_SIZE) ) - { - ERROR("Unable to lock to_send"); - return 1; - } - - /* (to fix is local only) */ - if ( lock_pages(to_skip, BITMAP_SIZE) ) - { - ERROR("Unable to lock to_skip"); - return 1; - } - - analysis_phase(xc_handle, dom, pfn_array_size, to_skip, 0); - - /* We want zeroed memory so use calloc rather than malloc. */ - pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch)); - if ( pfn_batch == NULL ) - { - ERROR("failed to alloc memory for pfn_batch array"); - errno = ENOMEM; - goto out; - } - - for ( ; ; ) - { - unsigned int prev_pc, sent_this_iter, N, batch; - - iter++; - sent_this_iter = 0; - skip_this_iter = 0; - prev_pc = 0; - N=0; - - DPRINTF("Saving memory pages: iter %d 0%%", iter); - - while ( N < pfn_array_size ) - { - unsigned int this_pc = (N * 100) / pfn_array_size; - int rc; - - if ( (this_pc - prev_pc) >= 5 ) - { - DPRINTF("\b\b\b\b%3d%%", this_pc); - prev_pc = this_pc; - } - - if ( !last_iter ) - { - /* Slightly wasteful to peek the whole array evey time, - but this is fast enough for the moment. */ - rc = xc_shadow_control( - xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, - pfn_array_size, NULL, 0, NULL); - if ( rc != pfn_array_size ) - { - ERROR("Error peeking shadow bitmap"); - goto out; - } - } - - /* load pfn_batch[] with the mfn of all the pages we're doing in - this batch. */ - for ( batch = 0; - (batch < MAX_BATCH_SIZE) && (N < pfn_array_size); - N++ ) - { - int n = permute(N, pfn_array_size, order_nr); - - if ( 0 && debug ) - DPRINTF("%d pfn= %08lx %d \n", - iter, (unsigned long)n, test_bit(n, to_send)); - - if ( !last_iter && - test_bit(n, to_send) && - test_bit(n, to_skip) ) - skip_this_iter++; /* stats keeping */ - - if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) || - (test_bit(n, to_send) && last_iter)) ) - continue; - - /* Skip PFNs that aren't really there */ - if ( (n >= 0xa0 && n < 0xc0) /* VGA hole */ - || (n >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) && - n < (1ULL << 32) >> PAGE_SHIFT) /* 4G MMIO hole */ ) - continue; - - /* - ** we get here if: - ** 1. page is marked to_send & hasn't already been re-dirtied - ** 2. (ignore to_skip in last iteration) - */ - - pfn_batch[batch] = n; - - batch++; - } - - if ( batch == 0 ) - goto skip; /* vanishingly unlikely... */ - - region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_READ, pfn_batch, batch); - if ( region_base == 0 ) - { - ERROR("map batch failed"); - goto out; - } - - /* write num of pfns */ - if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) ) - { - ERROR("Error when writing to state file (2)"); - goto out; - } - - /* write all the pfns */ - if ( !write_exact(io_fd, pfn_batch, sizeof(unsigned long)*batch) ) - { - ERROR("Error when writing to state file (3)"); - goto out; - } - - for ( j = 0; j < batch; j++ ) - { - if ( pfn_batch[j] & XEN_DOMCTL_PFINFO_LTAB_MASK ) - continue; - if ( ratewrite(io_fd, region_base + j*PAGE_SIZE, - PAGE_SIZE) != PAGE_SIZE ) - { - ERROR("ERROR when writing to state file (4)"); - goto out; - } - } - - sent_this_iter += batch; - - munmap(region_base, batch*PAGE_SIZE); - - } /* end of this while loop for this iteration */ - - skip: - - total_sent += sent_this_iter; - - DPRINTF("\r %d: sent %d, skipped %d, ", - iter, sent_this_iter, skip_this_iter ); - - if ( last_iter ) - { - print_stats( xc_handle, dom, sent_this_iter, &stats, 1); - DPRINTF("Total pages sent= %ld (%.2fx)\n", - total_sent, ((float)total_sent)/pfn_array_size ); - } - - if ( last_iter && debug ) - { - int minusone = -1; - memset(to_send, 0xff, BITMAP_SIZE); - debug = 0; - DPRINTF("Entering debug resend-all mode\n"); - - /* send "-1" to put receiver into debug mode */ - if ( !write_exact(io_fd, &minusone, sizeof(int)) ) - { - ERROR("Error when writing to state file (6)"); - goto out; - } - - continue; - } - - if ( last_iter ) - break; - - if ( live ) - { - if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || - (iter >= max_iters) || - (sent_this_iter+skip_this_iter < 50) || - (total_sent > pfn_array_size*max_factor) ) - { - DPRINTF("Start last iteration for HVM domain\n"); - last_iter = 1; - - if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, - &ctxt)) - { - ERROR("Domain appears not to have suspended"); - goto out; - } - - DPRINTF("SUSPEND eip %08lx edx %08lx\n", - (unsigned long)ctxt.user_regs.eip, - (unsigned long)ctxt.user_regs.edx); - } - - if ( xc_shadow_control(xc_handle, dom, - XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, - pfn_array_size, NULL, - 0, &stats) != pfn_array_size ) - { - ERROR("Error flushing shadow PT"); - goto out; - } - - /* Pull in the dirty bits from qemu too */ - if ( !last_iter ) - { - qemu_active = qemu_non_active; - qemu_non_active = qemu_active ? 0 : 1; - qemu_flip_buffer(dom, qemu_active); - for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ ) - { - to_send[j] |= qemu_bitmaps[qemu_non_active][j]; - qemu_bitmaps[qemu_non_active][j] = 0; - } - } - else - { - for ( j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++ ) - to_send[j] |= qemu_bitmaps[qemu_active][j]; - } - - sent_last_iter = sent_this_iter; - - print_stats(xc_handle, dom, sent_this_iter, &stats, 1); - } - } /* end of while 1 */ - - - DPRINTF("All HVM memory is saved\n"); - - { - struct { - int minustwo; - int max_vcpu_id; - uint64_t vcpumap; - } chunk = { -2, info.max_vcpu_id }; - - if (info.max_vcpu_id >= 64) { - ERROR("Too many VCPUS in guest!"); - goto out; - } - - for (i = 1; i <= info.max_vcpu_id; i++) { - xc_vcpuinfo_t vinfo; - if ((xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) && - vinfo.online) - vcpumap |= 1ULL << i; - } - - chunk.vcpumap = vcpumap; - if(!write_exact(io_fd, &chunk, sizeof(chunk))) { - ERROR("Error when writing to state file (errno %d)", errno); - goto out; - } - } - - /* Zero terminate */ - i = 0; - if ( !write_exact(io_fd, &i, sizeof(int)) ) - { - ERROR("Error when writing to state file (6)"); - goto out; - } - - /* Save magic-page locations. */ - memset(magic_pfns, 0, sizeof(magic_pfns)); - xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, - (unsigned long *)&magic_pfns[0]); - xc_get_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, - (unsigned long *)&magic_pfns[1]); - xc_get_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, - (unsigned long *)&magic_pfns[2]); - if ( !write_exact(io_fd, magic_pfns, sizeof(magic_pfns)) ) - { - ERROR("Error when writing to state file (7)"); - goto out; - } - - /* save vcpu/vmcs contexts */ - for ( i = 0; i < nr_vcpus; i++ ) - { - if ( !(vcpumap & (1ULL << i)) ) - continue; - - if ( xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) ) - { - ERROR("HVM:Could not get vcpu context"); - goto out; - } - - DPRINTF("write vcpu %d context.\n", i); - if ( !write_exact(io_fd, &(ctxt), sizeof(ctxt)) ) - { - ERROR("write vcpu context failed!\n"); - goto out; - } - } - - if ( (rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf, - hvm_buf_size)) == -1 ) - { - ERROR("HVM:Could not get hvm buffer"); - goto out; - } - - if ( !write_exact(io_fd, &rec_size, sizeof(uint32_t)) ) - { - ERROR("error write hvm buffer size"); - goto out; - } - - if ( !write_exact(io_fd, hvm_buf, rec_size) ) - { - ERROR("write HVM info failed!\n"); - goto out; - } - - /* Success! */ - rc = 0; - - out: - - if ( live ) - { - if ( xc_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_OFF, - NULL, 0, NULL, 0, NULL) < 0 ) - DPRINTF("Warning - couldn't disable shadow mode"); - } - - free(hvm_buf); - free(pfn_batch); - free(to_send); - free(to_skip); - - return !!rc; -} diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_linux_save.c --- a/tools/libxc/xc_linux_save.c Thu Apr 12 16:37:32 2007 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1414 +0,0 @@ -/****************************************************************************** - * xc_linux_save.c - * - * Save the state of a running Linux session. - * - * Copyright (c) 2003, K A Fraser. - */ - -#include <inttypes.h> -#include <time.h> -#include <stdlib.h> -#include <unistd.h> -#include <sys/time.h> - -#include "xc_private.h" -#include "xc_dom.h" -#include "xg_private.h" -#include "xg_save_restore.h" - -/* -** Default values for important tuning parameters. Can override by passing -** non-zero replacement values to xc_linux_save(). -** -** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too. -** -*/ -#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */ -#define DEF_MAX_FACTOR 3 /* never send more than 3x p2m_size */ - -/* max mfn of the whole machine */ -static unsigned long max_mfn; - -/* virtual starting address of the hypervisor */ -static unsigned long hvirt_start; - -/* #levels of page tables used by the current guest */ -static unsigned int pt_levels; - -/* number of pfns this guest has (i.e. number of entries in the P2M) */ -static unsigned long p2m_size; - -/* Live mapping of the table mapping each PFN to its current MFN. */ -static xen_pfn_t *live_p2m = NULL; - -/* Live mapping of system MFN to PFN table. */ -static xen_pfn_t *live_m2p = NULL; -static unsigned long m2p_mfn0; - -/* grep fodder: machine_to_phys */ - -#define mfn_to_pfn(_mfn) live_m2p[(_mfn)] - -/* - * Returns TRUE if the given machine frame number has a unique mapping - * in the guest's pseudophysical map. - */ -#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \ - (((_mfn) < (max_mfn)) && \ - ((mfn_to_pfn(_mfn) < (p2m_size)) && \ - (live_p2m[mfn_to_pfn(_mfn)] == (_mfn)))) - -/* Returns TRUE if MFN is successfully converted to a PFN. */ -#define translate_mfn_to_pfn(_pmfn) \ -({ \ - unsigned long mfn = *(_pmfn); \ - int _res = 1; \ - if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) \ - _res = 0; \ - else \ - *(_pmfn) = mfn_to_pfn(mfn); \ - _res; \ -}) - -/* -** During (live) save/migrate, we maintain a number of bitmaps to track -** which pages we have to send, to fixup, and to skip. -*/ - -#define BITS_PER_LONG (sizeof(unsigned long) * 8) -#define BITMAP_SIZE ((p2m_size + BITS_PER_LONG - 1) / 8) - -#define BITMAP_ENTRY(_nr,_bmap) \ - ((volatile unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG] - -#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG) - -static inline int test_bit (int nr, volatile void * addr) -{ - return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1; -} - -static inline void clear_bit (int nr, volatile void * addr) -{ - BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr)); -} - -static inline void set_bit ( int nr, volatile void * addr) -{ - BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr)); -} - -/* Returns the hamming weight (i.e. the number of bits set) in a N-bit word */ -static inline unsigned int hweight32(unsigned int w) -{ - unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555); - res = (res & 0x33333333) + ((res >> 2) & 0x33333333); - res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F); - res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF); - return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF); -} - -static inline int count_bits ( int nr, volatile void *addr) -{ - int i, count = 0; - volatile unsigned long *p = (volatile unsigned long *)addr; - /* We know that the array is padded to unsigned long. */ - for ( i = 0; i < (nr / (sizeof(unsigned long)*8)); i++, p++ ) - count += hweight32(*p); - return count; -} - -static inline int permute( int i, int nr, int order_nr ) -{ - /* Need a simple permutation function so that we scan pages in a - pseudo random order, enabling us to get a better estimate of - the domain's page dirtying rate as we go (there are often - contiguous ranges of pfns that have similar behaviour, and we - want to mix them up. */ - - /* e.g. nr->oder 15->4 16->4 17->5 */ - /* 512MB domain, 128k pages, order 17 */ - - /* - QPONMLKJIHGFEDCBA - QPONMLKJIH - GFEDCBA - */ - - /* - QPONMLKJIHGFEDCBA - EDCBA - QPONM - LKJIHGF - */ - - do { i = ((i>>(order_nr-10)) | ( i<<10 ) ) & ((1<<order_nr)-1); } - while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */ - - return i; -} - -static uint64_t tv_to_us(struct timeval *new) -{ - return (new->tv_sec * 1000000) + new->tv_usec; -} - -static uint64_t llgettimeofday(void) -{ - struct timeval now; - gettimeofday(&now, NULL); - return tv_to_us(&now); -} - -static uint64_t tv_delta(struct timeval *new, struct timeval *old) -{ - return (((new->tv_sec - old->tv_sec)*1000000) + - (new->tv_usec - old->tv_usec)); -} - -static int noncached_write(int fd, int live, void *buffer, int len) -{ - static int write_count = 0; - - int rc = write(fd,buffer,len); - - write_count += len; - if ( write_count >= (MAX_PAGECACHE_USAGE * PAGE_SIZE) ) - { - /* Time to discard cache - dont care if this fails */ - discard_file_cache(fd, 0 /* no flush */); - write_count = 0; - } - - return rc; -} - -#ifdef ADAPTIVE_SAVE - -/* -** We control the rate at which we transmit (or save) to minimize impact -** on running domains (including the target if we're doing live migrate). -*/ - -#define MAX_MBIT_RATE 500 /* maximum transmit rate for migrate */ -#define START_MBIT_RATE 100 /* initial transmit rate for migrate */ - -/* Scaling factor to convert between a rate (in Mb/s) and time (in usecs) */ -#define RATE_TO_BTU 781250 - -/* Amount in bytes we allow ourselves to send in a burst */ -#define BURST_BUDGET (100*1024) - -/* We keep track of the current and previous transmission rate */ -static int mbit_rate, ombit_rate = 0; - -/* Have we reached the maximum transmission rate? */ -#define RATE_IS_MAX() (mbit_rate == MAX_MBIT_RATE) - -static inline void initialize_mbit_rate() -{ - mbit_rate = START_MBIT_RATE; -} - -static int ratewrite(int io_fd, int live, void *buf, int n) -{ - static int budget = 0; - static int burst_time_us = -1; - static struct timeval last_put = { 0 }; - struct timeval now; - struct timespec delay; - long long delta; - - if ( START_MBIT_RATE == 0 ) - return noncached_write(io_fd, live, buf, n); - - budget -= n; - if ( budget < 0 ) - { - if ( mbit_rate != ombit_rate ) - { - burst_time_us = RATE_TO_BTU / mbit_rate; - ombit_rate = mbit_rate; - DPRINTF("rate limit: %d mbit/s burst budget %d slot time %d\n", - mbit_rate, BURST_BUDGET, burst_time_us); - } - if ( last_put.tv_sec == 0 ) - { - budget += BURST_BUDGET; - gettimeofday(&last_put, NULL); - } - else - { - while ( budget < 0 ) - { - gettimeofday(&now, NULL); - delta = tv_delta(&now, &last_put); - while ( delta > burst_time_us ) - { - budget += BURST_BUDGET; - last_put.tv_usec += burst_time_us; - if ( last_put.tv_usec > 1000000 - { - last_put.tv_usec -= 1000000; - last_put.tv_sec++; - } - delta -= burst_time_us; - } - if ( budget > 0 ) - break; - delay.tv_sec = 0; - delay.tv_nsec = 1000 * (burst_time_us - delta); - while ( delay.tv_nsec > 0 ) - if ( nanosleep(&delay, &delay) == 0 ) - break; - } - } - } - return noncached_write(io_fd, live, buf, n); -} - -#else /* ! ADAPTIVE SAVE */ - -#define RATE_IS_MAX() (0) -#define ratewrite(_io_fd, _live, _buf, _n) noncached_write((_io_fd), (_live), (_buf), (_n)) -#define initialize_mbit_rate() - -#endif - -static inline ssize_t write_exact(int fd, void *buf, size_t count) -{ - return (write(fd, buf, count) == count); -} - -static int print_stats(int xc_handle, uint32_t domid, int pages_sent, - xc_shadow_op_stats_t *stats, int print) -{ - static struct timeval wall_last; - static long long d0_cpu_last; - static long long d1_cpu_last; - - struct timeval wall_now; - long long wall_delta; - long long d0_cpu_now, d0_cpu_delta; - long long d1_cpu_now, d1_cpu_delta; - - gettimeofday(&wall_now, NULL); - - d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000; - d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000; - - if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) ) - DPRINTF("ARRHHH!!\n"); - - wall_delta = tv_delta(&wall_now,&wall_last)/1000; - if ( wall_delta == 0 ) - wall_delta = 1; - - d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000; - d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000; - - if ( print ) - DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, " - "dirtied %dMb/s %" PRId32 " pages\n", - wall_delta, - (int)((d0_cpu_delta*100)/wall_delta), - (int)((d1_cpu_delta*100)/wall_delta), - (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))), - (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))), - stats->dirty_count); - -#ifdef ADAPTIVE_SAVE - if ( ((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate ) - { - mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) - + 50; - if ( mbit_rate > MAX_MBIT_RATE ) - mbit_rate = MAX_MBIT_RATE; - } -#endif - - d0_cpu_last = d0_cpu_now; - d1_cpu_last = d1_cpu_now; - wall_last = wall_now; - - return 0; -} - - -static int analysis_phase(int xc_handle, uint32_t domid, int p2m_size, - unsigned long *arr, int runs) -{ - long long start, now; - xc_shadow_op_stats_t stats; - int j; - - start = llgettimeofday(); - - for ( j = 0; j < runs; j++ ) - { - int i; - - xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN, - arr, p2m_size, NULL, 0, NULL); - DPRINTF("#Flush\n"); - for ( i = 0; i < 40; i++ ) - { - usleep(50000); - now = llgettimeofday(); - xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_PEEK, - NULL, 0, NULL, 0, &stats); - DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n", - ((now-start)+500)/1000, - stats.fault_count, stats.dirty_count); - } - } - - return -1; -} - - -static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd, - int dom, xc_dominfo_t *info, - vcpu_guest_context_t *ctxt) -{ - int i = 0; - - if ( !(*suspend)(dom) ) - { - ERROR("Suspend request failed"); - return -1; - } - - retry: - - if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 ) - { - ERROR("Could not get domain info"); - return -1; - } - - if ( xc_vcpu_getcontext(xc_handle, dom, 0, ctxt) ) - ERROR("Could not get vcpu context"); - - - if ( info->dying ) - { - ERROR("domain is dying"); - return -1; - } - - if ( info->crashed ) - { - ERROR("domain has crashed"); - return -1; - } - - if ( info->shutdown ) - { - switch ( info->shutdown_reason ) - { - case SHUTDOWN_poweroff: - case SHUTDOWN_reboot: - ERROR("domain has shut down"); - return -1; - case SHUTDOWN_suspend: - return 0; - case SHUTDOWN_crash: - ERROR("domain has crashed"); - return -1; - } - } - - if ( info->paused ) - { - /* Try unpausing domain, wait, and retest. */ - xc_domain_unpause( xc_handle, dom ); - ERROR("Domain was paused. Wait and re-test."); - usleep(10000); /* 10ms */ - goto retry; - } - - if ( ++i < 100 ) - { - ERROR("Retry suspend domain"); - usleep(10000); /* 10ms */ - goto retry; - } - - ERROR("Unable to suspend domain."); - - return -1; -} - -/* -** Map the top-level page of MFNs from the guest. The guest might not have -** finished resuming from a previous restore operation, so we wait a while for -** it to update the MFN to a reasonable value. -*/ -static void *map_frame_list_list(int xc_handle, uint32_t dom, - shared_info_t *shinfo) -{ - int count = 100; - void *p; - - while ( count-- && (shinfo->arch.pfn_to_mfn_frame_list_list == 0) ) - usleep(10000); - - if ( shinfo->arch.pfn_to_mfn_frame_list_list == 0 ) - { - ERROR("Timed out waiting for frame list updated."); - return NULL; - } - - p = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, - shinfo->arch.pfn_to_mfn_frame_list_list); - if ( p == NULL ) - ERROR("Couldn't map p2m_frame_list_list (errno %d)", errno); - - return p; -} - -/* -** During transfer (or in the state file), all page-table pages must be -** converted into a 'canonical' form where references to actual mfns -** are replaced with references to the corresponding pfns. -** -** This function performs the appropriate conversion, taking into account -** which entries do not require canonicalization (in particular, those -** entries which map the virtual address reserved for the hypervisor). -*/ -static int canonicalize_pagetable(unsigned long type, unsigned long pfn, - const void *spage, void *dpage) -{ - - int i, pte_last, xen_start, xen_end, race = 0; - uint64_t pte; - - /* - ** We need to determine which entries in this page table hold - ** reserved hypervisor mappings. This depends on the current - ** page table type as well as the number of paging levels. - */ - xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2) ? 4 : 8); - - if ( (pt_levels == 2) && (type == XEN_DOMCTL_PFINFO_L2TAB) ) - xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT); - - if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L3TAB) ) - xen_start = L3_PAGETABLE_ENTRIES_PAE; - - /* - ** in PAE only the L2 mapping the top 1GB contains Xen mappings. - ** We can spot this by looking for the guest linear mapping which - ** Xen always ensures is present in that L2. Guests must ensure - ** that this check will fail for other L2s. - */ - if ( (pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L2TAB) ) - { - int hstart; - uint64_t he; - - hstart = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff; - he = ((const uint64_t *) spage)[hstart]; - - if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 ) - { - /* hvirt starts with xen stuff... */ - xen_start = hstart; - } - else if ( hvirt_start != 0xf5800000 ) - { - /* old L2s from before hole was shrunk... */ - hstart = (0xf5800000 >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff; - he = ((const uint64_t *) spage)[hstart]; - if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == m2p_mfn0 ) - xen_start = hstart; - } - } - - if ( (pt_levels == 4) && (type == XEN_DOMCTL_PFINFO_L4TAB) ) - { - /* - ** XXX SMH: should compute these from hvirt_start (which we have) - ** and hvirt_end (which we don't) - */ - xen_start = 256; - xen_end = 272; - } - - /* Now iterate through the page table, canonicalizing each PTE */ - for (i = 0; i < pte_last; i++ ) - { - unsigned long pfn, mfn; - - if ( pt_levels == 2 ) - pte = ((const uint32_t*)spage)[i]; - else - pte = ((const uint64_t*)spage)[i]; - - if ( (i >= xen_start) && (i < xen_end) ) - pte = 0; - - if ( pte & _PAGE_PRESENT ) - { - mfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86; - if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) - { - /* This will happen if the type info is stale which - is quite feasible under live migration */ - pfn = 0; /* zap it - we'll retransmit this page later */ - race = 1; /* inform the caller of race; fatal if !live */ - } - else - pfn = mfn_to_pfn(mfn); - - pte &= ~MADDR_MASK_X86; - pte |= (uint64_t)pfn << PAGE_SHIFT; - - /* - * PAE guest L3Es can contain these flags when running on - * a 64bit hypervisor. We zap these here to avoid any - * surprise at restore time... - */ - if ( (pt_levels == 3) && - (type == XEN_DOMCTL_PFINFO_L3TAB) && - (pte & (_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED)) ) - pte &= ~(_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); - } - - if ( pt_levels == 2 ) - ((uint32_t*)dpage)[i] = pte; - else - ((uint64_t*)dpage)[i] = pte; - } - - return race; -} - -static xen_pfn_t *xc_map_m2p(int xc_handle, - unsigned long max_mfn, - int prot) -{ - struct xen_machphys_mfn_list xmml; - privcmd_mmap_entry_t *entries; - unsigned long m2p_chunks, m2p_size; - xen_pfn_t *m2p; - xen_pfn_t *extent_start; - int i, rc; - - m2p_size = M2P_SIZE(max_mfn); - m2p_chunks = M2P_CHUNKS(max_mfn); - - xmml.max_extents = m2p_chunks; - if ( !(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t))) ) - { - ERROR("failed to allocate space for m2p mfns"); - return NULL; - } - set_xen_guest_handle(xmml.extent_start, extent_start); - - if ( xc_memory_op(xc_handle, XENMEM_machphys_mfn_list, &xmml) || - (xmml.nr_extents != m2p_chunks) ) - { - ERROR("xc_get_m2p_mfns"); - return NULL; - } - - if ( (m2p = mmap(NULL, m2p_size, prot, - MAP_SHARED, xc_handle, 0)) == MAP_FAILED ) - { - ERROR("failed to mmap m2p"); - return NULL; - } - - if ( !(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t))) ) - { - ERROR("failed to allocate space for mmap entries"); - return NULL; - } - - for ( i = 0; i < m2p_chunks; i++ ) - { - entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE)); - entries[i].mfn = extent_start[i]; - entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT; - } - - if ( (rc = xc_map_foreign_ranges(xc_handle, DOMID_XEN, - entries, m2p_chunks)) < 0 ) - { - ERROR("xc_mmap_foreign_ranges failed (rc = %d)", rc); - return NULL; - } - - m2p_mfn0 = entries[0].mfn; - - free(extent_start); - free(entries); - - return m2p; -} - -int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, - uint32_t max_factor, uint32_t flags, int (*suspend)(int)) -{ - xc_dominfo_t info; - - int rc = 1, i, j, last_iter, iter = 0; - int live = (flags & XCFLAGS_LIVE); - int debug = (flags & XCFLAGS_DEBUG); - int race = 0, sent_last_iter, skip_this_iter; - - /* The new domain's shared-info frame number. */ - unsigned long shared_info_frame; - - /* A copy of the CPU context of the guest. */ - vcpu_guest_context_t ctxt; - - /* A table containg the type of each PFN (/not/ MFN!). */ - unsigned long *pfn_type = NULL; - unsigned long *pfn_batch = NULL; - - /* A temporary mapping, and a copy, of one frame of guest memory. */ - char page[PAGE_SIZE]; - - /* Double and single indirect references to the live P2M table */ - xen_pfn_t *live_p2m_frame_list_list = NULL; - xen_pfn_t *live_p2m_frame_list = NULL; - - /* A copy of the pfn-to-mfn table frame list. */ - xen_pfn_t *p2m_frame_list = NULL; - - /* Live mapping of shared info structure */ - shared_info_t *live_shinfo = NULL; - - /* base of the region in which domain memory is mapped */ - unsigned char *region_base = NULL; - - /* power of 2 order of p2m_size */ - int order_nr; - - /* bitmap of pages: - - that should be sent this iteration (unless later marked as skip); - - to skip this iteration because already dirty; - - to fixup by sending at the end if not already resent; */ - unsigned long *to_send = NULL, *to_skip = NULL, *to_fix = NULL; - - xc_shadow_op_stats_t stats; - - unsigned long needed_to_fix = 0; - unsigned long total_sent = 0; - - uint64_t vcpumap = 1ULL; - - /* If no explicit control parameters given, use defaults */ - max_iters = max_iters ? : DEF_MAX_ITERS; - max_factor = max_factor ? : DEF_MAX_FACTOR; - - initialize_mbit_rate(); - - if ( !get_platform_info(xc_handle, dom, - &max_mfn, &hvirt_start, &pt_levels) ) - { - ERROR("Unable to get platform info."); - return 1; - } - - if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 ) - { - ERROR("Could not get domain info"); - return 1; - } - - if ( xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt) ) - { - ERROR("Could not get vcpu context"); - goto out; - } - shared_info_frame = info.shared_info_frame; - - /* Map the shared info frame */ - if ( !(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, - PROT_READ, shared_info_frame)) ) - { - ERROR("Couldn't map live_shinfo"); - goto out; - } - - p2m_size = live_shinfo->arch.max_pfn; - - live_p2m_frame_list_list = map_frame_list_list(xc_handle, dom, - live_shinfo); - if ( !live_p2m_frame_list_list ) - goto out; - - live_p2m_frame_list = - xc_map_foreign_batch(xc_handle, dom, PROT_READ, - live_p2m_frame_list_list, - P2M_FLL_ENTRIES); - if ( !live_p2m_frame_list ) - { - ERROR("Couldn't map p2m_frame_list"); - goto out; - } - - /* Map all the frames of the pfn->mfn table. For migrate to succeed, - the guest must not change which frames are used for this purpose. - (its not clear why it would want to change them, and we'll be OK - from a safety POV anyhow. */ - - live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_READ, - live_p2m_frame_list, - P2M_FL_ENTRIES); - if ( !live_p2m ) - { - ERROR("Couldn't map p2m table"); - goto out; - } - - /* Setup the mfn_to_pfn table mapping */ - if ( !(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ)) ) - { - ERROR("Failed to map live M2P table"); - goto out; - } - - - /* Get a local copy of the live_P2M_frame_list */ - if ( !(p2m_frame_list = malloc(P2M_FL_SIZE)) ) - { - ERROR("Couldn't allocate p2m_frame_list array"); - goto out; - } - memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE); - - /* Canonicalise the pfn-to-mfn table frame-number list. */ - for ( i = 0; i < p2m_size; i += fpp ) - { - if ( !translate_mfn_to_pfn(&p2m_frame_list[i/fpp]) ) - { - ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys"); - ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp, - (uint64_t)p2m_frame_list[i/fpp]); - goto out; - } - } - - /* Domain is still running at this point */ - if ( live ) - { - /* Live suspend. Enable log-dirty mode. */ - if ( xc_shadow_control(xc_handle, dom, - XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, - NULL, 0, NULL, 0, NULL) < 0 ) - { - ERROR("Couldn't enable shadow mode"); - goto out; - } - } - else - { - /* This is a non-live suspend. Suspend the domain .*/ - if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt) ) - { - ERROR("Domain appears not to have suspended"); - goto out; - } - } - - last_iter = !live; - - /* pretend we sent all the pages last iteration */ - sent_last_iter = p2m_size; - - /* calculate the power of 2 order of p2m_size, e.g. - 15->4 16->4 17->5 */ - for ( i = p2m_size-1, order_nr = 0; i ; i >>= 1, order_nr++ ) - continue; - - /* Setup to_send / to_fix and to_skip bitmaps */ - to_send = malloc(BITMAP_SIZE); - to_fix = calloc(1, BITMAP_SIZE); - to_skip = malloc(BITMAP_SIZE); - - if ( !to_send || !to_fix || !to_skip ) - { - ERROR("Couldn't allocate to_send array"); - goto out; - } - - memset(to_send, 0xff, BITMAP_SIZE); - - if ( lock_pages(to_send, BITMAP_SIZE) ) - { - ERROR("Unable to lock to_send"); - return 1; - } - - /* (to fix is local only) */ - if ( lock_pages(to_skip, BITMAP_SIZE) ) - { - ERROR("Unable to lock to_skip"); - return 1; - } - - analysis_phase(xc_handle, dom, p2m_size, to_skip, 0); - - /* We want zeroed memory so use calloc rather than malloc. */ - pfn_type = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type)); - pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch)); - if ( (pfn_type == NULL) || (pfn_batch == NULL) ) - { - ERROR("failed to alloc memory for pfn_type and/or pfn_batch arrays"); - errno = ENOMEM; - goto out; - } - - if ( lock_pages(pfn_type, MAX_BATCH_SIZE * sizeof(*pfn_type)) ) - { - ERROR("Unable to lock"); - goto out; - } - - /* - * Quick belt and braces sanity check. - */ - { - int err=0; - unsigned long mfn; - for ( i = 0; i < p2m_size; i++ ) - { - mfn = live_p2m[i]; - if( (mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i) ) - { - DPRINTF("i=0x%x mfn=%lx live_m2p=%lx\n", i, - mfn, mfn_to_pfn(mfn)); - err++; - } - } - DPRINTF("Had %d unexplained entries in p2m table\n", err); - } - - /* Start writing out the saved-domain record. */ - if ( !write_exact(io_fd, &p2m_size, sizeof(unsigned long)) ) - { - ERROR("write: p2m_size"); - goto out; - } - - /* - * Write an extended-info structure to inform the restore code that - * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off - * slow paths in the restore code. - */ - if ( (pt_levels == 3) && - (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3)) ) - { - unsigned long signature = ~0UL; - uint32_t tot_sz = sizeof(struct vcpu_guest_context) + 8; - uint32_t chunk_sz = sizeof(struct vcpu_guest_context); - char chunk_sig[] = "vcpu"; - if ( !write_exact(io_fd, &signature, sizeof(signature)) || - !write_exact(io_fd, &tot_sz, sizeof(tot_sz)) || - !write_exact(io_fd, &chunk_sig, 4) || - !write_exact(io_fd, &chunk_sz, sizeof(chunk_sz)) || - !write_exact(io_fd, &ctxt, sizeof(ctxt)) ) - { - ERROR("write: extended info"); - goto out; - } - } - - if ( !write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE) ) - { - ERROR("write: p2m_frame_list"); - goto out; - } - - print_stats(xc_handle, dom, 0, &stats, 0); - - /* Now write out each data page, canonicalising page tables as we go... */ - for ( ; ; ) - { - unsigned int prev_pc, sent_this_iter, N, batch; - - iter++; - sent_this_iter = 0; - skip_this_iter = 0; - prev_pc = 0; - N = 0; - - DPRINTF("Saving memory pages: iter %d 0%%", iter); - - while ( N < p2m_size ) - { - unsigned int this_pc = (N * 100) / p2m_size; - int rc; - - if ( (this_pc - prev_pc) >= 5 ) - { - DPRINTF("\b\b\b\b%3d%%", this_pc); - prev_pc = this_pc; - } - - if ( !last_iter ) - { - /* Slightly wasteful to peek the whole array evey time, - but this is fast enough for the moment. */ - rc = xc_shadow_control( - xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, - p2m_size, NULL, 0, NULL); - if ( rc != p2m_size ) - { - ERROR("Error peeking shadow bitmap"); - goto out; - } - } - - /* load pfn_type[] with the mfn of all the pages we're doing in - this batch. */ - for ( batch = 0; - (batch < MAX_BATCH_SIZE) && (N < p2m_size); - N++ ) - { - int n = permute(N, p2m_size, order_nr); - - if ( debug ) - DPRINTF("%d pfn= %08lx mfn= %08lx %d [mfn]= %08lx\n", - iter, (unsigned long)n, live_p2m[n], - test_bit(n, to_send), - mfn_to_pfn(live_p2m[n]&0xFFFFF)); - - if ( !last_iter && - test_bit(n, to_send) && - test_bit(n, to_skip) ) - skip_this_iter++; /* stats keeping */ - - if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) || - (test_bit(n, to_send) && last_iter) || - (test_bit(n, to_fix) && last_iter)) ) - continue; - - /* - ** we get here if: - ** 1. page is marked to_send & hasn't already been re-dirtied - ** 2. (ignore to_skip in last iteration) - ** 3. add in pages that still need fixup (net bufs) - */ - - pfn_batch[batch] = n; - pfn_type[batch] = live_p2m[n]; - - if ( !is_mapped(pfn_type[batch]) ) - { - /* - ** not currently in psuedo-physical map -- set bit - ** in to_fix since we must send this page in last_iter - ** unless its sent sooner anyhow, or it never enters - ** pseudo-physical map (e.g. for ballooned down domains) - */ - set_bit(n, to_fix); - continue; - } - - if ( last_iter && - test_bit(n, to_fix) && - !test_bit(n, to_send) ) - { - needed_to_fix++; - DPRINTF("Fix! iter %d, pfn %x. mfn %lx\n", - iter, n, pfn_type[batch]); - } - - clear_bit(n, to_fix); - - batch++; - } - - if ( batch == 0 ) - goto skip; /* vanishingly unlikely... */ - - region_base = xc_map_foreign_batch( - xc_handle, dom, PROT_READ, pfn_type, batch); - if ( region_base == NULL ) - { - ERROR("map batch failed"); - goto out; - } - - for ( j = 0; j < batch; j++ ) - ((uint32_t *)pfn_type)[j] = pfn_type[j]; - if ( xc_get_pfn_type_batch(xc_handle, dom, batch, - (uint32_t *)pfn_type) ) - { - ERROR("get_pfn_type_batch failed"); - goto out; - } - for ( j = batch-1; j >= 0; j-- ) - pfn_type[j] = ((uint32_t *)pfn_type)[j]; - - for ( j = 0; j < batch; j++ ) - { - - if ( (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) == - XEN_DOMCTL_PFINFO_XTAB ) - { - DPRINTF("type fail: page %i mfn %08lx\n", j, pfn_type[j]); - continue; - } - - if ( debug ) - DPRINTF("%d pfn= %08lx mfn= %08lx [mfn]= %08lx" - " sum= %08lx\n", - iter, - (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) | - pfn_batch[j], - pfn_type[j], - mfn_to_pfn(pfn_type[j] & - ~XEN_DOMCTL_PFINFO_LTAB_MASK), - csum_page(region_base + (PAGE_SIZE*j))); - - /* canonicalise mfn->pfn */ - pfn_type[j] = (pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK) | - pfn_batch[j]; - } - - if ( !write_exact(io_fd, &batch, sizeof(unsigned int)) ) - { - ERROR("Error when writing to state file (2) (errno %d)", - errno); - goto out; - } - - if ( !write_exact(io_fd, pfn_type, sizeof(unsigned long)*j) ) - { - ERROR("Error when writing to state file (3) (errno %d)", - errno); - goto out; - } - - /* entering this loop, pfn_type is now in pfns (Not mfns) */ - for ( j = 0; j < batch; j++ ) - { - unsigned long pfn, pagetype; - void *spage = (char *)region_base + (PAGE_SIZE*j); - - pfn = pfn_type[j] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; - pagetype = pfn_type[j] & XEN_DOMCTL_PFINFO_LTAB_MASK; - - /* write out pages in batch */ - if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ) - continue; - - pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK; - - if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) && - (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) ) - { - /* We have a pagetable page: need to rewrite it. */ - race = - canonicalize_pagetable(pagetype, pfn, spage, page); - - if ( race && !live ) - { - ERROR("Fatal PT race (pfn %lx, type %08lx)", pfn, - pagetype); - goto out; - } - - if ( ratewrite(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE ) - { - ERROR("Error when writing to state file (4)" - " (errno %d)", errno); - goto out; - } - } - else - { - /* We have a normal page: just write it directly. */ - if ( ratewrite(io_fd, live, spage, PAGE_SIZE) != - PAGE_SIZE ) - { - ERROR("Error when writing to state file (5)" - " (errno %d)", errno); - goto out; - } - } - } /* end of the write out for this batch */ - - sent_this_iter += batch; - - munmap(region_base, batch*PAGE_SIZE); - - } /* end of this while loop for this iteration */ - - skip: - - total_sent += sent_this_iter; - - DPRINTF("\r %d: sent %d, skipped %d, ", - iter, sent_this_iter, skip_this_iter ); - - if ( last_iter ) - { - print_stats( xc_handle, dom, sent_this_iter, &stats, 1); - - DPRINTF("Total pages sent= %ld (%.2fx)\n", - total_sent, ((float)total_sent)/p2m_size ); - DPRINTF("(of which %ld were fixups)\n", needed_to_fix ); - } - - if ( last_iter && debug ) - { - int minusone = -1; - memset(to_send, 0xff, BITMAP_SIZE); - debug = 0; - DPRINTF("Entering debug resend-all mode\n"); - - /* send "-1" to put receiver into debug mode */ - if ( !write_exact(io_fd, &minusone, sizeof(int)) ) - { - ERROR("Error when writing to state file (6) (errno %d)", - errno); - goto out; - } - - continue; - } - - if ( last_iter ) - break; - - if ( live ) - { - if ( ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || - (iter >= max_iters) || - (sent_this_iter+skip_this_iter < 50) || - (total_sent > p2m_size*max_factor) ) - { - DPRINTF("Start last iteration\n"); - last_iter = 1; - - if ( suspend_and_state(suspend, xc_handle, io_fd, dom, &info, - &ctxt) ) - { - ERROR("Domain appears not to have suspended"); - goto out; - } - - DPRINTF("SUSPEND shinfo %08lx eip %08lx edx %08lx\n", - info.shared_info_frame, - (unsigned long)ctxt.user_regs.eip, - (unsigned long)ctxt.user_regs.edx); - } - - if ( xc_shadow_control(xc_handle, dom, - XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, - p2m_size, NULL, 0, &stats) != p2m_size ) - { - ERROR("Error flushing shadow PT"); - goto out; - } - - sent_last_iter = sent_this_iter; - - print_stats(xc_handle, dom, sent_this_iter, &stats, 1); - - } - } /* end of infinite for loop */ - - DPRINTF("All memory is saved\n"); - - { - struct { - int minustwo; - int max_vcpu_id; - uint64_t vcpumap; - } chunk = { -2, info.max_vcpu_id }; - - if ( info.max_vcpu_id >= 64 ) - { - ERROR("Too many VCPUS in guest!"); - goto out; - } - - for ( i = 1; i <= info.max_vcpu_id; i++ ) - { - xc_vcpuinfo_t vinfo; - if ( (xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) && - vinfo.online ) - vcpumap |= 1ULL << i; - } - - chunk.vcpumap = vcpumap; - if ( !write_exact(io_fd, &chunk, sizeof(chunk)) ) - { - ERROR("Error when writing to state file (errno %d)", errno); - goto out; - } - } - - /* Zero terminate */ - i = 0; - if ( !write_exact(io_fd, &i, sizeof(int)) ) - { - ERROR("Error when writing to state file (6') (errno %d)", errno); - goto out; - } - - /* Send through a list of all the PFNs that were not in map at the close */ - { - unsigned int i,j; - unsigned long pfntab[1024]; - - for ( i = 0, j = 0; i < p2m_size; i++ ) - { - if ( !is_mapped(live_p2m[i]) ) - j++; - } - - if ( !write_exact(io_fd, &j, sizeof(unsigned int)) ) - { - ERROR("Error when writing to state file (6a) (errno %d)", errno); - goto out; - } - - for ( i = 0, j = 0; i < p2m_size; ) - { - if ( !is_mapped(live_p2m[i]) ) - pfntab[j++] = i; - - i++; - if ( (j == 1024) || (i == p2m_size) ) - { - if ( !write_exact(io_fd, &pfntab, sizeof(unsigned long)*j) ) - { - ERROR("Error when writing to state file (6b) (errno %d)", - errno); - goto out; - } - j = 0; - } - } - } - - /* Canonicalise the suspend-record frame number. */ - if ( !translate_mfn_to_pfn(&ctxt.user_regs.edx) ) - { - ERROR("Suspend record is not in range of pseudophys map"); - goto out; - } - - for ( i = 0; i <= info.max_vcpu_id; i++ ) - { - if ( !(vcpumap & (1ULL << i)) ) - continue; - - if ( (i != 0) && xc_vcpu_getcontext(xc_handle, dom, i, &ctxt) ) - { - ERROR("No context for VCPU%d", i); - goto out; - } - - /* Canonicalise each GDT frame number. */ - for ( j = 0; (512*j) < ctxt.gdt_ents; j++ ) - { - if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[j]) ) - { - ERROR("GDT frame is not in range of pseudophys map"); - goto out; - } - } - - /* Canonicalise the page table base pointer. */ - if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[3])) ) - { - ERROR("PT base is not in range of pseudophys map"); - goto out; - } - ctxt.ctrlreg[3] = - xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3]))); - - /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */ - if ( (pt_levels == 4) && ctxt.ctrlreg[1] ) - { - if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[1])) ) - { - ERROR("PT base is not in range of pseudophys map"); - goto out; - } - /* Least-significant bit means 'valid PFN'. */ - ctxt.ctrlreg[1] = 1 | - xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[1]))); - } - - if ( !write_exact(io_fd, &ctxt, sizeof(ctxt)) ) - { - ERROR("Error when writing to state file (1) (errno %d)", errno); - goto out; - } - } - - /* - * Reset the MFN to be a known-invalid value. See map_frame_list_list(). - */ - memcpy(page, live_shinfo, PAGE_SIZE); - ((shared_info_t *)page)->arch.pfn_to_mfn_frame_list_list = 0; - if ( !write_exact(io_fd, page, PAGE_SIZE) ) - { - ERROR("Error when writing to state file (1) (errno %d)", errno); - goto out; - } - - /* Success! */ - rc = 0; - - out: - - if ( live ) - { - if ( xc_shadow_control(xc_handle, dom, - XEN_DOMCTL_SHADOW_OP_OFF, - NULL, 0, NULL, 0, NULL) < 0 ) - DPRINTF("Warning - couldn't disable shadow mode"); - } - - /* Flush last write and discard cache for file. */ - discard_file_cache(io_fd, 1 /* flush */); - - if ( live_shinfo ) - munmap(live_shinfo, PAGE_SIZE); - - if ( live_p2m_frame_list_list ) - munmap(live_p2m_frame_list_list, PAGE_SIZE); - - if ( live_p2m_frame_list ) - munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE); - - if ( live_p2m ) - munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT)); - - if ( live_m2p ) - munmap(live_m2p, M2P_SIZE(max_mfn)); - - free(pfn_type); - free(pfn_batch); - free(to_send); - free(to_fix); - free(to_skip); - - DPRINTF("Save exit rc=%d\n",rc); - - return !!rc; -} - -/* - * Local variables: - * mode: C - * c-set-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xc_resume.c --- a/tools/libxc/xc_resume.c Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/libxc/xc_resume.c Fri Apr 13 11:14:26 2007 +0100 @@ -3,24 +3,71 @@ #include "xg_save_restore.h" #if defined(__i386__) || defined(__x86_64__) + +#include <xen/foreign/x86_32.h> +#include <xen/foreign/x86_64.h> +#include <xen/hvm/params.h> + +/* Need to provide the right flavour of vcpu context for Xen */ +typedef union +{ + vcpu_guest_context_x86_64_t c64; + vcpu_guest_context_x86_32_t c32; + vcpu_guest_context_t c; +} vcpu_guest_context_either_t; + static int modify_returncode(int xc_handle, uint32_t domid) { - vcpu_guest_context_t ctxt; + vcpu_guest_context_either_t ctxt; + xc_dominfo_t info; + xen_capabilities_info_t caps; int rc; - if ( (rc = xc_vcpu_getcontext(xc_handle, domid, 0, &ctxt)) != 0 ) - return rc; - ctxt.user_regs.eax = 1; - if ( (rc = xc_vcpu_setcontext(xc_handle, domid, 0, &ctxt)) != 0 ) + if ( xc_domain_getinfo(xc_handle, domid, 1, &info) != 1 ) + { + PERROR("Could not get domain info"); + return -1; + } + + /* HVM guests without PV drivers do not have a return code to modify. */ + if ( info.hvm ) + { + unsigned long irq = 0; + xc_get_hvm_param(xc_handle, domid, HVM_PARAM_CALLBACK_IRQ, &irq); + if ( !irq ) + return 0; + } + + if ( xc_version(xc_handle, XENVER_capabilities, &caps) != 0 ) + { + PERROR("Could not get Xen capabilities\n"); + return -1; + } + + if ( (rc = xc_vcpu_getcontext(xc_handle, domid, 0, &ctxt.c)) != 0 ) + return rc; + + if ( !info.hvm ) + ctxt.c.user_regs.eax = 1; + else if ( strstr(caps, "x86_64") ) + ctxt.c64.user_regs.eax = 1; + else + ctxt.c32.user_regs.eax = 1; + + if ( (rc = xc_vcpu_setcontext(xc_handle, domid, 0, &ctxt.c)) != 0 ) return rc; return 0; } + #else + static int modify_returncode(int xc_handle, uint32_t domid) { return 0; -} + +} + #endif static int xc_domain_resume_cooperative(int xc_handle, uint32_t domid) @@ -65,6 +112,12 @@ static int xc_domain_resume_any(int xc_h * (x86 only) Rewrite store_mfn and console_mfn back to MFN (from PFN). */ #if defined(__i386__) || defined(__x86_64__) + if ( info.hvm ) + { + ERROR("Cannot resume uncooperative HVM guests"); + return rc; + } + /* Map the shared info frame */ shinfo = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, PROT_READ, info.shared_info_frame); diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/libxc/xenctrl.h Fri Apr 13 11:14:26 2007 +0100 @@ -840,6 +840,9 @@ const char *xc_error_code_to_desc(int co */ xc_error_handler xc_set_error_handler(xc_error_handler handler); +int xc_set_hvm_param(int handle, domid_t dom, int param, unsigned long value); +int xc_get_hvm_param(int handle, domid_t dom, int param, unsigned long *value); + /* PowerPC specific. */ int xc_alloc_real_mode_area(int xc_handle, uint32_t domid, diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/libxc/xenguest.h Fri Apr 13 11:14:26 2007 +0100 @@ -16,26 +16,19 @@ /** - * This function will save a domain running Linux. + * This function will save a running domain. * * @parm xc_handle a handle to an open hypervisor interface * @parm fd the file descriptor to save a domain to * @parm dom the id of the domain * @return 0 on success, -1 on failure */ -int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, - uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */, - int (*suspend)(int domid)); +int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, + uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */, + int (*suspend)(int domid), int hvm, + void *(*init_qemu_maps)(int, unsigned), /* HVM only */ + void (*qemu_flip_buffer)(int, int)); /* HVM only */ -/** - * This function will save a hvm domain running unmodified guest. - * @return 0 on success, -1 on failure - */ -int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, - uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */, - int (*suspend)(int domid), - void *(*init_qemu_maps)(int, unsigned), - void (*qemu_flip_buffer)(int, int)); /** * This function will restore a saved domain. @@ -143,11 +136,6 @@ int xc_hvm_build_mem(int xc_handle, const char *image_buffer, unsigned long image_size); -int xc_set_hvm_param( - int handle, domid_t dom, int param, unsigned long value); -int xc_get_hvm_param( - int handle, domid_t dom, int param, unsigned long *value); - /* PowerPC specific. */ int xc_prose_build(int xc_handle, uint32_t domid, diff -r 5bda20f0723d -r f92a79e39da8 tools/libxc/xg_private.c --- a/tools/libxc/xg_private.c Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/libxc/xg_private.c Fri Apr 13 11:14:26 2007 +0100 @@ -196,29 +196,6 @@ __attribute__((weak)) { errno = ENOSYS; return -1; -} - -__attribute__((weak)) - int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, - uint32_t max_factor, uint32_t flags, - int (*suspend)(int domid), - void *(*init_qemu_maps)(int, unsigned), - void (*qemu_flip_buffer)(int, int)) -{ - errno = ENOSYS; - return -1; -} - -__attribute__((weak)) int xc_get_hvm_param( - int handle, domid_t dom, int param, unsigned long *value) -{ - return -ENOSYS; -} - -__attribute__((weak)) int xc_set_hvm_param( - int handle, domid_t dom, int param, unsigned long value) -{ - return -ENOSYS; } /* diff -r 5bda20f0723d -r f92a79e39da8 tools/libxen/include/xen_host_cpu.h --- a/tools/libxen/include/xen_host_cpu.h Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/libxen/include/xen_host_cpu.h Fri Apr 13 11:14:26 2007 +0100 @@ -70,6 +70,7 @@ typedef struct xen_host_cpu_record char *modelname; char *stepping; char *flags; + char *features; double utilisation; } xen_host_cpu_record; @@ -223,6 +224,13 @@ xen_host_cpu_get_flags(xen_session *sess /** + * Get the features field of the given host_cpu. + */ +extern bool +xen_host_cpu_get_features(xen_session *session, char **result, xen_host_cpu host_cpu); + + +/** * Get the utilisation field of the given host_cpu. */ extern bool diff -r 5bda20f0723d -r f92a79e39da8 tools/libxen/include/xen_vm.h --- a/tools/libxen/include/xen_vm.h Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/libxen/include/xen_vm.h Fri Apr 13 11:14:26 2007 +0100 @@ -838,6 +838,28 @@ xen_vm_set_vcpus_number_live(xen_session /** + * Add the given key-value pair to VM.VCPUs_params, and apply that + * value on the running VM. + */ +extern bool +xen_vm_add_to_vcpus_params_live(xen_session *session, xen_vm self, char *key, char *value); + + +/** + * Set memory_dynamic_max in database and on running VM. + */ +extern bool +xen_vm_set_memory_dynamic_max_live(xen_session *session, xen_vm self, int64_t max); + + +/** + * Set memory_dynamic_min in database and on running VM. + */ +extern bool +xen_vm_set_memory_dynamic_min_live(xen_session *session, xen_vm self, int64_t min); + + +/** * Send the given key as a sysrq to this VM. The key is specified as a * single character (a String of length 1). This can only be called when the * specified VM is in the Running state. diff -r 5bda20f0723d -r f92a79e39da8 tools/libxen/include/xen_vm_metrics.h --- a/tools/libxen/include/xen_vm_metrics.h Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/libxen/include/xen_vm_metrics.h Fri Apr 13 11:14:26 2007 +0100 @@ -22,6 +22,7 @@ #include "xen_common.h" #include "xen_int_float_map.h" #include "xen_int_int_map.h" +#include "xen_string_set.h" #include "xen_string_string_map.h" #include "xen_vm_metrics_decl.h" @@ -70,6 +71,7 @@ typedef struct xen_vm_metrics_record xen_int_float_map *vcpus_utilisation; xen_int_int_map *vcpus_cpu; xen_string_string_map *vcpus_params; + struct xen_string_set *state; time_t start_time; time_t last_updated; } xen_vm_metrics_record; @@ -210,6 +212,13 @@ xen_vm_metrics_get_vcpus_params(xen_sess /** + * Get the state field of the given VM_metrics. + */ +extern bool +xen_vm_metrics_get_state(xen_session *session, struct xen_string_set **result, xen_vm_metrics vm_metrics); + + +/** * Get the start_time field of the given VM_metrics. */ extern bool diff -r 5bda20f0723d -r f92a79e39da8 tools/libxen/src/xen_host_cpu.c --- a/tools/libxen/src/xen_host_cpu.c Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/libxen/src/xen_host_cpu.c Fri Apr 13 11:14:26 2007 +0100 @@ -61,6 +61,9 @@ static const struct_member xen_host_cpu_ { .key = "flags", .type = &abstract_type_string, .offset = offsetof(xen_host_cpu_record, flags) }, + { .key = "features", + .type = &abstract_type_string, + .offset = offsetof(xen_host_cpu_record, features) }, { .key = "utilisation", .type = &abstract_type_float, .offset = offsetof(xen_host_cpu_record, utilisation) } @@ -90,6 +93,7 @@ xen_host_cpu_record_free(xen_host_cpu_re free(record->modelname); free(record->stepping); free(record->flags); + free(record->features); free(record); } @@ -252,6 +256,23 @@ xen_host_cpu_get_flags(xen_session *sess bool +xen_host_cpu_get_features(xen_session *session, char **result, xen_host_cpu host_cpu) +{ + abstract_value param_values[] = + { + { .type = &abstract_type_string, + .u.string_val = host_cpu } + }; + + abstract_type result_type = abstract_type_string; + + *result = NULL; + XEN_CALL_("host_cpu.get_features"); + return session->ok; +} + + +bool xen_host_cpu_get_utilisation(xen_session *session, double *result, xen_host_cpu host_cpu) { abstract_value param_values[] = diff -r 5bda20f0723d -r f92a79e39da8 tools/libxen/src/xen_vm.c --- a/tools/libxen/src/xen_vm.c Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/libxen/src/xen_vm.c Fri Apr 13 11:14:26 2007 +0100 @@ -1610,6 +1610,56 @@ xen_vm_set_vcpus_number_live(xen_session bool +xen_vm_add_to_vcpus_params_live(xen_session *session, xen_vm self, char *key, char *value) +{ + abstract_value param_values[] = + { + { .type = &abstract_type_string, + .u.string_val = self }, + { .type = &abstract_type_string, + .u.string_val = key }, + { .type = &abstract_type_string, + .u.string_val = value } + }; + + xen_call_(session, "VM.add_to_VCPUs_params_live", param_values, 3, NULL, NULL); + return session->ok; +} + + +bool +xen_vm_set_memory_dynamic_max_live(xen_session *session, xen_vm self, int64_t max) +{ + abstract_value param_values[] = + { + { .type = &abstract_type_string, + .u.string_val = self }, + { .type = &abstract_type_int, + .u.int_val = max } + }; + + xen_call_(session, "VM.set_memory_dynamic_max_live", param_values, 2, NULL, NULL); + return session->ok; +} + + +bool +xen_vm_set_memory_dynamic_min_live(xen_session *session, xen_vm self, int64_t min) +{ + abstract_value param_values[] = + { + { .type = &abstract_type_string, + .u.string_val = self }, + { .type = &abstract_type_int, + .u.int_val = min } + }; + + xen_call_(session, "VM.set_memory_dynamic_min_live", param_values, 2, NULL, NULL); + return session->ok; +} + + +bool xen_vm_send_sysrq(xen_session *session, xen_vm vm, char *key) { abstract_value param_values[] = diff -r 5bda20f0723d -r f92a79e39da8 tools/libxen/src/xen_vm_metrics.c --- a/tools/libxen/src/xen_vm_metrics.c Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/libxen/src/xen_vm_metrics.c Fri Apr 13 11:14:26 2007 +0100 @@ -57,6 +57,9 @@ static const struct_member xen_vm_metric { .key = "VCPUs_params", .type = &abstract_type_string_string_map, .offset = offsetof(xen_vm_metrics_record, vcpus_params) }, + { .key = "state", + .type = &abstract_type_string_set, + .offset = offsetof(xen_vm_metrics_record, state) }, { .key = "start_time", .type = &abstract_type_datetime, .offset = offsetof(xen_vm_metrics_record, start_time) }, @@ -87,6 +90,7 @@ xen_vm_metrics_record_free(xen_vm_metric xen_int_float_map_free(record->vcpus_utilisation); xen_int_int_map_free(record->vcpus_cpu); xen_string_string_map_free(record->vcpus_params); + xen_string_set_free(record->state); free(record); } @@ -215,6 +219,23 @@ xen_vm_metrics_get_vcpus_params(xen_sess bool +xen_vm_metrics_get_state(xen_session *session, struct xen_string_set **result, xen_vm_metrics vm_metrics) +{ + abstract_value param_values[] = + { + { .type = &abstract_type_string, + .u.string_val = vm_metrics } + }; + + abstract_type result_type = abstract_type_string_set; + + *result = NULL; + XEN_CALL_("VM_metrics.get_state"); + return session->ok; +} + + +bool xen_vm_metrics_get_start_time(xen_session *session, time_t *result, xen_vm_metrics vm_metrics) { abstract_value param_values[] = diff -r 5bda20f0723d -r f92a79e39da8 tools/pygrub/src/LiloConf.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/pygrub/src/LiloConf.py Fri Apr 13 11:14:26 2007 +0100 @@ -0,0 +1,147 @@ +# +#LiloConf.py +# + +import sys, re, os +import logging +import GrubConf + +class LiloImage(object): + def __init__(self, lines, path): + self.reset(lines, path) + + def __repr__(self): + return ("title: %s\n" + " root: %s\n" + " kernel: %s\n" + " args: %s\n" + " initrd: %s\n" %(self.title, self.root, self.kernel, + self.args, self.initrd)) + def reset(self, lines, path): + self._root = self._initrd = self._kernel = self._args = None + self.title = "" + self.lines = [] + self.path = path + map(self.set_from_line, lines) + self.root = "" # dummy + + def set_from_line(self, line, replace = None): + (com, arg) = GrubConf.grub_exact_split(line, 2) + + if self.commands.has_key(com): + if self.commands[com] is not None: + exec("%s = r\'%s\'" %(self.commands[com], re.sub('^"(.+)"$', r"\1", arg.strip()))) + else: + logging.info("Ignored image directive %s" %(com,)) + else: + logging.warning("Unknown image directive %s" %(com,)) + + # now put the line in the list of lines + if replace is None: + self.lines.append(line) + else: + self.lines.pop(replace) + self.lines.insert(replace, line) + + def set_kernel(self, val): + self._kernel = (None, self.path + "/" + val) + def get_kernel(self): + return self._kernel + kernel = property(get_kernel, set_kernel) + + def set_initrd(self, val): + self._initrd = (None, self.path + "/" + val) + def get_initrd(self): + return self._initrd + initrd = property(get_initrd, set_initrd) + + # set up command handlers + commands = { "label": "self.title", + "root": "self.root", + "rootnoverify": "self.root", + "image": "self.kernel", + "initrd": "self.initrd", + "append": "self.args", + "read-only": None, + "chainloader": None, + "module": None} + +class LiloConfigFile(object): + def __init__(self, fn = None): + self.filename = fn + self.images = [] + self.timeout = -1 + self._default = 0 + + if fn is not None: + self.parse() + + def parse(self, buf = None): + if buf is None: + if self.filename is None: + raise ValueError, "No config file defined to parse!" + + f = open(self.filename, 'r') + lines = f.readlines() + f.close() + else: + lines = buf.split("\n") + + path = os.path.dirname(self.filename) + img = [] + for l in lines: + l = l.strip() + # skip blank lines + if len(l) == 0: + continue + # skip comments + if l.startswith('#'): + continue + # new image + if l.startswith("image"): + if len(img) > 0: + self.add_image(LiloImage(img, path)) + img = [l] + continue + + if len(img) > 0: + img.append(l) + continue + + (com, arg) = GrubConf.grub_exact_split(l, 2) + if self.commands.has_key(com): + if self.commands[com] is not None: + exec("%s = r\"%s\"" %(self.commands[com], arg.strip())) + else: + logging.info("Ignored directive %s" %(com,)) + else: + logging.warning("Unknown directive %s" %(com,)) + + if len(img) > 0: + self.add_image(LiloImage(img, path)) + + def add_image(self, image): + self.images.append(image) + + def _get_default(self): + for i in range(0, len(self.images) - 1): + if self.images[i].title == self._default: + return i + return 0 + def _set_default(self, val): + self._default = val + default = property(_get_default, _set_default) + + commands = { "default": "self.default", + "timeout": "self.timeout", + "prompt": None, + "relocatable": None, + } + +if __name__ == "__main__": + if sys.argv < 2: + raise RuntimeError, "Need a grub.conf to read" + g = LiloConfigFile(sys.argv[1]) + for i in g.images: + print i #, i.title, i.root, i.kernel, i.args, i.initrd + print g.default diff -r 5bda20f0723d -r f92a79e39da8 tools/pygrub/src/pygrub --- a/tools/pygrub/src/pygrub Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/pygrub/src/pygrub Fri Apr 13 11:14:26 2007 +0100 @@ -16,6 +16,7 @@ import os, sys, string, struct, tempfile import os, sys, string, struct, tempfile, re import copy import logging +import platform import curses, _curses, curses.wrapper, curses.textpad, curses.ascii import getopt @@ -24,6 +25,7 @@ sys.path = [ '/usr/lib/python' ] + sys.p import fsimage import grub.GrubConf +import grub.LiloConf PYGRUB_VER = 0.5 @@ -58,6 +60,13 @@ def get_active_partition(file): # active partition has 0x80 as the first byte if struct.unpack("<c", buf[poff:poff+1]) == ('\x80',): return buf[poff:poff+16] + + # type=0xee: GUID partition table + # XXX assume the first partition is active + if struct.unpack("<c", buf[poff+4:poff+5]) == ('\xee',): + os.lseek(fd, 0x400, 0) + buf = os.read(fd, 512) + return buf[24:40] # XXX buf[32:40] # if there's not a partition marked as active, fall back to # the first partition @@ -346,7 +355,13 @@ class Grub: if not os.access(fn, os.R_OK): raise RuntimeError, "Unable to access %s" %(fn,) - self.cf = grub.GrubConf.GrubConfigFile() + if platform.machine() == 'ia64': + self.cf = grub.LiloConf.LiloConfigFile() + file_list = ("/efi/redhat/elilo.conf",) + else: + self.cf = grub.GrubConf.GrubConfigFile() + file_list = ("/boot/grub/menu.lst", "/boot/grub/grub.conf", + "/grub/menu.lst", "/grub/grub.conf") if not fs: # set the config file and parse it @@ -354,18 +369,15 @@ class Grub: self.cf.parse() return - grubfile = None - for f in ("/boot/grub/menu.lst", "/boot/grub/grub.conf", - "/grub/menu.lst", "/grub/grub.conf"): + for f in file_list: if fs.file_exists(f): - grubfile = f - break - if grubfile is None: - raise RuntimeError, "we couldn't find grub config file in the image provided." - f = fs.open_file(grubfile) + self.cf.filename = f + break + if self.cf.filename is None: + raise RuntimeError, "couldn't find bootloader config file in the image provided." + f = fs.open_file(self.cf.filename) buf = f.read() del f - # then parse the grub config self.cf.parse(buf) def run(self): diff -r 5bda20f0723d -r f92a79e39da8 tools/python/README.XendConfig --- a/tools/python/README.XendConfig Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/python/README.XendConfig Fri Apr 13 11:14:26 2007 +0100 @@ -115,6 +115,7 @@ otherConfig image.nographic image.vnc image.sdl + image.monitor image.vncdisplay image.vncunused image.hvm.device_model diff -r 5bda20f0723d -r f92a79e39da8 tools/python/README.sxpcfg --- a/tools/python/README.sxpcfg Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/python/README.sxpcfg Fri Apr 13 11:14:26 2007 +0100 @@ -63,6 +63,7 @@ image - fdb - soundhw - localtime + - monitor - serial - stdvga - isa diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xend/XendConfig.py --- a/tools/python/xen/xend/XendConfig.py Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/python/xen/xend/XendConfig.py Fri Apr 13 11:14:26 2007 +0100 @@ -117,7 +117,7 @@ LEGACY_CFG_TO_XENAPI_CFG = reverse_dict( # Platform configuration keys. XENAPI_PLATFORM_CFG = [ 'acpi', 'apic', 'boot', 'device_model', 'display', - 'fda', 'fdb', 'keymap', 'isa', 'localtime', + 'fda', 'fdb', 'keymap', 'isa', 'localtime', 'monitor', 'nographic', 'pae', 'rtc_timeoffset', 'serial', 'sdl', 'soundhw','stdvga', 'usb', 'usbdevice', 'vnc', 'vncconsole', 'vncdisplay', 'vnclisten', diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/python/xen/xend/XendDomainInfo.py Fri Apr 13 11:14:26 2007 +0100 @@ -1601,7 +1601,6 @@ class XendDomainInfo: self.image = image.create(self, self.info) if self.image: self.image.createDeviceModel(True) - self.image.register_shutdown_watch() self._storeDomDetails() self._registerWatches() self.refreshShutdown() diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xend/XendNode.py --- a/tools/python/xen/xend/XendNode.py Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/python/xen/xend/XendNode.py Fri Apr 13 11:14:26 2007 +0100 @@ -603,7 +603,7 @@ class XendNode: return [[k, info[k]] for k in ITEM_ORDER] def xendinfo(self): - return [['xend_config_format', 3]] + return [['xend_config_format', 4]] # # utilisation tracking diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/python/xen/xend/image.py Fri Apr 13 11:14:26 2007 +0100 @@ -284,17 +284,16 @@ class HVMImageHandler(ImageHandler): log.debug("acpi = %d", self.acpi) log.debug("apic = %d", self.apic) - self.register_shutdown_watch() - self.register_reboot_feature_watch() - - return xc.hvm_build(domid = self.vm.getDomid(), - image = self.kernel, - store_evtchn = store_evtchn, - memsize = mem_mb, - vcpus = self.vm.getVCpuCount(), - pae = self.pae, - acpi = self.acpi, - apic = self.apic) + rc = xc.hvm_build(domid = self.vm.getDomid(), + image = self.kernel, + store_evtchn = store_evtchn, + memsize = mem_mb, + vcpus = self.vm.getVCpuCount(), + pae = self.pae, + acpi = self.acpi, + apic = self.apic) + rc['notes'] = { 'SUSPEND_CANCEL': 1 } + return rc # Return a list of cmd line args to the device models based on the # xm config file @@ -418,6 +417,8 @@ class HVMImageHandler(ImageHandler): else: ret.append('-nographic') + if int(vmConfig['platform'].get('monitor', 0)) != 0: + ret.append('-monitor vc') return ret def createDeviceModel(self, restore = False): @@ -448,13 +449,9 @@ class HVMImageHandler(ImageHandler): log.info("device model pid: %d", self.pid) def recreate(self): - self.register_shutdown_watch() - self.register_reboot_feature_watch() self.pid = self.vm.gatherDom(('image/device-model-pid', int)) def destroy(self, suspend = False): - self.unregister_shutdown_watch() - self.unregister_reboot_feature_watch(); if self.pid: try: sig = signal.SIGKILL @@ -473,74 +470,6 @@ class HVMImageHandler(ImageHandler): pass self.pid = None - def register_shutdown_watch(self): - """ add xen store watch on control/shutdown """ - self.shutdownWatch = xswatch(self.vm.dompath + "/control/shutdown", - self.hvm_shutdown) - log.debug("hvm shutdown watch registered") - - def unregister_shutdown_watch(self): - """Remove the watch on the control/shutdown, if any. Nothrow - guarantee.""" - - try: - if self.shutdownWatch: - self.shutdownWatch.unwatch() - except: - log.exception("Unwatching hvm shutdown watch failed.") - self.shutdownWatch = None - log.debug("hvm shutdown watch unregistered") - - def hvm_shutdown(self, _): - """ watch call back on node control/shutdown, - if node changed, this function will be called - """ - xd = xen.xend.XendDomain.instance() - try: - vm = xd.domain_lookup( self.vm.getDomid() ) - except XendError: - # domain isn't registered, no need to clean it up. - return False - - reason = vm.getShutdownReason() - log.debug("hvm_shutdown fired, shutdown reason=%s", reason) - if reason in REVERSE_DOMAIN_SHUTDOWN_REASONS: - vm.info['shutdown'] = 1 - vm.info['shutdown_reason'] = \ - REVERSE_DOMAIN_SHUTDOWN_REASONS[reason] - vm.refreshShutdown(vm.info) - - return True # Keep watching - - def register_reboot_feature_watch(self): - """ add xen store watch on control/feature-reboot """ - self.rebootFeatureWatch = xswatch(self.vm.dompath + "/control/feature-reboot", \ - self.hvm_reboot_feature) - log.debug("hvm reboot feature watch registered") - - def unregister_reboot_feature_watch(self): - """Remove the watch on the control/feature-reboot, if any. Nothrow - guarantee.""" - - try: - if self.rebootFeatureWatch: - self.rebootFeatureWatch.unwatch() - except: - log.exception("Unwatching hvm reboot feature watch failed.") - self.rebootFeatureWatch = None - log.debug("hvm reboot feature watch unregistered") - - def hvm_reboot_feature(self, _): - """ watch call back on node control/feature-reboot, - if node changed, this function will be called - """ - status = self.vm.readDom('control/feature-reboot') - log.debug("hvm_reboot_feature fired, module status=%s", status) - if status == '1': - self.unregister_shutdown_watch() - - return True # Keep watching - class IA64_HVM_ImageHandler(HVMImageHandler): diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xm/create.dtd --- a/tools/python/xen/xm/create.dtd Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/python/xen/xm/create.dtd Fri Apr 13 11:14:26 2007 +0100 @@ -95,7 +95,7 @@ src %URI; #REQUIRED type %VDI_TYPE; #REQUIRED size CDATA #REQUIRED - shareable CDATA #REQUIRED + sharable CDATA #REQUIRED read_only CDATA #REQUIRED> <!ELEMENT name (label, diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/python/xen/xm/create.py Fri Apr 13 11:14:26 2007 +0100 @@ -420,6 +420,10 @@ gopts.var('serial', val='FILE', gopts.var('serial', val='FILE', fn=set_value, default='', use="Path to serial or pty or vc") + +gopts.var('monitor', val='no|yes', + fn=set_bool, default=0, + use="""Should the device model use monitor?""") gopts.var('localtime', val='no|yes', fn=set_bool, default=0, diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/python/xen/xm/main.py Fri Apr 13 11:14:26 2007 +0100 @@ -1544,34 +1544,59 @@ def xm_info(args): host_metrics_record = server.xenapi.host_metrics.get_record(host_record["metrics"]) + def getVal(keys, default=""): + data = host_record + for key in keys: + if key in data: + data = data[key] + else: + return default + return data + + def getCpuMhz(): + cpu_speeds = [int(host_cpu_record["speed"]) + for host_cpu_record in host_cpu_records + if "speed" in host_cpu_record] + if len(cpu_speeds) > 0: + return sum(cpu_speeds) / len(cpu_speeds) + else: + return 0 + + getCpuMhz() + + def getCpuFeatures(): + if len(host_cpu_records) > 0: + return host_cpu_records[0].get("features", "") + else: + return "" + info = { - "host": host_record["name_label"], - "release": host_record["software_version"]["release"], - "version": host_record["software_version"]["version"], - "machine": host_record["software_version"]["machine"], - "nr_cpus": len(host_record["host_CPUs"]), - "nr_nodes": host_record["cpu_configuration"]["nr_nodes"], - "sockets_per_node": host_record["cpu_configuration"]["sockets_per_node"], - "cores_per_socket": host_record["cpu_configuration"]["cores_per_socket"], - "threads_per_core": host_record["cpu_configuration"]["threads_per_core"], - "cpu_mhz": sum([int(host_cpu_record["speed"]) for host_cpu_record in host_cpu_records]) - / len(host_cpu_records), - "hw_caps": host_cpu_records[0]["features"], + "host": getVal(["name_label"]), + "release": getVal(["software_version", "release"]), + "version": getVal(["software_version", "version"]), + "machine": getVal(["software_version", "machine"]), + "nr_cpus": len(getVal(["host_CPUs"], [])), + "nr_nodes": getVal(["cpu_configuration", "nr_nodes"]), + "sockets_per_node": getVal(["cpu_configuration", "sockets_per_node"]), + "cores_per_socket": getVal(["cpu_configuration", "cores_per_socket"]), + "threads_per_core": getVal(["cpu_configuration", "threads_per_core"]), + "cpu_mhz": getCpuMhz(), + "hw_caps": getCpuFeatures(), "total_memory": int(host_metrics_record["memory_total"])/1024/1024, "free_memory": int(host_metrics_record["memory_free"])/1024/1024, - "xen_major": host_record["software_version"]["xen_major"], - "xen_minor": host_record["software_version"]["xen_minor"], - "xen_extra": host_record["software_version"]["xen_extra"], - "xen_caps": " ".join(host_record["capabilities"]), - "xen_scheduler": host_record["sched_policy"], - "xen_pagesize": host_record["other_config"]["xen_pagesize"], - "platform_params": host_record["other_config"]["platform_params"], - "xen_changeset": host_record["software_version"]["xen_changeset"], - "cc_compiler": host_record["software_version"]["cc_compiler"], - "cc_compile_by": host_record["software_version"]["cc_compile_by"], - "cc_compile_domain": host_record["software_version"]["cc_compile_domain"], - "cc_compile_date": host_record["software_version"]["cc_compile_date"], - "xend_config_format":host_record["software_version"]["xend_config_format"] + "xen_major": getVal(["software_version", "xen_major"]), + "xen_minor": getVal(["software_version", "xen_minor"]), + "xen_extra": getVal(["software_version", "xen_extra"]), + "xen_caps": " ".join(getVal(["capabilities"], [])), + "xen_scheduler": getVal(["sched_policy"]), + "xen_pagesize": getVal(["other_config", "xen_pagesize"]), + "platform_params": getVal(["other_config", "platform_params"]), + "xen_changeset": getVal(["software_version", "xen_changeset"]), + "cc_compiler": getVal(["software_version", "cc_compiler"]), + "cc_compile_by": getVal(["software_version", "cc_compile_by"]), + "cc_compile_domain": getVal(["software_version", "cc_compile_domain"]), + "cc_compile_date": getVal(["software_version", "cc_compile_date"]), + "xend_config_format":getVal(["software_version", "xend_config_format"]) } sorted = info.items() diff -r 5bda20f0723d -r f92a79e39da8 tools/python/xen/xm/xenapi_create.py --- a/tools/python/xen/xm/xenapi_create.py Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/python/xen/xm/xenapi_create.py Fri Apr 13 11:14:26 2007 +0100 @@ -48,7 +48,7 @@ def get_name_description(node): def get_text_in_child_node(node, child): tag_node = node.getElementsByTagName(child)[0] - return tag_node.nodeValue + return " ".join([child.nodeValue for child in tag_node.childNodes]) def get_child_node_attribute(node, child, attribute): tag_node = node.getElementsByTagName(child)[0] @@ -212,8 +212,8 @@ class xenapi_create: "SR": self.DEFAULT_STORAGE_REPOSITORY, "virtual_size": vdi.attributes["size"].value, "type": vdi.attributes["type"].value, - "shareable": vdi.attributes["shareable"].value, - "read_only": vdi.attributes["read_only"].value, + "sharable": bool(vdi.attributes["sharable"].value), + "read_only": bool(vdi.attributes["read_only"].value), "other_config": {"location": vdi.attributes["src"].value} } @@ -264,7 +264,23 @@ class xenapi_create: "platform": get_child_nodes_as_dict(vm, "platform", "key", "value"), "other_config": - get_child_nodes_as_dict(vm, "other_config", "key", "value") + get_child_nodes_as_dict(vm, "other_config", "key", "value"), + "PV_bootloader": + "", + "PV_kernel": + "", + "PV_ramdisk": + "", + "PV_args": + "", + "PV_bootloader_args": + "", + "HVM_boot_policy": + "", + "HVM_boot_params": + {}, + "PCI_bus": + "" } if len(vm.getElementsByTagName("pv")) > 0: @@ -494,7 +510,7 @@ class sxp2xml: # Make version tag version = document.createElement("version") - version.appendChild(document.createTextNode("1.0")) + version.appendChild(document.createTextNode("0")) vm.appendChild(version) # Make pv or hvm tag @@ -629,10 +645,10 @@ class sxp2xml: vdi.attributes["src"] = src vdi.attributes["read_only"] \ = (get_child_by_name(vbd_sxp, "mode") != "w") \ - and "true" or "false" + and "True" or "False" vdi.attributes["size"] = '-1' vdi.attributes["type"] = "system" - vdi.attributes["shareable"] = "false" + vdi.attributes["sharable"] = "False" vdi.attributes["name"] = name vdi.appendChild(self.make_name_tag(name, document)) diff -r 5bda20f0723d -r f92a79e39da8 tools/xcutils/xc_save.c --- a/tools/xcutils/xc_save.c Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/xcutils/xc_save.c Fri Apr 13 11:14:26 2007 +0100 @@ -174,12 +174,9 @@ main(int argc, char **argv) max_f = atoi(argv[4]); flags = atoi(argv[5]); - if (flags & XCFLAGS_HVM) - ret = xc_hvm_save(xc_fd, io_fd, domid, maxit, max_f, flags, - &suspend, &init_qemu_maps, &qemu_flip_buffer); - else - ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags, - &suspend); + ret = xc_domain_save(xc_fd, io_fd, domid, maxit, max_f, flags, + &suspend, !!(flags & XCFLAGS_HVM), + &init_qemu_maps, &qemu_flip_buffer); xc_interface_close(xc_fd); diff -r 5bda20f0723d -r f92a79e39da8 tools/xm-test/lib/XmTestLib/NetConfig.py --- a/tools/xm-test/lib/XmTestLib/NetConfig.py Thu Apr 12 16:37:32 2007 -0500 +++ b/tools/xm-test/lib/XmTestLib/NetConfig.py Fri Apr 13 11:14:26 2007 +0100 @@ -44,7 +44,11 @@ def getXendNetConfig(): if not xconfig: xconfig = "/etc/xen/xend-config.sxp" - configfile = open(xconfig, 'r') + try: + configfile = open(xconfig, 'r') + except: + return "bridge" + S = configfile.read() pin = Parser() pin.input(S) diff -r 5bda20f0723d -r f92a79e39da8 unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h --- a/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h Thu Apr 12 16:37:32 2007 -0500 +++ b/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h Fri Apr 13 11:14:26 2007 +0100 @@ -2,8 +2,8 @@ #define COMPAT_INCLUDE_XEN_PLATFORM_COMPAT_H #include <linux/version.h> - #include <linux/spinlock.h> +#include <asm/maddr.h> #if defined(__LINUX_COMPILER_H) && !defined(__always_inline) #define __always_inline inline @@ -98,8 +98,6 @@ extern char *kasprintf(gfp_t gfp, const #if defined(_PAGE_PRESENT) && !defined(_PAGE_NX) #define _PAGE_NX 0 -#endif - /* * This variable at present is referenced by netfront, but only in code that * is dead when running in hvm guests. To detect potential active uses of it @@ -107,5 +105,6 @@ extern char *kasprintf(gfp_t gfp, const * mappings created with it will fault when accessed. */ #define __supported_pte_mask ((maddr_t)0) +#endif #endif diff -r 5bda20f0723d -r f92a79e39da8 unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c --- a/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c Thu Apr 12 16:37:32 2007 -0500 +++ b/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c Fri Apr 13 11:14:26 2007 +0100 @@ -6,21 +6,32 @@ #include "platform-pci.h" #include <asm/hypervisor.h> +struct ap_suspend_info { + int do_spin; + atomic_t nr_spinning; +}; + /* * Spinning prevents, for example, APs touching grant table entries while * the shared grant table is not mapped into the address space imemdiately * after resume. */ -static void ap_suspend(void *_ap_spin) +static void ap_suspend(void *_info) { - int *ap_spin = _ap_spin; + struct ap_suspend_info *info = _info; BUG_ON(!irqs_disabled()); - while (*ap_spin) { + atomic_inc(&info->nr_spinning); + mb(); + + while (info->do_spin) { cpu_relax(); HYPERVISOR_yield(); } + + mb(); + atomic_dec(&info->nr_spinning); } static int bp_suspend(void) @@ -42,7 +53,8 @@ static int bp_suspend(void) int __xen_suspend(int fast_suspend) { - int err, suspend_cancelled, ap_spin; + int err, suspend_cancelled, nr_cpus; + struct ap_suspend_info info; xenbus_suspend(); @@ -51,22 +63,30 @@ int __xen_suspend(int fast_suspend) /* Prevent any races with evtchn_interrupt() handler. */ disable_irq(xen_platform_pdev->irq); - ap_spin = 1; + info.do_spin = 1; + atomic_set(&info.nr_spinning, 0); smp_mb(); - err = smp_call_function(ap_suspend, &ap_spin, 0, 0); + nr_cpus = num_online_cpus() - 1; + + err = smp_call_function(ap_suspend, &info, 0, 0); if (err < 0) { preempt_enable(); xenbus_suspend_cancel(); return err; } + while (atomic_read(&info.nr_spinning) != nr_cpus) + cpu_relax(); + local_irq_disable(); suspend_cancelled = bp_suspend(); local_irq_enable(); smp_mb(); - ap_spin = 0; + info.do_spin = 0; + while (atomic_read(&info.nr_spinning) != 0) + cpu_relax(); enable_irq(xen_platform_pdev->irq); diff -r 5bda20f0723d -r f92a79e39da8 xen/arch/ia64/xen/hyperprivop.S --- a/xen/arch/ia64/xen/hyperprivop.S Thu Apr 12 16:37:32 2007 -0500 +++ b/xen/arch/ia64/xen/hyperprivop.S Fri Apr 13 11:14:26 2007 +0100 @@ -304,6 +304,8 @@ ENTRY(hyper_ssm_i) ;; adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18; adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18;; + // temporarily save ar.unat + mov r28=ar.unat bsw.1;; // FIXME?: ar.unat is not really handled correctly, // but may not matter if the OS is NaT-clean @@ -324,6 +326,12 @@ ENTRY(hyper_ssm_i) .mem.offset 0,0; st8.spill [r2]=r30,16; .mem.offset 8,0; st8.spill [r3]=r31,16 ;; bsw.0 ;; + mov r27=ar.unat + adds r26=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18 ;; + //save bank1 ar.unat + st8 [r26]=r27 + //restore ar.unat + mov ar.unat=r28 mov r2=r30 mov r3=r29 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;; @@ -1518,8 +1526,10 @@ ENTRY(hyper_get_psr) adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS,r18 ;; ld8 r20=[r20];; ld1 r21=[r20];; - dep r8=r21,r8,IA64_PSR_I_BIT,1 - ;; + cmp.eq p8,p9=r0,r21 + ;; +(p8) dep r8=-1,r8,IA64_PSR_I_BIT,1 +(p9) dep r8=0,r8,IA64_PSR_I_BIT,1 // set vpsr.dfh adds r20=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;; ld1 r21=[r20];; diff -r 5bda20f0723d -r f92a79e39da8 xen/arch/ia64/xen/mm.c --- a/xen/arch/ia64/xen/mm.c Thu Apr 12 16:37:32 2007 -0500 +++ b/xen/arch/ia64/xen/mm.c Fri Apr 13 11:14:26 2007 +0100 @@ -673,7 +673,7 @@ unsigned long lookup_domain_mpa(struct d } else if (mpaddr - IO_PORTS_PADDR < IO_PORTS_SIZE) { /* Log I/O port probing, but complain less loudly about it */ gdprintk(XENLOG_INFO, "vcpu %d iip 0x%016lx: bad I/O port access " - "0x%lx\n ", current->vcpu_id, PSCB(current, iip), + "0x%lx\n", current->vcpu_id, PSCB(current, iip), IO_SPACE_SPARSE_DECODING(mpaddr - IO_PORTS_PADDR)); } else { gdprintk(XENLOG_WARNING, "vcpu %d iip 0x%016lx: bad mpa 0x%lx " diff -r 5bda20f0723d -r f92a79e39da8 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Thu Apr 12 16:37:32 2007 -0500 +++ b/xen/arch/x86/hvm/hvm.c Fri Apr 13 11:14:26 2007 +0100 @@ -191,6 +191,7 @@ static int hvm_save_cpu_ctxt(struct doma { struct vcpu *v; struct hvm_hw_cpu ctxt; + struct vcpu_guest_context *vc; for_each_vcpu(d, v) { @@ -199,7 +200,40 @@ static int hvm_save_cpu_ctxt(struct doma if ( test_bit(_VPF_down, &v->pause_flags) ) continue; + /* Architecture-specific vmcs/vmcb bits */ hvm_funcs.save_cpu_ctxt(v, &ctxt); + + /* Other vcpu register state */ + vc = &v->arch.guest_context; + if ( vc->flags & VGCF_i387_valid ) + memcpy(ctxt.fpu_regs, &vc->fpu_ctxt, sizeof(ctxt.fpu_regs)); + else + memset(ctxt.fpu_regs, 0, sizeof(ctxt.fpu_regs)); + ctxt.rax = vc->user_regs.eax; + ctxt.rbx = vc->user_regs.ebx; + ctxt.rcx = vc->user_regs.ecx; + ctxt.rdx = vc->user_regs.edx; + ctxt.rbp = vc->user_regs.ebp; + ctxt.rsi = vc->user_regs.esi; + ctxt.rdi = vc->user_regs.edi; + /* %rsp handled by arch-specific call above */ +#ifdef __x86_64__ + ctxt.r8 = vc->user_regs.r8; + ctxt.r9 = vc->user_regs.r9; + ctxt.r10 = vc->user_regs.r10; + ctxt.r11 = vc->user_regs.r11; + ctxt.r12 = vc->user_regs.r12; + ctxt.r13 = vc->user_regs.r13; + ctxt.r14 = vc->user_regs.r14; + ctxt.r15 = vc->user_regs.r15; +#endif + ctxt.dr0 = vc->debugreg[0]; + ctxt.dr1 = vc->debugreg[1]; + ctxt.dr2 = vc->debugreg[2]; + ctxt.dr3 = vc->debugreg[3]; + ctxt.dr6 = vc->debugreg[6]; + ctxt.dr7 = vc->debugreg[7]; + if ( hvm_save_entry(CPU, v->vcpu_id, h, &ctxt) != 0 ) return 1; } @@ -208,9 +242,10 @@ static int hvm_save_cpu_ctxt(struct doma static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h) { - int vcpuid; + int vcpuid, rc; struct vcpu *v; struct hvm_hw_cpu ctxt; + struct vcpu_guest_context *vc; /* Which vcpu is this? */ vcpuid = hvm_load_instance(h); @@ -219,12 +254,51 @@ static int hvm_load_cpu_ctxt(struct doma gdprintk(XENLOG_ERR, "HVM restore: domain has no vcpu %u\n", vcpuid); return -EINVAL; } + vc = &v->arch.guest_context; + + /* Need to init this vcpu before loading its contents */ + LOCK_BIGLOCK(d); + if ( !v->is_initialised ) + if ( (rc = boot_vcpu(d, vcpuid, vc)) != 0 ) + return rc; + UNLOCK_BIGLOCK(d); if ( hvm_load_entry(CPU, h, &ctxt) != 0 ) return -EINVAL; + /* Architecture-specific vmcs/vmcb bits */ if ( hvm_funcs.load_cpu_ctxt(v, &ctxt) < 0 ) return -EINVAL; + + /* Other vcpu register state */ + memcpy(&vc->fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs)); + vc->user_regs.eax = ctxt.rax; + vc->user_regs.ebx = ctxt.rbx; + vc->user_regs.ecx = ctxt.rcx; + vc->user_regs.edx = ctxt.rdx; + vc->user_regs.ebp = ctxt.rbp; + vc->user_regs.esi = ctxt.rsi; + vc->user_regs.edi = ctxt.rdi; + vc->user_regs.esp = ctxt.rsp; +#ifdef __x86_64__ + vc->user_regs.r8 = ctxt.r8; + vc->user_regs.r9 = ctxt.r9; + vc->user_regs.r10 = ctxt.r10; + vc->user_regs.r11 = ctxt.r11; + vc->user_regs.r12 = ctxt.r12; + vc->user_regs.r13 = ctxt.r13; + vc->user_regs.r14 = ctxt.r14; + vc->user_regs.r15 = ctxt.r15; +#endif + vc->debugreg[0] = ctxt.dr0; + vc->debugreg[1] = ctxt.dr1; + vc->debugreg[2] = ctxt.dr2; + vc->debugreg[3] = ctxt.dr3; + vc->debugreg[6] = ctxt.dr6; + vc->debugreg[7] = ctxt.dr7; + + vc->flags = VGCF_i387_valid | VGCF_online; + v->fpu_initialised = 1; /* Auxiliary processors should be woken immediately. */ if ( test_and_clear_bit(_VPF_down, &v->pause_flags) ) diff -r 5bda20f0723d -r f92a79e39da8 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Thu Apr 12 16:37:32 2007 -0500 +++ b/xen/arch/x86/hvm/svm/svm.c Fri Apr 13 11:14:26 2007 +0100 @@ -233,7 +233,7 @@ int svm_vmcb_save(struct vcpu *v, struct { struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - c->eip = vmcb->rip; + c->rip = vmcb->rip; #ifdef HVM_DEBUG_SUSPEND printk("%s: eip=0x%"PRIx64".\n", @@ -241,10 +241,11 @@ int svm_vmcb_save(struct vcpu *v, struct inst_len, c->eip); #endif - c->esp = vmcb->rsp; - c->eflags = vmcb->rflags; + c->rsp = vmcb->rsp; + c->rflags = vmcb->rflags; c->cr0 = v->arch.hvm_svm.cpu_shadow_cr0; + c->cr2 = v->arch.hvm_svm.cpu_cr2; c->cr3 = v->arch.hvm_svm.cpu_cr3; c->cr4 = v->arch.hvm_svm.cpu_shadow_cr4; @@ -315,14 +316,16 @@ int svm_vmcb_restore(struct vcpu *v, str unsigned long mfn, old_base_mfn; struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - vmcb->rip = c->eip; - vmcb->rsp = c->esp; - vmcb->rflags = c->eflags; + vmcb->rip = c->rip; + vmcb->rsp = c->rsp; + vmcb->rflags = c->rflags; v->arch.hvm_svm.cpu_shadow_cr0 = c->cr0; vmcb->cr0 = c->cr0 | X86_CR0_WP | X86_CR0_ET; if ( !paging_mode_hap(v->domain) ) vmcb->cr0 |= X86_CR0_PG; + + v->arch.hvm_svm.cpu_cr2 = c->cr2; #ifdef HVM_DEBUG_SUSPEND printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n", @@ -421,6 +424,9 @@ int svm_vmcb_restore(struct vcpu *v, str vmcb->sysenter_esp = c->sysenter_esp; vmcb->sysenter_eip = c->sysenter_eip; + vmcb->dr6 = c->dr6; + vmcb->dr7 = c->dr7; + paging_update_paging_modes(v); return 0; @@ -440,6 +446,7 @@ void svm_save_cpu_state(struct vcpu *v, data->msr_cstar = vmcb->cstar; data->msr_syscall_mask = vmcb->sfmask; data->msr_efer = v->arch.hvm_svm.cpu_shadow_efer; + data->msr_flags = -1ULL; data->tsc = hvm_get_guest_time(v); } diff -r 5bda20f0723d -r f92a79e39da8 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Thu Apr 12 16:37:32 2007 -0500 +++ b/xen/arch/x86/hvm/vmx/vmx.c Fri Apr 13 11:14:26 2007 +0100 @@ -370,11 +370,12 @@ static inline void __restore_debug_regis int vmx_vmcs_save(struct vcpu *v, struct hvm_hw_cpu *c) { - c->eip = __vmread(GUEST_RIP); - c->esp = __vmread(GUEST_RSP); - c->eflags = __vmread(GUEST_RFLAGS); + c->rip = __vmread(GUEST_RIP); + c->rsp = __vmread(GUEST_RSP); + c->rflags = __vmread(GUEST_RFLAGS); c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0; + c->cr2 = v->arch.hvm_vmx.cpu_cr2; c->cr3 = v->arch.hvm_vmx.cpu_cr3; c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4; @@ -444,12 +445,14 @@ int vmx_vmcs_restore(struct vcpu *v, str vmx_vmcs_enter(v); - __vmwrite(GUEST_RIP, c->eip); - __vmwrite(GUEST_RSP, c->esp); - __vmwrite(GUEST_RFLAGS, c->eflags); + __vmwrite(GUEST_RIP, c->rip); + __vmwrite(GUEST_RSP, c->rsp); + __vmwrite(GUEST_RFLAGS, c->rflags); v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0; __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0); + + v->arch.hvm_vmx.cpu_cr2 = c->cr2; #ifdef HVM_DEBUG_SUSPEND printk("vmx_vmcs_restore: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n", @@ -555,6 +558,8 @@ int vmx_vmcs_restore(struct vcpu *v, str __vmwrite(GUEST_SYSENTER_ESP, c->sysenter_esp); __vmwrite(GUEST_SYSENTER_EIP, c->sysenter_eip); + __vmwrite(GUEST_DR7, c->dr7); + vmx_vmcs_exit(v); paging_update_paging_modes(v); @@ -590,7 +595,7 @@ void vmx_save_cpu_state(struct vcpu *v, data->shadow_gs = guest_state->shadow_gs; /* save msrs */ - data->flags = guest_flags; + data->msr_flags = guest_flags; data->msr_lstar = guest_state->msrs[VMX_INDEX_MSR_LSTAR]; data->msr_star = guest_state->msrs[VMX_INDEX_MSR_STAR]; data->msr_cstar = guest_state->msrs[VMX_INDEX_MSR_CSTAR]; @@ -607,7 +612,7 @@ void vmx_load_cpu_state(struct vcpu *v, struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state; /* restore msrs */ - guest_state->flags = data->flags; + guest_state->flags = data->msr_flags; guest_state->msrs[VMX_INDEX_MSR_LSTAR] = data->msr_lstar; guest_state->msrs[VMX_INDEX_MSR_STAR] = data->msr_star; guest_state->msrs[VMX_INDEX_MSR_CSTAR] = data->msr_cstar; diff -r 5bda20f0723d -r f92a79e39da8 xen/include/public/hvm/save.h --- a/xen/include/public/hvm/save.h Thu Apr 12 16:37:32 2007 -0500 +++ b/xen/include/public/hvm/save.h Fri Apr 13 11:14:26 2007 +0100 @@ -87,12 +87,39 @@ DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct */ struct hvm_hw_cpu { - uint64_t eip; - uint64_t esp; - uint64_t eflags; + uint8_t fpu_regs[512]; + + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t rsp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + + uint64_t rip; + uint64_t rflags; + uint64_t cr0; + uint64_t cr2; uint64_t cr3; uint64_t cr4; + + uint64_t dr0; + uint64_t dr1; + uint64_t dr2; + uint64_t dr3; + uint64_t dr6; + uint64_t dr7; uint32_t cs_sel; uint32_t ds_sel; @@ -142,9 +169,9 @@ struct hvm_hw_cpu { /* msr for em64t */ uint64_t shadow_gs; - uint64_t flags; /* msr content saved/restored. */ + uint64_t msr_flags; uint64_t msr_lstar; uint64_t msr_star; uint64_t msr_cstar; _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |