[Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch # User awilliam@xxxxxxxxxxx # Node ID 6492b9b2796872caf7a1052fdb9236d7cc01b805 # Parent d5a46e4cc340dfaf6c638a64ae536d917065e2ba # Parent 8113c55a6efdc5e3c37c46a684ee8895a0568e65 merge with xen-unstable.hg --- tools/misc/mbootpack/GPL | 340 --- tools/misc/mbootpack/Makefile | 67 tools/misc/mbootpack/README | 75 tools/misc/mbootpack/bin2c.c | 356 --- tools/misc/mbootpack/bootsect.S | 136 - tools/misc/mbootpack/buildimage.c | 176 - tools/misc/mbootpack/mb_header.h | 90 tools/misc/mbootpack/mb_info.h | 217 -- tools/misc/mbootpack/mbootpack.c | 704 ------ tools/misc/mbootpack/mbootpack.h | 109 - tools/misc/mbootpack/setup.S | 1064 ---------- tools/security/example.txt | 376 --- tools/security/install.txt | 87 .hgignore | 3 .hgtags | 1 Config.mk | 56 Makefile | 5 buildconfigs/Rules.mk | 3 config/Linux.mk | 8 config/OpenBSD.mk | 1 config/StdGNU.mk | 30 config/SunOS.mk | 35 config/ia64.mk | 2 config/powerpc64.mk | 1 config/x86_32.mk | 10 config/x86_64.mk | 12 docs/Makefile | 5 docs/man/xm.pod.1 | 4 docs/src/interface.tex | 42 linux-2.6-xen-sparse/arch/i386/kernel/fixup.c | 3 linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c | 38 linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c | 11 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c | 82 linux-2.6-xen-sparse/drivers/xen/netback/interface.c | 2 linux-2.6-xen-sparse/drivers/xen/netback/netback.c | 6 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 7 linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c | 8 patches/linux-2.6.16.29/net-gso-5-rcv-mss.patch | 11 tools/Makefile | 2 tools/Rules.mk | 8 tools/blktap/drivers/Makefile | 6 tools/blktap/lib/Makefile | 8 tools/check/check_brctl | 31 tools/check/check_iproute | 29 tools/check/check_python | 17 tools/check/check_zlib_devel | 17 tools/check/check_zlib_lib | 17 tools/check/chk | 19 tools/console/Makefile | 8 tools/console/daemon/io.c | 2 tools/console/daemon/utils.c | 2 tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c | 2 tools/examples/Makefile | 5 tools/examples/vtpm-common.sh | 6 tools/examples/xend-config.sxp | 4 tools/examples/xmexample.hvm | 5 tools/examples/xmexample.vti | 5 tools/firmware/Makefile | 6 tools/firmware/acpi/Makefile | 2 tools/firmware/hvmloader/Makefile | 10 tools/firmware/vmxassist/Makefile | 9 tools/firmware/vmxassist/head.S | 2 tools/firmware/vmxassist/trap.S | 2 tools/firmware/vmxassist/util.c | 25 tools/firmware/vmxassist/util.h | 1 tools/firmware/vmxassist/vm86.c | 48 tools/guest-headers/Makefile | 12 tools/ioemu/Makefile.target | 22 tools/ioemu/d3des.c | 434 ++++ tools/ioemu/d3des.h | 51 tools/ioemu/hw/ide.c | 4 tools/ioemu/hw/xen_platform.c | 8 tools/ioemu/target-i386-dm/rtc-dm.c | 107 + tools/ioemu/vl.c | 14 tools/ioemu/vl.h | 4 tools/ioemu/vnc.c | 112 + tools/ioemu/xenstore.c | 51 tools/libxc/Makefile | 18 tools/libxc/ia64/xc_ia64_linux_restore.c | 44 tools/libxc/ia64/xc_ia64_linux_save.c | 64 tools/libxc/xc_hvm_build.c | 35 tools/libxc/xc_linux_restore.c | 94 tools/libxc/xc_linux_save.c | 104 tools/libxc/xc_private.h | 8 tools/libxc/xc_ptrace.c | 2 tools/libxc/xc_ptrace.h | 24 tools/libxc/xc_ptrace_core.c | 2 tools/libxc/xc_solaris.c | 235 ++ tools/libxc/xenctrl.h | 35 tools/libxc/xg_private.c | 1 tools/libxc/xg_private.h | 4 tools/misc/Makefile | 10 tools/misc/lomount/Makefile | 5 tools/misc/miniterm/Makefile | 4 tools/misc/xend | 10 tools/pygrub/Makefile | 6 tools/python/Makefile | 6 tools/python/xen/util/auxbin.py | 2 tools/python/xen/util/security.py | 23 tools/python/xen/xend/XendDomainInfo.py | 2 
tools/python/xen/xend/XendRoot.py | 11 tools/python/xen/xend/arch.py | 1 tools/python/xen/xend/image.py | 35 tools/python/xen/xend/osdep.py | 36 tools/python/xen/xend/server/SrvDaemon.py | 9 tools/python/xen/xend/server/blkif.py | 3 tools/python/xen/xm/addlabel.py | 9 tools/python/xen/xm/cfgbootpolicy.py | 134 - tools/python/xen/xm/create.py | 5 tools/python/xen/xm/getlabel.py | 3 tools/python/xen/xm/rmlabel.py | 3 tools/security/policy.txt | 163 - tools/security/policytools.txt | 148 + tools/security/readme.txt | 31 tools/security/secpol_tool.c | 14 tools/security/secpol_xml2bin.c | 10 tools/vnet/doc/Makefile | 5 tools/vnet/examples/Makefile | 6 tools/vnet/libxutil/Makefile | 7 tools/vnet/scripts/Makefile | 6 tools/vnet/vnetd/Makefile | 2 tools/vtpm/Rules.mk | 5 tools/vtpm_manager/Rules.mk | 5 tools/xcutils/Makefile | 6 tools/xcutils/readnotes.c | 6 tools/xenmon/Makefile | 9 tools/xenstat/libxenstat/Makefile | 12 tools/xenstat/xentop/Makefile | 7 tools/xenstat/xentop/xentop.c | 15 tools/xenstore/Makefile | 26 tools/xenstore/xenstored_core.c | 7 tools/xenstore/xenstored_core.h | 3 tools/xenstore/xenstored_linux.c | 4 tools/xenstore/xenstored_solaris.c | 66 tools/xenstore/xs_lib.c | 9 tools/xenstore/xsls.c | 1 tools/xentrace/Makefile | 5 tools/xentrace/formats | 29 tools/xm-test/README | 32 tools/xm-test/configure.ac | 15 tools/xm-test/grouptest/default | 1 tools/xm-test/grouptest/security | 1 tools/xm-test/lib/XmTestLib/XenDomain.py | 5 tools/xm-test/lib/XmTestLib/acm.py | 91 tools/xm-test/lib/XmTestLib/block_utils.py | 2 tools/xm-test/runtest.sh | 2 tools/xm-test/tests/Makefile.am | 1 tools/xm-test/tests/security-acm/01_security-acm_basic.py | 121 + tools/xm-test/tests/security-acm/02_security-acm_dom_start.py | 64 tools/xm-test/tests/security-acm/03_security-acm_dom_conflict.py | 60 tools/xm-test/tests/security-acm/04_security-acm_dom_res.py | 69 tools/xm-test/tests/security-acm/05_security-acm_dom_res_conf.py | 38 tools/xm-test/tests/security-acm/06_security-acm_dom_block_attach.py | 82 tools/xm-test/tests/security-acm/Makefile.am | 28 tools/xm-test/tests/security-acm/acm_utils.py | 15 tools/xm-test/tests/security-acm/xm-test-security_policy.xml | 110 + unmodified_drivers/linux-2.6/mkbuildtree | 55 xen/Makefile | 10 xen/Rules.mk | 23 xen/acm/acm_chinesewall_hooks.c | 2 xen/acm/acm_core.c | 107 - xen/acm/acm_simple_type_enforcement_hooks.c | 2 xen/arch/ia64/Rules.mk | 2 xen/arch/ia64/linux-xen/smpboot.c | 3 xen/arch/ia64/xen/xenmisc.c | 4 xen/arch/powerpc/Makefile | 2 xen/arch/powerpc/domain.c | 4 xen/arch/x86/Makefile | 5 xen/arch/x86/Rules.mk | 37 xen/arch/x86/boot/mkelf32.c | 5 xen/arch/x86/domain.c | 14 xen/arch/x86/domain_build.c | 5 xen/arch/x86/hvm/Makefile | 1 xen/arch/x86/hvm/hvm.c | 11 xen/arch/x86/hvm/i8254.c | 16 xen/arch/x86/hvm/i8259.c | 93 xen/arch/x86/hvm/intercept.c | 119 - xen/arch/x86/hvm/io.c | 44 xen/arch/x86/hvm/platform.c | 843 +++---- xen/arch/x86/hvm/rtc.c | 393 +++ xen/arch/x86/hvm/svm/intr.c | 38 xen/arch/x86/hvm/svm/svm.c | 86 xen/arch/x86/hvm/svm/vmcb.c | 9 xen/arch/x86/hvm/svm/x86_32/exits.S | 2 xen/arch/x86/hvm/svm/x86_64/exits.S | 2 xen/arch/x86/hvm/vioapic.c | 53 xen/arch/x86/hvm/vlapic.c | 3 xen/arch/x86/hvm/vmx/io.c | 2 xen/arch/x86/hvm/vmx/vmx.c | 247 +- xen/arch/x86/i387.c | 9 xen/arch/x86/io_apic.c | 11 xen/arch/x86/irq.c | 2 xen/arch/x86/mm/shadow/common.c | 226 +- xen/arch/x86/mm/shadow/multi.c | 872 ++------ xen/arch/x86/mm/shadow/multi.h | 4 xen/arch/x86/mm/shadow/private.h | 75 xen/arch/x86/mm/shadow/types.h | 123 - 
xen/arch/x86/oprofile/op_model_athlon.c | 28 xen/arch/x86/oprofile/op_model_p4.c | 10 xen/arch/x86/oprofile/op_model_ppro.c | 10 xen/arch/x86/oprofile/xenoprof.c | 127 - xen/arch/x86/setup.c | 45 xen/arch/x86/time.c | 7 xen/common/Makefile | 3 xen/common/domain.c | 2 xen/common/grant_table.c | 2 xen/common/keyhandler.c | 1 xen/common/page_alloc.c | 2 xen/common/schedule.c | 2 xen/common/time.c | 77 xen/common/trace.c | 2 xen/common/vsprintf.c | 2 xen/drivers/char/console.c | 4 xen/drivers/char/serial.c | 4 xen/include/acm/acm_hooks.h | 14 xen/include/acpi/platform/acenv.h | 2 xen/include/asm-ia64/linux-xen/asm/spinlock.h | 8 xen/include/asm-powerpc/spinlock.h | 12 xen/include/asm-x86/domain.h | 17 xen/include/asm-x86/hvm/io.h | 42 xen/include/asm-x86/hvm/support.h | 6 xen/include/asm-x86/hvm/svm/vmcb.h | 1 xen/include/asm-x86/hvm/vcpu.h | 5 xen/include/asm-x86/hvm/vpic.h | 2 xen/include/asm-x86/hvm/vpit.h | 56 xen/include/asm-x86/mm.h | 17 xen/include/asm-x86/perfc_defn.h | 2 xen/include/asm-x86/shadow.h | 7 xen/include/asm-x86/spinlock.h | 8 xen/include/asm-x86/x86_64/asm_defns.h | 6 xen/include/public/io/ring.h | 22 xen/include/public/trace.h | 13 xen/include/xen/cpumask.h | 8 xen/include/xen/domain.h | 2 xen/include/xen/lib.h | 2 xen/include/xen/spinlock.h | 8 xen/include/xen/stdarg.h | 5 xen/include/xen/time.h | 14 238 files changed, 5136 insertions(+), 6666 deletions(-) diff -r d5a46e4cc340 -r 6492b9b27968 .hgignore --- a/.hgignore Sun Oct 22 14:39:15 2006 -0600 +++ b/.hgignore Sun Oct 22 15:23:52 2006 -0600 @@ -15,8 +15,11 @@ .*\.rej$ .*/a\.out$ .*/cscope\..*$ +^cscope.*$ ^[^/]*\.bz2$ +^\.config$ ^TAGS$ +^tags$ ^dist/.*$ ^docs/.*\.aux$ ^docs/.*\.dvi$ diff -r d5a46e4cc340 -r 6492b9b27968 .hgtags --- a/.hgtags Sun Oct 22 14:39:15 2006 -0600 +++ b/.hgtags Sun Oct 22 15:23:52 2006 -0600 @@ -15,3 +15,4 @@ c8fdb0caa77b429cf47f9707926e83947778cb48 c8fdb0caa77b429cf47f9707926e83947778cb48 RELEASE-3.0.0 af0573e9e5258db0a9d28aa954dd302ddd2c2d23 3.0.2-rc d0d3fef37685be264a7f52201f8ef44c030daad3 3.0.2-branched +6ed4368b4a9e1924c983774c4b1a2b6baf8e98a6 3.0.3-branched diff -r d5a46e4cc340 -r 6492b9b27968 Config.mk --- a/Config.mk Sun Oct 22 14:39:15 2006 -0600 +++ b/Config.mk Sun Oct 22 15:23:52 2006 -0600 @@ -4,43 +4,21 @@ debug ?= n debug ?= n XEN_COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/ \ - -e s/ppc/powerpc/) + -e s/ppc/powerpc/ -e s/i86pc/x86_32/) XEN_TARGET_ARCH ?= $(XEN_COMPILE_ARCH) XEN_TARGET_X86_PAE ?= n +XEN_OS ?= $(shell uname -s) + +CONFIG_$(XEN_OS) := y # Tools to run on system hosting the build HOSTCC = gcc HOSTCFLAGS = -Wall -Werror -Wstrict-prototypes -O2 -fomit-frame-pointer -AS = $(CROSS_COMPILE)as -LD = $(CROSS_COMPILE)ld -CC = $(CROSS_COMPILE)gcc -CPP = $(CROSS_COMPILE)gcc -E -AR = $(CROSS_COMPILE)ar -RANLIB = $(CROSS_COMPILE)ranlib -NM = $(CROSS_COMPILE)nm -STRIP = $(CROSS_COMPILE)strip -OBJCOPY = $(CROSS_COMPILE)objcopy -OBJDUMP = $(CROSS_COMPILE)objdump - DISTDIR ?= $(XEN_ROOT)/dist DESTDIR ?= / -INSTALL = install -INSTALL_DIR = $(INSTALL) -d -m0755 -INSTALL_DATA = $(INSTALL) -m0644 -INSTALL_PROG = $(INSTALL) -m0755 - -ifneq ($(debug),y) -# Optimisation flags are overridable -CFLAGS ?= -O2 -fomit-frame-pointer -CFLAGS += -DNDEBUG -else -# Less than -O1 produces bad code and large stack frames -CFLAGS ?= -O1 -fno-omit-frame-pointer -CFLAGS += -g -endif - +include $(XEN_ROOT)/config/$(XEN_OS).mk include $(XEN_ROOT)/config/$(XEN_TARGET_ARCH).mk ifneq ($(EXTRA_PREFIX),) @@ -48,18 +26,30 @@ EXTRA_LIB += $(EXTRA_PREFIX)/$(LIBDIR) EXTRA_LIB += 
$(EXTRA_PREFIX)/$(LIBDIR) endif -test-gcc-flag = $(shell $(1) -v --help 2>&1 | grep -q " $(2) " && echo $(2)) +# cc-option +# Usage: cflags-y += $(call cc-option,$(CC),-march=winchip-c6,-march=i586) +cc-option = $(shell if test -z "`$(1) $(2) -S -o /dev/null -xc \ + /dev/null 2>&1`"; then echo "$(2)"; else echo "$(3)"; fi ;) + +ifneq ($(debug),y) +CFLAGS += -DNDEBUG +else +CFLAGS += -g +endif + +CFLAGS += -std=gnu99 CFLAGS += -Wall -Wstrict-prototypes -HOSTCFLAGS += $(call test-gcc-flag,$(HOSTCC),-Wdeclaration-after-statement) -CFLAGS += $(call test-gcc-flag,$(CC),-Wdeclaration-after-statement) +# -Wunused-value makes GCC 4.x too aggressive for my taste: ignoring the +# result of any casted expression causes a warning. +CFLAGS += -Wno-unused-value + +HOSTCFLAGS += $(call cc-option,$(HOSTCC),-Wdeclaration-after-statement,) +CFLAGS += $(call cc-option,$(CC),-Wdeclaration-after-statement,) LDFLAGS += $(foreach i, $(EXTRA_LIB), -L$(i)) CFLAGS += $(foreach i, $(EXTRA_INCLUDES), -I$(i)) - -# Choose the best mirror to download linux kernel -KERNEL_REPO = http://www.kernel.org # If ACM_SECURITY = y, then the access control module is compiled # into Xen and the policy type can be set by the boot policy file diff -r d5a46e4cc340 -r 6492b9b27968 Makefile --- a/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -1,11 +1,6 @@ # # Grand Unified Makefile for Xen. # - -KERNELS ?= linux-2.6-xen -# You may use wildcards in the above e.g. KERNELS=*2.6* - -XKERNELS := $(foreach kernel, $(KERNELS), $(patsubst buildconfigs/mk.%,%,$(wildcard buildconfigs/mk.$(kernel))) ) # Export target architecture overrides to Xen and Linux sub-trees. ifneq ($(XEN_TARGET_ARCH),) diff -r d5a46e4cc340 -r 6492b9b27968 buildconfigs/Rules.mk --- a/buildconfigs/Rules.mk Sun Oct 22 14:39:15 2006 -0600 +++ b/buildconfigs/Rules.mk Sun Oct 22 15:23:52 2006 -0600 @@ -2,6 +2,9 @@ include Config.mk include Config.mk export DESTDIR + +# Choose the best mirror to download linux kernel +KERNEL_REPO = http://www.kernel.org ALLKERNELS = $(patsubst buildconfigs/mk.%,%,$(wildcard buildconfigs/mk.*)) ALLSPARSETREES = $(patsubst %-xen-sparse,%,$(wildcard *-xen-sparse)) diff -r d5a46e4cc340 -r 6492b9b27968 config/ia64.mk --- a/config/ia64.mk Sun Oct 22 14:39:15 2006 -0600 +++ b/config/ia64.mk Sun Oct 22 15:23:52 2006 -0600 @@ -1,4 +1,6 @@ CONFIG_IA64 := y CONFIG_IA64 := y +CONFIG_IA64_$(XEN_OS) := y + CONFIG_IOEMU := y CONFIG_XCUTILS := y diff -r d5a46e4cc340 -r 6492b9b27968 config/powerpc64.mk --- a/config/powerpc64.mk Sun Oct 22 14:39:15 2006 -0600 +++ b/config/powerpc64.mk Sun Oct 22 15:23:52 2006 -0600 @@ -1,4 +1,5 @@ CONFIG_POWERPC := y CONFIG_POWERPC := y +CONFIG_POWERPC_$(XEN_OS) := y CFLAGS += -DELFSIZE=64 LIBDIR := lib diff -r d5a46e4cc340 -r 6492b9b27968 config/x86_32.mk --- a/config/x86_32.mk Sun Oct 22 14:39:15 2006 -0600 +++ b/config/x86_32.mk Sun Oct 22 15:23:52 2006 -0600 @@ -1,9 +1,17 @@ CONFIG_X86 := y CONFIG_X86 := y +CONFIG_X86_$(XEN_OS) := y + CONFIG_HVM := y CONFIG_MIGRATE := y CONFIG_XCUTILS := y CONFIG_IOEMU := y -CONFIG_MBOOTPACK := y CFLAGS += -m32 -march=i686 LIBDIR := lib + +# Use only if calling $(LD) directly. 
+ifeq ($(XEN_OS),OpenBSD) +LDFLAGS_DIRECT += -melf_i386_obsd +else +LDFLAGS_DIRECT += -melf_i386 +endif diff -r d5a46e4cc340 -r 6492b9b27968 config/x86_64.mk --- a/config/x86_64.mk Sun Oct 22 14:39:15 2006 -0600 +++ b/config/x86_64.mk Sun Oct 22 15:23:52 2006 -0600 @@ -1,9 +1,17 @@ CONFIG_X86 := y CONFIG_X86 := y +CONFIG_X86_$(XEN_OS) := y + CONFIG_HVM := y CONFIG_MIGRATE := y CONFIG_XCUTILS := y CONFIG_IOEMU := y -CONFIG_MBOOTPACK := y CFLAGS += -m64 -LIBDIR = lib64 +LIBDIR = $(LIB64DIR) + +# Use only if calling $(LD) directly. +ifeq ($(XEN_OS),OpenBSD) +LDFLAGS_DIRECT += -melf_x86_64_obsd +else +LDFLAGS_DIRECT += -melf_x86_64 +endif diff -r d5a46e4cc340 -r 6492b9b27968 docs/Makefile --- a/docs/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/docs/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -1,8 +1,9 @@ #!/usr/bin/make -f +XEN_ROOT=.. +include $(XEN_ROOT)/Config.mk + VERSION = xen-unstable -INSTALL = install -INSTALL_DIR = $(INSTALL) -d -m0755 PS2PDF := ps2pdf DVIPS := dvips diff -r d5a46e4cc340 -r 6492b9b27968 docs/man/xm.pod.1 --- a/docs/man/xm.pod.1 Sun Oct 22 14:39:15 2006 -0600 +++ b/docs/man/xm.pod.1 Sun Oct 22 15:23:52 2006 -0600 @@ -810,13 +810,13 @@ Loads the binary representation of the I Loads the binary representation of the I<policy> into Xen. The binary representation can be created with the B<makepolicy> subcommand. -=item B<cfgbootpolicy> I<policy> [I<kernelversion>] +=item B<cfgbootpolicy> I<policy> [I<boot title>] Configures I<policy> as the boot policy for Xen. It copies the binary policy representation into the /boot directory and adds a module line specifying the binary policy to the /boot/grub/menu.lst file. If your boot configuration includes multiple Xen boot titles, then use the -I<kernelversion> parameter to select the proper title. +I<boot title> parameter to specify a unique part of the proper title. =item B<dumppolicy> diff -r d5a46e4cc340 -r 6492b9b27968 docs/src/interface.tex --- a/docs/src/interface.tex Sun Oct 22 14:39:15 2006 -0600 +++ b/docs/src/interface.tex Sun Oct 22 15:23:52 2006 -0600 @@ -955,7 +955,6 @@ A {\bf /vm} entry contains the following A {\bf /vm} entry contains the following information: \begin{description} -\item[ssidref] ssid reference for domain \item[uuid] uuid of the domain (somewhat redundant) \item[on\_reboot] the action to take on a domain reboot request (destroy or restart) \item[on\_poweroff] the action to take on a domain halt request (destroy or restart) @@ -1125,6 +1124,16 @@ This path contains: \end{description} \end{description} + \item[security/] access control information for the domain + \begin{description} + \item[ssidref] security reference identifier used inside the hypervisor + \item[access\_control/] security label used by management tools + \begin{description} + \item[label] security label name + \item[policy] security policy name + \end{description} + \end{description} + \item[store/] per-domain information for the store \begin{description} \item[port] the event channel used for the store ring queue @@ -2168,18 +2177,45 @@ implementing them (in {\tt xen/common/do implementing them (in {\tt xen/common/dom0\_ops.c}) and in the user-space tools that use them (mostly in {\tt tools/libxc}). +\section{Access Control Module Hypercalls} +\label{s:acmops} + Hypercalls relating to the management of the Access Control Module are -also restricted to domain 0 access for now: +also restricted to domain 0 access for now. For more details on any or +all of these, please see {\tt xen/include/public/acm\_ops.h}. 
A +complete list is given below: \begin{quote} -\hypercall{acm\_op(struct acm\_op * u\_acm\_op)} +\hypercall{acm\_op(int cmd, void *args)} This hypercall can be used to configure the state of the ACM, query that state, request access control decisions and dump additional information. +\begin{description} + +\item [ACMOP\_SETPOLICY:] set the access control policy + +\item [ACMOP\_GETPOLICY:] get the current access control policy and + status + +\item [ACMOP\_DUMPSTATS:] get current access control hook invocation + statistics + +\item [ACMOP\_GETSSID:] get security access control information for a + domain + +\item [ACMOP\_GETDECISION:] get access decision based on the currently + enforced access control policy + +\end{description} \end{quote} + +Most of the above are best understood by looking at the code +implementing them (in {\tt xen/common/acm\_ops.c}) and in the +user-space tools that use them (mostly in {\tt tools/security} and +{\tt tools/python/xen/lowlevel/acm}). \section{Debugging Hypercalls} diff -r d5a46e4cc340 -r 6492b9b27968 linux-2.6-xen-sparse/arch/i386/kernel/fixup.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/fixup.c Sun Oct 22 14:39:15 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/fixup.c Sun Oct 22 15:23:52 2006 -0600 @@ -46,6 +46,9 @@ fastcall void do_fixup_4gb_segment(struc if (test_and_set_bit(0, &printed)) return; + if (current->tgid == 1) /* Ignore statically linked init */ + return; + HYPERVISOR_vm_assist( VMASST_CMD_disable, VMASST_TYPE_4gb_segments_notify); diff -r d5a46e4cc340 -r 6492b9b27968 linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c Sun Oct 22 14:39:15 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c Sun Oct 22 15:23:52 2006 -0600 @@ -50,9 +50,6 @@ MODULE_LICENSE("GPL"); /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ static DECLARE_MUTEX(microcode_sem); - -static void __user *user_buffer; /* user area microcode data buffer */ -static unsigned int user_buffer_size; /* it's size */ static int microcode_open (struct inode *unused1, struct file *unused2) { @@ -60,21 +57,26 @@ static int microcode_open (struct inode } -static int do_microcode_update (void) +static int do_microcode_update (const void __user *ubuf, size_t len) { int err; - dom0_op_t op; + void *kbuf; - err = sys_mlock((unsigned long)user_buffer, user_buffer_size); - if (err != 0) - return err; + kbuf = vmalloc(len); + if (!kbuf) + return -ENOMEM; - op.cmd = DOM0_MICROCODE; - set_xen_guest_handle(op.u.microcode.data, user_buffer); - op.u.microcode.length = user_buffer_size; - err = HYPERVISOR_dom0_op(&op); + if (copy_from_user(kbuf, ubuf, len) == 0) { + dom0_op_t op; - (void)sys_munlock((unsigned long)user_buffer, user_buffer_size); + op.cmd = DOM0_MICROCODE; + set_xen_guest_handle(op.u.microcode.data, kbuf); + op.u.microcode.length = len; + err = HYPERVISOR_dom0_op(&op); + } else + err = -EFAULT; + + vfree(kbuf); return err; } @@ -88,17 +90,9 @@ static ssize_t microcode_write (struct f return -EINVAL; } - if ((len >> PAGE_SHIFT) > num_physpages) { - printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); - return -EINVAL; - } - down(µcode_sem); - user_buffer = (void __user *) buf; - user_buffer_size = (int) len; - - ret = do_microcode_update(); + ret = do_microcode_update(buf, len); if (!ret) ret = (ssize_t)len; diff -r d5a46e4cc340 -r 6492b9b27968 linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c --- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Sun Oct 22 
14:39:15 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Sun Oct 22 15:23:52 2006 -0600 @@ -343,6 +343,7 @@ static void backend_changed(struct xenbu case XenbusStateInitialising: case XenbusStateInitWait: case XenbusStateInitialised: + case XenbusStateUnknown: break; case XenbusStateConnected: @@ -351,13 +352,14 @@ static void backend_changed(struct xenbu case XenbusStateClosing: tpmif_set_connected_state(tp, 0); + xenbus_frontend_closed(dev); break; - case XenbusStateUnknown: case XenbusStateClosed: + tpmif_set_connected_state(tp, 0); if (tp->is_suspended == 0) device_unregister(&dev->dev); - xenbus_switch_state(dev, XenbusStateClosed); + xenbus_frontend_closed(dev); break; } } @@ -419,9 +421,10 @@ static int tpmfront_suspend(struct xenbu mutex_lock(&suspend_lock); tp->is_suspended = 1; - for (ctr = 0; atomic_read(&tp->tx_busy) && ctr <= 25; ctr++) { + for (ctr = 0; atomic_read(&tp->tx_busy) && ctr <= 300; ctr++) { if ((ctr % 10) == 0) - printk("TPM-FE [INFO]: Waiting for outstanding request.\n"); + printk("TPM-FE [INFO]: Waiting for outstanding " + "request.\n"); /* * Wait for a request to be responded to. */ diff -r d5a46e4cc340 -r 6492b9b27968 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Sun Oct 22 14:39:15 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Sun Oct 22 15:23:52 2006 -0600 @@ -845,28 +845,29 @@ static void fast_flush_area(pending_req_ uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i); khandle = &pending_handle(mmap_idx, k_idx, i); - if (BLKTAP_INVALID_HANDLE(khandle)) { - WPRINTK("BLKTAP_INVALID_HANDLE\n"); - continue; - } - gnttab_set_unmap_op(&unmap[invcount], - idx_to_kaddr(mmap_idx, k_idx, i), - GNTMAP_host_map, khandle->kernel); - invcount++; - - if (create_lookup_pte_addr( - info->vma->vm_mm, - MMAP_VADDR(info->user_vstart, u_idx, i), - &ptep) !=0) { - WPRINTK("Couldn't get a pte addr!\n"); - return; - } - - gnttab_set_unmap_op(&unmap[invcount], - ptep, GNTMAP_host_map, - khandle->user); - invcount++; - + + if (khandle->kernel != 0xFFFF) { + gnttab_set_unmap_op(&unmap[invcount], + idx_to_kaddr(mmap_idx, k_idx, i), + GNTMAP_host_map, khandle->kernel); + invcount++; + } + + if (khandle->user != 0xFFFF) { + if (create_lookup_pte_addr( + info->vma->vm_mm, + MMAP_VADDR(info->user_vstart, u_idx, i), + &ptep) !=0) { + WPRINTK("Couldn't get a pte addr!\n"); + return; + } + + gnttab_set_unmap_op(&unmap[invcount], + ptep, GNTMAP_host_map, + khandle->user); + invcount++; + } + BLKTAP_INVALIDATE_HANDLE(khandle); } ret = HYPERVISOR_grant_table_op( @@ -1030,7 +1031,7 @@ static int do_block_io_op(blkif_t *blkif static int do_block_io_op(blkif_t *blkif) { blkif_back_ring_t *blk_ring = &blkif->blk_ring; - blkif_request_t *req; + blkif_request_t req; pending_req_t *pending_req; RING_IDX rc, rp; int more_to_do = 0; @@ -1082,24 +1083,24 @@ static int do_block_io_op(blkif_t *blkif break; } - req = RING_GET_REQUEST(blk_ring, rc); + memcpy(&req, RING_GET_REQUEST(blk_ring, rc), sizeof(req)); blk_ring->req_cons = ++rc; /* before make_response() */ - switch (req->operation) { + switch (req.operation) { case BLKIF_OP_READ: blkif->st_rd_req++; - dispatch_rw_block_io(blkif, req, pending_req); + dispatch_rw_block_io(blkif, &req, pending_req); break; case BLKIF_OP_WRITE: blkif->st_wr_req++; - dispatch_rw_block_io(blkif, req, pending_req); + dispatch_rw_block_io(blkif, &req, pending_req); break; default: WPRINTK("unknown operation [%d]\n", - req->operation); - make_response(blkif, req->id, req->operation, + 
req.operation); + make_response(blkif, req.id, req.operation, BLKIF_RSP_ERROR); free_req(pending_req); break; @@ -1128,9 +1129,10 @@ static void dispatch_rw_block_io(blkif_t int usr_idx = GET_NEXT_REQ(info->idx_map); uint16_t mmap_idx = pending_req->mem_idx; - /*Check we have space on user ring - should never fail*/ - if(usr_idx == INVALID_REQ) goto fail_flush; - + /* Check we have space on user ring - should never fail. */ + if (usr_idx == INVALID_REQ) + goto fail_response; + /* Check that number of segments is sane. */ nseg = req->nr_segments; if ( unlikely(nseg == 0) || @@ -1195,8 +1197,6 @@ static void dispatch_rw_block_io(blkif_t uvaddr, &ptep); if (ret) { WPRINTK("Couldn't get a pte addr!\n"); - fast_flush_area(pending_req, pending_idx, usr_idx, - blkif->dev_num); goto fail_flush; } @@ -1224,19 +1224,25 @@ static void dispatch_rw_block_io(blkif_t if (unlikely(map[i].status != 0)) { WPRINTK("invalid kernel buffer -- " "could not remap it\n"); - goto fail_flush; + ret |= 1; + map[i].handle = 0xFFFF; } if (unlikely(map[i+1].status != 0)) { WPRINTK("invalid user buffer -- " "could not remap it\n"); - goto fail_flush; + ret |= 1; + map[i+1].handle = 0xFFFF; } pending_handle(mmap_idx, pending_idx, i/2).kernel = map[i].handle; pending_handle(mmap_idx, pending_idx, i/2).user = map[i+1].handle; + + if (ret) + continue; + set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT; @@ -1244,6 +1250,10 @@ static void dispatch_rw_block_io(blkif_t ((struct page **)info->vma->vm_private_data)[offset] = pg; } + + if (ret) + goto fail_flush; + /* Mark mapped pages as reserved: */ for (i = 0; i < req->nr_segments; i++) { unsigned long kvaddr; diff -r d5a46e4cc340 -r 6492b9b27968 linux-2.6-xen-sparse/drivers/xen/netback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Sun Oct 22 14:39:15 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Sun Oct 22 15:23:52 2006 -0600 @@ -128,7 +128,7 @@ static struct ethtool_ops network_ethtoo netif_t *netif_alloc(domid_t domid, unsigned int handle) { - int err = 0, i; + int err = 0; struct net_device *dev; netif_t *netif; char name[IFNAMSIZ] = {}; diff -r d5a46e4cc340 -r 6492b9b27968 linux-2.6-xen-sparse/drivers/xen/netback/netback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Sun Oct 22 14:39:15 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Sun Oct 22 15:23:52 2006 -0600 @@ -187,7 +187,7 @@ static struct sk_buff *netbk_copy_skb(st if (unlikely(!nskb)) goto err; - skb_reserve(nskb, 16); + skb_reserve(nskb, 16 + NET_IP_ALIGN); headlen = nskb->end - nskb->data; if (headlen > skb_headlen(skb)) headlen = skb_headlen(skb); @@ -1210,7 +1210,7 @@ static void net_tx_action(unsigned long ret < MAX_SKB_FRAGS) ? PKT_PROT_LEN : txreq.size; - skb = alloc_skb(data_len+16, GFP_ATOMIC); + skb = alloc_skb(data_len + 16 + NET_IP_ALIGN, GFP_ATOMIC); if (unlikely(skb == NULL)) { DPRINTK("Can't allocate a skb in start_xmit.\n"); netbk_tx_err(netif, &txreq, i); @@ -1218,7 +1218,7 @@ static void net_tx_action(unsigned long } /* Packets passed to netif_rx() must have some headroom. 
*/ - skb_reserve(skb, 16); + skb_reserve(skb, 16 + NET_IP_ALIGN); if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { struct netif_extra_info *gso; diff -r d5a46e4cc340 -r 6492b9b27968 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Sun Oct 22 14:39:15 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Sun Oct 22 15:23:52 2006 -0600 @@ -684,7 +684,7 @@ static void network_alloc_rx_buffers(str * necessary here. * 16 bytes added as necessary headroom for netif_receive_skb. */ - skb = alloc_skb(RX_COPY_THRESHOLD + 16, + skb = alloc_skb(RX_COPY_THRESHOLD + 16 + NET_IP_ALIGN, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) goto no_skb; @@ -702,7 +702,7 @@ no_skb: break; } - skb_reserve(skb, 16); /* mimic dev_alloc_skb() */ + skb_reserve(skb, 16 + NET_IP_ALIGN); /* mimic dev_alloc_skb() */ skb_shinfo(skb)->frags[0].page = page; skb_shinfo(skb)->nr_frags = 1; __skb_queue_tail(&np->rx_batch, skb); @@ -2129,6 +2129,9 @@ module_init(netif_init); static void __exit netif_exit(void) { + if (is_initial_xendomain()) + return; + unregister_inetaddr_notifier(¬ifier_inetdev); return xenbus_unregister_driver(&netfront); diff -r d5a46e4cc340 -r 6492b9b27968 linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Sun Oct 22 14:39:15 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Sun Oct 22 15:23:52 2006 -0600 @@ -157,10 +157,12 @@ static void frontend_changed(struct xenb case XenbusStateClosing: be->instance = -1; - break; - - case XenbusStateUnknown: + xenbus_switch_state(dev, XenbusStateClosing); + break; + + case XenbusStateUnknown: /* keep it here */ case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosed); device_unregister(&be->dev->dev); tpmback_remove(dev); break; diff -r d5a46e4cc340 -r 6492b9b27968 tools/Makefile --- a/tools/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -60,7 +60,7 @@ check_clean: $(MAKE) -C check clean .PHONY: ioemu ioemuinstall ioemuclean -ifdef CONFIG_IOEMU +ifeq ($(CONFIG_IOEMU),y) export IOEMU_DIR ?= ioemu ioemu ioemuinstall: [ -f $(IOEMU_DIR)/config-host.mak ] || \ diff -r d5a46e4cc340 -r 6492b9b27968 tools/Rules.mk --- a/tools/Rules.mk Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/Rules.mk Sun Oct 22 15:23:52 2006 -0600 @@ -4,8 +4,6 @@ all: all: include $(XEN_ROOT)/Config.mk - -CONFIG_$(shell uname -s) := y XEN_XC = $(XEN_ROOT)/tools/python/xen/lowlevel/xc XEN_LIBXC = $(XEN_ROOT)/tools/libxc @@ -25,9 +23,9 @@ CFLAGS += -D__XEN_TOOLS__ %.o: %.cc $(CC) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< -OS = $(shell uname -s) +.PHONY: mk-symlinks mk-symlinks-xen mk-symlinks-$(XEN_OS) -.PHONY: mk-symlinks mk-symlinks-xen mk-symlinks-$(OS) +mk-symlinks-SunOS: mk-symlinks-Linux: LINUX_ROOT=$(XEN_ROOT)/linux-2.6-xen-sparse mk-symlinks-Linux: @@ -44,4 +42,4 @@ mk-symlinks-xen: mkdir -p xen/io ( cd xen/io && ln -sf ../../$(XEN_ROOT)/xen/include/public/io/*.h . ) -mk-symlinks: mk-symlinks-xen mk-symlinks-$(OS) +mk-symlinks: mk-symlinks-xen mk-symlinks-$(XEN_OS) diff -r d5a46e4cc340 -r 6492b9b27968 tools/blktap/drivers/Makefile --- a/tools/blktap/drivers/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/blktap/drivers/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -3,11 +3,9 @@ include $(XEN_ROOT)/tools/Rules.mk INCLUDES += -I.. 
-I../lib -INSTALL = install -INSTALL_PROG = $(INSTALL) -m0755 IBIN = blktapctrl tapdisk QCOW_UTIL = img2qcow qcow2raw qcow-create -INSTALL_DIR = /usr/sbin +INST_DIR = /usr/sbin LIBAIO_DIR = ../../libaio/src CFLAGS += -Werror @@ -58,7 +56,7 @@ img2qcow qcow2raw qcow-create: %: $(BLK- $(CC) $(CFLAGS) -o $* $(BLK-OBJS) $*.c $(AIOLIBS) $(LIBS) install: all - $(INSTALL_PROG) $(IBIN) $(QCOW_UTIL) $(DESTDIR)$(INSTALL_DIR) + $(INSTALL_PROG) $(IBIN) $(QCOW_UTIL) $(DESTDIR)$(INST_DIR) clean: rm -rf *.o *~ $(DEPS) xen TAGS $(IBIN) $(LIB) $(QCOW_UTIL) diff -r d5a46e4cc340 -r 6492b9b27968 tools/blktap/lib/Makefile --- a/tools/blktap/lib/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/blktap/lib/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -6,10 +6,6 @@ SONAME = libblktap.so.$(MAJOR) SONAME = libblktap.so.$(MAJOR) BLKTAP_INSTALL_DIR = /usr/sbin - -INSTALL = install -INSTALL_PROG = $(INSTALL) -m0755 -INSTALL_DIR = $(INSTALL) -d -m0755 INCLUDES += -I. -I.. -I $(XEN_LIBXC) -I $(XEN_XENSTORE) @@ -56,8 +52,8 @@ clean: rm -rf *.a *.so* *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS libblktap.a: $(OBJS) - $(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared \ - -L$(XEN_XENSTORE) -l xenstore \ + $(CC) $(CFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,$(SONAME) $(SHLIB_CFLAGS) \ + -L$(XEN_XENSTORE) -l xenstore \ -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS) ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR) ln -sf libblktap.so.$(MAJOR) libblktap.so diff -r d5a46e4cc340 -r 6492b9b27968 tools/check/check_brctl --- a/tools/check/check_brctl Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/check/check_brctl Sun Oct 22 15:23:52 2006 -0600 @@ -1,10 +1,27 @@ -#!/bin/bash +#!/bin/sh # CHECK-INSTALL -function error { - echo - echo ' *** Check for the bridge control utils (brctl) FAILED' - exit 1 -} +RC=0 -which brctl 1>/dev/null 2>&1 || error +case ${OS} in +OpenBSD|NetBSD|FreeBSD) + # These systems have a bridge builtin + TOOL="brconfig" + which ${TOOL} 1>/dev/null 2>&1 || RC=1 + ;; +Linux) + TOOL="brctl" + which ${TOOL} 1>/dev/null 2>&1 || RC=1 + ;; +*) + TOOL="" + echo "Unknown OS" && RC=1 + ;; +esac + +if test ${RC} -ne 0; then + echo + echo " *** Check for the bridge control utils (${TOOL}) FAILED" +fi + +exit ${RC} diff -r d5a46e4cc340 -r 6492b9b27968 tools/check/check_iproute --- a/tools/check/check_iproute Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/check/check_iproute Sun Oct 22 15:23:52 2006 -0600 @@ -1,11 +1,26 @@ -#!/bin/bash +#!/bin/sh # CHECK-INSTALL -function error { - echo - echo ' *** Check for iproute (ip addr) FAILED' - exit 1 -} +RC=0 -ip addr list 1>/dev/null 2>&1 || error +case ${OS} in +OpenBSD|NetBSD|FreeBSD) + TOOL="ifconfig" + eval ${TOOL} -a 1>/dev/null 2>&1 || RC=1 + ;; +Linux) + TOOL="ip addr" + eval ${TOOL} list 1>/dev/null 2>&1 || RC=1 + ;; +*) + TOOL="" + echo "Unknown OS" && RC=1 + ;; +esac +if test ${RC} -ne 0; then + echo + echo " *** Check for iproute (${TOOL}) FAILED" +fi + +exit ${RC} diff -r d5a46e4cc340 -r 6492b9b27968 tools/check/check_python --- a/tools/check/check_python Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/check/check_python Sun Oct 22 15:23:52 2006 -0600 @@ -1,10 +1,13 @@ -#!/bin/bash +#!/bin/sh # CHECK-BUILD CHECK-INSTALL -function error { - echo - echo " *** Check for Python version >= 2.2 FAILED" - exit 1 -} +RC=0 -python -V 2>&1 | cut -d ' ' -f 2 | grep -q '^2.[2345]' || error +python -V 2>&1 | cut -d ' ' -f 2 | grep -q '^2.[2345]' || RC=1 + +if test ${RC} -ne 0; then + echo + echo " *** Check for Python version >= 2.2 FAILED" +fi + +exit ${RC} diff -r d5a46e4cc340 -r 6492b9b27968 
tools/check/check_zlib_devel --- a/tools/check/check_zlib_devel Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/check/check_zlib_devel Sun Oct 22 15:23:52 2006 -0600 @@ -1,11 +1,14 @@ -#!/bin/bash +#!/bin/sh # CHECK-BUILD -function error { - echo - echo " *** Check for zlib headers FAILED" - exit 1 -} +RC=0 set -e -[ -e /usr/include/zlib.h ] || error +test -r /usr/include/zlib.h || RC=1 + +if test ${RC} -ne 0; then + echo + echo " *** Check for zlib headers FAILED" +fi + +exit ${RC} diff -r d5a46e4cc340 -r 6492b9b27968 tools/check/check_zlib_lib --- a/tools/check/check_zlib_lib Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/check/check_zlib_lib Sun Oct 22 15:23:52 2006 -0600 @@ -1,11 +1,14 @@ -#!/bin/bash +#!/bin/sh # CHECK-BUILD CHECK-INSTALL -function error { - echo - echo " *** Check for zlib library FAILED" - exit 1 -} +RC=0 set -e -ldconfig -p | grep -q libz.so || error +ldconfig -v 2>&1 | grep -q libz.so || RC=1 + +if test ${RC} -ne 0; then + echo + echo " *** Check for zlib library FAILED" +fi + +exit ${RC} diff -r d5a46e4cc340 -r 6492b9b27968 tools/check/chk --- a/tools/check/chk Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/check/chk Sun Oct 22 15:23:52 2006 -0600 @@ -1,8 +1,9 @@ -#!/bin/bash +#!/bin/sh -function usage { +func_usage () +{ echo "Usage:" - echo "\t$0 [build|install|clean]" + echo " $0 [build|install|clean]" echo echo "Check suitability for Xen build or install." echo "Exit with 0 if OK, 1 if not." @@ -12,7 +13,13 @@ function usage { exit 1 } -export PATH=${PATH}:/sbin:/usr/sbin +PATH=${PATH}:/sbin:/usr/sbin +OS=`uname -s` +export PATH OS + +if test "${OS}" = "SunOS"; then + exit 0 +fi case $1 in build) @@ -25,7 +32,7 @@ case $1 in exit 0 ;; *) - usage + func_usage ;; esac @@ -54,4 +61,4 @@ for f in check_* ; do fi done -exit $failed +exit ${failed} diff -r d5a46e4cc340 -r 6492b9b27968 tools/console/Makefile --- a/tools/console/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/console/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -4,10 +4,6 @@ include $(XEN_ROOT)/tools/Rules.mk DAEMON_INSTALL_DIR = /usr/sbin CLIENT_INSTALL_DIR = /usr/$(LIBDIR)/xen/bin - -INSTALL = install -INSTALL_PROG = $(INSTALL) -m0755 -INSTALL_DIR = $(INSTALL) -d -m0755 CFLAGS += -Werror -g @@ -26,11 +22,11 @@ clean: xenconsoled: $(patsubst %.c,%.o,$(wildcard daemon/*.c)) $(CC) $(CFLAGS) $^ -o $@ -L$(XEN_LIBXC) -L$(XEN_XENSTORE) \ - -lxenctrl -lxenstore + $(SOCKET_LIBS) -lxenctrl -lxenstore xenconsole: $(patsubst %.c,%.o,$(wildcard client/*.c)) $(CC) $(CFLAGS) $^ -o $@ -L$(XEN_LIBXC) -L$(XEN_XENSTORE) \ - -lxenctrl -lxenstore + $(SOCKET_LIBS) -lxenctrl -lxenstore .PHONY: install install: $(BIN) diff -r d5a46e4cc340 -r 6492b9b27968 tools/console/daemon/io.c --- a/tools/console/daemon/io.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/console/daemon/io.c Sun Oct 22 15:23:52 2006 -0600 @@ -147,7 +147,7 @@ static int domain_create_tty(struct doma int master; bool success; - if ((master = getpt()) == -1 || + if ((master = open("/dev/ptmx",O_RDWR|O_NOCTTY)) == -1 || grantpt(master) == -1 || unlockpt(master) == -1) { dolog(LOG_ERR, "Failed to create tty for domain-%d", dom->domid); diff -r d5a46e4cc340 -r 6492b9b27968 tools/console/daemon/utils.c --- a/tools/console/daemon/utils.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/console/daemon/utils.c Sun Oct 22 15:23:52 2006 -0600 @@ -95,7 +95,7 @@ void daemonize(const char *pidfile) exit(1); } - len = sprintf(buf, "%d\n", getpid()); + len = sprintf(buf, "%ld\n", (long)getpid()); if (write(fd, buf, len) < 0) exit(1); diff -r d5a46e4cc340 -r 6492b9b27968 
tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c --- a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c Sun Oct 22 15:23:52 2006 -0600 @@ -36,8 +36,6 @@ #include <unistd.h> #include <errno.h> #include <xenctrl.h> -#include <thread_db.h> -#include <xc_ptrace.h> #define TRACE_ENTER /* printf("enter %s\n", __FUNCTION__) */ diff -r d5a46e4cc340 -r 6492b9b27968 tools/examples/Makefile --- a/tools/examples/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/examples/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -1,10 +1,5 @@ XEN_ROOT = ../../ XEN_ROOT = ../../ include $(XEN_ROOT)/tools/Rules.mk - -INSTALL = install -INSTALL_DIR = $(INSTALL) -d -m0755 -INSTALL_PROG = $(INSTALL) -m0755 -INSTALL_DATA = $(INSTALL) -m0644 # Init scripts. XEND_INITD = init.d/xend diff -r d5a46e4cc340 -r 6492b9b27968 tools/examples/vtpm-common.sh --- a/tools/examples/vtpm-common.sh Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/examples/vtpm-common.sh Sun Oct 22 15:23:52 2006 -0600 @@ -226,7 +226,7 @@ function vtpmdb_remove_entry () { # Returns 'resume' or 'create' function vtpm_get_create_reason () { local resume - resume=$(xenstore-read $XENBUS_PATH/resume) + resume=$(xenstore_read $XENBUS_PATH/resume) if [ "$resume" == "True" ]; then echo "resume" else @@ -287,6 +287,8 @@ function vtpm_create_instance () { #entry is kept in the VTPMDB file. function vtpm_remove_instance () { local instance reason domname + #Stop script execution quietly if path does not exist (anymore) + xenstore-exists "$XENBUS_PATH"/domain domname=$(xenstore_read "$XENBUS_PATH"/domain) if [ "$domname" != "" ]; then @@ -383,7 +385,7 @@ function vtpm_domid_from_name () { local id name ids ids=$(xenstore-list /local/domain) for id in $ids; do - name=$(xenstore-read /local/domain/$id/name) + name=$(xenstore_read /local/domain/$id/name) if [ "$name" == "$1" ]; then echo "$id" return diff -r d5a46e4cc340 -r 6492b9b27968 tools/examples/xend-config.sxp --- a/tools/examples/xend-config.sxp Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/examples/xend-config.sxp Sun Oct 22 15:23:52 2006 -0600 @@ -135,3 +135,7 @@ # to 127.0.0.1 To restore old 'listen everywhere' behaviour # set this to 0.0.0.0 #(vnc-listen '127.0.0.1') + +# The default password for VNC console on HVM domain. +# Empty string is no authentication. 
+(vncpasswd '') diff -r d5a46e4cc340 -r 6492b9b27968 tools/examples/xmexample.hvm --- a/tools/examples/xmexample.hvm Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/examples/xmexample.hvm Sun Oct 22 15:23:52 2006 -0600 @@ -150,6 +150,11 @@ vnc=1 #vncconsole=0 #---------------------------------------------------------------------------- +# set password for domain's VNC console +# default depends on vncpasswd in xend-config.sxp +vncpasswd='' + +#---------------------------------------------------------------------------- # no graphics, use serial port #nographic=0 diff -r d5a46e4cc340 -r 6492b9b27968 tools/examples/xmexample.vti --- a/tools/examples/xmexample.vti Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/examples/xmexample.vti Sun Oct 22 15:23:52 2006 -0600 @@ -95,6 +95,11 @@ vnc=0 #vncconsole=0 #---------------------------------------------------------------------------- +# set password for domain's VNC console +# default depends on vncpasswd in xend-config.sxp +vncpasswd='' + +#---------------------------------------------------------------------------- # no graphics, use serial port #nographic=0 diff -r d5a46e4cc340 -r 6492b9b27968 tools/firmware/Makefile --- a/tools/firmware/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/firmware/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -4,7 +4,7 @@ include $(XEN_ROOT)/tools/Rules.mk # hvmloader is a 32-bit protected mode binary. # It belongs in /usr/lib, not /usr/lib64. TARGET := hvmloader/hvmloader -INSTALL_DIR := $(DESTDIR)/usr/lib/xen/boot +INST_DIR := $(DESTDIR)/usr/lib/xen/boot SUBDIRS := SUBDIRS += rombios @@ -29,8 +29,8 @@ all: .PHONY: install install: all - [ -d $(INSTALL_DIR) ] || install -d -m0755 $(INSTALL_DIR) - [ ! -e $(TARGET) ] || install -m0644 $(TARGET) $(INSTALL_DIR) + [ -d $(INST_DIR) ] || $(INSTALL_DIR) $(INST_DIR) + [ ! -e $(TARGET) ] || $(INSTALL_DATA) $(TARGET) $(INST_DIR) .PHONY: clean clean: diff -r d5a46e4cc340 -r 6492b9b27968 tools/firmware/acpi/Makefile --- a/tools/firmware/acpi/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/firmware/acpi/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -53,7 +53,7 @@ iasl: wget $(IASL_URL) tar xzf $(IASL_VER).tar.gz make -C $(IASL_VER)/compiler - install $(IASL_VER)/compiler/iasl /usr/bin/iasl + $(INSTALL_PROG) $(IASL_VER)/compiler/iasl /usr/bin/iasl $(ACPI_GEN):$(C_SRC) $(H_SRC) acpi_dsdt.c $(HOSTCC) -o $(ACPI_GEN) $(HOSTCFLAGS) $(shell ls *.c) diff -r d5a46e4cc340 -r 6492b9b27968 tools/firmware/hvmloader/Makefile --- a/tools/firmware/hvmloader/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/firmware/hvmloader/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -32,13 +32,13 @@ XENINC =-I$(XEN_ROOT)/tools/libxc XENINC =-I$(XEN_ROOT)/tools/libxc # Disable PIE/SSP if GCC supports them. They can break us. -CFLAGS += $(call test-gcc-flag,$(CC),-nopie) -CFLAGS += $(call test-gcc-flag,$(CC),-fno-stack-protector) -CFLAGS += $(call test-gcc-flag,$(CC),-fno-stack-protector-all) +CFLAGS += $(call cc-option,$(CC),-nopie,) +CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,) +CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,) OBJCOPY = objcopy CFLAGS += $(DEFINES) -I. 
$(XENINC) -fno-builtin -O2 -msoft-float -LDFLAGS = -m32 -nostdlib -Wl,-N -Wl,-Ttext -Wl,$(LOADADDR) +LDFLAGS = -nostdlib -Wl,-N -Wl,-Ttext -Wl,$(LOADADDR) SRCS = hvmloader.c acpi_madt.c mp_tables.c util.c smbios.c OBJS = $(patsubst %.c,%.o,$(SRCS)) @@ -48,7 +48,7 @@ all: hvmloader hvmloader: roms.h $(SRCS) $(CC) $(CFLAGS) -c $(SRCS) - $(CC) $(LDFLAGS) -o hvmloader.tmp $(OBJS) + $(CC) $(CFLAGS) $(LDFLAGS) -o hvmloader.tmp $(OBJS) $(OBJCOPY) hvmloader.tmp hvmloader rm -f hvmloader.tmp diff -r d5a46e4cc340 -r 6492b9b27968 tools/firmware/vmxassist/Makefile --- a/tools/firmware/vmxassist/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/firmware/vmxassist/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -32,14 +32,13 @@ XENINC=-I$(XEN_ROOT)/tools/libxc XENINC=-I$(XEN_ROOT)/tools/libxc # Disable PIE/SSP if GCC supports them. They can break us. -CFLAGS += $(call test-gcc-flag,$(CC),-nopie) -CFLAGS += $(call test-gcc-flag,$(CC),-fno-stack-protector) -CFLAGS += $(call test-gcc-flag,$(CC),-fno-stack-protector-all) +CFLAGS += $(call cc-option,$(CC),-nopie,) +CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,) +CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,) CPP = cpp -P OBJCOPY = objcopy -p -O binary -R .note -R .comment -R .bss -S --gap-fill=0 CFLAGS += $(DEFINES) -I. $(XENINC) -fno-builtin -O2 -msoft-float -LDFLAGS = -m elf_i386 OBJECTS = head.o trap.o vm86.o setup.o util.o @@ -48,7 +47,7 @@ all: vmxassist.bin vmxassist.bin: vmxassist.ld $(OBJECTS) $(CPP) $(DEFINES) vmxassist.ld > vmxassist.tmp - $(LD) -o vmxassist $(LDFLAGS) -nostdlib --fatal-warnings -N -T vmxassist.tmp $(OBJECTS) + $(LD) -o vmxassist $(LDFLAGS_DIRECT) -nostdlib --fatal-warnings -N -T vmxassist.tmp $(OBJECTS) nm -n vmxassist > vmxassist.sym $(OBJCOPY) vmxassist vmxassist.tmp dd if=vmxassist.tmp of=vmxassist.bin ibs=512 conv=sync diff -r d5a46e4cc340 -r 6492b9b27968 tools/firmware/vmxassist/head.S --- a/tools/firmware/vmxassist/head.S Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/firmware/vmxassist/head.S Sun Oct 22 15:23:52 2006 -0600 @@ -59,7 +59,7 @@ _start16: /* go to protected mode */ movl %cr0, %eax - orl $CR0_PE, %eax + orl $(CR0_PE), %eax movl %eax, %cr0 data32 ljmp $0x08, $1f diff -r d5a46e4cc340 -r 6492b9b27968 tools/firmware/vmxassist/trap.S --- a/tools/firmware/vmxassist/trap.S Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/firmware/vmxassist/trap.S Sun Oct 22 15:23:52 2006 -0600 @@ -106,7 +106,7 @@ common_trap: /* common trap handler * pushl %es pushal - movl $DATA_SELECTOR, %eax /* make sure these are sane */ + movl $(DATA_SELECTOR), %eax /* make sure these are sane */ movl %eax, %ds movl %eax, %es movl %eax, %fs diff -r d5a46e4cc340 -r 6492b9b27968 tools/firmware/vmxassist/util.c --- a/tools/firmware/vmxassist/util.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/firmware/vmxassist/util.c Sun Oct 22 15:23:52 2006 -0600 @@ -29,6 +29,31 @@ static char *printnum(char *, unsigned l static char *printnum(char *, unsigned long, int); static void _doprint(void (*)(int), char const *, va_list); +void +cpuid_addr_value(uint64_t addr, uint64_t *value) +{ + uint32_t addr_low = (uint32_t)addr; + uint32_t addr_high = (uint32_t)(addr >> 32); + uint32_t value_low, value_high; + static unsigned int addr_leaf; + + if (!addr_leaf) { + unsigned int eax, ebx, ecx, edx; + __asm__ __volatile__( + "cpuid" + : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) + : "0" (0x40000000)); + addr_leaf = eax + 1; + } + + __asm__ __volatile__( + "cpuid" + : "=c" (value_low), "=d" (value_high) + : "a" (addr_leaf), "0" (addr_low), "1" (addr_high) + : 
"ebx"); + + *value = (uint64_t)value_high << 32 | value_low; +} void dump_regs(struct regs *regs) diff -r d5a46e4cc340 -r 6492b9b27968 tools/firmware/vmxassist/util.h --- a/tools/firmware/vmxassist/util.h Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/firmware/vmxassist/util.h Sun Oct 22 15:23:52 2006 -0600 @@ -31,6 +31,7 @@ struct vmx_assist_context; +extern void cpuid_addr_value(uint64_t addr, uint64_t *value); extern void hexdump(unsigned char *, int); extern void dump_regs(struct regs *); extern void dump_vmx_context(struct vmx_assist_context *); diff -r d5a46e4cc340 -r 6492b9b27968 tools/firmware/vmxassist/vm86.c --- a/tools/firmware/vmxassist/vm86.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/firmware/vmxassist/vm86.c Sun Oct 22 15:23:52 2006 -0600 @@ -56,8 +56,8 @@ static char *rnames[] = { "ax", "cx", "d #define PT_ENTRY_PRESENT 0x1 /* We only support access to <=4G physical memory due to 1:1 mapping */ -static unsigned -guest_linear_to_real(uint32_t base) +static uint64_t +guest_linear_to_phys(uint32_t base) { uint32_t gcr3 = oldctx.cr3; uint64_t l2_mfn; @@ -89,23 +89,32 @@ guest_linear_to_real(uint32_t base) l2_mfn = ((uint64_t *)(long)gcr3)[(base >> 30) & 0x3]; if (!(l2_mfn & PT_ENTRY_PRESENT)) panic("l3 entry not present\n"); - l2_mfn &= 0x3fffff000ULL; - - l1_mfn = ((uint64_t *)(long)l2_mfn)[(base >> 21) & 0x1ff]; + l2_mfn &= 0xffffff000ULL; + + if (l2_mfn & 0xf00000000ULL) { + printf("l2 page above 4G\n"); + cpuid_addr_value(l2_mfn + 8 * ((base >> 21) & 0x1ff), &l1_mfn); + } else + l1_mfn = ((uint64_t *)(long)l2_mfn)[(base >> 21) & 0x1ff]; if (!(l1_mfn & PT_ENTRY_PRESENT)) panic("l2 entry not present\n"); if (l1_mfn & PDE_PS) { /* CR4.PSE is ignored in PAE mode */ - l0_mfn = l1_mfn & 0x3ffe00000ULL; + l0_mfn = l1_mfn & 0xfffe00000ULL; return l0_mfn + (base & 0x1fffff); } - l1_mfn &= 0x3fffff000ULL; - - l0_mfn = ((uint64_t *)(long)l1_mfn)[(base >> 12) & 0x1ff]; + l1_mfn &= 0xffffff000ULL; + + if (l1_mfn & 0xf00000000ULL) { + printf("l1 page above 4G\n"); + cpuid_addr_value(l1_mfn + 8 * ((base >> 12) & 0x1ff), &l0_mfn); + } else + l0_mfn = ((uint64_t *)(long)l1_mfn)[(base >> 12) & 0x1ff]; if (!(l0_mfn & PT_ENTRY_PRESENT)) panic("l1 entry not present\n"); - l0_mfn &= 0x3fffff000ULL; + + l0_mfn &= 0xffffff000ULL; return l0_mfn + (base & 0xfff); } @@ -114,6 +123,7 @@ static unsigned static unsigned address(struct regs *regs, unsigned seg, unsigned off) { + uint64_t gdt_phys_base; unsigned long long entry; unsigned seg_base, seg_limit; unsigned entry_low, entry_high; @@ -129,8 +139,13 @@ address(struct regs *regs, unsigned seg, (mode == VM86_REAL_TO_PROTECTED && regs->cs == seg)) return ((seg & 0xFFFF) << 4) + off; - entry = ((unsigned long long *) - guest_linear_to_real(oldctx.gdtr_base))[seg >> 3]; + gdt_phys_base = guest_linear_to_phys(oldctx.gdtr_base); + if (gdt_phys_base != (uint32_t)gdt_phys_base) { + printf("gdt base address above 4G\n"); + cpuid_addr_value(gdt_phys_base + 8 * (seg >> 3), &entry); + } else + entry = ((unsigned long long *)(long)gdt_phys_base)[seg >> 3]; + entry_high = entry >> 32; entry_low = entry & 0xFFFFFFFF; @@ -804,6 +819,7 @@ static int static int load_seg(unsigned long sel, uint32_t *base, uint32_t *limit, union vmcs_arbytes *arbytes) { + uint64_t gdt_phys_base; unsigned long long entry; /* protected mode: use seg as index into gdt */ @@ -815,8 +831,12 @@ load_seg(unsigned long sel, uint32_t *ba return 1; } - entry = ((unsigned long long *) - guest_linear_to_real(oldctx.gdtr_base))[sel >> 3]; + gdt_phys_base = guest_linear_to_phys(oldctx.gdtr_base); + if 
(gdt_phys_base != (uint32_t)gdt_phys_base) { + printf("gdt base address above 4G\n"); + cpuid_addr_value(gdt_phys_base + 8 * (sel >> 3), &entry); + } else + entry = ((unsigned long long *)(long)gdt_phys_base)[sel >> 3]; /* Check the P bit first */ if (!((entry >> (15+32)) & 0x1) && sel != 0) diff -r d5a46e4cc340 -r 6492b9b27968 tools/guest-headers/Makefile --- a/tools/guest-headers/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/guest-headers/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -1,5 +1,6 @@ +XEN_ROOT=../.. +include $(XEN_ROOT)/tools/Rules.mk -XEN_ROOT=../.. linuxsparsetree = $(XEN_ROOT)/linux-2.6-xen-sparse .PHONY: all @@ -8,10 +9,15 @@ all: .PHONY: check check: -.PHONY: install -install: +.PHONY: install install-Linux install-SunOS + +install-Linux: mkdir -p $(DESTDIR)/usr/include/xen/linux install -m0644 $(linuxsparsetree)/include/xen/public/*.h $(DESTDIR)/usr/include/xen/linux +install-SunOS: + +install: install-$(XEN_OS) + .PHONY: clean clean: diff -r d5a46e4cc340 -r 6492b9b27968 tools/ioemu/Makefile.target --- a/tools/ioemu/Makefile.target Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/ioemu/Makefile.target Sun Oct 22 15:23:52 2006 -0600 @@ -23,7 +23,7 @@ DEFINES+=-I$(SRC_PATH)/linux-user -I$(SR DEFINES+=-I$(SRC_PATH)/linux-user -I$(SRC_PATH)/linux-user/$(TARGET_ARCH) endif CFLAGS+=-Wall -O2 -g -fno-strict-aliasing -SSE2 := $(call test-gcc-flag,$(CC),-msse2) +SSE2 := $(call cc-option,$(CC),-msse2,) ifeq ($(SSE2),-msse2) CFLAGS += -DUSE_SSE2=1 -msse2 endif @@ -294,7 +294,11 @@ endif endif # qemu-dm objects +ifeq ($(ARCH),ia64) LIBOBJS=helper2.o exec-dm.o i8259-dm.o +else +LIBOBJS=helper2.o exec-dm.o i8259-dm.o rtc-dm.o +endif all: $(PROGS) @@ -354,7 +358,11 @@ ifeq ($(TARGET_BASE_ARCH), i386) ifeq ($(TARGET_BASE_ARCH), i386) # Hardware support VL_OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o $(AUDIODRV) +ifeq ($(ARCH),ia64) VL_OBJS+= fdc.o mc146818rtc.o serial.o pc.o +else +VL_OBJS+= fdc.o serial.o pc.o +endif VL_OBJS+= cirrus_vga.o mixeng.o parallel.o acpi.o piix_pci.o VL_OBJS+= usb-uhci.o VL_OBJS+= piix4acpi.o @@ -398,6 +406,7 @@ VL_OBJS+=sdl.o VL_OBJS+=sdl.o endif VL_OBJS+=vnc.o +VL_OBJS+=d3des.o ifdef CONFIG_COCOA VL_OBJS+=cocoa.o COCOA_LIBS=-F/System/Library/Frameworks -framework Cocoa -framework IOKit @@ -456,6 +465,9 @@ sdl.o: sdl.c keymaps.c sdl_keysym.h $(CC) $(CFLAGS) $(DEFINES) $(SDL_CFLAGS) -c -o $@ $< vnc.o: vnc.c keymaps.c sdl_keysym.h vnchextile.h + $(CC) $(CFLAGS) $(DEFINES) -c -o $@ $< + +d3des.o: d3des.c d3des.h $(CC) $(CFLAGS) $(DEFINES) -c -o $@ $< sdlaudio.o: sdlaudio.c @@ -555,10 +567,10 @@ install: all install: all mkdir -p "$(DESTDIR)$(bindir)" "$(DESTDIR)$(configdir)" ifneq ($(PROGS),) - $(INSTALL) -m 755 -s $(PROGS) "$(DESTDIR)$(bindir)" -endif - install -m 755 $(TARGET_PATH)/qemu-dm.debug "$(DESTDIR)$(bindir)" - install -m 755 $(TARGET_PATH)/qemu-ifup "$(DESTDIR)$(configdir)" + $(INSTALL_PROG) $(PROGS) "$(DESTDIR)$(bindir)" +endif + $(INSTALL_PROG) $(TARGET_PATH)/qemu-dm.debug "$(DESTDIR)$(bindir)" + $(INSTALL_PROG) $(TARGET_PATH)/qemu-ifup "$(DESTDIR)$(configdir)" ifneq ($(wildcard .depend),) include .depend diff -r d5a46e4cc340 -r 6492b9b27968 tools/ioemu/hw/ide.c --- a/tools/ioemu/hw/ide.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/ioemu/hw/ide.c Sun Oct 22 15:23:52 2006 -0600 @@ -557,9 +557,9 @@ static void ide_atapi_identify(IDEState padstr((uint8_t *)(p + 23), QEMU_VERSION, 8); /* firmware version */ padstr((uint8_t *)(p + 27), "QEMU CD-ROM", 40); /* model */ put_le16(p + 48, 1); /* dword I/O (XXX: should not be set on CDROM) */ - put_le16(p 
+ 49, 1 << 9); /* LBA supported, no DMA */ + put_le16(p + 49, (1 << 11) | (1 << 9) | (1 << 8)); /* DMA and LBA supported */ put_le16(p + 53, 3); /* words 64-70, 54-58 valid */ - put_le16(p + 63, 0x103); /* DMA modes XXX: may be incorrect */ + put_le16(p + 63, 0x07); /* mdma0-2 supported */ put_le16(p + 64, 1); /* PIO modes */ put_le16(p + 65, 0xb4); /* minimum DMA multiword tx cycle time */ put_le16(p + 66, 0xb4); /* recommended DMA multiword tx cycle time */ diff -r d5a46e4cc340 -r 6492b9b27968 tools/ioemu/hw/xen_platform.c --- a/tools/ioemu/hw/xen_platform.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/ioemu/hw/xen_platform.c Sun Oct 22 15:23:52 2006 -0600 @@ -97,7 +97,8 @@ struct pci_config_header { uint8_t bist; /* Built in self test */ uint32_t base_address_regs[6]; uint32_t reserved1; - uint32_t reserved2; + uint16_t subsystem_vendor_id; + uint16_t subsystem_id; uint32_t rom_addr; uint32_t reserved3; uint32_t reserved4; @@ -126,6 +127,11 @@ void pci_xen_platform_init(PCIBus *bus) pch->header_type = 0; pch->interrupt_pin = 1; + /* Microsoft WHQL requires non-zero subsystem IDs. */ + /* http://www.pcisig.com/reflector/msg02205.html. */ + pch->subsystem_vendor_id = pch->vendor_id; /* Duplicate vendor id. */ + pch->subsystem_id = 0x0001; /* Hardcode sub-id as 1. */ + pci_register_io_region(d, 0, 0x100, PCI_ADDRESS_SPACE_IO, platform_ioport_map); diff -r d5a46e4cc340 -r 6492b9b27968 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/ioemu/vl.c Sun Oct 22 15:23:52 2006 -0600 @@ -170,6 +170,9 @@ time_t timeoffset = 0; char domain_name[1024] = { 'H','V', 'M', 'X', 'E', 'N', '-'}; extern int domid; + +char vncpasswd[64]; +unsigned char challenge[AUTHCHALLENGESIZE]; /***********************************************************/ /* x86 ISA bus support */ @@ -3028,7 +3031,7 @@ void net_slirp_smb(const char *exported_ } /* XXX: better tmp dir construction */ - snprintf(smb_dir, sizeof(smb_dir), "/tmp/qemu-smb.%d", getpid()); + snprintf(smb_dir, sizeof(smb_dir), "/tmp/qemu-smb.%ld", (long)getpid()); if (mkdir(smb_dir, 0700) < 0) { fprintf(stderr, "qemu: could not create samba server dir '%s'\n", smb_dir); exit(1); @@ -3995,7 +3998,7 @@ static void create_pidfile(const char *f perror("Opening pidfile"); exit(1); } - fprintf(f, "%d\n", getpid()); + fprintf(f, "%ld\n", (long)getpid()); fclose(f); pid_filename = qemu_strdup(filename); if (!pid_filename) { @@ -5911,6 +5914,7 @@ int main(int argc, char **argv) vncunused = 0; kernel_filename = NULL; kernel_cmdline = ""; + *vncpasswd = '\0'; #ifndef CONFIG_DM #ifdef TARGET_PPC cdrom_index = 1; @@ -5942,7 +5946,7 @@ int main(int argc, char **argv) memset(&vnclisten_addr.sin_addr, 0, sizeof(vnclisten_addr.sin_addr)); /* init debug */ - sprintf(qemu_dm_logfilename, "/var/log/xen/qemu-dm.%d.log", getpid()); + sprintf(qemu_dm_logfilename, "/var/log/xen/qemu-dm.%ld.log", (long)getpid()); cpu_set_log_filename(qemu_dm_logfilename); cpu_set_log(0); @@ -6559,6 +6563,10 @@ int main(int argc, char **argv) init_ioports(); + /* read vncpasswd from xenstore */ + if (0 > xenstore_read_vncpasswd(domid)) + exit(1); + /* terminal init */ if (nographic) { dumb_display_init(ds); diff -r d5a46e4cc340 -r 6492b9b27968 tools/ioemu/vl.h --- a/tools/ioemu/vl.h Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/ioemu/vl.h Sun Oct 22 15:23:52 2006 -0600 @@ -1211,6 +1211,7 @@ void xenstore_process_event(void *opaque void xenstore_process_event(void *opaque); void xenstore_check_new_media_present(int timeout); void xenstore_write_vncport(int vnc_display); +int 
xenstore_read_vncpasswd(int domid); /* xen_platform.c */ void pci_xen_platform_init(PCIBus *bus); @@ -1222,4 +1223,7 @@ extern char domain_name[]; void destroy_hvm_domain(void); +/* VNC Authentication */ +#define AUTHCHALLENGESIZE 16 + #endif /* VL_H */ diff -r d5a46e4cc340 -r 6492b9b27968 tools/ioemu/vnc.c --- a/tools/ioemu/vnc.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/ioemu/vnc.c Sun Oct 22 15:23:52 2006 -0600 @@ -44,6 +44,7 @@ #include "vnc_keysym.h" #include "keymaps.c" +#include "d3des.h" #define XK_MISCELLANY #define XK_LATIN1 @@ -137,6 +138,9 @@ static void vnc_update_client(void *opaq static void vnc_update_client(void *opaque); static void vnc_client_read(void *opaque); static void framebuffer_set_updated(VncState *vs, int x, int y, int w, int h); +static int make_challenge(char *random, int size); +static void set_seed(unsigned int *seedp); +static void get_random(int len, unsigned char *buf); #if 0 static inline void vnc_set_bit(uint32_t *d, int k) @@ -1208,23 +1212,92 @@ static int protocol_client_init(VncState return 0; } +static int protocol_response(VncState *vs, char *client_response, size_t len) +{ + extern char vncpasswd[64]; + extern unsigned char challenge[AUTHCHALLENGESIZE]; + unsigned char cryptchallenge[AUTHCHALLENGESIZE]; + unsigned char key[8]; + int passwdlen, i, j; + + memcpy(cryptchallenge, challenge, AUTHCHALLENGESIZE); + + /* Calculate the sent challenge */ + passwdlen = strlen(vncpasswd); + for (i=0; i<8; i++) + key[i] = i<passwdlen ? vncpasswd[i] : 0; + deskey(key, EN0); + for (j = 0; j < AUTHCHALLENGESIZE; j += 8) + des(cryptchallenge+j, cryptchallenge+j); + + /* Check the actual response */ + if (memcmp(cryptchallenge, client_response, AUTHCHALLENGESIZE) != 0) { + /* password error */ + vnc_write_u32(vs, 1); + vnc_write_u32(vs, 22); + vnc_write(vs, "Authentication failure", 22); + vnc_flush(vs); + fprintf(stderr, "VNC Password error.\n"); + vnc_client_error(vs); + return 0; + } + + vnc_write_u32(vs, 0); + vnc_flush(vs); + + vnc_read_when(vs, protocol_client_init, 1); + + return 0; +} + static int protocol_version(VncState *vs, char *version, size_t len) { + extern char vncpasswd[64]; + extern unsigned char challenge[AUTHCHALLENGESIZE]; char local[13]; - int maj, min; + int support, maj, min; memcpy(local, version, 12); local[12] = 0; + /* protocol version check */ if (sscanf(local, "RFB %03d.%03d\n", &maj, &min) != 2) { + fprintf(stderr, "Protocol version error.\n"); vnc_client_error(vs); return 0; } - vnc_write_u32(vs, 1); /* None */ - vnc_flush(vs); - - vnc_read_when(vs, protocol_client_init, 1); + + support = 0; + if (maj = 3) { + if (min == 3 || min ==4) { + support = 1; + } + } + + if (! 
support) { + fprintf(stderr, "Client uses unsupported protocol version %d.%d.\n", + maj, min); + vnc_client_error(vs); + return 0; + } + + if (*vncpasswd == '\0') { + /* AuthType is None */ + vnc_write_u32(vs, 1); + vnc_flush(vs); + vnc_read_when(vs, protocol_client_init, 1); + } else { + /* AuthType is VncAuth */ + vnc_write_u32(vs, 2); + + /* Challenge-Responce authentication */ + /* Send Challenge */ + make_challenge(challenge, AUTHCHALLENGESIZE); + vnc_write(vs, challenge, AUTHCHALLENGESIZE); + vnc_flush(vs); + vnc_read_when(vs, protocol_response, AUTHCHALLENGESIZE); + } return 0; } @@ -1342,3 +1415,32 @@ int vnc_start_viewer(int port) return pid; } } + +unsigned int seed; + +static int make_challenge(char *random, int size) +{ + + set_seed(&seed); + get_random(size, random); + + return 0; +} + +static void set_seed(unsigned int *seedp) +{ + *seedp += (unsigned int)(time(NULL)+getpid()+getpid()*987654+rand()); + srand(*seedp); + + return; +} + +static void get_random(int len, unsigned char *buf) +{ + int i; + + for (i=0; i<len; i++) + buf[i] = (int) (256.0*rand()/(RAND_MAX+1.0)); + + return; +} diff -r d5a46e4cc340 -r 6492b9b27968 tools/ioemu/xenstore.c --- a/tools/ioemu/xenstore.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/ioemu/xenstore.c Sun Oct 22 15:23:52 2006 -0600 @@ -213,3 +213,54 @@ void xenstore_write_vncport(int display) free(portstr); free(buf); } + +int xenstore_read_vncpasswd(int domid) +{ + extern char vncpasswd[64]; + char *buf = NULL, *path, *uuid = NULL, *passwd = NULL; + unsigned int i, len, rc = 0; + + if (xsh == NULL) { + return -1; + } + + path = xs_get_domain_path(xsh, domid); + if (path == NULL) { + fprintf(logfile, "xs_get_domain_path() error. domid %d.\n", domid); + return -1; + } + + pasprintf(&buf, "%s/vm", path); + uuid = xs_read(xsh, XBT_NULL, buf, &len); + if (uuid == NULL) { + fprintf(logfile, "xs_read(): uuid get error. %s.\n", buf); + free(path); + return -1; + } + + pasprintf(&buf, "%s/vncpasswd", uuid); + passwd = xs_read(xsh, XBT_NULL, buf, &len); + if (passwd == NULL) { + fprintf(logfile, "xs_read(): vncpasswd get error. %s.\n", buf); + free(uuid); + free(path); + return rc; + } + + for (i=0; i<len && i<63; i++) { + vncpasswd[i] = passwd[i]; + passwd[i] = '\0'; + } + vncpasswd[len] = '\0'; + pasprintf(&buf, "%s/vncpasswd", uuid); + if (xs_write(xsh, XBT_NULL, buf, passwd, len) == 0) { + fprintf(logfile, "xs_write() vncpasswd failed.\n"); + rc = -1; + } + + free(passwd); + free(uuid); + free(path); + + return rc; +} diff -r d5a46e4cc340 -r 6492b9b27968 tools/libxc/Makefile --- a/tools/libxc/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/libxc/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -1,14 +1,8 @@ - -INSTALL = install -INSTALL_PROG = $(INSTALL) -m0755 -INSTALL_DATA = $(INSTALL) -m0644 -INSTALL_DIR = $(INSTALL) -d -m0755 +XEN_ROOT = ../.. +include $(XEN_ROOT)/tools/Rules.mk MAJOR = 3.0 MINOR = 0 - -XEN_ROOT = ../.. 
-include $(XEN_ROOT)/tools/Rules.mk CTRL_SRCS-y := CTRL_SRCS-y += xc_core.c @@ -21,8 +15,10 @@ CTRL_SRCS-y += xc_sedf.c CTRL_SRCS-y += xc_sedf.c CTRL_SRCS-y += xc_csched.c CTRL_SRCS-y += xc_tbuf.c -CTRL_SRCS-$(CONFIG_X86) += xc_ptrace.c xc_ptrace_core.c xc_pagetab.c +CTRL_SRCS-$(CONFIG_X86) += xc_pagetab.c CTRL_SRCS-$(CONFIG_Linux) += xc_linux.c +CTRL_SRCS-$(CONFIG_SunOS) += xc_solaris.c +CTRL_SRCS-$(CONFIG_X86_Linux) += xc_ptrace.c xc_ptrace_core.c GUEST_SRCS-y := GUEST_SRCS-y += xc_load_bin.c @@ -123,7 +119,7 @@ libxenctrl.so.$(MAJOR): libxenctrl.so.$( ln -sf $< $@ libxenctrl.so.$(MAJOR).$(MINOR): $(CTRL_PIC_OBJS) - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxenctrl.so.$(MAJOR) -shared -o $@ $^ + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libxenctrl.so.$(MAJOR) $(SHLIB_CFLAGS) -o $@ $^ # libxenguest @@ -136,7 +132,7 @@ libxenguest.so.$(MAJOR): libxenguest.so. ln -sf $< $@ libxenguest.so.$(MAJOR).$(MINOR): $(GUEST_PIC_OBJS) libxenctrl.so - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxenguest.so.$(MAJOR) -shared -o $@ $^ -lz -lxenctrl + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libxenguest.so.$(MAJOR) $(SHLIB_CFLAGS) -o $@ $(GUEST_PIC_OBJS) -lz -lxenctrl -include $(DEPS) diff -r d5a46e4cc340 -r 6492b9b27968 tools/libxc/ia64/xc_ia64_linux_restore.c --- a/tools/libxc/ia64/xc_ia64_linux_restore.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/libxc/ia64/xc_ia64_linux_restore.c Sun Oct 22 15:23:52 2006 -0600 @@ -44,11 +44,11 @@ read_page(int xc_handle, int io_fd, uint mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, pfn); if (mem == NULL) { - ERR("cannot map page"); + ERROR("cannot map page"); return -1; } if (!read_exact(io_fd, mem, PAGE_SIZE)) { - ERR("Error when reading from state file (5)"); + ERROR("Error when reading from state file (5)"); return -1; } munmap(mem, PAGE_SIZE); @@ -85,17 +85,17 @@ xc_linux_restore(int xc_handle, int io_f if (!read_exact(io_fd, &ver, sizeof(unsigned long))) { - ERR("Error when reading version"); + ERROR("Error when reading version"); goto out; } if (ver != 1) { - ERR("version of save doesn't match"); + ERROR("version of save doesn't match"); goto out; } if (mlock(&ctxt, sizeof(ctxt))) { /* needed for build domctl, but might as well do early */ - ERR("Unable to mlock ctxt"); + ERROR("Unable to mlock ctxt"); return 1; } @@ -103,7 +103,7 @@ xc_linux_restore(int xc_handle, int io_f domctl.cmd = XEN_DOMCTL_getdomaininfo; domctl.domain = (domid_t)dom; if (xc_domctl(xc_handle, &domctl) < 0) { - ERR("Could not get information on new domain"); + ERROR("Could not get information on new domain"); goto out; } shared_info_frame = domctl.u.getdomaininfo.shared_info_frame; @@ -115,7 +115,7 @@ xc_linux_restore(int xc_handle, int io_f if (xc_domain_memory_increase_reservation(xc_handle, dom, max_pfn, 0, 0, NULL) != 0) { - ERR("Failed to increase reservation by %ld KB", PFN_TO_KB(max_pfn)); + ERROR("Failed to increase reservation by %ld KB", PFN_TO_KB(max_pfn)); errno = ENOMEM; goto out; } @@ -123,7 +123,7 @@ xc_linux_restore(int xc_handle, int io_f DPRINTF("Increased domain reservation by %ld KB\n", PFN_TO_KB(max_pfn)); if (!read_exact(io_fd, &domctl.u.arch_setup, sizeof(domctl.u.arch_setup))) { - ERR("read: domain setup"); + ERROR("read: domain setup"); goto out; } @@ -141,13 +141,13 @@ xc_linux_restore(int xc_handle, int io_f /* Get pages. 
*/ page_array = malloc(max_pfn * sizeof(unsigned long)); if (page_array == NULL ) { - ERR("Could not allocate memory"); + ERROR("Could not allocate memory"); goto out; } if (xc_ia64_get_pfn_list(xc_handle, dom, page_array, 0, max_pfn) != max_pfn) { - ERR("Could not get the page frame list"); + ERROR("Could not get the page frame list"); goto out; } @@ -155,7 +155,7 @@ xc_linux_restore(int xc_handle, int io_f while (1) { if (!read_exact(io_fd, &mfn, sizeof(unsigned long))) { - ERR("Error when reading batch size"); + ERROR("Error when reading batch size"); goto out; } if (mfn == INVALID_MFN) @@ -178,18 +178,18 @@ xc_linux_restore(int xc_handle, int io_f int rc; if (!read_exact(io_fd, &count, sizeof(count))) { - ERR("Error when reading pfn count"); + ERROR("Error when reading pfn count"); goto out; } pfntab = malloc(sizeof(unsigned long) * count); if (!pfntab) { - ERR("Out of memory"); + ERROR("Out of memory"); goto out; } if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) { - ERR("Error when reading pfntab"); + ERROR("Error when reading pfntab"); goto out; } @@ -211,7 +211,7 @@ xc_linux_restore(int xc_handle, int io_f rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation, &reservation); if (rc != 1) { - ERR("Could not decrease reservation : %d", rc); + ERROR("Could not decrease reservation : %d", rc); goto out; } } @@ -221,7 +221,7 @@ xc_linux_restore(int xc_handle, int io_f if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) { - ERR("Error when reading ctxt"); + ERROR("Error when reading ctxt"); goto out; } @@ -234,7 +234,7 @@ xc_linux_restore(int xc_handle, int io_f domctl.u.vcpucontext.vcpu = 0; set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt); if (xc_domctl(xc_handle, &domctl) != 0) { - ERR("Couldn't set vcpu context"); + ERROR("Couldn't set vcpu context"); goto out; } @@ -245,19 +245,19 @@ xc_linux_restore(int xc_handle, int io_f domctl.u.vcpucontext.vcpu = 0; set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt); if (xc_domctl(xc_handle, &domctl) != 0) { - ERR("Couldn't set vcpu context"); + ERROR("Couldn't set vcpu context"); goto out; } /* Just a check. */ if (xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, &ctxt)) { - ERR("Could not get vcpu context"); + ERROR("Could not get vcpu context"); goto out; } /* Then get privreg page. */ if (read_page(xc_handle, io_fd, dom, ctxt.privregs_pfn) < 0) { - ERR("Could not read vcpu privregs"); + ERROR("Could not read vcpu privregs"); goto out; } @@ -265,11 +265,11 @@ xc_linux_restore(int xc_handle, int io_f shared_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame); if (shared_info == NULL) { - ERR("cannot map page"); + ERROR("cannot map page"); goto out; } if (!read_exact(io_fd, shared_info, PAGE_SIZE)) { - ERR("Error when reading shared_info page"); + ERROR("Error when reading shared_info page"); goto out; } diff -r d5a46e4cc340 -r 6492b9b27968 tools/libxc/ia64/xc_ia64_linux_save.c --- a/tools/libxc/ia64/xc_ia64_linux_save.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/libxc/ia64/xc_ia64_linux_save.c Sun Oct 22 15:23:52 2006 -0600 @@ -97,14 +97,14 @@ suspend_and_state(int (*suspend)(int), i int i = 0; if (!(*suspend)(dom)) { - ERR("Suspend request failed"); + ERROR("Suspend request failed"); return -1; } retry: if (xc_domain_getinfo(xc_handle, dom, 1, info) != 1) { - ERR("Could not get domain info"); + ERROR("Could not get domain info"); return -1; } @@ -115,7 +115,7 @@ retry: // try unpausing domain, wait, and retest xc_domain_unpause(xc_handle, dom); - ERR("Domain was paused. 
Wait and re-test."); + ERROR("Domain was paused. Wait and re-test."); usleep(10000); // 10ms goto retry; @@ -123,12 +123,12 @@ retry: if(++i < 100) { - ERR("Retry suspend domain."); + ERROR("Retry suspend domain."); usleep(10000); // 10ms goto retry; } - ERR("Unable to suspend domain."); + ERROR("Unable to suspend domain."); return -1; } @@ -191,7 +191,7 @@ xc_linux_save(int xc_handle, int io_fd, //initialize_mbit_rate(); if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) { - ERR("Could not get domain info"); + ERROR("Could not get domain info"); return 1; } @@ -200,7 +200,7 @@ xc_linux_save(int xc_handle, int io_fd, #if 0 /* cheesy sanity check */ if ((info.max_memkb >> (PAGE_SHIFT - 10)) > max_mfn) { - ERR("Invalid state record -- pfn count out of range: %lu", + ERROR("Invalid state record -- pfn count out of range: %lu", (info.max_memkb >> (PAGE_SHIFT - 10))); goto out; } @@ -210,7 +210,7 @@ xc_linux_save(int xc_handle, int io_fd, live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, shared_info_frame); if (!live_shinfo) { - ERR("Couldn't map live_shinfo"); + ERROR("Couldn't map live_shinfo"); goto out; } @@ -218,13 +218,13 @@ xc_linux_save(int xc_handle, int io_fd, page_array = malloc(max_pfn * sizeof(unsigned long)); if (page_array == NULL) { - ERR("Could not allocate memory"); + ERROR("Could not allocate memory"); goto out; } /* This is expected by xm restore. */ if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) { - ERR("write: max_pfn"); + ERROR("write: max_pfn"); goto out; } @@ -237,7 +237,7 @@ xc_linux_save(int xc_handle, int io_fd, unsigned long version = 1; if (!write_exact(io_fd, &version, sizeof(unsigned long))) { - ERR("write: version"); + ERROR("write: version"); goto out; } } @@ -246,12 +246,12 @@ xc_linux_save(int xc_handle, int io_fd, domctl.domain = (domid_t)dom; domctl.u.arch_setup.flags = XEN_DOMAINSETUP_query; if (xc_domctl(xc_handle, &domctl) < 0) { - ERR("Could not get domain setup"); + ERROR("Could not get domain setup"); goto out; } if (!write_exact(io_fd, &domctl.u.arch_setup, sizeof(domctl.u.arch_setup))) { - ERR("write: domain setup"); + ERROR("write: domain setup"); goto out; } @@ -261,7 +261,7 @@ xc_linux_save(int xc_handle, int io_fd, if (xc_ia64_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, NULL, 0, NULL ) < 0) { - ERR("Couldn't enable shadow mode"); + ERROR("Couldn't enable shadow mode"); goto out; } @@ -272,7 +272,7 @@ xc_linux_save(int xc_handle, int io_fd, to_skip = malloc(bitmap_size); if (!to_send || !to_skip) { - ERR("Couldn't allocate bitmap array"); + ERROR("Couldn't allocate bitmap array"); goto out; } @@ -280,11 +280,11 @@ xc_linux_save(int xc_handle, int io_fd, memset(to_send, 0xff, bitmap_size); if (mlock(to_send, bitmap_size)) { - ERR("Unable to mlock to_send"); + ERROR("Unable to mlock to_send"); goto out; } if (mlock(to_skip, bitmap_size)) { - ERR("Unable to mlock to_skip"); + ERROR("Unable to mlock to_skip"); goto out; } @@ -296,7 +296,7 @@ xc_linux_save(int xc_handle, int io_fd, last_iter = 1; if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) { - ERR("Domain appears not to have suspended"); + ERROR("Domain appears not to have suspended"); goto out; } @@ -315,7 +315,7 @@ xc_linux_save(int xc_handle, int io_fd, /* Get the pfn list, as it may change. 
*/ if (xc_ia64_get_pfn_list(xc_handle, dom, page_array, 0, max_pfn) != max_pfn) { - ERR("Could not get the page frame list"); + ERROR("Could not get the page frame list"); goto out; } @@ -326,7 +326,7 @@ xc_linux_save(int xc_handle, int io_fd, if (xc_ia64_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, max_pfn, NULL) != max_pfn) { - ERR("Error peeking shadow bitmap"); + ERROR("Error peeking shadow bitmap"); goto out; } } @@ -358,12 +358,12 @@ xc_linux_save(int xc_handle, int io_fd, } if (!write_exact(io_fd, &N, sizeof(N))) { - ERR("write: max_pfn"); + ERROR("write: max_pfn"); goto out; } if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) { - ERR("Error when writing to state file (5)"); + ERROR("Error when writing to state file (5)"); goto out; } munmap(mem, PAGE_SIZE); @@ -385,7 +385,7 @@ xc_linux_save(int xc_handle, int io_fd, last_iter = 1; if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) { - ERR("Domain appears not to have suspended"); + ERROR("Domain appears not to have suspended"); goto out; } } @@ -394,7 +394,7 @@ xc_linux_save(int xc_handle, int io_fd, if (xc_ia64_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, max_pfn, NULL ) != max_pfn) { - ERR("Error flushing shadow PT"); + ERROR("Error flushing shadow PT"); goto out; } @@ -411,7 +411,7 @@ xc_linux_save(int xc_handle, int io_fd, { unsigned long pfn = INVALID_MFN; if (!write_exact(io_fd, &pfn, sizeof(pfn))) { - ERR("Error when writing to state file (6)"); + ERROR("Error when writing to state file (6)"); goto out; } } @@ -427,7 +427,7 @@ xc_linux_save(int xc_handle, int io_fd, } if (!write_exact(io_fd, &j, sizeof(unsigned int))) { - ERR("Error when writing to state file (6a)"); + ERROR("Error when writing to state file (6a)"); goto out; } @@ -439,7 +439,7 @@ xc_linux_save(int xc_handle, int io_fd, i++; if (j == 1024 || i == max_pfn) { if (!write_exact(io_fd, &pfntab, sizeof(unsigned long)*j)) { - ERR("Error when writing to state file (6b)"); + ERROR("Error when writing to state file (6b)"); goto out; } j = 0; @@ -449,12 +449,12 @@ xc_linux_save(int xc_handle, int io_fd, } if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) { - ERR("Could not get vcpu context"); + ERROR("Could not get vcpu context"); goto out; } if (!write_exact(io_fd, &ctxt, sizeof(ctxt))) { - ERR("Error when writing to state file (1)"); + ERROR("Error when writing to state file (1)"); goto out; } @@ -464,17 +464,17 @@ xc_linux_save(int xc_handle, int io_fd, mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, ctxt.privregs_pfn); if (mem == NULL) { - ERR("cannot map privreg page"); + ERROR("cannot map privreg page"); goto out; } if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) { - ERR("Error when writing privreg to state file (5)"); + ERROR("Error when writing privreg to state file (5)"); goto out; } munmap(mem, PAGE_SIZE); if (!write_exact(io_fd, live_shinfo, PAGE_SIZE)) { - ERR("Error when writing to state file (1)"); + ERROR("Error when writing to state file (1)"); goto out; } diff -r d5a46e4cc340 -r 6492b9b27968 tools/libxc/xc_hvm_build.c --- a/tools/libxc/xc_hvm_build.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/libxc/xc_hvm_build.c Sun Oct 22 15:23:52 2006 -0600 @@ -17,7 +17,6 @@ #include <xen/hvm/e820.h> #define HVM_LOADER_ENTR_ADDR 0x00100000 - static int parseelfimage( char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi); @@ -82,15 +81,16 @@ static void build_e820map(void *e820_pag e820entry[nr_map].type = E820_IO; nr_map++; + e820entry[nr_map].addr = 0xEA000; + 
e820entry[nr_map].size = 0x01000; + e820entry[nr_map].type = E820_ACPI; + nr_map++; + e820entry[nr_map].addr = 0xF0000; e820entry[nr_map].size = 0x10000; e820entry[nr_map].type = E820_RESERVED; nr_map++; -/* ACPI data: 10 pages. */ -#define ACPI_DATA_PAGES 10 -/* ACPI NVS: 3 pages. */ -#define ACPI_NVS_PAGES 3 /* buffered io page. */ #define BUFFERED_IO_PAGES 1 /* xenstore page. */ @@ -102,36 +102,13 @@ static void build_e820map(void *e820_pag /* Most of the ram goes here */ e820entry[nr_map].addr = 0x100000; e820entry[nr_map].size = mem_size - 0x100000 - PAGE_SIZE * - (ACPI_DATA_PAGES + - ACPI_NVS_PAGES + - BUFFERED_IO_PAGES + + (BUFFERED_IO_PAGES + XENSTORE_PAGES + SHARED_IO_PAGES); e820entry[nr_map].type = E820_RAM; nr_map++; /* Statically allocated special pages */ - - /* For ACPI data */ - e820entry[nr_map].addr = mem_size - PAGE_SIZE * - (ACPI_DATA_PAGES + - ACPI_NVS_PAGES + - BUFFERED_IO_PAGES + - XENSTORE_PAGES + - SHARED_IO_PAGES); - e820entry[nr_map].size = PAGE_SIZE * ACPI_DATA_PAGES; - e820entry[nr_map].type = E820_ACPI; - nr_map++; - - /* For ACPI NVS */ - e820entry[nr_map].addr = mem_size - PAGE_SIZE * - (ACPI_NVS_PAGES + - BUFFERED_IO_PAGES + - XENSTORE_PAGES + - SHARED_IO_PAGES); - e820entry[nr_map].size = PAGE_SIZE * ACPI_NVS_PAGES; - e820entry[nr_map].type = E820_NVS; - nr_map++; /* For buffered IO requests */ e820entry[nr_map].addr = mem_size - PAGE_SIZE * diff -r d5a46e4cc340 -r 6492b9b27968 tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/libxc/xc_linux_restore.c Sun Oct 22 15:23:52 2006 -0600 @@ -79,7 +79,7 @@ int uncanonicalize_pagetable(unsigned lo if(pfn >= max_pfn) { /* This "page table page" is probably not one; bail. */ - ERR("Frame number in type %lu page table is out of range: " + ERROR("Frame number in type %lu page table is out of range: " "i=%d pfn=0x%lx max_pfn=%lu", type >> 28, i, pfn, max_pfn); return 0; @@ -158,24 +158,24 @@ int xc_linux_restore(int xc_handle, int if(!get_platform_info(xc_handle, dom, &max_mfn, &hvirt_start, &pt_levels)) { - ERR("Unable to get platform info."); + ERROR("Unable to get platform info."); return 1; } if (mlock(&ctxt, sizeof(ctxt))) { /* needed for build domctl, but might as well do early */ - ERR("Unable to mlock ctxt"); + ERROR("Unable to mlock ctxt"); return 1; } if (!(p2m_frame_list = malloc(P2M_FL_SIZE))) { - ERR("Couldn't allocate p2m_frame_list array"); + ERROR("Couldn't allocate p2m_frame_list array"); goto out; } /* Read first entry of P2M list, or extended-info signature (~0UL). */ if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) { - ERR("read extended-info signature failed"); + ERROR("read extended-info signature failed"); goto out; } @@ -184,7 +184,7 @@ int xc_linux_restore(int xc_handle, int /* Next 4 bytes: total size of following extended info. */ if (!read_exact(io_fd, &tot_bytes, sizeof(tot_bytes))) { - ERR("read extended-info size failed"); + ERROR("read extended-info size failed"); goto out; } @@ -195,7 +195,7 @@ int xc_linux_restore(int xc_handle, int /* 4-character chunk signature + 4-byte remaining chunk size. */ if (!read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) || !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes))) { - ERR("read extended-info chunk signature failed"); + ERROR("read extended-info chunk signature failed"); goto out; } tot_bytes -= 8; @@ -203,7 +203,7 @@ int xc_linux_restore(int xc_handle, int /* VCPU context structure? 
*/ if (!strncmp(chunk_sig, "vcpu", 4)) { if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) { - ERR("read extended-info vcpu context failed"); + ERROR("read extended-info vcpu context failed"); goto out; } tot_bytes -= sizeof(struct vcpu_guest_context); @@ -219,7 +219,7 @@ int xc_linux_restore(int xc_handle, int if ( sz > P2M_FL_SIZE ) sz = P2M_FL_SIZE; if (!read_exact(io_fd, p2m_frame_list, sz)) { - ERR("read-and-discard extended-info chunk bytes failed"); + ERROR("read-and-discard extended-info chunk bytes failed"); goto out; } chunk_bytes -= sz; @@ -229,14 +229,14 @@ int xc_linux_restore(int xc_handle, int /* Now read the real first entry of P2M list. */ if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) { - ERR("read first entry of p2m_frame_list failed"); + ERROR("read first entry of p2m_frame_list failed"); goto out; } } /* First entry is already read into the p2m array. */ if (!read_exact(io_fd, &p2m_frame_list[1], P2M_FL_SIZE - sizeof(long))) { - ERR("read p2m_frame_list failed"); + ERROR("read p2m_frame_list failed"); goto out; } @@ -246,13 +246,13 @@ int xc_linux_restore(int xc_handle, int region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t)); if ((p2m == NULL) || (pfn_type == NULL) || (region_mfn == NULL)) { - ERR("memory alloc failed"); + ERROR("memory alloc failed"); errno = ENOMEM; goto out; } if (mlock(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) { - ERR("Could not mlock region_mfn"); + ERROR("Could not mlock region_mfn"); goto out; } @@ -260,7 +260,7 @@ int xc_linux_restore(int xc_handle, int domctl.cmd = XEN_DOMCTL_getdomaininfo; domctl.domain = (domid_t)dom; if (xc_domctl(xc_handle, &domctl) < 0) { - ERR("Could not get information on new domain"); + ERROR("Could not get information on new domain"); goto out; } shared_info_frame = domctl.u.getdomaininfo.shared_info_frame; @@ -272,7 +272,7 @@ int xc_linux_restore(int xc_handle, int if(xc_domain_memory_increase_reservation( xc_handle, dom, max_pfn, 0, 0, NULL) != 0) { - ERR("Failed to increase reservation by %lx KB", PFN_TO_KB(max_pfn)); + ERROR("Failed to increase reservation by %lx KB", PFN_TO_KB(max_pfn)); errno = ENOMEM; goto out; } @@ -281,12 +281,12 @@ int xc_linux_restore(int xc_handle, int /* Build the pfn-to-mfn table. We choose MFN ordering returned by Xen. */ if (xc_get_pfn_list(xc_handle, dom, p2m, max_pfn) != max_pfn) { - ERR("Did not read correct number of frame numbers for new dom"); + ERROR("Did not read correct number of frame numbers for new dom"); goto out; } if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) { - ERR("Could not initialise for MMU updates"); + ERROR("Could not initialise for MMU updates"); goto out; } @@ -312,7 +312,7 @@ int xc_linux_restore(int xc_handle, int } if (!read_exact(io_fd, &j, sizeof(int))) { - ERR("Error when reading batch size"); + ERROR("Error when reading batch size"); goto out; } @@ -328,12 +328,12 @@ int xc_linux_restore(int xc_handle, int break; /* our work here is done */ if (j > MAX_BATCH_SIZE) { - ERR("Max batch size exceeded. Giving up."); + ERROR("Max batch size exceeded. 
Giving up."); goto out; } if (!read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long))) { - ERR("Error when reading region pfn types"); + ERROR("Error when reading region pfn types"); goto out; } @@ -353,7 +353,7 @@ int xc_linux_restore(int xc_handle, int xc_handle, dom, PROT_WRITE, region_mfn, j); if ( region_base == NULL ) { - ERR("map batch failed"); + ERROR("map batch failed"); goto out; } @@ -371,7 +371,7 @@ int xc_linux_restore(int xc_handle, int if ( pfn > max_pfn ) { - ERR("pfn out of range"); + ERROR("pfn out of range"); goto out; } @@ -383,7 +383,7 @@ int xc_linux_restore(int xc_handle, int page = verify ? (void *)buf : (region_base + i*PAGE_SIZE); if (!read_exact(io_fd, page, PAGE_SIZE)) { - ERR("Error when reading page (type was %lx)", pagetype); + ERROR("Error when reading page (type was %lx)", pagetype); goto out; } @@ -422,7 +422,7 @@ int xc_linux_restore(int xc_handle, int } else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB ) { - ERR("Bogus page type %lx page table is out of range: " + ERROR("Bogus page type %lx page table is out of range: " "i=%d max_pfn=%lu", pagetype, i, max_pfn); goto out; @@ -455,7 +455,7 @@ int xc_linux_restore(int xc_handle, int if (xc_add_mmu_update(xc_handle, mmu, (((unsigned long long)mfn) << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, pfn)) { - ERR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn); + ERROR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn); goto out; } } /* end of 'batch' for loop */ @@ -469,7 +469,7 @@ int xc_linux_restore(int xc_handle, int * reallocations below. */ if (xc_finish_mmu_updates(xc_handle, mmu)) { - ERR("Error doing finish_mmu_updates()"); + ERROR("Error doing finish_mmu_updates()"); goto out; } @@ -512,7 +512,7 @@ int xc_linux_restore(int xc_handle, int munmap(l3tab, PAGE_SIZE); if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) { - ERR("Couldn't get a page below 4GB :-("); + ERROR("Couldn't get a page below 4GB :-("); goto out; } @@ -521,7 +521,7 @@ int xc_linux_restore(int xc_handle, int (((unsigned long long)new_mfn) << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, i)) { - ERR("Couldn't m2p on PAE root pgdir"); + ERROR("Couldn't m2p on PAE root pgdir"); goto out; } @@ -554,14 +554,14 @@ int xc_linux_restore(int xc_handle, int if (!(region_base = xc_map_foreign_batch( xc_handle, dom, PROT_READ | PROT_WRITE, region_mfn, j))) { - ERR("map batch failed"); + ERROR("map batch failed"); goto out; } for(k = 0; k < j; k++) { if(!uncanonicalize_pagetable(XEN_DOMCTL_PFINFO_L1TAB, region_base + k*PAGE_SIZE)) { - ERR("failed uncanonicalize pt!"); + ERROR("failed uncanonicalize pt!"); goto out; } } @@ -572,7 +572,7 @@ int xc_linux_restore(int xc_handle, int } if (xc_finish_mmu_updates(xc_handle, mmu)) { - ERR("Error doing finish_mmu_updates()"); + ERROR("Error doing finish_mmu_updates()"); goto out; } } @@ -615,7 +615,7 @@ int xc_linux_restore(int xc_handle, int /* Batch full? Then flush. */ if (nr_pins == MAX_PIN_BATCH) { if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) { - ERR("Failed to pin batch of %d page tables", nr_pins); + ERROR("Failed to pin batch of %d page tables", nr_pins); goto out; } nr_pins = 0; @@ -624,7 +624,7 @@ int xc_linux_restore(int xc_handle, int /* Flush final partial batch. 
*/ if ((nr_pins != 0) && (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0)) { - ERR("Failed to pin batch of %d page tables", nr_pins); + ERROR("Failed to pin batch of %d page tables", nr_pins); goto out; } @@ -638,17 +638,17 @@ int xc_linux_restore(int xc_handle, int int rc; if (!read_exact(io_fd, &count, sizeof(count))) { - ERR("Error when reading pfn count"); + ERROR("Error when reading pfn count"); goto out; } if(!(pfntab = malloc(sizeof(unsigned long) * count))) { - ERR("Out of memory"); + ERROR("Out of memory"); goto out; } if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) { - ERR("Error when reading pfntab"); + ERROR("Error when reading pfntab"); goto out; } @@ -675,7 +675,7 @@ int xc_linux_restore(int xc_handle, int if ((rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation, &reservation)) != count) { - ERR("Could not decrease reservation : %d", rc); + ERROR("Could not decrease reservation : %d", rc); goto out; } else DPRINTF("Decreased reservation by %d pages\n", count); @@ -684,14 +684,14 @@ int xc_linux_restore(int xc_handle, int if (!read_exact(io_fd, &ctxt, sizeof(ctxt)) || !read_exact(io_fd, shared_info_page, PAGE_SIZE)) { - ERR("Error when reading ctxt or shared info page"); + ERROR("Error when reading ctxt or shared info page"); goto out; } /* Uncanonicalise the suspend-record frame number and poke resume rec. */ pfn = ctxt.user_regs.edx; if ((pfn >= max_pfn) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) { - ERR("Suspend record frame number is bad"); + ERROR("Suspend record frame number is bad"); goto out; } ctxt.user_regs.edx = mfn = p2m[pfn]; @@ -709,14 +709,14 @@ int xc_linux_restore(int xc_handle, int /* Uncanonicalise each GDT frame number. */ if (ctxt.gdt_ents > 8192) { - ERR("GDT entry count out of range"); + ERROR("GDT entry count out of range"); goto out; } for (i = 0; i < ctxt.gdt_ents; i += 512) { pfn = ctxt.gdt_frames[i]; if ((pfn >= max_pfn) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) { - ERR("GDT frame number is bad"); + ERROR("GDT frame number is bad"); goto out; } ctxt.gdt_frames[i] = p2m[pfn]; @@ -726,14 +726,14 @@ int xc_linux_restore(int xc_handle, int pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]); if (pfn >= max_pfn) { - ERR("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx", + ERROR("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx", pfn, max_pfn, pfn_type[pfn]); goto out; } if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) != ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) { - ERR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx", + ERROR("PT base is bad. 
pfn=%lu nr=%lu type=%08lx %08lx", pfn, max_pfn, pfn_type[pfn], (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT); goto out; @@ -757,7 +757,7 @@ int xc_linux_restore(int xc_handle, int for (i = 0; i < P2M_FL_ENTRIES; i++) { pfn = p2m_frame_list[i]; if ((pfn >= max_pfn) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) { - ERR("PFN-to-MFN frame number is bad"); + ERROR("PFN-to-MFN frame number is bad"); goto out; } @@ -767,7 +767,7 @@ int xc_linux_restore(int xc_handle, int /* Copy the P2M we've constructed to the 'live' P2M */ if (!(live_p2m = xc_map_foreign_batch(xc_handle, dom, PROT_WRITE, p2m_frame_list, P2M_FL_ENTRIES))) { - ERR("Couldn't map p2m table"); + ERROR("Couldn't map p2m table"); goto out; } @@ -803,7 +803,7 @@ int xc_linux_restore(int xc_handle, int (ctxt.ldt_ents > 8192) || (ctxt.ldt_base > hvirt_start) || ((ctxt.ldt_base + ctxt.ldt_ents*8) > hvirt_start)) { - ERR("Bad LDT base or size"); + ERROR("Bad LDT base or size"); goto out; } @@ -816,7 +816,7 @@ int xc_linux_restore(int xc_handle, int rc = xc_domctl(xc_handle, &domctl); if (rc != 0) { - ERR("Couldn't build the domain"); + ERROR("Couldn't build the domain"); goto out; } diff -r d5a46e4cc340 -r 6492b9b27968 tools/libxc/xc_linux_save.c --- a/tools/libxc/xc_linux_save.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/libxc/xc_linux_save.c Sun Oct 22 15:23:52 2006 -0600 @@ -363,19 +363,19 @@ static int suspend_and_state(int (*suspe int i = 0; if (!(*suspend)(dom)) { - ERR("Suspend request failed"); + ERROR("Suspend request failed"); return -1; } retry: if (xc_domain_getinfo(xc_handle, dom, 1, info) != 1) { - ERR("Could not get domain info"); + ERROR("Could not get domain info"); return -1; } if ( xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, ctxt)) - ERR("Could not get vcpu context"); + ERROR("Could not get vcpu context"); if (info->shutdown && info->shutdown_reason == SHUTDOWN_suspend) @@ -385,7 +385,7 @@ static int suspend_and_state(int (*suspe // try unpausing domain, wait, and retest xc_domain_unpause( xc_handle, dom ); - ERR("Domain was paused. Wait and re-test."); + ERROR("Domain was paused. 
Wait and re-test."); usleep(10000); // 10ms goto retry; @@ -393,12 +393,12 @@ static int suspend_and_state(int (*suspe if( ++i < 100 ) { - ERR("Retry suspend domain."); + ERROR("Retry suspend domain."); usleep(10000); // 10ms goto retry; } - ERR("Unable to suspend domain."); + ERROR("Unable to suspend domain."); return -1; } @@ -516,25 +516,25 @@ static xen_pfn_t *xc_map_m2p(int xc_hand xmml.max_extents = m2p_chunks; if (!(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t)))) { - ERR("failed to allocate space for m2p mfns"); + ERROR("failed to allocate space for m2p mfns"); return NULL; } set_xen_guest_handle(xmml.extent_start, extent_start); if (xc_memory_op(xc_handle, XENMEM_machphys_mfn_list, &xmml) || (xmml.nr_extents != m2p_chunks)) { - ERR("xc_get_m2p_mfns"); + ERROR("xc_get_m2p_mfns"); return NULL; } if ((m2p = mmap(NULL, m2p_size, prot, MAP_SHARED, xc_handle, 0)) == MAP_FAILED) { - ERR("failed to mmap m2p"); + ERROR("failed to mmap m2p"); return NULL; } if (!(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t)))) { - ERR("failed to allocate space for mmap entries"); + ERROR("failed to allocate space for mmap entries"); return NULL; } @@ -546,7 +546,7 @@ static xen_pfn_t *xc_map_m2p(int xc_hand if ((rc = xc_map_foreign_ranges(xc_handle, DOMID_XEN, entries, m2p_chunks)) < 0) { - ERR("xc_mmap_foreign_ranges failed (rc = %d)", rc); + ERROR("xc_mmap_foreign_ranges failed (rc = %d)", rc); return NULL; } @@ -619,23 +619,23 @@ int xc_linux_save(int xc_handle, int io_ if(!get_platform_info(xc_handle, dom, &max_mfn, &hvirt_start, &pt_levels)) { - ERR("Unable to get platform info."); + ERROR("Unable to get platform info."); return 1; } if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) { - ERR("Could not get domain info"); + ERROR("Could not get domain info"); return 1; } if (mlock(&ctxt, sizeof(ctxt))) { - ERR("Unable to mlock ctxt"); + ERROR("Unable to mlock ctxt"); return 1; } /* Only have to worry about vcpu 0 even for SMP */ if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) { - ERR("Could not get vcpu context"); + ERROR("Could not get vcpu context"); goto out; } shared_info_frame = info.shared_info_frame; @@ -643,13 +643,13 @@ int xc_linux_save(int xc_handle, int io_ /* A cheesy test to see whether the domain contains valid state. 
*/ if (ctxt.ctrlreg[3] == 0) { - ERR("Domain is not in a valid Linux guest OS state"); + ERROR("Domain is not in a valid Linux guest OS state"); goto out; } /* cheesy sanity check */ if ((info.max_memkb >> (PAGE_SHIFT - 10)) > max_mfn) { - ERR("Invalid state record -- pfn count out of range: %lu", + ERROR("Invalid state record -- pfn count out of range: %lu", (info.max_memkb >> (PAGE_SHIFT - 10))); goto out; } @@ -657,7 +657,7 @@ int xc_linux_save(int xc_handle, int io_ /* Map the shared info frame */ if(!(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, shared_info_frame))) { - ERR("Couldn't map live_shinfo"); + ERROR("Couldn't map live_shinfo"); goto out; } @@ -668,7 +668,7 @@ int xc_linux_save(int xc_handle, int io_ live_shinfo->arch.pfn_to_mfn_frame_list_list); if (!live_p2m_frame_list_list) { - ERR("Couldn't map p2m_frame_list_list (errno %d)", errno); + ERROR("Couldn't map p2m_frame_list_list (errno %d)", errno); goto out; } @@ -678,7 +678,7 @@ int xc_linux_save(int xc_handle, int io_ P2M_FLL_ENTRIES); if (!live_p2m_frame_list) { - ERR("Couldn't map p2m_frame_list"); + ERROR("Couldn't map p2m_frame_list"); goto out; } @@ -692,20 +692,20 @@ int xc_linux_save(int xc_handle, int io_ P2M_FL_ENTRIES); if (!live_p2m) { - ERR("Couldn't map p2m table"); + ERROR("Couldn't map p2m table"); goto out; } /* Setup the mfn_to_pfn table mapping */ if(!(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ))) { - ERR("Failed to map live M2P table"); + ERROR("Failed to map live M2P table"); goto out; } /* Get a local copy of the live_P2M_frame_list */ if(!(p2m_frame_list = malloc(P2M_FL_SIZE))) { - ERR("Couldn't allocate p2m_frame_list array"); + ERROR("Couldn't allocate p2m_frame_list array"); goto out; } memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE); @@ -713,8 +713,8 @@ int xc_linux_save(int xc_handle, int io_ /* Canonicalise the pfn-to-mfn table frame-number list. 
*/ for (i = 0; i < max_pfn; i += fpp) { if (!translate_mfn_to_pfn(&p2m_frame_list[i/fpp])) { - ERR("Frame# in pfn-to-mfn frame list is not in pseudophys"); - ERR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp, + ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys"); + ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp, (uint64_t)p2m_frame_list[i/fpp]); goto out; } @@ -726,7 +726,7 @@ int xc_linux_save(int xc_handle, int io_ if (xc_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, NULL, 0, NULL, 0, NULL) < 0) { - ERR("Couldn't enable shadow mode"); + ERROR("Couldn't enable shadow mode"); goto out; } @@ -740,7 +740,7 @@ int xc_linux_save(int xc_handle, int io_ last_iter = 1; if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt)) { - ERR("Domain appears not to have suspended"); + ERROR("Domain appears not to have suspended"); goto out; } @@ -761,20 +761,20 @@ int xc_linux_save(int xc_handle, int io_ to_skip = malloc(BITMAP_SIZE); if (!to_send || !to_fix || !to_skip) { - ERR("Couldn't allocate to_send array"); + ERROR("Couldn't allocate to_send array"); goto out; } memset(to_send, 0xff, BITMAP_SIZE); if (mlock(to_send, BITMAP_SIZE)) { - ERR("Unable to mlock to_send"); + ERROR("Unable to mlock to_send"); return 1; } /* (to fix is local only) */ if (mlock(to_skip, BITMAP_SIZE)) { - ERR("Unable to mlock to_skip"); + ERROR("Unable to mlock to_skip"); return 1; } @@ -785,13 +785,13 @@ int xc_linux_save(int xc_handle, int io_ pfn_batch = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch)); if ((pfn_type == NULL) || (pfn_batch == NULL)) { - ERR("failed to alloc memory for pfn_type and/or pfn_batch arrays"); + ERROR("failed to alloc memory for pfn_type and/or pfn_batch arrays"); errno = ENOMEM; goto out; } if (mlock(pfn_type, MAX_BATCH_SIZE * sizeof(*pfn_type))) { - ERR("Unable to mlock"); + ERROR("Unable to mlock"); goto out; } @@ -817,7 +817,7 @@ int xc_linux_save(int xc_handle, int io_ /* Start writing out the saved-domain record. 
*/ if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) { - ERR("write: max_pfn"); + ERROR("write: max_pfn"); goto out; } @@ -837,13 +837,13 @@ int xc_linux_save(int xc_handle, int io_ !write_exact(io_fd, &chunk_sig, 4) || !write_exact(io_fd, &chunk_sz, sizeof(chunk_sz)) || !write_exact(io_fd, &ctxt, sizeof(ctxt))) { - ERR("write: extended info"); + ERROR("write: extended info"); goto out; } } if (!write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) { - ERR("write: p2m_frame_list"); + ERROR("write: p2m_frame_list"); goto out; } @@ -877,7 +877,7 @@ int xc_linux_save(int xc_handle, int io_ if (!last_iter && xc_shadow_control( xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, max_pfn, NULL, 0, NULL) != max_pfn) { - ERR("Error peeking shadow bitmap"); + ERROR("Error peeking shadow bitmap"); goto out; } @@ -942,12 +942,12 @@ int xc_linux_save(int xc_handle, int io_ if ((region_base = xc_map_foreign_batch( xc_handle, dom, PROT_READ, pfn_type, batch)) == 0) { - ERR("map batch failed"); + ERROR("map batch failed"); goto out; } if (xc_get_pfn_type_batch(xc_handle, dom, batch, pfn_type)) { - ERR("get_pfn_type_batch failed"); + ERROR("get_pfn_type_batch failed"); goto out; } @@ -978,12 +978,12 @@ int xc_linux_save(int xc_handle, int io_ } if(!write_exact(io_fd, &batch, sizeof(unsigned int))) { - ERR("Error when writing to state file (2)"); + ERROR("Error when writing to state file (2)"); goto out; } if(!write_exact(io_fd, pfn_type, sizeof(unsigned long)*j)) { - ERR("Error when writing to state file (3)"); + ERROR("Error when writing to state file (3)"); goto out; } @@ -1013,7 +1013,7 @@ int xc_linux_save(int xc_handle, int io_ goto out; if (ratewrite(io_fd, page, PAGE_SIZE) != PAGE_SIZE) { - ERR("Error when writing to state file (4)"); + ERROR("Error when writing to state file (4)"); goto out; } @@ -1021,7 +1021,7 @@ int xc_linux_save(int xc_handle, int io_ /* We have a normal page: just write it directly. 
*/ if (ratewrite(io_fd, spage, PAGE_SIZE) != PAGE_SIZE) { - ERR("Error when writing to state file (5)"); + ERROR("Error when writing to state file (5)"); goto out; } } @@ -1056,7 +1056,7 @@ int xc_linux_save(int xc_handle, int io_ /* send "-1" to put receiver into debug mode */ if(!write_exact(io_fd, &minusone, sizeof(int))) { - ERR("Error when writing to state file (6)"); + ERROR("Error when writing to state file (6)"); goto out; } @@ -1079,7 +1079,7 @@ int xc_linux_save(int xc_handle, int io_ if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt)) { - ERR("Domain appears not to have suspended"); + ERROR("Domain appears not to have suspended"); goto out; } @@ -1092,7 +1092,7 @@ int xc_linux_save(int xc_handle, int io_ if (xc_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, max_pfn, NULL, 0, &stats) != max_pfn) { - ERR("Error flushing shadow PT"); + ERROR("Error flushing shadow PT"); goto out; } @@ -1110,7 +1110,7 @@ int xc_linux_save(int xc_handle, int io_ /* Zero terminate */ i = 0; if (!write_exact(io_fd, &i, sizeof(int))) { - ERR("Error when writing to state file (6)"); + ERROR("Error when writing to state file (6)"); goto out; } @@ -1125,7 +1125,7 @@ int xc_linux_save(int xc_handle, int io_ } if(!write_exact(io_fd, &j, sizeof(unsigned int))) { - ERR("Error when writing to state file (6a)"); + ERROR("Error when writing to state file (6a)"); goto out; } @@ -1137,7 +1137,7 @@ int xc_linux_save(int xc_handle, int io_ i++; if (j == 1024 || i == max_pfn) { if(!write_exact(io_fd, &pfntab, sizeof(unsigned long)*j)) { - ERR("Error when writing to state file (6b)"); + ERROR("Error when writing to state file (6b)"); goto out; } j = 0; @@ -1148,21 +1148,21 @@ int xc_linux_save(int xc_handle, int io_ /* Canonicalise the suspend-record frame number. */ if ( !translate_mfn_to_pfn(&ctxt.user_regs.edx) ){ - ERR("Suspend record is not in range of pseudophys map"); + ERROR("Suspend record is not in range of pseudophys map"); goto out; } /* Canonicalise each GDT frame number. */ for ( i = 0; i < ctxt.gdt_ents; i += 512 ) { if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) ) { - ERR("GDT frame is not in range of pseudophys map"); + ERROR("GDT frame is not in range of pseudophys map"); goto out; } } /* Canonicalise the page table base pointer. */ if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[3])) ) { - ERR("PT base is not in range of pseudophys map"); + ERROR("PT base is not in range of pseudophys map"); goto out; } ctxt.ctrlreg[3] = @@ -1170,7 +1170,7 @@ int xc_linux_save(int xc_handle, int io_ if (!write_exact(io_fd, &ctxt, sizeof(ctxt)) || !write_exact(io_fd, live_shinfo, PAGE_SIZE)) { - ERR("Error when writing to state file (1)"); + ERROR("Error when writing to state file (1)"); goto out; } diff -r d5a46e4cc340 -r 6492b9b27968 tools/libxc/xc_private.h --- a/tools/libxc/xc_private.h Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/libxc/xc_private.h Sun Oct 22 15:23:52 2006 -0600 @@ -30,6 +30,9 @@ #define DECLARE_SYSCTL struct xen_sysctl sysctl #endif +#undef PAGE_SHIFT +#undef PAGE_SIZE +#undef PAGE_MASK #define PAGE_SHIFT XC_PAGE_SHIFT #define PAGE_SIZE (1UL << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) @@ -55,11 +58,6 @@ #else #define PPRINTF(_f, _a...) #endif - -#define ERR(_f, _a...) do { \ - DPRINTF(_f ": %d\n" , ## _a, errno); \ - fflush(stderr); } \ -while (0) #define ERROR(_m, _a...) 
\ do { \ diff -r d5a46e4cc340 -r 6492b9b27968 tools/libxc/xc_ptrace.c --- a/tools/libxc/xc_ptrace.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/libxc/xc_ptrace.c Sun Oct 22 15:23:52 2006 -0600 @@ -1,5 +1,3 @@ -#define XC_PTRACE_PRIVATE - #include <sys/ptrace.h> #include <sys/wait.h> #include <time.h> diff -r d5a46e4cc340 -r 6492b9b27968 tools/libxc/xc_ptrace.h --- a/tools/libxc/xc_ptrace.h Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/libxc/xc_ptrace.h Sun Oct 22 15:23:52 2006 -0600 @@ -1,9 +1,6 @@ #ifndef XC_PTRACE_ #define XC_PTRACE_ -#include <thread_db.h> - -#ifdef XC_PTRACE_PRIVATE #define X86_CR0_PE 0x00000001 /* Enable Protected Mode (RW) */ #define X86_CR0_PG 0x80000000 /* Paging (RW) */ #define BSD_PAGE_MASK (PAGE_SIZE-1) @@ -160,25 +157,4 @@ struct gdb_regs { } #endif -#endif - -typedef void (*thr_ev_handler_t)(long); - -void xc_register_event_handler( - thr_ev_handler_t h, - td_event_e e); - -long xc_ptrace( - int xc_handle, - enum __ptrace_request request, - uint32_t domid, - long addr, - long data); - -int xc_waitdomain( - int xc_handle, - int domain, - int *status, - int options); - #endif /* XC_PTRACE */ diff -r d5a46e4cc340 -r 6492b9b27968 tools/libxc/xc_ptrace_core.c --- a/tools/libxc/xc_ptrace_core.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/libxc/xc_ptrace_core.c Sun Oct 22 15:23:52 2006 -0600 @@ -1,5 +1,3 @@ -#define XC_PTRACE_PRIVATE - #include <sys/ptrace.h> #include <sys/wait.h> #include "xc_private.h" diff -r d5a46e4cc340 -r 6492b9b27968 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/libxc/xenctrl.h Sun Oct 22 15:23:52 2006 -0600 @@ -16,7 +16,6 @@ #include <stddef.h> #include <stdint.h> -#include <sys/ptrace.h> #include <xen/xen.h> #include <xen/domctl.h> #include <xen/sysctl.h> @@ -116,24 +115,44 @@ typedef struct xc_core_header { #define XC_CORE_MAGIC 0xF00FEBED -long xc_ptrace_core( - int xc_handle, - enum __ptrace_request request, - uint32_t domid, - long addr, - long data, - vcpu_guest_context_t *ctxt); +#ifdef __linux__ + +#include <sys/ptrace.h> +#include <thread_db.h> + void * map_domain_va_core( unsigned long domfd, int cpu, void *guest_va, vcpu_guest_context_t *ctxt); + int xc_waitdomain_core( int xc_handle, int domain, int *status, int options, vcpu_guest_context_t *ctxt); + +typedef void (*thr_ev_handler_t)(long); + +void xc_register_event_handler( + thr_ev_handler_t h, + td_event_e e); + +long xc_ptrace( + int xc_handle, + enum __ptrace_request request, + uint32_t domid, + long addr, + long data); + +int xc_waitdomain( + int xc_handle, + int domain, + int *status, + int options); + +#endif /* __linux__ */ /* * DOMAIN MANAGEMENT FUNCTIONS diff -r d5a46e4cc340 -r 6492b9b27968 tools/libxc/xg_private.c --- a/tools/libxc/xg_private.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/libxc/xg_private.c Sun Oct 22 15:23:52 2006 -0600 @@ -7,6 +7,7 @@ #include <stdlib.h> #include <unistd.h> #include <zlib.h> +#include <strings.h> #include "xg_private.h" diff -r d5a46e4cc340 -r 6492b9b27968 tools/libxc/xg_private.h --- a/tools/libxc/xg_private.h Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/libxc/xg_private.h Sun Oct 22 15:23:52 2006 -0600 @@ -79,10 +79,6 @@ unsigned long csum_page (void * page); #define L4_PAGETABLE_ENTRIES 512 #endif -#define PAGE_SHIFT XC_PAGE_SHIFT -#define PAGE_SIZE (1UL << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) - typedef uint32_t l1_pgentry_32_t; typedef uint32_t l2_pgentry_32_t; typedef uint64_t l1_pgentry_64_t; diff -r d5a46e4cc340 -r 6492b9b27968 tools/misc/Makefile --- a/tools/misc/Makefile Sun 
Oct 22 14:39:15 2006 -0600 +++ b/tools/misc/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -1,7 +1,3 @@ INSTALL = install -INSTALL = install -INSTALL_PROG = $(INSTALL) -m0755 -INSTALL_DIR = $(INSTALL) -d -m0755 - XEN_ROOT=../.. include $(XEN_ROOT)/tools/Rules.mk @@ -24,9 +20,6 @@ all: build .PHONY: build build: $(TARGETS) $(MAKE) -C miniterm -ifeq ($(CONFIG_MBOOTPACK),y) - $(MAKE) -C mbootpack -endif $(MAKE) -C lomount .PHONY: install @@ -38,14 +31,11 @@ install: build $(MAKE) -C lomount install # No sense in installing miniterm on the Xen box. # $(MAKE) -C miniterm install -# Likewise mbootpack -# $(MAKE) -C mbootpack install .PHONY: clean clean: $(RM) *.o $(TARGETS) *~ $(MAKE) -C miniterm clean - $(MAKE) -C mbootpack clean $(MAKE) -C lomount clean %.o: %.c $(HDRS) Makefile diff -r d5a46e4cc340 -r 6492b9b27968 tools/misc/lomount/Makefile --- a/tools/misc/lomount/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/misc/lomount/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -1,8 +1,3 @@ INSTALL = install -INSTALL = install -INSTALL_PROG = $(INSTALL) -m0755 -INSTALL_DIR = $(INSTALL) -d -m0755 -INSTALL_DATA = $(INSTALL) -m0644 - XEN_ROOT=../../.. include $(XEN_ROOT)/tools/Rules.mk diff -r d5a46e4cc340 -r 6492b9b27968 tools/misc/miniterm/Makefile --- a/tools/misc/miniterm/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/misc/miniterm/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -1,9 +1,5 @@ XEN_ROOT:=../../.. XEN_ROOT:=../../.. include $(XEN_ROOT)/tools/Rules.mk - -INSTALL = install -INSTALL_PROG = $(INSTALL) -m0755 -INSTALL_DIR = $(INSTALL) -d -m0755 TARGET = miniterm diff -r d5a46e4cc340 -r 6492b9b27968 tools/misc/xend --- a/tools/misc/xend Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/misc/xend Sun Oct 22 15:23:52 2006 -0600 @@ -19,6 +19,9 @@ The daemon should reconnect to device control interfaces and recover its state when restarted. + + On Solaris, the daemons are SMF managed, and you should not attempt + to start xend by hand. 
""" import os import os.path @@ -108,9 +111,10 @@ def main(): if not sys.argv[1:]: print 'usage: %s {start|stop|restart}' % sys.argv[0] elif sys.argv[1] == 'start': - start_xenstored() - start_consoled() - start_blktapctrl() + if os.uname()[0] != "SunOS": + start_xenstored() + start_consoled() + start_blktapctrl() return daemon.start() elif sys.argv[1] == 'trace_start': start_xenstored() diff -r d5a46e4cc340 -r 6492b9b27968 tools/pygrub/Makefile --- a/tools/pygrub/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/pygrub/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -6,16 +6,16 @@ all: build all: build .PHONY: build build: - CFLAGS="$(CFLAGS)" python setup.py build + CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py build .PHONY: install ifndef XEN_PYTHON_NATIVE_INSTALL install: all - CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" --prefix="" + CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" --prefix="" $(INSTALL_DIR) -p $(DESTDIR)/var/lib/xen else install: all - CFLAGS="$(CFLAGS)" python setup.py install --root="$(DESTDIR)" + CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --root="$(DESTDIR)" $(INSTALL_DIR) -p $(DESTDIR)/var/lib/xen endif diff -r d5a46e4cc340 -r 6492b9b27968 tools/python/Makefile --- a/tools/python/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/python/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -6,15 +6,15 @@ all: build .PHONY: build build: - CFLAGS="$(CFLAGS)" python setup.py build + CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py build .PHONY: install ifndef XEN_PYTHON_NATIVE_INSTALL install: all - CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" --prefix="" --force + CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" --prefix="" --force else install: all - CFLAGS="$(CFLAGS)" python setup.py install --root="$(DESTDIR)" --force + CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --root="$(DESTDIR)" --force endif .PHONY: test diff -r d5a46e4cc340 -r 6492b9b27968 tools/python/xen/util/auxbin.py --- a/tools/python/xen/util/auxbin.py Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/python/xen/util/auxbin.py Sun Oct 22 15:23:52 2006 -0600 @@ -21,7 +21,7 @@ LIB_BIN_SUFFIX = "xen/bin" LIB_BIN_SUFFIX = "xen/bin" ## The architectures on which the LIB_64 directory is used. This -# deliberately excludes ia64 and ppc64. +# deliberately excludes ia64 and ppc64, and Solaris. LIB_64_ARCHS = [ 'x86_64', 's390x', 'sparc64'] diff -r d5a46e4cc340 -r 6492b9b27968 tools/python/xen/util/security.py --- a/tools/python/xen/util/security.py Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/python/xen/util/security.py Sun Oct 22 15:23:52 2006 -0600 @@ -31,6 +31,7 @@ policy_dir_prefix = "/etc/xen/acm-securi policy_dir_prefix = "/etc/xen/acm-security/policies" res_label_filename = policy_dir_prefix + "/resource_labels" boot_filename = "/boot/grub/menu.lst" +altboot_filename = "/boot/grub/grub.conf" xensec_xml2bin = "/usr/sbin/xensec_xml2bin" xensec_tool = "/usr/sbin/xensec_tool" @@ -596,11 +597,33 @@ def get_res_security_details(resource): return (label, ssidref, policy) +def unify_resname(resource): + """Makes all resource locations absolute. 
In case of physical + resources, '/dev/' is added to local file names""" + + # sanity check on resource name + (type, resfile) = resource.split(":") + if type == "phy": + if not resfile.startswith("/"): + resfile = "/dev/" + resfile + + #file: resources must specified with absolute path + if (not resfile.startswith("/")) or (not os.path.exists(resfile)): + err("Invalid resource.") + + # from here on absolute file names with resources + resource = type + ":" + resfile + return resource + + def res_security_check(resource, domain_label): """Checks if the given resource can be used by the given domain label. Returns 1 if the resource can be used, otherwise 0. """ rtnval = 1 + + #build canonical resource name + resource = unify_resname(resource) # if security is on, ask the hypervisor for a decision if on(): diff -r d5a46e4cc340 -r 6492b9b27968 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/python/xen/xend/XendDomainInfo.py Sun Oct 22 15:23:52 2006 -0600 @@ -391,6 +391,8 @@ def parseConfig(config): else: log.warn("Ignoring malformed and deprecated config option " "restart = %s", restart) + + result['start_time'] = get_cfg('start_time', float) log.debug("parseConfig: result is %s", result) return result diff -r d5a46e4cc340 -r 6492b9b27968 tools/python/xen/xend/XendRoot.py --- a/tools/python/xen/xend/XendRoot.py Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/python/xen/xend/XendRoot.py Sun Oct 22 15:23:52 2006 -0600 @@ -30,6 +30,7 @@ import string import string import sys +import osdep import XendLogging from XendError import XendError @@ -46,10 +47,10 @@ class XendRoot: config_var = "XEND_CONFIG" """Where network control scripts live.""" - network_script_dir = "/etc/xen/scripts" + network_script_dir = osdep.scripts_dir """Where block control scripts live.""" - block_script_dir = "/etc/xen/scripts" + block_script_dir = osdep.scripts_dir """Default path to the log file. """ logfile_default = "/var/log/xen/xend.log" @@ -95,6 +96,8 @@ class XendRoot: dom0_min_mem_default = '0' dom0_vcpus_default = '0' + + vncpasswd_default = None """Default interface to listen for VNC connections on""" xend_vnc_listen_default = '127.0.0.1' @@ -278,6 +281,10 @@ class XendRoot: def get_vnclisten_address(self): return self.get_config_value('vnc-listen', self.xend_vnc_listen_default) + def get_vncpasswd_default(self): + return self.get_config_value('vncpasswd', + self.vncpasswd_default) + def instance(): """Get an instance of XendRoot. Use this instead of the constructor. diff -r d5a46e4cc340 -r 6492b9b27968 tools/python/xen/xend/arch.py --- a/tools/python/xen/xend/arch.py Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/python/xen/xend/arch.py Sun Oct 22 15:23:52 2006 -0600 @@ -25,6 +25,7 @@ _types = { "i586": "x86", "i686": "x86", "x86_64": "x86", + "i86pc": "x86", "ia64": "ia64", "ppc": "powerpc", "ppc64": "powerpc", diff -r d5a46e4cc340 -r 6492b9b27968 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/python/xen/xend/image.py Sun Oct 22 15:23:52 2006 -0600 @@ -20,6 +20,7 @@ import os, string import os, string import re import math +import signal import xen.lowlevel.xc from xen.xend import sxp @@ -312,6 +313,11 @@ class HVMImageHandler(ImageHandler): if v: ret.append("-%s" % a) ret.append("%s" % v) + + if a in ['fda', 'fdb' ]: + if v: + if not os.path.isfile(v): + raise VmError("Floppy file %s does not exist." 
% v) log.debug("args: %s, val: %s" % (a,v)) # Handle disk/network related options @@ -349,23 +355,49 @@ class HVMImageHandler(ImageHandler): sdl = sxp.child_value(config, 'sdl') ret = [] nographic = sxp.child_value(config, 'nographic') + + # get password from VM config (if password omitted, None) + vncpasswd_vmconfig = sxp.child_value(config, 'vncpasswd') + if nographic: ret.append('-nographic') + # remove password + if vncpasswd_vmconfig: + config.remove(['vncpasswd', vncpasswd_vmconfig]) return ret + if vnc: vncdisplay = sxp.child_value(config, 'vncdisplay', int(self.vm.getDomid())) + vncunused = sxp.child_value(config, 'vncunused') if vncunused: ret += ['-vncunused'] else: ret += ['-vnc', '%d' % vncdisplay] + ret += ['-k', 'en-us'] + vnclisten = sxp.child_value(config, 'vnclisten') if not(vnclisten): - vnclisten = xen.xend.XendRoot.instance().get_vnclisten_address() + vnclisten = (xen.xend.XendRoot.instance(). + get_vnclisten_address()) if vnclisten: ret += ['-vnclisten', vnclisten] + + vncpasswd = vncpasswd_vmconfig + if vncpasswd is None: + vncpasswd = (xen.xend.XendRoot.instance(). + get_vncpasswd_default()) + if vncpasswd is None: + raise VmError('vncpasswd is not set up in ' + + 'VMconfig and xend-config.') + if vncpasswd != '': + self.vm.storeVm("vncpasswd", vncpasswd) + + # remove password + config.remove(['vncpasswd', vncpasswd_vmconfig]) + return ret def createDeviceModel(self): @@ -390,7 +422,6 @@ class HVMImageHandler(ImageHandler): def destroy(self): self.unregister_shutdown_watch(); - import signal if not self.pid: return os.kill(self.pid, signal.SIGKILL) diff -r d5a46e4cc340 -r 6492b9b27968 tools/python/xen/xend/server/SrvDaemon.py --- a/tools/python/xen/xend/server/SrvDaemon.py Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/python/xen/xend/server/SrvDaemon.py Sun Oct 22 15:23:52 2006 -0600 @@ -17,6 +17,7 @@ import xen.lowlevel.xc import xen.lowlevel.xc from xen.xend.XendLogging import log +from xen.xend import osdep import relocate import SrvServer @@ -168,8 +169,14 @@ class Daemon: # ready to receive requests. All subsequent restarts we don't # want this behaviour, or the pipe will eventually fill up, so # we just pass None into run in subsequent cases (by clearing w - # in the parent of the first fork). + # in the parent of the first fork). On some operating systems, + # restart is managed externally, so we won't fork, and just exit. 
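(Illustrative sketch, not part of the patch: the SrvDaemon.py hunk above skips the fork/monitor loop when the platform's osdep module reports that restarts are handled externally, e.g. by the OS service framework. The osdep.xend_autorestart flag is the one referenced in the hunk; the helper below and its signature are hypothetical.)

    import os

    def start_daemon(run, osdep):
        if not osdep.xend_autorestart:
            # Restarts are managed externally; run in the foreground.
            run(None)
            return
        while True:
            pid = os.fork()
            if pid:
                os.waitpid(pid, 0)   # parent: respawn the child if it dies
            else:
                run(None)            # child: do the real work
                os._exit(0)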
while True: + + if not osdep.xend_autorestart: + self.run(os.fdopen(w, 'w')) + break + pid = self.fork_pid() if pid: if w is not None: diff -r d5a46e4cc340 -r 6492b9b27968 tools/python/xen/xend/server/blkif.py --- a/tools/python/xen/xend/server/blkif.py Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/python/xen/xend/server/blkif.py Sun Oct 22 15:23:52 2006 -0600 @@ -81,6 +81,9 @@ class BlkifController(DevController): 'acm_policy' : policy}) devid = blkif.blkdev_name_to_number(dev) + if not devid: + raise VmError('Unable to find number for device (%s)' % (dev)) + front = { 'virtual-device' : "%i" % devid, 'device-type' : dev_type } diff -r d5a46e4cc340 -r 6492b9b27968 tools/python/xen/xm/addlabel.py --- a/tools/python/xen/xm/addlabel.py Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/python/xen/xm/addlabel.py Sun Oct 22 15:23:52 2006 -0600 @@ -72,13 +72,8 @@ def add_resource_label(label, resource, # sanity check: make sure this label can be instantiated later on ssidref = security.label2ssidref(label, policyref, 'res') - # sanity check on resource name - (type, file) = resource.split(":") - if type == "phy": - file = "/dev/" + file - if not os.path.exists(file): - print "Invalid resource '"+resource+"'" - return + #build canonical resource name + resource = security.unify_resname(resource) # see if this resource is already in the file access_control = {} diff -r d5a46e4cc340 -r 6492b9b27968 tools/python/xen/xm/cfgbootpolicy.py --- a/tools/python/xen/xm/cfgbootpolicy.py Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/python/xen/xm/cfgbootpolicy.py Sun Oct 22 15:23:52 2006 -0600 @@ -14,6 +14,7 @@ #============================================================================ # Copyright (C) 2006 International Business Machines Corp. # Author: Reiner Sailer <sailer@xxxxxxxxxx> +# Contributions: Stefan Berger <stefanb@xxxxxxxxxx> #============================================================================ """Configuring a security policy into the boot configuration """ @@ -24,67 +25,60 @@ import os, stat import os, stat import shutil import string -from xen.util.security import ACMError, err -from xen.util.security import policy_dir_prefix, boot_filename, xen_title_re -from xen.util.security import any_title_re, xen_kernel_re, kernel_ver_re, any_module_re +import re +from xen.util.security import err +from xen.util.security import policy_dir_prefix, xen_title_re +from xen.util.security import boot_filename, altboot_filename +from xen.util.security import any_title_re, xen_kernel_re, any_module_re from xen.util.security import empty_line_re, binary_name_re, policy_name_re from xen.xm.opts import OptionError def help(): return """ - Adds a 'module' line to the Xen grub.conf entry - so that xen boots into a specific access control - policy. If kernelversion is not given, then this - script tries to determine it by looking for a grub - entry with a line kernel xen.* If there are multiple - Xen entries, then it must be called with an explicit - version (it will fail otherwise).\n""" + Adds a 'module' line to the Xen grub configuration file entry + so that Xen boots with a specific access control policy. If + kernelversion is not given, then this script tries to determine + it by looking for a title starting with \"XEN\". 
If there are + multiple entries matching, then it must be called with the unique + beginning of the title's name.\n""" -def determine_kernelversion(user_specified): - within_xen_title = 0 - within_xen_entry = 0 - version_list = [] - guess_version = None +def strip_title(line): + """ + strips whitespace left and right and cuts 'title' + """ + s_title = string.strip(line) + pos = string.index(s_title, "title") + if pos >= 0: + return s_title[pos+6:] + else: + return s_title - grub_fd = open(boot_filename) - for line in grub_fd: - if xen_title_re.match(line): - within_xen_title = 1 - elif within_xen_title and xen_kernel_re.match(line): - within_xen_entry = 1 - elif within_xen_title and within_xen_entry and kernel_ver_re.match(line): - for i in line.split(): - if (i.find("vmlinuz-") >= 0): - # skip start until "vmlinuz-" - guess_version = i[i.find("vmlinuz-") + len("vmlinuz-"):] - if user_specified: - if (guess_version == user_specified): - version_list.append(guess_version) - else: - version_list.append(guess_version) - elif len(line.split()) > 0: - if line.split()[0] == "title": - within_xen_title = 0 - within_xen_entry = 0 - if len(version_list) > 1: - err("Cannot decide between entries for kernels %s" % version_list) - elif len(version_list) == 0: - err("Cannot find a boot entry candidate (please create a Xen boot entry first).") - else: - return version_list[0] - - -def insert_policy(boot_file, kernel_version, policy_name): +def insert_policy(boot_file, alt_boot_file, user_title, policy_name): """ inserts policy binary file as last line of the grub entry matching the kernel_version version """ + if user_title: + #replace "(" by "\(" and ")" by "\)" for matching + user_title = string.replace(user_title, "(", "\(") + user_title = string.replace(user_title, ")", "\)") + user_title_re = re.compile("\s*title\s+.*%s" \ + % user_title, re.IGNORECASE) + else: + user_title_re = xen_title_re + within_xen_title = 0 within_xen_entry = 0 insert_at_end_of_entry = 0 path_prefix = '' + this_title = '' + extended_titles = [] (tmp_fd, tmp_grub) = tempfile.mkstemp() + #First check whether menu.lst exists + if not os.path.isfile(boot_file): + #take alternate boot file (grub.conf) instead + boot_file = alt_boot_file #follow symlink since menue.lst might be linked to grub.conf if stat.S_ISLNK(os.lstat(boot_file)[stat.ST_MODE]): new_name = os.readlink(boot_file) @@ -95,30 +89,33 @@ def insert_policy(boot_file, kernel_vers path[len(path)-1] = new_name boot_file = '/'.join(path) if not os.path.exists(boot_file): - err("Boot file \'" + boot_file + "\' not found.") + err("Boot file \'%s\' not found." 
% boot_file) grub_fd = open(boot_file) for line in grub_fd: - if xen_title_re.match(line): + if user_title_re.match(line): + this_title = strip_title(line) within_xen_title = 1 elif within_xen_title and xen_kernel_re.match(line): - within_xen_entry = 1 - elif within_xen_title and within_xen_entry and kernel_ver_re.match(line): - for i in line.split(): - if (i.find("vmlinuz-") >= 0): - if kernel_version == i[i.find("vmlinuz-") + len("vmlinuz-"):]: - insert_at_end_of_entry = 1 - path_prefix = i[0:i.find("vmlinuz-")] + insert_at_end_of_entry = 1 + #use prefix from xen.gz path for policy + path_prefix = line.split()[1] + idx = path_prefix.rfind('/') + if idx >= 0: + path_prefix = path_prefix[0:idx+1] + else: + path_prefix = '' elif any_module_re.match(line) and insert_at_end_of_entry: if binary_name_re.match(line): #delete existing policy module line line='' elif any_title_re.match(line): within_xen_title = 0 - within_xen_entry = 0 - if (empty_line_re.match(line) or any_title_re.match(line)) and insert_at_end_of_entry: + if (empty_line_re.match(line) or any_title_re.match(line)) and \ + insert_at_end_of_entry: #newline or new title: we insert the policy module line here os.write(tmp_fd, "\tmodule " + path_prefix + policy_name + ".bin\n") + extended_titles.append(this_title) insert_at_end_of_entry = 0 #write the line that was read (except potential existing policy entry) os.write(tmp_fd, line) @@ -126,27 +123,36 @@ def insert_policy(boot_file, kernel_vers if insert_at_end_of_entry: #last entry, no empty line at end of file os.write(tmp_fd, "\tmodule " + path_prefix + policy_name + ".bin\n") + extended_titles.append(this_title) - #temp file might be destroyed when closing it, first copy ... + #if more than one entry was changed, abort + if len(extended_titles) > 1: + err("Following boot entries matched: %s. \nPlease specify " + "unique part of the boot title." % extended_titles) + if len(extended_titles) == 0: + err("Boot entry not found. Please specify unique part " + "of the boot title.") + + #temp file might be destroyed when closing it, first copy it shutil.move(boot_file, boot_file+"_save") shutil.copyfile(tmp_grub, boot_file) os.close(tmp_fd) - #temp file did not disappear on my system ... 
+ #sometimes the temp file does not disappear try: os.remove(tmp_grub) except: pass - + return extended_titles[0] def main(argv): user_kver = None - policy = None + user_title = None if len(argv) == 2: policy = argv[1] elif len(argv) == 3: policy = argv[1] - user_kver = argv[2] + user_title = argv[2] else: raise OptionError('Invalid number of arguments') @@ -167,9 +173,10 @@ def main(argv): dst_binary_policy_file = "/boot/" + policy + ".bin" shutil.copyfile(src_binary_policy_file, dst_binary_policy_file) - kernel_version = determine_kernelversion(user_kver) - insert_policy(boot_filename, kernel_version, policy) - print "Boot entry created and \'%s\' copied to /boot" % (policy + ".bin") + entryname = insert_policy(boot_filename, altboot_filename, + user_title, policy) + print "Boot entry '%s' extended and \'%s\' copied to /boot" \ + % (entryname, policy + ".bin") if __name__ == '__main__': try: @@ -177,4 +184,3 @@ if __name__ == '__main__': except Exception, e: sys.stderr.write('Error: ' + str(e) + '\n') sys.exit(-1) - diff -r d5a46e4cc340 -r 6492b9b27968 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/python/xen/xm/create.py Sun Oct 22 15:23:52 2006 -0600 @@ -103,6 +103,10 @@ gopts.opt('console_autoconnect', short=' gopts.opt('console_autoconnect', short='c', fn=set_true, default=0, use="Connect to the console after the domain is created.") + +gopts.var('vncpasswd', val='NAME', + fn=set_value, default=None, + use="Password for VNC console on HVM domain.") gopts.var('vncviewer', val='no|yes', fn=set_bool, default=None, @@ -643,6 +647,7 @@ def configure_hvm(config_image, vals): for a in args: if (vals.__dict__[a]): config_image.append([a, vals.__dict__[a]]) + config_image.append(['vncpasswd', vals.vncpasswd]) def run_bootloader(vals, config_image): if not os.access(vals.bootloader, os.X_OK): diff -r d5a46e4cc340 -r 6492b9b27968 tools/python/xen/xm/getlabel.py --- a/tools/python/xen/xm/getlabel.py Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/python/xen/xm/getlabel.py Sun Oct 22 15:23:52 2006 -0600 @@ -33,6 +33,9 @@ def get_resource_label(resource): def get_resource_label(resource): """Gets the resource label """ + #build canonical resource name + resource = security.unify_resname(resource) + # read in the resource file file = security.res_label_filename try: diff -r d5a46e4cc340 -r 6492b9b27968 tools/python/xen/xm/rmlabel.py --- a/tools/python/xen/xm/rmlabel.py Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/python/xen/xm/rmlabel.py Sun Oct 22 15:23:52 2006 -0600 @@ -37,6 +37,9 @@ def rm_resource_label(resource): def rm_resource_label(resource): """Removes a resource label from the global resource label file. """ + #build canonical resource name + resource = security.unify_resname(resource) + # read in the resource file file = security.res_label_filename try: diff -r d5a46e4cc340 -r 6492b9b27968 tools/security/policy.txt --- a/tools/security/policy.txt Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/security/policy.txt Sun Oct 22 15:23:52 2006 -0600 @@ -1,130 +1,12 @@ ## -# policy.txt <description to the Xen access control architecture> +# policy.txt <description to the sHype/Xen access control architecture> # # Author: -# Reiner Sailer 08/15/2005 <sailer@xxxxxxxxxxxxxx> -# -# -# This file gives an overview of the security policies currently -# provided and also gives some reasoning about how to assign -# labels to domains. +# Reiner Sailer 08/30/2006 <sailer@xxxxxxxxxxxxxx> +# +# +# This file gives an overview of the example security policies. 
## - -Xen access control policies - - -General explanation of supported security policies: -===================================================== - -We have implemented the mandatory access control architecture of our -hypervisor security architecture (sHype) for the Xen hypervisor. It -controls communication (in Xen: event channels, grant tables) between -Virtual Machines (from here on called domains) and through this the -virtual block devices, networking, and shared memory are implemented -on top of these communication means. While we have implemented the -described policies and access control architecture for other -hypervisor systems, we will describe below specifically its -implementation and use in the Xen hypervisor. The policy enforcement -is called mandatory regarding user domains since the policy it is -given by the security administration and enforced independently of the -user domains by the Xen hypervisor in cooperation with the domain -management. - -The access control architecture consists of three parts: - -i) The access control policy determines the "command set" of the ACM -and the hooks with which they can be configured to constrain the -sharing of virtual resources. The current access control architecture -implemented for Xen supports two policies: Chinese Wall and Simple -Type Enforcement, which we describe in turn below. - - -ii) The actually enforced policy instantiation uses the policy -language (i) to configure the Xen access control in a way that suits -the specific application (home desktop environment, company desktop, -Web server system, etc.). We have defined an exemplary policy -instantiation for Chinese Wall (chwall policy) and Simple Type -Enforcement (ste policy) for a desktop system. We offer these policies -in combination since they are controlling orthogonal events. - - -iii) The access control module (ACM) and related hooks are part of the -core hypervisor and their controls cannot be bypassed by domains. The -ACM and hooks are the active security components. We refer to -publications that describe how access control is enforced in the Xen -hypervisor using the ACM (access decision) and the hooks (decision -enforcement) inserted into the setup of event channels and grant -tables, and into domain operations (create, destroy, save, restore, -migrate). These controls decide based on the active policy -configuration (see i. and ii.) if the operation proceeds of if the -operation is aborted (denied). - -In general, security policy instantiations in the Xen access control -framework are defined by XML policy files. Each security policy has -exactly one file including all the information the hypervisor needs to -enforce the policy. - -The name of a policy is unique and consists of a colon-separated list -of names, which can be translated into the location (subtree) where -this policy must be located. The last part of the name is the file -name pre-fix for the policy xml file. The preceding name parts are -translated into the local path relative to the global policy root -(/etc/xen/acm-security/policies) pointing to the policy xml file. For -example: example.chwall_ste.client_v1 denotes the policy file -example/chwall_ste/client_v1-security_policy.xml relative to the -global policy root directory. 
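(Illustrative sketch, not part of the patch: the policy-name to file-name translation described in the paragraph above. The policy root directory and the "-security_policy.xml" suffix come from the text; the helper itself is hypothetical.)

    import os.path

    POLICY_ROOT = "/etc/xen/acm-security/policies"

    def policy_xml_path(name):
        # "example.chwall_ste.client_v1" ->
        # ".../example/chwall_ste/client_v1-security_policy.xml"
        parts = name.split(".")
        subdir = os.path.join(POLICY_ROOT, *parts[:-1])
        return os.path.join(subdir, parts[-1] + "-security_policy.xml")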
- -Every security policy has its own sub-directory under the global -policy root directory /etc/xen/acm-security/policies, which is -installed during the Xen installation or can be manually installed -(when switching from a "security disabled" Xen to a "security enabled" -Xen AFTER configuring security, see install.txt) by the command -sequence: - - cd "Xen-root"/tools/security/policies; make install - -We will describe those files for our example policy (Chinese Wall and -Simple Type Enforcement) in more detail as we go along. Eventually, we -will move towards a system installation where the policies will reside -under /etc. - - -CHINESE WALL -============ - -The Chinese Wall policy enables the user to define "which workloads -(domain payloads) cannot run on a single physical system at the same -time". Why would we want to prevent workloads from running at the same -time on the same system? This supports requirements that can (but -don't have to) be rooted in the measure of trust into the isolation of -different domains that share the same hardware. Since the access -control architecture aims at high performance and non-intrusive -implementation, it currently does not address covert (timing) channels -and aims at medium assurance. Users can apply the Chinese Wall policy -to guarantee an air-gap between very sensitive payloads both regarding -covert information channels and regarding resource starvation. - -To enable the CW control, each domain is labeled with a set of Chinese -Wall types and CW Conflict Sets are defined which include those CW -types that cannot run simultaneously on the same hardware. This -interpretation of conflict sets is the only policy rule for the Chines -Wall policy. - -This is enforced by controlling the start of domains according to -their assigned CW worload types. Domains with Chinese Wall types that -appear in a common conflict set are running mutually exclusive on a -platform, i.e., once a domain with one of the cw-types of a conflict -set is running, no domain with another cw-type of the same conflict -set can start until the first domain is destroyed, paused, or migrated -away from the physical system (this assumes that such a partition can -no longer be observed). The idea is to assign cw-types according to -the type of payload that a domain runs and to use the Chinese Wall -policy to ensure that payload types can be differentiated by the -hypervisor and can be prevented from being executed on the same system -at the same time. Using the flexible CW policy maintains system -consolidation and workload-balancing while introducing guaranteed -constraints where necessary. - Example of a Chinese Wall Policy Instantiation ---------------------------------------------- @@ -233,13 +115,12 @@ with all domains during their setup, and with all domains during their setup, and intercepts all communication between domains. Consequently, Dom0 needs to be assigned all types used and must be completely trusted to maintain the separation of -informatio ncoming from domains with different STE types. Thus a +information coming from domains with different STE types. Thus a refactoring of Dom0 is recommended for stronger confinement guarantees. Domain --> RESOURCES Access ''''''''''''''''''''''''''' -(current work) We define for each resource that we want to distinguish a separate STE type. Each STE type is assigned to the respective resource and to @@ -266,8 +147,7 @@ maximum security benefit from sHype. 
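(Illustrative sketch, not part of the patch: the Chinese Wall rule described above, i.e. a new domain may not start if one of its CW types shares a conflict set with a different CW type that is already active. Data layout and names here are hypothetical; the real check is performed by the ACM inside the hypervisor and additionally tracks per-type reference counts.)

    def chwall_may_start(new_types, running_types, conflict_sets):
        """new_types: CW types of the domain to be started;
        running_types: CW types of all currently running domains;
        conflict_sets: iterable of sets of mutually exclusive CW types."""
        new_types = set(new_types)
        running_types = set(running_types)
        for cset in conflict_sets:
            cset = set(cset)
            active = cset & running_types
            wanted = cset & new_types
            # Blocked if another type of the same conflict set is active.
            if active and (wanted - active):
                return False
        return True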
Example of a Simple Type Enforcement Policy Instantiation --------------------------------------------------------- - -We define the following types: +The example policies define the following types: * ste_SystemManagement identifies workloads (and domains that runs them) that must share information to accomplish the management of the @@ -384,19 +264,18 @@ co-operatively enforce the policy. In th co-operatively enforce the policy. In the storage domain example, we have three components that co-operate: -1. The ACM module inside the hypervisor enforces: communication between -user domains and the storage domain (only domains including types -ste_PersonalFinances or ste_InternetInsecure can communicate with the -storage domain and request access to logical resource). This confines -the sharing to the types assigned to the storage domain. - -2. The domain management will enforce (work in progress): assignment of -real resources (hda) to domains (storage domain) that share a -type with the resource. - -3. If the storage domain serves multiple STE types (as in our example), -it enforces (work in progress): that domains can access (mount) -logical resources only if they share an STE type with the respective +1. The ACM module inside the hypervisor enforces: communication +between user domains and the storage domain (only domains including +types ste_PersonalFinances or ste_InternetInsecure can communicate +with the storage domain and request access to logical resource). This +confines the sharing to the types assigned to the storage domain. + +2. The domain management enforces: assignment of real resources (hda) +to domains (storage domain) that share a type with the resource. + +3. If the storage domain serves multiple STE types (as in our +example), it enforces: that domains can access (mount) logical +resources only if they share an STE type with the respective resource. In our example, domains with the STE type ste_PersonalFinances can request access (mount) to logical resource hda1 from the storage domain. @@ -406,8 +285,8 @@ drive hda for serving logical disk parti drive hda for serving logical disk partitions exclusively to dom_HomeBanking and dom_Fun. -Similary, network domains can confine access to the network or -network communication between user domains. +Similary, network domains can confine access to the network or network +communication between user domains. As a result, device domains (e.g., storage domain, network domain) must be simple and small to ensure their correct co-operation in the diff -r d5a46e4cc340 -r 6492b9b27968 tools/security/readme.txt --- a/tools/security/readme.txt Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/security/readme.txt Sun Oct 22 15:23:52 2006 -0600 @@ -1,34 +1,33 @@ ## -# readme.txt <description to the xen access control architecture> +# readme.txt <description to the sHype/Xen access control architecture> # # Author: -# Reiner Sailer 08/15/2005 <sailer@xxxxxxxxxxxxxx> +# Reiner Sailer 08/30/2006 <sailer@xxxxxxxxxxxxxx> # # # This file is a toc for information regarding # the access control policy and tools in Xen. ## -1. 'xm' man page +1. Xen User Guide + + describes how to configure, install, and deploy the sHype Access + Control Module in Xen. See chapter "sHype/Xen Access Control". + +2. 'xm' man page describes the commands related to Xen management, including the commands to manage security policies and labels. Read the access - control subcommand section of the xm manual first. If it is not - built by default, check install.txt. 
+ control subcommand section of the xm manual first. -2. policy.txt: +3. policy.txt - describes the general reasoning and examples for access - control policies in Xen + describes examples for access control policies in Xen. First read + the policy description in the Xen User Guide. -3. install.txt +4. policytools.txt - describes the activation of the access control framework - in Xen - -4. example.txt - - describes the available tools for managing security policies - in Xen and the tools to label domains + describes the available tools for creating and managing security + policies in Xen. diff -r d5a46e4cc340 -r 6492b9b27968 tools/security/secpol_tool.c --- a/tools/security/secpol_tool.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/security/secpol_tool.c Sun Oct 22 15:23:52 2006 -0600 @@ -42,6 +42,8 @@ #define PERROR(_m, _a...) \ fprintf(stderr, "ERROR: " _m " (%d = %s)\n" , ## _a , \ errno, strerror(errno)) + +#define ALIGN8(x) (void *)(((long)(x) + 7) & ~7) void usage(char *progname) { @@ -182,14 +184,14 @@ void acm_dump_policy_buffer(void *buf, i ntohl(pol->secondary_buffer_offset)); switch (ntohl(pol->primary_policy_code)) { case ACM_CHINESE_WALL_POLICY: - acm_dump_chinesewall_buffer(buf + - ntohl(pol->primary_buffer_offset), + acm_dump_chinesewall_buffer(ALIGN8(buf + + ntohl(pol->primary_buffer_offset)), ntohl(pol->len) - ntohl(pol->primary_buffer_offset)); break; case ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY: - acm_dump_ste_buffer(buf + ntohl(pol->primary_buffer_offset), + acm_dump_ste_buffer(ALIGN8(buf + ntohl(pol->primary_buffer_offset)), ntohl(pol->len) - ntohl(pol->primary_buffer_offset)); break; @@ -204,14 +206,14 @@ void acm_dump_policy_buffer(void *buf, i switch (ntohl(pol->secondary_policy_code)) { case ACM_CHINESE_WALL_POLICY: - acm_dump_chinesewall_buffer(buf + - ntohl(pol->secondary_buffer_offset), + acm_dump_chinesewall_buffer(ALIGN8(buf + + ntohl(pol->secondary_buffer_offset)), ntohl(pol->len) - ntohl(pol->secondary_buffer_offset)); break; case ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY: - acm_dump_ste_buffer(buf + ntohl(pol->secondary_buffer_offset), + acm_dump_ste_buffer(ALIGN8(buf + ntohl(pol->secondary_buffer_offset)), ntohl(pol->len) - ntohl(pol->secondary_buffer_offset)); break; diff -r d5a46e4cc340 -r 6492b9b27968 tools/security/secpol_xml2bin.c --- a/tools/security/secpol_xml2bin.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/security/secpol_xml2bin.c Sun Oct 22 15:23:52 2006 -0600 @@ -979,13 +979,15 @@ unsigned char *write_policy_reference_bi unsigned char *buf, *ptr; struct acm_policy_reference_buffer *pr_header; u_int32_t len; + u_int32_t name_len; if (policy_reference_name == NULL) { printf("ERROR: No policy reference name found.\n"); exit(EXIT_FAILURE); } - len = (sizeof(struct acm_policy_reference_buffer) + - strlen(policy_reference_name) + 1); + name_len = strlen(policy_reference_name) + 1; /* strend '\0' */ + len = sizeof(struct acm_policy_reference_buffer) + name_len; + len = (len + 7) & ~7; /* Alignment. 
*/ buf = malloc(len); ptr = buf; @@ -994,9 +996,9 @@ unsigned char *write_policy_reference_bi ("ERROR: out of memory allocating label reference buffer.\n"); exit(EXIT_FAILURE); } + memset (buf, 0, len); pr_header = (struct acm_policy_reference_buffer *) buf; - pr_header->len = - htonl(strlen(policy_reference_name) + 1 /* strend \'0' */ ); + pr_header->len = htonl(name_len); ptr += sizeof(struct acm_policy_reference_buffer); strcpy((char *) ptr, policy_reference_name); diff -r d5a46e4cc340 -r 6492b9b27968 tools/vnet/doc/Makefile --- a/tools/vnet/doc/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/vnet/doc/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -1,11 +1,10 @@ #!/usr/bin/make -f # -*- mode: Makefile; -*- +XEN_ROOT = ../../.. +include $(XEN_ROOT)/tools/Rules.mk VERSION = 1.0 HEADER = Vnet - -INSTALL = install -INSTALL_DIR = $(INSTALL) -d -m0755 PS2PDF := ps2pdf DVIPS := dvips diff -r d5a46e4cc340 -r 6492b9b27968 tools/vnet/examples/Makefile --- a/tools/vnet/examples/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/vnet/examples/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -1,9 +1,7 @@ # -*- mode: Makefile; -*- #============================================================================ - -INSTALL = install -INSTALL_PROG = $(INSTALL) -m0755 -INSTALL_DIR = $(INSTALL) -d -m0755 +XEN_ROOT = ../../.. +include $(XEN_ROOT)/tools/Rules.mk XEN_SCRIPT_DIR = $(DESTDIR)/etc/xen/scripts diff -r d5a46e4cc340 -r 6492b9b27968 tools/vnet/libxutil/Makefile --- a/tools/vnet/libxutil/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/vnet/libxutil/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -2,11 +2,6 @@ export VNET_ROOT = $(shell cd .. && pwd) export VNET_ROOT = $(shell cd .. && pwd) include $(VNET_ROOT)/Make.env endif - -INSTALL = install -INSTALL_DATA = $(INSTALL) -m0644 -INSTALL_PROG = $(INSTALL) -m0755 -INSTALL_DIR = $(INSTALL) -d -m0755 include $(XEN_ROOT)/tools/Rules.mk @@ -60,7 +55,7 @@ libxutil.so.$(MAJOR): libxutil.so.$(MAJO ln -sf $^ $@ libxutil.so.$(MAJOR).$(MINOR): $(PIC_OBJS) - $(CC) $(CFLAGS) -Wl,-soname -Wl,libxutil.so.$(MAJOR) -shared -o $@ $^ + $(CC) $(CFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libxutil.so.$(MAJOR) $(SHLIB_CFLAGS) -o $@ $^ libxutil.a: $(LIB_OBJS) $(AR) rc $@ $^ diff -r d5a46e4cc340 -r 6492b9b27968 tools/vnet/scripts/Makefile --- a/tools/vnet/scripts/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/vnet/scripts/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -1,9 +1,7 @@ # -*- mode: Makefile; -*- #============================================================================ - -INSTALL = install -INSTALL_PROG = $(INSTALL) -m0755 -INSTALL_DIR = $(INSTALL) -d -m0755 +XEN_ROOT = ../../.. 
+include $(XEN_ROOT)/tools/Rules.mk SBIN_DIR = $(DESTDIR)/usr/sbin diff -r d5a46e4cc340 -r 6492b9b27968 tools/vnet/vnetd/Makefile --- a/tools/vnet/vnetd/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/vnet/vnetd/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -110,7 +110,7 @@ vnetd: $(VNETD_OBJ) .PHONY: install install: vnetd mkdir -p $(DESTDIR)$(VNETD_INSTALL_DIR) - install -m 0755 vnetd $(DESTDIR)$(VNETD_INSTALL_DIR) + $(INSTALL_PROG) vnetd $(DESTDIR)$(VNETD_INSTALL_DIR) .PHONY: clean clean: diff -r d5a46e4cc340 -r 6492b9b27968 tools/vtpm/Rules.mk --- a/tools/vtpm/Rules.mk Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/vtpm/Rules.mk Sun Oct 22 15:23:52 2006 -0600 @@ -4,11 +4,6 @@ include $(XEN_ROOT)/tools/Rules.mk # # Tool definitions # - -# Installation program and options -INSTALL = install -INSTALL_PROG = $(INSTALL) -m0755 -INSTALL_DIR = $(INSTALL) -d -m0755 # Xen tools installation directory TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin diff -r d5a46e4cc340 -r 6492b9b27968 tools/vtpm_manager/Rules.mk --- a/tools/vtpm_manager/Rules.mk Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/vtpm_manager/Rules.mk Sun Oct 22 15:23:52 2006 -0600 @@ -4,11 +4,6 @@ include $(XEN_ROOT)/tools/Rules.mk # # Tool definitions # - -# Installation program and options -INSTALL = install -INSTALL_PROG = $(INSTALL) -m0755 -INSTALL_DIR = $(INSTALL) -d -m0755 # Xen tools installation directory TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin diff -r d5a46e4cc340 -r 6492b9b27968 tools/xcutils/Makefile --- a/tools/xcutils/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xcutils/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -7,10 +7,6 @@ # # Copyright (C) 2005 by Christian Limpach # - -INSTALL = install -INSTALL_PROG = $(INSTALL) -m0755 -INSTALL_DIR = $(INSTALL) -d -m0755 XEN_ROOT = ../.. include $(XEN_ROOT)/tools/Rules.mk @@ -37,7 +33,7 @@ build: $(PROGRAMS) build: $(PROGRAMS) $(PROGRAMS): %: %.o - $(LINK.o) $^ $(LDLIBS) -o $@ + $(CC) $(CFLAGS) $^ $(LDLIBS) -o $@ .PHONY: install install: build diff -r d5a46e4cc340 -r 6492b9b27968 tools/xcutils/readnotes.c --- a/tools/xcutils/readnotes.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xcutils/readnotes.c Sun Oct 22 15:23:52 2006 -0600 @@ -56,7 +56,8 @@ static void print_numeric_note(const cha prefix, *(uint64_t *)ELFNOTE_DESC(note)); break; default: - printf("%s: unknown data size %#x\n", prefix, note->n_descsz); + printf("%s: unknown data size %#lx\n", prefix, + (unsigned long)note->n_descsz); break; } } @@ -301,7 +302,8 @@ int main(int argc, char **argv) print_string_note("FEATURES", note); break; default: - printf("unknown note type %#x\n", note->n_type); + printf("unknown note type %#lx\n", + (unsigned long)note->n_type); break; } } diff -r d5a46e4cc340 -r 6492b9b27968 tools/xenmon/Makefile --- a/tools/xenmon/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xenmon/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -10,15 +10,10 @@ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -INSTALL = install -INSTALL_PROG = $(INSTALL) -m0755 -INSTALL_DIR = $(INSTALL) -d -m0755 -INSTALL_DATA = $(INSTALL) -m0644 +XEN_ROOT=../.. +include $(XEN_ROOT)/tools/Rules.mk sbindir=/usr/sbin - -XEN_ROOT=../.. 
-include $(XEN_ROOT)/tools/Rules.mk CFLAGS += -Werror -g CFLAGS += -I $(XEN_XC) diff -r d5a46e4cc340 -r 6492b9b27968 tools/xenstat/libxenstat/Makefile --- a/tools/xenstat/libxenstat/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xenstat/libxenstat/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -16,10 +16,6 @@ include $(XEN_ROOT)/tools/Rules.mk include $(XEN_ROOT)/tools/Rules.mk LINUX_ROOT := $(XEN_ROOT)/linux-2.6-xen-sparse -INSTALL = install -INSTALL_PROG = $(INSTALL) -m0755 -D -INSTALL_DATA = $(INSTALL) -m0644 -D - prefix=/usr includedir=$(prefix)/include libdir=$(prefix)/lib @@ -34,7 +30,7 @@ SHLIB=src/libxenstat.so.$(MAJOR).$(MINOR SHLIB=src/libxenstat.so.$(MAJOR).$(MINOR) SHLIB_LINKS=src/libxenstat.so.$(MAJOR) src/libxenstat.so OBJECTS=src/xenstat.o -SONAME_FLAGS=-Wl,-soname -Wl,libxenstat.so.$(MAJOR) +SONAME_FLAGS=-Wl,$(SONAME_LDFLAG) -Wl,libxenstat.so.$(MAJOR) WARN_FLAGS=-Wall -Werror @@ -49,7 +45,7 @@ all: $(LIB) $(RANLIB) $@ $(SHLIB): $(OBJECTS) - $(CC) $(CFLAGS) $(LDFLAGS) $(SONAME_FLAGS) -shared -o $@ $(OBJECTS) \ + $(CC) $(CFLAGS) $(LDFLAGS) $(SONAME_FLAGS) $(SHLIB_CFLAGS) -o $@ $(OBJECTS) \ -lxenstore -lxenctrl src/xenstat.o: src/xenstat.c src/xenstat.h @@ -101,7 +97,7 @@ PYTHON_FLAGS=-I/usr/include/python$(PYTH swig -python $(SWIG_FLAGS) -outdir $(@D) -o $(PYSRC) $< $(PYLIB): $(PYSRC) - $(CC) $(CFLAGS) $(LDFLAGS) $(PYTHON_FLAGS) -shared -lxenstat -o $@ $< + $(CC) $(CFLAGS) $(LDFLAGS) $(PYTHON_FLAGS) $(SHLIB_CFLAGS) -lxenstat -o $@ $< python-bindings: $(PYLIB) $(PYMOD) @@ -122,7 +118,7 @@ PERL_FLAGS=`perl -MConfig -e 'print "$$C swig -perl $(SWIG_FLAGS) -outdir $(@D) -o $(PERLSRC) $< $(PERLLIB): $(PERLSRC) - $(CC) $(CFLAGS) $(LDFLAGS) $(PERL_FLAGS) -shared -lxenstat -o $@ $< + $(CC) $(CFLAGS) $(LDFLAGS) $(PERL_FLAGS) $(SHLIB_CFLAGS) -lxenstat -o $@ $< .PHONY: perl-bindings perl-bindings: $(PERLLIB) $(PERLMOD) diff -r d5a46e4cc340 -r 6492b9b27968 tools/xenstat/xentop/Makefile --- a/tools/xenstat/xentop/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xenstat/xentop/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -18,10 +18,6 @@ all install xentop: all install xentop: else -INSTALL = install -INSTALL_PROG = $(INSTALL) -m0755 -D -INSTALL_DATA = $(INSTALL) -m0644 -D - prefix=/usr mandir=$(prefix)/share/man man1dir=$(mandir)/man1 @@ -29,7 +25,7 @@ sbindir=$(prefix)/sbin CFLAGS += -DGCC_PRINTF -Wall -Werror -I$(XEN_LIBXENSTAT) LDFLAGS += -L$(XEN_LIBXENSTAT) -LDLIBS += -lxenstat -lncurses +LDLIBS += -lxenstat $(CURSES_LIBS) $(SOCKET_LIBS) .PHONY: all all: xentop @@ -37,6 +33,7 @@ all: xentop .PHONY: install install: xentop xentop.1 $(INSTALL_PROG) xentop $(DESTDIR)$(sbindir)/xentop + $(INSTALL_DIR) $(DESTDIR)$(man1dir) $(INSTALL_DATA) xentop.1 $(DESTDIR)$(man1dir)/xentop.1 endif diff -r d5a46e4cc340 -r 6492b9b27968 tools/xenstat/xentop/xentop.c --- a/tools/xenstat/xentop/xentop.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xenstat/xentop/xentop.c Sun Oct 22 15:23:52 2006 -0600 @@ -23,6 +23,7 @@ #include <errno.h> #include <stdio.h> #include <stdlib.h> +#include <stdarg.h> #include <string.h> #include <sys/time.h> #include <time.h> @@ -186,6 +187,8 @@ int prompt_val_len = 0; int prompt_val_len = 0; void (*prompt_complete_func)(char *); +static WINDOW *cwin; + /* * Function definitions */ @@ -222,7 +225,7 @@ static void version(void) /* Clean up any open resources */ static void cleanup(void) { - if(!isendwin()) + if(cwin != NULL && !isendwin()) endwin(); if(prev_node != NULL) xenstat_free_node(prev_node); @@ -235,7 +238,7 @@ static void cleanup(void) /* Display the given message and 
gracefully exit */ static void fail(const char *str) { - if(!isendwin()) + if(cwin != NULL && !isendwin()) endwin(); fprintf(stderr, str); exit(1); @@ -266,7 +269,7 @@ static void print(const char *fmt, ...) if (!batch) { if((current_row() < lines()-1)) { va_start(args, fmt); - vw_printw(stdscr, fmt, args); + vwprintw(stdscr, (char *)fmt, args); va_end(args); } } else { @@ -280,7 +283,7 @@ static void attr_addstr(int attr, const static void attr_addstr(int attr, const char *str) { attron(attr); - addstr(str); + addstr((char *)str); attroff(attr); } @@ -1028,14 +1031,16 @@ int main(int argc, char **argv) if (!batch) { /* Begin curses stuff */ - initscr(); + cwin = initscr(); start_color(); cbreak(); noecho(); nonl(); keypad(stdscr, TRUE); halfdelay(5); +#ifndef __sun__ use_default_colors(); +#endif init_pair(1, -1, COLOR_YELLOW); do { diff -r d5a46e4cc340 -r 6492b9b27968 tools/xenstore/Makefile --- a/tools/xenstore/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xenstore/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -4,11 +4,6 @@ XEN_LIBXC = $(XEN_ROOT)/tools/l MAJOR = 3.0 MINOR = 0 - -INSTALL = install -INSTALL_DATA = $(INSTALL) -m0644 -INSTALL_PROG = $(INSTALL) -m0755 -INSTALL_DIR = $(INSTALL) -d -m0755 PROFILE=#-pg BASECFLAGS=-Wall -g -Werror @@ -32,39 +27,40 @@ CLIENTS_OBJS := $(patsubst xenstore-%,xe XENSTORED_OBJS = xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o -XENSTORED_Linux = xenstored_linux.o +XENSTORED_OBJS_$(CONFIG_Linux) = xenstored_linux.o +XENSTORED_OBJS_$(CONFIG_SunOS) = xenstored_solaris.o -XENSTORED_OBJS += $(XENSTORED_$(OS)) +XENSTORED_OBJS += $(XENSTORED_OBJS_y) .PHONY: all all: libxenstore.so libxenstore.a xenstored $(CLIENTS) xs_tdb_dump xenstore-control xenstore-ls test_interleaved_transactions: test_interleaved_transactions.o - $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -L. -lxenstore -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -L. -lxenstore -o $@ .PHONY: testcode testcode: xs_test xenstored_test xs_random xenstored: $(XENSTORED_OBJS) - $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl $(SOCKET_LIBS) -o $@ $(CLIENTS): xenstore-%: xenstore_%.o libxenstore.so - $(LINK.o) $< $(LOADLIBES) $(LDLIBS) -L. -lxenstore -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) $< $(LOADLIBES) $(LDLIBS) -L. -lxenstore $(SOCKET_LIBS) -o $@ $(CLIENTS_OBJS): xenstore_%.o: xenstore_client.c $(COMPILE.c) -DCLIENT_$(*F) -o $@ $< xenstore-control: xenstore_control.o libxenstore.so - $(LINK.o) $< $(LOADLIBES) $(LDLIBS) -L. -lxenstore -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) $< $(LOADLIBES) $(LDLIBS) -L. -lxenstore $(SOCKET_LIBS) -o $@ xenstore-ls: xsls.o libxenstore.so - $(LINK.o) $< $(LOADLIBES) $(LDLIBS) -L. -lxenstore -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) $< $(LOADLIBES) $(LDLIBS) -L. -lxenstore $(SOCKET_LIBS) -o $@ xenstored_test: xenstored_core_test.o xenstored_watch_test.o xenstored_domain_test.o xenstored_transaction_test.o xs_lib.o talloc_test.o fake_libxc.o utils.o tdb.o - $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@ xs_tdb_dump: xs_tdb_dump.o utils.o tdb.o talloc.o - $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@ xs_test xs_random xs_stress xs_crashme: LDFLAGS+=-lpthread xs_test: xs_test.o xs_lib.o utils.o @@ -95,7 +91,7 @@ libxenstore.so.$(MAJOR): libxenstore.so. 
ln -sf $< $@ libxenstore.so.$(MAJOR).$(MINOR): xs.opic xs_lib.opic - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxenstore.so.$(MAJOR) -shared -o $@ $^ -lpthread + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libxenstore.so.$(MAJOR) $(SHLIB_CFLAGS) -o $@ $^ -lpthread libxenstore.a: xs.o xs_lib.o $(AR) rcs libxenstore.a $^ diff -r d5a46e4cc340 -r 6492b9b27968 tools/xenstore/xenstored_core.c --- a/tools/xenstore/xenstored_core.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xenstore/xenstored_core.c Sun Oct 22 15:23:52 2006 -0600 @@ -1688,7 +1688,7 @@ static void write_pidfile(const char *pi if (lockf(fd, F_TLOCK, 0) == -1) exit(0); - len = sprintf(buf, "%d\n", getpid()); + len = sprintf(buf, "%ld\n", (long)getpid()); if (write(fd, buf, len) != len) barf_perror("Writing pid file %s", pidfile); } @@ -1901,7 +1901,7 @@ int main(int argc, char *argv[]) restore_existing_connections(); if (outputpid) { - printf("%i\n", getpid()); + printf("%ld\n", (long)getpid()); fflush(stdout); } @@ -1923,6 +1923,9 @@ int main(int argc, char *argv[]) /* Get ready to listen to the tools. */ max = initialize_set(&inset, &outset, *sock, *ro_sock); + + /* Tell the kernel we're up and running. */ + xenbus_notify_running(); /* Main loop. */ /* FIXME: Rewrite so noone can starve. */ diff -r d5a46e4cc340 -r 6492b9b27968 tools/xenstore/xenstored_core.h --- a/tools/xenstore/xenstored_core.h Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xenstore/xenstored_core.h Sun Oct 22 15:23:52 2006 -0600 @@ -172,6 +172,9 @@ void *xenbus_map(void); /* Return the event channel used by xenbus. */ evtchn_port_t xenbus_evtchn(void); +/* Tell the kernel xenstored is running. */ +void xenbus_notify_running(void); + #endif /* _XENSTORED_CORE_H */ /* diff -r d5a46e4cc340 -r 6492b9b27968 tools/xenstore/xenstored_linux.c --- a/tools/xenstore/xenstored_linux.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xenstore/xenstored_linux.c Sun Oct 22 15:23:52 2006 -0600 @@ -67,3 +67,7 @@ void *xenbus_map(void) return addr; } + +void xenbus_notify_running(void) +{ +} diff -r d5a46e4cc340 -r 6492b9b27968 tools/xenstore/xs_lib.c --- a/tools/xenstore/xs_lib.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xenstore/xs_lib.c Sun Oct 22 15:23:52 2006 -0600 @@ -76,7 +76,14 @@ const char *xs_domain_dev(void) const char *xs_domain_dev(void) { char *s = getenv("XENSTORED_PATH"); - return (s ? s : "/proc/xen/xenbus"); + if (s) + return s; + +#ifdef __linux__ + return "/proc/xen/xenbus"; +#else + return "/dev/xen/xenbus"; +#endif } /* Simple routines for writing to sockets, etc. */ diff -r d5a46e4cc340 -r 6492b9b27968 tools/xenstore/xsls.c --- a/tools/xenstore/xsls.c Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xenstore/xsls.c Sun Oct 22 15:23:52 2006 -0600 @@ -6,6 +6,7 @@ #include <getopt.h> #include <unistd.h> #include <sys/ioctl.h> +#include <termios.h> static int max_width = 80; static int desired_width = 60; diff -r d5a46e4cc340 -r 6492b9b27968 tools/xentrace/Makefile --- a/tools/xentrace/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xentrace/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -1,8 +1,3 @@ INSTALL = install -INSTALL = install -INSTALL_PROG = $(INSTALL) -m0755 -INSTALL_DIR = $(INSTALL) -d -m0755 -INSTALL_DATA = $(INSTALL) -m0644 - XEN_ROOT=../.. 
include $(XEN_ROOT)/tools/Rules.mk diff -r d5a46e4cc340 -r 6492b9b27968 tools/xentrace/formats --- a/tools/xentrace/formats Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xentrace/formats Sun Oct 22 15:23:52 2006 -0600 @@ -12,10 +12,29 @@ 0x0002f00c CPU%(cpu)d %(tsc)d t_timer_f 0x0002f00c CPU%(cpu)d %(tsc)d t_timer_fn 0x0002f00d CPU%(cpu)d %(tsc)d dom_timer_fn -0x00080001 CPU%(cpu)d %(tsc)d VMX_VMEXIT [ domid = 0x%(1)08x, eip = 0x%(2)08x, reason = 0x%(3)08x ] -0x00080002 CPU%(cpu)d %(tsc)d VMX_VECTOR [ domid = 0x%(1)08x, eip = 0x%(2)08x, vector = 0x%(3)08x ] -0x00080003 CPU%(cpu)d %(tsc)d VMX_INT [ domid = 0x%(1)08x, trap = 0x%(2)08x, va = 0x%(3)08x ] +0x00080001 CPU%(cpu)d %(tsc)d VMX_VMEXIT [ domid = 0x%(1)08x, eip = 0x%(2)08x, reason = 0x%(3)08x ] +0x00084001 CPU%(cpu)d %(tsc)d VMX_INTR [ domid = 0x%(1)08x, trap = 0x%(2)08x, va = 0x%(3)08x ] -0x00081001 CPU%(cpu)d %(tsc)d VMEXIT 0x%(1)08x 0x%(2)08x 0x%(3)08x -0x00081002 CPU%(cpu)d %(tsc)d VMENTRY 0x%(1)08x 0x%(2)08x 0x%(3)08x 0x%(4)08x 0x%(5)08x +0x00081001 CPU%(cpu)d %(tsc)d VMEXIT_0 0x%(1)08x 0x%(2)08x 0x%(3)08x +0x00082001 CPU%(cpu)d %(tsc)d VMENTRY_0 0x%(1)08x 0x%(2)08x 0x%(3)08x 0x%(4)08x 0x%(5)08x +0x00081002 CPU%(cpu)d %(tsc)d VMEXIT_1 0x%(1)08x 0x%(2)08x 0x%(3)08x +0x00082002 CPU%(cpu)d %(tsc)d VMENTRY_1 0x%(1)08x 0x%(2)08x 0x%(3)08x 0x%(4)08x 0x%(5)08x + +0x00081003 CPU%(cpu)d %(tsc)d VMEXIT_2 0x%(1)08x 0x%(2)08x 0x%(3)08x +0x00082003 CPU%(cpu)d %(tsc)d VMENTRY_2 0x%(1)08x 0x%(2)08x 0x%(3)08x 0x%(4)08x 0x%(5)08x + +0x00081004 CPU%(cpu)d %(tsc)d VMEXIT_3 0x%(1)08x 0x%(2)08x 0x%(3)08x +0x00082004 CPU%(cpu)d %(tsc)d VMENTRY_3 0x%(1)08x 0x%(2)08x 0x%(3)08x 0x%(4)08x 0x%(5)08x + +0x00081005 CPU%(cpu)d %(tsc)d VMEXIT_4 0x%(1)08x 0x%(2)08x 0x%(3)08x +0x00082005 CPU%(cpu)d %(tsc)d VMENTRY_4 0x%(1)08x 0x%(2)08x 0x%(3)08x 0x%(4)08x 0x%(5)08x + +0x00081006 CPU%(cpu)d %(tsc)d VMEXIT_5 0x%(1)08x 0x%(2)08x 0x%(3)08x +0x00082006 CPU%(cpu)d %(tsc)d VMENTRY_5 0x%(1)08x 0x%(2)08x 0x%(3)08x 0x%(4)08x 0x%(5)08x + +0x00081007 CPU%(cpu)d %(tsc)d VMEXIT_6 0x%(1)08x 0x%(2)08x 0x%(3)08x +0x00082007 CPU%(cpu)d %(tsc)d VMENTRY_6 0x%(1)08x 0x%(2)08x 0x%(3)08x 0x%(4)08x 0x%(5)08x + +0x00081008 CPU%(cpu)d %(tsc)d VMEXIT_7 0x%(1)08x 0x%(2)08x 0x%(3)08x +0x00082008 CPU%(cpu)d %(tsc)d VMENTRY_7 0x%(1)08x 0x%(2)08x 0x%(3)08x 0x%(4)08x 0x%(5)08x diff -r d5a46e4cc340 -r 6492b9b27968 tools/xm-test/README --- a/tools/xm-test/README Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xm-test/README Sun Oct 22 15:23:52 2006 -0600 @@ -110,6 +110,38 @@ the kernel, please use the key word "bui Xm-test will look for disk.img in the ramdisk directory when run by default. + + +BUILDING for ACM Security Testing +================================= + +A number of tests have been added to test the access control module (ACM) +in the Xen hypervisor and the tools for supporting ACM. Those tests are +located in the security-acm directory. If ACM support is compiled into Xen +(see the user guide for how to do this) those tests can be run with the +following command from the xm-test directory + +./runtest.sh [...] -g security <report> + +Some of these tests will work even without support of ACM by Xen. + +The xm test suite has been extended to support labeling of resources +as required by the existing tests. However, by default the test suite +is not allowed to automatically label resources since this may affect +existing labels. 
To enable this, the test suite must be configured with +the following parameter passed to the configure scripts (in addition to +any other desired parameters) + +./configure --enable-full-labeling + +To revoke the privilege at a later time run the configure scripts without +this parameter: + +./configure + +If a 'make' has previously been run for building the test suite, it is not +necessary to run 'make' again just for enabling or disabling the automatic +labeling of resources. Running diff -r d5a46e4cc340 -r 6492b9b27968 tools/xm-test/configure.ac --- a/tools/xm-test/configure.ac Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xm-test/configure.ac Sun Oct 22 15:23:52 2006 -0600 @@ -37,6 +37,20 @@ fi AM_CONDITIONAL(HVM, test x$ENABLE_HVM = xTrue) AC_SUBST(ENABLE_HVM) + +AC_ARG_ENABLE(full-labeling, + [[ --enable-full-labeling allows the test suite to label all resources]], + [ + ENABLE_LABELING=True + ],[ + ENABLE_LABELING=False + ]) + +if test "x$ENABLE_LABELING" = "xTrue"; then + echo "ACM_LABEL_RESOURCES = True" > lib/XmTestLib/acm_config.py +else + rm -f lib/XmTestLib/acm_config.py* +fi # Network needs to know ips to use: dhcp or a range of IPs in the form # of: 192.168.1.1-192.168.1.100 @@ -127,6 +141,7 @@ AC_CONFIG_FILES([ tests/restore/Makefile tests/save/Makefile tests/sched-credit/Makefile + tests/security-acm/Makefile tests/sedf/Makefile tests/shutdown/Makefile tests/sysrq/Makefile diff -r d5a46e4cc340 -r 6492b9b27968 tools/xm-test/grouptest/default --- a/tools/xm-test/grouptest/default Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xm-test/grouptest/default Sun Oct 22 15:23:52 2006 -0600 @@ -22,6 +22,7 @@ restore restore save sched-credit +security-acm shutdown sysrq unpause diff -r d5a46e4cc340 -r 6492b9b27968 tools/xm-test/lib/XmTestLib/XenDomain.py --- a/tools/xm-test/lib/XmTestLib/XenDomain.py Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xm-test/lib/XmTestLib/XenDomain.py Sun Oct 22 15:23:52 2006 -0600 @@ -29,6 +29,7 @@ from config import * from config import * from Console import * from XenDevice import * +from acm import * BLOCK_ROOT_DEV = "hda" @@ -102,6 +103,9 @@ class XenConfig: self.defaultOpts["disk"] = [] self.defaultOpts["vif"] = [] self.defaultOpts["vtpm"] = [] + if isACMEnabled(): + #A default so every VM can start with ACM enabled + self.defaultOpts["access_control"] = ['policy=xm-test,label=red'] self.opts = self.defaultOpts @@ -129,6 +133,7 @@ class XenConfig: output = file(filename, "w") output.write(self.toString()) output.close() + ACMPrepareSystem(self.opts) def __str__(self): """When used as a string, we represent ourself by a config diff -r d5a46e4cc340 -r 6492b9b27968 tools/xm-test/lib/XmTestLib/block_utils.py --- a/tools/xm-test/lib/XmTestLib/block_utils.py Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xm-test/lib/XmTestLib/block_utils.py Sun Oct 22 15:23:52 2006 -0600 @@ -6,6 +6,7 @@ import time import time from XmTestLib import * +from acm import * import xen.util.blkif @@ -26,6 +27,7 @@ def get_state(domain, devname): def block_attach(domain, phy, virt): + ACMLabelResource(phy) status, output = traceCommand("xm block-attach %s %s %s w" % (domain.getName(), phy, virt)) if status != 0: diff -r d5a46e4cc340 -r 6492b9b27968 tools/xm-test/runtest.sh --- a/tools/xm-test/runtest.sh Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xm-test/runtest.sh Sun Oct 22 15:23:52 2006 -0600 @@ -197,6 +197,8 @@ unsafe=no unsafe=no GROUPENTERED=default +cp -f tests/security-acm/xm-test-security_policy.xml /etc/xen/acm-security/policies + # Resolve options while [ $# -gt 0 ] do diff -r 
d5a46e4cc340 -r 6492b9b27968 tools/xm-test/tests/Makefile.am --- a/tools/xm-test/tests/Makefile.am Sun Oct 22 14:39:15 2006 -0600 +++ b/tools/xm-test/tests/Makefile.am Sun Oct 22 15:23:52 2006 -0600 @@ -19,6 +19,7 @@ SUBDIRS = \ pause \ reboot \ sched-credit \ + security-acm \ sedf \ shutdown \ sysrq \ diff -r d5a46e4cc340 -r 6492b9b27968 unmodified_drivers/linux-2.6/mkbuildtree --- a/unmodified_drivers/linux-2.6/mkbuildtree Sun Oct 22 14:39:15 2006 -0600 +++ b/unmodified_drivers/linux-2.6/mkbuildtree Sun Oct 22 15:23:52 2006 -0600 @@ -1,4 +1,12 @@ #! /bin/sh + +if [ $1 ]; then + uname="$1" +else + uname=`uname -m` + echo "Defaulting to this machine's architecture, $uname, for linking." + echo "This may be overridden on the command line (i386,x86_64,ia64)." +fi C=$PWD @@ -27,34 +35,33 @@ ln -sf ${XEN}/include/public include/xen # Need to be quite careful here: we don't want the files we link in to # risk overriding the native Linux ones (in particular, system.h must # be native and not xenolinux). -uname=`uname -m` case "$uname" in "x86_64") - ln -sf ${XL}/include/asm-x86_64/mach-xen/asm/hypervisor.h include/asm - ln -sf ${XL}/include/asm-x86_64/mach-xen/asm/hypercall.h include/asm - ln -sf ${XL}/include/asm-x86_64/mach-xen/asm/synch_bitops.h include/asm - ln -sf ${XL}/include/asm-x86_64/mach-xen/asm/maddr.h include/asm - ln -sf ${XL}/include/asm-i386 include/asm-i386 - ;; + ln -sf ${XL}/include/asm-x86_64/mach-xen/asm/hypervisor.h include/asm + ln -sf ${XL}/include/asm-x86_64/mach-xen/asm/hypercall.h include/asm + ln -sf ${XL}/include/asm-x86_64/mach-xen/asm/synch_bitops.h include/asm + ln -sf ${XL}/include/asm-x86_64/mach-xen/asm/maddr.h include/asm + ln -sf ${XL}/include/asm-i386 include/asm-i386 + ;; i[34567]86) - ln -sf ${XL}/include/asm-i386/mach-xen/asm/hypervisor.h include/asm - ln -sf ${XL}/include/asm-i386/mach-xen/asm/hypercall.h include/asm - ln -sf ${XL}/include/asm-i386/mach-xen/asm/synch_bitops.h include/asm - ln -sf ${XL}/include/asm-i386/mach-xen/asm/maddr.h include/asm - ;; + ln -sf ${XL}/include/asm-i386/mach-xen/asm/hypervisor.h include/asm + ln -sf ${XL}/include/asm-i386/mach-xen/asm/hypercall.h include/asm + ln -sf ${XL}/include/asm-i386/mach-xen/asm/synch_bitops.h include/asm + ln -sf ${XL}/include/asm-i386/mach-xen/asm/maddr.h include/asm + ;; "ia64") - ln -sf ${XL}/include/asm-ia64/hypervisor.h include/asm - ln -sf ${XL}/include/asm-ia64/hypercall.h include/asm - ln -sf ${XL}/include/asm-ia64/synch_bitops.h include/asm - ln -sf ${XL}/include/asm-ia64/maddr.h include/asm - ln -sf ${XL}/include/asm-ia64/xen/xcom_hcall.h include/asm/xen - ln -sf ${XL}/include/asm-ia64/xen/xencomm.h include/asm/xen - ln -sf ${XL}/arch/ia64/xen/xcom_mini.c platform-pci - ln -sf ${XL}/arch/ia64/xen/xencomm.c platform-pci - ;; + ln -sf ${XL}/include/asm-ia64/hypervisor.h include/asm + ln -sf ${XL}/include/asm-ia64/hypercall.h include/asm + ln -sf ${XL}/include/asm-ia64/synch_bitops.h include/asm + ln -sf ${XL}/include/asm-ia64/maddr.h include/asm + ln -sf ${XL}/include/asm-ia64/xen/xcom_hcall.h include/asm/xen + ln -sf ${XL}/include/asm-ia64/xen/xencomm.h include/asm/xen + ln -sf ${XL}/arch/ia64/xen/xcom_mini.c platform-pci + ln -sf ${XL}/arch/ia64/xen/xencomm.c platform-pci + ;; *) - echo unknown architecture $uname - exit 1 - ;; + echo unknown architecture $uname + exit 1 + ;; esac diff -r d5a46e4cc340 -r 6492b9b27968 xen/Makefile --- a/xen/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -2,7 +2,7 @@ # All other places this is stored (eg. 
compile.h) should be autogenerated. export XEN_VERSION = 3 export XEN_SUBVERSION = 0 -export XEN_EXTRAVERSION ?= -unstable +export XEN_EXTRAVERSION ?= -unstable$(XEN_VENDORVERSION) export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION) -include xen-version @@ -93,14 +93,14 @@ include/xen/compile.h: include/xen/compi include/xen/compile.h: include/xen/compile.h.in @sed -e 's/@@date@@/$(shell LC_ALL=C date)/g' \ -e 's/@@time@@/$(shell LC_ALL=C date +%T)/g' \ - -e 's/@@whoami@@/$(shell whoami)/g' \ + -e 's/@@whoami@@/$(USER)/g' \ -e 's/@@domain@@/$(shell ([ -x /bin/dnsdomainname ] && /bin/dnsdomainname) || ([ -x /bin/domainname ] && /bin/domainname || echo [unknown]))/g' \ -e 's/@@hostname@@/$(shell hostname)/g' \ - -e 's|@@compiler@@|$(shell $(CC) $(CFLAGS) -v 2>&1 | tail -n 1 | sed -e "s;|;/;")|g' \ + -e 's!@@compiler@@!$(shell $(CC) $(CFLAGS) -v 2>&1 | grep -i "gcc.*version")!g' \ -e 's/@@version@@/$(XEN_VERSION)/g' \ -e 's/@@subversion@@/$(XEN_SUBVERSION)/g' \ -e 's/@@extraversion@@/$(XEN_EXTRAVERSION)/g' \ - -e 's!@@changeset@@!$(shell ((hg parents || head -n 7 ../ChangeLog || echo date: unavailable) | awk '{FS="changeset:[ ]+"}/^changeset/{CS=$$2};{FS="date:[ ]+"}/^date/{D=$$2}; END {print D, CS}') 2>/dev/null)!g' \ + -e 's!@@changeset@@!$(shell ((hg parents --template "{date|date} {rev}:{node|short}" >/dev/null && hg parents --template "{date|date} {rev}:{node|short}") || echo "unavailable") 2>/dev/null)!g' \ < include/xen/compile.h.in > $@.new tools/figlet/figlet -d tools/figlet Xen $(XEN_FULLVERSION) >> $@.new @mv -f $@.new $@ @@ -154,7 +154,7 @@ _cscope: .PHONY: MAP MAP: - $(NM) $(TARGET) | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map + $(NM) -n $(TARGET) | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' > System.map .PHONY: FORCE FORCE: diff -r d5a46e4cc340 -r 6492b9b27968 xen/Rules.mk --- a/xen/Rules.mk Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/Rules.mk Sun Oct 22 15:23:52 2006 -0600 @@ -24,9 +24,11 @@ override COMPILE_SUBARCH := $(XEN_COMPIL override COMPILE_SUBARCH := $(XEN_COMPILE_ARCH) override TARGET_SUBARCH := $(XEN_TARGET_ARCH) override COMPILE_ARCH := $(shell echo $(XEN_COMPILE_ARCH) | \ - sed -e 's/\(x86\|powerpc\).*/\1/') + sed -e 's/x86.*/x86/' \ + -e 's/powerpc.*/powerpc/') override TARGET_ARCH := $(shell echo $(XEN_TARGET_ARCH) | \ - sed -e 's/\(x86\|powerpc\).*/\1/') + sed -e 's/x86.*/x86/' \ + -e 's/powerpc.*/powerpc/') TARGET := $(BASEDIR)/xen @@ -34,10 +36,6 @@ HDRS += $(wildcard $(BASEDIR)/include/pu HDRS += $(wildcard $(BASEDIR)/include/public/*.h) HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/*.h) HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/$(TARGET_SUBARCH)/*.h) - -INSTALL := install -INSTALL_DATA := $(INSTALL) -m0644 -INSTALL_DIR := $(INSTALL) -d -m0755 include $(BASEDIR)/arch/$(TARGET_ARCH)/Rules.mk @@ -65,8 +63,16 @@ AFLAGS-y += -D__ASSEMBLY__ AFLAGS-y += -D__ASSEMBLY__ ALL_OBJS := $(ALL_OBJS-y) + CFLAGS := $(strip $(CFLAGS) $(CFLAGS-y)) + +# Most CFLAGS are safe for assembly files: +# -std=gnu{89,99} gets confused by #-prefixed end-of-line comments AFLAGS := $(strip $(AFLAGS) $(AFLAGS-y)) +AFLAGS += $(patsubst -std=gnu%,,$(CFLAGS)) + +# LDFLAGS are only passed directly to $(LD) +LDFLAGS := $(strip $(LDFLAGS) $(LDFLAGS_DIRECT)) include Makefile @@ -104,10 +110,11 @@ _clean_%/: FORCE $(CC) $(CFLAGS) -c $< -o $@ %.o: %.S $(HDRS) Makefile - $(CC) $(CFLAGS) $(AFLAGS) -c $< -o $@ + $(CC) $(AFLAGS) -c $< -o $@ %.i: %.c $(HDRS) Makefile 
$(CPP) $(CFLAGS) $< -o $@ +# -std=gnu{89,99} gets confused by # as an end-of-line comment marker %.s: %.S $(HDRS) Makefile - $(CPP) $(CFLAGS) $(AFLAGS) $< -o $@ + $(CPP) $(AFLAGS) $< -o $@ diff -r d5a46e4cc340 -r 6492b9b27968 xen/acm/acm_chinesewall_hooks.c --- a/xen/acm/acm_chinesewall_hooks.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/acm/acm_chinesewall_hooks.c Sun Oct 22 15:23:52 2006 -0600 @@ -153,6 +153,8 @@ static int chwall_dump_policy(u8 * buf, ret = ntohl(chwall_buf->chwall_conflict_aggregate_offset) + sizeof(domaintype_t) * chwall_bin_pol.max_types; + + ret = (ret + 7) & ~7; if (buf_size < ret) return -EINVAL; diff -r d5a46e4cc340 -r 6492b9b27968 xen/acm/acm_core.c --- a/xen/acm/acm_core.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/acm/acm_core.c Sun Oct 22 15:23:52 2006 -0600 @@ -60,7 +60,7 @@ struct acm_operations *acm_secondary_ops /* acm global binary policy (points to 'local' primary and secondary policies */ struct acm_binary_policy acm_bin_pol; /* acm binary policy lock */ -rwlock_t acm_bin_pol_rwlock = RW_LOCK_UNLOCKED; +DEFINE_RWLOCK(acm_bin_pol_rwlock); /* until we have endian support in Xen, we discover it at runtime */ u8 little_endian = 1; @@ -100,9 +100,11 @@ acm_dump_policy_reference(u8 *buf, u32 b struct acm_policy_reference_buffer *pr_buf = (struct acm_policy_reference_buffer *)buf; int ret = sizeof(struct acm_policy_reference_buffer) + strlen(acm_bin_pol.policy_reference_name) + 1; + ret = (ret + 7) & ~7; if (buf_size < ret) return -EINVAL; + memset(buf, 0, ret); pr_buf->len = htonl(strlen(acm_bin_pol.policy_reference_name) + 1); /* including stringend '\0' */ strcpy((char *)(buf + sizeof(struct acm_policy_reference_buffer)), acm_bin_pol.policy_reference_name); @@ -187,85 +189,58 @@ acm_init_binary_policy(u32 policy_code) return ret; } +int +acm_is_policy(char *buf, unsigned long len) +{ + struct acm_policy_buffer *pol; + + if (buf == NULL || len < sizeof(struct acm_policy_buffer)) + return 0; + + pol = (struct acm_policy_buffer *)buf; + return ntohl(pol->magic) == ACM_MAGIC; +} + + static int -acm_setup(unsigned int *initrdidx, - const multiboot_info_t *mbi, - unsigned long initial_images_start) -{ - int i; - module_t *mod = (module_t *)__va(mbi->mods_addr); +acm_setup(char *policy_start, + unsigned long policy_len) +{ int rc = ACM_OK; - - if (mbi->mods_count > 1) - *initrdidx = 1; - - /* - * Try all modules and see whichever could be the binary policy. - * Adjust the initrdidx if module[1] is the binary policy. - */ - for (i = mbi->mods_count-1; i >= 1; i--) - { - struct acm_policy_buffer *pol; - char *_policy_start; - unsigned long _policy_len; -#if defined(__i386__) - _policy_start = (char *)(initial_images_start + (mod[i].mod_start-mod[0].mod_start)); -#elif defined(__x86_64__) - _policy_start = __va(initial_images_start + (mod[i].mod_start-mod[0].mod_start)); -#else -#error Architecture unsupported by sHype -#endif - _policy_len = mod[i].mod_end - mod[i].mod_start; - if (_policy_len < sizeof(struct acm_policy_buffer)) - continue; /* not a policy */ - - pol = (struct acm_policy_buffer *)_policy_start; - if (ntohl(pol->magic) == ACM_MAGIC) - { - rc = do_acm_set_policy((void *)_policy_start, - (u32)_policy_len); - if (rc == ACM_OK) - { - printkd("Policy len 0x%lx, start at %p.\n",_policy_len,_policy_start); - if (i == 1) - { - if (mbi->mods_count > 2) - { - *initrdidx = 2; - } - else { - *initrdidx = 0; - } - } - else - { - *initrdidx = 1; - } - break; - } - else - { - printk("Invalid policy. 
%d.th module line.\n", i+1); - /* load default policy later */ - acm_active_security_policy = ACM_POLICY_UNDEFINED; - } - } /* end if a binary policy definition, i.e., (ntohl(pol->magic) == ACM_MAGIC ) */ + struct acm_policy_buffer *pol; + + if (policy_start == NULL || policy_len < sizeof(struct acm_policy_buffer)) + return rc; + + pol = (struct acm_policy_buffer *)policy_start; + if (ntohl(pol->magic) != ACM_MAGIC) + return rc; + + rc = do_acm_set_policy((void *)policy_start, (u32)policy_len); + if (rc == ACM_OK) + { + printkd("Policy len 0x%lx, start at %p.\n",policy_len,policy_start); + } + else + { + printk("Invalid policy.\n"); + /* load default policy later */ + acm_active_security_policy = ACM_POLICY_UNDEFINED; } return rc; } int -acm_init(unsigned int *initrdidx, - const multiboot_info_t *mbi, - unsigned long initial_images_start) +acm_init(char *policy_start, + unsigned long policy_len) { int ret = ACM_OK; acm_set_endian(); /* first try to load the boot policy (uses its own locks) */ - acm_setup(initrdidx, mbi, initial_images_start); + acm_setup(policy_start, policy_len); if (acm_active_security_policy != ACM_POLICY_UNDEFINED) { diff -r d5a46e4cc340 -r 6492b9b27968 xen/acm/acm_simple_type_enforcement_hooks.c --- a/xen/acm/acm_simple_type_enforcement_hooks.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/acm/acm_simple_type_enforcement_hooks.c Sun Oct 22 15:23:52 2006 -0600 @@ -149,6 +149,8 @@ ste_dump_policy(u8 *buf, u32 buf_size) { ste_buf->ste_ssid_offset = htonl(sizeof(struct acm_ste_policy_buffer)); ret = ntohl(ste_buf->ste_ssid_offset) + sizeof(domaintype_t)*ste_bin_pol.max_ssidrefs*ste_bin_pol.max_types; + + ret = (ret + 7) & ~7; if (buf_size < ret) return -EINVAL; diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/ia64/Rules.mk --- a/xen/arch/ia64/Rules.mk Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/ia64/Rules.mk Sun Oct 22 15:23:52 2006 -0600 @@ -17,8 +17,6 @@ endif # Used only by linux/Makefile. AFLAGS_KERNEL += -mconstant-gp -nostdinc $(CPPFLAGS) - -# Note: .S -> .o rule uses AFLAGS and CFLAGS. 
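
The ACM hunks above repeat two small idioms: rounding a dump length up to an 8-byte boundary before the buffer-size check, and recognising a blob as a binary policy by its network-byte-order magic (the new acm_is_policy()). A minimal standalone sketch of both, using hypothetical names (EXAMPLE_MAGIC, example_policy_header) rather than the real ACM types:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>
    #include <arpa/inet.h>              /* ntohl() */

    #define EXAMPLE_MAGIC 0xcafef00du   /* stand-in for ACM_MAGIC */

    struct example_policy_header {
        uint32_t magic;                 /* stored big-endian in the blob */
        uint32_t len;
    };

    /* Round a dump length up to the next multiple of 8 bytes. */
    static size_t round_up_8(size_t len)
    {
        return (len + 7) & ~(size_t)7;
    }

    /* Same shape as the new acm_is_policy(): reject short buffers,
     * then compare the magic after converting from network order. */
    static int looks_like_policy(const char *buf, unsigned long len)
    {
        struct example_policy_header hdr;

        if (buf == NULL || len < sizeof(hdr))
            return 0;
        memcpy(&hdr, buf, sizeof(hdr));     /* avoid unaligned reads */
        return ntohl(hdr.magic) == EXAMPLE_MAGIC;
    }
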
CFLAGS += -nostdinc -fno-builtin -fno-common -fno-strict-aliasing CFLAGS += -mconstant-gp diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/ia64/linux-xen/smpboot.c --- a/xen/arch/ia64/linux-xen/smpboot.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/ia64/linux-xen/smpboot.c Sun Oct 22 15:23:52 2006 -0600 @@ -650,7 +650,8 @@ clear_cpu_sibling_map(int cpu) for_each_cpu_mask(i, cpu_core_map[cpu]) cpu_clear(cpu, cpu_core_map[i]); - cpu_sibling_map[cpu] = cpu_core_map[cpu] = CPU_MASK_NONE; + cpus_clear(cpu_sibling_map[cpu]); + cpus_clear(cpu_core_map[cpu]); } static void diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/ia64/xen/xenmisc.c --- a/xen/arch/ia64/xen/xenmisc.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/ia64/xen/xenmisc.c Sun Oct 22 15:23:52 2006 -0600 @@ -165,6 +165,10 @@ void arch_dump_domain_info(struct domain { } +void arch_dump_vcpu_info(struct vcpu *v) +{ +} + void audit_domains_key(unsigned char key) { } diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/powerpc/Makefile --- a/xen/arch/powerpc/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/powerpc/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -141,7 +141,7 @@ asm-offsets.s: $(TARGET_SUBARCH)/asm-off $(CC) $(CFLAGS) -S -o $@ $< xen.lds: xen.lds.S $(HDRS) - $(CC) $(CFLAGS) -P -E $(AFLAGS) -o $@ $< + $(CC) -P -E $(AFLAGS) -o $@ $< dom0.bin: $(DOM0_IMAGE) cp $< $@ diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/powerpc/domain.c --- a/xen/arch/powerpc/domain.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/powerpc/domain.c Sun Oct 22 15:23:52 2006 -0600 @@ -286,6 +286,10 @@ void arch_dump_domain_info(struct domain { } +void arch_dump_vcpu_info(struct vcpu *v) +{ +} + extern void sleep(void); static void safe_halt(void) { diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/Makefile --- a/xen/arch/x86/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -46,8 +46,7 @@ obj-$(crash_debug) += gdbstub.o $(TARGET): $(TARGET)-syms boot/mkelf32 ./boot/mkelf32 $(TARGET)-syms $(TARGET) 0x100000 \ - `$(NM) $(TARGET)-syms | sort | tail -n 1 | \ - sed -e 's/^\([^ ]*\).*/0x\1/'` + `$(NM) -nr $(TARGET)-syms | head -n 1 | sed -e 's/^\([^ ]*\).*/0x\1/'` $(TARGET)-syms: boot/$(TARGET_SUBARCH).o $(ALL_OBJS) xen.lds $(MAKE) -f $(BASEDIR)/Rules.mk $(BASEDIR)/common/symbols-dummy.o @@ -70,7 +69,7 @@ asm-offsets.s: $(TARGET_SUBARCH)/asm-off $(CC) $(CFLAGS) -S -o $@ $< xen.lds: $(TARGET_SUBARCH)/xen.lds.S $(HDRS) - $(CC) $(CFLAGS) -P -E -Ui386 $(AFLAGS) -o $@ $< + $(CC) -P -E -Ui386 $(AFLAGS) -o $@ $< boot/mkelf32: boot/mkelf32.c $(HOSTCC) $(HOSTCFLAGS) -o $@ $< diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/Rules.mk --- a/xen/arch/x86/Rules.mk Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/Rules.mk Sun Oct 22 15:23:52 2006 -0600 @@ -11,41 +11,44 @@ pae ?= n pae ?= n supervisor_mode_kernel ?= n -CFLAGS += -nostdinc -fno-builtin -fno-common -fno-strict-aliasing -CFLAGS += -iwithprefix include -Werror -Wno-pointer-arith -pipe -CFLAGS += -I$(BASEDIR)/include -CFLAGS += -I$(BASEDIR)/include/asm-x86/mach-generic -CFLAGS += -I$(BASEDIR)/include/asm-x86/mach-default +# Solaris grabs stdarg.h and friends from the system include directory. +ifneq ($(XEN_OS),SunOS) +CFLAGS += -nostdinc +endif + +CFLAGS += -fno-builtin -fno-common -fno-strict-aliasing +CFLAGS += -iwithprefix include -Werror -Wno-pointer-arith -pipe +CFLAGS += -I$(BASEDIR)/include +CFLAGS += -I$(BASEDIR)/include/asm-x86/mach-generic +CFLAGS += -I$(BASEDIR)/include/asm-x86/mach-default # Prevent floating-point variables from creeping into Xen. 
-CFLAGS += -msoft-float +CFLAGS += -msoft-float # Disable PIE/SSP if GCC supports them. They can break us. -CFLAGS += $(call test-gcc-flag,$(CC),-nopie) -CFLAGS += $(call test-gcc-flag,$(CC),-fno-stack-protector) -CFLAGS += $(call test-gcc-flag,$(CC),-fno-stack-protector-all) +CFLAGS += $(call cc-option,$(CC),-nopie,) +CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,) +CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,) ifeq ($(TARGET_SUBARCH)$(pae),x86_32y) -CFLAGS += -DCONFIG_X86_PAE=1 +CFLAGS += -DCONFIG_X86_PAE=1 endif ifeq ($(supervisor_mode_kernel),y) -CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1 +CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1 endif ifeq ($(XEN_TARGET_ARCH),x86_32) -LDFLAGS += -m elf_i386 x86_32 := y x86_64 := n endif ifeq ($(TARGET_SUBARCH),x86_64) -CFLAGS += -mno-red-zone -fpic -fno-reorder-blocks -CFLAGS += -fno-asynchronous-unwind-tables +CFLAGS += -mno-red-zone -fpic -fno-reorder-blocks +CFLAGS += -fno-asynchronous-unwind-tables # -fvisibility=hidden reduces -fpic cost, if it's available -CFLAGS += $(shell $(CC) -v --help 2>&1 | grep " -fvisibility=" | \ - grep -q hidden && echo "-DGCC_HAS_VISIBILITY_ATTRIBUTE") -LDFLAGS += -m elf_x86_64 +CFLAGS += $(call cc-option,$(CC),-fvisibility=hidden,) +CFLAGS := $(subst -fvisibility=hidden,-DGCC_HAS_VISIBILITY_ATTRIBUTE,$(CFLAGS)) x86_32 := n x86_64 := y endif diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/boot/mkelf32.c --- a/xen/arch/x86/boot/mkelf32.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/boot/mkelf32.c Sun Oct 22 15:23:52 2006 -0600 @@ -90,6 +90,11 @@ static Elf32_Shdr out_shdr[] = { } }; +/* Some system header files define these macros and pollute our namespace. */ +#undef swap16 +#undef swap32 +#undef swap64 + #define swap16(_v) ((((u16)(_v)>>8)&0xff)|(((u16)(_v)&0xff)<<8)) #define swap32(_v) (((u32)swap16((u16)(_v))<<16)|(u32)swap16((u32)((_v)>>16))) #define swap64(_v) (((u64)swap32((u32)(_v))<<32)|(u64)swap32((u32)((_v)>>32))) diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/domain.c Sun Oct 22 15:23:52 2006 -0600 @@ -1003,6 +1003,20 @@ void arch_dump_domain_info(struct domain } } +void arch_dump_vcpu_info(struct vcpu *v) +{ + if ( shadow_mode_enabled(v->domain) ) + { + if ( v->arch.shadow.mode ) + printk(" shadowed %u-on-%u, %stranslated\n", + v->arch.shadow.mode->guest_levels, + v->arch.shadow.mode->shadow_levels, + shadow_vcpu_mode_translate(v) ? 
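
The mkelf32.c hunk above works around system headers that already define swap16/swap32/swap64 by #undef'ing them before supplying its own. A self-contained sketch of the same pattern; the macro bodies mirror the ones in the hunk, the surrounding scaffolding (typedefs, main) is only illustrative:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint16_t u16;
    typedef uint32_t u32;
    typedef uint64_t u64;

    /* If a system header already defined these, drop its versions first. */
    #undef swap16
    #undef swap32
    #undef swap64

    #define swap16(_v) ((((u16)(_v)>>8)&0xff)|(((u16)(_v)&0xff)<<8))
    #define swap32(_v) (((u32)swap16((u16)(_v))<<16)|(u32)swap16((u32)((_v)>>16)))
    #define swap64(_v) (((u64)swap32((u32)(_v))<<32)|(u64)swap32((u32)((_v)>>32)))

    int main(void)
    {
        /* 0x1234 byte-swapped is 0x3412; swapping twice is the identity. */
        printf("%#x %#x\n", (unsigned)swap16(0x1234),
               (unsigned)swap16(swap16(0x1234)));
        return 0;
    }
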
"" : "not "); + else + printk(" not shadowed\n"); + } +} + /* * Local variables: * mode: C diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/domain_build.c Sun Oct 22 15:23:52 2006 -0600 @@ -286,7 +286,8 @@ int construct_dom0(struct domain *d, nr_pages = avail_domheap_pages() + initial_images_nrpages() + dom0_nrpages; else - nr_pages = dom0_nrpages; + nr_pages = min(avail_domheap_pages() + initial_images_nrpages(), + (unsigned long)dom0_nrpages); if ( (rc = parseelfimage(&dsi)) != 0 ) return rc; @@ -668,6 +669,8 @@ int construct_dom0(struct domain *d, if ( opt_dom0_max_vcpus == 0 ) opt_dom0_max_vcpus = num_online_cpus(); + if ( opt_dom0_max_vcpus > num_online_cpus() ) + opt_dom0_max_vcpus = num_online_cpus(); if ( opt_dom0_max_vcpus > MAX_VIRT_CPUS ) opt_dom0_max_vcpus = MAX_VIRT_CPUS; printk("Dom0 has maximum %u VCPUs\n", opt_dom0_max_vcpus); diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/hvm/Makefile --- a/xen/arch/x86/hvm/Makefile Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/hvm/Makefile Sun Oct 22 15:23:52 2006 -0600 @@ -4,6 +4,7 @@ obj-y += hvm.o obj-y += hvm.o obj-y += i8254.o obj-y += i8259.o +obj-y += rtc.o obj-y += instrlen.o obj-y += intercept.o obj-y += io.o diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/hvm/hvm.c Sun Oct 22 15:23:52 2006 -0600 @@ -40,8 +40,10 @@ #include <asm/processor.h> #include <asm/types.h> #include <asm/msr.h> +#include <asm/mc146818rtc.h> #include <asm/spinlock.h> #include <asm/hvm/hvm.h> +#include <asm/hvm/vpit.h> #include <asm/hvm/support.h> #include <public/sched.h> #include <public/hvm/ioreq.h> @@ -277,6 +279,7 @@ void hvm_setup_platform(struct domain* d init_timer(&platform->pl_time.periodic_tm.timer, pt_timer_fn, v, v->processor); pit_init(v, cpu_khz); + rtc_init(v, RTC_PORT(0), RTC_IRQ); } void pic_irq_request(void *data, int level) @@ -368,7 +371,7 @@ void hvm_hlt(unsigned long rflags) { struct vcpu *v = current; struct periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm; - s_time_t next_pit = -1, next_wakeup; + s_time_t next_pt = -1, next_wakeup; /* * If we halt with interrupts disabled, that's a pretty sure sign that we @@ -379,10 +382,10 @@ void hvm_hlt(unsigned long rflags) return hvm_vcpu_down(); if ( !v->vcpu_id ) - next_pit = get_scheduled(v, pt->irq, pt); + next_pt = get_scheduled(v, pt->irq, pt); next_wakeup = get_apictime_scheduled(v); - if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 ) - next_wakeup = next_pit; + if ( (next_pt != -1 && next_pt < next_wakeup) || next_wakeup == -1 ) + next_wakeup = next_pt; if ( next_wakeup != - 1 ) set_timer(¤t->arch.hvm_vcpu.hlt_timer, next_wakeup); do_sched_op_compat(SCHEDOP_block, 0); diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/hvm/i8254.c --- a/xen/arch/x86/hvm/i8254.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/hvm/i8254.c Sun Oct 22 15:23:52 2006 -0600 @@ -49,7 +49,6 @@ #define RW_STATE_WORD0 3 #define RW_STATE_WORD1 4 -#define ticks_per_sec(v) (v->domain->arch.hvm_domain.tsc_frequency) static int handle_pit_io(ioreq_t *p); static int handle_speaker_io(ioreq_t *p); @@ -77,17 +76,6 @@ uint64_t muldiv64(uint64_t a, uint32_t b return res.ll; } -/* - * get processor time. 
- * unit: TSC - */ -int64_t hvm_get_clock(struct vcpu *v) -{ - uint64_t gtsc; - gtsc = hvm_get_guest_time(v); - return gtsc; -} - static int pit_get_count(PITChannelState *s) { uint64_t d; @@ -215,11 +203,11 @@ static inline void pit_load_count(PITCha switch (s->mode) { case 2: /* create periodic time */ - s->pt = create_periodic_time (s, period, 0, 0); + s->pt = create_periodic_time (period, 0, 0, pit_time_fired, s); break; case 1: /* create one shot time */ - s->pt = create_periodic_time (s, period, 0, 1); + s->pt = create_periodic_time (period, 0, 1, pit_time_fired, s); #ifdef DEBUG_PIT printk("HVM_PIT: create one shot time.\n"); #endif diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/hvm/i8259.c --- a/xen/arch/x86/hvm/i8259.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/hvm/i8259.c Sun Oct 22 15:23:52 2006 -0600 @@ -498,19 +498,19 @@ void pic_init(struct hvm_virpic *s, void static int intercept_pic_io(ioreq_t *p) { - struct hvm_virpic *pic; - struct vcpu *v = current; + struct hvm_virpic *pic; uint32_t data; unsigned long flags; - - if ( p->size != 1 || p->count != 1) { + + if ( p->size != 1 || p->count != 1 ) { printk("PIC_IO wrong access size %d!\n", (int)p->size); return 1; } - pic = &v->domain->arch.hvm_domain.vpic; - if ( p->dir == 0 ) { - if (p->pdata_valid) - (void)hvm_copy_from_guest_virt( + + pic = ¤t->domain->arch.hvm_domain.vpic; + if ( p->dir == IOREQ_WRITE ) { + if ( p->pdata_valid ) + (void)hvm_copy_from_guest_phys( &data, (unsigned long)p->u.pdata, p->size); else data = p->u.data; @@ -524,10 +524,10 @@ static int intercept_pic_io(ioreq_t *p) data = pic_ioport_read( (void*)&pic->pics[p->addr>>7], (uint32_t) p->addr); spin_unlock_irqrestore(&pic->lock, flags); - if (p->pdata_valid) - (void)hvm_copy_to_guest_virt( + if ( p->pdata_valid ) + (void)hvm_copy_to_guest_phys( (unsigned long)p->u.pdata, &data, p->size); - else + else p->u.data = (u64)data; } return 1; @@ -535,42 +535,41 @@ static int intercept_pic_io(ioreq_t *p) static int intercept_elcr_io(ioreq_t *p) { - struct hvm_virpic *s; - struct vcpu *v = current; + struct hvm_virpic *s; uint32_t data; unsigned long flags; - + if ( p->size != 1 || p->count != 1 ) { printk("PIC_IO wrong access size %d!\n", (int)p->size); return 1; } - s = &v->domain->arch.hvm_domain.vpic; - if ( p->dir == 0 ) { - if (p->pdata_valid) - (void)hvm_copy_from_guest_virt( + s = ¤t->domain->arch.hvm_domain.vpic; + if ( p->dir == IOREQ_WRITE ) { + if ( p->pdata_valid ) + (void)hvm_copy_from_guest_phys( &data, (unsigned long)p->u.pdata, p->size); else data = p->u.data; spin_lock_irqsave(&s->lock, flags); elcr_ioport_write((void*)&s->pics[p->addr&1], (uint32_t) p->addr, (uint32_t)( data & 0xff)); - get_sp(current->domain)->sp_global.pic_elcr = + get_sp(current->domain)->sp_global.pic_elcr = s->pics[0].elcr | ((u16)s->pics[1].elcr << 8); spin_unlock_irqrestore(&s->lock, flags); } else { data = (u64) elcr_ioport_read( (void*)&s->pics[p->addr&1], (uint32_t) p->addr); - if (p->pdata_valid) - (void)hvm_copy_to_guest_virt( + if ( p->pdata_valid ) + (void)hvm_copy_to_guest_phys( (unsigned long)p->u.pdata, &data, p->size); - else + else p->u.data = (u64)data; - } return 1; } + void register_pic_io_hook (void) { register_portio_handler(0x20, 2, intercept_pic_io); @@ -599,23 +598,47 @@ int cpu_get_pic_interrupt(struct vcpu *v return intno; } -int is_pit_irq(struct vcpu *v, int irq, int type) -{ - int pit_vec; - - if (type == APIC_DM_EXTINT) - pit_vec = v->domain->arch.hvm_domain.vpic.pics[0].irq_base; - else - pit_vec = - 
v->domain->arch.hvm_domain.vioapic.redirtbl[0].RedirForm.vector; - - return (irq == pit_vec); +int is_periodic_irq(struct vcpu *v, int irq, int type) +{ + int vec; + struct periodic_time *pt = + &(v->domain->arch.hvm_domain.pl_time.periodic_tm); + struct RTCState *vrtc = + &(v->domain->arch.hvm_domain.pl_time.vrtc); + + if (pt->irq == 0) { /* Is it pit irq? */ + if (type == APIC_DM_EXTINT) + vec = v->domain->arch.hvm_domain.vpic.pics[0].irq_base; + else + vec = + v->domain->arch.hvm_domain.vioapic.redirtbl[0].RedirForm.vector; + + if (irq == vec) + return 1; + } + + if (pt->irq == 8) { /* Or rtc irq? */ + if (type == APIC_DM_EXTINT) + vec = v->domain->arch.hvm_domain.vpic.pics[1].irq_base; + else + vec = + v->domain->arch.hvm_domain.vioapic.redirtbl[8].RedirForm.vector; + + if (irq == vec) + return is_rtc_periodic_irq(vrtc); + } + + return 0; } int is_irq_enabled(struct vcpu *v, int irq) { + struct hvm_vioapic *vioapic = &v->domain->arch.hvm_domain.vioapic; struct hvm_virpic *vpic=&v->domain->arch.hvm_domain.vpic; - + + if (vioapic->redirtbl[irq].RedirForm.mask == 0) + return 1; + if ( irq & 8 ) { return !( (1 << (irq&7)) & vpic->pics[1].imr); } diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/hvm/intercept.c --- a/xen/arch/x86/hvm/intercept.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/hvm/intercept.c Sun Oct 22 15:23:52 2006 -0600 @@ -61,49 +61,39 @@ static inline void hvm_mmio_access(struc hvm_mmio_read_t read_handler, hvm_mmio_write_t write_handler) { - ioreq_t *req; - vcpu_iodata_t *vio = get_vio(v->domain, v->vcpu_id); unsigned int tmp1, tmp2; unsigned long data; - if (vio == NULL) { - printk("vlapic_access: bad shared page\n"); - domain_crash_synchronous(); - } - - req = &vio->vp_ioreq; - - switch (req->type) { + switch ( p->type ) { case IOREQ_TYPE_COPY: { - int sign = (req->df) ? -1 : 1, i; - - if (!req->pdata_valid) { - if (req->dir == IOREQ_READ){ - req->u.data = read_handler(v, req->addr, req->size); - } else { /* req->dir != IOREQ_READ */ - write_handler(v, req->addr, req->size, req->u.data); - } - } else { /* !req->pdata_valid */ - if (req->dir == IOREQ_READ) { - for (i = 0; i < req->count; i++) { + if ( !p->pdata_valid ) { + if ( p->dir == IOREQ_READ ) + p->u.data = read_handler(v, p->addr, p->size); + else /* p->dir == IOREQ_WRITE */ + write_handler(v, p->addr, p->size, p->u.data); + } else { /* !p->pdata_valid */ + int i, sign = (p->df) ? 
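
In the hvm_mmio_access() rework above, the rep/string case steps through guest memory one element at a time; the only subtlety is that the direction flag turns the stride negative. A hedged sketch of that address arithmetic, using plain arithmetic instead of the real guest-copy helpers:

    #include <stdio.h>

    /* Guest address of element i of a string operation:
     * base + sign*i*size, where sign is -1 when EFLAGS.DF is set. */
    static unsigned long element_addr(unsigned long base, int df,
                                      int i, int size)
    {
        int sign = df ? -1 : 1;
        return base + (long)(sign * i * size);
    }

    int main(void)
    {
        /* Forward copy of 4 dwords starting at 0x1000 ... */
        for (int i = 0; i < 4; i++)
            printf("fwd[%d] = %#lx\n", i, element_addr(0x1000, 0, i, 4));
        /* ... and the same with DF set, walking downwards. */
        for (int i = 0; i < 4; i++)
            printf("bwd[%d] = %#lx\n", i, element_addr(0x1000, 1, i, 4));
        return 0;
    }
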
-1 : 1; + + if ( p->dir == IOREQ_READ ) { + for ( i = 0; i < p->count; i++ ) { data = read_handler(v, - req->addr + (sign * i * req->size), - req->size); - (void)hvm_copy_to_guest_virt( - (unsigned long)p->u.pdata + (sign * i * req->size), + p->addr + (sign * i * p->size), + p->size); + (void)hvm_copy_to_guest_phys( + (unsigned long)p->u.pdata + (sign * i * p->size), &data, p->size); } - } else { /* !req->dir == IOREQ_READ */ - for (i = 0; i < req->count; i++) { - (void)hvm_copy_from_guest_virt( + } else {/* p->dir == IOREQ_WRITE */ + for ( i = 0; i < p->count; i++ ) { + (void)hvm_copy_from_guest_phys( &data, - (unsigned long)p->u.pdata + (sign * i * req->size), + (unsigned long)p->u.pdata + (sign * i * p->size), p->size); write_handler(v, - req->addr + (sign * i * req->size), - req->size, data); + p->addr + (sign * i * p->size), + p->size, data); } } } @@ -111,44 +101,44 @@ static inline void hvm_mmio_access(struc } case IOREQ_TYPE_AND: - tmp1 = read_handler(v, req->addr, req->size); - if (req->dir == IOREQ_WRITE) { - tmp2 = tmp1 & (unsigned long) req->u.data; - write_handler(v, req->addr, req->size, tmp2); - } - req->u.data = tmp1; + tmp1 = read_handler(v, p->addr, p->size); + if ( p->dir == IOREQ_WRITE ) { + tmp2 = tmp1 & (unsigned long) p->u.data; + write_handler(v, p->addr, p->size, tmp2); + } + p->u.data = tmp1; break; case IOREQ_TYPE_OR: - tmp1 = read_handler(v, req->addr, req->size); - if (req->dir == IOREQ_WRITE) { - tmp2 = tmp1 | (unsigned long) req->u.data; - write_handler(v, req->addr, req->size, tmp2); - } - req->u.data = tmp1; + tmp1 = read_handler(v, p->addr, p->size); + if ( p->dir == IOREQ_WRITE ) { + tmp2 = tmp1 | (unsigned long) p->u.data; + write_handler(v, p->addr, p->size, tmp2); + } + p->u.data = tmp1; break; case IOREQ_TYPE_XOR: - tmp1 = read_handler(v, req->addr, req->size); - if (req->dir == IOREQ_WRITE) { - tmp2 = tmp1 ^ (unsigned long) req->u.data; - write_handler(v, req->addr, req->size, tmp2); - } - req->u.data = tmp1; + tmp1 = read_handler(v, p->addr, p->size); + if ( p->dir == IOREQ_WRITE ) { + tmp2 = tmp1 ^ (unsigned long) p->u.data; + write_handler(v, p->addr, p->size, tmp2); + } + p->u.data = tmp1; break; case IOREQ_TYPE_XCHG: - /* + /* * Note that we don't need to be atomic here since VCPU is accessing * its own local APIC. */ - tmp1 = read_handler(v, req->addr, req->size); - write_handler(v, req->addr, req->size, (unsigned long) req->u.data); - req->u.data = tmp1; + tmp1 = read_handler(v, p->addr, p->size); + write_handler(v, p->addr, p->size, (unsigned long) p->u.data); + p->u.data = tmp1; break; default: - printk("error ioreq type for local APIC %x\n", req->type); + printk("hvm_mmio_access: error ioreq type %x\n", p->type); domain_crash_synchronous(); break; } @@ -270,12 +260,11 @@ int register_io_handler(unsigned long ad return 1; } -/* hooks function for the HLT instruction emulation wakeup */ +/* Hook function for the HLT instruction emulation wakeup. */ void hlt_timer_fn(void *data) { struct vcpu *v = data; - - hvm_prod_vcpu(v); + vcpu_kick(v); } static __inline__ void missed_ticks(struct periodic_time *pt) @@ -325,17 +314,14 @@ void pickup_deactive_ticks(struct period * period: fire frequency in ns. 
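
The AND/OR/XOR/XCHG arms above all follow the same read-modify-write shape against a pair of device callbacks. A compact sketch of that dispatch with a fake one-register device; the handler names and op codes are illustrative, and the write here is unconditional, whereas the real code checks the request direction first:

    #include <stdio.h>

    static unsigned long fake_reg = 0xf0;   /* the "device" */

    static unsigned long dev_read(unsigned long addr, int size)
    { (void)addr; (void)size; return fake_reg; }

    static void dev_write(unsigned long addr, int size, unsigned long val)
    { (void)addr; (void)size; fake_reg = val; }

    enum rmw_op { OP_AND, OP_OR, OP_XOR, OP_XCHG };

    /* Read the old value, write the combined value, and hand the old
     * value back to the caller, as the cases above do for the guest. */
    static unsigned long emulate_rmw(enum rmw_op op, unsigned long addr,
                                     int size, unsigned long data)
    {
        unsigned long old = dev_read(addr, size);

        switch (op) {
        case OP_AND:  dev_write(addr, size, old & data);  break;
        case OP_OR:   dev_write(addr, size, old | data);  break;
        case OP_XOR:  dev_write(addr, size, old ^ data);  break;
        case OP_XCHG: dev_write(addr, size, data);        break;
        }
        return old;
    }

    int main(void)
    {
        unsigned long old = emulate_rmw(OP_OR, 0, 4, 0x0f);
        printf("old=%#lx new=%#lx\n", old, fake_reg);
        return 0;
    }
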
*/ struct periodic_time * create_periodic_time( - PITChannelState *s, u32 period, char irq, - char one_shot) -{ - struct vcpu *v = s->vcpu; - struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm); + char one_shot, + time_cb *cb, + void *data) +{ + struct periodic_time *pt = &(current->domain->arch.hvm_domain.pl_time.periodic_tm); if ( pt->enabled ) { - if ( v->vcpu_id != 0 ) { - printk("HVM_PIT: start 2nd periodic time on non BSP!\n"); - } stop_timer (&pt->timer); pt->enabled = 0; } @@ -355,7 +341,8 @@ struct periodic_time * create_periodic_t pt->scheduled = NOW() + period; set_timer (&pt->timer,pt->scheduled); pt->enabled = 1; - pt->priv = s; + pt->cb = cb; + pt->priv = data; return pt; } diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/hvm/io.c --- a/xen/arch/x86/hvm/io.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/hvm/io.c Sun Oct 22 15:23:52 2006 -0600 @@ -369,18 +369,18 @@ static void hvm_pio_assist(struct cpu_us { if ( pio_opp->flags & REPZ ) regs->ecx -= p->count; + if ( p->dir == IOREQ_READ ) { - regs->edi += sign * p->count * p->size; if ( pio_opp->flags & OVERLAP ) { - unsigned long addr = regs->edi; - if (hvm_realmode(current)) - addr += regs->es << 4; - if (sign > 0) - addr -= p->size; - (void)hvm_copy_to_guest_virt(addr, &p->u.data, p->size); + unsigned long addr = pio_opp->addr; + if ( hvm_paging_enabled(current) ) + (void)hvm_copy_to_guest_virt(addr, &p->u.data, p->size); + else + (void)hvm_copy_to_guest_phys(addr, &p->u.data, p->size); } + regs->edi += sign * p->count * p->size; } else /* p->dir == IOREQ_WRITE */ { @@ -485,19 +485,22 @@ static void hvm_mmio_assist(struct cpu_u case INSTR_MOVS: sign = p->df ? -1 : 1; + + if (mmio_opp->flags & REPZ) + regs->ecx -= p->count; + + if ((mmio_opp->flags & OVERLAP) && p->dir == IOREQ_READ) { + unsigned long addr = mmio_opp->addr; + + if (hvm_paging_enabled(current)) + (void)hvm_copy_to_guest_virt(addr, &p->u.data, p->size); + else + (void)hvm_copy_to_guest_phys(addr, &p->u.data, p->size); + } + regs->esi += sign * p->count * p->size; regs->edi += sign * p->count * p->size; - if ((mmio_opp->flags & OVERLAP) && p->dir == IOREQ_READ) { - unsigned long addr = regs->edi; - - if (sign > 0) - addr -= p->size; - (void)hvm_copy_to_guest_virt(addr, &p->u.data, p->size); - } - - if (mmio_opp->flags & REPZ) - regs->ecx -= p->count; break; case INSTR_STOS: @@ -680,7 +683,7 @@ void hvm_interrupt_post(struct vcpu *v, struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm); - if ( is_pit_irq(v, vector, type) ) { + if ( pt->enabled && is_periodic_irq(v, vector, type) ) { if ( !pt->first_injected ) { pt->pending_intr_nr = 0; pt->last_plt_gtime = hvm_get_guest_time(v); @@ -691,8 +694,9 @@ void hvm_interrupt_post(struct vcpu *v, pt->pending_intr_nr--; pt->last_plt_gtime += pt->period_cycles; hvm_set_guest_time(v, pt->last_plt_gtime); - pit_time_fired(v, pt->priv); - } + } + if (pt->cb) + pt->cb(v, pt->priv); } switch(type) { diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/hvm/platform.c --- a/xen/arch/x86/hvm/platform.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/hvm/platform.c Sun Oct 22 15:23:52 2006 -0600 @@ -30,6 +30,7 @@ #include <asm/regs.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> +#include <asm/hvm/io.h> #include <public/hvm/ioreq.h> #include <xen/lib.h> @@ -39,10 +40,13 @@ #define DECODE_success 1 #define DECODE_failure 0 +#define mk_operand(size_reg, index, seg, flag) \ + (((size_reg) << 24) | ((index) << 16) | ((seg) << 8) | (flag)) + #if defined (__x86_64__) static 
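
The periodic-time rework above replaces the PIT-specific pointer with a generic callback plus opaque data, so the RTC can reuse the same machinery; hvm_interrupt_post() then simply invokes pt->cb(v, pt->priv). A minimal sketch of that registration pattern outside Xen, with hypothetical names throughout:

    #include <stdio.h>
    #include <stdint.h>

    /* Generic "fired" callback: who to notify plus opaque device state. */
    typedef void time_cb_t(void *owner, void *priv);

    struct periodic_source {
        uint32_t   period_ns;
        int        one_shot;
        time_cb_t *cb;      /* what to call when the period elapses */
        void      *priv;    /* device state handed back to the callback */
    };

    static void setup_periodic(struct periodic_source *pt, uint32_t period_ns,
                               int one_shot, time_cb_t *cb, void *priv)
    {
        pt->period_ns = period_ns;
        pt->one_shot  = one_shot;
        pt->cb        = cb;
        pt->priv      = priv;
    }

    /* Called by the (omitted) timer machinery when the deadline passes. */
    static void fire(struct periodic_source *pt, void *owner)
    {
        if (pt->cb)
            pt->cb(owner, pt->priv);
    }

    static void pit_like_handler(void *owner, void *priv)
    {
        (void)owner;
        printf("tick for device %p\n", priv);
    }

    int main(void)
    {
        struct periodic_source pt;
        int fake_pit_state = 0;

        setup_periodic(&pt, 10000000u, 0, pit_like_handler, &fake_pit_state);
        fire(&pt, NULL);
        return 0;
    }
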
inline long __get_reg_value(unsigned long reg, int size) { - switch(size) { + switch ( size ) { case BYTE_64: return (char)(reg & 0xFF); case WORD: @@ -59,8 +63,8 @@ static inline long __get_reg_value(unsig long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs) { - if (size == BYTE) { - switch (index) { + if ( size == BYTE ) { + switch ( index ) { case 0: /* %al */ return (char)(regs->rax & 0xFF); case 1: /* %cl */ @@ -84,7 +88,7 @@ long get_reg_value(int size, int index, /* NOTREACHED */ } - switch (index) { + switch ( index ) { case 0: return __get_reg_value(regs->rax, size); case 1: return __get_reg_value(regs->rcx, size); case 2: return __get_reg_value(regs->rdx, size); @@ -109,7 +113,7 @@ long get_reg_value(int size, int index, #elif defined (__i386__) static inline long __get_reg_value(unsigned long reg, int size) { - switch(size) { + switch ( size ) { case WORD: return (short)(reg & 0xFFFF); case LONG: @@ -122,8 +126,8 @@ static inline long __get_reg_value(unsig long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs) { - if (size == BYTE) { - switch (index) { + if ( size == BYTE ) { + switch ( index ) { case 0: /* %al */ return (char)(regs->eax & 0xFF); case 1: /* %cl */ @@ -146,7 +150,7 @@ long get_reg_value(int size, int index, } } - switch (index) { + switch ( index ) { case 0: return __get_reg_value(regs->eax, size); case 1: return __get_reg_value(regs->ecx, size); case 2: return __get_reg_value(regs->edx, size); @@ -163,19 +167,21 @@ long get_reg_value(int size, int index, #endif static inline unsigned char *check_prefix(unsigned char *inst, - struct instruction *thread_inst, unsigned char *rex_p) -{ - while (1) { - switch (*inst) { + struct hvm_io_op *mmio_op, + unsigned char *op_size, + unsigned char *rex_p) +{ + while ( 1 ) { + switch ( *inst ) { /* rex prefix for em64t instructions */ case 0x40 ... 
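
platform.c now builds operand descriptors with the mk_operand() macro introduced above, packing size, register index, segment and a kind flag into one 32-bit word. A sketch of packing and unpacking that layout; the accessor macros and kind flags here are illustrative stand-ins for the patch's operand_size/operand_index helpers and REGISTER/MEMORY/IMMEDIATE flags:

    #include <stdio.h>
    #include <stdint.h>

    /* Same layout as the mk_operand() macro in the hunk:
     * [31:24] size, [23:16] register index, [15:8] segment, [7:0] kind. */
    #define MK_OPERAND(size, index, seg, flag) \
        ((uint32_t)(((size) << 24) | ((index) << 16) | ((seg) << 8) | (flag)))

    #define OPERAND_SIZE(op)   (((op) >> 24) & 0xff)
    #define OPERAND_INDEX(op)  (((op) >> 16) & 0xff)
    #define OPERAND_KIND(op)   ((op) & 0xff)

    #define KIND_REGISTER 0x1           /* illustrative kind values */
    #define KIND_MEMORY   0x2

    int main(void)
    {
        uint32_t op = MK_OPERAND(4 /* dword */, 3 /* index */, 0,
                                 KIND_REGISTER);

        printf("size=%u index=%u kind=%#x\n",
               (unsigned)OPERAND_SIZE(op), (unsigned)OPERAND_INDEX(op),
               (unsigned)OPERAND_KIND(op));
        return 0;
    }
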
0x4e: *rex_p = *inst; break; case 0xf3: /* REPZ */ - thread_inst->flags = REPZ; + mmio_op->flags = REPZ; break; case 0xf2: /* REPNZ */ - thread_inst->flags = REPNZ; + mmio_op->flags = REPNZ; break; case 0xf0: /* LOCK */ break; @@ -185,10 +191,10 @@ static inline unsigned char *check_prefi case 0x26: /* ES */ case 0x64: /* FS */ case 0x65: /* GS */ - thread_inst->seg_sel = *inst; + //mmio_op->seg_sel = *inst; break; case 0x66: /* 32bit->16bit */ - thread_inst->op_size = WORD; + *op_size = WORD; break; case 0x67: break; @@ -199,7 +205,7 @@ static inline unsigned char *check_prefi } } -static inline unsigned long get_immediate(int op16,const unsigned char *inst, int op_size) +static inline unsigned long get_immediate(int op16, const unsigned char *inst, int op_size) { int mod, reg, rm; unsigned long val = 0; @@ -210,14 +216,14 @@ static inline unsigned long get_immediat rm = *inst & 7; inst++; //skip ModR/M byte - if (mod != 3 && rm == 4) { + if ( mod != 3 && rm == 4 ) { inst++; //skip SIB byte } - switch(mod) { + switch ( mod ) { case 0: - if (rm == 5 || rm == 4) { - if (op16) + if ( rm == 5 || rm == 4 ) { + if ( op16 ) inst = inst + 2; //disp16, skip 2 bytes else inst = inst + 4; //disp32, skip 4 bytes @@ -227,17 +233,17 @@ static inline unsigned long get_immediat inst++; //disp8, skip 1 byte break; case 2: - if (op16) + if ( op16 ) inst = inst + 2; //disp16, skip 2 bytes else inst = inst + 4; //disp32, skip 4 bytes break; } - if (op_size == QUAD) + if ( op_size == QUAD ) op_size = LONG; - for (i = 0; i < op_size; i++) { + for ( i = 0; i < op_size; i++ ) { val |= (*inst++ & 0xff) << (8 * i); } @@ -257,7 +263,7 @@ static inline int get_index(const unsign rex_b = rex & 1; //Only one operand in the instruction is register - if (mod == 3) { + if ( mod == 3 ) { return (rm + (rex_b << 3)); } else { return (reg + (rex_r << 3)); @@ -265,53 +271,52 @@ static inline int get_index(const unsign return 0; } -static void init_instruction(struct instruction *mmio_inst) -{ - mmio_inst->instr = 0; - mmio_inst->op_size = 0; - mmio_inst->immediate = 0; - mmio_inst->seg_sel = 0; - - mmio_inst->operand[0] = 0; - mmio_inst->operand[1] = 0; - - mmio_inst->flags = 0; -} - -#define GET_OP_SIZE_FOR_BYTE(op_size) \ +static void init_instruction(struct hvm_io_op *mmio_op) +{ + mmio_op->instr = 0; + + mmio_op->flags = 0; + //mmio_op->seg_sel = 0; + + mmio_op->operand[0] = 0; + mmio_op->operand[1] = 0; + mmio_op->immediate = 0; +} + +#define GET_OP_SIZE_FOR_BYTE(size_reg) \ do { \ - if (rex) \ - op_size = BYTE_64; \ + if ( rex ) \ + (size_reg) = BYTE_64; \ else \ - op_size = BYTE; \ - } while(0) + (size_reg) = BYTE; \ + } while( 0 ) #define GET_OP_SIZE_FOR_NONEBYTE(op_size) \ do { \ - if (rex & 0x8) \ - op_size = QUAD; \ - else if (op_size != WORD) \ - op_size = LONG; \ - } while(0) + if ( rex & 0x8 ) \ + (op_size) = QUAD; \ + else if ( (op_size) != WORD ) \ + (op_size) = LONG; \ + } while( 0 ) /* * Decode mem,accumulator operands (as in <opcode> m8/m16/m32, al,ax,eax) */ -static int mem_acc(unsigned char size, struct instruction *instr) -{ - instr->operand[0] = mk_operand(size, 0, 0, MEMORY); - instr->operand[1] = mk_operand(size, 0, 0, REGISTER); +static inline int mem_acc(unsigned char size, struct hvm_io_op *mmio) +{ + mmio->operand[0] = mk_operand(size, 0, 0, MEMORY); + mmio->operand[1] = mk_operand(size, 0, 0, REGISTER); return DECODE_success; } /* * Decode accumulator,mem operands (as in <opcode> al,ax,eax, m8/m16/m32) */ -static int acc_mem(unsigned char size, struct instruction *instr) -{ - instr->operand[0] = 
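
check_prefix() above walks the bytes ahead of the opcode, recording REP/REPNZ, the 0x66 operand-size override and (on x86-64) a REX byte, while skipping LOCK and segment overrides. A simplified standalone scanner in the same spirit; it only recognises the prefixes the hunk cares about and assumes an opcode follows:

    #include <stdio.h>
    #include <stdint.h>

    struct prefix_info {
        int     rep;        /* 0xf3 seen */
        int     repnz;      /* 0xf2 seen */
        int     opsize16;   /* 0x66 seen */
        uint8_t rex;        /* 0x40-0x4f byte, if any */
    };

    /* Consume prefix bytes and return a pointer to the opcode itself. */
    static const uint8_t *scan_prefixes(const uint8_t *inst,
                                        struct prefix_info *pi)
    {
        for (;;) {
            uint8_t b = *inst;

            if (b >= 0x40 && b <= 0x4f)        /* REX.* */
                pi->rex = b;
            else if (b == 0xf3)                /* REP/REPZ */
                pi->rep = 1;
            else if (b == 0xf2)                /* REPNZ */
                pi->repnz = 1;
            else if (b == 0x66)                /* operand-size override */
                pi->opsize16 = 1;
            else if (b == 0xf0 || b == 0x2e || b == 0x36 || b == 0x3e ||
                     b == 0x26 || b == 0x64 || b == 0x65 || b == 0x67)
                ;                              /* LOCK/segment/addr-size: skip */
            else
                return inst;                   /* first non-prefix byte */
            inst++;
        }
    }

    int main(void)
    {
        /* f3 66 ab = rep stosw: two prefixes, then the 0xab opcode. */
        const uint8_t bytes[] = { 0xf3, 0x66, 0xab };
        struct prefix_info pi = { 0 };
        const uint8_t *opcode = scan_prefixes(bytes, &pi);

        printf("opcode=%#x rep=%d opsize16=%d\n",
               *opcode, pi.rep, pi.opsize16);
        return 0;
    }
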
mk_operand(size, 0, 0, REGISTER); - instr->operand[1] = mk_operand(size, 0, 0, MEMORY); +static inline int acc_mem(unsigned char size, struct hvm_io_op *mmio) +{ + mmio->operand[0] = mk_operand(size, 0, 0, REGISTER); + mmio->operand[1] = mk_operand(size, 0, 0, MEMORY); return DECODE_success; } @@ -319,12 +324,12 @@ static int acc_mem(unsigned char size, s * Decode mem,reg operands (as in <opcode> r32/16, m32/16) */ static int mem_reg(unsigned char size, unsigned char *opcode, - struct instruction *instr, unsigned char rex) + struct hvm_io_op *mmio_op, unsigned char rex) { int index = get_index(opcode + 1, rex); - instr->operand[0] = mk_operand(size, 0, 0, MEMORY); - instr->operand[1] = mk_operand(size, index, 0, REGISTER); + mmio_op->operand[0] = mk_operand(size, 0, 0, MEMORY); + mmio_op->operand[1] = mk_operand(size, index, 0, REGISTER); return DECODE_success; } @@ -332,263 +337,273 @@ static int mem_reg(unsigned char size, u * Decode reg,mem operands (as in <opcode> m32/16, r32/16) */ static int reg_mem(unsigned char size, unsigned char *opcode, - struct instruction *instr, unsigned char rex) + struct hvm_io_op *mmio_op, unsigned char rex) { int index = get_index(opcode + 1, rex); - instr->operand[0] = mk_operand(size, index, 0, REGISTER); - instr->operand[1] = mk_operand(size, 0, 0, MEMORY); + mmio_op->operand[0] = mk_operand(size, index, 0, REGISTER); + mmio_op->operand[1] = mk_operand(size, 0, 0, MEMORY); return DECODE_success; } -static int hvm_decode(int realmode, unsigned char *opcode, struct instruction *instr) +static int hvm_decode(int realmode, unsigned char *opcode, + struct hvm_io_op *mmio_op, unsigned char *op_size) { unsigned char size_reg = 0; unsigned char rex = 0; int index; - init_instruction(instr); - - opcode = check_prefix(opcode, instr, &rex); - - if (realmode) { /* meaning is reversed */ - if (instr->op_size == WORD) - instr->op_size = LONG; - else if (instr->op_size == LONG) - instr->op_size = WORD; - else if (instr->op_size == 0) - instr->op_size = WORD; - } - - switch (*opcode) { - case 0x0A: /* or r8, m8 */ - instr->instr = INSTR_OR; - instr->op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return mem_reg(size_reg, opcode, instr, rex); + *op_size = 0; + init_instruction(mmio_op); + + opcode = check_prefix(opcode, mmio_op, op_size, &rex); + + if ( realmode ) { /* meaning is reversed */ + if ( *op_size == WORD ) + *op_size = LONG; + else if ( *op_size == LONG ) + *op_size = WORD; + else if ( *op_size == 0 ) + *op_size = WORD; + } + + /* the operands order in comments conforms to AT&T convention */ + + switch ( *opcode ) { + case 0x0A: /* or m8, r8 */ + mmio_op->instr = INSTR_OR; + *op_size = BYTE; + GET_OP_SIZE_FOR_BYTE(size_reg); + return mem_reg(size_reg, opcode, mmio_op, rex); case 0x0B: /* or m32/16, r32/16 */ - instr->instr = INSTR_OR; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); - return mem_reg(instr->op_size, opcode, instr, rex); + mmio_op->instr = INSTR_OR; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); + return mem_reg(*op_size, opcode, mmio_op, rex); case 0x20: /* and r8, m8 */ - instr->instr = INSTR_AND; - instr->op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return reg_mem(size_reg, opcode, instr, rex); + mmio_op->instr = INSTR_AND; + *op_size = BYTE; + GET_OP_SIZE_FOR_BYTE(size_reg); + return reg_mem(size_reg, opcode, mmio_op, rex); case 0x21: /* and r32/16, m32/16 */ - instr->instr = INSTR_AND; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); - return reg_mem(instr->op_size, opcode, instr, rex); + mmio_op->instr = INSTR_AND; + 
GET_OP_SIZE_FOR_NONEBYTE(*op_size); + return reg_mem(*op_size, opcode, mmio_op, rex); case 0x22: /* and m8, r8 */ - instr->instr = INSTR_AND; - instr->op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return mem_reg(size_reg, opcode, instr, rex); + mmio_op->instr = INSTR_AND; + *op_size = BYTE; + GET_OP_SIZE_FOR_BYTE(size_reg); + return mem_reg(size_reg, opcode, mmio_op, rex); case 0x23: /* and m32/16, r32/16 */ - instr->instr = INSTR_AND; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); - return mem_reg(instr->op_size, opcode, instr, rex); + mmio_op->instr = INSTR_AND; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); + return mem_reg(*op_size, opcode, mmio_op, rex); case 0x2B: /* sub m32/16, r32/16 */ - instr->instr = INSTR_SUB; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); - return mem_reg(instr->op_size, opcode, instr, rex); + mmio_op->instr = INSTR_SUB; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); + return mem_reg(*op_size, opcode, mmio_op, rex); case 0x30: /* xor r8, m8 */ - instr->instr = INSTR_XOR; - instr->op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return reg_mem(size_reg, opcode, instr, rex); + mmio_op->instr = INSTR_XOR; + *op_size = BYTE; + GET_OP_SIZE_FOR_BYTE(size_reg); + return reg_mem(size_reg, opcode, mmio_op, rex); case 0x31: /* xor r32/16, m32/16 */ - instr->instr = INSTR_XOR; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); - return reg_mem(instr->op_size, opcode, instr, rex); - - case 0x32: /* xor m8, r8*/ - instr->instr = INSTR_XOR; - instr->op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return mem_reg(size_reg, opcode, instr, rex); + mmio_op->instr = INSTR_XOR; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); + return reg_mem(*op_size, opcode, mmio_op, rex); + + case 0x32: /* xor m8, r8 */ + mmio_op->instr = INSTR_XOR; + *op_size = BYTE; + GET_OP_SIZE_FOR_BYTE(size_reg); + return mem_reg(size_reg, opcode, mmio_op, rex); + + case 0x38: /* cmp r8, m8 */ + mmio_op->instr = INSTR_CMP; + *op_size = BYTE; + GET_OP_SIZE_FOR_BYTE(size_reg); + return reg_mem(size_reg, opcode, mmio_op, rex); case 0x39: /* cmp r32/16, m32/16 */ - instr->instr = INSTR_CMP; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); - return reg_mem(instr->op_size, opcode, instr, rex); - - case 0x3A: /* cmp r8, r8/m8 */ - instr->instr = INSTR_CMP; - GET_OP_SIZE_FOR_BYTE(instr->op_size); - return reg_mem(instr->op_size, opcode, instr, rex); + mmio_op->instr = INSTR_CMP; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); + return reg_mem(*op_size, opcode, mmio_op, rex); + + case 0x3A: /* cmp m8, r8 */ + mmio_op->instr = INSTR_CMP; + *op_size = BYTE; + GET_OP_SIZE_FOR_BYTE(size_reg); + return mem_reg(size_reg, opcode, mmio_op, rex); case 0x3B: /* cmp m32/16, r32/16 */ - instr->instr = INSTR_CMP; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); - return mem_reg(instr->op_size, opcode, instr, rex); + mmio_op->instr = INSTR_CMP; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); + return mem_reg(*op_size, opcode, mmio_op, rex); case 0x80: case 0x81: case 0x83: - { - unsigned char ins_subtype = (opcode[1] >> 3) & 7; - - if (opcode[0] == 0x80) { - GET_OP_SIZE_FOR_BYTE(size_reg); - instr->op_size = BYTE; - } else if (opcode[0] == 0x81) { - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); - size_reg = instr->op_size; - } else if (opcode[0] == 0x83) { - GET_OP_SIZE_FOR_NONEBYTE(size_reg); - instr->op_size = size_reg; - } - - /* opcode 0x83 always has a single byte operand */ - if (opcode[0] == 0x83) - instr->immediate = - (signed char)get_immediate(realmode, opcode+1, BYTE); - else - instr->immediate = - get_immediate(realmode, opcode+1, instr->op_size); - - instr->operand[0] = 
mk_operand(size_reg, 0, 0, IMMEDIATE); - instr->operand[1] = mk_operand(size_reg, 0, 0, MEMORY); - - switch (ins_subtype) { - case 7: /* cmp $imm, m32/16 */ - instr->instr = INSTR_CMP; - return DECODE_success; - - case 1: /* or $imm, m32/16 */ - instr->instr = INSTR_OR; - return DECODE_success; - - default: - printk("%x/%x, This opcode isn't handled yet!\n", - *opcode, ins_subtype); - return DECODE_failure; - } - } - - case 0x84: /* test m8, r8 */ - instr->instr = INSTR_TEST; - instr->op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return mem_reg(size_reg, opcode, instr, rex); - - case 0x85: /* text m16/32, r16/32 */ - instr->instr = INSTR_TEST; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); - return mem_reg(instr->op_size, opcode, instr, rex); + { + unsigned char ins_subtype = (opcode[1] >> 3) & 7; + + if ( opcode[0] == 0x80 ) { + *op_size = BYTE; + GET_OP_SIZE_FOR_BYTE(size_reg); + } else { + GET_OP_SIZE_FOR_NONEBYTE(*op_size); + size_reg = *op_size; + } + + /* opcode 0x83 always has a single byte operand */ + if ( opcode[0] == 0x83 ) + mmio_op->immediate = + (signed char)get_immediate(realmode, opcode + 1, BYTE); + else + mmio_op->immediate = + get_immediate(realmode, opcode + 1, *op_size); + + mmio_op->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE); + mmio_op->operand[1] = mk_operand(size_reg, 0, 0, MEMORY); + + switch ( ins_subtype ) { + case 7: /* cmp $imm, m32/16 */ + mmio_op->instr = INSTR_CMP; + return DECODE_success; + + case 1: /* or $imm, m32/16 */ + mmio_op->instr = INSTR_OR; + return DECODE_success; + + default: + printk("%x/%x, This opcode isn't handled yet!\n", + *opcode, ins_subtype); + return DECODE_failure; + } + } + + case 0x84: /* test r8, m8 */ + mmio_op->instr = INSTR_TEST; + *op_size = BYTE; + GET_OP_SIZE_FOR_BYTE(size_reg); + return reg_mem(size_reg, opcode, mmio_op, rex); + + case 0x85: /* test r16/32, m16/32 */ + mmio_op->instr = INSTR_TEST; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); + return reg_mem(*op_size, opcode, mmio_op, rex); case 0x87: /* xchg {r/m16|r/m32}, {m/r16|m/r32} */ - instr->instr = INSTR_XCHG; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); - if (((*(opcode+1)) & 0xc7) == 5) - return reg_mem(instr->op_size, opcode, instr, rex); + mmio_op->instr = INSTR_XCHG; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); + if ( ((*(opcode+1)) & 0xc7) == 5 ) + return reg_mem(*op_size, opcode, mmio_op, rex); else - return mem_reg(instr->op_size, opcode, instr, rex); + return mem_reg(*op_size, opcode, mmio_op, rex); case 0x88: /* mov r8, m8 */ - instr->instr = INSTR_MOV; - instr->op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return reg_mem(size_reg, opcode, instr, rex); + mmio_op->instr = INSTR_MOV; + *op_size = BYTE; + GET_OP_SIZE_FOR_BYTE(size_reg); + return reg_mem(size_reg, opcode, mmio_op, rex); case 0x89: /* mov r32/16, m32/16 */ - instr->instr = INSTR_MOV; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); - return reg_mem(instr->op_size, opcode, instr, rex); + mmio_op->instr = INSTR_MOV; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); + return reg_mem(*op_size, opcode, mmio_op, rex); case 0x8A: /* mov m8, r8 */ - instr->instr = INSTR_MOV; - instr->op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return mem_reg(size_reg, opcode, instr, rex); + mmio_op->instr = INSTR_MOV; + *op_size = BYTE; + GET_OP_SIZE_FOR_BYTE(size_reg); + return mem_reg(size_reg, opcode, mmio_op, rex); case 0x8B: /* mov m32/16, r32/16 */ - instr->instr = INSTR_MOV; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); - return mem_reg(instr->op_size, opcode, instr, rex); + mmio_op->instr = INSTR_MOV; + 
GET_OP_SIZE_FOR_NONEBYTE(*op_size); + return mem_reg(*op_size, opcode, mmio_op, rex); case 0xA0: /* mov <addr>, al */ - instr->instr = INSTR_MOV; - instr->op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return mem_acc(size_reg, instr); + mmio_op->instr = INSTR_MOV; + *op_size = BYTE; + GET_OP_SIZE_FOR_BYTE(size_reg); + return mem_acc(size_reg, mmio_op); case 0xA1: /* mov <addr>, ax/eax */ - instr->instr = INSTR_MOV; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); - return mem_acc(instr->op_size, instr); + mmio_op->instr = INSTR_MOV; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); + return mem_acc(*op_size, mmio_op); case 0xA2: /* mov al, <addr> */ - instr->instr = INSTR_MOV; - instr->op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(size_reg); - return acc_mem(size_reg, instr); + mmio_op->instr = INSTR_MOV; + *op_size = BYTE; + GET_OP_SIZE_FOR_BYTE(size_reg); + return acc_mem(size_reg, mmio_op); case 0xA3: /* mov ax/eax, <addr> */ - instr->instr = INSTR_MOV; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); - return acc_mem(instr->op_size, instr); + mmio_op->instr = INSTR_MOV; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); + return acc_mem(*op_size, mmio_op); case 0xA4: /* movsb */ - instr->instr = INSTR_MOVS; - instr->op_size = BYTE; + mmio_op->instr = INSTR_MOVS; + *op_size = BYTE; return DECODE_success; case 0xA5: /* movsw/movsl */ - instr->instr = INSTR_MOVS; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + mmio_op->instr = INSTR_MOVS; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); return DECODE_success; case 0xAA: /* stosb */ - instr->instr = INSTR_STOS; - instr->op_size = BYTE; + mmio_op->instr = INSTR_STOS; + *op_size = BYTE; return DECODE_success; case 0xAB: /* stosw/stosl */ - instr->instr = INSTR_STOS; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + mmio_op->instr = INSTR_STOS; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); return DECODE_success; case 0xAC: /* lodsb */ - instr->instr = INSTR_LODS; - instr->op_size = BYTE; + mmio_op->instr = INSTR_LODS; + *op_size = BYTE; return DECODE_success; case 0xAD: /* lodsw/lodsl */ - instr->instr = INSTR_LODS; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + mmio_op->instr = INSTR_LODS; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); return DECODE_success; case 0xC6: - if (((opcode[1] >> 3) & 7) == 0) { /* mov $imm8, m8 */ - instr->instr = INSTR_MOV; - instr->op_size = BYTE; - - instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE); - instr->immediate = get_immediate(realmode, opcode+1, instr->op_size); - instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY); + if ( ((opcode[1] >> 3) & 7) == 0 ) { /* mov $imm8, m8 */ + mmio_op->instr = INSTR_MOV; + *op_size = BYTE; + + mmio_op->operand[0] = mk_operand(*op_size, 0, 0, IMMEDIATE); + mmio_op->immediate = + get_immediate(realmode, opcode + 1, *op_size); + mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY); return DECODE_success; } else return DECODE_failure; case 0xC7: - if (((opcode[1] >> 3) & 7) == 0) { /* mov $imm16/32, m16/32 */ - instr->instr = INSTR_MOV; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); - - instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE); - instr->immediate = get_immediate(realmode, opcode+1, instr->op_size); - instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY); + if ( ((opcode[1] >> 3) & 7) == 0 ) { /* mov $imm16/32, m16/32 */ + mmio_op->instr = INSTR_MOV; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); + + mmio_op->operand[0] = mk_operand(*op_size, 0, 0, IMMEDIATE); + mmio_op->immediate = + get_immediate(realmode, opcode + 1, *op_size); + mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY); return 
DECODE_success; } else @@ -596,20 +611,21 @@ static int hvm_decode(int realmode, unsi case 0xF6: case 0xF7: - if (((opcode[1] >> 3) & 7) == 0) { /* test $imm8/16/32, m8/16/32 */ - instr->instr = INSTR_TEST; - - if (opcode[0] == 0xF6) { + if ( ((opcode[1] >> 3) & 7) == 0 ) { /* test $imm8/16/32, m8/16/32 */ + mmio_op->instr = INSTR_TEST; + + if ( opcode[0] == 0xF6 ) { + *op_size = BYTE; GET_OP_SIZE_FOR_BYTE(size_reg); - instr->op_size = BYTE; } else { - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); - size_reg = instr->op_size; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); + size_reg = *op_size; } - instr->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE); - instr->immediate = get_immediate(realmode, opcode+1, instr->op_size); - instr->operand[1] = mk_operand(size_reg, 0, 0, MEMORY); + mmio_op->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE); + mmio_op->immediate = + get_immediate(realmode, opcode + 1, *op_size); + mmio_op->operand[1] = mk_operand(size_reg, 0, 0, MEMORY); return DECODE_success; } else @@ -623,59 +639,59 @@ static int hvm_decode(int realmode, unsi return DECODE_failure; } - switch (*++opcode) { + switch ( *++opcode ) { case 0xB6: /* movzx m8, r16/r32/r64 */ - instr->instr = INSTR_MOVZX; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + mmio_op->instr = INSTR_MOVZX; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); index = get_index(opcode + 1, rex); - instr->operand[0] = mk_operand(BYTE, 0, 0, MEMORY); - instr->operand[1] = mk_operand(instr->op_size, index, 0, REGISTER); + mmio_op->operand[0] = mk_operand(BYTE, 0, 0, MEMORY); + mmio_op->operand[1] = mk_operand(*op_size, index, 0, REGISTER); return DECODE_success; case 0xB7: /* movzx m16/m32, r32/r64 */ - instr->instr = INSTR_MOVZX; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + mmio_op->instr = INSTR_MOVZX; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); index = get_index(opcode + 1, rex); - if (rex & 0x8) - instr->operand[0] = mk_operand(LONG, 0, 0, MEMORY); + if ( rex & 0x8 ) + mmio_op->operand[0] = mk_operand(LONG, 0, 0, MEMORY); else - instr->operand[0] = mk_operand(WORD, 0, 0, MEMORY); - instr->operand[1] = mk_operand(instr->op_size, index, 0, REGISTER); + mmio_op->operand[0] = mk_operand(WORD, 0, 0, MEMORY); + mmio_op->operand[1] = mk_operand(*op_size, index, 0, REGISTER); return DECODE_success; case 0xBE: /* movsx m8, r16/r32/r64 */ - instr->instr = INSTR_MOVSX; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + mmio_op->instr = INSTR_MOVSX; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); index = get_index(opcode + 1, rex); - instr->operand[0] = mk_operand(BYTE, 0, 0, MEMORY); - instr->operand[1] = mk_operand(instr->op_size, index, 0, REGISTER); + mmio_op->operand[0] = mk_operand(BYTE, 0, 0, MEMORY); + mmio_op->operand[1] = mk_operand(*op_size, index, 0, REGISTER); return DECODE_success; case 0xBF: /* movsx m16, r32/r64 */ - instr->instr = INSTR_MOVSX; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + mmio_op->instr = INSTR_MOVSX; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); index = get_index(opcode + 1, rex); - instr->operand[0] = mk_operand(WORD, 0, 0, MEMORY); - instr->operand[1] = mk_operand(instr->op_size, index, 0, REGISTER); + mmio_op->operand[0] = mk_operand(WORD, 0, 0, MEMORY); + mmio_op->operand[1] = mk_operand(*op_size, index, 0, REGISTER); return DECODE_success; case 0xA3: /* bt r32, m32 */ - instr->instr = INSTR_BT; + mmio_op->instr = INSTR_BT; index = get_index(opcode + 1, rex); - instr->op_size = LONG; - instr->operand[0] = mk_operand(instr->op_size, index, 0, REGISTER); - instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY); + *op_size = LONG; + 
mmio_op->operand[0] = mk_operand(*op_size, index, 0, REGISTER); + mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY); return DECODE_success; case 0xBA: - if (((opcode[1] >> 3) & 7) == 4) /* BT $imm8, m16/32/64 */ + if ( ((opcode[1] >> 3) & 7) == 4 ) /* BT $imm8, m16/32/64 */ { - instr->instr = INSTR_BT; - GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); - instr->immediate = - (signed char)get_immediate(realmode, opcode+1, BYTE); - instr->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE); - instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY); + mmio_op->instr = INSTR_BT; + GET_OP_SIZE_FOR_NONEBYTE(*op_size); + mmio_op->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE); + mmio_op->immediate = + (signed char)get_immediate(realmode, opcode + 1, BYTE); + mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY); return DECODE_success; } else @@ -692,9 +708,9 @@ static int hvm_decode(int realmode, unsi int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip, int inst_len) { - if (inst_len > MAX_INST_LEN || inst_len <= 0) + if ( inst_len > MAX_INST_LEN || inst_len <= 0 ) return 0; - if (hvm_copy_from_guest_virt(buf, guest_eip, inst_len)) + if ( hvm_copy_from_guest_virt(buf, guest_eip, inst_len) ) return 0; return inst_len; } @@ -716,27 +732,20 @@ static void hvm_send_assist_req(struct v notify_via_xen_event_channel(v->arch.hvm_vcpu.xen_port); } - -/* Wake up a vcpu whihc is waiting for interrupts to come in */ -void hvm_prod_vcpu(struct vcpu *v) -{ - vcpu_unblock(v); -} - -void send_pio_req(struct cpu_user_regs *regs, unsigned long port, - unsigned long count, int size, long value, int dir, int pvalid) +void send_pio_req(unsigned long port, unsigned long count, int size, + long value, int dir, int df, int pvalid) { struct vcpu *v = current; vcpu_iodata_t *vio; ioreq_t *p; - if (size == 0 || count == 0) { + if ( size == 0 || count == 0 ) { printf("null pio request? port %lx, count %lx, size %d, value %lx, dir %d, pvalid %d.\n", port, count, size, value, dir, pvalid); } vio = get_vio(v->domain, v->vcpu_id); - if (vio == NULL) { + if ( vio == NULL ) { printk("bad shared page: %lx\n", (unsigned long) vio); domain_crash_synchronous(); } @@ -745,6 +754,7 @@ void send_pio_req(struct cpu_user_regs * if ( p->state != STATE_INVALID ) printk("WARNING: send pio with something already pending (%d)?\n", p->state); + p->dir = dir; p->pdata_valid = pvalid; @@ -752,19 +762,20 @@ void send_pio_req(struct cpu_user_regs * p->size = size; p->addr = port; p->count = count; - p->df = regs->eflags & EF_DF ? 
1 : 0; + p->df = df; p->io_count++; - if (pvalid) { - if (hvm_paging_enabled(current)) - p->u.data = shadow_gva_to_gpa(current, value); + if ( pvalid ) /* get physical address of data */ + { + if ( hvm_paging_enabled(current) ) + p->u.pdata = (void *)shadow_gva_to_gpa(current, value); else - p->u.pdata = (void *) value; /* guest VA == guest PA */ - } else + p->u.pdata = (void *)value; /* guest VA == guest PA */ + } else if ( dir == IOREQ_WRITE ) p->u.data = value; - if (hvm_portio_intercept(p)) { + if ( hvm_portio_intercept(p) ) { p->state = STATE_IORESP_READY; hvm_io_assist(v); return; @@ -773,21 +784,18 @@ void send_pio_req(struct cpu_user_regs * hvm_send_assist_req(v); } -static void send_mmio_req( - unsigned char type, unsigned long gpa, - unsigned long count, int size, long value, int dir, int pvalid) +static void send_mmio_req(unsigned char type, unsigned long gpa, + unsigned long count, int size, long value, + int dir, int df, int pvalid) { struct vcpu *v = current; vcpu_iodata_t *vio; ioreq_t *p; - struct cpu_user_regs *regs; - - if (size == 0 || count == 0) { + + if ( size == 0 || count == 0 ) { printf("null mmio request? type %d, gpa %lx, count %lx, size %d, value %lx, dir %d, pvalid %d.\n", type, gpa, count, size, value, dir, pvalid); } - - regs = ¤t->arch.hvm_vcpu.io_op.io_context; vio = get_vio(v->domain, v->vcpu_id); if (vio == NULL) { @@ -807,7 +815,7 @@ static void send_mmio_req( p->size = size; p->addr = gpa; p->count = count; - p->df = regs->eflags & EF_DF ? 1 : 0; + p->df = df; p->io_count++; @@ -828,58 +836,58 @@ static void send_mmio_req( hvm_send_assist_req(v); } -static void mmio_operands(int type, unsigned long gpa, struct instruction *inst, - struct hvm_io_op *mmio_opp, struct cpu_user_regs *regs) +static void mmio_operands(int type, unsigned long gpa, + struct hvm_io_op *mmio_op, + unsigned char op_size) { unsigned long value = 0; - int index, size_reg; - - size_reg = operand_size(inst->operand[0]); - - mmio_opp->flags = inst->flags; - mmio_opp->instr = inst->instr; - mmio_opp->operand[0] = inst->operand[0]; /* source */ - mmio_opp->operand[1] = inst->operand[1]; /* destination */ - mmio_opp->immediate = inst->immediate; - - if (inst->operand[0] & REGISTER) { /* dest is memory */ - index = operand_index(inst->operand[0]); + int df, index, size_reg; + struct cpu_user_regs *regs = &mmio_op->io_context; + + df = regs->eflags & X86_EFLAGS_DF ? 
1 : 0; + + size_reg = operand_size(mmio_op->operand[0]); + + if ( mmio_op->operand[0] & REGISTER ) { /* dest is memory */ + index = operand_index(mmio_op->operand[0]); value = get_reg_value(size_reg, index, 0, regs); - send_mmio_req(type, gpa, 1, inst->op_size, value, IOREQ_WRITE, 0); - } else if (inst->operand[0] & IMMEDIATE) { /* dest is memory */ - value = inst->immediate; - send_mmio_req(type, gpa, 1, inst->op_size, value, IOREQ_WRITE, 0); - } else if (inst->operand[0] & MEMORY) { /* dest is register */ + send_mmio_req(type, gpa, 1, op_size, value, IOREQ_WRITE, df, 0); + } else if ( mmio_op->operand[0] & IMMEDIATE ) { /* dest is memory */ + value = mmio_op->immediate; + send_mmio_req(type, gpa, 1, op_size, value, IOREQ_WRITE, df, 0); + } else if ( mmio_op->operand[0] & MEMORY ) { /* dest is register */ /* send the request and wait for the value */ - if ( (inst->instr == INSTR_MOVZX) || (inst->instr == INSTR_MOVSX) ) - send_mmio_req(type, gpa, 1, size_reg, 0, IOREQ_READ, 0); + if ( (mmio_op->instr == INSTR_MOVZX) || + (mmio_op->instr == INSTR_MOVSX) ) + send_mmio_req(type, gpa, 1, size_reg, 0, IOREQ_READ, df, 0); else - send_mmio_req(type, gpa, 1, inst->op_size, 0, IOREQ_READ, 0); + send_mmio_req(type, gpa, 1, op_size, 0, IOREQ_READ, df, 0); } else { - printk("mmio_operands: invalid operand\n"); + printk("%s: invalid dest mode.\n", __func__); domain_crash_synchronous(); } } #define GET_REPEAT_COUNT() \ - (mmio_inst.flags & REPZ ? (realmode ? regs->ecx & 0xFFFF : regs->ecx) : 1) - -void handle_mmio(unsigned long va, unsigned long gpa) + (mmio_op->flags & REPZ ? (realmode ? regs->ecx & 0xFFFF : regs->ecx) : 1) + +void handle_mmio(unsigned long gpa) { unsigned long inst_addr; - struct hvm_io_op *mmio_opp; + struct hvm_io_op *mmio_op; struct cpu_user_regs *regs; - struct instruction mmio_inst; - unsigned char inst[MAX_INST_LEN]; - int i, realmode, ret, inst_len; + unsigned char inst[MAX_INST_LEN], op_size; + int i, realmode, df, inst_len; struct vcpu *v = current; - mmio_opp = &v->arch.hvm_vcpu.io_op; - regs = &mmio_opp->io_context; + mmio_op = &v->arch.hvm_vcpu.io_op; + regs = &mmio_op->io_context; /* Copy current guest state into io instruction state structure. */ memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES); hvm_store_cpu_guest_regs(v, regs, NULL); + + df = regs->eflags & X86_EFLAGS_DF ? 
1 : 0; inst_len = hvm_instruction_length(regs, hvm_guest_x86_mode(v)); if ( inst_len <= 0 ) @@ -889,25 +897,21 @@ void handle_mmio(unsigned long va, unsig } realmode = hvm_realmode(v); - if (realmode) + if ( realmode ) inst_addr = (regs->cs << 4) + regs->eip; else inst_addr = regs->eip; memset(inst, 0, MAX_INST_LEN); - ret = inst_copy_from_guest(inst, inst_addr, inst_len); - if (ret != inst_len) { + if ( inst_copy_from_guest(inst, inst_addr, inst_len) != inst_len ) { printk("handle_mmio: failed to copy instruction\n"); domain_crash_synchronous(); } - init_instruction(&mmio_inst); - - if (hvm_decode(realmode, inst, &mmio_inst) == DECODE_failure) { + if ( hvm_decode(realmode, inst, mmio_op, &op_size) == DECODE_failure ) { printk("handle_mmio: failed to decode instruction\n"); - printk("mmio opcode: va 0x%lx, gpa 0x%lx, len %d:", - va, gpa, inst_len); - for (i = 0; i < inst_len; i++) + printk("mmio opcode: gpa 0x%lx, len %d:", gpa, inst_len); + for ( i = 0; i < inst_len; i++ ) printk(" %02x", inst[i] & 0xFF); printk("\n"); domain_crash_synchronous(); @@ -915,24 +919,23 @@ void handle_mmio(unsigned long va, unsig regs->eip += inst_len; /* advance %eip */ - switch (mmio_inst.instr) { + switch ( mmio_op->instr ) { case INSTR_MOV: - mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mmio_opp, regs); + mmio_operands(IOREQ_TYPE_COPY, gpa, mmio_op, op_size); break; case INSTR_MOVS: { unsigned long count = GET_REPEAT_COUNT(); - unsigned long size = mmio_inst.op_size; - int sign = regs->eflags & EF_DF ? -1 : 1; + int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1; unsigned long addr = 0; - int dir; + int dir, size = op_size; ASSERT(count); /* determine non-MMIO address */ - if (realmode) { - if (((regs->es << 4) + (regs->edi & 0xFFFF)) == va) { + if ( realmode ) { + if ( ((regs->es << 4) + (regs->edi & 0xFFFF)) == gpa ) { dir = IOREQ_WRITE; addr = (regs->ds << 4) + (regs->esi & 0xFFFF); } else { @@ -940,7 +943,7 @@ void handle_mmio(unsigned long va, unsig addr = (regs->es << 4) + (regs->edi & 0xFFFF); } } else { - if (va == regs->edi) { + if ( gpa == regs->edi ) { dir = IOREQ_WRITE; addr = regs->esi; } else { @@ -949,58 +952,61 @@ void handle_mmio(unsigned long va, unsig } } - mmio_opp->flags = mmio_inst.flags; - mmio_opp->instr = mmio_inst.instr; - - if (addr & (size - 1)) - DPRINTK("Unaligned ioport access: %lx, %ld\n", addr, size); + if ( addr & (size - 1) ) + DPRINTK("Unaligned ioport access: %lx, %d\n", addr, size); /* * In case of a movs spanning multiple pages, we break the accesses * up into multiple pages (the device model works with non-continguous * physical guest pages). To copy just one page, we adjust %ecx and - * do not advance %eip so that the next "rep movs" copies the next page. + * do not advance %eip so that the next rep;movs copies the next page. * Unaligned accesses, for example movsl starting at PGSZ-2, are * turned into a single copy where we handle the overlapping memory * copy ourself. After this copy succeeds, "rep movs" is executed * again. 
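Concretely, the page clamping described above computes the last byte the whole rep would touch; if it falls in a different page from the first byte, the count is shrunk to what fits in the first page (forward: elements up to the end of the page; backward: elements down to the start of the page, including the one at addr). A minimal standalone sketch of that arithmetic, assuming 4K pages and Xen's PAGE_MASK convention; clamp_rep_count and the sample addresses are made up for illustration:

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    /* Clamp a rep;movs element count so the operand starting at addr does
     * not cross a page boundary.  sign is +1 when EFLAGS.DF is clear
     * (forward copy) and -1 when it is set (backward copy). */
    static unsigned long clamp_rep_count(unsigned long addr,
                                         unsigned long count,
                                         int size, int sign)
    {
        unsigned long last_addr = (sign > 0) ? addr + count * size - 1
                                             : addr - (count - 1) * size;

        if ( (addr & PAGE_MASK) == (last_addr & PAGE_MASK) )
            return count;           /* already confined to one page */

        if ( sign > 0 )             /* elements left before the page end */
            return (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
        else                        /* elements down to the page start */
            return (addr & ~PAGE_MASK) / size + 1;
    }

    int main(void)
    {
        /* movsl, 100 repeats, starting 8 bytes below a page boundary:
         * only 2 elements fit going forward, so one request for 2 is sent
         * and the instruction is re-executed for the remainder. */
        printf("%lu\n", clamp_rep_count(0x10ff8UL, 100, 4, +1)); /* 2 */
        printf("%lu\n", clamp_rep_count(0x10008UL, 100, 4, -1)); /* 3 */
        return 0;
    }

Only the element count is adjusted; %eip is left untouched, so the remaining iterations re-enter the emulator and are clamped against the next page in turn.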
*/ - if ((addr & PAGE_MASK) != ((addr + sign * (size - 1)) & PAGE_MASK)) { + if ( (addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK) ) { unsigned long value = 0; DPRINTK("Single io request in a movs crossing page boundary.\n"); - mmio_opp->flags |= OVERLAP; - - regs->eip -= inst_len; /* do not advance %eip */ - - if (dir == IOREQ_WRITE) - (void)hvm_copy_from_guest_virt(&value, addr, size); - send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, size, value, dir, 0); + mmio_op->flags |= OVERLAP; + + if ( dir == IOREQ_WRITE ) { + if ( hvm_paging_enabled(v) ) + (void)hvm_copy_from_guest_virt(&value, addr, size); + else + (void)hvm_copy_from_guest_phys(&value, addr, size); + } else + mmio_op->addr = addr; + + if ( count != 1 ) + regs->eip -= inst_len; /* do not advance %eip */ + + send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, size, value, dir, df, 0); } else { - if ((addr & PAGE_MASK) != ((addr + sign * (count * size - 1)) & PAGE_MASK)) { + unsigned long last_addr = sign > 0 ? addr + count * size - 1 + : addr - (count - 1) * size; + + if ( (addr & PAGE_MASK) != (last_addr & PAGE_MASK) ) + { regs->eip -= inst_len; /* do not advance %eip */ - if (sign > 0) { + if ( sign > 0 ) count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size; - } else { - /* We need to make sure we advance to the point - where the next request will be on a different - page. If we're going down, that means - advancing until one byte before the start of - the page, hence +1. */ - count = ((addr + 1) & ~PAGE_MASK) / size; - } + else + count = (addr & ~PAGE_MASK) / size + 1; } ASSERT(count); - send_mmio_req(IOREQ_TYPE_COPY, gpa, count, size, addr, dir, 1); + + send_mmio_req(IOREQ_TYPE_COPY, gpa, count, size, addr, dir, df, 1); } break; } case INSTR_MOVZX: case INSTR_MOVSX: - mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mmio_opp, regs); + mmio_operands(IOREQ_TYPE_COPY, gpa, mmio_op, op_size); break; case INSTR_STOS: @@ -1008,10 +1014,8 @@ void handle_mmio(unsigned long va, unsig * Since the destination is always in (contiguous) mmio space we don't * need to break it up into pages. */ - mmio_opp->flags = mmio_inst.flags; - mmio_opp->instr = mmio_inst.instr; send_mmio_req(IOREQ_TYPE_COPY, gpa, - GET_REPEAT_COUNT(), mmio_inst.op_size, regs->eax, IOREQ_WRITE, 0); + GET_REPEAT_COUNT(), op_size, regs->eax, IOREQ_WRITE, df, 0); break; case INSTR_LODS: @@ -1019,87 +1023,70 @@ void handle_mmio(unsigned long va, unsig * Since the source is always in (contiguous) mmio space we don't * need to break it up into pages. 
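The GET_REPEAT_COUNT() used by these string cases boils down to: 1 without a REP prefix, otherwise %ecx, truncated to 16 bits in real mode where the architectural count register is %cx. A standalone sketch of that selection (the REPZ value below is just an illustrative flag bit, not the real encoding):

    #include <stdio.h>

    #define REPZ 0x1   /* illustrative: "instruction carried a REP prefix" */

    static unsigned long repeat_count(unsigned int flags, int realmode,
                                      unsigned long ecx)
    {
        if ( !(flags & REPZ) )
            return 1;                      /* not a repeated string op */
        return realmode ? (ecx & 0xFFFF)   /* real mode counts in %cx */
                        : ecx;             /* otherwise full %ecx */
    }

    int main(void)
    {
        printf("%lu\n", repeat_count(REPZ, 1, 0x12340005UL)); /* 5 */
        printf("%lu\n", repeat_count(0,    0, 0x12340005UL)); /* 1 */
        return 0;
    }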
*/ - mmio_opp->flags = mmio_inst.flags; - mmio_opp->instr = mmio_inst.instr; send_mmio_req(IOREQ_TYPE_COPY, gpa, - GET_REPEAT_COUNT(), mmio_inst.op_size, 0, IOREQ_READ, 0); + GET_REPEAT_COUNT(), op_size, 0, IOREQ_READ, df, 0); break; case INSTR_OR: - mmio_operands(IOREQ_TYPE_OR, gpa, &mmio_inst, mmio_opp, regs); + mmio_operands(IOREQ_TYPE_OR, gpa, mmio_op, op_size); break; case INSTR_AND: - mmio_operands(IOREQ_TYPE_AND, gpa, &mmio_inst, mmio_opp, regs); + mmio_operands(IOREQ_TYPE_AND, gpa, mmio_op, op_size); break; case INSTR_XOR: - mmio_operands(IOREQ_TYPE_XOR, gpa, &mmio_inst, mmio_opp, regs); + mmio_operands(IOREQ_TYPE_XOR, gpa, mmio_op, op_size); break; case INSTR_CMP: /* Pass through */ case INSTR_TEST: case INSTR_SUB: - mmio_opp->flags = mmio_inst.flags; - mmio_opp->instr = mmio_inst.instr; - mmio_opp->operand[0] = mmio_inst.operand[0]; /* source */ - mmio_opp->operand[1] = mmio_inst.operand[1]; /* destination */ - mmio_opp->immediate = mmio_inst.immediate; - /* send the request and wait for the value */ - send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, - mmio_inst.op_size, 0, IOREQ_READ, 0); + send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, op_size, 0, IOREQ_READ, df, 0); break; case INSTR_BT: + { + unsigned long value = 0; + int index, size; + + if ( mmio_op->operand[0] & REGISTER ) { - unsigned long value = 0; - int index, size; - - mmio_opp->instr = mmio_inst.instr; - mmio_opp->operand[0] = mmio_inst.operand[0]; /* bit offset */ - mmio_opp->operand[1] = mmio_inst.operand[1]; /* bit base */ - - if ( mmio_inst.operand[0] & REGISTER ) - { - index = operand_index(mmio_inst.operand[0]); - size = operand_size(mmio_inst.operand[0]); - value = get_reg_value(size, index, 0, regs); - } - else if ( mmio_inst.operand[0] & IMMEDIATE ) - { - mmio_opp->immediate = mmio_inst.immediate; - value = mmio_inst.immediate; - } - send_mmio_req(IOREQ_TYPE_COPY, gpa + (value >> 5), 1, - mmio_inst.op_size, 0, IOREQ_READ, 0); - break; - } + index = operand_index(mmio_op->operand[0]); + size = operand_size(mmio_op->operand[0]); + value = get_reg_value(size, index, 0, regs); + } + else if ( mmio_op->operand[0] & IMMEDIATE ) + { + mmio_op->immediate = mmio_op->immediate; + value = mmio_op->immediate; + } + send_mmio_req(IOREQ_TYPE_COPY, gpa + (value >> 5), 1, + op_size, 0, IOREQ_READ, df, 0); + break; + } case INSTR_XCHG: - mmio_opp->flags = mmio_inst.flags; - mmio_opp->instr = mmio_inst.instr; - mmio_opp->operand[0] = mmio_inst.operand[0]; /* source */ - mmio_opp->operand[1] = mmio_inst.operand[1]; /* destination */ - if ( mmio_inst.operand[0] & REGISTER ) { + if ( mmio_op->operand[0] & REGISTER ) { long value; - unsigned long operand = mmio_inst.operand[0]; + unsigned long operand = mmio_op->operand[0]; value = get_reg_value(operand_size(operand), operand_index(operand), 0, regs); /* send the request and wait for the value */ send_mmio_req(IOREQ_TYPE_XCHG, gpa, 1, - mmio_inst.op_size, value, IOREQ_WRITE, 0); + op_size, value, IOREQ_WRITE, df, 0); } else { /* the destination is a register */ long value; - unsigned long operand = mmio_inst.operand[1]; + unsigned long operand = mmio_op->operand[1]; value = get_reg_value(operand_size(operand), operand_index(operand), 0, regs); /* send the request and wait for the value */ send_mmio_req(IOREQ_TYPE_XCHG, gpa, 1, - mmio_inst.op_size, value, IOREQ_WRITE, 0); + op_size, value, IOREQ_WRITE, df, 0); } break; diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/hvm/svm/intr.c --- a/xen/arch/x86/hvm/svm/intr.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/hvm/svm/intr.c Sun Oct 22 
15:23:52 2006 -0600 @@ -43,7 +43,7 @@ * to be suitable for SVM. */ -static inline int svm_inject_extint(struct vcpu *v, int trap, int error_code) +static inline int svm_inject_extint(struct vcpu *v, int trap) { struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; vintr_t intr; @@ -62,7 +62,7 @@ static inline int svm_inject_extint(stru // printf( "IRQ = %d\n", trap ); return 0; } - + asmlinkage void svm_intr_assist(void) { struct vcpu *v = current; @@ -74,7 +74,6 @@ asmlinkage void svm_intr_assist(void) int intr_type = APIC_DM_EXTINT; int intr_vector = -1; int re_injecting = 0; - unsigned long rflags; ASSERT(vmcb); @@ -82,19 +81,28 @@ asmlinkage void svm_intr_assist(void) /* Previous Interrupt delivery caused this Intercept? */ if (vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0)) { v->arch.hvm_svm.saved_irq_vector = vmcb->exitintinfo.fields.vector; -// printk("Injecting PF#: saving IRQ from ExitInfo\n"); vmcb->exitintinfo.bytes = 0; re_injecting = 1; } - /* Guest's interrputs masked? */ - rflags = vmcb->rflags; - if (irq_masked(rflags)) { - HVM_DBG_LOG(DBG_LEVEL_1, "Guest IRQs masked: rflags: %lx", rflags); - /* bail out, we won't be injecting an interrupt this time */ + /* + * If event requires injecting then do not inject int. + */ + if (unlikely(v->arch.hvm_svm.inject_event)) { + v->arch.hvm_svm.inject_event = 0; return; } - + + /* + * create a 'fake' virtual interrupt on to intercept as soon + * as the guest _can_ take interrupts + */ + if (irq_masked(vmcb->rflags) || vmcb->interrupt_shadow) { + vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR; + svm_inject_extint(v, 0x0); /* actual vector doesn't really matter */ + return; + } + /* Previous interrupt still pending? */ if (vmcb->vintr.fields.irq) { // printk("Re-injecting IRQ from Vintr\n"); @@ -140,14 +148,13 @@ asmlinkage void svm_intr_assist(void) case APIC_DM_FIXED: case APIC_DM_LOWEST: /* Re-injecting a PIT interruptt? */ - if (re_injecting && - is_pit_irq(v, intr_vector, intr_type)) { + if (re_injecting && pt->enabled && + is_periodic_irq(v, intr_vector, intr_type)) { ++pt->pending_intr_nr; } /* let's inject this interrupt */ - TRACE_3D(TRC_VMX_INT, v->domain->domain_id, intr_vector, 0); - svm_inject_extint(v, intr_vector, VMX_DELIVER_NO_ERROR_CODE); - hvm_interrupt_post(v, intr_vector, intr_type); + TRACE_3D(TRC_VMX_INTR, v->domain->domain_id, intr_vector, 0); + svm_inject_extint(v, intr_vector); break; case APIC_DM_SMI: case APIC_DM_NMI: @@ -158,6 +165,7 @@ asmlinkage void svm_intr_assist(void) BUG(); break; } + hvm_interrupt_post(v, intr_vector, intr_type); } } diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/hvm/svm/svm.c Sun Oct 22 15:23:52 2006 -0600 @@ -54,13 +54,10 @@ /* External functions. 
We should move these to some suitable header file(s) */ -extern void do_nmi(struct cpu_user_regs *, unsigned long); extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip, int inst_len); extern uint32_t vlapic_update_ppr(struct vlapic *vlapic); extern asmlinkage void do_IRQ(struct cpu_user_regs *); -extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port, - unsigned long count, int size, long value, int dir, int pvalid); extern void svm_dump_inst(unsigned long eip); extern int svm_dbg_on; void svm_dump_regs(const char *from, struct cpu_user_regs *regs); @@ -196,6 +193,7 @@ static inline void svm_inject_exception( ASSERT(vmcb->eventinj.fields.v == 0); vmcb->eventinj = event; + v->arch.hvm_svm.inject_event=1; } static void stop_svm(void) @@ -923,6 +921,7 @@ static void svm_relinquish_guest_resourc } kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer); + rtc_deinit(d); if ( d->arch.hvm_domain.shared_page_va ) unmap_domain_page_global( @@ -937,6 +936,7 @@ static void svm_migrate_timers(struct vc { struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm); + struct RTCState *vrtc = &v->domain->arch.hvm_domain.pl_time.vrtc; if ( pt->enabled ) { @@ -945,6 +945,8 @@ static void svm_migrate_timers(struct vc } if ( VLAPIC(v) != NULL ) migrate_timer(&VLAPIC(v)->vlapic_timer, v->processor); + migrate_timer(&vrtc->second_timer, v->processor); + migrate_timer(&vrtc->second_timer2, v->processor); } @@ -1410,7 +1412,7 @@ static void svm_io_instruction(struct vc struct cpu_user_regs *regs; struct hvm_io_op *pio_opp; unsigned int port; - unsigned int size, dir; + unsigned int size, dir, df; ioio_info_t info; struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; @@ -1429,6 +1431,8 @@ static void svm_io_instruction(struct vc port = info.fields.port; /* port used to be addr */ dir = info.fields.type; /* direction */ + df = regs->eflags & X86_EFLAGS_DF ? 1 : 0; + if (info.fields.sz32) size = 4; else if (info.fields.sz16) @@ -1445,7 +1449,7 @@ static void svm_io_instruction(struct vc if (info.fields.str) { unsigned long addr, count; - int sign = regs->eflags & EF_DF ? -1 : 1; + int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1; if (!svm_get_io_address(v, regs, dir, &count, &addr)) { @@ -1475,25 +1479,37 @@ static void svm_io_instruction(struct vc unsigned long value = 0; pio_opp->flags |= OVERLAP; - - if (dir == IOREQ_WRITE) - (void)hvm_copy_from_guest_virt(&value, addr, size); - - send_pio_req(regs, port, 1, size, value, dir, 0); + pio_opp->addr = addr; + + if (dir == IOREQ_WRITE) /* OUTS */ + { + if (hvm_paging_enabled(current)) + (void)hvm_copy_from_guest_virt(&value, addr, size); + else + (void)hvm_copy_from_guest_phys(&value, addr, size); + } + + if (count == 1) + regs->eip = vmcb->exitinfo2; + + send_pio_req(port, 1, size, value, dir, df, 0); } else { - if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) + unsigned long last_addr = sign > 0 ? 
addr + count * size - 1 + : addr - (count - 1) * size; + + if ((addr & PAGE_MASK) != (last_addr & PAGE_MASK)) { if (sign > 0) count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size; else - count = (addr & ~PAGE_MASK) / size; + count = (addr & ~PAGE_MASK) / size + 1; } else regs->eip = vmcb->exitinfo2; - send_pio_req(regs, port, count, size, addr, dir, 1); + send_pio_req(port, count, size, addr, dir, df, 1); } } else @@ -1507,7 +1523,7 @@ static void svm_io_instruction(struct vc if (port == 0xe9 && dir == IOREQ_WRITE && size == 1) hvm_print_line(v, regs->eax); /* guest debug output */ - send_pio_req(regs, port, 1, size, regs->eax, dir, 0); + send_pio_req(port, 1, size, regs->eax, dir, df, 0); } } @@ -1539,9 +1555,8 @@ static int svm_set_cr0(unsigned long val if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled) { /* The guest CR3 must be pointing to the guest physical. */ - if (!VALID_MFN(mfn = - get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) - || !get_page(mfn_to_page(mfn), v->domain)) + mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT); + if ( !VALID_MFN(mfn) || !get_page(mfn_to_page(mfn), v->domain)) { printk("Invalid CR3 value = %lx\n", v->arch.hvm_svm.cpu_cr3); domain_crash_synchronous(); /* need to take a clean path */ @@ -1725,9 +1740,8 @@ static int mov_to_cr(int gpreg, int cr, * first. */ HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value); - if (((value >> PAGE_SHIFT) > v->domain->max_pages) - || !VALID_MFN(mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT)) - || !get_page(mfn_to_page(mfn), v->domain)) + mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); + if ( !VALID_MFN(mfn) || !get_page(mfn_to_page(mfn), v->domain)) { printk("Invalid CR3 value=%lx\n", value); domain_crash_synchronous(); /* need to take a clean path */ @@ -1739,9 +1753,6 @@ static int mov_to_cr(int gpreg, int cr, if (old_base_mfn) put_page(mfn_to_page(old_base_mfn)); - /* - * arch.shadow_table should now hold the next CR3 for shadow - */ v->arch.hvm_svm.cpu_cr3 = value; update_cr3(v); vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; @@ -1764,9 +1775,8 @@ static int mov_to_cr(int gpreg, int cr, /* The guest is a 32-bit PAE guest. 
*/ #if CONFIG_PAGING_LEVELS >= 3 unsigned long mfn, old_base_mfn; - - if ( !VALID_MFN(mfn = get_mfn_from_gpfn( - v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) || + mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT); + if ( !VALID_MFN(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) { printk("Invalid CR3 value = %lx", v->arch.hvm_svm.cpu_cr3); @@ -1787,10 +1797,6 @@ static int mov_to_cr(int gpreg, int cr, (unsigned long) (mfn << PAGE_SHIFT)); vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; - - /* - * arch->shadow_table should hold the next CR3 for shadow - */ HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", @@ -2355,7 +2361,7 @@ void svm_dump_regs(const char *from, str { struct vcpu *v = current; struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - unsigned long pt = pagetable_get_paddr(v->arch.shadow_table); + unsigned long pt = v->arch.hvm_vcpu.hw_cr3; printf("%s: guest registers from %s:\n", __func__, from); #if defined (__x86_64__) @@ -2589,7 +2595,7 @@ asmlinkage void svm_vmexit_handler(struc save_svm_cpu_user_regs(v, regs); vmcb->tlb_control = 1; - + v->arch.hvm_svm.inject_event = 0; if (exit_reason == VMEXIT_INVALID) { @@ -2681,11 +2687,11 @@ asmlinkage void svm_vmexit_handler(struc if (do_debug) { printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, " - "shadow_table = 0x%08x\n", + "hw_cr3 = 0x%16lx\n", __func__, (int) v->arch.guest_table.pfn, (int) v->arch.monitor_table.pfn, - (int) v->arch.shadow_table.pfn); + (long unsigned int) v->arch.hvm_vcpu.hw_cr3); svm_dump_vmcb(__func__, vmcb); svm_dump_regs(__func__, regs); @@ -2729,7 +2735,6 @@ asmlinkage void svm_vmexit_handler(struc break; case VMEXIT_NMI: - do_nmi(regs, 0); break; case VMEXIT_SMI: @@ -2788,7 +2793,7 @@ asmlinkage void svm_vmexit_handler(struc v->arch.hvm_svm.cpu_cr2 = va; vmcb->cr2 = va; - TRACE_3D(TRC_VMX_INT, v->domain->domain_id, + TRACE_3D(TRC_VMX_INTR, v->domain->domain_id, VMEXIT_EXCEPTION_PF, va); } break; @@ -2802,6 +2807,11 @@ asmlinkage void svm_vmexit_handler(struc svm_inject_exception(v, TRAP_double_fault, 1, 0); break; + case VMEXIT_VINTR: + vmcb->vintr.fields.irq = 0; + vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR; + break; + case VMEXIT_INTR: break; @@ -2913,10 +2923,10 @@ asmlinkage void svm_vmexit_handler(struc if (do_debug) { printk("vmexit_handler():- guest_table = 0x%08x, " - "monitor_table = 0x%08x, shadow_table = 0x%08x\n", + "monitor_table = 0x%08x, hw_cr3 = 0x%16x\n", (int)v->arch.guest_table.pfn, (int)v->arch.monitor_table.pfn, - (int)v->arch.shadow_table.pfn); + (int)v->arch.hvm_vcpu.hw_cr3); printk("svm_vmexit_handler: Returning\n"); } #endif diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/hvm/svm/vmcb.c Sun Oct 22 15:23:52 2006 -0600 @@ -372,8 +372,8 @@ void svm_do_launch(struct vcpu *v) if (svm_dbg_on) { unsigned long pt; - pt = pagetable_get_paddr(v->arch.shadow_table); - printk("%s: shadow_table = %lx\n", __func__, pt); + printk("%s: hw_cr3 = %llx\n", __func__, + (unsigned long long) v->arch.hvm_vcpu.hw_cr3); pt = pagetable_get_paddr(v->arch.guest_table); printk("%s: guest_table = %lx\n", __func__, pt); pt = pagetable_get_paddr(v->domain->arch.phys_table); @@ -387,8 +387,9 @@ void svm_do_launch(struct vcpu *v) { printk("%s: cr3 = %lx ", __func__, (unsigned long)vmcb->cr3); printk("init_guest_table: guest_table = 0x%08x, monitor_table = 0x%08x," - " shadow_table = 0x%08x\n", (int)v->arch.guest_table.pfn, - (int)v->arch.monitor_table.pfn, 
(int)v->arch.shadow_table.pfn); + " hw_cr3 = 0x%16llx\n", (int)v->arch.guest_table.pfn, + (int)v->arch.monitor_table.pfn, + (unsigned long long) v->arch.hvm_vcpu.hw_cr3); } v->arch.schedule_tail = arch_svm_do_resume; diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/hvm/svm/x86_32/exits.S --- a/xen/arch/x86/hvm/svm/x86_32/exits.S Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/hvm/svm/x86_32/exits.S Sun Oct 22 15:23:52 2006 -0600 @@ -126,6 +126,8 @@ ENTRY(svm_asm_do_launch) HVM_SAVE_ALL_NOSEGREGS STGI +.globl svm_stgi_label; +svm_stgi_label: movl %esp,%eax push %eax call svm_vmexit_handler diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/hvm/svm/x86_64/exits.S --- a/xen/arch/x86/hvm/svm/x86_64/exits.S Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/hvm/svm/x86_64/exits.S Sun Oct 22 15:23:52 2006 -0600 @@ -144,6 +144,8 @@ ENTRY(svm_asm_do_launch) VMLOAD STGI +.globl svm_stgi_label; +svm_stgi_label: movq %rsp,%rdi call svm_vmexit_handler jmp svm_asm_do_resume diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/hvm/vioapic.c --- a/xen/arch/x86/hvm/vioapic.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/hvm/vioapic.c Sun Oct 22 15:23:52 2006 -0600 @@ -42,6 +42,9 @@ /* HACK: Route IRQ0 only to VCPU0 to prevent time jumps. */ #define IRQ0_SPECIAL_ROUTING 1 +#ifdef IRQ0_SPECIAL_ROUTING +static int redir_warning_done = 0; +#endif #if defined(__ia64__) #define opt_hvm_debug_level opt_vmx_debug_level @@ -155,6 +158,7 @@ static void hvm_vioapic_update_imr(struc clear_bit(index, &s->imr); } + static void hvm_vioapic_write_indirect(struct hvm_vioapic *s, unsigned long addr, unsigned long length, @@ -179,21 +183,35 @@ static void hvm_vioapic_write_indirect(s { uint32_t redir_index = 0; + redir_index = (s->ioregsel - 0x10) >> 1; + HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "hvm_vioapic_write_indirect " "change redir index %x val %lx\n", redir_index, val); - redir_index = (s->ioregsel - 0x10) >> 1; - if (redir_index >= 0 && redir_index < IOAPIC_NUM_PINS) { uint64_t redir_content; redir_content = s->redirtbl[redir_index].value; - if (s->ioregsel & 0x1) + if (s->ioregsel & 0x1) { +#ifdef IRQ0_SPECIAL_ROUTING + if ( !redir_warning_done && (redir_index == 0) && + ((val >> 24) != 0) ) { + /* + * Cannot yet handle delivering PIT interrupts to + * any VCPU != 0. 
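(The destination being tested here sits in the top byte of the high dword: guests program each 64-bit redirection entry as two 32-bit IOREGSEL writes, the low half at 0x10 + 2*pin and the high half one register above, and the destination APIC ID occupies bits 63:56, hence the (val >> 24) check. A standalone sketch of that split, with made-up values:

    #include <stdint.h>
    #include <stdio.h>

    /* Merge one 32-bit guest write into a 64-bit redirection entry.
     * An odd IOREGSEL selects the high half of the entry. */
    static uint64_t write_redir_half(uint64_t entry, uint32_t ioregsel,
                                     uint32_t val)
    {
        if ( ioregsel & 1 )          /* high 32 bits */
            return ((uint64_t)val << 32) | (uint32_t)entry;
        else                         /* low 32 bits  */
            return (entry & 0xffffffff00000000ULL) | val;
    }

    int main(void)
    {
        uint64_t entry = 0;
        /* pin 0 (IRQ0): low half at IOREGSEL 0x10, high half at 0x11 */
        entry = write_redir_half(entry, 0x10, 0x00000030);  /* vector 0x30   */
        entry = write_redir_half(entry, 0x11, 0x02000000);  /* dest APIC ID 2 */
        printf("entry = %#018llx, dest = %u\n",
               (unsigned long long)entry, (unsigned)(entry >> 56));
        return 0;
    }

End of aside.)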
Needs proper fixing, but for now + * simply spit a warning that we're going to ignore + * the target in practice & always deliver to VCPU 0 + */ + printk("IO-APIC: PIT (IRQ0) redirect to VCPU %lx " + "will be ignored.\n", val >> 24); + redir_warning_done = 1; + } +#endif redir_content = (((uint64_t)val & 0xffffffff) << 32) | (redir_content & 0xffffffff); - else + } else redir_content = ((redir_content >> 32) << 32) | (val & 0xffffffff); s->redirtbl[redir_index].value = redir_content; @@ -409,6 +427,8 @@ static void ioapic_deliver(hvm_vioapic_t uint8_t vector = s->redirtbl[irqno].RedirForm.vector; uint8_t trig_mode = s->redirtbl[irqno].RedirForm.trigmod; uint32_t deliver_bitmask; + struct vlapic *target; + HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "dest %x dest_mode %x delivery_mode %x vector %x trig_mode %x\n", @@ -427,9 +447,8 @@ static void ioapic_deliver(hvm_vioapic_t switch (delivery_mode) { case dest_LowestPrio: { - struct vlapic* target; - #ifdef IRQ0_SPECIAL_ROUTING + /* Force round-robin to pick VCPU 0 */ if (irqno == 0) target = s->lapic_info[0]; else @@ -450,19 +469,17 @@ static void ioapic_deliver(hvm_vioapic_t { uint8_t bit; for (bit = 0; bit < s->lapic_count; bit++) { - if (deliver_bitmask & (1 << bit)) { + if ( !(deliver_bitmask & (1 << bit)) ) + continue; #ifdef IRQ0_SPECIAL_ROUTING - if ( (irqno == 0) && (bit !=0) ) - { - printk("PIT irq to bit %x\n", bit); - domain_crash_synchronous(); - } -#endif - if (s->lapic_info[bit]) { - ioapic_inj_irq(s, s->lapic_info[bit], - vector, trig_mode, delivery_mode); - } - } + /* Do not deliver timer interrupts to VCPU != 0 */ + if ( (irqno == 0) && (bit !=0 ) ) + target = s->lapic_info[0]; + else +#endif + target = s->lapic_info[bit]; + if (target) + ioapic_inj_irq(s, target, vector, trig_mode, delivery_mode); } break; } diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/hvm/vlapic.c --- a/xen/arch/x86/hvm/vlapic.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/hvm/vlapic.c Sun Oct 22 15:23:52 2006 -0600 @@ -231,7 +231,8 @@ static int vlapic_accept_irq(struct vcpu "level trig mode for vector %d\n", vector); vlapic_set_vector(vector, vlapic->regs + APIC_TMR); } - hvm_prod_vcpu(v); + + vcpu_kick(v); result = 1; break; diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/hvm/vmx/io.c --- a/xen/arch/x86/hvm/vmx/io.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/hvm/vmx/io.c Sun Oct 22 15:23:52 2006 -0600 @@ -153,7 +153,7 @@ asmlinkage void vmx_intr_assist(void) case APIC_DM_FIXED: case APIC_DM_LOWEST: vmx_inject_extint(v, highest_vector, VMX_DELIVER_NO_ERROR_CODE); - TRACE_3D(TRC_VMX_INT, v->domain->domain_id, highest_vector, 0); + TRACE_3D(TRC_VMX_INTR, v->domain->domain_id, highest_vector, 0); break; case APIC_DM_SMI: diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/hvm/vmx/vmx.c Sun Oct 22 15:23:52 2006 -0600 @@ -146,6 +146,7 @@ static void vmx_relinquish_guest_resourc } kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer); + rtc_deinit(d); if ( d->arch.hvm_domain.shared_page_va ) unmap_domain_page_global( @@ -487,6 +488,7 @@ void vmx_migrate_timers(struct vcpu *v) void vmx_migrate_timers(struct vcpu *v) { struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm); + struct RTCState *vrtc = &v->domain->arch.hvm_domain.pl_time.vrtc; if ( pt->enabled ) { @@ -495,6 +497,8 @@ void vmx_migrate_timers(struct vcpu *v) } if ( VLAPIC(v) != NULL ) migrate_timer(&VLAPIC(v)->vlapic_timer, v->processor); + migrate_timer(&vrtc->second_timer, 
v->processor); + migrate_timer(&vrtc->second_timer2, v->processor); } static void vmx_store_cpu_guest_regs( @@ -867,7 +871,7 @@ static int vmx_do_page_fault(unsigned lo result = shadow_fault(va, regs); - TRACE_VMEXIT (2,result); + TRACE_VMEXIT(2, result); #if 0 if ( !result ) { @@ -898,7 +902,7 @@ static void vmx_do_no_device_fault(void) } #define bitmaskof(idx) (1U << ((idx)&31)) -static void vmx_vmexit_do_cpuid(struct cpu_user_regs *regs) +static void vmx_do_cpuid(struct cpu_user_regs *regs) { unsigned int input = (unsigned int)regs->eax; unsigned int count = (unsigned int)regs->ecx; @@ -917,7 +921,32 @@ static void vmx_vmexit_do_cpuid(struct c if ( input == CPUID_LEAF_0x4 ) { cpuid_count(input, count, &eax, &ebx, &ecx, &edx); - eax &= NUM_CORES_RESET_MASK; + eax &= NUM_CORES_RESET_MASK; + } + else if ( input == 0x40000003 ) + { + /* + * NB. Unsupported interface for private use of VMXASSIST only. + * Note that this leaf lives at <max-hypervisor-leaf> + 1. + */ + u64 value = ((u64)regs->edx << 32) | (u32)regs->ecx; + unsigned long mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); + char *p; + + DPRINTK("Input address is 0x%"PRIx64".\n", value); + + /* 8-byte aligned valid pseudophys address from vmxassist, please. */ + if ( (value & 7) || (mfn == INVALID_MFN) || + !v->arch.hvm_vmx.vmxassist_enabled ) + domain_crash_synchronous(); + + p = map_domain_page(mfn); + value = *((uint64_t *)(p + (value & (PAGE_SIZE - 1)))); + unmap_domain_page(p); + + DPRINTK("Output value is 0x%"PRIx64".\n", value); + ecx = (u32)(value >> 0); + edx = (u32)(value >> 32); } else if ( !cpuid_hypervisor_leaves(input, &eax, &ebx, &ecx, &edx) ) { @@ -1023,14 +1052,14 @@ static void vmx_dr_access(unsigned long * Invalidate the TLB for va. Invalidate the shadow page corresponding * the address va. */ -static void vmx_vmexit_do_invlpg(unsigned long va) +static void vmx_do_invlpg(unsigned long va) { unsigned long eip; struct vcpu *v = current; __vmread(GUEST_RIP, &eip); - HVM_DBG_LOG(DBG_LEVEL_VMMU, "vmx_vmexit_do_invlpg: eip=%lx, va=%lx", + HVM_DBG_LOG(DBG_LEVEL_VMMU, "eip=%lx, va=%lx", eip, va); /* @@ -1041,14 +1070,20 @@ static void vmx_vmexit_do_invlpg(unsigne } -static int check_for_null_selector(unsigned long eip) +static int check_for_null_selector(unsigned long eip, int inst_len, int dir) { unsigned char inst[MAX_INST_LEN]; unsigned long sel; - int i, inst_len; + int i; int inst_copy_from_guest(unsigned char *, unsigned long, int); - inst_len = __get_instruction_length(); /* Safe: INS/OUTS */ + /* INS can only use ES segment register, and it can't be overridden */ + if ( dir == IOREQ_READ ) + { + __vmread(GUEST_ES_SELECTOR, &sel); + return sel == 0 ? 
1 : 0; + } + memset(inst, 0, MAX_INST_LEN); if ( inst_copy_from_guest(inst, eip, inst_len) != inst_len ) { @@ -1093,18 +1128,13 @@ static int check_for_null_selector(unsig return 0; } -extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port, - unsigned long count, int size, long value, - int dir, int pvalid); - static void vmx_io_instruction(unsigned long exit_qualification, unsigned long inst_len) { struct cpu_user_regs *regs; struct hvm_io_op *pio_opp; - unsigned long eip, cs, eflags; - unsigned long port, size, dir; - int vm86; + unsigned long port, size; + int dir, df, vm86; pio_opp = ¤t->arch.hvm_vcpu.io_op; pio_opp->instr = INSTR_PIO; @@ -1116,28 +1146,26 @@ static void vmx_io_instruction(unsigned memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES); hvm_store_cpu_guest_regs(current, regs, NULL); - eip = regs->eip; - cs = regs->cs; - eflags = regs->eflags; - - vm86 = eflags & X86_EFLAGS_VM ? 1 : 0; - - HVM_DBG_LOG(DBG_LEVEL_IO, - "vmx_io_instruction: vm86 %d, eip=%lx:%lx, " + vm86 = regs->eflags & X86_EFLAGS_VM ? 1 : 0; + df = regs->eflags & X86_EFLAGS_DF ? 1 : 0; + + HVM_DBG_LOG(DBG_LEVEL_IO, "vm86 %d, eip=%x:%lx, " "exit_qualification = %lx", - vm86, cs, eip, exit_qualification); - - if (test_bit(6, &exit_qualification)) + vm86, regs->cs, (unsigned long)regs->eip, exit_qualification); + + if ( test_bit(6, &exit_qualification) ) port = (exit_qualification >> 16) & 0xFFFF; else port = regs->edx & 0xffff; - TRACE_VMEXIT(1, port); + + TRACE_VMEXIT(1,port); + size = (exit_qualification & 7) + 1; dir = test_bit(3, &exit_qualification); /* direction */ - if (test_bit(4, &exit_qualification)) { /* string instruction */ + if ( test_bit(4, &exit_qualification) ) { /* string instruction */ unsigned long addr, count = 1; - int sign = regs->eflags & EF_DF ? -1 : 1; + int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1; __vmread(GUEST_LINEAR_ADDRESS, &addr); @@ -1145,10 +1173,10 @@ static void vmx_io_instruction(unsigned * In protected mode, guest linear address is invalid if the * selector is null. */ - if (!vm86 && check_for_null_selector(eip)) + if ( !vm86 && check_for_null_selector(regs->eip, inst_len, dir) ) addr = dir == IOREQ_WRITE ? regs->esi : regs->edi; - if (test_bit(5, &exit_qualification)) { /* "rep" prefix */ + if ( test_bit(5, &exit_qualification) ) { /* "rep" prefix */ pio_opp->flags |= REPZ; count = vm86 ? regs->ecx & 0xFFFF : regs->ecx; } @@ -1157,30 +1185,45 @@ static void vmx_io_instruction(unsigned * Handle string pio instructions that cross pages or that * are unaligned. See the comments in hvm_domain.c/handle_mmio() */ - if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) { + if ( (addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK) ) { unsigned long value = 0; pio_opp->flags |= OVERLAP; - if (dir == IOREQ_WRITE) - (void)hvm_copy_from_guest_virt(&value, addr, size); - send_pio_req(regs, port, 1, size, value, dir, 0); + + if ( dir == IOREQ_WRITE ) /* OUTS */ + { + if ( hvm_paging_enabled(current) ) + (void)hvm_copy_from_guest_virt(&value, addr, size); + else + (void)hvm_copy_from_guest_phys(&value, addr, size); + } else + pio_opp->addr = addr; + + if ( count == 1 ) + regs->eip += inst_len; + + send_pio_req(port, 1, size, value, dir, df, 0); } else { - if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) { - if (sign > 0) + unsigned long last_addr = sign > 0 ? 
addr + count * size - 1 + : addr - (count - 1) * size; + + if ( (addr & PAGE_MASK) != (last_addr & PAGE_MASK) ) + { + if ( sign > 0 ) count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size; else - count = (addr & ~PAGE_MASK) / size; + count = (addr & ~PAGE_MASK) / size + 1; } else regs->eip += inst_len; - send_pio_req(regs, port, count, size, addr, dir, 1); + send_pio_req(port, count, size, addr, dir, df, 1); } } else { - if (port == 0xe9 && dir == IOREQ_WRITE && size == 1) + if ( port == 0xe9 && dir == IOREQ_WRITE && size == 1 ) hvm_print_line(current, regs->eax); /* guest debug output */ regs->eip += inst_len; - send_pio_req(regs, port, 1, size, regs->eax, dir, 0); + send_pio_req(port, 1, size, regs->eax, dir, df, 0); } } @@ -1280,12 +1323,13 @@ static int vmx_world_restore(struct vcpu * first. */ HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %x", c->cr3); - if ((c->cr3 >> PAGE_SHIFT) > v->domain->max_pages) { + mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT); + if ( !VALID_MFN(mfn) ) + { printk("Invalid CR3 value=%x", c->cr3); domain_crash_synchronous(); return 0; } - mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT); if(!get_page(mfn_to_page(mfn), v->domain)) return 0; old_base_mfn = pagetable_get_pfn(v->arch.guest_table); @@ -1465,9 +1509,8 @@ static int vmx_set_cr0(unsigned long val * Trying to enable guest paging. * The guest CR3 must be pointing to the guest physical. */ - if ( !VALID_MFN(mfn = get_mfn_from_gpfn( - v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)) || - !get_page(mfn_to_page(mfn), v->domain) ) + mfn = get_mfn_from_gpfn(v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT); + if ( !VALID_MFN(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) { printk("Invalid CR3 value = %lx (mfn=%lx)\n", v->arch.hvm_vmx.cpu_cr3, mfn); @@ -1628,6 +1671,10 @@ static int mov_to_cr(int gp, int cr, str printk("invalid gp: %d\n", gp); __hvm_bug(regs); } + + TRACE_VMEXIT(1, TYPE_MOV_TO_CR); + TRACE_VMEXIT(2, cr); + TRACE_VMEXIT(3, value); HVM_DBG_LOG(DBG_LEVEL_1, "CR%d, value = %lx", cr, value); @@ -1665,11 +1712,10 @@ static int mov_to_cr(int gp, int cr, str * first. */ HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value); - if ( ((value >> PAGE_SHIFT) > v->domain->max_pages ) || - !VALID_MFN(mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT)) || - !get_page(mfn_to_page(mfn), v->domain) ) + mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); + if ( !VALID_MFN(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) { - printk("Invalid CR3 value=%lx", value); + printk("Invalid CR3 value=%lx\n", value); domain_crash_synchronous(); /* need to take a clean path */ } old_base_mfn = pagetable_get_pfn(v->arch.guest_table); @@ -1698,15 +1744,13 @@ static int mov_to_cr(int gp, int cr, str /* The guest is a 32-bit PAE guest. */ #if CONFIG_PAGING_LEVELS >= 3 unsigned long mfn, old_base_mfn; - - if ( !VALID_MFN(mfn = get_mfn_from_gpfn( - v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)) || + mfn = get_mfn_from_gpfn(v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT); + if ( !VALID_MFN(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) { printk("Invalid CR3 value = %lx", v->arch.hvm_vmx.cpu_cr3); domain_crash_synchronous(); /* need to take a clean path */ } - /* * Now arch.guest_table points to machine physical. 
@@ -1810,6 +1854,10 @@ static void mov_from_cr(int cr, int gp, __hvm_bug(regs); } + TRACE_VMEXIT(1, TYPE_MOV_FROM_CR); + TRACE_VMEXIT(2, cr); + TRACE_VMEXIT(3, value); + HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR%d, value = %lx", cr, value); } @@ -1824,20 +1872,14 @@ static int vmx_cr_access(unsigned long e case TYPE_MOV_TO_CR: gp = exit_qualification & CONTROL_REG_ACCESS_REG; cr = exit_qualification & CONTROL_REG_ACCESS_NUM; - TRACE_VMEXIT(1,TYPE_MOV_TO_CR); - TRACE_VMEXIT(2,cr); - TRACE_VMEXIT(3,gp); return mov_to_cr(gp, cr, regs); case TYPE_MOV_FROM_CR: gp = exit_qualification & CONTROL_REG_ACCESS_REG; cr = exit_qualification & CONTROL_REG_ACCESS_NUM; - TRACE_VMEXIT(1,TYPE_MOV_FROM_CR); - TRACE_VMEXIT(2,cr); - TRACE_VMEXIT(3,gp); mov_from_cr(cr, gp, regs); break; case TYPE_CLTS: - TRACE_VMEXIT(1,TYPE_CLTS); + TRACE_VMEXIT(1, TYPE_CLTS); /* We initialise the FPU now, to avoid needing another vmexit. */ setup_fpu(v); @@ -1852,10 +1894,11 @@ static int vmx_cr_access(unsigned long e __vmwrite(CR0_READ_SHADOW, value); break; case TYPE_LMSW: - TRACE_VMEXIT(1,TYPE_LMSW); __vmread_vcpu(v, CR0_READ_SHADOW, &value); value = (value & ~0xF) | (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF); + TRACE_VMEXIT(1, TYPE_LMSW); + TRACE_VMEXIT(2, value); return vmx_set_cr0(value); break; default: @@ -1871,7 +1914,7 @@ static inline void vmx_do_msr_read(struc u32 eax, edx; struct vcpu *v = current; - HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%lx, eax=%lx, edx=%lx", + HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%lx, eax=%lx, edx=%lx", (unsigned long)regs->ecx, (unsigned long)regs->eax, (unsigned long)regs->edx); switch (regs->ecx) { @@ -1908,8 +1951,7 @@ static inline void vmx_do_msr_read(struc regs->eax = msr_content & 0xFFFFFFFF; regs->edx = msr_content >> 32; - HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: " - "ecx=%lx, eax=%lx, edx=%lx", + HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%lx, eax=%lx, edx=%lx", (unsigned long)regs->ecx, (unsigned long)regs->eax, (unsigned long)regs->edx); } @@ -1919,7 +1961,7 @@ static inline void vmx_do_msr_write(stru u64 msr_content; struct vcpu *v = current; - HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write: ecx=%lx, eax=%lx, edx=%lx", + HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%lx, eax=%lx, edx=%lx", (unsigned long)regs->ecx, (unsigned long)regs->eax, (unsigned long)regs->edx); @@ -1947,20 +1989,19 @@ static inline void vmx_do_msr_write(stru break; } - HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write returns: " - "ecx=%lx, eax=%lx, edx=%lx", + HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%lx, eax=%lx, edx=%lx", (unsigned long)regs->ecx, (unsigned long)regs->eax, (unsigned long)regs->edx); } -void vmx_vmexit_do_hlt(void) +static void vmx_do_hlt(void) { unsigned long rflags; __vmread(GUEST_RFLAGS, &rflags); hvm_hlt(rflags); } -static inline void vmx_vmexit_do_extint(struct cpu_user_regs *regs) +static inline void vmx_do_extint(struct cpu_user_regs *regs) { unsigned int vector; int error; @@ -1981,7 +2022,7 @@ static inline void vmx_vmexit_do_extint( __hvm_bug(regs); vector &= INTR_INFO_VECTOR_MASK; - TRACE_VMEXIT(1,vector); + TRACE_VMEXIT(1, vector); switch(vector) { case LOCAL_TIMER_VECTOR: @@ -2112,7 +2153,7 @@ asmlinkage void vmx_vmexit_handler(struc asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs) { unsigned int exit_reason; - unsigned long exit_qualification, rip, inst_len = 0; + unsigned long exit_qualification, inst_len = 0; struct vcpu *v = current; __vmread(VM_EXIT_REASON, &exit_reason); @@ -2154,7 +2195,7 @@ asmlinkage void vmx_vmexit_handler(struc domain_crash_synchronous(); } - 
TRACE_VMEXIT(0,exit_reason); + TRACE_VMEXIT(0, exit_reason); switch ( exit_reason ) { @@ -2166,14 +2207,13 @@ asmlinkage void vmx_vmexit_handler(struc * (2) NMI */ unsigned int vector; - unsigned long va; if ( __vmread(VM_EXIT_INTR_INFO, &vector) || !(vector & INTR_INFO_VALID_MASK) ) domain_crash_synchronous(); vector &= INTR_INFO_VECTOR_MASK; - TRACE_VMEXIT(1,vector); + TRACE_VMEXIT(1, vector); perfc_incra(cause_vector, vector); switch ( vector ) { @@ -2229,11 +2269,11 @@ asmlinkage void vmx_vmexit_handler(struc } case TRAP_page_fault: { - __vmread(EXIT_QUALIFICATION, &va); + __vmread(EXIT_QUALIFICATION, &exit_qualification); __vmread(VM_EXIT_INTR_ERROR_CODE, ®s->error_code); TRACE_VMEXIT(3, regs->error_code); - TRACE_VMEXIT(4, va); + TRACE_VMEXIT(4, exit_qualification); HVM_DBG_LOG(DBG_LEVEL_VMMU, "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx", @@ -2241,13 +2281,13 @@ asmlinkage void vmx_vmexit_handler(struc (unsigned long)regs->ecx, (unsigned long)regs->edx, (unsigned long)regs->esi, (unsigned long)regs->edi); - if ( !vmx_do_page_fault(va, regs) ) + if ( !vmx_do_page_fault(exit_qualification, regs) ) { /* Inject #PG using Interruption-Information Fields. */ vmx_inject_hw_exception(v, TRAP_page_fault, regs->error_code); - v->arch.hvm_vmx.cpu_cr2 = va; - TRACE_3D(TRC_VMX_INT, v->domain->domain_id, - TRAP_page_fault, va); + v->arch.hvm_vmx.cpu_cr2 = exit_qualification; + TRACE_3D(TRC_VMX_INTR, v->domain->domain_id, + TRAP_page_fault, exit_qualification); } break; } @@ -2261,7 +2301,7 @@ asmlinkage void vmx_vmexit_handler(struc break; } case EXIT_REASON_EXTERNAL_INTERRUPT: - vmx_vmexit_do_extint(regs); + vmx_do_extint(regs); break; case EXIT_REASON_TRIPLE_FAULT: domain_crash_synchronous(); @@ -2278,39 +2318,35 @@ asmlinkage void vmx_vmexit_handler(struc case EXIT_REASON_CPUID: inst_len = __get_instruction_length(); /* Safe: CPUID */ __update_guest_eip(inst_len); - vmx_vmexit_do_cpuid(regs); + vmx_do_cpuid(regs); break; case EXIT_REASON_HLT: inst_len = __get_instruction_length(); /* Safe: HLT */ __update_guest_eip(inst_len); - vmx_vmexit_do_hlt(); + vmx_do_hlt(); break; case EXIT_REASON_INVLPG: { - unsigned long va; inst_len = __get_instruction_length(); /* Safe: INVLPG */ __update_guest_eip(inst_len); - __vmread(EXIT_QUALIFICATION, &va); - vmx_vmexit_do_invlpg(va); + __vmread(EXIT_QUALIFICATION, &exit_qualification); + vmx_do_invlpg(exit_qualification); + TRACE_VMEXIT(4, exit_qualification); break; } case EXIT_REASON_VMCALL: { inst_len = __get_instruction_length(); /* Safe: VMCALL */ __update_guest_eip(inst_len); - __vmread(GUEST_RIP, &rip); - __vmread(EXIT_QUALIFICATION, &exit_qualification); hvm_do_hypercall(regs); break; } case EXIT_REASON_CR_ACCESS: { - __vmread(GUEST_RIP, &rip); __vmread(EXIT_QUALIFICATION, &exit_qualification); inst_len = __get_instruction_length(); /* Safe: MOV Cn, LMSW, CLTS */ if ( vmx_cr_access(exit_qualification, regs) ) __update_guest_eip(inst_len); - TRACE_VMEXIT(3, regs->error_code); TRACE_VMEXIT(4, exit_qualification); break; } @@ -2322,17 +2358,23 @@ asmlinkage void vmx_vmexit_handler(struc __vmread(EXIT_QUALIFICATION, &exit_qualification); inst_len = __get_instruction_length(); /* Safe: IN, INS, OUT, OUTS */ vmx_io_instruction(exit_qualification, inst_len); - TRACE_VMEXIT(4,exit_qualification); + TRACE_VMEXIT(4, exit_qualification); break; case EXIT_REASON_MSR_READ: inst_len = __get_instruction_length(); /* Safe: RDMSR */ __update_guest_eip(inst_len); vmx_do_msr_read(regs); + TRACE_VMEXIT(1, regs->ecx); + TRACE_VMEXIT(2, regs->eax); + 
TRACE_VMEXIT(3, regs->edx); break; case EXIT_REASON_MSR_WRITE: inst_len = __get_instruction_length(); /* Safe: WRMSR */ __update_guest_eip(inst_len); vmx_do_msr_write(regs); + TRACE_VMEXIT(1, regs->ecx); + TRACE_VMEXIT(2, regs->eax); + TRACE_VMEXIT(3, regs->edx); break; case EXIT_REASON_MWAIT_INSTRUCTION: case EXIT_REASON_MONITOR_INSTRUCTION: @@ -2366,26 +2408,25 @@ asmlinkage void vmx_load_cr2(void) asm volatile("mov %0,%%cr2": :"r" (v->arch.hvm_vmx.cpu_cr2)); } -asmlinkage void vmx_trace_vmentry (void) -{ - TRACE_5D(TRC_VMX_VMENTRY, +asmlinkage void vmx_trace_vmentry(void) +{ + TRACE_5D(TRC_VMX_VMENTRY + current->vcpu_id, this_cpu(trace_values)[0], this_cpu(trace_values)[1], this_cpu(trace_values)[2], this_cpu(trace_values)[3], this_cpu(trace_values)[4]); - TRACE_VMEXIT(0,9); - TRACE_VMEXIT(1,9); - TRACE_VMEXIT(2,9); - TRACE_VMEXIT(3,9); - TRACE_VMEXIT(4,9); - return; + + TRACE_VMEXIT(0, 0); + TRACE_VMEXIT(1, 0); + TRACE_VMEXIT(2, 0); + TRACE_VMEXIT(3, 0); + TRACE_VMEXIT(4, 0); } asmlinkage void vmx_trace_vmexit (void) { - TRACE_3D(TRC_VMX_VMEXIT,0,0,0); - return; + TRACE_3D(TRC_VMX_VMEXIT + current->vcpu_id, 0, 0, 0); } /* diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/i387.c --- a/xen/arch/x86/i387.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/i387.c Sun Oct 22 15:23:52 2006 -0600 @@ -14,6 +14,7 @@ #include <asm/processor.h> #include <asm/hvm/support.h> #include <asm/i387.h> +#include <asm/asm_defns.h> void init_fpu(void) { @@ -41,11 +42,11 @@ void save_init_fpu(struct vcpu *v) #else /* __x86_64__ */ /* * The only way to force fxsaveq on a wide range of gas versions. On - * older versions the rex64 prefix works only if we force an addressing - * mode that doesn't require extended registers. + * older versions the rex64 prefix works only if we force an + * addressing mode that doesn't require extended registers. */ __asm__ __volatile__ ( - "rex64/fxsave (%1)" + REX64_PREFIX "fxsave (%1)" : "=m" (*fpu_ctxt) : "cdaSDb" (fpu_ctxt) ); #endif @@ -95,7 +96,7 @@ void restore_fpu(struct vcpu *v) "1: fxrstor %0 \n" #else /* __x86_64__ */ /* See above for why the operands/constraints are this way. 
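In other words, the 64-bit forms are obtained by placing a REX.W prefix in front of fxsave/fxrstor while the register constraint pins the pointer into a legacy register, so no further REX bits are needed. A minimal x86-64-only userspace sketch of the same trick; whether Xen's REX64_PREFIX really expands to an explicit 0x48 byte is not visible in this hunk, so treat that spelling as an assumption of the sketch:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* x86-64 only.  Emit REX.W (0x48) by hand so the 64-bit fxsave form is
     * assembled even where "rex64/" or "fxsaveq" is not understood.  The
     * "D" constraint (like the "cdaSDb" set in the real code) keeps the
     * pointer in a legacy register, so no other REX bits are required. */
    static char fxarea[512] __attribute__((aligned(16)));

    int main(void)
    {
        uint16_t fcw;

        __asm__ __volatile__ ( ".byte 0x48; fxsave (%0)"
                               : : "D" (fxarea) : "memory" );

        memcpy(&fcw, fxarea, sizeof(fcw));   /* x87 control word, offset 0 */
        printf("FCW = %#x\n", fcw);          /* typically 0x37f */
        return 0;
    }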
*/ - "1: rex64/fxrstor (%2) \n" + "1: " REX64_PREFIX "fxrstor (%2)\n" #endif ".section .fixup,\"ax\" \n" "2: push %%"__OP"ax \n" diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/io_apic.c --- a/xen/arch/x86/io_apic.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/io_apic.c Sun Oct 22 15:23:52 2006 -0600 @@ -269,13 +269,10 @@ static void set_ioapic_affinity_irq(unsi int pin; struct irq_pin_list *entry = irq_2_pin + irq; unsigned int apicid_value; - cpumask_t tmp; - - cpus_and(tmp, cpumask, cpu_online_map); - if (cpus_empty(tmp)) - tmp = TARGET_CPUS; - - cpus_and(cpumask, tmp, CPU_MASK_ALL); + + cpus_and(cpumask, cpumask, cpu_online_map); + if (cpus_empty(cpumask)) + cpumask = TARGET_CPUS; apicid_value = cpu_mask_to_apicid(cpumask); /* Prepare to do the io_apic_write */ diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/irq.c --- a/xen/arch/x86/irq.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/irq.c Sun Oct 22 15:23:52 2006 -0600 @@ -450,7 +450,7 @@ int pirq_guest_bind(struct vcpu *v, int action->in_flight = 0; action->shareable = will_share; action->ack_type = pirq_acktype(irq); - action->cpu_eoi_map = CPU_MASK_NONE; + cpus_clear(action->cpu_eoi_map); desc->depth = 0; desc->status |= IRQ_GUEST; diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/mm/shadow/common.c Sun Oct 22 15:23:52 2006 -0600 @@ -283,11 +283,8 @@ __shadow_validate_guest_entry(struct vcp if ( page->shadow_flags & SHF_L2H_PAE ) result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2he, 3, 3) (v, gmfn, entry, size); - if ( page->shadow_flags & SHF_L3_PAE ) - result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl3e, 3, 3) - (v, gmfn, entry, size); #else /* 32-bit non-PAE hypervisor does not support PAE guests */ - ASSERT((page->shadow_flags & (SHF_L3_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0); + ASSERT((page->shadow_flags & (SHF_L2H_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0); #endif #if CONFIG_PAGING_LEVELS >= 4 @@ -343,8 +340,11 @@ shadow_validate_guest_pt_write(struct vc if ( rc & SHADOW_SET_ERROR ) { /* This page is probably not a pagetable any more: tear it out of the - * shadows, along with any tables that reference it */ - shadow_remove_all_shadows_and_parents(v, gmfn); + * shadows, along with any tables that reference it. + * Since the validate call above will have made a "safe" (i.e. zero) + * shadow entry, we can let the domain live even if we can't fully + * unshadow the page. */ + sh_remove_shadows(v, gmfn, 0, 0); } } @@ -424,22 +424,16 @@ shadow_validate_guest_pt_write(struct vc /* Allocating shadow pages * ----------------------- * - * Most shadow pages are allocated singly, but there are two cases where we - * need to allocate multiple pages together. - * - * 1: Shadowing 32-bit guest tables on PAE or 64-bit shadows. - * A 32-bit guest l1 table covers 4MB of virtuial address space, - * and needs to be shadowed by two PAE/64-bit l1 tables (covering 2MB - * of virtual address space each). Similarly, a 32-bit guest l2 table - * (4GB va) needs to be shadowed by four PAE/64-bit l2 tables (1GB va - * each). These multi-page shadows are contiguous and aligned; - * functions for handling offsets into them are defined in shadow.c - * (shadow_l1_index() etc.) + * Most shadow pages are allocated singly, but there is one case where + * we need to allocate multiple pages together: shadowing 32-bit guest + * tables on PAE or 64-bit shadows. 
A 32-bit guest l1 table covers 4MB + * of virtuial address space, and needs to be shadowed by two PAE/64-bit + * l1 tables (covering 2MB of virtual address space each). Similarly, a + * 32-bit guest l2 table (4GB va) needs to be shadowed by four + * PAE/64-bit l2 tables (1GB va each). These multi-page shadows are + * contiguous and aligned; functions for handling offsets into them are + * defined in shadow.c (shadow_l1_index() etc.) * - * 2: Shadowing PAE top-level pages. Each guest page that contains - * any PAE top-level pages requires two shadow pages to shadow it. - * They contain alternating l3 tables and pae_l3_bookkeeping structs. - * * This table shows the allocation behaviour of the different modes: * * Xen paging 32b pae pae 64b 64b 64b @@ -449,7 +443,7 @@ shadow_validate_guest_pt_write(struct vc * * sl1 size 4k 8k 4k 8k 4k 4k * sl2 size 4k 16k 4k 16k 4k 4k - * sl3 size - - 8k - 8k 4k + * sl3 size - - - - - 4k * sl4 size - - - - - 4k * * We allocate memory from xen in four-page units and break them down @@ -503,7 +497,6 @@ shadow_order(u32 shadow_type) 0, /* PGC_SH_fl1_pae_shadow */ 0, /* PGC_SH_l2_pae_shadow */ 0, /* PGC_SH_l2h_pae_shadow */ - 1, /* PGC_SH_l3_pae_shadow */ 0, /* PGC_SH_l1_64_shadow */ 0, /* PGC_SH_fl1_64_shadow */ 0, /* PGC_SH_l2_64_shadow */ @@ -546,7 +539,8 @@ void shadow_unhook_mappings(struct vcpu #endif break; #if CONFIG_PAGING_LEVELS >= 3 - case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift: + case PGC_SH_l2_pae_shadow >> PGC_SH_type_shift: + case PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift: SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings,3,3)(v,smfn); break; #endif @@ -573,13 +567,18 @@ void shadow_prealloc(struct domain *d, u { /* Need a vpcu for calling unpins; for now, since we don't have * per-vcpu shadows, any will do */ - struct vcpu *v = d->vcpu[0]; + struct vcpu *v, *v2; struct list_head *l, *t; struct page_info *pg; + cpumask_t flushmask = CPU_MASK_NONE; mfn_t smfn; if ( chunk_is_available(d, order) ) return; + v = current; + if ( v->domain != d ) + v = d->vcpu[0]; + /* Stage one: walk the list of top-level pages, unpinning them */ perfc_incrc(shadow_prealloc_1); list_for_each_backwards_safe(l, t, &d->arch.shadow.toplevel_shadows) @@ -587,18 +586,8 @@ void shadow_prealloc(struct domain *d, u pg = list_entry(l, struct page_info, list); smfn = page_to_mfn(pg); -#if CONFIG_PAGING_LEVELS >= 3 - if ( (pg->count_info & PGC_SH_type_mask) == PGC_SH_l3_pae_shadow ) - { - /* For PAE, we need to unpin each subshadow on this shadow */ - SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn); - } - else -#endif /* 32-bit code always takes this branch */ - { - /* Unpin this top-level shadow */ - sh_unpin(v, smfn); - } + /* Unpin this top-level shadow */ + sh_unpin(v, smfn); /* See if that freed up a chunk of appropriate size */ if ( chunk_is_available(d, order) ) return; @@ -608,24 +597,30 @@ void shadow_prealloc(struct domain *d, u * loaded in cr3 on some vcpu. Walk them, unhooking the non-Xen * mappings. 
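The reworked stage two below also defers the TLB flush: rather than flushing locally whenever the unhooked shadow happens to back the current vcpu, it records every physical CPU that may still hold stale entries (any vcpu whose shadow_table slots point at the unhooked smfn) and issues a single flush_tlb_mask() once a large-enough chunk is free. A standalone sketch of that accumulate-then-flush pattern, with toy types standing in for cpumask_t and the vcpu list:

    #include <stdint.h>
    #include <stdio.h>

    /* A plain uint64_t stands in for cpumask_t; each toy vcpu has one
     * top-level table and one dirty-CPU mask.  The real code compares all
     * four PAE shadow_table[] slots and ORs in v->vcpu_dirty_cpumask. */
    struct toy_vcpu {
        unsigned long top_table_mfn;   /* what this vcpu currently has loaded */
        uint64_t      dirty_cpus;      /* physical CPUs that may cache its TLB */
    };

    static uint64_t collect_flush_mask(const struct toy_vcpu *vcpus, int n,
                                       unsigned long unhooked_mfn)
    {
        uint64_t mask = 0;
        for ( int i = 0; i < n; i++ )
            if ( vcpus[i].top_table_mfn == unhooked_mfn )
                mask |= vcpus[i].dirty_cpus;   /* remember, don't flush yet */
        return mask;
    }

    int main(void)
    {
        struct toy_vcpu vcpus[] = {
            { 0x1000, 1u << 0 },   /* vcpu0 on pcpu0, using the victim table */
            { 0x2000, 1u << 1 },   /* vcpu1 on pcpu1, unrelated table        */
            { 0x1000, 1u << 3 },   /* vcpu2 on pcpu3, also the victim table  */
        };
        uint64_t mask = collect_flush_mask(vcpus, 3, 0x1000);
        printf("flush_tlb_mask(%#llx)\n", (unsigned long long)mask); /* 0x9 */
        return 0;
    }

In the patch the accumulated mask is flushed just before shadow_prealloc() returns with a free chunk, so the IPI cost is paid once per prealloc rather than once per unhooked shadow.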
*/ perfc_incrc(shadow_prealloc_2); - v = current; - if ( v->domain != d ) - v = d->vcpu[0]; - /* Walk the list from the tail: recently used toplevels have been pulled - * to the head */ list_for_each_backwards_safe(l, t, &d->arch.shadow.toplevel_shadows) { pg = list_entry(l, struct page_info, list); smfn = page_to_mfn(pg); shadow_unhook_mappings(v, smfn); - /* Need to flush TLB if we've altered our own tables */ - if ( !shadow_mode_external(d) - && pagetable_get_pfn(current->arch.shadow_table) == mfn_x(smfn) ) - local_flush_tlb(); - + /* Remember to flush TLBs: we have removed shadow entries that + * were in use by some vcpu(s). */ + for_each_vcpu(d, v2) + { + if ( pagetable_get_pfn(v2->arch.shadow_table[0]) == mfn_x(smfn) + || pagetable_get_pfn(v2->arch.shadow_table[1]) == mfn_x(smfn) + || pagetable_get_pfn(v2->arch.shadow_table[2]) == mfn_x(smfn) + || pagetable_get_pfn(v2->arch.shadow_table[3]) == mfn_x(smfn) + ) + cpus_or(flushmask, v2->vcpu_dirty_cpumask, flushmask); + } + /* See if that freed up a chunk of appropriate size */ - if ( chunk_is_available(d, order) ) return; + if ( chunk_is_available(d, order) ) + { + flush_tlb_mask(flushmask); + return; + } } /* Nothing more we can do: all remaining shadows are of pages that @@ -732,6 +727,15 @@ void shadow_free(struct domain *d, mfn_t for ( i = 0; i < 1<<order; i++ ) { +#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC + struct vcpu *v; + for_each_vcpu(d, v) + { + /* No longer safe to look for a writeable mapping in this shadow */ + if ( v->arch.shadow.last_writeable_pte_smfn == mfn_x(smfn) + i ) + v->arch.shadow.last_writeable_pte_smfn = 0; + } +#endif /* Strip out the type: this is now a free shadow page */ pg[i].count_info = 0; /* Remember the TLB timestamp so we will know whether to flush @@ -920,9 +924,20 @@ p2m_next_level(struct domain *d, mfn_t * #if CONFIG_PAGING_LEVELS == 3 if (type == PGT_l2_page_table) { + struct vcpu *v; /* We have written to the p2m l3: need to sync the per-vcpu * copies of it in the monitor tables */ p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p2m_entry); + /* Also, any vcpus running on shadows of the p2m need to + * reload their CR3s so the change propagates to the shadow */ + ASSERT(shadow_lock_is_acquired(d)); + for_each_vcpu(d, v) + { + if ( pagetable_get_pfn(v->arch.guest_table) + == pagetable_get_pfn(d->arch.phys_table) + && v->arch.shadow.mode != NULL ) + v->arch.shadow.mode->update_cr3(v); + } } #endif /* The P2M can be shadowed: keep the shadows synced */ @@ -1711,9 +1726,6 @@ void sh_destroy_shadow(struct vcpu *v, m case PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift: SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 3, 3)(v, smfn); break; - case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift: - SHADOW_INTERNAL_NAME(sh_destroy_l3_shadow, 3, 3)(v, smfn); - break; #endif #if CONFIG_PAGING_LEVELS >= 4 @@ -1768,7 +1780,6 @@ int shadow_remove_write_access(struct vc #endif NULL, /* l2_pae */ NULL, /* l2h_pae */ - NULL, /* l3_pae */ #if CONFIG_PAGING_LEVELS >= 4 SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* l1_64 */ SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* fl1_64 */ @@ -1825,12 +1836,11 @@ int shadow_remove_write_access(struct vc unsigned long gfn; /* Heuristic: there is likely to be only one writeable mapping, * and that mapping is likely to be in the current pagetable, - * either in the guest's linear map (linux, windows) or in a - * magic slot used to map high memory regions (linux HIGHTPTE) */ + * in the guest's linear map (on non-HIGHPTE linux and windows)*/ #define GUESS(_a, _h) do { \ - 
if ( v->arch.shadow.mode->guess_wrmap(v, (_a), gmfn) ) \ - perfc_incrc(shadow_writeable_h_ ## _h); \ + if ( v->arch.shadow.mode->guess_wrmap(v, (_a), gmfn) ) \ + perfc_incrc(shadow_writeable_h_ ## _h); \ if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 ) \ return 1; \ } while (0) @@ -1880,9 +1890,35 @@ int shadow_remove_write_access(struct vc #endif /* CONFIG_PAGING_LEVELS >= 3 */ #undef GUESS - - } -#endif + } + + if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 ) + return 1; + + /* Second heuristic: on HIGHPTE linux, there are two particular PTEs + * (entries in the fixmap) where linux maps its pagetables. Since + * we expect to hit them most of the time, we start the search for + * the writeable mapping by looking at the same MFN where the last + * brute-force search succeeded. */ + + if ( v->arch.shadow.last_writeable_pte_smfn != 0 ) + { + unsigned long old_count = (pg->u.inuse.type_info & PGT_count_mask); + mfn_t last_smfn = _mfn(v->arch.shadow.last_writeable_pte_smfn); + int shtype = (mfn_to_page(last_smfn)->count_info & PGC_SH_type_mask) + >> PGC_SH_type_shift; + + if ( callbacks[shtype] ) + callbacks[shtype](v, last_smfn, gmfn); + + if ( (pg->u.inuse.type_info & PGT_count_mask) != old_count ) + perfc_incrc(shadow_writeable_h_5); + } + + if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 ) + return 1; + +#endif /* SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC */ /* Brute-force search of all the shadows, by walking the hash */ perfc_incrc(shadow_writeable_bf); @@ -1932,7 +1968,6 @@ int shadow_remove_all_mappings(struct vc #endif NULL, /* l2_pae */ NULL, /* l2h_pae */ - NULL, /* l3_pae */ #if CONFIG_PAGING_LEVELS >= 4 SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* l1_64 */ SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* fl1_64 */ @@ -2005,7 +2040,8 @@ static int sh_remove_shadow_via_pointer( ASSERT((pg->count_info & PGC_SH_type_mask) > 0); ASSERT((pg->count_info & PGC_SH_type_mask) < PGC_SH_max_shadow); ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2_32_shadow); - ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l3_pae_shadow); + ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2_pae_shadow); + ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2h_pae_shadow); ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l4_64_shadow); if (pg->up == 0) return 0; @@ -2034,7 +2070,6 @@ static int sh_remove_shadow_via_pointer( case PGC_SH_l1_pae_shadow: case PGC_SH_l2_pae_shadow: case PGC_SH_l2h_pae_shadow: - case PGC_SH_l3_pae_shadow: SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,3,3)(v, vaddr, pmfn); break; #if CONFIG_PAGING_LEVELS >= 4 @@ -2058,17 +2093,20 @@ static int sh_remove_shadow_via_pointer( return rc; } -void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int all) +void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all) /* Remove the shadows of this guest page. - * If all != 0, find all shadows, if necessary by walking the tables. - * Otherwise, just try the (much faster) heuristics, which will remove - * at most one reference to each shadow of the page. */ + * If fast != 0, just try the quick heuristic, which will remove + * at most one reference to each shadow of the page. Otherwise, walk + * all the shadow tables looking for refs to shadows of this gmfn. + * If all != 0, kill the domain if we can't find all the shadows. 
+ * (all != 0 implies fast == 0) + */ { struct page_info *pg; mfn_t smfn; u32 sh_flags; unsigned char t; - + /* Dispatch table for getting per-type functions: each level must * be called with the function to remove a lower-level shadow. */ static hash_callback_t callbacks[16] = { @@ -2085,11 +2123,9 @@ void sh_remove_shadows(struct vcpu *v, m #if CONFIG_PAGING_LEVELS >= 3 SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2_pae */ SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2h_pae */ - SHADOW_INTERNAL_NAME(sh_remove_l2_shadow,3,3), /* l3_pae */ #else NULL, /* l2_pae */ NULL, /* l2h_pae */ - NULL, /* l3_pae */ #endif NULL, /* l1_64 */ NULL, /* fl1_64 */ @@ -2115,9 +2151,8 @@ void sh_remove_shadows(struct vcpu *v, m ((1 << (PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift)) | (1 << (PGC_SH_l2_pae_shadow >> PGC_SH_type_shift))), /* l1_pae */ 0, /* fl1_pae */ - 1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2_pae */ - 1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2h_pae */ - 0, /* l3_pae */ + 0, /* l2_pae */ + 0, /* l2h_pae */ 1 << (PGC_SH_l2_64_shadow >> PGC_SH_type_shift), /* l1_64 */ 0, /* fl1_64 */ 1 << (PGC_SH_l3_64_shadow >> PGC_SH_type_shift), /* l2_64 */ @@ -2128,6 +2163,7 @@ void sh_remove_shadows(struct vcpu *v, m }; ASSERT(shadow_lock_is_acquired(v->domain)); + ASSERT(!(all && fast)); pg = mfn_to_page(gmfn); @@ -2147,29 +2183,26 @@ void sh_remove_shadows(struct vcpu *v, m * call will remove at most one shadow, and terminate immediately when * it does remove it, so we never walk the hash after doing a deletion. */ #define DO_UNSHADOW(_type) do { \ - t = (_type) >> PGC_SH_type_shift; \ - smfn = shadow_hash_lookup(v, mfn_x(gmfn), t); \ - if ( !sh_remove_shadow_via_pointer(v, smfn) && all ) \ + t = (_type) >> PGC_SH_type_shift; \ + smfn = shadow_hash_lookup(v, mfn_x(gmfn), t); \ + if ( !sh_remove_shadow_via_pointer(v, smfn) && !fast ) \ hash_foreach(v, masks[t], callbacks, smfn); \ } while (0) /* Top-level shadows need to be unpinned */ -#define DO_UNPIN(_type) do { \ +#define DO_UNPIN(_type) do { \ t = (_type) >> PGC_SH_type_shift; \ smfn = shadow_hash_lookup(v, mfn_x(gmfn), t); \ if ( mfn_to_page(smfn)->count_info & PGC_SH_pinned ) \ sh_unpin(v, smfn); \ - if ( (_type) == PGC_SH_l3_pae_shadow ) \ - SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn); \ } while (0) if ( sh_flags & SHF_L1_32 ) DO_UNSHADOW(PGC_SH_l1_32_shadow); if ( sh_flags & SHF_L2_32 ) DO_UNPIN(PGC_SH_l2_32_shadow); #if CONFIG_PAGING_LEVELS >= 3 if ( sh_flags & SHF_L1_PAE ) DO_UNSHADOW(PGC_SH_l1_pae_shadow); - if ( sh_flags & SHF_L2_PAE ) DO_UNSHADOW(PGC_SH_l2_pae_shadow); - if ( sh_flags & SHF_L2H_PAE ) DO_UNSHADOW(PGC_SH_l2h_pae_shadow); - if ( sh_flags & SHF_L3_PAE ) DO_UNPIN(PGC_SH_l3_pae_shadow); + if ( sh_flags & SHF_L2_PAE ) DO_UNPIN(PGC_SH_l2_pae_shadow); + if ( sh_flags & SHF_L2H_PAE ) DO_UNPIN(PGC_SH_l2h_pae_shadow); #if CONFIG_PAGING_LEVELS >= 4 if ( sh_flags & SHF_L1_64 ) DO_UNSHADOW(PGC_SH_l1_64_shadow); if ( sh_flags & SHF_L2_64 ) DO_UNSHADOW(PGC_SH_l2_64_shadow); @@ -2181,21 +2214,19 @@ void sh_remove_shadows(struct vcpu *v, m #undef DO_UNSHADOW #undef DO_UNPIN - -#if CONFIG_PAGING_LEVELS > 2 - /* We may have caused some PAE l3 entries to change: need to - * fix up the copies of them in various places */ - if ( sh_flags & (SHF_L2_PAE|SHF_L2H_PAE) ) - sh_pae_recopy(v->domain); -#endif - /* If that didn't catch the shadows, something is wrong */ - if ( all && (pg->count_info & PGC_page_table) ) - { - SHADOW_ERROR("can't find all shadows of mfn %05lx (shadow_flags=%08x)\n", + if ( 
!fast && (pg->count_info & PGC_page_table) ) + { + SHADOW_ERROR("can't find all shadows of mfn %05lx " + "(shadow_flags=%08x)\n", mfn_x(gmfn), pg->shadow_flags); - domain_crash(v->domain); - } + if ( all ) + domain_crash(v->domain); + } + + /* Need to flush TLBs now, so that linear maps are safe next time we + * take a fault. */ + flush_tlb_mask(v->domain->domain_dirty_cpumask); } void @@ -2681,7 +2712,7 @@ int shadow_test_enable(struct domain *d) if ( shadow_mode_enabled(d) ) { SHADOW_ERROR("Don't support enabling test mode" - "on already shadowed doms\n"); + " on already shadowed doms\n"); ret = -EINVAL; goto out; } @@ -2754,7 +2785,7 @@ static int shadow_log_dirty_enable(struc if ( shadow_mode_enabled(d) ) { SHADOW_ERROR("Don't (yet) support enabling log-dirty" - "on already shadowed doms\n"); + " on already shadowed doms\n"); ret = -EINVAL; goto out; } @@ -3118,7 +3149,6 @@ void shadow_audit_tables(struct vcpu *v) SHADOW_INTERNAL_NAME(sh_audit_fl1_table,3,3), /* fl1_pae */ SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3), /* l2_pae */ SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3), /* l2h_pae */ - SHADOW_INTERNAL_NAME(sh_audit_l3_table,3,3), /* l3_pae */ #if CONFIG_PAGING_LEVELS >= 4 SHADOW_INTERNAL_NAME(sh_audit_l1_table,4,4), /* l1_64 */ SHADOW_INTERNAL_NAME(sh_audit_fl1_table,4,4), /* fl1_64 */ @@ -3143,7 +3173,7 @@ void shadow_audit_tables(struct vcpu *v) { case 2: mask = (SHF_L1_32|SHF_FL1_32|SHF_L2_32); break; case 3: mask = (SHF_L1_PAE|SHF_FL1_PAE|SHF_L2_PAE - |SHF_L2H_PAE|SHF_L3_PAE); break; + |SHF_L2H_PAE); break; case 4: mask = (SHF_L1_64|SHF_FL1_64|SHF_L2_64 |SHF_L3_64|SHF_L4_64); break; default: BUG(); diff -r d5a46e4cc340 -r 6492b9b27968 xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Sun Oct 22 14:39:15 2006 -0600 +++ b/xen/arch/x86/mm/shadow/multi.c Sun Oct 22 15:23:52 2006 -0600 @@ -20,20 +20,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -// DESIGN QUESTIONS: -// Why use subshadows for PAE guests? -// - reduces pressure in the hash table -// - reduces shadow size (64-vs-4096 bytes of shadow for 32 bytes of guest L3) -// - would need to find space in the page_info to store 7 more bits of -// backpointer -// - independent shadows of 32 byte chunks makes it non-obvious how to quickly -// figure out when to demote the guest page from l3 status -// -// PAE Xen HVM guests are restricted to 8GB of pseudo-physical address space. -// - Want to map the P2M table into the 16MB RO_MPT hole in Xen's address -// space for both PV and HVM guests. -// #include <xen/config.h> #include <xen/types.h> @@ -118,9 +104,6 @@ static char *fetch_type_names[] = { #endif /* XXX forward declarations */ -#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3) -static unsigned long hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res); -#endif static inline void sh_update_linear_entries(struct vcpu *v); /**************************************************************************/ @@ -129,8 +112,6 @@ static inline void sh_update_linear_entr * Normal case: maps the mfn of a guest page to the mfn of its shadow page. * FL1's: maps the *gfn* of the start of a superpage to the mfn of a * shadow L1 which maps its "splinters". - * PAE CR3s: maps the 32-byte aligned, 32-bit CR3 value to the mfn of the - * PAE L3 info page for that CR3 value. 
*/ static inline mfn_t @@ -215,7 +196,6 @@ delete_fl1_shadow_status(struct vcpu *v, { SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n", gfn_x(gfn), PGC_SH_fl1_shadow, mfn_x(smfn)); - shadow_hash_delete(v, gfn_x(gfn), PGC_SH_fl1_shadow >> PGC_SH_type_shift, smfn); } @@ -429,18 +409,16 @@ static void sh_audit_gw(struct vcpu *v, if ( !(SHADOW_AUDIT_ENABLE) ) return; -#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */ #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ if ( valid_mfn(gw->l4mfn) && valid_mfn((smfn = get_shadow_status(v, gw->l4mfn, PGC_SH_l4_shadow))) ) (void) sh_audit_l4_table(v, smfn, _mfn(INVALID_MFN)); -#endif /* PAE or 64... */ if ( valid_mfn(gw->l3mfn) && valid_mfn((smfn = get_shadow_status(v, gw->l3mfn, PGC_SH_l3_shadow))) ) (void) sh_audit_l3_table(v, smfn, _mfn(INVALID_MFN)); -#endif /* All levels... */ +#endif /* PAE or 64... */ if ( valid_mfn(gw->l2mfn) ) { if ( valid_mfn((smfn = get_shadow_status(v, gw->l2mfn, @@ -498,8 +476,7 @@ static u32 guest_set_ad_bits(struct vcpu flags = guest_l1e_get_flags(*ep); /* PAE l3s do not have A and D bits */ - if ( unlikely(GUEST_PAGING_LEVELS == 3 && level == 3) ) - return flags; + ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3); /* Need the D bit as well for writes, in L1es and PSE L2es. */ if ( ft == ft_demand_write @@ -646,37 +623,13 @@ shadow_l2_index(mfn_t *smfn, u32 guest_i #endif } -#if GUEST_PAGING_LEVELS >= 3 +#if GUEST_PAGING_LEVELS >= 4 static inline u32 shadow_l3_index(mfn_t *smfn, u32 guest_index) { -#if GUEST_PAGING_LEVELS == 3 - u32 group_id; - - // Because we use twice the space in L3 shadows as was consumed in guest - // L3s, the number of guest entries per shadow page is - // SHADOW_L2_PAGETABLE_ENTRIES/2. (Note this is *not* - // SHADOW_L3_PAGETABLE_ENTRIES, which in this case is 4...) - // - *smfn = _mfn(mfn_x(*smfn) + - (guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2))); - - // We store PAE L3 shadows in groups of 4, alternating shadows and - // pae_l3_bookkeeping structs. So the effective shadow index is - // the the group_id * 8 + the offset within the group. - // - guest_index %= (SHADOW_L2_PAGETABLE_ENTRIES / 2); - group_id = guest_index / 4; - return (group_id * 8) + (guest_index % 4); -#else return guest_index; -#endif -} - -#endif // GUEST_PAGING_LEVELS >= 3 - -#if GUEST_PAGING_LEVELS >= 4 +} static inline u32 shadow_l4_index(mfn_t *smfn, u32 guest_index) @@ -722,6 +675,9 @@ do { u32 pass_thru_flags; u32 sflags; + /* We don't shadow PAE l3s */ + ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3); + // XXX -- might want to think about PAT support for HVM guests... #ifndef NDEBUG @@ -757,29 +713,16 @@ do { if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) ) gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft); - // PAE does not allow NX, RW, USER, ACCESSED, or DIRTY bits in its L3e's... - // - if ( (SHADOW_PAGING_LEVELS == 3) && (level == 3) ) - pass_thru_flags = _PAGE_PRESENT; - else - { - pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER | - _PAGE_RW | _PAGE_PRESENT); - if ( guest_supports_nx(v) ) - pass_thru_flags |= _PAGE_NX_BIT; - } - - // PAE guests can not put NX, RW, USER, ACCESSED, or DIRTY bits into their - // L3e's; they are all implied. So we emulate them here. - // - if ( (GUEST_PAGING_LEVELS == 3) && (level == 3) ) - gflags = pass_thru_flags; // Propagate bits from the guest to the shadow. // Some of these may be overwritten, below. // Since we know the guest's PRESENT bit is set, we also set the shadow's // SHADOW_PRESENT bit. 
// + pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER | + _PAGE_RW | _PAGE_PRESENT); + if ( guest_supports_nx(v) ) + pass_thru_flags |= _PAGE_NX_BIT; sflags = (gflags & pass_thru_flags) | _PAGE_SHADOW_PRESENT; // Copy the guest's RW bit into the SHADOW_RW bit. @@ -800,8 +743,7 @@ do { // If the A or D bit has not yet been set in the guest, then we must // prevent the corresponding kind of access. // - if ( unlikely(!((GUEST_PAGING_LEVELS == 3) && (level == 3)) && - !(gflags & _PAGE_ACCESSED)) ) + if ( unlikely(!(gflags & _PAGE_ACCESSED)) ) sflags &= ~_PAGE_PRESENT; /* D bits exist in L1es and PSE L2es */ @@ -890,9 +832,7 @@ l4e_propagate_from_guest(struct vcpu *v, fetch_type_names[ft], gl4e->l4, sl4p->l4); ASSERT(sflags != -1); } -#endif // GUEST_PAGING_LEVELS >= 4 - -#if GUEST_PAGING_LEVELS >= 3 + static void l3e_propagate_from_guest(struct vcpu *v, guest_l3e_t *gl3e, @@ -912,7 +852,7 @@ l3e_propagate_from_guest(struct vcpu *v, fetch_type_names[ft], gl3e->l3, sl3p->l3); ASSERT(sflags != -1); } -#endif // GUEST_PAGING_LEVELS >= 3 +#endif // GUEST_PAGING_LEVELS >= 4 static void l2e_propagate_from_guest(struct vcpu *v, @@ -1081,9 +1021,6 @@ shadow_write_entries(void *d, void *s, i safe_write_entry(dst++, src++); if ( map != NULL ) sh_unmap_domain_page(map); - - /* XXX TODO: - * Update min/max field in page_info struct of this mfn */ } static inline int @@ -1195,9 +1132,7 @@ static int shadow_set_l4e(struct vcpu *v } return flags; } -#endif /* GUEST_PAGING_LEVELS >= 4 */ - -#if GUEST_PAGING_LEVELS >= 3 + static int shadow_set_l3e(struct vcpu *v, shadow_l3e_t *sl3e, shadow_l3e_t new_sl3e, @@ -1224,28 +1159,6 @@ static int shadow_set_l3e(struct vcpu *v shadow_write_entries(sl3e, &new_sl3e, 1, sl3mfn); flags |= SHADOW_SET_CHANGED; -#if GUEST_PAGING_LEVELS == 3 - /* We wrote a guest l3e in a PAE pagetable. This table is copied in - * the linear pagetable entries of its l2s, and may also be copied - * to a low memory location to make it fit in CR3. Report that we - * need to resync those copies (we can't wait for the guest to flush - * the TLB because it might be an increase in rights). */ - { - struct vcpu *vcpu; - - struct pae_l3_bookkeeping *info = sl3p_to_info(sl3e); - for_each_vcpu(v->domain, vcpu) - { - if (info->vcpus & (1 << vcpu->vcpu_id)) - { - // Remember that this flip/update needs to occur. - vcpu->arch.shadow.pae_flip_pending = 1; - flags |= SHADOW_SET_L3PAE_RECOPY; - } - } - } -#endif - if ( shadow_l3e_get_flags(old_sl3e) & _PAGE_PRESENT ) { /* We lost a reference to an old mfn. */ @@ -1260,7 +1173,7 @@ static int shadow_set_l3e(struct vcpu *v } return flags; } -#endif /* GUEST_PAGING_LEVELS >= 3 */ +#endif /* GUEST_PAGING_LEVELS >= 4 */ static int shadow_set_l2e(struct vcpu *v, shadow_l2e_t *sl2e, @@ -1535,51 +1448,7 @@ do { #endif /* different kinds of l2 */ -#if GUEST_PAGING_LEVELS == 3 - -/* PAE l3 subshadow: touch all entries (FOREACH_L2E will find Xen l2es). 
*/ -#define SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p, _done, _code) \ -do { \ - int _i; \ - for ( _i = 0; _i < 4; _i++ ) \ - { \ - if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT ) \ - {_code} \ - if ( _done ) break; \ - _sl3e++; \ - increment_ptr_to_guest_entry(_gl3p); \ - } \ -} while (0) - -/* PAE l3 full shadow: call subshadow walk on all valid l3 subshadows */ -#define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code) \ -do { \ - int _i, _j, _k, __done = 0; \ - ASSERT((mfn_to_page(_sl3mfn)->count_info & PGC_SH_type_mask) \ - == PGC_SH_l3_pae_shadow); \ - /* The subshadows are split, 64 on each page of the shadow */ \ - for ( _j = 0; _j < 2 && !__done; _j++ ) \ - { \ - void *_sp = sh_map_domain_page(_sl3mfn); \ - for ( _i = 0; _i < 64; _i++ ) \ - { \ - /* Every second 32-byte region is a bookkeeping entry */ \ - _sl3e = (shadow_l3e_t *)(_sp + (64 * _i)); \ - if ( (sl3p_to_info(_sl3e))->refcount > 0 ) \ - SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p, \ - ({ __done = (_done); __done; }), \ - _code); \ - else \ - for ( _k = 0 ; _k < 4 ; _k++ ) \ - increment_ptr_to_guest_entry(_gl3p); \ - if ( __done ) break; \ - } \ - sh_unmap_domain_page(_sp); \ - _sl3mfn = _mfn(mfn_x(_sl3mfn) + 1); \ - } \ -} while (0) - -#elif GUEST_PAGING_LEVELS == 4 +#if GUEST_PAGING_LEVELS == 4 /* 64-bit l3: touch all entries */ #define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code) \ @@ -1711,8 +1580,8 @@ void sh_install_xen_entries_in_l2h(struc /* We don't set up a linear mapping here because we can't until this * l2h is installed in an l3e. sh_update_linear_entries() handles - * the linear mappings when the l3 is loaded. We zero them here, just as - * a safety measure. + * the linear mappings when CR3 (and so the fourth l3e) is loaded. + * We zero them here, just as a safety measure. */ for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ ) sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START) + i] = @@ -1739,37 +1608,6 @@ void sh_install_xen_entries_in_l2h(struc } sh_unmap_domain_page(sl2e); -} - -void sh_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn) -{ - shadow_l3e_t *sl3e; - guest_l3e_t *gl3e = v->arch.guest_vtable; - shadow_l3e_t new_sl3e; - gfn_t l2gfn; - mfn_t l2gmfn, l2smfn; - int r; - - ASSERT(!shadow_mode_external(v->domain)); - ASSERT(guest_l3e_get_flags(gl3e[3]) & _PAGE_PRESENT); - l2gfn = guest_l3e_get_gfn(gl3e[3]); - l2gmfn = sh_gfn_to_mfn(v->domain, gfn_x(l2gfn)); - l2smfn = get_shadow_status(v, l2gmfn, PGC_SH_l2h_shadow); - if ( !valid_mfn(l2smfn) ) - { - /* must remove write access to this page before shadowing it */ - // XXX -- should check to see whether this is better with level==0 or - // level==2... - if ( shadow_remove_write_access(v, l2gmfn, 2, 0xc0000000ul) != 0 ) - flush_tlb_mask(v->domain->domain_dirty_cpumask); - - l2smfn = sh_make_shadow(v, l2gmfn, PGC_SH_l2h_shadow); - } - l3e_propagate_from_guest(v, &gl3e[3], gl3mfn, l2smfn, &new_sl3e, - ft_prefetch); - sl3e = sh_map_domain_page(sl3mfn); - r = shadow_set_l3e(v, &sl3e[3], new_sl3e, sl3mfn); - sh_unmap_domain_page(sl3e); } #endif @@ -1827,8 +1665,6 @@ void sh_install_xen_entries_in_l2(struct - - /**************************************************************************/ /* Create a shadow of a given guest page. 
*/ @@ -1839,7 +1675,10 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf SHADOW_DEBUG(MAKE_SHADOW, "(%05lx, %u)=>%05lx\n", mfn_x(gmfn), shadow_type, mfn_x(smfn)); - if ( shadow_type != PGC_SH_guest_root_type ) + if ( shadow_type != PGC_SH_l2_32_shadow + && shadow_type != PGC_SH_l2_pae_shadow + && shadow_type != PGC_SH_l2h_pae_shadow + && shadow_type != PGC_SH_l4_64_shadow ) /* Lower-level shadow, not yet linked form a higher level */ mfn_to_page(smfn)->up = 0; @@ -1853,8 +1692,6 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf sh_install_xen_entries_in_l4(v, gmfn, smfn); break; #endif #if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3 - case PGC_SH_l3_shadow: - sh_install_xen_entries_in_l3(v, gmfn, smfn); break; case PGC_SH_l2h_shadow: sh_install_xen_entries_in_l2h(v, smfn); break; #endif @@ -1988,20 +1825,16 @@ static shadow_l4e_t * shadow_get_and_cre mfn_t *sl4mfn) { /* There is always a shadow of the top level table. Get it. */ - *sl4mfn = pagetable_get_mfn(v->arch.shadow_table); + *sl4mfn = pagetable_get_mfn(v->arch.shadow_table[0]); /* Reading the top level table is always valid. */ return sh_linear_l4_table(v) + shadow_l4_linear_offset(gw->va); } -#endif /* GUEST_PAGING_LEVELS >= 4 */ - - -#if GUEST_PAGING_LEVELS >= 3 + static shadow_l3e_t * shadow_get_and_create_l3e(struct vcpu *v, walk_t *gw, mfn_t *sl3mfn, fetch_type_t ft) { -#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */ mfn_t sl4mfn; shadow_l4e_t *sl4e; if ( !valid_mfn(gw->l3mfn) ) return NULL; /* No guest page. */ @@ -2032,19 +1865,8 @@ static shadow_l3e_t * shadow_get_and_cre } /* Now follow it down a level. Guaranteed to succeed. */ return sh_linear_l3_table(v) + shadow_l3_linear_offset(gw->va); -#else /* PAE... */ - /* There is always a shadow of the top level table. Get it. */ - *sl3mfn = pagetable_get_mfn(v->arch.shadow_table); - /* This next line is important: the shadow l3 table is in an 8k - * shadow and we need to return the right mfn of the pair. This call - * will set it for us as a side-effect. */ - (void) shadow_l3_index(sl3mfn, guest_index(gw->l3e)); - ASSERT(v->arch.shadow_vtable); - return ((shadow_l3e_t *)v->arch.shadow_vtable) - + shadow_l3_table_offset(gw->va); +} #endif /* GUEST_PAGING_LEVELS >= 4 */ -} -#endif /* GUEST_PAGING_LEVELS >= 3 */ static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v, @@ -2052,7 +1874,7 @@ static shadow_l2e_t * shadow_get_and_cre mfn_t *sl2mfn, fetch_type_t ft) { -#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64bit... */ +#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */ mfn_t sl3mfn = _mfn(INVALID_MFN); shadow_l3e_t *sl3e; if ( !valid_mfn(gw->l2mfn) ) return NULL; /* No guest page. */ @@ -2080,17 +1902,22 @@ static shadow_l2e_t * shadow_get_and_cre *sl2mfn, &new_sl3e, ft); r = shadow_set_l3e(v, sl3e, new_sl3e, sl3mfn); ASSERT((r & SHADOW_SET_FLUSH) == 0); -#if GUEST_PAGING_LEVELS == 3 - /* Need to sync up the linear maps, as we are about to use them */ - ASSERT( r & SHADOW_SET_L3PAE_RECOPY ); - sh_pae_recopy(v->domain); -#endif } /* Now follow it down a level. Guaranteed to succeed. */ + return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va); +#elif GUEST_PAGING_LEVELS == 3 /* PAE... */ + /* We never demand-shadow PAE l3es: they are only created in + * sh_update_cr3(). Check if the relevant sl3e is present. 
*/ + shadow_l3e_t *sl3e = ((shadow_l3e_t *)&v->arch.shadow.l3table) + + shadow_l3_linear_offset(gw->va); + if ( !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) + return NULL; + *sl2mfn = shadow_l3e_get_mfn(*sl3e); + ASSERT(valid_mfn(*sl2mfn)); return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va); #else /* 32bit... */ /* There is always a shadow of the top level table. Get it. */ - *sl2mfn = pagetable_get_mfn(v->arch.shadow_table); + *sl2mfn = pagetable_get_mfn(v->arch.shadow_table[0]); /* This next line is important: the guest l2 has a 16k * shadow, we need to return the right mfn of the four. This * call will set it for us as a side-effect. */ @@ -2213,9 +2040,7 @@ void sh_destroy_l4_shadow(struct vcpu *v /* Put the memory back in the pool */ shadow_free(v->domain, smfn); } -#endif - -#if GUEST_PAGING_LEVELS >= 3 + void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn) { shadow_l3e_t *sl3e; @@ -2230,10 +2055,6 @@ void sh_destroy_l3_shadow(struct vcpu *v gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info); delete_shadow_status(v, gmfn, t, smfn); shadow_demote(v, gmfn, t); -#if GUEST_PAGING_LEVELS == 3 - /* Take this shadow off the list of root shadows */ - list_del_init(&mfn_to_page(smfn)->list); -#endif /* Decrement refcounts of all the old entries */ sl3mfn = smfn; @@ -2247,53 +2068,8 @@ void sh_destroy_l3_shadow(struct vcpu *v /* Put the memory back in the pool */ shadow_free(v->domain, smfn); } -#endif - - -#if GUEST_PAGING_LEVELS == 3 -static void sh_destroy_l3_subshadow(struct vcpu *v, - shadow_l3e_t *sl3e) -/* Tear down just a single 4-entry l3 on a 2-page l3 shadow. */ -{ - int i; - mfn_t sl3mfn = _mfn(maddr_from_mapped_domain_page(sl3e) >> PAGE_SHIFT); - ASSERT((unsigned long)sl3e % (4 * sizeof (shadow_l3e_t)) == 0); - for ( i = 0; i < GUEST_L3_PAGETABLE_ENTRIES; i++ ) - if ( shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT ) - shadow_set_l3e(v, &sl3e[i], shadow_l3e_empty(), sl3mfn); -} -#endif - -#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3) -void sh_unpin_all_l3_subshadows(struct vcpu *v, mfn_t smfn) -/* Walk a full PAE l3 shadow, unpinning all of the subshadows on it */ -{ - int i, j; - struct pae_l3_bookkeeping *bk; - - ASSERT((mfn_to_page(smfn)->count_info & PGC_SH_type_mask) - == PGC_SH_l3_pae_shadow); - /* The subshadows are split, 64 on each page of the shadow */ - for ( i = 0; i < 2; i++ ) - { - void *p = sh_map_domain_page(_mfn(mfn_x(smfn) + i)); - for ( j = 0; j < 64; j++ ) - { - /* Every second 32-byte region is a bookkeeping entry */ - bk = (struct pae_l3_bookkeeping *)(p + (64 * j) + 32); - if ( bk->pinned ) - sh_unpin_l3_subshadow(v, (shadow_l3e_t *)(p + (64*j)), smfn); - /* Check whether we've just freed the whole shadow */ - if ( (mfn_to_page(smfn)->count_info & PGC_SH_count_mask) == 0 ) - { - sh_unmap_domain_page(p); - return; - } - } - sh_unmap_domain_page(p); - } -} -#endif +#endif /* GUEST_PAGING_LEVELS >= 4 */ + void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn) { @@ -2311,7 +2087,7 @@ void sh_destroy_l2_shadow(struct vcpu *v _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
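A note on the TLB-flush change visible in the shadow_prealloc() hunk above: instead of doing a local_flush_tlb() whenever the current vcpu's own tables were altered, the new code ORs each affected vcpu's vcpu_dirty_cpumask into a local flush mask while it unhooks top-level shadows, and issues a single flush_tlb_mask() once a large-enough chunk has been freed; sh_remove_shadows() likewise now ends with one flush_tlb_mask() over the domain's dirty cpumask. The stand-alone sketch below illustrates that accumulate-then-flush pattern in plain C. The types and helpers (vcpu_stub, flush_mask, unhook_and_collect) are simplified stand-ins invented for illustration, playing the roles of Xen's cpumask_t, flush_tlb_mask() and the shadow_table checks; this is not hypervisor code.

/* Sketch of the accumulate-then-flush pattern from the shadow_prealloc()
 * hunk above.  A uint64_t bitmask stands in for Xen's cpumask_t and
 * flush_mask() stands in for flush_tlb_mask(); illustrative only. */
#include <stdint.h>
#include <stdio.h>

#define NR_VCPUS 4

struct vcpu_stub {
    uint64_t dirty_cpumask;     /* cpus this vcpu has recently run on */
    unsigned long shadow_root;  /* mfn of the shadow top level it uses */
};

static void flush_mask(uint64_t mask)
{
    /* Real code would IPI the cpus in 'mask'; here we just report it. */
    printf("flushing TLBs on cpu mask 0x%llx\n", (unsigned long long)mask);
}

/* Unhook one shadow and note which cpus will eventually need a flush. */
static uint64_t unhook_and_collect(struct vcpu_stub *vcpus, int nr,
                                   unsigned long smfn, uint64_t flushmask)
{
    for (int i = 0; i < nr; i++)
        if (vcpus[i].shadow_root == smfn)
            flushmask |= vcpus[i].dirty_cpumask;
    return flushmask;
}

int main(void)
{
    struct vcpu_stub vcpus[NR_VCPUS] = {
        { .dirty_cpumask = 0x1, .shadow_root = 100 },
        { .dirty_cpumask = 0x2, .shadow_root = 101 },
        { .dirty_cpumask = 0x4, .shadow_root = 100 },
        { .dirty_cpumask = 0x8, .shadow_root = 102 },
    };
    uint64_t flushmask = 0;

    /* Tear down shadows 100 and 101, accumulating the flush mask ... */
    flushmask = unhook_and_collect(vcpus, NR_VCPUS, 100, flushmask);
    flushmask = unhook_and_collect(vcpus, NR_VCPUS, 101, flushmask);

    /* ... then flush once, instead of once per unhooked shadow. */
    flush_mask(flushmask);
    return 0;
}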
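A second pattern in the same patch is the per-vcpu last_writeable_pte_smfn cache: when the brute-force search for a writeable mapping of a guest page succeeds, the shadow MFN where it was found is remembered, tried first on the next search, and cleared again in shadow_free() when that shadow page is released. The toy sketch below shows the same "remember the last hit, invalidate on free" idea with an ordinary table lookup; every name in it (table_t, find_slow, free_bucket, and so on) is invented for illustration and is not a Xen interface.

/* Toy illustration of a "last hit" cache in front of an exhaustive search,
 * mirroring the last_writeable_pte_smfn heuristic above.  Not Xen code. */
#include <stddef.h>
#include <stdio.h>

#define NR_BUCKETS 8
#define SLOTS_PER_BUCKET 16

struct table_t {
    int slots[NR_BUCKETS][SLOTS_PER_BUCKET];
    size_t last_hit_bucket;   /* 0 means "no cached bucket"; bucket 0 unused */
};

/* Exhaustive search: scan every bucket, remembering where the value was. */
static int find_slow(struct table_t *t, int value)
{
    for (size_t b = 1; b < NR_BUCKETS; b++)
        for (size_t s = 0; s < SLOTS_PER_BUCKET; s++)
            if (t->slots[b][s] == value) {
                t->last_hit_bucket = b;   /* cache the hit for next time */
                return 1;
            }
    return 0;
}

/* Heuristic search: try the cached bucket first, then fall back. */
static int find(struct table_t *t, int value)
{
    size_t b = t->last_hit_bucket;
    if (b != 0)
        for (size_t s = 0; s < SLOTS_PER_BUCKET; s++)
            if (t->slots[b][s] == value)
                return 1;
    return find_slow(t, value);
}

/* Tearing a bucket down must invalidate the cache, just as the new loop in
 * shadow_free() zeroes last_writeable_pte_smfn for the freed shadow. */
static void free_bucket(struct table_t *t, size_t b)
{
    for (size_t s = 0; s < SLOTS_PER_BUCKET; s++)
        t->slots[b][s] = 0;
    if (t->last_hit_bucket == b)
        t->last_hit_bucket = 0;
}

int main(void)
{
    struct table_t t = { .last_hit_bucket = 0 };
    t.slots[3][5] = 42;
    printf("found=%d (slow path caches bucket 3)\n", find(&t, 42));
    printf("found=%d (fast path uses cached bucket)\n", find(&t, 42));
    free_bucket(&t, 3);
    printf("found=%d (cache invalidated, value gone)\n", find(&t, 42));
    return 0;
}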