[Xen-changelog] [xen-unstable] [merge] with xen-unstable
# HG changeset patch # User Christian Limpach <Christian.Limpach@xxxxxxxxxxxxx> # Date 1169133487 0 # Node ID 8475a4e0425ed158923d9849a8e5a6821e8bdb34 # Parent 3464bb656a9c4428713bdf18b2bfb94e922f9d74 # Parent 8e79d8d87ecd371c3b1881be1fcdccfcdcf50b22 [merge] with xen-unstable --- extras/mini-os/minios-x86_32.lds | 45 extras/mini-os/minios-x86_64.lds | 54 extras/mini-os/x86_32.S | 287 -- extras/mini-os/x86_64.S | 385 --- linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/kmap_types.h | 31 patches/linux-2.6.18/ipv6-no-autoconf.patch | 18 tools/xm-test/lib/XmTestLib/XenManagedDomain.py | 177 - .hgignore | 4 Makefile | 9 buildconfigs/linux-defconfig_xen0_x86_32 | 1 buildconfigs/linux-defconfig_xenU_x86_32 | 1 buildconfigs/linux-defconfig_xen_x86_32 | 1 docs/man/xm.pod.1 | 3 docs/man/xmdomain.cfg.pod.5 | 49 docs/xen-api/wire-protocol.tex | 2 extras/mini-os/Makefile | 160 - extras/mini-os/arch/x86/Makefile | 29 extras/mini-os/arch/x86/arch.mk | 28 extras/mini-os/arch/x86/minios-x86_32.lds | 45 extras/mini-os/arch/x86/minios-x86_64.lds | 54 extras/mini-os/arch/x86/x86_32.S | 287 ++ extras/mini-os/arch/x86/x86_64.S | 385 +++ extras/mini-os/gnttab.c | 36 extras/mini-os/include/hypervisor.h | 1 extras/mini-os/include/netfront.h | 2 extras/mini-os/include/x86/x86_32/hypercall-x86_32.h | 8 extras/mini-os/include/x86/x86_64/hypercall-x86_64.h | 8 extras/mini-os/include/xenbus.h | 3 extras/mini-os/kernel.c | 11 extras/mini-os/minios.mk | 62 extras/mini-os/netfront.c | 455 ++++ extras/mini-os/xenbus/xenbus.c | 86 linux-2.6-xen-sparse/arch/i386/Kconfig | 2 linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c | 2 linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c | 19 linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c | 74 linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c | 18 linux-2.6-xen-sparse/arch/i386/mm/highmem-xen.c | 1 linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c | 30 linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c | 16 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c | 32 linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c | 32 linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c | 4 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/dma-mapping.h | 6 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgalloc.h | 2 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/swiotlb.h | 4 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h | 87 linux-2.6-xen-sparse/kernel/kexec.c | 8 patches/linux-2.6.18/series | 1 tools/check/check_udev | 4 tools/examples/vtpm-common.sh | 4 tools/examples/xen-network-common.sh | 5 tools/examples/xmexample1 | 34 tools/examples/xmexample2 | 34 tools/examples/xmexample3 | 34 tools/libfsimage/common/fsimage.c | 2 tools/libfsimage/common/fsimage_grub.c | 2 tools/libfsimage/common/fsimage_plugin.c | 8 tools/libfsimage/common/fsimage_plugin.h | 5 tools/libfsimage/common/mapfile-GNU | 3 tools/libfsimage/common/mapfile-SunOS | 3 tools/libfsimage/ext2fs-lib/ext2fs-lib.c | 2 tools/libxc/xc_hvm_build.c | 3 tools/libxc/xc_linux_build.c | 2 tools/libxc/xc_linux_restore.c | 207 +- tools/libxc/xc_linux_save.c | 7 tools/libxc/xc_load_elf.c | 14 tools/libxc/xc_ptrace.c | 18 tools/pygrub/src/pygrub | 280 +- tools/python/xen/xend/XendBootloader.py | 6 tools/python/xen/xend/XendCheckpoint.py | 8 tools/python/xen/xend/XendConfig.py | 10 tools/python/xen/xend/XendDomain.py | 8 tools/python/xen/xend/XendDomainInfo.py | 37 tools/python/xen/xend/XendNode.py | 10 tools/python/xen/xend/osdep.py | 5 tools/python/xen/xend/server/blkif.py | 1 tools/python/xen/xm/create.py | 20 
tools/tests/Makefile | 13 tools/tests/blowfish.c | 439 ++++ tools/tests/blowfish.mk | 23 tools/tests/test_x86_emulator.c | 134 + tools/xenstat/xentop/xentop.c | 2 tools/xm-test/README | 43 tools/xm-test/configure.ac | 1 tools/xm-test/grouptest/xapi | 1 tools/xm-test/lib/XmTestLib/DomainTracking.py | 61 tools/xm-test/lib/XmTestLib/XenAPIDomain.py | 176 + tools/xm-test/lib/XmTestLib/XenDomain.py | 28 tools/xm-test/lib/XmTestLib/Xm.py | 2 tools/xm-test/lib/XmTestLib/xapi.py | 79 tools/xm-test/ramdisk/Makefile.am | 13 tools/xm-test/ramdisk/skel/etc/init.d/rcS | 11 tools/xm-test/runtest.sh | 8 tools/xm-test/tests/sched-credit/01_sched_credit_weight_cap_pos.py | 33 tools/xm-test/tests/vtpm/09_vtpm-xapi.py | 99 tools/xm-test/tests/xapi/01_xapi-vm_basic.py | 61 tools/xm-test/tests/xapi/Makefile.am | 19 unmodified_drivers/linux-2.6/platform-pci/platform-pci.c | 34 unmodified_drivers/linux-2.6/platform-pci/platform-pci.h | 4 xen/arch/ia64/linux-xen/unaligned.c | 2 xen/arch/ia64/xen/xenmisc.c | 4 xen/arch/x86/domain.c | 2 xen/arch/x86/hvm/hpet.c | 2 xen/arch/x86/hvm/hvm.c | 2 xen/arch/x86/hvm/i8254.c | 14 xen/arch/x86/hvm/irq.c | 186 + xen/arch/x86/hvm/svm/svm.c | 4 xen/arch/x86/hvm/vioapic.c | 11 xen/arch/x86/hvm/vmx/vmcs.c | 9 xen/arch/x86/hvm/vmx/vmx.c | 140 - xen/arch/x86/hvm/vmx/x86_32/exits.S | 33 xen/arch/x86/hvm/vmx/x86_64/exits.S | 29 xen/arch/x86/microcode.c | 9 xen/arch/x86/mm.c | 116 - xen/arch/x86/mm/shadow/common.c | 27 xen/arch/x86/mm/shadow/multi.c | 6 xen/arch/x86/mm/shadow/private.h | 5 xen/arch/x86/oprofile/nmi_int.c | 2 xen/arch/x86/setup.c | 25 xen/arch/x86/traps.c | 18 xen/arch/x86/x86_64/compat/mm.c | 20 xen/arch/x86/x86_emulate.c | 1027 ++++++++-- xen/common/elf.c | 2 xen/common/kexec.c | 122 - xen/common/keyhandler.c | 13 xen/common/lib.c | 644 ++---- xen/common/symbols.c | 3 xen/common/xencomm.c | 4 xen/drivers/video/vga.c | 11 xen/include/Makefile | 52 xen/include/asm-powerpc/byteorder.h | 80 xen/include/asm-x86/byteorder.h | 36 xen/include/asm-x86/guest_access.h | 18 xen/include/asm-x86/hvm/irq.h | 16 xen/include/asm-x86/x86_32/kexec.h | 1 xen/include/asm-x86/x86_64/kexec.h | 1 xen/include/asm-x86/x86_emulate.h | 23 xen/include/public/elfnote.h | 9 xen/include/public/hvm/params.h | 22 xen/include/xen/byteorder/big_endian.h | 106 + xen/include/xen/byteorder/generic.h | 68 xen/include/xen/byteorder/little_endian.h | 106 + xen/include/xen/byteorder/swab.h | 185 + xen/include/xen/config.h | 2 xen/include/xen/elfcore.h | 57 xen/include/xen/types.h | 7 xen/tools/compat-build-header.py | 21 xen/tools/compat-build-source.py | 27 xen/tools/get-fields.sh | 53 150 files changed, 5874 insertions(+), 2633 deletions(-) diff -r 3464bb656a9c -r 8475a4e0425e .hgignore --- a/.hgignore Thu Jan 18 09:54:33 2007 +0000 +++ b/.hgignore Thu Jan 18 15:18:07 2007 +0000 @@ -58,7 +58,7 @@ ^docs/xen-api/xenapi-datamodel-graph.eps$ ^extras/mini-os/h/hypervisor-ifs$ ^extras/mini-os/h/xen-public$ -^extras/mini-os/mini-os\..*$ +^extras/mini-os/mini-os.*$ ^install/.*$ ^linux-[^/]*-native/.*$ ^linux-[^/]*-xen/.*$ @@ -142,6 +142,8 @@ ^tools/python/build/.*$ ^tools/security/secpol_tool$ ^tools/security/xen/.*$ +^tools/tests/blowfish\.bin$ +^tools/tests/blowfish\.h$ ^tools/tests/test_x86_emulator$ ^tools/vnet/Make.local$ ^tools/vnet/build/.*$ diff -r 3464bb656a9c -r 8475a4e0425e Makefile --- a/Makefile Thu Jan 18 09:54:33 2007 +0000 +++ b/Makefile Thu Jan 18 15:18:07 2007 +0000 @@ -2,18 +2,15 @@ # Grand Unified Makefile for Xen. # -# Export target architecture overrides to Xen and Linux sub-trees. 
-ifneq ($(XEN_TARGET_ARCH),) -SUBARCH := $(subst x86_32,i386,$(XEN_TARGET_ARCH)) -export XEN_TARGET_ARCH SUBARCH XEN_SYSTYPE -endif - # Default target must appear before any include lines .PHONY: all all: dist export XEN_ROOT=$(CURDIR) include Config.mk + +SUBARCH := $(subst x86_32,i386,$(XEN_TARGET_ARCH)) +export XEN_TARGET_ARCH SUBARCH XEN_SYSTYPE include buildconfigs/Rules.mk ifeq ($(XEN_TARGET_X86_PAE),y) diff -r 3464bb656a9c -r 8475a4e0425e buildconfigs/linux-defconfig_xen0_x86_32 --- a/buildconfigs/linux-defconfig_xen0_x86_32 Thu Jan 18 09:54:33 2007 +0000 +++ b/buildconfigs/linux-defconfig_xen0_x86_32 Thu Jan 18 15:18:07 2007 +0000 @@ -177,6 +177,7 @@ CONFIG_FLAT_NODE_MEM_MAP=y # CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPLIT_PTLOCK_CPUS=4096 CONFIG_RESOURCES_64BIT=y +# CONFIG_HIGHPTE is not set CONFIG_MTRR=y # CONFIG_REGPARM is not set CONFIG_SECCOMP=y diff -r 3464bb656a9c -r 8475a4e0425e buildconfigs/linux-defconfig_xenU_x86_32 --- a/buildconfigs/linux-defconfig_xenU_x86_32 Thu Jan 18 09:54:33 2007 +0000 +++ b/buildconfigs/linux-defconfig_xenU_x86_32 Thu Jan 18 15:18:07 2007 +0000 @@ -174,6 +174,7 @@ CONFIG_FLAT_NODE_MEM_MAP=y # CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPLIT_PTLOCK_CPUS=4096 CONFIG_RESOURCES_64BIT=y +# CONFIG_HIGHPTE is not set # CONFIG_REGPARM is not set CONFIG_SECCOMP=y CONFIG_HZ_100=y diff -r 3464bb656a9c -r 8475a4e0425e buildconfigs/linux-defconfig_xen_x86_32 --- a/buildconfigs/linux-defconfig_xen_x86_32 Thu Jan 18 09:54:33 2007 +0000 +++ b/buildconfigs/linux-defconfig_xen_x86_32 Thu Jan 18 15:18:07 2007 +0000 @@ -182,6 +182,7 @@ CONFIG_FLAT_NODE_MEM_MAP=y # CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPLIT_PTLOCK_CPUS=4096 CONFIG_RESOURCES_64BIT=y +# CONFIG_HIGHPTE is not set CONFIG_MTRR=y CONFIG_REGPARM=y CONFIG_SECCOMP=y diff -r 3464bb656a9c -r 8475a4e0425e docs/man/xm.pod.1 --- a/docs/man/xm.pod.1 Thu Jan 18 09:54:33 2007 +0000 +++ b/docs/man/xm.pod.1 Thu Jan 18 15:18:07 2007 +0000 @@ -451,6 +451,7 @@ make the man page more readable): xen_minor : 0 xen_extra : -devel xen_caps : xen-3.0-x86_32 + xen_scheduler : credit xen_pagesize : 4096 platform_params : virt_start=0xfc000000 xen_changeset : Mon Nov 14 18:13:38 2005 +0100 @@ -460,7 +461,7 @@ make the man page more readable): cc_compile_by : sdague cc_compile_domain : (none) cc_compile_date : Mon Nov 14 14:16:48 EST 2005 - xend_config_format : 2 + xend_config_format : 3 B<FIELDS> diff -r 3464bb656a9c -r 8475a4e0425e docs/man/xmdomain.cfg.pod.5 --- a/docs/man/xmdomain.cfg.pod.5 Thu Jan 18 09:54:33 2007 +0000 +++ b/docs/man/xmdomain.cfg.pod.5 Thu Jan 18 15:18:07 2007 +0000 @@ -135,6 +135,55 @@ one will be randomly chosen by xen with =back +=item B<vfb> + +A virtual frame buffer stanza in the form: + + vfb = [ "stanza" ] + +The stanza specifies a set of I<name = value> options separated by +commas, in the form: "name1=value1, name2=value2, ..." + +B<OPTIONS> + +=over 4 + +=item I<type> + +There are currently two valid options: I<vnc> starts a VNC server that +lets you connect an external VNC viewer, and I<sdl> starts an internal +viewer. + +=item I<vncdisplay> + +The VNC display number to use, defaults to the domain ID. The +VNC server listens on port 5900 + display number. + +=item I<vnclisten> + +The listening address for the VNC server, default 127.0.0.1. + +=item I<vncunused> + +If non-zero, the VNC server listens on the first unused port above +5900. + +=item I<vncpasswd> + +Overrides the XenD configured default password. 
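As an illustration of the vfb stanza documented above (the option values here are purely hypothetical, not taken from the patch), a domain configuration could request a VNC-backed frame buffer like this:

    vfb = [ "type=vnc, vncdisplay=1, vnclisten=0.0.0.0, vncpasswd=secret" ]

Using type=sdl together with the display and xauthority options described below selects the internal SDL viewer instead.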
+ +=item I<display> + +Display to use for the internal viewer, defaults to environment +variable I<DISPLAY>. + +=item I<xauthority> + +Authority file to use for the internal viewer, defaults to environment +variable I<XAUTHORITY>. + +=back + =back =head1 ADDITIONAL OPTIONS diff -r 3464bb656a9c -r 8475a4e0425e docs/xen-api/wire-protocol.tex --- a/docs/xen-api/wire-protocol.tex Thu Jan 18 09:54:33 2007 +0000 +++ b/docs/xen-api/wire-protocol.tex Thu Jan 18 15:18:07 2007 +0000 @@ -153,7 +153,7 @@ you must login and initiate a session. F \end{verbatim} Where {\tt uname} and {\tt password} refer to your username and password respectively, as defined by the Xen administrator. -The {\tt session\_id} returned by {\tt session.login_with_password} is passed +The {\tt session\_id} returned by {\tt session.login\_with\_password} is passed to subequent RPC calls as an authentication token. A session can be terminated with the {\tt session.logout} function: diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/Makefile --- a/extras/mini-os/Makefile Thu Jan 18 09:54:33 2007 +0000 +++ b/extras/mini-os/Makefile Thu Jan 18 15:18:07 2007 +0000 @@ -1,106 +1,88 @@ debug ?= y -debug ?= y +# Common Makefile for mini-os. +# +# Every architecture directory below mini-os/arch has to have a +# Makefile and a arch.mk. +# + pae ?= n XEN_ROOT = ../.. include $(XEN_ROOT)/Config.mk +XEN_INTERFACE_VERSION := 0x00030204 +export XEN_INTERFACE_VERSION + # Set TARGET_ARCH -override TARGET_ARCH := $(XEN_TARGET_ARCH) +override TARGET_ARCH := $(XEN_TARGET_ARCH) -XEN_INTERFACE_VERSION := 0x00030203 +# Set mini-os root path, used in mini-os.mk. +MINI-OS_ROOT=$(PWD) +export MINI-OS_ROOT -# NB. '-Wcast-qual' is nasty, so I omitted it. -CFLAGS := -fno-builtin -Wall -Werror -Wredundant-decls -Wno-format -CFLAGS += -Wstrict-prototypes -Wnested-externs -Wpointer-arith -Winline -CFLAGS += -D__XEN_INTERFACE_VERSION__=$(XEN_INTERFACE_VERSION) +# Try to find out the architecture family TARGET_ARCH_FAM. +# First check whether x86_... is contained (for x86_32, x86_32y, x86_64). +# If not x86 then use $(TARGET_ARCH) -> for ia64, ... +ifeq ($(findstring x86_,$(TARGET_ARCH)),x86_) +TARGET_ARCH_FAM = x86 +else +TARGET_ARCH_FAM = $(TARGET_ARCH) +endif -ASFLAGS = -D__ASSEMBLY__ +# The architecture family directory below mini-os. +TARGET_ARCH_DIR := arch/$(TARGET_ARCH_FAM) -LDLIBS = -L. -lminios -LDFLAGS := -N -T minios-$(TARGET_ARCH).lds +# Export these variables for possible use in architecture dependent makefiles. +export TARGET_ARCH +export TARGET_ARCH_DIR +export TARGET_ARCH_FAM -# For possible special source directories. -EXTRA_SRC = +# This is used for architecture specific links. +# This can be overwritten from arch specific rules. +ARCH_LINKS = + # For possible special header directories. +# This can be overwritten from arch specific rules. EXTRA_INC = -# Standard name for architecture specific subdirectories. -TARGET_ARCH_DIR = $(TARGET_ARCH) -# This is used for architecture specific links. -ARCH_LINKS = +# Special build dependencies. +# Build all after touching this/these file(s) (see minios.mk) +SPEC_DEPENDS = minios.mk -ifeq ($(TARGET_ARCH),x86_32) -CFLAGS += -m32 -march=i686 -LDFLAGS += -m elf_i386 -TARGET_ARCH_DIR = x86 -EXTRA_INC += $(TARGET_ARCH_DIR)/$(TARGET_ARCH) -EXTRA_SRC += arch/$(EXTRA_INC) -endif +# Include the architecture family's special makerules. +# This must be before include minios.mk! 
+include $(TARGET_ARCH_DIR)/arch.mk -ifeq ($(TARGET_ARCH)$(pae),x86_32y) -CFLAGS += -DCONFIG_X86_PAE=1 -ASFLAGS += -DCONFIG_X86_PAE=1 -TARGET_ARCH_DIR = x86 -EXTRA_INC += $(TARGET_ARCH_DIR)/$(TARGET_ARCH) -EXTRA_SRC += arch/$(EXTRA_INC) -endif +# Include common mini-os makerules. +include minios.mk -ifeq ($(TARGET_ARCH),x86_64) -CFLAGS += -m64 -mno-red-zone -fpic -fno-reorder-blocks -CFLAGS += -fno-asynchronous-unwind-tables -LDFLAGS += -m elf_x86_64 -TARGET_ARCH_DIR = x86 -EXTRA_INC += $(TARGET_ARCH_DIR)/$(TARGET_ARCH) -EXTRA_SRC += arch/$(EXTRA_INC) -endif +# Define some default flags for linking. +LDLIBS := +LDFLAGS := +LDARCHLIB := -L$(TARGET_ARCH_DIR) -l$(ARCH_LIB_NAME) +LDFLAGS_FINAL := -N -T $(TARGET_ARCH_DIR)/minios-$(TARGET_ARCH).lds -ifeq ($(TARGET_ARCH),ia64) -CFLAGS += -mfixed-range=f2-f5,f12-f15,f32-f127 -mconstant-gp -ASFLAGS += -x assembler-with-cpp -Wall -ASFLAGS += -mfixed-range=f2-f5,f12-f15,f32-f127 -fomit-frame-pointer -ASFLAGS += -fno-builtin -fno-common -fno-strict-aliasing -mconstant-gp -ARCH_LINKS = IA64_LINKS # Special link on ia64 needed -define arch_links -[ -e include/ia64/asm-xsi-offsets.h ] || ln -sf ../../../../xen/include/asm-ia64/asm-xsi-offsets.h include/ia64/asm-xsi-offsets.h -endef -endif - -ifeq ($(debug),y) -CFLAGS += -g -else -CFLAGS += -O3 -endif - -# Add the special header directories to the include paths. -extra_incl := $(foreach dir,$(EXTRA_INC),-Iinclude/$(dir)) -override CPPFLAGS := -Iinclude $(CPPFLAGS) -Iinclude/$(TARGET_ARCH_DIR) $(extra_incl) +# Prefix for global API names. All other symbols are localised before +# linking with EXTRA_OBJS. +GLOBAL_PREFIX := xenos_ +EXTRA_OBJS = TARGET := mini-os -HEAD := $(TARGET_ARCH).o +# Subdirectories common to mini-os +SUBDIRS := lib xenbus console + +# The common mini-os objects to build. OBJS := $(patsubst %.c,%.o,$(wildcard *.c)) OBJS += $(patsubst %.c,%.o,$(wildcard lib/*.c)) OBJS += $(patsubst %.c,%.o,$(wildcard xenbus/*.c)) OBJS += $(patsubst %.c,%.o,$(wildcard console/*.c)) -OBJS += $(patsubst %.S,%.o,$(wildcard arch/$(TARGET_ARCH_DIR)/*.S)) -OBJS += $(patsubst %.c,%.o,$(wildcard arch/$(TARGET_ARCH_DIR)/*.c)) -# For special wanted source directories. -extra_objs := $(foreach dir,$(EXTRA_SRC),$(patsubst %.c,%.o,$(wildcard $(dir)/*.c))) -OBJS += $(extra_objs) -extra_objs := $(foreach dir,$(EXTRA_SRC),$(patsubst %.S,%.o,$(wildcard $(dir)/*.S))) -OBJS += $(extra_objs) -HDRS := $(wildcard include/*.h) -HDRS += $(wildcard include/xen/*.h) -HDRS += $(wildcard include/$(TARGET_ARCH_DIR)/*.h) -# For special wanted header directories. -extra_heads := $(foreach dir,$(EXTRA_INC),$(wildcard $(dir)/*.h)) -HDRS += $(extra_heads) .PHONY: default default: $(TARGET) -# Create special architecture specific links. +# Create special architecture specific links. The function arch_links +# has to be defined in arch.mk (see include above). 
ifneq ($(ARCH_LINKS),) $(ARCH_LINKS): $(arch_links) @@ -110,26 +92,29 @@ links: $(ARCH_LINKS) links: $(ARCH_LINKS) [ -e include/xen ] || ln -sf ../../../xen/include/public include/xen -libminios.a: links $(OBJS) $(HEAD) - $(AR) r libminios.a $(HEAD) $(OBJS) +.PHONY: arch_lib +arch_lib: + $(MAKE) --directory=$(TARGET_ARCH_DIR) || exit 1; -$(TARGET): libminios.a $(HEAD) - $(LD) $(LDFLAGS) $(HEAD) $(LDLIBS) -o $@.elf - gzip -f -9 -c $@.elf >$@.gz +$(TARGET): links $(OBJS) arch_lib + $(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(OBJS) $(LDARCHLIB) -o $@.o + $(OBJCOPY) -w -G $(GLOBAL_PREFIX)* -G _start $@.o $@.o + $(LD) $(LDFLAGS) $(LDFLAGS_FINAL) $@.o $(EXTRA_OBJS) -o $@ + gzip -f -9 -c $@ >$@.gz -.PHONY: clean -clean: - find . -type f -name '*.o' | xargs rm -f +.PHONY: clean arch_clean + +arch_clean: + $(MAKE) --directory=$(TARGET_ARCH_DIR) clean || exit 1; + +clean: arch_clean + for dir in $(SUBDIRS); do \ + rm -f $$dir/*.o; \ + done rm -f *.o *~ core $(TARGET).elf $(TARGET).raw $(TARGET) $(TARGET).gz - rm -f libminios.a find . -type l | xargs rm -f rm -f tags TAGS -%.o: %.c $(HDRS) Makefile - $(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@ - -%.o: %.S $(HDRS) Makefile - $(CC) $(ASFLAGS) $(CPPFLAGS) -c $< -o $@ define all_sources ( find . -follow -name SCCS -prune -o -name '*.[chS]' -print ) @@ -143,3 +128,4 @@ cscope: .PHONY: tags tags: $(all_sources) | xargs ctags + diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/arch/x86/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extras/mini-os/arch/x86/Makefile Thu Jan 18 15:18:07 2007 +0000 @@ -0,0 +1,29 @@ +# +# x86 architecture specific makefiles. +# It's is used for x86_32, x86_32y and x86_64 +# + +# Rebuild all after touching this/these extra file(s) (see mini-os.mk) +SPEC_DEP = arch.mk + +# include arch.mk has to be before mini-os.mk! +include arch.mk +include ../../minios.mk + +# Sources here are all *.c *.S without $(TARGET_ARCH).S +# This is handled in $(HEAD_ARCH_OBJ) +ARCH_SRCS := $(wildcard *.c) + +# The objects built from the sources. +ARCH_OBJS := $(patsubst %.c,%.o,$(ARCH_SRCS)) + +all: $(ARCH_LIB) + +# $(HEAD_ARCH_OBJ) is only build here, needed on linking +# in ../../Makefile. +$(ARCH_LIB): $(ARCH_OBJS) $(HEAD_ARCH_OBJ) + $(AR) rv $(ARCH_LIB) $(ARCH_OBJS) + +clean: + rm -f $(ARCH_LIB) $(ARCH_OBJS) + diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/arch/x86/arch.mk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extras/mini-os/arch/x86/arch.mk Thu Jan 18 15:18:07 2007 +0000 @@ -0,0 +1,28 @@ +# +# Architecture special makerules for x86 family +# (including x86_32, x86_32y and x86_64). 
+# + +ifeq ($(TARGET_ARCH),x86_32) +ARCH_CFLAGS := -m32 -march=i686 +ARCH_LDFLAGS := -m elf_i386 +EXTRA_INC += $(TARGET_ARCH_FAM)/$(TARGET_ARCH) +EXTRA_SRC += arch/$(EXTRA_INC) +endif + +ifeq ($(TARGET_ARCH)$(pae),x86_32y) +ARCH_CFLAGS := -DCONFIG_X86_PAE=1 +ARCH_ASFLAGS := -DCONFIG_X86_PAE=1 +EXTRA_INC += $(TARGET_ARCH_FAM)/$(TARGET_ARCH) +EXTRA_SRC += arch/$(EXTRA_INC) +endif + +ifeq ($(TARGET_ARCH),x86_64) +ARCH_CFLAGS := -m64 -mno-red-zone -fpic -fno-reorder-blocks +ARCH_CFLAGS := -fno-asynchronous-unwind-tables +ARCH_LDFLAGS := -m elf_x86_64 +EXTRA_INC += $(TARGET_ARCH_FAM)/$(TARGET_ARCH) +EXTRA_SRC += arch/$(EXTRA_INC) +endif + + diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/arch/x86/minios-x86_32.lds --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extras/mini-os/arch/x86/minios-x86_32.lds Thu Jan 18 15:18:07 2007 +0000 @@ -0,0 +1,45 @@ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) +SECTIONS +{ + . = 0x0; + _text = .; /* Text and read-only data */ + .text : { + *(.text) + *(.gnu.warning) + } = 0x9090 + + _etext = .; /* End of text section */ + + .rodata : { *(.rodata) *(.rodata.*) } + + .data : { /* Data */ + *(.data) + CONSTRUCTORS + } + + _edata = .; /* End of data section */ + + __bss_start = .; /* BSS */ + .bss : { + *(.bss) + } + _end = . ; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.text.exit) + *(.data.exit) + *(.exitcall.exit) + } + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } +} diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/arch/x86/minios-x86_64.lds --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extras/mini-os/arch/x86/minios-x86_64.lds Thu Jan 18 15:18:07 2007 +0000 @@ -0,0 +1,54 @@ +OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") +OUTPUT_ARCH(i386:x86-64) +ENTRY(_start) +SECTIONS +{ + . = 0x0; + _text = .; /* Text and read-only data */ + .text : { + *(.text) + *(.gnu.warning) + } = 0x9090 + + _etext = .; /* End of text section */ + + .rodata : { *(.rodata) *(.rodata.*) } + + .data : { /* Data */ + *(.data) + CONSTRUCTORS + } + + _edata = .; /* End of data section */ + + . = ALIGN(8192); /* init_task */ + .data.init_task : { *(.data.init_task) } + + . = ALIGN(4096); + .data.page_aligned : { *(.data.idt) } + + . = ALIGN(32); + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + + __bss_start = .; /* BSS */ + .bss : { + *(.bss) + } + _end = . ; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.text.exit) + *(.data.exit) + *(.exitcall.exit) + } + + /* Stabs debugging sections. 
*/ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } +} diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/arch/x86/x86_32.S --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extras/mini-os/arch/x86/x86_32.S Thu Jan 18 15:18:07 2007 +0000 @@ -0,0 +1,287 @@ +#include <os.h> +#include <xen/arch-x86_32.h> + +.section __xen_guest + .ascii "GUEST_OS=Mini-OS" + .ascii ",XEN_VER=xen-3.0" + .ascii ",VIRT_BASE=0x0" /* &_text from minios_x86_32.lds */ + .ascii ",ELF_PADDR_OFFSET=0x0" + .ascii ",HYPERCALL_PAGE=0x2" +#ifdef CONFIG_X86_PAE + .ascii ",PAE=yes" +#else + .ascii ",PAE=no" +#endif + .ascii ",LOADER=generic" + .byte 0 +.text + +.globl _start, shared_info, hypercall_page + +_start: + cld + lss stack_start,%esp + push %esi + call start_kernel + +stack_start: + .long stack+8192, __KERNEL_SS + + /* Unpleasant -- the PTE that maps this page is actually overwritten */ + /* to map the real shared-info page! :-) */ + .org 0x1000 +shared_info: + .org 0x2000 + +hypercall_page: + .org 0x3000 + +ES = 0x20 +ORIG_EAX = 0x24 +EIP = 0x28 +CS = 0x2C + +#define ENTRY(X) .globl X ; X : + +#define SAVE_ALL \ + cld; \ + pushl %es; \ + pushl %ds; \ + pushl %eax; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ + pushl %ebx; \ + movl $(__KERNEL_DS),%edx; \ + movl %edx,%ds; \ + movl %edx,%es; + +#define RESTORE_ALL \ + popl %ebx; \ + popl %ecx; \ + popl %edx; \ + popl %esi; \ + popl %edi; \ + popl %ebp; \ + popl %eax; \ + popl %ds; \ + popl %es; \ + addl $4,%esp; \ + iret; \ + +ENTRY(divide_error) + pushl $0 # no error code + pushl $do_divide_error +do_exception: + pushl %ds + pushl %eax + xorl %eax, %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + decl %eax # eax = -1 + pushl %ecx + pushl %ebx + cld + movl %es, %ecx + movl ES(%esp), %edi # get the function address + movl ORIG_EAX(%esp), %edx # get the error code + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) + movl $(__KERNEL_DS), %ecx + movl %ecx, %ds + movl %ecx, %es + movl %esp,%eax # pt_regs pointer + pushl %edx + pushl %eax + call *%edi + jmp ret_from_exception + +ret_from_exception: + movb CS(%esp),%cl + test $2,%cl # slow return to ring 2 or 3 + jne safesti + RESTORE_ALL + +# A note on the "critical region" in our callback handler. +# We want to avoid stacking callback handlers due to events occurring +# during handling of the last event. To do this, we keep events disabled +# until weve done all processing. HOWEVER, we must enable events before +# popping the stack frame (cant be done atomically) and so it would still +# be possible to get enough handler activations to overflow the stack. +# Although unlikely, bugs of that kind are hard to track down, so wed +# like to avoid the possibility. +# So, on entry to the handler we detect whether we interrupted an +# existing activation in its critical region -- if so, we pop the current +# activation and restart the handler using the previous one. 
+ENTRY(hypervisor_callback) + pushl %eax + SAVE_ALL + movl EIP(%esp),%eax + cmpl $scrit,%eax + jb 11f + cmpl $ecrit,%eax + jb critical_region_fixup +11: push %esp + call do_hypervisor_callback + add $4,%esp + movl HYPERVISOR_shared_info,%esi + xorl %eax,%eax + movb CS(%esp),%cl + test $2,%cl # slow return to ring 2 or 3 + jne safesti +safesti:movb $0,1(%esi) # reenable event callbacks +scrit: /**** START OF CRITICAL REGION ****/ + testb $0xFF,(%esi) + jnz 14f # process more events if necessary... + RESTORE_ALL +14: movb $1,1(%esi) + jmp 11b +ecrit: /**** END OF CRITICAL REGION ****/ +# [How we do the fixup]. We want to merge the current stack frame with the +# just-interrupted frame. How we do this depends on where in the critical +# region the interrupted handler was executing, and so how many saved +# registers are in each frame. We do this quickly using the lookup table +# 'critical_fixup_table'. For each byte offset in the critical region, it +# provides the number of bytes which have already been popped from the +# interrupted stack frame. +critical_region_fixup: + addl $critical_fixup_table-scrit,%eax + movzbl (%eax),%eax # %eax contains num bytes popped + mov %esp,%esi + add %eax,%esi # %esi points at end of src region + mov %esp,%edi + add $0x34,%edi # %edi points at end of dst region + mov %eax,%ecx + shr $2,%ecx # convert words to bytes + je 16f # skip loop if nothing to copy +15: subl $4,%esi # pre-decrementing copy loop + subl $4,%edi + movl (%esi),%eax + movl %eax,(%edi) + loop 15b +16: movl %edi,%esp # final %edi is top of merged stack + jmp 11b + +critical_fixup_table: + .byte 0x00,0x00,0x00 # testb $0xff,(%esi) + .byte 0x00,0x00 # jne 14f + .byte 0x00 # pop %ebx + .byte 0x04 # pop %ecx + .byte 0x08 # pop %edx + .byte 0x0c # pop %esi + .byte 0x10 # pop %edi + .byte 0x14 # pop %ebp + .byte 0x18 # pop %eax + .byte 0x1c # pop %ds + .byte 0x20 # pop %es + .byte 0x24,0x24,0x24 # add $4,%esp + .byte 0x28 # iret + .byte 0x00,0x00,0x00,0x00 # movb $1,1(%esi) + .byte 0x00,0x00 # jmp 11b + +# Hypervisor uses this for application faults while it executes. 
+ENTRY(failsafe_callback) + pop %ds + pop %es + pop %fs + pop %gs + iret + +ENTRY(coprocessor_error) + pushl $0 + pushl $do_coprocessor_error + jmp do_exception + +ENTRY(simd_coprocessor_error) + pushl $0 + pushl $do_simd_coprocessor_error + jmp do_exception + +ENTRY(device_not_available) + iret + +ENTRY(debug) + pushl $0 + pushl $do_debug + jmp do_exception + +ENTRY(int3) + pushl $0 + pushl $do_int3 + jmp do_exception + +ENTRY(overflow) + pushl $0 + pushl $do_overflow + jmp do_exception + +ENTRY(bounds) + pushl $0 + pushl $do_bounds + jmp do_exception + +ENTRY(invalid_op) + pushl $0 + pushl $do_invalid_op + jmp do_exception + + +ENTRY(coprocessor_segment_overrun) + pushl $0 + pushl $do_coprocessor_segment_overrun + jmp do_exception + + +ENTRY(invalid_TSS) + pushl $do_invalid_TSS + jmp do_exception + + +ENTRY(segment_not_present) + pushl $do_segment_not_present + jmp do_exception + + +ENTRY(stack_segment) + pushl $do_stack_segment + jmp do_exception + + +ENTRY(general_protection) + pushl $do_general_protection + jmp do_exception + + +ENTRY(alignment_check) + pushl $do_alignment_check + jmp do_exception + + +ENTRY(page_fault) + pushl $do_page_fault + jmp do_exception + +ENTRY(machine_check) + pushl $0 + pushl $do_machine_check + jmp do_exception + + +ENTRY(spurious_interrupt_bug) + pushl $0 + pushl $do_spurious_interrupt_bug + jmp do_exception + + + +ENTRY(thread_starter) + popl %eax + popl %ebx + pushl %eax + call *%ebx + call exit_thread + diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/arch/x86/x86_64.S --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extras/mini-os/arch/x86/x86_64.S Thu Jan 18 15:18:07 2007 +0000 @@ -0,0 +1,385 @@ +#include <os.h> +#include <xen/features.h> + +.section __xen_guest + .ascii "GUEST_OS=Mini-OS" + .ascii ",XEN_VER=xen-3.0" + .ascii ",VIRT_BASE=0x0" /* &_text from minios_x86_64.lds */ + .ascii ",ELF_PADDR_OFFSET=0x0" + .ascii ",HYPERCALL_PAGE=0x2" + .ascii ",LOADER=generic" + .byte 0 +.text + +#define ENTRY(X) .globl X ; X : +.globl _start, shared_info, hypercall_page + + +_start: + cld + movq stack_start(%rip),%rsp + movq %rsi,%rdi + call start_kernel + +stack_start: + .quad stack+8192 + + /* Unpleasant -- the PTE that maps this page is actually overwritten */ + /* to map the real shared-info page! :-) */ + .org 0x1000 +shared_info: + .org 0x2000 + +hypercall_page: + .org 0x3000 + + +/* Offsets into shared_info_t. 
*/ +#define evtchn_upcall_pending /* 0 */ +#define evtchn_upcall_mask 1 + +NMI_MASK = 0x80000000 + +#define RDI 112 +#define ORIG_RAX 120 /* + error_code */ +#define EFLAGS 144 + +#define REST_SKIP 6*8 +.macro SAVE_REST + subq $REST_SKIP,%rsp +# CFI_ADJUST_CFA_OFFSET REST_SKIP + movq %rbx,5*8(%rsp) +# CFI_REL_OFFSET rbx,5*8 + movq %rbp,4*8(%rsp) +# CFI_REL_OFFSET rbp,4*8 + movq %r12,3*8(%rsp) +# CFI_REL_OFFSET r12,3*8 + movq %r13,2*8(%rsp) +# CFI_REL_OFFSET r13,2*8 + movq %r14,1*8(%rsp) +# CFI_REL_OFFSET r14,1*8 + movq %r15,(%rsp) +# CFI_REL_OFFSET r15,0*8 +.endm + + +.macro RESTORE_REST + movq (%rsp),%r15 +# CFI_RESTORE r15 + movq 1*8(%rsp),%r14 +# CFI_RESTORE r14 + movq 2*8(%rsp),%r13 +# CFI_RESTORE r13 + movq 3*8(%rsp),%r12 +# CFI_RESTORE r12 + movq 4*8(%rsp),%rbp +# CFI_RESTORE rbp + movq 5*8(%rsp),%rbx +# CFI_RESTORE rbx + addq $REST_SKIP,%rsp +# CFI_ADJUST_CFA_OFFSET -(REST_SKIP) +.endm + + +#define ARG_SKIP 9*8 +.macro RESTORE_ARGS skiprax=0,addskip=0,skiprcx=0,skipr11=0,skipr8910=0,skiprdx=0 + .if \skipr11 + .else + movq (%rsp),%r11 +# CFI_RESTORE r11 + .endif + .if \skipr8910 + .else + movq 1*8(%rsp),%r10 +# CFI_RESTORE r10 + movq 2*8(%rsp),%r9 +# CFI_RESTORE r9 + movq 3*8(%rsp),%r8 +# CFI_RESTORE r8 + .endif + .if \skiprax + .else + movq 4*8(%rsp),%rax +# CFI_RESTORE rax + .endif + .if \skiprcx + .else + movq 5*8(%rsp),%rcx +# CFI_RESTORE rcx + .endif + .if \skiprdx + .else + movq 6*8(%rsp),%rdx +# CFI_RESTORE rdx + .endif + movq 7*8(%rsp),%rsi +# CFI_RESTORE rsi + movq 8*8(%rsp),%rdi +# CFI_RESTORE rdi + .if ARG_SKIP+\addskip > 0 + addq $ARG_SKIP+\addskip,%rsp +# CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip) + .endif +.endm + + +.macro HYPERVISOR_IRET flag +# testb $3,1*8(%rsp) /* Don't need to do that in Mini-os, as */ +# jnz 2f /* there is no userspace? */ + testl $NMI_MASK,2*8(%rsp) + jnz 2f + + testb $1,(xen_features+XENFEAT_supervisor_mode_kernel) + jnz 1f + + /* Direct iret to kernel space. Correct CS and SS. */ + orb $3,1*8(%rsp) + orb $3,4*8(%rsp) +1: iretq + +2: /* Slow iret via hypervisor. */ + andl $~NMI_MASK, 16(%rsp) + pushq $\flag + jmp hypercall_page + (__HYPERVISOR_iret * 32) +.endm + +/* + * Exception entry point. This expects an error code/orig_rax on the stack + * and the exception handler in %rax. 
+ */ +ENTRY(error_entry) +# _frame RDI + /* rdi slot contains rax, oldrax contains error code */ + cld + subq $14*8,%rsp +# CFI_ADJUST_CFA_OFFSET (14*8) + movq %rsi,13*8(%rsp) +# CFI_REL_OFFSET rsi,RSI + movq 14*8(%rsp),%rsi /* load rax from rdi slot */ + movq %rdx,12*8(%rsp) +# CFI_REL_OFFSET rdx,RDX + movq %rcx,11*8(%rsp) +# CFI_REL_OFFSET rcx,RCX + movq %rsi,10*8(%rsp) /* store rax */ +# CFI_REL_OFFSET rax,RAX + movq %r8, 9*8(%rsp) +# CFI_REL_OFFSET r8,R8 + movq %r9, 8*8(%rsp) +# CFI_REL_OFFSET r9,R9 + movq %r10,7*8(%rsp) +# CFI_REL_OFFSET r10,R10 + movq %r11,6*8(%rsp) +# CFI_REL_OFFSET r11,R11 + movq %rbx,5*8(%rsp) +# CFI_REL_OFFSET rbx,RBX + movq %rbp,4*8(%rsp) +# CFI_REL_OFFSET rbp,RBP + movq %r12,3*8(%rsp) +# CFI_REL_OFFSET r12,R12 + movq %r13,2*8(%rsp) +# CFI_REL_OFFSET r13,R13 + movq %r14,1*8(%rsp) +# CFI_REL_OFFSET r14,R14 + movq %r15,(%rsp) +# CFI_REL_OFFSET r15,R15 +#if 0 + cmpl $__KERNEL_CS,CS(%rsp) + je error_kernelspace +#endif +error_call_handler: + movq %rdi, RDI(%rsp) + movq %rsp,%rdi + movq ORIG_RAX(%rsp),%rsi # get error code + movq $-1,ORIG_RAX(%rsp) + call *%rax + +.macro zeroentry sym +# INTR_FRAME + movq (%rsp),%rcx + movq 8(%rsp),%r11 + addq $0x10,%rsp /* skip rcx and r11 */ + pushq $0 /* push error code/oldrax */ +# CFI_ADJUST_CFA_OFFSET 8 + pushq %rax /* push real oldrax to the rdi slot */ +# CFI_ADJUST_CFA_OFFSET 8 + leaq \sym(%rip),%rax + jmp error_entry +# CFI_ENDPROC +.endm + +.macro errorentry sym +# XCPT_FRAME + movq (%rsp),%rcx + movq 8(%rsp),%r11 + addq $0x10,%rsp /* rsp points to the error code */ + pushq %rax +# CFI_ADJUST_CFA_OFFSET 8 + leaq \sym(%rip),%rax + jmp error_entry +# CFI_ENDPROC +.endm + +#define XEN_GET_VCPU_INFO(reg) movq HYPERVISOR_shared_info,reg +#define XEN_PUT_VCPU_INFO(reg) +#define XEN_PUT_VCPU_INFO_fixup +#define XEN_LOCKED_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg) +#define XEN_LOCKED_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg) +#define XEN_TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg) + +#define XEN_BLOCK_EVENTS(reg) XEN_GET_VCPU_INFO(reg) ; \ + XEN_LOCKED_BLOCK_EVENTS(reg) ; \ + XEN_PUT_VCPU_INFO(reg) + +#define XEN_UNBLOCK_EVENTS(reg) XEN_GET_VCPU_INFO(reg) ; \ + XEN_LOCKED_UNBLOCK_EVENTS(reg) ; \ + XEN_PUT_VCPU_INFO(reg) + + + +ENTRY(hypervisor_callback) + zeroentry hypervisor_callback2 + +ENTRY(hypervisor_callback2) + movq %rdi, %rsp +11: movq %gs:8,%rax + incl %gs:0 + cmovzq %rax,%rsp + pushq %rdi + call do_hypervisor_callback + popq %rsp + decl %gs:0 + jmp error_exit + +# ALIGN +restore_all_enable_events: + XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up... + +scrit: /**** START OF CRITICAL REGION ****/ + XEN_TEST_PENDING(%rsi) + jnz 14f # process more events if necessary... 
+ XEN_PUT_VCPU_INFO(%rsi) + RESTORE_ARGS 0,8,0 + HYPERVISOR_IRET 0 + +14: XEN_LOCKED_BLOCK_EVENTS(%rsi) + XEN_PUT_VCPU_INFO(%rsi) + SAVE_REST + movq %rsp,%rdi # set the argument again + jmp 11b +ecrit: /**** END OF CRITICAL REGION ****/ + + +retint_kernel: +retint_restore_args: + movl EFLAGS-REST_SKIP(%rsp), %eax + shr $9, %eax # EAX[0] == IRET_EFLAGS.IF + XEN_GET_VCPU_INFO(%rsi) + andb evtchn_upcall_mask(%rsi),%al + andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask + jnz restore_all_enable_events # != 0 => enable event delivery + XEN_PUT_VCPU_INFO(%rsi) + + RESTORE_ARGS 0,8,0 + HYPERVISOR_IRET 0 + + +error_exit: + RESTORE_REST +/* cli */ + XEN_BLOCK_EVENTS(%rsi) + jmp retint_kernel + + + +ENTRY(failsafe_callback) + popq %rcx + popq %r11 + iretq + + +ENTRY(coprocessor_error) + zeroentry do_coprocessor_error + + +ENTRY(simd_coprocessor_error) + zeroentry do_simd_coprocessor_error + + +ENTRY(device_not_available) + zeroentry do_device_not_available + + +ENTRY(debug) +# INTR_FRAME +# CFI_ADJUST_CFA_OFFSET 8 */ + zeroentry do_debug +# CFI_ENDPROC + + +ENTRY(int3) +# INTR_FRAME +# CFI_ADJUST_CFA_OFFSET 8 */ + zeroentry do_int3 +# CFI_ENDPROC + +ENTRY(overflow) + zeroentry do_overflow + + +ENTRY(bounds) + zeroentry do_bounds + + +ENTRY(invalid_op) + zeroentry do_invalid_op + + +ENTRY(coprocessor_segment_overrun) + zeroentry do_coprocessor_segment_overrun + + +ENTRY(invalid_TSS) + errorentry do_invalid_TSS + + +ENTRY(segment_not_present) + errorentry do_segment_not_present + + +/* runs on exception stack */ +ENTRY(stack_segment) +# XCPT_FRAME + errorentry do_stack_segment +# CFI_ENDPROC + + +ENTRY(general_protection) + errorentry do_general_protection + + +ENTRY(alignment_check) + errorentry do_alignment_check + + +ENTRY(divide_error) + zeroentry do_divide_error + + +ENTRY(spurious_interrupt_bug) + zeroentry do_spurious_interrupt_bug + + +ENTRY(page_fault) + errorentry do_page_fault + + + + + +ENTRY(thread_starter) + popq %rdi + popq %rbx + call *%rbx + call exit_thread + + diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/gnttab.c --- a/extras/mini-os/gnttab.c Thu Jan 18 09:54:33 2007 +0000 +++ b/extras/mini-os/gnttab.c Thu Jan 18 15:18:07 2007 +0000 @@ -23,31 +23,24 @@ #define NR_GRANT_FRAMES 4 #define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t)) -#define GNTTAB_LIST_END (NR_GRANT_ENTRIES + 1) static grant_entry_t *gnttab_table; static grant_ref_t gnttab_list[NR_GRANT_ENTRIES]; -static grant_ref_t gnttab_free_head; + +static void +put_free_entry(grant_ref_t ref) +{ + gnttab_list[ref] = gnttab_list[0]; + gnttab_list[0] = ref; + +} static grant_ref_t -get_free_entries(int count) +get_free_entry(void) { - grant_ref_t ref; - grant_ref_t head; - - ref = head = gnttab_free_head; - while (count-- > 1) - head = gnttab_list[head]; - gnttab_free_head = gnttab_list[head]; - gnttab_list[head] = GNTTAB_LIST_END; + unsigned int ref = gnttab_list[0]; + gnttab_list[0] = gnttab_list[ref]; return ref; -} - -static void -put_free_entry(grant_ref_t gref) -{ - gnttab_list[gref] = gnttab_free_head; - gnttab_free_head = gref; } grant_ref_t @@ -55,7 +48,7 @@ gnttab_grant_access(domid_t domid, unsig { grant_ref_t ref; - ref = get_free_entries(1); + ref = get_free_entry(); gnttab_table[ref].frame = frame; gnttab_table[ref].domid = domid; wmb(); @@ -70,7 +63,7 @@ gnttab_grant_transfer(domid_t domid, uns { grant_ref_t ref; - ref = get_free_entries(1); + ref = get_free_entry(); gnttab_table[ref].frame = pfn; gnttab_table[ref].domid = domid; wmb(); @@ -157,8 +150,7 @@ init_gnttab(void) int i; 
for (i = NR_RESERVED_ENTRIES; i < NR_GRANT_ENTRIES; i++) - gnttab_list[i] = i + 1; - gnttab_free_head = NR_RESERVED_ENTRIES; + put_free_entry(i); setup.dom = DOMID_SELF; setup.nr_frames = NR_GRANT_FRAMES; diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/include/hypervisor.h --- a/extras/mini-os/include/hypervisor.h Thu Jan 18 09:54:33 2007 +0000 +++ b/extras/mini-os/include/hypervisor.h Thu Jan 18 15:18:07 2007 +0000 @@ -15,7 +15,6 @@ #include <types.h> #include <xen/xen.h> -#include <xen/dom0_ops.h> #if defined(__i386__) #include <hypercall-x86_32.h> #elif defined(__x86_64__) diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/include/netfront.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extras/mini-os/include/netfront.h Thu Jan 18 15:18:07 2007 +0000 @@ -0,0 +1,2 @@ +void init_netfront(void*); +void netfront_xmit(unsigned char* data,int len); diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/include/x86/x86_32/hypercall-x86_32.h --- a/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h Thu Jan 18 09:54:33 2007 +0000 +++ b/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h Thu Jan 18 15:18:07 2007 +0000 @@ -179,14 +179,6 @@ HYPERVISOR_set_timer_op( unsigned long timeout_hi = (unsigned long)(timeout>>32); unsigned long timeout_lo = (unsigned long)timeout; return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); -} - -static inline int -HYPERVISOR_dom0_op( - dom0_op_t *dom0_op) -{ - dom0_op->interface_version = DOM0_INTERFACE_VERSION; - return _hypercall1(int, dom0_op, dom0_op); } static inline int diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/include/x86/x86_64/hypercall-x86_64.h --- a/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h Thu Jan 18 09:54:33 2007 +0000 +++ b/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h Thu Jan 18 15:18:07 2007 +0000 @@ -181,14 +181,6 @@ HYPERVISOR_set_timer_op( u64 timeout) { return _hypercall1(long, set_timer_op, timeout); -} - -static inline int -HYPERVISOR_dom0_op( - dom0_op_t *dom0_op) -{ - dom0_op->interface_version = DOM0_INTERFACE_VERSION; - return _hypercall1(int, dom0_op, dom0_op); } static inline int diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/include/xenbus.h --- a/extras/mini-os/include/xenbus.h Thu Jan 18 09:54:33 2007 +0000 +++ b/extras/mini-os/include/xenbus.h Thu Jan 18 15:18:07 2007 +0000 @@ -11,6 +11,9 @@ void init_xenbus(void); string on failure and sets *value to NULL. On success, *value is set to a malloc'd copy of the value. */ char *xenbus_read(xenbus_transaction_t xbt, const char *path, char **value); + +char *xenbus_watch_path(xenbus_transaction_t xbt, const char *path); +char* xenbus_wait_for_value(const char*,const char*); /* Associates a value with a path. Returns a malloc'd error string on failure. 
*/ diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/kernel.c --- a/extras/mini-os/kernel.c Thu Jan 18 09:54:33 2007 +0000 +++ b/extras/mini-os/kernel.c Thu Jan 18 15:18:07 2007 +0000 @@ -37,6 +37,7 @@ #include <sched.h> #include <xenbus.h> #include <gnttab.h> +#include <netfront.h> #include <xen/features.h> #include <xen/version.h> @@ -61,13 +62,13 @@ void setup_xen_features(void) void test_xenbus(void); -void xenbus_tester(void *p) +static void xenbus_tester(void *p) { printk("Xenbus tests disabled, because of a Xend bug.\n"); /* test_xenbus(); */ } -void periodic_thread(void *p) +static void periodic_thread(void *p) { struct timeval tv; printk("Periodic thread started.\n"); @@ -79,12 +80,18 @@ void periodic_thread(void *p) } } +static void netfront_thread(void *p) +{ + init_netfront(&start_info); +} + /* This should be overridden by the application we are linked against. */ __attribute__((weak)) int app_main(start_info_t *si) { printk("Dummy main: start_info=%p\n", si); create_thread("xenbus_tester", xenbus_tester, si); create_thread("periodic_thread", periodic_thread, si); + create_thread("netfront", netfront_thread, si); return 0; } diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/minios-x86_32.lds --- a/extras/mini-os/minios-x86_32.lds Thu Jan 18 09:54:33 2007 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,45 +0,0 @@ -OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") -OUTPUT_ARCH(i386) -ENTRY(_start) -SECTIONS -{ - . = 0x0; - _text = .; /* Text and read-only data */ - .text : { - *(.text) - *(.gnu.warning) - } = 0x9090 - - _etext = .; /* End of text section */ - - .rodata : { *(.rodata) *(.rodata.*) } - - .data : { /* Data */ - *(.data) - CONSTRUCTORS - } - - _edata = .; /* End of data section */ - - __bss_start = .; /* BSS */ - .bss : { - *(.bss) - } - _end = . ; - - /* Sections to be discarded */ - /DISCARD/ : { - *(.text.exit) - *(.data.exit) - *(.exitcall.exit) - } - - /* Stabs debugging sections. */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } -} diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/minios-x86_64.lds --- a/extras/mini-os/minios-x86_64.lds Thu Jan 18 09:54:33 2007 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,54 +0,0 @@ -OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") -OUTPUT_ARCH(i386:x86-64) -ENTRY(_start) -SECTIONS -{ - . = 0x0; - _text = .; /* Text and read-only data */ - .text : { - *(.text) - *(.gnu.warning) - } = 0x9090 - - _etext = .; /* End of text section */ - - .rodata : { *(.rodata) *(.rodata.*) } - - .data : { /* Data */ - *(.data) - CONSTRUCTORS - } - - _edata = .; /* End of data section */ - - . = ALIGN(8192); /* init_task */ - .data.init_task : { *(.data.init_task) } - - . = ALIGN(4096); - .data.page_aligned : { *(.data.idt) } - - . = ALIGN(32); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } - - __bss_start = .; /* BSS */ - .bss : { - *(.bss) - } - _end = . ; - - /* Sections to be discarded */ - /DISCARD/ : { - *(.text.exit) - *(.data.exit) - *(.exitcall.exit) - } - - /* Stabs debugging sections. 
*/ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } -} diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/minios.mk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extras/mini-os/minios.mk Thu Jan 18 15:18:07 2007 +0000 @@ -0,0 +1,62 @@ +# +# The file contains the common make rules for building mini-os. +# + +debug = y + +# Define some default flags. +# NB. '-Wcast-qual' is nasty, so I omitted it. +DEF_CFLAGS := -fno-builtin -Wall -Werror -Wredundant-decls -Wno-format +DEF_CFLAGS += -Wstrict-prototypes -Wnested-externs -Wpointer-arith -Winline +DEF_CFLAGS += -D__XEN_INTERFACE_VERSION__=$(XEN_INTERFACE_VERSION) + +DEF_ASFLAGS = -D__ASSEMBLY__ + +ifeq ($(debug),y) +DEF_CFLAGS += -g +else +DEF_CFLAGS += -O3 +endif + +# Build the CFLAGS and ASFLAGS for compiling and assembling. +# DEF_... flags are the common mini-os flags, +# ARCH_... flags may be defined in arch/$(TARGET_ARCH_FAM/rules.mk +CFLAGS := $(DEF_CFLAGS) $(ARCH_CFLAGS) +ASFLAGS := $(DEF_ASFLAGS) $(ARCH_ASFLAGS) + +# The path pointing to the architecture specific header files. +ARCH_SPEC_INC := $(MINI-OS_ROOT)/include/$(TARGET_ARCH_FAM) + +# Find all header files for checking dependencies. +HDRS := $(wildcard $(MINI-OS_ROOT)/include/*.h) +HDRS += $(wildcard $(MINI-OS_ROOT)/include/xen/*.h) +HDRS += $(wildcard $(ARCH_SPEC_INC)/*.h) +# For special wanted header directories. +extra_heads := $(foreach dir,$(EXTRA_INC),$(wildcard $(dir)/*.h)) +HDRS += $(extra_heads) + +# Add the special header directories to the include paths. +extra_incl := $(foreach dir,$(EXTRA_INC),-I$(MINI-OS_ROOT)/include/$(dir)) +override CPPFLAGS := -I$(MINI-OS_ROOT)/include $(CPPFLAGS) -I$(ARCH_SPEC_INC) $(extra_incl) + +# The name of the architecture specific library. +# This is on x86_32: libx86_32.a +# $(ARCH_LIB) has to built in the architecture specific directory. +ARCH_LIB_NAME = $(TARGET_ARCH) +ARCH_LIB := lib$(ARCH_LIB_NAME).a + +# This object contains the entrypoint for startup from Xen. +# $(HEAD_ARCH_OBJ) has to be built in the architecture specific directory. +HEAD_ARCH_OBJ := $(TARGET_ARCH).o +HEAD_OBJ := $(TARGET_ARCH_DIR)/$(HEAD_ARCH_OBJ) + + +%.o: %.c $(HDRS) Makefile $(SPEC_DEPENDS) + $(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@ + +%.o: %.S $(HDRS) Makefile $(SPEC_DEPENDS) + $(CC) $(ASFLAGS) $(CPPFLAGS) -c $< -o $@ + + + + diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/netfront.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extras/mini-os/netfront.c Thu Jan 18 15:18:07 2007 +0000 @@ -0,0 +1,455 @@ +/* Minimal network driver for Mini-OS. + * Copyright (c) 2006-2007 Jacob Gorm Hansen, University of Copenhagen. + * Based on netfront.c from Xen Linux. + * + * Does not handle fragments or extras. 
+ */ + +#include <os.h> +#include <xenbus.h> +#include <events.h> +#include <errno.h> +#include <xen/io/netif.h> +#include <gnttab.h> +#include <xmalloc.h> +#include <time.h> + +void init_rx_buffers(void); + +struct net_info { + struct netif_tx_front_ring tx; + struct netif_rx_front_ring rx; + int tx_ring_ref; + int rx_ring_ref; + unsigned int evtchn, local_port; + +} net_info; + + +char* xenbus_printf(xenbus_transaction_t xbt, + char* node,char* path, + char* fmt,unsigned int arg) +{ + char fullpath[256]; + char val[256]; + + sprintf(fullpath,"%s/%s",node,path); + sprintf(val,fmt,arg); + xenbus_write(xbt,fullpath,val); + + return NULL; +} + + +#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE) +#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE) +#define GRANT_INVALID_REF 0 + + +unsigned short rx_freelist[NET_RX_RING_SIZE]; +unsigned short tx_freelist[NET_TX_RING_SIZE]; + +struct net_buffer { + void* page; + int gref; +}; +struct net_buffer rx_buffers[NET_RX_RING_SIZE]; +struct net_buffer tx_buffers[NET_TX_RING_SIZE]; + +static inline void add_id_to_freelist(unsigned int id,unsigned short* freelist) +{ + freelist[id] = freelist[0]; + freelist[0] = id; +} + +static inline unsigned short get_id_from_freelist(unsigned short* freelist) +{ + unsigned int id = freelist[0]; + freelist[0] = freelist[id]; + return id; +} + +__attribute__((weak)) void netif_rx(unsigned char* data,int len) +{ + printk("%d bytes incoming at %p\n",len,data); +} + +__attribute__((weak)) void net_app_main(void*si,unsigned char*mac) {} + +static inline int xennet_rxidx(RING_IDX idx) +{ + return idx & (NET_RX_RING_SIZE - 1); +} + +void network_rx(void) +{ + struct net_info *np = &net_info; + RING_IDX rp,cons; + struct netif_rx_response *rx; + + +moretodo: + rp = np->rx.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. */ + cons = np->rx.rsp_cons; + + int nr_consumed=0; + while ((cons != rp)) + { + struct net_buffer* buf; + unsigned char* page; + + rx = RING_GET_RESPONSE(&np->rx, cons); + + if (rx->flags & NETRXF_extra_info) + { + printk("+++++++++++++++++++++ we have extras!\n"); + continue; + } + + + if (rx->status == NETIF_RSP_NULL) continue; + + int id = rx->id; + + buf = &rx_buffers[id]; + page = (unsigned char*)buf->page; + gnttab_end_access(buf->gref); + + if(rx->status>0) + { + netif_rx(page+rx->offset,rx->status); + } + + add_id_to_freelist(id,rx_freelist); + + nr_consumed++; + + ++cons; + } + np->rx.rsp_cons=rp; + + int more; + RING_FINAL_CHECK_FOR_RESPONSES(&np->rx,more); + if(more) goto moretodo; + + RING_IDX req_prod = np->rx.req_prod_pvt; + + int i; + netif_rx_request_t *req; + + for(i=0; i<nr_consumed; i++) + { + int id = xennet_rxidx(req_prod + i); + req = RING_GET_REQUEST(&np->rx, req_prod + i); + struct net_buffer* buf = &rx_buffers[id]; + void* page = buf->page; + + buf->gref = req->gref = + gnttab_grant_access(0,virt_to_mfn(page),0); + + req->id = id; + } + + wmb(); + + np->rx.req_prod_pvt = req_prod + i; + + int notify; + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify); + if (notify) + notify_remote_via_evtchn(np->evtchn); + +} + +void network_tx_buf_gc(void) +{ + + + RING_IDX cons, prod; + unsigned short id; + struct net_info *np = &net_info; + + do { + prod = np->tx.sring->rsp_prod; + rmb(); /* Ensure we see responses up to 'rp'. 
*/ + + for (cons = np->tx.rsp_cons; cons != prod; cons++) + { + struct netif_tx_response *txrsp; + + txrsp = RING_GET_RESPONSE(&np->tx, cons); + if (txrsp->status == NETIF_RSP_NULL) + continue; + + id = txrsp->id; + struct net_buffer* buf = &tx_buffers[id]; + gnttab_end_access(buf->gref); + buf->gref=GRANT_INVALID_REF; + + add_id_to_freelist(id,tx_freelist); + } + + np->tx.rsp_cons = prod; + + /* + * Set a new event, then check for race with update of tx_cons. + * Note that it is essential to schedule a callback, no matter + * how few tx_buffers are pending. Even if there is space in the + * transmit ring, higher layers may be blocked because too much + * data is outstanding: in such cases notification from Xen is + * likely to be the only kick that we'll get. + */ + np->tx.sring->rsp_event = + prod + ((np->tx.sring->req_prod - prod) >> 1) + 1; + mb(); + } while ((cons == prod) && (prod != np->tx.sring->rsp_prod)); + + +} + +void netfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data) +{ + int flags; + + local_irq_save(flags); + + network_tx_buf_gc(); + network_rx(); + + local_irq_restore(flags); +} + +char* backend; + +void init_netfront(void* si) +{ + xenbus_transaction_t xbt; + struct net_info* info = &net_info; + char* err; + char* message=NULL; + char nodename[] = "device/vif/0"; + struct netif_tx_sring *txs; + struct netif_rx_sring *rxs; + int retry=0; + int i; + char* mac; + char* msg; + + printk("************************ NETFRONT **********\n\n\n"); + + for(i=0;i<NET_TX_RING_SIZE;i++) + { + add_id_to_freelist(i,tx_freelist); + tx_buffers[i].page = (char*)alloc_page(); + } + + for(i=0;i<NET_RX_RING_SIZE;i++) + { + add_id_to_freelist(i,rx_freelist); + rx_buffers[i].page = (char*)alloc_page(); + } + + txs = (struct netif_tx_sring*) alloc_page(); + rxs = (struct netif_rx_sring *) alloc_page(); + memset(txs,0,PAGE_SIZE); + memset(rxs,0,PAGE_SIZE); + + + SHARED_RING_INIT(txs); + SHARED_RING_INIT(rxs); + FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); + FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); + + info->tx_ring_ref = gnttab_grant_access(0,virt_to_mfn(txs),0); + info->rx_ring_ref = gnttab_grant_access(0,virt_to_mfn(rxs),0); + + evtchn_alloc_unbound_t op; + op.dom = DOMID_SELF; + op.remote_dom = 0; + HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op); + clear_evtchn(op.port); /* Without, handler gets invoked now! 
*/ + info->local_port = bind_evtchn(op.port, netfront_handler, NULL); + info->evtchn=op.port; + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + printk("starting transaction\n"); + } + + err = xenbus_printf(xbt, nodename, "tx-ring-ref","%u", + info->tx_ring_ref); + if (err) { + message = "writing tx ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, "rx-ring-ref","%u", + info->rx_ring_ref); + if (err) { + message = "writing rx ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, nodename, + "event-channel", "%u", info->evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, nodename, "request-rx-copy", "%u", 1); + + if (err) { + message = "writing request-rx-copy"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, nodename, "state", "%u", + 4); /* connected */ + + + err = xenbus_transaction_end(xbt, 0, &retry); + if (retry) { + goto again; + printk("completing transaction\n"); + } + + goto done; + +abort_transaction: + xenbus_transaction_end(xbt, 1, &retry); + +done: + + msg = xenbus_read(XBT_NIL, "device/vif/0/backend", &backend); + msg = xenbus_read(XBT_NIL, "device/vif/0/mac", &mac); + + printk("backend at %s\n",backend); + printk("mac is %s\n",mac); + + char *res; + char path[256]; + sprintf(path,"%s/state",backend); + + xenbus_watch_path(XBT_NIL, path); + + xenbus_wait_for_value(path,"4"); + + //free(backend); + free(res); + + printk("**************************\n"); + + init_rx_buffers(); + + unsigned char rawmac[6]; + sscanf(mac,"%x:%x:%x:%x:%x:%x", + &rawmac[0], + &rawmac[1], + &rawmac[2], + &rawmac[3], + &rawmac[4], + &rawmac[5]); + + net_app_main(si,rawmac); +} + +void shutdown_netfront(void) +{ + //xenbus_transaction_t xbt; + char* err; + char nodename[] = "device/vif/0"; + + char path[256]; + + printk("close network: backend at %s\n",backend); + + err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6); /* closing */ + sprintf(path,"%s/state",backend); + + xenbus_wait_for_value(path,"6"); + + err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 1); + + xenbus_wait_for_value(path,"2"); + + unbind_all_ports(); + +} + + +void init_rx_buffers(void) +{ + struct net_info* np = &net_info; + int i, requeue_idx; + netif_rx_request_t *req; + int notify; + + np->rx.req_prod_pvt = requeue_idx; + + + /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. 
*/ + for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) + { + struct net_buffer* buf = &rx_buffers[requeue_idx]; + req = RING_GET_REQUEST(&np->rx, requeue_idx); + + buf->gref = req->gref = + gnttab_grant_access(0,virt_to_mfn(buf->page),0); + + req->id = requeue_idx; + + requeue_idx++; + } + + np->rx.req_prod_pvt = requeue_idx; + + + + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify); + + if(notify) + notify_remote_via_evtchn(np->evtchn); + + np->rx.sring->rsp_event = np->rx.rsp_cons + 1; + + +} + + +void netfront_xmit(unsigned char* data,int len) +{ + int flags; + local_irq_save(flags); + + struct net_info* info = &net_info; + struct netif_tx_request *tx; + RING_IDX i = info->tx.req_prod_pvt; + int notify; + int id = get_id_from_freelist(tx_freelist); + struct net_buffer* buf = &tx_buffers[id]; + void* page = buf->page; + + tx = RING_GET_REQUEST(&info->tx, i); + + memcpy(page,data,len); + + buf->gref = + tx->gref = gnttab_grant_access(0,virt_to_mfn(page),0); + + tx->offset=0; + tx->size = len; + tx->flags=0; + tx->id = id; + info->tx.req_prod_pvt = i + 1; + + wmb(); + + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->tx, notify); + + if(notify) notify_remote_via_evtchn(info->evtchn); + + network_tx_buf_gc(); + + local_irq_restore(flags); +} diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/x86_32.S --- a/extras/mini-os/x86_32.S Thu Jan 18 09:54:33 2007 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,287 +0,0 @@ -#include <os.h> -#include <xen/arch-x86_32.h> - -.section __xen_guest - .ascii "GUEST_OS=Mini-OS" - .ascii ",XEN_VER=xen-3.0" - .ascii ",VIRT_BASE=0x0" /* &_text from minios_x86_32.lds */ - .ascii ",ELF_PADDR_OFFSET=0x0" - .ascii ",HYPERCALL_PAGE=0x2" -#ifdef CONFIG_X86_PAE - .ascii ",PAE=yes" -#else - .ascii ",PAE=no" -#endif - .ascii ",LOADER=generic" - .byte 0 -.text - -.globl _start, shared_info, hypercall_page - -_start: - cld - lss stack_start,%esp - push %esi - call start_kernel - -stack_start: - .long stack+8192, __KERNEL_SS - - /* Unpleasant -- the PTE that maps this page is actually overwritten */ - /* to map the real shared-info page! :-) */ - .org 0x1000 -shared_info: - .org 0x2000 - -hypercall_page: - .org 0x3000 - -ES = 0x20 -ORIG_EAX = 0x24 -EIP = 0x28 -CS = 0x2C - -#define ENTRY(X) .globl X ; X : - -#define SAVE_ALL \ - cld; \ - pushl %es; \ - pushl %ds; \ - pushl %eax; \ - pushl %ebp; \ - pushl %edi; \ - pushl %esi; \ - pushl %edx; \ - pushl %ecx; \ - pushl %ebx; \ - movl $(__KERNEL_DS),%edx; \ - movl %edx,%ds; \ - movl %edx,%es; - -#define RESTORE_ALL \ - popl %ebx; \ - popl %ecx; \ - popl %edx; \ - popl %esi; \ - popl %edi; \ - popl %ebp; \ - popl %eax; \ - popl %ds; \ - popl %es; \ - addl $4,%esp; \ - iret; \ - -ENTRY(divide_error) - pushl $0 # no error code - pushl $do_divide_error -do_exception: - pushl %ds - pushl %eax - xorl %eax, %eax - pushl %ebp - pushl %edi - pushl %esi - pushl %edx - decl %eax # eax = -1 - pushl %ecx - pushl %ebx - cld - movl %es, %ecx - movl ES(%esp), %edi # get the function address - movl ORIG_EAX(%esp), %edx # get the error code - movl %eax, ORIG_EAX(%esp) - movl %ecx, ES(%esp) - movl $(__KERNEL_DS), %ecx - movl %ecx, %ds - movl %ecx, %es - movl %esp,%eax # pt_regs pointer - pushl %edx - pushl %eax - call *%edi - jmp ret_from_exception - -ret_from_exception: - movb CS(%esp),%cl - test $2,%cl # slow return to ring 2 or 3 - jne safesti - RESTORE_ALL - -# A note on the "critical region" in our callback handler. -# We want to avoid stacking callback handlers due to events occurring -# during handling of the last event. 
To do this, we keep events disabled -# until weve done all processing. HOWEVER, we must enable events before -# popping the stack frame (cant be done atomically) and so it would still -# be possible to get enough handler activations to overflow the stack. -# Although unlikely, bugs of that kind are hard to track down, so wed -# like to avoid the possibility. -# So, on entry to the handler we detect whether we interrupted an -# existing activation in its critical region -- if so, we pop the current -# activation and restart the handler using the previous one. -ENTRY(hypervisor_callback) - pushl %eax - SAVE_ALL - movl EIP(%esp),%eax - cmpl $scrit,%eax - jb 11f - cmpl $ecrit,%eax - jb critical_region_fixup -11: push %esp - call do_hypervisor_callback - add $4,%esp - movl HYPERVISOR_shared_info,%esi - xorl %eax,%eax - movb CS(%esp),%cl - test $2,%cl # slow return to ring 2 or 3 - jne safesti -safesti:movb $0,1(%esi) # reenable event callbacks -scrit: /**** START OF CRITICAL REGION ****/ - testb $0xFF,(%esi) - jnz 14f # process more events if necessary... - RESTORE_ALL -14: movb $1,1(%esi) - jmp 11b -ecrit: /**** END OF CRITICAL REGION ****/ -# [How we do the fixup]. We want to merge the current stack frame with the -# just-interrupted frame. How we do this depends on where in the critical -# region the interrupted handler was executing, and so how many saved -# registers are in each frame. We do this quickly using the lookup table -# 'critical_fixup_table'. For each byte offset in the critical region, it -# provides the number of bytes which have already been popped from the -# interrupted stack frame. -critical_region_fixup: - addl $critical_fixup_table-scrit,%eax - movzbl (%eax),%eax # %eax contains num bytes popped - mov %esp,%esi - add %eax,%esi # %esi points at end of src region - mov %esp,%edi - add $0x34,%edi # %edi points at end of dst region - mov %eax,%ecx - shr $2,%ecx # convert words to bytes - je 16f # skip loop if nothing to copy -15: subl $4,%esi # pre-decrementing copy loop - subl $4,%edi - movl (%esi),%eax - movl %eax,(%edi) - loop 15b -16: movl %edi,%esp # final %edi is top of merged stack - jmp 11b - -critical_fixup_table: - .byte 0x00,0x00,0x00 # testb $0xff,(%esi) - .byte 0x00,0x00 # jne 14f - .byte 0x00 # pop %ebx - .byte 0x04 # pop %ecx - .byte 0x08 # pop %edx - .byte 0x0c # pop %esi - .byte 0x10 # pop %edi - .byte 0x14 # pop %ebp - .byte 0x18 # pop %eax - .byte 0x1c # pop %ds - .byte 0x20 # pop %es - .byte 0x24,0x24,0x24 # add $4,%esp - .byte 0x28 # iret - .byte 0x00,0x00,0x00,0x00 # movb $1,1(%esi) - .byte 0x00,0x00 # jmp 11b - -# Hypervisor uses this for application faults while it executes. 
-ENTRY(failsafe_callback) - pop %ds - pop %es - pop %fs - pop %gs - iret - -ENTRY(coprocessor_error) - pushl $0 - pushl $do_coprocessor_error - jmp do_exception - -ENTRY(simd_coprocessor_error) - pushl $0 - pushl $do_simd_coprocessor_error - jmp do_exception - -ENTRY(device_not_available) - iret - -ENTRY(debug) - pushl $0 - pushl $do_debug - jmp do_exception - -ENTRY(int3) - pushl $0 - pushl $do_int3 - jmp do_exception - -ENTRY(overflow) - pushl $0 - pushl $do_overflow - jmp do_exception - -ENTRY(bounds) - pushl $0 - pushl $do_bounds - jmp do_exception - -ENTRY(invalid_op) - pushl $0 - pushl $do_invalid_op - jmp do_exception - - -ENTRY(coprocessor_segment_overrun) - pushl $0 - pushl $do_coprocessor_segment_overrun - jmp do_exception - - -ENTRY(invalid_TSS) - pushl $do_invalid_TSS - jmp do_exception - - -ENTRY(segment_not_present) - pushl $do_segment_not_present - jmp do_exception - - -ENTRY(stack_segment) - pushl $do_stack_segment - jmp do_exception - - -ENTRY(general_protection) - pushl $do_general_protection - jmp do_exception - - -ENTRY(alignment_check) - pushl $do_alignment_check - jmp do_exception - - -ENTRY(page_fault) - pushl $do_page_fault - jmp do_exception - -ENTRY(machine_check) - pushl $0 - pushl $do_machine_check - jmp do_exception - - -ENTRY(spurious_interrupt_bug) - pushl $0 - pushl $do_spurious_interrupt_bug - jmp do_exception - - - -ENTRY(thread_starter) - popl %eax - popl %ebx - pushl %eax - call *%ebx - call exit_thread - diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/x86_64.S --- a/extras/mini-os/x86_64.S Thu Jan 18 09:54:33 2007 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,385 +0,0 @@ -#include <os.h> -#include <xen/features.h> - -.section __xen_guest - .ascii "GUEST_OS=Mini-OS" - .ascii ",XEN_VER=xen-3.0" - .ascii ",VIRT_BASE=0x0" /* &_text from minios_x86_64.lds */ - .ascii ",ELF_PADDR_OFFSET=0x0" - .ascii ",HYPERCALL_PAGE=0x2" - .ascii ",LOADER=generic" - .byte 0 -.text - -#define ENTRY(X) .globl X ; X : -.globl _start, shared_info, hypercall_page - - -_start: - cld - movq stack_start(%rip),%rsp - movq %rsi,%rdi - call start_kernel - -stack_start: - .quad stack+8192 - - /* Unpleasant -- the PTE that maps this page is actually overwritten */ - /* to map the real shared-info page! :-) */ - .org 0x1000 -shared_info: - .org 0x2000 - -hypercall_page: - .org 0x3000 - - -/* Offsets into shared_info_t. 
*/ -#define evtchn_upcall_pending /* 0 */ -#define evtchn_upcall_mask 1 - -NMI_MASK = 0x80000000 - -#define RDI 112 -#define ORIG_RAX 120 /* + error_code */ -#define EFLAGS 144 - -#define REST_SKIP 6*8 -.macro SAVE_REST - subq $REST_SKIP,%rsp -# CFI_ADJUST_CFA_OFFSET REST_SKIP - movq %rbx,5*8(%rsp) -# CFI_REL_OFFSET rbx,5*8 - movq %rbp,4*8(%rsp) -# CFI_REL_OFFSET rbp,4*8 - movq %r12,3*8(%rsp) -# CFI_REL_OFFSET r12,3*8 - movq %r13,2*8(%rsp) -# CFI_REL_OFFSET r13,2*8 - movq %r14,1*8(%rsp) -# CFI_REL_OFFSET r14,1*8 - movq %r15,(%rsp) -# CFI_REL_OFFSET r15,0*8 -.endm - - -.macro RESTORE_REST - movq (%rsp),%r15 -# CFI_RESTORE r15 - movq 1*8(%rsp),%r14 -# CFI_RESTORE r14 - movq 2*8(%rsp),%r13 -# CFI_RESTORE r13 - movq 3*8(%rsp),%r12 -# CFI_RESTORE r12 - movq 4*8(%rsp),%rbp -# CFI_RESTORE rbp - movq 5*8(%rsp),%rbx -# CFI_RESTORE rbx - addq $REST_SKIP,%rsp -# CFI_ADJUST_CFA_OFFSET -(REST_SKIP) -.endm - - -#define ARG_SKIP 9*8 -.macro RESTORE_ARGS skiprax=0,addskip=0,skiprcx=0,skipr11=0,skipr8910=0,skiprdx=0 - .if \skipr11 - .else - movq (%rsp),%r11 -# CFI_RESTORE r11 - .endif - .if \skipr8910 - .else - movq 1*8(%rsp),%r10 -# CFI_RESTORE r10 - movq 2*8(%rsp),%r9 -# CFI_RESTORE r9 - movq 3*8(%rsp),%r8 -# CFI_RESTORE r8 - .endif - .if \skiprax - .else - movq 4*8(%rsp),%rax -# CFI_RESTORE rax - .endif - .if \skiprcx - .else - movq 5*8(%rsp),%rcx -# CFI_RESTORE rcx - .endif - .if \skiprdx - .else - movq 6*8(%rsp),%rdx -# CFI_RESTORE rdx - .endif - movq 7*8(%rsp),%rsi -# CFI_RESTORE rsi - movq 8*8(%rsp),%rdi -# CFI_RESTORE rdi - .if ARG_SKIP+\addskip > 0 - addq $ARG_SKIP+\addskip,%rsp -# CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip) - .endif -.endm - - -.macro HYPERVISOR_IRET flag -# testb $3,1*8(%rsp) /* Don't need to do that in Mini-os, as */ -# jnz 2f /* there is no userspace? */ - testl $NMI_MASK,2*8(%rsp) - jnz 2f - - testb $1,(xen_features+XENFEAT_supervisor_mode_kernel) - jnz 1f - - /* Direct iret to kernel space. Correct CS and SS. */ - orb $3,1*8(%rsp) - orb $3,4*8(%rsp) -1: iretq - -2: /* Slow iret via hypervisor. */ - andl $~NMI_MASK, 16(%rsp) - pushq $\flag - jmp hypercall_page + (__HYPERVISOR_iret * 32) -.endm - -/* - * Exception entry point. This expects an error code/orig_rax on the stack - * and the exception handler in %rax. 
- */ -ENTRY(error_entry) -# _frame RDI - /* rdi slot contains rax, oldrax contains error code */ - cld - subq $14*8,%rsp -# CFI_ADJUST_CFA_OFFSET (14*8) - movq %rsi,13*8(%rsp) -# CFI_REL_OFFSET rsi,RSI - movq 14*8(%rsp),%rsi /* load rax from rdi slot */ - movq %rdx,12*8(%rsp) -# CFI_REL_OFFSET rdx,RDX - movq %rcx,11*8(%rsp) -# CFI_REL_OFFSET rcx,RCX - movq %rsi,10*8(%rsp) /* store rax */ -# CFI_REL_OFFSET rax,RAX - movq %r8, 9*8(%rsp) -# CFI_REL_OFFSET r8,R8 - movq %r9, 8*8(%rsp) -# CFI_REL_OFFSET r9,R9 - movq %r10,7*8(%rsp) -# CFI_REL_OFFSET r10,R10 - movq %r11,6*8(%rsp) -# CFI_REL_OFFSET r11,R11 - movq %rbx,5*8(%rsp) -# CFI_REL_OFFSET rbx,RBX - movq %rbp,4*8(%rsp) -# CFI_REL_OFFSET rbp,RBP - movq %r12,3*8(%rsp) -# CFI_REL_OFFSET r12,R12 - movq %r13,2*8(%rsp) -# CFI_REL_OFFSET r13,R13 - movq %r14,1*8(%rsp) -# CFI_REL_OFFSET r14,R14 - movq %r15,(%rsp) -# CFI_REL_OFFSET r15,R15 -#if 0 - cmpl $__KERNEL_CS,CS(%rsp) - je error_kernelspace -#endif -error_call_handler: - movq %rdi, RDI(%rsp) - movq %rsp,%rdi - movq ORIG_RAX(%rsp),%rsi # get error code - movq $-1,ORIG_RAX(%rsp) - call *%rax - -.macro zeroentry sym -# INTR_FRAME - movq (%rsp),%rcx - movq 8(%rsp),%r11 - addq $0x10,%rsp /* skip rcx and r11 */ - pushq $0 /* push error code/oldrax */ -# CFI_ADJUST_CFA_OFFSET 8 - pushq %rax /* push real oldrax to the rdi slot */ -# CFI_ADJUST_CFA_OFFSET 8 - leaq \sym(%rip),%rax - jmp error_entry -# CFI_ENDPROC -.endm - -.macro errorentry sym -# XCPT_FRAME - movq (%rsp),%rcx - movq 8(%rsp),%r11 - addq $0x10,%rsp /* rsp points to the error code */ - pushq %rax -# CFI_ADJUST_CFA_OFFSET 8 - leaq \sym(%rip),%rax - jmp error_entry -# CFI_ENDPROC -.endm - -#define XEN_GET_VCPU_INFO(reg) movq HYPERVISOR_shared_info,reg -#define XEN_PUT_VCPU_INFO(reg) -#define XEN_PUT_VCPU_INFO_fixup -#define XEN_LOCKED_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg) -#define XEN_LOCKED_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg) -#define XEN_TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg) - -#define XEN_BLOCK_EVENTS(reg) XEN_GET_VCPU_INFO(reg) ; \ - XEN_LOCKED_BLOCK_EVENTS(reg) ; \ - XEN_PUT_VCPU_INFO(reg) - -#define XEN_UNBLOCK_EVENTS(reg) XEN_GET_VCPU_INFO(reg) ; \ - XEN_LOCKED_UNBLOCK_EVENTS(reg) ; \ - XEN_PUT_VCPU_INFO(reg) - - - -ENTRY(hypervisor_callback) - zeroentry hypervisor_callback2 - -ENTRY(hypervisor_callback2) - movq %rdi, %rsp -11: movq %gs:8,%rax - incl %gs:0 - cmovzq %rax,%rsp - pushq %rdi - call do_hypervisor_callback - popq %rsp - decl %gs:0 - jmp error_exit - -# ALIGN -restore_all_enable_events: - XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up... - -scrit: /**** START OF CRITICAL REGION ****/ - XEN_TEST_PENDING(%rsi) - jnz 14f # process more events if necessary... 
- XEN_PUT_VCPU_INFO(%rsi) - RESTORE_ARGS 0,8,0 - HYPERVISOR_IRET 0 - -14: XEN_LOCKED_BLOCK_EVENTS(%rsi) - XEN_PUT_VCPU_INFO(%rsi) - SAVE_REST - movq %rsp,%rdi # set the argument again - jmp 11b -ecrit: /**** END OF CRITICAL REGION ****/ - - -retint_kernel: -retint_restore_args: - movl EFLAGS-REST_SKIP(%rsp), %eax - shr $9, %eax # EAX[0] == IRET_EFLAGS.IF - XEN_GET_VCPU_INFO(%rsi) - andb evtchn_upcall_mask(%rsi),%al - andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask - jnz restore_all_enable_events # != 0 => enable event delivery - XEN_PUT_VCPU_INFO(%rsi) - - RESTORE_ARGS 0,8,0 - HYPERVISOR_IRET 0 - - -error_exit: - RESTORE_REST -/* cli */ - XEN_BLOCK_EVENTS(%rsi) - jmp retint_kernel - - - -ENTRY(failsafe_callback) - popq %rcx - popq %r11 - iretq - - -ENTRY(coprocessor_error) - zeroentry do_coprocessor_error - - -ENTRY(simd_coprocessor_error) - zeroentry do_simd_coprocessor_error - - -ENTRY(device_not_available) - zeroentry do_device_not_available - - -ENTRY(debug) -# INTR_FRAME -# CFI_ADJUST_CFA_OFFSET 8 */ - zeroentry do_debug -# CFI_ENDPROC - - -ENTRY(int3) -# INTR_FRAME -# CFI_ADJUST_CFA_OFFSET 8 */ - zeroentry do_int3 -# CFI_ENDPROC - -ENTRY(overflow) - zeroentry do_overflow - - -ENTRY(bounds) - zeroentry do_bounds - - -ENTRY(invalid_op) - zeroentry do_invalid_op - - -ENTRY(coprocessor_segment_overrun) - zeroentry do_coprocessor_segment_overrun - - -ENTRY(invalid_TSS) - errorentry do_invalid_TSS - - -ENTRY(segment_not_present) - errorentry do_segment_not_present - - -/* runs on exception stack */ -ENTRY(stack_segment) -# XCPT_FRAME - errorentry do_stack_segment -# CFI_ENDPROC - - -ENTRY(general_protection) - errorentry do_general_protection - - -ENTRY(alignment_check) - errorentry do_alignment_check - - -ENTRY(divide_error) - zeroentry do_divide_error - - -ENTRY(spurious_interrupt_bug) - zeroentry do_spurious_interrupt_bug - - -ENTRY(page_fault) - errorentry do_page_fault - - - - - -ENTRY(thread_starter) - popq %rdi - popq %rbx - call *%rbx - call exit_thread - - diff -r 3464bb656a9c -r 8475a4e0425e extras/mini-os/xenbus/xenbus.c --- a/extras/mini-os/xenbus/xenbus.c Thu Jan 18 09:54:33 2007 +0000 +++ b/extras/mini-os/xenbus/xenbus.c Thu Jan 18 15:18:07 2007 +0000 @@ -45,9 +45,9 @@ #define DEBUG(_f, _a...) 
((void)0) #endif - static struct xenstore_domain_interface *xenstore_buf; static DECLARE_WAIT_QUEUE_HEAD(xb_waitq); +static DECLARE_WAIT_QUEUE_HEAD(watch_queue); struct xenbus_req_info { int in_use:1; @@ -71,6 +71,34 @@ static void memcpy_from_ring(const void memcpy(dest, ring + off, c1); memcpy(dest + c1, ring, c2); } + +static inline void wait_for_watch(void) +{ + DEFINE_WAIT(w); + add_waiter(w,watch_queue); + schedule(); + wake(current); +} + +char* xenbus_wait_for_value(const char* path,const char* value) +{ + for(;;) + { + char *res, *msg; + int r; + + msg = xenbus_read(XBT_NIL, path, &res); + if(msg) return msg; + + r = strcmp(value,res); + free(res); + + if(r==0) break; + else wait_for_watch(); + } + return NULL; +} + static void xenbus_thread_func(void *ign) { @@ -101,13 +129,35 @@ static void xenbus_thread_func(void *ign break; DEBUG("Message is good.\n"); - req_info[msg.req_id].reply = malloc(sizeof(msg) + msg.len); - memcpy_from_ring(xenstore_buf->rsp, + + if(msg.type == XS_WATCH_EVENT) + { + char* payload = (char*)malloc(sizeof(msg) + msg.len); + char *path,*token; + + memcpy_from_ring(xenstore_buf->rsp, + payload, + MASK_XENSTORE_IDX(xenstore_buf->rsp_cons), + msg.len + sizeof(msg)); + + path = payload + sizeof(msg); + token = path + strlen(path) + 1; + + xenstore_buf->rsp_cons += msg.len + sizeof(msg); + free(payload); + wake_up(&watch_queue); + } + + else + { + req_info[msg.req_id].reply = malloc(sizeof(msg) + msg.len); + memcpy_from_ring(xenstore_buf->rsp, req_info[msg.req_id].reply, MASK_XENSTORE_IDX(xenstore_buf->rsp_cons), msg.len + sizeof(msg)); - wake_up(&req_info[msg.req_id].waitq); - xenstore_buf->rsp_cons += msg.len + sizeof(msg); + xenstore_buf->rsp_cons += msg.len + sizeof(msg); + wake_up(&req_info[msg.req_id].waitq); + } } } } @@ -381,9 +431,29 @@ char *xenbus_write(xenbus_transaction_t struct xsd_sockmsg *rep; rep = xenbus_msg_reply(XS_WRITE, xbt, req, ARRAY_SIZE(req)); char *msg = errmsg(rep); - if (msg) - return msg; - free(rep); + if (msg) return msg; + free(rep); + return NULL; +} + +char* xenbus_watch_path( xenbus_transaction_t xbt, const char *path) +{ + /* in the future one could have multiple watch queues, and use + * the token for demuxing. For now the token is 0. */ + + struct xsd_sockmsg *rep; + + struct write_req req[] = { + {path, strlen(path) + 1}, + {"0",2 }, + }; + + rep = xenbus_msg_reply(XS_WATCH, xbt, req, ARRAY_SIZE(req)); + + char *msg = errmsg(rep); + if (msg) return msg; + free(rep); + return NULL; } diff -r 3464bb656a9c -r 8475a4e0425e linux-2.6-xen-sparse/arch/i386/Kconfig --- a/linux-2.6-xen-sparse/arch/i386/Kconfig Thu Jan 18 09:54:33 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/i386/Kconfig Thu Jan 18 15:18:07 2007 +0000 @@ -629,7 +629,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID config HIGHPTE bool "Allocate 3rd-level pagetables from highmem" - depends on (HIGHMEM4G || HIGHMEM64G) && !X86_XEN + depends on HIGHMEM4G || HIGHMEM64G help The VM uses one page table entry for each page of physical memory. 
For systems with a lot of RAM, this can be wasteful of precious diff -r 3464bb656a9c -r 8475a4e0425e linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c Thu Jan 18 09:54:33 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c Thu Jan 18 15:18:07 2007 +0000 @@ -89,7 +89,7 @@ static ssize_t microcode_write (struct f { ssize_t ret; - if (len < DEFAULT_UCODE_TOTALSIZE) { + if (len < MC_HEADER_SIZE) { printk(KERN_ERR "microcode: not enough data\n"); return -EINVAL; } diff -r 3464bb656a9c -r 8475a4e0425e linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c Thu Jan 18 09:54:33 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c Thu Jan 18 15:18:07 2007 +0000 @@ -114,13 +114,7 @@ dma_unmap_sg(struct device *hwdev, struc } EXPORT_SYMBOL(dma_unmap_sg); -/* - * XXX This file is also used by xenLinux/ia64. - * "defined(__i386__) || defined (__x86_64__)" means "!defined(__ia64__)". - * This #if work around should be removed once this file is merbed back into - * i386' pci-dma or is moved to drivers/xen/core. - */ -#if defined(__i386__) || defined(__x86_64__) +#ifdef CONFIG_HIGHMEM dma_addr_t dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction) @@ -150,7 +144,7 @@ dma_unmap_page(struct device *dev, dma_a swiotlb_unmap_page(dev, dma_address, size, direction); } EXPORT_SYMBOL(dma_unmap_page); -#endif /* defined(__i386__) || defined(__x86_64__) */ +#endif /* CONFIG_HIGHMEM */ int dma_mapping_error(dma_addr_t dma_addr) @@ -181,6 +175,8 @@ void *dma_alloc_coherent(struct device * struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; unsigned int order = get_order(size); unsigned long vstart; + u64 mask; + /* ignore region specifiers */ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); @@ -203,9 +199,14 @@ void *dma_alloc_coherent(struct device * vstart = __get_free_pages(gfp, order); ret = (void *)vstart; + if (dev != NULL && dev->coherent_dma_mask) + mask = dev->coherent_dma_mask; + else + mask = 0xffffffff; + if (ret != NULL) { if (xen_create_contiguous_region(vstart, order, - dma_bits) != 0) { + fls64(mask)) != 0) { free_pages(vstart, order); return NULL; } diff -r 3464bb656a9c -r 8475a4e0425e linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c Thu Jan 18 09:54:33 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c Thu Jan 18 15:18:07 2007 +0000 @@ -47,9 +47,6 @@ EXPORT_SYMBOL(swiotlb); */ #define IO_TLB_SHIFT 11 -/* Width of DMA addresses. 30 bits is a b44 limitation. 
*/ -#define DEFAULT_DMA_BITS 30 - static int swiotlb_force; static char *iotlb_virt_start; static unsigned long iotlb_nslabs; @@ -98,11 +95,12 @@ static struct phys_addr { */ static DEFINE_SPINLOCK(io_tlb_lock); -unsigned int dma_bits = DEFAULT_DMA_BITS; +static unsigned int dma_bits; +static unsigned int __initdata max_dma_bits = 32; static int __init setup_dma_bits(char *str) { - dma_bits = simple_strtoul(str, NULL, 0); + max_dma_bits = simple_strtoul(str, NULL, 0); return 0; } __setup("dma_bits=", setup_dma_bits); @@ -143,6 +141,7 @@ swiotlb_init_with_default_size (size_t d swiotlb_init_with_default_size (size_t default_size) { unsigned long i, bytes; + int rc; if (!iotlb_nslabs) { iotlb_nslabs = (default_size >> IO_TLB_SHIFT); @@ -159,16 +158,33 @@ swiotlb_init_with_default_size (size_t d */ iotlb_virt_start = alloc_bootmem_low_pages(bytes); if (!iotlb_virt_start) - panic("Cannot allocate SWIOTLB buffer!\n" - "Use dom0_mem Xen boot parameter to reserve\n" - "some DMA memory (e.g., dom0_mem=-128M).\n"); - + panic("Cannot allocate SWIOTLB buffer!\n"); + + dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT; for (i = 0; i < iotlb_nslabs; i += IO_TLB_SEGSIZE) { - int rc = xen_create_contiguous_region( - (unsigned long)iotlb_virt_start + (i << IO_TLB_SHIFT), - get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT), - dma_bits); - BUG_ON(rc); + do { + rc = xen_create_contiguous_region( + (unsigned long)iotlb_virt_start + (i << IO_TLB_SHIFT), + get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT), + dma_bits); + } while (rc && dma_bits++ < max_dma_bits); + if (rc) { + if (i == 0) + panic("No suitable physical memory available for SWIOTLB buffer!\n" + "Use dom0_mem Xen boot parameter to reserve\n" + "some DMA memory (e.g., dom0_mem=-128M).\n"); + iotlb_nslabs = i; + i <<= IO_TLB_SHIFT; + free_bootmem(__pa(iotlb_virt_start + i), bytes - i); + bytes = i; + for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE << IO_TLB_SHIFT) { + unsigned int bits = fls64(virt_to_bus(iotlb_virt_start + i - 1)); + + if (bits > dma_bits) + dma_bits = bits; + } + break; + } } /* @@ -186,17 +202,27 @@ swiotlb_init_with_default_size (size_t d * Get the overflow emergency buffer */ io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); + if (!io_tlb_overflow_buffer) + panic("Cannot allocate SWIOTLB overflow buffer!\n"); + + do { + rc = xen_create_contiguous_region( + (unsigned long)io_tlb_overflow_buffer, + get_order(io_tlb_overflow), + dma_bits); + } while (rc && dma_bits++ < max_dma_bits); + if (rc) + panic("No suitable physical memory available for SWIOTLB overflow buffer!\n"); iotlb_pfn_start = __pa(iotlb_virt_start) >> PAGE_SHIFT; iotlb_pfn_end = iotlb_pfn_start + (bytes >> PAGE_SHIFT); printk(KERN_INFO "Software IO TLB enabled: \n" " Aperture: %lu megabytes\n" - " Kernel range: 0x%016lx - 0x%016lx\n" + " Kernel range: %p - %p\n" " Address size: %u bits\n", bytes >> 20, - (unsigned long)iotlb_virt_start, - (unsigned long)iotlb_virt_start + bytes, + iotlb_virt_start, iotlb_virt_start + bytes, dma_bits); } @@ -238,9 +264,12 @@ __sync_single(struct phys_addr buffer, c char *dev, *host, *kmp; len = size; while (len != 0) { + unsigned long flags; + if (((bytes = len) + buffer.offset) > PAGE_SIZE) bytes = PAGE_SIZE - buffer.offset; - kmp = kmap_atomic(buffer.page, KM_SWIOTLB); + local_irq_save(flags); /* protects KM_BOUNCE_READ */ + kmp = kmap_atomic(buffer.page, KM_BOUNCE_READ); dev = dma_addr + size - len; host = kmp + buffer.offset; if (dir == DMA_FROM_DEVICE) { @@ -248,7 +277,8 @@ __sync_single(struct phys_addr buffer, c /* 
inaccessible */; } else memcpy(dev, host, bytes); - kunmap_atomic(kmp, KM_SWIOTLB); + kunmap_atomic(kmp, KM_BOUNCE_READ); + local_irq_restore(flags); len -= bytes; buffer.page++; buffer.offset = 0; @@ -617,6 +647,8 @@ swiotlb_sync_sg_for_device(struct device sg->dma_length, dir); } +#ifdef CONFIG_HIGHMEM + dma_addr_t swiotlb_map_page(struct device *hwdev, struct page *page, unsigned long offset, size_t size, @@ -650,6 +682,8 @@ swiotlb_unmap_page(struct device *hwdev, unmap_single(hwdev, bus_to_virt(dma_address), size, direction); } +#endif + int swiotlb_dma_mapping_error(dma_addr_t dma_addr) { @@ -677,7 +711,5 @@ EXPORT_SYMBOL(swiotlb_sync_single_for_de EXPORT_SYMBOL(swiotlb_sync_single_for_device); EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); EXPORT_SYMBOL(swiotlb_sync_sg_for_device); -EXPORT_SYMBOL(swiotlb_map_page); -EXPORT_SYMBOL(swiotlb_unmap_page); EXPORT_SYMBOL(swiotlb_dma_mapping_error); EXPORT_SYMBOL(swiotlb_dma_supported); diff -r 3464bb656a9c -r 8475a4e0425e linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c Thu Jan 18 09:54:33 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c Thu Jan 18 15:18:07 2007 +0000 @@ -262,16 +262,19 @@ static void dump_fault_path(unsigned lon p += (address >> 30) * 2; printk(KERN_ALERT "%08lx -> *pde = %08lx:%08lx\n", page, p[1], p[0]); if (p[0] & 1) { - mfn = (p[0] >> PAGE_SHIFT) | ((p[1] & 0x7) << 20); + mfn = (p[0] >> PAGE_SHIFT) | (p[1] << 20); page = mfn_to_pfn(mfn) << PAGE_SHIFT; p = (unsigned long *)__va(page); address &= 0x3fffffff; p += (address >> 21) * 2; printk(KERN_ALERT "%08lx -> *pme = %08lx:%08lx\n", page, p[1], p[0]); -#ifndef CONFIG_HIGHPTE + mfn = (p[0] >> PAGE_SHIFT) | (p[1] << 20); +#ifdef CONFIG_HIGHPTE + if (mfn_to_pfn(mfn) >= highstart_pfn) + return; +#endif if (p[0] & 1) { - mfn = (p[0] >> PAGE_SHIFT) | ((p[1] & 0x7) << 20); page = mfn_to_pfn(mfn) << PAGE_SHIFT; p = (unsigned long *) __va(page); address &= 0x001fffff; @@ -279,7 +282,6 @@ static void dump_fault_path(unsigned lon printk(KERN_ALERT "%08lx -> *pte = %08lx:%08lx\n", page, p[1], p[0]); } -#endif } } #else @@ -294,11 +296,14 @@ static void dump_fault_path(unsigned lon machine_to_phys(page)); /* * We must not directly access the pte in the highpte - * case, the page table might be allocated in highmem. + * case if the page table is located in highmem. * And lets rather not kmap-atomic the pte, just in case * it's allocated already. 
*/ -#ifndef CONFIG_HIGHPTE +#ifdef CONFIG_HIGHPTE + if ((page >> PAGE_SHIFT) >= highstart_pfn) + return; +#endif if ((page & 1) && oops_may_print()) { page &= PAGE_MASK; address &= 0x003ff000; @@ -307,7 +312,6 @@ static void dump_fault_path(unsigned lon printk(KERN_ALERT "*pte = ma %08lx pa %08lx\n", page, machine_to_phys(page)); } -#endif } #endif diff -r 3464bb656a9c -r 8475a4e0425e linux-2.6-xen-sparse/arch/i386/mm/highmem-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/highmem-xen.c Thu Jan 18 09:54:33 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/i386/mm/highmem-xen.c Thu Jan 18 15:18:07 2007 +0000 @@ -129,5 +129,6 @@ EXPORT_SYMBOL(kmap); EXPORT_SYMBOL(kmap); EXPORT_SYMBOL(kunmap); EXPORT_SYMBOL(kmap_atomic); +EXPORT_SYMBOL(kmap_atomic_pte); EXPORT_SYMBOL(kunmap_atomic); EXPORT_SYMBOL(kmap_atomic_to_page); diff -r 3464bb656a9c -r 8475a4e0425e linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Thu Jan 18 09:54:33 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Thu Jan 18 15:18:07 2007 +0000 @@ -239,23 +239,41 @@ struct page *pte_alloc_one(struct mm_str #ifdef CONFIG_HIGHPTE pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); + if (pte && PageHighMem(pte)) { + struct mmuext_op op; + + kmap_flush_unused(); + op.cmd = MMUEXT_PIN_L1_TABLE; + op.arg1.mfn = pfn_to_mfn(page_to_pfn(pte)); + BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); + } #else pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); +#endif if (pte) { SetPageForeign(pte, pte_free); init_page_count(pte); } -#endif return pte; } void pte_free(struct page *pte) { - unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT); - - if (!pte_write(*virt_to_ptep(va))) - BUG_ON(HYPERVISOR_update_va_mapping( - va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0)); + unsigned long pfn = page_to_pfn(pte); + + if (!PageHighMem(pte)) { + unsigned long va = (unsigned long)__va(pfn << PAGE_SHIFT); + + if (!pte_write(*virt_to_ptep(va))) + BUG_ON(HYPERVISOR_update_va_mapping( + va, pfn_pte(pfn, PAGE_KERNEL), 0)); + } else { + struct mmuext_op op; + + op.cmd = MMUEXT_UNPIN_TABLE; + op.arg1.mfn = pfn_to_mfn(pfn); + BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); + } ClearPageForeign(pte); init_page_count(pte); diff -r 3464bb656a9c -r 8475a4e0425e linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c Thu Jan 18 09:54:33 2007 +0000 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c Thu Jan 18 15:18:07 2007 +0000 @@ -163,6 +163,18 @@ void _arch_exit_mmap(struct mm_struct *m mm_unpin(mm); } +struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + struct page *pte; + + pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); + if (pte) { + SetPageForeign(pte, pte_free); + init_page_count(pte); + } + return pte; +} + void pte_free(struct page *pte) { unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT); @@ -170,6 +182,10 @@ void pte_free(struct page *pte) if (!pte_write(*virt_to_ptep(va))) BUG_ON(HYPERVISOR_update_va_mapping( va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0)); + + ClearPageForeign(pte); + init_page_count(pte); + __free_page(pte); } #endif /* CONFIG_XEN */ diff -r 3464bb656a9c -r 8475a4e0425e linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Thu Jan 18 09:54:33 2007 +0000 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Thu Jan 18 15:18:07 2007 +0000 @@ -42,9 +42,30 @@ 
static void backend_changed(struct xenbu static void backend_changed(struct xenbus_watch *, const char **, unsigned int); +static int blkback_name(blkif_t *blkif, char *buf) +{ + char *devpath, *devname; + struct xenbus_device *dev = blkif->be->dev; + + devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); + if (IS_ERR(devpath)) + return PTR_ERR(devpath); + + if ((devname = strstr(devpath, "/dev/")) != NULL) + devname += strlen("/dev/"); + else + devname = devpath; + + snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname); + kfree(devpath); + + return 0; +} + static void update_blkif_status(blkif_t *blkif) { int err; + char name[TASK_COMM_LEN]; /* Not ready to connect? */ if (!blkif->irq || !blkif->vbd.bdev) @@ -59,10 +80,13 @@ static void update_blkif_status(blkif_t if (blkif->be->dev->state != XenbusStateConnected) return; - blkif->xenblkd = kthread_run(blkif_schedule, blkif, - "xvd %d %02x:%02x", - blkif->domid, - blkif->be->major, blkif->be->minor); + err = blkback_name(blkif, name); + if (err) { + xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); + return; + } + + blkif->xenblkd = kthread_run(blkif_schedule, blkif, name); if (IS_ERR(blkif->xenblkd)) { err = PTR_ERR(blkif->xenblkd); blkif->xenblkd = NULL; diff -r 3464bb656a9c -r 8475a4e0425e linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Thu Jan 18 09:54:33 2007 +0000 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Thu Jan 18 15:18:07 2007 +0000 @@ -92,9 +92,30 @@ static long get_id(const char *str) return simple_strtol(num, NULL, 10); } +static int blktap_name(blkif_t *blkif, char *buf) +{ + char *devpath, *devname; + struct xenbus_device *dev = blkif->be->dev; + + devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); + if (IS_ERR(devpath)) + return PTR_ERR(devpath); + + if ((devname = strstr(devpath, "/dev/")) != NULL) + devname += strlen("/dev/"); + else + devname = devpath; + + snprintf(buf, TASK_COMM_LEN, "blktap.%d.%s", blkif->domid, devname); + kfree(devpath); + + return 0; +} + static void tap_update_blkif_status(blkif_t *blkif) { int err; + char name[TASK_COMM_LEN]; /* Not ready to connect? */ if(!blkif->irq || !blkif->sectors) { @@ -110,10 +131,13 @@ static void tap_update_blkif_status(blki if (blkif->be->dev->state != XenbusStateConnected) return; - blkif->xenblkd = kthread_run(tap_blkif_schedule, blkif, - "xvd %d", - blkif->domid); - + err = blktap_name(blkif, name); + if (err) { + xenbus_dev_error(blkif->be->dev, err, "get blktap dev name"); + return; + } + + blkif->xenblkd = kthread_run(tap_blkif_schedule, blkif, name); if (IS_ERR(blkif->xenblkd)) { err = PTR_ERR(blkif->xenblkd); blkif->xenblkd = NULL; diff -r 3464bb656a9c -r 8475a4e0425e linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c --- a/linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c Thu Jan 18 09:54:33 2007 +0000 +++ b/linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c Thu Jan 18 15:18:07 2007 +0000 @@ -98,8 +98,8 @@ void xen_machine_kexec_setup_resources(v err: /* * It isn't possible to free xen_phys_cpus this early in the - * boot. Since failure at this stage is unexpected and the - * amount is small we leak the memory. + * boot. Failure at this stage is unexpected and the amount of + * memory is small therefore we tolerate the potential leak. 
*/ xen_max_nr_phys_cpus = 0; return; diff -r 3464bb656a9c -r 8475a4e0425e linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/dma-mapping.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/dma-mapping.h Thu Jan 18 09:54:33 2007 +0000 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/dma-mapping.h Thu Jan 18 15:18:07 2007 +0000 @@ -53,6 +53,7 @@ extern void dma_unmap_sg(struct device * extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, enum dma_data_direction direction); +#ifdef CONFIG_HIGHMEM extern dma_addr_t dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction); @@ -60,6 +61,11 @@ extern void extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, enum dma_data_direction direction); +#else +#define dma_map_page(dev, page, offset, size, dir) \ + dma_map_single(dev, page_address(page) + (offset), (size), (dir)) +#define dma_unmap_page dma_unmap_single +#endif extern void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, diff -r 3464bb656a9c -r 8475a4e0425e linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/kmap_types.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/kmap_types.h Thu Jan 18 09:54:33 2007 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ -#ifndef _ASM_KMAP_TYPES_H -#define _ASM_KMAP_TYPES_H - - -#ifdef CONFIG_DEBUG_HIGHMEM -# define D(n) __KM_FENCE_##n , -#else -# define D(n) -#endif - -enum km_type { -D(0) KM_BOUNCE_READ, -D(1) KM_SKB_SUNRPC_DATA, -D(2) KM_SKB_DATA_SOFTIRQ, -D(3) KM_USER0, -D(4) KM_USER1, -D(5) KM_BIO_SRC_IRQ, -D(6) KM_BIO_DST_IRQ, -D(7) KM_PTE0, -D(8) KM_PTE1, -D(9) KM_IRQ0, -D(10) KM_IRQ1, -D(11) KM_SOFTIRQ0, -D(12) KM_SOFTIRQ1, -D(13) KM_SWIOTLB, -D(14) KM_TYPE_NR -}; - -#undef D - -#endif diff -r 3464bb656a9c -r 8475a4e0425e linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgalloc.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgalloc.h Thu Jan 18 09:54:33 2007 +0000 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgalloc.h Thu Jan 18 15:18:07 2007 +0000 @@ -41,7 +41,7 @@ static inline void pte_free_kernel(pte_t static inline void pte_free_kernel(pte_t *pte) { free_page((unsigned long)pte); - make_page_writable(pte, XENFEAT_writable_page_tables); + make_lowmem_page_writable(pte, XENFEAT_writable_page_tables); } extern void pte_free(struct page *pte); diff -r 3464bb656a9c -r 8475a4e0425e linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/swiotlb.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/swiotlb.h Thu Jan 18 09:54:33 2007 +0000 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/swiotlb.h Thu Jan 18 15:18:07 2007 +0000 @@ -26,15 +26,15 @@ extern void swiotlb_unmap_sg(struct devi extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction); extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr); +#ifdef CONFIG_HIGHMEM extern dma_addr_t swiotlb_map_page(struct device *hwdev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction); extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address, size_t size, enum dma_data_direction direction); +#endif extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); extern void swiotlb_init(void); - -extern unsigned int dma_bits; #ifdef CONFIG_SWIOTLB extern int swiotlb; diff -r 3464bb656a9c -r 8475a4e0425e linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h --- 
a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h Thu Jan 18 09:54:33 2007 +0000 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h Thu Jan 18 15:18:07 2007 +0000 @@ -64,42 +64,35 @@ static inline void pgd_populate(struct m } } +extern struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr); +extern void pte_free(struct page *pte); + +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + struct page *pg; + + pg = pte_alloc_one(mm, addr); + return pg ? page_address(pg) : NULL; +} + static inline void pmd_free(pmd_t *pmd) { - pte_t *ptep = virt_to_ptep(pmd); - - if (!pte_write(*ptep)) { - BUG_ON(HYPERVISOR_update_va_mapping( - (unsigned long)pmd, - pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT, PAGE_KERNEL), - 0)); - } - free_page((unsigned long)pmd); -} - -static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) -{ - pmd_t *pmd = (pmd_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); - return pmd; + BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); + pte_free(virt_to_page(pmd)); } static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - pud_t *pud = (pud_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); - return pud; + struct page *pg; + + pg = pte_alloc_one(mm, addr); + return pg ? page_address(pg) : NULL; } static inline void pud_free(pud_t *pud) { - pte_t *ptep = virt_to_ptep(pud); - - if (!pte_write(*ptep)) { - BUG_ON(HYPERVISOR_update_va_mapping( - (unsigned long)pud, - pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT, PAGE_KERNEL), - 0)); - } - free_page((unsigned long)pud); + BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); + pte_free(virt_to_page(pud)); } static inline void pgd_list_add(pgd_t *pgd) @@ -130,10 +123,10 @@ static inline void pgd_list_del(pgd_t *p static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - /* - * We allocate two contiguous pages for kernel and user. - */ - unsigned boundary; + /* + * We allocate two contiguous pages for kernel and user. 
+ */ + unsigned boundary; pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, 1); if (!pgd) return NULL; @@ -150,11 +143,11 @@ static inline pgd_t *pgd_alloc(struct mm (PTRS_PER_PGD - boundary) * sizeof(pgd_t)); memset(__user_pgd(pgd), 0, PAGE_SIZE); /* clean up user pgd */ - /* - * Set level3_user_pgt for vsyscall area - */ + /* + * Set level3_user_pgt for vsyscall area + */ set_pgd(__user_pgd(pgd) + pgd_index(VSYSCALL_START), - mk_kernel_pgd(__pa_symbol(level3_user_pgt))); + mk_kernel_pgd(__pa_symbol(level3_user_pgt))); return pgd; } @@ -187,18 +180,10 @@ static inline void pgd_free(pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); - if (pte) + pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); + if (pte) make_page_readonly(pte, XENFEAT_writable_page_tables); - return pte; -} - -static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) -{ - struct page *pte; - - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); return pte; } @@ -208,18 +193,12 @@ static inline void pte_free_kernel(pte_t static inline void pte_free_kernel(pte_t *pte) { BUG_ON((unsigned long)pte & (PAGE_SIZE-1)); - make_page_writable(pte, XENFEAT_writable_page_tables); + make_page_writable(pte, XENFEAT_writable_page_tables); free_page((unsigned long)pte); } -extern void pte_free(struct page *pte); - -//#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) -//#define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) -//#define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) - -#define __pte_free_tlb(tlb,x) pte_free((x)) -#define __pmd_free_tlb(tlb,x) pmd_free((x)) -#define __pud_free_tlb(tlb,x) pud_free((x)) +#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) +#define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) +#define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) #endif /* _X86_64_PGALLOC_H */ diff -r 3464bb656a9c -r 8475a4e0425e linux-2.6-xen-sparse/kernel/kexec.c --- a/linux-2.6-xen-sparse/kernel/kexec.c Thu Jan 18 09:54:33 2007 +0000 +++ b/linux-2.6-xen-sparse/kernel/kexec.c Thu Jan 18 15:18:07 2007 +0000 @@ -1012,9 +1012,11 @@ asmlinkage long sys_kexec_load(unsigned goto out; } #ifdef CONFIG_XEN - result = xen_machine_kexec_load(image); - if (result) - goto out; + if (image) { + result = xen_machine_kexec_load(image); + if (result) + goto out; + } #endif /* Install the new kernel, and Uninstall the old */ image = xchg(dest_image, image); diff -r 3464bb656a9c -r 8475a4e0425e patches/linux-2.6.18/ipv6-no-autoconf.patch --- a/patches/linux-2.6.18/ipv6-no-autoconf.patch Thu Jan 18 09:54:33 2007 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,19 +0,0 @@ -diff -pruN ../orig-linux-2.6.18/net/ipv6/addrconf.c ./net/ipv6/addrconf.c ---- ../orig-linux-2.6.18/net/ipv6/addrconf.c 2006-09-20 04:42:06.000000000 +0100 -+++ ./net/ipv6/addrconf.c 2007-01-12 16:08:07.000000000 +0000 -@@ -2514,6 +2514,7 @@ static void addrconf_dad_start(struct in - spin_lock_bh(&ifp->lock); - - if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || -+ !(dev->flags&IFF_MULTICAST) || - !(ifp->flags&IFA_F_TENTATIVE)) { - ifp->flags &= ~IFA_F_TENTATIVE; - spin_unlock_bh(&ifp->lock); -@@ -2598,6 +2599,7 @@ static void addrconf_dad_completed(struc - if (ifp->idev->cnf.forwarding == 0 && - ifp->idev->cnf.rtr_solicits > 0 && - (dev->flags&IFF_LOOPBACK) == 0 && -+ (dev->flags & IFF_MULTICAST) && - 
(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { - struct in6_addr all_routers; - diff -r 3464bb656a9c -r 8475a4e0425e patches/linux-2.6.18/series --- a/patches/linux-2.6.18/series Thu Jan 18 09:54:33 2007 +0000 +++ b/patches/linux-2.6.18/series Thu Jan 18 15:18:07 2007 +0000 @@ -5,7 +5,6 @@ blktap-aio-16_03_06.patch blktap-aio-16_03_06.patch fix-ide-cd-pio-mode.patch i386-mach-io-check-nmi.patch -ipv6-no-autoconf.patch net-csum.patch net-gso-5-rcv-mss.patch net-gso-6-linear-segmentation.patch diff -r 3464bb656a9c -r 8475a4e0425e tools/check/check_udev --- a/tools/check/check_udev Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/check/check_udev Thu Jan 18 15:18:07 2007 +0000 @@ -11,7 +11,7 @@ Linux) Linux) TOOL="udevinfo" UDEV_VERSION="0" - test -x "$(which ${TOOL})" && \ + test -x "$(which ${TOOL} 2>/dev/null)" && \ UDEV_VERSION=$(${TOOL} -V | sed -e 's/^[^0-9]* \([0-9]\{1,\}\)[^0-9]\{0,\}/\1/') if test "${UDEV_VERSION}" -ge 059; then RC=0 @@ -28,7 +28,7 @@ esac if test ${RC} -ne 0; then echo - echo ' *** Check for ${TOOL} FAILED' + echo " *** Check for ${TOOL} FAILED" fi exit ${RC} diff -r 3464bb656a9c -r 8475a4e0425e tools/examples/vtpm-common.sh --- a/tools/examples/vtpm-common.sh Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/examples/vtpm-common.sh Thu Jan 18 15:18:07 2007 +0000 @@ -24,7 +24,9 @@ VTPMDB="/etc/xen/vtpm.db" #In the vtpm-impl file some commands should be defined: # vtpm_create, vtpm_setup, vtpm_start, etc. (see below) -if [ -r "$dir/vtpm-impl" ]; then +if [ -r "$dir/vtpm-impl.alt" ]; then + . "$dir/vtpm-impl.alt" +elif [ -r "$dir/vtpm-impl" ]; then . "$dir/vtpm-impl" else function vtpm_create () { diff -r 3464bb656a9c -r 8475a4e0425e tools/examples/xen-network-common.sh --- a/tools/examples/xen-network-common.sh Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/examples/xen-network-common.sh Thu Jan 18 15:18:07 2007 +0000 @@ -117,7 +117,12 @@ create_bridge () { ip link set ${bridge} arp off ip link set ${bridge} multicast off fi + + # A small MTU disables IPv6 (and therefore IPv6 addrconf). + mtu=$(ip link show ${bridge} | sed -n 's/.* mtu \([0-9]\+\).*/\1/p') + ip link set ${bridge} mtu 68 ip link set ${bridge} up + ip link set ${bridge} mtu ${mtu:-1500} } # Usage: add_to_bridge bridge dev diff -r 3464bb656a9c -r 8475a4e0425e tools/examples/xmexample1 --- a/tools/examples/xmexample1 Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/examples/xmexample1 Thu Jan 18 15:18:07 2007 +0000 @@ -64,6 +64,40 @@ vif = [ '' ] # and MODE is r for read-only, w for read-write. disk = [ 'phy:hda1,hda1,w' ] + +#---------------------------------------------------------------------------- +# Define frame buffer device. +# +# By default, no frame buffer device is configured. +# +# To create one using the SDL backend and sensible defaults: +# +# vfb = [ 'type=sdl' ] +# +# This uses environment variables XAUTHORITY and DISPLAY. You +# can override that: +# +# vfb = [ 'type=sdl,xauthority=/home/bozo/.Xauthority,display=:1' ] +# +# To create one using the VNC backend and sensible defaults: +# +# vfb = [ 'type=vnc' ] +# +# The backend listens on 127.0.0.1 port 5900+N by default, where N is +# the domain ID. You can override both address and N: +# +# vfb = [ 'type=vnc,vnclisten=127.0.0.1,vncdisplay=1' ] +# +# Or you can bind the first unused port above 5900: +# +# vfb = [ 'type=vnc,vnclisten=0.0.0.0,vnunused=1' ] +# +# You can override the password: +# +# vfb = [ 'type=vnc,vncpasswd=MYPASSWD' ] +# +# Empty password disables authentication. Defaults to the vncpasswd +# configured in xend-config.sxp. 
#---------------------------------------------------------------------------- # Define to which TPM instance the user domain should communicate. diff -r 3464bb656a9c -r 8475a4e0425e tools/examples/xmexample2 --- a/tools/examples/xmexample2 Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/examples/xmexample2 Thu Jan 18 15:18:07 2007 +0000 @@ -100,6 +100,40 @@ vif = [ '' ] # All domains get sda6 read-only (to use for /usr, see below). disk = [ 'phy:sda%d,sda1,w' % (7+vmid), 'phy:sda6,sda6,r' ] + +#---------------------------------------------------------------------------- +# Define frame buffer device. +# +# By default, no frame buffer device is configured. +# +# To create one using the SDL backend and sensible defaults: +# +# vfb = [ 'type=sdl' ] +# +# This uses environment variables XAUTHORITY and DISPLAY. You +# can override that: +# +# vfb = [ 'type=sdl,xauthority=/home/bozo/.Xauthority,display=:1' ] +# +# To create one using the VNC backend and sensible defaults: +# +# vfb = [ 'type=vnc' ] +# +# The backend listens on 127.0.0.1 port 5900+N by default, where N is +# the domain ID. You can override both address and N: +# +# vfb = [ 'type=vnc,vnclisten=127.0.0.1,vncdisplay=%d' % vmid ] +# +# Or you can bind the first unused port above 5900: +# +# vfb = [ 'type=vnc,vnclisten=0.0.0.0,vnunused=1' ] +# +# You can override the password: +# +# vfb = [ 'type=vnc,vncpasswd=MYPASSWD' ] +# +# Empty password disables authentication. Defaults to the vncpasswd +# configured in xend-config.sxp. #---------------------------------------------------------------------------- # Define to which TPM instance the user domain should communicate. diff -r 3464bb656a9c -r 8475a4e0425e tools/examples/xmexample3 --- a/tools/examples/xmexample3 Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/examples/xmexample3 Thu Jan 18 15:18:07 2007 +0000 @@ -85,6 +85,40 @@ vif = [ 'ip=192.168.%d.1/24' % (vmid)] # to all domains as sda1. # All domains get sda6 read-only (to use for /usr, see below). disk = [ 'phy:hda%d,hda1,w' % (vmid)] + +#---------------------------------------------------------------------------- +# Define frame buffer device. +# +# By default, no frame buffer device is configured. +# +# To create one using the SDL backend and sensible defaults: +# +# vfb = [ 'type=sdl' ] +# +# This uses environment variables XAUTHORITY and DISPLAY. You +# can override that: +# +# vfb = [ 'type=sdl,xauthority=/home/bozo/.Xauthority,display=:1' ] +# +# To create one using the VNC backend and sensible defaults: +# +# vfb = [ 'type=vnc' ] +# +# The backend listens on 127.0.0.1 port 5900+N by default, where N is +# the domain ID. You can override both address and N: +# +# vfb = [ 'type=vnc,vnclisten=127.0.0.1,vncdisplay=%d' % vmid ] +# +# Or you can bind the first unused port above 5900: +# +# vfb = [ 'type=vnc,vnclisten=0.0.0.0,vnunused=1' ] +# +# You can override the password: +# +# vfb = [ 'type=vnc,vncpasswd=MYPASSWD' ] +# +# Empty password disables authentication. Defaults to the vncpasswd +# configured in xend-config.sxp. #---------------------------------------------------------------------------- # Define to which TPM instance the user domain should communicate. 
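The vfb documentation block added to xmexample1, xmexample2 and xmexample3 above lists the SDL and VNC options one at a time. As a rough illustrative sketch only (the kernel path, memory size and domain name are hypothetical and not taken from this changeset), a minimal guest configuration combining the new frame buffer option with the vif and disk lines already shown in xmexample1 might look like:

    kernel = "/boot/vmlinuz-2.6-xenU"   # hypothetical guest kernel path
    memory = 128                        # hypothetical memory size, in MB
    name   = "ExampleDomain"            # hypothetical domain name
    vif    = [ '' ]
    disk   = [ 'phy:hda1,hda1,w' ]
    # VNC frame buffer bound to the loopback interface on display :1
    # (port 5901), with password authentication enabled.
    vfb    = [ 'type=vnc,vnclisten=127.0.0.1,vncdisplay=1,vncpasswd=MYPASSWD' ]

Omitting vfb keeps the previous behaviour of configuring no frame buffer device, and 'type=sdl' can be used in place of the VNC settings when a local X display is available.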
diff -r 3464bb656a9c -r 8475a4e0425e tools/libfsimage/common/fsimage.c --- a/tools/libfsimage/common/fsimage.c Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/libfsimage/common/fsimage.c Thu Jan 18 15:18:07 2007 +0000 @@ -74,7 +74,7 @@ void fsi_close_fsimage(fsi_t *fsi) pthread_mutex_lock(&fsi_lock); fsi->f_plugin->fp_ops->fpo_umount(fsi); (void) close(fsi->f_fd); - fsip_fs_free(fsi); + free(fsi); pthread_mutex_unlock(&fsi_lock); } diff -r 3464bb656a9c -r 8475a4e0425e tools/libfsimage/common/fsimage_grub.c --- a/tools/libfsimage/common/fsimage_grub.c Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/libfsimage/common/fsimage_grub.c Thu Jan 18 15:18:07 2007 +0000 @@ -193,6 +193,7 @@ static int static int fsig_umount(fsi_t *fsi) { + free(fsi->f_data); return (0); } @@ -250,6 +251,7 @@ static int static int fsig_close(fsi_file_t *ffi) { + free(ffi->ff_data); fsip_file_free(ffi); return (0); } diff -r 3464bb656a9c -r 8475a4e0425e tools/libfsimage/common/fsimage_plugin.c --- a/tools/libfsimage/common/fsimage_plugin.c Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/libfsimage/common/fsimage_plugin.c Thu Jan 18 15:18:07 2007 +0000 @@ -40,13 +40,6 @@ fsip_fs_set_data(fsi_t *fsi, void *data) fsi->f_data = data; } -void -fsip_fs_free(fsi_t *fsi) -{ - free(fsi->f_data); - free(fsi); -} - fsi_file_t * fsip_file_alloc(fsi_t *fsi, void *data) { @@ -64,7 +57,6 @@ void void fsip_file_free(fsi_file_t *ffi) { - free(ffi->ff_data); free(ffi); } diff -r 3464bb656a9c -r 8475a4e0425e tools/libfsimage/common/fsimage_plugin.h --- a/tools/libfsimage/common/fsimage_plugin.h Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/libfsimage/common/fsimage_plugin.h Thu Jan 18 15:18:07 2007 +0000 @@ -50,11 +50,10 @@ typedef fsi_plugin_ops_t * (*fsi_plugin_init_t)(int, fsi_plugin_t *, const char **); void fsip_fs_set_data(fsi_t *, void *); -void fsip_fs_free(fsi_t *); fsi_file_t *fsip_file_alloc(fsi_t *, void *); void fsip_file_free(fsi_file_t *); -fsi_t * fsip_fs(fsi_file_t *ffi); -uint64_t fsip_fs_offset(fsi_t *fsi); +fsi_t *fsip_fs(fsi_file_t *); +uint64_t fsip_fs_offset(fsi_t *); void *fsip_fs_data(fsi_t *); void *fsip_file_data(fsi_file_t *); diff -r 3464bb656a9c -r 8475a4e0425e tools/libfsimage/common/mapfile-GNU --- a/tools/libfsimage/common/mapfile-GNU Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/libfsimage/common/mapfile-GNU Thu Jan 18 15:18:07 2007 +0000 @@ -1,5 +1,5 @@ VERSION { VERSION { - libfsimage.so.1.1 { + libfsimage.so.1.0 { global: fsi_open_fsimage; fsi_close_fsimage; @@ -10,7 +10,6 @@ VERSION { fsi_pread_file; fsip_fs_set_data; - fsip_fs_free; fsip_file_alloc; fsip_file_free; fsip_fs; diff -r 3464bb656a9c -r 8475a4e0425e tools/libfsimage/common/mapfile-SunOS --- a/tools/libfsimage/common/mapfile-SunOS Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/libfsimage/common/mapfile-SunOS Thu Jan 18 15:18:07 2007 +0000 @@ -1,4 +1,4 @@ libfsimage.so.1.1 { -libfsimage.so.1.1 { +libfsimage.so.1.0 { global: fsi_open_fsimage; fsi_close_fsimage; @@ -9,7 +9,6 @@ libfsimage.so.1.1 { fsi_pread_file; fsip_fs_set_data; - fsip_fs_free; fsip_file_alloc; fsip_file_free; fsip_fs; diff -r 3464bb656a9c -r 8475a4e0425e tools/libfsimage/ext2fs-lib/ext2fs-lib.c --- a/tools/libfsimage/ext2fs-lib/ext2fs-lib.c Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/libfsimage/ext2fs-lib/ext2fs-lib.c Thu Jan 18 15:18:07 2007 +0000 @@ -58,9 +58,11 @@ ext2lib_umount(fsi_t *fsi) { ext2_filsys *fs = fsip_fs_data(fsi); if (ext2fs_close(*fs) != 0) { + free(fs); errno = EINVAL; return (-1); } + free(fs); return (0); } diff -r 3464bb656a9c -r 8475a4e0425e tools/libxc/xc_hvm_build.c --- 
a/tools/libxc/xc_hvm_build.c Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/libxc/xc_hvm_build.c Thu Jan 18 15:18:07 2007 +0000 @@ -233,8 +233,7 @@ static int setup_guest(int xc_handle, SCRATCH_PFN)) == NULL) ) goto error_out; memset(shared_info, 0, PAGE_SIZE); - for ( i = 0; i < MAX_VIRT_CPUS; i++ ) - shared_info->vcpu_info[i].evtchn_upcall_mask = 1; + /* NB. evtchn_upcall_mask is unused: leave as zero. */ memset(&shared_info->evtchn_mask[0], 0xff, sizeof(shared_info->evtchn_mask)); munmap(shared_info, PAGE_SIZE); diff -r 3464bb656a9c -r 8475a4e0425e tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/libxc/xc_linux_build.c Thu Jan 18 15:18:07 2007 +0000 @@ -741,7 +741,7 @@ static int setup_guest(int xc_handle, /* * Enable shadow translate mode. This must happen after * populate physmap because the p2m reservation is based on - * the domains current memory allocation. + * the domain's current memory allocation. */ if ( xc_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE, diff -r 3464bb656a9c -r 8475a4e0425e tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/libxc/xc_linux_restore.c Thu Jan 18 15:18:07 2007 +0000 @@ -12,7 +12,7 @@ #include "xg_private.h" #include "xg_save_restore.h" -/* max mfn of the whole machine */ +/* max mfn of the current host machine */ static unsigned long max_mfn; /* virtual starting address of the hypervisor */ @@ -29,6 +29,9 @@ static xen_pfn_t *live_p2m = NULL; /* A table mapping each PFN to its new MFN. */ static xen_pfn_t *p2m = NULL; + +/* A table of P2M mappings in the current region */ +static xen_pfn_t *p2m_batch = NULL; static ssize_t @@ -57,46 +60,78 @@ read_exact(int fd, void *buf, size_t cou ** This function inverts that operation, replacing the pfn values with ** the (now known) appropriate mfn values. */ -static int uncanonicalize_pagetable(unsigned long type, void *page) +static int uncanonicalize_pagetable(int xc_handle, uint32_t dom, + unsigned long type, void *page) { int i, pte_last; unsigned long pfn; uint64_t pte; + int nr_mfns = 0; pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8); - /* Now iterate through the page table, uncanonicalizing each PTE */ + /* First pass: work out how many (if any) MFNs we need to alloc */ for(i = 0; i < pte_last; i++) { - + if(pt_levels == 2) pte = ((uint32_t *)page)[i]; else pte = ((uint64_t *)page)[i]; - - if(pte & _PAGE_PRESENT) { - - pfn = (pte >> PAGE_SHIFT) & 0xffffffff; - - if(pfn >= max_pfn) { - /* This "page table page" is probably not one; bail. */ - ERROR("Frame number in type %lu page table is out of range: " - "i=%d pfn=0x%lx max_pfn=%lu", - type >> 28, i, pfn, max_pfn); - return 0; - } - - - pte &= 0xffffff0000000fffULL; - pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT; - - if(pt_levels == 2) - ((uint32_t *)page)[i] = (uint32_t)pte; - else - ((uint64_t *)page)[i] = (uint64_t)pte; - - - - } + + /* XXX SMH: below needs fixing for PROT_NONE etc */ + if(!(pte & _PAGE_PRESENT)) + continue; + + pfn = (pte >> PAGE_SHIFT) & 0xffffffff; + + if(pfn >= max_pfn) { + /* This "page table page" is probably not one; bail. 
*/ + ERROR("Frame number in type %lu page table is out of range: " + "i=%d pfn=0x%lx max_pfn=%lu", + type >> 28, i, pfn, max_pfn); + return 0; + } + + if(p2m[pfn] == INVALID_P2M_ENTRY) { + /* Have a 'valid' PFN without a matching MFN - need to alloc */ + p2m_batch[nr_mfns++] = pfn; + } + } + + + /* Alllocate the requistite number of mfns */ + if (nr_mfns && xc_domain_memory_populate_physmap( + xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) { + ERROR("Failed to allocate memory for batch.!\n"); + errno = ENOMEM; + return 0; + } + + /* Second pass: uncanonicalize each present PTE */ + nr_mfns = 0; + for(i = 0; i < pte_last; i++) { + + if(pt_levels == 2) + pte = ((uint32_t *)page)[i]; + else + pte = ((uint64_t *)page)[i]; + + /* XXX SMH: below needs fixing for PROT_NONE etc */ + if(!(pte & _PAGE_PRESENT)) + continue; + + pfn = (pte >> PAGE_SHIFT) & 0xffffffff; + + if(p2m[pfn] == INVALID_P2M_ENTRY) + p2m[pfn] = p2m_batch[nr_mfns++]; + + pte &= 0xffffff0000000fffULL; + pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT; + + if(pt_levels == 2) + ((uint32_t *)page)[i] = (uint32_t)pte; + else + ((uint64_t *)page)[i] = (uint64_t)pte; } return 1; @@ -140,6 +175,7 @@ int xc_linux_restore(int xc_handle, int /* A temporary mapping of the guest's start_info page. */ start_info_t *start_info; + /* Our mapping of the current region (batch) */ char *region_base; xc_mmu_t *mmu = NULL; @@ -244,8 +280,10 @@ int xc_linux_restore(int xc_handle, int p2m = calloc(max_pfn, sizeof(xen_pfn_t)); pfn_type = calloc(max_pfn, sizeof(unsigned long)); region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t)); - - if ((p2m == NULL) || (pfn_type == NULL) || (region_mfn == NULL)) { + p2m_batch = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t)); + + if ((p2m == NULL) || (pfn_type == NULL) || + (region_mfn == NULL) || (p2m_batch == NULL)) { ERROR("memory alloc failed"); errno = ENOMEM; goto out; @@ -253,6 +291,11 @@ int xc_linux_restore(int xc_handle, int if (lock_pages(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) { ERROR("Could not lock region_mfn"); + goto out; + } + + if (lock_pages(p2m_batch, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) { + ERROR("Could not lock p2m_batch"); goto out; } @@ -270,17 +313,9 @@ int xc_linux_restore(int xc_handle, int goto out; } + /* Mark all PFNs as invalid; we allocate on demand */ for ( pfn = 0; pfn < max_pfn; pfn++ ) - p2m[pfn] = pfn; - - if (xc_domain_memory_populate_physmap(xc_handle, dom, max_pfn, - 0, 0, p2m) != 0) { - ERROR("Failed to increase reservation by %lx KB", PFN_TO_KB(max_pfn)); - errno = ENOMEM; - goto out; - } - - DPRINTF("Increased domain reservation by %lx KB\n", PFN_TO_KB(max_pfn)); + p2m[pfn] = INVALID_P2M_ENTRY; if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) { ERROR("Could not initialise for MMU updates"); @@ -298,7 +333,7 @@ int xc_linux_restore(int xc_handle, int n = 0; while (1) { - int j; + int j, nr_mfns = 0; this_pc = (n * 100) / max_pfn; if ( (this_pc - prev_pc) >= 5 ) @@ -333,20 +368,57 @@ int xc_linux_restore(int xc_handle, int goto out; } + /* First pass for this batch: work out how much memory to alloc */ + nr_mfns = 0; for ( i = 0; i < j; i++ ) { unsigned long pfn, pagetype; pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; + if ( (pagetype != XEN_DOMCTL_PFINFO_XTAB) && + (p2m[pfn] == INVALID_P2M_ENTRY) ) + { + /* Have a live PFN which hasn't had an MFN allocated */ + p2m_batch[nr_mfns++] = pfn; + } + } + + + /* Now allocate a bunch of mfns for this batch */ + if (nr_mfns && xc_domain_memory_populate_physmap( + 
xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) { + ERROR("Failed to allocate memory for batch.!\n"); + errno = ENOMEM; + goto out; + } + + /* Second pass for this batch: update p2m[] and region_mfn[] */ + nr_mfns = 0; + for ( i = 0; i < j; i++ ) + { + unsigned long pfn, pagetype; + pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; + pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; + if ( pagetype == XEN_DOMCTL_PFINFO_XTAB) - region_mfn[i] = 0; /* we know map will fail, but don't care */ - else - region_mfn[i] = p2m[pfn]; - } - + region_mfn[i] = ~0UL; /* map will fail but we don't care */ + else + { + if (p2m[pfn] == INVALID_P2M_ENTRY) { + /* We just allocated a new mfn above; update p2m */ + p2m[pfn] = p2m_batch[nr_mfns++]; + } + + /* setup region_mfn[] for batch map */ + region_mfn[i] = p2m[pfn]; + } + } + + /* Map relevant mfns */ region_base = xc_map_foreign_batch( xc_handle, dom, PROT_WRITE, region_mfn, j); + if ( region_base == NULL ) { ERROR("map batch failed"); @@ -401,7 +473,8 @@ int xc_linux_restore(int xc_handle, int pae_extended_cr3 || (pagetype != XEN_DOMCTL_PFINFO_L1TAB)) { - if (!uncanonicalize_pagetable(pagetype, page)) { + if (!uncanonicalize_pagetable(xc_handle, dom, + pagetype, page)) { /* ** Failing to uncanonicalize a page table can be ok ** under live migration since the pages type may have @@ -411,10 +484,8 @@ int xc_linux_restore(int xc_handle, int pagetype >> 28, pfn, mfn); nraces++; continue; - } - - } - + } + } } else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB ) { @@ -486,7 +557,7 @@ int xc_linux_restore(int xc_handle, int */ int j, k; - + /* First pass: find all L3TABs current in > 4G mfns and get new mfns */ for ( i = 0; i < max_pfn; i++ ) { @@ -555,7 +626,8 @@ int xc_linux_restore(int xc_handle, int } for(k = 0; k < j; k++) { - if(!uncanonicalize_pagetable(XEN_DOMCTL_PFINFO_L1TAB, + if(!uncanonicalize_pagetable(xc_handle, dom, + XEN_DOMCTL_PFINFO_L1TAB, region_base + k*PAGE_SIZE)) { ERROR("failed uncanonicalize pt!"); goto out; @@ -631,7 +703,7 @@ int xc_linux_restore(int xc_handle, int { unsigned int count; unsigned long *pfntab; - int rc; + int nr_frees, rc; if (!read_exact(io_fd, &count, sizeof(count))) { ERROR("Error when reading pfn count"); @@ -648,29 +720,30 @@ int xc_linux_restore(int xc_handle, int goto out; } + nr_frees = 0; for (i = 0; i < count; i++) { unsigned long pfn = pfntab[i]; - if(pfn > max_pfn) - /* shouldn't happen - continue optimistically */ - continue; - - pfntab[i] = p2m[pfn]; - p2m[pfn] = INVALID_P2M_ENTRY; // not in pseudo-physical map - } - - if (count > 0) { + if(p2m[pfn] != INVALID_P2M_ENTRY) { + /* pfn is not in physmap now, but was at some point during + the save/migration process - need to free it */ + pfntab[nr_frees++] = p2m[pfn]; + p2m[pfn] = INVALID_P2M_ENTRY; // not in pseudo-physical map + } + } + + if (nr_frees > 0) { struct xen_memory_reservation reservation = { - .nr_extents = count, + .nr_extents = nr_frees, .extent_order = 0, .domid = dom }; set_xen_guest_handle(reservation.extent_start, pfntab); if ((rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation, - &reservation)) != count) { + &reservation)) != nr_frees) { ERROR("Could not decrease reservation : %d", rc); goto out; } else @@ -791,6 +864,6 @@ int xc_linux_restore(int xc_handle, int free(pfn_type); DPRINTF("Restore exit with rc=%d\n", rc); - + return rc; } diff -r 3464bb656a9c -r 8475a4e0425e tools/libxc/xc_linux_save.c --- a/tools/libxc/xc_linux_save.c Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/libxc/xc_linux_save.c Thu Jan 18 15:18:07 
2007 +0000 @@ -660,13 +660,6 @@ int xc_linux_save(int xc_handle, int io_ goto out; } - /* cheesy sanity check */ - if ((info.max_memkb >> (PAGE_SHIFT - 10)) > max_mfn) { - ERROR("Invalid state record -- pfn count out of range: %lu", - (info.max_memkb >> (PAGE_SHIFT - 10))); - goto out; - } - /* Map the shared info frame */ if(!(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, shared_info_frame))) { diff -r 3464bb656a9c -r 8475a4e0425e tools/libxc/xc_load_elf.c --- a/tools/libxc/xc_load_elf.c Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/libxc/xc_load_elf.c Thu Jan 18 15:18:07 2007 +0000 @@ -406,17 +406,19 @@ static int parseelfimage(const char *ima } /* - * A "bimodal" ELF note indicates the kernel will adjust to the - * current paging mode, including handling extended cr3 syntax. - * If we have ELF notes then PAE=yes implies that we must support - * the extended cr3 syntax. Otherwise we need to find the - * [extended-cr3] syntax in the __xen_guest string. + * A "bimodal" ELF note indicates the kernel will adjust to the current + * paging mode, including handling extended cr3 syntax. If we have ELF + * notes then PAE=yes implies that we must support the extended cr3 syntax. + * Otherwise we need to find the [extended-cr3] syntax in the __xen_guest + * string. We use strstr() to look for "bimodal" to allow guests to use + * "yes,bimodal" or "no,bimodal" for compatibility reasons. */ + dsi->pae_kernel = PAEKERN_no; if ( dsi->__elfnote_section ) { p = xen_elfnote_string(dsi, XEN_ELFNOTE_PAE_MODE); - if ( p != NULL && strncmp(p, "bimodal", 7) == 0 ) + if ( p != NULL && strstr(p, "bimodal") != NULL ) dsi->pae_kernel = PAEKERN_bimodal; else if ( p != NULL && strncmp(p, "yes", 3) == 0 ) dsi->pae_kernel = PAEKERN_extended_cr3; diff -r 3464bb656a9c -r 8475a4e0425e tools/libxc/xc_ptrace.c --- a/tools/libxc/xc_ptrace.c Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/libxc/xc_ptrace.c Thu Jan 18 15:18:07 2007 +0000 @@ -166,14 +166,11 @@ static unsigned long *page_arr * tables. * */ -static unsigned long -to_ma(int cpu, - unsigned long in_addr) -{ - unsigned long maddr = in_addr; - +static uint64_t +to_ma(int cpu, uint64_t maddr) +{ if ( current_is_hvm && paging_enabled(&ctxt[cpu]) ) - maddr = page_array[maddr >> PAGE_SHIFT] << PAGE_SHIFT; + maddr = (uint64_t)page_array[maddr >> PAGE_SHIFT] << PAGE_SHIFT; return maddr; } @@ -225,7 +222,8 @@ map_domain_va_pae( void *guest_va, int perm) { - unsigned long l3e, l2e, l1e, l2p, l1p, p, va = (unsigned long)guest_va; + uint64_t l3e, l2e, l1e, l2p, l1p, p; + unsigned long va = (unsigned long)guest_va; uint64_t *l3, *l2, *l1; static void *v[MAX_VIRT_CPUS]; @@ -380,12 +378,12 @@ map_domain_va( if (!paging_enabled(&ctxt[cpu])) { static void * v; - unsigned long page; + uint64_t page; if ( v != NULL ) munmap(v, PAGE_SIZE); - page = to_ma(cpu, page_array[va >> PAGE_SHIFT]); + page = to_ma(cpu, va); v = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE, perm, page >> PAGE_SHIFT); diff -r 3464bb656a9c -r 8475a4e0425e tools/pygrub/src/pygrub --- a/tools/pygrub/src/pygrub Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/pygrub/src/pygrub Thu Jan 18 15:18:07 2007 +0000 @@ -13,7 +13,7 @@ # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
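The xc_linux_restore changes earlier in this patch stop populating the whole physmap up front: every p2m[] slot now starts as INVALID_P2M_ENTRY, each batch is scanned once to collect the PFNs that still lack a machine frame, a single xc_domain_memory_populate_physmap() call allocates frames for exactly those PFNs, and a second pass fills in p2m[] and region_mfn[]. The snippet below is only a schematic sketch of that two-pass pattern, in Python rather than the patched C, and allocate_mfns() is a made-up stand-in for the real hypercall wrapper.

    INVALID_P2M_ENTRY = 0xffffffff      # sentinel meaning "no MFN allocated yet"

    def allocate_mfns(pfns):
        # Stand-in for xc_domain_memory_populate_physmap(); invents MFN
        # numbers so the sketch runs on its own.
        return [0x100000 + pfn for pfn in pfns]

    def process_batch(batch_pfns, p2m):
        # Pass 1: collect the PFNs in this batch that have no MFN yet.
        need = [pfn for pfn in batch_pfns if p2m[pfn] == INVALID_P2M_ENTRY]
        # One bulk allocation per batch instead of one call per page.
        for pfn, mfn in zip(need, allocate_mfns(need)):
            # Pass 2: record the new mapping.
            p2m[pfn] = mfn
        return [p2m[pfn] for pfn in batch_pfns]   # what region_mfn[] ends up holding

    p2m = dict.fromkeys(range(16), INVALID_P2M_ENTRY)
    p2m[5] = 0x200005                      # pretend this PFN was seen in an earlier batch
    print(process_batch([2, 5, 9], p2m))   # allocates frames only for PFNs 2 and 9

The point of the two passes is that the expensive populate call is issued once per batch rather than once per page, and only for frames the saved image actually references, which is also why the old "increase reservation by max_pfn" step and the cheesy sanity check in xc_linux_save.c can go away.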
# -import os, sys, string, struct, tempfile +import os, sys, string, struct, tempfile, re import copy import logging @@ -48,8 +48,7 @@ def is_disk_image(file): return True return False -SECTOR_SIZE=512 -def get_active_offset(file): +def get_active_partition(file): """Find the offset for the start of the first active partition " "in the disk image file.""" @@ -58,13 +57,56 @@ def get_active_offset(file): for poff in (446, 462, 478, 494): # partition offsets # active partition has 0x80 as the first byte if struct.unpack("<c", buf[poff:poff+1]) == ('\x80',): - return struct.unpack("<L", - buf[poff+8:poff+12])[0] * SECTOR_SIZE + return buf[poff:poff+16] # if there's not a partition marked as active, fall back to # the first partition - P1 = 446 - return struct.unpack("<L", buf[P1+8:P1+12])[0] * SECTOR_SIZE + return buf[446:446+16] + +SECTOR_SIZE=512 +DK_LABEL_LOC=1 +DKL_MAGIC=0xdabe +V_ROOT=0x2 + +def get_solaris_slice(file, offset): + """Find the root slice in a Solaris VTOC.""" + + fd = os.open(file, os.O_RDONLY) + os.lseek(fd, offset + (DK_LABEL_LOC * SECTOR_SIZE), 0) + buf = os.read(fd, 512) + if struct.unpack("<H", buf[508:510])[0] != DKL_MAGIC: + raise RuntimeError, "Invalid disklabel magic" + + nslices = struct.unpack("<H", buf[30:32])[0] + + for i in range(nslices): + sliceoff = 72 + 12 * i + slicetag = struct.unpack("<H", buf[sliceoff:sliceoff+2])[0] + slicesect = struct.unpack("<L", buf[sliceoff+4:sliceoff+8])[0] + if slicetag == V_ROOT: + return slicesect * SECTOR_SIZE + + raise RuntimeError, "No root slice found" + +FDISK_PART_SOLARIS=0xbf +FDISK_PART_SOLARIS_OLD=0x82 + +def get_fs_offset(file): + if not is_disk_image(file): + return 0 + + partbuf = get_active_partition(file) + if len(partbuf) == 0: + raise RuntimeError, "Unable to find active partition on disk" + + offset = struct.unpack("<L", partbuf[8:12])[0] * SECTOR_SIZE + + type = struct.unpack("<B", partbuf[4:5])[0] + + if type == FDISK_PART_SOLARIS or type == FDISK_PART_SOLARIS_OLD: + offset += get_solaris_slice(file, offset) + + return offset class GrubLineEditor(curses.textpad.Textbox): def __init__(self, screen, startx, starty, line = ""): @@ -143,12 +185,12 @@ class GrubLineEditor(curses.textpad.Text class Grub: - def __init__(self, file, isconfig = False): + def __init__(self, file, fs = None): self.screen = None self.entry_win = None self.text_win = None if file: - self.read_config(file, isconfig) + self.read_config(file, fs) def draw_main_windows(self): if self.screen is None: #only init stuff once @@ -295,8 +337,8 @@ class Grub: # else, we cancelled and should just go back break - def read_config(self, fn, isConfig = False): - """Read the given file to parse the config. If isconfig, then + def read_config(self, fn, fs = None): + """Read the given file to parse the config. 
If fs = None, then we're being given a raw config file rather than a disk image.""" if not os.access(fn, os.R_OK): @@ -304,38 +346,25 @@ class Grub: self.cf = grub.GrubConf.GrubConfigFile() - if isConfig: + if not fs: # set the config file and parse it self.cf.filename = fn self.cf.parse() return - offset = 0 - if is_disk_image(fn): - offset = get_active_offset(fn) - if offset == -1: - raise RuntimeError, "Unable to find active partition on disk" - - # open the image and read the grub config - fs = fsimage.open(fn, offset) - - if fs is not None: - grubfile = None - for f in ("/boot/grub/menu.lst", "/boot/grub/grub.conf", - "/grub/menu.lst", "/grub/grub.conf"): - if fs.file_exists(f): - grubfile = f - break - if grubfile is None: - raise RuntimeError, "we couldn't find grub config file in the image provided." - f = fs.open_file(grubfile) - buf = f.read() - del f - del fs - # then parse the grub config - self.cf.parse(buf) - else: - raise RuntimeError, "Unable to read filesystem" + grubfile = None + for f in ("/boot/grub/menu.lst", "/boot/grub/grub.conf", + "/grub/menu.lst", "/grub/grub.conf"): + if fs.file_exists(f): + grubfile = f + break + if grubfile is None: + raise RuntimeError, "we couldn't find grub config file in the image provided." + f = fs.open_file(grubfile) + buf = f.read() + del f + # then parse the grub config + self.cf.parse(buf) def run(self): timeout = int(self.cf.timeout) @@ -376,6 +405,9 @@ class Grub: c = self.screen.getch() if mytime != -1: mytime += 1 + if mytime >= int(timeout): + self.isdone = True + break # handle keypresses if c == ord('c'): @@ -431,19 +463,93 @@ def get_entry_idx(cf, entry): return None +def run_grub(file, entry, fs): + global g + global sel + + def run_main(scr, *args): + global sel + global g + sel = g.run() + + g = Grub(file, fs) + if interactive: + curses.wrapper(run_main) + else: + sel = g.cf.default + + # set the entry to boot as requested + if entry is not None: + idx = get_entry_idx(g.cf, entry) + if idx is not None and idx > 0 and idx < len(g.cf.images): + sel = idx + + if sel == -1: + print "No kernel image selected!" + sys.exit(1) + + img = g.cf.images[sel] + + grubcfg = { "kernel": None, "ramdisk": None, "args": None } + + grubcfg["kernel"] = img.kernel[1] + if img.initrd: + grubcfg["ramdisk"] = img.initrd[1] + if img.args: + grubcfg["args"] = img.args + + return grubcfg + +# If nothing has been specified, look for a Solaris domU. If found, perform the +# necessary tweaks. +def sniff_solaris(fs, cfg): + if not fs.file_exists("/platform/i86xen/kernel/unix"): + return cfg + + # darned python + longmode = (sys.maxint != 2147483647L) + if not longmode: + longmode = os.uname()[4] == "x86_64" + if not longmode: + if (os.access("/usr/bin/isainfo", os.R_OK) and + os.popen("/usr/bin/isainfo -b").read() == "64\n"): + longmode = True + + if not cfg["kernel"]: + cfg["kernel"] = "/platform/i86xen/kernel/unix" + cfg["ramdisk"] = "/platform/i86pc/boot_archive" + if longmode: + cfg["kernel"] = "/platform/i86xen/kernel/amd64/unix" + cfg["ramdisk"] = "/platform/i86pc/amd64/boot_archive" + + # Unpleasant. Typically we'll have 'root=foo -k' or 'root=foo /kernel -k', + # and we need to maintain Xen properties (root= and ip=) and the kernel + # before any user args. 
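For reference while reading the new pygrub helpers above: an MBR partition entry is 16 bytes, with the status byte at offset 0 (0x80 means active), the type byte at offset 4, and the 32-bit LBA of the first sector at offsets 8-11, which is what get_active_partition() now returns whole and get_fs_offset() decodes. The standalone sketch below decodes one such entry; the entry bytes are fabricated for illustration and only the constant names mirror the patch.

    import struct

    SECTOR_SIZE = 512
    FDISK_PART_SOLARIS = 0xbf
    FDISK_PART_SOLARIS_OLD = 0x82

    # A fabricated 16-byte entry: active, type 0x83 (Linux), starting at LBA 63.
    entry = struct.pack("<B3xB3xLL", 0x80, 0x83, 63, 204800)

    status = struct.unpack("<B", entry[0:1])[0]
    ptype  = struct.unpack("<B", entry[4:5])[0]
    start  = struct.unpack("<L", entry[8:12])[0]

    offset = start * SECTOR_SIZE
    if ptype in (FDISK_PART_SOLARIS, FDISK_PART_SOLARIS_OLD):
        # In the patch, get_solaris_slice() would add the V_ROOT slice's
        # offset from the Solaris VTOC at this point.
        pass
    print("active=%s type=0x%02x fs offset=%d" % (status == 0x80, ptype, offset))

In the patched pygrub, a Solaris fdisk type (0xbf or the old 0x82) routes through get_solaris_slice(), which reads the disklabel one sector into the partition, checks DKL_MAGIC and returns the byte offset of the V_ROOT slice, so the filesystem image can be opened directly at the root slice.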
+ + xenargs = "" + userargs = "" + + if not cfg["args"]: + cfg["args"] = cfg["kernel"] + else: + for arg in cfg["args"].split(): + if re.match("^root=", arg) or re.match("^ip=", arg): + xenargs += arg + " " + elif arg != cfg["kernel"]: + userargs += arg + " " + cfg["args"] = xenargs + " " + cfg["kernel"] + " " + userargs + + return cfg + if __name__ == "__main__": sel = None - def run_main(scr, *args): - global sel - sel = g.run() - def usage(): - print >> sys.stderr, "Usage: %s [-q|--quiet] [--output=] [--entry=] <image>" %(sys.argv[0],) + print >> sys.stderr, "Usage: %s [-q|--quiet] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] <image>" %(sys.argv[0],) try: opts, args = getopt.gnu_getopt(sys.argv[1:], 'qh::', - ["quiet", "help", "output=", "entry=", + ["quiet", "help", "output=", "entry=", "kernel=", "ramdisk=", "args=", "isconfig"]) except getopt.GetoptError: usage() @@ -458,6 +564,14 @@ if __name__ == "__main__": entry = None interactive = True isconfig = False + + # what was passed in + incfg = { "kernel": None, "ramdisk": None, "args": None } + # what grub or sniffing chose + chosencfg = { "kernel": None, "ramdisk": None, "args": None } + # what to boot + bootcfg = { "kernel": None, "ramdisk": None, "args": None } + for o, a in opts: if o in ("-q", "--quiet"): interactive = False @@ -466,6 +580,12 @@ if __name__ == "__main__": sys.exit() elif o in ("--output",): output = a + elif o in ("--kernel",): + incfg["kernel"] = a + elif o in ("--ramdisk",): + incfg["ramdisk"] = a + elif o in ("--args",): + incfg["args"] = a elif o in ("--entry",): entry = a # specifying the entry to boot implies non-interactive @@ -478,58 +598,42 @@ if __name__ == "__main__": else: fd = os.open(output, os.O_WRONLY) - g = Grub(file, isconfig) - if interactive: - curses.wrapper(run_main) - else: - sel = g.cf.default - - # set the entry to boot as requested - if entry is not None: - idx = get_entry_idx(g.cf, entry) - if idx is not None and idx > 0 and idx < len(g.cf.images): - sel = idx - - if sel == -1: - print "No kernel image selected!" 
- sys.exit(1) - - img = g.cf.images[sel] - print "Going to boot %s" %(img.title) - print " kernel: %s" %(img.kernel[1],) - if img.initrd: - print " initrd: %s" %(img.initrd[1],) - + # debug if isconfig: - print " args: %s" %(img.args,) + chosencfg = run_grub(file, entry) + print " kernel: %s" % chosencfg["kernel"] + if img.initrd: + print " initrd: %s" % chosencfg["ramdisk"] + print " args: %s" % chosencfg["args"] sys.exit(0) - - offset = 0 - if is_disk_image(file): - offset = get_active_offset(file) - if offset == -1: - raise RuntimeError, "Unable to find active partition on disk" - - # read the kernel and initrd onto the hostfs - fs = fsimage.open(file, offset) - - kernel = fs.open_file(img.kernel[1],).read() - (tfd, fn) = tempfile.mkstemp(prefix="boot_kernel.", + + fs = fsimage.open(file, get_fs_offset(file)) + + chosencfg = sniff_solaris(fs, incfg) + + if not chosencfg["kernel"]: + chosencfg = run_grub(file, entry, fs) + + data = fs.open_file(chosencfg["kernel"]).read() + (tfd, bootcfg["kernel"]) = tempfile.mkstemp(prefix="boot_kernel.", dir="/var/run/xend/boot") - os.write(tfd, kernel) + os.write(tfd, data) os.close(tfd) - sxp = "linux (kernel %s)" %(fn,) - - if img.initrd: - initrd = fs.open_file(img.initrd[1],).read() - (tfd, fn) = tempfile.mkstemp(prefix="boot_ramdisk.", + + if chosencfg["ramdisk"]: + data = fs.open_file(chosencfg["ramdisk"],).read() + (tfd, bootcfg["ramdisk"]) = tempfile.mkstemp(prefix="boot_ramdisk.", dir="/var/run/xend/boot") - os.write(tfd, initrd) + os.write(tfd, data) os.close(tfd) - sxp += "(ramdisk %s)" %(fn,) else: initrd = None - sxp += "(args '%s')" %(img.args,) + + sxp = "linux (kernel %s)" % bootcfg["kernel"] + if bootcfg["ramdisk"]: + sxp += "(ramdisk %s)" % bootcfg["ramdisk"] + if chosencfg["args"]: + sxp += "(args \"%s\")" % chosencfg["args"] sys.stdout.flush() os.write(fd, sxp) diff -r 3464bb656a9c -r 8475a4e0425e tools/python/xen/xend/XendBootloader.py --- a/tools/python/xen/xend/XendBootloader.py Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/python/xen/xend/XendBootloader.py Thu Jan 18 15:18:07 2007 +0000 @@ -53,6 +53,12 @@ def bootloader(blexec, disk, quiet = Fal child = os.fork() if (not child): args = [ blexec ] + if kernel: + args.append("--kernel=%s" % kernel) + if ramdisk: + args.append("--ramdisk=%s" % ramdisk) + if kernel_args: + args.append("--args=%s" % kernel_args) if quiet: args.append("-q") args.append("--output=%s" % fifo) diff -r 3464bb656a9c -r 8475a4e0425e tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/python/xen/xend/XendCheckpoint.py Thu Jan 18 15:18:07 2007 +0000 @@ -147,18 +147,20 @@ def restore(xd, fd, dominfo = None, paus assert store_port assert console_port + nr_pfns = (dominfo.getMemoryTarget() + 3) / 4 + try: l = read_exact(fd, sizeof_unsigned_long, "not a valid guest state file: pfn count read") - nr_pfns = unpack("L", l)[0] # native sizeof long - if nr_pfns > 16*1024*1024: # XXX + max_pfn = unpack("L", l)[0] # native sizeof long + if max_pfn > 16*1024*1024: # XXX raise XendError( "not a valid guest state file: pfn count out of range") balloon.free(xc.pages_to_kib(nr_pfns)) cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE), - fd, dominfo.getDomid(), nr_pfns, + fd, dominfo.getDomid(), max_pfn, store_port, console_port]) log.debug("[xc_restore]: %s", string.join(cmd)) diff -r 3464bb656a9c -r 8475a4e0425e tools/python/xen/xend/XendConfig.py --- a/tools/python/xen/xend/XendConfig.py Thu Jan 18 09:54:33 2007 +0000 +++ 
b/tools/python/xen/xend/XendConfig.py Thu Jan 18 15:18:07 2007 +0000 @@ -126,6 +126,7 @@ XENAPI_CFG_TYPES = { 'memory_dynamic_min': int, 'memory_dynamic_max': int, 'memory_actual': int, + 'cpus': list, 'vcpus_policy': str, 'vcpus_params': str, 'vcpus_number': int, @@ -1020,10 +1021,11 @@ class XendConfig(dict): @return: Returns True if succesfully found and updated a device conf """ if dev_uuid in self['devices']: - config = sxp.child0(cfg_sxp) - dev_type = sxp.name(config) - dev_info = {} - + if sxp.child0(cfg_sxp) == 'device': + config = sxp.child0(cfg_sxp) + else: + config = cfg_sxp + for opt_val in config[1:]: try: opt, val = opt_val diff -r 3464bb656a9c -r 8475a4e0425e tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/python/xen/xend/XendDomain.py Thu Jan 18 15:18:07 2007 +0000 @@ -377,7 +377,7 @@ class XendDomain: dom0.setVCpuCount(target) - def _refresh(self): + def _refresh(self, refresh_shutdown = True): """Refresh the domain list. Needs to be called when either xenstore has changed or when a method requires up to date information (like uptime, cputime stats). @@ -393,7 +393,7 @@ class XendDomain: for dom in running: domid = dom['domid'] if domid in self.domains: - self.domains[domid].update(dom) + self.domains[domid].update(dom, refresh_shutdown) elif domid not in self.domains and dom['dying'] != 1: try: new_dom = XendDomainInfo.recreate(dom, False) @@ -495,7 +495,7 @@ class XendDomain: """ self.domains_lock.acquire() try: - self._refresh() + self._refresh(refresh_shutdown = False) dom = self.domain_lookup_nr(domid) if not dom: raise XendError("No domain named '%s'." % str(domid)) @@ -731,7 +731,7 @@ class XendDomain: self.domains_lock.acquire() try: - self._refresh() + self._refresh(refresh_shutdown = False) # active domains active_domains = self.domains.values() diff -r 3464bb656a9c -r 8475a4e0425e tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/python/xen/xend/XendDomainInfo.py Thu Jan 18 15:18:07 2007 +0000 @@ -37,7 +37,7 @@ from xen.util.blkif import blkdev_uname_ from xen.util.blkif import blkdev_uname_to_file from xen.util import security -from xen.xend import balloon, sxp, uuid, image, arch +from xen.xend import balloon, sxp, uuid, image, arch, osdep from xen.xend import XendRoot, XendNode, XendConfig from xen.xend.XendConfig import scrub_password @@ -496,7 +496,7 @@ class XendDomainInfo: self._waitForDevice(dev_type, devid) return self.getDeviceController(dev_type).sxpr(devid) - def device_configure(self, dev_config, devid = None): + def device_configure(self, dev_sxp, devid = None): """Configure an existing device. 
@param dev_config: device configuration @@ -506,19 +506,24 @@ class XendDomainInfo: @return: Returns True if successfully updated device @rtype: boolean """ - deviceClass = sxp.name(dev_config) - - # look up uuid of the device - dev_control = self.getDeviceController(deviceClass) - dev_sxpr = dev_control.sxpr(devid) - dev_uuid = sxp.child_value(dev_sxpr, 'uuid') - if not dev_uuid: - return False - - self.info.device_update(dev_uuid, dev_config) - dev_config_dict = self.info['devices'].get(dev_uuid) - if dev_config_dict: - dev_control.reconfigureDevice(devid, dev_config_dict[1]) + + # convert device sxp to a dict + dev_class = sxp.name(dev_sxp) + dev_config = {} + for opt_val in dev_sxp[1:]: + try: + dev_config[opt_val[0]] = opt_val[1] + except IndexError: + pass + + # use DevController.reconfigureDevice to change device config + dev_control = self.getDeviceController(dev_class) + dev_uuid = dev_control.reconfigureDevice(devid, dev_config) + + # update XendConfig with new device info + if dev_uuid: + self.info.device_update(dev_uuid, dev_sxp) + return True def waitForDevices(self): @@ -1575,7 +1580,7 @@ class XendDomainInfo: else: # Boot using bootloader if not blexec or blexec == 'pygrub': - blexec = '/usr/bin/pygrub' + blexec = osdep.pygrub_path blcfg = None for (devtype, devinfo) in self.info.all_devices_sxpr(): diff -r 3464bb656a9c -r 8475a4e0425e tools/python/xen/xend/XendNode.py --- a/tools/python/xen/xend/XendNode.py Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/python/xen/xend/XendNode.py Thu Jan 18 15:18:07 2007 +0000 @@ -365,14 +365,24 @@ class XendNode: return [[k, info[k]] for k in ITEM_ORDER] + def xenschedinfo(self): + sched_id = self.xc.sched_id_get() + if sched_id == xen.lowlevel.xc.XEN_SCHEDULER_SEDF: + return 'sedf' + elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_CREDIT: + return 'credit' + else: + return 'unknown' def xeninfo(self): info = self.xc.xeninfo() + info['xen_scheduler'] = self.xenschedinfo() ITEM_ORDER = ['xen_major', 'xen_minor', 'xen_extra', 'xen_caps', + 'xen_scheduler', 'xen_pagesize', 'platform_params', 'xen_changeset', diff -r 3464bb656a9c -r 8475a4e0425e tools/python/xen/xend/osdep.py --- a/tools/python/xen/xend/osdep.py Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/python/xen/xend/osdep.py Thu Jan 18 15:18:07 2007 +0000 @@ -29,8 +29,13 @@ _xend_autorestart = { "SunOS": False, } +_pygrub_path = { + "SunOS": "/usr/lib/xen/bin/pygrub" +} + def _get(var, default=None): return var.get(os.uname()[0], default) scripts_dir = _get(_scripts_dir, "/etc/xen/scripts") xend_autorestart = _get(_xend_autorestart) +pygrub_path = _get(_pygrub_path, "/usr/bin/pygrub") diff -r 3464bb656a9c -r 8475a4e0425e tools/python/xen/xend/server/blkif.py --- a/tools/python/xen/xend/server/blkif.py Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/python/xen/xend/server/blkif.py Thu Jan 18 15:18:07 2007 +0000 @@ -101,6 +101,7 @@ class BlkifController(DevController): self.writeBackend(devid, 'type', new_back['type'], 'params', new_back['params']) + return new_back.get('uuid') else: raise VmError('Refusing to reconfigure device %s:%d to %s' % (self.deviceClass, devid, config)) diff -r 3464bb656a9c -r 8475a4e0425e tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/python/xen/xm/create.py Thu Jan 18 15:18:07 2007 +0000 @@ -28,6 +28,7 @@ import xmlrpclib from xen.xend import sxp from xen.xend import PrettyPrint +from xen.xend import osdep import xen.xend.XendClient from xen.xend.XendBootloader import bootloader from xen.util import blkif @@ 
-291,7 +292,8 @@ gopts.var('vfb', val="type={vnc,sdl},vnc For type=vnc, connect an external vncviewer. The server will listen on ADDR (default 127.0.0.1) on port N+5900. N defaults to the domain id. If vncunused=1, the server will try to find an arbitrary - unused port above 5900. + unused port above 5900. vncpasswd overrides the XenD configured + default password. For type=sdl, a viewer will be started automatically using the given DISPLAY and XAUTHORITY, which default to the current user's ones.""") @@ -718,8 +720,11 @@ def run_bootloader(vals, config_image): "--entry= directly.") vals.bootargs = "--entry=%s" %(vals.bootentry,) + kernel = sxp.child_value(config_image, 'kernel') + ramdisk = sxp.child_value(config_image, 'ramdisk') + args = sxp.child_value(config_image, 'args') return bootloader(vals.bootloader, file, not vals.console_autoconnect, - vals.bootargs, config_image) + vals.bootargs, kernel, ramdisk, args) def make_config(vals): """Create the domain configuration. @@ -759,7 +764,14 @@ def make_config(vals): config_image = configure_image(vals) if vals.bootloader: - config_image = run_bootloader(vals, config_image) + if vals.bootloader == "pygrub": + vals.bootloader = osdep.pygrub_path + + # if a kernel is specified, we're using the bootloader + # non-interactively, and need to let xend run it so we preserve the + # real kernel choice. + if not vals.kernel: + config_image = run_bootloader(vals, config_image) config.append(['bootloader', vals.bootloader]) if vals.bootargs: config.append(['bootloader_args', vals.bootargs]) @@ -990,8 +1002,6 @@ def preprocess_vnc(vals): vals.extra = vnc + ' ' + vals.extra def preprocess(vals): - if not vals.kernel and not vals.bootloader: - err("No kernel specified") preprocess_disk(vals) preprocess_pci(vals) preprocess_ioports(vals) diff -r 3464bb656a9c -r 8475a4e0425e tools/tests/Makefile --- a/tools/tests/Makefile Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/tests/Makefile Thu Jan 18 15:18:07 2007 +0000 @@ -7,12 +7,21 @@ TARGET := test_x86_emulator .PHONY: all all: $(TARGET) +.PHONY: blowfish.bin +blowfish.bin: + make -f blowfish.mk all + +blowfish.h: blowfish.bin + (echo "static unsigned int blowfish_code[] = {"; \ + od -v -t x $< | sed 's/^[0-9]* /0x/' | sed 's/ /, 0x/g' | sed 's/$$/,/';\ + echo "};") >$@ + $(TARGET): x86_emulate.o test_x86_emulator.o $(HOSTCC) -o $@ $^ .PHONY: clean clean: - rm -rf $(TARGET) *.o *~ core + rm -rf $(TARGET) *.o *~ core blowfish.h blowfish.bin .PHONY: install install: @@ -20,5 +29,5 @@ x86_emulate.o: $(XEN_ROOT)/xen/arch/x86/ x86_emulate.o: $(XEN_ROOT)/xen/arch/x86/x86_emulate.c $(HOSTCC) $(HOSTCFLAGS) -I$(XEN_ROOT)/xen/include -c -o $@ $< -%.o: %.c +test_x86_emulator.o: test_x86_emulator.c blowfish.h $(HOSTCC) $(HOSTCFLAGS) -I$(XEN_ROOT)/xen/include -c -o $@ $< diff -r 3464bb656a9c -r 8475a4e0425e tools/tests/blowfish.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/tests/blowfish.c Thu Jan 18 15:18:07 2007 +0000 @@ -0,0 +1,439 @@ +/* +blowfish.c: C implementation of the Blowfish algorithm. + +Copyright (C) 1997 by Paul Kocher + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +Lesser General Public License for more details. +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) +#define __attribute_used__ __attribute__((__used__)) +#else +#define __attribute_used__ __attribute__((__unused__)) +#endif + +static unsigned long long blowfish_test(unsigned long long input) + __attribute_used__; + +asm ( + ".globl _start\n" + "_start:\n" + "push %edx; push %eax; " + "call blowfish_test; " + "addl $8,%esp; " + "ret" + ); + +typedef struct { + unsigned long P[16 + 2]; + unsigned long S[4][256]; +} BLOWFISH_CTX; + +#define N 16 + +static const unsigned long ORIG_P[16 + 2] = { + 0x243F6A88L, 0x85A308D3L, 0x13198A2EL, 0x03707344L, + 0xA4093822L, 0x299F31D0L, 0x082EFA98L, 0xEC4E6C89L, + 0x452821E6L, 0x38D01377L, 0xBE5466CFL, 0x34E90C6CL, + 0xC0AC29B7L, 0xC97C50DDL, 0x3F84D5B5L, 0xB5470917L, + 0x9216D5D9L, 0x8979FB1BL +}; + +static const unsigned long ORIG_S[4][256] = { + { 0xD1310BA6L, 0x98DFB5ACL, 0x2FFD72DBL, 0xD01ADFB7L, + 0xB8E1AFEDL, 0x6A267E96L, 0xBA7C9045L, 0xF12C7F99L, + 0x24A19947L, 0xB3916CF7L, 0x0801F2E2L, 0x858EFC16L, + 0x636920D8L, 0x71574E69L, 0xA458FEA3L, 0xF4933D7EL, + 0x0D95748FL, 0x728EB658L, 0x718BCD58L, 0x82154AEEL, + 0x7B54A41DL, 0xC25A59B5L, 0x9C30D539L, 0x2AF26013L, + 0xC5D1B023L, 0x286085F0L, 0xCA417918L, 0xB8DB38EFL, + 0x8E79DCB0L, 0x603A180EL, 0x6C9E0E8BL, 0xB01E8A3EL, + 0xD71577C1L, 0xBD314B27L, 0x78AF2FDAL, 0x55605C60L, + 0xE65525F3L, 0xAA55AB94L, 0x57489862L, 0x63E81440L, + 0x55CA396AL, 0x2AAB10B6L, 0xB4CC5C34L, 0x1141E8CEL, + 0xA15486AFL, 0x7C72E993L, 0xB3EE1411L, 0x636FBC2AL, + 0x2BA9C55DL, 0x741831F6L, 0xCE5C3E16L, 0x9B87931EL, + 0xAFD6BA33L, 0x6C24CF5CL, 0x7A325381L, 0x28958677L, + 0x3B8F4898L, 0x6B4BB9AFL, 0xC4BFE81BL, 0x66282193L, + 0x61D809CCL, 0xFB21A991L, 0x487CAC60L, 0x5DEC8032L, + 0xEF845D5DL, 0xE98575B1L, 0xDC262302L, 0xEB651B88L, + 0x23893E81L, 0xD396ACC5L, 0x0F6D6FF3L, 0x83F44239L, + 0x2E0B4482L, 0xA4842004L, 0x69C8F04AL, 0x9E1F9B5EL, + 0x21C66842L, 0xF6E96C9AL, 0x670C9C61L, 0xABD388F0L, + 0x6A51A0D2L, 0xD8542F68L, 0x960FA728L, 0xAB5133A3L, + 0x6EEF0B6CL, 0x137A3BE4L, 0xBA3BF050L, 0x7EFB2A98L, + 0xA1F1651DL, 0x39AF0176L, 0x66CA593EL, 0x82430E88L, + 0x8CEE8619L, 0x456F9FB4L, 0x7D84A5C3L, 0x3B8B5EBEL, + 0xE06F75D8L, 0x85C12073L, 0x401A449FL, 0x56C16AA6L, + 0x4ED3AA62L, 0x363F7706L, 0x1BFEDF72L, 0x429B023DL, + 0x37D0D724L, 0xD00A1248L, 0xDB0FEAD3L, 0x49F1C09BL, + 0x075372C9L, 0x80991B7BL, 0x25D479D8L, 0xF6E8DEF7L, + 0xE3FE501AL, 0xB6794C3BL, 0x976CE0BDL, 0x04C006BAL, + 0xC1A94FB6L, 0x409F60C4L, 0x5E5C9EC2L, 0x196A2463L, + 0x68FB6FAFL, 0x3E6C53B5L, 0x1339B2EBL, 0x3B52EC6FL, + 0x6DFC511FL, 0x9B30952CL, 0xCC814544L, 0xAF5EBD09L, + 0xBEE3D004L, 0xDE334AFDL, 0x660F2807L, 0x192E4BB3L, + 0xC0CBA857L, 0x45C8740FL, 0xD20B5F39L, 0xB9D3FBDBL, + 0x5579C0BDL, 0x1A60320AL, 0xD6A100C6L, 0x402C7279L, + 0x679F25FEL, 0xFB1FA3CCL, 0x8EA5E9F8L, 0xDB3222F8L, + 0x3C7516DFL, 0xFD616B15L, 0x2F501EC8L, 0xAD0552ABL, + 0x323DB5FAL, 0xFD238760L, 0x53317B48L, 0x3E00DF82L, + 0x9E5C57BBL, 0xCA6F8CA0L, 0x1A87562EL, 0xDF1769DBL, + 0xD542A8F6L, 0x287EFFC3L, 0xAC6732C6L, 0x8C4F5573L, + 0x695B27B0L, 0xBBCA58C8L, 0xE1FFA35DL, 0xB8F011A0L, + 0x10FA3D98L, 0xFD2183B8L, 0x4AFCB56CL, 0x2DD1D35BL, + 0x9A53E479L, 0xB6F84565L, 0xD28E49BCL, 0x4BFB9790L, + 0xE1DDF2DAL, 0xA4CB7E33L, 0x62FB1341L, 0xCEE4C6E8L, + 0xEF20CADAL, 0x36774C01L, 0xD07E9EFEL, 
0x2BF11FB4L, + 0x95DBDA4DL, 0xAE909198L, 0xEAAD8E71L, 0x6B93D5A0L, + 0xD08ED1D0L, 0xAFC725E0L, 0x8E3C5B2FL, 0x8E7594B7L, + 0x8FF6E2FBL, 0xF2122B64L, 0x8888B812L, 0x900DF01CL, + 0x4FAD5EA0L, 0x688FC31CL, 0xD1CFF191L, 0xB3A8C1ADL, + 0x2F2F2218L, 0xBE0E1777L, 0xEA752DFEL, 0x8B021FA1L, + 0xE5A0CC0FL, 0xB56F74E8L, 0x18ACF3D6L, 0xCE89E299L, + 0xB4A84FE0L, 0xFD13E0B7L, 0x7CC43B81L, 0xD2ADA8D9L, + 0x165FA266L, 0x80957705L, 0x93CC7314L, 0x211A1477L, + 0xE6AD2065L, 0x77B5FA86L, 0xC75442F5L, 0xFB9D35CFL, + 0xEBCDAF0CL, 0x7B3E89A0L, 0xD6411BD3L, 0xAE1E7E49L, + 0x00250E2DL, 0x2071B35EL, 0x226800BBL, 0x57B8E0AFL, + 0x2464369BL, 0xF009B91EL, 0x5563911DL, 0x59DFA6AAL, + 0x78C14389L, 0xD95A537FL, 0x207D5BA2L, 0x02E5B9C5L, + 0x83260376L, 0x6295CFA9L, 0x11C81968L, 0x4E734A41L, + 0xB3472DCAL, 0x7B14A94AL, 0x1B510052L, 0x9A532915L, + 0xD60F573FL, 0xBC9BC6E4L, 0x2B60A476L, 0x81E67400L, + 0x08BA6FB5L, 0x571BE91FL, 0xF296EC6BL, 0x2A0DD915L, + 0xB6636521L, 0xE7B9F9B6L, 0xFF34052EL, 0xC5855664L, + 0x53B02D5DL, 0xA99F8FA1L, 0x08BA4799L, 0x6E85076AL }, + { 0x4B7A70E9L, 0xB5B32944L, 0xDB75092EL, 0xC4192623L, + 0xAD6EA6B0L, 0x49A7DF7DL, 0x9CEE60B8L, 0x8FEDB266L, + 0xECAA8C71L, 0x699A17FFL, 0x5664526CL, 0xC2B19EE1L, + 0x193602A5L, 0x75094C29L, 0xA0591340L, 0xE4183A3EL, + 0x3F54989AL, 0x5B429D65L, 0x6B8FE4D6L, 0x99F73FD6L, + 0xA1D29C07L, 0xEFE830F5L, 0x4D2D38E6L, 0xF0255DC1L, + 0x4CDD2086L, 0x8470EB26L, 0x6382E9C6L, 0x021ECC5EL, + 0x09686B3FL, 0x3EBAEFC9L, 0x3C971814L, 0x6B6A70A1L, + 0x687F3584L, 0x52A0E286L, 0xB79C5305L, 0xAA500737L, + 0x3E07841CL, 0x7FDEAE5CL, 0x8E7D44ECL, 0x5716F2B8L, + 0xB03ADA37L, 0xF0500C0DL, 0xF01C1F04L, 0x0200B3FFL, + 0xAE0CF51AL, 0x3CB574B2L, 0x25837A58L, 0xDC0921BDL, + 0xD19113F9L, 0x7CA92FF6L, 0x94324773L, 0x22F54701L, + 0x3AE5E581L, 0x37C2DADCL, 0xC8B57634L, 0x9AF3DDA7L, + 0xA9446146L, 0x0FD0030EL, 0xECC8C73EL, 0xA4751E41L, + 0xE238CD99L, 0x3BEA0E2FL, 0x3280BBA1L, 0x183EB331L, + 0x4E548B38L, 0x4F6DB908L, 0x6F420D03L, 0xF60A04BFL, + 0x2CB81290L, 0x24977C79L, 0x5679B072L, 0xBCAF89AFL, + 0xDE9A771FL, 0xD9930810L, 0xB38BAE12L, 0xDCCF3F2EL, + 0x5512721FL, 0x2E6B7124L, 0x501ADDE6L, 0x9F84CD87L, + 0x7A584718L, 0x7408DA17L, 0xBC9F9ABCL, 0xE94B7D8CL, + 0xEC7AEC3AL, 0xDB851DFAL, 0x63094366L, 0xC464C3D2L, + 0xEF1C1847L, 0x3215D908L, 0xDD433B37L, 0x24C2BA16L, + 0x12A14D43L, 0x2A65C451L, 0x50940002L, 0x133AE4DDL, + 0x71DFF89EL, 0x10314E55L, 0x81AC77D6L, 0x5F11199BL, + 0x043556F1L, 0xD7A3C76BL, 0x3C11183BL, 0x5924A509L, + 0xF28FE6EDL, 0x97F1FBFAL, 0x9EBABF2CL, 0x1E153C6EL, + 0x86E34570L, 0xEAE96FB1L, 0x860E5E0AL, 0x5A3E2AB3L, + 0x771FE71CL, 0x4E3D06FAL, 0x2965DCB9L, 0x99E71D0FL, + 0x803E89D6L, 0x5266C825L, 0x2E4CC978L, 0x9C10B36AL, + 0xC6150EBAL, 0x94E2EA78L, 0xA5FC3C53L, 0x1E0A2DF4L, + 0xF2F74EA7L, 0x361D2B3DL, 0x1939260FL, 0x19C27960L, + 0x5223A708L, 0xF71312B6L, 0xEBADFE6EL, 0xEAC31F66L, + 0xE3BC4595L, 0xA67BC883L, 0xB17F37D1L, 0x018CFF28L, + 0xC332DDEFL, 0xBE6C5AA5L, 0x65582185L, 0x68AB9802L, + 0xEECEA50FL, 0xDB2F953BL, 0x2AEF7DADL, 0x5B6E2F84L, + 0x1521B628L, 0x29076170L, 0xECDD4775L, 0x619F1510L, + 0x13CCA830L, 0xEB61BD96L, 0x0334FE1EL, 0xAA0363CFL, + 0xB5735C90L, 0x4C70A239L, 0xD59E9E0BL, 0xCBAADE14L, + 0xEECC86BCL, 0x60622CA7L, 0x9CAB5CABL, 0xB2F3846EL, + 0x648B1EAFL, 0x19BDF0CAL, 0xA02369B9L, 0x655ABB50L, + 0x40685A32L, 0x3C2AB4B3L, 0x319EE9D5L, 0xC021B8F7L, + 0x9B540B19L, 0x875FA099L, 0x95F7997EL, 0x623D7DA8L, + 0xF837889AL, 0x97E32D77L, 0x11ED935FL, 0x16681281L, + 0x0E358829L, 0xC7E61FD6L, 0x96DEDFA1L, 0x7858BA99L, + 0x57F584A5L, 0x1B227263L, 0x9B83C3FFL, 0x1AC24696L, + 0xCDB30AEBL, 0x532E3054L, 
0x8FD948E4L, 0x6DBC3128L, + 0x58EBF2EFL, 0x34C6FFEAL, 0xFE28ED61L, 0xEE7C3C73L, + 0x5D4A14D9L, 0xE864B7E3L, 0x42105D14L, 0x203E13E0L, + 0x45EEE2B6L, 0xA3AAABEAL, 0xDB6C4F15L, 0xFACB4FD0L, + 0xC742F442L, 0xEF6ABBB5L, 0x654F3B1DL, 0x41CD2105L, + 0xD81E799EL, 0x86854DC7L, 0xE44B476AL, 0x3D816250L, + 0xCF62A1F2L, 0x5B8D2646L, 0xFC8883A0L, 0xC1C7B6A3L, + 0x7F1524C3L, 0x69CB7492L, 0x47848A0BL, 0x5692B285L, + 0x095BBF00L, 0xAD19489DL, 0x1462B174L, 0x23820E00L, + 0x58428D2AL, 0x0C55F5EAL, 0x1DADF43EL, 0x233F7061L, + 0x3372F092L, 0x8D937E41L, 0xD65FECF1L, 0x6C223BDBL, + 0x7CDE3759L, 0xCBEE7460L, 0x4085F2A7L, 0xCE77326EL, + 0xA6078084L, 0x19F8509EL, 0xE8EFD855L, 0x61D99735L, + 0xA969A7AAL, 0xC50C06C2L, 0x5A04ABFCL, 0x800BCADCL, + 0x9E447A2EL, 0xC3453484L, 0xFDD56705L, 0x0E1E9EC9L, + 0xDB73DBD3L, 0x105588CDL, 0x675FDA79L, 0xE3674340L, + 0xC5C43465L, 0x713E38D8L, 0x3D28F89EL, 0xF16DFF20L, + 0x153E21E7L, 0x8FB03D4AL, 0xE6E39F2BL, 0xDB83ADF7L }, + { 0xE93D5A68L, 0x948140F7L, 0xF64C261CL, 0x94692934L, + 0x411520F7L, 0x7602D4F7L, 0xBCF46B2EL, 0xD4A20068L, + 0xD4082471L, 0x3320F46AL, 0x43B7D4B7L, 0x500061AFL, + 0x1E39F62EL, 0x97244546L, 0x14214F74L, 0xBF8B8840L, + 0x4D95FC1DL, 0x96B591AFL, 0x70F4DDD3L, 0x66A02F45L, + 0xBFBC09ECL, 0x03BD9785L, 0x7FAC6DD0L, 0x31CB8504L, + 0x96EB27B3L, 0x55FD3941L, 0xDA2547E6L, 0xABCA0A9AL, + 0x28507825L, 0x530429F4L, 0x0A2C86DAL, 0xE9B66DFBL, + 0x68DC1462L, 0xD7486900L, 0x680EC0A4L, 0x27A18DEEL, + 0x4F3FFEA2L, 0xE887AD8CL, 0xB58CE006L, 0x7AF4D6B6L, + 0xAACE1E7CL, 0xD3375FECL, 0xCE78A399L, 0x406B2A42L, + 0x20FE9E35L, 0xD9F385B9L, 0xEE39D7ABL, 0x3B124E8BL, + 0x1DC9FAF7L, 0x4B6D1856L, 0x26A36631L, 0xEAE397B2L, + 0x3A6EFA74L, 0xDD5B4332L, 0x6841E7F7L, 0xCA7820FBL, + 0xFB0AF54EL, 0xD8FEB397L, 0x454056ACL, 0xBA489527L, + 0x55533A3AL, 0x20838D87L, 0xFE6BA9B7L, 0xD096954BL, + 0x55A867BCL, 0xA1159A58L, 0xCCA92963L, 0x99E1DB33L, + 0xA62A4A56L, 0x3F3125F9L, 0x5EF47E1CL, 0x9029317CL, + 0xFDF8E802L, 0x04272F70L, 0x80BB155CL, 0x05282CE3L, + 0x95C11548L, 0xE4C66D22L, 0x48C1133FL, 0xC70F86DCL, + 0x07F9C9EEL, 0x41041F0FL, 0x404779A4L, 0x5D886E17L, + 0x325F51EBL, 0xD59BC0D1L, 0xF2BCC18FL, 0x41113564L, + 0x257B7834L, 0x602A9C60L, 0xDFF8E8A3L, 0x1F636C1BL, + 0x0E12B4C2L, 0x02E1329EL, 0xAF664FD1L, 0xCAD18115L, + 0x6B2395E0L, 0x333E92E1L, 0x3B240B62L, 0xEEBEB922L, + 0x85B2A20EL, 0xE6BA0D99L, 0xDE720C8CL, 0x2DA2F728L, + 0xD0127845L, 0x95B794FDL, 0x647D0862L, 0xE7CCF5F0L, + 0x5449A36FL, 0x877D48FAL, 0xC39DFD27L, 0xF33E8D1EL, + 0x0A476341L, 0x992EFF74L, 0x3A6F6EABL, 0xF4F8FD37L, + 0xA812DC60L, 0xA1EBDDF8L, 0x991BE14CL, 0xDB6E6B0DL, + 0xC67B5510L, 0x6D672C37L, 0x2765D43BL, 0xDCD0E804L, + 0xF1290DC7L, 0xCC00FFA3L, 0xB5390F92L, 0x690FED0BL, + 0x667B9FFBL, 0xCEDB7D9CL, 0xA091CF0BL, 0xD9155EA3L, + 0xBB132F88L, 0x515BAD24L, 0x7B9479BFL, 0x763BD6EBL, + 0x37392EB3L, 0xCC115979L, 0x8026E297L, 0xF42E312DL, + 0x6842ADA7L, 0xC66A2B3BL, 0x12754CCCL, 0x782EF11CL, + 0x6A124237L, 0xB79251E7L, 0x06A1BBE6L, 0x4BFB6350L, + 0x1A6B1018L, 0x11CAEDFAL, 0x3D25BDD8L, 0xE2E1C3C9L, + 0x44421659L, 0x0A121386L, 0xD90CEC6EL, 0xD5ABEA2AL, + 0x64AF674EL, 0xDA86A85FL, 0xBEBFE988L, 0x64E4C3FEL, + 0x9DBC8057L, 0xF0F7C086L, 0x60787BF8L, 0x6003604DL, + 0xD1FD8346L, 0xF6381FB0L, 0x7745AE04L, 0xD736FCCCL, + 0x83426B33L, 0xF01EAB71L, 0xB0804187L, 0x3C005E5FL, + 0x77A057BEL, 0xBDE8AE24L, 0x55464299L, 0xBF582E61L, + 0x4E58F48FL, 0xF2DDFDA2L, 0xF474EF38L, 0x8789BDC2L, + 0x5366F9C3L, 0xC8B38E74L, 0xB475F255L, 0x46FCD9B9L, + 0x7AEB2661L, 0x8B1DDF84L, 0x846A0E79L, 0x915F95E2L, + 0x466E598EL, 0x20B45770L, 0x8CD55591L, 0xC902DE4CL, + 0xB90BACE1L, 
0xBB8205D0L, 0x11A86248L, 0x7574A99EL, + 0xB77F19B6L, 0xE0A9DC09L, 0x662D09A1L, 0xC4324633L, + 0xE85A1F02L, 0x09F0BE8CL, 0x4A99A025L, 0x1D6EFE10L, + 0x1AB93D1DL, 0x0BA5A4DFL, 0xA186F20FL, 0x2868F169L, + 0xDCB7DA83L, 0x573906FEL, 0xA1E2CE9BL, 0x4FCD7F52L, + 0x50115E01L, 0xA70683FAL, 0xA002B5C4L, 0x0DE6D027L, + 0x9AF88C27L, 0x773F8641L, 0xC3604C06L, 0x61A806B5L, + 0xF0177A28L, 0xC0F586E0L, 0x006058AAL, 0x30DC7D62L, + 0x11E69ED7L, 0x2338EA63L, 0x53C2DD94L, 0xC2C21634L, + 0xBBCBEE56L, 0x90BCB6DEL, 0xEBFC7DA1L, 0xCE591D76L, + 0x6F05E409L, 0x4B7C0188L, 0x39720A3DL, 0x7C927C24L, + 0x86E3725FL, 0x724D9DB9L, 0x1AC15BB4L, 0xD39EB8FCL, + 0xED545578L, 0x08FCA5B5L, 0xD83D7CD3L, 0x4DAD0FC4L, + 0x1E50EF5EL, 0xB161E6F8L, 0xA28514D9L, 0x6C51133CL, + 0x6FD5C7E7L, 0x56E14EC4L, 0x362ABFCEL, 0xDDC6C837L, + 0xD79A3234L, 0x92638212L, 0x670EFA8EL, 0x406000E0L }, + { 0x3A39CE37L, 0xD3FAF5CFL, 0xABC27737L, 0x5AC52D1BL, + 0x5CB0679EL, 0x4FA33742L, 0xD3822740L, 0x99BC9BBEL, + 0xD5118E9DL, 0xBF0F7315L, 0xD62D1C7EL, 0xC700C47BL, + 0xB78C1B6BL, 0x21A19045L, 0xB26EB1BEL, 0x6A366EB4L, + 0x5748AB2FL, 0xBC946E79L, 0xC6A376D2L, 0x6549C2C8L, + 0x530FF8EEL, 0x468DDE7DL, 0xD5730A1DL, 0x4CD04DC6L, + 0x2939BBDBL, 0xA9BA4650L, 0xAC9526E8L, 0xBE5EE304L, + 0xA1FAD5F0L, 0x6A2D519AL, 0x63EF8CE2L, 0x9A86EE22L, + 0xC089C2B8L, 0x43242EF6L, 0xA51E03AAL, 0x9CF2D0A4L, + 0x83C061BAL, 0x9BE96A4DL, 0x8FE51550L, 0xBA645BD6L, + 0x2826A2F9L, 0xA73A3AE1L, 0x4BA99586L, 0xEF5562E9L, + 0xC72FEFD3L, 0xF752F7DAL, 0x3F046F69L, 0x77FA0A59L, + 0x80E4A915L, 0x87B08601L, 0x9B09E6ADL, 0x3B3EE593L, + 0xE990FD5AL, 0x9E34D797L, 0x2CF0B7D9L, 0x022B8B51L, + 0x96D5AC3AL, 0x017DA67DL, 0xD1CF3ED6L, 0x7C7D2D28L, + 0x1F9F25CFL, 0xADF2B89BL, 0x5AD6B472L, 0x5A88F54CL, + 0xE029AC71L, 0xE019A5E6L, 0x47B0ACFDL, 0xED93FA9BL, + 0xE8D3C48DL, 0x283B57CCL, 0xF8D56629L, 0x79132E28L, + 0x785F0191L, 0xED756055L, 0xF7960E44L, 0xE3D35E8CL, + 0x15056DD4L, 0x88F46DBAL, 0x03A16125L, 0x0564F0BDL, + 0xC3EB9E15L, 0x3C9057A2L, 0x97271AECL, 0xA93A072AL, + 0x1B3F6D9BL, 0x1E6321F5L, 0xF59C66FBL, 0x26DCF319L, + 0x7533D928L, 0xB155FDF5L, 0x03563482L, 0x8ABA3CBBL, + 0x28517711L, 0xC20AD9F8L, 0xABCC5167L, 0xCCAD925FL, + 0x4DE81751L, 0x3830DC8EL, 0x379D5862L, 0x9320F991L, + 0xEA7A90C2L, 0xFB3E7BCEL, 0x5121CE64L, 0x774FBE32L, + 0xA8B6E37EL, 0xC3293D46L, 0x48DE5369L, 0x6413E680L, + 0xA2AE0810L, 0xDD6DB224L, 0x69852DFDL, 0x09072166L, + 0xB39A460AL, 0x6445C0DDL, 0x586CDECFL, 0x1C20C8AEL, + 0x5BBEF7DDL, 0x1B588D40L, 0xCCD2017FL, 0x6BB4E3BBL, + 0xDDA26A7EL, 0x3A59FF45L, 0x3E350A44L, 0xBCB4CDD5L, + 0x72EACEA8L, 0xFA6484BBL, 0x8D6612AEL, 0xBF3C6F47L, + 0xD29BE463L, 0x542F5D9EL, 0xAEC2771BL, 0xF64E6370L, + 0x740E0D8DL, 0xE75B1357L, 0xF8721671L, 0xAF537D5DL, + 0x4040CB08L, 0x4EB4E2CCL, 0x34D2466AL, 0x0115AF84L, + 0xE1B00428L, 0x95983A1DL, 0x06B89FB4L, 0xCE6EA048L, + 0x6F3F3B82L, 0x3520AB82L, 0x011A1D4BL, 0x277227F8L, + 0x611560B1L, 0xE7933FDCL, 0xBB3A792BL, 0x344525BDL, + 0xA08839E1L, 0x51CE794BL, 0x2F32C9B7L, 0xA01FBAC9L, + 0xE01CC87EL, 0xBCC7D1F6L, 0xCF0111C3L, 0xA1E8AAC7L, + 0x1A908749L, 0xD44FBD9AL, 0xD0DADECBL, 0xD50ADA38L, + 0x0339C32AL, 0xC6913667L, 0x8DF9317CL, 0xE0B12B4FL, + 0xF79E59B7L, 0x43F5BB3AL, 0xF2D519FFL, 0x27D9459CL, + 0xBF97222CL, 0x15E6FC2AL, 0x0F91FC71L, 0x9B941525L, + 0xFAE59361L, 0xCEB69CEBL, 0xC2A86459L, 0x12BAA8D1L, + 0xB6C1075EL, 0xE3056A0CL, 0x10D25065L, 0xCB03A442L, + 0xE0EC6E0EL, 0x1698DB3BL, 0x4C98A0BEL, 0x3278E964L, + 0x9F1F9532L, 0xE0D392DFL, 0xD3A0342BL, 0x8971F21EL, + 0x1B0A7441L, 0x4BA3348CL, 0xC5BE7120L, 0xC37632D8L, + 0xDF359F8DL, 0x9B992F2EL, 0xE60B6F47L, 0x0FE3F11DL, + 
0xE54CDA54L, 0x1EDAD891L, 0xCE6279CFL, 0xCD3E7E6FL, + 0x1618B166L, 0xFD2C1D05L, 0x848FD2C5L, 0xF6FB2299L, + 0xF523F357L, 0xA6327623L, 0x93A83531L, 0x56CCCD02L, + 0xACF08162L, 0x5A75EBB5L, 0x6E163697L, 0x88D273CCL, + 0xDE966292L, 0x81B949D0L, 0x4C50901BL, 0x71C65614L, + 0xE6C6C7BDL, 0x327A140AL, 0x45E1D006L, 0xC3F27B9AL, + 0xC9AA53FDL, 0x62A80F00L, 0xBB25BFE2L, 0x35BDD2F6L, + 0x71126905L, 0xB2040222L, 0xB6CBCF7CL, 0xCD769C2BL, + 0x53113EC0L, 0x1640E3D3L, 0x38ABBD60L, 0x2547ADF0L, + 0xBA38209CL, 0xF746CE76L, 0x77AFA1C5L, 0x20756060L, + 0x85CBFE4EL, 0x8AE88DD8L, 0x7AAAF9B0L, 0x4CF9AA7EL, + 0x1948C25CL, 0x02FB8A8CL, 0x01C36AE4L, 0xD6EBE1F9L, + 0x90D4F869L, 0xA65CDEA0L, 0x3F09252DL, 0xC208E69FL, + 0xB74E6132L, 0xCE77E25BL, 0x578FDFE3L, 0x3AC372E6L } +}; + + +static unsigned long F(BLOWFISH_CTX *ctx, unsigned long x) { + unsigned short a, b, c, d; + unsigned long y; + + d = (unsigned short)(x & 0xFF); + x >>= 8; + c = (unsigned short)(x & 0xFF); + x >>= 8; + b = (unsigned short)(x & 0xFF); + x >>= 8; + a = (unsigned short)(x & 0xFF); + y = ctx->S[0][a] + ctx->S[1][b]; + y = y ^ ctx->S[2][c]; + y = y + ctx->S[3][d]; + + return y; +} + + +static void Blowfish_Encrypt(BLOWFISH_CTX *ctx, unsigned long *xl, unsigned long *xr){ + unsigned long Xl; + unsigned long Xr; + unsigned long temp; + short i; + + Xl = *xl; + Xr = *xr; + + for (i = 0; i < N; ++i) { + Xl = Xl ^ ctx->P[i]; + Xr = F(ctx, Xl) ^ Xr; + + temp = Xl; + Xl = Xr; + Xr = temp; + } + + temp = Xl; + Xl = Xr; + Xr = temp; + + Xr = Xr ^ ctx->P[N]; + Xl = Xl ^ ctx->P[N + 1]; + + *xl = Xl; + *xr = Xr; +} + + +static void Blowfish_Decrypt(BLOWFISH_CTX *ctx, unsigned long *xl, unsigned long *xr){ + unsigned long Xl; + unsigned long Xr; + unsigned long temp; + short i; + + Xl = *xl; + Xr = *xr; + + for (i = N + 1; i > 1; --i) { + Xl = Xl ^ ctx->P[i]; + Xr = F(ctx, Xl) ^ Xr; + + /* Exchange Xl and Xr */ + temp = Xl; + Xl = Xr; + Xr = temp; + } + + /* Exchange Xl and Xr */ + temp = Xl; + Xl = Xr; + Xr = temp; + + Xr = Xr ^ ctx->P[1]; + Xl = Xl ^ ctx->P[0]; + + *xl = Xl; + *xr = Xr; +} + +static void Blowfish_Init(BLOWFISH_CTX *ctx, unsigned char *key, int keyLen) { + int i, j, k; + unsigned long data, datal, datar; + + for (i = 0; i < 4; i++) { + for (j = 0; j < 256; j++) + ctx->S[i][j] = ORIG_S[i][j]; + } + + j = 0; + for (i = 0; i < N + 2; ++i) { + data = 0x00000000; + for (k = 0; k < 4; ++k) { + data = (data << 8) | key[j]; + j = j + 1; + if (j >= keyLen) + j = 0; + } + ctx->P[i] = ORIG_P[i] ^ data; + } + + datal = 0x00000000; + datar = 0x00000000; + + for (i = 0; i < N + 2; i += 2) { + Blowfish_Encrypt(ctx, &datal, &datar); + ctx->P[i] = datal; + ctx->P[i + 1] = datar; + } + + for (i = 0; i < 4; ++i) { + for (j = 0; j < 256; j += 2) { + Blowfish_Encrypt(ctx, &datal, &datar); + ctx->S[i][j] = datal; + ctx->S[i][j + 1] = datar; + } + } +} + +static unsigned long long blowfish_test(unsigned long long input) +{ + unsigned long L = input >> 32, R = input; + BLOWFISH_CTX ctx; + Blowfish_Init(&ctx, (unsigned char*)"TESTKEY", 7); + Blowfish_Encrypt(&ctx, &L, &R); + Blowfish_Decrypt(&ctx, &L, &R); + return ((unsigned long long)L << 32) | R; +} diff -r 3464bb656a9c -r 8475a4e0425e tools/tests/blowfish.mk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/tests/blowfish.mk Thu Jan 18 15:18:07 2007 +0000 @@ -0,0 +1,23 @@ + +override XEN_TARGET_ARCH = x86_32 +XEN_ROOT = ../.. +CFLAGS := +include $(XEN_ROOT)/tools/Rules.mk + +# Disable PIE/SSP if GCC supports them. They can break us. 
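The blowfish.mk fragment here builds blowfish.c as a freestanding 32-bit image (no libc, text linked at 0x100000, flattened with objcopy -O binary), and the od | sed rule added to tools/tests/Makefile earlier in this patch turns the resulting blowfish.bin into a blowfish_code[] array that test_x86_emulator.c copies into its mapped region and single-steps. As a rough, self-contained illustration of what that shell rule produces, the Python sketch below does the same binary-to-header conversion; the filenames are placeholders and the word order assumes little-endian input, which is what the x86_32 target uses.

    import struct, sys

    def bin_to_header(src="blowfish.bin", dst="blowfish.h"):
        data = open(src, "rb").read()
        data += b"\0" * (-len(data) % 4)          # pad to a whole number of 32-bit words
        words = struct.unpack("<%dL" % (len(data) // 4), data)
        out = open(dst, "w")
        out.write("static unsigned int blowfish_code[] = {\n")
        for i, w in enumerate(words):
            out.write("0x%08x,%s" % (w, "\n" if i % 4 == 3 else " "))
        out.write("\n};\n")
        out.close()

    if __name__ == "__main__":
        bin_to_header(*sys.argv[1:])

The real Makefile rule simply pipes od through sed instead, but the effect is the same: the emulator test gets a real instruction stream to chew on rather than hand-written byte sequences.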
+CFLAGS += $(call cc-option,$(CC),-nopie,) +CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,) +CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,) + +OBJCOPY = objcopy +CFLAGS += -fno-builtin -O2 -msoft-float +LDFLAGS = -nostdlib -Wl,-N -Wl,-Ttext -Wl,0x100000 + +.PHONY: all +all: blowfish.bin + +blowfish.bin: blowfish.c + $(CC) $(CFLAGS) -c blowfish.c + $(CC) $(CFLAGS) $(LDFLAGS) -o blowfish.tmp blowfish.o + $(OBJCOPY) -O binary blowfish.tmp blowfish.bin + rm -f blowfish.tmp diff -r 3464bb656a9c -r 8475a4e0425e tools/tests/test_x86_emulator.c --- a/tools/tests/test_x86_emulator.c Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/tests/test_x86_emulator.c Thu Jan 18 15:18:07 2007 +0000 @@ -15,6 +15,19 @@ typedef int64_t s64; #include <asm-x86/x86_emulate.h> #include <sys/mman.h> +#include "blowfish.h" + +#define MMAP_SZ 16384 + +/* EFLAGS bit definitions. */ +#define EFLG_OF (1<<11) +#define EFLG_DF (1<<10) +#define EFLG_SF (1<<7) +#define EFLG_ZF (1<<6) +#define EFLG_AF (1<<4) +#define EFLG_PF (1<<2) +#define EFLG_CF (1<<0) + static int read( unsigned int seg, unsigned long offset, @@ -97,20 +110,25 @@ int main(int argc, char **argv) { struct x86_emulate_ctxt ctxt; struct cpu_user_regs regs; - char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */ - unsigned int *res; + char *instr; + unsigned int *res, i; int rc; +#ifndef __x86_64__ + unsigned int bcdres_native, bcdres_emul; +#endif ctxt.regs = ®s; - ctxt.address_bytes = 4; - - res = mmap((void *)0x100000, 0x1000, PROT_READ|PROT_WRITE, + ctxt.addr_size = 32; + ctxt.sp_size = 32; + + res = mmap((void *)0x100000, MMAP_SZ, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); if ( res == MAP_FAILED ) { fprintf(stderr, "mmap to low address failed\n"); exit(1); } + instr = (char *)res + 0x100; printf("%-40s", "Testing addl %%ecx,(%%eax)..."); instr[0] = 0x01; instr[1] = 0x08; @@ -399,6 +417,112 @@ int main(int argc, char **argv) goto fail; printf("okay\n"); + printf("%-40s", "Testing daa/das (all inputs)..."); +#ifndef __x86_64__ + /* Bits 0-7: AL; Bit 8: EFLG_AF; Bit 9: EFLG_CF; Bit 10: DAA vs. DAS. */ + for ( i = 0; i < 0x800; i++ ) + { + regs.eflags = (i & 0x200) ? EFLG_CF : 0; + regs.eflags |= (i & 0x100) ? EFLG_AF : 0; + if ( i & 0x400 ) + __asm__ ( + "pushf; and $0xffffffee,(%%esp); or %1,(%%esp); popf; das; " + "pushf; popl %1" + : "=a" (bcdres_native), "=r" (regs.eflags) + : "0" (i & 0xff), "1" (regs.eflags) ); + else + __asm__ ( + "pushf; and $0xffffffee,(%%esp); or %1,(%%esp); popf; daa; " + "pushf; popl %1" + : "=a" (bcdres_native), "=r" (regs.eflags) + : "0" (i & 0xff), "1" (regs.eflags) ); + bcdres_native |= (regs.eflags & EFLG_PF) ? 0x1000 : 0; + bcdres_native |= (regs.eflags & EFLG_ZF) ? 0x800 : 0; + bcdres_native |= (regs.eflags & EFLG_SF) ? 0x400 : 0; + bcdres_native |= (regs.eflags & EFLG_CF) ? 0x200 : 0; + bcdres_native |= (regs.eflags & EFLG_AF) ? 0x100 : 0; + + instr[0] = (i & 0x400) ? 0x2f: 0x27; /* daa/das */ + regs.eflags = (i & 0x200) ? EFLG_CF : 0; + regs.eflags |= (i & 0x100) ? EFLG_AF : 0; + regs.eip = (unsigned long)&instr[0]; + regs.eax = (unsigned char)i; + rc = x86_emulate(&ctxt, &emulops); + bcdres_emul = regs.eax; + bcdres_emul |= (regs.eflags & EFLG_PF) ? 0x1000 : 0; + bcdres_emul |= (regs.eflags & EFLG_ZF) ? 0x800 : 0; + bcdres_emul |= (regs.eflags & EFLG_SF) ? 0x400 : 0; + bcdres_emul |= (regs.eflags & EFLG_CF) ? 0x200 : 0; + bcdres_emul |= (regs.eflags & EFLG_AF) ? 
0x100 : 0; + if ( (rc != 0) || (regs.eax > 255) || + (regs.eip != (unsigned long)&instr[1]) ) + goto fail; + + if ( bcdres_emul != bcdres_native ) + { + printf("%s: AL=%02x %s %s\n" + "Output: AL=%02x %s %s %s %s %s\n" + "Emul.: AL=%02x %s %s %s %s %s\n", + (i & 0x400) ? "DAS" : "DAA", + (unsigned char)i, + (i & 0x200) ? "CF" : " ", + (i & 0x100) ? "AF" : " ", + (unsigned char)bcdres_native, + (bcdres_native & 0x200) ? "CF" : " ", + (bcdres_native & 0x100) ? "AF" : " ", + (bcdres_native & 0x1000) ? "PF" : " ", + (bcdres_native & 0x800) ? "ZF" : " ", + (bcdres_native & 0x400) ? "SF" : " ", + (unsigned char)bcdres_emul, + (bcdres_emul & 0x200) ? "CF" : " ", + (bcdres_emul & 0x100) ? "AF" : " ", + (bcdres_emul & 0x1000) ? "PF" : " ", + (bcdres_emul & 0x800) ? "ZF" : " ", + (bcdres_emul & 0x400) ? "SF" : " "); + goto fail; + } + } + printf("okay\n"); +#else + printf("skipped\n"); +#endif + + printf("Testing blowfish code sequence"); + memcpy(res, blowfish_code, sizeof(blowfish_code)); + regs.eax = 2; + regs.edx = 1; + regs.eip = (unsigned long)res; + regs.esp = (unsigned long)res + MMAP_SZ - 4; + *(uint32_t *)(unsigned long)regs.esp = 0x12345678; + regs.eflags = 2; + i = 0; + while ( (uint32_t)regs.eip != 0x12345678 ) + { + if ( (i++ & 8191) == 0 ) + printf("."); + rc = x86_emulate(&ctxt, &emulops); + if ( rc != 0 ) + { + printf("failed at %%eip == %08x\n", (unsigned int)regs.eip); + return 1; + } + } + if ( (regs.esp != ((unsigned long)res + MMAP_SZ)) || + (regs.eax != 2) || (regs.edx != 1) ) + goto fail; + printf("okay\n"); + +#ifndef __x86_64__ + printf("%-40s", "Testing blowfish native execution..."); + asm volatile ( + "movl $0x100000,%%ecx; call *%%ecx" + : "=a" (regs.eax), "=d" (regs.edx) + : "0" (2), "1" (1) : "ecx" ); + if ( (regs.eax != 2) || (regs.edx != 1) ) + goto fail; + printf("okay\n"); +#endif + return 0; fail: diff -r 3464bb656a9c -r 8475a4e0425e tools/xenstat/xentop/xentop.c --- a/tools/xenstat/xentop/xentop.c Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/xenstat/xentop/xentop.c Thu Jan 18 15:18:07 2007 +0000 @@ -1067,9 +1067,9 @@ int main(int argc, char **argv) gettimeofday(&curtime, NULL); top(); oldtime = curtime; - sleep(delay); if ((!loop) && !(--iterations)) break; + sleep(delay); } while (1); } diff -r 3464bb656a9c -r 8475a4e0425e tools/xm-test/README --- a/tools/xm-test/README Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/xm-test/README Thu Jan 18 15:18:07 2007 +0000 @@ -207,6 +207,49 @@ running DomUs on the system to provide e running DomUs on the system to provide each test with a "clean slate". +Testing the XML-RPC and Xen-API interfaces of xend +================================================== + +The xm-test suite can be used to test xm's interface with xend using +either XML-RPC or the Xen-API. In order to use either one of these modes, +xm needs to be configured using its configuration file +'/etc/xen/xm-config.xml'. +Note: The current default configuration after a fresh install of the xen +sources currently is to use the XML-RPC interface for communication with xend. + +Example content for the xm-config.xml for using the Xen-API looks as +follows: + +<xm> + <server type='Xen-API' + uri='http://localhost:9363/' + username='me' + password='mypassword' /> +</xm> + +This configuration makes xm talk to xend using port 9363. For this to +work, also xend needs to be configured to listen to port 9363. Therefore +The following line must be in /etc/xen/xend-config.sxp. 
+ +(xen-api-server (( 127.0.0.1:9363 none ))) + +To communicate via the legacy XML-RPC interface, the file +'/etc/xen/xm-config.xml' may simply have the following content or +may be complete remove from the /etc/xen directory. + +<xm> +</xm> + +A few tests have been written for the xm-test suite that test the +Xen-API interface directly without relying on 'xm'. These tests can be +found in the grouptest 'xapi' and for them to work properly, xm must have +been configured to use the Xen-API following the instructions above. To +run these test, the following command line can be invoked: + + # ./runtest.sh -g xapi <logfile> + + + Extending ========= diff -r 3464bb656a9c -r 8475a4e0425e tools/xm-test/configure.ac --- a/tools/xm-test/configure.ac Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/xm-test/configure.ac Thu Jan 18 15:18:07 2007 +0000 @@ -150,6 +150,7 @@ AC_CONFIG_FILES([ tests/vcpu-pin/Makefile tests/vcpu-disable/Makefile tests/vtpm/Makefile + tests/xapi/Makefile tests/enforce_dom0_cpus/Makefile lib/XmTestReport/xmtest.py lib/XmTestLib/config.py diff -r 3464bb656a9c -r 8475a4e0425e tools/xm-test/grouptest/xapi --- a/tools/xm-test/grouptest/xapi Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/xm-test/grouptest/xapi Thu Jan 18 15:18:07 2007 +0000 @@ -1,1 +1,2 @@ vtpm 09_vtpm-xapi.test +xapi vtpm 09_vtpm-xapi.test diff -r 3464bb656a9c -r 8475a4e0425e tools/xm-test/lib/XmTestLib/DomainTracking.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/xm-test/lib/XmTestLib/DomainTracking.py Thu Jan 18 15:18:07 2007 +0000 @@ -0,0 +1,61 @@ +#!/usr/bin/python +""" + Copyright (C) International Business Machines Corp., 2005 + Author: Dan Smith <danms@xxxxxxxxxx> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; under version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +""" + +import atexit +import Test +import xapi + +# Tracking of managed domains +_managedDomains = [] +_VMuuids = [] +registered = 0 + +def addManagedDomain(name): + global registered + _managedDomains.append(name) + if not registered: + atexit.register(destroyManagedDomains) + registered = 1 + +def delManagedDomain(name): + if name in _managedDomains: + del _managedDomains[_managedDomains.index(name)] + +def addXAPIDomain(uuid): + global registered + _VMuuids.append(uuid) + if not registered: + atexit.register(destroyManagedDomains) + registered = 1 + +def delXAPIDomain(uuid): + _VMuuids.remove(uuid) + +def destroyManagedDomains(): + if len(_managedDomains) > 0: + for m in _managedDomains: + Test.traceCommand("xm destroy %s" % m) + Test.traceCommand("xm delete %s" % m) + if len(_VMuuids) > 0: + for uuid in _VMuuids: + Test.traceCommand("xm destroy %s" % uuid) + Test.traceCommand("xm delete %s" % uuid) + + diff -r 3464bb656a9c -r 8475a4e0425e tools/xm-test/lib/XmTestLib/XenAPIDomain.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/xm-test/lib/XmTestLib/XenAPIDomain.py Thu Jan 18 15:18:07 2007 +0000 @@ -0,0 +1,176 @@ +#!/usr/bin/python +""" + Copyright (C) International Business Machines Corp., 2005 + Author: Stefan Berger <stefanb@xxxxxxxxxx> + + Based on XenDomain.py by Dan Smith <danms@xxxxxxxxxx> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; under version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +""" +import os +import sys +from XmTestLib import * +from xen.util.xmlrpclib2 import ServerProxy +from types import DictType + + +class XenAPIConfig: + """An object to help create a VM configuration usable via Xen-API""" + def __init__(self): + self.opts = {} + #Array to translate old option to new ones + self.opttrlate = { 'name' : 'name_label' , + 'memory' : [ 'memory_static_max' , + 'memory_static_min' , + 'memory_dynamic_min', + 'memory_dynamic_max' ], + 'kernel' : 'PV_kernel', + 'ramdisk': 'PV_ramdisk', + 'root' : 'PV_args'} + + def setOpt(self, name, value): + """Set an option in the config""" + if name in self.opttrlate.keys(): + _name = self.opttrlate[name] + else: + _name = name + + if isinstance(_name, list): + for _n in _name: + self.opts[_n] = value + else: + self.opts[_name] = value + + def getOpt(self, name): + """Return the value of a config option""" + if name in self.opts.keys(): + return self.opts[name] + else: + return None + + def setOpts(self, opts): + """Batch-set options from a dictionary""" + for k, v in opts.items(): + self.setOpt(k, v) + + def getOpts(self): + return self.opts + + +class XenAPIDomain(XenDomain): + + def __init__(self, name=None, config=None): + if name: + self.name = name + else: + self.name = getUniqueName() + + self.config = config + self.console = None + self.netEnv = "bridge" + + self.session = xapi.connect() + session = self.session + try: + self.vm_uuid = session.xenapi.VM.create(self.config.getOpts()) + addXAPIDomain(self.vm_uuid) + except: + raise DomainError("Could not create VM config file for " + "managed domain.") + + #Only support PV for now. + self.type = "PV" + + def start(self, noConsole=False, startpaused=False): + #start the VM + session = self.session + if self.vm_uuid: + try: + session.xenapi.VM.start(self.vm_uuid, startpaused) + except: + raise DomainError("Could not start domain") + else: + raise DomainError("VM has no UUID - does VM config exist?") + + if startpaused: + return + + if self.getDomainType() == "HVM": + waitForBoot() + + if self.console and noConsole == True: + self.closeConsole() + + elif self.console and noConsole == False: + return self.console + + elif not self.console and noConsole == False: + return self.getConsole() + + def stop(self): + if self.vm_uuid: + self.session.xenapi.VM.hard_shutdown(self.vm_uuid) + else: + raise DomainError("VM has no UUID - does VM config exist?") + + def destroy(self): + #Stop VM first. 
+ self.stop() + if self.vm_uuid: + self.session.xenapi.VM.destroy(self.vm_uuid) + delXAPIDomain(self.vm_uuid) + else: + raise DomainError("VM has no UUID - does VM config exist?") + + def get_uuid(self): + return self.vm_uuid + + def newDevice(self, Device, *args): + raise DomainError("No support for newDevice().") + + def removeDevice(self, id): + raise DomainError("No support for removeDevice().") + + def removeAllDevices(self, id): + raise DomainError("No support for removeAllDevices().") + + def isRunning(self): + return isDomainRunning(self.name) + + def getDevice(self, id): + raise DomainError("No support for getDevice().") + + +class XmTestAPIDomain(XenAPIDomain): + + """Create a new managed xm-test domain + @param name: The requested domain name + @param extraConfig: Additional configuration options + @param baseConfig: The initial configuration defaults to use + """ + def __init__(self, name=None, extraConfig=None, + baseConfig=arch.configDefaults): + config = XenAPIConfig() + config.setOpts(baseConfig) + if extraConfig: + config.setOpts(extraConfig) + + if name: + config.setOpt("name_label", name) + elif not config.getOpt("name_label"): + config.setOpt("name_label", getUniqueName()) + + XenAPIDomain.__init__(self, config.getOpt("name_label"), + config=config) diff -r 3464bb656a9c -r 8475a4e0425e tools/xm-test/lib/XmTestLib/XenDomain.py --- a/tools/xm-test/lib/XmTestLib/XenDomain.py Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/xm-test/lib/XmTestLib/XenDomain.py Thu Jan 18 15:18:07 2007 +0000 @@ -29,6 +29,7 @@ from config import * from config import * from Console import * from XenDevice import * +from DomainTracking import * from acm import * @@ -147,7 +148,7 @@ class DomainError(Exception): class XenDomain: - def __init__(self, name=None, config=None): + def __init__(self, name=None, config=None, isManaged=False): """Create a domain object. 
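The XenAPIConfig class above is essentially a dictionary translation from legacy xm option names to Xen-API VM record fields (note that, compared with the removed XenManagedDomain.py it replaces, kernel/ramdisk/root now map to PV_kernel/PV_ramdisk/PV_args). A self-contained sketch of just that translation step, using a made-up set of legacy options:

    #!/usr/bin/python
    # Sketch of the legacy-option -> Xen-API field translation performed by
    # XenAPIConfig.setOpt() (table copied from the class above).
    opttrlate = { 'name'   : 'name_label',
                  'memory' : [ 'memory_static_max', 'memory_static_min',
                               'memory_dynamic_min', 'memory_dynamic_max' ],
                  'kernel' : 'PV_kernel',
                  'ramdisk': 'PV_ramdisk',
                  'root'   : 'PV_args' }

    def translate(legacy):
        xenapi_rec = {}
        for key, value in legacy.items():
            new = opttrlate.get(key, key)
            if isinstance(new, list):
                for n in new:
                    xenapi_rec[n] = value
            else:
                xenapi_rec[new] = value
        return xenapi_rec

    # Made-up legacy options, as a caller might pass via extraConfig:
    print translate({ 'name': 'demo', 'memory': 64, 'kernel': '/boot/vmlinuz' })
    # -> name_label plus the four memory_* fields (all 64) and PV_kernel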
@param config: String filename of config file """ @@ -162,6 +163,10 @@ class XenDomain: self.devices = {} self.netEnv = "bridge" + if os.getenv("XM_MANAGED_DOMAINS"): + isManaged = True + self.isManaged = isManaged + # Set domain type, either PV for ParaVirt domU or HVM for # FullVirt domain if ENABLE_HVM_SUPPORT: @@ -171,7 +176,17 @@ class XenDomain: def start(self, noConsole=False): - ret, output = traceCommand("xm create %s" % self.config) + if not self.isManaged: + ret, output = traceCommand("xm create %s" % self.config) + else: + ret, output = traceCommand("xm new %s" % self.config) + if ret != 0: + _ret, output = traceCommand("xm delete " + + self.config.getOpt("name")) + else: + ret, output = traceCommand("xm start " + + self.config.getOpt("name")) + addManagedDomain(self.config.getOpt("name")) if ret != 0: raise DomainError("Failed to create domain", @@ -218,6 +233,10 @@ class XenDomain: self.closeConsole() ret, output = traceCommand(prog + cmd + self.config.getOpt("name")) + if self.isManaged: + ret, output = traceCommand(prog + " delete " + + self.config.getOpt("name")) + delManagedDomain(self.config.getOpt("name")) return ret @@ -296,7 +315,7 @@ class XmTestDomain(XenDomain): class XmTestDomain(XenDomain): def __init__(self, name=None, extraConfig=None, - baseConfig=arch.configDefaults): + baseConfig=arch.configDefaults, isManaged=False): """Create a new xm-test domain @param name: The requested domain name @param extraConfig: Additional configuration options @@ -312,7 +331,8 @@ class XmTestDomain(XenDomain): elif not config.getOpt("name"): config.setOpt("name", getUniqueName()) - XenDomain.__init__(self, config.getOpt("name"), config=config) + XenDomain.__init__(self, config.getOpt("name"), config=config, + isManaged=isManaged) def minSafeMem(self): return arch.minSafeMem diff -r 3464bb656a9c -r 8475a4e0425e tools/xm-test/lib/XmTestLib/XenManagedDomain.py --- a/tools/xm-test/lib/XmTestLib/XenManagedDomain.py Thu Jan 18 09:54:33 2007 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,177 +0,0 @@ -#!/usr/bin/python -""" - Copyright (C) International Business Machines Corp., 2005 - Author: Stefan Berger <stefanb@xxxxxxxxxx> - - Based on XenDomain.py by Dan Smith <danms@xxxxxxxxxx> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; under version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
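The XenDomain.start()/stop() changes above give every domain an 'isManaged' mode (forced on when the XM_MANAGED_DOMAINS environment variable is set, which runtest.sh's new -md switch exports): managed domains are defined with 'xm new' and then started by name with 'xm start' instead of the one-shot 'xm create', and are additionally 'xm delete'd on shutdown. A condensed sketch of that start-path decision, with a stub in place of traceCommand:

    #!/usr/bin/python
    # Condensed sketch of the two start paths XenDomain.start() chooses
    # between; run() stands in for traceCommand() and touches nothing real.
    import os

    def run(cmd):
        print "would run: %s" % cmd
        return 0

    def start_domain(config_file, name, managed):
        if not managed:
            run("xm create %s" % config_file)        # unmanaged one-shot path
        else:
            if run("xm new %s" % config_file) != 0:  # define the managed domain
                run("xm delete %s" % name)           # clean up a failed define
            else:
                run("xm start %s" % name)            # then start it by name
                # the real code also calls addManagedDomain(name) here

    managed = bool(os.getenv("XM_MANAGED_DOMAINS"))  # what -md turns on
    start_domain("/tmp/demo.cfg", "demo", managed)   # hypothetical config/name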
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -""" -import os -import sys -from XmTestLib import * -from xen.util.xmlrpclib2 import ServerProxy -from types import DictType - - -class XenManagedConfig: - """An object to help create a VM configuration usable via Xen-API""" - def __init__(self): - self.opts = {} - #Array to translate old option to new ones - self.opttrlate = { 'name' : 'name_label' , - 'memory' : [ 'memory_static_max' , - 'memory_static_min' , - 'memory_dynamic_min', - 'memory_dynamic_max' ], - 'kernel' : 'kernel_kernel', - 'ramdisk': 'kernel_initrd', - 'root' : 'kernel_args'} - - def setOpt(self, name, value): - """Set an option in the config""" - if name in self.opttrlate.keys(): - _name = self.opttrlate[name] - else: - _name = name - - if isinstance(_name, list): - for _n in _name: - self.opts[_n] = value - else: - self.opts[_name] = value - - def getOpt(self, name): - """Return the value of a config option""" - if name in self.opts.keys(): - return self.opts[name] - else: - return None - - def setOpts(self, opts): - """Batch-set options from a dictionary""" - for k, v in opts.items(): - self.setOpt(k, v) - - def getOpts(self): - return self.opts - - -class XenManagedDomain(XenDomain): - - def __init__(self, name=None, config=None): - if name: - self.name = name - else: - self.name = getUniqueName() - - self.config = config - self.console = None - self.netEnv = "bridge" - - self.server, self.session = xapi._connect() - server = self.server - try: - self.vm_uuid = xapi.execute(server.VM.create, self.session, - self.config.getOpts()) - xapi._VMuuids.append(self.vm_uuid) - except: - raise DomainError("Could not create VM config file for " - "managed domain.") - - #Only support PV for now. - self.type = "PV" - - def start(self, noConsole=False, startpaused=False): - #start the VM - server = self.server - if self.vm_uuid: - try: - xapi.execute(server.VM.start, self.session, self.vm_uuid, - startpaused) - except: - raise DomainError("Could not start domain") - else: - raise DomainError("VM has not UUID - VM config does not exist?") - - if self.getDomainType() == "HVM": - waitForBoot() - - if self.console and noConsole == True: - self.closeConsole() - - elif self.console and noConsole == False: - return self.console - - elif not self.console and noConsole == False: - return self.getConsole() - - def stop(self): - if self.vm_uuid: - server = self.server - xapi.execute(server.VM.hard_shutdown, self.session, self.vm_uuid) - else: - raise DomainError("VM has not UUID - VM config does not exist?") - - def destroy(self): - #Stop VM first. 
- self.stop() - if self.vm_uuid: - server = self.server - xapi.execute(server.VM.destroy, self.session, self.vm_uuid) - xapi._VMuuids.remove(self.vm_uuid) - else: - raise DomainError("VM has not UUID - VM config does not exist?") - - def get_uuid(self): - return self.vm_uuid - - def newDevice(self, Device, *args): - raise DomainError("No support for newDevice().") - - def removeDevice(self, id): - raise DomainError("No support for removeDevice().") - - def removeAllDevices(self, id): - raise DomainError("No support for removeAllDevices().") - - def isRunning(self): - return isDomainRunning(self.name) - - def getDevice(self, id): - raise DomainError("No support for getDevice().") - - -class XmTestManagedDomain(XenManagedDomain): - - """Create a new managed xm-test domain - @param name: The requested domain name - @param extraConfig: Additional configuration options - @param baseConfig: The initial configuration defaults to use - """ - def __init__(self, name=None, extraConfig=None, - baseConfig=arch.configDefaults): - config = XenManagedConfig() - config.setOpts(baseConfig) - if extraConfig: - config.setOpts(extraConfig) - - if name: - config.setOpt("name_label", name) - elif not config.getOpt("name_label"): - config.setOpt("name_label", getUniqueName()) - - XenManagedDomain.__init__(self, config.getOpt("name_label"), - config=config) diff -r 3464bb656a9c -r 8475a4e0425e tools/xm-test/lib/XmTestLib/Xm.py --- a/tools/xm-test/lib/XmTestLib/Xm.py Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/xm-test/lib/XmTestLib/Xm.py Thu Jan 18 15:18:07 2007 +0000 @@ -48,6 +48,8 @@ def domid(name): status, output = traceCommand("xm domid " + name); if status != 0 or "Traceback" in output: + return -1 + if output == "None": return -1 try: return int(output) diff -r 3464bb656a9c -r 8475a4e0425e tools/xm-test/lib/XmTestLib/xapi.py --- a/tools/xm-test/lib/XmTestLib/xapi.py Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/xm-test/lib/XmTestLib/xapi.py Thu Jan 18 15:18:07 2007 +0000 @@ -17,50 +17,49 @@ # Copyright (C) 2006 IBM Corporation #============================================================================ +import atexit import os import sys from XmTestLib import * -from xen.util.xmlrpclib2 import ServerProxy +from xen.xm import main as xmmain +from xen.xm import XenAPI +from xen.xm.opts import OptionError from types import DictType +import xml.dom.minidom +def get_login_pwd(): + if xmmain.serverType == xmmain.SERVER_XEN_API: + try: + login, password = xmmain.parseAuthentication() + return (login, password) + except: + raise OptionError("Configuration for login/pwd not found. " + "Need to run xapi-setup.py?") + raise OptionError("Xm configuration file not using Xen-API for " + "communication with xend.") -XAPI_DEFAULT_LOGIN = " " -XAPI_DEFAULT_PASSWORD = " " +sessions=[] -class XenAPIError(Exception): - pass - - -#A list of VMs' UUIDs that were created using vm_create -_VMuuids = [] - -#Terminate previously created managed(!) 
VMs and destroy their configs -def vm_destroy_all(): - server, session = _connect() - for uuid in _VMuuids: - execute(server.VM.hard_shutdown, session, uuid) - execute(server.VM.destroy , session, uuid) - - -def execute(fn, *args): - result = fn(*args) - if type(result) != DictType: - raise TypeError("Function returned object of type: %s" % - str(type(result))) - if 'Value' not in result: - raise XenAPIError(*result['ErrorDescription']) - return result['Value'] - -_initialised = False -_server = None -_session = None -def _connect(*args): - global _server, _session, _initialised - if not _initialised: - _server = ServerProxy('httpu:///var/run/xend/xen-api.sock') - login = XAPI_DEFAULT_LOGIN - password = XAPI_DEFAULT_PASSWORD - creds = (login, password) - _session = execute(_server.session.login_with_password, *creds) - _initialised = True - return (_server, _session) +def connect(*args): + try: + creds = get_login_pwd() + except Exception, e: + FAIL("%s" % str(e)) + try: + session = XenAPI.Session(xmmain.serverURI) + except: + raise OptionError("Could not create XenAPI session with Xend." \ + "URI=%s" % xmmain.serverURI) + try: + session.login_with_password(*creds) + except: + raise OptionError("Could not login to Xend. URI=%s" % xmmain.serverURI) + def logout(): + try: + for s in sessions: + s.xenapi.session.logout() + except: + pass + sessions.append(session) + atexit.register(logout) + return session diff -r 3464bb656a9c -r 8475a4e0425e tools/xm-test/ramdisk/Makefile.am --- a/tools/xm-test/ramdisk/Makefile.am Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/xm-test/ramdisk/Makefile.am Thu Jan 18 15:18:07 2007 +0000 @@ -36,7 +36,12 @@ XMTEST_VER_IMG = initrd-$(XMTEST_MAJ_VER XMTEST_VER_IMG = initrd-$(XMTEST_MAJ_VER)-$(BR_ARCH).img XMTEST_DL_IMG = $(shell echo $(XMTEST_VER_IMG) | sed -e 's/x86_64/i386/g') -EXTRA_ROOT_DIRS = sys +EXTRA_ROOT_DIRS = sys modules + +BLKDRV = /lib/modules/$(shell uname -r)/kernel/drivers/xen/blkfront/xenblk.ko +NETDRV = /lib/modules/$(shell uname -r)/kernel/drivers/xen/netfront/xennet.ko +PKTDRV = /lib/modules/$(shell uname -r)/kernel/net/packet/af_packet.ko + if HVM all: initrd.img disk.img @@ -60,7 +65,11 @@ endif $(XMTEST_VER_IMG): $(BR_IMG) chmod a+x skel/etc/init.d/rcS - (cd skel; mkdir -p $(EXTRA_ROOT_DIRS); tar cf - .) \ + cd skel && mkdir -p $(EXTRA_ROOT_DIRS) + -[ -e "$(BLKDRV)" ] && cp $(BLKDRV) skel/modules + -[ -e "$(NETDRV)" ] && cp $(NETDRV) skel/modules + -[ -e "$(PKTDRV)" ] && cp $(PKTDRV) skel/modules + (cd skel; tar cf - .) 
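With the rewritten xapi helper above, tests no longer talk to a hard-coded httpu socket with blank credentials; xapi.connect() pulls the login/password from xm's own configuration (xmmain.parseAuthentication), opens a XenAPI.Session against xmmain.serverURI and registers a logout handler via atexit. A usage sketch along the lines of the new tests in this changeset; it assumes a host where xm/xend have been switched to the Xen-API as described in the README excerpt at the top of this patch:

    #!/usr/bin/python
    # Usage sketch mirroring the new xapi-based tests: one connect() call
    # yields a logged-in session, and logout happens automatically at exit.
    from XmTestLib import *
    from XmTestLib import xapi
    from XmTestLib.XenAPIDomain import XmTestAPIDomain

    try:
        domain = XmTestAPIDomain()       # defines a managed VM via VM.create
    except Exception, e:
        SKIP("Skipping sketch. Error: %s" % str(e))

    session = xapi.connect()
    print session.xenapi.VM.get_power_state(domain.get_uuid())
    # no explicit cleanup needed: DomainTracking's atexit hook deletes the VM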
\ | (cd $(BR_SRC)/$(BR_ROOT); tar xvf -) cd $(BR_SRC) && make cp $(BR_IMG) $(XMTEST_VER_IMG) diff -r 3464bb656a9c -r 8475a4e0425e tools/xm-test/ramdisk/skel/etc/init.d/rcS --- a/tools/xm-test/ramdisk/skel/etc/init.d/rcS Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/xm-test/ramdisk/skel/etc/init.d/rcS Thu Jan 18 15:18:07 2007 +0000 @@ -6,3 +6,14 @@ if uname -r | grep -q '^2.6'; then if uname -r | grep -q '^2.6'; then mount -t sysfs none /sys fi + +# If the block, net, and packet drivers are modules, we need to load them +if test -e /modules/xenblk.ko; then + insmod /modules/xenblk.ko > /dev/null 2>&1 +fi +if test -e /modules/xennet.ko; then + insmod /modules/xennet.ko > /dev/null 2>&1 +fi +if test -e /modules/af_packet.ko; then + insmod /modules/af_packet.ko > /dev/null 2>&1 +fi diff -r 3464bb656a9c -r 8475a4e0425e tools/xm-test/runtest.sh --- a/tools/xm-test/runtest.sh Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/xm-test/runtest.sh Thu Jan 18 15:18:07 2007 +0000 @@ -16,6 +16,7 @@ usage() { echo " -r <url> : url of test results repository to use" echo " -s <report> : just submit report <report>" echo " -u : unsafe -- do not run the sanity checks before starting" + echo " -md : all created domains are xend-'managed' domains" echo " -h | --help : show this help" } @@ -218,10 +219,13 @@ unsafe=no unsafe=no GROUPENTERED=default +#Prepare for usage with ACM if [ -d /etc/xen/acm-security/policies ]; then cp -f tests/security-acm/xm-test-security_policy.xml \ /etc/xen/acm-security/policies fi + +unset XM_MANAGED_DOMAINS # Resolve options while [ $# -gt 0 ] @@ -260,6 +264,10 @@ while [ $# -gt 0 ] unsafe=yes report=no ;; + -md) + echo "(use managed domains)" + export XM_MANAGED_DOMAINS=1 + ;; -h|--help) usage exit 0 diff -r 3464bb656a9c -r 8475a4e0425e tools/xm-test/tests/sched-credit/01_sched_credit_weight_cap_pos.py --- a/tools/xm-test/tests/sched-credit/01_sched_credit_weight_cap_pos.py Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/xm-test/tests/sched-credit/01_sched_credit_weight_cap_pos.py Thu Jan 18 15:18:07 2007 +0000 @@ -2,14 +2,27 @@ # # Sched-credit tests modified from SEDF tests # + +import re + from XmTestLib import * +paramsRE = re.compile(r'^[^ ]* *[^ ]* *([^ ]*) *([^ ]*)$') + def get_sched_credit_params(domain): - status, output = traceCommand("xm sched-credit -d %s" %(domain.getName())) - params = output.strip('{}').split(', ') - cap = int(params[0].split(':')[1].strip(' ')) - weight = int(params[1].split(':')[1].strip(' ')) - return (status, (weight, cap)) + status, output = traceCommand("xm sched-credit -d %s | tail -1" % + domain.getName()) + + if status != 0: + FAIL("Getting sched-credit parameters return non-zero rv (%d)", + status) + + m = paramsRE.match(output) + if not m: + FAIL("xm sched-credit gave bad output") + weight = int(m.group(1)) + cap = int(m.group(2)) + return (weight, cap) def set_sched_credit_weight(domain, weight): status, output = traceCommand("xm sched-credit -d %s -w %d" %(domain.getName(), weight)) @@ -31,11 +44,8 @@ except DomainError, e: FAIL(str(e)) # check default param values -(status, params) = get_sched_credit_params(domain) -if status != 0: - FAIL("Getting sched-credit parameters return non-zero rv (%d)", status) +(weight, cap) = get_sched_credit_params(domain) -(weight, cap) = params if weight != 256: FAIL("default weight is 256 (got %d)", weight) if cap != 0: @@ -51,11 +61,8 @@ if status != 0: FAIL("Setting sched-credit cap return non-zero rv (%d)", status) # check new param values -(status, params) = get_sched_credit_params(domain) -if status != 0: - 
FAIL("Getting sched-credit parameters return non-zero rv (%d)", status) +(weight, cap) = get_sched_credit_params(domain) -(weight, cap) = params if weight != 512: FAIL("expected weight of 512 (got %d)", weight) if cap != 100: diff -r 3464bb656a9c -r 8475a4e0425e tools/xm-test/tests/vtpm/09_vtpm-xapi.py --- a/tools/xm-test/tests/vtpm/09_vtpm-xapi.py Thu Jan 18 09:54:33 2007 +0000 +++ b/tools/xm-test/tests/vtpm/09_vtpm-xapi.py Thu Jan 18 15:18:07 2007 +0000 @@ -6,71 +6,66 @@ # Test to test the vtpm class through the Xen-API from XmTestLib import xapi -from XmTestLib.XenManagedDomain import XmTestManagedDomain +from XmTestLib.XenAPIDomain import XmTestAPIDomain from XmTestLib import * from vtpm_utils import * import commands import os -def do_test(): - domain = XmTestManagedDomain() - vm_uuid = domain.get_uuid() +try: + # XmTestAPIDomain tries to establish a connection to XenD + domain = XmTestAPIDomain() +except Exception, e: + SKIP("Skipping test. Error: %s" % str(e)) +vm_uuid = domain.get_uuid() - vtpmcfg = {} - vtpmcfg['type'] = "paravirtualised" - vtpmcfg['backend'] = "Domain-0" - vtpmcfg['instance'] = 1 - vtpmcfg['VM'] = vm_uuid +vtpmcfg = {} +vtpmcfg['type'] = "paravirtualised" +vtpmcfg['backend'] = "Domain-0" +vtpmcfg['instance'] = 1 +vtpmcfg['VM'] = vm_uuid - server, session = xapi._connect() +session = xapi.connect() - vtpm_uuid = xapi.execute(server.VTPM.create, session, vtpmcfg) +vtpm_uuid = session.xenapi.VTPM.create(vtpmcfg) - vtpm_id = xapi.execute(server.VTPM.get_instance, session, vtpm_uuid) - vtpm_be = xapi.execute(server.VTPM.get_backend , session, vtpm_uuid) - if vtpm_be != vtpmcfg['backend']: - FAIL("vTPM's backend is in '%s', expected: '%s'" % - (vtpm_be, vtpmcfg['backend'])) +vtpm_id = session.xenapi.VTPM.get_instance(vtpm_uuid) +vtpm_be = session.xenapi.VTPM.get_backend(vtpm_uuid) +if vtpm_be != vtpmcfg['backend']: + FAIL("vTPM's backend is in '%s', expected: '%s'" % + (vtpm_be, vtpmcfg['backend'])) - driver = xapi.execute(server.VTPM.get_driver, session, vtpm_uuid) - if driver != vtpmcfg['type']: - FAIL("vTPM has driver type '%s', expected: '%s'" % - (driver, vtpmcfg['type'])) +driver = session.xenapi.VTPM.get_driver(vtpm_uuid) +if driver != vtpmcfg['type']: + FAIL("vTPM has driver type '%s', expected: '%s'" % + (driver, vtpmcfg['type'])) - vtpm_rec = xapi.execute(server.VTPM.get_record, session, vtpm_uuid) +vtpm_rec = session.xenapi.VTPM.get_record(vtpm_uuid) - if vtpm_rec['driver'] != vtpmcfg['type']: - FAIL("vTPM record shows driver type '%s', expected: '%s'" % - (vtpm_rec['driver'], vtpmcfg['type'])) - if vtpm_rec['uuid'] != vtpm_uuid: - FAIL("vTPM record shows vtpm uuid '%s', expected: '%s'" % - (vtpm_rec['uuid'], vtpm_uuid)) - if vtpm_rec['VM'] != vm_uuid: - FAIL("vTPM record shows VM uuid '%s', expected: '%s'" % - (vtpm_rec['VM'], vm_uuid)) +if vtpm_rec['driver'] != vtpmcfg['type']: + FAIL("vTPM record shows driver type '%s', expected: '%s'" % + (vtpm_rec['driver'], vtpmcfg['type'])) +if vtpm_rec['uuid'] != vtpm_uuid: + FAIL("vTPM record shows vtpm uuid '%s', expected: '%s'" % + (vtpm_rec['uuid'], vtpm_uuid)) +if vtpm_rec['VM'] != vm_uuid: + FAIL("vTPM record shows VM uuid '%s', expected: '%s'" % + (vtpm_rec['VM'], vm_uuid)) - success = domain.start() +success = domain.start() - console = domain.getConsole() - - try: - run = console.runCmd("cat /sys/devices/xen/vtpm-0/pcrs") - except ConsoleError, e: - saveLog(console.getHistory()) - vtpm_cleanup(domName) - FAIL("No result from dumping the PCRs") - - if re.search("No such file",run["output"]): - 
vtpm_cleanup(domName) - FAIL("TPM frontend support not compiled into (domU?) kernel") - - domain.stop() - domain.destroy() - - +console = domain.getConsole() try: - do_test() -finally: - #Make sure all domains are gone that were created in this test case - xapi.vm_destroy_all() + run = console.runCmd("cat /sys/devices/xen/vtpm-0/pcrs") +except ConsoleError, e: + saveLog(console.getHistory()) + vtpm_cleanup(domName) + FAIL("No result from dumping the PCRs") + +if re.search("No such file",run["output"]): + vtpm_cleanup(domName) + FAIL("TPM frontend support not compiled into (domU?) kernel") + +domain.stop() +domain.destroy() diff -r 3464bb656a9c -r 8475a4e0425e tools/xm-test/tests/xapi/01_xapi-vm_basic.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/xm-test/tests/xapi/01_xapi-vm_basic.py Thu Jan 18 15:18:07 2007 +0000 @@ -0,0 +1,61 @@ +#!/usr/bin/python + +# Copyright (C) International Business Machines Corp., 2006 +# Author: Stefan Berger <stefanb@xxxxxxxxxx> + +# Basic VM creation test + +from XmTestLib import xapi +from XmTestLib.XenAPIDomain import XmTestAPIDomain +from XmTestLib import * +from xen.xend import XendAPIConstants +import commands +import os + +try: + # XmTestAPIDomain tries to establish a connection to XenD + domain = XmTestAPIDomain() +except Exception, e: + SKIP("Skipping test. Error: %s" % str(e)) +vm_uuid = domain.get_uuid() + +session = xapi.connect() + +domain.start(startpaused=True) + +res = session.xenapi.VM.get_power_state(vm_uuid) + +if res != XendAPIConstants.XEN_API_VM_POWER_STATE[XendAPIConstants.XEN_API_VM_POWER_STATE_PAUSED]: + FAIL("VM was not started in 'paused' state") + +res = session.xenapi.VM.unpause(vm_uuid) + +res = session.xenapi.VM.get_power_state(vm_uuid) + +if res != XendAPIConstants.XEN_API_VM_POWER_STATE[XendAPIConstants.XEN_API_VM_POWER_STATE_RUNNING]: + FAIL("VM could not be put into 'running' state") + +console = domain.getConsole() + +try: + run = console.runCmd("cat /proc/interrupts") +except ConsoleError, e: + saveLog(console.getHistory()) + FAIL("Could not access proc-filesystem") + +res = session.xenapi.VM.pause(vm_uuid) + +res = session.xenapi.VM.get_power_state(vm_uuid) + +if res != XendAPIConstants.XEN_API_VM_POWER_STATE[XendAPIConstants.XEN_API_VM_POWER_STATE_PAUSED]: + FAIL("VM could not be put into 'paused' state") + +res = session.xenapi.VM.unpause(vm_uuid) + +res = session.xenapi.VM.get_power_state(vm_uuid) + +if res != XendAPIConstants.XEN_API_VM_POWER_STATE[XendAPIConstants.XEN_API_VM_POWER_STATE_RUNNING]: + FAIL("VM could not be 'unpaused'") + +domain.stop() +domain.destroy() diff -r 3464bb656a9c -r 8475a4e0425e tools/xm-test/tests/xapi/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/xm-test/tests/xapi/Makefile.am Thu Jan 18 15:18:07 2007 +0000 @@ -0,0 +1,19 @@ +SUBDIRS = + +TESTS = 01_xapi-vm_basic.test + +XFAIL_TESTS = + +EXTRA_DIST = $(TESTS) $(XFAIL_TESTS) xapi_utils.py +TESTS_ENVIRONMENT=@TENV@ + +%.test: %.py + cp $< $@ + chmod +x $@ + +clean-local: am_config_clean-local + +am_config_clean-local: + rm -f *test + rm -f *log + rm -f *~ diff -r 3464bb656a9c -r 8475a4e0425e unmodified_drivers/linux-2.6/platform-pci/platform-pci.c --- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c Thu Jan 18 09:54:33 2007 +0000 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c Thu Jan 18 15:18:07 2007 +0000 @@ -179,7 +179,7 @@ static int get_hypercall_stubs(void) #define get_hypercall_stubs() (0) #endif -static int get_callback_irq(struct pci_dev *pdev) +static uint64_t 
get_callback_via(struct pci_dev *pdev) { #ifdef __ia64__ int irq; @@ -189,16 +189,24 @@ static int get_callback_irq(struct pci_d } return 0; #else /* !__ia64__ */ - return pdev->irq; + if (pdev->irq < 16) + return pdev->irq; /* ISA IRQ */ + /* We don't know the GSI. Specify the PCI INTx line instead. */ + return (((uint64_t)0x01 << 56) | /* PCI INTx identifier */ + ((uint64_t)pci_domain_nr(pdev->bus) << 32) | + ((uint64_t)pdev->bus->number << 16) | + ((uint64_t)(pdev->devfn & 0xff) << 8) | + ((uint64_t)(pdev->pin - 1) & 3)); #endif } static int __devinit platform_pci_init(struct pci_dev *pdev, const struct pci_device_id *ent) { - int i, ret, callback_irq; + int i, ret; long ioaddr, iolen; long mmio_addr, mmio_len; + uint64_t callback_via; i = pci_enable_device(pdev); if (i) @@ -210,9 +218,9 @@ static int __devinit platform_pci_init(s mmio_addr = pci_resource_start(pdev, 1); mmio_len = pci_resource_len(pdev, 1); - callback_irq = get_callback_irq(pdev); - - if (mmio_addr == 0 || ioaddr == 0 || callback_irq == 0) { + callback_via = get_callback_via(pdev); + + if (mmio_addr == 0 || ioaddr == 0 || callback_via == 0) { printk(KERN_WARNING DRV_NAME ":no resources found\n"); return -ENOENT; } @@ -242,12 +250,12 @@ static int __devinit platform_pci_init(s if ((ret = init_xen_info())) goto out; - if ((ret = request_irq(pdev->irq, evtchn_interrupt, SA_SHIRQ, - "xen-platform-pci", pdev))) { - goto out; - } - - if ((ret = set_callback_irq(callback_irq))) + if ((ret = request_irq(pdev->irq, evtchn_interrupt, + SA_SHIRQ | SA_SAMPLE_RANDOM, + "xen-platform-pci", pdev))) + goto out; + + if ((ret = set_callback_via(callback_via))) goto out; out: @@ -297,7 +305,7 @@ static void __exit platform_pci_module_c { printk(KERN_INFO DRV_NAME ":Do platform module cleanup\n"); /* disable hypervisor for callback irq */ - set_callback_irq(0); + set_callback_via(0); if (pci_device_registered) pci_unregister_driver(&platform_driver); } diff -r 3464bb656a9c -r 8475a4e0425e unmodified_drivers/linux-2.6/platform-pci/platform-pci.h --- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.h Thu Jan 18 09:54:33 2007 +0000 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.h Thu Jan 18 15:18:07 2007 +0000 @@ -24,13 +24,13 @@ #include <linux/interrupt.h> #include <xen/interface/hvm/params.h> -static inline int set_callback_irq(int irq) +static inline int set_callback_via(uint64_t via) { struct xen_hvm_param a; a.domid = DOMID_SELF; a.index = HVM_PARAM_CALLBACK_IRQ; - a.value = irq; + a.value = via; return HYPERVISOR_hvm_op(HVMOP_set_param, &a); } diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/ia64/linux-xen/unaligned.c --- a/xen/arch/ia64/linux-xen/unaligned.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/ia64/linux-xen/unaligned.c Thu Jan 18 15:18:07 2007 +0000 @@ -24,7 +24,7 @@ #include <asm/uaccess.h> #include <asm/unaligned.h> -extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn)); +extern void die_if_kernel(char *str, struct pt_regs *regs, long err); #undef DEBUG_UNALIGNED_TRAP diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/ia64/xen/xenmisc.c --- a/xen/arch/ia64/xen/xenmisc.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/ia64/xen/xenmisc.c Thu Jan 18 15:18:07 2007 +0000 @@ -77,7 +77,7 @@ void console_print(char *msg) // called from unaligned.c //////////////////////////////////// -void die_if_kernel(char *str, struct pt_regs *regs, long err) /* __attribute__ ((noreturn)) */ +void die_if_kernel(char *str, struct pt_regs *regs, long err) { if (user_mode(regs)) return; @@ 
-88,7 +88,7 @@ void die_if_kernel(char *str, struct pt_ domain_crash_synchronous(); } -void vmx_die_if_kernel(char *str, struct pt_regs *regs, long err) /* __attribute__ ((noreturn)) */ +void vmx_die_if_kernel(char *str, struct pt_regs *regs, long err) { if (vmx_user_mode(regs)) return; diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/domain.c Thu Jan 18 15:18:07 2007 +0000 @@ -1047,7 +1047,7 @@ void context_switch(struct vcpu *prev, s local_irq_disable(); - if ( is_hvm_vcpu(prev) ) + if ( is_hvm_vcpu(prev) && !list_empty(&prev->arch.hvm_vcpu.tm_list) ) pt_freeze_time(prev); set_current(next); diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/hvm/hpet.c --- a/xen/arch/x86/hvm/hpet.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/hvm/hpet.c Thu Jan 18 15:18:07 2007 +0000 @@ -356,8 +356,6 @@ static void hpet_timer_fn(void *opaque) } set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, h->period[tn])); } - - vcpu_kick(h->vcpu); } void hpet_migrate_timers(struct vcpu *v) diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/hvm/hvm.c Thu Jan 18 15:18:07 2007 +0000 @@ -800,7 +800,7 @@ long do_hvm_op(unsigned long op, XEN_GUE d->arch.hvm_domain.buffered_io_va = (unsigned long)p; break; case HVM_PARAM_CALLBACK_IRQ: - hvm_set_callback_gsi(d, a.value); + hvm_set_callback_via(d, a.value); break; } d->arch.hvm_domain.params[a.index] = a.value; diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/hvm/i8254.c --- a/xen/arch/x86/hvm/i8254.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/hvm/i8254.c Thu Jan 18 15:18:07 2007 +0000 @@ -182,11 +182,9 @@ void pit_time_fired(struct vcpu *v, void s->count_load_time = hvm_get_guest_time(v); } -static inline void pit_load_count(PITChannelState *s, int val) +static inline void pit_load_count(PITChannelState *s, int channel, int val) { u32 period; - PITChannelState *ch0 = - ¤t->domain->arch.hvm_domain.pl_time.vpit.channels[0]; if (val == 0) val = 0x10000; @@ -194,7 +192,7 @@ static inline void pit_load_count(PITCha s->count = val; period = DIV_ROUND((val * 1000000000ULL), PIT_FREQ); - if (s != ch0) + if (channel != 0) return; #ifdef DEBUG_PIT @@ -282,17 +280,17 @@ static void pit_ioport_write(void *opaqu switch(s->write_state) { default: case RW_STATE_LSB: - pit_load_count(s, val); + pit_load_count(s, addr, val); break; case RW_STATE_MSB: - pit_load_count(s, val << 8); + pit_load_count(s, addr, val << 8); break; case RW_STATE_WORD0: s->write_latch = val; s->write_state = RW_STATE_WORD1; break; case RW_STATE_WORD1: - pit_load_count(s, s->write_latch | (val << 8)); + pit_load_count(s, addr, s->write_latch | (val << 8)); s->write_state = RW_STATE_WORD0; break; } @@ -369,7 +367,7 @@ static void pit_reset(void *opaque) destroy_periodic_time(&s->pt); s->mode = 0xff; /* the init mode */ s->gate = (i != 2); - pit_load_count(s, 0); + pit_load_count(s, i, 0); } } diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/hvm/irq.c --- a/xen/arch/x86/hvm/irq.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/hvm/irq.c Thu Jan 18 15:18:07 2007 +0000 @@ -25,7 +25,7 @@ #include <xen/sched.h> #include <asm/hvm/domain.h> -void hvm_pci_intx_assert( +static void __hvm_pci_intx_assert( struct domain *d, unsigned int device, unsigned int intx) { struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; @@ -33,10 +33,8 @@ void hvm_pci_intx_assert( ASSERT((device <= 31) && (intx <= 3)); - spin_lock(&hvm_irq->lock); - if ( 
__test_and_set_bit(device*4 + intx, &hvm_irq->pci_intx) ) - goto out; + return; gsi = hvm_pci_intx_gsi(device, intx); if ( hvm_irq->gsi_assert_count[gsi]++ == 0 ) @@ -50,12 +48,19 @@ void hvm_pci_intx_assert( vioapic_irq_positive_edge(d, isa_irq); vpic_irq_positive_edge(d, isa_irq); } - - out: - spin_unlock(&hvm_irq->lock); -} - -void hvm_pci_intx_deassert( +} + +void hvm_pci_intx_assert( + struct domain *d, unsigned int device, unsigned int intx) +{ + struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; + + spin_lock(&hvm_irq->lock); + __hvm_pci_intx_assert(d, device, intx); + spin_unlock(&hvm_irq->lock); +} + +static void __hvm_pci_intx_deassert( struct domain *d, unsigned int device, unsigned int intx) { struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; @@ -63,10 +68,8 @@ void hvm_pci_intx_deassert( ASSERT((device <= 31) && (intx <= 3)); - spin_lock(&hvm_irq->lock); - if ( !__test_and_clear_bit(device*4 + intx, &hvm_irq->pci_intx) ) - goto out; + return; gsi = hvm_pci_intx_gsi(device, intx); --hvm_irq->gsi_assert_count[gsi]; @@ -76,8 +79,15 @@ void hvm_pci_intx_deassert( if ( (--hvm_irq->pci_link_assert_count[link] == 0) && isa_irq && (--hvm_irq->gsi_assert_count[isa_irq] == 0) ) vpic_irq_negative_edge(d, isa_irq); - - out: +} + +void hvm_pci_intx_deassert( + struct domain *d, unsigned int device, unsigned int intx) +{ + struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; + + spin_lock(&hvm_irq->lock); + __hvm_pci_intx_deassert(d, device, intx); spin_unlock(&hvm_irq->lock); } @@ -123,36 +133,47 @@ void hvm_set_callback_irq_level(void) struct vcpu *v = current; struct domain *d = v->domain; struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; - unsigned int gsi = hvm_irq->callback_gsi; + unsigned int gsi, pdev, pintx, asserted; /* Fast lock-free tests. */ - if ( (v->vcpu_id != 0) || (gsi == 0) ) + if ( (v->vcpu_id != 0) || + (hvm_irq->callback_via_type == HVMIRQ_callback_none) ) return; spin_lock(&hvm_irq->lock); - gsi = hvm_irq->callback_gsi; - if ( gsi == 0 ) + /* NB. Do not check the evtchn_upcall_mask. It is not used in HVM mode. */ + asserted = !!vcpu_info(v, evtchn_upcall_pending); + if ( hvm_irq->callback_via_asserted == asserted ) goto out; - - if ( local_events_need_delivery() ) - { - if ( !__test_and_set_bit(0, &hvm_irq->callback_irq_wire) && - (hvm_irq->gsi_assert_count[gsi]++ == 0) ) + hvm_irq->callback_via_asserted = asserted; + + /* Callback status has changed. Update the callback via. 
*/ + switch ( hvm_irq->callback_via_type ) + { + case HVMIRQ_callback_gsi: + gsi = hvm_irq->callback_via.gsi; + if ( asserted && (hvm_irq->gsi_assert_count[gsi]++ == 0) ) { vioapic_irq_positive_edge(d, gsi); if ( gsi <= 15 ) vpic_irq_positive_edge(d, gsi); } - } - else - { - if ( __test_and_clear_bit(0, &hvm_irq->callback_irq_wire) && - (--hvm_irq->gsi_assert_count[gsi] == 0) ) + else if ( !asserted && (--hvm_irq->gsi_assert_count[gsi] == 0) ) { if ( gsi <= 15 ) vpic_irq_negative_edge(d, gsi); } + break; + case HVMIRQ_callback_pci_intx: + pdev = hvm_irq->callback_via.pci.dev; + pintx = hvm_irq->callback_via.pci.intx; + if ( asserted ) + __hvm_pci_intx_assert(d, pdev, pintx); + else + __hvm_pci_intx_deassert(d, pdev, pintx); + default: + break; } out: @@ -192,40 +213,79 @@ void hvm_set_pci_link_route(struct domai d->domain_id, link, old_isa_irq, isa_irq); } -void hvm_set_callback_gsi(struct domain *d, unsigned int gsi) -{ - struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; - unsigned int old_gsi; - - if ( gsi >= ARRAY_SIZE(hvm_irq->gsi_assert_count) ) - gsi = 0; - - spin_lock(&hvm_irq->lock); - - old_gsi = hvm_irq->callback_gsi; - if ( old_gsi == gsi ) - goto out; - hvm_irq->callback_gsi = gsi; - - if ( !test_bit(0, &hvm_irq->callback_irq_wire) ) - goto out; - - if ( old_gsi && (--hvm_irq->gsi_assert_count[old_gsi] == 0) ) - if ( old_gsi <= 15 ) - vpic_irq_negative_edge(d, old_gsi); - - if ( gsi && (hvm_irq->gsi_assert_count[gsi]++ == 0) ) - { - vioapic_irq_positive_edge(d, gsi); - if ( gsi <= 15 ) - vpic_irq_positive_edge(d, gsi); - } - - out: - spin_unlock(&hvm_irq->lock); - - dprintk(XENLOG_G_INFO, "Dom%u callback GSI changed %u -> %u\n", - d->domain_id, old_gsi, gsi); +void hvm_set_callback_via(struct domain *d, uint64_t via) +{ + struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; + unsigned int gsi=0, pdev=0, pintx=0; + uint8_t via_type; + + via_type = (uint8_t)(via >> 56) + 1; + if ( ((via_type == HVMIRQ_callback_gsi) && (via == 0)) || + (via_type > HVMIRQ_callback_pci_intx) ) + via_type = HVMIRQ_callback_none; + + spin_lock(&hvm_irq->lock); + + /* Tear down old callback via. */ + if ( hvm_irq->callback_via_asserted ) + { + switch ( hvm_irq->callback_via_type ) + { + case HVMIRQ_callback_gsi: + gsi = hvm_irq->callback_via.gsi; + if ( (--hvm_irq->gsi_assert_count[gsi] == 0) && (gsi <= 15) ) + vpic_irq_negative_edge(d, gsi); + break; + case HVMIRQ_callback_pci_intx: + pdev = hvm_irq->callback_via.pci.dev; + pintx = hvm_irq->callback_via.pci.intx; + __hvm_pci_intx_deassert(d, pdev, pintx); + break; + default: + break; + } + } + + /* Set up new callback via. 
*/ + switch ( hvm_irq->callback_via_type = via_type ) + { + case HVMIRQ_callback_gsi: + gsi = hvm_irq->callback_via.gsi = (uint8_t)via; + if ( (gsi == 0) || (gsi >= ARRAY_SIZE(hvm_irq->gsi_assert_count)) ) + hvm_irq->callback_via_type = HVMIRQ_callback_none; + else if ( hvm_irq->callback_via_asserted && + (hvm_irq->gsi_assert_count[gsi]++ == 0) ) + { + vioapic_irq_positive_edge(d, gsi); + if ( gsi <= 15 ) + vpic_irq_positive_edge(d, gsi); + } + break; + case HVMIRQ_callback_pci_intx: + pdev = hvm_irq->callback_via.pci.dev = (uint8_t)(via >> 11) & 31; + pintx = hvm_irq->callback_via.pci.intx = (uint8_t)via & 3; + if ( hvm_irq->callback_via_asserted ) + __hvm_pci_intx_assert(d, pdev, pintx); + break; + default: + break; + } + + spin_unlock(&hvm_irq->lock); + + dprintk(XENLOG_G_INFO, "Dom%u callback via changed to ", d->domain_id); + switch ( via_type ) + { + case HVMIRQ_callback_gsi: + printk("GSI %u\n", gsi); + break; + case HVMIRQ_callback_pci_intx: + printk("PCI INTx Dev 0x%02x Int%c\n", pdev, 'A' + pintx); + break; + default: + printk("None\n"); + break; + } } int cpu_has_pending_irq(struct vcpu *v) diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/hvm/svm/svm.c Thu Jan 18 15:18:07 2007 +0000 @@ -482,8 +482,8 @@ static int svm_guest_x86_mode(struct vcp { struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - if ( vmcb->efer & EFER_LMA ) - return (vmcb->cs.attr.fields.l ? 8 : 4); + if ( (vmcb->efer & EFER_LMA) && vmcb->cs.attr.fields.l ) + return 8; if ( svm_realmode(v) ) return 2; diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/hvm/vioapic.c --- a/xen/arch/x86/hvm/vioapic.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/hvm/vioapic.c Thu Jan 18 15:18:07 2007 +0000 @@ -309,6 +309,13 @@ static uint32_t ioapic_get_delivery_bitm return mask; } +static inline int pit_channel0_enabled(void) +{ + PITState *pit = ¤t->domain->arch.hvm_domain.pl_time.vpit; + struct periodic_time *pt = &pit->channels[0].pt; + return pt->enabled; +} + static void vioapic_deliver(struct vioapic *vioapic, int irq) { uint16_t dest = vioapic->redirtbl[irq].fields.dest_id; @@ -341,7 +348,7 @@ static void vioapic_deliver(struct vioap { #ifdef IRQ0_SPECIAL_ROUTING /* Force round-robin to pick VCPU 0 */ - if ( irq == hvm_isa_irq_to_gsi(0) ) + if ( (irq == hvm_isa_irq_to_gsi(0)) && pit_channel0_enabled() ) { v = vioapic_domain(vioapic)->vcpu[0]; target = v ? vcpu_vlapic(v) : NULL; @@ -374,7 +381,7 @@ static void vioapic_deliver(struct vioap deliver_bitmask &= ~(1 << bit); #ifdef IRQ0_SPECIAL_ROUTING /* Do not deliver timer interrupts to VCPU != 0 */ - if ( irq == hvm_isa_irq_to_gsi(0) ) + if ( (irq == hvm_isa_irq_to_gsi(0)) && pit_channel0_enabled() ) v = vioapic_domain(vioapic)->vcpu[0]; else #endif diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Thu Jan 18 15:18:07 2007 +0000 @@ -278,7 +278,14 @@ static void vmx_set_host_env(struct vcpu host_env.tr_base = (unsigned long) &init_tss[cpu]; __vmwrite(HOST_TR_SELECTOR, host_env.tr_selector); __vmwrite(HOST_TR_BASE, host_env.tr_base); - __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom()); + + /* + * Skip end of cpu_user_regs when entering the hypervisor because the + * CPU does not save context onto the stack. SS,RSP,CS,RIP,RFLAGS,etc + * all get saved into the VMCS instead. 
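The platform-pci and hvm/irq changes above replace the old plain callback IRQ number with a 64-bit "callback via": either a GSI in the low byte, or, when the guest only knows its PCI INTx line, a value tagged with 0x01 in the top byte carrying the PCI segment, bus, devfn and pin. A small standalone sketch of the encoding used by get_callback_via() and of the fields hvm_set_callback_via() extracts from it (the device numbers are made up for illustration):

    #!/usr/bin/python
    # Encode/decode sketch for the 64-bit callback via.  Encoding mirrors
    # get_callback_via() in platform-pci.c; decoding mirrors the PCI INTx
    # case in hvm_set_callback_via() (dev = bits 11-15, intx = bits 0-1).
    def encode_pci_intx(pci_domain, bus, devfn, pin):
        return ((0x01 << 56) |             # "PCI INTx" identifier byte
                (pci_domain << 32) |
                (bus << 16) |
                ((devfn & 0xff) << 8) |
                ((pin - 1) & 3))

    def decode(via):
        if ((via >> 56) & 0xff) == 0x01:   # PCI INTx form
            dev  = (via >> 11) & 31        # device/slot bits of devfn
            intx = via & 3
            return ("pci-intx", dev, intx)
        return ("gsi", via & 0xff)         # otherwise a plain GSI/ISA IRQ

    # Hypothetical platform device at 0000:00:03.0 using pin 1 (INTA):
    via = encode_pci_intx(0, 0, (3 << 3) | 0, 1)
    print "via=%#x ->" % via, decode(via)  # ('pci-intx', 3, 0)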
+ */ + __vmwrite(HOST_RSP, + (unsigned long)&get_cpu_info()->guest_cpu_user_regs.error_code); } static void construct_vmcs(struct vcpu *v) diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Jan 18 15:18:07 2007 +0000 @@ -410,10 +410,6 @@ static void vmx_store_cpu_guest_regs( regs->eflags = __vmread(GUEST_RFLAGS); regs->ss = __vmread(GUEST_SS_SELECTOR); regs->cs = __vmread(GUEST_CS_SELECTOR); - regs->ds = __vmread(GUEST_DS_SELECTOR); - regs->es = __vmread(GUEST_ES_SELECTOR); - regs->gs = __vmread(GUEST_GS_SELECTOR); - regs->fs = __vmread(GUEST_FS_SELECTOR); regs->eip = __vmread(GUEST_RIP); regs->esp = __vmread(GUEST_RSP); } @@ -429,62 +425,39 @@ static void vmx_store_cpu_guest_regs( vmx_vmcs_exit(v); } -/* - * The VMX spec (section 4.3.1.2, Checks on Guest Segment - * Registers) says that virtual-8086 mode guests' segment - * base-address fields in the VMCS must be equal to their - * corresponding segment selector field shifted right by - * four bits upon vmentry. - * - * This function (called only for VM86-mode guests) fixes - * the bases to be consistent with the selectors in regs - * if they're not already. Without this, we can fail the - * vmentry check mentioned above. - */ -static void fixup_vm86_seg_bases(struct cpu_user_regs *regs) +static void vmx_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs) { unsigned long base; - base = __vmread(GUEST_ES_BASE); - if (regs->es << 4 != base) - __vmwrite(GUEST_ES_BASE, regs->es << 4); - base = __vmread(GUEST_CS_BASE); - if (regs->cs << 4 != base) - __vmwrite(GUEST_CS_BASE, regs->cs << 4); - base = __vmread(GUEST_SS_BASE); - if (regs->ss << 4 != base) - __vmwrite(GUEST_SS_BASE, regs->ss << 4); - base = __vmread(GUEST_DS_BASE); - if (regs->ds << 4 != base) - __vmwrite(GUEST_DS_BASE, regs->ds << 4); - base = __vmread(GUEST_FS_BASE); - if (regs->fs << 4 != base) - __vmwrite(GUEST_FS_BASE, regs->fs << 4); - base = __vmread(GUEST_GS_BASE); - if (regs->gs << 4 != base) - __vmwrite(GUEST_GS_BASE, regs->gs << 4); -} - -static void vmx_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs) -{ vmx_vmcs_enter(v); __vmwrite(GUEST_SS_SELECTOR, regs->ss); - __vmwrite(GUEST_DS_SELECTOR, regs->ds); - __vmwrite(GUEST_ES_SELECTOR, regs->es); - __vmwrite(GUEST_GS_SELECTOR, regs->gs); - __vmwrite(GUEST_FS_SELECTOR, regs->fs); - __vmwrite(GUEST_RSP, regs->esp); /* NB. Bit 1 of RFLAGS must be set for VMENTRY to succeed. */ __vmwrite(GUEST_RFLAGS, regs->eflags | 2UL); - if (regs->eflags & EF_TF) + + if ( regs->eflags & EF_TF ) __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); else __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); - if (regs->eflags & EF_VM) - fixup_vm86_seg_bases(regs); + + if ( regs->eflags & EF_VM ) + { + /* + * The VMX spec (section 4.3.1.2, Checks on Guest Segment + * Registers) says that virtual-8086 mode guests' segment + * base-address fields in the VMCS must be equal to their + * corresponding segment selector field shifted right by + * four bits upon vmentry. 
+ */ + base = __vmread(GUEST_CS_BASE); + if ( (regs->cs << 4) != base ) + __vmwrite(GUEST_CS_BASE, regs->cs << 4); + base = __vmread(GUEST_SS_BASE); + if ( (regs->ss << 4) != base ) + __vmwrite(GUEST_SS_BASE, regs->ss << 4); + } __vmwrite(GUEST_CS_SELECTOR, regs->cs); __vmwrite(GUEST_RIP, regs->eip); @@ -518,8 +491,7 @@ static unsigned long vmx_get_segment_bas ASSERT(v == current); #ifdef __x86_64__ - if ( vmx_long_mode_enabled(v) && - (__vmread(GUEST_CS_AR_BYTES) & (1u<<13)) ) + if ( vmx_long_mode_enabled(v) && (__vmread(GUEST_CS_AR_BYTES) & (1u<<13)) ) long_mode = 1; #endif @@ -694,8 +666,8 @@ static int vmx_guest_x86_mode(struct vcp cs_ar_bytes = __vmread(GUEST_CS_AR_BYTES); - if ( vmx_long_mode_enabled(v) ) - return ((cs_ar_bytes & (1u<<13)) ? 8 : 4); + if ( vmx_long_mode_enabled(v) && (cs_ar_bytes & (1u<<13)) ) + return 8; if ( vmx_realmode(v) ) return 2; @@ -2251,47 +2223,54 @@ static void vmx_reflect_exception(struct } } +static void vmx_failed_vmentry(unsigned int exit_reason) +{ + unsigned int failed_vmentry_reason = (uint16_t)exit_reason; + unsigned long exit_qualification; + + exit_qualification = __vmread(EXIT_QUALIFICATION); + printk("Failed vm entry (exit reason 0x%x) ", exit_reason); + switch ( failed_vmentry_reason ) + { + case EXIT_REASON_INVALID_GUEST_STATE: + printk("caused by invalid guest state (%ld).\n", exit_qualification); + break; + case EXIT_REASON_MSR_LOADING: + printk("caused by MSR entry %ld loading.\n", exit_qualification); + break; + case EXIT_REASON_MACHINE_CHECK: + printk("caused by machine check.\n"); + break; + default: + printk("reason not known yet!"); + break; + } + + printk("************* VMCS Area **************\n"); + vmcs_dump_vcpu(); + printk("**************************************\n"); + + domain_crash(current->domain); +} + asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs) { unsigned int exit_reason; unsigned long exit_qualification, inst_len = 0; struct vcpu *v = current; + TRACE_3D(TRC_VMX_VMEXIT + v->vcpu_id, 0, 0, 0); + exit_reason = __vmread(VM_EXIT_REASON); perfc_incra(vmexits, exit_reason); + TRACE_VMEXIT(0, exit_reason); if ( exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT ) local_irq_enable(); if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) ) - { - unsigned int failed_vmentry_reason = exit_reason & 0xFFFF; - - exit_qualification = __vmread(EXIT_QUALIFICATION); - printk("Failed vm entry (exit reason 0x%x) ", exit_reason); - switch ( failed_vmentry_reason ) { - case EXIT_REASON_INVALID_GUEST_STATE: - printk("caused by invalid guest state (%ld).\n", exit_qualification); - break; - case EXIT_REASON_MSR_LOADING: - printk("caused by MSR entry %ld loading.\n", exit_qualification); - break; - case EXIT_REASON_MACHINE_CHECK: - printk("caused by machine check.\n"); - break; - default: - printk("reason not known yet!"); - break; - } - - printk("************* VMCS Area **************\n"); - vmcs_dump_vcpu(); - printk("**************************************\n"); - goto exit_and_crash; - } - - TRACE_VMEXIT(0, exit_reason); + return vmx_failed_vmentry(exit_reason); switch ( exit_reason ) { @@ -2519,11 +2498,6 @@ asmlinkage void vmx_trace_vmentry(void) TRACE_VMEXIT(4, 0); } -asmlinkage void vmx_trace_vmexit (void) -{ - TRACE_3D(TRC_VMX_VMEXIT + current->vcpu_id, 0, 0, 0); -} - /* * Local variables: * mode: C diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/hvm/vmx/x86_32/exits.S --- a/xen/arch/x86/hvm/vmx/x86_32/exits.S Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S Thu Jan 18 15:18:07 2007 +0000 @@ 
-29,35 +29,7 @@ andl $~3,reg; \ movl (reg),reg; -/* - * At VMExit time the processor saves the guest selectors, esp, eip, - * and eflags. Therefore we don't save them, but simply decrement - * the kernel stack pointer to make it consistent with the stack frame - * at usual interruption time. The eflags of the host is not saved by VMX, - * and we set it to the fixed value. - * - * We also need the room, especially because orig_eax field is used - * by do_IRQ(). Compared the cpu_user_regs, we skip pushing for the following: - * (10) u32 gs; - * (9) u32 fs; - * (8) u32 ds; - * (7) u32 es; - * <- get_stack_bottom() (= HOST_ESP) - * (6) u32 ss; - * (5) u32 esp; - * (4) u32 eflags; - * (3) u32 cs; - * (2) u32 eip; - * (2/1) u16 entry_vector; - * (1/1) u16 error_code; - * However, get_stack_bottom() actually returns 20 bytes before the real - * bottom of the stack to allow space for: - * domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers. - */ - -#define NR_SKIPPED_REGS 6 /* See the above explanation */ #define HVM_SAVE_ALL_NOSEGREGS \ - subl $(NR_SKIPPED_REGS*4), %esp; \ movl $0, 0xc(%esp); /* XXX why do we need to force eflags==0 ?? */ \ pushl %eax; \ pushl %ebp; \ @@ -74,14 +46,11 @@ popl %esi; \ popl %edi; \ popl %ebp; \ - popl %eax; \ - addl $(NR_SKIPPED_REGS*4), %esp + popl %eax ALIGN ENTRY(vmx_asm_vmexit_handler) - /* selectors are restored/saved by VMX */ HVM_SAVE_ALL_NOSEGREGS - call vmx_trace_vmexit movl %esp,%eax push %eax call vmx_vmexit_handler diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/hvm/vmx/x86_64/exits.S --- a/xen/arch/x86/hvm/vmx/x86_64/exits.S Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S Thu Jan 18 15:18:07 2007 +0000 @@ -29,31 +29,7 @@ andq $~7,reg; \ movq (reg),reg; -/* - * At VMExit time the processor saves the guest selectors, rsp, rip, - * and rflags. Therefore we don't save them, but simply decrement - * the kernel stack pointer to make it consistent with the stack frame - * at usual interruption time. The rflags of the host is not saved by VMX, - * and we set it to the fixed value. - * - * We also need the room, especially because orig_eax field is used - * by do_IRQ(). Compared the cpu_user_regs, we skip pushing for the following: - * (10) u64 gs; - * (9) u64 fs; - * (8) u64 ds; - * (7) u64 es; - * <- get_stack_bottom() (= HOST_ESP) - * (6) u64 ss; - * (5) u64 rsp; - * (4) u64 rflags; - * (3) u64 cs; - * (2) u64 rip; - * (2/1) u32 entry_vector; - * (1/1) u32 error_code; - */ -#define NR_SKIPPED_REGS 6 /* See the above explanation */ #define HVM_SAVE_ALL_NOSEGREGS \ - subq $(NR_SKIPPED_REGS*8), %rsp; \ pushq %rdi; \ pushq %rsi; \ pushq %rdx; \ @@ -85,14 +61,11 @@ popq %rcx; \ popq %rdx; \ popq %rsi; \ - popq %rdi; \ - addq $(NR_SKIPPED_REGS*8), %rsp; + popq %rdi ALIGN ENTRY(vmx_asm_vmexit_handler) - /* selectors are restored/saved by VMX */ HVM_SAVE_ALL_NOSEGREGS - call vmx_trace_vmexit movq %rsp,%rdi call vmx_vmexit_handler jmp vmx_asm_do_vmentry diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/microcode.c --- a/xen/arch/x86/microcode.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/microcode.c Thu Jan 18 15:18:07 2007 +0000 @@ -249,14 +249,14 @@ static int find_matching_ucodes (void) } total_size = get_totalsize(&mc_header); - if ((cursor + total_size > user_buffer_size) || (total_size < DEFAULT_UCODE_TOTALSIZE)) { + if (cursor + total_size > user_buffer_size) { printk(KERN_ERR "microcode: error! 
Bad data in microcode data file\n"); error = -EINVAL; goto out; } data_size = get_datasize(&mc_header); - if ((data_size + MC_HEADER_SIZE > total_size) || (data_size < DEFAULT_UCODE_DATASIZE)) { + if (data_size + MC_HEADER_SIZE > total_size) { printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); error = -EINVAL; goto out; @@ -459,11 +459,6 @@ int microcode_update(XEN_GUEST_HANDLE(vo { int ret; - if (len < DEFAULT_UCODE_TOTALSIZE) { - printk(KERN_ERR "microcode: not enough data\n"); - return -EINVAL; - } - if (len != (typeof(user_buffer_size))len) { printk(KERN_ERR "microcode: too much data\n"); return -E2BIG; diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/mm.c Thu Jan 18 15:18:07 2007 +0000 @@ -154,6 +154,15 @@ l2_pgentry_t *compat_idle_pg_table_l2 = #define l3_disallow_mask(d) L3_DISALLOW_MASK #endif +static void queue_deferred_ops(struct domain *d, unsigned int ops) +{ + if ( d == current->domain ) + this_cpu(percpu_mm_info).deferred_ops |= ops; + else + BUG_ON(!test_bit(_DOMF_paused, &d->domain_flags) || + !cpus_empty(d->domain_dirty_cpumask)); +} + void __init init_frametable(void) { unsigned long nr_pages, page_step, i, mfn; @@ -416,8 +425,7 @@ void invalidate_shadow_ldt(struct vcpu * } /* Dispose of the (now possibly invalid) mappings from the TLB. */ - ASSERT(v->processor == smp_processor_id()); - this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB | DOP_RELOAD_LDT; + queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT); } @@ -826,7 +834,7 @@ static void put_page_from_l2e(l2_pgentry { if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) && (l2e_get_pfn(l2e) != pfn) ) - put_page_and_type(mfn_to_page(l2e_get_pfn(l2e))); + put_page_and_type(l2e_get_page(l2e)); } @@ -835,7 +843,7 @@ static void put_page_from_l3e(l3_pgentry { if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) && (l3e_get_pfn(l3e) != pfn) ) - put_page_and_type(mfn_to_page(l3e_get_pfn(l3e))); + put_page_and_type(l3e_get_page(l3e)); } #endif @@ -844,7 +852,7 @@ static void put_page_from_l4e(l4_pgentry { if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) && (l4e_get_pfn(l4e) != pfn) ) - put_page_and_type(mfn_to_page(l4e_get_pfn(l4e))); + put_page_and_type(l4e_get_page(l4e)); } #endif @@ -945,7 +953,8 @@ static int create_pae_xen_mappings(struc } #else memcpy(&pl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)], - &compat_idle_pg_table_l2[l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)], + &compat_idle_pg_table_l2[ + l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)], COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*pl2e)); #endif unmap_domain_page(pl2e); @@ -1376,7 +1385,7 @@ static int mod_l2_entry(l2_pgentry_t *pl if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT)) return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current); - if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain)) ) + if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) ) return 0; if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) ) @@ -1439,7 +1448,7 @@ static int mod_l3_entry(l3_pgentry_t *pl if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT)) return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current); - if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain)) ) + if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) ) return 0; if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) ) @@ -1561,7 +1570,7 @@ void free_page_type(struct page_info *pa * (e.g., update_va_mapping()) or we could end up modifying a page * that is no longer a page table (and hence screw up ref counts). 
*/ - this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS; + queue_deferred_ops(owner, DOP_FLUSH_ALL_TLBS); if ( unlikely(shadow_mode_enabled(owner)) ) { @@ -1759,24 +1768,14 @@ int new_guest_cr3(unsigned long mfn) int okay; unsigned long old_base_mfn; - if ( is_hvm_domain(d) && !hvm_paging_enabled(v) ) - return 0; - #ifdef CONFIG_COMPAT if ( IS_COMPAT(d) ) { - l4_pgentry_t l4e = l4e_from_pfn(mfn, _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED); - - if ( shadow_mode_refcounts(d) ) - { - okay = get_page_from_pagenr(mfn, d); - old_base_mfn = l4e_get_pfn(l4e); - if ( okay && old_base_mfn ) - put_page(mfn_to_page(old_base_mfn)); - } - else - okay = mod_l4_entry(__va(pagetable_get_paddr(v->arch.guest_table)), - l4e, 0); + okay = shadow_mode_refcounts(d) + ? 0 /* Old code was broken, but what should it be? */ + : mod_l4_entry(__va(pagetable_get_paddr(v->arch.guest_table)), + l4e_from_pfn(mfn, (_PAGE_PRESENT|_PAGE_RW| + _PAGE_USER|_PAGE_ACCESSED)), 0); if ( unlikely(!okay) ) { MEM_LOG("Error while installing new compat baseptr %lx", mfn); @@ -1789,41 +1788,13 @@ int new_guest_cr3(unsigned long mfn) return 1; } #endif - if ( shadow_mode_refcounts(d) ) - { - okay = get_page_from_pagenr(mfn, d); - if ( unlikely(!okay) ) - { - MEM_LOG("Error while installing new baseptr %lx", mfn); - return 0; - } - } - else - { - okay = get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d); - if ( unlikely(!okay) ) - { - /* Switch to idle pagetable: this VCPU has no active p.t. now. */ - MEM_LOG("New baseptr %lx: slow path via idle pagetables", mfn); - old_base_mfn = pagetable_get_pfn(v->arch.guest_table); - v->arch.guest_table = pagetable_null(); - update_cr3(v); - write_cr3(__pa(idle_pg_table)); - if ( old_base_mfn != 0 ) - put_page_and_type(mfn_to_page(old_base_mfn)); - - /* Retry the validation with no active p.t. for this VCPU. */ - okay = get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d); - if ( !okay ) - { - /* Failure here is unrecoverable: the VCPU has no pagetable! */ - MEM_LOG("Fatal error while installing new baseptr %lx", mfn); - domain_crash(d); - ASSERT(v->processor == smp_processor_id()); - this_cpu(percpu_mm_info).deferred_ops = 0; - return 0; - } - } + okay = shadow_mode_refcounts(d) + ? get_page_from_pagenr(mfn, d) + : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d); + if ( unlikely(!okay) ) + { + MEM_LOG("Error while installing new baseptr %lx", mfn); + return 0; } invalidate_shadow_ldt(v); @@ -1831,7 +1802,7 @@ int new_guest_cr3(unsigned long mfn) old_base_mfn = pagetable_get_pfn(v->arch.guest_table); v->arch.guest_table = pagetable_from_pfn(mfn); - update_cr3(v); /* update shadow_table and cr3 fields of vcpu struct */ + update_cr3(v); write_ptbase(v); @@ -3182,7 +3153,7 @@ static int ptwr_emulated_update( unsigned int do_cmpxchg, struct ptwr_emulate_ctxt *ptwr_ctxt) { - unsigned long gmfn, mfn; + unsigned long mfn; struct page_info *page; l1_pgentry_t pte, ol1e, nl1e, *pl1e; struct vcpu *v = current; @@ -3222,8 +3193,7 @@ static int ptwr_emulated_update( } pte = ptwr_ctxt->pte; - gmfn = l1e_get_pfn(pte); - mfn = gmfn_to_mfn(d, gmfn); + mfn = l1e_get_pfn(pte); page = mfn_to_page(mfn); /* We are looking only for read-only mappings of p.t. pages. 
*/ @@ -3237,15 +3207,14 @@ static int ptwr_emulated_update( if ( unlikely(!get_page_from_l1e(gl1e_to_ml1e(d, nl1e), d)) ) { if ( (CONFIG_PAGING_LEVELS == 3 || IS_COMPAT(d)) && - (bytes == 4) && - !do_cmpxchg && + (bytes == 4) && (addr & 4) && !do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) ) { /* - * If this is a half-write to a PAE PTE then we assume that the - * guest has simply got the two writes the wrong way round. We - * zap the PRESENT bit on the assumption the bottom half will be - * written immediately after we return to the guest. + * If this is an upper-half write to a PAE PTE then we assume that + * the guest has simply got the two writes the wrong way round. We + * zap the PRESENT bit on the assumption that the bottom half will + * be written immediately after we return to the guest. */ MEM_LOG("ptwr_emulate: fixing up invalid PAE PTE %"PRIpte, l1e_get_intpte(nl1e)); @@ -3354,7 +3323,6 @@ int ptwr_do_page_fault(struct vcpu *v, u struct cpu_user_regs *regs) { struct domain *d = v->domain; - unsigned long pfn; struct page_info *page; l1_pgentry_t pte; struct ptwr_emulate_ctxt ptwr_ctxt; @@ -3368,8 +3336,7 @@ int ptwr_do_page_fault(struct vcpu *v, u guest_get_eff_l1e(v, addr, &pte); if ( !(l1e_get_flags(pte) & _PAGE_PRESENT) ) goto bail; - pfn = l1e_get_pfn(pte); - page = mfn_to_page(pfn); + page = l1e_get_page(pte); /* We are looking only for read-only mappings of p.t. pages. */ if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) || @@ -3378,8 +3345,9 @@ int ptwr_do_page_fault(struct vcpu *v, u (page_get_owner(page) != d) ) goto bail; - ptwr_ctxt.ctxt.regs = guest_cpu_user_regs(); - ptwr_ctxt.ctxt.address_bytes = IS_COMPAT(d) ? 4 : sizeof(long); + ptwr_ctxt.ctxt.regs = regs; + ptwr_ctxt.ctxt.addr_size = ptwr_ctxt.ctxt.sp_size = + IS_COMPAT(d) ? 32 : BITS_PER_LONG; ptwr_ctxt.cr2 = addr; ptwr_ctxt.pte = pte; if ( x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops) ) diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/mm/shadow/common.c Thu Jan 18 15:18:07 2007 +0000 @@ -110,7 +110,7 @@ static int hvm_translate_linear_addr( unsigned long limit, addr = offset; uint32_t last_byte; - if ( sh_ctxt->ctxt.address_bytes != 8 ) + if ( sh_ctxt->ctxt.addr_size != 64 ) { /* * COMPATIBILITY MODE: Apply segment checks and add base. @@ -399,7 +399,7 @@ struct x86_emulate_ops *shadow_init_emul struct x86_emulate_ops *shadow_init_emulation( struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs) { - struct segment_register *creg; + struct segment_register *creg, *sreg; struct vcpu *v = current; unsigned long addr; @@ -407,7 +407,7 @@ struct x86_emulate_ops *shadow_init_emul if ( !is_hvm_vcpu(v) ) { - sh_ctxt->ctxt.address_bytes = sizeof(long); + sh_ctxt->ctxt.addr_size = sh_ctxt->ctxt.sp_size = BITS_PER_LONG; return &pv_shadow_emulator_ops; } @@ -416,12 +416,20 @@ struct x86_emulate_ops *shadow_init_emul creg = hvm_get_seg_reg(x86_seg_cs, sh_ctxt); /* Work out the emulation mode. */ - if ( hvm_long_mode_enabled(v) ) - sh_ctxt->ctxt.address_bytes = creg->attr.fields.l ? 8 : 4; + if ( hvm_long_mode_enabled(v) && creg->attr.fields.l ) + { + sh_ctxt->ctxt.addr_size = sh_ctxt->ctxt.sp_size = 64; + } else if ( regs->eflags & X86_EFLAGS_VM ) - sh_ctxt->ctxt.address_bytes = 2; + { + sh_ctxt->ctxt.addr_size = sh_ctxt->ctxt.sp_size = 16; + } else - sh_ctxt->ctxt.address_bytes = creg->attr.fields.db ? 
4 : 2; + { + sreg = hvm_get_seg_reg(x86_seg_ss, sh_ctxt); + sh_ctxt->ctxt.addr_size = creg->attr.fields.db ? 32 : 16; + sh_ctxt->ctxt.sp_size = sreg->attr.fields.db ? 32 : 16; + } /* Attempt to prefetch whole instruction. */ sh_ctxt->insn_buf_bytes = @@ -1303,6 +1311,9 @@ shadow_alloc_p2m_table(struct domain *d) if ( !shadow_set_p2m_entry(d, gfn, mfn) ) goto error; + /* Build a p2m map that matches the m2p entries for this domain's + * allocated pages. Skip any pages that have an explicitly invalid + * or obviously bogus m2p entry. */ for ( entry = d->page_list.next; entry != &d->page_list; entry = entry->next ) @@ -1318,6 +1329,8 @@ shadow_alloc_p2m_table(struct domain *d) (gfn != 0x55555555L) #endif && gfn != INVALID_M2P_ENTRY + && (gfn < + (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof (l1_pgentry_t)) && !shadow_set_p2m_entry(d, gfn, mfn) ) goto error; } diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/mm/shadow/multi.c Thu Jan 18 15:18:07 2007 +0000 @@ -851,9 +851,7 @@ static inline void safe_write_entry(void * then writing the high word before the low word. */ BUILD_BUG_ON(sizeof (shadow_l1e_t) != 2 * sizeof (unsigned long)); d[0] = 0; - wmb(); d[1] = s[1]; - wmb(); d[0] = s[0]; #else /* In 32-bit and 64-bit, sizeof(pte) == sizeof(ulong) == 1 word, @@ -3946,7 +3944,7 @@ sh_x86_emulate_write(struct vcpu *v, uns if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes); /* If we are writing zeros to this page, might want to unshadow */ - if ( likely(bytes >= 4) && (*(u32 *)addr == 0) ) + if ( likely(bytes >= 4) && (*(u32 *)addr == 0) && is_lo_pte(vaddr) ) check_for_early_unshadow(v, mfn); sh_unmap_domain_page(addr); @@ -3998,7 +3996,7 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u vaddr, prev, old, new, *(unsigned long *)addr, bytes); /* If we are writing zeros to this page, might want to unshadow */ - if ( likely(bytes >= 4) && (*(u32 *)addr == 0) ) + if ( likely(bytes >= 4) && (*(u32 *)addr == 0) && is_lo_pte(vaddr) ) check_for_early_unshadow(v, mfn); sh_unmap_domain_page(addr); diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/mm/shadow/private.h --- a/xen/arch/x86/mm/shadow/private.h Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/mm/shadow/private.h Thu Jan 18 15:18:07 2007 +0000 @@ -427,6 +427,11 @@ extern int sh_remove_write_access(struct #undef mfn_valid #define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) +#if GUEST_PAGING_LEVELS >= 3 +# define is_lo_pte(_vaddr) (((_vaddr)&0x4)==0) +#else +# define is_lo_pte(_vaddr) (1) +#endif static inline int sh_mfn_is_a_page_table(mfn_t gmfn) diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/oprofile/nmi_int.c --- a/xen/arch/x86/oprofile/nmi_int.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/oprofile/nmi_int.c Thu Jan 18 15:18:07 2007 +0000 @@ -42,7 +42,7 @@ extern size_t strlcpy(char *dest, const extern size_t strlcpy(char *dest, const char *src, size_t size); -int nmi_callback(struct cpu_user_regs *regs, int cpu) +static int nmi_callback(struct cpu_user_regs *regs, int cpu) { int xen_mode, ovf; diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/setup.c Thu Jan 18 15:18:07 2007 +0000 @@ -411,6 +411,23 @@ void __init __start_xen(multiboot_info_t printk("WARNING: Buggy e820 map detected and fixed " "(truncated length fields).\n"); + /* Ensure that all E820 RAM regions are page-aligned and -sized. 
*/ + for ( i = 0; i < e820_raw_nr; i++ ) + { + uint64_t s, e; + if ( e820_raw[i].type != E820_RAM ) + continue; + s = PFN_UP(e820_raw[i].addr); + e = PFN_DOWN(e820_raw[i].addr + e820_raw[i].size); + e820_raw[i].size = 0; /* discarded later */ + if ( s < e ) + { + e820_raw[i].addr = s << PAGE_SHIFT; + e820_raw[i].size = (e - s) << PAGE_SHIFT; + } + } + + /* Sanitise the raw E820 map to produce a final clean version. */ max_page = init_e820(e820_raw, &e820_raw_nr); modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start; @@ -423,7 +440,7 @@ void __init __start_xen(multiboot_info_t printk("Not enough memory to stash the DOM0 kernel image.\n"); for ( ; ; ) ; } - + if ( (e820.map[i].type == E820_RAM) && (e820.map[i].size >= modules_length) && ((e820.map[i].addr + e820.map[i].size) >= @@ -474,10 +491,10 @@ void __init __start_xen(multiboot_info_t start = PFN_UP(e820.map[i].addr); end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); /* Clip the range to exclude what the bootstrapper initialised. */ - if ( end < init_mapped ) - continue; if ( start < init_mapped ) start = init_mapped; + if ( end <= start ) + continue; /* Request the mapping. */ map_pages_to_xen( PAGE_OFFSET + (start << PAGE_SHIFT), @@ -486,7 +503,7 @@ void __init __start_xen(multiboot_info_t #endif } - if ( kexec_crash_area.size > 0 ) + if ( kexec_crash_area.size > 0 && kexec_crash_area.start > 0) { unsigned long kdump_start, kdump_size, k; diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/traps.c Thu Jan 18 15:18:07 2007 +0000 @@ -1121,7 +1121,7 @@ static int emulate_privileged_op(struct { struct vcpu *v = current; unsigned long *reg, eip = regs->eip, res; - u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0, rex = 0; + u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0, lock = 0, rex = 0; enum { lm_seg_none, lm_seg_fs, lm_seg_gs } lm_ovr = lm_seg_none; unsigned int port, i, data_sel, ar, data, rc; unsigned int op_bytes, op_default, ad_bytes, ad_default; @@ -1184,6 +1184,7 @@ static int emulate_privileged_op(struct data_sel = regs->ss; continue; case 0xf0: /* LOCK */ + lock = 1; continue; case 0xf2: /* REPNE/REPNZ */ case 0xf3: /* REP/REPE/REPZ */ @@ -1210,6 +1211,9 @@ static int emulate_privileged_op(struct if ( opcode == 0x0f ) goto twobyte_opcode; + if ( lock ) + goto fail; + /* Input/Output String instructions. */ if ( (opcode >= 0x6c) && (opcode <= 0x6f) ) { @@ -1472,6 +1476,8 @@ static int emulate_privileged_op(struct /* Privileged (ring 0) instructions. */ opcode = insn_fetch(u8, code_base, eip, code_limit); + if ( lock && (opcode & ~3) != 0x20 ) + goto fail; switch ( opcode ) { case 0x06: /* CLTS */ @@ -1490,7 +1496,7 @@ static int emulate_privileged_op(struct case 0x20: /* MOV CR?,<reg> */ opcode = insn_fetch(u8, code_base, eip, code_limit); - modrm_reg |= (opcode >> 3) & 7; + modrm_reg += ((opcode >> 3) & 7) + (lock << 3); modrm_rm |= (opcode >> 0) & 7; reg = decode_register(modrm_rm, regs, 0); switch ( modrm_reg ) @@ -1530,7 +1536,7 @@ static int emulate_privileged_op(struct case 0x21: /* MOV DR?,<reg> */ opcode = insn_fetch(u8, code_base, eip, code_limit); - modrm_reg |= (opcode >> 3) & 7; + modrm_reg += ((opcode >> 3) & 7) + (lock << 3); modrm_rm |= (opcode >> 0) & 7; reg = decode_register(modrm_rm, regs, 0); if ( (res = do_get_debugreg(modrm_reg)) > (unsigned long)-256 ) @@ -1540,7 +1546,7 @@ static int emulate_privileged_op(struct case 0x22: /* MOV <reg>,CR? 
*/ opcode = insn_fetch(u8, code_base, eip, code_limit); - modrm_reg |= (opcode >> 3) & 7; + modrm_reg += ((opcode >> 3) & 7) + (lock << 3); modrm_rm |= (opcode >> 0) & 7; reg = decode_register(modrm_rm, regs, 0); switch ( modrm_reg ) @@ -1588,7 +1594,7 @@ static int emulate_privileged_op(struct case 0x23: /* MOV <reg>,DR? */ opcode = insn_fetch(u8, code_base, eip, code_limit); - modrm_reg |= (opcode >> 3) & 7; + modrm_reg += ((opcode >> 3) & 7) + (lock << 3); modrm_rm |= (opcode >> 0) & 7; reg = decode_register(modrm_rm, regs, 0); if ( do_set_debugreg(modrm_reg, *reg) != 0 ) @@ -1854,7 +1860,7 @@ static int dummy_nmi_callback(struct cpu } static nmi_callback_t nmi_callback = dummy_nmi_callback; - + asmlinkage void do_nmi(struct cpu_user_regs *regs) { unsigned int cpu = smp_processor_id(); diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/x86_64/compat/mm.c --- a/xen/arch/x86/x86_64/compat/mm.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/x86_64/compat/mm.c Thu Jan 18 15:18:07 2007 +0000 @@ -1,6 +1,7 @@ #ifdef CONFIG_COMPAT #include <xen/event.h> +#include <xen/multicall.h> #include <compat/memory.h> #include <compat/xen.h> @@ -289,20 +290,27 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mm if ( err == __HYPERVISOR_mmuext_op ) { struct cpu_user_regs *regs = guest_cpu_user_regs(); - unsigned int left = regs->ecx & ~MMU_UPDATE_PREEMPTED; - - BUG_ON(!(regs->ecx & MMU_UPDATE_PREEMPTED)); + struct mc_state *mcs = &this_cpu(mc_state); + unsigned int arg1 = !test_bit(_MCSF_in_multicall, &mcs->flags) + ? regs->ecx + : mcs->call.args[1]; + unsigned int left = arg1 & ~MMU_UPDATE_PREEMPTED; + + BUG_ON(left == arg1); BUG_ON(left > count); guest_handle_add_offset(nat_ops, count - left); BUG_ON(left + i < count); guest_handle_add_offset(cmp_uops, (signed int)(count - left - i)); left = 1; BUG_ON(!hypercall_xlat_continuation(&left, 0x01, nat_ops, cmp_uops)); - BUG_ON(left != regs->ecx); - regs->ecx += count - i; + BUG_ON(left != arg1); + if (!test_bit(_MCSF_in_multicall, &mcs->flags)) + regs->_ecx += count - i; + else + mcs->compat_call.args[1] += count - i; } else - BUG_ON(rc > 0); + BUG_ON(err > 0); rc = err; } diff -r 3464bb656a9c -r 8475a4e0425e xen/arch/x86/x86_emulate.c --- a/xen/arch/x86/x86_emulate.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/arch/x86/x86_emulate.c Thu Jan 18 15:18:07 2007 +0000 @@ -3,7 +3,21 @@ * * Generic x86 (32-bit and 64-bit) instruction decoder and emulator. * - * Copyright (c) 2005 Keir Fraser + * Copyright (c) 2005-2007 Keir Fraser + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef __XEN__ @@ -33,9 +47,8 @@ #define SrcReg (1<<3) /* Register operand. */ #define SrcMem (2<<3) /* Memory operand. */ #define SrcMem16 (3<<3) /* Memory operand (16-bit). */ -#define SrcMem32 (4<<3) /* Memory operand (32-bit). */ -#define SrcImm (5<<3) /* Immediate operand. 
*/ -#define SrcImmByte (6<<3) /* 8-bit sign-extended immediate operand. */ +#define SrcImm (4<<3) /* Immediate operand. */ +#define SrcImmByte (5<<3) /* 8-bit sign-extended immediate operand. */ #define SrcMask (7<<3) /* Generic ModRM decode. */ #define ModRM (1<<6) @@ -62,19 +75,19 @@ static uint8_t opcode_table[256] = { /* 0x20 - 0x27 */ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, 0, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps, /* 0x28 - 0x2F */ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, 0, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps, /* 0x30 - 0x37 */ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, 0, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps, /* 0x38 - 0x3F */ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, - ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, 0, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps, /* 0x40 - 0x4F */ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, @@ -85,9 +98,13 @@ static uint8_t opcode_table[256] = { ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, - /* 0x60 - 0x6F */ - 0, 0, 0, DstReg|SrcMem32|ModRM|Mov /* movsxd (x86/64) */, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x60 - 0x67 */ + ImplicitOps, ImplicitOps, DstReg|SrcMem|ModRM, DstReg|SrcMem16|ModRM|Mov, + 0, 0, 0, 0, + /* 0x68 - 0x6F */ + ImplicitOps|Mov, DstMem|SrcImm|ModRM|Mov, + ImplicitOps|Mov, DstMem|SrcImmByte|ModRM|Mov, + 0, 0, 0, 0, /* 0x70 - 0x77 */ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, @@ -107,13 +124,14 @@ static uint8_t opcode_table[256] = { ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, /* 0x98 - 0x9F */ - 0, 0, 0, 0, 0, 0, 0, 0, + ImplicitOps, ImplicitOps, 0, 0, 0, 0, ImplicitOps, ImplicitOps, /* 0xA0 - 0xA7 */ ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, 0, 0, /* 0xA8 - 0xAF */ - 0, 0, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, + ByteOp|DstReg|SrcImm, DstReg|SrcImm, + ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, 0, 0, /* 0xB0 - 0xB7 */ ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov, @@ -124,20 +142,21 @@ static uint8_t opcode_table[256] = { DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, /* 0xC0 - 0xC7 */ - ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, 0, 0, + ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, + ImplicitOps, ImplicitOps, 0, 0, ByteOp|DstMem|SrcImm|ModRM|Mov, DstMem|SrcImm|ModRM|Mov, /* 0xC8 - 0xCF */ 0, 0, 0, 0, 0, 0, 0, 0, /* 0xD0 - 0xD7 */ ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, - 0, 0, 0, 0, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, /* 0xD8 - 0xDF */ 0, 0, 0, 0, 0, 0, 0, 0, /* 0xE0 - 0xE7 */ - 0, 0, 0, ImplicitOps, 0, 0, 0, 0, + ImplicitOps, ImplicitOps, ImplicitOps, 
ImplicitOps, 0, 0, 0, 0, /* 0xE8 - 0xEF */ - 0, ImplicitOps, 0, ImplicitOps, 0, 0, 0, 0, + ImplicitOps, ImplicitOps, 0, ImplicitOps, 0, 0, 0, 0, /* 0xF0 - 0xF7 */ 0, 0, 0, 0, 0, ImplicitOps, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM, @@ -149,8 +168,11 @@ static uint8_t twobyte_table[256] = { static uint8_t twobyte_table[256] = { /* 0x00 - 0x0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, - /* 0x10 - 0x1F */ - 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0, + /* 0x10 - 0x17 */ + 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x18 - 0x1F */ + ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, + ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, /* 0x20 - 0x2F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30 - 0x3F */ @@ -177,24 +199,34 @@ static uint8_t twobyte_table[256] = { /* 0x88 - 0x8F */ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - /* 0x90 - 0x9F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x90 - 0x97 */ + ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, + ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, + ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, + ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, + /* 0x98 - 0x9F */ + ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, + ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, + ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, + ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov, /* 0xA0 - 0xA7 */ 0, 0, 0, DstBitBase|SrcReg|ModRM, 0, 0, 0, 0, /* 0xA8 - 0xAF */ - 0, 0, 0, DstBitBase|SrcReg|ModRM, 0, 0, 0, 0, + 0, 0, 0, DstBitBase|SrcReg|ModRM, 0, 0, 0, DstReg|SrcMem|ModRM, /* 0xB0 - 0xB7 */ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, DstBitBase|SrcReg|ModRM, 0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov, /* 0xB8 - 0xBF */ 0, 0, DstBitBase|SrcImmByte|ModRM, DstBitBase|SrcReg|ModRM, - 0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov, + DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov, /* 0xC0 - 0xC7 */ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, 0, 0, 0, 0, ImplicitOps|ModRM, /* 0xC8 - 0xCF */ - 0, 0, 0, 0, 0, 0, 0, 0, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, /* 0xD0 - 0xDF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xE0 - 0xEF */ @@ -205,7 +237,7 @@ static uint8_t twobyte_table[256] = { /* Type, address-of, and value of an instruction's operand. */ struct operand { - enum { OP_REG, OP_MEM, OP_IMM } type; + enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; unsigned int bytes; unsigned long val, orig_val; union { @@ -227,6 +259,12 @@ struct operand { #define EFLG_AF (1<<4) #define EFLG_PF (1<<2) #define EFLG_CF (1<<0) + +/* Exception definitions. 
*/ +#define EXC_DE 0 +#define EXC_BR 5 +#define EXC_UD 6 +#define EXC_GP 13 /* * Instruction emulation: @@ -285,7 +323,8 @@ do{ unsigned long _tmp; _op"w %"_wx"3,%1; " \ _POST_EFLAGS("0","4","2") \ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : _wy ((_src).val), "i" (EFLAGS_MASK) ); \ + : _wy ((_src).val), "i" (EFLAGS_MASK), \ + "m" (_eflags), "m" ((_dst).val) ); \ break; \ case 4: \ __asm__ __volatile__ ( \ @@ -293,7 +332,8 @@ do{ unsigned long _tmp; _op"l %"_lx"3,%1; " \ _POST_EFLAGS("0","4","2") \ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : _ly ((_src).val), "i" (EFLAGS_MASK) ); \ + : _ly ((_src).val), "i" (EFLAGS_MASK), \ + "m" (_eflags), "m" ((_dst).val) ); \ break; \ case 8: \ __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy); \ @@ -310,7 +350,8 @@ do{ unsigned long _tmp; _op"b %"_bx"3,%1; " \ _POST_EFLAGS("0","4","2") \ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : _by ((_src).val), "i" (EFLAGS_MASK) ); \ + : _by ((_src).val), "i" (EFLAGS_MASK), \ + "m" (_eflags), "m" ((_dst).val) ); \ break; \ default: \ __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy);\ @@ -341,7 +382,7 @@ do{ unsigned long _tmp; _op"b %1; " \ _POST_EFLAGS("0","3","2") \ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : "i" (EFLAGS_MASK) ); \ + : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \ break; \ case 2: \ __asm__ __volatile__ ( \ @@ -349,7 +390,7 @@ do{ unsigned long _tmp; _op"w %1; " \ _POST_EFLAGS("0","3","2") \ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : "i" (EFLAGS_MASK) ); \ + : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \ break; \ case 4: \ __asm__ __volatile__ ( \ @@ -357,7 +398,7 @@ do{ unsigned long _tmp; _op"l %1; " \ _POST_EFLAGS("0","3","2") \ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : "i" (EFLAGS_MASK) ); \ + : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \ break; \ case 8: \ __emulate_1op_8byte(_op, _dst, _eflags); \ @@ -373,7 +414,8 @@ do{ __asm__ __volatile__ ( _op"q %"_qx"3,%1; " \ _POST_EFLAGS("0","4","2") \ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : _qy ((_src).val), "i" (EFLAGS_MASK) ); \ + : _qy ((_src).val), "i" (EFLAGS_MASK), \ + "m" (_eflags), "m" ((_dst).val) ); \ } while (0) #define __emulate_1op_8byte(_op, _dst, _eflags) \ do{ __asm__ __volatile__ ( \ @@ -381,7 +423,7 @@ do{ __asm__ __volatile__ ( _op"q %1; " \ _POST_EFLAGS("0","3","2") \ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : "i" (EFLAGS_MASK) ); \ + : "i" (EFLAGS_MASK), "m" (_eflags), "m" ((_dst).val) ); \ } while (0) #elif defined(__i386__) #define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) @@ -390,38 +432,69 @@ do{ __asm__ __volatile__ ( /* Fetch next part of the instruction being emulated. */ #define insn_fetch_bytes(_size) \ -({ unsigned long _x, _eip = _truncate_ea(_regs.eip, def_ad_bytes); \ +({ unsigned long _x, _eip = _regs.eip; \ if ( !mode_64bit() ) _eip = (uint32_t)_eip; /* ignore upper dword */ \ + _regs.eip += (_size); /* real hardware doesn't truncate */ \ + generate_exception_if((uint8_t)(_regs.eip - ctxt->regs->eip) > 15, \ + EXC_GP); \ rc = ops->insn_fetch(x86_seg_cs, _eip, &_x, (_size), ctxt); \ if ( rc ) goto done; \ - _regs.eip += (_size); /* real hardware doesn't truncate */ \ _x; \ }) #define insn_fetch_type(_type) ((_type)insn_fetch_bytes(sizeof(_type))) -#define _truncate_ea(ea, byte_width) \ -({ unsigned long __ea = (ea); \ - (((byte_width) == sizeof(unsigned long)) ? 
__ea : \ - (__ea & ((1UL << ((byte_width) << 3)) - 1))); \ +#define _truncate_ea(ea, byte_width) \ +({ unsigned long __ea = (ea); \ + unsigned int _width = (byte_width); \ + ((_width == sizeof(unsigned long)) ? __ea : \ + (__ea & ((1UL << (_width << 3)) - 1))); \ }) #define truncate_ea(ea) _truncate_ea((ea), ad_bytes) #define mode_64bit() (def_ad_bytes == 8) + +#define fail_if(p) \ +do { \ + rc = (p) ? X86EMUL_UNHANDLEABLE : 0; \ + if ( rc ) goto done; \ +} while (0) + +/* In future we will be able to generate arbitrary exceptions. */ +#define generate_exception_if(p, e) fail_if(p) + +/* Given byte has even parity (even number of 1s)? */ +static int even_parity(uint8_t v) +{ + __asm__ ( "test %%al,%%al; setp %%al" + : "=a" (v) : "0" (v) ); + return v; +} /* Update address held in a register, based on addressing mode. */ #define _register_address_increment(reg, inc, byte_width) \ do { \ int _inc = (inc); /* signed type ensures sign extension to long */ \ - if ( (byte_width) == sizeof(unsigned long) ) \ + unsigned int _width = (byte_width); \ + if ( _width == sizeof(unsigned long) ) \ (reg) += _inc; \ else if ( mode_64bit() ) \ - (reg) = ((reg) + _inc) & ((1UL << ((byte_width) << 3)) - 1); \ + (reg) = ((reg) + _inc) & ((1UL << (_width << 3)) - 1); \ else \ - (reg) = ((reg) & ~((1UL << ((byte_width) << 3)) - 1)) | \ - (((reg) + _inc) & ((1UL << ((byte_width) << 3)) - 1)); \ + (reg) = ((reg) & ~((1UL << (_width << 3)) - 1)) | \ + (((reg) + _inc) & ((1UL << (_width << 3)) - 1)); \ } while (0) #define register_address_increment(reg, inc) \ _register_address_increment((reg), (inc), ad_bytes) + +#define sp_pre_dec(dec) ({ \ + _register_address_increment(_regs.esp, -(dec), ctxt->sp_size/8); \ + _truncate_ea(_regs.esp, ctxt->sp_size/8); \ +}) +#define sp_post_inc(inc) ({ \ + unsigned long __esp = _truncate_ea(_regs.esp, ctxt->sp_size/8); \ + _register_address_increment(_regs.esp, (inc), ctxt->sp_size/8); \ + __esp; \ +}) #define jmp_rel(rel) \ do { \ @@ -430,6 +503,92 @@ do { _regs.eip = ((op_bytes == 2) \ ? (uint16_t)_regs.eip : (uint32_t)_regs.eip); \ } while (0) + +/* + * Unsigned multiplication with double-word result. + * IN: Multiplicand=m[0], Multiplier=m[1] + * OUT: Return CF/OF (overflow status); Result=m[1]:m[0] + */ +static int mul_dbl(unsigned long m[2]) +{ + int rc; + asm ( "mul %4; seto %b2" + : "=a" (m[0]), "=d" (m[1]), "=q" (rc) + : "0" (m[0]), "1" (m[1]), "2" (0) ); + return rc; +} + +/* + * Signed multiplication with double-word result. + * IN: Multiplicand=m[0], Multiplier=m[1] + * OUT: Return CF/OF (overflow status); Result=m[1]:m[0] + */ +static int imul_dbl(unsigned long m[2]) +{ + int rc; + asm ( "imul %4; seto %b2" + : "=a" (m[0]), "=d" (m[1]), "=q" (rc) + : "0" (m[0]), "1" (m[1]), "2" (0) ); + return rc; +} + +/* + * Unsigned division of double-word dividend. + * IN: Dividend=u[1]:u[0], Divisor=v + * OUT: Return 1: #DE + * Return 0: Quotient=u[0], Remainder=u[1] + */ +static int div_dbl(unsigned long u[2], unsigned long v) +{ + if ( (v == 0) || (u[1] > v) || ((u[1] == v) && (u[0] != 0)) ) + return 1; + asm ( "div %4" + : "=a" (u[0]), "=d" (u[1]) + : "0" (u[0]), "1" (u[1]), "r" (v) ); + return 0; +} + +/* + * Signed division of double-word dividend. + * IN: Dividend=u[1]:u[0], Divisor=v + * OUT: Return 1: #DE + * Return 0: Quotient=u[0], Remainder=u[1] + * NB. We don't use idiv directly as it's moderately hard to work out + * ahead of time whether it will #DE, which we cannot allow to happen. 
+ */ +static int idiv_dbl(unsigned long u[2], unsigned long v) +{ + int negu = (long)u[1] < 0, negv = (long)v < 0; + + /* u = abs(u) */ + if ( negu ) + { + u[1] = ~u[1]; + if ( (u[0] = -u[0]) == 0 ) + u[1]++; + } + + /* abs(u) / abs(v) */ + if ( div_dbl(u, negv ? -v : v) ) + return 1; + + /* Remainder has same sign as dividend. It cannot overflow. */ + if ( negu ) + u[1] = -u[1]; + + /* Quotient is overflowed if sign bit is set. */ + if ( negu ^ negv ) + { + if ( (long)u[0] >= 0 ) + u[0] = -u[0]; + else if ( (u[0] << 1) != 0 ) /* == 0x80...0 is okay */ + return 1; + } + else if ( (long)u[0] < 0 ) + return 1; + + return 0; +} static int test_cc( @@ -519,8 +678,8 @@ x86_emulate( uint8_t b, d, sib, sib_index, sib_base, twobyte = 0, rex_prefix = 0; uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0; - unsigned int op_bytes, ad_bytes, def_ad_bytes; - unsigned int lock_prefix = 0, rep_prefix = 0, i; + unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes; + unsigned int lock_prefix = 0, rep_prefix = 0; int rc = 0; struct operand src, dst; @@ -532,28 +691,25 @@ x86_emulate( ea.mem.seg = x86_seg_ds; ea.mem.off = 0; - op_bytes = ad_bytes = def_ad_bytes = ctxt->address_bytes; + op_bytes = def_op_bytes = ad_bytes = def_ad_bytes = ctxt->addr_size/8; if ( op_bytes == 8 ) { - op_bytes = 4; + op_bytes = def_op_bytes = 4; #ifndef __x86_64__ return -1; #endif } /* Prefix bytes. */ - for ( i = 0; i < 8; i++ ) + for ( ; ; ) { switch ( b = insn_fetch_type(uint8_t) ) { case 0x66: /* operand-size override */ - op_bytes ^= 6; /* switch between 2/4 bytes */ + op_bytes = def_op_bytes ^ 6; break; case 0x67: /* address-size override */ - if ( mode_64bit() ) - ad_bytes ^= 12; /* switch between 4/8 bytes */ - else - ad_bytes ^= 6; /* switch between 2/4 bytes */ + ad_bytes = def_ad_bytes ^ (mode_64bit() ? 12 : 6); break; case 0x2e: /* CS override */ ea.mem.seg = x86_seg_cs; @@ -613,6 +769,9 @@ x86_emulate( if ( d == 0 ) goto cannot_emulate; } + + /* Lock prefix is allowed only on RMW instructions. */ + generate_exception_if((d & Mov) && lock_prefix, EXC_GP); /* ModRM and SIB bytes. */ if ( d & ModRM ) @@ -746,9 +905,6 @@ x86_emulate( case SrcMem16: ea.bytes = 2; goto srcmem_common; - case SrcMem32: - ea.bytes = 4; - goto srcmem_common; case SrcMem: ea.bytes = (d & ByteOp) ? 1 : op_bytes; srcmem_common: @@ -817,7 +973,7 @@ x86_emulate( { /* * EA += BitOffset DIV op_bytes*8 - * BitOffset = BitOffset MOD op_byte*8 + * BitOffset = BitOffset MOD op_bytes*8 * DIV truncates towards negative infinity. * MOD always produces a positive result. */ @@ -853,13 +1009,18 @@ x86_emulate( case 8: dst.val = *(uint64_t *)dst.reg; break; } } - else if ( !(d & Mov) && /* optimisation - avoid slow emulated read */ - (rc = ops->read(dst.mem.seg, dst.mem.off, - &dst.val, dst.bytes, ctxt)) ) - goto done; - break; - } - dst.orig_val = dst.val; + else if ( !(d & Mov) ) /* optimisation - avoid slow emulated read */ + { + if ( (rc = ops->read(dst.mem.seg, dst.mem.off, + &dst.val, dst.bytes, ctxt)) ) + goto done; + dst.orig_val = dst.val; + } + break; + } + + /* LOCK prefix allowed only on instructions with memory destination. */ + generate_exception_if(lock_prefix && (dst.type != OP_MEM), EXC_GP); if ( twobyte ) goto twobyte_insn; @@ -868,67 +1029,136 @@ x86_emulate( { case 0x04 ... 0x05: /* add imm,%%eax */ dst.reg = (unsigned long *)&_regs.eax; - dst.val = dst.orig_val = _regs.eax; + dst.val = _regs.eax; case 0x00 ... 0x03: add: /* add */ emulate_2op_SrcV("add", src, dst, _regs.eflags); break; case 0x0c ... 
0x0d: /* or imm,%%eax */ dst.reg = (unsigned long *)&_regs.eax; - dst.val = dst.orig_val = _regs.eax; + dst.val = _regs.eax; case 0x08 ... 0x0b: or: /* or */ emulate_2op_SrcV("or", src, dst, _regs.eflags); break; case 0x14 ... 0x15: /* adc imm,%%eax */ dst.reg = (unsigned long *)&_regs.eax; - dst.val = dst.orig_val = _regs.eax; + dst.val = _regs.eax; case 0x10 ... 0x13: adc: /* adc */ emulate_2op_SrcV("adc", src, dst, _regs.eflags); break; case 0x1c ... 0x1d: /* sbb imm,%%eax */ dst.reg = (unsigned long *)&_regs.eax; - dst.val = dst.orig_val = _regs.eax; + dst.val = _regs.eax; case 0x18 ... 0x1b: sbb: /* sbb */ emulate_2op_SrcV("sbb", src, dst, _regs.eflags); break; case 0x24 ... 0x25: /* and imm,%%eax */ dst.reg = (unsigned long *)&_regs.eax; - dst.val = dst.orig_val = _regs.eax; + dst.val = _regs.eax; case 0x20 ... 0x23: and: /* and */ emulate_2op_SrcV("and", src, dst, _regs.eflags); break; case 0x2c ... 0x2d: /* sub imm,%%eax */ dst.reg = (unsigned long *)&_regs.eax; - dst.val = dst.orig_val = _regs.eax; + dst.val = _regs.eax; case 0x28 ... 0x2b: sub: /* sub */ emulate_2op_SrcV("sub", src, dst, _regs.eflags); break; case 0x34 ... 0x35: /* xor imm,%%eax */ dst.reg = (unsigned long *)&_regs.eax; - dst.val = dst.orig_val = _regs.eax; + dst.val = _regs.eax; case 0x30 ... 0x33: xor: /* xor */ emulate_2op_SrcV("xor", src, dst, _regs.eflags); break; case 0x3c ... 0x3d: /* cmp imm,%%eax */ dst.reg = (unsigned long *)&_regs.eax; - dst.val = dst.orig_val = _regs.eax; + dst.val = _regs.eax; case 0x38 ... 0x3b: cmp: /* cmp */ emulate_2op_SrcV("cmp", src, dst, _regs.eflags); break; - case 0x63: /* movsxd */ - if ( !mode_64bit() ) - goto cannot_emulate; - dst.val = (int32_t)src.val; - break; - - case 0x80 ... 0x83: /* Grp1 */ + case 0x62: /* bound */ { + unsigned long src_val2; + int lb, ub, idx; + generate_exception_if(mode_64bit() || (src.type != OP_MEM), EXC_UD); + if ( (rc = ops->read(src.mem.seg, src.mem.off + op_bytes, + &src_val2, op_bytes, ctxt)) ) + goto done; + ub = (op_bytes == 2) ? (int16_t)src_val2 : (int32_t)src_val2; + lb = (op_bytes == 2) ? (int16_t)src.val : (int32_t)src.val; + idx = (op_bytes == 2) ? (int16_t)dst.val : (int32_t)dst.val; + generate_exception_if((idx < lb) || (idx > ub), EXC_BR); + dst.type = OP_NONE; + break; + } + + case 0x63: /* movsxd (x86/64) / arpl (x86/32) */ + if ( mode_64bit() ) + { + /* movsxd */ + if ( src.type == OP_REG ) + src.val = *(int32_t *)src.reg; + else if ( (rc = ops->read(src.mem.seg, src.mem.off, + &src.val, 4, ctxt)) ) + goto done; + dst.val = (int32_t)src.val; + } + else + { + /* arpl */ + uint16_t src_val = dst.val; + dst = src; + _regs.eflags &= ~EFLG_ZF; + _regs.eflags |= ((src_val & 3) > (dst.val & 3)) ? 
EFLG_ZF : 0; + if ( _regs.eflags & EFLG_ZF ) + dst.val = (dst.val & ~3) | (src_val & 3); + else + dst.type = OP_NONE; + } + break; + + case 0x69: /* imul imm16/32 */ + case 0x6b: /* imul imm8 */ { + unsigned long reg = *(long *)decode_register(modrm_reg, &_regs, 0); + _regs.eflags &= ~(EFLG_OF|EFLG_CF); + switch ( dst.bytes ) + { + case 2: + dst.val = ((uint32_t)(int16_t)src.val * + (uint32_t)(int16_t)reg); + if ( (int16_t)dst.val != (uint32_t)dst.val ) + _regs.eflags |= EFLG_OF|EFLG_CF; + break; +#ifdef __x86_64__ + case 4: + dst.val = ((uint64_t)(int32_t)src.val * + (uint64_t)(int32_t)reg); + if ( (int32_t)dst.val != dst.val ) + _regs.eflags |= EFLG_OF|EFLG_CF; + break; +#endif + default: { + unsigned long m[2] = { src.val, reg }; + if ( imul_dbl(m) ) + _regs.eflags |= EFLG_OF|EFLG_CF; + dst.val = m[0]; + break; + } + } + dst.type = OP_REG; + dst.reg = decode_register(modrm_reg, &_regs, 0); + break; + } + + case 0x82: /* Grp1 (x86/32 only) */ + generate_exception_if(mode_64bit(), EXC_UD); + case 0x80: case 0x81: case 0x83: /* Grp1 */ switch ( modrm_reg & 7 ) { case 0: goto add; @@ -942,6 +1172,9 @@ x86_emulate( } break; + case 0xa8 ... 0xa9: /* test imm,%%eax */ + dst.reg = (unsigned long *)&_regs.eax; + dst.val = _regs.eax; case 0x84 ... 0x85: test: /* test */ emulate_2op_SrcV("test", src, dst, _regs.eflags); break; @@ -960,8 +1193,9 @@ x86_emulate( lock_prefix = 1; break; + case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ + generate_exception_if((modrm_reg & 7) != 0, EXC_UD); case 0x88 ... 0x8b: /* mov */ - case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ dst.val = src.val; break; @@ -970,13 +1204,13 @@ x86_emulate( break; case 0x8f: /* pop (sole member of Grp1a) */ + generate_exception_if((modrm_reg & 7) != 0, EXC_UD); /* 64-bit mode: POP defaults to a 64-bit operand. */ if ( mode_64bit() && (dst.bytes == 4) ) dst.bytes = 8; - if ( (rc = ops->read(x86_seg_ss, truncate_ea(_regs.esp), + if ( (rc = ops->read(x86_seg_ss, sp_post_inc(dst.bytes), &dst.val, dst.bytes, ctxt)) != 0 ) goto done; - register_address_increment(_regs.esp, dst.bytes); break; case 0xb0 ... 
0xb7: /* mov imm8,r8 */ @@ -1051,12 +1285,191 @@ x86_emulate( case 3: /* neg */ emulate_1op("neg", dst, _regs.eflags); break; + case 4: /* mul */ + src = dst; + dst.type = OP_REG; + dst.reg = (unsigned long *)&_regs.eax; + dst.val = *dst.reg; + _regs.eflags &= ~(EFLG_OF|EFLG_CF); + switch ( src.bytes ) + { + case 1: + dst.val *= src.val; + if ( (uint8_t)dst.val != (uint16_t)dst.val ) + _regs.eflags |= EFLG_OF|EFLG_CF; + break; + case 2: + dst.val *= src.val; + if ( (uint16_t)dst.val != (uint32_t)dst.val ) + _regs.eflags |= EFLG_OF|EFLG_CF; + *(uint16_t *)&_regs.edx = dst.val >> 16; + break; +#ifdef __x86_64__ + case 4: + dst.val *= src.val; + if ( (uint32_t)dst.val != dst.val ) + _regs.eflags |= EFLG_OF|EFLG_CF; + _regs.edx = (uint32_t)(dst.val >> 32); + break; +#endif + default: { + unsigned long m[2] = { src.val, dst.val }; + if ( mul_dbl(m) ) + _regs.eflags |= EFLG_OF|EFLG_CF; + _regs.edx = m[1]; + dst.val = m[0]; + break; + } + } + break; + case 5: /* imul */ + src = dst; + dst.type = OP_REG; + dst.reg = (unsigned long *)&_regs.eax; + dst.val = *dst.reg; + _regs.eflags &= ~(EFLG_OF|EFLG_CF); + switch ( src.bytes ) + { + case 1: + dst.val = ((uint16_t)(int8_t)src.val * + (uint16_t)(int8_t)dst.val); + if ( (int8_t)dst.val != (uint16_t)dst.val ) + _regs.eflags |= EFLG_OF|EFLG_CF; + break; + case 2: + dst.val = ((uint32_t)(int16_t)src.val * + (uint32_t)(int16_t)dst.val); + if ( (int16_t)dst.val != (uint32_t)dst.val ) + _regs.eflags |= EFLG_OF|EFLG_CF; + *(uint16_t *)&_regs.edx = dst.val >> 16; + break; +#ifdef __x86_64__ + case 4: + dst.val = ((uint64_t)(int32_t)src.val * + (uint64_t)(int32_t)dst.val); + if ( (int32_t)dst.val != dst.val ) + _regs.eflags |= EFLG_OF|EFLG_CF; + _regs.edx = (uint32_t)(dst.val >> 32); + break; +#endif + default: { + unsigned long m[2] = { src.val, dst.val }; + if ( imul_dbl(m) ) + _regs.eflags |= EFLG_OF|EFLG_CF; + _regs.edx = m[1]; + dst.val = m[0]; + break; + } + } + break; + case 6: /* div */ { + unsigned long u[2], v; + src = dst; + dst.type = OP_REG; + dst.reg = (unsigned long *)&_regs.eax; + switch ( src.bytes ) + { + case 1: + u[0] = (uint16_t)_regs.eax; + u[1] = 0; + v = (uint8_t)src.val; + generate_exception_if( + div_dbl(u, v) || ((uint8_t)u[0] != (uint16_t)u[0]), + EXC_DE); + dst.val = (uint8_t)u[0]; + ((uint8_t *)&_regs.eax)[1] = u[1]; + break; + case 2: + u[0] = ((uint32_t)_regs.edx << 16) | (uint16_t)_regs.eax; + u[1] = 0; + v = (uint16_t)src.val; + generate_exception_if( + div_dbl(u, v) || ((uint16_t)u[0] != (uint32_t)u[0]), + EXC_DE); + dst.val = (uint16_t)u[0]; + *(uint16_t *)&_regs.edx = u[1]; + break; +#ifdef __x86_64__ + case 4: + u[0] = (_regs.edx << 32) | (uint32_t)_regs.eax; + u[1] = 0; + v = (uint32_t)src.val; + generate_exception_if( + div_dbl(u, v) || ((uint32_t)u[0] != u[0]), + EXC_DE); + dst.val = (uint32_t)u[0]; + _regs.edx = (uint32_t)u[1]; + break; +#endif + default: + u[0] = _regs.eax; + u[1] = _regs.edx; + v = src.val; + generate_exception_if(div_dbl(u, v), EXC_DE); + dst.val = u[0]; + _regs.edx = u[1]; + break; + } + break; + } + case 7: /* idiv */ { + unsigned long u[2], v; + src = dst; + dst.type = OP_REG; + dst.reg = (unsigned long *)&_regs.eax; + switch ( src.bytes ) + { + case 1: + u[0] = (int16_t)_regs.eax; + u[1] = ((long)u[0] < 0) ? 
~0UL : 0UL; + v = (int8_t)src.val; + generate_exception_if( + idiv_dbl(u, v) || ((int8_t)u[0] != (int16_t)u[0]), + EXC_DE); + dst.val = (int8_t)u[0]; + ((int8_t *)&_regs.eax)[1] = u[1]; + break; + case 2: + u[0] = (int32_t)((_regs.edx << 16) | (uint16_t)_regs.eax); + u[1] = ((long)u[0] < 0) ? ~0UL : 0UL; + v = (int16_t)src.val; + generate_exception_if( + idiv_dbl(u, v) || ((int16_t)u[0] != (int32_t)u[0]), + EXC_DE); + dst.val = (int16_t)u[0]; + *(int16_t *)&_regs.edx = u[1]; + break; +#ifdef __x86_64__ + case 4: + u[0] = (_regs.edx << 32) | (uint32_t)_regs.eax; + u[1] = ((long)u[0] < 0) ? ~0UL : 0UL; + v = (int32_t)src.val; + generate_exception_if( + idiv_dbl(u, v) || ((int32_t)u[0] != u[0]), + EXC_DE); + dst.val = (int32_t)u[0]; + _regs.edx = (uint32_t)u[1]; + break; +#endif + default: + u[0] = _regs.eax; + u[1] = _regs.edx; + v = src.val; + generate_exception_if(idiv_dbl(u, v), EXC_DE); + dst.val = u[0]; + _regs.edx = u[1]; + break; + } + break; + } default: goto cannot_emulate; } break; - case 0xfe ... 0xff: /* Grp4/Grp5 */ + case 0xfe: /* Grp4 */ + generate_exception_if((modrm_reg & 7) >= 2, EXC_UD); + case 0xff: /* Grp5 */ switch ( modrm_reg & 7 ) { case 0: /* inc */ @@ -1064,6 +1477,20 @@ x86_emulate( break; case 1: /* dec */ emulate_1op("dec", dst, _regs.eflags); + break; + case 2: /* call (near) */ + case 4: /* jmp (near) */ + if ( ((op_bytes = dst.bytes) != 8) && mode_64bit() ) + { + dst.bytes = op_bytes = 8; + if ( (rc = ops->read(dst.mem.seg, dst.mem.off, + &dst.val, 8, ctxt)) != 0 ) + goto done; + } + src.val = _regs.eip; + _regs.eip = dst.val; + if ( (modrm_reg & 7) == 2 ) + goto push; /* call */ break; case 6: /* push */ /* 64-bit mode: PUSH defaults to a 64-bit operand. */ @@ -1074,12 +1501,13 @@ x86_emulate( &dst.val, 8, ctxt)) != 0 ) goto done; } - register_address_increment(_regs.esp, -dst.bytes); - if ( (rc = ops->write(x86_seg_ss, truncate_ea(_regs.esp), + if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes), dst.val, dst.bytes, ctxt)) != 0 ) goto done; - dst.val = dst.orig_val; /* skanky: disable writeback */ - break; + dst.type = OP_NONE; + break; + case 7: + generate_exception_if(1, EXC_UD); default: goto cannot_emulate; } @@ -1087,33 +1515,32 @@ x86_emulate( } writeback: - if ( (d & Mov) || (dst.orig_val != dst.val) ) + switch ( dst.type ) { - switch ( dst.type ) - { - case OP_REG: - /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */ - switch ( dst.bytes ) - { - case 1: *(uint8_t *)dst.reg = (uint8_t)dst.val; break; - case 2: *(uint16_t *)dst.reg = (uint16_t)dst.val; break; - case 4: *dst.reg = (uint32_t)dst.val; break; /* 64b: zero-ext */ - case 8: *dst.reg = dst.val; break; - } - break; - case OP_MEM: - if ( lock_prefix ) - rc = ops->cmpxchg( - dst.mem.seg, dst.mem.off, dst.orig_val, - dst.val, dst.bytes, ctxt); - else - rc = ops->write( - dst.mem.seg, dst.mem.off, dst.val, dst.bytes, ctxt); - if ( rc != 0 ) - goto done; - default: - break; - } + case OP_REG: + /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. 
*/ + switch ( dst.bytes ) + { + case 1: *(uint8_t *)dst.reg = (uint8_t)dst.val; break; + case 2: *(uint16_t *)dst.reg = (uint16_t)dst.val; break; + case 4: *dst.reg = (uint32_t)dst.val; break; /* 64b: zero-ext */ + case 8: *dst.reg = dst.val; break; + } + break; + case OP_MEM: + if ( !(d & Mov) && (dst.orig_val == dst.val) ) + /* nothing to do */; + else if ( lock_prefix ) + rc = ops->cmpxchg( + dst.mem.seg, dst.mem.off, dst.orig_val, + dst.val, dst.bytes, ctxt); + else + rc = ops->write( + dst.mem.seg, dst.mem.off, dst.val, dst.bytes, ctxt); + if ( rc != 0 ) + goto done; + default: + break; } /* Commit shadow register state. */ @@ -1123,8 +1550,13 @@ x86_emulate( return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; special_insn: - /* Default action: disable writeback. There may be no dest operand. */ - dst.orig_val = dst.val; + dst.type = OP_NONE; + + /* + * The only implicit-operands instruction allowed a LOCK prefix is + * CMPXCHG{8,16}B. + */ + generate_exception_if(lock_prefix && (b != 0xc7), EXC_GP); if ( twobyte ) goto twobyte_special_insn; @@ -1142,11 +1574,70 @@ x86_emulate( switch ( b ) { + case 0x27: /* daa */ { + uint8_t al = _regs.eax; + unsigned long eflags = _regs.eflags; + generate_exception_if(mode_64bit(), EXC_UD); + _regs.eflags &= ~(EFLG_CF|EFLG_AF); + if ( ((al & 0x0f) > 9) || (eflags & EFLG_AF) ) + { + *(uint8_t *)&_regs.eax += 6; + _regs.eflags |= EFLG_AF; + } + if ( (al > 0x99) || (eflags & EFLG_CF) ) + { + *(uint8_t *)&_regs.eax += 0x60; + _regs.eflags |= EFLG_CF; + } + _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF); + _regs.eflags |= ((uint8_t)_regs.eax == 0) ? EFLG_ZF : 0; + _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0; + _regs.eflags |= even_parity(_regs.eax) ? EFLG_PF : 0; + break; + } + + case 0x2f: /* das */ { + uint8_t al = _regs.eax; + unsigned long eflags = _regs.eflags; + generate_exception_if(mode_64bit(), EXC_UD); + _regs.eflags &= ~(EFLG_CF|EFLG_AF); + if ( ((al & 0x0f) > 9) || (eflags & EFLG_AF) ) + { + _regs.eflags |= EFLG_AF; + if ( (al < 6) || (eflags & EFLG_CF) ) + _regs.eflags |= EFLG_CF; + *(uint8_t *)&_regs.eax -= 6; + } + if ( (al > 0x99) || (eflags & EFLG_CF) ) + { + *(uint8_t *)&_regs.eax -= 0x60; + _regs.eflags |= EFLG_CF; + } + _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF); + _regs.eflags |= ((uint8_t)_regs.eax == 0) ? EFLG_ZF : 0; + _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0; + _regs.eflags |= even_parity(_regs.eax) ? EFLG_PF : 0; + break; + } + + case 0x37: /* aaa */ + case 0x3f: /* aas */ + generate_exception_if(mode_64bit(), EXC_UD); + _regs.eflags &= ~EFLG_CF; + if ( ((uint8_t)_regs.eax > 9) || (_regs.eflags & EFLG_AF) ) + { + ((uint8_t *)&_regs.eax)[0] += (b == 0x37) ? 6 : -6; + ((uint8_t *)&_regs.eax)[1] += (b == 0x37) ? 1 : -1; + _regs.eflags |= EFLG_CF | EFLG_AF; + } + ((uint8_t *)&_regs.eax)[0] &= 0x0f; + break; + case 0x40 ... 0x4f: /* inc/dec reg */ dst.type = OP_REG; dst.reg = decode_register(b & 7, &_regs, 0); dst.bytes = op_bytes; - dst.orig_val = dst.val = *dst.reg; + dst.val = *dst.reg; if ( b & 8 ) emulate_1op("dec", dst, _regs.eflags); else @@ -1154,16 +1645,9 @@ x86_emulate( break; case 0x50 ... 0x57: /* push reg */ - dst.type = OP_MEM; - dst.bytes = op_bytes; - if ( mode_64bit() && (dst.bytes == 4) ) - dst.bytes = 8; - dst.val = *(unsigned long *)decode_register( + src.val = *(unsigned long *)decode_register( (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0); - register_address_increment(_regs.esp, -dst.bytes); - dst.mem.seg = x86_seg_ss; - dst.mem.off = truncate_ea(_regs.esp); - break; + goto push; case 0x58 ... 
0x5f: /* pop reg */ dst.type = OP_REG; @@ -1172,10 +1656,56 @@ x86_emulate( dst.bytes = op_bytes; if ( mode_64bit() && (dst.bytes == 4) ) dst.bytes = 8; - if ( (rc = ops->read(x86_seg_ss, truncate_ea(_regs.esp), + if ( (rc = ops->read(x86_seg_ss, sp_post_inc(dst.bytes), &dst.val, dst.bytes, ctxt)) != 0 ) goto done; - register_address_increment(_regs.esp, dst.bytes); + break; + + case 0x60: /* pusha */ { + int i; + unsigned long regs[] = { + _regs.eax, _regs.ecx, _regs.edx, _regs.ebx, + _regs.esp, _regs.ebp, _regs.esi, _regs.edi }; + generate_exception_if(mode_64bit(), EXC_UD); + for ( i = 0; i < 8; i++ ) + if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes), + regs[i], op_bytes, ctxt)) != 0 ) + goto done; + break; + } + + case 0x61: /* popa */ { + int i; + unsigned long dummy_esp, *regs[] = { + (unsigned long *)&_regs.edi, (unsigned long *)&_regs.esi, + (unsigned long *)&_regs.ebp, (unsigned long *)&dummy_esp, + (unsigned long *)&_regs.ebx, (unsigned long *)&_regs.edx, + (unsigned long *)&_regs.ecx, (unsigned long *)&_regs.eax }; + generate_exception_if(mode_64bit(), EXC_UD); + for ( i = 0; i < 8; i++ ) + if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes), + regs[i], op_bytes, ctxt)) != 0 ) + goto done; + break; + } + + case 0x68: /* push imm{16,32,64} */ + src.val = ((op_bytes == 2) + ? (int32_t)insn_fetch_type(int16_t) + : insn_fetch_type(int32_t)); + goto push; + + case 0x6a: /* push imm8 */ + src.val = insn_fetch_type(int8_t); + push: + d |= Mov; /* force writeback */ + dst.type = OP_MEM; + dst.bytes = op_bytes; + if ( mode_64bit() && (dst.bytes == 4) ) + dst.bytes = 8; + dst.val = src.val; + dst.mem.seg = x86_seg_ss; + dst.mem.off = sp_pre_dec(dst.bytes); break; case 0x70 ... 0x7f: /* jcc (short) */ { @@ -1196,8 +1726,40 @@ x86_emulate( src.val = *src.reg; dst.reg = decode_register( (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0); - dst.val = dst.orig_val = *dst.reg; + dst.val = *dst.reg; goto xchg; + + case 0x98: /* cbw/cwde/cdqe */ + switch ( op_bytes ) + { + case 2: *(int16_t *)&_regs.eax = (int8_t)_regs.eax; break; /* cbw */ + case 4: _regs.eax = (uint32_t)(int16_t)_regs.eax; break; /* cwde */ + case 8: _regs.eax = (int32_t)_regs.eax; break; /* cdqe */ + } + break; + + case 0x99: /* cwd/cdq/cqo */ + switch ( op_bytes ) + { + case 2: + *(int16_t *)&_regs.edx = ((int16_t)_regs.eax < 0) ? -1 : 0; + break; + case 4: + _regs.edx = (uint32_t)(((int32_t)_regs.eax < 0) ? -1 : 0); + break; + case 8: + _regs.edx = (_regs.eax < 0) ? -1 : 0; + break; + } + break; + + case 0x9e: /* sahf */ + *(uint8_t *)_regs.eflags = (((uint8_t *)&_regs.eax)[1] & 0xd7) | 0x02; + break; + + case 0x9f: /* lahf */ + ((uint8_t *)&_regs.eax)[1] = (_regs.eflags & 0xd7) | 0x02; + break; case 0xa0 ... 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */ /* Source EA is not encoded via ModRM. */ @@ -1253,6 +1815,81 @@ x86_emulate( _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes); break; + case 0xc2: /* ret imm16 (near) */ + case 0xc3: /* ret (near) */ { + int offset = (b == 0xc2) ? insn_fetch_type(uint16_t) : 0; + op_bytes = mode_64bit() ? 
8 : op_bytes; + if ( (rc = ops->read(x86_seg_ss, sp_post_inc(op_bytes + offset), + &dst.val, op_bytes, ctxt)) != 0 ) + goto done; + _regs.eip = dst.val; + break; + } + + case 0xd4: /* aam */ { + unsigned int base = insn_fetch_type(uint8_t); + uint8_t al = _regs.eax; + generate_exception_if(mode_64bit(), EXC_UD); + generate_exception_if(base == 0, EXC_DE); + *(uint16_t *)&_regs.eax = ((al / base) << 8) | (al % base); + _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF); + _regs.eflags |= ((uint8_t)_regs.eax == 0) ? EFLG_ZF : 0; + _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0; + _regs.eflags |= even_parity(_regs.eax) ? EFLG_PF : 0; + break; + } + + case 0xd5: /* aad */ { + unsigned int base = insn_fetch_type(uint8_t); + uint16_t ax = _regs.eax; + generate_exception_if(mode_64bit(), EXC_UD); + *(uint16_t *)&_regs.eax = (uint8_t)(ax + ((ax >> 8) * base)); + _regs.eflags &= ~(EFLG_SF|EFLG_ZF|EFLG_PF); + _regs.eflags |= ((uint8_t)_regs.eax == 0) ? EFLG_ZF : 0; + _regs.eflags |= (( int8_t)_regs.eax < 0) ? EFLG_SF : 0; + _regs.eflags |= even_parity(_regs.eax) ? EFLG_PF : 0; + break; + } + + case 0xd6: /* salc */ + generate_exception_if(mode_64bit(), EXC_UD); + *(uint8_t *)&_regs.eax = (_regs.eflags & EFLG_CF) ? 0xff : 0x00; + break; + + case 0xd7: /* xlat */ { + unsigned long al = (uint8_t)_regs.eax; + if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.ebx + al), + &al, 1, ctxt)) != 0 ) + goto done; + *(uint8_t *)&_regs.eax = al; + break; + } + + case 0xe0 ... 0xe2: /* loop{,z,nz} */ { + int rel = insn_fetch_type(int8_t); + int do_jmp = !(_regs.eflags & EFLG_ZF); /* loopnz */ + if ( b == 0xe1 ) + do_jmp = !do_jmp; /* loopz */ + else if ( b == 0xe2 ) + do_jmp = 1; /* loop */ + switch ( ad_bytes ) + { + case 2: + do_jmp &= --(*(uint16_t *)&_regs.ecx) != 0; + break; + case 4: + do_jmp &= --(*(uint32_t *)&_regs.ecx) != 0; + _regs.ecx = (uint32_t)_regs.ecx; /* zero extend in x86/64 mode */ + break; + default: /* case 8: */ + do_jmp &= --_regs.ecx != 0; + break; + } + if ( do_jmp ) + jmp_rel(rel); + break; + } + case 0xe3: /* jcxz/jecxz (short) */ { int rel = insn_fetch_type(int8_t); if ( (ad_bytes == 2) ? !(uint16_t)_regs.ecx : @@ -1261,12 +1898,26 @@ x86_emulate( break; } - case 0xe9: /* jmp (short) */ + case 0xe8: /* call (near) */ { + int rel = (((op_bytes == 2) && !mode_64bit()) + ? (int32_t)insn_fetch_type(int16_t) + : insn_fetch_type(int32_t)); + op_bytes = mode_64bit() ? 8 : op_bytes; + src.val = _regs.eip; + jmp_rel(rel); + goto push; + } + + case 0xe9: /* jmp (near) */ { + int rel = (((op_bytes == 2) && !mode_64bit()) + ? (int32_t)insn_fetch_type(int16_t) + : insn_fetch_type(int32_t)); + jmp_rel(rel); + break; + } + + case 0xeb: /* jmp (short) */ jmp_rel(insn_fetch_type(int8_t)); - break; - - case 0xeb: /* jmp (near) */ - jmp_rel(insn_fetch_bytes(mode_64bit() ? 4 : op_bytes)); break; case 0xf5: /* cmc */ @@ -1294,9 +1945,14 @@ x86_emulate( twobyte_insn: switch ( b ) { - case 0x40 ... 0x4f: /* cmov */ - dst.val = dst.orig_val = src.val; - d = (d & ~Mov) | (test_cc(b, _regs.eflags) ? Mov : 0); + case 0x40 ... 0x4f: /* cmovcc */ + dst.val = src.val; + if ( !test_cc(b, _regs.eflags) ) + dst.type = OP_NONE; + break; + + case 0x90 ... 0x9f: /* setcc */ + dst.val = test_cc(b, _regs.eflags); break; case 0xb0 ... 
0xb1: /* cmpxchg */ @@ -1331,6 +1987,34 @@ x86_emulate( emulate_2op_SrcV_nobyte("bts", src, dst, _regs.eflags); break; + case 0xaf: /* imul */ + _regs.eflags &= ~(EFLG_OF|EFLG_CF); + switch ( dst.bytes ) + { + case 2: + dst.val = ((uint32_t)(int16_t)src.val * + (uint32_t)(int16_t)dst.val); + if ( (int16_t)dst.val != (uint32_t)dst.val ) + _regs.eflags |= EFLG_OF|EFLG_CF; + break; +#ifdef __x86_64__ + case 4: + dst.val = ((uint64_t)(int32_t)src.val * + (uint64_t)(int32_t)dst.val); + if ( (int32_t)dst.val != dst.val ) + _regs.eflags |= EFLG_OF|EFLG_CF; + break; +#endif + default: { + unsigned long m[2] = { src.val, dst.val }; + if ( imul_dbl(m) ) + _regs.eflags |= EFLG_OF|EFLG_CF; + dst.val = m[0]; + break; + } + } + break; + case 0xb6: /* movzx rm8,r{16,32,64} */ /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */ dst.reg = decode_register(modrm_reg, &_regs, 0); @@ -1338,6 +2022,26 @@ x86_emulate( dst.val = (uint8_t)src.val; break; + case 0xbc: /* bsf */ { + int zf; + asm ( "bsf %2,%0; setz %b1" + : "=r" (dst.val), "=q" (zf) + : "r" (src.val), "1" (0) ); + _regs.eflags &= ~EFLG_ZF; + _regs.eflags |= zf ? EFLG_ZF : 0; + break; + } + + case 0xbd: /* bsr */ { + int zf; + asm ( "bsr %2,%0; setz %b1" + : "=r" (dst.val), "=q" (zf) + : "r" (src.val), "1" (0) ); + _regs.eflags &= ~EFLG_ZF; + _regs.eflags |= zf ? EFLG_ZF : 0; + break; + } + case 0xb7: /* movzx rm16,r{16,32,64} */ dst.val = (uint16_t)src.val; break; @@ -1347,12 +2051,13 @@ x86_emulate( break; case 0xba: /* Grp8 */ - switch ( modrm_reg & 3 ) - { - case 0: goto bt; - case 1: goto bts; - case 2: goto btr; - case 3: goto btc; + switch ( modrm_reg & 7 ) + { + case 4: goto bt; + case 5: goto bts; + case 6: goto btr; + case 7: goto btc; + default: generate_exception_if(1, EXC_UD); } break; @@ -1385,10 +2090,13 @@ x86_emulate( { case 0x0d: /* GrpP (prefetch) */ case 0x18: /* Grp16 (prefetch/nop) */ + case 0x19 ... 0x1f: /* nop (amd-defined) */ break; case 0x80 ... 0x8f: /* jcc (near) */ { - int rel = insn_fetch_bytes(mode_64bit() ? 4 : op_bytes); + int rel = (((op_bytes == 2) && !mode_64bit()) + ? (int32_t)insn_fetch_type(int16_t) + : insn_fetch_type(int32_t)); if ( test_cc(b, _regs.eflags) ) jmp_rel(rel); break; @@ -1398,6 +2106,7 @@ x86_emulate( #if defined(__i386__) { unsigned long old_lo, old_hi; + generate_exception_if((modrm_reg & 7) != 1, EXC_UD); if ( (rc = ops->read(ea.mem.seg, ea.mem.off+0, &old_lo, 4, ctxt)) || (rc = ops->read(ea.mem.seg, ea.mem.off+4, &old_hi, 4, ctxt)) ) goto done; @@ -1424,6 +2133,7 @@ x86_emulate( #elif defined(__x86_64__) { unsigned long old, new; + generate_exception_if((modrm_reg & 7) != 1, EXC_UD); if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &old, 8, ctxt)) != 0 ) goto done; if ( ((uint32_t)(old>>0) != (uint32_t)_regs.eax) || @@ -1444,6 +2154,37 @@ x86_emulate( break; } #endif + + case 0xc8 ... 
0xcf: /* bswap */ + dst.type = OP_REG; + dst.reg = decode_register(b & 7, &_regs, 0); + dst.val = *dst.reg; + switch ( dst.bytes = op_bytes ) + { + case 2: + dst.val = (((dst.val & 0x00FFUL) << 8) | + ((dst.val & 0xFF00UL) >> 8)); + break; + case 4: + dst.val = (((dst.val & 0x000000FFUL) << 24) | + ((dst.val & 0x0000FF00UL) << 8) | + ((dst.val & 0x00FF0000UL) >> 8) | + ((dst.val & 0xFF000000UL) >> 24)); + break; +#ifdef __x86_64__ + case 8: + dst.val = (((dst.val & 0x00000000000000FFUL) << 56) | + ((dst.val & 0x000000000000FF00UL) << 40) | + ((dst.val & 0x0000000000FF0000UL) << 24) | + ((dst.val & 0x00000000FF000000UL) << 8) | + ((dst.val & 0x000000FF00000000UL) >> 8) | + ((dst.val & 0x0000FF0000000000UL) >> 24) | + ((dst.val & 0x00FF000000000000UL) >> 40) | + ((dst.val & 0xFF00000000000000UL) >> 56)); + break; +#endif + } + break; } goto writeback; diff -r 3464bb656a9c -r 8475a4e0425e xen/common/elf.c --- a/xen/common/elf.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/common/elf.c Thu Jan 18 15:18:07 2007 +0000 @@ -300,7 +300,7 @@ int parseelfimage(struct domain_setup_in if ( dsi->__elfnote_section ) { p = xen_elfnote_string(dsi, XEN_ELFNOTE_PAE_MODE); - if ( p != NULL && strncmp(p, "bimodal", 7) == 0 ) + if ( p != NULL && strstr(p, "bimodal") != NULL ) dsi->pae_kernel = PAEKERN_bimodal; else if ( p != NULL && strncmp(p, "yes", 3) == 0 ) dsi->pae_kernel = PAEKERN_extended_cr3; diff -r 3464bb656a9c -r 8475a4e0425e xen/common/kexec.c --- a/xen/common/kexec.c Thu Jan 18 09:54:33 2007 +0000 +++ b/xen/common/kexec.c Thu Jan 18 15:18:07 2007 +0000 @@ -26,36 +26,34 @@ typedef long ret_t; -DEFINE_PER_CPU (crash_note_t, crash_notes); -cpumask_t crash_saved_cpus; - -xen_kexec_image_t kexec_image[KEXEC_IMAGE_NR]; +#define ELFNOTE_ALIGN(_n_) (((_n_)+3)&~3) _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog