[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [XenPPC] [xenppc-unstable] branch merge with xen-unstable.hg
# HG changeset patch # User Jimi Xenidis <jimix@xxxxxxxxxxxxxx> # Node ID 3f6a2745b3a3b40bcdd51f9111b0993bad2c7ec6 # Parent 5568efb41da42a55318fa05d3ce0aa73e774e6d1 # Parent fd2667419c53ce2555c799acf3e84dd25912bcb5 branch merge with xen-unstable.hg --- linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/kmap_types.h | 32 patches/linux-2.6.16.33/ipv6-no-autoconf.patch | 18 tools/xm-test/lib/XmTestLib/XenManagedDomain.py | 177 - .hgignore | 6 Config.mk | 2 buildconfigs/linux-defconfig_xen0_x86_32 | 1 buildconfigs/linux-defconfig_xenU_x86_32 | 1 buildconfigs/linux-defconfig_xen_x86_32 | 1 buildconfigs/mk.linux-2.6-xen | 31 config/x86_64.mk | 1 docs/xen-api/wire-protocol.tex | 30 docs/xen-api/xenapi-datamodel.tex | 137 extras/mini-os/Makefile | 24 extras/mini-os/gnttab.c | 36 extras/mini-os/include/hypervisor.h | 1 extras/mini-os/include/netfront.h | 2 extras/mini-os/include/x86/x86_32/hypercall-x86_32.h | 8 extras/mini-os/include/x86/x86_64/hypercall-x86_64.h | 8 extras/mini-os/include/xenbus.h | 3 extras/mini-os/kernel.c | 11 extras/mini-os/netfront.c | 455 +++ extras/mini-os/xenbus/xenbus.c | 86 linux-2.6-xen-sparse/arch/i386/Kconfig | 2 linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c | 19 linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c | 46 linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c | 74 linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c | 4 linux-2.6-xen-sparse/arch/i386/mm/highmem-xen.c | 1 linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c | 30 linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c | 5 linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c | 49 linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c | 16 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c | 6 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c | 32 linux-2.6-xen-sparse/drivers/xen/blktap/Makefile | 4 linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c | 32 linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c | 4 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/dma-mapping.h | 6 
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgalloc.h | 2 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/swiotlb.h | 4 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h | 85 linux-2.6-xen-sparse/kernel/kexec.c | 8 linux-2.6-xen-sparse/net/core/dev.c | 3 patches/linux-2.6.16.33/series | 1 patches/linux-2.6.16.33/vsnprintf.patch | 1 tools/check/check_udev | 4 tools/examples/blktap | 2 tools/examples/block | 2 tools/examples/block-enbd | 2 tools/examples/block-nbd | 2 tools/examples/external-device-migrate | 2 tools/examples/network-bridge | 2 tools/examples/network-nat | 2 tools/examples/network-route | 2 tools/examples/vif-bridge | 2 tools/examples/vif-common.sh | 8 tools/examples/vif-nat | 2 tools/examples/vif-route | 2 tools/examples/vtpm | 2 tools/examples/vtpm-common.sh | 4 tools/examples/vtpm-delete | 2 tools/examples/xen-backend.agent | 2 tools/examples/xen-hotplug-cleanup | 2 tools/examples/xen-network-common.sh | 5 tools/examples/xmexample1 | 34 tools/examples/xmexample2 | 34 tools/examples/xmexample3 | 34 tools/firmware/rombios/rombios.c | 482 ++- tools/ioemu/hw/pc.c | 2 tools/ioemu/target-i386-dm/helper2.c | 21 tools/ioemu/vl.c | 2 tools/libxc/xc_hvm_build.c | 3 tools/libxc/xc_linux_build.c | 4 tools/libxc/xc_linux_restore.c | 207 + tools/libxc/xc_linux_save.c | 34 tools/libxc/xc_load_elf.c | 14 tools/libxc/xc_ptrace.c | 18 tools/libxc/xg_save_restore.h | 9 tools/libxen/include/xen_cpu_feature.h | 4 tools/libxen/src/xen_common.c | 48 tools/libxen/src/xen_cpu_feature.c | 4 tools/pygrub/src/pygrub | 280 +- tools/python/scripts/xapi.py | 95 tools/python/xen/xend/XendAPI.py | 10 tools/python/xen/xend/XendAPIConstants.py | 3 tools/python/xen/xend/XendBootloader.py | 6 tools/python/xen/xend/XendCheckpoint.py | 8 tools/python/xen/xend/XendConfig.py | 57 tools/python/xen/xend/XendConstants.py | 1 tools/python/xen/xend/XendDomain.py | 8 tools/python/xen/xend/XendDomainInfo.py | 57 tools/python/xen/xend/XendNode.py | 2 
tools/python/xen/xend/osdep.py | 5 tools/python/xen/xend/server/SrvDaemon.py | 2 tools/python/xen/xend/server/blkif.py | 1 tools/python/xen/xend/server/netif.py | 8 tools/python/xen/xend/server/vfbif.py | 2 tools/python/xen/xm/create.py | 25 tools/python/xen/xm/main.py | 44 tools/python/xen/xm/migrate.py | 1 tools/python/xen/xm/opts.py | 8 tools/python/xen/xm/shutdown.py | 1 tools/tests/Makefile | 13 tools/tests/blowfish.c | 439 +++ tools/tests/blowfish.mk | 23 tools/tests/test_x86_emulator.c | 193 + tools/xenstat/xentop/xentop.c | 2 tools/xm-test/README | 43 tools/xm-test/configure.ac | 1 tools/xm-test/grouptest/xapi | 1 tools/xm-test/lib/XmTestLib/DomainTracking.py | 61 tools/xm-test/lib/XmTestLib/XenAPIDomain.py | 176 + tools/xm-test/lib/XmTestLib/XenDomain.py | 28 tools/xm-test/lib/XmTestLib/Xm.py | 2 tools/xm-test/lib/XmTestLib/xapi.py | 79 tools/xm-test/ramdisk/Makefile.am | 13 tools/xm-test/ramdisk/skel/etc/init.d/rcS | 11 tools/xm-test/runtest.sh | 8 tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py | 8 tools/xm-test/tests/sched-credit/01_sched_credit_weight_cap_pos.py | 33 tools/xm-test/tests/vtpm/09_vtpm-xapi.py | 99 tools/xm-test/tests/xapi/01_xapi-vm_basic.py | 61 tools/xm-test/tests/xapi/Makefile.am | 19 unmodified_drivers/linux-2.6/platform-pci/evtchn.c | 6 unmodified_drivers/linux-2.6/platform-pci/platform-pci.c | 34 unmodified_drivers/linux-2.6/platform-pci/platform-pci.h | 4 xen/Makefile | 12 xen/Rules.mk | 1 xen/arch/ia64/linux-xen/unaligned.c | 2 xen/arch/ia64/xen/domain.c | 20 xen/arch/ia64/xen/xenmisc.c | 24 xen/arch/powerpc/domain.c | 6 xen/arch/powerpc/domctl.c | 6 xen/arch/powerpc/setup.c | 13 xen/arch/powerpc/xen.lds.S | 4 xen/arch/x86/boot/x86_32.S | 2 xen/arch/x86/boot/x86_64.S | 29 xen/arch/x86/compat.c | 14 xen/arch/x86/crash.c | 3 xen/arch/x86/domain.c | 691 ++++- xen/arch/x86/domain_build.c | 225 + xen/arch/x86/domctl.c | 113 xen/arch/x86/e820.c | 35 xen/arch/x86/hvm/hpet.c | 2 xen/arch/x86/hvm/hvm.c | 2 
xen/arch/x86/hvm/instrlen.c | 32 xen/arch/x86/hvm/intercept.c | 2 xen/arch/x86/hvm/irq.c | 186 - xen/arch/x86/hvm/platform.c | 22 xen/arch/x86/hvm/svm/svm.c | 12 xen/arch/x86/hvm/vioapic.c | 11 xen/arch/x86/hvm/vmx/vmcs.c | 9 xen/arch/x86/hvm/vmx/vmx.c | 148 - xen/arch/x86/hvm/vmx/x86_32/exits.S | 33 xen/arch/x86/hvm/vmx/x86_64/exits.S | 29 xen/arch/x86/irq.c | 12 xen/arch/x86/mm.c | 277 +- xen/arch/x86/mm/shadow/common.c | 41 xen/arch/x86/mm/shadow/multi.c | 78 xen/arch/x86/mm/shadow/private.h | 5 xen/arch/x86/oprofile/nmi_int.c | 2 xen/arch/x86/physdev.c | 13 xen/arch/x86/platform_hypercall.c | 19 xen/arch/x86/setup.c | 41 xen/arch/x86/sysctl.c | 8 xen/arch/x86/time.c | 12 xen/arch/x86/traps.c | 358 +- xen/arch/x86/x86_32/mm.c | 12 xen/arch/x86/x86_32/traps.c | 6 xen/arch/x86/x86_32/xen.lds.S | 17 xen/arch/x86/x86_64/Makefile | 19 xen/arch/x86/x86_64/asm-offsets.c | 39 xen/arch/x86/x86_64/compat.c | 30 xen/arch/x86/x86_64/compat/entry.S | 365 ++ xen/arch/x86/x86_64/compat/mm.c | 337 ++ xen/arch/x86/x86_64/compat/traps.c | 338 ++ xen/arch/x86/x86_64/domain.c | 68 xen/arch/x86/x86_64/domctl.c | 111 xen/arch/x86/x86_64/entry.S | 29 xen/arch/x86/x86_64/mm.c | 93 xen/arch/x86/x86_64/physdev.c | 48 xen/arch/x86/x86_64/platform_hypercall.c | 29 xen/arch/x86/x86_64/sysctl.c | 33 xen/arch/x86/x86_64/traps.c | 19 xen/arch/x86/x86_64/xen.lds.S | 17 xen/arch/x86/x86_emulate.c | 1376 ++++++++-- xen/common/Makefile | 12 xen/common/acm_ops.c | 21 xen/common/compat/Makefile | 13 xen/common/compat/acm_ops.c | 47 xen/common/compat/domain.c | 91 xen/common/compat/domctl.c | 137 xen/common/compat/grant_table.c | 218 + xen/common/compat/kernel.c | 59 xen/common/compat/kexec.c | 33 xen/common/compat/memory.c | 364 ++ xen/common/compat/multicall.c | 31 xen/common/compat/schedule.c | 51 xen/common/compat/sysctl.c | 95 xen/common/compat/xenoprof.c | 40 xen/common/compat/xlat.c | 73 xen/common/domain.c | 53 xen/common/domctl.c | 98 xen/common/elf.c | 4 xen/common/elf32.c | 19 
xen/common/event_channel.c | 21 xen/common/grant_table.c | 4 xen/common/kernel.c | 24 xen/common/kexec.c | 194 - xen/common/keyhandler.c | 26 xen/common/lib.c | 644 ++-- xen/common/memory.c | 22 xen/common/multicall.c | 5 xen/common/schedule.c | 38 xen/common/symbols.c | 9 xen/common/sysctl.c | 18 xen/common/trace.c | 78 xen/common/xencomm.c | 4 xen/common/xenoprof.c | 70 xen/drivers/video/vga.c | 11 xen/include/Makefile | 73 xen/include/asm-ia64/init.h | 25 xen/include/asm-ia64/shared.h | 4 xen/include/asm-powerpc/init.h | 19 xen/include/asm-powerpc/shared.h | 4 xen/include/asm-x86/compat.h | 8 xen/include/asm-x86/config.h | 63 xen/include/asm-x86/desc.h | 99 xen/include/asm-x86/domain.h | 5 xen/include/asm-x86/event.h | 14 xen/include/asm-x86/guest_access.h | 18 xen/include/asm-x86/hvm/hvm.h | 2 xen/include/asm-x86/hvm/irq.h | 16 xen/include/asm-x86/hypercall.h | 20 xen/include/asm-x86/init.h | 25 xen/include/asm-x86/ldt.h | 3 xen/include/asm-x86/mm.h | 26 xen/include/asm-x86/multicall.h | 25 xen/include/asm-x86/page.h | 5 xen/include/asm-x86/processor.h | 6 xen/include/asm-x86/regs.h | 3 xen/include/asm-x86/shadow.h | 10 xen/include/asm-x86/shared.h | 78 xen/include/asm-x86/x86_32/kexec.h | 1 xen/include/asm-x86/x86_32/page-2level.h | 2 xen/include/asm-x86/x86_32/page-3level.h | 2 xen/include/asm-x86/x86_32/regs.h | 2 xen/include/asm-x86/x86_32/uaccess.h | 2 xen/include/asm-x86/x86_64/kexec.h | 1 xen/include/asm-x86/x86_64/page.h | 7 xen/include/asm-x86/x86_64/regs.h | 7 xen/include/asm-x86/x86_64/uaccess.h | 15 xen/include/asm-x86/x86_emulate.h | 43 xen/include/public/arch-x86/xen-x86_64.h | 5 xen/include/public/arch-x86/xen.h | 10 xen/include/public/domctl.h | 5 xen/include/public/elfnote.h | 9 xen/include/public/hvm/ioreq.h | 5 xen/include/public/hvm/params.h | 22 xen/include/public/xen.h | 4 xen/include/public/xenoprof.h | 2 xen/include/xen/compat.h | 180 + xen/include/xen/domain.h | 17 xen/include/xen/elf.h | 9 xen/include/xen/elfcore.h | 57 
xen/include/xen/hypercall.h | 19 xen/include/xen/init.h | 19 xen/include/xen/kernel.h | 17 xen/include/xen/multicall.h | 10 xen/include/xen/perfc.h | 3 xen/include/xen/sched.h | 43 xen/include/xen/shared.h | 54 xen/include/xen/symbols.h | 5 xen/include/xen/xenoprof.h | 26 xen/include/xlat.lst | 52 xen/tools/compat-build-header.py | 21 xen/tools/compat-build-source.py | 27 xen/tools/get-fields.sh | 432 +++ xen/tools/symbols.c | 479 --- 279 files changed, 11480 insertions(+), 3343 deletions(-) diff -r 5568efb41da4 -r 3f6a2745b3a3 .hgignore --- a/.hgignore Mon Jan 15 13:27:20 2007 -0500 +++ b/.hgignore Wed Jan 17 09:56:40 2007 -0500 @@ -20,6 +20,7 @@ ^\.config$ ^TAGS$ ^tags$ +^build.*$ ^dist/.*$ ^docs/.*\.aux$ ^docs/.*\.dvi$ @@ -57,7 +58,7 @@ ^docs/xen-api/xenapi-datamodel-graph.eps$ ^extras/mini-os/h/hypervisor-ifs$ ^extras/mini-os/h/xen-public$ -^extras/mini-os/mini-os\..*$ +^extras/mini-os/mini-os.*$ ^install/.*$ ^linux-[^/]*-native/.*$ ^linux-[^/]*-xen/.*$ @@ -141,6 +142,8 @@ ^tools/python/build/.*$ ^tools/security/secpol_tool$ ^tools/security/xen/.*$ +^tools/tests/blowfish\.bin$ +^tools/tests/blowfish\.h$ ^tools/tests/test_x86_emulator$ ^tools/vnet/Make.local$ ^tools/vnet/build/.*$ @@ -207,6 +210,7 @@ ^xen/ddb/.*$ ^xen/include/asm$ ^xen/include/asm-.*/asm-offsets\.h$ +^xen/include/compat/.*$ ^xen/include/hypervisor-ifs/arch$ ^xen/include/public/public$ ^xen/include/xen/.*\.new$ diff -r 5568efb41da4 -r 3f6a2745b3a3 Config.mk --- a/Config.mk Mon Jan 15 13:27:20 2007 -0500 +++ b/Config.mk Wed Jan 17 09:56:40 2007 -0500 @@ -10,6 +10,8 @@ XEN_OS ?= $(shell uname -s) XEN_OS ?= $(shell uname -s) CONFIG_$(XEN_OS) := y + +SHELL ?= /bin/sh # Tools to run on system hosting the build HOSTCC = gcc diff -r 5568efb41da4 -r 3f6a2745b3a3 buildconfigs/linux-defconfig_xen0_x86_32 --- a/buildconfigs/linux-defconfig_xen0_x86_32 Mon Jan 15 13:27:20 2007 -0500 +++ b/buildconfigs/linux-defconfig_xen0_x86_32 Wed Jan 17 09:56:40 2007 -0500 @@ -172,6 +172,7 @@ CONFIG_FLAT_NODE_MEM_MAP=y 
CONFIG_FLAT_NODE_MEM_MAP=y # CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPLIT_PTLOCK_CPUS=4096 +# CONFIG_HIGHPTE is not set CONFIG_MTRR=y # CONFIG_REGPARM is not set CONFIG_SECCOMP=y diff -r 5568efb41da4 -r 3f6a2745b3a3 buildconfigs/linux-defconfig_xenU_x86_32 --- a/buildconfigs/linux-defconfig_xenU_x86_32 Mon Jan 15 13:27:20 2007 -0500 +++ b/buildconfigs/linux-defconfig_xenU_x86_32 Wed Jan 17 09:56:40 2007 -0500 @@ -172,6 +172,7 @@ CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_FLAT_NODE_MEM_MAP=y # CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPLIT_PTLOCK_CPUS=4096 +# CONFIG_HIGHPTE is not set # CONFIG_REGPARM is not set CONFIG_SECCOMP=y CONFIG_HZ_100=y diff -r 5568efb41da4 -r 3f6a2745b3a3 buildconfigs/linux-defconfig_xen_x86_32 --- a/buildconfigs/linux-defconfig_xen_x86_32 Mon Jan 15 13:27:20 2007 -0500 +++ b/buildconfigs/linux-defconfig_xen_x86_32 Wed Jan 17 09:56:40 2007 -0500 @@ -180,6 +180,7 @@ CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_FLAT_NODE_MEM_MAP=y # CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPLIT_PTLOCK_CPUS=4096 +# CONFIG_HIGHPTE is not set CONFIG_MTRR=y CONFIG_REGPARM=y CONFIG_SECCOMP=y diff -r 5568efb41da4 -r 3f6a2745b3a3 buildconfigs/mk.linux-2.6-xen --- a/buildconfigs/mk.linux-2.6-xen Mon Jan 15 13:27:20 2007 -0500 +++ b/buildconfigs/mk.linux-2.6-xen Wed Jan 17 09:56:40 2007 -0500 @@ -3,7 +3,8 @@ LINUX_VER = 2.6.16.33 EXTRAVERSION ?= xen -LINUX_DIR = linux-$(LINUX_VER)-$(EXTRAVERSION) +LINUX_SRCDIR = linux-$(LINUX_VER)-xen +LINUX_DIR = build-linux-$(LINUX_VER)-$(EXTRAVERSION)_$(XEN_TARGET_ARCH) IMAGE_TARGET ?= vmlinuz INSTALL_BOOT_PATH ?= $(DESTDIR) @@ -23,24 +24,31 @@ build: $(LINUX_DIR)/include/linux/autoco mkdir -p $(INSTALL_BOOT_PATH) $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) INSTALL_PATH=$(INSTALL_BOOT_PATH) install -$(LINUX_DIR)/include/linux/autoconf.h: ref-linux-$(LINUX_VER)/.valid-ref - rm -rf $(LINUX_DIR) - cp -al $(<D) $(LINUX_DIR) +$(LINUX_SRCDIR)/.valid-src: ref-linux-$(LINUX_VER)/.valid-ref + rm -rf $(LINUX_SRCDIR) + cp -al $(<D) $(LINUX_SRCDIR) # Apply 
arch-xen patches ( cd linux-$(LINUX_SERIES)-xen-sparse && \ - LINUX_ARCH=$(LINUX_ARCH) bash ./mkbuildtree ../$(LINUX_DIR) ) + LINUX_ARCH=$(LINUX_ARCH) bash ./mkbuildtree ../$(LINUX_SRCDIR) ) + # Patch kernel Makefile to set EXTRAVERSION + ( cd $(LINUX_SRCDIR) ; \ + sed -e 's,^EXTRAVERSION.*,&$$(XENGUEST),' \ + -e 's,^KERNELRELEASE,XENGUEST := $$(shell [ -r $$(objtree)/.xenguest ] \&\& cat $$(objtree)/.xenguest)\n&,' Makefile >Mk.tmp ; \ + rm -f Makefile ; mv Mk.tmp Makefile ) + touch $@ + +$(LINUX_DIR)/include/linux/autoconf.h: $(LINUX_SRCDIR)/.valid-src + rm -rf $(LINUX_DIR) + mkdir -p $(LINUX_DIR) # Re-use config from install dir if one exits else use default config - CONFIG_VERSION=$$(sed -ne 's/^EXTRAVERSION = //p' $(LINUX_DIR)/Makefile); \ + CONFIG_VERSION=$$(sed -ne 's/$$(XENGUEST)//; s/^EXTRAVERSION = //p' $(LINUX_SRCDIR)/Makefile); \ [ -r $(DESTDIR)/boot/config-$(LINUX_VER3)$$CONFIG_VERSION-$(EXTRAVERSION) ] && \ cp $(DESTDIR)/boot/config-$(LINUX_VER3)$$CONFIG_VERSION-$(EXTRAVERSION) $(LINUX_DIR)/.config \ || sh buildconfigs/create_config.sh $(LINUX_DIR)/.config $(EXTRAVERSION) $(XEN_TARGET_ARCH) $(XEN_SYSTYPE) # See if we need to munge config to enable PAE $(MAKE) CONFIG_FILE=$(LINUX_DIR)/.config -f buildconfigs/Rules.mk config-update-pae - # Patch kernel Makefile to set EXTRAVERSION - ( cd $(LINUX_DIR) ; \ - sed -e 's/^EXTRAVERSION.*/&$$(XENGUEST)\nXENGUEST = -$(EXTRAVERSION)/' Makefile >Mk.tmp ; \ - rm -f Makefile ; mv Mk.tmp Makefile ) - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) oldconfig + echo "-$(EXTRAVERSION)" >$(LINUX_DIR)/.xenguest + $(MAKE) -C $(LINUX_SRCDIR) ARCH=$(LINUX_ARCH) oldconfig O=$$(/bin/pwd)/$(LINUX_DIR) .PHONY: prep prep: $(LINUX_DIR)/include/linux/autoconf.h @@ -62,4 +70,5 @@ delete: .PHONY: mrpropper mrpropper: + rm -rf $(LINUX_SRCDIR) rm -f linux-$(LINUX_VER).tar.bz2 diff -r 5568efb41da4 -r 3f6a2745b3a3 config/x86_64.mk --- a/config/x86_64.mk Mon Jan 15 13:27:20 2007 -0500 +++ b/config/x86_64.mk Wed Jan 17 09:56:40 2007 -0500 @@ 
-2,6 +2,7 @@ CONFIG_X86_64 := y CONFIG_X86_64 := y CONFIG_X86_$(XEN_OS) := y +CONFIG_COMPAT := y CONFIG_HVM := y CONFIG_MIGRATE := y CONFIG_XCUTILS := y diff -r 5568efb41da4 -r 3f6a2745b3a3 docs/xen-api/wire-protocol.tex --- a/docs/xen-api/wire-protocol.tex Mon Jan 15 13:27:20 2007 -0500 +++ b/docs/xen-api/wire-protocol.tex Wed Jan 17 09:56:40 2007 -0500 @@ -149,16 +149,16 @@ The XML-RPC interface is session-based; The XML-RPC interface is session-based; before you can make arbitrary RPC calls you must login and initiate a session. For example: \begin{verbatim} - session_id Session.login_with_password(string uname, string pwd) + session_id session.login_with_password(string uname, string pwd) \end{verbatim} Where {\tt uname} and {\tt password} refer to your username and password respectively, as defined by the Xen administrator. -The {\tt session\_id} returned by {\tt Session.Login} is passed to subequent -RPC calls as an authentication token. - -A session can be terminated with the {\tt Session.Logout} function: -\begin{verbatim} - void Session.Logout(session_id session) +The {\tt session\_id} returned by {\tt session.login\_with\_password} is passed +to subequent RPC calls as an authentication token. + +A session can be terminated with the {\tt session.logout} function: +\begin{verbatim} + void session.logout(session_id session) \end{verbatim} \subsection{Synchronous and Asynchronous invocation} @@ -251,14 +251,20 @@ call takes the session token as the only '2045dbc0-0734-4eea-9cb2-b8218c6b5bf2', '3202ae18-a046-4c32-9fda-e32e9631866e'] \end{verbatim} -Note the VM references are internally UUIDs. Once a reference to a VM has been acquired a lifecycle operation may be invoked: +The VM references here are UUIDs, though they may not be that simple in the +future, and you should treat them as opaque strings. 
Once a reference to a VM +has been acquired a lifecycle operation may be invoked: \begin{verbatim} >>> xen.VM.start(session, all_vms[3], False) -{'Status': 'Failure', 'ErrorDescription': 'Operation not implemented'} -\end{verbatim} - -In this case the {\tt start} message has not been implemented and an error response has been returned. Currently these high-level errors are returned as structured data (rather than as XMLRPC faults), allowing for internationalised errors in future. Finally, here are some examples of using accessors for object fields: +{'Status': 'Failure', 'ErrorDescription': ['VM_BAD_POWER_STATE', 'Halted', 'Running']} +\end{verbatim} + +In this case the {\tt start} message has been rejected, because the VM is +already running, and so an error response has been returned. These high-level +errors are returned as structured data (rather than as XML-RPC faults), +allowing them to be internationalised. Finally, here are some examples of +using accessors for object fields: \begin{verbatim} >>> xen.VM.get_name_label(session, all_vms[3])['Value'] diff -r 5568efb41da4 -r 3f6a2745b3a3 docs/xen-api/xenapi-datamodel.tex --- a/docs/xen-api/xenapi-datamodel.tex Mon Jan 15 13:27:20 2007 -0500 +++ b/docs/xen-api/xenapi-datamodel.tex Wed Jan 17 09:56:40 2007 -0500 @@ -184,8 +184,8 @@ The following enumeration types are used \hspace{0.5cm}{\tt NX} & Execute Disable \\ \hspace{0.5cm}{\tt MMXEXT} & AMD MMX extensions \\ \hspace{0.5cm}{\tt LM} & Long Mode (x86-64) \\ -\hspace{0.5cm}{\tt 3DNOWEXT} & AMD 3DNow! extensions \\ -\hspace{0.5cm}{\tt 3DNOW} & 3DNow! \\ +\hspace{0.5cm}{\tt THREEDNOWEXT} & AMD 3DNow! extensions \\ +\hspace{0.5cm}{\tt THREEDNOW} & 3DNow! 
\\ \hspace{0.5cm}{\tt RECOVERY} & CPU in recovery mode \\ \hspace{0.5cm}{\tt LONGRUN} & Longrun power control \\ \hspace{0.5cm}{\tt LRTI} & LongRun table interface \\ @@ -286,6 +286,7 @@ Quals & Field & Type & Description \\ $\mathit{RO}_\mathit{run}$ & {\tt uuid} & string & unique identifier/object reference \\ $\mathit{RO}_\mathit{ins}$ & {\tt this\_host} & host ref & Currently connected host \\ $\mathit{RO}_\mathit{ins}$ & {\tt this\_user} & user ref & Currently connected user \\ +$\mathit{RO}_\mathit{run}$ & {\tt last\_active} & int & Timestamp for last time session was active \\ \hline \end{longtable} \subsection{Additional RPCs associated with class: session} @@ -440,45 +441,13 @@ value of the field \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} -\subsubsection{RPC name:~create} - -{\bf Overview:} -Create a new session instance, and return its handle. - - \noindent {\bf Signature:} -\begin{verbatim} (session ref) create (session_id s, session record args)\end{verbatim} - - -\noindent{\bf Arguments:} - - -\vspace{0.3cm} -\begin{tabular}{|c|c|p{7cm}|} - \hline -{\bf type} & {\bf name} & {\bf description} \\ \hline -{\tt session record } & args & All constructor arguments \\ \hline - -\end{tabular} - -\vspace{0.3cm} - - \noindent {\bf Return Type:} -{\tt -session ref -} - - -reference to the newly created object -\vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} -\subsubsection{RPC name:~destroy} - -{\bf Overview:} -Destroy the specified session instance. - - \noindent {\bf Signature:} -\begin{verbatim} void destroy (session_id s, session ref self)\end{verbatim} +\subsubsection{RPC name:~get\_last\_active} + +{\bf Overview:} +Get the last\_active field of the given session. + + \noindent {\bf Signature:} +\begin{verbatim} int get_last_active (session_id s, session ref self)\end{verbatim} \noindent{\bf Arguments:} @@ -496,11 +465,11 @@ Destroy the specified session instance. 
\noindent {\bf Return Type:} {\tt -void -} - - - +int +} + + +value of the field \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} @@ -596,7 +565,7 @@ Quals & Field & Type & Description \\ \subsubsection{RPC name:~get\_all} {\bf Overview:} -Return a list of all the tasks known to the system +Return a list of all the tasks known to the system. \noindent {\bf Signature:} \begin{verbatim} ((task ref) Set) get_all (session_id s)\end{verbatim} @@ -999,70 +968,6 @@ string Set value of the field -\vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} -\subsubsection{RPC name:~create} - -{\bf Overview:} -Create a new task instance, and return its handle. - - \noindent {\bf Signature:} -\begin{verbatim} (task ref) create (session_id s, task record args)\end{verbatim} - - -\noindent{\bf Arguments:} - - -\vspace{0.3cm} -\begin{tabular}{|c|c|p{7cm}|} - \hline -{\bf type} & {\bf name} & {\bf description} \\ \hline -{\tt task record } & args & All constructor arguments \\ \hline - -\end{tabular} - -\vspace{0.3cm} - - \noindent {\bf Return Type:} -{\tt -task ref -} - - -reference to the newly created object -\vspace{0.3cm} -\vspace{0.3cm} -\vspace{0.3cm} -\subsubsection{RPC name:~destroy} - -{\bf Overview:} -Destroy the specified task instance. - - \noindent {\bf Signature:} -\begin{verbatim} void destroy (session_id s, task ref self)\end{verbatim} - - -\noindent{\bf Arguments:} - - -\vspace{0.3cm} -\begin{tabular}{|c|c|p{7cm}|} - \hline -{\bf type} & {\bf name} & {\bf description} \\ \hline -{\tt task ref } & self & reference to the object \\ \hline - -\end{tabular} - -\vspace{0.3cm} - - \noindent {\bf Return Type:} -{\tt -void -} - - - \vspace{0.3cm} \vspace{0.3cm} \vspace{0.3cm} @@ -10575,6 +10480,16 @@ expected parameters are returned. 
\begin{verbatim}MESSAGE_PARAMETER_COUNT_MISMATCH(method, expected, received)\end{verbatim} \begin{center}\rule{10em}{0.1pt}\end{center} +\subsubsection{NETWORK\_ALREADY\_CONNECTED} + +You tried to create a PIF, but the network you tried to attach it to is +already attached to some other PIF, and so the creation failed. + +\vspace{0.3cm} +{\bf Signature:} +\begin{verbatim}NETWORK_ALREADY_CONNECTED(network, connected PIF)\end{verbatim} +\begin{center}\rule{10em}{0.1pt}\end{center} + \subsubsection{SESSION\_AUTHENTICATION\_FAILED} The credentials given by the user are incorrect, so access has been denied, diff -r 5568efb41da4 -r 3f6a2745b3a3 extras/mini-os/Makefile --- a/extras/mini-os/Makefile Mon Jan 15 13:27:20 2007 -0500 +++ b/extras/mini-os/Makefile Wed Jan 17 09:56:40 2007 -0500 @@ -7,7 +7,7 @@ include $(XEN_ROOT)/Config.mk # Set TARGET_ARCH override TARGET_ARCH := $(XEN_TARGET_ARCH) -XEN_INTERFACE_VERSION := 0x00030203 +XEN_INTERFACE_VERSION := 0x00030204 # NB. '-Wcast-qual' is nasty, so I omitted it. CFLAGS := -fno-builtin -Wall -Werror -Wredundant-decls -Wno-format @@ -17,7 +17,13 @@ ASFLAGS = -D__ASSEMBLY__ ASFLAGS = -D__ASSEMBLY__ LDLIBS = -L. -lminios -LDFLAGS := -N -T minios-$(TARGET_ARCH).lds +LDFLAGS_FINAL := -N -T minios-$(TARGET_ARCH).lds +LDFLAGS := + +# Prefix for global API names. All other symbols are localised before +# linking with EXTRA_OBJS. +GLOBAL_PREFIX := xenos_ +EXTRA_OBJS = # For possible special source directories. 
EXTRA_SRC = @@ -110,18 +116,16 @@ links: $(ARCH_LINKS) links: $(ARCH_LINKS) [ -e include/xen ] || ln -sf ../../../xen/include/public include/xen -libminios.a: links $(OBJS) $(HEAD) - $(AR) r libminios.a $(HEAD) $(OBJS) - -$(TARGET): libminios.a $(HEAD) - $(LD) $(LDFLAGS) $(HEAD) $(LDLIBS) -o $@.elf - gzip -f -9 -c $@.elf >$@.gz +$(TARGET): links $(OBJS) $(HEAD) + $(LD) -r $(LDFLAGS) $(HEAD) $(OBJS) -o $@.o + $(OBJCOPY) -w -G $(GLOBAL_PREFIX)* -G _start $@.o $@.o + $(LD) $(LDFLAGS) $(LDFLAGS_FINAL) $@.o $(EXTRA_OBJS) -o $@ + gzip -f -9 -c $@ >$@.gz .PHONY: clean clean: find . -type f -name '*.o' | xargs rm -f - rm -f *.o *~ core $(TARGET).elf $(TARGET).raw $(TARGET) $(TARGET).gz - rm -f libminios.a + rm -f *.o *~ core $(TARGET) $(TARGET).gz find . -type l | xargs rm -f rm -f tags TAGS diff -r 5568efb41da4 -r 3f6a2745b3a3 extras/mini-os/gnttab.c --- a/extras/mini-os/gnttab.c Mon Jan 15 13:27:20 2007 -0500 +++ b/extras/mini-os/gnttab.c Wed Jan 17 09:56:40 2007 -0500 @@ -23,31 +23,24 @@ #define NR_GRANT_FRAMES 4 #define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t)) -#define GNTTAB_LIST_END (NR_GRANT_ENTRIES + 1) static grant_entry_t *gnttab_table; static grant_ref_t gnttab_list[NR_GRANT_ENTRIES]; -static grant_ref_t gnttab_free_head; + +static void +put_free_entry(grant_ref_t ref) +{ + gnttab_list[ref] = gnttab_list[0]; + gnttab_list[0] = ref; + +} static grant_ref_t -get_free_entries(int count) +get_free_entry(void) { - grant_ref_t ref; - grant_ref_t head; - - ref = head = gnttab_free_head; - while (count-- > 1) - head = gnttab_list[head]; - gnttab_free_head = gnttab_list[head]; - gnttab_list[head] = GNTTAB_LIST_END; + unsigned int ref = gnttab_list[0]; + gnttab_list[0] = gnttab_list[ref]; return ref; -} - -static void -put_free_entry(grant_ref_t gref) -{ - gnttab_list[gref] = gnttab_free_head; - gnttab_free_head = gref; } grant_ref_t @@ -55,7 +48,7 @@ gnttab_grant_access(domid_t domid, unsig { grant_ref_t ref; - ref = get_free_entries(1); + 
ref = get_free_entry(); gnttab_table[ref].frame = frame; gnttab_table[ref].domid = domid; wmb(); @@ -70,7 +63,7 @@ gnttab_grant_transfer(domid_t domid, uns { grant_ref_t ref; - ref = get_free_entries(1); + ref = get_free_entry(); gnttab_table[ref].frame = pfn; gnttab_table[ref].domid = domid; wmb(); @@ -157,8 +150,7 @@ init_gnttab(void) int i; for (i = NR_RESERVED_ENTRIES; i < NR_GRANT_ENTRIES; i++) - gnttab_list[i] = i + 1; - gnttab_free_head = NR_RESERVED_ENTRIES; + put_free_entry(i); setup.dom = DOMID_SELF; setup.nr_frames = NR_GRANT_FRAMES; diff -r 5568efb41da4 -r 3f6a2745b3a3 extras/mini-os/include/hypervisor.h --- a/extras/mini-os/include/hypervisor.h Mon Jan 15 13:27:20 2007 -0500 +++ b/extras/mini-os/include/hypervisor.h Wed Jan 17 09:56:40 2007 -0500 @@ -15,7 +15,6 @@ #include <types.h> #include <xen/xen.h> -#include <xen/dom0_ops.h> #if defined(__i386__) #include <hypercall-x86_32.h> #elif defined(__x86_64__) diff -r 5568efb41da4 -r 3f6a2745b3a3 extras/mini-os/include/x86/x86_32/hypercall-x86_32.h --- a/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h Mon Jan 15 13:27:20 2007 -0500 +++ b/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h Wed Jan 17 09:56:40 2007 -0500 @@ -179,14 +179,6 @@ HYPERVISOR_set_timer_op( unsigned long timeout_hi = (unsigned long)(timeout>>32); unsigned long timeout_lo = (unsigned long)timeout; return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); -} - -static inline int -HYPERVISOR_dom0_op( - dom0_op_t *dom0_op) -{ - dom0_op->interface_version = DOM0_INTERFACE_VERSION; - return _hypercall1(int, dom0_op, dom0_op); } static inline int diff -r 5568efb41da4 -r 3f6a2745b3a3 extras/mini-os/include/x86/x86_64/hypercall-x86_64.h --- a/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h Mon Jan 15 13:27:20 2007 -0500 +++ b/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h Wed Jan 17 09:56:40 2007 -0500 @@ -181,14 +181,6 @@ HYPERVISOR_set_timer_op( u64 timeout) { return _hypercall1(long, set_timer_op, timeout); -} - 
-static inline int -HYPERVISOR_dom0_op( - dom0_op_t *dom0_op) -{ - dom0_op->interface_version = DOM0_INTERFACE_VERSION; - return _hypercall1(int, dom0_op, dom0_op); } static inline int diff -r 5568efb41da4 -r 3f6a2745b3a3 extras/mini-os/include/xenbus.h --- a/extras/mini-os/include/xenbus.h Mon Jan 15 13:27:20 2007 -0500 +++ b/extras/mini-os/include/xenbus.h Wed Jan 17 09:56:40 2007 -0500 @@ -11,6 +11,9 @@ void init_xenbus(void); string on failure and sets *value to NULL. On success, *value is set to a malloc'd copy of the value. */ char *xenbus_read(xenbus_transaction_t xbt, const char *path, char **value); + +char *xenbus_watch_path(xenbus_transaction_t xbt, const char *path); +char* xenbus_wait_for_value(const char*,const char*); /* Associates a value with a path. Returns a malloc'd error string on failure. */ diff -r 5568efb41da4 -r 3f6a2745b3a3 extras/mini-os/kernel.c --- a/extras/mini-os/kernel.c Mon Jan 15 13:27:20 2007 -0500 +++ b/extras/mini-os/kernel.c Wed Jan 17 09:56:40 2007 -0500 @@ -37,6 +37,7 @@ #include <sched.h> #include <xenbus.h> #include <gnttab.h> +#include <netfront.h> #include <xen/features.h> #include <xen/version.h> @@ -61,13 +62,13 @@ void setup_xen_features(void) void test_xenbus(void); -void xenbus_tester(void *p) +static void xenbus_tester(void *p) { printk("Xenbus tests disabled, because of a Xend bug.\n"); /* test_xenbus(); */ } -void periodic_thread(void *p) +static void periodic_thread(void *p) { struct timeval tv; printk("Periodic thread started.\n"); @@ -79,12 +80,18 @@ void periodic_thread(void *p) } } +static void netfront_thread(void *p) +{ + init_netfront(&start_info); +} + /* This should be overridden by the application we are linked against. 
*/ __attribute__((weak)) int app_main(start_info_t *si) { printk("Dummy main: start_info=%p\n", si); create_thread("xenbus_tester", xenbus_tester, si); create_thread("periodic_thread", periodic_thread, si); + create_thread("netfront", netfront_thread, si); return 0; } diff -r 5568efb41da4 -r 3f6a2745b3a3 extras/mini-os/xenbus/xenbus.c --- a/extras/mini-os/xenbus/xenbus.c Mon Jan 15 13:27:20 2007 -0500 +++ b/extras/mini-os/xenbus/xenbus.c Wed Jan 17 09:56:40 2007 -0500 @@ -45,9 +45,9 @@ #define DEBUG(_f, _a...) ((void)0) #endif - static struct xenstore_domain_interface *xenstore_buf; static DECLARE_WAIT_QUEUE_HEAD(xb_waitq); +static DECLARE_WAIT_QUEUE_HEAD(watch_queue); struct xenbus_req_info { int in_use:1; @@ -71,6 +71,34 @@ static void memcpy_from_ring(const void memcpy(dest, ring + off, c1); memcpy(dest + c1, ring, c2); } + +static inline void wait_for_watch(void) +{ + DEFINE_WAIT(w); + add_waiter(w,watch_queue); + schedule(); + wake(current); +} + +char* xenbus_wait_for_value(const char* path,const char* value) +{ + for(;;) + { + char *res, *msg; + int r; + + msg = xenbus_read(XBT_NIL, path, &res); + if(msg) return msg; + + r = strcmp(value,res); + free(res); + + if(r==0) break; + else wait_for_watch(); + } + return NULL; +} + static void xenbus_thread_func(void *ign) { @@ -101,13 +129,35 @@ static void xenbus_thread_func(void *ign break; DEBUG("Message is good.\n"); - req_info[msg.req_id].reply = malloc(sizeof(msg) + msg.len); - memcpy_from_ring(xenstore_buf->rsp, + + if(msg.type == XS_WATCH_EVENT) + { + char* payload = (char*)malloc(sizeof(msg) + msg.len); + char *path,*token; + + memcpy_from_ring(xenstore_buf->rsp, + payload, + MASK_XENSTORE_IDX(xenstore_buf->rsp_cons), + msg.len + sizeof(msg)); + + path = payload + sizeof(msg); + token = path + strlen(path) + 1; + + xenstore_buf->rsp_cons += msg.len + sizeof(msg); + free(payload); + wake_up(&watch_queue); + } + + else + { + req_info[msg.req_id].reply = malloc(sizeof(msg) + msg.len); + 
memcpy_from_ring(xenstore_buf->rsp, req_info[msg.req_id].reply, MASK_XENSTORE_IDX(xenstore_buf->rsp_cons), msg.len + sizeof(msg)); - wake_up(&req_info[msg.req_id].waitq); - xenstore_buf->rsp_cons += msg.len + sizeof(msg); + xenstore_buf->rsp_cons += msg.len + sizeof(msg); + wake_up(&req_info[msg.req_id].waitq); + } } } } @@ -381,9 +431,29 @@ char *xenbus_write(xenbus_transaction_t struct xsd_sockmsg *rep; rep = xenbus_msg_reply(XS_WRITE, xbt, req, ARRAY_SIZE(req)); char *msg = errmsg(rep); - if (msg) - return msg; - free(rep); + if (msg) return msg; + free(rep); + return NULL; +} + +char* xenbus_watch_path( xenbus_transaction_t xbt, const char *path) +{ + /* in the future one could have multiple watch queues, and use + * the token for demuxing. For now the token is 0. */ + + struct xsd_sockmsg *rep; + + struct write_req req[] = { + {path, strlen(path) + 1}, + {"0",2 }, + }; + + rep = xenbus_msg_reply(XS_WATCH, xbt, req, ARRAY_SIZE(req)); + + char *msg = errmsg(rep); + if (msg) return msg; + free(rep); + return NULL; } diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/arch/i386/Kconfig --- a/linux-2.6-xen-sparse/arch/i386/Kconfig Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/arch/i386/Kconfig Wed Jan 17 09:56:40 2007 -0500 @@ -594,7 +594,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID config HIGHPTE bool "Allocate 3rd-level pagetables from highmem" - depends on (HIGHMEM4G || HIGHMEM64G) && !X86_XEN + depends on HIGHMEM4G || HIGHMEM64G help The VM uses one page table entry for each page of physical memory. 
For systems with a lot of RAM, this can be wasteful of precious diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c Wed Jan 17 09:56:40 2007 -0500 @@ -94,13 +94,7 @@ dma_unmap_sg(struct device *hwdev, struc } EXPORT_SYMBOL(dma_unmap_sg); -/* - * XXX This file is also used by xenLinux/ia64. - * "defined(__i386__) || defined (__x86_64__)" means "!defined(__ia64__)". - * This #if work around should be removed once this file is merbed back into - * i386' pci-dma or is moved to drivers/xen/core. - */ -#if defined(__i386__) || defined(__x86_64__) +#ifdef CONFIG_HIGHMEM dma_addr_t dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction) @@ -130,7 +124,7 @@ dma_unmap_page(struct device *dev, dma_a swiotlb_unmap_page(dev, dma_address, size, direction); } EXPORT_SYMBOL(dma_unmap_page); -#endif /* defined(__i386__) || defined(__x86_64__) */ +#endif /* CONFIG_HIGHMEM */ int dma_mapping_error(dma_addr_t dma_addr) @@ -161,6 +155,8 @@ void *dma_alloc_coherent(struct device * struct dma_coherent_mem *mem = dev ? 
dev->dma_mem : NULL; unsigned int order = get_order(size); unsigned long vstart; + u64 mask; + /* ignore region specifiers */ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); @@ -183,9 +179,14 @@ void *dma_alloc_coherent(struct device * vstart = __get_free_pages(gfp, order); ret = (void *)vstart; + if (dev != NULL && dev->coherent_dma_mask) + mask = dev->coherent_dma_mask; + else + mask = 0xffffffff; + if (ret != NULL) { if (xen_create_contiguous_region(vstart, order, - dma_bits) != 0) { + fls64(mask)) != 0) { free_pages(vstart, order); return NULL; } diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c Wed Jan 17 09:56:40 2007 -0500 @@ -101,8 +101,24 @@ void enable_hlt(void) EXPORT_SYMBOL(enable_hlt); -/* XXX XEN doesn't use default_idle(), poll_idle(). Use xen_idle() instead. */ -void xen_idle(void) +/* + * On SMP it's slightly faster (but much more power-consuming!) + * to poll the ->work.need_resched flag instead of waiting for the + * cross-CPU IPI to arrive. Use this option with caution. 
+ */ +static void poll_idle(void) +{ + local_irq_enable(); + + asm volatile( + "2:" + "testl %0, %1;" + "rep; nop;" + "je 2b;" + : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); +} + +static void xen_idle(void) { local_irq_disable(); @@ -152,17 +168,22 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { while (!need_resched()) { + void (*idle)(void); if (__get_cpu_var(cpu_idle_state)) __get_cpu_var(cpu_idle_state) = 0; rmb(); + idle = pm_idle; + + if (!idle) + idle = xen_idle; if (cpu_is_offline(cpu)) play_dead(); __get_cpu_var(irq_stat).idle_timestamp = jiffies; - xen_idle(); + idle(); } preempt_enable_no_resched(); schedule(); @@ -198,9 +219,22 @@ void cpu_idle_wait(void) } EXPORT_SYMBOL_GPL(cpu_idle_wait); -/* XXX XEN doesn't use mwait_idle(), select_idle_routine(), idle_setup(). */ -/* Always use xen_idle() instead. */ -void __devinit select_idle_routine(const struct cpuinfo_x86 *c) {} +void __devinit select_idle_routine(const struct cpuinfo_x86 *c) +{ +} + +static int __init idle_setup (char *str) +{ + if (!strncmp(str, "poll", 4)) { + printk("using polling idle threads.\n"); + pm_idle = poll_idle; + } + + boot_option_idle_override = 1; + return 1; +} + +__setup("idle=", idle_setup); void show_regs(struct pt_regs * regs) { diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c Wed Jan 17 09:56:40 2007 -0500 @@ -47,9 +47,6 @@ EXPORT_SYMBOL(swiotlb); */ #define IO_TLB_SHIFT 11 -/* Width of DMA addresses. 30 bits is a b44 limitation. 
*/ -#define DEFAULT_DMA_BITS 30 - static int swiotlb_force; static char *iotlb_virt_start; static unsigned long iotlb_nslabs; @@ -98,11 +95,12 @@ static struct phys_addr { */ static DEFINE_SPINLOCK(io_tlb_lock); -unsigned int dma_bits = DEFAULT_DMA_BITS; +static unsigned int dma_bits; +static unsigned int __initdata max_dma_bits = 32; static int __init setup_dma_bits(char *str) { - dma_bits = simple_strtoul(str, NULL, 0); + max_dma_bits = simple_strtoul(str, NULL, 0); return 0; } __setup("dma_bits=", setup_dma_bits); @@ -143,6 +141,7 @@ swiotlb_init_with_default_size (size_t d swiotlb_init_with_default_size (size_t default_size) { unsigned long i, bytes; + int rc; if (!iotlb_nslabs) { iotlb_nslabs = (default_size >> IO_TLB_SHIFT); @@ -159,16 +158,33 @@ swiotlb_init_with_default_size (size_t d */ iotlb_virt_start = alloc_bootmem_low_pages(bytes); if (!iotlb_virt_start) - panic("Cannot allocate SWIOTLB buffer!\n" - "Use dom0_mem Xen boot parameter to reserve\n" - "some DMA memory (e.g., dom0_mem=-128M).\n"); - + panic("Cannot allocate SWIOTLB buffer!\n"); + + dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT; for (i = 0; i < iotlb_nslabs; i += IO_TLB_SEGSIZE) { - int rc = xen_create_contiguous_region( - (unsigned long)iotlb_virt_start + (i << IO_TLB_SHIFT), - get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT), - dma_bits); - BUG_ON(rc); + do { + rc = xen_create_contiguous_region( + (unsigned long)iotlb_virt_start + (i << IO_TLB_SHIFT), + get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT), + dma_bits); + } while (rc && dma_bits++ < max_dma_bits); + if (rc) { + if (i == 0) + panic("No suitable physical memory available for SWIOTLB buffer!\n" + "Use dom0_mem Xen boot parameter to reserve\n" + "some DMA memory (e.g., dom0_mem=-128M).\n"); + iotlb_nslabs = i; + i <<= IO_TLB_SHIFT; + free_bootmem(__pa(iotlb_virt_start + i), bytes - i); + bytes = i; + for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE << IO_TLB_SHIFT) { + unsigned int bits = fls64(virt_to_bus(iotlb_virt_start + i 
- 1)); + + if (bits > dma_bits) + dma_bits = bits; + } + break; + } } /* @@ -186,17 +202,27 @@ swiotlb_init_with_default_size (size_t d * Get the overflow emergency buffer */ io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); + if (!io_tlb_overflow_buffer) + panic("Cannot allocate SWIOTLB overflow buffer!\n"); + + do { + rc = xen_create_contiguous_region( + (unsigned long)io_tlb_overflow_buffer, + get_order(io_tlb_overflow), + dma_bits); + } while (rc && dma_bits++ < max_dma_bits); + if (rc) + panic("No suitable physical memory available for SWIOTLB overflow buffer!\n"); iotlb_pfn_start = __pa(iotlb_virt_start) >> PAGE_SHIFT; iotlb_pfn_end = iotlb_pfn_start + (bytes >> PAGE_SHIFT); printk(KERN_INFO "Software IO TLB enabled: \n" " Aperture: %lu megabytes\n" - " Kernel range: 0x%016lx - 0x%016lx\n" + " Kernel range: %p - %p\n" " Address size: %u bits\n", bytes >> 20, - (unsigned long)iotlb_virt_start, - (unsigned long)iotlb_virt_start + bytes, + iotlb_virt_start, iotlb_virt_start + bytes, dma_bits); } @@ -238,9 +264,12 @@ __sync_single(struct phys_addr buffer, c char *dev, *host, *kmp; len = size; while (len != 0) { + unsigned long flags; + if (((bytes = len) + buffer.offset) > PAGE_SIZE) bytes = PAGE_SIZE - buffer.offset; - kmp = kmap_atomic(buffer.page, KM_SWIOTLB); + local_irq_save(flags); /* protects KM_BOUNCE_READ */ + kmp = kmap_atomic(buffer.page, KM_BOUNCE_READ); dev = dma_addr + size - len; host = kmp + buffer.offset; if (dir == DMA_FROM_DEVICE) { @@ -248,7 +277,8 @@ __sync_single(struct phys_addr buffer, c /* inaccessible */; } else memcpy(dev, host, bytes); - kunmap_atomic(kmp, KM_SWIOTLB); + kunmap_atomic(kmp, KM_BOUNCE_READ); + local_irq_restore(flags); len -= bytes; buffer.page++; buffer.offset = 0; @@ -617,6 +647,8 @@ swiotlb_sync_sg_for_device(struct device sg->dma_length, dir); } +#ifdef CONFIG_HIGHMEM + dma_addr_t swiotlb_map_page(struct device *hwdev, struct page *page, unsigned long offset, size_t size, @@ -650,6 +682,8 @@ 
swiotlb_unmap_page(struct device *hwdev, unmap_single(hwdev, bus_to_virt(dma_address), size, direction); } +#endif + int swiotlb_dma_mapping_error(dma_addr_t dma_addr) { @@ -677,7 +711,5 @@ EXPORT_SYMBOL(swiotlb_sync_single_for_de EXPORT_SYMBOL(swiotlb_sync_single_for_device); EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); EXPORT_SYMBOL(swiotlb_sync_sg_for_device); -EXPORT_SYMBOL(swiotlb_map_page); -EXPORT_SYMBOL(swiotlb_unmap_page); EXPORT_SYMBOL(swiotlb_dma_mapping_error); EXPORT_SYMBOL(swiotlb_dma_supported); diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c Wed Jan 17 09:56:40 2007 -0500 @@ -225,7 +225,7 @@ static void dump_fault_path(unsigned lon p += (address >> 30) * 2; printk(KERN_ALERT "%08lx -> *pde = %08lx:%08lx\n", page, p[1], p[0]); if (p[0] & 1) { - mfn = (p[0] >> PAGE_SHIFT) | ((p[1] & 0x7) << 20); + mfn = (p[0] >> PAGE_SHIFT) | (p[1] << 20); page = mfn_to_pfn(mfn) << PAGE_SHIFT; p = (unsigned long *)__va(page); address &= 0x3fffffff; @@ -234,7 +234,7 @@ static void dump_fault_path(unsigned lon page, p[1], p[0]); #ifndef CONFIG_HIGHPTE if (p[0] & 1) { - mfn = (p[0] >> PAGE_SHIFT) | ((p[1] & 0x7) << 20); + mfn = (p[0] >> PAGE_SHIFT) | (p[1] << 20); page = mfn_to_pfn(mfn) << PAGE_SHIFT; p = (unsigned long *) __va(page); address &= 0x001fffff; diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/arch/i386/mm/highmem-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/highmem-xen.c Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/arch/i386/mm/highmem-xen.c Wed Jan 17 09:56:40 2007 -0500 @@ -129,5 +129,6 @@ EXPORT_SYMBOL(kmap); EXPORT_SYMBOL(kmap); EXPORT_SYMBOL(kunmap); EXPORT_SYMBOL(kmap_atomic); +EXPORT_SYMBOL(kmap_atomic_pte); EXPORT_SYMBOL(kunmap_atomic); EXPORT_SYMBOL(kmap_atomic_to_page); diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c --- 
a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Wed Jan 17 09:56:40 2007 -0500 @@ -238,23 +238,41 @@ struct page *pte_alloc_one(struct mm_str #ifdef CONFIG_HIGHPTE pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); + if (pte && PageHighMem(pte)) { + struct mmuext_op op; + + kmap_flush_unused(); + op.cmd = MMUEXT_PIN_L1_TABLE; + op.arg1.mfn = pfn_to_mfn(page_to_pfn(pte)); + BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); + } #else pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); +#endif if (pte) { SetPageForeign(pte, pte_free); set_page_count(pte, 1); } -#endif return pte; } void pte_free(struct page *pte) { - unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT); - - if (!pte_write(*virt_to_ptep(va))) - BUG_ON(HYPERVISOR_update_va_mapping( - va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0)); + unsigned long pfn = page_to_pfn(pte); + + if (!PageHighMem(pte)) { + unsigned long va = (unsigned long)__va(pfn << PAGE_SHIFT); + + if (!pte_write(*virt_to_ptep(va))) + BUG_ON(HYPERVISOR_update_va_mapping( + va, pfn_pte(pfn, PAGE_KERNEL), 0)); + } else { + struct mmuext_op op; + + op.cmd = MMUEXT_UNPIN_TABLE; + op.arg1.mfn = pfn_to_mfn(pfn); + BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); + } ClearPageForeign(pte); set_page_count(pte, 1); diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c --- a/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c Wed Jan 17 09:56:40 2007 -0500 @@ -110,7 +110,6 @@ static struct irq_routing_table * __init if (rt) return rt; } - return NULL; } @@ -261,13 +260,13 @@ static int pirq_via_set(struct pci_dev * */ static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq) { - static unsigned int pirqmap[4] = { 3, 2, 5, 1 }; + static unsigned int pirqmap[5] = { 3, 2, 5, 1, 
1 }; return read_config_nybble(router, 0x55, pirqmap[pirq-1]); } static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { - static unsigned int pirqmap[4] = { 3, 2, 5, 1 }; + static unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 }; write_config_nybble(router, 0x55, pirqmap[pirq-1], irq); return 1; } diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c Wed Jan 17 09:56:40 2007 -0500 @@ -119,8 +119,26 @@ void exit_idle(void) __exit_idle(); } -/* XXX XEN doesn't use default_idle(), poll_idle(). Use xen_idle() instead. */ -void xen_idle(void) +/* + * On SMP it's slightly faster (but much more power-consuming!) + * to poll the ->need_resched flag instead of waiting for the + * cross-CPU IPI to arrive. Use this option with caution. + */ +static void poll_idle(void) +{ + local_irq_enable(); + + asm volatile( + "2:" + "testl %0,%1;" + "rep; nop;" + "je 2b;" + : : + "i" (_TIF_NEED_RESCHED), + "m" (current_thread_info()->flags)); +} + +static void xen_idle(void) { local_irq_disable(); @@ -164,14 +182,18 @@ void cpu_idle (void) /* endless idle loop with no priority at all */ while (1) { while (!need_resched()) { + void (*idle)(void); + if (__get_cpu_var(cpu_idle_state)) __get_cpu_var(cpu_idle_state) = 0; rmb(); - + idle = pm_idle; + if (!idle) + idle = xen_idle; if (cpu_is_offline(smp_processor_id())) play_dead(); enter_idle(); - xen_idle(); + idle(); __exit_idle(); } @@ -210,9 +232,22 @@ void cpu_idle_wait(void) } EXPORT_SYMBOL_GPL(cpu_idle_wait); -/* XXX XEN doesn't use mwait_idle(), select_idle_routine(), idle_setup(). */ -/* Always use xen_idle() instead. 
*/ -void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) {} +void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) +{ +} + +static int __init idle_setup (char *str) +{ + if (!strncmp(str, "poll", 4)) { + printk("using polling idle threads.\n"); + pm_idle = poll_idle; + } + + boot_option_idle_override = 1; + return 1; +} + +__setup("idle=", idle_setup); /* Prints also some state that isn't saved in the pt_regs */ void __show_regs(struct pt_regs * regs) diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c Wed Jan 17 09:56:40 2007 -0500 @@ -164,6 +164,18 @@ void _arch_exit_mmap(struct mm_struct *m mm_unpin(mm); } +struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + struct page *pte; + + pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); + if (pte) { + SetPageForeign(pte, pte_free); + set_page_count(pte, 1); + } + return pte; +} + void pte_free(struct page *pte) { unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT); @@ -171,6 +183,10 @@ void pte_free(struct page *pte) if (!pte_write(*virt_to_ptep(va))) BUG_ON(HYPERVISOR_update_va_mapping( va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0)); + + ClearPageForeign(pte); + set_page_count(pte, 1); + __free_page(pte); } #endif /* CONFIG_XEN */ diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Wed Jan 17 09:56:40 2007 -0500 @@ -446,8 +446,10 @@ static struct notifier_block xenstore_no static int __init balloon_init(void) { +#ifdef CONFIG_X86 unsigned long pfn; struct page *page; +#endif if (!is_running_on_xen()) return -ENODEV; @@ -476,13 +478,15 @@ static int __init balloon_init(void) 
balloon_pde->write_proc = balloon_write; #endif balloon_sysfs_init(); - + +#ifdef CONFIG_X86 /* Initialise the balloon with excess memory space. */ for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { page = pfn_to_page(pfn); if (!PageReserved(page)) balloon_append(page); } +#endif target_watch.callback = watch_target; xenstore_notifier.notifier_call = balloon_init_watcher; diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Wed Jan 17 09:56:40 2007 -0500 @@ -42,9 +42,30 @@ static void backend_changed(struct xenbu static void backend_changed(struct xenbus_watch *, const char **, unsigned int); +static int blkback_name(blkif_t *blkif, char *buf) +{ + char *devpath, *devname; + struct xenbus_device *dev = blkif->be->dev; + + devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); + if (IS_ERR(devpath)) + return PTR_ERR(devpath); + + if ((devname = strstr(devpath, "/dev/")) != NULL) + devname += strlen("/dev/"); + else + devname = devpath; + + snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname); + kfree(devpath); + + return 0; +} + static void update_blkif_status(blkif_t *blkif) { int err; + char name[TASK_COMM_LEN]; /* Not ready to connect? 
*/ if (!blkif->irq || !blkif->vbd.bdev) @@ -59,10 +80,13 @@ static void update_blkif_status(blkif_t if (blkif->be->dev->state != XenbusStateConnected) return; - blkif->xenblkd = kthread_run(blkif_schedule, blkif, - "xvd %d %02x:%02x", - blkif->domid, - blkif->be->major, blkif->be->minor); + err = blkback_name(blkif, name); + if (err) { + xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); + return; + } + + blkif->xenblkd = kthread_run(blkif_schedule, blkif, name); if (IS_ERR(blkif->xenblkd)) { err = PTR_ERR(blkif->xenblkd); blkif->xenblkd = NULL; diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/drivers/xen/blktap/Makefile --- a/linux-2.6-xen-sparse/drivers/xen/blktap/Makefile Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/Makefile Wed Jan 17 09:56:40 2007 -0500 @@ -1,3 +1,5 @@ LINUXINCLUDE += -I../xen/include/public/ LINUXINCLUDE += -I../xen/include/public/io -obj-y := xenbus.o interface.o blktap.o +obj-$(CONFIG_XEN_BLKDEV_TAP) := xenblktap.o + +xenblktap-y := xenbus.o interface.o blktap.o diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Wed Jan 17 09:56:40 2007 -0500 @@ -92,9 +92,30 @@ static long get_id(const char *str) return simple_strtol(num, NULL, 10); } +static int blktap_name(blkif_t *blkif, char *buf) +{ + char *devpath, *devname; + struct xenbus_device *dev = blkif->be->dev; + + devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); + if (IS_ERR(devpath)) + return PTR_ERR(devpath); + + if ((devname = strstr(devpath, "/dev/")) != NULL) + devname += strlen("/dev/"); + else + devname = devpath; + + snprintf(buf, TASK_COMM_LEN, "blktap.%d.%s", blkif->domid, devname); + kfree(devpath); + + return 0; +} + static void tap_update_blkif_status(blkif_t *blkif) { int err; + char name[TASK_COMM_LEN]; /* Not ready to connect? 
*/ if(!blkif->irq || !blkif->sectors) { @@ -110,10 +131,13 @@ static void tap_update_blkif_status(blki if (blkif->be->dev->state != XenbusStateConnected) return; - blkif->xenblkd = kthread_run(tap_blkif_schedule, blkif, - "xvd %d", - blkif->domid); - + err = blktap_name(blkif, name); + if (err) { + xenbus_dev_error(blkif->be->dev, err, "get blktap dev name"); + return; + } + + blkif->xenblkd = kthread_run(tap_blkif_schedule, blkif, name); if (IS_ERR(blkif->xenblkd)) { err = PTR_ERR(blkif->xenblkd); blkif->xenblkd = NULL; diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c --- a/linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c Wed Jan 17 09:56:40 2007 -0500 @@ -98,8 +98,8 @@ void xen_machine_kexec_setup_resources(v err: /* * It isn't possible to free xen_phys_cpus this early in the - * boot. Since failure at this stage is unexpected and the - * amount is small we leak the memory. + * boot. Failure at this stage is unexpected and the amount of + * memory is small therefore we tolerate the potential leak. 
*/ xen_max_nr_phys_cpus = 0; return; diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/dma-mapping.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/dma-mapping.h Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/dma-mapping.h Wed Jan 17 09:56:40 2007 -0500 @@ -53,6 +53,7 @@ extern void dma_unmap_sg(struct device * extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, enum dma_data_direction direction); +#ifdef CONFIG_HIGHMEM extern dma_addr_t dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction); @@ -60,6 +61,11 @@ extern void extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, enum dma_data_direction direction); +#else +#define dma_map_page(dev, page, offset, size, dir) \ + dma_map_single(dev, page_address(page) + (offset), (size), (dir)) +#define dma_unmap_page dma_unmap_single +#endif extern void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgalloc.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgalloc.h Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgalloc.h Wed Jan 17 09:56:40 2007 -0500 @@ -42,7 +42,7 @@ static inline void pte_free_kernel(pte_t static inline void pte_free_kernel(pte_t *pte) { free_page((unsigned long)pte); - make_page_writable(pte, XENFEAT_writable_page_tables); + make_lowmem_page_writable(pte, XENFEAT_writable_page_tables); } extern void pte_free(struct page *pte); diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/swiotlb.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/swiotlb.h Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/swiotlb.h Wed Jan 17 09:56:40 2007 -0500 @@ 
-26,15 +26,15 @@ extern void swiotlb_unmap_sg(struct devi extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction); extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr); +#ifdef CONFIG_HIGHMEM extern dma_addr_t swiotlb_map_page(struct device *hwdev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction); extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address, size_t size, enum dma_data_direction direction); +#endif extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); extern void swiotlb_init(void); - -extern unsigned int dma_bits; #ifdef CONFIG_SWIOTLB extern int swiotlb; diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h Wed Jan 17 09:56:40 2007 -0500 @@ -64,50 +64,43 @@ static inline void pgd_populate(struct m } } -static inline void pmd_free(pmd_t *pmd) -{ - pte_t *ptep = virt_to_ptep(pmd); - - if (!pte_write(*ptep)) { - BUG_ON(HYPERVISOR_update_va_mapping( - (unsigned long)pmd, - pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT, PAGE_KERNEL), - 0)); - } - free_page((unsigned long)pmd); -} +extern struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr); +extern void pte_free(struct page *pte); static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - pmd_t *pmd = (pmd_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); - return pmd; + struct page *pg; + + pg = pte_alloc_one(mm, addr); + return pg ? 
page_address(pg) : NULL; +} + +static inline void pmd_free(pmd_t *pmd) +{ + BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); + pte_free(virt_to_page(pmd)); } static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - pud_t *pud = (pud_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); - return pud; + struct page *pg; + + pg = pte_alloc_one(mm, addr); + return pg ? page_address(pg) : NULL; } static inline void pud_free(pud_t *pud) { - pte_t *ptep = virt_to_ptep(pud); - - if (!pte_write(*ptep)) { - BUG_ON(HYPERVISOR_update_va_mapping( - (unsigned long)pud, - pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT, PAGE_KERNEL), - 0)); - } - free_page((unsigned long)pud); + BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); + pte_free(virt_to_page(pud)); } static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - /* - * We allocate two contiguous pages for kernel and user. - */ - unsigned boundary; + /* + * We allocate two contiguous pages for kernel and user. + */ + unsigned boundary; pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, 1); if (!pgd) @@ -124,11 +117,11 @@ static inline pgd_t *pgd_alloc(struct mm (PTRS_PER_PGD - boundary) * sizeof(pgd_t)); memset(__user_pgd(pgd), 0, PAGE_SIZE); /* clean up user pgd */ - /* - * Set level3_user_pgt for vsyscall area - */ + /* + * Set level3_user_pgt for vsyscall area + */ set_pgd(__user_pgd(pgd) + pgd_index(VSYSCALL_START), - mk_kernel_pgd(__pa_symbol(level3_user_pgt))); + mk_kernel_pgd(__pa_symbol(level3_user_pgt))); return pgd; } @@ -160,18 +153,10 @@ static inline void pgd_free(pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); - if (pte) + pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); + if (pte) make_page_readonly(pte, XENFEAT_writable_page_tables); - return pte; -} - -static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) -{ - struct page *pte; - - pte = 
alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); return pte; } @@ -181,18 +166,12 @@ static inline void pte_free_kernel(pte_t static inline void pte_free_kernel(pte_t *pte) { BUG_ON((unsigned long)pte & (PAGE_SIZE-1)); - make_page_writable(pte, XENFEAT_writable_page_tables); + make_page_writable(pte, XENFEAT_writable_page_tables); free_page((unsigned long)pte); } -extern void pte_free(struct page *pte); - -//#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) -//#define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) -//#define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) - -#define __pte_free_tlb(tlb,x) pte_free((x)) -#define __pmd_free_tlb(tlb,x) pmd_free((x)) -#define __pud_free_tlb(tlb,x) pud_free((x)) +#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) +#define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) +#define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) #endif /* _X86_64_PGALLOC_H */ diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/kernel/kexec.c --- a/linux-2.6-xen-sparse/kernel/kexec.c Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/kernel/kexec.c Wed Jan 17 09:56:40 2007 -0500 @@ -1012,9 +1012,11 @@ asmlinkage long sys_kexec_load(unsigned goto out; } #ifdef CONFIG_XEN - result = xen_machine_kexec_load(image); - if (result) - goto out; + if (image) { + result = xen_machine_kexec_load(image); + if (result) + goto out; + } #endif /* Install the new kernel, and Uninstall the old */ image = xchg(dest_image, image); diff -r 5568efb41da4 -r 3f6a2745b3a3 linux-2.6-xen-sparse/net/core/dev.c --- a/linux-2.6-xen-sparse/net/core/dev.c Mon Jan 15 13:27:20 2007 -0500 +++ b/linux-2.6-xen-sparse/net/core/dev.c Wed Jan 17 09:56:40 2007 -0500 @@ -1248,14 +1248,13 @@ static int dev_gso_segment(struct sk_buf /* Verifying header integrity only. 
*/ if (!segs) return 0; - + if (unlikely(IS_ERR(segs))) return PTR_ERR(segs); skb->next = segs; DEV_GSO_CB(skb)->destructor = skb->destructor; skb->destructor = dev_gso_skb_destructor; - return 0; } diff -r 5568efb41da4 -r 3f6a2745b3a3 patches/linux-2.6.16.33/series --- a/patches/linux-2.6.16.33/series Mon Jan 15 13:27:20 2007 -0500 +++ b/patches/linux-2.6.16.33/series Wed Jan 17 09:56:40 2007 -0500 @@ -9,7 +9,6 @@ fix-hz-suspend.patch fix-hz-suspend.patch fix-ide-cd-pio-mode.patch i386-mach-io-check-nmi.patch -ipv6-no-autoconf.patch net-csum.patch net-gso-0-base.patch net-gso-1-check-dodgy.patch diff -r 5568efb41da4 -r 3f6a2745b3a3 patches/linux-2.6.16.33/vsnprintf.patch --- a/patches/linux-2.6.16.33/vsnprintf.patch Mon Jan 15 13:27:20 2007 -0500 +++ b/patches/linux-2.6.16.33/vsnprintf.patch Wed Jan 17 09:56:40 2007 -0500 @@ -203,7 +203,7 @@ index b07db5c..f595947 100644 + if (str < end) + *str = '\0'; + else -+ *end = '\0'; ++ end[-1] = '\0'; + } + /* the trailing null byte doesn't count towards the total */ return str-buf; diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/check/check_udev --- a/tools/check/check_udev Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/check/check_udev Wed Jan 17 09:56:40 2007 -0500 @@ -11,7 +11,7 @@ Linux) Linux) TOOL="udevinfo" UDEV_VERSION="0" - test -x "$(which ${TOOL})" && \ + test -x "$(which ${TOOL} 2>/dev/null)" && \ UDEV_VERSION=$(${TOOL} -V | sed -e 's/^[^0-9]* \([0-9]\{1,\}\)[^0-9]\{0,\}/\1/') if test "${UDEV_VERSION}" -ge 059; then RC=0 @@ -28,7 +28,7 @@ esac if test ${RC} -ne 0; then echo - echo ' *** Check for ${TOOL} FAILED' + echo " *** Check for ${TOOL} FAILED" fi exit ${RC} diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/blktap --- a/tools/examples/blktap Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/blktap Wed Jan 17 09:56:40 2007 -0500 @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Copyright (c) 2005, XenSource Ltd. 
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/block --- a/tools/examples/block Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/block Wed Jan 17 09:56:40 2007 -0500 @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash dir=$(dirname "$0") . "$dir/block-common.sh" diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/block-enbd --- a/tools/examples/block-enbd Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/block-enbd Wed Jan 17 09:56:40 2007 -0500 @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Usage: block-enbd [bind server ctl_port |unbind node] # diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/block-nbd --- a/tools/examples/block-nbd Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/block-nbd Wed Jan 17 09:56:40 2007 -0500 @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Usage: block-nbd [bind server ctl_port |unbind node] # diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/external-device-migrate --- a/tools/examples/external-device-migrate Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/external-device-migrate Wed Jan 17 09:56:40 2007 -0500 @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Copyright (c) 2005 IBM Corporation # diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/network-bridge --- a/tools/examples/network-bridge Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/network-bridge Wed Jan 17 09:56:40 2007 -0500 @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash #============================================================================ # Default Xen network start/stop script. # Xend calls a network script when it starts. diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/network-nat --- a/tools/examples/network-nat Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/network-nat Wed Jan 17 09:56:40 2007 -0500 @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash #============================================================================ # Default Xen network start/stop script when using NAT. # Xend calls a network script when it starts. 
diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/network-route --- a/tools/examples/network-route Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/network-route Wed Jan 17 09:56:40 2007 -0500 @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash #============================================================================ # Default Xen network start/stop script. # Xend calls a network script when it starts. diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/vif-bridge --- a/tools/examples/vif-bridge Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/vif-bridge Wed Jan 17 09:56:40 2007 -0500 @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash #============================================================================ # /etc/xen/vif-bridge # diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/vif-common.sh --- a/tools/examples/vif-common.sh Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/vif-common.sh Wed Jan 17 09:56:40 2007 -0500 @@ -64,7 +64,7 @@ fi fi -function frob_iptable() +frob_iptable() { if [ "$command" == "online" ] then @@ -89,7 +89,7 @@ If you are using iptables, this may affe # to those coming from the specified networks, though we allow DHCP requests # as well. # -function handle_iptable() +handle_iptable() { # Check for a working iptables installation. Checking for the iptables # binary is not sufficient, because the user may not have the appropriate @@ -123,7 +123,7 @@ function handle_iptable() # Print the IP address currently in use at the given interface, or nothing if # the interface is not up. # -function ip_of() +ip_of() { ip addr show "$1" | awk "/^.*inet.*$1\$/{print \$2}" | sed -n '1 s,/.*,,p' } @@ -137,7 +137,7 @@ function ip_of() # to these scripts, or eth0 by default. This function will call fatal if no # such interface could be found. 
# -function dom0_ip() +dom0_ip() { local nd=${netdev:-eth0} local result=$(ip_of "$nd") diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/vif-nat --- a/tools/examples/vif-nat Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/vif-nat Wed Jan 17 09:56:40 2007 -0500 @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash #============================================================================ # /etc/xen/vif-nat # diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/vif-route --- a/tools/examples/vif-route Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/vif-route Wed Jan 17 09:56:40 2007 -0500 @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash #============================================================================ # /etc/xen/vif-route # diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/vtpm --- a/tools/examples/vtpm Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/vtpm Wed Jan 17 09:56:40 2007 -0500 @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash dir=$(dirname "$0") . "$dir/vtpm-hotplug-common.sh" diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/vtpm-common.sh --- a/tools/examples/vtpm-common.sh Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/vtpm-common.sh Wed Jan 17 09:56:40 2007 -0500 @@ -24,7 +24,9 @@ VTPMDB="/etc/xen/vtpm.db" #In the vtpm-impl file some commands should be defined: # vtpm_create, vtpm_setup, vtpm_start, etc. (see below) -if [ -r "$dir/vtpm-impl" ]; then +if [ -r "$dir/vtpm-impl.alt" ]; then + . "$dir/vtpm-impl.alt" +elif [ -r "$dir/vtpm-impl" ]; then . 
"$dir/vtpm-impl" else function vtpm_create () { diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/vtpm-delete --- a/tools/examples/vtpm-delete Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/vtpm-delete Wed Jan 17 09:56:40 2007 -0500 @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # This scripts must be called the following way: # vtpm-delete <domain name> diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/xen-backend.agent --- a/tools/examples/xen-backend.agent Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/xen-backend.agent Wed Jan 17 09:56:40 2007 -0500 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash PATH=/etc/xen/scripts:$PATH diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/xen-hotplug-cleanup --- a/tools/examples/xen-hotplug-cleanup Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/xen-hotplug-cleanup Wed Jan 17 09:56:40 2007 -0500 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash dir=$(dirname "$0") . "$dir/xen-hotplug-common.sh" diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/xen-network-common.sh --- a/tools/examples/xen-network-common.sh Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/xen-network-common.sh Wed Jan 17 09:56:40 2007 -0500 @@ -117,7 +117,12 @@ create_bridge () { ip link set ${bridge} arp off ip link set ${bridge} multicast off fi + + # A small MTU disables IPv6 (and therefore IPv6 addrconf). + mtu=$(ip link show ${bridge} | sed -n 's/.* mtu \([0-9]\+\).*/\1/p') + ip link set ${bridge} mtu 68 ip link set ${bridge} up + ip link set ${bridge} mtu ${mtu:-1500} } # Usage: add_to_bridge bridge dev diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/xmexample1 --- a/tools/examples/xmexample1 Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/xmexample1 Wed Jan 17 09:56:40 2007 -0500 @@ -64,6 +64,40 @@ vif = [ '' ] # and MODE is r for read-only, w for read-write. disk = [ 'phy:hda1,hda1,w' ] + +#---------------------------------------------------------------------------- +# Define frame buffer device. 
+# +# By default, no frame buffer device is configured. +# +# To create one using the SDL backend and sensible defaults: +# +# vfb = [ 'type=sdl' ] +# +# This uses environment variables XAUTHORITY and DISPLAY. You +# can override that: +# +# vfb = [ 'type=sdl,xauthority=/home/bozo/.Xauthority,display=:1' ] +# +# To create one using the VNC backend and sensible defaults: +# +# vfb = [ 'type=vnc' ] +# +# The backend listens on 127.0.0.1 port 5900+N by default, where N is +# the domain ID. You can override both address and N: +# +# vfb = [ 'type=vnc,vnclisten=127.0.0.1,vncdisplay=1' ] +# +# Or you can bind the first unused port above 5900: +# +# vfb = [ 'type=vnc,vnclisten=0.0.0.0,vncunused=1' ] +# +# You can override the password: +# +# vfb = [ 'type=vnc,vncpasswd=MYPASSWD' ] +# +# Empty password disables authentication. Defaults to the vncpasswd +# configured in xend-config.sxp. #---------------------------------------------------------------------------- # Define to which TPM instance the user domain should communicate. diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/xmexample2 --- a/tools/examples/xmexample2 Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/xmexample2 Wed Jan 17 09:56:40 2007 -0500 @@ -100,6 +100,40 @@ vif = [ '' ] # All domains get sda6 read-only (to use for /usr, see below). disk = [ 'phy:sda%d,sda1,w' % (7+vmid), 'phy:sda6,sda6,r' ] + +#---------------------------------------------------------------------------- +# Define frame buffer device. +# +# By default, no frame buffer device is configured. +# +# To create one using the SDL backend and sensible defaults: +# +# vfb = [ 'type=sdl' ] +# +# This uses environment variables XAUTHORITY and DISPLAY. You +# can override that: +# +# vfb = [ 'type=sdl,xauthority=/home/bozo/.Xauthority,display=:1' ] +# +# To create one using the VNC backend and sensible defaults: +# +# vfb = [ 'type=vnc' ] +# +# The backend listens on 127.0.0.1 port 5900+N by default, where N is +# the domain ID. 
You can override both address and N: +# +# vfb = [ 'type=vnc,vnclisten=127.0.0.1,vncdisplay=%d' % vmid ] +# +# Or you can bind the first unused port above 5900: +# +# vfb = [ 'type=vnc,vnclisten=0.0.0.0,vncunused=1' ] +# +# You can override the password: +# +# vfb = [ 'type=vnc,vncpasswd=MYPASSWD' ] +# +# Empty password disables authentication. Defaults to the vncpasswd +# configured in xend-config.sxp. #---------------------------------------------------------------------------- # Define to which TPM instance the user domain should communicate. diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/examples/xmexample3 --- a/tools/examples/xmexample3 Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/examples/xmexample3 Wed Jan 17 09:56:40 2007 -0500 @@ -85,6 +85,40 @@ vif = [ 'ip=192.168.%d.1/24' % (vmid)] # to all domains as sda1. # All domains get sda6 read-only (to use for /usr, see below). disk = [ 'phy:hda%d,hda1,w' % (vmid)] + +#---------------------------------------------------------------------------- +# Define frame buffer device. +# +# By default, no frame buffer device is configured. +# +# To create one using the SDL backend and sensible defaults: +# +# vfb = [ 'type=sdl' ] +# +# This uses environment variables XAUTHORITY and DISPLAY. You +# can override that: +# +# vfb = [ 'type=sdl,xauthority=/home/bozo/.Xauthority,display=:1' ] +# +# To create one using the VNC backend and sensible defaults: +# +# vfb = [ 'type=vnc' ] +# +# The backend listens on 127.0.0.1 port 5900+N by default, where N is +# the domain ID. You can override both address and N: +# +# vfb = [ 'type=vnc,vnclisten=127.0.0.1,vncdisplay=%d' % vmid ] +# +# Or you can bind the first unused port above 5900: +# +# vfb = [ 'type=vnc,vnclisten=0.0.0.0,vncunused=1' ] +# +# You can override the password: +# +# vfb = [ 'type=vnc,vncpasswd=MYPASSWD' ] +# +# Empty password disables authentication. Defaults to the vncpasswd +# configured in xend-config.sxp. 
#---------------------------------------------------------------------------- # Define to which TPM instance the user domain should communicate. diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/firmware/rombios/rombios.c --- a/tools/firmware/rombios/rombios.c Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/firmware/rombios/rombios.c Wed Jan 17 09:56:40 2007 -0500 @@ -278,7 +278,6 @@ typedef unsigned short bx_bool; typedef unsigned short bx_bool; typedef unsigned long Bit32u; -#if BX_USE_ATADRV void memsetb(seg,offset,value,count); void memcpyb(dseg,doffset,sseg,soffset,count); @@ -418,7 +417,6 @@ typedef unsigned long Bit32u; ASM_END } #endif -#endif //BX_USE_ATADRV // read_dword and write_dword functions static Bit32u read_dword(); @@ -728,6 +726,8 @@ typedef struct { // The EBDA structure should conform to // http://www.cybertrails.com/~fys/rombios.htm document // I made the ata and cdemu structs begin at 0x121 in the EBDA seg + // EBDA must be at most 768 bytes; it lives at 0x9fc00, and the boot + // device tables are at 0x9ff00 -- 0x9ffff typedef struct { unsigned char filler1[0x3D]; @@ -885,7 +885,7 @@ static void int15_function(); static void int15_function(); static void int16_function(); static void int17_function(); -static Bit32u int19_function(); +static void int19_function(); static void int1a_function(); static void int70_function(); static void int74_function(); @@ -1435,10 +1435,17 @@ copy_e820_table() copy_e820_table() { Bit8u nr_entries = read_byte(0x9000, 0x1e8); + Bit32u base_mem; if (nr_entries > 32) nr_entries = 32; write_word(0xe000, 0x8, nr_entries); memcpyb(0xe000, 0x10, 0x9000, 0x2d0, nr_entries * 0x14); + /* Report the proper base memory size at address 0x0413: otherwise + * non-e820 code will clobber things if BASE_MEM_IN_K is bigger than + * the first e820 entry. Get the size by reading the second 64bit + * field of the first e820 slot. 
*/ + base_mem = read_dword(0x9000, 0x2d0 + 8); + write_word(0x40, 0x13, base_mem >> 10); } #endif /* HVMASSIST */ @@ -1847,28 +1854,100 @@ print_bios_banner() printf("\n"); } + +//-------------------------------------------------------------------------- +// BIOS Boot Specification 1.0.1 compatibility +// +// Very basic support for the BIOS Boot Specification, which allows expansion +// ROMs to register themselves as boot devices, instead of just stealing the +// INT 19h boot vector. +// +// This is a hack: to do it properly requires a proper PnP BIOS and we aren't +// one; we just lie to the option ROMs to make them behave correctly. +// We also don't support letting option ROMs register as bootable disk +// drives (BCVs), only as bootable devices (BEVs). +// +// http://www.phoenix.com/en/Customer+Services/White+Papers-Specs/pc+industry+specifications.htm +//-------------------------------------------------------------------------- + +/* 256 bytes at 0x9ff00 -- 0x9ffff is used for the IPL boot table. */ +#define IPL_SEG 0x9ff0 +#define IPL_TABLE_OFFSET 0x0000 +#define IPL_TABLE_ENTRIES 8 +#define IPL_COUNT_OFFSET 0x0080 /* u16: number of valid table entries */ +#define IPL_SEQUENCE_OFFSET 0x0082 /* u16: next boot device */ + +struct ipl_entry { + Bit16u type; + Bit16u flags; + Bit32u vector; + Bit32u description; + Bit32u reserved; +}; + +static void +init_boot_vectors() +{ + struct ipl_entry e; + Bit16u count = 0; + Bit16u ss = get_SS(); + + /* Clear out the IPL table. 
*/ + memsetb(IPL_SEG, IPL_TABLE_OFFSET, 0, 0xff); + + /* Floppy drive */ + e.type = 1; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0; + memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e)); + count++; + + /* First HDD */ + e.type = 2; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0; + memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e)); + count++; + +#if BX_ELTORITO_BOOT + /* CDROM */ + e.type = 3; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0; + memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e)); + count++; +#endif + + /* Remember how many devices we have */ + write_word(IPL_SEG, IPL_COUNT_OFFSET, count); + /* Not tried booting anything yet */ + write_word(IPL_SEG, IPL_SEQUENCE_OFFSET, 0xffff); +} + +static Bit8u +get_boot_vector(i, e) +Bit16u i; struct ipl_entry *e; +{ + Bit16u count; + Bit16u ss = get_SS(); + /* Get the count of boot devices, and refuse to overrun the array */ + count = read_word(IPL_SEG, IPL_COUNT_OFFSET); + if (i >= count) return 0; + /* OK to read this device */ + memcpyb(ss, e, IPL_SEG, IPL_TABLE_OFFSET + i * sizeof (*e), sizeof (*e)); + return 1; +} + + //-------------------------------------------------------------------------- // print_boot_device // displays the boot device //-------------------------------------------------------------------------- -static char drivetypes[][10]={"Floppy","Hard Disk","CD-Rom"}; +static char drivetypes[][10]={"", "Floppy","Hard Disk","CD-Rom", "Network"}; void -print_boot_device(cdboot, drive) - Bit8u cdboot; Bit16u drive; +print_boot_device(type) + Bit16u type; { - Bit8u i; - - // cdboot contains 0 if floppy/harddisk, 1 otherwise - // drive contains real/emulated boot drive - - if(cdboot)i=2; // CD-Rom - else if((drive&0x0080)==0x00)i=0; // Floppy - else if((drive&0x0080)==0x80)i=1; // Hard drive - else return; - - printf("Booting from %s...\n",drivetypes[i]); + /* NIC appears as type 0x80 
*/ + if (type == 0x80 ) type = 0x4; + if (type == 0 || type > 0x4) BX_PANIC("Bad drive type\n"); + printf("Booting from %s...\n", drivetypes[type]); } //-------------------------------------------------------------------------- @@ -1876,29 +1955,20 @@ print_boot_device(cdboot, drive) // displays the reason why boot failed //-------------------------------------------------------------------------- void -print_boot_failure(cdboot, drive, reason, lastdrive) - Bit8u cdboot; Bit8u drive; Bit8u lastdrive; +print_boot_failure(type, reason) + Bit16u type; Bit8u reason; { - Bit16u drivenum = drive&0x7f; - - // cdboot: 1 if boot from cd, 0 otherwise - // drive : drive number - // reason: 0 signature check failed, 1 read error - // lastdrive: 1 boot drive is the last one in boot sequence - - if (cdboot) - bios_printf(BIOS_PRINTF_INFO | BIOS_PRINTF_SCREEN, "Boot from %s failed\n",drivetypes[2]); - else if (drive & 0x80) - bios_printf(BIOS_PRINTF_INFO | BIOS_PRINTF_SCREEN, "Boot from %s %d failed\n", drivetypes[1],drivenum); + if (type == 0 || type > 0x3) BX_PANIC("Bad drive type\n"); + + printf("Boot from %s failed", drivetypes[type]); + if (type < 4) { + /* Report the reason too */ + if (reason==0) + printf(": not a bootable disk"); else - bios_printf(BIOS_PRINTF_INFO | BIOS_PRINTF_SCREEN, "Boot from %s %d failed\n", drivetypes[0],drivenum); - - if (lastdrive==1) { - if (reason==0) - BX_PANIC("Not a bootable disk\n"); - else - BX_PANIC("Could not read the boot disk\n"); + printf(": could not read the boot disk"); } + printf("\n"); } //-------------------------------------------------------------------------- @@ -7546,19 +7616,19 @@ int17_function(regs, ds, iret_addr) } } -// returns bootsegment in ax, drive in bl - Bit32u -int19_function(bseqnr) -Bit8u bseqnr; +void +int19_function(seq_nr) +Bit16u seq_nr; { Bit16u ebda_seg=read_word(0x0040,0x000E); - Bit16u bootseq; + Bit16u bootdev; Bit8u bootdrv; - Bit8u bootcd; Bit8u bootchk; Bit16u bootseg; + Bit16u bootip; Bit16u 
status; - Bit8u lastdrive=0; + + struct ipl_entry e; // if BX_ELTORITO_BOOT is not defined, old behavior // check bit 5 in CMOS reg 0x2d. load either 0x00 or 0x80 into DL @@ -7575,62 +7645,54 @@ Bit8u bseqnr; // 0x01 : first floppy // 0x02 : first harddrive // 0x03 : first cdrom + // 0x04 - 0x0f : PnP expansion ROMs (e.g. Etherboot) // else : boot failure // Get the boot sequence #if BX_ELTORITO_BOOT - bootseq=inb_cmos(0x3d); - bootseq|=((inb_cmos(0x38) & 0xf0) << 4); - - if (bseqnr==2) bootseq >>= 4; - if (bseqnr==3) bootseq >>= 8; - if (bootseq<0x10) lastdrive = 1; - bootdrv=0x00; bootcd=0; - switch(bootseq & 0x0f) { - case 0x01: bootdrv=0x00; bootcd=0; break; - case 0x02: bootdrv=0x80; bootcd=0; break; - case 0x03: bootdrv=0x00; bootcd=1; break; - default: return 0x00000000; - } -#else - bootseq=inb_cmos(0x2d); - - if (bseqnr==2) { - bootseq ^= 0x20; - lastdrive = 1; + bootdev = inb_cmos(0x3d); + bootdev |= ((inb_cmos(0x38) & 0xf0) << 4); + bootdev >>= 4 * seq_nr; + bootdev &= 0xf; + if (bootdev == 0) BX_PANIC("No bootable device.\n"); + + /* Translate from CMOS runes to an IPL table offset by subtracting 1 */ + bootdev -= 1; +#else + if (seq_nr ==2) BX_PANIC("No more boot devices."); + if (!!(inb_cmos(0x2d) & 0x20) ^ (seq_nr == 1)) + /* Boot from floppy if the bit is set or it's the second boot */ + bootdev = 0x00; + else + bootdev = 0x01; +#endif + + /* Read the boot device from the IPL table */ + if (get_boot_vector(bootdev, &e) == 0) { + BX_INFO("Invalid boot device (0x%x)\n", bootdev); + return; } - bootdrv=0x00; bootcd=0; - if((bootseq&0x20)==0) bootdrv=0x80; -#endif // BX_ELTORITO_BOOT - -#if BX_ELTORITO_BOOT - // We have to boot from cd - if (bootcd != 0) { - status = cdrom_boot(); - - // If failure - if ( (status & 0x00ff) !=0 ) { - print_cdromboot_failure(status); - print_boot_failure(bootcd, bootdrv, 1, lastdrive); - return 0x00000000; - } - - bootseg = read_word(ebda_seg,&EbdaData->cdemu.load_segment); - bootdrv = (Bit8u)(status>>8); - } - -#endif // 
BX_ELTORITO_BOOT - - // We have to boot from harddisk or floppy - if (bootcd == 0) { - bootseg=0x07c0; + + /* Do the loading, and set up vector as a far pointer to the boot + * address, and bootdrv as the boot drive */ + print_boot_device(e.type); + + switch(e.type) { + case 0x01: /* FDD */ + case 0x02: /* HDD */ + + bootdrv = (e.type == 0x02) ? 0x80 : 0x00; + bootseg = 0x07c0; + status = 0; ASM_START push bp mov bp, sp - - mov ax, #0x0000 - mov _int19_function.status + 2[bp], ax + push ax + push bx + push cx + push dx + mov dl, _int19_function.bootdrv + 2[bp] mov ax, _int19_function.bootseg + 2[bp] mov es, ax ;; segment @@ -7646,43 +7708,83 @@ ASM_START mov _int19_function.status + 2[bp], ax int19_load_done: + pop dx + pop cx + pop bx + pop ax pop bp ASM_END if (status != 0) { - print_boot_failure(bootcd, bootdrv, 1, lastdrive); - return 0x00000000; + print_boot_failure(e.type, 1); + return; + } + + /* Always check the signature on a HDD boot sector; on FDD, only do + * the check if the CMOS doesn't tell us to skip it */ + if (e.type != 0x00 || !((inb_cmos(0x38) & 0x01))) { + if (read_word(bootseg,0x1fe) != 0xaa55) { + print_boot_failure(e.type, 0); + return; } } - // check signature if instructed by cmos reg 0x38, only for floppy - // bootchk = 1 : signature check disabled - // bootchk = 0 : signature check enabled - if (bootdrv != 0) bootchk = 0; - else bootchk = inb_cmos(0x38) & 0x01; + /* Canonicalize bootseg:bootip */ + bootip = (bootseg & 0x0fff) << 4; + bootseg &= 0xf000; + break; #if BX_ELTORITO_BOOT - // if boot from cd, no signature check - if (bootcd != 0) - bootchk = 1; -#endif // BX_ELTORITO_BOOT - - if (bootchk == 0) { - if (read_word(bootseg,0x1fe) != 0xaa55) { - print_boot_failure(bootcd, bootdrv, 0, lastdrive); - return 0x00000000; - } + case 0x03: /* CD-ROM */ + status = cdrom_boot(); + + // If failure + if ( (status & 0x00ff) !=0 ) { + print_cdromboot_failure(status); + print_boot_failure(e.type, 1); + return; } + + bootdrv = (Bit8u)(status>>8); 
+ bootseg = read_word(ebda_seg,&EbdaData->cdemu.load_segment); + /* Canonicalize bootseg:bootip */ + bootip = (bootseg & 0x0fff) << 4; + bootseg &= 0xf000; + break; +#endif + + case 0x80: /* Expansion ROM with a Bootstrap Entry Vector (a far pointer) */ + bootseg = e.vector >> 16; + bootip = e.vector & 0xffff; + break; + + default: return; + } + + /* Debugging info */ + printf("Booting from %x:%x\n", bootseg, bootip); -#if BX_ELTORITO_BOOT - // Print out the boot string - print_boot_device(bootcd, bootdrv); -#else // BX_ELTORITO_BOOT - print_boot_device(0, bootdrv); -#endif // BX_ELTORITO_BOOT - - // return the boot segment - return (((Bit32u)bootdrv) << 16) + bootseg; + /* Jump to the boot vector */ +ASM_START + mov bp, sp + ;; Build an iret stack frame that will take us to the boot vector. + ;; iret pops ip, then cs, then flags, so push them in the opposite order. + pushf + mov ax, _int19_function.bootseg + 0[bp] + push ax + mov ax, _int19_function.bootip + 0[bp] + push ax + ;; Set the magic number in ax and the boot drive in dl. + mov ax, #0xaa55 + mov dl, _int19_function.bootdrv + 0[bp] + ;; Zero some of the other registers. + xor bx, bx + mov ds, bx + mov es, bx + mov bp, bx + ;; Go! + iret +ASM_END } void @@ -8139,14 +8241,29 @@ int13_out: popa iret - ;---------- ;- INT18h - ;---------- -int18_handler: ;; Boot Failure routing - call _int18_panic_msg - hlt - iret +int18_handler: ;; Boot Failure recovery: try the next device. + + ;; Reset SP and SS + mov ax, #0xfffe + mov sp, ax + xor ax, ax + mov ss, ax + + ;; Get the boot sequence number out of the IPL memory + mov bx, #IPL_SEG + mov ds, bx ;; Set segment + mov bx, IPL_SEQUENCE_OFFSET ;; BX is now the sequence number + inc bx ;; ++ + mov IPL_SEQUENCE_OFFSET, bx ;; Write it back + mov ds, ax ;; and reset the segment to zero. 
+ + ;; Carry on in the INT 19h handler, using the new sequence number + push bx + + jmp int19_next_boot ;---------- ;- INT19h - @@ -8154,62 +8271,32 @@ int19_relocated: ;; Boot function, reloc int19_relocated: ;; Boot function, relocated ;; int19 was beginning to be really complex, so now it - ;; just calls an C function, that does the work - ;; it returns in BL the boot drive, and in AX the boot segment - ;; the boot segment will be 0x0000 if something has failed + ;; just calls a C function that does the work push bp mov bp, sp - - ;; drop ds + + ;; Reset SS and SP + mov ax, #0xfffe + mov sp, ax xor ax, ax - mov ds, ax - - ;; 1st boot device - mov ax, #0x0001 + mov ss, ax + + ;; Start from the first boot device (0, in AX) + mov bx, #IPL_SEG + mov ds, bx ;; Set segment to write to the IPL memory + mov IPL_SEQUENCE_OFFSET, ax ;; Save the sequence number + mov ds, ax ;; and reset the segment. + push ax + +int19_next_boot: + + ;; Call the C code for the next boot device call _int19_function - inc sp - inc sp - ;; bl contains the boot drive - ;; ax contains the boot segment or 0 if failure - - test ax, ax ;; if ax is 0 try next boot device - jnz boot_setup - - ;; 2nd boot device - mov ax, #0x0002 - push ax - call _int19_function - inc sp - inc sp - test ax, ax ;; if ax is 0 try next boot device - jnz boot_setup - - ;; 3rd boot device - mov ax, #0x0003 - push ax - call _int19_function - inc sp - inc sp - test ax, ax ;; if ax is 0 call int18 - jz int18_handler - -boot_setup: - mov dl, bl ;; set drive so guest os find it - shl eax, #0x04 ;; convert seg to ip - mov 2[bp], ax ;; set ip - - shr eax, #0x04 ;; get cs back - and ax, #0xF000 ;; remove what went in ip - mov 4[bp], ax ;; set cs - xor ax, ax - mov es, ax ;; set es to zero fixes [ 549815 ] - mov [bp], ax ;; set bp to zero - mov ax, #0xaa55 ;; set ok flag - - pop bp - iret ;; Beam me up Scotty + + ;; Boot failed: invoke the boot recovery function + int #0x18 ;---------- ;- INT1Ch - @@ -9387,6 +9474,15 @@ 
checksum_loop: pop ax ret + +;; We need a copy of this string, but we are not actually a PnP BIOS, +;; so make sure it is *not* aligned, so OSes will not see it if they scan. +.align 16 + db 0 +pnp_string: + .ascii "$PnP" + + rom_scan: ;; Scan for existence of valid expansion ROMS. ;; Video ROM: from 0xC0000..0xC7FFF in 2k increments @@ -9421,9 +9517,17 @@ block_count_rounded: xor bx, bx ;; Restore DS back to 0000: mov ds, bx push ax ;; Save AX + push di ;; Save DI ;; Push addr of ROM entry point push cx ;; Push seg push #0x0003 ;; Push offset + + ;; Point ES:DI at "$PnP", which tells the ROM that we are a PnP BIOS. + ;; That should stop it grabbing INT 19h; we will use its BEV instead. + mov ax, #0xf000 + mov es, ax + lea di, pnp_string + mov bp, sp ;; Call ROM init routine using seg:off on stack db 0xff ;; call_far ss:[bp+0] db 0x5e @@ -9431,6 +9535,38 @@ block_count_rounded: cli ;; In case expansion ROM BIOS turns IF on add sp, #2 ;; Pop offset value pop cx ;; Pop seg value (restore CX) + + ;; Look at the ROM's PnP Expansion header. Properly, we're supposed + ;; to init all the ROMs and then go back and build an IPL table of + ;; all the bootable devices, but we can get away with one pass. + mov ds, cx ;; ROM base + mov bx, 0x001a ;; 0x1A is the offset into ROM header that contains... + mov ax, [bx] ;; the offset of PnP expansion header, where... + cmp ax, #0x5024 ;; we look for signature "$PnP" + jne no_bev + mov ax, 2[bx] + cmp ax, #0x506e + jne no_bev + mov ax, 0x1a[bx] ;; 0x1A is also the offset into the expansion header of... + cmp ax, #0x0000 ;; the Bootstrap Entry Vector, or zero if there is none. + je no_bev + + ;; Found a device that thinks it can boot the system. Record its BEV. 
+ mov bx, #IPL_SEG ;; Go to the segment where the IPL table lives + mov ds, bx + mov bx, IPL_COUNT_OFFSET ;; Read the number of entries so far + cmp bx, #IPL_TABLE_ENTRIES + je no_bev ;; Get out if the table is full + shl bx, #0x4 ;; Turn count into offset (entries are 16 bytes) + mov 0[bx], #0x80 ;; This entry is a BEV device + mov 6[bx], cx ;; Build a far pointer from the segment... + mov 4[bx], ax ;; and the offset + shr bx, #0x4 ;; Turn the offset back into a count + inc bx ;; We have one more entry now + mov IPL_COUNT_OFFSET, bx ;; Remember that. + +no_bev: + pop di ;; Restore DI pop ax ;; Restore AX rom_scan_increment: shl ax, #5 ;; convert 512-bytes blocks to 16-byte increments @@ -9763,6 +9899,8 @@ post_default_ints: call _copy_e820_table call smbios_init #endif + + call _init_boot_vectors call rom_scan diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/ioemu/hw/pc.c --- a/tools/ioemu/hw/pc.c Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/ioemu/hw/pc.c Wed Jan 17 09:56:40 2007 -0500 @@ -168,6 +168,8 @@ static int get_bios_disk(char *boot_devi return 0x02; /* hard drive */ case 'd': return 0x03; /* cdrom */ + case 'n': + return 0x04; /* network */ } } return 0x00; /* no device */ diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/ioemu/target-i386-dm/helper2.c --- a/tools/ioemu/target-i386-dm/helper2.c Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/ioemu/target-i386-dm/helper2.c Wed Jan 17 09:56:40 2007 -0500 @@ -498,6 +498,8 @@ void handle_buffered_io(void *opaque) void cpu_handle_ioreq(void *opaque) { + extern int vm_running; + extern int shutdown_requested; CPUState *env = opaque; ioreq_t *req = cpu_get_ioreq(); @@ -516,6 +518,25 @@ void cpu_handle_ioreq(void *opaque) } wmb(); /* Update ioreq contents /then/ update state. */ + + /* + * We do this before we send the response so that the tools + * have the opportunity to pick up on the reset before the + * guest resumes and does a hlt with interrupts disabled which + * causes Xen to powerdown the domain. 
+ */ + if (vm_running) { + if (shutdown_requested) { + fprintf(logfile, "shutdown requested in cpu_handle_ioreq\n"); + destroy_hvm_domain(); + } + if (reset_requested) { + fprintf(logfile, "reset requested in cpu_handle_ioreq.\n"); + qemu_system_reset(); + reset_requested = 0; + } + } + req->state = STATE_IORESP_READY; xc_evtchn_notify(xce_handle, ioreq_local_port[send_vcpu]); } diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/ioemu/vl.c Wed Jan 17 09:56:40 2007 -0500 @@ -6153,7 +6153,7 @@ int main(int argc, char **argv) case QEMU_OPTION_boot: boot_device = strdup(optarg); if (strspn(boot_device, "acd" -#ifdef TARGET_SPARC +#if defined(TARGET_SPARC) || defined(TARGET_I386) "n" #endif ) != strlen(boot_device)) { diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxc/xc_hvm_build.c --- a/tools/libxc/xc_hvm_build.c Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/libxc/xc_hvm_build.c Wed Jan 17 09:56:40 2007 -0500 @@ -233,8 +233,7 @@ static int setup_guest(int xc_handle, SCRATCH_PFN)) == NULL) ) goto error_out; memset(shared_info, 0, PAGE_SIZE); - for ( i = 0; i < MAX_VIRT_CPUS; i++ ) - shared_info->vcpu_info[i].evtchn_upcall_mask = 1; + /* NB. evtchn_upcall_mask is unused: leave as zero. */ memset(&shared_info->evtchn_mask[0], 0xff, sizeof(shared_info->evtchn_mask)); munmap(shared_info, PAGE_SIZE); diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/libxc/xc_linux_build.c Wed Jan 17 09:56:40 2007 -0500 @@ -595,6 +595,7 @@ static int compat_check(int xc_handle, s return 0; } +#ifndef __x86_64__//temp if (strstr(xen_caps, "xen-3.0-x86_32p")) { if (dsi->pae_kernel == PAEKERN_bimodal) { dsi->pae_kernel = PAEKERN_extended_cr3; @@ -612,6 +613,7 @@ static int compat_check(int xc_handle, s return 0; } } +#endif return 1; } @@ -739,7 +741,7 @@ static int setup_guest(int xc_handle, /* * Enable shadow translate mode. 
This must happen after * populate physmap because the p2m reservation is based on - * the domains current memory allocation. + * the domain's current memory allocation. */ if ( xc_shadow_control(xc_handle, dom, XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE, diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/libxc/xc_linux_restore.c Wed Jan 17 09:56:40 2007 -0500 @@ -12,7 +12,7 @@ #include "xg_private.h" #include "xg_save_restore.h" -/* max mfn of the whole machine */ +/* max mfn of the current host machine */ static unsigned long max_mfn; /* virtual starting address of the hypervisor */ @@ -29,6 +29,9 @@ static xen_pfn_t *live_p2m = NULL; /* A table mapping each PFN to its new MFN. */ static xen_pfn_t *p2m = NULL; + +/* A table of P2M mappings in the current region */ +static xen_pfn_t *p2m_batch = NULL; static ssize_t @@ -57,46 +60,78 @@ read_exact(int fd, void *buf, size_t cou ** This function inverts that operation, replacing the pfn values with ** the (now known) appropriate mfn values. */ -static int uncanonicalize_pagetable(unsigned long type, void *page) +static int uncanonicalize_pagetable(int xc_handle, uint32_t dom, + unsigned long type, void *page) { int i, pte_last; unsigned long pfn; uint64_t pte; + int nr_mfns = 0; pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8); - /* Now iterate through the page table, uncanonicalizing each PTE */ + /* First pass: work out how many (if any) MFNs we need to alloc */ for(i = 0; i < pte_last; i++) { - + if(pt_levels == 2) pte = ((uint32_t *)page)[i]; else pte = ((uint64_t *)page)[i]; - - if(pte & _PAGE_PRESENT) { - - pfn = (pte >> PAGE_SHIFT) & 0xffffffff; - - if(pfn >= max_pfn) { - /* This "page table page" is probably not one; bail. 
*/ - ERROR("Frame number in type %lu page table is out of range: " - "i=%d pfn=0x%lx max_pfn=%lu", - type >> 28, i, pfn, max_pfn); - return 0; - } - - - pte &= 0xffffff0000000fffULL; - pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT; - - if(pt_levels == 2) - ((uint32_t *)page)[i] = (uint32_t)pte; - else - ((uint64_t *)page)[i] = (uint64_t)pte; - - - - } + + /* XXX SMH: below needs fixing for PROT_NONE etc */ + if(!(pte & _PAGE_PRESENT)) + continue; + + pfn = (pte >> PAGE_SHIFT) & 0xffffffff; + + if(pfn >= max_pfn) { + /* This "page table page" is probably not one; bail. */ + ERROR("Frame number in type %lu page table is out of range: " + "i=%d pfn=0x%lx max_pfn=%lu", + type >> 28, i, pfn, max_pfn); + return 0; + } + + if(p2m[pfn] == INVALID_P2M_ENTRY) { + /* Have a 'valid' PFN without a matching MFN - need to alloc */ + p2m_batch[nr_mfns++] = pfn; + } + } + + + /* Allocate the requisite number of mfns */ + if (nr_mfns && xc_domain_memory_populate_physmap( + xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) { + ERROR("Failed to allocate memory for batch.!\n"); + errno = ENOMEM; + return 0; + } + + /* Second pass: uncanonicalize each present PTE */ + nr_mfns = 0; + for(i = 0; i < pte_last; i++) { + + if(pt_levels == 2) + pte = ((uint32_t *)page)[i]; + else + pte = ((uint64_t *)page)[i]; + + /* XXX SMH: below needs fixing for PROT_NONE etc */ + if(!(pte & _PAGE_PRESENT)) + continue; + + pfn = (pte >> PAGE_SHIFT) & 0xffffffff; + + if(p2m[pfn] == INVALID_P2M_ENTRY) + p2m[pfn] = p2m_batch[nr_mfns++]; + + pte &= 0xffffff0000000fffULL; + pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT; + + if(pt_levels == 2) + ((uint32_t *)page)[i] = (uint32_t)pte; + else + ((uint64_t *)page)[i] = (uint64_t)pte; } return 1; @@ -140,6 +175,7 @@ int xc_linux_restore(int xc_handle, int /* A temporary mapping of the guest's start_info page. 
*/ start_info_t *start_info; + /* Our mapping of the current region (batch) */ char *region_base; xc_mmu_t *mmu = NULL; @@ -244,8 +280,10 @@ int xc_linux_restore(int xc_handle, int p2m = calloc(max_pfn, sizeof(xen_pfn_t)); pfn_type = calloc(max_pfn, sizeof(unsigned long)); region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t)); - - if ((p2m == NULL) || (pfn_type == NULL) || (region_mfn == NULL)) { + p2m_batch = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t)); + + if ((p2m == NULL) || (pfn_type == NULL) || + (region_mfn == NULL) || (p2m_batch == NULL)) { ERROR("memory alloc failed"); errno = ENOMEM; goto out; @@ -253,6 +291,11 @@ int xc_linux_restore(int xc_handle, int if (lock_pages(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) { ERROR("Could not lock region_mfn"); + goto out; + } + + if (lock_pages(p2m_batch, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) { + ERROR("Could not lock p2m_batch"); goto out; } @@ -270,17 +313,9 @@ int xc_linux_restore(int xc_handle, int goto out; } + /* Mark all PFNs as invalid; we allocate on demand */ for ( pfn = 0; pfn < max_pfn; pfn++ ) - p2m[pfn] = pfn; - - if (xc_domain_memory_populate_physmap(xc_handle, dom, max_pfn, - 0, 0, p2m) != 0) { - ERROR("Failed to increase reservation by %lx KB", PFN_TO_KB(max_pfn)); - errno = ENOMEM; - goto out; - } - - DPRINTF("Increased domain reservation by %lx KB\n", PFN_TO_KB(max_pfn)); + p2m[pfn] = INVALID_P2M_ENTRY; if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) { ERROR("Could not initialise for MMU updates"); @@ -298,7 +333,7 @@ int xc_linux_restore(int xc_handle, int n = 0; while (1) { - int j; + int j, nr_mfns = 0; this_pc = (n * 100) / max_pfn; if ( (this_pc - prev_pc) >= 5 ) @@ -333,20 +368,57 @@ int xc_linux_restore(int xc_handle, int goto out; } + /* First pass for this batch: work out how much memory to alloc */ + nr_mfns = 0; for ( i = 0; i < j; i++ ) { unsigned long pfn, pagetype; pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; 
+ if ( (pagetype != XEN_DOMCTL_PFINFO_XTAB) && + (p2m[pfn] == INVALID_P2M_ENTRY) ) + { + /* Have a live PFN which hasn't had an MFN allocated */ + p2m_batch[nr_mfns++] = pfn; + } + } + + + /* Now allocate a bunch of mfns for this batch */ + if (nr_mfns && xc_domain_memory_populate_physmap( + xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) { + ERROR("Failed to allocate memory for batch.!\n"); + errno = ENOMEM; + goto out; + } + + /* Second pass for this batch: update p2m[] and region_mfn[] */ + nr_mfns = 0; + for ( i = 0; i < j; i++ ) + { + unsigned long pfn, pagetype; + pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; + pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; + if ( pagetype == XEN_DOMCTL_PFINFO_XTAB) - region_mfn[i] = 0; /* we know map will fail, but don't care */ - else - region_mfn[i] = p2m[pfn]; - } - + region_mfn[i] = ~0UL; /* map will fail but we don't care */ + else + { + if (p2m[pfn] == INVALID_P2M_ENTRY) { + /* We just allocated a new mfn above; update p2m */ + p2m[pfn] = p2m_batch[nr_mfns++]; + } + + /* setup region_mfn[] for batch map */ + region_mfn[i] = p2m[pfn]; + } + } + + /* Map relevant mfns */ region_base = xc_map_foreign_batch( xc_handle, dom, PROT_WRITE, region_mfn, j); + if ( region_base == NULL ) { ERROR("map batch failed"); @@ -401,7 +473,8 @@ int xc_linux_restore(int xc_handle, int pae_extended_cr3 || (pagetype != XEN_DOMCTL_PFINFO_L1TAB)) { - if (!uncanonicalize_pagetable(pagetype, page)) { + if (!uncanonicalize_pagetable(xc_handle, dom, + pagetype, page)) { /* ** Failing to uncanonicalize a page table can be ok ** under live migration since the pages type may have @@ -411,10 +484,8 @@ int xc_linux_restore(int xc_handle, int pagetype >> 28, pfn, mfn); nraces++; continue; - } - - } - + } + } } else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB ) { @@ -486,7 +557,7 @@ int xc_linux_restore(int xc_handle, int */ int j, k; - + /* First pass: find all L3TABs current in > 4G mfns and get new mfns */ for ( i = 0; i < 
max_pfn; i++ ) { @@ -555,7 +626,8 @@ int xc_linux_restore(int xc_handle, int } for(k = 0; k < j; k++) { - if(!uncanonicalize_pagetable(XEN_DOMCTL_PFINFO_L1TAB, + if(!uncanonicalize_pagetable(xc_handle, dom, + XEN_DOMCTL_PFINFO_L1TAB, region_base + k*PAGE_SIZE)) { ERROR("failed uncanonicalize pt!"); goto out; @@ -631,7 +703,7 @@ int xc_linux_restore(int xc_handle, int { unsigned int count; unsigned long *pfntab; - int rc; + int nr_frees, rc; if (!read_exact(io_fd, &count, sizeof(count))) { ERROR("Error when reading pfn count"); @@ -648,29 +720,30 @@ int xc_linux_restore(int xc_handle, int goto out; } + nr_frees = 0; for (i = 0; i < count; i++) { unsigned long pfn = pfntab[i]; - if(pfn > max_pfn) - /* shouldn't happen - continue optimistically */ - continue; - - pfntab[i] = p2m[pfn]; - p2m[pfn] = INVALID_P2M_ENTRY; // not in pseudo-physical map - } - - if (count > 0) { + if(p2m[pfn] != INVALID_P2M_ENTRY) { + /* pfn is not in physmap now, but was at some point during + the save/migration process - need to free it */ + pfntab[nr_frees++] = p2m[pfn]; + p2m[pfn] = INVALID_P2M_ENTRY; // not in pseudo-physical map + } + } + + if (nr_frees > 0) { struct xen_memory_reservation reservation = { - .nr_extents = count, + .nr_extents = nr_frees, .extent_order = 0, .domid = dom }; set_xen_guest_handle(reservation.extent_start, pfntab); if ((rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation, - &reservation)) != count) { + &reservation)) != nr_frees) { ERROR("Could not decrease reservation : %d", rc); goto out; } else @@ -791,6 +864,6 @@ int xc_linux_restore(int xc_handle, int free(pfn_type); DPRINTF("Restore exit with rc=%d\n", rc); - + return rc; } diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxc/xc_linux_save.c --- a/tools/libxc/xc_linux_save.c Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/libxc/xc_linux_save.c Wed Jan 17 09:56:40 2007 -0500 @@ -44,6 +44,7 @@ static xen_pfn_t *live_p2m = NULL; /* Live mapping of system MFN to PFN table. 
*/ static xen_pfn_t *live_m2p = NULL; +static unsigned long m2p_mfn0; /* grep fodder: machine_to_phys */ @@ -440,13 +441,23 @@ static int canonicalize_pagetable(unsign ** that this check will fail for other L2s. */ if (pt_levels == 3 && type == XEN_DOMCTL_PFINFO_L2TAB) { - -/* XXX index of the L2 entry in PAE mode which holds the guest LPT */ -#define PAE_GLPT_L2ENTRY (495) - pte = ((const uint64_t*)spage)[PAE_GLPT_L2ENTRY]; - - if(((pte >> PAGE_SHIFT) & 0x0fffffff) == live_p2m[pfn]) - xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff; + int hstart; + unsigned long he; + + hstart = (hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff; + he = ((const uint64_t *) spage)[hstart]; + + if ( ((he >> PAGE_SHIFT) & 0x0fffffff) == m2p_mfn0 ) { + /* hvirt starts with xen stuff... */ + xen_start = hstart; + } else if ( hvirt_start != 0xf5800000 ) { + /* old L2s from before hole was shrunk... */ + hstart = (0xf5800000 >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff; + he = ((const uint64_t *) spage)[hstart]; + + if( ((he >> PAGE_SHIFT) & 0x0fffffff) == m2p_mfn0 ) + xen_start = hstart; + } } if (pt_levels == 4 && type == XEN_DOMCTL_PFINFO_L4TAB) { @@ -550,6 +561,8 @@ static xen_pfn_t *xc_map_m2p(int xc_hand return NULL; } + m2p_mfn0 = entries[0].mfn; + free(extent_start); free(entries); @@ -646,13 +659,6 @@ int xc_linux_save(int xc_handle, int io_ ERROR("Domain is not in a valid Linux guest OS state"); goto out; } - - /* cheesy sanity check */ - if ((info.max_memkb >> (PAGE_SHIFT - 10)) > max_mfn) { - ERROR("Invalid state record -- pfn count out of range: %lu", - (info.max_memkb >> (PAGE_SHIFT - 10))); - goto out; - } /* Map the shared info frame */ if(!(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxc/xc_load_elf.c --- a/tools/libxc/xc_load_elf.c Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/libxc/xc_load_elf.c Wed Jan 17 09:56:40 2007 -0500 @@ -406,17 +406,19 @@ static int parseelfimage(const char *ima } /* - * A "bimodal" 
ELF note indicates the kernel will adjust to the - * current paging mode, including handling extended cr3 syntax. - * If we have ELF notes then PAE=yes implies that we must support - * the extended cr3 syntax. Otherwise we need to find the - * [extended-cr3] syntax in the __xen_guest string. + * A "bimodal" ELF note indicates the kernel will adjust to the current + * paging mode, including handling extended cr3 syntax. If we have ELF + * notes then PAE=yes implies that we must support the extended cr3 syntax. + * Otherwise we need to find the [extended-cr3] syntax in the __xen_guest + * string. We use strstr() to look for "bimodal" to allow guests to use + * "yes,bimodal" or "no,bimodal" for compatibility reasons. */ + dsi->pae_kernel = PAEKERN_no; if ( dsi->__elfnote_section ) { p = xen_elfnote_string(dsi, XEN_ELFNOTE_PAE_MODE); - if ( p != NULL && strncmp(p, "bimodal", 7) == 0 ) + if ( p != NULL && strstr(p, "bimodal") != NULL ) dsi->pae_kernel = PAEKERN_bimodal; else if ( p != NULL && strncmp(p, "yes", 3) == 0 ) dsi->pae_kernel = PAEKERN_extended_cr3; diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxc/xc_ptrace.c --- a/tools/libxc/xc_ptrace.c Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/libxc/xc_ptrace.c Wed Jan 17 09:56:40 2007 -0500 @@ -166,14 +166,11 @@ static unsigned long *page_arr * tables. 
* */ -static unsigned long -to_ma(int cpu, - unsigned long in_addr) -{ - unsigned long maddr = in_addr; - +static uint64_t +to_ma(int cpu, uint64_t maddr) +{ if ( current_is_hvm && paging_enabled(&ctxt[cpu]) ) - maddr = page_array[maddr >> PAGE_SHIFT] << PAGE_SHIFT; + maddr = (uint64_t)page_array[maddr >> PAGE_SHIFT] << PAGE_SHIFT; return maddr; } @@ -225,7 +222,8 @@ map_domain_va_pae( void *guest_va, int perm) { - unsigned long l3e, l2e, l1e, l2p, l1p, p, va = (unsigned long)guest_va; + uint64_t l3e, l2e, l1e, l2p, l1p, p; + unsigned long va = (unsigned long)guest_va; uint64_t *l3, *l2, *l1; static void *v[MAX_VIRT_CPUS]; @@ -380,12 +378,12 @@ map_domain_va( if (!paging_enabled(&ctxt[cpu])) { static void * v; - unsigned long page; + uint64_t page; if ( v != NULL ) munmap(v, PAGE_SIZE); - page = to_ma(cpu, page_array[va >> PAGE_SHIFT]); + page = to_ma(cpu, va); v = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE, perm, page >> PAGE_SHIFT); diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxc/xg_save_restore.h --- a/tools/libxc/xg_save_restore.h Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/libxc/xg_save_restore.h Wed Jan 17 09:56:40 2007 -0500 @@ -53,8 +53,17 @@ static int get_platform_info(int xc_hand *hvirt_start = xen_params.virt_start; + /* + * XXX For now, 32bit dom0's can only save/restore 32bit domUs + * on 64bit hypervisors, so no need to check which type of domain + * we're dealing with. + */ if (strstr(xen_caps, "xen-3.0-x86_64")) +#if defined(__i386__) + *pt_levels = 3; +#else *pt_levels = 4; +#endif else if (strstr(xen_caps, "xen-3.0-x86_32p")) *pt_levels = 3; else if (strstr(xen_caps, "xen-3.0-x86_32")) diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxen/include/xen_cpu_feature.h --- a/tools/libxen/include/xen_cpu_feature.h Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/libxen/include/xen_cpu_feature.h Wed Jan 17 09:56:40 2007 -0500 @@ -198,12 +198,12 @@ enum xen_cpu_feature /** * AMD 3DNow! 
extensions */ - XEN_CPU_FEATURE_3DNOWEXT, + XEN_CPU_FEATURE_THREEDNOWEXT, /** * 3DNow! */ - XEN_CPU_FEATURE_3DNOW, + XEN_CPU_FEATURE_THREEDNOW, /** * CPU in recovery mode diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxen/src/xen_common.c --- a/tools/libxen/src/xen_common.c Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/libxen/src/xen_common.c Wed Jan 17 09:56:40 2007 -0500 @@ -373,11 +373,18 @@ static void server_error_2(xen_session * } -static bool is_container_node(xmlNode *n, char *type) +static bool is_node(xmlNode *n, char *type) { return n->type == XML_ELEMENT_NODE && - 0 == strcmp((char *)n->name, type) && + 0 == strcmp((char *)n->name, type); +} + + +static bool is_container_node(xmlNode *n, char *type) +{ + return + is_node(n, type) && n->children != NULL && n->children == n->last && n->children->type == XML_ELEMENT_NODE; @@ -390,13 +397,30 @@ static bool is_container_node(xmlNode *n */ static xmlChar *string_from_value(xmlNode *n, char *type) { - return - is_container_node(n, "value") && - 0 == strcmp((char *)n->children->name, type) ? - (n->children->children == NULL ? - xmlStrdup(BAD_CAST("")) : - xmlNodeGetContent(n->children->children)) : - NULL; + /* + <value><type>XYZ</type></value> is normal, but the XML-RPC spec also + allows <value>XYZ</value> where XYZ is to be interpreted as a string. + */ + + if (is_container_node(n, "value") && + 0 == strcmp((char *)n->children->name, type)) + { + return + n->children->children == NULL ? + xmlStrdup(BAD_CAST("")) : + xmlNodeGetContent(n->children->children); + } + else if (0 == strcmp(type, "string") && is_node(n, "value")) + { + return + n->children == NULL ? 
+ xmlStrdup(BAD_CAST("")) : + xmlNodeGetContent(n->children); + } + else + { + return NULL; + } } @@ -557,8 +581,14 @@ static void parse_into(xen_session *s, x xmlChar *string = string_from_value(value_node, "double"); if (string == NULL) { +#if PERMISSIVE + fprintf(stderr, + "Expected a Float from the server, but didn't get one\n"); + ((double *)value)[slot] = 0.0; +#else server_error( s, "Expected a Float from the server, but didn't get one"); +#endif } else { diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/libxen/src/xen_cpu_feature.c --- a/tools/libxen/src/xen_cpu_feature.c Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/libxen/src/xen_cpu_feature.c Wed Jan 17 09:56:40 2007 -0500 @@ -62,8 +62,8 @@ static const char *lookup_table[] = "NX", "MMXEXT", "LM", - "3DNOWEXT", - "3DNOW", + "THREEDNOWEXT", + "THREEDNOW", "RECOVERY", "LONGRUN", "LRTI", diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/pygrub/src/pygrub --- a/tools/pygrub/src/pygrub Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/pygrub/src/pygrub Wed Jan 17 09:56:40 2007 -0500 @@ -13,7 +13,7 @@ # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
# -import os, sys, string, struct, tempfile +import os, sys, string, struct, tempfile, re import copy import logging @@ -48,8 +48,7 @@ def is_disk_image(file): return True return False -SECTOR_SIZE=512 -def get_active_offset(file): +def get_active_partition(file): """Find the offset for the start of the first active partition " "in the disk image file.""" @@ -58,13 +57,56 @@ def get_active_offset(file): for poff in (446, 462, 478, 494): # partition offsets # active partition has 0x80 as the first byte if struct.unpack("<c", buf[poff:poff+1]) == ('\x80',): - return struct.unpack("<L", - buf[poff+8:poff+12])[0] * SECTOR_SIZE + return buf[poff:poff+16] # if there's not a partition marked as active, fall back to # the first partition - P1 = 446 - return struct.unpack("<L", buf[P1+8:P1+12])[0] * SECTOR_SIZE + return buf[446:446+16] + +SECTOR_SIZE=512 +DK_LABEL_LOC=1 +DKL_MAGIC=0xdabe +V_ROOT=0x2 + +def get_solaris_slice(file, offset): + """Find the root slice in a Solaris VTOC.""" + + fd = os.open(file, os.O_RDONLY) + os.lseek(fd, offset + (DK_LABEL_LOC * SECTOR_SIZE), 0) + buf = os.read(fd, 512) + if struct.unpack("<H", buf[508:510])[0] != DKL_MAGIC: + raise RuntimeError, "Invalid disklabel magic" + + nslices = struct.unpack("<H", buf[30:32])[0] + + for i in range(nslices): + sliceoff = 72 + 12 * i + slicetag = struct.unpack("<H", buf[sliceoff:sliceoff+2])[0] + slicesect = struct.unpack("<L", buf[sliceoff+4:sliceoff+8])[0] + if slicetag == V_ROOT: + return slicesect * SECTOR_SIZE + + raise RuntimeError, "No root slice found" + +FDISK_PART_SOLARIS=0xbf +FDISK_PART_SOLARIS_OLD=0x82 + +def get_fs_offset(file): + if not is_disk_image(file): + return 0 + + partbuf = get_active_partition(file) + if len(partbuf) == 0: + raise RuntimeError, "Unable to find active partition on disk" + + offset = struct.unpack("<L", partbuf[8:12])[0] * SECTOR_SIZE + + type = struct.unpack("<B", partbuf[4:5])[0] + + if type == FDISK_PART_SOLARIS or type == FDISK_PART_SOLARIS_OLD: + offset += 
get_solaris_slice(file, offset) + + return offset class GrubLineEditor(curses.textpad.Textbox): def __init__(self, screen, startx, starty, line = ""): @@ -143,12 +185,12 @@ class GrubLineEditor(curses.textpad.Text class Grub: - def __init__(self, file, isconfig = False): + def __init__(self, file, fs = None): self.screen = None self.entry_win = None self.text_win = None if file: - self.read_config(file, isconfig) + self.read_config(file, fs) def draw_main_windows(self): if self.screen is None: #only init stuff once @@ -295,8 +337,8 @@ class Grub: # else, we cancelled and should just go back break - def read_config(self, fn, isConfig = False): - """Read the given file to parse the config. If isconfig, then + def read_config(self, fn, fs = None): + """Read the given file to parse the config. If fs = None, then we're being given a raw config file rather than a disk image.""" if not os.access(fn, os.R_OK): @@ -304,38 +346,25 @@ class Grub: self.cf = grub.GrubConf.GrubConfigFile() - if isConfig: + if not fs: # set the config file and parse it self.cf.filename = fn self.cf.parse() return - offset = 0 - if is_disk_image(fn): - offset = get_active_offset(fn) - if offset == -1: - raise RuntimeError, "Unable to find active partition on disk" - - # open the image and read the grub config - fs = fsimage.open(fn, offset) - - if fs is not None: - grubfile = None - for f in ("/boot/grub/menu.lst", "/boot/grub/grub.conf", - "/grub/menu.lst", "/grub/grub.conf"): - if fs.file_exists(f): - grubfile = f - break - if grubfile is None: - raise RuntimeError, "we couldn't find grub config file in the image provided." 
- f = fs.open_file(grubfile) - buf = f.read() - del f - del fs - # then parse the grub config - self.cf.parse(buf) - else: - raise RuntimeError, "Unable to read filesystem" + grubfile = None + for f in ("/boot/grub/menu.lst", "/boot/grub/grub.conf", + "/grub/menu.lst", "/grub/grub.conf"): + if fs.file_exists(f): + grubfile = f + break + if grubfile is None: + raise RuntimeError, "we couldn't find grub config file in the image provided." + f = fs.open_file(grubfile) + buf = f.read() + del f + # then parse the grub config + self.cf.parse(buf) def run(self): timeout = int(self.cf.timeout) @@ -376,6 +405,9 @@ class Grub: c = self.screen.getch() if mytime != -1: mytime += 1 + if mytime >= int(timeout): + self.isdone = True + break # handle keypresses if c == ord('c'): @@ -431,19 +463,93 @@ def get_entry_idx(cf, entry): return None +def run_grub(file, entry, fs): + global g + global sel + + def run_main(scr, *args): + global sel + global g + sel = g.run() + + g = Grub(file, fs) + if interactive: + curses.wrapper(run_main) + else: + sel = g.cf.default + + # set the entry to boot as requested + if entry is not None: + idx = get_entry_idx(g.cf, entry) + if idx is not None and idx > 0 and idx < len(g.cf.images): + sel = idx + + if sel == -1: + print "No kernel image selected!" + sys.exit(1) + + img = g.cf.images[sel] + + grubcfg = { "kernel": None, "ramdisk": None, "args": None } + + grubcfg["kernel"] = img.kernel[1] + if img.initrd: + grubcfg["ramdisk"] = img.initrd[1] + if img.args: + grubcfg["args"] = img.args + + return grubcfg + +# If nothing has been specified, look for a Solaris domU. If found, perform the +# necessary tweaks. 
+def sniff_solaris(fs, cfg): + if not fs.file_exists("/platform/i86xen/kernel/unix"): + return cfg + + # darned python + longmode = (sys.maxint != 2147483647L) + if not longmode: + longmode = os.uname()[4] == "x86_64" + if not longmode: + if (os.access("/usr/bin/isainfo", os.R_OK) and + os.popen("/usr/bin/isainfo -b").read() == "64\n"): + longmode = True + + if not cfg["kernel"]: + cfg["kernel"] = "/platform/i86xen/kernel/unix" + cfg["ramdisk"] = "/platform/i86pc/boot_archive" + if longmode: + cfg["kernel"] = "/platform/i86xen/kernel/amd64/unix" + cfg["ramdisk"] = "/platform/i86pc/amd64/boot_archive" + + # Unpleasant. Typically we'll have 'root=foo -k' or 'root=foo /kernel -k', + # and we need to maintain Xen properties (root= and ip=) and the kernel + # before any user args. + + xenargs = "" + userargs = "" + + if not cfg["args"]: + cfg["args"] = cfg["kernel"] + else: + for arg in cfg["args"].split(): + if re.match("^root=", arg) or re.match("^ip=", arg): + xenargs += arg + " " + elif arg != cfg["kernel"]: + userargs += arg + " " + cfg["args"] = xenargs + " " + cfg["kernel"] + " " + userargs + + return cfg + if __name__ == "__main__": sel = None - def run_main(scr, *args): - global sel - sel = g.run() - def usage(): - print >> sys.stderr, "Usage: %s [-q|--quiet] [--output=] [--entry=] <image>" %(sys.argv[0],) + print >> sys.stderr, "Usage: %s [-q|--quiet] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] <image>" %(sys.argv[0],) try: opts, args = getopt.gnu_getopt(sys.argv[1:], 'qh::', - ["quiet", "help", "output=", "entry=", + ["quiet", "help", "output=", "entry=", "kernel=", "ramdisk=", "args=", "isconfig"]) except getopt.GetoptError: usage() @@ -458,6 +564,14 @@ if __name__ == "__main__": entry = None interactive = True isconfig = False + + # what was passed in + incfg = { "kernel": None, "ramdisk": None, "args": None } + # what grub or sniffing chose + chosencfg = { "kernel": None, "ramdisk": None, "args": None } + # what to boot + bootcfg = { 
"kernel": None, "ramdisk": None, "args": None } + for o, a in opts: if o in ("-q", "--quiet"): interactive = False @@ -466,6 +580,12 @@ if __name__ == "__main__": sys.exit() elif o in ("--output",): output = a + elif o in ("--kernel",): + incfg["kernel"] = a + elif o in ("--ramdisk",): + incfg["ramdisk"] = a + elif o in ("--args",): + incfg["args"] = a elif o in ("--entry",): entry = a # specifying the entry to boot implies non-interactive @@ -478,58 +598,42 @@ if __name__ == "__main__": else: fd = os.open(output, os.O_WRONLY) - g = Grub(file, isconfig) - if interactive: - curses.wrapper(run_main) - else: - sel = g.cf.default - - # set the entry to boot as requested - if entry is not None: - idx = get_entry_idx(g.cf, entry) - if idx is not None and idx > 0 and idx < len(g.cf.images): - sel = idx - - if sel == -1: - print "No kernel image selected!" - sys.exit(1) - - img = g.cf.images[sel] - print "Going to boot %s" %(img.title) - print " kernel: %s" %(img.kernel[1],) - if img.initrd: - print " initrd: %s" %(img.initrd[1],) - + # debug if isconfig: - print " args: %s" %(img.args,) + chosencfg = run_grub(file, entry) + print " kernel: %s" % chosencfg["kernel"] + if img.initrd: + print " initrd: %s" % chosencfg["ramdisk"] + print " args: %s" % chosencfg["args"] sys.exit(0) - - offset = 0 - if is_disk_image(file): - offset = get_active_offset(file) - if offset == -1: - raise RuntimeError, "Unable to find active partition on disk" - - # read the kernel and initrd onto the hostfs - fs = fsimage.open(file, offset) - - kernel = fs.open_file(img.kernel[1],).read() - (tfd, fn) = tempfile.mkstemp(prefix="boot_kernel.", + + fs = fsimage.open(file, get_fs_offset(file)) + + chosencfg = sniff_solaris(fs, incfg) + + if not chosencfg["kernel"]: + chosencfg = run_grub(file, entry, fs) + + data = fs.open_file(chosencfg["kernel"]).read() + (tfd, bootcfg["kernel"]) = tempfile.mkstemp(prefix="boot_kernel.", dir="/var/run/xend/boot") - os.write(tfd, kernel) + os.write(tfd, data) 
os.close(tfd) - sxp = "linux (kernel %s)" %(fn,) - - if img.initrd: - initrd = fs.open_file(img.initrd[1],).read() - (tfd, fn) = tempfile.mkstemp(prefix="boot_ramdisk.", + + if chosencfg["ramdisk"]: + data = fs.open_file(chosencfg["ramdisk"],).read() + (tfd, bootcfg["ramdisk"]) = tempfile.mkstemp(prefix="boot_ramdisk.", dir="/var/run/xend/boot") - os.write(tfd, initrd) + os.write(tfd, data) os.close(tfd) - sxp += "(ramdisk %s)" %(fn,) else: initrd = None - sxp += "(args '%s')" %(img.args,) + + sxp = "linux (kernel %s)" % bootcfg["kernel"] + if bootcfg["ramdisk"]: + sxp += "(ramdisk %s)" % bootcfg["ramdisk"] + if chosencfg["args"]: + sxp += "(args \"%s\")" % chosencfg["args"] sys.stdout.flush() os.write(fd, sxp) diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/scripts/xapi.py --- a/tools/python/scripts/xapi.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/scripts/xapi.py Wed Jan 17 09:56:40 2007 -0500 @@ -41,6 +41,7 @@ COMMANDS = { COMMANDS = { 'host-info': ('', 'Get Xen Host Info'), 'host-set-name': ('', 'Set host name'), + 'pif-list': ('', 'List all PIFs'), 'sr-list': ('', 'List all SRs'), 'vbd-list': ('', 'List all VBDs'), 'vbd-create': ('<domname> <pycfg> [opts]', @@ -63,6 +64,15 @@ COMMANDS = { } OPTIONS = { + 'sr-list': [(('-l', '--long'), + {'action':'store_true', + 'help':'List all properties of SR'}) + ], + + 'vdi-list': [(('-l', '--long'), + {'action':'store_true', + 'help':'List all properties of VDI'}) + ], 'vm-list': [(('-l', '--long'), {'action':'store_true', 'help':'List all properties of VMs'}) @@ -145,7 +155,7 @@ def _connect(*args): def _connect(*args): global _server, _session, _initialised if not _initialised: - _server = ServerProxy('httpu:///var/run/xend/xmlrpc.sock') + _server = ServerProxy('httpu:///var/run/xend/xen-api.sock') login = raw_input("Login: ") password = getpass() creds = (login, password) @@ -361,29 +371,53 @@ def xapi_vbd_list(*args): print VBD_LIST_FORMAT % vbd_struct def xapi_vdi_list(*args): + opts, args = 
parse_args('vdi-list', args, set_defaults = True) + is_long = opts and opts.long + server, session = _connect() vdis = execute(server.VDI.get_all, session) - print VDI_LIST_FORMAT % {'name_label': 'VDI Label', - 'uuid' : 'UUID', - 'virtual_size': 'Sectors', - 'sector_size': 'Sector Size'} - - for vdi in vdis: - vdi_struct = execute(server.VDI.get_record, session, vdi) - print VDI_LIST_FORMAT % vdi_struct + if not is_long: + print VDI_LIST_FORMAT % {'name_label': 'VDI Label', + 'uuid' : 'UUID', + 'virtual_size': 'Sectors', + 'sector_size': 'Sector Size'} + + for vdi in vdis: + vdi_struct = execute(server.VDI.get_record, session, vdi) + print VDI_LIST_FORMAT % vdi_struct + + else: + + for vdi in vdis: + vdi_struct = execute(server.VDI.get_record, session, vdi) + pprint(vdi_struct) def xapi_sr_list(*args): + opts, args = parse_args('sr-list', args, set_defaults = True) + is_long = opts and opts.long + server, session = _connect() srs = execute(server.SR.get_all, session) - print SR_LIST_FORMAT % {'name_label': 'SR Label', - 'uuid' : 'UUID', - 'physical_size': 'Size', - 'type': 'Type'} - for sr in srs: - sr_struct = execute(server.SR.get_record, session, sr) - sr_struct['physical_size'] = int(sr_struct['physical_size'])/MB - print SR_LIST_FORMAT % sr_struct + if not is_long: + print SR_LIST_FORMAT % {'name_label': 'SR Label', + 'uuid' : 'UUID', + 'physical_size': 'Size (MB)', + 'type': 'Type'} + + for sr in srs: + sr_struct = execute(server.SR.get_record, session, sr) + sr_struct['physical_size'] = int(sr_struct['physical_size'])/MB + print SR_LIST_FORMAT % sr_struct + else: + for sr in srs: + sr_struct = execute(server.SR.get_record, session, sr) + pprint(sr_struct) + +def xapi_sr_rename(*args): + server, session = _connect() + sr = execute(server.SR.get_by_name_label, session, args[0]) + execute(server.SR.set_name_label, session, sr[0], args[1]) def xapi_vdi_create(*args): opts, args = parse_args('vdi-create', args) @@ -421,10 +455,11 @@ def xapi_vdi_rename(*args): 
if len(args) < 2: raise OptionError('Not enough arguments') - vdi_uuid = args[0] + vdi_uuid = execute(server.VDI.get_by_name_label, session, args[0]) vdi_name = args[1] - print 'Renaming VDI %s to %s' % (vdi_uuid, vdi_name) - result = execute(server.VDI.set_name_label, session, vdi_uuid, vdi_name) + + print 'Renaming VDI %s to %s' % (vdi_uuid[0], vdi_name) + result = execute(server.VDI.set_name_label, session, vdi_uuid[0], vdi_name) print 'Done.' @@ -447,6 +482,14 @@ def xapi_vtpm_create(*args): vtpm_rec = execute(server.VTPM.get_record, session, vtpm_uuid) print "Has vtpm record '%s'" % vtpm_rec + +def xapi_pif_list(*args): + server, session = _connect() + pif_uuids = execute(server.PIF.get_all, session) + for pif_uuid in pif_uuids: + pif = execute(server.PIF.get_record, session, pif_uuid) + print pif + # # Command Line Utils @@ -517,10 +560,12 @@ def usage(command = None, print_usage = print print 'Subcommands:' print - sorted_commands = sorted(COMMANDS.keys()) - for command in sorted_commands: - args, description = COMMANDS[command] - print '%-16s %-40s' % (command, description) + + for func in sorted(globals().keys()): + if func.startswith('xapi_'): + command = func[5:].replace('_', '-') + args, description = COMMANDS.get(command, ('', '')) + print '%-16s %-40s' % (command, description) print else: parse_args(command, ['-h']) @@ -549,7 +594,7 @@ def main(args): try: subcmd_func(*args[1:]) except XenAPIError, e: - print 'Error: %s' % str(e.args[1]) + print 'Error: %s' % str(e.args[0]) sys.exit(2) except OptionError, e: print 'Error: %s' % e diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/XendAPI.py --- a/tools/python/xen/xend/XendAPI.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/xen/xend/XendAPI.py Wed Jan 17 09:56:40 2007 -0500 @@ -543,14 +543,14 @@ class XendAPI: try: node = XendNode.instance() if host_uuid != node.uuid: - return xen_api_error([HOST_HANDLE_INVALID, host_uuid]) + return xen_api_error(['HOST_HANDLE_INVALID', host_uuid]) elif 
_is_valid_ref(network_uuid, node.is_valid_network): network = node.get_network(network_uuid) return xen_api_success(node.PIF_create(name, mtu, vlan, mac, network)) else: - return xen_api_error([NETWORK_HANDLE_INVALID, network_uuid]) + return xen_api_error(['NETWORK_HANDLE_INVALID', network_uuid]) except NetworkAlreadyConnected, exn: return xen_api_error(['NETWORK_ALREADY_CONNECTED', network_uuid, exn.pif_uuid]) @@ -593,10 +593,10 @@ class XendAPI: return xen_api_success(self._get_PIF(ref).set_name(name)) def PIF_set_MAC(self, _, ref, mac): - return xen_api_success(self._get_PIF(ref).set_mac(name)) + return xen_api_success(self._get_PIF(ref).set_mac(mac)) def PIF_set_MTU(self, _, ref, mtu): - return xen_api_success(self._get_PIF(ref).set_mtu(name)) + return xen_api_success(self._get_PIF(ref).set_mtu(mtu)) def PIF_create_VLAN(self, _, ref, network, vlan): try: @@ -604,7 +604,7 @@ class XendAPI: return xen_api_success(XendNode.instance().PIF_create_VLAN( ref, network, vlan)) else: - return xen_api_error([NETWORK_HANDLE_INVALID, network]) + return xen_api_error(['NETWORK_HANDLE_INVALID', network]) except NetworkAlreadyConnected, exn: return xen_api_error(['NETWORK_ALREADY_CONNECTED', network, exn.pif_uuid]) diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/XendAPIConstants.py --- a/tools/python/xen/xend/XendAPIConstants.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/xen/xend/XendAPIConstants.py Wed Jan 17 09:56:40 2007 -0500 @@ -41,7 +41,7 @@ XEN_API_CPU_FEATURE = [ 'CMOV', 'PAT', 'PSE36', 'PN', 'CLFLSH', 'DTES', 'ACPI', 'MMX', 'FXCR', 'XMM', 'XMM2', 'SELFSNOOP', 'HT', 'ACC', 'IA64', 'SYSCALL', 'MP', 'NX', 'MMXEXT', - 'LM', '3DNOWEXT', '3DNOW', 'RECOVERY', 'LONGRUN', + 'LM', 'THREEDNOWEXT', 'THREEDNOW', 'RECOVERY', 'LONGRUN', 'LRTI', 'CXMMX', 'K6_MTRR', 'CYRIX_ARR', 'CENTAUR_MCR', 'K8', 'K7', 'P3', 'P4', 'CONSTANT_TSC', 'FXSAVE_LEAK', 'XMM3', 'MWAIT', 'DSCPL', 'EST', 'TM2', 'CID', 'CX16', @@ -73,3 +73,4 @@ XEN_API_VBD_MODE = ['RO', 'RW'] XEN_API_VBD_MODE 
= ['RO', 'RW'] XEN_API_VDI_TYPE = ['system', 'user', 'ephemeral'] XEN_API_DRIVER_TYPE = ['ioemu', 'paravirtualised'] +XEN_API_VBD_TYPE = ['CD', 'Disk'] diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/XendBootloader.py --- a/tools/python/xen/xend/XendBootloader.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/xen/xend/XendBootloader.py Wed Jan 17 09:56:40 2007 -0500 @@ -53,6 +53,12 @@ def bootloader(blexec, disk, quiet = Fal child = os.fork() if (not child): args = [ blexec ] + if kernel: + args.append("--kernel=%s" % kernel) + if ramdisk: + args.append("--ramdisk=%s" % ramdisk) + if kernel_args: + args.append("--args=%s" % kernel_args) if quiet: args.append("-q") args.append("--output=%s" % fifo) diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/xen/xend/XendCheckpoint.py Wed Jan 17 09:56:40 2007 -0500 @@ -147,18 +147,20 @@ def restore(xd, fd, dominfo = None, paus assert store_port assert console_port + nr_pfns = (dominfo.getMemoryTarget() + 3) / 4 + try: l = read_exact(fd, sizeof_unsigned_long, "not a valid guest state file: pfn count read") - nr_pfns = unpack("L", l)[0] # native sizeof long - if nr_pfns > 16*1024*1024: # XXX + max_pfn = unpack("L", l)[0] # native sizeof long + if max_pfn > 16*1024*1024: # XXX raise XendError( "not a valid guest state file: pfn count out of range") balloon.free(xc.pages_to_kib(nr_pfns)) cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE), - fd, dominfo.getDomid(), nr_pfns, + fd, dominfo.getDomid(), max_pfn, store_port, console_port]) log.debug("[xc_restore]: %s", string.join(cmd)) diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/XendConfig.py --- a/tools/python/xen/xend/XendConfig.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/xen/xend/XendConfig.py Wed Jan 17 09:56:40 2007 -0500 @@ -508,8 +508,12 @@ class XendConfig(dict): pci_devs = [] for pci_dev in sxp.children(config, 'dev'): 
pci_dev_info = {} - for opt, val in pci_dev[1:]: - pci_dev_info[opt] = val + for opt_val in pci_dev[1:]: + try: + opt, val = opt_val + pci_dev_info[opt] = val + except TypeError: + pass pci_devs.append(pci_dev_info) cfg['devices'][pci_devs_uuid] = (dev_type, @@ -572,7 +576,6 @@ class XendConfig(dict): if 'security' in cfg and isinstance(cfg['security'], str): cfg['security'] = sxp.from_string(cfg['security']) - # TODO: get states old_state = sxp.child_value(sxp_cfg, 'state') if old_state: for i in range(len(CONFIG_OLD_DOM_STATES)): @@ -855,14 +858,15 @@ class XendConfig(dict): for cls in XendDevices.valid_devices(): found = False - # figure if there is a device that is running - if domain: + # figure if there is a dev controller is valid and running + if domain and domain.getDomid() != None: try: controller = domain.getDeviceController(cls) configs = controller.configurations() for config in configs: sxpr.append(['device', config]) - found = True + + found = True except: log.exception("dumping sxp from device controllers") pass @@ -923,11 +927,12 @@ class XendConfig(dict): dev_type = sxp.name(config) dev_info = {} - try: - for opt, val in config[1:]: + for opt_val in config[1:]: + try: + opt, val = opt_val dev_info[opt] = val - except ValueError: - pass # SXP has no options for this device + except (TypeError, ValueError): # unpack error + pass if dev_type == 'vbd': if dev_info.get('dev', '').startswith('ioemu:'): @@ -996,7 +1001,7 @@ class XendConfig(dict): self['vbd_refs'].append(dev_uuid) return dev_uuid - elif dev_type in ('vtpm'): + elif dev_type == 'vtpm': if cfg_xenapi.get('type'): dev_info['type'] = cfg_xenapi.get('type') @@ -1015,15 +1020,17 @@ class XendConfig(dict): @return: Returns True if succesfully found and updated a device conf """ if dev_uuid in self['devices']: - config = sxp.child0(cfg_sxp) - dev_type = sxp.name(config) - dev_info = {} - - try: - for opt, val in config[1:]: - self['devices'][opt] = val - except ValueError: - pass # SXP has no 
options for this device + if sxp.child0(cfg_sxp) == 'device': + config = sxp.child0(cfg_sxp) + else: + config = cfg_sxp + + for opt_val in config[1:]: + try: + opt, val = opt_val + self['devices'][dev_uuid][opt] = val + except (TypeError, ValueError): + pass # no value for this config option return True @@ -1107,15 +1114,19 @@ class XendConfig(dict): # configuration log.debug("update_with_image_sxp(%s)" % scrub_password(image_sxp)) - kernel_args = sxp.child_value(image_sxp, 'args', '') + kernel_args = "" # attempt to extract extra arguments from SXP config arg_ip = sxp.child_value(image_sxp, 'ip') if arg_ip and not re.search(r'ip=[^ ]+', kernel_args): - kernel_args += ' ip=%s' % arg_ip + kernel_args += 'ip=%s ' % arg_ip arg_root = sxp.child_value(image_sxp, 'root') if arg_root and not re.search(r'root=', kernel_args): - kernel_args += ' root=%s' % arg_root + kernel_args += 'root=%s ' % arg_root + + # user-specified args must come last: previous releases did this and + # some domU kernels rely upon the ordering. + kernel_args += sxp.child_value(image_sxp, 'args', '') if bootloader: self['_temp_using_bootloader'] = '1' diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/XendConstants.py --- a/tools/python/xen/xend/XendConstants.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/xen/xend/XendConstants.py Wed Jan 17 09:56:40 2007 -0500 @@ -80,6 +80,7 @@ MINIMUM_RESTART_TIME = 20 MINIMUM_RESTART_TIME = 20 RESTART_IN_PROGRESS = 'xend/restart_in_progress' +LAST_SHUTDOWN_REASON = 'xend/last_shutdown_reason' # # Device migration stages (eg. XendDomainInfo, XendCheckpoint, server.tpmif) diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/xen/xend/XendDomain.py Wed Jan 17 09:56:40 2007 -0500 @@ -377,7 +377,7 @@ class XendDomain: dom0.setVCpuCount(target) - def _refresh(self): + def _refresh(self, refresh_shutdown = True): """Refresh the domain list. 
Needs to be called when either xenstore has changed or when a method requires up to date information (like uptime, cputime stats). @@ -393,7 +393,7 @@ class XendDomain: for dom in running: domid = dom['domid'] if domid in self.domains: - self.domains[domid].update(dom) + self.domains[domid].update(dom, refresh_shutdown) elif domid not in self.domains and dom['dying'] != 1: try: new_dom = XendDomainInfo.recreate(dom, False) @@ -495,7 +495,7 @@ class XendDomain: """ self.domains_lock.acquire() try: - self._refresh() + self._refresh(refresh_shutdown = False) dom = self.domain_lookup_nr(domid) if not dom: raise XendError("No domain named '%s'." % str(domid)) @@ -731,7 +731,7 @@ class XendDomain: self.domains_lock.acquire() try: - self._refresh() + self._refresh(refresh_shutdown = False) # active domains active_domains = self.domains.values() diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/xen/xend/XendDomainInfo.py Wed Jan 17 09:56:40 2007 -0500 @@ -37,7 +37,7 @@ from xen.util.blkif import blkdev_uname_ from xen.util.blkif import blkdev_uname_to_file from xen.util import security -from xen.xend import balloon, sxp, uuid, image, arch +from xen.xend import balloon, sxp, uuid, image, arch, osdep from xen.xend import XendRoot, XendNode, XendConfig from xen.xend.XendConfig import scrub_password @@ -439,7 +439,7 @@ class Common_XendDomainInfo: def shutdown(self, reason): """Shutdown a domain by signalling this via xenstored.""" - log.debug('XendDomainInfo.shutdown') + log.debug('XendDomainInfo.shutdown(%s)', reason) if self.state in (DOM_STATE_SHUTDOWN, DOM_STATE_HALTED,): raise XendError('Domain cannot be shutdown') @@ -496,7 +496,7 @@ class Common_XendDomainInfo: self._waitForDevice(dev_type, devid) return self.getDeviceController(dev_type).sxpr(devid) - def device_configure(self, dev_config, devid = None): + def device_configure(self, dev_sxp, devid 
= None): """Configure an existing device. @param dev_config: device configuration @@ -506,19 +506,24 @@ class Common_XendDomainInfo: @return: Returns True if successfully updated device @rtype: boolean """ - deviceClass = sxp.name(dev_config) - - # look up uuid of the device - dev_control = self.getDeviceController(deviceClass) - dev_sxpr = dev_control.sxpr(devid) - dev_uuid = sxp.child_value(sxpr, 'uuid') - if not dev_uuid: - return False - - self.info.device_update(dev_uuid, dev_config) - dev_config_dict = self.info['devices'].get(dev_uuid) - if dev_config_dict: - dev_control.reconfigureDevice(devid, dev_config_dict[1]) + + # convert device sxp to a dict + dev_class = sxp.name(dev_sxp) + dev_config = {} + for opt_val in dev_sxp[1:]: + try: + dev_config[opt_val[0]] = opt_val[1] + except IndexError: + pass + + # use DevController.reconfigureDevice to change device config + dev_control = self.getDeviceController(dev_class) + dev_uuid = dev_control.reconfigureDevice(devid, dev_config) + + # update XendConfig with new device info + if dev_uuid: + self.info.device_update(dev_uuid, dev_sxp) + return True def waitForDevices(self): @@ -914,7 +919,7 @@ class Common_XendDomainInfo: return self.info.get('cpu_cap', 0) def getWeight(self): - return self.info['cpu_weight'] + return self.info.get('cpu_weight', 256) def setResume(self, state): self._resume = state @@ -969,9 +974,15 @@ class Common_XendDomainInfo: log.warn('Domain has crashed: name=%s id=%d.', self.info['name_label'], self.domid) + self._writeVm(LAST_SHUTDOWN_REASON, 'crash') if xroot.get_enable_dump(): - self.dumpCore() + try: + self.dumpCore() + except XendError: + # This error has been logged -- there's nothing more + # we can do in this context. 
+ pass restart_reason = 'crash' self._stateSet(DOM_STATE_HALTED) @@ -988,6 +999,7 @@ class Common_XendDomainInfo: log.info('Domain has shutdown: name=%s id=%d reason=%s.', self.info['name_label'], self.domid, reason) + self._writeVm(LAST_SHUTDOWN_REASON, reason) self._clearRestart() @@ -1162,7 +1174,10 @@ class Common_XendDomainInfo: # def dumpCore(self, corefile = None): - """Create a core dump for this domain. Nothrow guarantee.""" + """Create a core dump for this domain. + + @raise: XendError if core dumping failed. + """ try: if not corefile: @@ -1571,7 +1586,7 @@ class Common_XendDomainInfo: else: # Boot using bootloader if not blexec or blexec == 'pygrub': - blexec = '/usr/bin/pygrub' + blexec = osdep.pygrub_path blcfg = None for (devtype, devinfo) in self.info.all_devices_sxpr(): @@ -1997,7 +2012,7 @@ class Common_XendDomainInfo: config['image'] = config.get('uname', '') config['io_read_kbs'] = 0.0 config['io_write_kbs'] = 0.0 - if config['mode'] == 'r': + if config.get('mode', 'r') == 'r': config['mode'] = 'RO' else: config['mode'] = 'RW' diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/XendNode.py --- a/tools/python/xen/xend/XendNode.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/xen/xend/XendNode.py Wed Jan 17 09:56:40 2007 -0500 @@ -103,7 +103,7 @@ class XendNode: saved_pifs = self.state_store.load_state('pif') if saved_pifs: for pif_uuid, pif in saved_pifs.items(): - if pif['network'] in self.networks: + if pif.get('network') in self.networks: network = self.networks[pif['network']] try: self.PIF_create(pif['name'], pif['MTU'], pif['VLAN'], diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/osdep.py --- a/tools/python/xen/xend/osdep.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/xen/xend/osdep.py Wed Jan 17 09:56:40 2007 -0500 @@ -29,8 +29,13 @@ _xend_autorestart = { "SunOS": False, } +_pygrub_path = { + "SunOS": "/usr/lib/xen/bin/pygrub" +} + def _get(var, default=None): return var.get(os.uname()[0], default) scripts_dir = 
_get(_scripts_dir, "/etc/xen/scripts") xend_autorestart = _get(_xend_autorestart) +pygrub_path = _get(_pygrub_path, "/usr/bin/pygrub") diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/server/SrvDaemon.py --- a/tools/python/xen/xend/server/SrvDaemon.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/xen/xend/server/SrvDaemon.py Wed Jan 17 09:56:40 2007 -0500 @@ -203,7 +203,7 @@ class Daemon: if not osdep.xend_autorestart: self.run(os.fdopen(w, 'w')) - break + os._exit(0) pid = self.fork_pid() if pid: diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/server/blkif.py --- a/tools/python/xen/xend/server/blkif.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/xen/xend/server/blkif.py Wed Jan 17 09:56:40 2007 -0500 @@ -101,6 +101,7 @@ class BlkifController(DevController): self.writeBackend(devid, 'type', new_back['type'], 'params', new_back['params']) + return new_back.get('uuid') else: raise VmError('Refusing to reconfigure device %s:%d to %s' % (self.deviceClass, devid, config)) diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/server/netif.py --- a/tools/python/xen/xend/server/netif.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/xen/xend/server/netif.py Wed Jan 17 09:56:40 2007 -0500 @@ -150,16 +150,20 @@ class NetifController(DevController): devid = self.allocateDeviceID() + # The default type is 'netfront'. 
+ if not typ: + typ = 'netfront' + if not mac: mac = randomMAC() back = { 'script' : script, 'mac' : mac, - 'handle' : "%i" % devid } + 'handle' : "%i" % devid, + 'type' : typ } if typ == 'ioemu': front = {} - back['type'] = 'ioemu' else: front = { 'handle' : "%i" % devid, 'mac' : mac } diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xend/server/vfbif.py --- a/tools/python/xen/xend/server/vfbif.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/xen/xend/server/vfbif.py Wed Jan 17 09:56:40 2007 -0500 @@ -64,7 +64,7 @@ class VfbifController(DevController): if config.has_key("vncunused"): args += ["--unused"] elif config.has_key("vncdisplay"): - args += ["--vncport", "%d" % (5900 + config["vncdisplay"])] + args += ["--vncport", "%d" % (5900 + int(config["vncdisplay"]))] vnclisten = config.get("vnclisten", xen.xend.XendRoot.instance().get_vnclisten_address()) args += [ "--listen", vnclisten ] diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/xen/xm/create.py Wed Jan 17 09:56:40 2007 -0500 @@ -28,6 +28,7 @@ import xmlrpclib from xen.xend import sxp from xen.xend import PrettyPrint +from xen.xend import osdep import xen.xend.XendClient from xen.xend.XendBootloader import bootloader from xen.util import blkif @@ -295,7 +296,8 @@ gopts.var('vfb', val="type={vnc,sdl},vnc For type=vnc, connect an external vncviewer. The server will listen on ADDR (default 127.0.0.1) on port N+5900. N defaults to the domain id. If vncunused=1, the server will try to find an arbitrary - unused port above 5900. + unused port above 5900. vncpasswd overrides the XenD configured + default password. 
For type=sdl, a viewer will be started automatically using the given DISPLAY and XAUTHORITY, which default to the current user's ones.""") @@ -304,7 +306,7 @@ gopts.var('vif', val="type=TYPE,mac=MAC, fn=append_value, default=[], use="""Add a network interface with the given MAC address and bridge. The vif is configured by calling the given configuration script. - If type is not specified, default is netfront not ioemu device. + If type is not specified, default is netfront. If mac is not specified a random MAC address is used. If not specified then the network backend chooses it's own MAC address. If bridge is not specified the first bridge found is used. @@ -722,8 +724,11 @@ def run_bootloader(vals, config_image): "--entry= directly.") vals.bootargs = "--entry=%s" %(vals.bootentry,) + kernel = sxp.child_value(config_image, 'kernel') + ramdisk = sxp.child_value(config_image, 'ramdisk') + args = sxp.child_value(config_image, 'args') return bootloader(vals.bootloader, file, not vals.console_autoconnect, - vals.bootargs, config_image) + vals.bootargs, kernel, ramdisk, args) def make_config(vals): """Create the domain configuration. @@ -763,7 +768,14 @@ def make_config(vals): config_image = configure_image(vals) if vals.bootloader: - config_image = run_bootloader(vals, config_image) + if vals.bootloader == "pygrub": + vals.bootloader = osdep.pygrub_path + + # if a kernel is specified, we're using the bootloader + # non-interactively, and need to let xend run it so we preserve the + # real kernel choice. + if not vals.kernel: + config_image = run_bootloader(vals, config_image) config.append(['bootloader', vals.bootloader]) if vals.bootargs: config.append(['bootloader_args', vals.bootargs]) @@ -827,7 +839,7 @@ def preprocess_ioports(vals): if len(d) == 1: d.append(d[0]) # Components are in hex: add hex specifier. 
- hexd = map(lambda v: '0x'+v, d) + hexd = ['0x' + x for x in d] ioports.append(hexd) vals.ioports = ioports @@ -994,8 +1006,6 @@ def preprocess_vnc(vals): vals.extra = vnc + ' ' + vals.extra def preprocess(vals): - if not vals.kernel and not vals.bootloader: - err("No kernel specified") preprocess_disk(vals) preprocess_pci(vals) preprocess_ioports(vals) @@ -1180,6 +1190,7 @@ def config_security_check(config, verbos try: domain_label = security.ssidref2label(security.NULL_SSIDREF) except: + import traceback traceback.print_exc(limit=1) return 0 domain_policy = 'NULL' diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/xen/xm/main.py Wed Jan 17 09:56:40 2007 -0500 @@ -130,7 +130,7 @@ SUBCOMMAND_HELP = { 'log' : ('', 'Print Xend log'), 'rename' : ('<Domain> <NewDomainName>', 'Rename a domain.'), 'sched-sedf' : ('<Domain> [options]', 'Get/set EDF parameters.'), - 'sched-credit': ('-d <Domain> [-w[=WEIGHT]|-c[=CAP]]', + 'sched-credit': ('[-d <Domain> [-w[=WEIGHT]|-c[=CAP]]]', 'Get/set credit scheduler parameters.'), 'sysrq' : ('<Domain> <letter>', 'Send a sysrq to a domain.'), 'vcpu-list' : ('[<Domain>]', @@ -717,6 +717,10 @@ def parse_sedf_info(info): 'weight' : get_info('weight', int, -1), } +def domid_match(domid, info): + return domid is None or domid == info['name'] or \ + domid == str(info['domid']) + def xm_brief_list(doms): print '%-40s %3s %5s %5s %10s %9s' % \ ('Name', 'ID', 'Mem', 'VCPUs', 'State', 'Time(s)') @@ -1091,10 +1095,6 @@ def xm_sched_sedf(args): print( ("%(name)-32s %(domid)3d %(period)9.1f %(slice)9.1f" + " %(latency)7.1f %(extratime)6d %(weight)6d") % info) - def domid_match(domid, info): - return domid is None or domid == info['name'] or \ - domid == str(info['domid']) - # we want to just display current info if no parameters are passed if len(args) == 0: domid = None @@ -1174,27 +1174,43 @@ def xm_sched_credit(args): err(opterr) 
usage('sched-credit') - domain = None + domid = None weight = None cap = None for o, a in opts: if o == "-d": - domain = a + domid = a elif o == "-w": weight = int(a) elif o == "-c": cap = int(a); - if domain is None: - # place holder for system-wide scheduler parameters - err("No domain given.") - usage('sched-credit') + doms = filter(lambda x : domid_match(domid, x), + [parse_doms_info(dom) + for dom in getDomains(None, 'running')]) if weight is None and cap is None: - print server.xend.domain.sched_credit_get(domain) - else: - result = server.xend.domain.sched_credit_set(domain, weight, cap) + # print header if we aren't setting any parameters + print '%-33s %-2s %-6s %-4s' % ('Name','ID','Weight','Cap') + + for d in doms: + try: + info = server.xend.domain.sched_credit_get(d['domid']) + except xmlrpclib.Fault: + # domain does not support sched-credit? + info = {'weight': -1, 'cap': -1} + + info['name'] = d['name'] + info['domid'] = int(d['domid']) + print( ("%(name)-32s %(domid)3d %(weight)6d %(cap)4d") % info) + else: + if domid is None: + # place holder for system-wide scheduler parameters + err("No domain given.") + usage('sched-credit') + + result = server.xend.domain.sched_credit_set(domid, weight, cap) if result != 0: err(str(result)) diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xm/migrate.py --- a/tools/python/xen/xm/migrate.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/xen/xm/migrate.py Wed Jan 17 09:56:40 2007 -0500 @@ -52,6 +52,7 @@ def help(): def main(argv): opts = gopts + opts.reset() args = opts.parse(argv) if len(args) != 2: diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xm/opts.py --- a/tools/python/xen/xm/opts.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/xen/xm/opts.py Wed Jan 17 09:56:40 2007 -0500 @@ -559,14 +559,6 @@ def set_bool(opt, k, v): else: opt.opts.err('Invalid value:' +v) -def set_u32(opt, k, v): - """Set an option to an u32 value.""" - try: - v = u32(v) - except: - opt.opts.err('Invalid value: ' + 
str(v)) - opt.set(v) - def set_value(opt, k, v): """Set an option to a value.""" opt.set(v) diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/python/xen/xm/shutdown.py --- a/tools/python/xen/xm/shutdown.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/python/xen/xm/shutdown.py Wed Jan 17 09:56:40 2007 -0500 @@ -118,6 +118,7 @@ def main_dom(opts, args): def main(argv): opts = gopts + opts.reset() args = opts.parse(argv) if opts.vals.help: return diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/tests/Makefile --- a/tools/tests/Makefile Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/tests/Makefile Wed Jan 17 09:56:40 2007 -0500 @@ -7,12 +7,21 @@ TARGET := test_x86_emulator .PHONY: all all: $(TARGET) +.PHONY: blowfish.bin +blowfish.bin: + make -f blowfish.mk all + +blowfish.h: blowfish.bin + (echo "static unsigned int blowfish_code[] = {"; \ + od -v -t x $< | sed 's/^[0-9]* /0x/' | sed 's/ /, 0x/g' | sed 's/$$/,/';\ + echo "};") >$@ + $(TARGET): x86_emulate.o test_x86_emulator.o $(HOSTCC) -o $@ $^ .PHONY: clean clean: - rm -rf $(TARGET) *.o *~ core + rm -rf $(TARGET) *.o *~ core blowfish.h blowfish.bin .PHONY: install install: @@ -20,5 +29,5 @@ x86_emulate.o: $(XEN_ROOT)/xen/arch/x86/ x86_emulate.o: $(XEN_ROOT)/xen/arch/x86/x86_emulate.c $(HOSTCC) $(HOSTCFLAGS) -I$(XEN_ROOT)/xen/include -c -o $@ $< -%.o: %.c +test_x86_emulator.o: test_x86_emulator.c blowfish.h $(HOSTCC) $(HOSTCFLAGS) -I$(XEN_ROOT)/xen/include -c -o $@ $< diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/tests/test_x86_emulator.c --- a/tools/tests/test_x86_emulator.c Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/tests/test_x86_emulator.c Wed Jan 17 09:56:40 2007 -0500 @@ -15,6 +15,19 @@ typedef int64_t s64; #include <asm-x86/x86_emulate.h> #include <sys/mman.h> +#include "blowfish.h" + +#define MMAP_SZ 16384 + +/* EFLAGS bit definitions. 
*/ +#define EFLG_OF (1<<11) +#define EFLG_DF (1<<10) +#define EFLG_SF (1<<7) +#define EFLG_ZF (1<<6) +#define EFLG_AF (1<<4) +#define EFLG_PF (1<<2) +#define EFLG_CF (1<<0) + static int read( unsigned int seg, unsigned long offset, @@ -97,20 +110,25 @@ int main(int argc, char **argv) { struct x86_emulate_ctxt ctxt; struct cpu_user_regs regs; - char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */ - unsigned int *res; + char *instr; + unsigned int *res, i; int rc; +#ifndef __x86_64__ + unsigned int bcdres_native, bcdres_emul; +#endif ctxt.regs = &regs; - ctxt.mode = X86EMUL_MODE_PROT32; - - res = mmap((void *)0x100000, 0x1000, PROT_READ|PROT_WRITE, + ctxt.addr_size = 32; + ctxt.sp_size = 32; + + res = mmap((void *)0x100000, MMAP_SZ, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); if ( res == MAP_FAILED ) { fprintf(stderr, "mmap to low address failed\n"); exit(1); } + instr = (char *)res + 0x100; printf("%-40s", "Testing addl %%ecx,(%%eax)..."); instr[0] = 0x01; instr[1] = 0x08; @@ -119,7 +137,7 @@ int main(int argc, char **argv) regs.ecx = 0x12345678; regs.eax = (unsigned long)res; *res = 0x7FFFFFFF; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x92345677) || (regs.eflags != 0xa94) || @@ -133,7 +151,7 @@ int main(int argc, char **argv) regs.eip = (unsigned long)&instr[0]; regs.ecx = 0x12345678; regs.eax = 0x7FFFFFFF; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (regs.ecx != 0x12345678) || (regs.eax != 0x92345677) || @@ -152,7 +170,7 @@ int main(int argc, char **argv) regs.ecx = 0x12345678UL; #endif regs.eax = (unsigned long)res; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x92345677) || (regs.ecx != 0x8000000FUL) || @@ -166,7 +184,7 @@ int main(int argc, char **argv) regs.eip = (unsigned long)&instr[0]; regs.ecx = ~0UL; regs.eax = (unsigned long)res; - rc =
x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x92345677) || (regs.ecx != 0x92345677UL) || @@ -181,7 +199,7 @@ int main(int argc, char **argv) regs.eax = 0x92345677UL; regs.ecx = 0xAA; regs.ebx = (unsigned long)res; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x923456AA) || (regs.eflags != 0x244) || @@ -197,7 +215,7 @@ int main(int argc, char **argv) regs.eax = 0xAABBCC77UL; regs.ecx = 0xFF; regs.ebx = (unsigned long)res; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x923456AA) || ((regs.eflags&0x240) != 0x200) || @@ -213,7 +231,7 @@ int main(int argc, char **argv) regs.eip = (unsigned long)&instr[0]; regs.ecx = 0x12345678; regs.eax = (unsigned long)res; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x12345678) || (regs.eflags != 0x200) || @@ -230,7 +248,7 @@ int main(int argc, char **argv) regs.eax = 0x923456AAUL; regs.ecx = 0xDDEEFF00L; regs.ebx = (unsigned long)res; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0xDDEEFF00) || (regs.eflags != 0x244) || @@ -247,7 +265,7 @@ int main(int argc, char **argv) regs.eip = (unsigned long)&instr[0]; regs.esi = (unsigned long)res + 0; regs.edi = (unsigned long)res + 2; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x44554455) || (regs.eflags != 0x200) || @@ -264,7 +282,7 @@ int main(int argc, char **argv) regs.eflags = 0x200; regs.eip = (unsigned long)&instr[0]; regs.edi = (unsigned long)res; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x2233445D) || ((regs.eflags&0x201) != 0x201) || @@ -279,7 +297,7 @@ int main(int argc, char **argv) regs.eip = (unsigned long)&instr[0]; regs.eax = -32; regs.edi = (unsigned 
long)(res+1); - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x2233445E) || ((regs.eflags&0x201) != 0x201) || @@ -299,7 +317,7 @@ int main(int argc, char **argv) regs.ecx = 0xCCCCFFFF; regs.eip = (unsigned long)&instr[0]; regs.edi = (unsigned long)res; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (res[0] != 0x9999AAAA) || (res[1] != 0xCCCCFFFF) || @@ -313,7 +331,7 @@ int main(int argc, char **argv) regs.eflags = 0x200; regs.eip = (unsigned long)&instr[0]; regs.edi = (unsigned long)res; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (res[0] != 0x9999AAAA) || (res[1] != 0xCCCCFFFF) || @@ -331,7 +349,7 @@ int main(int argc, char **argv) regs.ecx = 0x12345678; regs.eax = (unsigned long)res; *res = 0x82; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x82) || (regs.ecx != 0xFFFFFF82) || @@ -347,7 +365,7 @@ int main(int argc, char **argv) regs.ecx = 0x12345678; regs.eax = (unsigned long)res; *res = 0x1234aa82; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x1234aa82) || (regs.ecx != 0xaa82) || @@ -363,7 +381,7 @@ int main(int argc, char **argv) regs.ecx = (unsigned long)res; regs.eax = 0x12345678; *res = 0x11111111; - rc = x86_emulate_memop(&ctxt, &emulops); + rc = x86_emulate(&ctxt, &emulops); if ( (rc != 0) || (*res != 0x11116789) || (regs.eax != 0x12341111) || @@ -371,6 +389,139 @@ int main(int argc, char **argv) (regs.eip != (unsigned long)&instr[4]) ) goto fail; printf("okay\n"); + + printf("%-40s", "Testing dec %%ax..."); + instr[0] = 0x66; instr[1] = 0x48; + regs.eflags = 0x200; + regs.eip = (unsigned long)&instr[0]; + regs.eax = 0x00000000; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != 0) || + (regs.eax != 0x0000ffff) || + ((regs.eflags&0x240) != 0x200) || + (regs.eip != 
(unsigned long)&instr[2]) ) + goto fail; + printf("okay\n"); + + printf("%-40s", "Testing lea 8(%%ebp),%%eax..."); + instr[0] = 0x8d; instr[1] = 0x45; instr[2] = 0x08; + regs.eflags = 0x200; + regs.eip = (unsigned long)&instr[0]; + regs.eax = 0x12345678; + regs.ebp = 0xaaaaaaaa; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != 0) || + (regs.eax != 0xaaaaaab2) || + ((regs.eflags&0x240) != 0x200) || + (regs.eip != (unsigned long)&instr[3]) ) + goto fail; + printf("okay\n"); + + printf("%-40s", "Testing daa/das (all inputs)..."); +#ifndef __x86_64__ + /* Bits 0-7: AL; Bit 8: EFLG_AF; Bit 9: EFLG_CF; Bit 10: DAA vs. DAS. */ + for ( i = 0; i < 0x800; i++ ) + { + regs.eflags = (i & 0x200) ? EFLG_CF : 0; + regs.eflags |= (i & 0x100) ? EFLG_AF : 0; + if ( i & 0x400 ) + __asm__ ( + "pushf; and $0xffffffee,(%%esp); or %1,(%%esp); popf; das; " + "pushf; popl %1" + : "=a" (bcdres_native), "=r" (regs.eflags) + : "0" (i & 0xff), "1" (regs.eflags) ); + else + __asm__ ( + "pushf; and $0xffffffee,(%%esp); or %1,(%%esp); popf; daa; " + "pushf; popl %1" + : "=a" (bcdres_native), "=r" (regs.eflags) + : "0" (i & 0xff), "1" (regs.eflags) ); + bcdres_native |= (regs.eflags & EFLG_PF) ? 0x1000 : 0; + bcdres_native |= (regs.eflags & EFLG_ZF) ? 0x800 : 0; + bcdres_native |= (regs.eflags & EFLG_SF) ? 0x400 : 0; + bcdres_native |= (regs.eflags & EFLG_CF) ? 0x200 : 0; + bcdres_native |= (regs.eflags & EFLG_AF) ? 0x100 : 0; + + instr[0] = (i & 0x400) ? 0x2f: 0x27; /* daa/das */ + regs.eflags = (i & 0x200) ? EFLG_CF : 0; + regs.eflags |= (i & 0x100) ? EFLG_AF : 0; + regs.eip = (unsigned long)&instr[0]; + regs.eax = (unsigned char)i; + rc = x86_emulate(&ctxt, &emulops); + bcdres_emul = regs.eax; + bcdres_emul |= (regs.eflags & EFLG_PF) ? 0x1000 : 0; + bcdres_emul |= (regs.eflags & EFLG_ZF) ? 0x800 : 0; + bcdres_emul |= (regs.eflags & EFLG_SF) ? 0x400 : 0; + bcdres_emul |= (regs.eflags & EFLG_CF) ? 0x200 : 0; + bcdres_emul |= (regs.eflags & EFLG_AF) ? 
0x100 : 0; + if ( (rc != 0) || (regs.eax > 255) || + (regs.eip != (unsigned long)&instr[1]) ) + goto fail; + + if ( bcdres_emul != bcdres_native ) + { + printf("%s: AL=%02x %s %s\n" + "Output: AL=%02x %s %s %s %s %s\n" + "Emul.: AL=%02x %s %s %s %s %s\n", + (i & 0x400) ? "DAS" : "DAA", + (unsigned char)i, + (i & 0x200) ? "CF" : " ", + (i & 0x100) ? "AF" : " ", + (unsigned char)bcdres_native, + (bcdres_native & 0x200) ? "CF" : " ", + (bcdres_native & 0x100) ? "AF" : " ", + (bcdres_native & 0x1000) ? "PF" : " ", + (bcdres_native & 0x800) ? "ZF" : " ", + (bcdres_native & 0x400) ? "SF" : " ", + (unsigned char)bcdres_emul, + (bcdres_emul & 0x200) ? "CF" : " ", + (bcdres_emul & 0x100) ? "AF" : " ", + (bcdres_emul & 0x1000) ? "PF" : " ", + (bcdres_emul & 0x800) ? "ZF" : " ", + (bcdres_emul & 0x400) ? "SF" : " "); + goto fail; + } + } + printf("okay\n"); +#else + printf("skipped\n"); +#endif + + printf("Testing blowfish code sequence"); + memcpy(res, blowfish_code, sizeof(blowfish_code)); + regs.eax = 2; + regs.edx = 1; + regs.eip = (unsigned long)res; + regs.esp = (unsigned long)res + MMAP_SZ - 4; + *(uint32_t *)(unsigned long)regs.esp = 0x12345678; + regs.eflags = 2; + i = 0; + while ( (uint32_t)regs.eip != 0x12345678 ) + { + if ( (i++ & 8191) == 0 ) + printf("."); + rc = x86_emulate(&ctxt, &emulops); + if ( rc != 0 ) + { + printf("failed at %%eip == %08x\n", (unsigned int)regs.eip); + return 1; + } + } + if ( (regs.esp != ((unsigned long)res + MMAP_SZ)) || + (regs.eax != 2) || (regs.edx != 1) ) + goto fail; + printf("okay\n"); + +#ifndef __x86_64__ + printf("%-40s", "Testing blowfish native execution..."); + asm volatile ( + "movl $0x100000,%%ecx; call *%%ecx" + : "=a" (regs.eax), "=d" (regs.edx) + : "0" (2), "1" (1) : "ecx" ); + if ( (regs.eax != 2) || (regs.edx != 1) ) + goto fail; + printf("okay\n"); +#endif return 0; diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xenstat/xentop/xentop.c --- a/tools/xenstat/xentop/xentop.c Mon Jan 15 13:27:20 2007 -0500 +++ 
b/tools/xenstat/xentop/xentop.c Wed Jan 17 09:56:40 2007 -0500 @@ -1067,9 +1067,9 @@ int main(int argc, char **argv) gettimeofday(&curtime, NULL); top(); oldtime = curtime; - sleep(delay); if ((!loop) && !(--iterations)) break; + sleep(delay); } while (1); } diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/README --- a/tools/xm-test/README Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/xm-test/README Wed Jan 17 09:56:40 2007 -0500 @@ -207,6 +207,49 @@ running DomUs on the system to provide e running DomUs on the system to provide each test with a "clean slate". +Testing the XML-RPC and Xen-API interfaces of xend +================================================== + +The xm-test suite can be used to test xm's interface with xend using +either XML-RPC or the Xen-API. In order to use either one of these modes, +xm needs to be configured using its configuration file +'/etc/xen/xm-config.xml'. +Note: The current default configuration after a fresh install of the xen +sources is to use the XML-RPC interface for communication with xend. + +Example content for the xm-config.xml for using the Xen-API looks as +follows: + +<xm> + <server type='Xen-API' + uri='http://localhost:9363/' + username='me' + password='mypassword' /> +</xm> + +This configuration makes xm talk to xend using port 9363. For this to +work, xend also needs to be configured to listen to port 9363. Therefore, +the following line must be in /etc/xen/xend-config.sxp. + +(xen-api-server (( 127.0.0.1:9363 none ))) + +To communicate via the legacy XML-RPC interface, the file +'/etc/xen/xm-config.xml' may simply have the following content or +may be completely removed from the /etc/xen directory. + +<xm> +</xm> + +A few tests have been written for the xm-test suite that test the +Xen-API interface directly without relying on 'xm'. These tests can be +found in the grouptest 'xapi' and for them to work properly, xm must have +been configured to use the Xen-API following the instructions above.
To +run these test, the following command line can be invoked: + + # ./runtest.sh -g xapi <logfile> + + + Extending ========= diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/configure.ac --- a/tools/xm-test/configure.ac Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/xm-test/configure.ac Wed Jan 17 09:56:40 2007 -0500 @@ -150,6 +150,7 @@ AC_CONFIG_FILES([ tests/vcpu-pin/Makefile tests/vcpu-disable/Makefile tests/vtpm/Makefile + tests/xapi/Makefile tests/enforce_dom0_cpus/Makefile lib/XmTestReport/xmtest.py lib/XmTestLib/config.py diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/grouptest/xapi --- a/tools/xm-test/grouptest/xapi Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/xm-test/grouptest/xapi Wed Jan 17 09:56:40 2007 -0500 @@ -1,1 +1,2 @@ vtpm 09_vtpm-xapi.test +xapi vtpm 09_vtpm-xapi.test diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/lib/XmTestLib/XenDomain.py --- a/tools/xm-test/lib/XmTestLib/XenDomain.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/xm-test/lib/XmTestLib/XenDomain.py Wed Jan 17 09:56:40 2007 -0500 @@ -29,6 +29,7 @@ from config import * from config import * from Console import * from XenDevice import * +from DomainTracking import * from acm import * @@ -147,7 +148,7 @@ class DomainError(Exception): class XenDomain: - def __init__(self, name=None, config=None): + def __init__(self, name=None, config=None, isManaged=False): """Create a domain object. 
@param config: String filename of config file """ @@ -162,6 +163,10 @@ class XenDomain: self.devices = {} self.netEnv = "bridge" + if os.getenv("XM_MANAGED_DOMAINS"): + isManaged = True + self.isManaged = isManaged + # Set domain type, either PV for ParaVirt domU or HVM for # FullVirt domain if ENABLE_HVM_SUPPORT: @@ -171,7 +176,17 @@ class XenDomain: def start(self, noConsole=False): - ret, output = traceCommand("xm create %s" % self.config) + if not self.isManaged: + ret, output = traceCommand("xm create %s" % self.config) + else: + ret, output = traceCommand("xm new %s" % self.config) + if ret != 0: + _ret, output = traceCommand("xm delete " + + self.config.getOpt("name")) + else: + ret, output = traceCommand("xm start " + + self.config.getOpt("name")) + addManagedDomain(self.config.getOpt("name")) if ret != 0: raise DomainError("Failed to create domain", @@ -218,6 +233,10 @@ class XenDomain: self.closeConsole() ret, output = traceCommand(prog + cmd + self.config.getOpt("name")) + if self.isManaged: + ret, output = traceCommand(prog + " delete " + + self.config.getOpt("name")) + delManagedDomain(self.config.getOpt("name")) return ret @@ -296,7 +315,7 @@ class XmTestDomain(XenDomain): class XmTestDomain(XenDomain): def __init__(self, name=None, extraConfig=None, - baseConfig=arch.configDefaults): + baseConfig=arch.configDefaults, isManaged=False): """Create a new xm-test domain @param name: The requested domain name @param extraConfig: Additional configuration options @@ -312,7 +331,8 @@ class XmTestDomain(XenDomain): elif not config.getOpt("name"): config.setOpt("name", getUniqueName()) - XenDomain.__init__(self, config.getOpt("name"), config=config) + XenDomain.__init__(self, config.getOpt("name"), config=config, + isManaged=isManaged) def minSafeMem(self): return arch.minSafeMem diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/lib/XmTestLib/Xm.py --- a/tools/xm-test/lib/XmTestLib/Xm.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/xm-test/lib/XmTestLib/Xm.py 
Wed Jan 17 09:56:40 2007 -0500 @@ -48,6 +48,8 @@ def domid(name): status, output = traceCommand("xm domid " + name); if status != 0 or "Traceback" in output: + return -1 + if output == "None": return -1 try: return int(output) diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/lib/XmTestLib/xapi.py --- a/tools/xm-test/lib/XmTestLib/xapi.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/xm-test/lib/XmTestLib/xapi.py Wed Jan 17 09:56:40 2007 -0500 @@ -17,50 +17,49 @@ # Copyright (C) 2006 IBM Corporation #============================================================================ +import atexit import os import sys from XmTestLib import * -from xen.util.xmlrpclib2 import ServerProxy +from xen.xm import main as xmmain +from xen.xm import XenAPI +from xen.xm.opts import OptionError from types import DictType +import xml.dom.minidom +def get_login_pwd(): + if xmmain.serverType == xmmain.SERVER_XEN_API: + try: + login, password = xmmain.parseAuthentication() + return (login, password) + except: + raise OptionError("Configuration for login/pwd not found. " + "Need to run xapi-setup.py?") + raise OptionError("Xm configuration file not using Xen-API for " + "communication with xend.") -XAPI_DEFAULT_LOGIN = " " -XAPI_DEFAULT_PASSWORD = " " +sessions=[] -class XenAPIError(Exception): - pass - - -#A list of VMs' UUIDs that were created using vm_create -_VMuuids = [] - -#Terminate previously created managed(!) 
VMs and destroy their configs -def vm_destroy_all(): - server, session = _connect() - for uuid in _VMuuids: - execute(server.VM.hard_shutdown, session, uuid) - execute(server.VM.destroy , session, uuid) - - -def execute(fn, *args): - result = fn(*args) - if type(result) != DictType: - raise TypeError("Function returned object of type: %s" % - str(type(result))) - if 'Value' not in result: - raise XenAPIError(*result['ErrorDescription']) - return result['Value'] - -_initialised = False -_server = None -_session = None -def _connect(*args): - global _server, _session, _initialised - if not _initialised: - _server = ServerProxy('httpu:///var/run/xend/xen-api.sock') - login = XAPI_DEFAULT_LOGIN - password = XAPI_DEFAULT_PASSWORD - creds = (login, password) - _session = execute(_server.session.login_with_password, *creds) - _initialised = True - return (_server, _session) +def connect(*args): + try: + creds = get_login_pwd() + except Exception, e: + FAIL("%s" % str(e)) + try: + session = XenAPI.Session(xmmain.serverURI) + except: + raise OptionError("Could not create XenAPI session with Xend." \ + "URI=%s" % xmmain.serverURI) + try: + session.login_with_password(*creds) + except: + raise OptionError("Could not login to Xend. 
URI=%s" % xmmain.serverURI) + def logout(): + try: + for s in sessions: + s.xenapi.session.logout() + except: + pass + sessions.append(session) + atexit.register(logout) + return session diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/ramdisk/Makefile.am --- a/tools/xm-test/ramdisk/Makefile.am Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/xm-test/ramdisk/Makefile.am Wed Jan 17 09:56:40 2007 -0500 @@ -36,7 +36,12 @@ XMTEST_VER_IMG = initrd-$(XMTEST_MAJ_VER XMTEST_VER_IMG = initrd-$(XMTEST_MAJ_VER)-$(BR_ARCH).img XMTEST_DL_IMG = $(shell echo $(XMTEST_VER_IMG) | sed -e 's/x86_64/i386/g') -EXTRA_ROOT_DIRS = sys +EXTRA_ROOT_DIRS = sys modules + +BLKDRV = /lib/modules/$(shell uname -r)/kernel/drivers/xen/blkfront/xenblk.ko +NETDRV = /lib/modules/$(shell uname -r)/kernel/drivers/xen/netfront/xennet.ko +PKTDRV = /lib/modules/$(shell uname -r)/kernel/net/packet/af_packet.ko + if HVM all: initrd.img disk.img @@ -60,7 +65,11 @@ endif $(XMTEST_VER_IMG): $(BR_IMG) chmod a+x skel/etc/init.d/rcS - (cd skel; mkdir -p $(EXTRA_ROOT_DIRS); tar cf - .) \ + cd skel && mkdir -p $(EXTRA_ROOT_DIRS) + -[ -e "$(BLKDRV)" ] && cp $(BLKDRV) skel/modules + -[ -e "$(NETDRV)" ] && cp $(NETDRV) skel/modules + -[ -e "$(PKTDRV)" ] && cp $(PKTDRV) skel/modules + (cd skel; tar cf - .) 
\ | (cd $(BR_SRC)/$(BR_ROOT); tar xvf -) cd $(BR_SRC) && make cp $(BR_IMG) $(XMTEST_VER_IMG) diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/ramdisk/skel/etc/init.d/rcS --- a/tools/xm-test/ramdisk/skel/etc/init.d/rcS Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/xm-test/ramdisk/skel/etc/init.d/rcS Wed Jan 17 09:56:40 2007 -0500 @@ -6,3 +6,14 @@ if uname -r | grep -q '^2.6'; then if uname -r | grep -q '^2.6'; then mount -t sysfs none /sys fi + +# If the block, net, and packet drivers are modules, we need to load them +if test -e /modules/xenblk.ko; then + insmod /modules/xenblk.ko > /dev/null 2>&1 +fi +if test -e /modules/xennet.ko; then + insmod /modules/xennet.ko > /dev/null 2>&1 +fi +if test -e /modules/af_packet.ko; then + insmod /modules/af_packet.ko > /dev/null 2>&1 +fi diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/runtest.sh --- a/tools/xm-test/runtest.sh Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/xm-test/runtest.sh Wed Jan 17 09:56:40 2007 -0500 @@ -16,6 +16,7 @@ usage() { echo " -r <url> : url of test results repository to use" echo " -s <report> : just submit report <report>" echo " -u : unsafe -- do not run the sanity checks before starting" + echo " -md : all created domains are xend-'managed' domains" echo " -h | --help : show this help" } @@ -218,10 +219,13 @@ unsafe=no unsafe=no GROUPENTERED=default +#Prepare for usage with ACM if [ -d /etc/xen/acm-security/policies ]; then cp -f tests/security-acm/xm-test-security_policy.xml \ /etc/xen/acm-security/policies fi + +unset XM_MANAGED_DOMAINS # Resolve options while [ $# -gt 0 ] @@ -260,6 +264,10 @@ while [ $# -gt 0 ] unsafe=yes report=no ;; + -md) + echo "(use managed domains)" + export XM_MANAGED_DOMAINS=1 + ;; -h|--help) usage exit 0 diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py --- a/tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py Mon Jan 15 13:27:20 2007 -0500 +++ 
b/tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py Wed Jan 17 09:56:40 2007 -0500 @@ -6,7 +6,7 @@ from XmTestLib import * from XmTestLib import * from XmTestLib.block_utils import * -import re +import re, time def checkXmLongList(domain): s, o = traceCommand("xm list --long %s" % domain.getName()) @@ -35,4 +35,8 @@ block_detach(domain, "xvda1") block_detach(domain, "xvda1") if checkXmLongList(domain): - FAIL("xm long list does not show that xvda1 was removed") + # device info is removed by hotplug scripts - give them a chance + # to fire (they run asynchronously with us). + time.sleep(1) + if checkXmLongList(domain): + FAIL("xm long list does not show that xvda1 was removed") diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/tests/sched-credit/01_sched_credit_weight_cap_pos.py --- a/tools/xm-test/tests/sched-credit/01_sched_credit_weight_cap_pos.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/xm-test/tests/sched-credit/01_sched_credit_weight_cap_pos.py Wed Jan 17 09:56:40 2007 -0500 @@ -2,14 +2,27 @@ # # Sched-credit tests modified from SEDF tests # + +import re + from XmTestLib import * +paramsRE = re.compile(r'^[^ ]* *[^ ]* *([^ ]*) *([^ ]*)$') + def get_sched_credit_params(domain): - status, output = traceCommand("xm sched-credit -d %s" %(domain.getName())) - params = output.strip('{}').split(', ') - cap = int(params[0].split(':')[1].strip(' ')) - weight = int(params[1].split(':')[1].strip(' ')) - return (status, (weight, cap)) + status, output = traceCommand("xm sched-credit -d %s | tail -1" % + domain.getName()) + + if status != 0: + FAIL("Getting sched-credit parameters return non-zero rv (%d)", + status) + + m = paramsRE.match(output) + if not m: + FAIL("xm sched-credit gave bad output") + weight = int(m.group(1)) + cap = int(m.group(2)) + return (weight, cap) def set_sched_credit_weight(domain, weight): status, output = traceCommand("xm sched-credit -d %s -w %d" %(domain.getName(), weight)) @@ -31,11 +44,8 @@ except DomainError, e: 
FAIL(str(e)) # check default param values -(status, params) = get_sched_credit_params(domain) -if status != 0: - FAIL("Getting sched-credit parameters return non-zero rv (%d)", status) +(weight, cap) = get_sched_credit_params(domain) -(weight, cap) = params if weight != 256: FAIL("default weight is 256 (got %d)", weight) if cap != 0: @@ -51,11 +61,8 @@ if status != 0: FAIL("Setting sched-credit cap return non-zero rv (%d)", status) # check new param values -(status, params) = get_sched_credit_params(domain) -if status != 0: - FAIL("Getting sched-credit parameters return non-zero rv (%d)", status) +(weight, cap) = get_sched_credit_params(domain) -(weight, cap) = params if weight != 512: FAIL("expected weight of 512 (got %d)", weight) if cap != 100: diff -r 5568efb41da4 -r 3f6a2745b3a3 tools/xm-test/tests/vtpm/09_vtpm-xapi.py --- a/tools/xm-test/tests/vtpm/09_vtpm-xapi.py Mon Jan 15 13:27:20 2007 -0500 +++ b/tools/xm-test/tests/vtpm/09_vtpm-xapi.py Wed Jan 17 09:56:40 2007 -0500 @@ -6,71 +6,66 @@ # Test to test the vtpm class through the Xen-API from XmTestLib import xapi -from XmTestLib.XenManagedDomain import XmTestManagedDomain +from XmTestLib.XenAPIDomain import XmTestAPIDomain from XmTestLib import * from vtpm_utils import * import commands import os -def do_test(): - domain = XmTestManagedDomain() - vm_uuid = domain.get_uuid() +try: + # XmTestAPIDomain tries to establish a connection to XenD + domain = XmTestAPIDomain() +except Exception, e: + SKIP("Skipping test. 
Error: %s" % str(e)) +vm_uuid = domain.get_uuid() - vtpmcfg = {} - vtpmcfg['type'] = "paravirtualised" - vtpmcfg['backend'] = "Domain-0" - vtpmcfg['instance'] = 1 - vtpmcfg['VM'] = vm_uuid +vtpmcfg = {} +vtpmcfg['type'] = "paravirtualised" +vtpmcfg['backend'] = "Domain-0" +vtpmcfg['instance'] = 1 +vtpmcfg['VM'] = vm_uuid - server, session = xapi._connect() +session = xapi.connect() - vtpm_uuid = xapi.execute(server.VTPM.create, session, vtpmcfg) +vtpm_uuid = session.xenapi.VTPM.create(vtpmcfg) - vtpm_id = xapi.execute(server.VTPM.get_instance, session, vtpm_uuid) - vtpm_be = xapi.execute(server.VTPM.get_backend , session, vtpm_uuid) - if vtpm_be != vtpmcfg['backend']: - FAIL("vTPM's backend is in '%s', expected: '%s'" % - (vtpm_be, vtpmcfg['backend'])) +vtpm_id = session.xenapi.VTPM.get_instance(vtpm_uuid) +vtpm_be = session.xenapi.VTPM.get_backend(vtpm_uuid) +if vtpm_be != vtpmcfg['backend']: + FAIL("vTPM's backend is in '%s', expected: '%s'" % + (vtpm_be, vtpmcfg['backend'])) - driver = xapi.execute(server.VTPM.get_driver, session, vtpm_uuid) - if driver != vtpmcfg['type']: - FAIL("vTPM has driver type '%s', expected: '%s'" % - (driver, vtpmcfg['type'])) +driver = session.xenapi.VTPM.get_driver(vtpm_uuid) +if driver != vtpmcfg['type']: + FAIL("vTPM has driver type '%s', expected: '%s'" % + (driver, vtpmcfg['type'])) - vtpm_rec = xapi.execute(server.VTPM.get_record, session, vtpm_uuid) +vtpm_rec = session.xenapi.VTPM.get_record(vtpm_uuid) - if vtpm_rec['driver'] != vtpmcfg['type']: - FAIL("vTPM record shows driver type '%s', expected: '%s'" % - (vtpm_rec['driver'], vtpmcfg['type'])) - if vtpm_rec['uuid'] != vtpm_uuid: - FAIL("vTPM record shows vtpm uuid '%s', expected: '%s'" % - (vtpm_rec['uuid'], vtpm_uuid)) - if vtpm_rec['VM'] != vm_uuid: - FAIL("vTPM record shows VM uuid '%s', expected: '%s'" % - (vtpm_rec['VM'], vm_uuid)) +if vtpm_rec['driver'] != vtpmcfg['type']: + FAIL("vTPM record shows driver type '%s', expected: '%s'" % + (vtpm_rec['driver'], 
vtpmcfg['type'])) +if vtpm_rec['uuid'] != vtpm_uuid: + FAIL("vTPM record shows vtpm uuid '%s', expected: '%s'" % + (vtpm_rec['uuid'], vtpm_uuid)) +if vtpm_rec['VM'] != vm_uuid: + FAIL("vTPM record shows VM uuid '%s', expected: '%s'" % + (vtpm_rec['VM'], vm_uuid)) - success = domain.start() +success = domain.start() - console = domain.getConsole() - - try: - run = console.runCmd("cat /sys/devices/xen/vtpm-0/pcrs") - except ConsoleError, e: - saveLog(console.getHistory()) - vtpm_cleanup(domName) - FAIL("No result from dumping the PCRs") - - if re.search("No such file",run["output"]): - vtpm_cleanup(domName) - FAIL("TPM frontend support not compiled into (domU?) kernel") - - domain.stop() - domain.destroy() - - +console = domain.getConsole() try: - do_test() -finally: - #Make sure all domains are gone that were created in this test case - xapi.vm_destroy_all() + run = console.runCmd("cat /sys/devices/xen/vtpm-0/pcrs") +except ConsoleError, e: + saveLog(console.getHistory()) + vtpm_cleanup(domName) + FAIL("No result from dumping the PCRs") + +if re.search("No such file",run["output"]): + vtpm_cleanup(domName) + FAIL("TPM frontend support not compiled into (domU?) kernel") + +domain.stop() +domain.destroy() diff -r 5568efb41da4 -r 3f6a2745b3a3 unmodified_drivers/linux-2.6/platform-pci/evtchn.c --- a/unmodified_drivers/linux-2.6/platform-pci/evtchn.c Mon Jan 15 13:27:20 2007 -0500 +++ b/unmodified_drivers/linux-2.6/platform-pci/evtchn.c Wed Jan 17 09:56:40 2007 -0500 @@ -48,6 +48,12 @@ static struct { void *dev_id; int close; /* close on unbind_from_irqhandler()? 
*/ } evtchns[MAX_EVTCHN]; + +int irq_to_evtchn_port(int irq) +{ + return irq; +} +EXPORT_SYMBOL(irq_to_evtchn_port); void mask_evtchn(int port) { diff -r 5568efb41da4 -r 3f6a2745b3a3 unmodified_drivers/linux-2.6/platform-pci/platform-pci.c --- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c Mon Jan 15 13:27:20 2007 -0500 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c Wed Jan 17 09:56:40 2007 -0500 @@ -179,7 +179,7 @@ static int get_hypercall_stubs(void) #define get_hypercall_stubs() (0) #endif -static int get_callback_irq(struct pci_dev *pdev) +static uint64_t get_callback_via(struct pci_dev *pdev) { #ifdef __ia64__ int irq; @@ -189,16 +189,24 @@ static int get_callback_irq(struct pci_d } return 0; #else /* !__ia64__ */ - return pdev->irq; + if (pdev->irq < 16) + return pdev->irq; /* ISA IRQ */ + /* We don't know the GSI. Specify the PCI INTx line instead. */ + return (((uint64_t)0x01 << 56) | /* PCI INTx identifier */ + ((uint64_t)pci_domain_nr(pdev->bus) << 32) | + ((uint64_t)pdev->bus->number << 16) | + ((uint64_t)(pdev->devfn & 0xff) << 8) | + ((uint64_t)(pdev->pin - 1) & 3)); #endif } static int __devinit platform_pci_init(struct pci_dev *pdev, const struct pci_device_id *ent) { - int i, ret, callback_irq; + int i, ret; long ioaddr, iolen; long mmio_addr, mmio_len; + uint64_t callback_via; i = pci_enable_device(pdev); if (i) @@ -210,9 +218,9 @@ static int __devinit platform_pci_init(s mmio_addr = pci_resource_start(pdev, 1); mmio_len = pci_resource_len(pdev, 1); - callback_irq = get_callback_irq(pdev); - - if (mmio_addr == 0 || ioaddr == 0 || callback_irq == 0) { + callback_via = get_callback_via(pdev); + + if (mmio_addr == 0 || ioaddr == 0 || callback_via == 0) { printk(KERN_WARNING DRV_NAME ":no resources found\n"); return -ENOENT; } @@ -242,12 +250,12 @@ static int __devinit platform_pci_init(s if ((ret = init_xen_info())) goto out; - if ((ret = request_irq(pdev->irq, evtchn_interrupt, SA_SHIRQ, - "xen-platform-pci", pdev))) { - 
goto out; - } - - if ((ret = set_callback_irq(callback_irq))) + if ((ret = request_irq(pdev->irq, evtchn_interrupt, + SA_SHIRQ | SA_SAMPLE_RANDOM, + "xen-platform-pci", pdev))) + goto out; + + if ((ret = set_callback_via(callback_via))) goto out; out: @@ -297,7 +305,7 @@ static void __exit platform_pci_module_c { printk(KERN_INFO DRV_NAME ":Do platform module cleanup\n"); /* disable hypervisor for callback irq */ - set_callback_irq(0); + set_callback_via(0); if (pci_device_registered) pci_unregister_driver(&platform_driver); } diff -r 5568efb41da4 -r 3f6a2745b3a3 unmodified_drivers/linux-2.6/platform-pci/platform-pci.h --- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.h Mon Jan 15 13:27:20 2007 -0500 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.h Wed Jan 17 09:56:40 2007 -0500 @@ -24,13 +24,13 @@ #include <linux/interrupt.h> #include <xen/interface/hvm/params.h> -static inline int set_callback_irq(int irq) +static inline int set_callback_via(uint64_t via) { struct xen_hvm_param a; a.domid = DOMID_SELF; a.index = HVM_PARAM_CALLBACK_IRQ; - a.value = irq; + a.value = via; return HYPERVISOR_hvm_op(HVMOP_set_param, &a); } diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/Makefile --- a/xen/Makefile Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/Makefile Wed Jan 17 09:56:40 2007 -0500 @@ -14,8 +14,8 @@ default: build .PHONY: dist dist: install -.PHONY: build install clean distclean cscope TAGS tags -build install debug clean distclean cscope TAGS tags:: +.PHONY: build install clean distclean cscope TAGS tags MAP +build install debug clean distclean cscope TAGS tags MAP:: $(MAKE) -f Rules.mk _$@ .PHONY: _build @@ -48,6 +48,7 @@ _debug: .PHONY: _clean _clean: delete-unfresh-files $(MAKE) -C tools clean + $(MAKE) -f $(BASEDIR)/Rules.mk -C include clean $(MAKE) -f $(BASEDIR)/Rules.mk -C common clean $(MAKE) -f $(BASEDIR)/Rules.mk -C drivers clean $(MAKE) -f $(BASEDIR)/Rules.mk -C acm clean @@ -69,6 +70,7 @@ _distclean: clean $(MAKE) -f $(BASEDIR)/Rules.mk 
include/xen/compile.h $(MAKE) -f $(BASEDIR)/Rules.mk include/xen/acm_policy.h [ -e include/asm ] || ln -sf asm-$(TARGET_ARCH) include/asm + $(MAKE) -f $(BASEDIR)/Rules.mk -C include $(MAKE) -f $(BASEDIR)/Rules.mk -C arch/$(TARGET_ARCH) asm-offsets.s $(MAKE) -f $(BASEDIR)/Rules.mk include/asm-$(TARGET_ARCH)/asm-offsets.h $(MAKE) -f $(BASEDIR)/Rules.mk -C arch/$(TARGET_ARCH) $(TARGET) @@ -158,9 +160,9 @@ _cscope: $(all_sources) > cscope.files cscope -k -b -q -.PHONY: MAP -MAP: - $(NM) -n $(TARGET) | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' > System.map +.PHONY: _MAP +_MAP: + $(NM) -n $(TARGET)-syms | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' > System.map .PHONY: FORCE FORCE: diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/Rules.mk --- a/xen/Rules.mk Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/Rules.mk Wed Jan 17 09:56:40 2007 -0500 @@ -34,6 +34,7 @@ TARGET := $(BASEDIR)/xen HDRS := $(wildcard $(BASEDIR)/include/xen/*.h) HDRS += $(wildcard $(BASEDIR)/include/public/*.h) +HDRS += $(wildcard $(BASEDIR)/include/compat/*.h) HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/*.h) HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/$(TARGET_SUBARCH)/*.h) diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/ia64/linux-xen/unaligned.c --- a/xen/arch/ia64/linux-xen/unaligned.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/ia64/linux-xen/unaligned.c Wed Jan 17 09:56:40 2007 -0500 @@ -24,7 +24,7 @@ #include <asm/uaccess.h> #include <asm/unaligned.h> -extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn)); +extern void die_if_kernel(char *str, struct pt_regs *regs, long err); #undef DEBUG_UNALIGNED_TRAP diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/ia64/xen/domain.c Wed Jan 17 09:56:40 2007 -0500 @@ -522,14 +522,14 @@ void arch_domain_destroy(struct domain * 
deallocate_rid_range(d); } -void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c) +void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c) { int i; - struct vcpu_extra_regs *er = &c->extra_regs; - - c->user_regs = *vcpu_regs (v); - c->privregs_pfn = get_gpfn_from_mfn(virt_to_maddr(v->arch.privregs) >> - PAGE_SHIFT); + struct vcpu_extra_regs *er = &c.nat->extra_regs; + + c.nat->user_regs = *vcpu_regs(v); + c.nat->privregs_pfn = get_gpfn_from_mfn(virt_to_maddr(v->arch.privregs) >> + PAGE_SHIFT); /* Fill extra regs. */ for (i = 0; i < 8; i++) { @@ -549,12 +549,12 @@ void arch_getdomaininfo_ctxt(struct vcpu er->iva = v->arch.iva; } -int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c) +int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_u c) { struct pt_regs *regs = vcpu_regs (v); struct domain *d = v->domain; - *regs = c->user_regs; + *regs = c.nat->user_regs; if (!d->arch.is_vti) { /* domain runs at PL2/3 */ @@ -562,9 +562,9 @@ int arch_set_info_guest(struct vcpu *v, regs->ar_rsc |= (2 << 2); /* force PL2/3 */ } - if (c->flags & VGCF_EXTRA_REGS) { + if (c.nat->flags & VGCF_EXTRA_REGS) { int i; - struct vcpu_extra_regs *er = &c->extra_regs; + struct vcpu_extra_regs *er = &c.nat->extra_regs; for (i = 0; i < 8; i++) { vcpu_set_itr(v, i, er->itrs[i].pte, diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/ia64/xen/xenmisc.c --- a/xen/arch/ia64/xen/xenmisc.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/ia64/xen/xenmisc.c Wed Jan 17 09:56:40 2007 -0500 @@ -57,26 +57,6 @@ struct pt_regs *guest_cpu_user_regs(void struct pt_regs *guest_cpu_user_regs(void) { return vcpu_regs(current); } /////////////////////////////// -// from arch/ia64/traps.c -/////////////////////////////// - -int is_kernel_text(unsigned long addr) -{ - extern char _stext[], _etext[]; - if (addr >= (unsigned long) _stext && - addr <= (unsigned long) _etext) - return 1; - - return 0; -} - -unsigned long kernel_text_end(void) -{ - extern char _etext[]; - return 
(unsigned long) _etext; -} - -/////////////////////////////// // from common/keyhandler.c /////////////////////////////// void dump_pageframe_info(struct domain *d) @@ -97,7 +77,7 @@ void console_print(char *msg) // called from unaligned.c //////////////////////////////////// -void die_if_kernel(char *str, struct pt_regs *regs, long err) /* __attribute__ ((noreturn)) */ +void die_if_kernel(char *str, struct pt_regs *regs, long err) { if (user_mode(regs)) return; @@ -108,7 +88,7 @@ void die_if_kernel(char *str, struct pt_ domain_crash_synchronous(); } -void vmx_die_if_kernel(char *str, struct pt_regs *regs, long err) /* __attribute__ ((noreturn)) */ +void vmx_die_if_kernel(char *str, struct pt_regs *regs, long err) { if (vmx_user_mode(regs)) return; diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/powerpc/domain.c --- a/xen/arch/powerpc/domain.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/powerpc/domain.c Wed Jan 17 09:56:40 2007 -0500 @@ -150,11 +150,9 @@ void vcpu_destroy(struct vcpu *v) { } -int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_t *c) +int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_u c) { - struct domain *d = v->domain; - - memcpy(&v->arch.ctxt, &c->user_regs, sizeof(c->user_regs)); + memcpy(&v->arch.ctxt, &c.nat->user_regs, sizeof(c.nat->user_regs)); printk("Domain[%d].%d: initializing\n", d->domain_id, v->vcpu_id); diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/powerpc/domctl.c --- a/xen/arch/powerpc/domctl.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/powerpc/domctl.c Wed Jan 17 09:56:40 2007 -0500 @@ -22,6 +22,7 @@ #include <xen/types.h> #include <xen/lib.h> #include <xen/sched.h> +#include <xen/domain.h> #include <xen/guest_access.h> #include <xen/shadow.h> #include <public/xen.h> @@ -29,10 +30,9 @@ #include <public/sysctl.h> #include <asm/processor.h> -void arch_getdomaininfo_ctxt(struct vcpu *, vcpu_guest_context_t *); -void arch_getdomaininfo_ctxt(struct vcpu *v, vcpu_guest_context_t *c) +void arch_get_info_guest(struct 
vcpu *v, vcpu_guest_context_u c) { - memcpy(&c->user_regs, &v->arch.ctxt, sizeof(struct cpu_user_regs)); + memcpy(&c.nat->user_regs, &v->arch.ctxt, sizeof(struct cpu_user_regs)); /* XXX fill in rest of vcpu_guest_context_t */ } diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/powerpc/setup.c --- a/xen/arch/powerpc/setup.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/powerpc/setup.c Wed Jan 17 09:56:40 2007 -0500 @@ -91,19 +91,6 @@ static struct domain *idle_domain; volatile struct processor_area * volatile global_cpu_table[NR_CPUS]; -int is_kernel_text(unsigned long addr) -{ - if (addr >= (unsigned long) &_start && - addr <= (unsigned long) &_etext) - return 1; - return 0; -} - -unsigned long kernel_text_end(void) -{ - return (unsigned long) &_etext; -} - static void __init do_initcalls(void) { initcall_t *call; diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/powerpc/xen.lds.S --- a/xen/arch/powerpc/xen.lds.S Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/powerpc/xen.lds.S Wed Jan 17 09:56:40 2007 -0500 @@ -114,10 +114,10 @@ SECTIONS . 
= ALIGN(32); __setup_start = .; - .setup.init : { *(.setup.init) } + .init.setup : { *(.init.setup) } __setup_end = .; __initcall_start = .; - .initcall.init : { *(.initcall.init) } + .initcall.init : { *(.initcall1.init) } __initcall_end = .; __inithcall_start = .; .inithcall.text : { *(.inithcall.text) } diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/boot/x86_32.S --- a/xen/arch/x86/boot/x86_32.S Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/boot/x86_32.S Wed Jan 17 09:56:40 2007 -0500 @@ -11,8 +11,6 @@ .text ENTRY(start) -ENTRY(stext) -ENTRY(_stext) jmp __start .align 4 diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/boot/x86_64.S --- a/xen/arch/x86/boot/x86_64.S Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/boot/x86_64.S Wed Jan 17 09:56:40 2007 -0500 @@ -14,8 +14,6 @@ #define SYM_PHYS(sym) (sym - __PAGE_OFFSET) ENTRY(start) -ENTRY(stext) -ENTRY(_stext) jmp __start .org 0x004 @@ -226,14 +224,33 @@ high_start: .align PAGE_SIZE, 0 ENTRY(gdt_table) .quad 0x0000000000000000 /* unused */ - .quad 0x00cf9a000000ffff /* 0xe008 ring 0 code, compatibility */ - .quad 0x00af9a000000ffff /* 0xe010 ring 0 code, 64-bit mode */ - .quad 0x00cf92000000ffff /* 0xe018 ring 0 data */ + .quad 0x00af9a000000ffff /* 0xe008 ring 0 code, 64-bit mode */ + .quad 0x00cf92000000ffff /* 0xe010 ring 0 data */ + .quad 0x0000000000000000 /* reserved */ .quad 0x00cffa000000ffff /* 0xe023 ring 3 code, compatibility */ .quad 0x00cff2000000ffff /* 0xe02b ring 3 data */ .quad 0x00affa000000ffff /* 0xe033 ring 3 code, 64-bit mode */ - .quad 0x0000000000000000 /* unused */ + .quad 0x00cf9a000000ffff /* 0xe038 ring 0 code, compatibility */ + .org gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8 .fill 4*NR_CPUS,8,0 /* space for TSS and LDT per CPU */ + +#ifdef CONFIG_COMPAT + .align PAGE_SIZE, 0 +/* NB. Even rings != 0 get access to the full 4Gb, as only the */ +/* (compatibility) machine->physical mapping table lives there. 
*/ +ENTRY(compat_gdt_table) + .quad 0x0000000000000000 /* unused */ + .quad 0x00af9a000000ffff /* 0xe008 ring 0 code, 64-bit mode */ + .quad 0x00cf92000000ffff /* 0xe010 ring 0 data */ + .quad 0x00cfba000000ffff /* 0xe019 ring 1 code, compatibility */ + .quad 0x00cfb2000000ffff /* 0xe021 ring 1 data */ + .quad 0x00cffa000000ffff /* 0xe02b ring 3 code, compatibility */ + .quad 0x00cff2000000ffff /* 0xe033 ring 3 data */ + .quad 0x00cf9a000000ffff /* 0xe038 ring 0 code, compatibility */ + .org compat_gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8 + .fill 4*NR_CPUS,8,0 /* space for TSS and LDT per CPU */ +# undef LIMIT +#endif /* Initial PML4 -- level-4 page table. */ .align PAGE_SIZE, 0 diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/compat.c --- a/xen/arch/x86/compat.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/compat.c Wed Jan 17 09:56:40 2007 -0500 @@ -9,16 +9,22 @@ #include <xen/guest_access.h> #include <xen/hypercall.h> +#ifndef COMPAT +typedef long ret_t; +#endif + /* Legacy hypercall (as of 0x00030202). */ -long do_physdev_op_compat(XEN_GUEST_HANDLE(physdev_op_t) uop) +ret_t do_physdev_op_compat(XEN_GUEST_HANDLE(physdev_op_t) uop) { struct physdev_op op; if ( unlikely(copy_from_guest(&op, uop, 1) != 0) ) return -EFAULT; - return do_physdev_op(op.cmd, (XEN_GUEST_HANDLE(void)) { &uop.p->u }); + return do_physdev_op(op.cmd, guest_handle_from_ptr(&uop.p->u, void)); } + +#ifndef COMPAT /* Legacy hypercall (as of 0x00030202). 
*/ long do_event_channel_op_compat(XEN_GUEST_HANDLE(evtchn_op_t) uop) @@ -28,5 +34,7 @@ long do_event_channel_op_compat(XEN_GUES if ( unlikely(copy_from_guest(&op, uop, 1) != 0) ) return -EFAULT; - return do_event_channel_op(op.cmd, (XEN_GUEST_HANDLE(void)) {&uop.p->u }); + return do_event_channel_op(op.cmd, guest_handle_from_ptr(&uop.p->u, void)); } + +#endif diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/crash.c --- a/xen/arch/x86/crash.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/crash.c Wed Jan 17 09:56:40 2007 -0500 @@ -25,6 +25,7 @@ #include <xen/kexec.h> #include <xen/sched.h> #include <public/xen.h> +#include <asm/shared.h> #include <asm/hvm/hvm.h> static atomic_t waiting_for_crash_ipi; @@ -103,7 +104,7 @@ void machine_crash_shutdown(void) info = kexec_crash_save_info(); info->dom0_pfn_to_mfn_frame_list_list = - dom0->shared_info->arch.pfn_to_mfn_frame_list_list; + arch_get_pfn_to_mfn_frame_list_list(dom0); } /* diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/domain.c Wed Jan 17 09:56:40 2007 -0500 @@ -16,6 +16,7 @@ #include <xen/lib.h> #include <xen/errno.h> #include <xen/sched.h> +#include <xen/domain.h> #include <xen/smp.h> #include <xen/delay.h> #include <xen/softirq.h> @@ -40,6 +41,9 @@ #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> #include <asm/msr.h> +#ifdef CONFIG_COMPAT +#include <compat/vcpu.h> +#endif DEFINE_PER_CPU(struct vcpu *, curr_vcpu); @@ -127,6 +131,195 @@ void free_vcpu_struct(struct vcpu *v) xfree(v); } +#ifdef CONFIG_COMPAT + +int setup_arg_xlat_area(struct vcpu *v, l4_pgentry_t *l4tab) +{ + struct domain *d = v->domain; + unsigned i; + struct page_info *pg; + + if ( !d->arch.mm_arg_xlat_l3 ) + { + pg = alloc_domheap_page(NULL); + if ( !pg ) + return -ENOMEM; + d->arch.mm_arg_xlat_l3 = clear_page(page_to_virt(pg)); + } + + l4tab[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] = + l4e_from_paddr(__pa(d->arch.mm_arg_xlat_l3), 
__PAGE_HYPERVISOR); + + for ( i = 0; i < COMPAT_ARG_XLAT_PAGES; ++i ) + { + unsigned long va = COMPAT_ARG_XLAT_VIRT_START(v->vcpu_id) + i * PAGE_SIZE; + l2_pgentry_t *l2tab; + l1_pgentry_t *l1tab; + + if ( !l3e_get_intpte(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]) ) + { + pg = alloc_domheap_page(NULL); + if ( !pg ) + return -ENOMEM; + clear_page(page_to_virt(pg)); + d->arch.mm_arg_xlat_l3[l3_table_offset(va)] = l3e_from_page(pg, __PAGE_HYPERVISOR); + } + l2tab = l3e_to_l2e(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]); + if ( !l2e_get_intpte(l2tab[l2_table_offset(va)]) ) + { + pg = alloc_domheap_page(NULL); + if ( !pg ) + return -ENOMEM; + clear_page(page_to_virt(pg)); + l2tab[l2_table_offset(va)] = l2e_from_page(pg, __PAGE_HYPERVISOR); + } + l1tab = l2e_to_l1e(l2tab[l2_table_offset(va)]); + BUG_ON(l1e_get_intpte(l1tab[l1_table_offset(va)])); + pg = alloc_domheap_page(NULL); + if ( !pg ) + return -ENOMEM; + l1tab[l1_table_offset(va)] = l1e_from_page(pg, PAGE_HYPERVISOR); + } + + return 0; +} + +static void release_arg_xlat_area(struct domain *d) +{ + if ( d->arch.mm_arg_xlat_l3 ) + { + unsigned l3; + + for ( l3 = 0; l3 < L3_PAGETABLE_ENTRIES; ++l3 ) + { + if ( l3e_get_intpte(d->arch.mm_arg_xlat_l3[l3]) ) + { + l2_pgentry_t *l2tab = l3e_to_l2e(d->arch.mm_arg_xlat_l3[l3]); + unsigned l2; + + for ( l2 = 0; l2 < L2_PAGETABLE_ENTRIES; ++l2 ) + { + if ( l2e_get_intpte(l2tab[l2]) ) + { + l1_pgentry_t *l1tab = l2e_to_l1e(l2tab[l2]); + unsigned l1; + + for ( l1 = 0; l1 < L1_PAGETABLE_ENTRIES; ++l1 ) + { + if ( l1e_get_intpte(l1tab[l1]) ) + free_domheap_page(l1e_get_page(l1tab[l1])); + } + free_domheap_page(l2e_get_page(l2tab[l2])); + } + } + free_domheap_page(l3e_get_page(d->arch.mm_arg_xlat_l3[l3])); + } + } + free_domheap_page(virt_to_page(d->arch.mm_arg_xlat_l3)); + } +} + +static int setup_compat_l4(struct vcpu *v) +{ + struct page_info *pg = alloc_domheap_page(NULL); + l4_pgentry_t *l4tab; + int rc; + + if ( !pg ) + return -ENOMEM; + l4tab = 
copy_page(page_to_virt(pg), idle_pg_table); + l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] = + l4e_from_page(pg, __PAGE_HYPERVISOR); + l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] = + l4e_from_paddr(__pa(v->domain->arch.mm_perdomain_l3), __PAGE_HYPERVISOR); + v->arch.guest_table = pagetable_from_page(pg); + v->arch.guest_table_user = v->arch.guest_table; + + if ( (rc = setup_arg_xlat_area(v, l4tab)) < 0 ) + { + free_domheap_page(pg); + return rc; + } + + return 0; +} + +static void release_compat_l4(struct vcpu *v) +{ + free_domheap_page(pagetable_get_page(v->arch.guest_table)); + v->arch.guest_table = pagetable_null(); + v->arch.guest_table_user = pagetable_null(); +} + +static inline int may_switch_mode(struct domain *d) +{ + return 1; /* XXX */ +} + +int switch_native(struct domain *d) +{ + l1_pgentry_t gdt_l1e; + unsigned int vcpuid; + + if ( !d ) + return -EINVAL; + if ( !may_switch_mode(d) ) + return -EACCES; + if ( !IS_COMPAT(d) ) + return 0; + + clear_bit(_DOMF_compat, &d->domain_flags); + release_arg_xlat_area(d); + + /* switch gdt */ + gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR); + for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ ) + { + d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) + + FIRST_RESERVED_GDT_PAGE)] = gdt_l1e; + if (d->vcpu[vcpuid]) + release_compat_l4(d->vcpu[vcpuid]); + } + + return 0; +} + +int switch_compat(struct domain *d) +{ + l1_pgentry_t gdt_l1e; + unsigned int vcpuid; + + if ( !d ) + return -EINVAL; + if ( compat_disabled ) + return -ENOSYS; + if ( !may_switch_mode(d) ) + return -EACCES; + if ( IS_COMPAT(d) ) + return 0; + + set_bit(_DOMF_compat, &d->domain_flags); + + /* switch gdt */ + gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), PAGE_HYPERVISOR); + for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ ) + { + d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) + + FIRST_RESERVED_GDT_PAGE)] = gdt_l1e; + if (d->vcpu[vcpuid] + && setup_compat_l4(d->vcpu[vcpuid]) != 0) + return -ENOMEM; + 
} + + return 0; +} + +#else +#define release_arg_xlat_area(d) ((void)0) +#define setup_compat_l4(v) 0 +#define release_compat_l4(v) ((void)0) +#endif + int vcpu_initialise(struct vcpu *v) { struct domain *d = v->domain; @@ -161,11 +354,16 @@ int vcpu_initialise(struct vcpu *v) v->arch.perdomain_ptes = d->arch.mm_perdomain_pt + (v->vcpu_id << GDT_LDT_VCPU_SHIFT); + if ( IS_COMPAT(d) && (rc = setup_compat_l4(v)) != 0 ) + return rc; + return 0; } void vcpu_destroy(struct vcpu *v) { + if ( IS_COMPAT(v->domain) ) + release_compat_l4(v); } int arch_domain_create(struct domain *d) @@ -219,6 +417,10 @@ int arch_domain_create(struct domain *d) #endif /* __x86_64__ */ +#ifdef CONFIG_COMPAT + HYPERVISOR_COMPAT_VIRT_START(d) = __HYPERVISOR_COMPAT_VIRT_START; +#endif + shadow_domain_init(d); if ( !is_idle_domain(d) ) @@ -270,47 +472,88 @@ void arch_domain_destroy(struct domain * free_domheap_page(virt_to_page(d->arch.mm_perdomain_l3)); #endif + if ( IS_COMPAT(d) ) + release_arg_xlat_area(d); + free_xenheap_page(d->shared_info); } /* This is called by arch_final_setup_guest and do_boot_vcpu */ int arch_set_info_guest( - struct vcpu *v, struct vcpu_guest_context *c) + struct vcpu *v, vcpu_guest_context_u c) { struct domain *d = v->domain; +#ifdef CONFIG_COMPAT +#define c(fld) (!IS_COMPAT(d) ? 
(c.nat->fld) : (c.cmp->fld)) +#else +#define c(fld) (c.nat->fld) +#endif unsigned long cr3_pfn = INVALID_MFN; + unsigned long flags = c(flags); int i, rc; if ( !is_hvm_vcpu(v) ) { - fixup_guest_stack_selector(c->user_regs.ss); - fixup_guest_stack_selector(c->kernel_ss); - fixup_guest_code_selector(c->user_regs.cs); - + if ( !IS_COMPAT(d) ) + { + fixup_guest_stack_selector(d, c.nat->user_regs.ss); + fixup_guest_stack_selector(d, c.nat->kernel_ss); + fixup_guest_code_selector(d, c.nat->user_regs.cs); #ifdef __i386__ - fixup_guest_code_selector(c->event_callback_cs); - fixup_guest_code_selector(c->failsafe_callback_cs); -#endif - - for ( i = 0; i < 256; i++ ) - fixup_guest_code_selector(c->trap_ctxt[i].cs); - - /* LDT safety checks. */ - if ( ((c->ldt_base & (PAGE_SIZE-1)) != 0) || - (c->ldt_ents > 8192) || - !array_access_ok(c->ldt_base, c->ldt_ents, LDT_ENTRY_SIZE) ) - return -EINVAL; + fixup_guest_code_selector(d, c.nat->event_callback_cs); + fixup_guest_code_selector(d, c.nat->failsafe_callback_cs); +#endif + + for ( i = 0; i < 256; i++ ) + fixup_guest_code_selector(d, c.nat->trap_ctxt[i].cs); + + /* LDT safety checks. */ + if ( ((c.nat->ldt_base & (PAGE_SIZE-1)) != 0) || + (c.nat->ldt_ents > 8192) || + !array_access_ok(c.nat->ldt_base, + c.nat->ldt_ents, + LDT_ENTRY_SIZE) ) + return -EINVAL; + } +#ifdef CONFIG_COMPAT + else + { + fixup_guest_stack_selector(d, c.cmp->user_regs.ss); + fixup_guest_stack_selector(d, c.cmp->kernel_ss); + fixup_guest_code_selector(d, c.cmp->user_regs.cs); + fixup_guest_code_selector(d, c.cmp->event_callback_cs); + fixup_guest_code_selector(d, c.cmp->failsafe_callback_cs); + + for ( i = 0; i < 256; i++ ) + fixup_guest_code_selector(d, c.cmp->trap_ctxt[i].cs); + + /* LDT safety checks. 
*/ + if ( ((c.cmp->ldt_base & (PAGE_SIZE-1)) != 0) || + (c.cmp->ldt_ents > 8192) || + !compat_array_access_ok(c.cmp->ldt_base, + c.cmp->ldt_ents, + LDT_ENTRY_SIZE) ) + return -EINVAL; + } +#endif } clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags); - if ( c->flags & VGCF_i387_valid ) + if ( flags & VGCF_I387_VALID ) set_bit(_VCPUF_fpu_initialised, &v->vcpu_flags); v->arch.flags &= ~TF_kernel_mode; - if ( (c->flags & VGCF_in_kernel) || is_hvm_vcpu(v)/*???*/ ) + if ( (flags & VGCF_in_kernel) || is_hvm_vcpu(v)/*???*/ ) v->arch.flags |= TF_kernel_mode; - memcpy(&v->arch.guest_context, c, sizeof(*c)); + if ( !IS_COMPAT(v->domain) ) + memcpy(&v->arch.guest_context, c.nat, sizeof(*c.nat)); +#ifdef CONFIG_COMPAT + else + { + XLAT_vcpu_guest_context(&v->arch.guest_context, c.cmp); + } +#endif /* Only CR0.TS is modifiable by guest or admin. */ v->arch.guest_context.ctrlreg[0] &= X86_CR0_TS; @@ -338,28 +581,66 @@ int arch_set_info_guest( memset(v->arch.guest_context.debugreg, 0, sizeof(v->arch.guest_context.debugreg)); for ( i = 0; i < 8; i++ ) - (void)set_debugreg(v, i, c->debugreg[i]); + (void)set_debugreg(v, i, c(debugreg[i])); if ( v->vcpu_id == 0 ) - d->vm_assist = c->vm_assist; + d->vm_assist = c(vm_assist); if ( !is_hvm_vcpu(v) ) { - if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 ) + if ( !IS_COMPAT(d) ) + rc = (int)set_gdt(v, c.nat->gdt_frames, c.nat->gdt_ents); +#ifdef CONFIG_COMPAT + else + { + unsigned long gdt_frames[ARRAY_SIZE(c.cmp->gdt_frames)]; + unsigned int i, n = (c.cmp->gdt_ents + 511) / 512; + + if ( n > ARRAY_SIZE(c.cmp->gdt_frames) ) + return -EINVAL; + for ( i = 0; i < n; ++i ) + gdt_frames[i] = c.cmp->gdt_frames[i]; + rc = (int)set_gdt(v, gdt_frames, c.cmp->gdt_ents); + } +#endif + if ( rc != 0 ) return rc; - cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c->ctrlreg[3])); - - if ( shadow_mode_refcounts(d) - ? 
!get_page(mfn_to_page(cr3_pfn), d) - : !get_page_and_type(mfn_to_page(cr3_pfn), d, - PGT_base_page_table) ) - { - destroy_gdt(v); - return -EINVAL; - } - - v->arch.guest_table = pagetable_from_pfn(cr3_pfn); + if ( !IS_COMPAT(d) ) + { + cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[3])); + + if ( shadow_mode_refcounts(d) + ? !get_page(mfn_to_page(cr3_pfn), d) + : !get_page_and_type(mfn_to_page(cr3_pfn), d, + PGT_base_page_table) ) + { + destroy_gdt(v); + return -EINVAL; + } + + v->arch.guest_table = pagetable_from_pfn(cr3_pfn); + } +#ifdef CONFIG_COMPAT + else + { + l4_pgentry_t *l4tab; + + cr3_pfn = gmfn_to_mfn(d, compat_cr3_to_pfn(c.cmp->ctrlreg[3])); + + if ( shadow_mode_refcounts(d) + ? !get_page(mfn_to_page(cr3_pfn), d) + : !get_page_and_type(mfn_to_page(cr3_pfn), d, + PGT_l3_page_table) ) + { + destroy_gdt(v); + return -EINVAL; + } + + l4tab = __va(pagetable_get_paddr(v->arch.guest_table)); + *l4tab = l4e_from_pfn(cr3_pfn, _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED); + } +#endif } if ( v->vcpu_id == 0 ) @@ -374,6 +655,7 @@ int arch_set_info_guest( update_cr3(v); return 0; +#undef c } long @@ -397,16 +679,16 @@ arch_do_vcpu_op( break; rc = 0; - v->runstate_guest = area.addr.h; + runstate_guest(v) = area.addr.h; if ( v == current ) { - __copy_to_guest(v->runstate_guest, &v->runstate, 1); + __copy_to_guest(runstate_guest(v), &v->runstate, 1); } else { vcpu_runstate_get(v, &runstate); - __copy_to_guest(v->runstate_guest, &runstate, 1); + __copy_to_guest(runstate_guest(v), &runstate, 1); } break; @@ -489,27 +771,30 @@ static void load_segments(struct vcpu *n all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs); } - /* This can only be non-zero if selector is NULL. */ - if ( nctxt->fs_base ) - wrmsr(MSR_FS_BASE, - nctxt->fs_base, - nctxt->fs_base>>32); - - /* Most kernels have non-zero GS base, so don't bother testing. */ - /* (This is also a serialising instruction, avoiding AMD erratum #88.) 
*/ - wrmsr(MSR_SHADOW_GS_BASE, - nctxt->gs_base_kernel, - nctxt->gs_base_kernel>>32); - - /* This can only be non-zero if selector is NULL. */ - if ( nctxt->gs_base_user ) - wrmsr(MSR_GS_BASE, - nctxt->gs_base_user, - nctxt->gs_base_user>>32); - - /* If in kernel mode then switch the GS bases around. */ - if ( n->arch.flags & TF_kernel_mode ) - __asm__ __volatile__ ( "swapgs" ); + if ( !IS_COMPAT(n->domain) ) + { + /* This can only be non-zero if selector is NULL. */ + if ( nctxt->fs_base ) + wrmsr(MSR_FS_BASE, + nctxt->fs_base, + nctxt->fs_base>>32); + + /* Most kernels have non-zero GS base, so don't bother testing. */ + /* (This is also a serialising instruction, avoiding AMD erratum #88.) */ + wrmsr(MSR_SHADOW_GS_BASE, + nctxt->gs_base_kernel, + nctxt->gs_base_kernel>>32); + + /* This can only be non-zero if selector is NULL. */ + if ( nctxt->gs_base_user ) + wrmsr(MSR_GS_BASE, + nctxt->gs_base_user, + nctxt->gs_base_user>>32); + + /* If in kernel mode then switch the GS bases around. */ + if ( (n->arch.flags & TF_kernel_mode) ) + __asm__ __volatile__ ( "swapgs" ); + } if ( unlikely(!all_segs_okay) ) { @@ -520,6 +805,55 @@ static void load_segments(struct vcpu *n (unsigned long *)nctxt->kernel_sp; unsigned long cs_and_mask, rflags; + if ( IS_COMPAT(n->domain) ) + { + unsigned int *esp = ring_1(regs) ? + (unsigned int *)regs->rsp : + (unsigned int *)nctxt->kernel_sp; + unsigned int cs_and_mask, eflags; + int ret = 0; + + /* CS longword also contains full evtchn_upcall_mask. */ + cs_and_mask = (unsigned short)regs->cs | + ((unsigned int)vcpu_info(n, evtchn_upcall_mask) << 16); + /* Fold upcall mask into RFLAGS.IF. 
*/ + eflags = regs->_eflags & ~X86_EFLAGS_IF; + eflags |= !vcpu_info(n, evtchn_upcall_mask) << 9; + + if ( !ring_1(regs) ) + { + ret = put_user(regs->ss, esp-1); + ret |= put_user(regs->_esp, esp-2); + esp -= 2; + } + + if ( ret | + put_user(eflags, esp-1) | + put_user(cs_and_mask, esp-2) | + put_user(regs->_eip, esp-3) | + put_user(nctxt->user_regs.gs, esp-4) | + put_user(nctxt->user_regs.fs, esp-5) | + put_user(nctxt->user_regs.es, esp-6) | + put_user(nctxt->user_regs.ds, esp-7) ) + { + gdprintk(XENLOG_ERR, "Error while creating compat " + "failsafe callback frame.\n"); + domain_crash(n->domain); + } + + if ( test_bit(_VGCF_failsafe_disables_events, + &n->arch.guest_context.flags) ) + vcpu_info(n, evtchn_upcall_mask) = 1; + + regs->entry_vector = TRAP_syscall; + regs->_eflags &= 0xFFFCBEFFUL; + regs->ss = FLAT_COMPAT_KERNEL_SS; + regs->_esp = (unsigned long)(esp-7); + regs->cs = FLAT_COMPAT_KERNEL_CS; + regs->_eip = nctxt->failsafe_callback_eip; + return; + } + if ( !(n->arch.flags & TF_kernel_mode) ) toggle_guest_mode(n); else @@ -527,11 +861,11 @@ static void load_segments(struct vcpu *n /* CS longword also contains full evtchn_upcall_mask. */ cs_and_mask = (unsigned long)regs->cs | - ((unsigned long)n->vcpu_info->evtchn_upcall_mask << 32); + ((unsigned long)vcpu_info(n, evtchn_upcall_mask) << 32); /* Fold upcall mask into RFLAGS.IF. 
*/ rflags = regs->rflags & ~X86_EFLAGS_IF; - rflags |= !n->vcpu_info->evtchn_upcall_mask << 9; + rflags |= !vcpu_info(n, evtchn_upcall_mask) << 9; if ( put_user(regs->ss, rsp- 1) | put_user(regs->rsp, rsp- 2) | @@ -552,7 +886,7 @@ static void load_segments(struct vcpu *n if ( test_bit(_VGCF_failsafe_disables_events, &n->arch.guest_context.flags) ) - n->vcpu_info->evtchn_upcall_mask = 1; + vcpu_info(n, evtchn_upcall_mask) = 1; regs->entry_vector = TRAP_syscall; regs->rflags &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF| @@ -581,7 +915,7 @@ static void save_segments(struct vcpu *v if ( regs->es ) dirty_segment_mask |= DIRTY_ES; - if ( regs->fs ) + if ( regs->fs || IS_COMPAT(v->domain) ) { dirty_segment_mask |= DIRTY_FS; ctxt->fs_base = 0; /* != 0 selector kills fs_base */ @@ -591,7 +925,7 @@ static void save_segments(struct vcpu *v dirty_segment_mask |= DIRTY_FS_BASE; } - if ( regs->gs ) + if ( regs->gs || IS_COMPAT(v->domain) ) { dirty_segment_mask |= DIRTY_GS; ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */ @@ -713,7 +1047,7 @@ void context_switch(struct vcpu *prev, s local_irq_disable(); - if ( is_hvm_vcpu(prev) ) + if ( is_hvm_vcpu(prev) && !list_empty(&prev->arch.hvm_vcpu.tm_list) ) pt_freeze_time(prev); set_current(next); @@ -725,6 +1059,23 @@ void context_switch(struct vcpu *prev, s else { __context_switch(); + +#ifdef CONFIG_COMPAT + if ( is_idle_vcpu(prev) + || IS_COMPAT(prev->domain) != IS_COMPAT(next->domain) ) + { + uint32_t efer_lo, efer_hi; + + local_flush_tlb_one(GDT_VIRT_START(next) + FIRST_RESERVED_GDT_BYTE); + + rdmsr(MSR_EFER, efer_lo, efer_hi); + if ( !IS_COMPAT(next->domain) == !(efer_lo & EFER_SCE) ) + { + efer_lo ^= EFER_SCE; + wrmsr(MSR_EFER, efer_lo, efer_hi); + } + } +#endif /* Re-enable interrupts before restoring state which may fault. */ local_irq_enable(); @@ -739,8 +1090,20 @@ void context_switch(struct vcpu *prev, s context_saved(prev); /* Update per-VCPU guest runstate shared memory area (if registered). 
*/ - if ( !guest_handle_is_null(next->runstate_guest) ) - __copy_to_guest(next->runstate_guest, &next->runstate, 1); + if ( !guest_handle_is_null(runstate_guest(next)) ) + { + if ( !IS_COMPAT(next->domain) ) + __copy_to_guest(runstate_guest(next), &next->runstate, 1); +#ifdef CONFIG_COMPAT + else + { + struct compat_vcpu_runstate_info info; + + XLAT_vcpu_runstate_info(&info, &next->runstate); + __copy_to_guest(next->runstate_guest.compat, &info, 1); + } +#endif + } schedule_tail(next); BUG(); @@ -811,55 +1174,153 @@ unsigned long hypercall_create_continuat for ( i = 0; *p != '\0'; i++ ) mcs->call.args[i] = next_arg(p, args); + if ( IS_COMPAT(current->domain) ) + { + for ( ; i < 6; i++ ) + mcs->call.args[i] = 0; + } } else { regs = guest_cpu_user_regs(); -#if defined(__i386__) regs->eax = op; - - if ( supervisor_mode_kernel || is_hvm_vcpu(current) ) - regs->eip &= ~31; /* re-execute entire hypercall entry stub */ + regs->eip -= 2; /* re-execute 'syscall' / 'int 0x82' */ + +#ifdef __x86_64__ + if ( !IS_COMPAT(current->domain) ) + { + for ( i = 0; *p != '\0'; i++ ) + { + arg = next_arg(p, args); + switch ( i ) + { + case 0: regs->rdi = arg; break; + case 1: regs->rsi = arg; break; + case 2: regs->rdx = arg; break; + case 3: regs->r10 = arg; break; + case 4: regs->r8 = arg; break; + case 5: regs->r9 = arg; break; + } + } + } else - regs->eip -= 2; /* re-execute 'int 0x82' */ - - for ( i = 0; *p != '\0'; i++ ) - { - arg = next_arg(p, args); +#endif + { + if ( supervisor_mode_kernel || is_hvm_vcpu(current) ) + regs->eip &= ~31; /* re-execute entire hypercall entry stub */ + + for ( i = 0; *p != '\0'; i++ ) + { + arg = next_arg(p, args); + switch ( i ) + { + case 0: regs->ebx = arg; break; + case 1: regs->ecx = arg; break; + case 2: regs->edx = arg; break; + case 3: regs->esi = arg; break; + case 4: regs->edi = arg; break; + case 5: regs->ebp = arg; break; + } + } + } + } + + va_end(args); + + return op; +} + +#ifdef CONFIG_COMPAT +int hypercall_xlat_continuation(unsigned 
int *id, unsigned int mask, ...) +{ + int rc = 0; + struct mc_state *mcs = &this_cpu(mc_state); + struct cpu_user_regs *regs; + unsigned int i, cval = 0; + unsigned long nval = 0; + va_list args; + + BUG_ON(*id > 5); + BUG_ON(mask & (1U << *id)); + + va_start(args, mask); + + if ( test_bit(_MCSF_in_multicall, &mcs->flags) ) + { + if ( !test_bit(_MCSF_call_preempted, &mcs->flags) ) + return 0; + for ( i = 0; i < 6; ++i, mask >>= 1 ) + { + if ( mask & 1 ) + { + nval = va_arg(args, unsigned long); + cval = va_arg(args, unsigned int); + if ( cval == nval ) + mask &= ~1U; + else + BUG_ON(nval == (unsigned int)nval); + } + else if ( id && *id == i ) + { + *id = mcs->call.args[i]; + id = NULL; + } + if ( (mask & 1) && mcs->call.args[i] == nval ) + ++rc; + else + { + cval = mcs->call.args[i]; + BUG_ON(mcs->call.args[i] != cval); + } + mcs->compat_call.args[i] = cval; + } + } + else + { + regs = guest_cpu_user_regs(); + for ( i = 0; i < 6; ++i, mask >>= 1 ) + { + unsigned long *reg; + switch ( i ) { - case 0: regs->ebx = arg; break; - case 1: regs->ecx = arg; break; - case 2: regs->edx = arg; break; - case 3: regs->esi = arg; break; - case 4: regs->edi = arg; break; - case 5: regs->ebp = arg; break; - } - } -#elif defined(__x86_64__) - regs->rax = op; - regs->rip -= 2; /* re-execute 'syscall' */ - - for ( i = 0; *p != '\0'; i++ ) - { - arg = next_arg(p, args); - switch ( i ) - { - case 0: regs->rdi = arg; break; - case 1: regs->rsi = arg; break; - case 2: regs->rdx = arg; break; - case 3: regs->r10 = arg; break; - case 4: regs->r8 = arg; break; - case 5: regs->r9 = arg; break; - } - } -#endif + case 0: reg = &regs->ebx; break; + case 1: reg = &regs->ecx; break; + case 2: reg = &regs->edx; break; + case 3: reg = &regs->esi; break; + case 4: reg = &regs->edi; break; + case 5: reg = &regs->ebp; break; + default: BUG(); reg = NULL; break; + } + if ( (mask & 1) ) + { + nval = va_arg(args, unsigned long); + cval = va_arg(args, unsigned int); + if ( cval == nval ) + mask &= ~1U; + else + 
BUG_ON(nval == (unsigned int)nval); + } + else if ( id && *id == i ) + { + *id = *reg; + id = NULL; + } + if ( (mask & 1) && *reg == nval ) + { + *reg = cval; + ++rc; + } + else + BUG_ON(*reg != (unsigned int)*reg); + } } va_end(args); - return op; -} + return rc; +} +#endif static void relinquish_memory(struct domain *d, struct list_head *list) { @@ -931,6 +1392,24 @@ void domain_relinquish_resources(struct { /* Drop ref to guest_table (from new_guest_cr3(), svm/vmx cr3 handling, * or sh_update_paging_modes()) */ +#ifdef CONFIG_COMPAT + if ( IS_COMPAT(d) ) + { + if ( is_hvm_vcpu(v) ) + pfn = pagetable_get_pfn(v->arch.guest_table); + else + pfn = l4e_get_pfn(*(l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table))); + + if ( pfn != 0 ) + { + if ( shadow_mode_refcounts(d) ) + put_page(mfn_to_page(pfn)); + else + put_page_and_type(mfn_to_page(pfn)); + } + continue; + } +#endif pfn = pagetable_get_pfn(v->arch.guest_table); if ( pfn != 0 ) { @@ -938,6 +1417,10 @@ void domain_relinquish_resources(struct put_page(mfn_to_page(pfn)); else put_page_and_type(mfn_to_page(pfn)); +#ifdef __x86_64__ + if ( pfn == pagetable_get_pfn(v->arch.guest_table_user) ) + v->arch.guest_table_user = pagetable_null(); +#endif v->arch.guest_table = pagetable_null(); } diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/domain_build.c Wed Jan 17 09:56:40 2007 -0500 @@ -19,6 +19,7 @@ #include <xen/version.h> #include <xen/iocap.h> #include <xen/bitops.h> +#include <xen/compat.h> #include <asm/regs.h> #include <asm/system.h> #include <asm/io.h> @@ -90,9 +91,11 @@ string_param("dom0_ioports_disable", opt #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) #define L3_PROT (_PAGE_PRESENT) #elif defined(__x86_64__) -/* Allow ring-3 access in long mode as guest cannot use ring 1. */ +/* Allow ring-3 access in long mode as guest cannot use ring 1 ... 
*/ #define BASE_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) #define L1_PROT (BASE_PROT|_PAGE_GUEST_KERNEL) +/* ... except for compatibility mode guests. */ +#define COMPAT_L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) #define L2_PROT (BASE_PROT|_PAGE_DIRTY) #define L3_PROT (BASE_PROT|_PAGE_DIRTY) #define L4_PROT (BASE_PROT|_PAGE_DIRTY) @@ -261,8 +264,8 @@ int construct_dom0(struct domain *d, start_info_t *si; struct vcpu *v = d->vcpu[0]; const char *p; - unsigned long hypercall_page; - int hypercall_page_defined; + unsigned long long value; + int value_defined; #if defined(__i386__) char *image_start = (char *)_image_start; /* use lowmem mappings */ char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */ @@ -319,10 +322,40 @@ int construct_dom0(struct domain *d, nr_pages = compute_dom0_nr_pages(); - if ( (rc = parseelfimage(&dsi)) != 0 ) - return rc; - - xen_pae = (CONFIG_PAGING_LEVELS == 3); + rc = parseelfimage(&dsi); +#ifdef CONFIG_COMPAT + if ( rc == -ENOSYS + && !compat_disabled + && (rc = parseelf32image(&dsi)) == 0 ) + { + l1_pgentry_t gdt_l1e; + + set_bit(_DOMF_compat, &d->domain_flags); + v->vcpu_info = (void *)&d->shared_info->compat.vcpu_info[0]; + + if ( nr_pages != (unsigned int)nr_pages ) + nr_pages = UINT_MAX; + + /* + * Map compatibility Xen segments into every VCPU's GDT. See + * arch_domain_create() for further comments. 
+ */ + gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), + PAGE_HYPERVISOR); + for ( i = 0; i < MAX_VIRT_CPUS; i++ ) + d->arch.mm_perdomain_pt[((i << GDT_LDT_VCPU_SHIFT) + + FIRST_RESERVED_GDT_PAGE)] = gdt_l1e; + local_flush_tlb_one(GDT_LDT_VIRT_START + FIRST_RESERVED_GDT_BYTE); + } +#endif + if ( rc != 0) + { + if ( rc == -ENOSYS ) + printk("DOM0 image is not a Xen-compatible Elf image.\n"); + return rc; + } + + xen_pae = (CONFIG_PAGING_LEVELS == 3) || IS_COMPAT(d); if (dsi.pae_kernel == PAEKERN_bimodal) dom0_pae = xen_pae; else @@ -338,7 +371,40 @@ int construct_dom0(struct domain *d, dsi.pae_kernel == PAEKERN_bimodal) ) set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist); - if ( (p = xen_elfnote_string(&dsi, XEN_ELFNOTE_FEATURES)) != NULL ) +#ifdef CONFIG_COMPAT + if ( IS_COMPAT(d) ) + { + value = xen_elf32note_numeric(&dsi, XEN_ELFNOTE_HV_START_LOW, &value_defined); + p = xen_elf32note_string(&dsi, XEN_ELFNOTE_FEATURES); + } + else +#endif + { + value = xen_elfnote_numeric(&dsi, XEN_ELFNOTE_HV_START_LOW, &value_defined); + p = xen_elfnote_string(&dsi, XEN_ELFNOTE_FEATURES); + } + if ( value_defined ) + { +#if CONFIG_PAGING_LEVELS < 4 + unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1; +#else + unsigned long mask = !IS_COMPAT(d) + ? (1UL << L4_PAGETABLE_SHIFT) - 1 + : (1UL << L2_PAGETABLE_SHIFT) - 1; +#endif + + value = (value + mask) & ~mask; +#ifdef CONFIG_COMPAT + HYPERVISOR_COMPAT_VIRT_START(d) = max_t(unsigned int, m2p_compat_vstart, value); + if ( value > (!IS_COMPAT(d) ? 
+ HYPERVISOR_VIRT_START : + __HYPERVISOR_COMPAT_VIRT_START) ) +#else + if ( value > HYPERVISOR_VIRT_START ) +#endif + panic("Domain 0 expects too high a hypervisor start address.\n"); + } + if ( p != NULL ) { parse_features(p, dom0_features_supported, @@ -364,7 +430,9 @@ int construct_dom0(struct domain *d, vinitrd_start = round_pgup(dsi.v_end); vinitrd_end = vinitrd_start + initrd_len; vphysmap_start = round_pgup(vinitrd_end); - vphysmap_end = vphysmap_start + (nr_pages * sizeof(unsigned long)); + vphysmap_end = vphysmap_start + (nr_pages * (!IS_COMPAT(d) ? + sizeof(unsigned long) : + sizeof(unsigned int))); vstartinfo_start = round_pgup(vphysmap_end); vstartinfo_end = (vstartinfo_start + sizeof(struct start_info) + @@ -393,7 +461,9 @@ int construct_dom0(struct domain *d, ((_l) & ~((1UL<<(_s))-1))) >> (_s)) if ( (1 + /* # L4 */ NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */ - NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */ + (!IS_COMPAT(d) ? + NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) : /* # L2 */ + 4) + /* # compat L2 */ NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT)) /* # L1 */ <= nr_pt_pages ) break; @@ -583,22 +653,46 @@ int construct_dom0(struct domain *d, #elif defined(__x86_64__) /* Overlap with Xen protected area? */ - if ( (dsi.v_start < HYPERVISOR_VIRT_END) && - (v_end > HYPERVISOR_VIRT_START) ) + if ( !IS_COMPAT(d) ? + ((dsi.v_start < HYPERVISOR_VIRT_END) && + (v_end > HYPERVISOR_VIRT_START)) : + (v_end > HYPERVISOR_COMPAT_VIRT_START(d)) ) { printk("DOM0 image overlaps with Xen private area.\n"); return -EINVAL; } + if ( IS_COMPAT(d) ) + { + v->arch.guest_context.failsafe_callback_cs = FLAT_COMPAT_KERNEL_CS; + v->arch.guest_context.event_callback_cs = FLAT_COMPAT_KERNEL_CS; + } + /* WARNING: The new domain must have its 'processor' field filled in! 
*/ - maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table; - l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; + if ( !IS_COMPAT(d) ) + { + maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table; + l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; + } + else + { + page = alloc_domheap_page(NULL); + if ( !page ) + panic("Not enough RAM for domain 0 PML4.\n"); + l4start = l4tab = page_to_virt(page); + } memcpy(l4tab, idle_pg_table, PAGE_SIZE); l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] = l4e_from_paddr(__pa(l4start), __PAGE_HYPERVISOR); l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] = l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR); v->arch.guest_table = pagetable_from_paddr(__pa(l4start)); + if ( IS_COMPAT(d) ) + { + v->arch.guest_table_user = v->arch.guest_table; + if ( setup_arg_xlat_area(v, l4start) < 0 ) + panic("Not enough RAM for domain 0 hypercall argument translation.\n"); + } l4tab += l4_table_offset(dsi.v_start); mfn = alloc_spfn; @@ -635,7 +729,7 @@ int construct_dom0(struct domain *d, *l2tab = l2e_from_paddr(__pa(l1start), L2_PROT); l2tab++; } - *l1tab = l1e_from_pfn(mfn, L1_PROT); + *l1tab = l1e_from_pfn(mfn, !IS_COMPAT(d) ? L1_PROT : COMPAT_L1_PROT); l1tab++; page = mfn_to_page(mfn); @@ -645,6 +739,30 @@ int construct_dom0(struct domain *d, mfn++; } + +#ifdef CONFIG_COMPAT + if ( IS_COMPAT(d) ) + { + /* Ensure the first four L3 entries are all populated. */ + for ( i = 0, l3tab = l3start; i < 4; ++i, ++l3tab ) + { + if ( !l3e_get_intpte(*l3tab) ) + { + maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table; + l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; + clear_page(l2tab); + *l3tab = l3e_from_paddr(__pa(l2tab), L3_PROT); + } + if ( i == 3 ) + l3e_get_page(*l3tab)->u.inuse.type_info |= PGT_pae_xen_l2; + } + /* Install read-only guest visible MPT mapping. 
*/ + l2tab = l3e_to_l2e(l3start[3]); + memcpy(&l2tab[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)], + &compat_idle_pg_table_l2[l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)], + COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*l2tab)); + } +#endif /* Pages that are part of page tables must be read only. */ l4tab = l4start + l4_table_offset(vpt_start); @@ -664,7 +782,8 @@ int construct_dom0(struct domain *d, page->u.inuse.type_info |= PGT_validated | 1; /* Top-level p.t. is pinned. */ - if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_l4_page_table ) + if ( (page->u.inuse.type_info & PGT_type_mask) == + (!IS_COMPAT(d) ? PGT_l4_page_table : PGT_l3_page_table) ) { page->count_info += 1; page->u.inuse.type_info += 1 | PGT_pinned; @@ -687,7 +806,7 @@ int construct_dom0(struct domain *d, /* Mask all upcalls... */ for ( i = 0; i < MAX_VIRT_CPUS; i++ ) - d->shared_info->vcpu_info[i].evtchn_upcall_mask = 1; + shared_info(d, vcpu_info[i].evtchn_upcall_mask) = 1; if ( opt_dom0_max_vcpus == 0 ) opt_dom0_max_vcpus = num_online_cpus(); @@ -695,6 +814,8 @@ int construct_dom0(struct domain *d, opt_dom0_max_vcpus = num_online_cpus(); if ( opt_dom0_max_vcpus > MAX_VIRT_CPUS ) opt_dom0_max_vcpus = MAX_VIRT_CPUS; + if ( opt_dom0_max_vcpus > BITS_PER_GUEST_LONG(d) ) + opt_dom0_max_vcpus = BITS_PER_GUEST_LONG(d); printk("Dom0 has maximum %u VCPUs\n", opt_dom0_max_vcpus); for ( i = 1; i < opt_dom0_max_vcpus; i++ ) @@ -711,20 +832,30 @@ int construct_dom0(struct domain *d, write_ptbase(v); /* Copy the OS image and free temporary buffer. 
*/ - (void)loadelfimage(&dsi); - - hypercall_page = - xen_elfnote_numeric(&dsi, XEN_ELFNOTE_HYPERCALL_PAGE, &hypercall_page_defined); - if ( hypercall_page_defined ) - { - if ( (hypercall_page < dsi.v_start) || (hypercall_page >= v_end) ) +#ifdef CONFIG_COMPAT + if ( IS_COMPAT(d) ) + { + (void)loadelf32image(&dsi); + value = + xen_elf32note_numeric(&dsi, XEN_ELFNOTE_HYPERCALL_PAGE, &value_defined); + } + else +#endif + { + (void)loadelfimage(&dsi); + value = + xen_elfnote_numeric(&dsi, XEN_ELFNOTE_HYPERCALL_PAGE, &value_defined); + } + if ( value_defined ) + { + if ( (value < dsi.v_start) || (value >= v_end) ) { write_ptbase(current); local_irq_enable(); printk("Invalid HYPERCALL_PAGE field in ELF notes.\n"); return -1; } - hypercall_page_initialise(d, (void *)hypercall_page); + hypercall_page_initialise(d, (void *)(unsigned long)value); } /* Copy the initial ramdisk. */ @@ -742,12 +873,12 @@ int construct_dom0(struct domain *d, si->shared_info = virt_to_maddr(d->shared_info); si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN; - si->pt_base = vpt_start; + si->pt_base = vpt_start + 2 * PAGE_SIZE * !!IS_COMPAT(d); si->nr_pt_frames = nr_pt_pages; si->mfn_list = vphysmap_start; sprintf(si->magic, "xen-%i.%i-x86_%d%s", xen_major_version(), xen_minor_version(), - BITS_PER_LONG, xen_pae ? "p" : ""); + !IS_COMPAT(d) ? BITS_PER_LONG : 32, xen_pae ? "p" : ""); /* Write the phys->machine and machine->phys table entries. 
*/ for ( pfn = 0; pfn < d->tot_pages; pfn++ ) @@ -758,7 +889,10 @@ int construct_dom0(struct domain *d, if ( pfn > REVERSE_START ) mfn = alloc_epfn - (pfn - REVERSE_START); #endif - ((unsigned long *)vphysmap_start)[pfn] = mfn; + if ( !IS_COMPAT(d) ) + ((unsigned long *)vphysmap_start)[pfn] = mfn; + else + ((unsigned int *)vphysmap_start)[pfn] = mfn; set_gpfn_from_mfn(mfn, pfn); } while ( pfn < nr_pages ) @@ -771,7 +905,10 @@ int construct_dom0(struct domain *d, #ifndef NDEBUG #define pfn (nr_pages - 1 - (pfn - (alloc_epfn - alloc_spfn))) #endif - ((unsigned long *)vphysmap_start)[pfn] = mfn; + if ( !IS_COMPAT(d) ) + ((unsigned long *)vphysmap_start)[pfn] = mfn; + else + ((unsigned int *)vphysmap_start)[pfn] = mfn; set_gpfn_from_mfn(mfn, pfn); #undef pfn page++; pfn++; @@ -795,6 +932,11 @@ int construct_dom0(struct domain *d, si->console.dom0.info_off = sizeof(struct start_info); si->console.dom0.info_size = sizeof(struct dom0_vga_console_info); } + +#ifdef CONFIG_COMPAT + if ( IS_COMPAT(d) ) + xlat_start_info(si, XLAT_start_info_console_dom0); +#endif /* Reinstate the caller's page tables. */ write_ptbase(current); @@ -819,9 +961,11 @@ int construct_dom0(struct domain *d, * [EAX,EBX,ECX,EDX,EDI,EBP are zero] */ regs = &v->arch.guest_context.user_regs; - regs->ds = regs->es = regs->fs = regs->gs = FLAT_KERNEL_DS; - regs->ss = FLAT_KERNEL_SS; - regs->cs = FLAT_KERNEL_CS; + regs->ds = regs->es = regs->fs = regs->gs = !IS_COMPAT(d) + ? FLAT_KERNEL_DS + : FLAT_COMPAT_KERNEL_DS; + regs->ss = !IS_COMPAT(d) ? FLAT_KERNEL_SS : FLAT_COMPAT_KERNEL_SS; + regs->cs = !IS_COMPAT(d) ? 
FLAT_KERNEL_CS : FLAT_COMPAT_KERNEL_CS; regs->eip = dsi.v_kernentry; regs->esp = vstack_end; regs->esi = vstartinfo_start; @@ -906,12 +1050,27 @@ int elf_sanity_check(const Elf_Ehdr *ehd (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) || (ehdr->e_type != ET_EXEC) ) { - printk("DOM0 image is not a Xen-compatible Elf image.\n"); return 0; } return 1; } + +#ifdef CONFIG_COMPAT +int elf32_sanity_check(const Elf32_Ehdr *ehdr) +{ + if ( !IS_ELF(*ehdr) || + (ehdr->e_ident[EI_CLASS] != ELFCLASS32) || + (ehdr->e_machine != EM_386) || + (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) || + (ehdr->e_type != ET_EXEC) ) + { + return 0; + } + + return 1; +} +#endif /* * Local variables: diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/domctl.c --- a/xen/arch/x86/domctl.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/domctl.c Wed Jan 17 09:56:40 2007 -0500 @@ -11,6 +11,7 @@ #include <xen/guest_access.h> #include <public/domctl.h> #include <xen/sched.h> +#include <xen/domain.h> #include <xen/event.h> #include <xen/domain_page.h> #include <asm/msr.h> @@ -23,12 +24,21 @@ #include <asm/hvm/support.h> #include <asm/processor.h> #include <public/hvm/e820.h> - -long arch_do_domctl( +#ifdef CONFIG_COMPAT +#include <compat/xen.h> +#endif + +#ifndef COMPAT +#define _long long +#define copy_from_xxx_offset copy_from_guest_offset +#define copy_to_xxx_offset copy_to_guest_offset +#endif + +_long arch_do_domctl( struct xen_domctl *domctl, XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) { - long ret = 0; + _long ret = 0; switch ( domctl->cmd ) { @@ -40,7 +50,9 @@ long arch_do_domctl( d = find_domain_by_id(domctl->domain); if ( d != NULL ) { - ret = shadow_domctl(d, &domctl->u.shadow_op, u_domctl); + ret = shadow_domctl(d, + &domctl->u.shadow_op, + guest_handle_cast(u_domctl, void)); put_domain(d); copy_to_guest(u_domctl, domctl, 1); } @@ -123,12 +135,12 @@ long arch_do_domctl( case XEN_DOMCTL_getpageframeinfo2: { -#define GPF2_BATCH (PAGE_SIZE / sizeof(long)) +#define GPF2_BATCH (PAGE_SIZE / sizeof(_long)) int 
n,j; int num = domctl->u.getpageframeinfo2.num; domid_t dom = domctl->domain; struct domain *d; - unsigned long *l_arr; + unsigned _long *l_arr; ret = -ESRCH; if ( unlikely((d = find_domain_by_id(dom)) == NULL) ) @@ -148,9 +160,9 @@ long arch_do_domctl( { int k = ((num-n)>GPF2_BATCH)?GPF2_BATCH:(num-n); - if ( copy_from_guest_offset(l_arr, - domctl->u.getpageframeinfo2.array, - n, k) ) + if ( copy_from_xxx_offset(l_arr, + domctl->u.getpageframeinfo2.array, + n, k) ) { ret = -EINVAL; break; @@ -159,13 +171,13 @@ long arch_do_domctl( for ( j = 0; j < k; j++ ) { struct page_info *page; - unsigned long mfn = l_arr[j]; + unsigned _long mfn = l_arr[j]; page = mfn_to_page(mfn); if ( likely(mfn_valid(mfn) && get_page(page, d)) ) { - unsigned long type = 0; + unsigned _long type = 0; switch( page->u.inuse.type_info & PGT_type_mask ) { @@ -193,8 +205,8 @@ long arch_do_domctl( } - if ( copy_to_guest_offset(domctl->u.getpageframeinfo2.array, - n, l_arr, k) ) + if ( copy_to_xxx_offset(domctl->u.getpageframeinfo2.array, + n, l_arr, k) ) { ret = -EINVAL; break; @@ -214,7 +226,7 @@ long arch_do_domctl( int i; struct domain *d = find_domain_by_id(domctl->domain); unsigned long max_pfns = domctl->u.getmemlist.max_pfns; - unsigned long mfn; + xen_pfn_t mfn; struct list_head *list_ent; ret = -EINVAL; @@ -229,8 +241,8 @@ long arch_do_domctl( { mfn = page_to_mfn(list_entry( list_ent, struct page_info, list)); - if ( copy_to_guest_offset(domctl->u.getmemlist.buffer, - i, &mfn, 1) ) + if ( copy_to_xxx_offset(domctl->u.getmemlist.buffer, + i, &mfn, 1) ) { ret = -EFAULT; break; @@ -289,32 +301,71 @@ long arch_do_domctl( return ret; } -void arch_getdomaininfo_ctxt( - struct vcpu *v, struct vcpu_guest_context *c) +#ifndef COMPAT +void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c) { - memcpy(c, &v->arch.guest_context, sizeof(*c)); +#ifdef CONFIG_COMPAT +#define c(fld) (!IS_COMPAT(v->domain) ? 
(c.nat->fld) : (c.cmp->fld)) +#else +#define c(fld) (c.nat->fld) +#endif + unsigned long flags; + + if ( !IS_COMPAT(v->domain) ) + memcpy(c.nat, &v->arch.guest_context, sizeof(*c.nat)); +#ifdef CONFIG_COMPAT + else + { + XLAT_vcpu_guest_context(c.cmp, &v->arch.guest_context); + } +#endif if ( is_hvm_vcpu(v) ) { - hvm_store_cpu_guest_regs(v, &c->user_regs, c->ctrlreg); + if ( !IS_COMPAT(v->domain) ) + hvm_store_cpu_guest_regs(v, &c.nat->user_regs, c.nat->ctrlreg); +#ifdef CONFIG_COMPAT + else + { + struct cpu_user_regs user_regs; + typeof(c.nat->ctrlreg) ctrlreg; + unsigned i; + + hvm_store_cpu_guest_regs(v, &user_regs, ctrlreg); + XLAT_cpu_user_regs(&c.cmp->user_regs, &user_regs); + for ( i = 0; i < ARRAY_SIZE(c.cmp->ctrlreg); ++i ) + c.cmp->ctrlreg[i] = ctrlreg[i]; + } +#endif } else { /* IOPL privileges are virtualised: merge back into returned eflags. */ - BUG_ON((c->user_regs.eflags & EF_IOPL) != 0); - c->user_regs.eflags |= v->arch.iopl << 12; - } - - c->flags = 0; + BUG_ON((c(user_regs.eflags) & EF_IOPL) != 0); + c(user_regs.eflags |= v->arch.iopl << 12); + } + + flags = 0; if ( test_bit(_VCPUF_fpu_initialised, &v->vcpu_flags) ) - c->flags |= VGCF_i387_valid; + flags |= VGCF_i387_valid; if ( guest_kernel_mode(v, &v->arch.guest_context.user_regs) ) - c->flags |= VGCF_in_kernel; - - c->ctrlreg[3] = xen_pfn_to_cr3(pagetable_get_pfn(v->arch.guest_table)); - - c->vm_assist = v->domain->vm_assist; + flags |= VGCF_in_kernel; + c(flags = flags); + + if ( !IS_COMPAT(v->domain) ) + c.nat->ctrlreg[3] = xen_pfn_to_cr3(pagetable_get_pfn(v->arch.guest_table)); +#ifdef CONFIG_COMPAT + else + { + l4_pgentry_t *l4e = __va(pagetable_get_paddr(v->arch.guest_table)); + c.cmp->ctrlreg[3] = compat_pfn_to_cr3(l4e_get_pfn(*l4e)); + } +#endif + + c(vm_assist = v->domain->vm_assist); +#undef c } +#endif /* * Local variables: diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/e820.c --- a/xen/arch/x86/e820.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/e820.c Wed Jan 17 09:56:40 
2007 -0500 @@ -1,6 +1,7 @@ #include <xen/config.h> #include <xen/init.h> #include <xen/lib.h> +#include <xen/compat.h> #include <asm/e820.h> #include <asm/page.h> @@ -341,6 +342,39 @@ static void __init clip_4gb(void) #define clip_4gb() ((void)0) #endif +#ifdef CONFIG_COMPAT +static void __init clip_compat(void) +{ + unsigned long long limit; + unsigned int i; + + if ( compat_disabled ) + return; + /* 32-bit guests restricted to 166 GB (with current memory allocator). */ + limit = (unsigned long long)(MACH2PHYS_COMPAT_VIRT_END - + __HYPERVISOR_COMPAT_VIRT_START) << 10; + for ( i = 0; i < e820.nr_map; i++ ) + { + if ( (e820.map[i].addr + e820.map[i].size) <= limit ) + continue; + printk("WARNING: Only the first %Lu GB of the physical memory map " + "can be accessed\n" + " by compatibility mode guests. " + "Truncating the memory map...\n", + limit >> 30); + if ( e820.map[i].addr >= limit ) + e820.nr_map = i; + else + { + e820.map[i].size = limit - e820.map[i].addr; + e820.nr_map = i + 1; + } + } +} +#else +#define clip_compat() ((void)0) +#endif + static void __init clip_mem(void) { int i; @@ -374,6 +408,7 @@ static void __init machine_specific_memo *raw_nr = nr; (void)copy_e820_map(raw, nr); clip_4gb(); + clip_compat(); clip_mem(); } diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/hpet.c --- a/xen/arch/x86/hvm/hpet.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/hvm/hpet.c Wed Jan 17 09:56:40 2007 -0500 @@ -356,8 +356,6 @@ static void hpet_timer_fn(void *opaque) } set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, h->period[tn])); } - - vcpu_kick(h->vcpu); } void hpet_migrate_timers(struct vcpu *v) diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/hvm/hvm.c Wed Jan 17 09:56:40 2007 -0500 @@ -800,7 +800,7 @@ long do_hvm_op(unsigned long op, XEN_GUE d->arch.hvm_domain.buffered_io_va = (unsigned long)p; break; case HVM_PARAM_CALLBACK_IRQ: - hvm_set_callback_gsi(d, 
a.value); + hvm_set_callback_via(d, a.value); break; } d->arch.hvm_domain.params[a.index] = a.value; diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/instrlen.c --- a/xen/arch/x86/hvm/instrlen.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/hvm/instrlen.c Wed Jan 17 09:56:40 2007 -0500 @@ -201,7 +201,7 @@ static uint8_t twobyte_table[256] = { if ( inst_copy_from_guest(&_x, pc, 1) != 1 ) { \ gdprintk(XENLOG_WARNING, \ "Cannot read from address %lx (eip %lx, mode %d)\n", \ - pc, org_pc, mode); \ + pc, org_pc, address_bytes); \ return -1; \ } \ pc += 1; \ @@ -218,30 +218,20 @@ static uint8_t twobyte_table[256] = { * EXTERNAL this routine calculates the length of the current instruction * pointed to by org_pc. The guest state is _not_ changed by this routine. */ -int hvm_instruction_length(unsigned long org_pc, int mode) +int hvm_instruction_length(unsigned long org_pc, int address_bytes) { uint8_t b, d, twobyte = 0, rex_prefix = 0, modrm_reg = 0; unsigned int op_default, op_bytes, ad_default, ad_bytes, tmp; int length = 0; unsigned long pc = org_pc; - switch ( mode ) - { - case X86EMUL_MODE_REAL: - case X86EMUL_MODE_PROT16: - op_bytes = op_default = ad_bytes = ad_default = 2; - break; - case X86EMUL_MODE_PROT32: - op_bytes = op_default = ad_bytes = ad_default = 4; - break; -#ifdef __x86_64__ - case X86EMUL_MODE_PROT64: + op_bytes = op_default = ad_bytes = ad_default = address_bytes; + if ( op_bytes == 8 ) + { op_bytes = op_default = 4; - ad_bytes = ad_default = 8; - break; +#ifndef __x86_64__ + return -1; #endif - default: - return -1; } /* Legacy prefixes. 
*/ @@ -253,7 +243,7 @@ int hvm_instruction_length(unsigned long op_bytes = op_default ^ 6; /* switch between 2/4 bytes */ break; case 0x67: /* address-size override */ - if ( mode == X86EMUL_MODE_PROT64 ) + if ( ad_default == 8 ) ad_bytes = ad_default ^ 12; /* switch between 4/8 bytes */ else ad_bytes = ad_default ^ 6; /* switch between 2/4 bytes */ @@ -270,7 +260,7 @@ int hvm_instruction_length(unsigned long break; #ifdef __x86_64__ case 0x40 ... 0x4f: - if ( mode == X86EMUL_MODE_PROT64 ) + if ( ad_default == 8 ) { rex_prefix = b; continue; @@ -434,7 +424,7 @@ done: cannot_emulate: gdprintk(XENLOG_WARNING, - "Cannot emulate %02x at address %lx (%lx, mode %d)\n", - b, pc - 1, org_pc, mode); + "Cannot emulate %02x at address %lx (%lx, addr_bytes %d)\n", + b, pc - 1, org_pc, address_bytes); return -1; } diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/intercept.c --- a/xen/arch/x86/hvm/intercept.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/hvm/intercept.c Wed Jan 17 09:56:40 2007 -0500 @@ -182,7 +182,7 @@ int hvm_buffered_io_intercept(ioreq_t *p spin_lock(buffered_io_lock); if ( buffered_iopage->write_pointer - buffered_iopage->read_pointer == - (unsigned long)IOREQ_BUFFER_SLOT_NUM ) { + (unsigned int)IOREQ_BUFFER_SLOT_NUM ) { /* the queue is full. * send the iopacket through the normal path. 
* NOTE: The arithimetic operation could handle the situation for diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/irq.c --- a/xen/arch/x86/hvm/irq.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/hvm/irq.c Wed Jan 17 09:56:40 2007 -0500 @@ -25,7 +25,7 @@ #include <xen/sched.h> #include <asm/hvm/domain.h> -void hvm_pci_intx_assert( +static void __hvm_pci_intx_assert( struct domain *d, unsigned int device, unsigned int intx) { struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; @@ -33,10 +33,8 @@ void hvm_pci_intx_assert( ASSERT((device <= 31) && (intx <= 3)); - spin_lock(&hvm_irq->lock); - if ( __test_and_set_bit(device*4 + intx, &hvm_irq->pci_intx) ) - goto out; + return; gsi = hvm_pci_intx_gsi(device, intx); if ( hvm_irq->gsi_assert_count[gsi]++ == 0 ) @@ -50,12 +48,19 @@ void hvm_pci_intx_assert( vioapic_irq_positive_edge(d, isa_irq); vpic_irq_positive_edge(d, isa_irq); } - - out: - spin_unlock(&hvm_irq->lock); -} - -void hvm_pci_intx_deassert( +} + +void hvm_pci_intx_assert( + struct domain *d, unsigned int device, unsigned int intx) +{ + struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; + + spin_lock(&hvm_irq->lock); + __hvm_pci_intx_assert(d, device, intx); + spin_unlock(&hvm_irq->lock); +} + +static void __hvm_pci_intx_deassert( struct domain *d, unsigned int device, unsigned int intx) { struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; @@ -63,10 +68,8 @@ void hvm_pci_intx_deassert( ASSERT((device <= 31) && (intx <= 3)); - spin_lock(&hvm_irq->lock); - if ( !__test_and_clear_bit(device*4 + intx, &hvm_irq->pci_intx) ) - goto out; + return; gsi = hvm_pci_intx_gsi(device, intx); --hvm_irq->gsi_assert_count[gsi]; @@ -76,8 +79,15 @@ void hvm_pci_intx_deassert( if ( (--hvm_irq->pci_link_assert_count[link] == 0) && isa_irq && (--hvm_irq->gsi_assert_count[isa_irq] == 0) ) vpic_irq_negative_edge(d, isa_irq); - - out: +} + +void hvm_pci_intx_deassert( + struct domain *d, unsigned int device, unsigned int intx) +{ + struct hvm_irq *hvm_irq = 
&d->arch.hvm_domain.irq; + + spin_lock(&hvm_irq->lock); + __hvm_pci_intx_deassert(d, device, intx); spin_unlock(&hvm_irq->lock); } @@ -123,36 +133,47 @@ void hvm_set_callback_irq_level(void) struct vcpu *v = current; struct domain *d = v->domain; struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; - unsigned int gsi = hvm_irq->callback_gsi; + unsigned int gsi, pdev, pintx, asserted; /* Fast lock-free tests. */ - if ( (v->vcpu_id != 0) || (gsi == 0) ) + if ( (v->vcpu_id != 0) || + (hvm_irq->callback_via_type == HVMIRQ_callback_none) ) return; spin_lock(&hvm_irq->lock); - gsi = hvm_irq->callback_gsi; - if ( gsi == 0 ) + /* NB. Do not check the evtchn_upcall_mask. It is not used in HVM mode. */ + asserted = !!vcpu_info(v, evtchn_upcall_pending); + if ( hvm_irq->callback_via_asserted == asserted ) goto out; - - if ( local_events_need_delivery() ) - { - if ( !__test_and_set_bit(0, &hvm_irq->callback_irq_wire) && - (hvm_irq->gsi_assert_count[gsi]++ == 0) ) + hvm_irq->callback_via_asserted = asserted; + + /* Callback status has changed. Update the callback via. 
*/ + switch ( hvm_irq->callback_via_type ) + { + case HVMIRQ_callback_gsi: + gsi = hvm_irq->callback_via.gsi; + if ( asserted && (hvm_irq->gsi_assert_count[gsi]++ == 0) ) { vioapic_irq_positive_edge(d, gsi); if ( gsi <= 15 ) vpic_irq_positive_edge(d, gsi); } - } - else - { - if ( __test_and_clear_bit(0, &hvm_irq->callback_irq_wire) && - (--hvm_irq->gsi_assert_count[gsi] == 0) ) + else if ( !asserted && (--hvm_irq->gsi_assert_count[gsi] == 0) ) { if ( gsi <= 15 ) vpic_irq_negative_edge(d, gsi); } + break; + case HVMIRQ_callback_pci_intx: + pdev = hvm_irq->callback_via.pci.dev; + pintx = hvm_irq->callback_via.pci.intx; + if ( asserted ) + __hvm_pci_intx_assert(d, pdev, pintx); + else + __hvm_pci_intx_deassert(d, pdev, pintx); + default: + break; } out: @@ -192,40 +213,79 @@ void hvm_set_pci_link_route(struct domai d->domain_id, link, old_isa_irq, isa_irq); } -void hvm_set_callback_gsi(struct domain *d, unsigned int gsi) -{ - struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; - unsigned int old_gsi; - - if ( gsi >= ARRAY_SIZE(hvm_irq->gsi_assert_count) ) - gsi = 0; - - spin_lock(&hvm_irq->lock); - - old_gsi = hvm_irq->callback_gsi; - if ( old_gsi == gsi ) - goto out; - hvm_irq->callback_gsi = gsi; - - if ( !test_bit(0, &hvm_irq->callback_irq_wire) ) - goto out; - - if ( old_gsi && (--hvm_irq->gsi_assert_count[old_gsi] == 0) ) - if ( old_gsi <= 15 ) - vpic_irq_negative_edge(d, old_gsi); - - if ( gsi && (hvm_irq->gsi_assert_count[gsi]++ == 0) ) - { - vioapic_irq_positive_edge(d, gsi); - if ( gsi <= 15 ) - vpic_irq_positive_edge(d, gsi); - } - - out: - spin_unlock(&hvm_irq->lock); - - dprintk(XENLOG_G_INFO, "Dom%u callback GSI changed %u -> %u\n", - d->domain_id, old_gsi, gsi); +void hvm_set_callback_via(struct domain *d, uint64_t via) +{ + struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; + unsigned int gsi=0, pdev=0, pintx=0; + uint8_t via_type; + + via_type = (uint8_t)(via >> 56) + 1; + if ( ((via_type == HVMIRQ_callback_gsi) && (via == 0)) || + (via_type > 
HVMIRQ_callback_pci_intx) ) + via_type = HVMIRQ_callback_none; + + spin_lock(&hvm_irq->lock); + + /* Tear down old callback via. */ + if ( hvm_irq->callback_via_asserted ) + { + switch ( hvm_irq->callback_via_type ) + { + case HVMIRQ_callback_gsi: + gsi = hvm_irq->callback_via.gsi; + if ( (--hvm_irq->gsi_assert_count[gsi] == 0) && (gsi <= 15) ) + vpic_irq_negative_edge(d, gsi); + break; + case HVMIRQ_callback_pci_intx: + pdev = hvm_irq->callback_via.pci.dev; + pintx = hvm_irq->callback_via.pci.intx; + __hvm_pci_intx_deassert(d, pdev, pintx); + break; + default: + break; + } + } + + /* Set up new callback via. */ + switch ( hvm_irq->callback_via_type = via_type ) + { + case HVMIRQ_callback_gsi: + gsi = hvm_irq->callback_via.gsi = (uint8_t)via; + if ( (gsi == 0) || (gsi >= ARRAY_SIZE(hvm_irq->gsi_assert_count)) ) + hvm_irq->callback_via_type = HVMIRQ_callback_none; + else if ( hvm_irq->callback_via_asserted && + (hvm_irq->gsi_assert_count[gsi]++ == 0) ) + { + vioapic_irq_positive_edge(d, gsi); + if ( gsi <= 15 ) + vpic_irq_positive_edge(d, gsi); + } + break; + case HVMIRQ_callback_pci_intx: + pdev = hvm_irq->callback_via.pci.dev = (uint8_t)(via >> 11) & 31; + pintx = hvm_irq->callback_via.pci.intx = (uint8_t)via & 3; + if ( hvm_irq->callback_via_asserted ) + __hvm_pci_intx_assert(d, pdev, pintx); + break; + default: + break; + } + + spin_unlock(&hvm_irq->lock); + + dprintk(XENLOG_G_INFO, "Dom%u callback via changed to ", d->domain_id); + switch ( via_type ) + { + case HVMIRQ_callback_gsi: + printk("GSI %u\n", gsi); + break; + case HVMIRQ_callback_pci_intx: + printk("PCI INTx Dev 0x%02x Int%c\n", pdev, 'A' + pintx); + break; + default: + printk("None\n"); + break; + } } int cpu_has_pending_irq(struct vcpu *v) diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/platform.c --- a/xen/arch/x86/hvm/platform.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/hvm/platform.c Wed Jan 17 09:56:40 2007 -0500 @@ -352,7 +352,7 @@ static int reg_mem(unsigned char size, u return 
DECODE_success; } -static int mmio_decode(int mode, unsigned char *opcode, +static int mmio_decode(int address_bytes, unsigned char *opcode, struct hvm_io_op *mmio_op, unsigned char *ad_size, unsigned char *op_size, unsigned char *seg_sel) @@ -368,9 +368,9 @@ static int mmio_decode(int mode, unsigne opcode = check_prefix(opcode, mmio_op, ad_size, op_size, seg_sel, &rex); - switch ( mode ) { - case X86EMUL_MODE_REAL: /* meaning is reversed */ - case X86EMUL_MODE_PROT16: + switch ( address_bytes ) + { + case 2: if ( *op_size == WORD ) *op_size = LONG; else if ( *op_size == LONG ) @@ -384,14 +384,14 @@ static int mmio_decode(int mode, unsigne else if ( *ad_size == 0 ) *ad_size = WORD; break; - case X86EMUL_MODE_PROT32: + case 4: if ( *op_size == 0 ) *op_size = LONG; if ( *ad_size == 0 ) *ad_size = LONG; break; #ifdef __x86_64__ - case X86EMUL_MODE_PROT64: + case 8: if ( *op_size == 0 ) *op_size = rex & 0x8 ? QUAD : LONG; if ( *ad_size == 0 ) @@ -907,7 +907,7 @@ void handle_mmio(unsigned long gpa) struct hvm_io_op *mmio_op; struct cpu_user_regs *regs; unsigned char inst[MAX_INST_LEN], ad_size, op_size, seg_sel; - int i, mode, df, inst_len; + int i, address_bytes, df, inst_len; struct vcpu *v = current; mmio_op = &v->arch.hvm_vcpu.io_op; @@ -919,9 +919,9 @@ void handle_mmio(unsigned long gpa) df = regs->eflags & X86_EFLAGS_DF ? 
1 : 0; - mode = hvm_guest_x86_mode(v); + address_bytes = hvm_guest_x86_mode(v); inst_addr = hvm_get_segment_base(v, x86_seg_cs) + regs->eip; - inst_len = hvm_instruction_length(inst_addr, mode); + inst_len = hvm_instruction_length(inst_addr, address_bytes); if ( inst_len <= 0 ) { printk("handle_mmio: failed to get instruction length\n"); @@ -934,8 +934,8 @@ void handle_mmio(unsigned long gpa) domain_crash_synchronous(); } - if ( mmio_decode(mode, inst, mmio_op, &ad_size, &op_size, &seg_sel) - == DECODE_failure ) { + if ( mmio_decode(address_bytes, inst, mmio_op, &ad_size, + &op_size, &seg_sel) == DECODE_failure ) { printk("handle_mmio: failed to decode instruction\n"); printk("mmio opcode: gpa 0x%lx, len %d:", gpa, inst_len); for ( i = 0; i < inst_len; i++ ) diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/hvm/svm/svm.c Wed Jan 17 09:56:40 2007 -0500 @@ -482,15 +482,13 @@ static int svm_guest_x86_mode(struct vcp { struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - if ( vmcb->efer & EFER_LMA ) - return (vmcb->cs.attr.fields.l ? - X86EMUL_MODE_PROT64 : X86EMUL_MODE_PROT32); + if ( (vmcb->efer & EFER_LMA) && vmcb->cs.attr.fields.l ) + return 8; if ( svm_realmode(v) ) - return X86EMUL_MODE_REAL; - - return (vmcb->cs.attr.fields.db ? - X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16); + return 2; + + return (vmcb->cs.attr.fields.db ? 
4 : 2); } void svm_update_host_cr3(struct vcpu *v) diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/vioapic.c --- a/xen/arch/x86/hvm/vioapic.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/hvm/vioapic.c Wed Jan 17 09:56:40 2007 -0500 @@ -309,6 +309,13 @@ static uint32_t ioapic_get_delivery_bitm return mask; } +static inline int pit_channel0_enabled(void) +{ + PITState *pit = &current->domain->arch.hvm_domain.pl_time.vpit; + struct periodic_time *pt = &pit->channels[0].pt; + return pt->enabled; +} + static void vioapic_deliver(struct vioapic *vioapic, int irq) { uint16_t dest = vioapic->redirtbl[irq].fields.dest_id; @@ -341,7 +348,7 @@ static void vioapic_deliver(struct vioap { #ifdef IRQ0_SPECIAL_ROUTING /* Force round-robin to pick VCPU 0 */ - if ( irq == hvm_isa_irq_to_gsi(0) ) + if ( (irq == hvm_isa_irq_to_gsi(0)) && pit_channel0_enabled() ) { v = vioapic_domain(vioapic)->vcpu[0]; target = v ? vcpu_vlapic(v) : NULL; @@ -374,7 +381,7 @@ static void vioapic_deliver(struct vioap deliver_bitmask &= ~(1 << bit); #ifdef IRQ0_SPECIAL_ROUTING /* Do not deliver timer interrupts to VCPU != 0 */ - if ( irq == hvm_isa_irq_to_gsi(0) ) + if ( (irq == hvm_isa_irq_to_gsi(0)) && pit_channel0_enabled() ) v = vioapic_domain(vioapic)->vcpu[0]; else #endif diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Wed Jan 17 09:56:40 2007 -0500 @@ -278,7 +278,14 @@ static void vmx_set_host_env(struct vcpu host_env.tr_base = (unsigned long) &init_tss[cpu]; __vmwrite(HOST_TR_SELECTOR, host_env.tr_selector); __vmwrite(HOST_TR_BASE, host_env.tr_base); - __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom()); + + /* + * Skip end of cpu_user_regs when entering the hypervisor because the + * CPU does not save context onto the stack. SS,RSP,CS,RIP,RFLAGS,etc + * all get saved into the VMCS instead. 
+ */ + __vmwrite(HOST_RSP, + (unsigned long)&get_cpu_info()->guest_cpu_user_regs.error_code); } static void construct_vmcs(struct vcpu *v) diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/hvm/vmx/vmx.c Wed Jan 17 09:56:40 2007 -0500 @@ -410,10 +410,6 @@ static void vmx_store_cpu_guest_regs( regs->eflags = __vmread(GUEST_RFLAGS); regs->ss = __vmread(GUEST_SS_SELECTOR); regs->cs = __vmread(GUEST_CS_SELECTOR); - regs->ds = __vmread(GUEST_DS_SELECTOR); - regs->es = __vmread(GUEST_ES_SELECTOR); - regs->gs = __vmread(GUEST_GS_SELECTOR); - regs->fs = __vmread(GUEST_FS_SELECTOR); regs->eip = __vmread(GUEST_RIP); regs->esp = __vmread(GUEST_RSP); } @@ -429,62 +425,39 @@ static void vmx_store_cpu_guest_regs( vmx_vmcs_exit(v); } -/* - * The VMX spec (section 4.3.1.2, Checks on Guest Segment - * Registers) says that virtual-8086 mode guests' segment - * base-address fields in the VMCS must be equal to their - * corresponding segment selector field shifted right by - * four bits upon vmentry. - * - * This function (called only for VM86-mode guests) fixes - * the bases to be consistent with the selectors in regs - * if they're not already. Without this, we can fail the - * vmentry check mentioned above. 
- */ -static void fixup_vm86_seg_bases(struct cpu_user_regs *regs) +static void vmx_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs) { unsigned long base; - base = __vmread(GUEST_ES_BASE); - if (regs->es << 4 != base) - __vmwrite(GUEST_ES_BASE, regs->es << 4); - base = __vmread(GUEST_CS_BASE); - if (regs->cs << 4 != base) - __vmwrite(GUEST_CS_BASE, regs->cs << 4); - base = __vmread(GUEST_SS_BASE); - if (regs->ss << 4 != base) - __vmwrite(GUEST_SS_BASE, regs->ss << 4); - base = __vmread(GUEST_DS_BASE); - if (regs->ds << 4 != base) - __vmwrite(GUEST_DS_BASE, regs->ds << 4); - base = __vmread(GUEST_FS_BASE); - if (regs->fs << 4 != base) - __vmwrite(GUEST_FS_BASE, regs->fs << 4); - base = __vmread(GUEST_GS_BASE); - if (regs->gs << 4 != base) - __vmwrite(GUEST_GS_BASE, regs->gs << 4); -} - -static void vmx_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs) -{ vmx_vmcs_enter(v); __vmwrite(GUEST_SS_SELECTOR, regs->ss); - __vmwrite(GUEST_DS_SELECTOR, regs->ds); - __vmwrite(GUEST_ES_SELECTOR, regs->es); - __vmwrite(GUEST_GS_SELECTOR, regs->gs); - __vmwrite(GUEST_FS_SELECTOR, regs->fs); - __vmwrite(GUEST_RSP, regs->esp); /* NB. Bit 1 of RFLAGS must be set for VMENTRY to succeed. */ __vmwrite(GUEST_RFLAGS, regs->eflags | 2UL); - if (regs->eflags & EF_TF) + + if ( regs->eflags & EF_TF ) __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); else __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); - if (regs->eflags & EF_VM) - fixup_vm86_seg_bases(regs); + + if ( regs->eflags & EF_VM ) + { + /* + * The VMX spec (section 4.3.1.2, Checks on Guest Segment + * Registers) says that virtual-8086 mode guests' segment + * base-address fields in the VMCS must be equal to their + * corresponding segment selector field shifted right by + * four bits upon vmentry. 
+ */ + base = __vmread(GUEST_CS_BASE); + if ( (regs->cs << 4) != base ) + __vmwrite(GUEST_CS_BASE, regs->cs << 4); + base = __vmread(GUEST_SS_BASE); + if ( (regs->ss << 4) != base ) + __vmwrite(GUEST_SS_BASE, regs->ss << 4); + } __vmwrite(GUEST_CS_SELECTOR, regs->cs); __vmwrite(GUEST_RIP, regs->eip); @@ -518,8 +491,7 @@ static unsigned long vmx_get_segment_bas ASSERT(v == current); #ifdef __x86_64__ - if ( vmx_long_mode_enabled(v) && - (__vmread(GUEST_CS_AR_BYTES) & (1u<<13)) ) + if ( vmx_long_mode_enabled(v) && (__vmread(GUEST_CS_AR_BYTES) & (1u<<13)) ) long_mode = 1; #endif @@ -694,15 +666,13 @@ static int vmx_guest_x86_mode(struct vcp cs_ar_bytes = __vmread(GUEST_CS_AR_BYTES); - if ( vmx_long_mode_enabled(v) ) - return ((cs_ar_bytes & (1u<<13)) ? - X86EMUL_MODE_PROT64 : X86EMUL_MODE_PROT32); + if ( vmx_long_mode_enabled(v) && (cs_ar_bytes & (1u<<13)) ) + return 8; if ( vmx_realmode(v) ) - return X86EMUL_MODE_REAL; - - return ((cs_ar_bytes & (1u<<14)) ? - X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16); + return 2; + + return ((cs_ar_bytes & (1u<<14)) ? 
4 : 2); } static int vmx_pae_enabled(struct vcpu *v) @@ -2253,47 +2223,54 @@ static void vmx_reflect_exception(struct } } +static void vmx_failed_vmentry(unsigned int exit_reason) +{ + unsigned int failed_vmentry_reason = (uint16_t)exit_reason; + unsigned long exit_qualification; + + exit_qualification = __vmread(EXIT_QUALIFICATION); + printk("Failed vm entry (exit reason 0x%x) ", exit_reason); + switch ( failed_vmentry_reason ) + { + case EXIT_REASON_INVALID_GUEST_STATE: + printk("caused by invalid guest state (%ld).\n", exit_qualification); + break; + case EXIT_REASON_MSR_LOADING: + printk("caused by MSR entry %ld loading.\n", exit_qualification); + break; + case EXIT_REASON_MACHINE_CHECK: + printk("caused by machine check.\n"); + break; + default: + printk("reason not known yet!"); + break; + } + + printk("************* VMCS Area **************\n"); + vmcs_dump_vcpu(); + printk("**************************************\n"); + + domain_crash(current->domain); +} + asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs) { unsigned int exit_reason; unsigned long exit_qualification, inst_len = 0; struct vcpu *v = current; + TRACE_3D(TRC_VMX_VMEXIT + v->vcpu_id, 0, 0, 0); + exit_reason = __vmread(VM_EXIT_REASON); perfc_incra(vmexits, exit_reason); + TRACE_VMEXIT(0, exit_reason); if ( exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT ) local_irq_enable(); if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) ) - { - unsigned int failed_vmentry_reason = exit_reason & 0xFFFF; - - exit_qualification = __vmread(EXIT_QUALIFICATION); - printk("Failed vm entry (exit reason 0x%x) ", exit_reason); - switch ( failed_vmentry_reason ) { - case EXIT_REASON_INVALID_GUEST_STATE: - printk("caused by invalid guest state (%ld).\n", exit_qualification); - break; - case EXIT_REASON_MSR_LOADING: - printk("caused by MSR entry %ld loading.\n", exit_qualification); - break; - case EXIT_REASON_MACHINE_CHECK: - printk("caused by machine check.\n"); - break; - default: - printk("reason 
not known yet!"); - break; - } - - printk("************* VMCS Area **************\n"); - vmcs_dump_vcpu(); - printk("**************************************\n"); - goto exit_and_crash; - } - - TRACE_VMEXIT(0, exit_reason); + return vmx_failed_vmentry(exit_reason); switch ( exit_reason ) { @@ -2521,11 +2498,6 @@ asmlinkage void vmx_trace_vmentry(void) TRACE_VMEXIT(4, 0); } -asmlinkage void vmx_trace_vmexit (void) -{ - TRACE_3D(TRC_VMX_VMEXIT + current->vcpu_id, 0, 0, 0); -} - /* * Local variables: * mode: C diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/vmx/x86_32/exits.S --- a/xen/arch/x86/hvm/vmx/x86_32/exits.S Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S Wed Jan 17 09:56:40 2007 -0500 @@ -29,35 +29,7 @@ andl $~3,reg; \ movl (reg),reg; -/* - * At VMExit time the processor saves the guest selectors, esp, eip, - * and eflags. Therefore we don't save them, but simply decrement - * the kernel stack pointer to make it consistent with the stack frame - * at usual interruption time. The eflags of the host is not saved by VMX, - * and we set it to the fixed value. - * - * We also need the room, especially because orig_eax field is used - * by do_IRQ(). Compared the cpu_user_regs, we skip pushing for the following: - * (10) u32 gs; - * (9) u32 fs; - * (8) u32 ds; - * (7) u32 es; - * <- get_stack_bottom() (= HOST_ESP) - * (6) u32 ss; - * (5) u32 esp; - * (4) u32 eflags; - * (3) u32 cs; - * (2) u32 eip; - * (2/1) u16 entry_vector; - * (1/1) u16 error_code; - * However, get_stack_bottom() actually returns 20 bytes before the real - * bottom of the stack to allow space for: - * domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers. - */ - -#define NR_SKIPPED_REGS 6 /* See the above explanation */ #define HVM_SAVE_ALL_NOSEGREGS \ - subl $(NR_SKIPPED_REGS*4), %esp; \ movl $0, 0xc(%esp); /* XXX why do we need to force eflags==0 ?? 
*/ \ pushl %eax; \ pushl %ebp; \ @@ -74,14 +46,11 @@ popl %esi; \ popl %edi; \ popl %ebp; \ - popl %eax; \ - addl $(NR_SKIPPED_REGS*4), %esp + popl %eax ALIGN ENTRY(vmx_asm_vmexit_handler) - /* selectors are restored/saved by VMX */ HVM_SAVE_ALL_NOSEGREGS - call vmx_trace_vmexit movl %esp,%eax push %eax call vmx_vmexit_handler diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/hvm/vmx/x86_64/exits.S --- a/xen/arch/x86/hvm/vmx/x86_64/exits.S Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S Wed Jan 17 09:56:40 2007 -0500 @@ -29,31 +29,7 @@ andq $~7,reg; \ movq (reg),reg; -/* - * At VMExit time the processor saves the guest selectors, rsp, rip, - * and rflags. Therefore we don't save them, but simply decrement - * the kernel stack pointer to make it consistent with the stack frame - * at usual interruption time. The rflags of the host is not saved by VMX, - * and we set it to the fixed value. - * - * We also need the room, especially because orig_eax field is used - * by do_IRQ(). 
Compared the cpu_user_regs, we skip pushing for the following: - * (10) u64 gs; - * (9) u64 fs; - * (8) u64 ds; - * (7) u64 es; - * <- get_stack_bottom() (= HOST_ESP) - * (6) u64 ss; - * (5) u64 rsp; - * (4) u64 rflags; - * (3) u64 cs; - * (2) u64 rip; - * (2/1) u32 entry_vector; - * (1/1) u32 error_code; - */ -#define NR_SKIPPED_REGS 6 /* See the above explanation */ #define HVM_SAVE_ALL_NOSEGREGS \ - subq $(NR_SKIPPED_REGS*8), %rsp; \ pushq %rdi; \ pushq %rsi; \ pushq %rdx; \ @@ -85,14 +61,11 @@ popq %rcx; \ popq %rdx; \ popq %rsi; \ - popq %rdi; \ - addq $(NR_SKIPPED_REGS*8), %rsp; + popq %rdi ALIGN ENTRY(vmx_asm_vmexit_handler) - /* selectors are restored/saved by VMX */ HVM_SAVE_ALL_NOSEGREGS - call vmx_trace_vmexit movq %rsp,%rdi call vmx_vmexit_handler jmp vmx_asm_do_vmentry diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/irq.c --- a/xen/arch/x86/irq.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/irq.c Wed Jan 17 09:56:40 2007 -0500 @@ -13,6 +13,7 @@ #include <xen/perfc.h> #include <xen/sched.h> #include <xen/keyhandler.h> +#include <xen/compat.h> #include <asm/current.h> #include <asm/smpboot.h> @@ -332,7 +333,7 @@ int pirq_guest_unmask(struct domain *d) irq < NR_IRQS; irq = find_next_bit(d->pirq_mask, NR_IRQS, irq+1) ) { - if ( !test_bit(d->pirq_to_evtchn[irq], s->evtchn_mask) ) + if ( !test_bit(d->pirq_to_evtchn[irq], __shared_info_addr(d, s, evtchn_mask)) ) __pirq_guest_eoi(d, irq); } @@ -624,14 +625,13 @@ static void dump_irqs(unsigned char key) printk("%u(%c%c%c%c)", d->domain_id, (test_bit(d->pirq_to_evtchn[irq], - d->shared_info->evtchn_pending) ? + shared_info_addr(d, evtchn_pending)) ? 'P' : '-'), - (test_bit(d->pirq_to_evtchn[irq]/BITS_PER_LONG, - &d->shared_info->vcpu_info[0]. - evtchn_pending_sel) ? + (test_bit(d->pirq_to_evtchn[irq]/BITS_PER_GUEST_LONG(d), + vcpu_info_addr(d->vcpu[0], evtchn_pending_sel)) ? 'S' : '-'), (test_bit(d->pirq_to_evtchn[irq], - d->shared_info->evtchn_mask) ? + shared_info_addr(d, evtchn_mask)) ? 
'M' : '-'), (test_bit(irq, d->pirq_mask) ? 'M' : '-')); diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/mm.c Wed Jan 17 09:56:40 2007 -0500 @@ -106,6 +106,7 @@ #include <asm/ldt.h> #include <asm/x86_emulate.h> #include <asm/e820.h> +#include <asm/hypercall.h> #include <public/memory.h> #define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a) @@ -118,20 +119,6 @@ #if !defined(NDEBUG) || defined(CONFIG_X86_PAE) #define PTE_UPDATE_WITH_CMPXCHG #endif - -/* - * Both do_mmuext_op() and do_mmu_update(): - * We steal the m.s.b. of the @count parameter to indicate whether this - * invocation of do_mmu_update() is resuming a previously preempted call. - */ -#define MMU_UPDATE_PREEMPTED (~(~0U>>1)) - -static void free_l2_table(struct page_info *page); -static void free_l1_table(struct page_info *page); - -static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long, - unsigned long type); -static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t, unsigned long gl1mfn); /* Used to defer flushing of memory structures. */ struct percpu_mm_info { @@ -157,6 +144,15 @@ struct page_info *frame_table; struct page_info *frame_table; unsigned long max_page; unsigned long total_pages; + +#ifdef CONFIG_COMPAT +l2_pgentry_t *compat_idle_pg_table_l2 = NULL; +#define l3_disallow_mask(d) (!IS_COMPAT(d) ? 
\ + L3_DISALLOW_MASK : \ + COMPAT_L3_DISALLOW_MASK) +#else +#define l3_disallow_mask(d) L3_DISALLOW_MASK +#endif void __init init_frametable(void) { @@ -433,7 +429,7 @@ static int alloc_segdesc_page(struct pag descs = map_domain_page(page_to_mfn(page)); for ( i = 0; i < 512; i++ ) - if ( unlikely(!check_descriptor(&descs[i])) ) + if ( unlikely(!check_descriptor(page_get_owner(page), &descs[i])) ) goto fail; unmap_domain_page(descs); @@ -661,9 +657,9 @@ get_page_from_l3e( if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) return 1; - if ( unlikely((l3e_get_flags(l3e) & L3_DISALLOW_MASK)) ) - { - MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & L3_DISALLOW_MASK); + if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) ) + { + MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & l3_disallow_mask(d)); return 0; } @@ -700,9 +696,10 @@ get_page_from_l4e( #ifdef __x86_64__ #ifdef USER_MAPPINGS_ARE_GLOBAL -#define adjust_guest_l1e(pl1e) \ +#define adjust_guest_l1e(pl1e, d) \ do { \ - if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) ) \ + if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \ + likely(!IS_COMPAT(d)) ) \ { \ /* _PAGE_GUEST_KERNEL page cannot have the Global bit set. 
*/ \ if ( (l1e_get_flags((pl1e)) & (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL)) \ @@ -716,37 +713,53 @@ get_page_from_l4e( } \ } while ( 0 ) #else -#define adjust_guest_l1e(pl1e) \ +#define adjust_guest_l1e(pl1e, d) \ do { \ - if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) ) \ + if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \ + likely(!IS_COMPAT(d)) ) \ l1e_add_flags((pl1e), _PAGE_USER); \ } while ( 0 ) #endif -#define adjust_guest_l2e(pl2e) \ +#define adjust_guest_l2e(pl2e, d) \ do { \ - if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) ) \ + if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) && \ + likely(!IS_COMPAT(d)) ) \ l2e_add_flags((pl2e), _PAGE_USER); \ } while ( 0 ) -#define adjust_guest_l3e(pl3e) \ +#define adjust_guest_l3e(pl3e, d) \ do { \ if ( likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \ - l3e_add_flags((pl3e), _PAGE_USER); \ + l3e_add_flags((pl3e), likely(!IS_COMPAT(d)) ? \ + _PAGE_USER : \ + _PAGE_USER|_PAGE_RW); \ } while ( 0 ) -#define adjust_guest_l4e(pl4e) \ +#define adjust_guest_l4e(pl4e, d) \ do { \ - if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) ) \ + if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) && \ + likely(!IS_COMPAT(d)) ) \ l4e_add_flags((pl4e), _PAGE_USER); \ } while ( 0 ) #else /* !defined(__x86_64__) */ -#define adjust_guest_l1e(_p) ((void)0) -#define adjust_guest_l2e(_p) ((void)0) -#define adjust_guest_l3e(_p) ((void)0) - +#define adjust_guest_l1e(_p, _d) ((void)(_d)) +#define adjust_guest_l2e(_p, _d) ((void)(_d)) +#define adjust_guest_l3e(_p, _d) ((void)(_d)) + +#endif + +#ifdef CONFIG_COMPAT +#define unadjust_guest_l3e(pl3e, d) \ + do { \ + if ( unlikely(IS_COMPAT(d)) && \ + likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \ + l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); \ + } while ( 0 ) +#else +#define unadjust_guest_l3e(_p, _d) ((void)(_d)) #endif void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d) @@ -813,7 +826,7 @@ static void put_page_from_l2e(l2_pgentry { if ( (l2e_get_flags(l2e) & 
_PAGE_PRESENT) && (l2e_get_pfn(l2e) != pfn) ) - put_page_and_type(mfn_to_page(l2e_get_pfn(l2e))); + put_page_and_type(l2e_get_page(l2e)); } @@ -822,7 +835,7 @@ static void put_page_from_l3e(l3_pgentry { if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) && (l3e_get_pfn(l3e) != pfn) ) - put_page_and_type(mfn_to_page(l3e_get_pfn(l3e))); + put_page_and_type(l3e_get_page(l3e)); } #endif @@ -831,7 +844,7 @@ static void put_page_from_l4e(l4_pgentry { if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) && (l4e_get_pfn(l4e) != pfn) ) - put_page_and_type(mfn_to_page(l4e_get_pfn(l4e))); + put_page_and_type(l4e_get_page(l4e)); } #endif @@ -850,7 +863,7 @@ static int alloc_l1_table(struct page_in unlikely(!get_page_from_l1e(pl1e[i], d)) ) goto fail; - adjust_guest_l1e(pl1e[i]); + adjust_guest_l1e(pl1e[i], d); } unmap_domain_page(pl1e); @@ -866,13 +879,20 @@ static int alloc_l1_table(struct page_in return 0; } -#ifdef CONFIG_X86_PAE -static int create_pae_xen_mappings(l3_pgentry_t *pl3e) +#if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT) +static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e) { struct page_info *page; - l2_pgentry_t *pl2e, l2e; + l2_pgentry_t *pl2e; l3_pgentry_t l3e3; +#ifndef CONFIG_COMPAT + l2_pgentry_t l2e; int i; +#else + + if ( !IS_COMPAT(d) ) + return 1; +#endif pl3e = (l3_pgentry_t *)((unsigned long)pl3e & PAGE_MASK); @@ -905,6 +925,7 @@ static int create_pae_xen_mappings(l3_pg /* Xen private mappings. 
*/ pl2e = map_domain_page(l3e_get_pfn(l3e3)); +#ifndef CONFIG_COMPAT memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)], &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT], L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t)); @@ -922,11 +943,20 @@ static int create_pae_xen_mappings(l3_pg l2e = l2e_from_pfn(l3e_get_pfn(pl3e[i]), __PAGE_HYPERVISOR); l2e_write(&pl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i], l2e); } +#else + memcpy(&pl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)], + &compat_idle_pg_table_l2[l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)], + COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*pl2e)); +#endif unmap_domain_page(pl2e); return 1; } - +#else +# define create_pae_xen_mappings(d, pl3e) (1) +#endif + +#ifdef CONFIG_X86_PAE /* Flush a pgdir update into low-memory caches. */ static void pae_flush_pgd( unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e) @@ -961,12 +991,8 @@ static void pae_flush_pgd( flush_tlb_mask(d->domain_dirty_cpumask); } - -#elif CONFIG_X86_64 -# define create_pae_xen_mappings(pl3e) (1) +#else # define pae_flush_pgd(mfn, idx, nl3e) ((void)0) -#else -# define create_pae_xen_mappings(pl3e) (1) #endif static int alloc_l2_table(struct page_info *page, unsigned long type) @@ -980,11 +1006,11 @@ static int alloc_l2_table(struct page_in for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) { - if ( is_guest_l2_slot(type, i) && + if ( is_guest_l2_slot(d, type, i) && unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) ) goto fail; - adjust_guest_l2e(pl2e[i]); + adjust_guest_l2e(pl2e[i], d); } #if CONFIG_PAGING_LEVELS == 2 @@ -1007,7 +1033,7 @@ static int alloc_l2_table(struct page_in fail: MEM_LOG("Failure in alloc_l2_table: entry %d", i); while ( i-- > 0 ) - if ( is_guest_l2_slot(type, i) ) + if ( is_guest_l2_slot(d, type, i) ) put_page_from_l2e(pl2e[i], pfn); unmap_domain_page(pl2e); @@ -1039,13 +1065,24 @@ static int alloc_l3_table(struct page_in #endif pl3e = map_domain_page(pfn); + + /* + * PAE guests allocate full pages, but aren't required to 
initialize + * more than the first four entries; when running in compatibility + * mode, however, the full page is visible to the MMU, and hence all + * 512 entries must be valid/verified, which is most easily achieved + * by clearing them out. + */ + if ( IS_COMPAT(d) ) + memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e)); + for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) { -#ifdef CONFIG_X86_PAE - if ( i == 3 ) +#if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT) + if ( (CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d)) && i == 3 ) { if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) || - (l3e_get_flags(pl3e[i]) & L3_DISALLOW_MASK) || + (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) || !get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]), PGT_l2_page_table | PGT_pae_xen_l2, @@ -1058,10 +1095,10 @@ static int alloc_l3_table(struct page_in unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) ) goto fail; - adjust_guest_l3e(pl3e[i]); - } - - if ( !create_pae_xen_mappings(pl3e) ) + adjust_guest_l3e(pl3e[i], d); + } + + if ( !create_pae_xen_mappings(d, pl3e) ) goto fail; unmap_domain_page(pl3e); @@ -1094,7 +1131,7 @@ static int alloc_l4_table(struct page_in unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) ) goto fail; - adjust_guest_l4e(pl4e[i]); + adjust_guest_l4e(pl4e[i], d); } /* Xen private mappings. 
*/ @@ -1104,9 +1141,12 @@ static int alloc_l4_table(struct page_in pl4e[l4_table_offset(LINEAR_PT_VIRT_START)] = l4e_from_pfn(pfn, __PAGE_HYPERVISOR); pl4e[l4_table_offset(PERDOMAIN_VIRT_START)] = - l4e_from_page( - virt_to_page(page_get_owner(page)->arch.mm_perdomain_l3), - __PAGE_HYPERVISOR); + l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3), + __PAGE_HYPERVISOR); + if ( IS_COMPAT(d) ) + pl4e[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] = + l4e_from_page(virt_to_page(d->arch.mm_arg_xlat_l3), + __PAGE_HYPERVISOR); return 1; @@ -1142,6 +1182,9 @@ static void free_l1_table(struct page_in static void free_l2_table(struct page_info *page) { +#ifdef CONFIG_COMPAT + struct domain *d = page_get_owner(page); +#endif unsigned long pfn = page_to_mfn(page); l2_pgentry_t *pl2e; int i; @@ -1149,7 +1192,7 @@ static void free_l2_table(struct page_in pl2e = map_domain_page(pfn); for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) - if ( is_guest_l2_slot(page->u.inuse.type_info, i) ) + if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) ) put_page_from_l2e(pl2e[i], pfn); unmap_domain_page(pl2e); @@ -1162,6 +1205,7 @@ static void free_l2_table(struct page_in static void free_l3_table(struct page_info *page) { + struct domain *d = page_get_owner(page); unsigned long pfn = page_to_mfn(page); l3_pgentry_t *pl3e; int i; @@ -1170,7 +1214,10 @@ static void free_l3_table(struct page_in for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) if ( is_guest_l3_slot(i) ) + { put_page_from_l3e(pl3e[i], pfn); + unadjust_guest_l3e(pl3e[i], d); + } unmap_domain_page(pl3e); } @@ -1270,7 +1317,7 @@ static int mod_l1_entry(l1_pgentry_t *pl return 0; } - adjust_guest_l1e(nl1e); + adjust_guest_l1e(nl1e, d); /* Fast path for identical mapping, r/w and presence. 
*/ if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) ) @@ -1303,8 +1350,9 @@ static int mod_l2_entry(l2_pgentry_t *pl unsigned long type) { l2_pgentry_t ol2e; - - if ( unlikely(!is_guest_l2_slot(type,pgentry_ptr_to_slot(pl2e))) ) + struct domain *d = current->domain; + + if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) ) { MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e); return 0; @@ -1322,13 +1370,13 @@ static int mod_l2_entry(l2_pgentry_t *pl return 0; } - adjust_guest_l2e(nl2e); + adjust_guest_l2e(nl2e, d); /* Fast path for identical mapping and presence. */ if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT)) return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current); - if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain)) ) + if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) ) return 0; if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) ) @@ -1354,6 +1402,7 @@ static int mod_l3_entry(l3_pgentry_t *pl unsigned long pfn) { l3_pgentry_t ol3e; + struct domain *d = current->domain; int okay; if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) ) @@ -1362,12 +1411,13 @@ static int mod_l3_entry(l3_pgentry_t *pl return 0; } -#ifdef CONFIG_X86_PAE +#if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT) /* * Disallow updates to final L3 slot. It contains Xen mappings, and it * would be a pain to ensure they remain continuously valid throughout. 
*/ - if ( pgentry_ptr_to_slot(pl3e) >= 3 ) + if ( (CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d)) && + pgentry_ptr_to_slot(pl3e) >= 3 ) return 0; #endif @@ -1376,20 +1426,20 @@ static int mod_l3_entry(l3_pgentry_t *pl if ( l3e_get_flags(nl3e) & _PAGE_PRESENT ) { - if ( unlikely(l3e_get_flags(nl3e) & L3_DISALLOW_MASK) ) + if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) ) { MEM_LOG("Bad L3 flags %x", - l3e_get_flags(nl3e) & L3_DISALLOW_MASK); + l3e_get_flags(nl3e) & l3_disallow_mask(d)); return 0; } - adjust_guest_l3e(nl3e); + adjust_guest_l3e(nl3e, d); /* Fast path for identical mapping and presence. */ if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT)) return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current); - if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain)) ) + if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) ) return 0; if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) ) @@ -1403,7 +1453,7 @@ static int mod_l3_entry(l3_pgentry_t *pl return 0; } - okay = create_pae_xen_mappings(pl3e); + okay = create_pae_xen_mappings(d, pl3e); BUG_ON(!okay); pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e); @@ -1441,7 +1491,7 @@ static int mod_l4_entry(l4_pgentry_t *pl return 0; } - adjust_guest_l4e(nl4e); + adjust_guest_l4e(nl4e, current->domain); /* Fast path for identical mapping and presence. 
*/ if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT)) @@ -1712,6 +1762,33 @@ int new_guest_cr3(unsigned long mfn) if ( is_hvm_domain(d) && !hvm_paging_enabled(v) ) return 0; +#ifdef CONFIG_COMPAT + if ( IS_COMPAT(d) ) + { + l4_pgentry_t l4e = l4e_from_pfn(mfn, _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED); + + if ( shadow_mode_refcounts(d) ) + { + okay = get_page_from_pagenr(mfn, d); + old_base_mfn = l4e_get_pfn(l4e); + if ( okay && old_base_mfn ) + put_page(mfn_to_page(old_base_mfn)); + } + else + okay = mod_l4_entry(__va(pagetable_get_paddr(v->arch.guest_table)), + l4e, 0); + if ( unlikely(!okay) ) + { + MEM_LOG("Error while installing new compat baseptr %lx", mfn); + return 0; + } + + invalidate_shadow_ldt(v); + write_ptbase(v); + + return 1; + } +#endif if ( shadow_mode_refcounts(d) ) { okay = get_page_from_pagenr(mfn, d); @@ -1950,6 +2027,8 @@ int do_mmuext_op( goto pin_page; case MMUEXT_PIN_L4_TABLE: + if ( IS_COMPAT(FOREIGNDOM) ) + break; type = PGT_l4_page_table; pin_page: @@ -2013,7 +2092,11 @@ int do_mmuext_op( #ifdef __x86_64__ case MMUEXT_NEW_USER_BASEPTR: - okay = 1; + if ( IS_COMPAT(FOREIGNDOM) ) + { + okay = 0; + break; + } if (likely(mfn != 0)) { if ( shadow_mode_refcounts(d) ) @@ -2265,8 +2348,7 @@ int do_mmu_update( case PGT_l2_page_table: { l2_pgentry_t l2e = l2e_from_intpte(req.val); - okay = mod_l2_entry( - (l2_pgentry_t *)va, l2e, mfn, type_info); + okay = mod_l2_entry(va, l2e, mfn, type_info); } break; #if CONFIG_PAGING_LEVELS >= 3 @@ -2279,11 +2361,12 @@ int do_mmu_update( #endif #if CONFIG_PAGING_LEVELS >= 4 case PGT_l4_page_table: - { - l4_pgentry_t l4e = l4e_from_intpte(req.val); - okay = mod_l4_entry(va, l4e, mfn); - } - break; + if ( !IS_COMPAT(FOREIGNDOM) ) + { + l4_pgentry_t l4e = l4e_from_intpte(req.val); + okay = mod_l4_entry(va, l4e, mfn); + } + break; #endif } @@ -2387,7 +2470,7 @@ static int create_grant_pte_mapping( ASSERT(spin_is_locked(&d->big_lock)); - adjust_guest_l1e(nl1e); + adjust_guest_l1e(nl1e, d); gmfn = pte_addr >> 
PAGE_SHIFT; mfn = gmfn_to_mfn(d, gmfn); @@ -2508,7 +2591,7 @@ static int create_grant_va_mapping( ASSERT(spin_is_locked(&d->big_lock)); - adjust_guest_l1e(nl1e); + adjust_guest_l1e(nl1e, d); pl1e = guest_map_l1e(v, va, &gl1mfn); if ( !pl1e ) @@ -2676,7 +2759,9 @@ int do_update_va_mapping(unsigned long v flush_tlb_mask(d->domain_dirty_cpumask); break; default: - if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) ) + if ( unlikely(!IS_COMPAT(d) ? + get_user(vmask, (unsigned long *)bmap_ptr) : + get_user(vmask, (unsigned int *)bmap_ptr)) ) rc = -EFAULT; pmask = vcpumask_to_pcpumask(d, vmask); flush_tlb_mask(pmask); @@ -2835,7 +2920,7 @@ long do_update_descriptor(u64 pa, u64 de mfn = gmfn_to_mfn(dom, gmfn); if ( (((unsigned int)pa % sizeof(struct desc_struct)) != 0) || !mfn_valid(mfn) || - !check_descriptor(&d) ) + !check_descriptor(dom, &d) ) { UNLOCK_BIGLOCK(dom); return -EINVAL; @@ -3097,7 +3182,7 @@ static int ptwr_emulated_update( unsigned int do_cmpxchg, struct ptwr_emulate_ctxt *ptwr_ctxt) { - unsigned long gmfn, mfn; + unsigned long mfn; struct page_info *page; l1_pgentry_t pte, ol1e, nl1e, *pl1e; struct vcpu *v = current; @@ -3137,8 +3222,7 @@ static int ptwr_emulated_update( } pte = ptwr_ctxt->pte; - gmfn = l1e_get_pfn(pte); - mfn = gmfn_to_mfn(d, gmfn); + mfn = l1e_get_pfn(pte); page = mfn_to_page(mfn); /* We are looking only for read-only mappings of p.t. pages. */ @@ -3151,7 +3235,7 @@ static int ptwr_emulated_update( nl1e = l1e_from_intpte(val); if ( unlikely(!get_page_from_l1e(gl1e_to_ml1e(d, nl1e), d)) ) { - if ( (CONFIG_PAGING_LEVELS == 3) && + if ( (CONFIG_PAGING_LEVELS == 3 || IS_COMPAT(d)) && (bytes == 4) && !do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) ) @@ -3173,7 +3257,7 @@ static int ptwr_emulated_update( } } - adjust_guest_l1e(nl1e); + adjust_guest_l1e(nl1e, d); /* Checked successfully: do the update (write or cmpxchg). 
*/ pl1e = map_domain_page(page_to_mfn(page)); @@ -3269,7 +3353,6 @@ int ptwr_do_page_fault(struct vcpu *v, u struct cpu_user_regs *regs) { struct domain *d = v->domain; - unsigned long pfn; struct page_info *page; l1_pgentry_t pte; struct ptwr_emulate_ctxt ptwr_ctxt; @@ -3283,8 +3366,7 @@ int ptwr_do_page_fault(struct vcpu *v, u guest_get_eff_l1e(v, addr, &pte); if ( !(l1e_get_flags(pte) & _PAGE_PRESENT) ) goto bail; - pfn = l1e_get_pfn(pte); - page = mfn_to_page(pfn); + page = l1e_get_page(pte); /* We are looking only for read-only mappings of p.t. pages. */ if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) || @@ -3294,10 +3376,11 @@ int ptwr_do_page_fault(struct vcpu *v, u goto bail; ptwr_ctxt.ctxt.regs = guest_cpu_user_regs(); - ptwr_ctxt.ctxt.mode = X86EMUL_MODE_HOST; - ptwr_ctxt.cr2 = addr; - ptwr_ctxt.pte = pte; - if ( x86_emulate_memop(&ptwr_ctxt.ctxt, &ptwr_emulate_ops) ) + ptwr_ctxt.ctxt.addr_size = ptwr_ctxt.ctxt.sp_size = + IS_COMPAT(d) ? 32 : BITS_PER_LONG; + ptwr_ctxt.cr2 = addr; + ptwr_ctxt.pte = pte; + if ( x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops) ) goto bail; UNLOCK_BIGLOCK(d); diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/mm/shadow/common.c Wed Jan 17 09:56:40 2007 -0500 @@ -36,6 +36,7 @@ #include <asm/current.h> #include <asm/flushtlb.h> #include <asm/shadow.h> +#include <asm/shared.h> #include "private.h" @@ -109,7 +110,7 @@ static int hvm_translate_linear_addr( unsigned long limit, addr = offset; uint32_t last_byte; - if ( sh_ctxt->ctxt.mode != X86EMUL_MODE_PROT64 ) + if ( sh_ctxt->ctxt.addr_size != 64 ) { /* * COMPATIBILITY MODE: Apply segment checks and add base. 
@@ -398,7 +399,7 @@ struct x86_emulate_ops *shadow_init_emul struct x86_emulate_ops *shadow_init_emulation( struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs) { - struct segment_register *creg; + struct segment_register *creg, *sreg; struct vcpu *v = current; unsigned long addr; @@ -406,7 +407,7 @@ struct x86_emulate_ops *shadow_init_emul if ( !is_hvm_vcpu(v) ) { - sh_ctxt->ctxt.mode = X86EMUL_MODE_HOST; + sh_ctxt->ctxt.addr_size = sh_ctxt->ctxt.sp_size = BITS_PER_LONG; return &pv_shadow_emulator_ops; } @@ -415,14 +416,20 @@ struct x86_emulate_ops *shadow_init_emul creg = hvm_get_seg_reg(x86_seg_cs, sh_ctxt); /* Work out the emulation mode. */ - if ( hvm_long_mode_enabled(v) ) - sh_ctxt->ctxt.mode = creg->attr.fields.l ? - X86EMUL_MODE_PROT64 : X86EMUL_MODE_PROT32; + if ( hvm_long_mode_enabled(v) && creg->attr.fields.l ) + { + sh_ctxt->ctxt.addr_size = sh_ctxt->ctxt.sp_size = 64; + } else if ( regs->eflags & X86_EFLAGS_VM ) - sh_ctxt->ctxt.mode = X86EMUL_MODE_REAL; + { + sh_ctxt->ctxt.addr_size = sh_ctxt->ctxt.sp_size = 16; + } else - sh_ctxt->ctxt.mode = creg->attr.fields.db ? - X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; + { + sreg = hvm_get_seg_reg(x86_seg_ss, sh_ctxt); + sh_ctxt->ctxt.addr_size = creg->attr.fields.db ? 32 : 16; + sh_ctxt->ctxt.sp_size = sreg->attr.fields.db ? 32 : 16; + } /* Attempt to prefetch whole instruction. */ sh_ctxt->insn_buf_bytes = @@ -1304,6 +1311,9 @@ shadow_alloc_p2m_table(struct domain *d) if ( !shadow_set_p2m_entry(d, gfn, mfn) ) goto error; + /* Build a p2m map that matches the m2p entries for this domain's + * allocated pages. Skip any pages that have an explicitly invalid + * or obviously bogus m2p entry. 
*/ for ( entry = d->page_list.next; entry != &d->page_list; entry = entry->next ) @@ -1319,6 +1329,8 @@ shadow_alloc_p2m_table(struct domain *d) (gfn != 0x55555555L) #endif && gfn != INVALID_M2P_ENTRY + && (gfn < + (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof (l1_pgentry_t)) && !shadow_set_p2m_entry(d, gfn, mfn) ) goto error; } @@ -2442,9 +2454,10 @@ static void sh_update_paging_modes(struc /// PV guest /// #if CONFIG_PAGING_LEVELS == 4 - /* When 32-on-64 PV guests are supported, they must choose - * a different mode here */ - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4); + if ( pv_32bit_guest(v) ) + v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); + else + v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4); #elif CONFIG_PAGING_LEVELS == 3 v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); #elif CONFIG_PAGING_LEVELS == 2 @@ -2917,7 +2930,7 @@ sh_alloc_log_dirty_bitmap(struct domain { ASSERT(d->arch.shadow.dirty_bitmap == NULL); d->arch.shadow.dirty_bitmap_size = - (d->shared_info->arch.max_pfn + (BITS_PER_LONG - 1)) & + (arch_get_max_pfn(d) + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1); d->arch.shadow.dirty_bitmap = xmalloc_array(unsigned long, @@ -3259,7 +3272,7 @@ void shadow_mark_dirty(struct domain *d, int shadow_domctl(struct domain *d, xen_domctl_shadow_op_t *sc, - XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) + XEN_GUEST_HANDLE(void) u_domctl) { int rc, preempted = 0; diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/mm/shadow/multi.c Wed Jan 17 09:56:40 2007 -0500 @@ -851,9 +851,7 @@ static inline void safe_write_entry(void * then writing the high word before the low word. 
*/ BUILD_BUG_ON(sizeof (shadow_l1e_t) != 2 * sizeof (unsigned long)); d[0] = 0; - wmb(); d[1] = s[1]; - wmb(); d[0] = s[0]; #else /* In 32-bit and 64-bit, sizeof(pte) == sizeof(ulong) == 1 word, @@ -1422,7 +1420,7 @@ void sh_install_xen_entries_in_l4(struct } #endif -#if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3 +#if (CONFIG_PAGING_LEVELS == 3 || defined(CONFIG_COMPAT)) && GUEST_PAGING_LEVELS == 3 // For 3-on-3 PV guests, we need to make sure the xen mappings are in // place, which means that we need to populate the l2h entry in the l3 // table. @@ -1432,12 +1430,20 @@ void sh_install_xen_entries_in_l2h(struc { struct domain *d = v->domain; shadow_l2e_t *sl2e; +#if CONFIG_PAGING_LEVELS == 3 int i; +#else + + if ( !pv_32bit_guest(v) ) + return; +#endif sl2e = sh_map_domain_page(sl2hmfn); ASSERT(sl2e != NULL); ASSERT(sizeof (l2_pgentry_t) == sizeof (shadow_l2e_t)); +#if CONFIG_PAGING_LEVELS == 3 + /* Copy the common Xen mappings from the idle domain */ memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)], &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT], @@ -1478,6 +1484,15 @@ void sh_install_xen_entries_in_l2h(struc } sh_unmap_domain_page(p2m); } + +#else + + /* Copy the common Xen mappings from the idle domain */ + memcpy(&sl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)], + &compat_idle_pg_table_l2[l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)], + COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*sl2e)); + +#endif sh_unmap_domain_page(sl2e); } @@ -1638,12 +1653,15 @@ mfn_t mfn_t sh_make_monitor_table(struct vcpu *v) { + struct domain *d = v->domain; ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0); + /* Guarantee we can get the memory we need */ + shadow_prealloc(d, SHADOW_MAX_ORDER); + #if CONFIG_PAGING_LEVELS == 4 { - struct domain *d = v->domain; mfn_t m4mfn; m4mfn = shadow_alloc(d, SH_type_monitor_table, 0); sh_install_xen_entries_in_l4(v, m4mfn, m4mfn); @@ -1660,6 +1678,19 @@ sh_make_monitor_table(struct vcpu *v) l4e = 
sh_map_domain_page(m4mfn); l4e[0] = l4e_from_pfn(mfn_x(m3mfn), __PAGE_HYPERVISOR); sh_unmap_domain_page(l4e); + if ( pv_32bit_guest(v) ) + { + // Install a monitor l2 table in slot 3 of the l3 table. + // This is used for all Xen entries. + mfn_t m2mfn; + l3_pgentry_t *l3e; + m2mfn = shadow_alloc(d, SH_type_monitor_table, 0); + mfn_to_page(m2mfn)->shadow_flags = 2; + l3e = sh_map_domain_page(m3mfn); + l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT); + sh_install_xen_entries_in_l2h(v, m2mfn); + sh_unmap_domain_page(l3e); + } } #endif /* SHADOW_PAGING_LEVELS < 4 */ return m4mfn; @@ -1668,7 +1699,6 @@ sh_make_monitor_table(struct vcpu *v) #elif CONFIG_PAGING_LEVELS == 3 { - struct domain *d = v->domain; mfn_t m3mfn, m2mfn; l3_pgentry_t *l3e; l2_pgentry_t *l2e; @@ -1702,7 +1732,6 @@ sh_make_monitor_table(struct vcpu *v) #elif CONFIG_PAGING_LEVELS == 2 { - struct domain *d = v->domain; mfn_t m2mfn; m2mfn = shadow_alloc(d, SH_type_monitor_table, 0); sh_install_xen_entries_in_l2(v, m2mfn, m2mfn); @@ -2065,9 +2094,19 @@ void sh_destroy_monitor_table(struct vcp #if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS != 4) /* Need to destroy the l3 monitor page in slot 0 too */ { + mfn_t m3mfn; l4_pgentry_t *l4e = sh_map_domain_page(mmfn); ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT); - shadow_free(d, _mfn(l4e_get_pfn(l4e[0]))); + m3mfn = _mfn(l4e_get_pfn(l4e[0])); + if ( pv_32bit_guest(v) ) + { + /* Need to destroy the l2 monitor page in slot 3 too */ + l3_pgentry_t *l3e = sh_map_domain_page(m3mfn); + ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT); + shadow_free(d, _mfn(l3e_get_pfn(l3e[3]))); + sh_unmap_domain_page(l3e); + } + shadow_free(d, m3mfn); sh_unmap_domain_page(l4e); } #elif CONFIG_PAGING_LEVELS == 3 @@ -2836,7 +2875,7 @@ static int sh_page_fault(struct vcpu *v, * it seems very unlikely that any OS grants user access to page tables. 
*/ if ( (regs->error_code & PFEC_user_mode) || - x86_emulate_memop(&emul_ctxt.ctxt, emul_ops) ) + x86_emulate(&emul_ctxt.ctxt, emul_ops) ) { SHADOW_PRINTK("emulator failure, unshadowing mfn %#lx\n", mfn_x(gmfn)); @@ -3044,12 +3083,15 @@ sh_update_linear_entries(struct vcpu *v) #elif (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS == 3) - /* This case only exists in HVM. To give ourselves a linear map of the - * shadows, we need to extend a PAE shadow to 4 levels. We do this by - * having a monitor l3 in slot 0 of the monitor l4 table, and - * copying the PAE l3 entries into it. Then, by having the monitor l4e - * for shadow pagetables also point to the monitor l4, we can use it - * to access the shadows. */ + /* PV: XXX + * + * HVM: To give ourselves a linear map of the shadows, we need to + * extend a PAE shadow to 4 levels. We do this by having a monitor + * l3 in slot 0 of the monitor l4 table, and copying the PAE l3 + * entries into it. Then, by having the monitor l4e for shadow + * pagetables also point to the monitor l4, we can use it to access + * the shadows. 
+ */ if ( shadow_mode_external(d) ) { @@ -3092,6 +3134,8 @@ sh_update_linear_entries(struct vcpu *v) if ( v != current ) sh_unmap_domain_page(ml3e); } + else + domain_crash(d); /* XXX */ #elif CONFIG_PAGING_LEVELS == 3 @@ -3404,7 +3448,7 @@ sh_update_cr3(struct vcpu *v, int do_loc (unsigned long)pagetable_get_pfn(v->arch.guest_table)); #if GUEST_PAGING_LEVELS == 4 - if ( !(v->arch.flags & TF_kernel_mode) ) + if ( !(v->arch.flags & TF_kernel_mode) && !IS_COMPAT(v->domain) ) gmfn = pagetable_get_mfn(v->arch.guest_table_user); else #endif @@ -3900,7 +3944,7 @@ sh_x86_emulate_write(struct vcpu *v, uns if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes); /* If we are writing zeros to this page, might want to unshadow */ - if ( likely(bytes >= 4) && (*(u32 *)addr == 0) ) + if ( likely(bytes >= 4) && (*(u32 *)addr == 0) && is_lo_pte(vaddr) ) check_for_early_unshadow(v, mfn); sh_unmap_domain_page(addr); @@ -3952,7 +3996,7 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u vaddr, prev, old, new, *(unsigned long *)addr, bytes); /* If we are writing zeros to this page, might want to unshadow */ - if ( likely(bytes >= 4) && (*(u32 *)addr == 0) ) + if ( likely(bytes >= 4) && (*(u32 *)addr == 0) && is_lo_pte(vaddr) ) check_for_early_unshadow(v, mfn); sh_unmap_domain_page(addr); diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/mm/shadow/private.h --- a/xen/arch/x86/mm/shadow/private.h Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/mm/shadow/private.h Wed Jan 17 09:56:40 2007 -0500 @@ -427,6 +427,11 @@ extern int sh_remove_write_access(struct #undef mfn_valid #define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) +#if GUEST_PAGING_LEVELS >= 3 +# define is_lo_pte(_vaddr) (((_vaddr)&0x4)==0) +#else +# define is_lo_pte(_vaddr) (1) +#endif static inline int sh_mfn_is_a_page_table(mfn_t gmfn) diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/oprofile/nmi_int.c --- a/xen/arch/x86/oprofile/nmi_int.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/oprofile/nmi_int.c Wed Jan 17 09:56:40 
2007 -0500 @@ -42,7 +42,7 @@ extern size_t strlcpy(char *dest, const extern size_t strlcpy(char *dest, const char *src, size_t size); -int nmi_callback(struct cpu_user_regs *regs, int cpu) +static int nmi_callback(struct cpu_user_regs *regs, int cpu) { int xen_mode, ovf; diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/physdev.c --- a/xen/arch/x86/physdev.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/physdev.c Wed Jan 17 09:56:40 2007 -0500 @@ -9,8 +9,13 @@ #include <xen/guest_access.h> #include <asm/current.h> #include <asm/smpboot.h> +#include <asm/hypercall.h> #include <public/xen.h> #include <public/physdev.h> + +#ifndef COMPAT +typedef long ret_t; +#endif int ioapic_guest_read( @@ -19,10 +24,10 @@ ioapic_guest_write( ioapic_guest_write( unsigned long physbase, unsigned int reg, u32 pval); -long do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg) +ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg) { int irq; - long ret; + ret_t ret; switch ( cmd ) { @@ -129,7 +134,11 @@ long do_physdev_op(int cmd, XEN_GUEST_HA (set_iobitmap.nr_ports > 65536) ) break; ret = 0; +#ifndef COMPAT current->arch.iobmp = set_iobitmap.bitmap; +#else + guest_from_compat_handle(current->arch.iobmp, set_iobitmap.bitmap); +#endif current->arch.iobmp_limit = set_iobitmap.nr_ports; break; } diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/platform_hypercall.c --- a/xen/arch/x86/platform_hypercall.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/platform_hypercall.c Wed Jan 17 09:56:40 2007 -0500 @@ -23,11 +23,17 @@ #include <asm/mtrr.h> #include "cpu/mtrr/mtrr.h" -long do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op) +#ifndef COMPAT +typedef long ret_t; +DEFINE_SPINLOCK(xenpf_lock); +#else +extern spinlock_t xenpf_lock; +#endif + +ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op) { - long ret = 0; + ret_t ret = 0; struct xen_platform_op curop, *op = &curop; - static DEFINE_SPINLOCK(xenpf_lock); if ( !IS_PRIV(current->domain) ) return -EPERM; @@ 
-105,8 +111,15 @@ long do_platform_op(XEN_GUEST_HANDLE(xen case XENPF_microcode_update: { extern int microcode_update(XEN_GUEST_HANDLE(void), unsigned long len); +#ifndef COMPAT ret = microcode_update(op->u.microcode.data, op->u.microcode.length); +#else + XEN_GUEST_HANDLE(void) data; + + guest_from_compat_handle(data, op->u.microcode.data); + ret = microcode_update(data, op->u.microcode.length); +#endif } break; diff -r 5568efb41da4 -r 3f6a2745b3a3 xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Mon Jan 15 13:27:20 2007 -0500 +++ b/xen/arch/x86/setup.c Wed Jan 17 09:56:40 2007 -0500 @@ -18,6 +18,10 @@ #include <xen/keyhandler.h> #include <xen/numa.h> #include <public/version.h> +#ifdef CONFIG_COMPAT +#include <compat/platform.h> +#include <compat/xen.h> +#endif #include <asm/bitops.h> #include <asm/smp.h> #include <asm/processor.h> @@ -407,6 +411,23 @@ void __init __start_xen(multiboot_info_t printk("WARNING: Buggy e820 map detected and fixed " "(truncated length fields).\n"); + /* Ensure that all E820 RAM regions are page-aligned and -sized. */ + for ( i = 0; i < e820_raw_nr; i++ ) + { + uint64_t s, e; + if ( e820_raw[i].type != E820_RAM ) + continue; + s = PFN_UP(e820_raw[i].addr); + e = PFN_DOWN(e820_raw[i].addr + e820_raw[i].size); + e820_raw[i].size = 0; /* discarded later */ + if ( s < e ) + { + e820_raw[i].addr = s << PAGE_SHIFT; + e820_raw[i].size = (e - s) << PAGE_SHIFT; + } + } + + /* Sanitise the raw E820 map to produce a final clean version. 
*/ max_page = init_e820(e820_raw, &e820_raw_nr); modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start; @@ -419,7 +440,7 @@ void __init __start_xen(multiboot_info_t printk("Not enough memory to stash the DOM0 kernel image.\n"); for ( ; ; ) ; } - + if ( (e820.map[i].type == E820_RAM) && (e820.map[i].size >= modules_length) && ((e820.map[i].addr + e820.map[i].size) >= @@ -470,10 +491,10 @@ void __init __start_xen(multiboot_info_t start = PFN_UP(e820.map[i].addr); end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); /* Clip the range to exclude what the bootstrapper initialised. */ - if ( end < init_mapped ) - continue; if ( start < init_mapped ) start = init_mapped; + if ( end <= start ) + continue; /* Request the mapping. */ map_pages_to_xen( PAGE_OFFSET + (start << PAGE_SHIFT), @@ -482,7 +503,7 @@ void __init __start_xen(multiboot_info_t #endif } - if ( kexec_crash_area.size > 0 ) + if ( kexec_crash_area.size > 0 && kexec_crash_area.start > 0) _______________________________________________ Xen-ppc-devel mailing list Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-ppc-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |